Convert input tensor to gray image in hybrid_forward


#1

I am creating a multi-output example with gluoncv. The purpose of this example is to predict the color and the type of clothes with a two-branch network. My problem is that I don’t know how to convert the input symbol to a grayscale image inside the hybrid_forward function.

My solution is

  1. create a constant symbol
  2. convert input symbol to gray scale image with 1 channel
  3. concatenate the grayscale image into 3 channels (since I am using a pretrained network)

But the program throws an error: AssertionError: Unknown input to HybridBlock: rgb_weights

Source codes

from gluoncv import model_zoo
from mxnet import nd
from mxnet.gluon import nn, HybridBlock
from mxnet import init

import mxnet as mx

class mx_symbol_constant(mx.init.Initializer):
    """Initializer that fills the target array from one fixed value."""

    def __init__(self, value):
        # Hand the value to the base initializer as a keyword argument and
        # keep it on the instance so _init_weight can read it back.
        super(mx_symbol_constant, self).__init__(value=value)
        self.value = value

    def _init_weight(self, _, arr):
        """Assign the stored value, converted with mx.nd.array, into arr in place."""
        constant = mx.nd.array(self.value)
        arr[:] = constant

class fashion_net_2_branches(HybridBlock):
    """Two-branch network on top of pretrained ResNet50_v2 features.

    The clothes branch is meant to run on a grayscale version of the input and
    the color branch on the input unchanged; both branches call the same
    ``self._features`` block and each returns its own result.
    """
    def __init__(self, num_clothes, num_colors, ctx):
        """Create the feature extractor and the two classification heads.

        num_clothes: number of units of the clothes output layer.
        num_colors: number of units of the color output layer.
        ctx: passed to the pretrained model and to every initialize() call below.
        """
        super(fashion_net_2_branches, self).__init__()
        self._features = model_zoo.get_model('ResNet50_v2', pretrained=True, ctx = ctx).features
        # Request no gradients for any parameter of the pretrained features.
        for _, w in self._features.collect_params().items():
            w.grad_req = 'null'

        # Layers shared by both heads. _relu is created here but is not
        # called in hybrid_forward.
        self._flatten = nn.Flatten()
        self._dropout = nn.Dropout(0.5)
        self._relu = nn.Activation(activation='relu')
        self._swish = nn.Swish()

        # RGB -> gray weights, declared as a raw symbol variable.
        # NOTE(review): this is an mx.sym.Variable stored as an attribute, not
        # a parameter obtained from the block; the AssertionError quoted in
        # hybrid_forward names this variable. Presumably it has to be
        # registered as a block parameter instead -- confirm.
        self._rgb_weights = mx.sym.Variable('rgb_weights', shape = (3, 1), init = mx_symbol_constant([0.2989, 0.5870, 0.1140]))

        # Clothes head: Dense(100) -> BatchNorm -> Dense(num_clothes).
        self._clothes_fc_1 = nn.Dense(100)
        self._clothes_bn_1 = nn.BatchNorm(center=False, scale=True)
        self._clothes_out = nn.Dense(num_clothes)

        # Each head layer is initialized right away on ctx.
        self._clothes_fc_1.initialize(init=init.Xavier(), ctx=ctx)
        self._clothes_bn_1.initialize(init=init.Zero(), ctx=ctx)
        self._clothes_out.initialize(init=init.Xavier(), ctx=ctx)

        # Color head: same layout as the clothes head, with num_colors outputs.
        self._color_fc_1 = nn.Dense(100)
        self._color_bn_1 = nn.BatchNorm(center=False, scale=True)
        self._color_out = nn.Dense(num_colors)

        self._color_fc_1.initialize(init=init.Xavier(), ctx=ctx)
        self._color_bn_1.initialize(init=init.Zero(), ctx=ctx)
        self._color_out.initialize(init=init.Xavier(), ctx=ctx)

    def hybrid_forward(self, F, x):
        """Return the tuple (clothes_result, color_result) computed from x.

        NOTE(review): x is presumably a batch of 3-channel RGB images; the
        axis layout is not fixed by this code -- confirm against the caller.
        """
        # Convert x to a 1-channel grayscale image.
        # NOTE(review): this calls mx.sym directly rather than the F argument,
        # and the shape of the dot product is not established here -- confirm.
        tensor_img_gray = mx.sym.dot(self._rgb_weights, x) # intended: a single gray channel
        # Repeat the gray image three times along dim=1 to get 3 channels.
        tensor_imgs_gray = mx.sym.concat(tensor_img_gray, tensor_img_gray, tensor_img_gray, dim=1)

        #this line throw error : AssertionError: Unknown input to HybridBlock: rgb_weights
        # NOTE(review): the 3-channel tensor_imgs_gray built above is not used;
        # the 1-channel tensor_img_gray is passed instead -- looks like a typo,
        # confirm.
        tensor_img_gray = self._features(tensor_img_gray)

        # Clothes head: flatten -> dense -> swish -> batch norm -> dropout -> output.
        clothes_result = self._flatten(tensor_img_gray)
        clothes_result = self._clothes_fc_1(clothes_result)
        clothes_result = self._swish(clothes_result)
        clothes_result = self._clothes_bn_1(clothes_result)
        clothes_result = self._dropout(clothes_result)
        clothes_result = self._clothes_out(clothes_result)

        # Color head: same sequence of layers, applied to features of the
        # original input x.
        x = self._features(x)
        color_result = self._flatten(x)
        color_result = self._color_fc_1(color_result)
        color_result = self._swish(color_result)
        color_result = self._color_bn_1(color_result)
        color_result = self._dropout(color_result)
        color_result = self._color_out(color_result)

        return clothes_result, color_result

#2

Hi, there are a few bugs in your code. First, I suggest not doing the conversion to grayscale inside hybrid_forward, but using an external library (OpenCV) instead, for reasons summarized here. It is fairly easy to do this before you feed the image to the network, with

import cv2
image = ... # read in RGB format
# Convert the RGB image to grayscale with OpenCV before it reaches the network.
gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

You can encode this transformation in your gluon.data.Dataset.

Now, this is a working version of your code. You need to initialize externally after you declare the model.

In [15]: class fashion_net_2_branches(HybridBlock):
    ...:     def __init__(self, num_clothes, num_colors, ctx):
    ...:         super(fashion_net_2_branches, self).__init__()
    ...:         
    ...: 
    ...: 
    ...:       
    ...:         self._features = model_zoo.get_model('ResNet50_v2', pretrained=True, ctx = ctx).features
    ...:             
    ...:         with self.name_scope():
    ...:             for _, w in self._features.collect_params().items():
    ...:                 w.grad_req = 'null'
    ...:                 
    ...:             self._flatten = nn.Flatten()
    ...:             self._dropout = nn.Dropout(0.5)
    ...:             self._relu = nn.Activation(activation='relu')
    ...:             self._swish = nn.Swish()
    ...:                 
    ...:             #self._rgb_weights = self.params.get_constant('_rgb_weights', value= [0.2989, 0.5870, 0.1140])        
    ...:         
    ...:             self._clothes_fc_1 = nn.Dense(100)
    ...:             self._clothes_bn_1 = nn.BatchNorm(center=False, scale=True)
    ...:             self._clothes_out = nn.Dense(num_clothes)
    ...:         
    ...:             #self._clothes_fc_1.initialize(init.Xavier(), ctx=ctx)
    ...:             #self._clothes_bn_1.initialize(init.Zero(), ctx=ctx)
    ...:             #self._clothes_out.initialize(init.Xavier(), ctx=ctx)
    ...:             
    ...:             self._color_fc_1 = nn.Dense(100)
    ...:             self._color_bn_1 = nn.BatchNorm(center=False, scale=True)
    ...:             self._color_out = nn.Dense(num_colors)
    ...: 
    ...:             #self._color_fc_1.initialize(init.Xavier(), ctx=ctx)
    ...:             #self._color_bn_1.initialize(init.Zero(), ctx=ctx)
    ...:             #self._color_out.initialize(init.Xavier(), ctx=ctx)
    ...: 
    ...:            
    ...:             self.rgb_weights = self.params.get('rgb_weights', shape=(1,3,1,1),init=mx.init.Constant([[[[0.2989]], [[0.5870]], [[0.1140]]]] ), differentiable=False)
    ...: 
    ...: 
    ...:     def hybrid_forward(self, F, x, rgb_weights):
    ...:         #convert x to gray scale image, 1 channel        
    ...:         # There was a bug in your initial implementation in the matrix multiplication for going to gray.
    ...:         tensor_img_gray = F.sum(F.broadcast_mul(rgb_weights,x),axis=1,keepdims=True) # the shape become [batch, H, W, 1]
    ...:         #print (tensor_img_gray.shape) # for debugging
    ...:         #convert tensor_img_gray  to [batch, H, W, 3]
    ...:         tensor_imgs_gray = F.concat(tensor_img_gray, tensor_img_gray, tensor_img_gray, dim=1)
    ...: 
    ...:         #in the original code this line threw: AssertionError: Unknown input to HybridBlock: rgb_weights
    ...:         tensor_img_gray = self._features(tensor_imgs_gray)
    ...: 
    ...:         clothes_result = self._flatten(tensor_img_gray)
    ...:         clothes_result = self._clothes_fc_1(clothes_result)
    ...:         clothes_result = self._swish(clothes_result)
    ...:         clothes_result = self._clothes_bn_1(clothes_result)
    ...:         clothes_result = self._dropout(clothes_result)
    ...:         clothes_result = self._clothes_out(clothes_result)        
    ...:         
    ...:         x = self._features(x)
    ...:         color_result = self._flatten(x)
    ...:         color_result = self._color_fc_1(color_result)
    ...:         color_result = self._swish(color_result)
    ...:         color_result = self._color_bn_1(color_result)
    ...:         color_result = self._dropout(color_result)
    ...:         color_result = self._color_out(color_result)        
    ...: 
    ...:         return clothes_result, color_result

example usage:

In [19]: net = fashion_net_2_branches(5,3,mx.cpu())

In [20]: xx = nd.random.uniform(shape=[15,3,256,256])

In [21]: net.initialize()
# Emits a bunch of warnings, since part of the network is pretrained, i.e. already initialized

In [23]: out = net(xx)
In [24]: out[1].shape
Out[24]: (15, 3)

In [25]: out[0].shape
Out[25]: (15, 5)

I didn’t spend much time fixing the initialization issue (apologies); I hope this is a working starting point from which you can solve it completely.


#3

Thanks, but I need to convert the image to grayscale in hybrid_forward because this is a multi-branch network: one branch predicts the type of the clothes (shoes, shirt, jeans, etc.), and another branch predicts the color of the clothes.

This is not a bug: initializing the layers in the constructor is fine because gluoncv supports deferred initialization, and my code works without the gray conversion. The gluoncv example does the same thing too.


#4

You are right that you can initialize the weights internally; this version does not require external initialization:

In [13]: class fashion_net_2_branches(HybridBlock):
    ...:     def __init__(self, num_clothes, num_colors, ctx):
    ...:         super(fashion_net_2_branches, self).__init__()
    ...:         
    ...: 
    ...: 
    ...:       
    ...:         self._features = model_zoo.get_model('ResNet50_v2', pretrained=True, ctx = ctx).features
    ...:             
    ...:         with self.name_scope():
    ...:             for _, w in self._features.collect_params().items():
    ...:                 w.grad_req = 'null'
    ...:                 
    ...:             self._flatten = nn.Flatten()
    ...:             self._dropout = nn.Dropout(0.5)
    ...:             self._relu = nn.Activation(activation='relu')
    ...:             self._swish = nn.Swish()
    ...:                 
    ...:             #self._rgb_weights = self.params.get_constant('_rgb_weights', value= [0.2989, 0.5870, 0.1140])        
    ...:         
    ...:             self._clothes_fc_1 = nn.Dense(100)
    ...:             self._clothes_bn_1 = nn.BatchNorm(center=False, scale=True)
    ...:             self._clothes_out = nn.Dense(num_clothes)
    ...:         
    ...:             self._clothes_fc_1.initialize(init.Xavier(), ctx=ctx)
    ...:             self._clothes_bn_1.initialize(init.Zero(), ctx=ctx)
    ...:             self._clothes_out.initialize(init.Xavier(), ctx=ctx)
    ...:             
    ...:             self._color_fc_1 = nn.Dense(100)
    ...:             self._color_bn_1 = nn.BatchNorm(center=False, scale=True)
    ...:             self._color_out = nn.Dense(num_colors)
    ...: 
    ...:             self._color_fc_1.initialize(init.Xavier(), ctx=ctx)
    ...:             self._color_bn_1.initialize(init.Zero(), ctx=ctx)
    ...:             self._color_out.initialize(init.Xavier(), ctx=ctx)
    ...: 
    ...:             #self._rgb_weights = self.params.get_constant('_rgb_weights', value=nd.array([0.2989, 0.5870, 0.1140],ctx=ctx).initialize(ctx=ctx)
    ...:             self.rgb_weights = self.params.get('rgb_weights', shape=(1,3,1,1),init=mx.init.Constant([[[[0.2989]], [[0.5870]], [[0.1140]]]] ), differentiable=False)
    ...:             self.rgb_weights.initialize(ctx=ctx)
    ...: 
    ...:     def hybrid_forward(self, F, x, rgb_weights):
    ...:         #convert x to gray scale image, 1 channel        
    ...:         tensor_img_gray = F.sum(F.broadcast_mul(rgb_weights,x),axis=1,keepdims=True) # the shape become [batch, H, W, 1]
    ...:         print (tensor_img_gray.shape)
    ...:         #convert tensor_img_gray  to [batch, H, W, 3]
    ...:         tensor_imgs_gray = F.concat(tensor_img_gray, tensor_img_gray, tensor_img_gray, dim=1)
    ...: 
    ...:         #in the original code this line threw: AssertionError: Unknown input to HybridBlock: rgb_weights
    ...:         tensor_img_gray = self._features(tensor_imgs_gray)
    ...: 
    ...:         clothes_result = self._flatten(tensor_img_gray)
    ...:         clothes_result = self._clothes_fc_1(clothes_result)
    ...:         clothes_result = self._swish(clothes_result)
    ...:         clothes_result = self._clothes_bn_1(clothes_result)
    ...:         clothes_result = self._dropout(clothes_result)
    ...:         clothes_result = self._clothes_out(clothes_result)        
    ...:         
    ...:         x = self._features(x)
    ...:         color_result = self._flatten(x)
    ...:         color_result = self._color_fc_1(color_result)
    ...:         color_result = self._swish(color_result)
    ...:         color_result = self._color_bn_1(color_result)
    ...:         color_result = self._dropout(color_result)
    ...:         color_result = self._color_out(color_result)        
    ...: 
    ...:         return clothes_result, color_result
    ...: 

I understand (and respect) that it is a matter of design choice. However, it is also possible to provide two inputs to the network if you want to use both grayscale and RGB, something like:

class YourNet(HybridBlock):
    def __init__(...)


    def hybrid_forward(self, F, x_gray, x_rgb):
        # do stuff with x_gray

       # do stuff with x_rgb. 

In this way you can be more certain that the conversion to grayscale is handled properly, and you don’t need to define the rgb_weights. On the downside, the conversion to gray will probably run on the CPU rather than the GPU, but I doubt it will be the bottleneck of your network.


#5

Your solution should work

I am not doing this for performance reasons but for readability, and I believe an API like that is easier to use, especially when you need to deploy the network in real-world applications.
Loading the model from the C++ API and running inference are complicated enough already, especially when you need to perform batch processing (compared with the Python API); I do not want to increase the burden when I need to deploy the model.

P.S.: In most cases, I think training a different model for each task is much easier and more practical.