Convert input tensor to gray image in hybrid_forward

I am building a multi-output example with gluoncv. The purpose of this example is to predict the color and the type of clothes with a two-branch network. My problem is that I don’t know how to convert the input symbol to a gray scale image inside the hybrid_forward function.

My solution is

  1. create a constant symbol that holds the RGB-to-gray weights
  2. convert the input symbol to a gray scale image with 1 channel
  3. concatenate the gray scale image into 3 channels (since I am using a pretrained network)

But the program throws an error: AssertionError: Unknown input to HybridBlock: rgb_weights

Source codes

from gluoncv import model_zoo
from mxnet import nd
from mxnet.gluon import nn, HybridBlock
from mxnet import init

import mxnet as mx

class mx_symbol_constant(mx.init.Initializer):
    """Initializer that fills an array with a fixed, caller-supplied value."""

    def __init__(self, value):
        # The value is handed to the base class as a keyword argument and is
        # also kept on the instance for use in _init_weight.
        super(mx_symbol_constant, self).__init__(value=value)
        self.value = value

    def _init_weight(self, _, arr):
        # Build an NDArray from the stored value, then overwrite the target
        # array in place through slice assignment.
        constant = mx.nd.array(self.value)
        arr[:] = constant

class fashion_net_2_branches(HybridBlock):
    """Two-head classifier from the question: clothes type and clothes color.

    Both heads are fed by the same pretrained ResNet50_v2 feature extractor.
    This is the asker's original version, reported to fail with
    ``AssertionError: Unknown input to HybridBlock: rgb_weights``.
    """
    def __init__(self, num_clothes, num_colors, ctx):
        super(fashion_net_2_branches, self).__init__()
        # pretrained backbone; only its `features` part is kept
        self._features = model_zoo.get_model('ResNet50_v2', pretrained=True, ctx = ctx).features
        # freeze the backbone: no gradient is requested for any of its parameters
        for _, w in self._features.collect_params().items():
            w.grad_req = 'null'
			
        self._flatten = nn.Flatten()
        self._dropout = nn.Dropout(0.5)
        # NOTE(review): _relu is never referenced in hybrid_forward below
        self._relu = nn.Activation(activation='relu')
        self._swish = nn.Swish()
                
        # NOTE(review): this is a raw mx.sym.Variable stored as an attribute; it is not
        # created through self.params, which is presumably why Gluon later reports
        # rgb_weights as an unknown input -- confirm against the HybridBlock docs
        self._rgb_weights = mx.sym.Variable('rgb_weights', shape = (3, 1), init = mx_symbol_constant([0.2989, 0.5870, 0.1140]))        
        
        # head that predicts the clothes type (num_clothes outputs)
        self._clothes_fc_1 = nn.Dense(100)
        self._clothes_bn_1 = nn.BatchNorm(center=False, scale=True)
        self._clothes_out = nn.Dense(num_clothes)
        
        # the head layers are initialized right here in the constructor
        self._clothes_fc_1.initialize(init=init.Xavier(), ctx=ctx)
        self._clothes_bn_1.initialize(init=init.Zero(), ctx=ctx)
        self._clothes_out.initialize(init=init.Xavier(), ctx=ctx)
		
        # head that predicts the clothes color (num_colors outputs)
        self._color_fc_1 = nn.Dense(100)
        self._color_bn_1 = nn.BatchNorm(center=False, scale=True)
        self._color_out = nn.Dense(num_colors)
		
        self._color_fc_1.initialize(init=init.Xavier(), ctx=ctx)
        self._color_bn_1.initialize(init=init.Zero(), ctx=ctx)
        self._color_out.initialize(init=init.Xavier(), ctx=ctx)
		
    def hybrid_forward(self, F, x):
        # Returns the pair (clothes_result, color_result).
        # NOTE(review): the gray conversion below calls mx.sym directly and ignores
        # the F argument passed to hybrid_forward.
        #convert x to gray scale image, 1 channel        
        tensor_img_gray = mx.sym.dot(self._rgb_weights, x) # asker expects shape [batch, H, W, 1] -- NOTE(review): unverified
        #convert tensor_img_gray  to [batch, H, W, 3] (asker's expectation, unverified)
        tensor_imgs_gray = mx.sym.concat(tensor_img_gray, tensor_img_gray, tensor_img_gray, dim=1)

        #this line throw error : AssertionError: Unknown input to HybridBlock: rgb_weights
        # NOTE(review): the backbone receives tensor_img_gray here, not the
        # concatenated tensor_imgs_gray built on the previous statement
        tensor_img_gray = self._features(tensor_img_gray)
		
        # clothes-type branch, fed by the gray-image features
        clothes_result = self._flatten(tensor_img_gray)
        clothes_result = self._clothes_fc_1(clothes_result)
        clothes_result = self._swish(clothes_result)		
        clothes_result = self._clothes_bn_1(clothes_result)
        clothes_result = self._dropout(clothes_result)
        clothes_result = self._clothes_out(clothes_result)        
		
        # color branch: the unmodified input x goes through the same backbone
        x = self._features(x)
        color_result = self._flatten(x)
        color_result = self._color_fc_1(color_result)
        color_result = self._swish(color_result)
        color_result = self._color_bn_1(color_result)
        color_result = self._dropout(color_result)
        color_result = self._color_out(color_result)        
		
        return clothes_result, color_result

Hi, there are a few bugs in your code. First, I suggest not doing the conversion to gray scale inside hybrid_forward, but using an external library (OpenCV) instead, for reasons summarized here. It is fairly easy to do this before you feed the image to the network, with

import cv2
image = ... # placeholder: load your image here, in RGB channel order
# ask OpenCV for an RGB -> gray scale conversion of the loaded image
gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

You can build this transformation into your gluon.data.Dataset.

Now, this is a working version of your code. You need to initialize the network externally, after you declare the model.

In [15]: class fashion_net_2_branches(HybridBlock):
    ...:     def __init__(self, num_clothes, num_colors, ctx):
    ...:         super(fashion_net_2_branches, self).__init__()
    ...:         # Working version: rgb_weights is registered through self.params and arrives as a hybrid_forward argument.
    ...: 
    ...: 
    ...:       
    ...:         self._features = model_zoo.get_model('ResNet50_v2', pretrained=True, ctx = ctx).features
    ...:             # the pretrained backbone is created before name_scope(); its parameters are frozen just below
    ...:         with self.name_scope():
    ...:             for _, w in self._features.collect_params().items():
    ...:                 w.grad_req = 'null'
    ...:                 
    ...:             self._flatten = nn.Flatten()
    ...:             self._dropout = nn.Dropout(0.5)
    ...:             self._relu = nn.Activation(activation='relu')
    ...:             self._swish = nn.Swish()
    ...:                 
    ...:             #self._rgb_weights = self.params.get_constant('_rgb_weights', value= [0.2989, 0.5870, 0.1140])        
    ...:         
    ...:             self._clothes_fc_1 = nn.Dense(100)
    ...:             self._clothes_bn_1 = nn.BatchNorm(center=False, scale=True)
    ...:             self._clothes_out = nn.Dense(num_clothes)
    ...:         # per-layer initialize() calls are disabled in this version; the caller runs net.initialize() instead
    ...:             #self._clothes_fc_1.initialize(init.Xavier(), ctx=ctx)
    ...:             #self._clothes_bn_1.initialize(init.Zero(), ctx=ctx)
    ...:             #self._clothes_out.initialize(init.Xavier(), ctx=ctx)
    ...:             
    ...:             self._color_fc_1 = nn.Dense(100)
    ...:             self._color_bn_1 = nn.BatchNorm(center=False, scale=True)
    ...:             self._color_out = nn.Dense(num_colors)
    ...: 
    ...:             #self._color_fc_1.initialize(init.Xavier(), ctx=ctx)
    ...:             #self._color_bn_1.initialize(init.Zero(), ctx=ctx)
    ...:             #self._color_out.initialize(init.Xavier(), ctx=ctx)
    ...: 
    ...:            # gray scale weights as a non-differentiable parameter of shape (1,3,1,1): one weight per entry of axis 1
    ...:             self.rgb_weights = self.params.get('rgb_weights', shape=(1,3,1,1),init=mx.init.Constant([[[[0.2989]], [[0.5870]], [[0.1140]]]] ), differentiable=False)
    ...: 
    ...: 
    ...:     def hybrid_forward(self, F, x, rgb_weights):
    ...:         #convert x to gray scale image, 1 channel        
                 # The dot() call from the question is replaced here (the answer identifies it as buggy): weight each channel, then sum over axis 1.
    ...:         tensor_img_gray = F.sum(F.broadcast_mul(rgb_weights,x),axis=1,keepdims=True) # axis 1 is reduced with keepdims, so the shape becomes [batch, 1, H, W]
    ...:         #print (tensor_img_gray.shape) # for debugging
    ...:         #repeat the gray channel three times along axis 1 -> [batch, 3, H, W]
    ...:         tensor_imgs_gray = F.concat(tensor_img_gray, tensor_img_gray, tensor_img_gray, dim=1)
    ...: 
    ...:         #the 3-channel gray image (tensor_imgs_gray) goes through the frozen backbone
    ...:         tensor_img_gray = self._features(tensor_imgs_gray)
    ...: 
    ...:         clothes_result = self._flatten(tensor_img_gray)
    ...:         clothes_result = self._clothes_fc_1(clothes_result)
    ...:         clothes_result = self._swish(clothes_result)
    ...:         clothes_result = self._clothes_bn_1(clothes_result)
    ...:         clothes_result = self._dropout(clothes_result)
    ...:         clothes_result = self._clothes_out(clothes_result)        
    ...:         # color branch: the unmodified input x goes through the same backbone
    ...:         x = self._features(x)
    ...:         color_result = self._flatten(x)
    ...:         color_result = self._color_fc_1(color_result)
    ...:         color_result = self._swish(color_result)
    ...:         color_result = self._color_bn_1(color_result)
    ...:         color_result = self._dropout(color_result)
    ...:         color_result = self._color_out(color_result)        
    ...: 
    ...:         return clothes_result, color_result

example usage:

In [19]: net = fashion_net_2_branches(5,3,mx.cpu())

In [20]: xx = nd.random.uniform(shape=[15,3,256,256])

In [21]: net.initialize()
# Spits a bunch of warnings since you have a pretrained - i.e. initialized network

In [23]: out = net(xx)
In [24]: out[1].shape
Out[24]: (15, 3)

In [25]: out[0].shape
Out[25]: (15, 5)

I didn’t spend too much time to fix the initialization issue (apologies), I hope this is a working starting point for you to solve it completely.

Thanks, but I need to convert the image to gray scale inside hybrid_forward because this is a multi-branch network: one branch predicts the type of the clothes (shoes, shirt, jeans, etc.), and the other branch predicts the color of the clothes.

This is not a bug. Initializing the layers in the constructor is fine because gluoncv supports deferred initialization; my code works without the gray conversion. The gluoncv examples do the same thing too.

You are right, in that you can initialize the weight internally, this version does not require external initialization:

n [13]: class fashion_net_2_branches(HybridBlock):
    ...:     def __init__(self, num_clothes, num_colors, ctx):
    ...:         super(fashion_net_2_branches, self).__init__()
    ...:         
    ...: 
    ...: 
    ...:       
    ...:         self._features = model_zoo.get_model('ResNet50_v2', pretrained=True, ctx = ctx).features
    ...:             
    ...:         with self.name_scope():
    ...:             for _, w in self._features.collect_params().items():
    ...:                 w.grad_req = 'null'
    ...:                 
    ...:             self._flatten = nn.Flatten()
    ...:             self._dropout = nn.Dropout(0.5)
    ...:             self._relu = nn.Activation(activation='relu')
    ...:             self._swish = nn.Swish()
    ...:                 
    ...:             #self._rgb_weights = self.params.get_constant('_rgb_weights', value= [0.2989, 0.5870, 0.1140])        
    ...:         
    ...:             self._clothes_fc_1 = nn.Dense(100)
    ...:             self._clothes_bn_1 = nn.BatchNorm(center=False, scale=True)
    ...:             self._clothes_out = nn.Dense(num_clothes)
    ...:         
    ...:             self._clothes_fc_1.initialize(init.Xavier(), ctx=ctx)
    ...:             self._clothes_bn_1.initialize(init.Zero(), ctx=ctx)
    ...:             self._clothes_out.initialize(init.Xavier(), ctx=ctx)
    ...:             
    ...:             self._color_fc_1 = nn.Dense(100)
    ...:             self._color_bn_1 = nn.BatchNorm(center=False, scale=True)
    ...:             self._color_out = nn.Dense(num_colors)
    ...: 
    ...:             self._color_fc_1.initialize(init.Xavier(), ctx=ctx)
    ...:             self._color_bn_1.initialize(init.Zero(), ctx=ctx)
    ...:             self._color_out.initialize(init.Xavier(), ctx=ctx)
    ...: 
    ...:             #self._rgb_weights = self.params.get_constant('_rgb_weights', value=nd.array([0.2989, 0.5870, 0.1140],ctx=ctx).initialize(ctx=ctx)
    ...:             self.rgb_weights = self.params.get('rgb_weights', shape=(1,3,1,1),init=mx.init.Constant([[[[0.2989]], [[0.5870]], [[0.1140]]]] ), differentiable=False)
    ...:             self.rgb_weights.initialize(ctx=ctx)
    ...: 
    ...:     def hybrid_forward(self, F, x, rgb_weights):
    ...:         #convert x to gray scale image, 1 channel        
    ...:         tensor_img_gray = F.sum(F.broadcast_mul(rgb_weights,x),axis=1,keepdims=True) # the shape become [batch, H, W, 1]
    ...:         print (tensor_img_gray.shape)
    ...:         #convert tensor_img_gray  to [batch, H, W, 3]
    ...:         tensor_imgs_gray = F.concat(tensor_img_gray, tensor_img_gray, tensor_img_gray, dim=1)
    ...: 
    ...:         #this line throw error : AssertionError: Unknown input to HybridBlock: rgb_weights
    ...:         tensor_img_gray = self._features(tensor_imgs_gray)
    ...: 
    ...:         clothes_result = self._flatten(tensor_img_gray)
    ...:         clothes_result = self._clothes_fc_1(clothes_result)
    ...:         clothes_result = self._swish(clothes_result)
    ...:         clothes_result = self._clothes_bn_1(clothes_result)
    ...:         clothes_result = self._dropout(clothes_result)
    ...:         clothes_result = self._clothes_out(clothes_result)        
    ...:         
    ...:         x = self._features(x)
    ...:         color_result = self._flatten(x)
    ...:         color_result = self._color_fc_1(color_result)
    ...:         color_result = self._swish(color_result)
    ...:         color_result = self._color_bn_1(color_result)
    ...:         color_result = self._dropout(color_result)
    ...:         color_result = self._color_out(color_result)        
    ...: 
    ...:         return clothes_result, color_result
    ...: 

I understand (and respect) that it is a matter of design choice. However, it is also possible to provide two inputs to the network if you want to use both the gray and the RGB image, something like:

class YourNet(HybridBlock):
    """Sketch of a network that receives the gray and the RGB image as two inputs."""

    def __init__(self, **kwargs):
        super(YourNet, self).__init__(**kwargs)
        # create the layers for the gray branch and for the rgb branch here

    def hybrid_forward(self, F, x_gray, x_rgb):
        # do stuff with x_gray

        # do stuff with x_rgb.

        # placeholder body: return the outputs of both branches here
        pass

In this way you can be more certain that the conversion to gray scale was handled properly, and you don’t need to define the rgb_weights. On the downside, the conversion to gray will probably run on the CPU rather than the GPU, but I doubt it will be the bottleneck of your network.

1 Like

Your solution should work

I am not doing this for performance reasons but for readability, and I believe an API like that is easier to use, especially when you need to deploy the network in real-world applications.
Loading the model from the C++ API and running inference are complicated enough already, especially when you need to perform batch processing (compared with the Python API), so I do not want to add to the burden when I need to deploy the model.

PS: In most cases, I think training a different model for each task is much easier and more practical.

1 Like