MXNet Forum

Building wide networks with gluon hybridblocks


#1

I am defining a network that will have many convolutional layers all taking the same input. gluon.HybridSequential blocks make it very easy to build deep networks; however, I cannot see an elegant way to define a wide network with gluon.

Here is an example. In WideNet I need to define each ConvPool block one by one in __init__ and then apply the convolution one by one in hybrid_forward.

class ConvPool(gluon.nn.HybridBlock):
    """A Conv1D layer followed by global max pooling over the temporal axis.

    Parameters
    ----------
    channels : int
        Number of output channels of the convolution.
    kernel_size : int
        Temporal width of the convolution kernel.
    padding : int, default 0
        Zero-padding applied to both ends of the input.
    """

    def __init__(self, channels, kernel_size, padding=0):
        super().__init__()
        # Fix: `padding` was accepted but silently ignored; forward it to the
        # conv layer (default 0 matches the previous effective behavior).
        self.conv = gluon.nn.Conv1D(channels, kernel_size, strides=1, padding=padding)
        self.maxpool = gluon.nn.GlobalMaxPool1D()

    def hybrid_forward(self, F, x, *args, **kwargs):
        # Convolve, then reduce the temporal dimension to length 1 via max.
        c = self.conv(x)
        return self.maxpool(c)

class WideNet(gluon.nn.HybridBlock):
    """Three parallel Conv1D branches over the same input, concatenated
    along the channel axis.

    Parameters
    ----------
    filters : sequence of int, default (3, 4, 5)
        Kernel sizes of the three parallel convolutions (exactly three
        are supported by this manual implementation).
    num_filter : int, default 50
        Number of output channels per branch.
    """

    # NOTE: a tuple default avoids the shared-mutable-default pitfall
    # that `filters=[3, 4, 5]` has.
    def __init__(self, filters=(3, 4, 5), num_filter=50):
        super().__init__()

        self.conv1 = gluon.nn.Conv1D(channels=num_filter, kernel_size=filters[0], strides=1)
        self.conv2 = gluon.nn.Conv1D(channels=num_filter, kernel_size=filters[1], strides=1)
        self.conv3 = gluon.nn.Conv1D(channels=num_filter, kernel_size=filters[2], strides=1)

    def hybrid_forward(self, F, x, *args, **kwargs):
        # Each branch sees the same input; outputs are stacked channel-wise.
        c1 = self.conv1(x)
        c2 = self.conv2(x)
        c3 = self.conv3(x)
        return F.concat(c1, c2, c3, dim=1)

Instead I would like to define the block so that it can take list filters of any size, without manually needing to change the block. For example WideNet(filters=[3, 4, 5, 6, 7, 8, 9]).

So far the best I can find is this, but it’s not ideal.


#2

It’s pretty easy to do. Here is your example modified to do what you want:

class ConvPool(gluon.nn.HybridBlock):
    """A stack of Conv1D layers (applied sequentially) followed by global
    max pooling.

    Child blocks kept in a plain list are invisible to Gluon, so each conv
    is registered explicitly via ``register_child`` — otherwise
    ``collect_params()`` would miss their parameters.

    Parameters
    ----------
    filters : sequence of int
        Output channels of each successive convolution.
    kernel_size : int
        Kernel width shared by all convolutions.
    padding : int, default 0
        Zero-padding for each convolution. Fix: this was previously a
        required parameter that was never used, which made calls without
        it (as in the example below) fail.
    """

    def __init__(self, filters, kernel_size, padding=0):
        super().__init__()
        self.conv_list = list()
        for f in filters:
            conv = gluon.nn.Conv1D(f, kernel_size, strides=1, padding=padding)
            # Manual registration is required for blocks stored in a list.
            self.register_child(conv)
            self.conv_list.append(conv)
        self.maxpool = gluon.nn.GlobalMaxPool1D()

    def hybrid_forward(self, F, x, *args, **kwargs):
        # Apply the convolutions in sequence, then pool over time.
        for conv in self.conv_list:
            x = conv(x)
        return self.maxpool(x)

# Sanity check: 7 stacked convolutions, then global max pooling.
# `padding=0` is passed explicitly because ConvPool declares it as a
# required parameter (the original call omitted it and would fail).
net = ConvPool(filters=[3, 4, 5, 6, 7, 8, 9], kernel_size=3, padding=0)
net.initialize()
net.summary(nd.empty((16, 1, 100)))  # batch 16, 1 channel, sequence length 100

Outputs:

--------------------------------------------------------------------------------
        Layer (type)                                Output Shape         Param #
================================================================================
               Input                                (16, 1, 100)               0
            Conv1D-1                                 (16, 3, 98)              12
            Conv1D-2                                 (16, 4, 96)              40
            Conv1D-3                                 (16, 5, 94)              65
            Conv1D-4                                 (16, 6, 92)              96
            Conv1D-5                                 (16, 7, 90)             133
            Conv1D-6                                 (16, 8, 88)             176
            Conv1D-7                                 (16, 9, 86)             225
   GlobalMaxPool1D-8                                  (16, 9, 1)               0
          ConvPool-9                                  (16, 9, 1)               0
================================================================================
Parameters in forward computation graph, duplicate included
   Total params: 747
   Trainable params: 747
   Non-trainable params: 0
Shared params in forward computation graph: 0
Unique parameters in model: 747
--------------------------------------------------------------------------------

Just remember that if you’re adding child blocks to anything but a simple attribute, then you have to register them with the block explicitly or block.collect_params() fails to add their parameters to the collection.


#3

Thanks so much this is precisely what I was looking for.

However, I find that print(net) no longer shows child blocks when defined in this way. Is there a workaround?


#4

Here is an alternative that registers child blocks as attributes of the block as well. Note that I realized my first response was not for WideNet, so this example is a modified version of your WideNet. Also note that my implementation only works for odd kernel sizes, because of the way padding is implemented in mxnet. Alternatively, you can slice before concat to make the dimensions match.

class WideNet(gluon.nn.HybridBlock):
    """Parallel Conv1D branches of different kernel sizes over the same
    input, concatenated along the channel axis.

    Each conv is registered as an attribute (via ``__setattr__``) so it
    shows up both in ``collect_params()`` and in ``print(net)``.

    NOTE: same-length outputs rely on ``padding=f // 2``, which only
    matches for odd kernel sizes.

    Parameters
    ----------
    filters : sequence of int, default (3, 5, 7)
        Kernel size of each parallel branch (one conv per entry).
    num_filter : int, default 50
        Number of output channels per branch.
    """

    # NOTE: a tuple default avoids the shared-mutable-default pitfall
    # that `filters=[3, 5, 7]` has.
    def __init__(self, filters=(3, 5, 7), num_filter=50):
        super().__init__()

        self.conv_list = list()
        for f in filters:
            conv = gluon.nn.Conv1D(channels=num_filter, kernel_size=f, strides=1, padding=f // 2)
            # Attribute assignment registers the child AND makes it
            # visible in repr(net).
            self.__setattr__(conv.name, conv)
            self.conv_list.append(conv)

    def hybrid_forward(self, F, x, *args, **kwargs):
        # All branches consume the same input; concatenate channel-wise.
        out_conv = [conv(x) for conv in self.conv_list]
        return F.concat(*out_conv, dim=1)


# Demo: four parallel branches (all odd kernel sizes, so outputs align).
net = WideNet(filters=[3, 5, 7, 9])
net.initialize()
# batch 16, 1 input channel, sequence length 100
net.summary(nd.empty((16, 1, 100)))
print(net)

Outputs:

--------------------------------------------------------------------------------
        Layer (type)                                Output Shape         Param #
================================================================================
               Input                                (16, 1, 100)               0
            Conv1D-1                               (16, 50, 100)             200
            Conv1D-2                               (16, 50, 100)             300
            Conv1D-3                               (16, 50, 100)             400
            Conv1D-4                               (16, 50, 100)             500
           WideNet-5                              (16, 200, 100)               0
================================================================================
Parameters in forward computation graph, duplicate included
   Total params: 1400
   Trainable params: 1400
   Non-trainable params: 0
Shared params in forward computation graph: 0
Unique parameters in model: 1400
--------------------------------------------------------------------------------
WideNet(
  (conv0): Conv1D(1 -> 50, kernel_size=(3,), stride=(1,), padding=(1,))
  (conv1): Conv1D(1 -> 50, kernel_size=(5,), stride=(1,), padding=(2,))
  (conv2): Conv1D(1 -> 50, kernel_size=(7,), stride=(1,), padding=(3,))
  (conv3): Conv1D(1 -> 50, kernel_size=(9,), stride=(1,), padding=(4,))
)

#5

To add to @safrooze’s solution, you can also use HybridSequential() as a container, see #10101:

In [8]: class WideNet(gluon.nn.HybridBlock):
   ...:     def __init__(self, filters=[3, 5, 7], num_filter=50):
   ...:         super().__init__()
   ...: 
   ...:         self.convs = gluon.nn.HybridSequential()
   ...:         for f in filters:
   ...:             self.convs.add( gluon.nn.Conv1D(channels=num_filter, kernel_size=f, strides=1, padding=f//2))
   ...:             #self.__setattr__(conv.name, conv)
   ...:             #self.conv_list.append(conv)
   ...: 
   ...:     def hybrid_forward(self, F, x, *args, **kwargs):
   ...:         out_conv = [conv(x) for conv in self.convs] 
   ...:         c = F.concat(*out_conv, dim=1)
   ...:         return c
   ...: 

In [9]: net = WideNet(filters=[3, 5, 7, 9])
   ...: net.initialize()
   ...: net.summary(nd.empty((16, 1, 100)))
   ...: print(net)
   ...: 
--------------------------------------------------------------------------------
        Layer (type)                                Output Shape         Param #
================================================================================
               Input                                (16, 1, 100)               0
            Conv1D-1                               (16, 50, 100)             200
            Conv1D-2                               (16, 50, 100)             300
            Conv1D-3                               (16, 50, 100)             400
            Conv1D-4                               (16, 50, 100)             500
           WideNet-5                              (16, 200, 100)               0
================================================================================
Total params: 1400
Trainable params: 1400
Non-trainable params: 0
Shared params: 0
--------------------------------------------------------------------------------
WideNet(
  (convs): HybridSequential(
    (0): Conv1D(1 -> 50, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): Conv1D(1 -> 50, kernel_size=(5,), stride=(1,), padding=(2,))
    (2): Conv1D(1 -> 50, kernel_size=(7,), stride=(1,), padding=(3,))
    (3): Conv1D(1 -> 50, kernel_size=(9,), stride=(1,), padding=(4,))
  )
)