Gluon models with branches


#1

Below is a small toy example implemented with the symbol (mx.sym) API.
Is it possible to build something similar with Gluon?

import mxnet as mx

# Dimensions of the random toy batch.
batch_size = 20
channels = 3
pixels_x = 256
pixels_y = 256

# Random "images" drawn uniformly between 0 and 255, wrapped in an iterator so
# they can be fed to a Module. No batch_size is passed to the iterator, so its
# default applies.
image_shape = (batch_size, channels, pixels_x, pixels_y)
images = mx.nd.random.uniform(low=0, high=255, shape=image_shape)
data_iter = mx.io.NDArrayIter(data=images)


############ sym API #######################
# Shared input placeholder; both branches read from it.
data = mx.sym.Variable(name='data')

# Branch 1: 10-filter 2x2 convolution followed by ReLU.
conv_1 = mx.sym.Convolution(data=data, num_filter=10, kernel=(2, 2))
aktv_1 = mx.sym.Activation(data=conv_1, act_type='relu')

# Branch 2: 3-filter 2x2 convolution followed by ReLU.
conv_2 = mx.sym.Convolution(data=data, num_filter=3, kernel=(2, 2))
aktv_2 = mx.sym.Activation(data=conv_2, act_type='relu')

# Join the branches along axis 1 and fuse them with a single-filter
# convolution (no activation on the output).
cnct = mx.sym.concat(aktv_1, aktv_2, dim=1)
conv_3 = mx.sym.Convolution(data=cnct, num_filter=1, kernel=(2, 2))

# There are no labels: the module is only bound, randomly initialised and
# used for a forward pass over the iterator.
m = mx.mod.Module(symbol=conv_3, label_names=[])
m.bind(data_shapes=data_iter.provide_data)
m.init_params(initializer=mx.initializer.Uniform(scale=1.0))
yhat = m.predict(eval_data=data_iter)
print(yhat.shape)

# Optional: render the computation graph to a PNG.
#fig = mx.viz.plot_network(conv_3, save_format='png')
#fig.render('graph.gv', view=False)


############ gluon API #######################
# Incomplete Gluon attempt: the two conv+relu branches are created inside the
# container's name scope, but they are neither added to `net` nor joined.
net = mx.gluon.nn.HybridSequential()
with net.name_scope():
    c1 = mx.gluon.nn.Conv2D(channels=10, kernel_size=2, activation='relu')
    c2 = mx.gluon.nn.Conv2D(channels=3, kernel_size=2, activation='relu')
    ######################################################################
    # Open question: how to concatenate the outputs of c1 and c2, the way
    # mx.sym.concat(aktv_1, aktv_2, dim=1) does in the sym version?
    ######################################################################

Cheers
Andre


#2

Hi Andre,

class AndreNet(gluon.HybridBlock):
    """Two parallel conv+ReLU branches on one input, concatenated and fused.

    Both branches apply a 2x2 convolution followed by ReLU to the same input.
    Their outputs are concatenated along axis 1 and passed through a final
    single-channel 2x2 convolution with no activation.

    Parameters
    ----------
    nfilters1 : int
        Number of output channels of the first branch.
    nfilters2 : int
        Number of output channels of the second branch.
    **kwargs
        Forwarded to the ``HybridBlock`` constructor.
    """

    def __init__(self, nfilters1, nfilters2, **kwargs):
        # super() avoids referring to the bare name ``HybridBlock``, which is
        # not in scope when only ``gluon`` is imported.
        super(AndreNet, self).__init__(**kwargs)

        with self.name_scope():
            # Layers live in gluon.nn; there is no gluon.Conv2D.
            self.conv1 = gluon.nn.Conv2D(channels=nfilters1, kernel_size=(2, 2))
            self.conv2 = gluon.nn.Conv2D(channels=nfilters2, kernel_size=(2, 2))
            self.conv3 = gluon.nn.Conv2D(channels=1, kernel_size=(2, 2))

    def hybrid_forward(self, F, data):
        """Run both branches on ``data`` and return the fused convolution."""
        conv_1 = self.conv1(data)
        aktv_1 = F.relu(conv_1)

        conv_2 = self.conv2(data)
        aktv_2 = F.relu(conv_2)

        # concat takes the tensors as separate positional arguments, not a list.
        cnct = F.concat(aktv_1, aktv_2, dim=1)
        conv_3 = self.conv3(cnct)
        # No activation here - add one if you need it.

        return conv_3

I haven’t tested it but it should give you the idea.
Cheers,
Foivos


#3

@feevos you beat me to it!

I'm still posting my solution, which is effectively the same:

class ConcatNet(nn.HybridBlock):
    """Two conv+ReLU branches on the same input, concatenated and fused.

    The branches produce 10 and 3 channels; their concatenation is passed
    through a single-channel convolution without activation.
    """

    def __init__(self):
        super(ConcatNet, self).__init__()
        conv2d = mx.gluon.nn.Conv2D
        self.conv1 = conv2d(channels=10, kernel_size=2, activation='relu')
        self.conv2 = conv2d(channels=3, kernel_size=2, activation='relu')
        self.conv3 = conv2d(channels=1, kernel_size=2)

    def hybrid_forward(self, F, x):
        """Apply both branches to ``x``, concatenate them and fuse the result."""
        branch_a = self.conv1(x)
        branch_b = self.conv2(x)
        merged = F.concat(branch_a, branch_b)
        fused = self.conv3(merged)

        # Remove before hybridize
        debug_shapes = (
            ('x', x),
            ('x1', branch_a),
            ('x2', branch_b),
            ('c', merged),
            ('y', fused),
        )
        for label, tensor in debug_shapes:
            print(label, tensor.shape)
        return fused

# Build the network and initialise its parameters with the defaults.
net = ConcatNet()
net.initialize()

# Forward a batch of ones with shape (10, 3, 64, 64); the debug prints inside
# hybrid_forward report the shape at every stage.
x = mx.nd.ones(shape=(10, 3, 64, 64))
net(x)
x (10, 3, 64, 64)
x1 (10, 10, 63, 63)
x2 (10, 3, 63, 63)
c (10, 13, 63, 63)
y (10, 1, 62, 62)

#4

Another approach is to use gluon.contrib.nn.HybridConcurrent, which automatically concatenates the outputs of all of its children, as follows:

import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn

# Parallel part: every child of the HybridConcurrent receives the same input
# and the children's outputs are concatenated along axis 1.
cnet = gluon.contrib.nn.HybridConcurrent(axis=1)
with cnet.name_scope():
    cnet.add(nn.Conv2D(channels=10, kernel_size=2, activation='relu'))
    cnet.add(nn.Conv2D(channels=3, kernel_size=2, activation='relu'))

# Sequential part: the concatenated branches followed by the fusing convolution.
net = nn.HybridSequential()
with net.name_scope():
    net.add(cnet)
    # No activation on the final convolution, so that this matches conv_3 of
    # the sym API model it is meant to reproduce.
    net.add(nn.Conv2D(channels=1, kernel_size=2))

net.initialize()
x = mx.nd.ones((10, 3, 64, 64))
net(x)  # output shape: (10, 1, 62, 62)

#5

Thanks for the great answers guys!


#6

FYI, with your help I was able to quickly put together a shallow implementation of UNet.

class DoubleConv(nn.HybridBlock):
    """Two Conv2D layers, each followed by batch normalisation over axis 1.

    Every keyword argument is forwarded unchanged to both ``Conv2D`` layers;
    none is passed on to the ``HybridBlock`` constructor.
    """

    def __init__(self, **kwargs):
        super(DoubleConv, self).__init__()
        # First convolution and its normalisation.
        self.conv_1 = Conv2D(**kwargs)
        self.batchnorm_1 = BatchNorm(axis=1)
        # Second convolution, configured identically, and its normalisation.
        self.conv_2 = Conv2D(**kwargs)
        self.batchnorm_2 = BatchNorm(axis=1)

    def hybrid_forward(self, F, x):
        """Apply conv -> batchnorm -> conv -> batchnorm to ``x``."""
        stages = (
            self.conv_1,
            self.batchnorm_1,
            self.conv_2,
            self.batchnorm_2,
        )
        out = x
        for stage in stages:
            out = stage(out)
        return out

class Unet(nn.HybridBlock):
    """Shallow U-Net: two down-sampling steps, two up-sampling steps.

    The encoder output at each resolution is concatenated with the up-sampled
    decoder features of the same resolution. The last layer is a
    single-channel convolution with a tanh activation.
    """

    def __init__(self):
        super(Unet, self).__init__()
        # 3x3 / stride 1 / padding 1 settings shared by all regular convolutions.
        conv_cfg = dict(kernel_size=3, strides=1, padding=1)

        # Down-sampling layers.
        self.mp_1 = MaxPool2D(pool_size=(2, 2), strides=2)
        self.mp_2 = MaxPool2D(pool_size=(2, 2), strides=2)

        # Convolution stages: encoder (16, 32), bottom (64), decoder (32, 16)
        # and the single-channel output layer.
        self.cv_1 = DoubleConv(channels=16, **conv_cfg)
        self.cv_2 = DoubleConv(channels=32, **conv_cfg)
        self.cv_3 = DoubleConv(channels=64, **conv_cfg)
        self.cv_4 = DoubleConv(channels=32, **conv_cfg)
        self.cv_5 = DoubleConv(channels=16, **conv_cfg)
        self.cv_6 = Conv2D(channels=1, activation='tanh', **conv_cfg)

        # Up-sampling layers (kernel 2, stride 2 transposed convolutions).
        self.ct_1 = Conv2DTranspose(channels=32, kernel_size=2, strides=2,
                                    activation='relu')
        self.ct_2 = Conv2DTranspose(channels=16, kernel_size=2, strides=2,
                                    activation='relu')

    def hybrid_forward(self, F, x):
        """Run the encoder, the bottom stage and the decoder with skip links."""
        # Encoder: keep the features before each pooling for the skip links.
        enc_1 = self.cv_1(x)
        enc_2 = self.cv_2(self.mp_1(enc_1))
        bottom = self.cv_3(self.mp_2(enc_2))

        # Decoder level 2: up-sample and merge with the second encoder stage.
        up_2 = self.ct_1(bottom)
        dec_2 = self.cv_4(F.concat(enc_2, up_2))

        # Decoder level 1: up-sample and merge with the first encoder stage.
        up_1 = self.ct_2(dec_2)
        dec_1 = self.cv_5(F.concat(enc_1, up_1))

        return self.cv_6(dec_1)