MXNet: how to concat the input data and output features in the train_iter

I need to create a network using MXNet where the train_iter includes both the original input data and the output features of a middle layer. The original input data is MNIST, and the middle layer is a ReLU.
What I mean is that I want the output features to also be part of the "input" training data.

Hi @mxnetwqs,

You have complete control over your network architecture, so you can take the array (or symbol) representing the input data and the array (or symbol) representing the feature map of interest, and pass both to another layer for processing. I don’t know how you are going to be using these two inputs, but one method (if they have the same spatial dimensions) is to use the concatenate operation to stack the arrays (or symbols) depthwise.

An example in Gluon would look something like:

import mxnet as mx
from mxnet import gluon, nd
from mxnet.gluon import nn

class Net(gluon.Block):
    def __init__(self, **kwargs):
        super(Net, self).__init__(**kwargs)
        with self.name_scope():
            self.conv1 = nn.Conv2D(8, kernel_size=3, padding=1)
            self.conv2 = nn.Conv2D(16, kernel_size=3, padding=1)
            self.fc1 = nn.Dense(10)

    def forward(self, data):
        conv1_out = nd.relu(self.conv1(data))
        conv2_out = nd.relu(self.conv2(conv1_out))
        # work with input data AND feature map from here onwards
        concat = nd.concat(data, conv2_out, dim=1)  # stack depthwise along the channel axis
        output = self.fc1(concat)
        return output
    
net = Net()
net.initialize()
data = nd.random.normal(shape=(1,3,32,32))
out = net(data)

I hope that helps. Cheers, Thom

Do you know how to hybridize your Net with concat in the forward?

Hi @chrisluu, you need to make your network a HybridBlock

import mxnet as mx
from mxnet import gluon, nd
from mxnet.gluon import nn

class Net(gluon.HybridBlock):
    def __init__(self, **kwargs):
        super(Net, self).__init__(**kwargs)
        with self.name_scope():
            self.conv1 = nn.Conv2D(8, kernel_size=3, padding=1)
            self.conv2 = nn.Conv2D(16, kernel_size=3, padding=1)
            self.fc1 = nn.Dense(10)

    def hybrid_forward(self, F, data):
        conv1_out = F.relu(self.conv1(data))
        conv2_out = F.relu(self.conv2(conv1_out))
        # work with input data AND feature map from here onwards
        concat = F.concat(data, conv2_out, dim=1)  # stack depthwise along the channel axis
        output = self.fc1(concat)
        return output

net = Net()
net.initialize()
net.hybridize()
data = nd.random.normal(shape=(1,3,32,32))
out = net(data)

Hi @ThomasDelteil, thanks for your reply. Actually, I want to concat the outputs of GlobalAvgPool and GlobalMaxPool. Here is my code; I found that it cannot run backward.

class BasicBlockV1b(nn.HybridBlock):
    def __init__(self, num_channels, use_1x1conv=False, strides=1, num_gpus=4, **kwargs):
        super(BasicBlockV1b, self).__init__(**kwargs)
        self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1, strides=strides, use_bias=False)
        self.bn1 = contrib.nn.SyncBatchNorm(num_devices=num_gpus)
        self.relu = nn.Activation('relu')
        self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1, use_bias=False)
        self.bn2 = contrib.nn.SyncBatchNorm(num_devices=num_gpus)
        if use_1x1conv:
            self.downsample = nn.HybridSequential()
            self.downsample.add(nn.Conv2D(num_channels, kernel_size=1, strides=strides, use_bias=False),
                                contrib.nn.SyncBatchNorm(num_devices=num_gpus))
        else:
            self.downsample = None

    def hybrid_forward(self, X):
        Y = self.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.downsample:
            X = self.downsample(X)
        return self.relu(Y + X)

def resnet_block(num_channels, num_residuals, first_block=False, num_gpus=4):
    blk = nn.HybridSequential()
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.add(BasicBlockV1b(num_channels, use_1x1conv=True, strides=2, num_gpus=num_gpus))
        else:
            blk.add(BasicBlockV1b(num_channels, num_gpus=num_gpus))
    return blk

class ConcatLayer(nn.HybridBlock):
    """Concat operation for multiple inputs."""
    def __init__(self, **kwargs):
        super(ConcatLayer, self).__init__(**kwargs)
        self.gap = nn.GlobalAvgPool2D()
        self.gmp = nn.GlobalMaxPool2D()
        self.flat = nn.Flatten()

    def hybrid_forward(self, X):
        gap = self.flat(self.gap(X))
        gmp = self.flat(self.gmp(X))
        gap_norm = nd.L2Normalization(gap, mode='instance')
        gmp_norm = nd.L2Normalization(gmp, mode='instance')
        return nd.concat(gap_norm, gmp_norm, dim=1)

class ResNet34V1bSyncBN(nn.HybridSequential):
    def __init__(self, classes=28, num_gpus=4, **kwargs):
        super(ResNet34V1bSyncBN, self).__init__(**kwargs)
        self.conv1 = nn.Conv2D(64, kernel_size=7, strides=2, padding=3, use_bias=False)
        self.bn1 = contrib.nn.SyncBatchNorm(num_devices=num_gpus)
        self.relu = nn.Activation('relu')
        self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
        self.layer1 = resnet_block(64, 3, first_block=True, num_gpus=num_gpus)
        self.layer2 = resnet_block(128, 4, num_gpus=num_gpus)
        self.layer3 = resnet_block(256, 6, num_gpus=num_gpus)
        self.layer4 = resnet_block(512, 3, num_gpus=num_gpus)
        self.concat = ConcatLayer()
        self.fc = nn.Dense(classes)

    def hybrid_forward(self, X, masks):
        Y = self.maxpool(self.relu(self.bn1(self.conv1(X))))
        Y = self.layer4(self.layer3(self.layer2(self.layer1(Y))))
        Y = self.fc(self.concat(Y * masks))
        return Y

Output architecture is:

net:  ResNet34V1bSyncBN(
  (conv1): Conv2D(None -> 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm108_', in_channels=None)
  (relu): Activation(relu)
  (maxpool): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(1, 1), ceil_mode=False)
  (layer1): HybridSequential(
    (0): BasicBlockV1b(
      (conv1): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm109_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm110_', in_channels=None)
    )
    (1): BasicBlockV1b(
      (conv1): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm111_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm112_', in_channels=None)
    )
    (2): BasicBlockV1b(
      (conv1): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm113_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm114_', in_channels=None)
    )
  )
  (layer2): HybridSequential(
    (0): BasicBlockV1b(
      (conv1): Conv2D(None -> 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm115_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm116_', in_channels=None)
      (downsample): HybridSequential(
        (0): Conv2D(None -> 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm117_', in_channels=None)
      )
    )
    (1): BasicBlockV1b(
      (conv1): Conv2D(None -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm118_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm119_', in_channels=None)
    )
    (2): BasicBlockV1b(
      (conv1): Conv2D(None -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm120_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm121_', in_channels=None)
    )
    (3): BasicBlockV1b(
      (conv1): Conv2D(None -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm122_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm123_', in_channels=None)
    )
  )
  (layer3): HybridSequential(
    (0): BasicBlockV1b(
      (conv1): Conv2D(None -> 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm124_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm125_', in_channels=None)
      (downsample): HybridSequential(
        (0): Conv2D(None -> 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm126_', in_channels=None)
      )
    )
    (1): BasicBlockV1b(
      (conv1): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm127_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm128_', in_channels=None)
    )
    (2): BasicBlockV1b(
      (conv1): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm129_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm130_', in_channels=None)
    )
    (3): BasicBlockV1b(
      (conv1): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm131_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm132_', in_channels=None)
    )
    (4): BasicBlockV1b(
      (conv1): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm133_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm134_', in_channels=None)
    )
    (5): BasicBlockV1b(
      (conv1): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm135_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm136_', in_channels=None)
    )
  )
  (layer4): HybridSequential(
    (0): BasicBlockV1b(
      (conv1): Conv2D(None -> 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm137_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm138_', in_channels=None)
      (downsample): HybridSequential(
        (0): Conv2D(None -> 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm139_', in_channels=None)
      )
    )
    (1): BasicBlockV1b(
      (conv1): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm140_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm141_', in_channels=None)
    )
    (2): BasicBlockV1b(
      (conv1): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm142_', in_channels=None)
      (relu): Activation(relu)
      (conv2): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=1, key='syncbatchnorm143_', in_channels=None)
    )
  )
  (concat): ConcatLayer(
    (gap): GlobalAvgPool2D(size=(1, 1), stride=(1, 1), padding=(0, 0), ceil_mode=True)
    (gmp): GlobalMaxPool2D(size=(1, 1), stride=(1, 1), padding=(0, 0), ceil_mode=True)
    (flat): Flatten
  )
  (fc): Dense(None -> 28, linear)
)

Test code:

net = ResNet34V1bSyncBN()
# init
net.initialize(init.Xavier())
loss = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
with autograd.record():
    y_hat = net(data_array.as_in_context(ctx), mask_data_array.as_in_context(ctx))
    l = loss(y_hat, y)
l.backward()

Details about the input data:

data_array.dtype:  <class 'numpy.float32'>
data_array.shape:  (4, 4, 2048, 2048)
mask_data_array.dtype:  <class 'numpy.float32'>
mask_data_array.shape:  (4, 1, 64, 64)

And I got an error:

TypeError                                 Traceback (most recent call last)
<ipython-input-7-da82e6b42ae7> in <module>()
     26 loss = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
     27 with autograd.record():
---> 28     y_hat = net(data_array.as_in_context(ctx), mask_data_array.as_in_context(ctx))
     29     l = loss(y_hat, y)
     30 l.backward()

/home/anaconda3/lib/python3.6/site-packages/mxnet/gluon/block.py in __call__(self, *args)
    539             hook(self, args)
    540 
--> 541         out = self.forward(*args)
    542 
    543         for hook in self._forward_hooks.values():

/home/anaconda3/lib/python3.6/site-packages/mxnet/gluon/block.py in forward(self, x, *args)
    916                     params = {i: j.data(ctx) for i, j in self._reg_params.items()}
    917 
--> 918                 return self.hybrid_forward(ndarray, x, *args, **params)
    919 
    920         assert isinstance(x, Symbol), \

TypeError: hybrid_forward() takes 3 positional arguments but 4 were given

Hi @chrisluu

I fixed your code; you had a few issues. The hybrid_forward signature is hybrid_forward(self, F, x). You forgot the F, hence your first error. F is a function space: it resolves to nd if the network is not hybridized and to sym if it is.
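To see F switch between the two namespaces, here is a minimal sketch (a toy block of my own, not part of your network) that prints which module F resolves to:

import mxnet as mx
from mxnet import nd
from mxnet.gluon import nn

class Printer(nn.HybridBlock):
    def hybrid_forward(self, F, x):
        # F is the mxnet.ndarray module when running imperatively,
        # and the mxnet.symbol module while the hybridized graph is being traced
        print(F.__name__)
        return F.relu(x)

p = Printer()
p.initialize()
p(nd.ones((1, 3)))   # prints 'mxnet.ndarray'
p.hybridize()
p(nd.ones((1, 3)))   # prints 'mxnet.symbol' during the first (tracing) call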

Rewriting your code I get:

import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn, contrib
from mxnet import init, autograd

class BasicBlockV1b(nn.HybridBlock):
    def __init__(self, num_channels, use_1x1conv=False, strides=1, num_gpus=4, **kwargs):
        super(BasicBlockV1b, self).__init__(**kwargs)
        with self.name_scope():
            self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1, strides=strides, use_bias=False)
            self.bn1 = contrib.nn.SyncBatchNorm(in_channels=num_channels, num_devices=num_gpus)
            self.relu = nn.Activation('relu')
            self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1, use_bias=False)
            self.bn2 = contrib.nn.SyncBatchNorm(num_devices=num_gpus)
            if use_1x1conv:
                self.downsample = nn.HybridSequential()
                self.downsample.add(nn.Conv2D(num_channels, kernel_size=1, strides=strides, use_bias=False),
                                    contrib.nn.SyncBatchNorm(num_devices=num_gpus))
            else:
                self.downsample = None

    def hybrid_forward(self, F, X):
        Y = self.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.downsample:
            X = self.downsample(X)
        return self.relu(Y + X)

def resnet_block(num_channels, num_residuals, first_block=False, num_gpus=4):
    blk = nn.HybridSequential()
    with blk.name_scope():
        for i in range(num_residuals):
            if i == 0 and not first_block:
                blk.add(BasicBlockV1b(num_channels, use_1x1conv=True, strides=2, num_gpus=num_gpus))
            else:
                blk.add(BasicBlockV1b(num_channels, num_gpus=num_gpus))
        return blk

class ConcatLayer(nn.HybridBlock):
    """Concat operation for multiple inputs."""
    def __init__(self, **kwargs):
        super(ConcatLayer, self).__init__(**kwargs)
        with self.name_scope():
            self.gap = nn.GlobalAvgPool2D()
            self.gmp = nn.GlobalMaxPool2D()
            self.flat = nn.Flatten()

    def hybrid_forward(self, F, X):
        gap = self.flat(self.gap(X))
        gmp = self.flat(self.gmp(X))
        gap_norm = F.L2Normalization(gap, mode='instance')
        gmp_norm = F.L2Normalization(gmp, mode='instance')
        return F.concat(gap_norm, gmp_norm, dim=1)

class ResNet34V1bSyncBN(nn.HybridBlock):
    def __init__(self, classes=28, num_gpus=4, **kwargs):
        super(ResNet34V1bSyncBN, self).__init__(**kwargs)
        with self.name_scope():
            self.conv1 = nn.Conv2D(64, kernel_size=7, strides=2, padding=3, use_bias=False)
            self.bn1 = contrib.nn.SyncBatchNorm(in_channels=64, num_devices=num_gpus)
            self.relu = nn.Activation('relu')
            self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
            self.layer1 = resnet_block(64, 3, first_block=True, num_gpus=num_gpus)
            self.layer2 = resnet_block(128, 4, num_gpus=num_gpus)
            self.layer3 = resnet_block(256, 6, num_gpus=num_gpus)
            self.layer4 = resnet_block(512, 3, num_gpus=num_gpus)
            self.concat = ConcatLayer()
            self.fc = nn.Dense(classes)

    def hybrid_forward(self, F, X, masks):
        Y = self.maxpool(self.relu(self.bn1(self.conv1(X))))
        Y = self.layer4(self.layer3(self.layer2(self.layer1(Y))))
        Y = self.fc(self.concat(F.broadcast_mul(Y, masks)))
        return Y
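A note on one change I made there (my own explanation, not something you asked about): your forward used Y * masks, but the mask has a single channel while the feature map has 512, so I switched to F.broadcast_mul, which broadcasts across the channel axis explicitly and works the same whether F resolves to nd or sym. A quick shape check, assuming the 2048x2048 input from your test code:

import mxnet as mx
from mxnet import nd

# feature map after layer4 for a (4, 4, 2048, 2048) input, and the (4, 1, 64, 64) mask
Y = nd.ones((4, 512, 64, 64))
masks = nd.ones((4, 1, 64, 64))
out = nd.broadcast_mul(Y, masks)   # the mask is broadcast across the 512 channels
print(out.shape)                   # (4, 512, 64, 64)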

And you define, initialize, and run forward and backward like this:

# Create the network
net = ResNet34V1bSyncBN(num_gpus=1)
# init
ctx = mx.gpu()
y = mx.nd.ones((4,28), ctx=ctx)
data_array = mx.nd.ones((4,4,2048,2048), ctx=ctx)
mask_data_array = mx.nd.ones((4,1,64,64), ctx=ctx)

net.initialize(init.Xavier(), ctx)
net.hybridize()

loss = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
with autograd.record():
    y_hat = net(data_array.as_in_context(ctx), mask_data_array.as_in_context(ctx))
    l = loss(y_hat, y)
l.backward()
y_hat.asnumpy()[0]

Hi @ThomasDelteil,
Thanks for your kind help. Furthermore, how do I write a correct loss function? I'm running into the problem that if I replace the loss function with one I wrote myself, backward does not work properly.

The code is as below:

class CustomizedLoss(gluon.loss.Loss):
    def __init__(self, gamma=2, **kwargs):
        super(CustomizedLoss, self).__init__(None, **kwargs)
        self._gamma = gamma

    def hybrid_forward(self, F, y_hat, y):
        epsilon = 1e-12
        rhs = F.max(-y_hat).asscalar()
        max_val = F.clip(-y_hat, a_min=0.0, a_max=rhs)
        loss = y_hat - y_hat * y + max_val + F.log(F.exp(-max_val) + F.exp(-y_hat - max_val) + epsilon)
        invprobs = F.log(F.sigmoid(-y_hat * (y * 2.0 - 1.0)) + epsilon)
        loss = F.exp(invprobs * self._gamma) * loss
        return loss

Do I need to implement the backward function?

It seems to be working fine for me with your loss.

loss = CustomizedLoss(batch_axis=0)

Though I am not sure what your loss is doing, by calling .asscalar() you lose the graph dependency. I would advise staying in the F world without dropping to numpy, or you might be surprised by the results you get, especially if you hybridize your loss, since the scalar value will be turned into a constant.
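For reference, here is a sketch of how I would keep your loss entirely in the F world: the elementwise F.relu(-y_hat) plays the role of the clamp at zero, so the F.max(...).asscalar() clip is no longer needed. This is my rewrite under that assumption, so double-check the values against your original:

import mxnet as mx
from mxnet import gluon, nd

class CustomizedLoss(gluon.loss.Loss):
    def __init__(self, gamma=2, **kwargs):
        super(CustomizedLoss, self).__init__(None, **kwargs)
        self._gamma = gamma

    def hybrid_forward(self, F, y_hat, y):
        epsilon = 1e-12
        # elementwise max(-y_hat, 0): stays in the graph, no .asscalar()
        max_val = F.relu(-y_hat)
        loss = y_hat - y_hat * y + max_val + F.log(F.exp(-max_val) + F.exp(-y_hat - max_val) + epsilon)
        invprobs = F.log(F.sigmoid(-y_hat * (y * 2.0 - 1.0)) + epsilon)
        return F.exp(invprobs * self._gamma) * loss

loss_fn = CustomizedLoss(batch_axis=0)
print(loss_fn(nd.random.normal(shape=(4, 28)), nd.ones((4, 28))).shape)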

Gotcha. @ThomasDelteil, thanks for your kind help.

Hi @ThomasDelteil, I would like to ask another similar question: how do I concat pretrained weights for the first conv of ResNet50?
I tried it with ResNet34; the code is the following:

net = gluoncv.model_zoo.resnet34_v1b(pretrained=True)

w = net.conv1.weight.data()

with net.name_scope():
    net.conv1 = nn.Conv2D(in_channels=6, channels=64, kernel_size=7, strides=(2, 2), padding=(3, 3), use_bias=False)
    net.fc = nn.Dense(classes)  # classes = number of output classes for the new task

net.fc.initialize(init.Xavier(), force_reinit=True)
customized_init = mx.initializer.Constant(nd.Concat(w, w, dim=1))
net.conv1.weight.initialize(customized_init)

I want to do the same manipulation for ResNet50, but I just found that I couldn't set the value of the first conv with the following code:

net.conv1[0] = nn.Conv2D(in_channels=6, channels=64, kernel_size=7, strides=(2, 2), padding=(3, 3), use_bias=False)

The network architectures differ a little between ResNet34 and ResNet50: the first conv of ResNet50 is a HybridSequential, whereas for ResNet34 it is a plain Conv2D.

ResNet50V1dSyncBN(
  (conv1): HybridSequential(
    (0): Conv2D(3 -> 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=4, key='syncbatchnorm55_', in_channels=32)
    (2): Activation(relu)
    (3): Conv2D(32 -> 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=4, key='syncbatchnorm56_', in_channels=32)
    (5): Activation(relu)
    (6): Conv2D(32 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
ResNet34V1bSyncBN(
  (conv1): Conv2D(None -> 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): SyncBatchNorm(eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, ndev=4, key='syncbatchnorm163_', in_channels=None)
  (relu): Activation(relu)
  (maxpool): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(1, 1), ceil_mode=False)

Looking forward to your reply. Thanks a lot :slight_smile:

@chrisluu
You can replace net.conv1 with a brand new HybridSequential, add your new conv layer to it, and then copy all the subsequent layers of the original ResNet HybridSequential into it.
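A rough sketch of that (my own code, assuming a conv1 HybridSequential laid out like the one you printed, with a 3 -> 32, 3x3 conv as its first child; I use the plain gluoncv resnet50_v1d here for illustration, and the variable names are mine):

import mxnet as mx
from mxnet import nd
from mxnet.gluon import nn
import gluoncv

net = gluoncv.model_zoo.resnet50_v1d(pretrained=True)
w = net.conv1[0].weight.data()                 # pretrained kernels, shape (32, 3, 3, 3)

with net.name_scope():
    new_conv1 = nn.HybridSequential()
    first = nn.Conv2D(in_channels=6, channels=32, kernel_size=3,
                      strides=(2, 2), padding=(1, 1), use_bias=False)
    new_conv1.add(first)
    for i in range(1, len(net.conv1)):         # reuse the BN / ReLU / remaining convs as-is
        new_conv1.add(net.conv1[i])
net.conv1 = new_conv1

# duplicate the pretrained RGB kernels along dim=1 to cover the 6 input channels
first.weight.initialize(mx.initializer.Constant(nd.Concat(w, w, dim=1)))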

@ThomasDelteil, thanks for your kind replies, extremely appreciated!
