Modifying a pre-trained model from the Gluon model zoo

Hello,

I’m attempting to modify one of the pre-trained models so that each convolution layer is “split” in half, i.e. ends up with half as many input and output channels.

This is what I have so far:

# Module-level flag: flips to True once the first Conv2D has been handled.
# The first convolution keeps its 3 input channels; later ones are halved.
foundFirstConv = False


def splitNet(net):
    """Recursively replace Conv2D and BatchNorm children of ``net`` with half-width copies.

    Each Conv2D child is replaced by a new Conv2D with half the output
    channels (and, except for the very first convolution encountered, half
    the input channels). Each BatchNorm child is replaced by a new BatchNorm
    whose parameters are the first half of the original ones. All other
    children are left in place and searched recursively.

    The replacement is done in place on ``net``; nothing is returned.

    Args:
        net: a Gluon block whose ``_children`` mapping is walked.

    Notes:
        * Relies on the module-level ``foundFirstConv`` flag, so the function
          is single-shot: reset the flag to ``False`` before splitting a
          second network.
        * Only kernel size, stride, padding and bias usage are carried over
          to the new Conv2D; other constructor options (dilation, groups,
          activation, ...) are not.
        * NOTE(review): if the original convolution has a bias, its values
          are not copied here -- only ``weight`` is taken from the old layer.
          Confirm whether that matters for the target model.
    """
    global foundFirstConv
    # Iterate over a snapshot: children are re-registered inside the loop.
    for key, layer in list(net._children.items()):
        new_layer = None
        if isinstance(layer, gluon.nn.Conv2D):
            new_layer = gluon.nn.Conv2D(
                channels=layer._channels // 2,
                kernel_size=layer._kwargs['kernel'],
                strides=layer._kwargs['stride'],
                padding=layer._kwargs['pad'],
                in_channels=3 if not foundFirstConv else layer._in_channels // 2,
                # Keep the original bias setting so the copy does not gain a
                # bias term the original layer never had.
                use_bias=layer.bias is not None,
            )
        elif isinstance(layer, gluon.nn.BatchNorm):
            new_layer = gluon.nn.BatchNorm(
                axis=layer._kwargs['axis'],
                epsilon=layer._kwargs['eps'],
                momentum=layer._kwargs['momentum'],
                scale=not layer._kwargs['fix_gamma'],
                use_global_stats=layer._kwargs['use_global_stats'],
            )

        if new_layer is not None:
            with net.name_scope():
                # Children that are also plain attributes (e.g. ``self.conv1``)
                # must be replaced on the attribute as well.
                if hasattr(net, key):
                    setattr(net, key, new_layer)
                net.register_child(new_layer, key)

            if isinstance(new_layer, gluon.nn.Conv2D):
                weights = layer.weight.data()
                if not foundFirstConv:
                    # First convolution: keep all input channels, drop the
                    # second half of the output filters.
                    new_weights = weights[:new_layer._channels]
                    foundFirstConv = True
                else:
                    new_weights = splitWeights(weights)
                new_layer.collect_params().initialize(
                    mx.initializer.Constant(new_weights))
            elif isinstance(new_layer, gluon.nn.BatchNorm):
                old_params = layer.collect_params()
                old_prefix = layer._prefix
                for name in old_params.keys():
                    data = old_params[name].data()
                    # Keep only the first half of the per-channel statistics
                    # so they match the halved convolution in front.
                    data = data[:data.shape[0] // 2]
                    # Map the old parameter name onto the new layer's prefix.
                    new_name = new_layer._prefix + name[len(old_prefix):]
                    new_layer.collect_params(new_name).initialize(
                        mx.initializer.Constant(data))

        # Descend into the original child to handle nested blocks.
        splitNet(layer)

# Load the off-the-shelf pre-trained ResNet-18 and keep only its `features`
# sub-block, then run splitNet on it to modify the layers in place.
gluon_model = vision.get_model("resnet18_v1", pretrained=True).features
splitNet(gluon_model)

Sorry for the long, hacky code.

My understanding is that each Block object in the network has associated input and output dimensions. I think one of my problems is that I don’t handle this at all: I change the input and output dimensions of the layers, but I don’t change the corresponding attributes of the Block objects that contain them.

So my question is: when I come across a BasicBlockV1 object (which is what ResNet uses), how would I create and register a new BasicBlockV1 object with the input and output dimensions halved, similar to how I am doing it for the Conv2D layers?

I think the problem in your code is that it is not recursively replacing the layers and as such you get a mismatch in the tensor shapes. The following code should work:

def replace_conv2D(net):
    """Recursively replace every Conv2D under ``net`` with a half-width Conv2D.

    Each Conv2D child is swapped, in place, for a new Conv2D with half the
    input and output channels and the same kernel size, stride, padding and
    bias usage. The new layer is initialised with Xavier; the original
    weights are not reused. Non-Conv2D children are searched recursively.

    Args:
        net: a Gluon block whose ``_children`` mapping is walked.

    Notes:
        NOTE(review): only Conv2D layers are touched. Layers that follow a
        convolution (BatchNorm, Dense, ...) keep their existing parameters,
        which presumably still have the original width -- confirm they are
        handled before running a forward pass.
    """
    # Iterate over a snapshot: children are re-registered inside the loop.
    for key, layer in list(net._children.items()):
        if not isinstance(layer, gluon.nn.Conv2D):
            replace_conv2D(layer)
            continue

        new_conv = gluon.nn.Conv2D(
            channels=layer._channels // 2,
            kernel_size=layer._kwargs['kernel'],
            strides=layer._kwargs['stride'],
            padding=layer._kwargs['pad'],
            in_channels=layer._in_channels // 2,
            # Keep the original bias setting so the replacement does not
            # gain a bias term the original layer never had.
            use_bias=layer.bias is not None,
        )
        with net.name_scope():
            net.register_child(new_conv, key)
        new_conv.initialize(mx.init.Xavier())
# Fetch the pre-trained ResNet-18 and halve its convolutions in place.
net = gluon.model_zoo.vision.get_model(
    "resnet18_v1",
    pretrained=True,
)
replace_conv2D(net)