Hello,
I’m attempting to modify one of the pre-trained models so that each convolution layer is “split” in half.
This is what I have so far:
# Module-level flag: False until splitNet has handled its first Conv2D.
# It is never reset here, so it carries over between top-level splitNet calls.
foundFirstConv = False


def splitNet(net):
    """Recursively replace Conv2D/BatchNorm children of ``net`` with half-width copies.

    Every direct child that is a ``gluon.nn.Conv2D`` or ``gluon.nn.BatchNorm``
    is swapped, in place, for a freshly built layer:

    * Conv2D: ``channels`` is halved.  The first convolution encountered
      (tracked by the module-level ``foundFirstConv`` flag) keeps
      ``in_channels=3`` and is initialised with the first half of the old
      filters; every later convolution also halves ``in_channels`` and takes
      its weights from ``splitWeights`` (defined outside this block).
    * BatchNorm: each parameter of the old layer is truncated to its first
      half and used as the constant initial value of the matching new
      parameter.

    After handling a child, the function recurses into the *original* child
    so nested containers are processed as well.

    Args:
        net: A Gluon block whose ``_children`` mapping is walked and mutated.

    Returns:
        None.  ``net`` is modified in place.
    """
    global foundFirstConv

    # Snapshot the children: entries are replaced while we walk them.
    for key, layer in list(net._children.items()):
        newLayer = None

        if isinstance(layer, gluon.nn.Conv2D):
            newLayer = gluon.nn.Conv2D(
                channels=layer._channels // 2,
                kernel_size=layer._kwargs['kernel'],
                strides=layer._kwargs['stride'],
                padding=layer._kwargs['pad'],
                # NOTE(review): 3 presumably means an RGB input image -- confirm.
                in_channels=3 if not foundFirstConv else layer._in_channels // 2
            )
        elif isinstance(layer, gluon.nn.BatchNorm):
            newLayer = gluon.nn.BatchNorm(
                axis=layer._kwargs['axis'],
                epsilon=layer._kwargs['eps'],
                momentum=layer._kwargs['momentum'],
                scale=not layer._kwargs['fix_gamma'],
                use_global_stats=layer._kwargs['use_global_stats']
            )

        if newLayer is not None:
            # Swap the new layer in under the same key (and attribute, if any).
            with net.name_scope():
                if hasattr(net, key):
                    setattr(net, key, newLayer)
                net.register_child(newLayer, key)

            if isinstance(newLayer, gluon.nn.Conv2D):
                weights = layer.weight.data()
                if not foundFirstConv:
                    # First conv: input channels stay as they are, so only the
                    # output filters (leading axis) are cut.
                    newWeights = weights[:newLayer._channels]
                    foundFirstConv = True
                else:
                    newWeights = splitWeights(weights)
                newLayer.collect_params().initialize(
                    mx.initializer.Constant(newWeights))
            elif isinstance(newLayer, gluon.nn.BatchNorm):
                oldPrefix = layer._prefix
                for k, param in layer.collect_params().items():
                    data = param.data()
                    # Keep only the first half of the entries so the parameter
                    # length matches the halved convolution width.  (The
                    # previous slice, data[:data.shape[0]], kept everything.)
                    data = data[:data.shape[0] // 2]
                    # Address the matching parameter of the new layer by
                    # swapping the old name prefix for the new one.
                    newName = newLayer._prefix + k[len(oldPrefix):]
                    newLayer.collect_params(newName).initialize(
                        mx.initializer.Constant(data))

        # Recurse into the original child to reach nested blocks.
        splitNet(layer)
# Load the pretrained "resnet18_v1" model via vision.get_model.
# NOTE(review): the earlier comment also said "convert to relay"; no such
# conversion happens in the lines shown here.
gluon_model = vision.get_model("resnet18_v1", pretrained=True)
# Split the network (first half): keep only the model's `features` sub-block
# and let splitNet rewrite its Conv2D/BatchNorm children in place.
gluon_model = gluon_model.features
splitNet(gluon_model)
Sorry for the long, hacky code.
My understanding is that each `Block` object in the network has associated input and output dimensions. I think one of my problems is that I don’t handle this at all: I change the input and output dimensions of the layers, but I don’t change the corresponding attributes of the `Block` objects that contain them.
So my question is: when I come across a `BasicBlockV1` object (which is what ResNet uses), how would I create and register a new `BasicBlockV1` object with the input and output dimensions halved, similar to what I am doing for the `Conv2D` layers?