Hi!
I’m trying to adapt this gluoncv tuto https://gluon-cv.mxnet.io/build/examples_detection/finetune_detection.html to train in float16 over a V100.
I’m following the doc https://beta.mxnet.io/guide/performance/float16.html and writing this code:
net = gcv.model_zoo.get_model('ssd_512_mobilenet1.0_custom',
classes=classes, pretrained_base=False, transfer='voc')
net.cast('float16')
net.reset_class(classes)
from gluoncv.data.batchify import Tuple, Stack, Pad
from gluoncv.data.transforms.presets.ssd import SSDDefaultTrainTransform
from mxnet.gluon.data.vision.transforms import Cast
batch = 16
def get_dataloader(net, train_dataset, data_shape, batch_size, num_workers):
width, height = data_shape, data_shape
# use fake data to generate fixed anchors for target generation
with autograd.train_mode():
_, _, anchors = net(mx.nd.zeros((1, 3, height, width)).astype('float16', copy=False))
batchify_fn = Tuple(Stack(), Stack(), Stack()) # stack image, cls_targets, box_targets
train_loader = gluon.data.DataLoader(
dataset=(train_dataset
.transform(SSDDefaultTrainTransform(width, height, anchors))
.transform(Cast('float16'))),
batch_size=batch_size,
shuffle=True,
batchify_fn=batchify_fn,
last_batch='rollover',
num_workers=num_workers)
return train_loader
train_data = get_dataloader(net, dataset, 512, batch, 4)
this returns
MXNetError: Error in operator ssd13_mobilenet0_batchnorm0_fwd: [10:56:52] src/operator/nn/batch_norm.cc:370: Check failed: (*in_type)[i] == dtype_param (2 vs. 0) This layer requires uniform type. Expected 'float32' v.s. given 'float16' at 'gamma'