This exception was thrown when I ran my code using my fine-tuned pre-trained model: raise MXNetError(py_str(_LIB.MXGetLastError())) mxnet.base.MXNetError


Here’s the full traceback:
Traceback (most recent call last):
File “”, line 22, in
embedding = Embedding(‘orahiimages’,0, 0)
File “…/recognition/”, line 24, in init
model.set_params(arg_params, aux_params)
File “/home/anshuman/anaconda3/lib/python3.6/site-packages/mxnet/module/”, line 350, in set_params
File “/home/anshuman/anaconda3/lib/python3.6/site-packages/mxnet/module/”, line 309, in init_params
_impl(desc, arr, arg_params)
File “/home/anshuman/anaconda3/lib/python3.6/site-packages/mxnet/module/”, line 297, in _impl
File “/home/anshuman/anaconda3/lib/python3.6/site-packages/mxnet/ndarray/”, line 1970, in copyto
return _internal._copyto(self, out=other)
File “”, line 25, in _copyto
File “/home/anshuman/anaconda3/lib/python3.6/site-packages/mxnet/_ctypes/”, line 92, in _imperative_invoke
File “/home/anshuman/anaconda3/lib/python3.6/site-packages/mxnet/”, line 149, in check_call
raise MXNetError(py_str(_LIB.MXGetLastError()))
mxnet.base.MXNetError: [12:56:33] src/operator/nn/…/tensor/…/elemwise_op_common.h:123: Check failed: assign(&dattr, (*vec)[i]) Incompatible attr in node at 0-th output: expected [1], got [20]

Here’s my code for fine tuning:
import sys
# Put the insightface "deploy" directory first on the module search path so
# that the `face_model` import below can be resolved. This must stay ahead of
# that import.
sys.path.insert(0, '/home/anshuman/Documents/Repos/insightface/deploy')
import face_model
import argparse
import cv2
import numpy as np
import logging
import mxnet as mx
# Log record layout: timestamp (left-aligned, padded to 15 chars), then message.
head = '%(asctime)-15s %(message)s'
# DEBUG level so that everything logged through the root logger is shown.
logging.basicConfig(level=logging.DEBUG, format=head)

def get_iterators(batch_size, data_shape=(3, 112, 112)):
    """Create the training and validation RecordIO image iterators.

    Args:
        batch_size: number of images per batch; used for both iterators.
        data_shape: per-image shape handed to the iterators as ``data_shape``.
            Defaults to ``(3, 112, 112)``.

    Returns:
        A ``(train, val)`` tuple of ``mx.io.ImageRecordIter`` instances
        reading the ``orahiimagedb_train.rec`` / ``orahiimagedb_val.rec``
        files.
    """
    # NOTE(review): only the keyword arguments below were present; any
    # augmentation/shuffling options (e.g. ``shuffle``, ``rand_crop``,
    # ``rand_mirror``) are left at the iterator defaults -- confirm whether
    # the training iterator should shuffle.
    train = mx.io.ImageRecordIter(
        path_imgrec='../datasets/img/saved_faces/orahiimagedb_train.rec',
        data_name='data',
        label_name='softmax_label',
        batch_size=batch_size,
        data_shape=data_shape,
    )
    val = mx.io.ImageRecordIter(
        path_imgrec='../datasets/img/saved_faces/orahiimagedb_val.rec',
        data_name='data',
        label_name='softmax_label',
        batch_size=batch_size,
        data_shape=data_shape,
    )
    return (train, val)
def get_model(ctx, image_size, model_str, layer):
    """Load a checkpoint and bind a module cut off at the given layer.

    Args:
        ctx: context passed to ``mx.mod.Module``.
        image_size: indexable whose first two items are used as the last two
            dimensions of the bound ``data`` input.
        model_str: ``"<prefix>,<epoch>"``; split on the comma, the second part
            is converted with ``int``.
        layer: name of the internal layer; ``layer + '_output'`` is looked up
            in the checkpoint symbol's internals and used as the new output.

    Returns:
        ``(model, sym, arg_params, aux_params)`` where ``sym`` is the
        truncated symbol and the parameter dicts are the ones loaded from the
        checkpoint.

    Raises:
        AssertionError: if ``model_str`` does not split into exactly two parts.
    """
    parts = model_str.split(',')
    assert len(parts)==2
    prefix, epoch = parts[0], int(parts[1])
    print('Loading the Model... Please Wait \n',prefix, epoch)

    checkpoint_sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    # Keep only the graph up to the requested layer's output.
    sym = checkpoint_sym.get_internals()[layer+'_output']

    # No label inputs are declared, and the module is bound for a batch of one.
    model = mx.mod.Module(symbol=sym, context=ctx, label_names = None)
    input_desc = ('data', (1, 3, image_size[0], image_size[1]))
    model.bind(data_shapes=[input_desc])
    model.set_params(arg_params, aux_params)
    return model,sym,arg_params,aux_params
def fit(symbol, arg_params, aux_params, train, val, batch_size, num_gpus, num_epoch=8):
    """Train ``symbol`` on ``train`` and return its accuracy score on ``val``.

    Args:
        symbol: network symbol to train.
        arg_params: pretrained argument parameters used to initialise the
            module.
        aux_params: pretrained auxiliary parameters used to initialise the
            module.
        train: training data iterator.
        val: validation data iterator; used as the evaluation data during
            training and for the final score.
        batch_size: batch size, forwarded to the ``Speedometer`` callback.
        num_gpus: number of GPUs; the module runs on ``mx.gpu(0)`` ..
            ``mx.gpu(num_gpus - 1)``.
        num_epoch: number of training epochs.
            NOTE(review): the default of 8 is a placeholder -- confirm the
            intended value.

    Returns:
        The result of ``mod.score(val, mx.metric.Accuracy())``.
    """
    devs = [mx.gpu(i) for i in range(num_gpus)]
    mod = mx.mod.Module(symbol=symbol, context=devs)
    mod.fit(
        train, val,
        num_epoch=num_epoch,
        arg_params=arg_params,
        aux_params=aux_params,
        # Parameters absent from ``arg_params`` (e.g. a freshly added output
        # layer) are created by ``initializer`` instead of raising.
        allow_missing=True,
        batch_end_callback=mx.callback.Speedometer(batch_size, 10),
        initializer=mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2),
    )
    metric = mx.metric.Accuracy()
    return mod.score(val, metric)
def get_fine_tune_model(symbol, arg_params, num_classes, layer_name='dropout0'):
    """Replace the head of a pretrained network with a new classifier.

    Args:
        symbol: the pretrained network symbol.
        arg_params: the argument parameters of the pretrained model.
        num_classes: the number of classes for the fine-tune dataset.
        layer_name: the layer name before the last fully-connected layer.
            TODO: need to figure out the last layer name for this model.

    Returns:
        ``(net, new_args)``: the new symbol (``layer_name`` output ->
        ``FullyConnected`` named ``fc1`` -> ``SoftmaxOutput`` named
        ``softmax``) and a copy of ``arg_params`` without the entries whose
        name contains ``'fc1'``.
    """
    all_layers = symbol.get_internals()
    net = all_layers[layer_name+'_output']
    net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='fc1')
    net = mx.symbol.SoftmaxOutput(data=net, name='softmax')
    # Substring match: this drops every parameter whose name contains 'fc1'
    # (so e.g. a 'pre_fc1_weight' entry would be dropped too), leaving the new
    # 'fc1' layer to be initialised from scratch.
    new_args = {k: arg_params[k] for k in arg_params if 'fc1' not in k}
    return (net, new_args)

# Fine-tuning configuration and driver: load the pretrained checkpoint, build
# the fine-tune symbol, create the data iterators, then train and score.
num_classes = 20
batch_per_gpu = 8
num_gpus = 1
ctx=mx.gpu(0)  # context used when loading/binding the pretrained model
# "<checkpoint prefix>,<epoch>": get_model() splits this string on the comma.
# TODO: check the actual epoch number and the values used as defaults.
mod_epoch='../models/model-r34-amf/model/model,0'
# NOTE(review): `img_size` and `layer` are not defined anywhere in the visible
# code -- they must be set before this call or it raises NameError.
model,sym, arg_params, aux_params=get_model(ctx,img_size,mod_epoch,layer)
# The checkpoint is loaded inside get_model() above rather than by calling
# mx.model.load_checkpoint(model, 0) here.
(new_sym, new_args) = get_fine_tune_model(sym, arg_params, num_classes)
batch_size = batch_per_gpu * num_gpus
(train, val) = get_iterators(batch_size)
mod_score = fit(new_sym, new_args, aux_params, train, val, batch_size, num_gpus)
#assert mod_score >0.77, "Low training accuracy."

What does this mean?

Any idea how to fix this?


You’ve got a mismatch between the number of parameters loaded from the checkpoint file, and parameters in your Module’s symbol. You’re working with 20 classes right? But it looks like you’re trying to bind a label with shape (batch_size, 1), so I think this should be (batch_size, num_classes). Give that a try, and let me know how you get on.