I have a piece of code that fails with the following error at the `trainer.step` line:

```
UserWarning: Gradient of Parameter `resnetv20_conv0_weight` on context gpu(0) has not been updated by backward since last `step`. This could mean a bug in your model that made it only use a subset of the Parameters (Blocks) for this iteration. If you are intentionally only using a subset, call step with ignore_stale_grad=True to suppress this warning and skip updating of Parameters with stale gradient
```
I cannot understand why.
What’s happening in this code is that I define a new `autograd.Function` with a custom backward pass and apply it to the model outputs. My goal is to re-weight the gradients based on a certain input matrix, which has the same dimensions as the gradient:
```python
class weight_layer(mx.autograd.Function):
    def forward(self, x, weights):
        self.save_for_backward(weights)
        y = 1.0 * x  # identity in the forward pass
        return y

    def backward(self, dy):
        weights, = self.saved_tensors
        # Re-weight the incoming gradient; the weights input itself gets no gradient
        return dy * weights, mx.nd.zeros(shape=weights.shape, ctx=mx.gpu(0))


weight_ly = weight_layer()
with ag.record():
    output = net_forward(net, data)
    output = [weight_ly(x, mx.nd.ones_like(x)) for x in output]
    [x.attach_grad() for x in output]
    loss = [loss_fns(yhat, y) for yhat, y in zip(output, label)]

for l_idx in range(len(loss)):
    ag.backward(loss[l_idx])  # Ignore the confidence loss here
trainer.step(batch_size)
```
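For what it's worth, the Function itself seems to behave as intended when I exercise it in isolation. Here is a minimal standalone sketch (the toy arrays are made up just for this test, and I run it on `gpu(0)` only to match the context hard-coded in the class above):

```python
import mxnet as mx
from mxnet import autograd as ag

ctx = mx.gpu(0)  # match the context hard-coded in weight_layer.backward
x = mx.nd.array([1.0, 2.0, 3.0], ctx=ctx)
w = mx.nd.array([0.1, 1.0, 10.0], ctx=ctx)
x.attach_grad()  # x is a leaf variable here, so attaching a grad buffer is fine

fn = weight_layer()
with ag.record():
    y = fn(x, w)
y.backward()  # implicit head gradient of ones for a non-scalar output

print(x.grad)  # [0.1, 1.0, 10.0] -- the ones head gradient, re-weighted by w
```

So the re-weighting works when the Function is applied to a leaf variable; the warning only shows up in the full training loop above.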
I switched to using a custom operator (`mx.operator.CustomOp`) and I get the same error. I believe it has something to do with the weight layer I introduced, but I can't figure out what it is.
```python
class GradientWeight(mx.operator.CustomOp):
    def forward(self, is_train, req, in_data, out_data, aux):
        # Identity in the forward pass
        self.assign(out_data[0], req[0], in_data[0])

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        weight = in_data[1]
        y = in_grad[0] * weight
        self.assign(in_grad[0], req[0], y)


@mx.operator.register("gradient_weight")
class GradientWeightProp(mx.operator.CustomOpProp):
    def __init__(self):
        super(GradientWeightProp, self).__init__(need_top_grad=False)

    def list_arguments(self):
        return ['data', 'weight']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        # data, weight, and output all share the same shape
        data_shape = in_shape[0]
        weight_shape = in_shape[0]
        output_shape = in_shape[0]
        return [data_shape, weight_shape], [output_shape]

    def create_operator(self, ctx, shapes, dtypes):
        return GradientWeight()


with ag.record():
    output = net_forward(net, data)
    output = [mx.nd.Custom(x, mx.nd.ones_like(x), op_type="gradient_weight")
              for x in output]
    [x.attach_grad() for x in output]
    loss = [loss_fns(yhat, y) for yhat, y in zip(output, label)]
```
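I also tried exercising this custom op directly on a leaf variable, to see whether its backward fires at all outside the training loop. Again, this is just a minimal sketch with made-up toy inputs:

```python
import mxnet as mx
from mxnet import autograd as ag

ctx = mx.cpu()  # the CustomOp itself is context-agnostic
x = mx.nd.array([1.0, 2.0, 3.0], ctx=ctx)
w = mx.nd.array([0.1, 1.0, 10.0], ctx=ctx)
x.attach_grad()  # x is a leaf variable here

with ag.record():
    y = mx.nd.Custom(x, w, op_type="gradient_weight")
y.backward()

print(x.grad)  # inspect what the custom backward wrote into x.grad
```

Here `backward` does run and writes into `x.grad`, so the op seems wired up correctly in isolation; the stale-gradient warning only appears when it sits between the network outputs and the losses.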
All help is appreciated. Thanks!