You need to set up `wd_mult` when you define a `Parameter`. Here is the full, deterministic example (seeded as in your example). Notice that I had to set the global `wd` to 1, because `wd_mult` works as a multiplier — if `wd` is zero, then the multiplied value would also be zero.

```python
import numpy as np
import mxnet as mx
from mxnet import gluon, autograd
from mxnet.gluon import Trainer
from mxnet.gluon.loss import L2Loss
# Fix both RNGs (NumPy and MXNet) so weight init and data draws are reproducible.
np.random.seed(666)
mx.random.seed(666)
class RegularizedFullyConnected(gluon.HybridBlock):
    """Fully connected layer (no bias) whose weight parameter carries a
    per-parameter weight-decay multiplier (``wd_mult``).

    The effective decay applied by the optimizer is ``wd * wd_mult``,
    where ``wd`` is the global value passed to the Trainer.
    """

    def __init__(self, hidden_units, wd_mult=1.0):
        super(RegularizedFullyConnected, self).__init__()
        with self.name_scope():
            # shape=(hidden_units, 0): the input dimension (0) is inferred
            # from the first forward pass, hence allow_deferred_init=True.
            self.weights = self.params.get('weights',
                                           wd_mult=wd_mult,
                                           shape=(hidden_units, 0),
                                           allow_deferred_init=True)

    def hybrid_forward(self, F, x, weights):
        # no_bias=True: plain weighted sum, no bias term.
        weighted_data = F.FullyConnected(x,
                                         weights,
                                         num_hidden=self.weights.shape[0],
                                         no_bias=True)
        return weighted_data
hidden_units = 6
net = RegularizedFullyConnected(hidden_units, wd_mult=0.1)
net.initialize(mx.init.Uniform())

# One dummy sample and a matching random regression target.
x = mx.random.normal(shape=(1, 2))
label = mx.random.normal(shape=(hidden_units,))

l2_loss = L2Loss()
# The global 'wd' must be non-zero: wd_mult only multiplies it, so the
# effective decay on our parameter is wd * wd_mult = 1 * 0.1.
trainer = Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1, 'wd': 1})

with autograd.record():
    out = net(x)
    loss = l2_loss(out, label)
loss.backward()
trainer.step(1)  # batch size 1
print(net.weights.data())
```

If you change `wd_mult` to a different value, you will see that the `print` output changes accordingly.