Using a CustomOp in a HybridBlock

Hey everyone,

I'm trying to use a CustomOp within a HybridBlock; however, I keep getting this error: TypeError: hybrid_forward() got multiple values for argument 'running_mean'

Below is my code; any help would be greatly appreciated!

import mxnet as mx
from mxnet import gluon, initializer
from mxnet.gluon import HybridBlock

class MyOperator(mx.operator.CustomOp):
    def __init__(self, size):
        super(MyOperator, self).__init__()
        self.size = size

    def forward(self, is_train, req, in_data, out_data, aux):
        # in_data holds NDArrays, so use mx.nd (not mx.sym) inside the operator
        x = in_data[0]
        reduction_axes = tuple(range(int(self.size)))

        mean = mx.nd.mean(x, axis=reduction_axes)

        centered_input = mx.nd.broadcast_minus(lhs=x, rhs=mean)
        var = mx.nd.mean(data=(centered_input ** 2), axis=reduction_axes)

        # normalize with the running statistics
        std = mx.nd.sqrt(in_data[2] + 1e-3)
        res = mx.nd.broadcast_minus(x, in_data[1])
        res = mx.nd.broadcast_div(res, std)

        self.assign(out_data[0], req[0], res)

        if is_train:
            moving_mean = in_data[1] * .99 + mean * (1. - .99)
            moving_var = in_data[2] * .99 + var * (1. - .99)
            self.assign(in_data[1], req[0], moving_mean)
            self.assign(in_data[2], req[0], moving_var)
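
    # Sketch, not part of the original post: loss.backward() further down needs a
    # backward() on the CustomOp as well; this stub just passes the incoming gradient
    # through to the data input instead of computing the true normalization gradient.
    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        self.assign(in_grad[0], req[0], out_grad[0])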

@mx.operator.register("myoperator") 
class MyOperatorProp(mx.operator.CustomOpProp):
    def __init__(self, size):
        super(MyOperatorProp, self).__init__(True)
        self.size = size

    def list_arguments(self):
        return ['data', 'moving_mean', 'moving_var']

    def create_operator(self, ctx, in_shapes, in_dtypes):
        # create and return an instance of the CustomOp
        return MyOperator(size=self.size)
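
    # Sketch, not part of the original post: with three inputs of different shapes it
    # can help to spell out the outputs and shapes explicitly. This assumes both
    # moving statistics have shape (size,).
    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        data_shape = in_shape[0]
        stat_shape = (int(self.size),)
        # inputs: data, moving_mean, moving_var; one output shaped like data; no aux states
        return [data_shape, stat_shape, stat_shape], [data_shape], []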

class MyBlock(HybridBlock):
    def __init__(self, size, **kwargs):
        super(MyBlock, self).__init__(**kwargs)
        self.size = size
        
        self.running_mean = self.params.get('running_mean',
                                      shape=(size,),
                                      init='zeros', allow_deferred_init=True)
        self.running_var = self.params.get('running_var',
                                      shape=(size,),
                                      init='ones', allow_deferred_init=True)

    def hybrid_forward(self, x, running_mean, running_var):
        return mx.symbol.Custom(x, running_mean, running_var, size=self.size, op_type='myoperator')

def print_params(title, net):
    """
    Helper function to print out the current values of the network's parameters
    """
    print(title)
    hybridlayer_params = {k: v for k, v in net.collect_params().items() }

    for key, value in hybridlayer_params.items():
        print('{} = {}\n'.format(key, value.data()))

from mxnet import nd

input = nd.array([[[2, 4], [6, 8], [10, 12], [14, 16], [18, 20]]], ctx=mx.cpu())

newinput = mx.nd.uniform(shape=(4, 3))

net = gluon.nn.HybridSequential()                             # Define a Neural Network as a sequence of hybrid blocks
with net.name_scope():                                        # Used to disambiguate saving and loading net parameters
    net.add(MyBlock(size=2))

net.initialize(initializer.Xavier(magnitude=2.24), ctx=mx.cpu())                # Initialize parameters of all layers
net.hybridize()


label = nd.array([[[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]]], ctx=mx.cpu())

newlabel = nd.array([[[1], [2], [3], [4]]], ctx=mx.cpu())


mse_loss = gluon.loss.L2Loss()                                # Mean squared error between output and label
trainer = gluon.Trainer(net.collect_params(),                 # Init trainer with Stochastic Gradient Descent (sgd) optimization method and parameters for it
                        'sgd',
                        {'learning_rate': 0.1, 'momentum': 0.9 })

from mxnet import autograd

with autograd.record():                                       # Autograd records computations done on NDArrays inside the "with" block
    output = net(input)                                       # Run forward propagation

    print_params("=========== Parameters after forward pass ===========\n", net)
    loss = mse_loss(output, label)
    print(output)

loss.backward()                                               # Backward computes gradients and stores them in the .grad field of each parameter
trainer.step(input.shape[0])                                  # Trainer updates the parameters of every block, using the .grad field and the optimization method (sgd in this example)
# We provide the batch size, which is used as a divisor in the cost function formula
print_params("=========== Parameters after backward pass ===========\n", net)

print(net(input))

Solved it!

It was because I was missing the F argument in hybrid_forward. Gluon calls hybrid_forward(F, x, ..., running_mean=..., running_var=...), where F is mx.nd in imperative mode and mx.sym after hybridize(), and the registered parameters are passed in as keyword arguments. Without F in the signature, F got bound to x, the input got bound to running_mean positionally, and that collided with the running_mean keyword argument, hence the TypeError. The fixed method:

def hybrid_forward(self, F, x, running_mean, running_var):
    return mx.symbol.Custom(x, running_mean, running_var, size=self.size, op_type='myoperator')
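
Side note: since F is mx.nd before hybridize() and mx.sym afterwards, calling the operator through F instead of mx.symbol keeps the same block usable in both imperative and symbolic mode. A minimal sketch of the same method, assuming nothing else changes:

def hybrid_forward(self, F, x, running_mean, running_var):
    # F.Custom resolves to mx.nd.Custom or mx.sym.Custom depending on the mode
    return F.Custom(x, running_mean, running_var, size=self.size, op_type='myoperator')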