Dear all,
I am trying to create a custom convolutional layer as a HybridBlock. The layer needs a constant nd.array. Based on discussions here and here, I create a custom initializer CustomInit and then, inside the HybridBlock, a custom parameter self.Bijkl that is initialized with it. The full example is below.
Essentials:
import mxnet as mx
from mxnet import nd, gluon
from mxnet.gluon import HybridBlock
@mx.init.register
class CustomInit(mx.initializer.Initializer):
    def __init__(self, kernel_size=3, kernel_effective_size=5, **kwargs):
        mx.initializer.Initializer.__init__(self, **kwargs)
        # A trivial constant tensor
        self.Bijkl = nd.random_uniform(shape=[kernel_size, kernel_size,
                                              kernel_effective_size,
                                              kernel_effective_size])

    def _init_weight(self, name, arr):
        arr[:] = self.Bijkl
class Conv2DS(HybridBlock):
    def __init__(self, nfilters, nchannels=0, kernel_size=3,
                 kernel_effective_size=5, use_bias=True, padding=(0, 0),
                 **kwargs):
        HybridBlock.__init__(self, **kwargs)
        self.nchannels = nchannels
        self.nfilters = nfilters
        self.kernel_size = kernel_size
        self.kernel_eff = kernel_effective_size
        self.use_bias = use_bias
        self.pad = padding

        with self.name_scope():
            self.weight = self.params.get(
                'weight', allow_deferred_init=True,
                shape=(nfilters, nchannels, kernel_size, kernel_size))
            # This is the custom tensor I need to create, a constant.
            self.Bijkl = self.params.get(
                'bijkl', allow_deferred_init=True,
                init=CustomInit(self.kernel_size, self.kernel_eff),
                grad_req='null',
                shape=(kernel_size, kernel_size,
                       kernel_effective_size, kernel_effective_size))
            if self.use_bias:
                self.bias = self.params.get(
                    'bias', allow_deferred_init=True,
                    init=mx.init.Zero(),
                    shape=(self.nfilters,))
    def hybrid_forward(self, F, _x):
        # These finalize deferring the input shape
        # ------------------------------------------------------------------
        # self.weight.shape = (self.nfilters, _x.shape[1],
        #                      self.kernel_size, self.kernel_size)
        # self.weight._finish_deferred_init()
        # ------------------------------------------------------------------
        weight = F.sum(F.dot(self.weight.data(), self.Bijkl.data()),
                       axis=[2, 3])
        if self.use_bias:
            conv = F.Convolution(data=_x,
                                 weight=weight,
                                 bias=self.bias.data(),
                                 num_filter=self.nfilters,
                                 kernel=[self.kernel_eff, self.kernel_eff],
                                 pad=self.pad)
        else:
            conv = F.Convolution(data=_x,
                                 weight=weight,
                                 no_bias=True,
                                 num_filter=self.nfilters,
                                 kernel=[self.kernel_eff, self.kernel_eff],
                                 pad=self.pad)
        return conv
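As a side note, the initializer itself can be exercised on its own like this (a quick standalone sketch on CPU, calling _init_weight directly; not part of the layer):

init = CustomInit(kernel_size=3, kernel_effective_size=5)
arr = nd.empty((3, 3, 5, 5))
init._init_weight('bijkl', arr)   # copies the constant tensor into arr
print(arr.shape)                  # (3, 3, 5, 5)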
I can initialize my layer:
nchannels = 8
nfilters = 16
dim = 128
mynet = Conv2DS(nfilters, kernel_size=5, kernel_effective_size=15, use_bias=False)
mynet.initialize(mx.init.Xavier(), ctx=mx.gpu())
but when I run a single forward pass I get the following error:
xx = nd.random_uniform(shape=[4, nchannels, dim, dim], ctx=mx.gpu())
temp = mynet(xx)
The error trace:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-5-27f92d21b3eb> in <module>()
----> 1 temp = mynet (xx)
/home/dia021/anaconda2/lib/python2.7/site-packages/mxnet/gluon/block.pyc in __call__(self, *args)
358 def __call__(self, *args):
359 """Calls forward. Only accepts positional arguments."""
--> 360 return self.forward(*args)
361
362 def forward(self, *args):
/home/dia021/anaconda2/lib/python2.7/site-packages/mxnet/gluon/block.pyc in forward(self, x, *args)
568 params = {i: j.data(ctx) for i, j in self._reg_params.items()}
569 except DeferredInitializationError:
--> 570 self._finish_deferred_init(self._active, x, *args)
571
572 if self._active:
/home/dia021/anaconda2/lib/python2.7/site-packages/mxnet/gluon/block.pyc in _finish_deferred_init(self, hybrid, *args)
458
459 def _finish_deferred_init(self, hybrid, *args):
--> 460 self.infer_shape(*args)
461 if hybrid:
462 for is_arg, i in self._cached_op_args:
/home/dia021/anaconda2/lib/python2.7/site-packages/mxnet/gluon/block.pyc in infer_shape(self, *args)
519 def infer_shape(self, *args):
520 """Infers shape of Parameters from inputs."""
--> 521 self._infer_attrs('infer_shape', 'shape', *args)
522
523 def infer_type(self, *args):
/home/dia021/anaconda2/lib/python2.7/site-packages/mxnet/gluon/block.pyc in _infer_attrs(self, infer_fn, attr, *args)
507 def _infer_attrs(self, infer_fn, attr, *args):
508 """Generic infer attributes."""
--> 509 inputs, out = self._get_graph(*args)
510 args, _ = _flatten(args)
511 arg_attrs, _, aux_attrs = getattr(out, infer_fn)(
/home/dia021/anaconda2/lib/python2.7/site-packages/mxnet/gluon/block.pyc in _get_graph(self, *args)
426 params = {i: j.var() for i, j in self._reg_params.items()}
427 with self.name_scope():
--> 428 out = self.hybrid_forward(symbol, *grouped_inputs, **params) # pylint: disable=no-value-for-parameter
429 out, self._out_format = _flatten(out)
430
TypeError: hybrid_forward() got an unexpected keyword argument 'Bijkl'
I think the problem is with the parameter name self.Bijkl? So I guess my question is: is there currently a simple way to wrap a constant tensor of shape [n1,n2,n3,n4] inside a HybridBlock? I have a solution for Block, but I want to take advantage of HybridBlock's performance.
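Reading the traceback, block.py collects the registered parameters (params = {i: j.var() for i, j in self._reg_params.items()}) and passes them into hybrid_forward as keyword arguments, so I suspect hybrid_forward needs to accept weight, Bijkl and bias in its signature. A minimal sketch of what I think the fix might look like (untested; it assumes the keyword names match the attribute names, with bias absent when use_bias=False):

    def hybrid_forward(self, F, _x, weight, Bijkl, bias=None):
        # Gluon hands over the registered parameters as keyword
        # arguments, so use them directly instead of calling .data().
        w = F.sum(F.dot(weight, Bijkl), axis=[2, 3])
        if bias is None:
            return F.Convolution(data=_x, weight=w, no_bias=True,
                                 num_filter=self.nfilters,
                                 kernel=[self.kernel_eff, self.kernel_eff],
                                 pad=self.pad)
        return F.Convolution(data=_x, weight=w, bias=bias,
                             num_filter=self.nfilters,
                             kernel=[self.kernel_eff, self.kernel_eff],
                             pad=self.pad)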
Thank you for your time and the great work you put into MXNet!