coordConv layer


#1

Dear all,

I just saw this paper on coordinate convolution layers, and I thought it may prove useful for problems in semantic segmentation - edge detection etc. I will test it in the following weeks. So I gave it a try to implement it in a HybridBlock scheme. I haven’t even read the whole paper yet (section S8 has the implementation), but I think the basic idea is to augment the channels with 2 new channels, i indices, j indices. I tried to implement initially a collective index idx = i + dim(i)*j but it wasn’t easy in a HybridBlock format :/.

This is my “solution” (needs polishing for sure):


class coordConv(HybridBlock):
    """2-D convolution applied to the input plus two index channels.

    The two extra channels are obtained by calling ``argsort`` on an
    all-ones tensor along the height axis (2) and the width axis (3).

    NOTE: this relies on ``argsort`` returning tied elements in their
    original order; the discussion following this snippet reports that
    this does not hold for larger inputs.
    """

    def __init__(self, channels, kernel_size=3, padding=1, strides=1, **kwards):
        super(coordConv, self).__init__(**kwards)
        with self.name_scope():
            self.conv = gluon.nn.Conv2D(
                channels=channels,
                kernel_size=kernel_size,
                padding=padding,
                strides=strides,
                **kwards
            )

    def hybrid_forward(self, F, x):
        """Append the two index channels to ``x`` and convolve the result."""
        # A single-channel tensor of ones with the same N, H, W as the input.
        first_channel = F.slice_axis(x, axis=1, begin=0, end=1)
        ones = F.ones_like(first_channel)

        row_idx = F.argsort(ones, axis=2)
        col_idx = F.argsort(ones, axis=3)

        return self.conv(F.concat(x, row_idx, col_idx, dim=1))

# Smoke test: build the layer, hybridize it and run one random NCHW batch.
mynet = coordConv(32)
mynet.initialize(mx.init.Xavier())
mynet.hybridize()

xx = nd.random.uniform(shape=(5, 64, 128, 128))
temp = mynet(xx)
print(temp.shape)
(5,32,128,128)

Could the experts provide some feedback?

I ended up in this solution observing the following tests:

# Probe: argsort of an all-ones tensor along axis 2 (every element is a tie).
temp = nd.ones(shape=[5,1,3,3])
nd.argsort(temp,axis=2)

prints

[[[[0. 0. 0.]
   [1. 1. 1.]
   [2. 2. 2.]]]


 [[[0. 0. 0.]
   [1. 1. 1.]
   [2. 2. 2.]]]


 [[[0. 0. 0.]
   [1. 1. 1.]
   [2. 2. 2.]]]


 [[[0. 0. 0.]
   [1. 1. 1.]
   [2. 2. 2.]]]


 [[[0. 0. 0.]
   [1. 1. 1.]
   [2. 2. 2.]]]]
<NDArray 5x1x3x3 @cpu(0)>

and

# Probe: argsort of an all-ones tensor along axis 3 (every element is a tie).
temp = nd.ones(shape=[5,1,3,3])
nd.argsort(temp,axis=3)

prints

[[[[0. 1. 2.]
   [0. 1. 2.]
   [0. 1. 2.]]]


 [[[0. 1. 2.]
   [0. 1. 2.]
   [0. 1. 2.]]]


 [[[0. 1. 2.]
   [0. 1. 2.]
   [0. 1. 2.]]]


 [[[0. 1. 2.]
   [0. 1. 2.]
   [0. 1. 2.]]]


 [[[0. 1. 2.]
   [0. 1. 2.]
   [0. 1. 2.]]]]
<NDArray 5x1x3x3 @cpu(0)>

so this looks like rows and columns indices (repeated).

edit: The output of nd.argsort is not consistently strictly increasing for larger input arrays, at least when all elements are equal. E.g.

# Same probe on a 32x32 map; only the first sample / first channel is shown.
temp = nd.ones(shape=[5,1,32,32])
nd.argsort(temp,axis=3)[0,0]

prints

[[16. 16. 16. ... 16. 16. 16.]
 [31. 31. 31. ... 31. 31. 31.]
 [30. 30. 30. ... 30. 30. 30.]
 ...
 [ 3.  3.  3. ...  3.  3.  3.]
 [ 2.  2.  2. ...  2.  2.  2.]
 [ 1.  1.  1. ...  1.  1.  1.]]

which is not strictly ordered, so the implementation I presented is wrong.

edit2: A secondary sort solves the problem. The following implementation works:

class coordConv(HybridBlock):
    """2-D convolution applied to the input plus two coordinate channels.

    The coordinate channels are built from ``argsort`` on an all-ones
    tensor, re-sorted so the indices are increasing, then rescaled so that
    each channel runs from -1 to 1.

    Parameters
    ----------
    channels : int
        Number of output channels of the wrapped ``Conv2D``.
    kernel_size, padding, strides
        Forwarded unchanged to ``gluon.nn.Conv2D``.
    **kwards
        Forwarded to both ``HybridBlock.__init__`` and ``gluon.nn.Conv2D``.
    """

    def __init__(self, channels, kernel_size=3, padding=1, strides=1, **kwards):
        HybridBlock.__init__(self, **kwards)
        with self.name_scope():
            # Forward the caller's stride; it used to be pinned to 1, which
            # silently ignored the ``strides`` argument.
            self.conv = gluon.nn.Conv2D(channels=channels, kernel_size=kernel_size,
                                        padding=padding, strides=strides, **kwards)

    def hybrid_forward(self, F, x):
        """Return ``(conv_output, rows, cols)``.

        ``rows`` and ``cols`` are the two normalized coordinate channels and
        are returned alongside the convolution output for visualization.
        """
        # Single-channel tensor of ones sharing N, H, W with the input.
        temp = F.ones_like(F.slice_axis(x, axis=1, begin=0, end=1))

        # argsort yields the indices 0..n-1 along the axis, but not
        # necessarily in order when all values tie; sorting restores order.
        # ``rows`` varies along the last axis, ``cols`` along the one before.
        rows = F.sort(F.argsort(temp, axis=-1), axis=-1)
        cols = F.sort(F.argsort(temp, axis=-2), axis=-2)

        # Scale indices to [0, 1] by the largest index, then shift to [-1, 1].
        # NOTE(review): an axis of length 1 has a largest index of 0, so the
        # division below becomes 0 / 0 for such inputs.
        rows = 2. * F.broadcast_div(rows, F.max(rows)) - 1.
        cols = 2. * F.broadcast_div(cols, F.max(cols)) - 1.

        x = F.concat(x, rows, cols, dim=1)
        return self.conv(x), rows, cols  # rows, cols are returned for vis purposes
     
# Run the layer once on a random NCHW batch; it returns (output, rows, cols).
mynet = coordConv(32)
mynet.initialize(mx.init.Xavier())
mynet.hybridize()
xx = nd.random.uniform(shape=(5, 64, 256, 256))
temp = mynet(xx)

# Plot the two coordinate channels of the first sample side by side.
fig = figure(figsize=(13, 5))
ax1, ax2 = fig.add_subplot(121), fig.add_subplot(122)

im1 = ax1.imshow(temp[1][0, 0].asnumpy())
im2 = ax2.imshow(temp[2][0, 0].asnumpy())

colorbar(im1, ax=ax1)
colorbar(im2, ax=ax2)

in addition, following the pytorch implementation of @kobenaxie , rows, cols are in range [-1,1]


#2

There is a PyTorch implementation here.


#3

@feevos you can also use a combination of arange() and repeat() to achieve the same thing with less hackery.

def _ctx_kwarg(x):
    """Return ``{"ctx": x.context}`` for an NDArray, otherwise an empty dict.

    The result is meant to be splatted into operator calls that need an
    explicit context when working with NDArrays but not otherwise.
    """
    return {"ctx": x.context} if isinstance(x, nd.NDArray) else {}

def _coord_array(F, start, stop, rows, cols, batch_size, **ctx_kwarg):
    """Build row and column coordinate maps.

    Each axis is sampled at ``start + i * (stop - start) / n`` for
    ``i = 0 .. n-1`` (``stop`` itself is excluded), where ``n`` is ``rows``
    or ``cols``.

    Parameters
    ----------
    F : module
        Operator namespace providing ``arange``, ``reshape`` and ``repeat``.
    start, stop : number
        Bounds of the coordinate range.
    rows, cols : int
        Height and width of the coordinate maps.
    batch_size : int
        Number of copies along axis 0.
    **ctx_kwarg
        Extra keyword arguments passed to ``F.arange`` (e.g. ``ctx``).

    Returns
    -------
    tuple
        ``(coord_rows, coord_cols)``, each of shape
        ``(batch_size, 1, rows, cols)``.
    """
    def _axis_coords(count):
        # Generate integer indices and rescale them, instead of handing a
        # fractional step to arange: the element count is then exactly
        # ``count`` regardless of floating-point rounding, and float()
        # keeps the step fractional under integer division as well.
        step = (stop - start) / float(count)
        return F.arange(start=0, stop=count, **ctx_kwarg) * step + start

    row_array = _axis_coords(rows)
    col_array = _axis_coords(cols)

    # Lay the 1-D coordinates out along H (rows) / W (cols), then tile them
    # across the other spatial axis.
    coord_rows = F.repeat(F.reshape(row_array, (1, 1, rows, 1)), repeats=cols, axis=3)
    coord_cols = F.repeat(F.reshape(col_array, (1, 1, 1, cols)), repeats=rows, axis=2)

    return (F.repeat(coord_rows, repeats=batch_size, axis=0),
            F.repeat(coord_cols, repeats=batch_size, axis=0))

class CoordConv(HybridBlock):
    """2-D convolution applied to the input plus two coordinate channels.

    The coordinate channels come from ``_coord_array`` and are concatenated
    to the input along the channel axis before the convolution.

    Parameters
    ----------
    channels : int
        Number of output channels of the wrapped ``Conv2D``.
    kernel_size, padding, strides
        Forwarded unchanged to ``gluon.nn.Conv2D``.
    **kwards
        Forwarded to both ``HybridBlock.__init__`` and ``gluon.nn.Conv2D``.
    """

    def __init__(self, channels, kernel_size=3, padding=1, strides=1, **kwards):
        HybridBlock.__init__(self, **kwards)
        self.xs = None  # x.shape, recorded by forward()
        with self.name_scope():
            # Forward the caller's stride; it used to be pinned to 1, which
            # silently ignored the ``strides`` argument.
            self.conv = gluon.nn.Conv2D(channels=channels, kernel_size=kernel_size, padding=padding,
                                        strides=strides, **kwards)

    def forward(self, x):
        """Record the input shape, then defer to ``HybridBlock.forward``.

        The shape is stored here because it is not accessible inside
        ``hybrid_forward`` once the block is hybridized.
        """
        # x is in NCHW layout
        # NOTE(review): presumably the shape seen on the first hybridized
        # call is what ends up in the cached graph; confirm before feeding
        # inputs of a different size.
        self.xs = x.shape
        return super(CoordConv, self).forward(x)

    def hybrid_forward(self, F, x):
        """Return ``(conv_output, rows, cols)``."""
        rows, cols = _coord_array(F, -1, 1, rows=self.xs[2], cols=self.xs[3],
                                  batch_size=self.xs[0], **_ctx_kwarg(x))
        x = F.concat(x, rows, cols, dim=1)
        return self.conv(x), rows, cols  # rows, cols are returned for vis purposes

A couple of notes:

  1. _ctx_kwarg(x) is required because arange requires context in nd, but not in sym
  2. forward() is overridden to collect the shape when block is hybridized and you don’t have access to the shape in hybrid_forward()

#4

Thank you @kobenaxie, great implementation, I haven’t noticed the channel layers normalization until I saw your solution.

@safrooze thank you, I’ve learned so much from your implementation. Especially the trick to override forward to get the shape of the array is super important. It will prove helpful in so many future applications (you should see the hack I had to perform, with successive splits, to implement in a HybridBlock format the pyramid scene pooling operator). Initially I tried something like you proposed, and used F.shape_array to get the shape of the array and use F.arange to create the range of the indices. However, I couldn’t extract the actual values of the F.shape_array unless I used asscalar() or asnumpy() - which I wanted to avoid for performance reasons. For example when I was trying shape = F.shape_array(x) then (e.g.) shape[0] was an NDArray instance (not an int), and I couldn’t use it in F.arange(start = shape[0], ...).

Again, many thanks!

Edit: a minor modification to your code (the return statement in the forward override wasn’t working for me):

class coordConv2D(HybridBlock):
    """2-D convolution applied to the input plus two coordinate channels.

    Self-contained variant: the context and coordinate helpers are methods
    of the class.

    Parameters
    ----------
    channels : int
        Number of output channels of the wrapped ``Conv2D``.
    kernel_size, padding, strides
        Forwarded unchanged to ``gluon.nn.Conv2D``.
    **kwards
        Forwarded to both ``HybridBlock.__init__`` and ``gluon.nn.Conv2D``.
    """

    def __init__(self, channels, kernel_size=3, padding=1, strides=1, **kwards):
        HybridBlock.__init__(self, **kwards)
        self.xs = None  # x.shape, recorded by forward()
        with self.name_scope():
            # Forward the caller's stride; it used to be pinned to 1, which
            # silently ignored the ``strides`` argument.
            self.conv = gluon.nn.Conv2D(channels=channels, kernel_size=kernel_size, padding=padding,
                                        strides=strides, **kwards)

    def _ctx_kwarg(self, x):
        """Return ``{"ctx": x.context}`` for an NDArray, otherwise ``{}``."""
        if isinstance(x, nd.NDArray):
            return {"ctx": x.context}
        return {}

    def _coord_array(self, F, start, stop, rows, cols, batch_size, **ctx_kwarg):
        """Outputs (rows, cols), each of shape (batch_size, 1, rows, cols).

        Each axis is sampled at ``start + i * (stop - start) / n`` for
        ``i = 0 .. n-1``; ``stop`` itself is excluded.
        """
        # Integer indices rescaled afterwards: the element count is exactly
        # ``rows`` / ``cols`` regardless of floating-point rounding, and
        # float() keeps the step fractional under integer division.
        row_step = (stop - start) / float(rows)
        col_step = (stop - start) / float(cols)
        row_array = F.arange(start=0, stop=rows, **ctx_kwarg) * row_step + start
        col_array = F.arange(start=0, stop=cols, **ctx_kwarg) * col_step + start

        coord_rows = F.repeat(F.reshape(row_array, (1, 1, rows, 1)), repeats=cols, axis=3)
        coord_cols = F.repeat(F.reshape(col_array, (1, 1, 1, cols)), repeats=rows, axis=2)
        return (F.repeat(coord_rows, repeats=batch_size, axis=0),
                F.repeat(coord_cols, repeats=batch_size, axis=0))

    def forward(self, x):
        """Record the input shape, then defer to ``HybridBlock.forward``.

        The shape is stored here because it is not accessible inside
        ``hybrid_forward`` once the block is hybridized.
        """
        # x is in NCHW layout
        self.xs = x.shape
        # Call the base class explicitly: this class is named coordConv2D,
        # so a super(CoordConv, self) call would name the wrong class.
        return HybridBlock.forward(self, x)

    def hybrid_forward(self, F, x):
        """Return ``(conv_output, rows, cols)``."""
        rows, cols = self._coord_array(F, -1, 1, rows=self.xs[2], cols=self.xs[3],
                                       batch_size=self.xs[0], **self._ctx_kwarg(x))
        x = F.concat(x, rows, cols, dim=1)
        return self.conv(x), rows, cols  # rows, cols are returned for vis purposes

#5

@feevos What we need is for all operators that accept an integer or float argument to also accept an NDArray of shape (1,). That would open up so many tricks!