Pooling check failed

MxnetError: Error in operator pool92_fwd: [19:56:02] src/operator/nn/pooling.cc:145: Check failed: param.kernel[1] <= dshape[3] + 2 * param.pad[1] kernel size (2) exceeds input (1 padded to 1)

I am porting a U-Net Keras implementation to MXNet, and my layer shapes now match.

Keras:

conv1 Tensor("activation_5/Relu:0", shape=(32, 101, 101, 16), dtype=float32)
pool1 Tensor("dropout_1/cond/Merge:0", shape=(32, 50, 50, 16), dtype=float32)
conv2 Tensor("activation_10/Relu:0", shape=(32, 50, 50, 32), dtype=float32)
pool2 Tensor("dropout_2/cond/Merge:0", shape=(32, 25, 25, 32), dtype=float32)
conv3 Tensor("activation_15/Relu:0", shape=(32, 25, 25, 64), dtype=float32)
pool3 Tensor("dropout_3/cond/Merge:0", shape=(32, 12, 12, 64), dtype=float32)
conv4 Tensor("activation_20/Relu:0", shape=(32, 12, 12, 128), dtype=float32)
pool4 Tensor("dropout_4/cond/Merge:0", shape=(32, 6, 6, 128), dtype=float32)
convm Tensor("activation_25/Relu:0", shape=(32, 6, 6, 256), dtype=float32)
deconv4 Tensor("conv2d_transpose_1/BiasAdd:0", shape=(32, 12, 12, 128), dtype=float32)
uconv4 Tensor("dropout_5/cond/Merge:0", shape=(32, 12, 12, 256), dtype=float32)
uconv4 Tensor("activation_30/Relu:0", shape=(32, 12, 12, 128), dtype=float32)
deconv3 Tensor("conv2d_transpose_2/BiasAdd:0", shape=(32, 25, 25, 64), dtype=float32)
uconv3 Tensor("dropout_6/cond/Merge:0", shape=(32, 25, 25, 128), dtype=float32)
uconv3 Tensor("activation_35/Relu:0", shape=(32, 25, 25, 64), dtype=float32)
deconv2 Tensor("conv2d_transpose_3/BiasAdd:0", shape=(32, 50, 50, 32), dtype=float32)
uconv2 Tensor("concatenate_3/concat:0", shape=(32, 50, 50, 64), dtype=float32)
uconv2 Tensor("activation_40/Relu:0", shape=(32, 50, 50, 32), dtype=float32)
deconv1 Tensor("conv2d_transpose_4/BiasAdd:0", shape=(32, 101, 101, 16), dtype=float32)
uconv1 Tensor("concatenate_4/concat:0", shape=(32, 101, 101, 32), dtype=float32)
uconv1 Tensor("activation_45/Relu:0", shape=(32, 101, 101, 16), dtype=float32)
output_layer Tensor("activation_46/Sigmoid:0", shape=(32, 101, 101, 1), dtype=float32)

Mxnet:

conv1 [(32, 16, 101, 101)]
pool1 [(32, 16, 50, 50)]
conv2 [(32, 32, 50, 50)]
pool2 [(32, 32, 25, 25)]
conv3 [(32, 64, 25, 25)]
pool3 [(32, 64, 12, 12)]
conv4 [(32, 128, 12, 12)]
pool4 [(32, 128, 6, 6)]
convm (32, 256, 6, 6)
deconv4 [(32, 128, 12, 12)]
uconv4 [(32, 256, 12, 12)]
uconv4 [(32, 128, 12, 12)]
deconv3 [(32, 64, 25, 25)]
uconv3 [(32, 128, 25, 25)]
uconv3 [(32, 64, 25, 25)]
deconv2 [(32, 32, 50, 50)]
uconv2 [(32, 64, 50, 50)]
uconv2 [(32, 32, 50, 50)]
deconv1 [(32, 16, 101, 101)]
uconv1 [(32, 32, 101, 101)]
uconv1 [(32, 16, 101, 101)]
output_layer [(32, 1, 101, 101)]

However, the dimension check from the subject line fails when calling fit. I am trying to debug from source, but when building master it thinks it is using cuda-9.2. Am I right that I can't build master with cuda-9.1?

I am new to MXNet, so maybe I am doing something obviously wrong that I just haven't run into yet. Here is my model source:

def build_model(input_layer, start_neurons, DropoutRatio = 0.5):
    # 101 -> 50
    k_size = (3, 3)
    same_padding = (k_size[0]//2, k_size[1]//2)
    #input_layer = mx.sym.transpose(input_layer, [0, 3, 1, 2])
    conv1 = mx.gluon.nn.Conv2D(start_neurons * 1, kernel_size=k_size, padding=same_padding)(input_layer)
    #print('conv1', conv1.infer_shape(data=(32, 1, 101, 101))[1])
    conv1 = residual_block(conv1,start_neurons * 1)
    #print('conv1', conv1.infer_shape(data=(32, 16, 101, 101))[1])
    conv1 = residual_block(conv1,start_neurons * 1, True)
    print('conv1', conv1.infer_shape(data=(32, 1, 101, 101))[1])
    pool1 = mx.gluon.nn.MaxPool2D()(conv1) #(2, 2)
    #print('pool1', pool1.infer_shape(data=(32, 16, 101, 101))[1])
    pool1 = mx.gluon.nn.Dropout(DropoutRatio/2)(pool1)
    print('pool1', pool1.infer_shape(data=(32, 1, 101, 101))[1])

    # 50 -> 25
    conv2 = mx.gluon.nn.Conv2D(start_neurons * 2, kernel_size=k_size, padding=same_padding)(pool1)
    conv2 = residual_block(conv2,start_neurons * 2)
    conv2 = residual_block(conv2,start_neurons * 2, True)
    print('conv2', conv2.infer_shape(data=(32, 1, 101, 101))[1])
    pool2 = mx.gluon.nn.MaxPool2D()(conv2)
    pool2 = mx.gluon.nn.Dropout(DropoutRatio)(pool2)
    print('pool2', pool2.infer_shape(data=(32, 1, 101, 101))[1])

    # 25 -> 12
    conv3 = mx.gluon.nn.Conv2D(start_neurons * 4, kernel_size=k_size, padding=same_padding)(pool2)
    conv3 = residual_block(conv3,start_neurons * 4)
    conv3 = residual_block(conv3,start_neurons * 4, True)
    print('conv3', conv3.infer_shape(data=(32, 1, 101, 101))[1])
    pool3 = mx.gluon.nn.MaxPool2D()(conv3)
    pool3 = mx.gluon.nn.Dropout(DropoutRatio)(pool3)
    print('pool3', pool3.infer_shape(data=(32, 1, 101, 101))[1])

    # 12 -> 6
    conv4 = mx.gluon.nn.Conv2D(start_neurons * 8, kernel_size=k_size, padding=same_padding)(pool3)
    conv4 = residual_block(conv4,start_neurons * 8)
    conv4 = residual_block(conv4,start_neurons * 8, True)
    print('conv4', conv4.infer_shape(data=(32, 1, 101, 101))[1])
    pool4 = mx.gluon.nn.MaxPool2D()(conv4)
    pool4 = mx.gluon.nn.Dropout(DropoutRatio)(pool4)
    print('pool4', pool4.infer_shape(data=(32, 1, 101, 101))[1])

    # Middle
    convm = mx.gluon.nn.Conv2D(start_neurons * 16, kernel_size=k_size, padding=same_padding)(pool4)
    convm = residual_block(convm,start_neurons * 16)
    convm = residual_block(convm,start_neurons * 16, True)
    convm_output_shape = convm.infer_shape(data=(32, 1, 101, 101))[1][0]
    print('convm', convm_output_shape)
    
    # 6 -> 12
    #same_padding_22_stride = \
    #    calc_same_padding(convm_output_shape[2], convm_output_shape[3], 
    #                      k_size[0], k_size[1], (2, 2))
    deconv4 = mx.gluon.nn.Conv2DTranspose(start_neurons * 8, kernel_size=k_size, strides=(2, 2),
                                         padding=(1, 1), output_padding=(1, 1))(convm)
    # strides=(2, 2)
    uconv4 = mx.sym.concat(deconv4, conv4,dim=1) #concatenate([deconv4, conv4])
    uconv4 = mx.gluon.nn.Dropout(DropoutRatio)(uconv4)
    print('deconv4', deconv4.infer_shape(data=(32, 1, 101, 101))[1])
    print('uconv4', uconv4.infer_shape(data=(32, 1, 101, 101))[1])#(32, 128, 12, 12)
    
    uconv4 = mx.gluon.nn.Conv2D(start_neurons * 8, kernel_size=k_size, padding=same_padding)(uconv4)
    uconv4 = residual_block(uconv4,start_neurons * 8)
    uconv4 = residual_block(uconv4,start_neurons * 8, True)
    print('uconv4', uconv4.infer_shape(data=(32, 1, 101, 101))[1])
    #print('uconv4', uconv4.infer_shape(data=(32, 16, 101, 101))[1])
    
    # 12 -> 25
    #deconv3 = Conv2DTranspose(start_neurons * 4, (3, 3), strides=(2, 2), padding="same")(uconv4)
    deconv3 = mx.gluon.nn.Conv2DTranspose(start_neurons * 4, kernel_size=k_size,
                                          strides=(2, 2))(uconv4) # padding="valid"
    uconv3 = mx.sym.concat(deconv3, conv3,dim=1)    
    uconv3 = mx.gluon.nn.Dropout(DropoutRatio)(uconv3)
    print('deconv3', deconv3.infer_shape(data=(32, 1, 101, 101))[1])
    print('uconv3', uconv3.infer_shape(data=(32, 1, 101, 101))[1])
    
    uconv3 = mx.gluon.nn.Conv2D(start_neurons * 4, kernel_size=k_size, padding=same_padding)(uconv3)
    uconv3 = residual_block(uconv3,start_neurons * 4)
    uconv3 = residual_block(uconv3,start_neurons * 4, True)
    print('uconv3', uconv3.infer_shape(data=(32, 1, 101, 101))[1])
    #print('uconv3', uconv3.infer_shape(data=(32, 16, 101, 101))[1])

    # 25 -> 50
    deconv2 = mx.gluon.nn.Conv2DTranspose(start_neurons * 2, kernel_size=k_size, strides=(2, 2), 
                                          padding=(1, 1), output_padding=(1, 1))(uconv3)
    uconv2 = mx.sym.concat(deconv2, conv2,dim=1)
    print('deconv2', deconv2.infer_shape(data=(32, 1, 101, 101))[1])
    print('uconv2', uconv2.infer_shape(data=(32, 1, 101, 101))[1])
        
    uconv2 = mx.gluon.nn.Dropout(DropoutRatio)(uconv2)
    uconv2 = mx.gluon.nn.Conv2D(start_neurons * 2, kernel_size=k_size, padding=same_padding)(uconv2)
    uconv2 = residual_block(uconv2,start_neurons * 2)
    uconv2 = residual_block(uconv2,start_neurons * 2, True)
    print('uconv2', uconv2.infer_shape(data=(32, 1, 101, 101))[1])
    #print('deconv2', deconv2.infer_shape(data=(32, 16, 101, 101))[1])
    #print('uconv2', uconv2.infer_shape(data=(32, 16, 101, 101))[1])
    
    # 50 -> 101
    #deconv1 = Conv2DTranspose(start_neurons * 1, (3, 3), strides=(2, 2), padding="same")(uconv2)
    deconv1 = mx.gluon.nn.Conv2DTranspose(start_neurons * 1, 
                                          kernel_size=k_size, strides=(2, 2))(uconv2)
    uconv1 = mx.sym.concat(deconv1, conv1,dim=1)
    print('deconv1', deconv1.infer_shape(data=(32, 1, 101, 101))[1])
    print('uconv1', uconv1.infer_shape(data=(32, 1, 101, 101))[1])
    
    
    uconv1 = mx.gluon.nn.Dropout(DropoutRatio)(uconv1)
    uconv1 = mx.gluon.nn.Conv2D(start_neurons * 1, kernel_size=k_size, padding=same_padding)(uconv1)
    uconv1 = residual_block(uconv1,start_neurons * 1)
    uconv1 = residual_block(uconv1,start_neurons * 1, True)
    print('uconv1', uconv1.infer_shape(data=(32, 1, 101, 101))[1])
    #print('deconv1', deconv1.infer_shape(data=(32, 16, 101, 101))[1])
    #print('uconv1', uconv1.infer_shape(data=(32, 16, 101, 101))[1])
    
    #uconv1 = Dropout(DropoutRatio/2)(uconv1)
    #output_layer = Conv2D(1, (1,1), padding="same", activation="sigmoid")(uconv1)
    k_size = (1, 1)
    same_padding = (k_size[0]//2, k_size[1]//2)
    output_layer_noActi = mx.gluon.nn.Conv2D(1, (1,1), padding=same_padding)(uconv1)
    output_layer = mx.gluon.nn.Activation('sigmoid')(output_layer_noActi)
    print('output_layer', output_layer.infer_shape(data=(32, 1, 101, 101))[1])
    
    return output_layer

....

print(mx.__version__)
input_layer = mx.sym.Variable('data')
label_layer = mx.sym.Variable('softmax_label')#Input((img_size_target, img_size_target, 1))
output_layer = build_model(input_layer, 16,0.5)

data_names = ('data',)
label_names = ('softmax_label',)
#devs = mx.cpu() if args.gpus is None or args.gpus is '' else [
#        mx.gpu(int(i)) for i in args.gpus.split(',')]
module = mx.mod.Module(output_layer, 
                       data_names=data_names, label_names=label_names, context=mx.gpu())
#model1 = Model(input_layer, output_layer)
epochs = 50
batch_size = 32

train_iter = mx.io.NDArrayIter(
        x_train, y_train, batch_size, shuffle=True)
valid_iter = mx.io.NDArrayIter(
        x_valid, y_valid, batch_size)

batches_per_epoch = x_train.shape[0]//batch_size
progress_bar = mx.callback.ProgressBar(total=batches_per_epoch)
log_valid_metrics = mx.callback.LogValidationMetricsCallback()

module.fit(train_data = train_iter,
            eval_data = valid_iter,
            eval_metric = 'ce', #'acc',
            kvstore = 'local',
            optimizer = 'adam', #args.optimizer,
            optimizer_params = { 'learning_rate': 0.01 }, #args.lr },
            initializer = mx.initializer.Uniform(0.1),
            num_epoch = epochs,
            batch_end_callback = [progress_bar, 
                                  mx.callback.Speedometer(batch_size, batches_per_epoch)],
            epoch_end_callback = [log_valid_metrics])
print(module.summary())

The dshape[3] being used is 1. However, the input shape (which dshape is taken from, i.e. the first element of in_shape in src/operator/nn/pooling.cc:~145) should be much larger given the dimensions of conv1, conv2, conv3, and conv4 above. I can post the full stack trace if it'll be useful.

Hi @cory, would be great if you could post the complete stack trace, thanks!

Thanks @thomelane for responding! Here it is:

/home/cory/anaconda3/lib/python3.6/site-packages/mxnet/module/base_module.py:54: UserWarning: You created Module with Module(..., label_names=['softmax_label']) but input with name 'softmax_label' is not found in symbol.list_arguments(). Did you mean one of:
	data
  warnings.warn(msg)
---------------------------------------------------------------------------
MXNetError                                Traceback (most recent call last)
/home/cory/anaconda3/lib/python3.6/site-packages/mxnet/symbol/symbol.py in simple_bind(self, ctx, grad_req, type_dict, stype_dict, group2ctx, shared_arg_names, shared_exec, shared_buffer, **kwargs)
   1512                                                  shared_exec_handle,
-> 1513                                                  ctypes.byref(exe_handle)))
   1514         except MXNetError as e:

/home/cory/anaconda3/lib/python3.6/site-packages/mxnet/base.py in check_call(ret)
    148     if ret != 0:
--> 149         raise MXNetError(py_str(_LIB.MXGetLastError()))
    150 

MXNetError: Error in operator pool0_fwd: [19:24:53] src/operator/nn/pooling.cc:145: Check failed: param.kernel[1] <= dshape[3] + 2 * param.pad[1] kernel size (2) exceeds input (1 padded to 1)

Stack trace returned 10 entries:
[bt] (0) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x31f81a) [0x7fa79466581a]
[bt] (1) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x31fe41) [0x7fa794665e41]
[bt] (2) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x6872eb) [0x7fa7949cd2eb]
[bt] (3) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x24de24a) [0x7fa79682424a]
[bt] (4) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x24e0b84) [0x7fa796826b84]
[bt] (5) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x24cc0e6) [0x7fa7968120e6]
[bt] (6) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x24ccbd4) [0x7fa796812bd4]
[bt] (7) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(MXExecutorSimpleBind+0x2378) [0x7fa7967717d8]
[bt] (8) /home/cory/anaconda3/lib/python3.6/lib-dynload/../../libffi.so.6(ffi_call_unix64+0x4c) [0x7fa823201adc]
[bt] (9) /home/cory/anaconda3/lib/python3.6/lib-dynload/../../libffi.so.6(ffi_call+0x1f2) [0x7fa823201282]



During handling of the above exception, another exception occurred:

RuntimeError                              Traceback (most recent call last)
<ipython-input-17-985b6375f437> in <module>()
     34             batch_end_callback = [progress_bar, 
     35                                   mx.callback.Speedometer(batch_size, batches_per_epoch)],
---> 36             epoch_end_callback = [log_valid_metrics])
     37 print(module.summary())
     38 

/home/cory/anaconda3/lib/python3.6/site-packages/mxnet/module/base_module.py in fit(self, train_data, eval_data, eval_metric, epoch_end_callback, batch_end_callback, kvstore, optimizer, optimizer_params, eval_end_callback, eval_batch_end_callback, initializer, arg_params, aux_params, allow_missing, force_rebind, force_init, begin_epoch, num_epoch, validation_metric, monitor, sparse_row_id_fn)
    482 
    483         self.bind(data_shapes=train_data.provide_data, label_shapes=train_data.provide_label,
--> 484                   for_training=True, force_rebind=force_rebind)
    485         if monitor is not None:
    486             self.install_monitor(monitor)

/home/cory/anaconda3/lib/python3.6/site-packages/mxnet/module/module.py in bind(self, data_shapes, label_shapes, for_training, inputs_need_grad, force_rebind, shared_module, grad_req)
    428                                                      fixed_param_names=self._fixed_param_names,
    429                                                      grad_req=grad_req, group2ctxs=self._group2ctxs,
--> 430                                                      state_names=self._state_names)
    431         self._total_exec_bytes = self._exec_group._total_exec_bytes
    432         if shared_module is not None:

/home/cory/anaconda3/lib/python3.6/site-packages/mxnet/module/executor_group.py in __init__(self, symbol, contexts, workload, data_shapes, label_shapes, param_names, for_training, inputs_need_grad, shared_group, logger, fixed_param_names, grad_req, state_names, group2ctxs)
    263         self.num_outputs = len(self.symbol.list_outputs())
    264 
--> 265         self.bind_exec(data_shapes, label_shapes, shared_group)
    266 
    267     def decide_slices(self, data_shapes):

/home/cory/anaconda3/lib/python3.6/site-packages/mxnet/module/executor_group.py in bind_exec(self, data_shapes, label_shapes, shared_group, reshape)
    359             else:
    360                 self.execs.append(self._bind_ith_exec(i, data_shapes_i, label_shapes_i,
--> 361                                                       shared_group))
    362 
    363         self.data_shapes = data_shapes

/home/cory/anaconda3/lib/python3.6/site-packages/mxnet/module/executor_group.py in _bind_ith_exec(self, i, data_shapes, label_shapes, shared_group)
    637                                            type_dict=input_types, shared_arg_names=self.param_names,
    638                                            shared_exec=shared_exec, group2ctx=group2ctx,
--> 639                                            shared_buffer=shared_data_arrays, **input_shapes)
    640         self._total_exec_bytes += int(executor.debug_str().split('\n')[-3].split()[1])
    641         return executor

/home/cory/anaconda3/lib/python3.6/site-packages/mxnet/symbol/symbol.py in simple_bind(self, ctx, grad_req, type_dict, stype_dict, group2ctx, shared_arg_names, shared_exec, shared_buffer, **kwargs)
   1517                 error_msg += "%s: %s\n" % (k, v)
   1518             error_msg += "%s" % e
-> 1519             raise RuntimeError(error_msg)
   1520 
   1521         # update shared_buffer

RuntimeError: simple_bind error. Arguments:
data: (32, 101, 101, 1)
softmax_label: (32, 101, 101, 1)
Error in operator pool0_fwd: [19:24:53] src/operator/nn/pooling.cc:145: Check failed: param.kernel[1] <= dshape[3] + 2 * param.pad[1] kernel size (2) exceeds input (1 padded to 1)

Stack trace returned 10 entries:
[bt] (0) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x31f81a) [0x7fa79466581a]
[bt] (1) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x31fe41) [0x7fa794665e41]
[bt] (2) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x6872eb) [0x7fa7949cd2eb]
[bt] (3) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x24de24a) [0x7fa79682424a]
[bt] (4) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x24e0b84) [0x7fa796826b84]
[bt] (5) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x24cc0e6) [0x7fa7968120e6]
[bt] (6) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x24ccbd4) [0x7fa796812bd4]
[bt] (7) /home/cory/anaconda3/lib/python3.6/site-packages/mxnet/libmxnet.so(MXExecutorSimpleBind+0x2378) [0x7fa7967717d8]
[bt] (8) /home/cory/anaconda3/lib/python3.6/lib-dynload/../../libffi.so.6(ffi_call_unix64+0x4c) [0x7fa823201adc]
[bt] (9) /home/cory/anaconda3/lib/python3.6/lib-dynload/../../libffi.so.6(ffi_call+0x1f2) [0x7fa823201282]

Update: After rebuilding from source, I can confirm the shape does have a 1 in it, but why is it getting that shape and not the shape printed by infer_shape?

MXNetError: Error in operator pool4_fwd: [23:38:20] src/operator/nn/pooling.cc:162: Check failed: param.kernel[1] <= dshape[3] + 2 * param.pad[1] kernel size (2) exceeds input (1 padded to 1). dshape is (32, 16, 101, 1)

For pool1, this should be (32, 16, 101, 101)

(32, 32, 50, 50) for pool2
(32, 64, 25, 25) for pool3
(32, 128, 12, 12) for pool4

This kind of looks like conv1's output, but I'd need to see where dshape (the first element of in_shape) is coming from.

@thomelane the dshape or (*in_shape)[0] should match the conv shapes, right?

Here’s the code segment:

static bool PoolingShape(const nnvm::NodeAttrs &attrs,
                           std::vector<TShape> *in_shape,
                           std::vector<TShape> *out_shape) {
    const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
    CHECK_EQ(in_shape->size(), 1U);
    if (param.pool_type == pool_enum::kLpPooling) {
      CHECK(param.p_value.has_value());
    }
    const TShape &dshape = (*in_shape)[0];
    if (param.pooling_convention == pool_enum::kSame) {
      CHECK_EQ(dshape.ndim(), 3U) 
        << "Pooling: Input data should be 3D in (batch, channel, x)"
        << ". Currently 'same' supports Max Pooling 1-D";
      CHECK(param.pad[0] == 0 && param.pad[1] == 0 && param.pad[2] == 0)
        << "Same pooling convention disables the use of pad parameter.";
    }
    CHECK_GE(dshape.ndim(), 3U) 
        << "Pooling: Input data should be  3D in (batch, channel, x)"
        << " Or 4D in (batch, channel, y, x) "
        << " Or 5D in (batch, channel, d, y, x)";
    CHECK_LE(dshape.ndim(), 5U) 
        << "Pooling: Input data should be  3D in (batch, channel, x)"
        << " Or 4D in (batch, channel, y, x) "
        << " Or 5D in (batch, channel, d, y, x)";
    TShape oshape = dshape;
    if (dshape.ndim() == 0) return false;
    if (param.global_pool) {
      ...
    } else if (param.kernel.ndim() == 2) {
      CHECK_EQ(dshape.ndim(), 4U)
          << "Pooling: Input data should be 4D in (batch, channel, y, x)";
      CHECK(param.kernel[0] <= dshape[2] + 2 * param.pad[0])
          << "kernel size (" << param.kernel[0] << ") exceeds input ("
          << dshape[2] << " padded to " << (dshape[2] + 2 * param.pad[0])
          << ")";
      CHECK(param.kernel[1] <= dshape[3] + 2 * param.pad[1])
          << "kernel size (" << param.kernel[1] << ") exceeds input ("
          << dshape[3] << " padded to " << (dshape[3] + 2 * param.pad[1])
          << "). dshape is (" << dshape[0] << ", " << dshape[1] << ", "
          << dshape[2] << ", " << dshape[3] << ")";
OK, I think this is a bug now, but I need to root-cause it to be certain.

As a sanity check, I now return only pool4 from the first half of the U-Net (four MaxPool2D layers are used in the first half), to exclude the possibility that some later operation is the one calling PoolingShape.

build_model now looks like:

def build_model(input_layer, start_neurons, DropoutRatio = 0.5):
    # 101 -> 50
    k_size = (3, 3)
    same_padding = (k_size[0]//2, k_size[1]//2)
    #input_layer = mx.sym.transpose(input_layer, [0, 3, 1, 2])
    conv1 = mx.gluon.nn.Conv2D(start_neurons * 1, kernel_size=k_size, padding=same_padding)(input_layer)
    conv1 = residual_block(conv1,start_neurons * 1)
    conv1 = residual_block(conv1,start_neurons * 1, True)
    print('conv1', conv1.infer_shape(data=(32, 1, 101, 101))[1])
    pool1 = mx.gluon.nn.MaxPool2D()(conv1) #(2, 2)
    pool1 = mx.gluon.nn.Dropout(DropoutRatio/2)(pool1)
    print('pool1', pool1.infer_shape(data=(32, 1, 101, 101))[1])

    # 50 -> 25
    conv2 = mx.gluon.nn.Conv2D(start_neurons * 2, kernel_size=k_size, padding=same_padding)(pool1)
    conv2 = residual_block(conv2,start_neurons * 2)
    conv2 = residual_block(conv2,start_neurons * 2, True)
    print('conv2', conv2.infer_shape(data=(32, 1, 101, 101))[1])
    pool2 = mx.gluon.nn.MaxPool2D()(conv2)
    pool2 = mx.gluon.nn.Dropout(DropoutRatio)(pool2)
    print('pool2', pool2.infer_shape(data=(32, 1, 101, 101))[1])

    # 25 -> 12
    conv3 = mx.gluon.nn.Conv2D(start_neurons * 4, kernel_size=k_size, padding=same_padding)(pool2)
    conv3 = residual_block(conv3,start_neurons * 4)
    conv3 = residual_block(conv3,start_neurons * 4, True)
    print('conv3', conv3.infer_shape(data=(32, 1, 101, 101))[1])
    pool3 = mx.gluon.nn.MaxPool2D()(conv3)
    pool3 = mx.gluon.nn.Dropout(DropoutRatio)(pool3)
    print('pool3', pool3.infer_shape(data=(32, 1, 101, 101))[1])

    # 12 -> 6
    conv4 = mx.gluon.nn.Conv2D(start_neurons * 8, kernel_size=k_size, padding=same_padding)(pool3)
    conv4 = residual_block(conv4,start_neurons * 8)
    conv4 = residual_block(conv4,start_neurons * 8, True)
    print('conv4', conv4.infer_shape(data=(32, 1, 101, 101))[1])
    pool4 = mx.gluon.nn.MaxPool2D()(conv4)
    pool4 = mx.gluon.nn.Dropout(DropoutRatio)(pool4)
    print('pool4', pool4.infer_shape(data=(32, 1, 101, 101))[1])
    return pool4

which still hits the aforementioned stack trace, while the infer_shape calls print:

conv1 [(32, 16, 101, 101)]
pool1 [(32, 16, 50, 50)]
conv2 [(32, 32, 50, 50)]
pool2 [(32, 32, 25, 25)]
conv3 [(32, 64, 25, 25)]
pool3 [(32, 64, 12, 12)]
conv4 [(32, 128, 12, 12)]
pool4 [(32, 128, 6, 6)]

This is what the residual block looks like:

def batch_activate(x):
    x = mx.gluon.nn.BatchNorm()(x) #BatchNormalization()(x)
    x = mx.gluon.nn.Activation(activation='relu')(x) #Activation('relu')(x)
    return x

def convolution_block(x, filters, size, strides=(1,1), padding='same', activation=True):
    if padding == 'same':
        padding = (size[0]//2, size[1]//2)
    else:
        # valid
        padding = (0, 0)
    x = mx.gluon.nn.Conv2D(channels=filters, kernel_size=size, 
                           strides=strides, padding=padding)(x)
    if activation == True:
        x = batch_activate(x)
    return x

def residual_block(blockInput, num_filters=16, do_batch_activate = False):
    x = batch_activate(blockInput)
    x = convolution_block(x, num_filters, (3,3) )
    x = convolution_block(x, num_filters, (3,3), activation=False)
    x = x+blockInput #Add()([x, blockInput])
    if do_batch_activate:
        x = batch_activate(x)
    return x

def calc_same_padding(in_height, in_width, filter_height, filter_width, strides):
    if (in_height % strides[0] == 0):
        pad_along_height = max(filter_height - strides[0], 0)
    else:
        pad_along_height = max(filter_height - (in_height % strides[0]), 0)
    if (in_width % strides[1] == 0):
        pad_along_width = max(filter_width - strides[1], 0)
    else:
        pad_along_width = max(filter_width - (in_width % strides[1]), 0)
    return (pad_along_height, pad_along_width)
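
As a quick sanity check that the residual block alone preserves the spatial dims (a minimal sketch reusing the functions above, outside the full model):

x = mx.sym.Variable('data')
y = mx.gluon.nn.Conv2D(16, kernel_size=(3, 3), padding=(1, 1))(x)
y = residual_block(y, 16)
print(y.infer_shape(data=(32, 1, 101, 101))[1])  # expect [(32, 16, 101, 101)]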

It seems the last two ops applied before the pool were BatchNorm and a relu Activation. When I log the in_shape vector of TShape for every PoolingShape call with param.kernel.ndim() == 2, I get this:

[I 20:21:19.772 NotebookApp] Kernel started: 59055214-815a-4b46-b161-d18e7d46266f
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 32, 50, 50, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 32, 50, 50, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 32, 50, 50, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 64, 25, 25, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 32, 50, 50, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 64, 25, 25, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 32, 50, 50, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 64, 25, 25, 
[20:22:18] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 128, 12, 12, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 32, 50, 50, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 32, 50, 50, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 32, 50, 50, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 64, 25, 25, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 32, 50, 50, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 64, 25, 25, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 101, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 32, 50, 50, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 64, 25, 25, 
[20:22:43] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 128, 12, 12, 
[20:22:44] src/operator/nn/pooling.cc:160: Input Tensor 0: 32, 16, 101, 1, 
[I 20:23:19.792 NotebookApp] Saving file at /workdir/mxnet_kernel.ipynb

From the pattern, it seems the shape (32, 16, 101, 1) should be (32, 16, 101, 101), which would match infer_shape. Looking into src/operator/nn/activation.cc to start. @thomelane, if you have any suggestions on where to look or how to debug this better, let me know.

The original Keras implementation works with the MXNet backend, and the shapes are as expected (no (32, 16, 101, 1) anywhere).

Is there anything wrong in my code above (primarily the first half of the U-Net and residual_block) that would cause this shape to show up? Otherwise, I would think something is going wrong with either the pool's input shape or the add/activation output shapes.

Great, thanks for all the information and updates @cory. I’ll take a look, and get back to you shortly!

Ah, so looking at the full stack trace, there's a warning right at the top that's the clue.

And the kernel size (2) exceeds input (1 padded to 1) error is just a symptom of a failed data binding that happens when you call fit. You've constructed the symbol okay, but label_layer = mx.sym.Variable('softmax_label') and label_names = ('softmax_label',) are your problem here: your symbolic graph doesn't have a softmax_label. I assume you copied this from a classification example, but it doesn't make sense for this task.

Call output_layer.list_arguments() to see the names of the symbols in your symbolic graph and select the name that relates to the last output. Use this as your label variable name.
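
For example (a minimal sketch; the exact names depend on how your graph was built):

print(output_layer.list_arguments())  # input/parameter names that actually exist in the graph
print(output_layer.list_outputs())    # the output name(s) to derive the label name from
# then, assuming you settle on a name such as 'label' (hypothetical), something like:
# module = mx.mod.Module(output_layer, data_names=('data',), label_names=('label',), context=mx.gpu())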

You might also want to look at creating Gluon Blocks instead of constructing symbols. See this tutorial for more details.

So you could rewrite your functions as Blocks and just call the Block like a function:

class BatchActivateBlock(mx.gluon.HybridBlock):
    def __init__(self):
        super(BatchActivateBlock, self).__init__()
        with self.name_scope():
            self.net = mx.gluon.nn.HybridSequential()
            with self.net.name_scope():
                self.net.add(mx.gluon.nn.BatchNorm())
                self.net.add(mx.gluon.nn.Activation(activation='relu'))
    
    def hybrid_forward(self, F, x):
        return self.net(x)
    
    
class ConvolutionBlock(mx.gluon.HybridBlock):
    def __init__(self, filters, size, strides=(1,1), padding='same', activation=True):
        super(ConvolutionBlock, self).__init__()
        with self.name_scope():
            if padding == 'same':
                padding = (size[0]//2, size[1]//2)
            else:
                padding = (0, 0)
            self.conv = mx.gluon.nn.Conv2D(channels=filters, kernel_size=size, 
                                           strides=strides, padding=padding)
            self.activation = activation
            if self.activation:
                self.act = BatchActivateBlock()
            
    def hybrid_forward(self, F, x):
        if self.activation:
            return self.act(self.conv(x))
        else:
            return self.conv(x)
        

class ResidualBlock(mx.gluon.HybridBlock):
    def __init__(self, num_filters=16, do_batch_activate=False):
        super(ResidualBlock, self).__init__()
        with self.name_scope():
            self.do_batch_activate = do_batch_activate
            self.bact1 = BatchActivateBlock()
            self.conv1 = ConvolutionBlock(filters=num_filters, size=(3,3), activation=True)
            self.conv2 = ConvolutionBlock(filters=num_filters, size=(3,3), activation=False)
            if self.do_batch_activate:
                self.bact2 = BatchActivateBlock()
            
    def hybrid_forward(self, F, x):
        out = self.bact1(x)
        out = self.conv1(out)
        out = self.conv2(out)
        out = out + x
        if self.do_batch_activate:
            return self.bact2(out)
        else:
            return out
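
A quick usage sketch (a minimal example, assuming the blocks above and the same 101x101 single-channel input): you can stack them in a HybridSequential and call it on an NDArray directly.

import mxnet as mx

net = mx.gluon.nn.HybridSequential()
net.add(mx.gluon.nn.Conv2D(16, kernel_size=(3, 3), padding=(1, 1)))
net.add(ResidualBlock(16))
net.add(ResidualBlock(16, do_batch_activate=True))
net.initialize()

x = mx.nd.random.uniform(shape=(32, 1, 101, 101))
print(net(x).shape)  # expect (32, 16, 101, 101)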