LeNet with infer_shape error

barrricade · May 30, 2018, 3:48am

import gluonbook as gb
from mxnet import autograd,nd,init,gluon
from mxnet.gluon import loss as gloss,data as gdata,nn,utils as gutils
import mxnet as mx
# LeNet-style CNN: two conv + max-pool stages, then two dense layers.
# Conv2D is used with its default layout ('NCHW'), so the network expects
# input batches shaped (batch, channels, height, width).
net = nn.Sequential()
with net.name_scope():
    net.add(
        nn.Conv2D(channels=20, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=50, kernel_size=3, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(128, activation='relu'),
        nn.Dense(10)
    )
lr = 0.5
batch_size = 256
# Xavier initialisation; force_reinit=True re-initialises parameters even if
# they were initialised before (e.g. when re-running the script in a session).
net.initialize(force_reinit=True, init=init.Xavier())

train_data, test_data = gb.load_data_fashion_mnist(batch_size)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
loss = gloss.SoftmaxCrossEntropyLoss()
num_epochs = 5
def train(train_data, test_data, net, loss, trainer, num_epochs):
    """Train ``net`` with mini-batch SGD and print the first layer's parameters.

    Args:
        train_data: iterable yielding ``(x, y)`` mini-batches.
        test_data: accepted for symmetry with the call site; not used here.
        net: the Gluon network to train.
        loss: loss function called as ``loss(y_hat, y)``.
        trainer: ``gluon.Trainer`` that updates the parameters of ``net``.
        num_epochs: number of passes over ``train_data``.

    Reads the module-level ``batch_size`` to normalise the gradient step.
    """
    for epoch in range(num_epochs):
        for x, y in train_data:
            # Record only the forward pass and the loss; backward() and the
            # parameter update must run outside the recording scope.
            with autograd.record():
                y_hat = net(x)
                l = loss(y_hat, y)
            l.backward()
            trainer.step(batch_size)
        print(net[0].params)
# Run the training loop with the objects configured above.
train(
    train_data=train_data,
    test_data=test_data,
    net=net,
    loss=loss,
    trainer=trainer,
    num_epochs=num_epochs,
)

Even this simple script raises an error:
infer_shape error. Arguments:
data: (256, 28, 28, 1)
Traceback (most recent call last):
  File "/home/hansome/workspace/mxnet/conv2D/LeNet.py", line 33, in <module>
    train(train_data,test_data,net,loss,trainer,num_epochs)
  File "/home/hansome/workspace/mxnet/conv2D/LeNet.py", line 28, in train
    y_hat=net(x)
  File "/home/hansome/anaconda3/lib/python3.6/site-packages/mxnet/gluon/block.py", line 413, in __call__
    return self.forward(*args)
  File "/home/hansome/anaconda3/lib/python3.6/site-packages/mxnet/gluon/nn/basic_layers.py", line 53, in forward
    x = block(x)
  File "/home/hansome/anaconda3/lib/python3.6/site-packages/mxnet/gluon/block.py", line 413, in __call__
    return self.forward(*args)
  File "/home/hansome/anaconda3/lib/python3.6/site-packages/mxnet/gluon/block.py", line 624, in forward
    self._finish_deferred_init(self._active, x, *args)
  File "/home/hansome/anaconda3/lib/python3.6/site-packages/mxnet/gluon/block.py", line 510, in _finish_deferred_init
    raise ValueError(error_msg)
ValueError: Deferred initialization failed because shape cannot be inferred
Error in operator sequential0_conv0_fwd: [11:00:34] src/operator/nn/convolution.cc:194: Check failed: dilated_ksize_x <= AddPad(dshape[3], param.pad[1]) (5 vs. 1) kernel size exceed input

How can I fix this? Thank you.

ThomasDelteil · May 30, 2018, 4:41am

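After some slight modifications, I find your code to be working and the training to be converging. Looks like something is wrong with your data: the error reports the input batch as (256, 28, 28, 1), i.e. channels-last, whereas Conv2D expects (batch, channels, height, width) by default, which is why the kernel size 5 is being compared against a width of 1. Try reinstalling gluonbook.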

import gluonbook as gb
from mxnet import autograd,nd,init,gluon
from mxnet.gluon import loss as gloss,data as gdata,nn,utils as gutils
import mxnet as mx
# LeNet-style CNN: two conv + max-pool stages, then two dense layers.
net = nn.Sequential()
with net.name_scope():
    net.add(
        nn.Conv2D(channels=20, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=50, kernel_size=3, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(128, activation='relu'),
        nn.Dense(10)
    )
lr = 0.5
batch_size = 256

# Prefer the GPU, but fall back to the CPU when no usable GPU is present.
# mx.gpu() only builds a Context object; allocating a tiny array on it is
# what actually reveals whether the device can be used.
try:
    ctx = mx.gpu()
    nd.zeros((1,), ctx=ctx)
except mx.base.MXNetError:
    ctx = mx.cpu()
net.initialize(init=init.Xavier(), ctx=ctx)

train_data, test_data = gb.load_data_fashion_mnist(batch_size)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
loss = gloss.SoftmaxCrossEntropyLoss()
num_epochs = 30

def train(train_data, test_data, net, loss, trainer, num_epochs):
    """Train ``net`` with mini-batch SGD and print the mean loss per epoch.

    Args:
        train_data: iterable yielding ``(x, y)`` mini-batches.
        test_data: accepted for symmetry with the call site; not used here.
        net: the Gluon network to train.
        loss: loss function called as ``loss(y_hat, y)``; its result is
            summed over the batch for reporting.
        trainer: ``gluon.Trainer`` that updates the parameters of ``net``.
        num_epochs: number of passes over ``train_data``.

    Reads the module-level ``ctx`` to decide which device batches are
    copied to.
    """
    for epoch in range(num_epochs):
        total_loss = 0.0
        num_samples = 0
        for x, y in train_data:
            # Move the batch to the device the parameters live on.
            x = x.as_in_context(ctx)
            y = y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(x)
                l = loss(y_hat, y)
            l.backward()
            # Use the real batch size: the last batch may be smaller than the
            # nominal one, which would otherwise skew the gradient scaling
            # and the reported average.
            n = x.shape[0]
            trainer.step(n)
            # Reduce to a single value before accumulating so that batches
            # of different sizes can be added together.
            total_loss = total_loss + l.sum()
            num_samples += n
        mx.nd.waitall()
        if num_samples == 0:
            # Nothing was iterated, so there is no loss to report.
            continue
        mean_loss = float(total_loss.asscalar()) / num_samples
        print("Epoch [{}]: Loss {}".format(epoch, mean_loss))

# Run the training loop with the objects configured above.
train(
    train_data=train_data,
    test_data=test_data,
    net=net,
    loss=loss,
    trainer=trainer,
    num_epochs=num_epochs,
)

Epoch [0]: Loss 1.1486984644180689
Epoch [1]: Loss 0.5033914940988916
Epoch [2]: Loss 0.4168233300885584
...
Epoch [28]: Loss 0.1258625291351579
Epoch [29]: Loss 0.11941466372237246

barrricade · May 30, 2018, 5:42am

Thank you so much :relaxed:

Topic		Replies	Views
mxnet.base.MXNetError: Error in operator conv0_fwd: Shape inconsistent, Provided = [64,64,3,3], inferred shape=(64,3,3,3) Gluon	2	2154	June 22, 2019
Storing model and parameters from python and loading in c_api fails to infer shape	1	423	November 7, 2018
Symbol.infer_shape assertion error on pre-trained MNIST model Discussion	1	1035	July 25, 2018
Getting error while fitting an LSTM model	3	736	March 22, 2018
Mxnet error src/operator/tensor/./elemwise_binary_broadcast_op.h:68: Check failed Gluon	1	926	August 6, 2018

LeNet with infer_shape error

Related Topics