LeNet with infer_shape error


#1

import gluonbook as gb
from mxnet import autograd,nd,init,gluon
from mxnet.gluon import loss as gloss,data as gdata,nn,utils as gutils
import mxnet as mx
net = nn.Sequential()  # LeNet-style CNN: two conv/pool stages followed by two dense layers
with net.name_scope():
    net.add(
        nn.Conv2D(channels=20, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=50, kernel_size=3, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(128, activation="relu"),
        nn.Dense(10)
    )
lr = 0.5
batch_size=256
net.initialize(force_reinit=True,init=init.Xavier())  # parameter shapes are only inferred on the first forward pass

train_data, test_data = gb.load_data_fashion_mnist(batch_size)
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate' : lr})
loss = gloss.SoftmaxCrossEntropyLoss()
num_epochs = 5
def train(train_data, test_data, net, loss, trainer,num_epochs):  # training loop exactly as posted in the question
    for epoch in range(num_epochs):
        with autograd.record():  # NOTE: wraps the whole batch loop, so backward() and step() also run while recording
            for x,y in train_data:
                y_hat=net(x)  # the infer_shape error in the traceback below is raised by this call
                l = loss(y_hat,y)
                l.backward()
                trainer.step(batch_size)
        print(net[0].params)  # indent level reconstructed (lost in the paste); assumed to run once per epoch
train(train_data,test_data,net,loss,trainer,num_epochs)

Even this simple script raises an error:
infer_shape error. Arguments:
data: (256, 28, 28, 1)
Traceback (most recent call last):
  File "/home/hansome/workspace/mxnet/conv2D/LeNet.py", line 33, in <module>
    train(train_data,test_data,net,loss,trainer,num_epochs)
  File "/home/hansome/workspace/mxnet/conv2D/LeNet.py", line 28, in train
    y_hat=net(x)
  File "/home/hansome/anaconda3/lib/python3.6/site-packages/mxnet/gluon/block.py", line 413, in __call__
    return self.forward(*args)
  File "/home/hansome/anaconda3/lib/python3.6/site-packages/mxnet/gluon/nn/basic_layers.py", line 53, in forward
    x = block(x)
  File "/home/hansome/anaconda3/lib/python3.6/site-packages/mxnet/gluon/block.py", line 413, in __call__
    return self.forward(*args)
  File "/home/hansome/anaconda3/lib/python3.6/site-packages/mxnet/gluon/block.py", line 624, in forward
    self._finish_deferred_init(self._active, x, *args)
  File "/home/hansome/anaconda3/lib/python3.6/site-packages/mxnet/gluon/block.py", line 510, in _finish_deferred_init
    raise ValueError(error_msg)
ValueError: Deferred initialization failed because shape cannot be inferred
Error in operator sequential0_conv0_fwd: [11:00:34] src/operator/nn/convolution.cc:194: Check failed: dilated_ksize_x <= AddPad(dshape[3], param.pad[1]) (5 vs. 1) kernel size exceed input

How should I modify the code to fix this? Thank you.


#2

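After some slight modifications, your code works for me and the training converges. It looks like something is wrong with your data: the error reports an input of shape (256, 28, 28, 1), i.e. channels-last, whereas Conv2D expects (batch, channel, height, width) by default, so the 5x5 kernel ends up being compared against a width of 1. Try reinstalling gluonbook.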

import gluonbook as gb
from mxnet import autograd,nd,init,gluon
from mxnet.gluon import loss as gloss,data as gdata,nn,utils as gutils
import mxnet as mx
# Hyperparameters and the device everything is placed on.
lr = 0.5
batch_size = 256
num_epochs = 30
ctx = mx.gpu()

# LeNet-style CNN: two convolution/max-pool stages, then two dense layers
# ending in 10 output units.
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Conv2D(channels=20, kernel_size=5, activation='relu'))
    net.add(nn.MaxPool2D(pool_size=2, strides=2))
    net.add(nn.Conv2D(channels=50, kernel_size=3, activation='relu'))
    net.add(nn.MaxPool2D(pool_size=2, strides=2))
    net.add(nn.Flatten())
    net.add(nn.Dense(128, activation='relu'))
    net.add(nn.Dense(10))
net.initialize(init=init.Xavier(), ctx=ctx)

# Fashion-MNIST iterators, the loss function and a plain SGD optimizer.
train_data, test_data = gb.load_data_fashion_mnist(batch_size)
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': lr},
)

def train(train_data, test_data, net, loss, trainer,num_epochs):
    """Train ``net`` and print the mean per-sample training loss after each epoch.

    Relies on the module-level ``ctx`` (device the batches are copied to) and
    ``batch_size`` (normalization factor passed to ``trainer.step``).

    Args:
        train_data: Iterable yielding ``(x, y)`` mini-batches.
        test_data: Accepted for call compatibility; not used here.
        net: Network called as ``net(x)`` to produce predictions.
        loss: Callable returning the per-sample loss for ``(y_hat, y)``.
        trainer: Optimizer wrapper whose ``step`` applies the gradients.
        num_epochs: Number of passes over ``train_data``.
    """
    for epoch in range(num_epochs):
        # One-element accumulator on the training device. Summing each batch
        # to a scalar first means a smaller final batch cannot cause a shape
        # mismatch with the accumulator.
        total_loss = mx.nd.zeros(1, ctx=ctx)
        num_samples = 0
        for x, y in train_data:
            x = x.as_in_context(ctx)
            y = y.as_in_context(ctx)
            # Only the forward pass and loss need to be recorded.
            with autograd.record():
                y_hat = net(x)
                l = loss(y_hat, y)
            l.backward()
            total_loss += l.sum()
            # Count the samples actually seen rather than assuming every
            # batch is full, so the reported mean is not under-estimated.
            num_samples += x.shape[0]
            trainer.step(batch_size)
        # Block until all queued operations have finished before reading the loss.
        mx.nd.waitall()
        print("Epoch [{}]: Loss {}".format(epoch, total_loss.asscalar() / max(num_samples, 1)))

train(train_data,test_data,net,loss,trainer,num_epochs)
Epoch [0]: Loss 1.1486984644180689
Epoch [1]: Loss 0.5033914940988916
Epoch [2]: Loss 0.4168233300885584
...
Epoch [28]: Loss 0.1258625291351579
Epoch [29]: Loss 0.11941466372237246

#4

Thank you so much:relaxed::relaxed::relaxed::relaxed::relaxed::relaxed::relaxed: