LSTM shape error

Hi all,

A rather dumb question, but I can’t seem to get the shape right again, hah. Let’s say I have time series data with 4 columns (3 features, and I want to predict the 4th column):

import math
import time
from multiprocessing import cpu_count
import numpy as np
import mxnet as mx
from mxnet import autograd, gluon
from mxnet.gluon import nn, rnn

dataset = np.random.uniform(0, 1, size=(3000, 4))
print(dataset.shape)
context = mx.cpu()
CPU_COUNT = cpu_count()
X = dataset[:, 0:-1]
y = dataset[:, -1]
batch_size = 64
dataset_f = mx.gluon.data.dataset.ArrayDataset(X, y)
data_loader = mx.gluon.data.DataLoader(dataset_f, batch_size=batch_size, num_workers=CPU_COUNT)

class RNNModel(gluon.Block):
    def __init__(self, num_embed, num_hidden, num_layers, bidirectional=False, **kwargs):
        super(RNNModel, self).__init__(**kwargs)
        self.num_hidden = num_hidden  # needed by the reshape in forward()
        with self.name_scope():
            self.rnn = rnn.LSTM(num_hidden, num_layers, input_size=num_embed,
                                bidirectional=bidirectional, layout='TNC')
            self.decoder = nn.Dense(1, in_units=num_hidden)

    def forward(self, inputs, hidden):
        # with layout='TNC' the LSTM expects inputs of shape
        # (sequence_length, batch_size, num_embed)
        output, hidden = self.rnn(inputs, hidden)
        decoded = self.decoder(output.reshape((-1, self.num_hidden)))
        return decoded, hidden

    def begin_state(self, *args, **kwargs):
        return self.rnn.begin_state(*args, **kwargs)

model = RNNModel(num_embed=X.shape[1], num_hidden=500, num_layers=1)
model.collect_params().initialize(mx.init.Xavier(), ctx=context)
trainer = gluon.Trainer(model.collect_params(), 'adam', {'learning_rate': 0.01})
loss = gluon.loss.L1Loss()

args_clip = 0.2          # placeholder values: these were presumably
args_bptt = 35           # defined elsewhere in the original script
args_log_interval = 10

def train():
    for epoch in range(1):
        total_L = 0.0
        start_time = time.time()
        ibatch = 0
        hidden = model.begin_state(func=mx.nd.zeros, batch_size=batch_size, ctx=context)
        for X_batch, y_batch in data_loader:
            #hidden = detach(hidden)
            #print(y_batch.shape)
            with autograd.record():
                output, hidden = model(X_batch, hidden)
                L = loss(output, y_batch)
            L.backward()
            grads = [i.grad(context) for i in model.collect_params().values()]
            gluon.utils.clip_global_norm(grads, args_clip * args_bptt * batch_size)
            trainer.step(batch_size)
            total_L += mx.nd.sum(L).asscalar()

            ibatch += 1
            if ibatch % args_log_interval == 0 and ibatch > 0:
                cur_L = total_L / args_bptt / batch_size / args_log_interval
                print('[Epoch %d Batch %d] loss %.2f, perplexity %.2f' % (
                    epoch + 1, ibatch, cur_L, math.exp(cur_L)))
                total_L = 0.0

train()

When I run it I get this:
ValueError: Invalid recurrent state shape. Expecting (1, 3, 500), got (1, 64, 500).

I know the shape must be (sequence_length, batch_size, input_size). I just can’t understand what exactly expects this shape. Is it the weight matrices or something else?

Best,
Boris

(PS. Tom, Sergey, sorry for the stupid question)

There seem to be a couple of issues. First, you define the layout as 'TNC', but each batch coming out of your DataLoader has shape (batch_size, features), i.e. (64, 3). With layout='TNC' the batch axis is axis 1, so the LSTM infers a batch size of 3 from your input and expects a recurrent state of shape (1, 3, 500), while begin_state(batch_size=64) gave it (1, 64, 500); that is exactly the error you are seeing. So it is not the weight matrices, it is the consistency check between the input and the recurrent state. Second, as you mentioned, the shape must be (sequence_length, batch_size, input_size), but in your code the data has only 2 dimensions. You either need to reshape the input data into 3 dimensions or change the layout to 'NTC' (and in that case still add a sequence axis).
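
For example, here is a minimal sketch of the fix (reusing the variables from your post, and treating each row as a sequence of length 1 so the 'TNC' layout can stay). Note the astype('float32'): your NumPy arrays are float64 while the initialized parameters are float32, so you would likely hit a dtype mismatch right after fixing the shape.

hidden = model.begin_state(func=mx.nd.zeros, batch_size=batch_size, ctx=context)
for X_batch, y_batch in data_loader:
    # (batch_size, 3) -> (1, batch_size, 3): T=1, N=batch_size, C=3,
    # matching the layout='TNC' the model was built with
    X_batch = X_batch.astype('float32').expand_dims(axis=0)
    with autograd.record():
        output, hidden = model(X_batch, hidden)
        L = loss(output, y_batch.astype('float32'))
    L.backward()

Two more things to watch out for: since 3000 is not divisible by 64, the last batch will be smaller than batch_size and will trip the same state-shape check, so pass last_batch='discard' to the DataLoader. And if your rows really are consecutive time steps, you probably want to slice the series into windows of length T and feed (T, batch_size, 3), rather than length-1 sequences.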