Hi all,
A rather dumb question, but I can’t seem to get the shape right again, hah. Let’s say I have time series data with 4 columns (3 features, and I want to predict the 4th column):
from multiprocessing import cpu_count

# Synthetic stand-in for the real data: 3000 rows x 4 columns of uniform noise.
dataset = np.random.uniform(0, 1, size=(3000, 4))
print(dataset.shape)

# Everything in this script runs on the CPU.
context = mx.cpu()
CPU_COUNT = cpu_count()

# The first three columns are the features; the last column is the target.
X = dataset[:, :-1]
y = dataset[:, -1]

batch_size = 64
dataset_f = mx.gluon.data.dataset.ArrayDataset(X, y)
# One DataLoader worker process per CPU core.
data_loader = mx.gluon.data.DataLoader(
    dataset_f,
    batch_size=batch_size,
    num_workers=CPU_COUNT,
)
class RNNModel(gluon.Block):
    """LSTM followed by a one-unit dense layer applied to every time step.

    Args:
        num_embed: Size of the feature axis of the input (LSTM ``input_size``).
        num_hidden: Number of hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        **kwargs: Forwarded to ``gluon.Block``.
    """

    def __init__(self, num_embed, num_hidden, num_layers, bidirectional=False, **kwargs):
        super(RNNModel, self).__init__(**kwargs)
        # forward() needs the hidden size to flatten the LSTM output, so it
        # has to be stored on the instance (it was previously never set).
        self.num_hidden = num_hidden
        # A bidirectional LSTM concatenates both directions on the last axis,
        # so the decoder sees twice as many features per time step.
        self._rnn_out_units = num_hidden * (2 if bidirectional else 1)
        with self.name_scope():
            # layout='TNC': inputs are (sequence_length, batch_size, input_size).
            self.rnn = rnn.LSTM(num_hidden, num_layers, input_size=num_embed,
                                bidirectional=bidirectional, layout='TNC')
            self.decoder = nn.Dense(1, in_units=self._rnn_out_units)

    def forward(self, inputs, hidden):
        """Run the LSTM and decode every time step to a single value.

        Args:
            inputs: Input batch; the LSTM is configured with layout 'TNC'.
            hidden: Recurrent state, e.g. as returned by ``begin_state``.

        Returns:
            Tuple ``(decoded, hidden)``: ``decoded`` has one row per
            (time step, batch element) pair and a single column; ``hidden``
            is the updated recurrent state.
        """
        output, hidden = self.rnn(inputs, hidden)
        # Merge the time and batch axes so the dense layer sees 2-D input.
        decoded = self.decoder(output.reshape((-1, self._rnn_out_units)))
        return decoded, hidden

    def begin_state(self, *args, **kwargs):
        """Return the initial recurrent state; arguments go to the LSTM's ``begin_state``."""
        return self.rnn.begin_state(*args, **kwargs)
# One input feature per column of X; a single LSTM layer with 500 hidden units.
model = RNNModel(
    num_embed=X.shape[1],
    num_hidden=500,
    num_layers=1,
)
model.collect_params().initialize(mx.init.Xavier(), ctx=context)

# L1 (absolute error) loss, optimised with Adam.
loss = gluon.loss.L1Loss()
trainer = gluon.Trainer(
    model.collect_params(),
    'adam',
    {'learning_rate': 0.01},
)
def train():
    """Iterate over ``data_loader`` for one epoch, updating ``model`` via ``trainer``.

    NOTE(review): indentation was lost in the paste, so the nesting below is a
    reconstruction -- confirm it against the original script.

    As written, the debugging ``return`` inside the batch loop exits the
    function right after printing the shape of the first label batch, so the
    forward/backward pass and the logging below it never run.
    """
    for epoch in range(1):
        total_L = 0.0
        start_time = time.time()
        ibatch = 0
        # The initial state is built for a batch of `batch_size` elements.
        hidden = model.begin_state(func = mx.nd.zeros, batch_size = batch_size, ctx = context)
        for X_batch, y_batch in data_loader:
            #hidden = detach(hidden)
            print(y_batch.shape)
            # NOTE(review): debugging early exit; everything after this line
            # in the loop is unreachable.
            return
            # NOTE(review): X_batch is drawn from the 2-D array X, so it is
            # presumably (batch, features) with no sequence axis, while the
            # LSTM was created with layout='TNC' -- confirm the input shape.
            with autograd.record():
                output, hidden = model(X_batch, hidden)
                L = loss(output, y_batch)
                L.backward()
            grads = [i.grad(context) for i in model.collect_params().values()]
            # NOTE(review): args_clip and args_bptt are not defined in the
            # visible snippet.
            gluon.utils.clip_global_norm(grads, args_clip * args_bptt * batch_size)
            trainer.step(batch_size)
            total_L += mx.nd.sum(L).asscalar()
            ibatch += 1
            # Report and reset the running loss every 10 batches.
            if ibatch % 10 == 0 and ibatch > 0:
                # NOTE(review): args_log_interval is not defined in the
                # visible snippet.
                cur_L = total_L / args_bptt / batch_size / args_log_interval
                print('[Epoch %d Batch %d] loss %.2f, perplexity %.2f' % (
                    epoch + 1, ibatch, cur_L, math.exp(cur_L)))
                total_L = 0.0
train()
When I run it I get this:
ValueError: Invalid recurrent state shape. Expecting (1, 3, 500), got (1, 64, 500).
I know the input shape must be (sequence_length, batch_size, input_size). I just can’t understand what exactly is expecting this shape. Is it the weight matrices or something else?
Best,
Boris
(PS. Tom, Sergey, sorry for the stupid question)