@piiswrong Thanks for helping!
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from mxnet import nd
class DatasetBuilder(object):
    def __init__(self):
        pass

    def buildDatasets(self):
        # Load pre-computed feature matrices and one-hot label matrices from disk.
        self.X_train = np.load("/dev/asin2vec/xTrain.npy").astype("float32")
        self.Y_train = np.load("/dev/asin2vec/yTrain.npy").astype("float32")
        self.X_valid = np.load("/dev/asin2vec/xVal.npy").astype("float32")
        self.Y_valid = np.load("/dev/asin2vec/y_val.npy").astype("float32")
        print("Loaded training and validation data")
db = DatasetBuilder()
db.buildDatasets()
print("Input shape: {}".format(db.X_train.shape))
print("Number of classes: {}".format(db.Y_train.shape[1]))
import mxnet as mx
from mxnet import gpu, gluon, nd, autograd
from mxnet.gluon import HybridBlock, Block
from mxnet.gluon.nn import Embedding, HybridSequential, Dense, BatchNorm, Activation, Conv1D, MaxPool1D, Dropout, Flatten
from mxnet.gluon.rnn import LSTM
from mxnet.gluon.loss import SoftmaxCrossEntropyLoss, SigmoidBinaryCrossEntropyLoss
from mxnet.gluon.data import ArrayDataset
from mxnet.io import NDArrayIter
from time import time
import sys
def createSimpleModel(num_classes):
    # Plain MLP: a wide 1800-unit input layer, seven 400-unit hidden layers
    # (each with ReLU, BatchNorm, and Dropout), and a num_classes output
    # layer followed by BatchNorm.
    model = mx.gluon.nn.HybridSequential(prefix='model_')
    with model.name_scope():
        model.add(Dense(1800))
        model.add(Activation("relu"))
        model.add(BatchNorm())
        model.add(Dropout(0.25))
        for i in range(7):
            model.add(Dense(400))
            model.add(Activation("relu"))
            model.add(BatchNorm())
            model.add(Dropout(0.25))
        model.add(Dense(num_classes))
        model.add(BatchNorm())
    return model
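As a quick sanity check (not part of the original script), the model can be probed with a random batch before wiring it into the trainer. The 900-feature input and 30 classes are placeholder assumptions; gluon infers the input dimension on the first forward pass:

# Standalone shape check with made-up dimensions.
net = createSimpleModel(30)
net.initialize(mx.init.Xavier(), ctx=mx.cpu())
dummy = nd.random.uniform(shape=(8, 900))
print(net(dummy).shape)  # expected: (8, 30)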
class ModelTrainer(object):
    def __init__(self):
        # Labels are one-hot vectors, hence sparse_label=False.
        self.loss = SoftmaxCrossEntropyLoss(sparse_label=False)

    def trainModel(self, dataset_builder, batch_size, learning_rate, epochs, num_gpu=1):
        def forward_backward(model, data, labels):
            # Record the forward pass on each device shard, then backprop.
            with autograd.record():
                losses = [self.loss(model(X), Y) for X, Y in zip(data, labels)]
            for l in losses:
                l.backward()

        def train_batch(data, labels, ctx, model, trainer):
            size = data.shape[0]
            # Split the data batch and load the shards onto the GPUs.
            data = gluon.utils.split_and_load(data, ctx, even_split=False)
            labels = gluon.utils.split_and_load(labels, ctx, even_split=False)
            forward_backward(model, data, labels)
            # Update parameters, normalizing by the full batch size.
            trainer.step(size)

        def valid_batch(data, labels, ctx, model):
            # Validation runs on the first GPU only.
            data = data.as_in_context(ctx[0])
            pred = np.argmax(model(data).asnumpy(), axis=1)
            # With one-hot labels, l[pred[i]] is 1.0 exactly when the
            # prediction is correct, so this sums the correct count.
            return sum([l[pred[i]] for i, l in enumerate(labels.asnumpy())])

        def valid_all(data_loader, ctx, model):
            correct, num = 0.0, 0.0
            for (data, labels) in data_loader:
                correct += valid_batch(data, labels, ctx, model)
                num += data.shape[0]
            return correct / num

        model = createSimpleModel(dataset_builder.Y_train.shape[1])
        model.hybridize()
        print('Batch size is {}'.format(batch_size))
        # Training on multiple GPUs: scale the batch size with the GPU count.
        batch_size *= num_gpu
        ctx = [gpu(i) for i in range(num_gpu)]
        print('Running on {}'.format(ctx))
        model.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
        # Data iterators: the full training set for updates, plus shuffled
        # subsets of at most 25000 examples for accuracy estimates.
        train_data = mx.gluon.data.DataLoader(
            ArrayDataset(dataset_builder.X_train, dataset_builder.Y_train),
            batch_size, shuffle=True, last_batch='rollover')
        train_valid_data = mx.gluon.data.DataLoader(
            ArrayDataset(dataset_builder.X_train, dataset_builder.Y_train),
            batch_size, sampler=mx.gluon.data.RandomSampler(min(25000, len(dataset_builder.X_train))),
            shuffle=False, last_batch='keep')
        valid_data = mx.gluon.data.DataLoader(
            ArrayDataset(dataset_builder.X_valid, dataset_builder.Y_valid),
            batch_size, sampler=mx.gluon.data.RandomSampler(min(25000, len(dataset_builder.X_valid))),
            shuffle=False, last_batch='keep')
        # Scale the learning rate linearly with the number of GPUs.
        trainer = gluon.Trainer(model.collect_params(), 'adam', {'learning_rate': learning_rate * num_gpu})
        for epoch in range(epochs):
            # Train for one epoch.
            start = time()
            for (data, labels) in train_data:
                train_batch(data, labels, ctx, model, trainer)
            nd.waitall()  # wait until all computations finish before timing
            print('Epoch %d, training time = %.1f sec' % (epoch, time() - start))
            # Validate on the sampled train and validation sets.
            print('    train accuracy = %.4f' % valid_all(train_valid_data, ctx, model))
            print('    test accuracy = %.4f' % valid_all(valid_data, ctx, model))
            sys.stdout.flush()
modelTrainer = ModelTrainer()
for i in range(1, 9):
    print("Training on {} GPUs".format(i))
    modelTrainer.trainModel(dataset_builder=db, batch_size=400, learning_rate=0.01, epochs=10, num_gpu=i)
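In case it helps with debugging the multi-GPU path: here is a small, self-contained illustration of what gluon.utils.split_and_load does, run on CPU contexts so it works anywhere (the batch contents are made up):

import mxnet as mx
from mxnet import gluon, nd

# Split a batch of 10 rows across two contexts; even_split=False allows
# uneven shards when the batch size is not divisible by len(ctx).
ctx = [mx.cpu(0), mx.cpu(1)]
batch = nd.arange(10).reshape((10, 1))
shards = gluon.utils.split_and_load(batch, ctx, even_split=False)
for shard in shards:
    print(shard.shape, shard.context)  # two shards of shape (5, 1)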