I've completed both the classifier and the regressor:
"""scikit-learn like wrapper for gluon in mxnet"""
import mxnet as mx
from mxnet import gluon, nd, autograd, metric
from sklearn.base import BaseEstimator
import numpy as np
class GluonModel(BaseEstimator):
    """Base wrapper for Gluon models. Use GluonClassifier or GluonRegressor
    instead of instantiating this directly."""

    def __init__(self, model, loss_function, init_function, batch_size=10,
                 learning_rate=0.0001, metric='acc', model_ctx=mx.cpu(),
                 epochs=2, optimizer='sgd', last_batch='rollover'):
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.model_ctx = model_ctx
        self.epochs = epochs
        self.model = model
        self.optimizer = optimizer
        # store constructor arguments under their own names so that
        # BaseEstimator.get_params() and clone() work
        self.loss_function = loss_function
        self.init_function = init_function
        self.last_batch = last_batch
        self.metric = metric
    def fit(self, train_x, train_y, **kwargs):
        """Trains the model.

        train_x: array of shape (n_samples, n_features)
            The training samples.
        train_y: array of shape (n_samples,) or (n_samples, n_outputs)
            The labels for train_x.
        **kwargs: 'shuffle' controls the DataLoader; any remaining keyword
            arguments are passed to the optimizer as optimizer parameters.
        """
        loss_function = self.loss_function
        self.model.collect_params().initialize(self.init_function, ctx=self.model_ctx)
        # pull the DataLoader option out of kwargs before handing the rest to the Trainer
        shuffle = kwargs.pop('shuffle', True)
        trainer = gluon.Trainer(self.model.collect_params(), self.optimizer,
                                {'learning_rate': self.learning_rate, **kwargs})
        train_dataset = mx.gluon.data.ArrayDataset(train_x, train_y)
        train_iter = mx.gluon.data.DataLoader(train_dataset, batch_size=self.batch_size,
                                              shuffle=shuffle, last_batch=self.last_batch)
        # loop over the data for the configured number of epochs and train
        for e in range(self.epochs):
            for i, (x, y) in enumerate(train_iter):
                x = x.as_in_context(self.model_ctx)
                y = y.as_in_context(self.model_ctx)
                with autograd.record():
                    output = self.model(x)
                    loss = loss_function(output, y)
                loss.backward()
                trainer.step(x.shape[0])
        return self
    def predict(self, x, **kwargs):
        """Predicts y for the samples in x."""
        return self.model(nd.array(x, ctx=self.model_ctx))
    def score(self, x, y):
        """Evaluates the model on (x, y) with the configured metric (accuracy by default).

        Note: labels are compared against the argmax of the model output, so y is
        expected to already contain the integer class indices.
        """
        test_dataset = mx.gluon.data.ArrayDataset(x, y)
        test_iter = mx.gluon.data.DataLoader(test_dataset, batch_size=self.batch_size)
        acc = mx.metric.create(self.metric)
        for i, (data, label) in enumerate(test_iter):
            data = data.as_in_context(self.model_ctx)
            label = label.as_in_context(self.model_ctx)
            output = self.model(data)
            p = nd.argmax(output, axis=1)
            acc.update(preds=p, labels=label)
        return acc.get()[1]
class GluonClassifier(GluonModel):
    """Scikit-learn-like classifier for Gluon."""

    def fit(self, x, y, sample_weight=None, **kwargs):
        """Trains the classifier."""
        y = np.asarray(y)
        # check the shape of y to decide between one-hot labels and a single label column
        if len(y.shape) == 2 and y.shape[1] > 1:
            # one-hot (multi-class) labels: the classes are the column indices
            self.classes_ = np.arange(y.shape[1])
        elif (len(y.shape) == 2 and y.shape[1] == 1) or len(y.shape) == 1:
            # single label column (binary or multi-class): map labels to integer indices
            self.classes_, y = np.unique(y, return_inverse=True)
            y = y.astype('float32')  # Gluon losses accept float class indices
        else:
            raise ValueError("Invalid shape for y: " + str(y.shape))
        self.n_classes_ = len(self.classes_)
        if sample_weight is not None:
            # accepted for scikit-learn API compatibility, but not wired into the
            # Gluon training loop yet
            raise NotImplementedError("sample_weight is not supported yet")
        return super(GluonClassifier, self).fit(x, y, **kwargs)
    def predict(self, x, **kwargs):
        """Predicts class labels for x."""
        p = super(GluonClassifier, self).predict(x, **kwargs)
        if p.shape[-1] > 1:
            # multi-class output: pick the class with the highest score
            indices = p.argmax(axis=-1)
        else:
            # single output unit: threshold at 0.5 for the binary case
            indices = (p > 0.5).reshape(-1)
        return self.classes_[indices.asnumpy().astype('int64')]
    def predict_proba(self, x, **kwargs):
        """Predicts class probability estimates for x."""
        p = super(GluonClassifier, self).predict(x, **kwargs)
        # for a single-output binary model, expand to two columns: [P(class 0), P(class 1)]
        if p.shape[1] == 1:
            p = nd.concat(1 - p, p, dim=1)
        return p
class GluonRegressor(GluonModel):
    """Scikit-learn-like regressor for Gluon."""

    def predict(self, x, **kwargs):
        """Returns the prediction for x.

        x: array of shape (n_samples, n_features) to be predicted
        **kwargs: other arguments passed to the base predict() method
        returns: the predicted y
        """
        return nd.squeeze(super(GluonRegressor, self).predict(x, **kwargs))
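For reference, here is a minimal usage sketch. The network, synthetic data, loss, initializer, and hyperparameters below are only illustrative assumptions, not part of the wrapper; any Gluon block, loss, and initializer should work.

# Minimal usage sketch (illustrative assumptions only).
import mxnet as mx
import numpy as np
from mxnet import gluon, init

# toy 3-class data
X = np.random.randn(300, 20).astype('float32')
y = np.random.randint(0, 3, size=300).astype('float32')

net = gluon.nn.Sequential()
net.add(gluon.nn.Dense(64, activation='relu'))
net.add(gluon.nn.Dense(3))

clf = GluonClassifier(model=net,
                      loss_function=gluon.loss.SoftmaxCrossEntropyLoss(),
                      init_function=init.Xavier(),
                      batch_size=32, learning_rate=0.01, epochs=5)
clf.fit(X, y)
print(clf.predict(X[:5]))   # predicted class labels
print(clf.score(X, y))      # accuracy on the training data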
Note: I studied Keras's scikit-learn wrapper and based this implementation on it.
Thanks to @NRauschmayr for helping with this.