Wrapping Gluon into a scikit-learn-like API

Hi,
Since Keras has an API for scikit-learn (which can be used with pipelines), I am curious about building something similar for Gluon (at least for my own educational purposes) to do classification/regression using a network model.
But before anything else, I have some questions.

  1. How can I set parameters from dictionaries within the constructors without passing values into Gluon that might raise exceptions?
    For example, learning_rate can be passed to everything, but I want to pass the number of epochs as well.
    Another option that comes to mind is having epochs as a parameter of the fit method instead.
    I want to do this so that transforms and classifiers/regressors can be used as part of a scikit-learn pipeline.
  2. If you have seen Keras's scikit-learn wrappers, they take an argument in the constructor for building the model (which is then compiled). What is the equivalent for Gluon's models?
  3. For loss functions and scores, how do you recommend handling these? Does scikit-learn's API make sense here?
    Thanks in advance.

I am not too familiar with the Keras API for scikit-learn. I had a quick look at https://keras.io/scikit-learn-api/
and for Gluon it could look like the following:

import mxnet as mx
from mxnet import autograd
from mxnet import gluon
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

class GluonModel():
    def __init__(self, batch_size=10, learning_rate=0.0001, model_ctx=mx.cpu(), epochs=2):
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.model_ctx = model_ctx
        self.epochs = epochs
        self.model = mx.gluon.nn.Dense(1)

    def fit(self, train_x, train_y):
        loss_function = gluon.loss.L2Loss()
        self.model.collect_params().initialize(mx.init.Normal(sigma=1.), ctx=self.model_ctx)
        trainer = gluon.Trainer(self.model.collect_params(), 'sgd', {'learning_rate': self.learning_rate})
        train_dataset = mx.gluon.data.ArrayDataset(train_x, train_y)
        train_iter = mx.gluon.data.DataLoader(train_dataset, last_batch='rollover', batch_size=self.batch_size, shuffle=True)

        for e in range(self.epochs):
            for i, (x, y) in enumerate(train_iter):
                x = x.as_in_context(self.model_ctx)
                y = y.as_in_context(self.model_ctx)

                with autograd.record():
                    output = self.model(x)
                    loss = loss_function(output, y)
                # run backward outside the recording scope, then update the parameters
                loss.backward()
                trainer.step(x.shape[0])
        # returning self follows the scikit-learn convention for fit
        return self

    def predict(self, test_x):
        return self.model(mx.nd.array(test_x, ctx=self.model_ctx))

clf = GluonModel()

pipeline = Pipeline([('scaler', StandardScaler()), ('clf', clf)])
x_train = np.random.randn(1000, 10).astype(np.float32)
y_train = np.random.randn(1000).astype(np.float32)
print(pipeline.fit(x_train, y_train))

You could pass learning_rate, the number of epochs etc. via the constructor, then create an instance of GluonModel and pass it to the scikit-learn Pipeline: Pipeline([('scaler', StandardScaler()), ('clf', clf)]).
The fit method then initializes the model and starts the training loop.
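
On your first question: scikit-learn's convention is that every constructor argument is stored unchanged as an attribute of the same name; that is what lets get_params()/set_params() (and therefore parameter setting in Pipeline and GridSearchCV) discover them via introspection, and deriving from sklearn.base.BaseEstimator gives you both methods for free. A minimal sketch (the DummyGluonEstimator name is made up for illustration):

from sklearn.base import BaseEstimator

class DummyGluonEstimator(BaseEstimator):
    # each argument is stored under its own name so that
    # BaseEstimator.get_params()/set_params() can discover it
    def __init__(self, learning_rate=0.0001, epochs=2, batch_size=10):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size

est = DummyGluonEstimator()
print(est.get_params())   # {'batch_size': 10, 'epochs': 2, 'learning_rate': 0.0001}
est.set_params(epochs=5)  # this is how Pipeline/GridSearchCV tune parameters

On your second question: Keras's wrappers take a build_fn callable that returns a compiled model. Gluon has no compile step, so one possible design (a sketch, not an established API) is to accept a callable that returns an uninitialized Block and call it inside fit:

from mxnet import gluon

def build_model():
    # any callable returning an uninitialized gluon Block would do
    net = gluon.nn.HybridSequential()
    net.add(gluon.nn.Dense(64, activation='relu'))
    net.add(gluon.nn.Dense(1))
    return net

class GluonModelFromBuilder:
    # hypothetical variant of GluonModel that defers model construction,
    # mirroring the build_fn argument of the Keras wrappers
    def __init__(self, build_fn=build_model, epochs=2):
        self.build_fn = build_fn
        self.epochs = epochs

    def fit(self, x, y):
        self.model_ = self.build_fn()  # build, then initialize and train as above
        return self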

I hope this helps! Let me know if you have further questions.

Hi again,
this is what I've achieved so far:

import mxnet as mx
from mxnet import gluon, nd, autograd
from sklearn.base import BaseEstimator
import numpy as np

class GluonModel(BaseEstimator):
    """base wrapper for gluon models. don't use this directly"""
    def __init__(self, model, loss_function, init_function, batch_size=10, learning_rate=0.0001, metric='acc', model_ctx=mx.cpu(), epochs=2, optimizer='sgd', last_batch='rollover'):
        # store every constructor argument under its own name, as scikit-learn expects
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.model_ctx = model_ctx
        self.epochs = epochs
        self.model = model
        self.optimizer = optimizer
        self.loss_function = loss_function
        self.init_function = init_function
        self.last_batch = last_batch
        self.metric = metric

    def fit(self, train_x, train_y, **kwargs):
        """trains the model
train_x: array of shape (n_samples, n_features)
training samples
train_y: array of shape (n_samples,) or (n_samples, n_outputs)
the labels for x
**kwargs: 'shuffle' controls the DataLoader; remaining arguments are forwarded to the optimizer"""
        shuffle = kwargs.pop('shuffle', True)
        kwargs.pop('sample_weight', None)  # accepted for API compatibility, not used yet
        loss_function = self.loss_function
        self.model.collect_params().initialize(self.init_function, ctx=self.model_ctx)
        trainer = gluon.Trainer(self.model.collect_params(), self.optimizer, {'learning_rate': self.learning_rate, **kwargs})
        train_dataset = mx.gluon.data.ArrayDataset(train_x, train_y)
        train_iter = mx.gluon.data.DataLoader(train_dataset, last_batch=self.last_batch, batch_size=self.batch_size, shuffle=shuffle)
        # loop over the data for the given number of epochs and train
        for e in range(self.epochs):
            for i, (x, y) in enumerate(train_iter):
                x = x.as_in_context(self.model_ctx)
                y = y.as_in_context(self.model_ctx)
                with autograd.record():
                    output = self.model(x)
                    loss = loss_function(output, y)
                loss.backward()
                trainer.step(x.shape[0])
        return self

    def predict(self, x, **kwargs):
        """predicts y for the given x samples"""
        return self.model(mx.nd.array(x, ctx=self.model_ctx))

    def score(self, x, y):
        """evaluates the configured metric on the given data"""
        test_dataset = mx.gluon.data.ArrayDataset(x, y)
        test_iter = mx.gluon.data.DataLoader(test_dataset, batch_size=self.batch_size)
        acc = mx.metric.create(self.metric)
        for i, (data, label) in enumerate(test_iter):
            data = data.as_in_context(self.model_ctx)
            label = label.as_in_context(self.model_ctx)
            output = self.model(data)
            p = nd.argmax(output, axis=1)
            acc.update(preds=p, labels=label)
        return acc.get()[1]

class GluonClassifier(GluonModel):
    """scikit-learn like classifier for gluon"""

    def fit(self, x, y, sample_weight=None, **kwargs):
        """trains the classifier"""
        # check the shape of y and decide between a binary and a multiclass classifier
        if len(y.shape) == 2 and y.shape[1] > 1:
            # one-hot labels: this is a multiclass classifier
            self.classes_ = np.arange(y.shape[1])
        elif (len(y.shape) == 2 and y.shape[1] == 1) or len(y.shape) == 1:
            # this is a binary (or label-encoded) classifier
            self.classes_ = np.unique(y)
            y = np.searchsorted(self.classes_, y)
        else:
            raise ValueError("Invalid shape for y: " + str(y.shape))
        self.n_classes_ = len(self.classes_)
        if sample_weight is not None:
            kwargs["sample_weight"] = sample_weight
        return super(GluonClassifier, self).fit(x, y, **kwargs)

    def predict(self, x, **kwargs):
        """predicts y from x"""
        p = super(GluonClassifier, self).predict(x, **kwargs)
        if p.shape[-1] > 1:
            classes = p.argmax(axis=-1).asnumpy().astype('int64')
        else:
            classes = (p > 0.5).asnumpy().astype('int64').ravel()
        # map class indices back to the original labels
        return self.classes_[classes]

    def predict_proba(self, x, **kwargs):
        """predicts probability estimates from x"""
        p = super(GluonClassifier, self).predict(x, **kwargs)
        # for a binary classifier, stack the probabilities of both classes
        if p.shape[1] == 1:
            p = nd.concat(1 - p, p, dim=1)
        return p
I should implement other things as well; this is the first version of the code that I wanted to share.
If you have any suggestions, please let me know and I'll try to implement them.
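
For anyone following along, here is a minimal usage sketch of the classifier above (the Dense network, the softmax loss, the Xavier initializer and the hyperparameters are arbitrary choices for illustration, not part of the wrapper):

import mxnet as mx
import numpy as np
from mxnet import gluon

net = gluon.nn.Dense(3)  # 3 output units for a 3-class toy problem
clf = GluonClassifier(model=net,
                      loss_function=gluon.loss.SoftmaxCrossEntropyLoss(),
                      init_function=mx.init.Xavier(),
                      epochs=5, batch_size=32, learning_rate=0.01)

x = np.random.randn(300, 20).astype(np.float32)
y = np.random.randint(0, 3, size=(300,))  # integer class labels
clf.fit(x, y, shuffle=True)
print(clf.predict(x[:5]))  # argmax indices mapped back to the original labels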

I've completed both the classifier and the regressor:

"""scikit-learn like wrapper for gluon in mxnet"""
import mxnet as mx
from mxnet import gluon, nd, autograd, metric
from sklearn.base import BaseEstimator
import numpy as np

class GluonModel(BaseEstimator):
    """base wrapper for gluon models. don't use this"""
    def __init__(self, model, loss_function, init_function, batch_size=10, learning_rate=0.0001, metric=["acc"], model_ctx=mx.cpu(), epochs=2, optimizer='sgd', last_batch = 'rollover'):
        self.batch_size=batch_size
        self.learning_rate=learning_rate
        self.model_ctx=model_ctx
        self.epochs = epochs
        self.model = model
        self.optimizer = optimizer
        self.loss = loss_function
        self.init = init_function
        self.last_batch = last_batch
        self.metric = metric

    def fit(self, train_x, train_y, **kwargs):
        """trains the model
train_x: array(shape: (n_samples, n_features))
training samples
train_y: array (shape (n_samples, .) or (n_samples, n_outputs)
the lables for x
**kwargs: other arguments"""
        loss_function = self.loss
        self.model.collect_params().initialize(self.init_function, ctx=self.model_ctx)
        trainer = gluon.Trainer(self.model.collect_params(), self.optimizer, {'learning_rate': self.learning_rate, **kwargs})
        train_dataset = mx.gluon.data.ArrayDataset(train_x, train_y)
        train_iter = mx.gluon.data.DataLoader(train_dataset, last_batch = self.last_batch, batch_size = self.batch_size, shuffle = kwargs["shuffle"])
        # loop over the data with number of epochs and train 
        for e in range(self.epochs):
            for i, (x, y) in enumerate(train_iter):
                x = x.as_in_context(self.model_ctx)
                y = y.as_in_context(self.model_ctx)
                with autograd.record():
                    output = self.model(x)
                    loss = loss_function(output, y)
                    loss.backward()
                trainer.step(x.shape[0])
        return self

    def predict(self, x, **kwargs):
        """predicts x samples into y"""
        return self.model(mx.nd.array(x))

    def score(self, x, y):
        test_dataset = mx.gluon.data.ArrayDataset(x, y)
        test_iter = mx.gluon.data.DataLoader(test_dataset)
        acc = mx.metric.create(self.metric)
        for i, (data, label) in enumerate(test_iter):
            data = data.as_in_context(self.model_ctx)
            label = label.as_in_ctx(self.model_ctx)
            output = self.model(data)
            p = nd.argmax(output, axis = 1)
            acc.update(preds = p, labels = label)
        return acc.get()[1]

class GluonClassifier(GluonModel):
    """scikitlearn like classifier for gluon"""

    def fit(self, x, y, sample_weight=None, **kwargs):
        """trains the classifier"""
        # checkout the shape of y, and decide on binary classifier or multi classifier
        if len(y.shape) == 2 and y.shape[1] > 1:
            # this is a multi classifier
            self.classes_ = nd.arange(y.shape[1])
        elif (len(y.shape) == 2 and y.shape[1] == 1) or len(y.shape) == 1:
            # this is a binary classifier
            self.classes_ = nd.unique(y)
            y = nd.searchsorted(self.classes_, y)
        else:
            raise ValueError("Invalid shape for y: " + str(y.shape))
        self.n_classes_ = len(self.classes_)
        if sample_weight is not None:
            kwargs["sample_weight"] = sample_weight
        return super(GluonClassifier, self).fit(x, y, **kwargs)

    def predict(self, x, **kwargs):
        """predicts y from x"""
        p = super(GluonClassifier, self).predict(x, **kwargs)
        if p.shape[-1] > 1:
            classes = p.argmax(axis=-1)
        else:
            classes = (p > 0.5).astype(int32)
            return self.classes_[classes]

    def predict_proba(self, x, **kwargs):
        """predicts probability estimate from x and returns y"""
        p = super(GluonClassifier, self).predict(x, **kwargs)
        # check if this is a binary classification
        if p.shape[1] == 1:
            p = nd.hstack([1 - p, p])
            return p

class GluonRegressor(GluonModel):
    """you can use this class for regression"""
    def predict(self, x, **kwargs):
        """returns the prediction for x
x: array shape (n_samples, n_features) to be predicted
**kwargs: other arguments passed to predict() method
returns: the predicted y"""
        return nd.squeeze(super(GluonRegressor, self).predict(x, **kwargs))

Note: I studied Keras's scikit-learn wrapper and based this implementation on it.
Thank you @NRauschmayr for helping with this.
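
A quick end-to-end sketch of the completed regressor inside a scikit-learn pipeline, mirroring the earlier example (the network and hyperparameters are illustrative only):

import mxnet as mx
import numpy as np
from mxnet import gluon
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

reg = GluonRegressor(model=gluon.nn.Dense(1),
                     loss_function=gluon.loss.L2Loss(),
                     init_function=mx.init.Normal(sigma=1.),
                     epochs=2, batch_size=10, learning_rate=0.0001)

pipeline = Pipeline([('scaler', StandardScaler()), ('reg', reg)])
x_train = np.random.randn(1000, 10).astype(np.float32)
y_train = np.random.randn(1000).astype(np.float32)
pipeline.fit(x_train, y_train)
print(pipeline.predict(x_train[:5]))  # squeezed to shape (5,)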


@brightening-eyes thanks for the effort.
FYI, I noticed you are passing a metric but ignoring it at the moment.

So, how would I implement this as well?
The constructor sets self.metric from the metric argument, and the score function then passes self.metric to mxnet.metric.create().

My bad, I missed the score function. I was thinking of having the training accuracy computed during fit, but that's probably not necessary.
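
For reference, a minimal sketch of what tracking a training metric during fit could look like, extending the epoch loop of the GluonModel.fit above (the per-epoch printing is purely illustrative):

# inside GluonModel.fit, replacing the plain epoch loop;
# assumes self.metric is a single metric name such as 'acc'
for e in range(self.epochs):
    train_metric = mx.metric.create(self.metric)
    for i, (x, y) in enumerate(train_iter):
        x = x.as_in_context(self.model_ctx)
        y = y.as_in_context(self.model_ctx)
        with autograd.record():
            output = self.model(x)
            loss = loss_function(output, y)
        loss.backward()
        trainer.step(x.shape[0])
        # update the running metric with this batch's predictions
        train_metric.update(labels=y, preds=nd.argmax(output, axis=1))
    name, value = train_metric.get()
    print("epoch %d: %s=%f" % (e, name, value))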