C++ API. Linear regression example


#1

Hello,

I’m trying to implement a simple linear regression fit example (similar to python https://gluon.mxnet.io/chapter02_supervised-learning/linear-regression-scratch.html) in C++ using mxnet C++ API.

I’m using single fully connected layer (FullyConnected symbol) with LinearRegressionOutput and SGD optimizer.

The problem I have is that RMSE doesn’t drop below ~0.64 regardless of number of learning iterations applied. The example in python can achieve much lower RMSE values.

Can anyone suggest what might be the problem? The source code is below.

Thanks,
Eugene
//=========================================================
#include <stdlib.h>
#include <iostream>
#include <chrono>
#include "mxnet-cpp/MxNetCpp.h"

using namespace std;
using namespace mxnet::cpp;

int main(int argc, char** argv)
{
const int max_epoch = 10;
const float learning_rate = 0.01;
const float momentum = 0.9;
const float weight_decay = 0.;
const int N = 1000;
const int M = 2;
const int batch_size = 5;
Context ctx = Context::cpu(); // Use CPU for training

srand((unsigned)time(NULL));

std::vector<mx_float> x(N*M);
for (int i=0; i<x.size(); ++i) x[i]=((mx_float)rand())/RAND_MAX;
NDArray X = NDArray(x,Shape(N, M),ctx);

// fill Y[i] = X[i,0] + 2X[i,1] + 3
std::vector<mx_float> y(N);
for (int i=0; i<y.size(); ++i)
y[i]=x[i] + 2
x[i+N] + 3;

NDArray labels = NDArray(y,Shape(N),ctx);

//auto net = mlp({1});
auto fc = FullyConnected(Symbol::Variable(“X”), Symbol::Variable(“w0”), Symbol::Variable(“b0”), 1 );
auto net = LinearRegressionOutput(“linreg”, fc, Symbol::Variable(“label”));

std::map<string, NDArray> args;
args[“X”] = NDArray(Shape(batch_size, M), ctx);
args[“label”] = NDArray(Shape(batch_size), ctx);
net.InferArgsMap(ctx, &args, args);

auto initializer = Uniform(0.01);
for (auto& arg : args) {
initializer(arg.first, &arg.second);
}

// optim
Optimizer* opt = OptimizerRegistry::Find(“sgd”);
opt->SetParam(“lr”, learning_rate)
->SetParam(“wd”, weight_decay);

// binding parameters to the model
auto arg_names = net.ListArguments();
for (size_t i = 0; i < arg_names.size(); ++i)
std::cout << arg_names[i] << std::endl;

int num_batches = N/batch_size;

// training
for (int iter = 0; iter < max_epoch; ++iter) {

auto tic = chrono::system_clock::now();
for (int slice = 0; slice < num_batches ; slice ++ ) {

  int from_index = slice*batch_size;
  int to_index   = from_index + batch_size; if (to_index>N) to_index  = N;

  NDArray l = labels.Slice(from_index,to_index);
  args["X"] = X.Slice(from_index,to_index).Copy(ctx);
  args["label"] = l.Copy(ctx);

  {
    auto *exec = net.SimpleBind(ctx, args);
    exec->Forward(true);
    exec->Backward();
    // Update parameters
    for (size_t i = 0; i < arg_names.size(); ++i) {
      if (arg_names[i] == "X" || arg_names[i] == "label") continue;
      opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      //LG << arg_names[i] << "|" << exec->arg_arrays[i] << "|" << exec->grad_arrays[i];
    }
    delete exec;
  }
}

RMSE acc;
{
  // eval
  args["X"] = X.Copy(ctx);
  args["label"] = labels.Copy(ctx);
  auto *exec = net.SimpleBind(ctx, args);
  exec->Forward(false);
  acc.Update(labels, exec->outputs[0]);
  delete exec;
}

auto toc = chrono::system_clock::now();
float duration = chrono::duration_cast<chrono::milliseconds>(toc - tic).count() / 1000.0;
LG << "Epoch: " << iter << " RMSE: " << acc.Get();

}
//LG << exec->outputs[0].Slice(0,batch_size);
//LG << labels.Slice(0,batch_size);

MXNotifyShutdown();

return 0;
}


#2

An update:

I've figured out the problem: the initialization of a two-dimensional array Shape(N,M) from the flat std::vector expects row-by-row packing. I initialized the values assuming column-by-column packing, so the labels didn't match the features and it wasn't a linear regression at all. That's why it couldn't achieve a decent RMSE.
Here is a working example:
#include <stdlib.h>
#include <iostream>
#include <chrono>
#include "mxnet-cpp/MxNetCpp.h"

    using namespace std;
    using namespace mxnet::cpp;

    int main(int argc, char** argv) 
    {
      const int max_epoch = 10;
      const float learning_rate = 0.01;
      const float momentum = 0.9;
      const float weight_decay = 0.;
      const int N = 1000;
      const int M = 2;
      const int batch_size = 5;
      Context ctx = Context::cpu();  // Use CPU for training
      
      srand((unsigned)time(NULL));

      std::vector<mx_float> x(N*M);
      for (int i=0; i<x.size(); ++i) x[i]=((mx_float)rand())/RAND_MAX;
      NDArray X =  NDArray(x,Shape(N, M),ctx); 

      // fill Y[i] = X[i,0] + 2*X[i,1] + 3
      std::vector<mx_float> y(N);
      for (int i=0; i<y.size(); ++i) 
        y[i]=x[2*i] + 2*x[2*i+1] + 3;

      NDArray labels =  NDArray(y,Shape(N),ctx); 

      //auto net = mlp({1});
      auto fc = FullyConnected(Symbol::Variable("X"), Symbol::Variable("w0"), Symbol::Variable("b0"), 1 );
      auto net = LinearRegressionOutput("linreg", fc, Symbol::Variable("label"));

      std::map<string, NDArray> args;
      args["X"] = NDArray(Shape(batch_size, M), ctx);
      args["label"] = NDArray(Shape(batch_size), ctx);
      net.InferArgsMap(ctx, &args, args);
      
      auto initializer = Uniform(0.01);
      for (auto& arg : args) {
        initializer(arg.first, &arg.second);
      }

      // optim
      Optimizer* opt = OptimizerRegistry::Find("sgd");
      opt->SetParam("lr", learning_rate)
         ->SetParam("wd", weight_decay);
      
      // binding parameters to the model
      auto arg_names = net.ListArguments();
      for (size_t i = 0; i < arg_names.size(); ++i) 
        std::cout << arg_names[i] << std::endl;

      int num_batches = N/batch_size;
      NDArray w0,b0;

      RMSE acc;
      {
        // eval
        args["X"] = X.Copy(ctx);
        args["label"] = labels.Copy(ctx);
        auto *exec = net.SimpleBind(ctx, args);
        exec->Forward(false);
        acc.Update(labels, exec->outputs[0]);
        delete exec;
        LG << "Epoch: " << "before" << " RMSE: " << acc.Get();
      }

      // training
      for (int iter = 0; iter < max_epoch; ++iter) {

        auto tic = chrono::system_clock::now();
        for (int slice = 0; slice < num_batches ; slice ++ ) {

          int from_index = slice*batch_size;
          int to_index   = from_index + batch_size; if (to_index>N) to_index  = N;

          NDArray l = labels.Slice(from_index,to_index);
          args["X"] = X.Slice(from_index,to_index).Copy(ctx);
          args["label"] = l.Copy(ctx);

          {
            auto *exec = net.SimpleBind(ctx, args);
            exec->Forward(true);
            exec->Backward();
            // Update parameters
            for (size_t i = 0; i < arg_names.size(); ++i) {
              if (arg_names[i] == "X" || arg_names[i] == "label") continue;
              opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
              if (arg_names[i] == "w0") w0 = exec->arg_arrays[i];
              if (arg_names[i] == "b0") b0 = exec->arg_arrays[i];
            }
            delete exec;
          }
        }

        RMSE acc;
        {
          // eval
          args["X"] = X.Copy(ctx);
          args["label"] = labels.Copy(ctx);
          auto *exec = net.SimpleBind(ctx, args);
          exec->Forward(false);
          acc.Update(labels, exec->outputs[0]);
          delete exec;
        }

        auto toc = chrono::system_clock::now();
        float duration = chrono::duration_cast<chrono::milliseconds>(toc - tic).count() / 1000.0;
        LG << "Epoch: " << iter << " RMSE: " << acc.Get();
      }

      LG << "w=" << w0;
      LG << "b=" << b0;

      MXNotifyShutdown();

      return 0;
    }

#3

I copied your code, but the result shows the accuracy getting lower and lower.
Is there anything wrong?


#4

Yes, as I mentioned in my second post the problem was in wrong initialization of the data. Now everything works fine and it achieves good rmse in just a few steps.

Thanks,

Eugene