Simple test case with random data. After training, re-running inference with the input NDArray replaced by one of a different batch size produces a different output for the same sample — the forward pass no longer matches the first run. See the code below for a minimal reproduction:
Context ctx = Context::cpu();
Symbol test_mlp(const std::vector<int> &layers) {
auto x = Symbol::Variable("X");
auto label = Symbol::Variable("label");
std::vector<Symbol> weights(layers.size());
std::vector<Symbol> biases(layers.size());
std::vector<Symbol> outputs(layers.size());
for(size_t i=0; i < layers.size(); i++) {
weights[i] = Symbol::Variable("w" + std::to_string(i));
biases[i] = Symbol::Variable("b" + std::to_string(i));
Symbol fc = FullyConnected(
i == 0? x : outputs[i-1], // data
weights[i],
biases[i],
layers[i]);
//TODO ensure fully connected
outputs[i] = i == layers.size()-1 ? fc : Activation(fc, ActivationActType::kRelu);
}
//TODO change
Symbol out = LinearRegressionOutput(outputs.back(), label);
return out;
}
void testNetReshape() {
const std::vector<int> layers{100,100,1};
const int max_epoch = 2;
const float learning_rate = 0.1;
auto net = test_mlp(layers);
std::map<std::string, NDArray> netArgs;
Context ctx = Context::cpu();
netArgs["X"] = NDArray({1,2,10,4,11,43,3,45,21}, Shape(3, 3), ctx);
netArgs["label"] = NDArray({1000,900,800},Shape(3), ctx);
net.InferArgsMap(ctx, &netArgs, netArgs);
auto initializer = Uniform(0.1);
for (auto &arg : netArgs) {
initializer(arg.first, &arg.second);
}
Optimizer *netOpt = OptimizerRegistry::Find("adam");
netOpt->SetParam("lr", learning_rate);
auto *netExec = net.SimpleBind(ctx, netArgs);
auto netArgNames = net.ListArguments();
for (int iter = 0; iter < max_epoch; ++iter) {
// Compute gradients
netExec->Forward(true);
netExec->Backward();
// Update parameters
for (size_t i = 0; i < netArgNames.size(); ++i) {
if (netArgNames[i] == "X" || netArgNames[i] == "label") continue;
netOpt->Update(i, netExec->arg_arrays[i], netExec->grad_arrays[i]);
}
std::cout << "..." << std::endl;
}
netExec->Forward(false);
//(1) FIRST PASS THRU NETWORK
std::cout << "output: " << netExec->outputs[0] << std::endl;
netArgs["X"] = NDArray({1,2,10}, Shape(1, 3), ctx);
netArgs["label"] = NDArray({1000},Shape(1), ctx);
//net.InferArgsMap(ctx, &netArgs, netArgs);
//netExec = net.SimpleBind(ctx, netArgs);
netExec->Forward(false);
//(2) WHY THIS IS NOT SAME?
std::cout << "output after reshape: " << netExec->outputs[0] << std::endl;
}
Observed output:
(1): [0.59, 0.96, 0.47] — first forward pass, batch size 3.
(2): [34] — but the expected value is 0.59, i.e. the first element of (1), since the single-sample input is identical to the first row of the original batch.