Training with auxiliary output layer: initializer required for the extra label variable

Hello, I want to test a network with an auxiliary classifier (example: the auxiliary classifier in the Inception network).

I am trying to create a simple net with an auxiliary regression branch. Here is the code that works:

import mxnet as mx
import numpy as np
import logging
# Send INFO-level training progress to stderr via the root logger.
logging.basicConfig(level=logging.INFO, handlers=[logging.StreamHandler()])

# Canonical variable names shared by the iterators and the symbol graph:
# the input data, the main regression label, and the auxiliary label.
data_name = 'data'
label_name = 'target'
aux_name = 'aux'
def convert_dataset_to_mx(dataset, batch_size=256):
    """Wrap the numpy arrays of each split into an ``mx.io.NDArrayIter``.

    Parameters
    ----------
    dataset : dict
        Maps ``'<split>_data'`` / ``'<split>_label'`` (split in
        train/valid/test) to numpy arrays.
    batch_size : int, optional
        Mini-batch size for every iterator (default 256).

    Returns
    -------
    dict
        ``{'train': NDArrayIter, 'valid': NDArrayIter, 'test': NDArrayIter}``.
    """
    mx_iters = {}
    for set_part in ('train', 'valid', 'test'):
        data = {data_name: dataset[set_part + '_data']}
        label = {
            label_name: dataset[set_part + '_label'],
            # Uncomment to also feed the auxiliary regression label
            # (a copy of the main label, so both heads fit the same target):
            # aux_name: np.copy(dataset[set_part + '_label'])
        }
        # Reconstructed: the original line lost the `mx.io.NDArrayIter(data`
        # call head during extraction.
        mx_iters[set_part] = mx.io.NDArrayIter(data, label, batch_size, shuffle=True)
    return mx_iters
def make_multiple_out_network():
    """Build the regression network and wrap it in an ``mx.mod.Module``.

    The main branch is conv -> ELU -> FC(10) -> ELU -> FC(1) ->
    LinearRegressionOutput.  The commented-out lines add an auxiliary
    regression head off the conv features (Inception-style).

    Returns
    -------
    mx.mod.Module
        Unbound module on GPU if one is available, else CPU.
    """
    input_ = mx.sym.var(data_name)
    # Fix: seed the list with the input symbol — starting from [] makes
    # `main_branch[-1]` on the next line raise IndexError.
    main_branch = [input_]
    main_branch.append(mx.sym.Convolution(main_branch[-1], kernel=(1, 10), num_filter=2))
    main_branch.append(mx.sym.LeakyReLU(main_branch[-1], act_type='elu'))

    # Auxiliary regression head (uncomment together with the aux label):
    # aux_branch = [main_branch[-1]]
    # aux_branch.append( mx.sym.FullyConnected( aux_branch[-1], num_hidden=1) )
    # aux = mx.sym.var(aux_name)
    # aux_out_ = mx.sym.LinearRegressionOutput( aux_branch[-1], label=aux)

    main_branch.append(mx.sym.FullyConnected(main_branch[-1], num_hidden=10))
    main_branch.append(mx.sym.LeakyReLU(main_branch[-1], act_type='elu'))
    main_branch.append(mx.sym.FullyConnected(main_branch[-1], num_hidden=1))
    target = mx.sym.var(label_name)
    out_ = mx.sym.LinearRegressionOutput(main_branch[-1], target)

    # Reconstructed: the Group call was truncated during extraction; it
    # groups the output symbols (add aux_out_ when the aux head is enabled).
    net_with_help = mx.sym.Group([
        # aux_out_,
        out_,
    ])

    ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()
    net = mx.mod.Module(symbol=net_with_help, data_names=['data'], label_names=[label_name], context=ctx)

    return net
def train_net(net, dataset):
    """Fit ``net`` on the train split, validate on valid, then score test RMSE.

    Parameters
    ----------
    net : mx.mod.Module
        Module returned by ``make_multiple_out_network``.
    dataset : dict
        ``{'train': ..., 'valid': ..., 'test': ...}`` of NDArrayIter,
        as produced by ``convert_dataset_to_mx``.
    """
    train_iter, valid_iter, test_iter = dataset['train'], dataset['valid'], dataset['test']
    # Reconstructed: the `net.fit(...)` call head was lost during extraction
    # (the traceback below shows fit() being reached from this function).
    net.fit(
        train_iter,
        eval_data=valid_iter,
        optimizer_params={'learning_rate': 5e-5, 'wd': 1e-5},
        num_epoch=1,  # TODO(review): original epoch count lost in extraction — adjust
    )
    test_metric = mx.metric.RMSE()
    net.score(test_iter, test_metric)

def main():
    """Build a synthetic all-ones dataset, construct the net, and train it."""
    split_data_set = {
        "train_data":  np.ones((100000, 1, 1, 10)),
        "valid_data":  np.ones((10000, 1, 1, 10)),
        "test_data":   np.ones((1000, 1, 1, 10)),
        "train_label": np.ones((100000,)),
        "valid_label": np.ones((10000,)),
        "test_label":  np.ones((1000,)),
    }  # reconstructed: closing brace lost during extraction
    dataset = convert_dataset_to_mx(split_data_set)
    net = make_multiple_out_network()
    train_net(net, dataset)

if __name__ == '__main__':
    # Reconstructed: the entry-point call was lost during extraction; the
    # traceback above shows main()/train_net() executing, so it was invoked.
    main()
The commented-out lines create the additional label and the extra network branch for the auxiliary regression. When I uncomment these lines, the terminal explodes with the following error:

Traceback (most recent call last):
File ".\", line 73, in <module>
    train_net(net, dataset)
File ".\", line 51, in train_net
File "C:\Program Files\Python36\lib\site-packages\mxnet\module\", line 502, in fit
    allow_missing=allow_missing, force_init=force_init)
File "C:\Program Files\Python36\lib\site-packages\mxnet\module\", line 309, in init_params
    _impl(desc, arr, arg_params)
File "C:\Program Files\Python36\lib\site-packages\mxnet\module\", line 304, in _impl
    initializer(name, arr)
File "C:\Program Files\Python36\lib\site-packages\mxnet\", line 163, in __call__
    self._init_default(desc, arr)
File "C:\Program Files\Python36\lib\site-packages\mxnet\", line 253, in _init_default
    'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern' % name)
ValueError: Unknown initialization pattern for aux. Default initialization is now limited to "weight", "bias", "gamma" (1.0), and "beta" (0.0).Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern

When I change the line

aux = mx.sym.var(aux_name)

to

aux = mx.sym.var(aux_name, init=mx.init.Normal())

it works. But why should there be an initializer for a label that already has data provided when the iterator is created? And why does the initializer passed to the module (via `fit(..., initializer=...)`) have no effect?
My goal is to reduce the error of the main regression output (out_). Does test_metric collect the score of both outputs, or only one of them?

If I am right, then the test metric covers only the main label, because in the module creation

net = mx.mod.Module(symbol=net_with_help, data_names=['data'], label_names=[label_name], context=ctx)

label_names contains only the main regression label name. Please correct me if I am wrong.
Regarding the first question — why init=… is needed when creating the aux variable — I still have little understanding, and help is appreciated.

I have a guess as to why I have to initialize the label variable: maybe it is needed for the first gradient computation? However, help is still needed.