Training with auxiliary output layer: initializer required for another label variable

Hello, I want to test a network with an auxiliary classifier (for example, the auxiliary classifier in the Inception network).

I am trying to create a simple net with auxiliary regression. Here is the code that works:

import mxnet as mx
import numpy as np
import logging
# Route INFO-level training progress (per-epoch loss/metric) to the console.
logging.basicConfig( level=logging.INFO, handlers=[logging.StreamHandler()], )

# Variable names shared between the data iterators and the symbol graph:
# network input, main regression label, and the auxiliary regression label.
data_name,label_name,aux_name = 'data','target','aux'
def convert_dataset_to_mx( dataset, batch_size=256 ):
    """Wrap numpy train/valid/test splits in mx.io.NDArrayIter objects.

    `dataset` maps '<part>_data' / '<part>_label' keys to numpy arrays.
    Returns a dict mapping 'train'/'valid'/'test' to an iterator yielding
    batches of the data (keyed by data_name) and label (keyed by label_name).
    """
    mx_iters = {}
    for set_part in ('train', 'valid', 'test'):
        data = {data_name: dataset[set_part+'_data']}
        label = {
            label_name:         dataset[set_part+'_label'],
            # Uncommenting the line below supplies a second copy of the labels
            # under aux_name to feed the auxiliary regression branch.
            # aux_name:   np.copy(dataset[set_part+'_label'])
        }
        # NOTE(review): shuffle=True is also applied to the valid/test
        # iterators; harmless for scoring but usually unnecessary — confirm.
        mx_iters[set_part] = mx.io.NDArrayIter(data, label, batch_size, shuffle=True)
    return mx_iters
def make_multiple_out_network():
    """Build a small regression net (conv -> elu -> fc -> elu -> fc -> linreg).

    The commented lines, when enabled, attach an auxiliary regression head
    after the first activation, with its own label variable (aux_name).
    Returns an unbound mx.mod.Module on GPU if one is available, else CPU.
    """
    input_ = mx.sym.var(data_name)
    main_branch = []
    main_branch.append(input_)
    # 1x10 convolution spans the full last axis of the (N,1,1,10) input.
    main_branch.append( mx.sym.Convolution( main_branch[-1], kernel=(1,10), num_filter=2) )
    main_branch.append( mx.sym.LeakyReLU( main_branch[-1], act_type='elu') )

    # Auxiliary head: FC down to a single output, trained against aux_name.
    # aux_branch = [main_branch[-1]]
    # aux_branch.append( mx.sym.FullyConnected( aux_branch[-1], num_hidden=1) )
    # aux = mx.sym.var(aux_name)
    # aux_out_ = mx.sym.LinearRegressionOutput( aux_branch[-1], label=aux)

    main_branch.append( mx.sym.FullyConnected( main_branch[-1], num_hidden=10) )
    main_branch.append( mx.sym.LeakyReLU( main_branch[-1], act_type='elu') )
    main_branch.append( mx.sym.FullyConnected( main_branch[-1], num_hidden=1) )
    target = mx.sym.var(label_name)
    out_ = mx.sym.LinearRegressionOutput( main_branch[-1], target)

    # Grouping both outputs makes the module train/backprop through each head.
    net_with_help = mx.sym.Group([
        out_,
        # aux_out_,
    ])

    ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()
    # NOTE(review): label_names lists only the main label; when the aux head
    # is enabled, aux_name is absent here — likely related to the init error.
    net = mx.mod.Module(symbol=net_with_help, data_names=['data'], label_names=[label_name], context=ctx)

    return net
def train_net(net, dataset):
    """Fit `net` on the train split, monitoring RMSE on the valid split,
    then report RMSE on the test split.

    `dataset` is the dict of NDArrayIter objects keyed by
    'train'/'valid'/'test' produced by convert_dataset_to_mx.
    """
    net.fit(
        dataset['train'],
        eval_data=dataset['valid'],
        optimizer='adam',
        optimizer_params={'learning_rate': 5e-5, 'wd': 1e-5},
        eval_metric='rmse',
        num_epoch=32,
        initializer=mx.initializer.Normal(),
    )
    # Final held-out evaluation on the test iterator.
    metric = mx.metric.RMSE()
    net.score(dataset['test'], metric)
    print(metric)

def main():
    """Build a synthetic all-ones dataset, construct the network, train it."""
    # Fix both RNGs so runs are reproducible.
    np.random.seed(1)
    mx.random.seed(1)
    # Data is (N, 1, 1, 10) ones; labels are (N,) ones, per split.
    split_data_set = {}
    for part, count in (('train', 100000), ('valid', 10000), ('test', 1000)):
        split_data_set[part + '_data'] = np.ones((count, 1, 1, 10))
        split_data_set[part + '_label'] = np.ones((count,))
    dataset = convert_dataset_to_mx(split_data_set)
    net = make_multiple_out_network()
    train_net(net, dataset)

# Run the experiment only when executed as a script, not on import.
if __name__ == '__main__':
    main()

The commented lines are there to create the additional label and the neural-network branch for the auxiliary regression. When I uncomment these lines, the terminal explodes with the following error:

Traceback (most recent call last):
File ".\multiple_out_QUESTION.py", line 73, in <module>
    main()
    train_net(net, dataset)
File ".\multiple_out_QUESTION.py", line 51, in train_net
    initializer=mx.initializer.Normal(),
File "C:\Program Files\Python36\lib\site-packages\mxnet\module\base_module.py", line 502, in fit
    allow_missing=allow_missing, force_init=force_init)
File "C:\Program Files\Python36\lib\site-packages\mxnet\module\module.py", line 309, in init_params
    _impl(desc, arr, arg_params)
File "C:\Program Files\Python36\lib\site-packages\mxnet\module\module.py", line 304, in _impl
    initializer(name, arr)
File "C:\Program Files\Python36\lib\site-packages\mxnet\initializer.py", line 163, in __call__
    self._init_default(desc, arr)
File "C:\Program Files\Python36\lib\site-packages\mxnet\initializer.py", line 253, in _init_default
    'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern' % name)
ValueError: Unknown initialization pattern for aux. Default initialization is now limited to "weight", "bias", "gamma" (1.0), and "beta" (0.0).Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern

When I change the line

aux = mx.sym.var(aux_name)

to

aux = mx.sym.var(aux_name, init=mx.init.Normal())

It works. But why should there be an initializer for a label that already has data provided when creating mx.io.NDArrayIter? And why does the initializer passed into net.fit(…, initializer=…) have no effect here?
My goal is to reduce the error of the main regression layer (out_). Does test_metric collect the accuracy of both outputs, or only one of them?

If I am right, then test_metric only covers the main label, because in the module creation

net = mx.mod.Module(symbol=net_with_help, data_names=['data'], label_names=[label_name], context=ctx)

label_names contains only the main regression label name. Please correct me if I am wrong.
Regarding the first question — why init=… is needed when creating the aux variable — I still have little understanding, and help is appreciated.

I have a guess about why I have to initialize the label variable: maybe it is needed for the first gradient computation? However, help is still needed.