Error in autograd: multiple losses for multi-label classifier

Hi,
I am struggling through the implementation of a multi-label multi-class image classifier that finetunes densenet169. It shall predict 6 sparse labels, each label being drawn from a different class space with different size of that space.
Following the thread custom multi-label for pretrained models I designed my net with 6 output units as:

class BigBangNet(gluon.HybridBlock):
def __init__(self, p, c, o , f, g, s):
    super(BigBangNet, self).__init__()
    with self.name_scope():

        self.feature = None

        self.phylum_out = gluon.nn.Dense(p)
        self.class_out   = gluon.nn.Dense(c)
        self.order_out = gluon.nn.Dense(o)
        self.family_out = gluon.nn.Dense(f)
        self.genus_out = gluon.nn.Dense(g)
        self.species_out = gluon.nn.Dense(s)

def hybrid_forward(self, F, x):

    featureX = self.feature(x)

    out1 = self.phylum_out(featureX)
    out2 = self.class_out(featureX)
    out3 = self.order_out(featureX)
    out4 = self.family_out(featureX)
    out5 = self.genus_out(featureX)
    out6 = self.species_out(featureX)

    return (out1, out2, out3, out4, out5, out6)

It is than initialized as:

pretrained_net = get_model(model_name, pretrained=True)
finetune_net = BigBangNet(p=5,
                          c=11,
                          o=27,
                          f=46,
                          g=68,
                          s=166)
finetune_net.collect_params().initialize(init.Xavier(), ctx=ctx)
finetune_net.collect_params().setattr('lr_mult', 10)
finetune_net.feature = pretrained_net.features
finetune_net.hybridize()
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

Having 6 out units I got inspired in model-with-multiple-inputs-and-outputs to implement 6 losses which are computed in the training method:

for batch in train_iter:
    data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0, even_split=False)
    label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0, even_split=False)
    l0 = nd.slice_axis(label[0], axis=1, begin=0, end=1)
    l1 = nd.slice_axis(label[0], axis=1, begin=1, end=2)
    l2 = nd.slice_axis(label[0], axis=1, begin=2, end=3)
    l3 = nd.slice_axis(label[0], axis=1, begin=3, end=4)
    l4 = nd.slice_axis(label[0], axis=1, begin=4, end=5)
    l5 = nd.slice_axis(label[0], axis=1, begin=5, end=6)
    with ag.record():
        outputs = [net(X) for X in data]
        loss0=[]
        loss1=[]
        loss2=[]
        loss3=[]
        loss4=[]
        loss5=[]
        for yhat, y in zip(outputs[0][0], l0):
            loss0 = [*loss0, loss_fn(yhat, y)]
        for yhat, y in zip(outputs[0][1], l1):
            loss1 = [*loss1, loss_fn(yhat, y)]
        for yhat, y in zip(outputs[0][2], l2):
            loss2 = [*loss2, loss_fn(yhat, y)]
        for yhat, y in zip(outputs[0][3], l3):
            loss3 = [*loss3, loss_fn(yhat, y)]
        for yhat, y in zip(outputs[0][4], l4):
            loss4 = [*loss4, loss_fn(yhat, y)]
        for yhat, y in zip(outputs[0][5], l5):
            loss5 = [*loss5, loss_fn(yhat, y)]
        loss = loss0+loss1+loss2+loss3+loss4+loss5
        for l in loss:
            l.backward()
    trainer.step(batch_size)

However calling l.backward I get the following error:

  File "/usr/local/lib/python3.6/dist-packages/mxnet/ndarray/ndarray.py", line 2216, in backward
    ctypes.c_void_p(0)))
  File "/usr/local/lib/python3.6/dist-packages/mxnet/base.py", line 253, in check_call
    raise MXNetError(py_str(_LIB.MXGetLastError()))
mxnet.base.MXNetError: [16:11:54] src/imperative/imperative.cc:352: Check failed: xs.size() > 0 (0 vs. 0) : There are no inputs in computation graph that require gradients.
Stack trace:
  [bt] (0) /usr/local/lib/python3.6/dist-packages/mxnet/libmxnet.so(+0x25b3db) [0x7fce6f5bd3db]
  [bt] (1) /usr/local/lib/python3.6/dist-packages/mxnet/libmxnet.so(mxnet::Imperative::Backward(std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, bool, bool, bool)+0x44a6) [0x7fce71827876]
  [bt] (2) /usr/local/lib/python3.6/dist-packages/mxnet/libmxnet.so(MXAutogradBackwardEx+0x573) [0x7fce71727b83]
  [bt] (3) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call_unix64+0x4c) [0x7fce841a2dae]
  [bt] (4) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call+0x22f) [0x7fce841a271f]
  [bt] (5) /usr/lib/python3.6/lib-dynload/_ctypes.cpython-36m-x86_64-linux-gnu.so(_ctypes_callproc+0x2b4) [0x7fce843b65c4]
  [bt] (6) /usr/lib/python3.6/lib-dynload/_ctypes.cpython-36m-x86_64-linux-gnu.so(+0x11c33) [0x7fce843b6c33]
  [bt] (7) /usr/bin/python3.6(_PyObject_FastCallKeywords+0x19c) [0x5a9cbc]
  [bt] (8) /usr/bin/python3.6() [0x50a5c3]

It would be awesome if you could point in any direction to debug it or finding/understanding that error.
Thank you!
stillsen

In my implementation loss is expected to be a list of NDArray losses. I overlooked that criteria and correcting for that error fixes the problem.