Reproducible code:
import mxnet as mx
from mxnet import nd, gluon, autograd, test_utils
from mxnet.gluon import nn
from mxnet.gluon.model_zoo import vision
import numpy as np
if __name__ == '__main__':
    # Build a backbone from a pretrained ResNet-50 and tap four intermediate
    # feature maps (one per stage) plus the stem convolution output.
    net = gluon.model_zoo.vision.resnet50_v1(pretrained=True, prefix='model_')
    inputs = mx.sym.var('data')
    out = net(inputs)
    internals = out.get_internals()
    outputs = [internals['model_conv0_fwd_output'],
               internals['model_stage1_activation2_output'],
               internals['model_stage2_activation3_output'],
               internals['model_stage3_activation5_output'],
               internals['model_stage4_activation2_output'],
               ]

    # Upsample every stage output back to 1/2 input resolution so they can be
    # concatenated channel-wise (hypercolumn-style fusion).
    #
    # NOTE(memory): the concat below has 256+512+1024+2048 = 3840 channels at
    # (H/2, W/2).  With 1024x1024 inputs that single tensor is
    # 2 * 3840 * 512 * 512 * 4 bytes ~= 8 GB per batch of 2 — this, not the
    # batch size, is what exhausts GPU/CPU memory in the original report.
    # Keep the input resolution modest (see IMG_SIZE) or fuse with 1x1
    # convolutions before upsampling.
    upsample2x = mx.sym.UpSampling(outputs[1], scale=2, sample_type='nearest')
    upsample4x = mx.sym.UpSampling(outputs[2], scale=4, sample_type='nearest')
    upsample8x = mx.sym.UpSampling(outputs[3], scale=8, sample_type='nearest')
    upsample16x = mx.sym.UpSampling(outputs[4], scale=16, sample_type='nearest')
    stacked = mx.sym.concat(upsample2x, upsample4x, upsample8x, upsample16x, dim=1)

    # Prediction head: 9x9 -> 5x5 -> 5x5 convolutions down to a 1-channel map.
    outputx = mx.sym.Convolution(stacked, kernel=(9, 9), num_filter=64, pad=(4, 4))
    outputx = mx.sym.Convolution(outputx, kernel=(5, 5), num_filter=32, pad=(2, 2))
    outputx = mx.sym.Convolution(outputx, kernel=(5, 5), num_filter=1, pad=(2, 2))

    ctx = mx.gpu(0)
    net3 = gluon.SymbolBlock(outputs=outputx, inputs=inputs)
    net3.initialize(ctx=ctx)
    # Hybridized graphs let the engine free intermediate buffers eagerly,
    # which lowers peak memory for a network this wide.
    net3.hybridize()

    # ---- training ----
    lossf = gluon.loss.L2Loss()
    batch_sz = 2
    # Input resolution is a parameter so the memory footprint can be tuned;
    # labels are at half resolution because the fused features live at H/2.
    IMG_SIZE = 1024
    dataset = mx.gluon.data.dataset.ArrayDataset(
        nd.random.uniform(shape=(100, 3, IMG_SIZE, IMG_SIZE)),
        nd.random.uniform(shape=(100, 1, IMG_SIZE // 2, IMG_SIZE // 2))
    )
    train_data_loader = gluon.data.DataLoader(dataset, batch_sz, shuffle=True)
    trainer = gluon.Trainer(net3.collect_params(), 'sgd', {'learning_rate': 0.1})

    for data, label in train_data_loader:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net3(data)
            loss = lossf(output, label)
        loss.backward()
        # BUG FIX: the original never called trainer.step(), so the SGD update
        # never ran and the parameters were never trained.
        trainer.step(batch_sz)
        # .asnumpy() is a synchronization point: it waits for the async engine
        # and surfaces any deferred allocation error here, which is why the
        # OOM traceback points at this line rather than at the forward pass.
        print(loss.asnumpy())
Error:
Traceback (most recent call last):
File "malloc-fail.py", line 55, in <module>
print(loss.asnumpy())
File "/lib/python3.7/site-packages/mxnet/ndarray/ndarray.py", line 1980, in asnumpy
ctypes.c_size_t(data.size)))
File "/lib/python3.7/site-packages/mxnet/base.py", line 252, in check_call
raise MXNetError(py_str(_LIB.MXGetLastError()))
mxnet.base.MXNetError: [09:05:43] src/storage/./pooled_storage_manager.h:143: cudaMalloc failed: out of memory
When I change all contexts to mx.cpu(), I see the error below ("Failed to allocate CPU Memory"), which is almost the same as the case above.
Traceback (most recent call last):
File "malloc-fail.py", line 56, in <module>
print(loss.as_in_context(mx.cpu(0)))
File "/lib/python3.7/site-packages/mxnet/ndarray/ndarray.py", line 189, in __repr__
return '\n%s\n<%s %s @%s>' % (str(self.asnumpy()),
File "/lib/python3.7/site-packages/mxnet/ndarray/ndarray.py", line 1980, in asnumpy
ctypes.c_size_t(data.size)))
File "/lib/python3.7/site-packages/mxnet/base.py", line 252, in check_call
raise MXNetError(py_str(_LIB.MXGetLastError()))
mxnet.base.MXNetError: [10:53:55] src/storage/./cpu_device_storage.h:73: Failed to allocate CPU Memory
I think my batch size is small enough, and the GPU has plenty of memory (32 GB).
If print(loss.asnumpy()) is removed, this code runs without error, but I cannot be sure it is really working because I can't check the loss.
How can I see the loss? And what causes this error?