I have some issue of running a diy WGAN mxnet model


#1

Hi, I am a newcomer to MXNet and, without a detailed survey of the MXNet project,
I tried to implement a model for an inpainting task (according to the paper
"Generative Image Inpainting with Contextual Attention").
However, training never runs past about 28 iterations (sometimes 24, sometimes 26), and the Python interpreter exits with the following output:

Process finished with exit code -1073741676 (0xC0000094)

This happens on my Windows system with MXNet version '1.3.1'.
I checked train_iter by iterating it without training and saw no data-loading errors,
so it seems to be a lower-level issue in the framework,
but I cannot tackle it myself.
Are there problems in how I construct the Symbol and Module?
It is simply a model combining WGAN + convolutions + ROIPooling.

Or is this a system problem?
I have copied the full code below.
Please give me some suggestions — thank you!


#2
import mxnet as mx

import numpy as np
from sklearn.datasets import fetch_mldata
import logging
import cv2
from datetime import datetime

from functools import reduce
from PIL import Image
from uuid import uuid1
import gc

# BUG FIX: the original line used typographic ("smart") quotes – “ ” –
# which are a Python SyntaxError; they were introduced by pasting the
# code through a rich-text editor.
print("load modules end")

class Discriminator(object):
    """WGAN discriminator with a global branch (six 5x5 stride-2 conv stages,
    widths 64 -> 512) and a local branch (five conv stages, widths 64 -> 512).

    Each branch is flattened and projected to a 1024-unit dense feature; the
    two features are concatenated and a final 1-unit sigmoid dense layer
    produces the scalar real/fake score.

    NOTE(review): the layers here are Gluon blocks, but
    ``calculate_final_discr_score`` feeds them ``mx.sym`` Symbols.  Gluon
    blocks are not meant to be called on raw Symbols (only HybridBlocks used
    through ``hybridize`` produce symbolic graphs), so this API mixing is
    fragile and is a plausible source of the native crash (0xC0000094) —
    consider building the discriminator from ``mx.sym.Convolution`` /
    ``mx.sym.BatchNorm`` throughout, as the generator does.
    """

    def __init__(self):
        # BUG FIX: this method was spelled ``init`` (missing double
        # underscores), so ``Discriminator()`` never ran it and none of the
        # attributes below ever existed on the instance.
        self.act = mx.gluon.nn.ELU()

        # ---- global branch: 5x5, stride 2, "same"-style padding ----
        self.conv1 = mx.gluon.nn.Conv2D(channels=64, kernel_size=(5, 5),
                                        strides=(2, 2), padding=(2, 2),
                                        dilation=(1, 1))
        self.norm1 = mx.gluon.nn.BatchNorm()

        self.conv2 = mx.gluon.nn.Conv2D(channels=128, kernel_size=(5, 5),
                                        strides=(2, 2), padding=(2, 2),
                                        dilation=(1, 1))
        self.norm2 = mx.gluon.nn.BatchNorm()

        self.conv3 = mx.gluon.nn.Conv2D(channels=256, kernel_size=(5, 5),
                                        strides=(2, 2), padding=(2, 2),
                                        dilation=(1, 1))
        self.norm3 = mx.gluon.nn.BatchNorm()

        self.conv4 = mx.gluon.nn.Conv2D(channels=512, kernel_size=(5, 5),
                                        strides=(2, 2), padding=(2, 2),
                                        dilation=(1, 1))
        self.norm4 = mx.gluon.nn.BatchNorm()

        self.conv5 = mx.gluon.nn.Conv2D(channels=512, kernel_size=(5, 5),
                                        strides=(2, 2), padding=(2, 2),
                                        dilation=(1, 1))
        self.norm5 = mx.gluon.nn.BatchNorm()

        self.conv6 = mx.gluon.nn.Conv2D(channels=512, kernel_size=(5, 5),
                                        strides=(2, 2), padding=(2, 2),
                                        dilation=(1, 1))
        self.norm6 = mx.gluon.nn.BatchNorm()

        self.global_features_fc = mx.gluon.nn.Dense(units=1024)

        # ---- local branch: same stage shape, one fewer stage ----
        self.conv1_local = mx.gluon.nn.Conv2D(channels=64, kernel_size=(5, 5),
                                              strides=(2, 2), padding=(2, 2),
                                              dilation=(1, 1))
        self.norm1_local = mx.gluon.nn.BatchNorm()

        self.conv2_local = mx.gluon.nn.Conv2D(channels=128, kernel_size=(5, 5),
                                              strides=(2, 2), padding=(2, 2),
                                              dilation=(1, 1))
        self.norm2_local = mx.gluon.nn.BatchNorm()

        self.conv3_local = mx.gluon.nn.Conv2D(channels=256, kernel_size=(5, 5),
                                              strides=(2, 2), padding=(2, 2),
                                              dilation=(1, 1))
        self.norm3_local = mx.gluon.nn.BatchNorm()

        self.conv4_local = mx.gluon.nn.Conv2D(channels=512, kernel_size=(5, 5),
                                              strides=(2, 2), padding=(2, 2),
                                              dilation=(1, 1))
        self.norm4_local = mx.gluon.nn.BatchNorm()

        self.conv5_local = mx.gluon.nn.Conv2D(channels=512, kernel_size=(5, 5),
                                              strides=(2, 2), padding=(2, 2),
                                              dilation=(1, 1))
        self.norm5_local = mx.gluon.nn.BatchNorm()

        self.local_features_fc = mx.gluon.nn.Dense(units=1024)

        # May drop the sigmoid here depending on how the loss is constructed
        # (a WGAN critic is normally unbounded, i.e. no sigmoid).
        self.total_features_fc = mx.gluon.nn.Dense(units=1, activation="sigmoid")

    def calculate_final_discr_score(self, global_input, local_input):
        """Run both branches and return the scalar discriminator score.

        Parameters
        ----------
        global_input, local_input
            Network inputs (the code passes ``mx.sym`` Symbols; see the
            class-level NOTE about Gluon/Symbol mixing).

        Returns
        -------
        The output of ``total_features_fc``: a 1-unit sigmoid score.
        """
        global_pipeline = [
            self.conv1, self.norm1, self.act,
            self.conv2, self.norm2, self.act,
            self.conv3, self.norm3, self.act,
            self.conv4, self.norm4, self.act,
            self.conv5, self.norm5, self.act,
            self.conv6, self.norm6, self.act,
            mx.sym.Flatten, self.global_features_fc,
        ]

        local_pipeline = [
            self.conv1_local, self.norm1_local, self.act,
            self.conv2_local, self.norm2_local, self.act,
            self.conv3_local, self.norm3_local, self.act,
            self.conv4_local, self.norm4_local, self.act,
            self.conv5_local, self.norm5_local, self.act,
            mx.sym.Flatten, self.local_features_fc,
        ]

        def run_pipeline(data, stages):
            # Fold the input through each callable stage in order.
            out = data
            for stage in stages:
                out = stage(out)
            return out

        global_features = run_pipeline(global_input, global_pipeline)
        local_features = run_pipeline(local_input, local_pipeline)

        # BUG FIX: both feature tensors are 2-D (batch, 1024), so the
        # feature axis is 1.  ``mx.sym.Concat`` in MXNet 1.3 does not
        # reliably accept a negative ``dim``; the original dim=-1 could
        # fail or misbehave at bind time.
        total_features = mx.sym.Concat(global_features, local_features, dim=1)
        return self.total_features_fc(total_features)

#3
def produce_generator(input_and_mask, fix_gamma=True, verbose=True):
    """Build the coarse inpainting generator as an ``mx.sym`` graph.

    Architecture: 6 conv stages (two stride-2 downsamples), 4 dilated conv
    stages (dilation 2/4/8/16), 2 plain conv stages, then two
    deconv-upsample + conv stages, and a final 3-channel conv squashed to
    [-1, 1] with tanh.

    Parameters
    ----------
    input_and_mask : mx.sym.Symbol
        Input image concatenated with the mask channel — presumably shaped
        (batch, channels + 1, H, W) per the commented-out Variable
        declaration in the original; confirm against the caller.
    fix_gamma : bool
        Forwarded to every ``mx.sym.BatchNorm``.
    verbose : bool
        When True (the default, matching the original behaviour), print the
        inferred output shape after every stage.  Note ``infer_shape()``
        with no arguments only works if the input Variable carries a shape.

    Returns
    -------
    mx.sym.Symbol
        The tanh-activated 3-channel output.
    """

    def _report(sym):
        # Debug aid: the original printed the inferred shape of every stage.
        if verbose:
            print(sym.infer_shape()[1])

    def _stage(data, conv_name, norm_name, num_filter,
               kernel=(3, 3), stride=(1, 1), pad=(1, 1),
               dilate=(1, 1), deconv=False):
        # One conv(-transpose) -> BatchNorm -> ELU building block.
        op = mx.sym.Deconvolution if deconv else mx.sym.Convolution
        out = op(data=data, kernel=kernel, num_filter=num_filter,
                 stride=stride, dilate=dilate, pad=pad, name=conv_name)
        out = mx.sym.BatchNorm(data=out, fix_gamma=fix_gamma, name=norm_name)
        # BUG FIX: the original wrapped an *uninitialized* Gluon block,
        # ``mx.gluon.nn.ELU()``, around a Symbol — Gluon blocks are not
        # meant to be called on raw Symbols.  Use the native symbolic ELU
        # (LeakyReLU with act_type='elu') instead.
        out = mx.sym.LeakyReLU(data=out, act_type="elu")
        _report(out)
        return out

    # ---- blocks before dilation ----
    x = _stage(input_and_mask, "conv1", "norm1", 64, kernel=(5, 5), pad=(2, 2))
    x = _stage(x, "conv2", "norm2", 128, stride=(2, 2))
    x = _stage(x, "conv3", "norm3", 128)
    x = _stage(x, "conv4", "norm4", 256, stride=(2, 2))
    x = _stage(x, "conv5", "norm5", 256)
    x = _stage(x, "conv6", "norm6", 256)

    # ---- dilated convolutions: growing receptive field, same resolution ----
    if verbose:
        print("dilate")
    x = _stage(x, "dilated_conv1", "norm7", 256, dilate=(2, 2), pad=(2, 2))
    x = _stage(x, "dilated_conv2", "norm8", 256, dilate=(4, 4), pad=(4, 4))
    x = _stage(x, "dilated_conv3", "norm9", 256, dilate=(8, 8), pad=(8, 8))
    x = _stage(x, "dilated_conv4", "norm10", 256, dilate=(16, 16), pad=(16, 16))

    # ---- plain convolutions after dilation ----
    if verbose:
        print("conv :")
    x = _stage(x, "conv7", "norm11", 256)
    x = _stage(x, "conv8", "norm12", 256)

    # ---- upsampling back to input resolution ----
    if verbose:
        print("deconv")
    x = _stage(x, "deconv1", "norm13", 128, kernel=(4, 4), stride=(2, 2),
               deconv=True)
    x = _stage(x, "conv9", "norm14", 128)
    x = _stage(x, "deconv2", "norm15", 64, kernel=(4, 4), stride=(2, 2),
               deconv=True)
    x = _stage(x, "conv10", "norm16", 32)

    # ---- final 3-channel projection squashed to [-1, 1] ----
    conv11 = mx.sym.Convolution(data=x, kernel=(3, 3), num_filter=3,
                                stride=(1, 1), dilate=(1, 1),
                                name="conv11", pad=(1, 1))
    norm17 = mx.sym.BatchNorm(data=conv11, fix_gamma=fix_gamma, name="norm17")
    # BUG FIX: same Gluon-on-Symbol issue as above — use the symbolic
    # Activation op for the tanh instead of ``mx.gluon.nn.Activation``.
    output = mx.sym.Activation(data=norm17, act_type="tanh")

    _report(output)
    return output

#4

Welcome @svjack!

Could you please provide the full stack trace? It will help with debugging — thanks.

You can put three backticks (```) before and after your code to make a code block and correct its formatting. Also, did you hit a character limit on your post, meaning you had to split your code across two posts?