Can't save params in Pixel2Pixel tutorial


#1

In the gluon tutorial of pix2pix:
http://gluon.mxnet.io/chapter14_generative-adversarial-networks/pixel2pixel.html

the U-Net is defined in such a way that we can’t save its parameters using net.save_params(…)

I get the following error:
ValueError: Prefix dsds is to be striped before saving, but Parameter unetcenterblock0_encoderblock0_conv0_weight does not start with dsds. If you are using Block.save_params, This may be due to your Block shares parameters from other Blocks or you forgot to use with name_scope() during init. Consider switching to Block.collect_params.save and Block.collect_params.load instead.

A minimal reproducible example:

def param_init(param, ctx):
    """Initialize a single parameter on ``ctx`` based on its name.

    * Name contains ``conv``: names that also contain ``weight`` are
      initialized with ``mx.init.Normal(0.02)``; all others with
      ``mx.init.Zero()``.
    * Otherwise, name contains ``batchnorm``: initialized with
      ``mx.init.Zero()``; if the name also contains ``gamma`` its data is
      then overwritten with random normal samples.
    * Any other parameter is left untouched.
    """
    name = param.name

    if 'conv' in name:
        initializer = mx.init.Normal(0.02) if 'weight' in name else mx.init.Zero()
        param.initialize(init=initializer, ctx=ctx)
        return

    if 'batchnorm' not in name:
        # Neither a convolution nor a batch-norm parameter: nothing to do.
        return

    param.initialize(init=mx.init.Zero(), ctx=ctx)
    if 'gamma' in name:
        # Initialize gamma from normal distribution with mean 1 and std 0.02
        param.set_data(nd.random_normal(1, 0.02, param.data().shape))

def network_init(net, ctx):
    """Apply ``param_init`` to every parameter returned by ``net.collect_params()``."""
    collected = net.collect_params()
    for parameter in collected.values():
        param_init(parameter, ctx)
        
class EncoderBlock(nn.HybridBlock):
    """Encoder unit: 4x4 stride-2 convolution, LeakyReLU(0.2), optional BatchNorm.

    Parameters
    ----------
    out_channels : number of channels produced by the convolution.
    in_channels : number of channels the convolution expects.
    is_center_block : when True, the BatchNorm layer is left out.
    """

    def __init__(self, out_channels, in_channels, is_center_block=False):
        super(EncoderBlock, self).__init__()

        with self.name_scope():
            conv = nn.Conv2D(channels=out_channels, kernel_size=4, strides=2, padding=1,
                             in_channels=in_channels, use_bias=False)
            activation = nn.LeakyReLU(alpha=0.2)

            layers = [conv, activation]
            if not is_center_block:
                layers.append(nn.BatchNorm(momentum=0.1, in_channels=out_channels))

            self.model = nn.HybridSequential()
            with self.model.name_scope():
                for layer in layers:
                    self.model.add(layer)

    def hybrid_forward(self, F, x):
        """Run ``x`` through the sequential stack built in ``__init__``."""
        return self.model(x)

class DecoderBlock(nn.HybridBlock):
    """Decoder unit: ReLU, 4x4 stride-2 transposed convolution, BatchNorm.

    Parameters
    ----------
    out_channels : number of channels produced by the transposed convolution.
    in_channels : number of channels the transposed convolution expects.
    """

    def __init__(self, out_channels, in_channels):
        super(DecoderBlock, self).__init__()

        with self.name_scope():
            activation = nn.Activation(activation='relu')
            deconv = nn.Conv2DTranspose(channels=out_channels, kernel_size=4, strides=2, padding=1,
                                        in_channels=in_channels, use_bias=False)
            norm = nn.BatchNorm(momentum=0.1, in_channels=out_channels)

            self.model = nn.HybridSequential()
            with self.model.name_scope():
                for layer in (activation, deconv, norm):
                    self.model.add(layer)

    def hybrid_forward(self, F, x):
        """Run ``x`` through the sequential stack built in ``__init__``."""
        return self.model(x)

class UnetCenterBlock(nn.HybridBlock):
    """Innermost U-Net unit: an encoder/decoder pair with a skip connection.

    Parameters
    ----------
    inner_channels : channels produced by the encoder and consumed by the
        decoder.
    outer_channels : channels consumed by the encoder and produced by the
        decoder.
    **kwargs : forwarded unchanged to ``nn.HybridBlock.__init__`` so callers
        can pass ``prefix=`` (and ``params=``), e.g.
        ``UnetCenterBlock(3, 3, prefix='wrap_inner_')``.
    """

    def __init__(self, inner_channels, outer_channels, **kwargs):
        # Without forwarding kwargs, passing ``prefix=...`` raises a TypeError.
        super(UnetCenterBlock, self).__init__(**kwargs)

        with self.name_scope():
            # The center encoder is built with is_center_block=True, which
            # makes EncoderBlock skip its BatchNorm layer.
            encoder = EncoderBlock(in_channels=outer_channels, out_channels=inner_channels,
                                   is_center_block=True)
            decoder = DecoderBlock(in_channels=inner_channels, out_channels=outer_channels)

            blocks_list = [encoder, decoder]

            self.model = nn.HybridSequential()
            with self.model.name_scope():
                for block in blocks_list:
                    self.model.add(block)

    def hybrid_forward(self, F, x):
        """Return the encoder/decoder output concatenated with ``x`` along axis 1."""
        return F.concat(self.model(x), x, dim=1)
        
class UnetWrapBlock(nn.HybridBlock):
    """U-Net level that wraps an inner block between an encoder and a decoder.

    Parameters
    ----------
    inner_channels : channels produced by this level's encoder. The decoder
        is declared with ``inner_channels * 2`` inputs, presumably because
        the wrapped block concatenates its output with its input (as
        ``UnetCenterBlock`` and concatenating ``UnetWrapBlock`` do).
    outer_channels : channels consumed by the encoder and produced by the
        decoder.
    inner_block : the already-constructed block placed between encoder and
        decoder. Required, despite the ``None`` default kept for signature
        compatibility.
    use_dropout : append ``nn.Dropout(rate=0.5)`` after the decoder.
    concat_input_and_output : when True, ``hybrid_forward`` concatenates the
        result with the input along axis 1; when False it returns the result
        alone.
    **kwargs : forwarded unchanged to ``nn.HybridBlock.__init__`` so callers
        can pass ``prefix=`` (and ``params=``).

    Raises
    ------
    ValueError
        If ``inner_block`` is None.
    """

    def __init__(self, inner_channels, outer_channels, inner_block=None, use_dropout=False,
                 concat_input_and_output=True, **kwargs):
        if inner_block is None:
            # Fail fast: a None entry in the sequential cannot be run.
            raise ValueError("UnetWrapBlock requires an inner_block; got None")

        # Without forwarding kwargs, passing ``prefix=...`` raises a TypeError.
        super(UnetWrapBlock, self).__init__(**kwargs)

        self.concat_input_and_output = concat_input_and_output
        with self.name_scope():
            encoder = EncoderBlock(in_channels=outer_channels, out_channels=inner_channels)
            decoder = DecoderBlock(in_channels=inner_channels * 2, out_channels=outer_channels)

            # NOTE: inner_block was constructed by the caller, i.e. outside
            # this name_scope(), so its parameter names do not automatically
            # carry this block's prefix.
            blocks_list = [encoder, inner_block, decoder]

            if use_dropout:
                blocks_list.append(nn.Dropout(rate=0.5))

            self.model = nn.HybridSequential()
            with self.model.name_scope():
                for block in blocks_list:
                    self.model.add(block)

    def hybrid_forward(self, F, x):
        """Run the wrapped stack; optionally concatenate the result with ``x``."""
        out = self.model(x)
        if self.concat_input_and_output:
            return F.concat(out, x, dim=1)
        return out


# Build the smallest possible U-Net: one center block wrapped by one level.
# The center block is constructed first, on its own, and then handed to the
# wrapper as inner_block.
net = UnetCenterBlock(3, 3)
net = UnetWrapBlock(3, 3, inner_block=net, concat_input_and_output=False)


ctx = mx.gpu()

param_filename = "unet.params"

# Initialize every parameter, then try to save; save_params is the call that
# produces the ValueError quoted above.
network_init(net, ctx)
net.save_params(param_filename)

#2

This is because the inner_block is created outside of UnetWrapBlock’s name_scope.
One solution is to manually set the prefixes:

# Give the inner block a prefix that starts with the wrapper's prefix.
# NOTE: this requires the block constructors to accept a `prefix` keyword and
# forward it to nn.HybridBlock.__init__.
net = UnetCenterBlock(3, 3, prefix='wrap_inner_')
net = UnetWrapBlock(3, 3, inner_block=net, concat_input_and_output=False, prefix='wrap_')

#3

Thanks! it works, now I can save and load one network.

I now have a problem loading two of these networks in the same session:

error message:

AssertionError: Cannot update self with other because they have different Parameters with the same name wrap0_encoderblock0_conv0_weight

minimal reproducible example:

# First network.
net = UnetCenterBlock(3, 3, prefix="wrap0_inner_")
net = UnetWrapBlock(3, 3, inner_block=net, concat_input_and_output=False, prefix="wrap0_")

# Second network, built with the same prefixes as the first.
# NOTE: inner_block below is `net`, not `net2`, so the second wrapper is
# built around the first network rather than around the new center block
# (this is the slip pointed out in reply #4).
net2 = UnetCenterBlock(3, 3, prefix="wrap0_inner_")
net2 = UnetWrapBlock(3, 3, inner_block=net, concat_input_and_output=False, prefix="wrap0_")

param_filename = "unet.params"

ctx = [mx.gpu(0)]
# Initialize and save the first network, then load the same file into both.
network_init(net, ctx)
net.save_params(param_filename)
net.load_params(param_filename, ctx=ctx)
net2.load_params(param_filename, ctx=ctx)

#4

I think it is because of a small typo: you pass net as the inner_block of net2. I think you meant to write:

# The second wrapper must wrap its own center block (net2), not net.
net2 = UnetCenterBlock(3, 3, prefix="wrap0_inner_")
net2 = UnetWrapBlock(3, 3, inner_block=net2, concat_input_and_output=False, prefix="wrap0_")


#5

Thanks Sergey! you were right.

Now everything works, but is there a more general way to add the prefixes? In this implementation I need to work backward from my last block and add the prefixes by hand, for example:

class Unet(nn.HybridBlock):
    """U-Net: one UnetCenterBlock wrapped by seven UnetWrapBlocks, then a 1x1 conv.

    Parameters
    ----------
    in_channels : channels of the input; also the ``outer_channels`` of the
        outermost wrapper and the ``in_channels`` of the final convolution.
    use_dropout : currently unused (see the NOTE in ``__init__``).
    num_of_filter : channel count at the innermost levels; the three outer
        intermediate levels use 1/2, 1/4 and 1/8 of it.

    The blocks are created outside any ``name_scope()``, so every block is
    given an explicit prefix. The prefixes are generated here and are the
    same strings as the hand-written chain they replace: ``u0_``, ``u0_u1_``,
    ..., ``u0_..._u6_`` and ``u0_..._u6_inner_``.
    """

    def __init__(self, in_channels, use_dropout=True, num_of_filter=512):
        prefix = "u0_"
        super(Unet, self).__init__(prefix=prefix)

        # NOTE(review): use_dropout is accepted but never forwarded to any
        # UnetWrapBlock, so no Dropout layer is created. Left unchanged
        # because it is not clear which levels were meant to use dropout.

        def scope(level):
            # Prefix of the wrapper at nesting `level`: "u0_u1_..._u<level>_".
            return "".join("u%d_" % i for i in range(level + 1))

        # (inner_channels, outer_channels) of each intermediate wrapper,
        # innermost first. Floor division keeps the channel counts integral
        # on Python 3, where `/` would produce floats.
        wrap_channels = [
            (num_of_filter, num_of_filter),
            (num_of_filter, num_of_filter),
            (num_of_filter, num_of_filter),
            (num_of_filter, num_of_filter // 2),
            (num_of_filter // 2, num_of_filter // 4),
            (num_of_filter // 4, num_of_filter // 8),
        ]
        depth = len(wrap_channels)

        unet = UnetCenterBlock(inner_channels=num_of_filter, outer_channels=num_of_filter,
                               prefix=scope(depth) + "inner_")
        # The innermost wrapper sits at level `depth`, the last one at level 1.
        for level, (inner, outer) in zip(range(depth, 0, -1), wrap_channels):
            unet = UnetWrapBlock(inner_channels=inner, outer_channels=outer,
                                 inner_block=unet, prefix=scope(level))
        # The outermost wrapper returns its result without the skip concat.
        unet = UnetWrapBlock(inner_channels=num_of_filter // 8, outer_channels=in_channels,
                             inner_block=unet, concat_input_and_output=False, prefix=scope(0))

        last_conv = nn.Conv2D(channels=1, kernel_size=1, strides=1, padding=0,
                              in_channels=in_channels, use_bias=False, prefix=scope(0) + "conv_")

        self.model = nn.HybridSequential()
        with self.model.name_scope():
            self.model.add(unet)
            self.model.add(last_conv)

    def hybrid_forward(self, F, x):
        """Run ``x`` through the nested U-Net and the final 1x1 convolution."""
        return self.model(x)

#6

Hi Oron,

I don’t know of any other way to deal with it using MXNet functions. The only way to stop dealing with prefixes is to create the blocks inside the appropriate name_scope().

Thus, I would recommend changing the way you compose your objects:

Option 1. Since your blocks are very similar, instead of passing classes, pass some sort of a dictionary with a configuration and construct the objects on the fly. Something like:

# Illustrative configuration: one dict per level, from which the blocks can
# be constructed on the fly inside the parent's name_scope().
config = {
    "layers": [
        {
            "inner_channels_delimiter": num_of_filter // 2,
            "outer_channels": num_of_filter,
        },
        {
            "inner_channels_delimiter": num_of_filter // 4,
            "outer_channels": num_of_filter,
        },
    ]
}

Option 2. If your blocks become different in the future, you can always pass lambda functions and call them at the point where you need the object to be constructed. Something like:

# A zero-argument factory: no UnetCenterBlock exists until it is called.
block = lambda: UnetCenterBlock(inner_channels=num_of_filter, outer_channels=num_of_filter)
# Call the factory where the block should be created, e.g. inside the
# parent's name_scope().
block()