# Based mainly on the gluoncv FCN examples.
import numpy as np
import mxnet as mx
from mxnet import gluon, autograd
import random
from datetime import datetime
import gluoncv
from mxnet.gluon.data.vision import transforms
from gluoncv.utils.viz import get_color_pallete, DeNormalize
import os,sys,cv2
from gluoncv.utils.parallel import *
##############################################################################
# Fully Convolutional Network
# ---------------------------
#
# .. image:: https://cdn-images-1.medium.com/max/800/1*wRkj6lsQ5ckExB5BoYkrZg.png
# :width: 70%
# :align: center
#
# (figure credit to `Long et al. <https://arxiv.org/pdf/1411.4038.pdf>`_ )
#
# State-of-the-art approaches of semantic segmentation are typically based on
# Fully Convolutional Network (FCN) [Long15]_.
# The key idea of a fully convolutional network is that it is "fully convolutional",
# which means it does not have any fully connected layers. Therefore, the network can
# accept arbitrary input sizes and make dense per-pixel predictions.
# Base/Encoder network is typically pre-trained on ImageNet, because the features
# learned from diverse set of images contain rich contextual information, which
# can be beneficial for semantic segmentation.
#
#
##############################################################################
# Model Dilation
# --------------
#
# The adaptation of a base network pre-trained on ImageNet leads to a loss of spatial
# resolution, because these networks are originally designed for the classification task.
# Following standard implementation in recent works of semantic segmentation,
# we apply dilation strategy to the
# stage 3 and stage 4 of the pre-trained networks, which produces stride of 8
# featuremaps (models are provided in
# :class:`gluoncv.model_zoo.ResNetV1b`).
# Visualization of dilated/atrous convolution
# (figure credit to `conv_arithmetic <https://github.com/vdumoulin/conv_arithmetic>`_ ):
#
# .. image:: https://raw.githubusercontent.com/vdumoulin/conv_arithmetic/master/gif/dilation.gif
# :width: 40%
# :align: center
#
# Loading a dilated ResNet50 is simply:
#
class MYDataset(mx.gluon.data.Dataset):
    """Segmentation dataset backed by an ``all.txt`` index file.

    Each non-empty line of ``<dataset_folder>/all.txt`` is
    ``<image_path> <label_path>``. Images are cropped to the top-left
    480x480 region, scaled to [0, 1] and returned CHW; label gray values
    are divided by 50.0 (presumably gray levels are multiples of 50 that
    encode class ids — TODO confirm).
    """
    def __init__(self, dataset_folder="sdk/"):
        super(MYDataset, self).__init__()
        self.data_list_ = []  # list of (image_path, label_path) pairs
        # BUG FIX: the file was opened in 'rb' mode, so under Python 3 each
        # line is bytes -> `line == ""` is never true and `line.split(' ')`
        # raises TypeError. Open in text mode instead.
        with open(os.path.join(dataset_folder, 'all.txt'), 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                image_path, label_path = line.split(' ')
                self.data_list_.append((image_path, label_path))

    def __len__(self):
        """Return the number of (image, label) pairs in the index."""
        return len(self.data_list_)

    def __getitem__(self, idx):
        """Load, crop and normalize the pair at position *idx*."""
        image_path, label_path = self.data_list_[idx]
        image = cv2.imread(image_path, 1)  # 3-channel BGR
        label = cv2.imread(label_path, 0)  # single-channel grayscale
        # Fixed top-left crop; assumes sources are at least 480x480 — TODO confirm.
        image = image[0:480, 0:480, :]
        label = label[0:480, 0:480]
        image = np.float32(image) / 255.0
        label = np.float32(label) / 50.0
        image = np.transpose(image, (2, 0, 1))  # HWC -> CHW
        return (image, label)
class PixelAcc:
    """Per-class pixel-accuracy accumulator.

    ``data[k]`` holds ``[ground_truth_count, correct_count]`` for class k.
    ``get`` reports ``correct / (ground_truth + 1)``; the ``+1`` guards
    against division by zero for classes absent from the labels.
    """
    def __init__(self, num_classes):
        self.data = {c: [0, 0] for c in range(num_classes)}

    def update(self, preds, labels):
        """Accumulate statistics from one batch of predictions and labels."""
        if isinstance(preds, (list, tuple)):
            preds = preds[0][0]
        if isinstance(preds, mx.nd.NDArray):
            preds = preds.as_in_context(mx.cpu()).asnumpy()
        if isinstance(labels, mx.nd.NDArray):
            labels = labels.as_in_context(mx.cpu()).asnumpy()
        winners = np.argmax(preds, axis=1)  # per-pixel predicted class
        for cls, counters in self.data.items():
            gt_mask = labels == cls
            counters[0] += np.sum(gt_mask)
            counters[1] += np.sum((labels == winners) * gt_mask)

    def reset(self):
        """Zero all per-class counters."""
        for counters in self.data.values():
            counters[0] = 0
            counters[1] = 0

    def get(self):
        """Return per-class accuracies as one space-separated string."""
        parts = ['{}'.format(float(c[1]) / (c[0] + 1)) for c in self.data.values()]
        return ' '.join(parts)
def load_model(num_classes, ctx, params_path="top.params", **kwargs):
    """Build an FCN with a ResNet50 backbone and load trained weights.

    Parameters
    ----------
    num_classes : int
        Number of segmentation classes.
    ctx : mx.Context or list of mx.Context
        Device(s) to create the model on.
    params_path : str, optional
        Path of the trained parameter file. Defaults to ``"top.params"``
        (previously hard-coded), so existing callers are unaffected.
    **kwargs
        Extra keyword arguments forwarded to :class:`gluoncv.model_zoo.FCN`.

    Returns
    -------
    gluoncv.model_zoo.FCN
        The model with parameters loaded from *params_path*.
    """
    backbone = "resnet50"
    model = gluoncv.model_zoo.FCN(num_classes, backbone=backbone, pretrained_base=True,
                                  ctx=ctx, **kwargs)
    model.load_parameters(params_path)
    return model
class mIOU:
    """Per-class intersection-over-union accumulator.

    ``data[k]`` holds ``[union_count, intersection_count]`` for class k.
    ``get`` reports ``intersection / (union + 1)``; the ``+1`` guards
    against division by zero for classes never seen.
    """
    def __init__(self, num_classes):
        self.data = {c: [0, 0] for c in range(num_classes)}

    def reset(self):
        """Zero all per-class counters."""
        for counters in self.data.values():
            counters[0] = 0
            counters[1] = 0

    def update(self, preds, labels):
        """Accumulate union/intersection counts from one batch."""
        if isinstance(preds, (list, tuple)):
            preds = preds[0][0]
        if isinstance(preds, mx.nd.NDArray):
            preds = preds.as_in_context(mx.cpu()).asnumpy()
        if isinstance(labels, mx.nd.NDArray):
            labels = labels.as_in_context(mx.cpu()).asnumpy()
        winners = np.argmax(preds, axis=1)  # per-pixel predicted class
        for cls, counters in self.data.items():
            gt = np.sum(labels == cls)
            predicted = np.sum(winners == cls)
            hit = np.sum((labels == winners) * (labels == cls))
            counters[0] += gt + predicted - hit  # union via inclusion-exclusion
            counters[1] += hit

    def get(self):
        """Return per-class IoU values as one space-separated string."""
        parts = ['{}'.format(float(c[1]) / (c[0] + 1)) for c in self.data.values()]
        return ' '.join(parts)
def run(num_classes=20, input_size=(512, 640)):
    """Run one evaluation pass of the trained FCN over ``MYDataset``.

    Builds the FCN (ResNet50 base, see :class:`gluoncv.model_zoo.FCN`),
    wraps it in ``DataParallelModel``, iterates the dataset once and
    prints the running per-class mIOU after every batch.

    Parameters
    ----------
    num_classes : int, optional
        Number of segmentation classes (default 20).
    input_size : tuple of int, optional
        Kept for interface compatibility; not used by this routine.
    """
    # For a from-scratch VOC model one could instead use:
    #   gluoncv.model_zoo.get_fcn(dataset='pascal_voc', backbone='resnet50',
    #                             pretrained=False)  # output is 480x480
    model = load_model(num_classes, mx.gpu())
    print(model)

    # gluoncv also ships segmentation datasets (e.g. VOCSegmentation);
    # here a custom dataset is used instead.
    trainset = MYDataset()
    print('Training images:', len(trainset))

    batch_size = 1
    # num_workers is deliberately left at 0: multiprocess loading causes
    # problems under Windows.
    train_data = gluon.data.DataLoader(
        trainset, batch_size, shuffle=True, last_batch='rollover')

    # BUG FIX: random.seed(datetime.now()) raises TypeError on Python 3.11+
    # (only None/int/float/str/bytes are accepted) — seed with the timestamp.
    random.seed(datetime.now().timestamp())

    # DataParallel for multi-gpu training; single GPU here.
    ctx_list = [mx.gpu(0)]
    model = DataParallelModel(model, ctx_list)

    pixel_acc = PixelAcc(num_classes)
    miou = mIOU(num_classes)
    for epoch in range(1):
        pixel_acc.reset()
        miou.reset()
        for batch in train_data:
            data, target = batch
            with autograd.record(True):
                # outputs = model.module.demo(data.as_in_context(mx.gpu()))
                outputs = model(data)
            pixel_acc.update(preds=outputs, labels=target)
            miou.update(preds=outputs, labels=target)
            mx.nd.waitall()
            # BUG FIX: the original Python-2 print statements were syntax
            # errors under Python 3.
            # print(pixel_acc.get())
            print(miou.get())
    print('finished!')
# Script entry point: run a single evaluation pass.
if __name__ == "__main__":
    run()