Different predictions with pre-trained resnet on same picture?


#1

Hi,
I’m using a pre-trained gluon resnet to do some inference. The predictions differ depending on which library I use to read the image (mxnet or cv2). Is this expected? My pipeline is below. My input picture is this one: https://pixabay.com/en/turtle-tortoise-swim-sea-turtle-863336/

import cv2
import mxnet as mx
from mxnet import image, nd
from mxnet.gluon.utils import download
from mxnet.gluon.model_zoo import vision as models
import numpy as np

# Load ResNet-50 v2 from the Gluon model zoo with its pre-trained weights.
net = models.resnet50_v2(pretrained=True)

def transform(data):
    """Turn a single image array into a normalized one-image batch.

    The image is resized with ``image.resize_short(..., 256)``, center-cropped
    to 224x224, has its last axis moved to the front, and gains a leading
    batch axis. Values are then cast to float32, divided by 255, and
    normalized per channel with ``rgb_mean`` / ``rgb_std``.

    NOTE(review): the per-channel statistics are listed in R, G, B order, so
    the input is presumably expected to be an HWC image with RGB channels --
    confirm against the caller.
    """
    resized = image.resize_short(data, 256)
    cropped, _ = image.center_crop(resized, (224, 224))

    # (H, W, C) -> (C, H, W), then add the batch axis in front.
    batch = cropped.transpose((2, 0, 1)).expand_dims(axis=0)

    # Shape the per-channel statistics so they broadcast over (1, 3, H, W).
    channel_shape = (1, 3, 1, 1)
    rgb_mean = nd.array([0.485, 0.456, 0.406]).reshape(channel_shape)
    rgb_std = nd.array([0.229, 0.224, 0.225]).reshape(channel_shape)

    scaled = batch.astype('float32') / 255
    return (scaled - rgb_mean) / rgb_std


def top5cat(pic):
    """Print the five most probable classes predicted for ``pic``.

    The image goes through ``transform`` and the module-level ``net``; the
    output is passed through softmax and the top five class indices are
    looked up in ``text_labels``. One line is printed per class; nothing is
    returned.
    """
    probabilities = net(transform(pic)).softmax()
    # topk yields one row of indices per batch item; there is a single image.
    top_indices = probabilities.topk(k=5)[0]
    for entry in top_indices:
        class_id = int(entry.asscalar())
        confidence = probabilities[0, class_id].asscalar()
        label = text_labels[class_id]
        print('With prob = %.5f, it contains %s' % (confidence, label))


# Classify the picture after reading it with mxnet's image.imread.
# NOTE(review): `local` and `pic` are not defined in this snippet --
# presumably the directory and file name of the downloaded picture.
top5cat(image.imread(local + pic))

#returns this:
#    With prob = 0.86020, it contains loggerhead, loggerhead turtle, Caretta caretta
#    With prob = 0.13905, it contains leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea
#    With prob = 0.00057, it contains terrapin
#    With prob = 0.00007, it contains mud turtle
#    With prob = 0.00003, it contains starfish, sea star


# Classify the same picture, this time read with OpenCV's cv2.imread and
# wrapped in an NDArray.
top5cat(mx.nd.array(cv2.imread(local + pic)))

#returns this:
#    With prob = 0.61446, it contains leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea
#    With prob = 0.28488, it contains loggerhead, loggerhead turtle, Caretta caretta
#    With prob = 0.05506, it contains terrapin
#    With prob = 0.01007, it contains starfish, sea star
#    With prob = 0.00838, it contains mud turtle

#2

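@olivcruche, cv2.imread reads the image and returns it in BGR (Blue Green Red) channel order, whereas mx.image.imread returns it in RGB (Red Green Blue) order. Your transform normalizes with per-channel RGB statistics (rgb_mean / rgb_std), so with the cv2 input the red and blue channels are swapped, hence the difference.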

This should give you the same values as mx.image.imread:

# Reorder the last (channel) axis from BGR to RGB with the index list
# [2, 1, 0], then cast to uint8 before classifying.
top5cat(mx.nd.array(cv2.imread(local + pic))[:,:,[2,1,0]].astype('uint8'))