Performance of Symbol vs. NDArray vs. PyTorch

Hi, out of curiosity I modified your code to run on GPU and tested it on an NVIDIA P100. Here's the modified code (I used mx.gpu() for MXNet and .cuda() wherever I thought it was necessary for PyTorch; I develop on MXNet 99.9% of the time, so no PyTorch expertise here):

import torch
from torch import nn as ptnn
from torch.autograd import Variable
import mxnet as mx
from mxnet.gluon import nn as mxnn
from mxnet import nd, initializer
from enum import IntEnum
from time import time

use_cuda = torch.cuda.is_available()
fmt = ' {:<14} {:<15} {:<12} {:>5}'
mx_ctx = mx.gpu()

class Framework(IntEnum):
    PYTORCH = 1
    MXNET = 2


def get_mxnet_network():
    net = mxnn.HybridSequential()
    with net.name_scope():
        net.add(mxnn.Dense(256, activation="relu"))
        net.add(mxnn.Dense(128, activation="relu"))
        net.add(mxnn.Dense(2))
    # Zero-initialize so both frameworks start from identical weights
    net.initialize(init=initializer.Zero(), ctx=mx_ctx)
    return net


def pytorch_weights_init(m):
    # Zero the weights and biases to match MXNet's Zero initializer above
    if isinstance(m, ptnn.Linear):
        ptnn.init.uniform_(m.weight.data, 0, 0)
        ptnn.init.uniform_(m.bias.data, 0, 0)


def get_pytorch_network():
    net = ptnn.Sequential()
    net.add_module('dense1', ptnn.Linear(1, 256))
    net.add_module('relu1', ptnn.ReLU())
    net.add_module('dense2', ptnn.Linear(256, 128))
    net.add_module('relu2', ptnn.ReLU())
    net.add_module('dense3', ptnn.Linear(128, 2))
    net.apply(pytorch_weights_init)
    return net.cuda()


# Wait for computation to finish to make profiling more accurate
def block(framework):
    if framework == Framework.PYTORCH:
        if use_cuda:
            torch.cuda.synchronize()
    elif framework == Framework.MXNET:
        mx.nd.waitall()


def bench(net, x, framework):
    block(framework)
    start = time()
    for i in range(1000):
        y = net(x)
    block(framework)
    return time() - start


def report(framework, paradigm, precision, value=None):
    # value is in seconds; print milliseconds for the 1000 forward passes
    t = '%i' % (value * 1000) if value else '---'
    print(fmt.format(framework, paradigm, '%i bit' % precision, t))


# Input matrices
mx_x_32 = nd.ones((512, 1), mx_ctx)
mx_x_16 = mx_x_32.astype('float16')
pt_x_32 = Variable(torch.ones((512, 1))).cuda()
pt_x_16 = pt_x_32.half()


print()
print(' Device:', 'GPU' if use_cuda else 'CPU')
print('----------------------------------------------------')
print(fmt.format('Framework', 'Paradigm', 'Precision', 'Time'))
print('====================================================')
mx_net = get_mxnet_network()
report('MXNet', 'imperative', 32, bench(mx_net, mx_x_32, Framework.MXNET))
mx_net.cast('float16')
report('MXNet', 'imperative', 16, bench(mx_net, mx_x_16, Framework.MXNET))
mx_net.cast('float32')
mx_net.hybridize()
report('MXNet', 'symbolic', 32, bench(mx_net, mx_x_32, Framework.MXNET))
mx_net.cast('float16')
report('MXNet', 'symbolic', 16, bench(mx_net, mx_x_16, Framework.MXNET))
pt_net = get_pytorch_network()
report('PyTorch', 'imperative', 32, bench(pt_net, pt_x_32, Framework.PYTORCH))

# PyTorch half precision isn't supported on a CPU
pt_16 = bench(pt_net.half(), pt_x_16, Framework.PYTORCH) if use_cuda else None
report('PyTorch', 'imperative', 16, pt_16)

print('----------------------------------------------------')

and here's the output:

dia021@b027:~/Projects/benchmark> python mxnet_vs_pytorch_benchmark.py 

 Device: GPU
----------------------------------------------------
 Framework      Paradigm        Precision     Time
====================================================
 MXNet          imperative      32 bit         654
 MXNet          imperative      16 bit         486
 MXNet          symbolic        32 bit         199
 MXNet          symbolic        16 bit         256
 PyTorch        imperative      32 bit         143
 PyTorch        imperative      16 bit         135
----------------------------------------------------

I cannot really tell whether the benchmark is 100% reliable, or how the results would change with a more complicated network; I'm just reporting the run output. If I find the time, I may post something more complicated in the future.
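On the reliability point, here is a minimal sketch of how the timing loop could be made a bit more robust: a warm-up call (so graph construction after hybridize() and first-use CUDA overhead are not counted) and a few repeated trials, keeping the best one. It reuses the block() helper and the time import from the listing above; the function name bench_repeated and the trial counts are my own additions, not part of the original benchmark:

def bench_repeated(net, x, framework, iters=1000, trials=5):
    # Warm-up pass: the first call may include graph construction (after
    # hybridize()) and kernel selection, which we don't want to time.
    net(x)
    block(framework)
    times = []
    for _ in range(trials):
        start = time()
        for _ in range(iters):
            y = net(x)
        block(framework)
        times.append(time() - start)
    # The best trial is the one least affected by other load on the machine.
    return min(times)

Taking the minimum over a few trials, rather than a single run, tends to be less sensitive to whatever else is running on the node.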
