# DNN Benchmarks

This notebook defines and trains deep neural networks using Keras. The goal is to benchmark the performance in both CPU and GPU environments and across each of the available _backends_.

In [1]:
# standard libraries
import os
import platform
import psutil
from typing import Tuple

In [2]:
# machine learning libraries
import keras

Using TensorFlow backend.


In [10]:
import tensorflow as tf
from tensorflow.python.client import device_lib

# Print the `physical_device_desc` of every device reported by device_lib.
local_devices = device_lib.list_local_devices()
for dev in local_devices:
    print(dev.physical_device_desc)


device: XLA_CPU device
device: XLA_GPU device
device: 0, name: Tesla P100-PCIE-12GB, pci bus id: 0000:3b:00.0, compute capability: 6.0


---

## Platform Information

In [4]:
# Linux distribution as (name, version, codename).
# `platform.linux_distribution` was removed in Python 3.8, so only call it when
# it exists; otherwise read the os-release data (Python 3.10+) and finally fall
# back to the generic kernel name/release so the cell never raises.
if hasattr(platform, 'linux_distribution'):
    OS, OSVER, OSTYPE = platform.linux_distribution()
else:
    try:
        _os_release = platform.freedesktop_os_release()
    except (AttributeError, OSError):
        # AttributeError: Python 3.8/3.9; OSError: no os-release file present.
        _os_release = {}
    OS = _os_release.get('NAME', platform.system())
    OSVER = _os_release.get('VERSION_ID', platform.release())
    OSTYPE = _os_release.get('VERSION_CODENAME', '')

# Processor name as reported by the platform module.
ARCH = platform.processor()

# Interpreter implementation and version.
PYIMP, PYVER = platform.python_implementation(), platform.python_version()

# CPU count and total memory, the latter converted from bytes to whole GiB.
CPUS, MEMTOT = psutil.cpu_count(), round(psutil.virtual_memory().total / 1024**3)

In [5]:
# Network name of this machine. `platform.node()` avoids spawning a shell and
# keeps the cell valid plain Python (the `!hostname` form only works in IPython
# and fails to unpack unless the command prints exactly one line).
HOSTNAME = platform.node()

# The cluster name is everything before the first '-' in the hostname.
CLUSTER = HOSTNAME.split('-')[0]

In [6]:
# Assemble the platform/library report first, then emit it with a single print.
# The \033[...m sequences are terminal escape codes wrapping the labels.
platform_report = f"""\
\033[34m System:\033[0m {CLUSTER.capitalize()} ({OS} {OSVER} {OSTYPE} {ARCH})
         {CPUS} cores, {MEMTOT}GB memory

\033[34m Python:\033[0m        {PYIMP} {PYVER}
\033[34m    keras:\033[0m      {keras.__version__}
\033[34m    tensorflow:\033[0m {keras.backend.tf.__version__}
"""
print(platform_report)

[34m System:[0m Gilbreth (CentOS Linux 7.6.1810 Core x86_64)
         16 cores, 187GB memory

[34m Python:[0m        CPython 3.6.4
[34m    keras:[0m      2.2.4
[34m    tensorflow:[0m 1.12.0



---

## Models

### MNIST: MLP

In [7]:
def mnist_mlp(batch_size: int=128, epochs: int=10, verbose: bool=True) -> keras.models.Model:
    """
    Train and evaluate a simple MLP on MNIST.

    The network is two 512-unit ReLU layers, each followed by 20% dropout, and a
    10-way softmax output. It is trained with RMSprop on categorical
    cross-entropy, using the MNIST test split as validation data.

    Args:
        batch_size: Number of samples per gradient update.
        epochs: Number of passes over the training data.
        verbose: When true, print the dataset sizes, the model summary, the
            per-epoch training progress and the final test loss/accuracy.
            When false, nothing is printed.

    Returns:
        The trained Keras model.
    """

    # libraries
    from keras.datasets import mnist
    from keras.models import Sequential
    from keras.layers import Dense, Dropout
    from keras.optimizers import RMSprop

    # 0-9
    num_classes = 10

    # each 28x28 image is flattened into a single feature vector
    num_pixels = 784

    # the data, split between train and test sets; pixels scaled to [0, 1]
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], num_pixels).astype('float32') / 255
    x_test = x_test.reshape(x_test.shape[0], num_pixels).astype('float32') / 255

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(num_pixels,)))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    if verbose:
        print(x_train.shape[0], 'train samples')
        print(x_test.shape[0], 'test samples')
        model.summary()

    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(),
                  metrics=['accuracy'])

    # honour the caller's `verbose` flag instead of always printing progress
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=int(verbose),
              validation_data=(x_test, y_test))

    # evaluation always runs so the timed workload does not depend on `verbose`
    score = model.evaluate(x_test, y_test, verbose=0)

    if verbose:
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

    return model

In [10]:
model = mnist_mlp(epochs=4)

60000 train samples
10000 test samples
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 512)               401920    
_________________________________________________________________
dropout_3 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_4 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples, validate on 10000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Test lo

---

### MNIST: CNN

In [8]:
def mnist_cnn(batch_size: int=128, epochs: int=10, verbose: bool=True) -> keras.models.Model:
    """
    Train and evaluate a small 2D convolutional network on MNIST.

    The network is Conv2D(32, 3x3) -> Conv2D(64, 3x3) -> 2x2 max-pooling ->
    25% dropout -> Flatten -> Dense(128) -> 50% dropout -> 10-way softmax.
    It is trained with Adadelta on categorical cross-entropy, using the MNIST
    test split as validation data.

    Args:
        batch_size: Number of samples per gradient update.
        epochs: Number of passes over the training data.
        verbose: When true, print the dataset shapes, the model summary, the
            per-epoch training progress and the final test loss/accuracy.
            When false, nothing is printed.

    Returns:
        The trained Keras model.
    """

    # libraries
    from keras.datasets import mnist
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Flatten
    from keras.layers import Conv2D, MaxPooling2D
    from keras import backend as K

    # 0-9
    num_classes = 10

    # input image dimensions
    img_rows, img_cols = 28, 28

    # the data, split between train and test sets
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # add the single grayscale channel on the axis the active backend expects
    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, 1)

    x_train = x_train.reshape((x_train.shape[0],) + input_shape)
    x_test = x_test.reshape((x_test.shape[0],) + input_shape)

    # scale pixel values to [0, 1]
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    if verbose:
        print('x_train shape:', x_train.shape)
        print(x_train.shape[0], 'train samples')
        print(x_test.shape[0], 'test samples')
        model.summary()

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])

    # honour the caller's `verbose` flag instead of always printing progress
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=int(verbose),
              validation_data=(x_test, y_test))

    # evaluation always runs so the timed workload does not depend on `verbose`
    score = model.evaluate(x_test, y_test, verbose=0)

    if verbose:
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

    return model

In [11]:
model = mnist_cnn(epochs=4)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dro

---

### ResNet50

In [12]:
def resnet50(train_N: int=10, test_N: int=3, gpus: int=0,
             batch_size: int=1, epochs: int=1, verbose: bool=True) -> keras.models.Model:
    """
    ResNet50 - Deep convolutional neural network.

    This uses arbitrary static (noise) as inputs to ResNet50 for the purpose of running
    the benchmarks; the reported loss/accuracy are therefore not meaningful.

    Args:
        train_N: Number of synthetic training samples to generate.
        test_N: Number of synthetic test/validation samples to generate.
        gpus: Number of GPUs to train on. Values below 2 train the model
            directly; 2 or more wrap it with `multi_gpu_model`.
        batch_size: Number of samples per gradient update.
        epochs: Number of passes over the training data.
        verbose: When true, print the per-epoch training progress and the final
            test loss/accuracy. When false, nothing is printed.

    Returns:
        The base ResNet50 model (the template model, not the multi-GPU wrapper).
    """

    from typing import Tuple

    import numpy as np
    import tensorflow as tf

    from keras.applications.resnet50 import ResNet50

    def gen_data(N: int) -> Tuple[np.ndarray, np.ndarray]:
        """Generates 224x224x3 inputs and 1000-class labels"""
        # random 8-bit pixel levels, rescaled to [0, 1]
        X = np.round(np.random.rand(N, 224, 224, 3) * 255).astype('float32') / 255
        # uniformly random class index in [0, 1000)
        y = np.random.randint(0, 1000, size=N)
        y = keras.utils.to_categorical(y, num_classes=1000)
        return X, y

    # synthetic data
    train_X, train_y = gen_data(train_N)
    test_X, test_y = gen_data(test_N)

    if gpus >= 2:
        # only needed (and only imported) for the multi-GPU path
        from keras.utils import multi_gpu_model

        # host the template model's weights on the CPU so the per-GPU replicas
        # share them, as recommended for `multi_gpu_model`
        with tf.device('/cpu:0'):
            model = ResNet50()
        trainable = multi_gpu_model(model, gpus=gpus)
    else:
        # single device: let the backend place the model itself rather than
        # forcing it onto the CPU, which would distort a GPU benchmark
        model = ResNet50()
        trainable = model

    trainable.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adadelta(),
                      metrics=['accuracy'])
    trainable.fit(train_X, train_y,
                  batch_size=batch_size, epochs=epochs, verbose=int(verbose),
                  validation_data=(test_X, test_y))
    score = trainable.evaluate(test_X, test_y, verbose=0)

    if verbose:
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

    return model

In [15]:
model = resnet50(train_N=100, test_N=20, gpus=1, epochs=4)

Train on 100 samples, validate on 20 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Test loss: 6.897780418395996
Test accuracy: 0.0


---

<br><br><br><br><br>

<br><br><br><br><br>

<br><br><br><br><br>

<br><br><br><br><br>

<br><br><br><br><br>

<br><br><br><br><br>

<br><br><br><br><br>

<br><br><br><br><br>

<br><br><br><br><br>

<br><br><br><br><br>

<br><br><br><br><br>