AML21: 01 Deep Neural Network for MNIST

Based on https://github.com/Atcold/pytorch-Deep-Learning

Data and Libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy

# Device used for training the models: the first CUDA GPU when one is
# available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)
cuda:0

Load the MNIST dataset

Observe that we set shuffle=True, which means that the order of the samples is reshuffled every time the loader is iterated (i.e. at every epoch).

In [2]:
# Problem dimensions shared by every model in this notebook.
input_size = 28 * 28   # images are 28x28 pixels
output_size = 10       # there are 10 classes

# Both splits go through the same preprocessing: conversion to a tensor
# followed by normalization with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split; downloaded into ../data when it is not there yet.
train_set = datasets.MNIST('../data', train=True, download=True,
                           transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)

# Test split; read from the same root directory (no download requested here).
test_set = datasets.MNIST('../data', train=False, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=True)
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz
Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz
Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw
Processing...
Done!
/opt/conda/lib/python3.7/site-packages/torchvision/datasets/mnist.py:480: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at  /pytorch/torch/csrc/utils/tensor_numpy.cpp:141.)
  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
In [3]:
# Preview 20 training images (2 rows x 10 columns) with their labels as titles.
# The batch comes from the shuffled train_loader, so the selection is random.
image_batch, label_batch = next(iter(train_loader))

plt.figure(figsize=(16, 4))
for i in range(20):
    image = image_batch[i]
    label = label_batch[i].item()

    plt.subplot(2, 10, i + 1)
    plt.imshow(image.squeeze().numpy())
    plt.title(label)
    plt.axis('off')

A Fully Connected Neural Network (three hidden layers with 200, 100 and 60 units)

Helper functions for training and testing

In [4]:
def get_n_params(model):
    """Return the total number of scalar parameters of ``model``.

    Sums ``nelement()`` over every tensor yielded by ``model.parameters()``;
    a model without parameters gives 0.
    """
    return sum(param.nelement() for param in model.parameters())

# History of test accuracies (in percent); test() appends one value per call.
accuracy_list = []
# we pass a model object to this trainer, and it trains this model for one epoch
def train(epoch, model):
    """Train ``model`` for one pass over ``train_loader``.

    Relies on the notebook-level ``optimizer``, ``train_loader`` and ``device``.
    The loss of every 100th batch is printed as a progress message.

    Args:
        epoch: epoch number; only used in the progress message.
        model: network whose output is passed to ``F.nll_loss``.
    """
    model.train()
    n_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    for step, (inputs, labels) in enumerate(train_loader):
        # send to device
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        # only report every 100th batch
        if step % 100 != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(inputs), n_samples,
            100. * step / n_batches, batch_loss.item()))
            
def test(model):
    """Evaluate ``model`` on ``test_loader`` and record its accuracy.

    Relies on the notebook-level ``test_loader``, ``device`` and
    ``accuracy_list``. Prints the average negative log-likelihood loss and the
    accuracy over the whole test set, and appends the accuracy (in percent)
    to ``accuracy_list``.

    Args:
        model: network returning per-class scores of shape (batch, classes)
            that are valid inputs for ``F.nll_loss``.
    """
    model.eval()
    test_loss = 0
    correct = 0

    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every test batch (less memory, faster).
    with torch.no_grad():
        for data, target in test_loader:
            # send to device
            data, target = data.to(device), target.to(device)

            output = model(data)
            # sum up batch loss; divided by the dataset size below
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # index of the max log-probability is the predicted class
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()

    n_samples = len(test_loader.dataset)
    test_loss /= n_samples
    accuracy = 100. * correct / n_samples
    accuracy_list.append(accuracy)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, n_samples,
        accuracy))

Defining the Fully Connected Network

In [5]:
class FC2Layer(nn.Module):
    """Fully connected classifier producing log-probabilities.

    The input is flattened to ``input_size`` features and passed through
    Linear+ReLU stages of width 200, 100 and 60, followed by a Linear layer
    to ``output_size`` and a log-softmax over the class dimension.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.input_size = input_size

        # Widths of consecutive Linear+ReLU stages, starting at the input.
        widths = [input_size, 200, 100, 60]
        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        # Classification head.
        layers.append(nn.Linear(60, output_size))
        layers.append(nn.LogSoftmax(dim=1))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` to (-1, input_size) and return the log-probabilities."""
        flat = x.view(-1, self.input_size)
        return self.network(flat)

Train the Network

In [6]:
# Build the fully connected model, then train it with SGD for 10 epochs,
# evaluating on the test set after every epoch.
print("Training on ", device)
model_fnn = FC2Layer(input_size, output_size).to(device)
optimizer = optim.SGD(model_fnn.parameters(), lr=0.01, momentum=0.5)
n_params = get_n_params(model_fnn)
print('Number of parameters: {}'.format(n_params))

for epoch in range(10):
    train(epoch, model_fnn)
    test(model_fnn)
Training on  cuda:0
Number of parameters: 183770
Train Epoch: 0 [0/60000 (0%)]	Loss: 2.306556
Train Epoch: 0 [6400/60000 (11%)]	Loss: 2.177184
Train Epoch: 0 [12800/60000 (21%)]	Loss: 1.290147
Train Epoch: 0 [19200/60000 (32%)]	Loss: 0.755577
Train Epoch: 0 [25600/60000 (43%)]	Loss: 0.440915
Train Epoch: 0 [32000/60000 (53%)]	Loss: 0.562211
Train Epoch: 0 [38400/60000 (64%)]	Loss: 0.252071
Train Epoch: 0 [44800/60000 (75%)]	Loss: 0.517577
Train Epoch: 0 [51200/60000 (85%)]	Loss: 0.182727
Train Epoch: 0 [57600/60000 (96%)]	Loss: 0.118172

Test set: Average loss: 0.2922, Accuracy: 9142/10000 (91%)

Train Epoch: 1 [0/60000 (0%)]	Loss: 0.274308
Train Epoch: 1 [6400/60000 (11%)]	Loss: 0.183375
Train Epoch: 1 [12800/60000 (21%)]	Loss: 0.226720
Train Epoch: 1 [19200/60000 (32%)]	Loss: 0.272361
Train Epoch: 1 [25600/60000 (43%)]	Loss: 0.397380
Train Epoch: 1 [32000/60000 (53%)]	Loss: 0.206675
Train Epoch: 1 [38400/60000 (64%)]	Loss: 0.181382
Train Epoch: 1 [44800/60000 (75%)]	Loss: 0.211664
Train Epoch: 1 [51200/60000 (85%)]	Loss: 0.189578
Train Epoch: 1 [57600/60000 (96%)]	Loss: 0.245390

Test set: Average loss: 0.1939, Accuracy: 9418/10000 (94%)

Train Epoch: 2 [0/60000 (0%)]	Loss: 0.109253
Train Epoch: 2 [6400/60000 (11%)]	Loss: 0.100005
Train Epoch: 2 [12800/60000 (21%)]	Loss: 0.141641
Train Epoch: 2 [19200/60000 (32%)]	Loss: 0.134167
Train Epoch: 2 [25600/60000 (43%)]	Loss: 0.176373
Train Epoch: 2 [32000/60000 (53%)]	Loss: 0.222804
Train Epoch: 2 [38400/60000 (64%)]	Loss: 0.079777
Train Epoch: 2 [44800/60000 (75%)]	Loss: 0.196300
Train Epoch: 2 [51200/60000 (85%)]	Loss: 0.167849
Train Epoch: 2 [57600/60000 (96%)]	Loss: 0.114829

Test set: Average loss: 0.1536, Accuracy: 9545/10000 (95%)

Train Epoch: 3 [0/60000 (0%)]	Loss: 0.183243
Train Epoch: 3 [6400/60000 (11%)]	Loss: 0.086997
Train Epoch: 3 [12800/60000 (21%)]	Loss: 0.105706
Train Epoch: 3 [19200/60000 (32%)]	Loss: 0.146819
Train Epoch: 3 [25600/60000 (43%)]	Loss: 0.223931
Train Epoch: 3 [32000/60000 (53%)]	Loss: 0.118980
Train Epoch: 3 [38400/60000 (64%)]	Loss: 0.081233
Train Epoch: 3 [44800/60000 (75%)]	Loss: 0.078058
Train Epoch: 3 [51200/60000 (85%)]	Loss: 0.229820
Train Epoch: 3 [57600/60000 (96%)]	Loss: 0.056344

Test set: Average loss: 0.1339, Accuracy: 9591/10000 (96%)

Train Epoch: 4 [0/60000 (0%)]	Loss: 0.040184
Train Epoch: 4 [6400/60000 (11%)]	Loss: 0.072593
Train Epoch: 4 [12800/60000 (21%)]	Loss: 0.073783
Train Epoch: 4 [19200/60000 (32%)]	Loss: 0.044806
Train Epoch: 4 [25600/60000 (43%)]	Loss: 0.154575
Train Epoch: 4 [32000/60000 (53%)]	Loss: 0.110681
Train Epoch: 4 [38400/60000 (64%)]	Loss: 0.042032
Train Epoch: 4 [44800/60000 (75%)]	Loss: 0.163611
Train Epoch: 4 [51200/60000 (85%)]	Loss: 0.089801
Train Epoch: 4 [57600/60000 (96%)]	Loss: 0.031802

Test set: Average loss: 0.1071, Accuracy: 9660/10000 (97%)

Train Epoch: 5 [0/60000 (0%)]	Loss: 0.124848
Train Epoch: 5 [6400/60000 (11%)]	Loss: 0.026435
Train Epoch: 5 [12800/60000 (21%)]	Loss: 0.030513
Train Epoch: 5 [19200/60000 (32%)]	Loss: 0.132711
Train Epoch: 5 [25600/60000 (43%)]	Loss: 0.063918
Train Epoch: 5 [32000/60000 (53%)]	Loss: 0.145042
Train Epoch: 5 [38400/60000 (64%)]	Loss: 0.038947
Train Epoch: 5 [44800/60000 (75%)]	Loss: 0.111130
Train Epoch: 5 [51200/60000 (85%)]	Loss: 0.170105
Train Epoch: 5 [57600/60000 (96%)]	Loss: 0.063851

Test set: Average loss: 0.0982, Accuracy: 9708/10000 (97%)

Train Epoch: 6 [0/60000 (0%)]	Loss: 0.048078
Train Epoch: 6 [6400/60000 (11%)]	Loss: 0.118412
Train Epoch: 6 [12800/60000 (21%)]	Loss: 0.047448
Train Epoch: 6 [19200/60000 (32%)]	Loss: 0.038427
Train Epoch: 6 [25600/60000 (43%)]	Loss: 0.040424
Train Epoch: 6 [32000/60000 (53%)]	Loss: 0.057466
Train Epoch: 6 [38400/60000 (64%)]	Loss: 0.032205
Train Epoch: 6 [44800/60000 (75%)]	Loss: 0.066095
Train Epoch: 6 [51200/60000 (85%)]	Loss: 0.059640
Train Epoch: 6 [57600/60000 (96%)]	Loss: 0.173306

Test set: Average loss: 0.0955, Accuracy: 9716/10000 (97%)

Train Epoch: 7 [0/60000 (0%)]	Loss: 0.121431
Train Epoch: 7 [6400/60000 (11%)]	Loss: 0.058270
Train Epoch: 7 [12800/60000 (21%)]	Loss: 0.042627
Train Epoch: 7 [19200/60000 (32%)]	Loss: 0.018401
Train Epoch: 7 [25600/60000 (43%)]	Loss: 0.014917
Train Epoch: 7 [32000/60000 (53%)]	Loss: 0.073664
Train Epoch: 7 [38400/60000 (64%)]	Loss: 0.039156
Train Epoch: 7 [44800/60000 (75%)]	Loss: 0.046560
Train Epoch: 7 [51200/60000 (85%)]	Loss: 0.037850
Train Epoch: 7 [57600/60000 (96%)]	Loss: 0.035984

Test set: Average loss: 0.0949, Accuracy: 9706/10000 (97%)

Train Epoch: 8 [0/60000 (0%)]	Loss: 0.033080
Train Epoch: 8 [6400/60000 (11%)]	Loss: 0.011930
Train Epoch: 8 [12800/60000 (21%)]	Loss: 0.012878
Train Epoch: 8 [19200/60000 (32%)]	Loss: 0.021536
Train Epoch: 8 [25600/60000 (43%)]	Loss: 0.071350
Train Epoch: 8 [32000/60000 (53%)]	Loss: 0.037414
Train Epoch: 8 [38400/60000 (64%)]	Loss: 0.051778
Train Epoch: 8 [44800/60000 (75%)]	Loss: 0.024658
Train Epoch: 8 [51200/60000 (85%)]	Loss: 0.032960
Train Epoch: 8 [57600/60000 (96%)]	Loss: 0.023755

Test set: Average loss: 0.0847, Accuracy: 9735/10000 (97%)

Train Epoch: 9 [0/60000 (0%)]	Loss: 0.012284
Train Epoch: 9 [6400/60000 (11%)]	Loss: 0.009137
Train Epoch: 9 [12800/60000 (21%)]	Loss: 0.055716
Train Epoch: 9 [19200/60000 (32%)]	Loss: 0.024814
Train Epoch: 9 [25600/60000 (43%)]	Loss: 0.022164
Train Epoch: 9 [32000/60000 (53%)]	Loss: 0.007180
Train Epoch: 9 [38400/60000 (64%)]	Loss: 0.007540
Train Epoch: 9 [44800/60000 (75%)]	Loss: 0.038185
Train Epoch: 9 [51200/60000 (85%)]	Loss: 0.046960
Train Epoch: 9 [57600/60000 (96%)]	Loss: 0.033422

Test set: Average loss: 0.0778, Accuracy: 9747/10000 (97%)

Show some predictions of the trained network on test images

In [7]:
def visualize_pred(img, pred_prob, real_label):
    """Show an image next to a bar chart of its predicted class probabilities.

    Args:
        img: image tensor; displayed after ``.numpy().squeeze()``.
        pred_prob: probabilities for the 10 classes, plotted as horizontal bars.
        real_label: ground-truth label. The image title shows
            ``[real_label, predicted_label]``, where the predicted label is
            the arg-max of ``pred_prob``.
    """
    class_ids = numpy.arange(10)
    pred_label = numpy.argmax(pred_prob)

    _, (image_ax, prob_ax) = plt.subplots(figsize=(6,9), ncols=2)

    # left panel: the image, titled with [true label, predicted label]
    image_ax.imshow(img.numpy().squeeze())
    image_ax.axis('off')
    image_ax.set_title([real_label, pred_label])

    # right panel: one horizontal bar per class
    prob_ax.barh(class_ids, pred_prob)
    prob_ax.set_aspect(0.1)
    prob_ax.set_yticks(class_ids)
    prob_ax.set_yticklabels(class_ids)
    prob_ax.set_title('Prediction Probability')
    prob_ax.set_xlim(0, 1.1)

    plt.tight_layout()
In [8]:
# Visualize the predictions of model_fnn for 10 images of one test batch.
# The model is moved to the CPU because the batch from test_loader lives there.
model_fnn.to('cpu')

image_batch, label_batch = next(iter(test_loader))

# Gradients are not needed for inference.
with torch.no_grad():
    log_pred_prob_batch = model_fnn(image_batch)

# The network outputs log-probabilities; exponentiate to obtain probabilities.
pred_prob_batch = log_pred_prob_batch.exp().numpy()

for i in range(10):
    visualize_pred(image_batch[i], pred_prob_batch[i], label_batch[i].item())

Network with Dropout

In [9]:
class FC2LayerDropout(nn.Module):
    """Fully connected classifier with dropout, producing log-probabilities.

    Same Linear widths as the plain network (200, 100, 60, then
    ``output_size``); a ``Dropout(0.25)`` sits between the Linear layer and
    the ReLU of the first two stages.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.input_size = input_size

        stage_1 = [nn.Linear(input_size, 200), nn.Dropout(0.25), nn.ReLU()]
        stage_2 = [nn.Linear(200, 100), nn.Dropout(0.25), nn.ReLU()]
        stage_3 = [nn.Linear(100, 60), nn.ReLU()]
        head = [nn.Linear(60, output_size), nn.LogSoftmax(dim=1)]

        self.network = nn.Sequential(*stage_1, *stage_2, *stage_3, *head)

    def forward(self, x):
        """Flatten ``x`` to (-1, input_size) and return the log-probabilities."""
        flat = x.view(-1, self.input_size)
        return self.network(flat)
In [10]:
# Build the dropout model, then train it with SGD for 10 epochs, evaluating
# on the test set after every epoch.
print("Training on ", device)
model = FC2LayerDropout(input_size, output_size).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
n_params = get_n_params(model)
print('Number of parameters: {}'.format(n_params))

for epoch in range(10):
    # training mode: dropout, batch-norm etc. are active during training
    model.train()
    train(epoch, model)
    # evaluation mode: dropout, batch-norm etc. are switched off for validation/testing
    model.eval()
    test(model)
Training on  cuda:0
Number of parameters: 183770
Train Epoch: 0 [0/60000 (0%)]	Loss: 2.303042
Train Epoch: 0 [6400/60000 (11%)]	Loss: 2.232655
Train Epoch: 0 [12800/60000 (21%)]	Loss: 1.750074
Train Epoch: 0 [19200/60000 (32%)]	Loss: 1.052040
Train Epoch: 0 [25600/60000 (43%)]	Loss: 0.538419
Train Epoch: 0 [32000/60000 (53%)]	Loss: 0.605327
Train Epoch: 0 [38400/60000 (64%)]	Loss: 0.599045
Train Epoch: 0 [44800/60000 (75%)]	Loss: 0.408984
Train Epoch: 0 [51200/60000 (85%)]	Loss: 0.586994
Train Epoch: 0 [57600/60000 (96%)]	Loss: 0.338343

Test set: Average loss: 0.3262, Accuracy: 9040/10000 (90%)

Train Epoch: 1 [0/60000 (0%)]	Loss: 0.416737
Train Epoch: 1 [6400/60000 (11%)]	Loss: 0.392879
Train Epoch: 1 [12800/60000 (21%)]	Loss: 0.272427
Train Epoch: 1 [19200/60000 (32%)]	Loss: 0.236031
Train Epoch: 1 [25600/60000 (43%)]	Loss: 0.406714
Train Epoch: 1 [32000/60000 (53%)]	Loss: 0.402870
Train Epoch: 1 [38400/60000 (64%)]	Loss: 0.303852
Train Epoch: 1 [44800/60000 (75%)]	Loss: 0.196804
Train Epoch: 1 [51200/60000 (85%)]	Loss: 0.449149
Train Epoch: 1 [57600/60000 (96%)]	Loss: 0.218142

Test set: Average loss: 0.2100, Accuracy: 9365/10000 (94%)

Train Epoch: 2 [0/60000 (0%)]	Loss: 0.211032
Train Epoch: 2 [6400/60000 (11%)]	Loss: 0.210309
Train Epoch: 2 [12800/60000 (21%)]	Loss: 0.338311
Train Epoch: 2 [19200/60000 (32%)]	Loss: 0.130013
Train Epoch: 2 [25600/60000 (43%)]	Loss: 0.380897
Train Epoch: 2 [32000/60000 (53%)]	Loss: 0.233913
Train Epoch: 2 [38400/60000 (64%)]	Loss: 0.429006
Train Epoch: 2 [44800/60000 (75%)]	Loss: 0.248526
Train Epoch: 2 [51200/60000 (85%)]	Loss: 0.128082
Train Epoch: 2 [57600/60000 (96%)]	Loss: 0.178169

Test set: Average loss: 0.1601, Accuracy: 9524/10000 (95%)

Train Epoch: 3 [0/60000 (0%)]	Loss: 0.194061
Train Epoch: 3 [6400/60000 (11%)]	Loss: 0.272792
Train Epoch: 3 [12800/60000 (21%)]	Loss: 0.186156
Train Epoch: 3 [19200/60000 (32%)]	Loss: 0.300879
Train Epoch: 3 [25600/60000 (43%)]	Loss: 0.061202
Train Epoch: 3 [32000/60000 (53%)]	Loss: 0.267650
Train Epoch: 3 [38400/60000 (64%)]	Loss: 0.132290
Train Epoch: 3 [44800/60000 (75%)]	Loss: 0.165557
Train Epoch: 3 [51200/60000 (85%)]	Loss: 0.169942
Train Epoch: 3 [57600/60000 (96%)]	Loss: 0.233818

Test set: Average loss: 0.1276, Accuracy: 9618/10000 (96%)

Train Epoch: 4 [0/60000 (0%)]	Loss: 0.090799
Train Epoch: 4 [6400/60000 (11%)]	Loss: 0.092053
Train Epoch: 4 [12800/60000 (21%)]	Loss: 0.124873
Train Epoch: 4 [19200/60000 (32%)]	Loss: 0.314530
Train Epoch: 4 [25600/60000 (43%)]	Loss: 0.159503
Train Epoch: 4 [32000/60000 (53%)]	Loss: 0.080379
Train Epoch: 4 [38400/60000 (64%)]	Loss: 0.155144
Train Epoch: 4 [44800/60000 (75%)]	Loss: 0.133378
Train Epoch: 4 [51200/60000 (85%)]	Loss: 0.100745
Train Epoch: 4 [57600/60000 (96%)]	Loss: 0.148948

Test set: Average loss: 0.1130, Accuracy: 9661/10000 (97%)

Train Epoch: 5 [0/60000 (0%)]	Loss: 0.127338
Train Epoch: 5 [6400/60000 (11%)]	Loss: 0.076443
Train Epoch: 5 [12800/60000 (21%)]	Loss: 0.087274
Train Epoch: 5 [19200/60000 (32%)]	Loss: 0.032357
Train Epoch: 5 [25600/60000 (43%)]	Loss: 0.060095
Train Epoch: 5 [32000/60000 (53%)]	Loss: 0.136934
Train Epoch: 5 [38400/60000 (64%)]	Loss: 0.107147
Train Epoch: 5 [44800/60000 (75%)]	Loss: 0.264836
Train Epoch: 5 [51200/60000 (85%)]	Loss: 0.116272
Train Epoch: 5 [57600/60000 (96%)]	Loss: 0.305326

Test set: Average loss: 0.1035, Accuracy: 9698/10000 (97%)

Train Epoch: 6 [0/60000 (0%)]	Loss: 0.139701
Train Epoch: 6 [6400/60000 (11%)]	Loss: 0.136242
Train Epoch: 6 [12800/60000 (21%)]	Loss: 0.097588
Train Epoch: 6 [19200/60000 (32%)]	Loss: 0.135472
Train Epoch: 6 [25600/60000 (43%)]	Loss: 0.136464
Train Epoch: 6 [32000/60000 (53%)]	Loss: 0.049524
Train Epoch: 6 [38400/60000 (64%)]	Loss: 0.092915
Train Epoch: 6 [44800/60000 (75%)]	Loss: 0.049124
Train Epoch: 6 [51200/60000 (85%)]	Loss: 0.119248
Train Epoch: 6 [57600/60000 (96%)]	Loss: 0.191502

Test set: Average loss: 0.0926, Accuracy: 9719/10000 (97%)

Train Epoch: 7 [0/60000 (0%)]	Loss: 0.062154
Train Epoch: 7 [6400/60000 (11%)]	Loss: 0.089640
Train Epoch: 7 [12800/60000 (21%)]	Loss: 0.137574
Train Epoch: 7 [19200/60000 (32%)]	Loss: 0.195233
Train Epoch: 7 [25600/60000 (43%)]	Loss: 0.032105
Train Epoch: 7 [32000/60000 (53%)]	Loss: 0.069613
Train Epoch: 7 [38400/60000 (64%)]	Loss: 0.073453
Train Epoch: 7 [44800/60000 (75%)]	Loss: 0.099157
Train Epoch: 7 [51200/60000 (85%)]	Loss: 0.060516
Train Epoch: 7 [57600/60000 (96%)]	Loss: 0.142676

Test set: Average loss: 0.0867, Accuracy: 9741/10000 (97%)

Train Epoch: 8 [0/60000 (0%)]	Loss: 0.049860
Train Epoch: 8 [6400/60000 (11%)]	Loss: 0.055813
Train Epoch: 8 [12800/60000 (21%)]	Loss: 0.123922
Train Epoch: 8 [19200/60000 (32%)]	Loss: 0.048202
Train Epoch: 8 [25600/60000 (43%)]	Loss: 0.187207
Train Epoch: 8 [32000/60000 (53%)]	Loss: 0.275413
Train Epoch: 8 [38400/60000 (64%)]	Loss: 0.053153
Train Epoch: 8 [44800/60000 (75%)]	Loss: 0.031433
Train Epoch: 8 [51200/60000 (85%)]	Loss: 0.107852
Train Epoch: 8 [57600/60000 (96%)]	Loss: 0.021837

Test set: Average loss: 0.0824, Accuracy: 9757/10000 (98%)

Train Epoch: 9 [0/60000 (0%)]	Loss: 0.073799
Train Epoch: 9 [6400/60000 (11%)]	Loss: 0.075757
Train Epoch: 9 [12800/60000 (21%)]	Loss: 0.164681
Train Epoch: 9 [19200/60000 (32%)]	Loss: 0.098372
Train Epoch: 9 [25600/60000 (43%)]	Loss: 0.107419
Train Epoch: 9 [32000/60000 (53%)]	Loss: 0.069053
Train Epoch: 9 [38400/60000 (64%)]	Loss: 0.100473
Train Epoch: 9 [44800/60000 (75%)]	Loss: 0.137091
Train Epoch: 9 [51200/60000 (85%)]	Loss: 0.084864
Train Epoch: 9 [57600/60000 (96%)]	Loss: 0.040560

Test set: Average loss: 0.0776, Accuracy: 9767/10000 (98%)

In [11]:
# Visualize the predictions of the dropout model for 10 images of one test batch.
# The model is moved to the CPU because the batch from test_loader lives there.
model.to('cpu')
# Put the model in evaluation mode explicitly so that Dropout is disabled here,
# instead of relying on the last test() call having left it in that mode.
model.eval()

# fetch a batch of test images
image_batch, label_batch = next(iter(test_loader))

# Turn off gradients to speed up this part
with torch.no_grad():
    log_pred_prob_batch = model(image_batch)

for i in range(10):
    img = image_batch[i]
    real_label = label_batch[i].item()
    # The network outputs log-probabilities; exponentiate to obtain probabilities.
    pred_prob = torch.exp(log_pred_prob_batch[i]).numpy().squeeze()
    visualize_pred(img, pred_prob, real_label)

Does the Fully Connected Network use "Visual Information"?

In [12]:
# One fixed random permutation of the 784 pixel positions; the same
# permutation is applied to every image.
fixed_perm = torch.randperm(784)

# Draw a random training batch and show 6 images, each next to its permuted
# version, on a 3x4 grid (original in the odd slots, permuted in the even ones).
image_batch, label_batch = next(iter(train_loader))

plt.figure(figsize=(8, 8))
for i in range(6):
    image = image_batch[i]
    label = label_batch[i].item()

    # flatten, reorder the pixels, restore the (1, 1, 28, 28) layout
    image_perm = image.view(-1, 28*28)[:, fixed_perm].view(-1, 1, 28, 28)

    for slot, picture in enumerate((image, image_perm), start=1):
        plt.subplot(3, 4, 2*i + slot)
        plt.imshow(picture.squeeze().numpy())
        plt.axis('off')
        plt.title(label)
In [13]:
# Start a fresh accuracy history; scramble_test() appends one value (in percent) per call.
accuracy_list = []

def scramble_train(epoch, model, perm=torch.arange(0, 784).long()):
    """Train ``model`` for one pass over ``train_loader`` on pixel-permuted images.

    Every batch is flattened to 784 pixels per image, reordered with ``perm``
    and reshaped back to (-1, 1, 28, 28) before it is fed to the model.
    Relies on the notebook-level ``optimizer``, ``train_loader`` and ``device``.
    The loss of every 100th batch is printed as a progress message.

    Args:
        epoch: epoch number; only used in the progress message.
        model: network whose output is passed to ``F.nll_loss``.
        perm: index tensor of length 784 giving the new pixel order. The
            default is the identity, which leaves the images unchanged.
    """
    model.train()
    # The permutation is the same for every batch: place it on the training
    # device once instead of transferring the indices on each iteration.
    perm = torch.as_tensor(perm, device=device)

    for batch_idx, (data, target) in enumerate(train_loader):
        # send to device
        data, target = data.to(device), target.to(device)

        # permute pixels
        data = data.view(-1, 28*28)
        data = data[:, perm]
        data = data.view(-1, 1, 28, 28)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            
def scramble_test(model, perm=torch.arange(0, 784).long()):
    """Evaluate ``model`` on pixel-permuted test images and record its accuracy.

    Every batch is flattened to 784 pixels per image, reordered with ``perm``
    and reshaped back to (-1, 1, 28, 28) before it is fed to the model.
    Relies on the notebook-level ``test_loader``, ``device`` and
    ``accuracy_list``. Prints the average negative log-likelihood loss and the
    accuracy over the whole test set, and appends the accuracy (in percent)
    to ``accuracy_list``.

    Args:
        model: network returning per-class scores of shape (batch, classes)
            that are valid inputs for ``F.nll_loss``.
        perm: index tensor of length 784 giving the new pixel order. The
            default is the identity, which leaves the images unchanged.
    """
    model.eval()
    test_loss = 0
    correct = 0
    # The permutation is the same for every batch: place it on the device once.
    perm = torch.as_tensor(perm, device=device)

    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every test batch (less memory, faster).
    with torch.no_grad():
        for data, target in test_loader:
            # send to device
            data, target = data.to(device), target.to(device)

            # permute pixels
            data = data.view(-1, 28*28)
            data = data[:, perm]
            data = data.view(-1, 1, 28, 28)

            output = model(data)
            # sum up batch loss; divided by the dataset size below
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # index of the max log-probability is the predicted class
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()

    n_samples = len(test_loader.dataset)
    test_loss /= n_samples
    accuracy = 100. * correct / n_samples
    accuracy_list.append(accuracy)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, n_samples,
        accuracy))
In [14]:
# Train a fresh fully connected model on images whose pixels are reordered
# with fixed_perm, evaluating on equally permuted test images after each of
# the 10 epochs.
print("Training on ", device)
model_fnn_2 = FC2Layer(input_size, output_size).to(device)
optimizer = optim.SGD(model_fnn_2.parameters(), lr=0.01, momentum=0.5)
n_params = get_n_params(model_fnn_2)
print('Number of parameters: {}'.format(n_params))

for epoch in range(10):
    scramble_train(epoch, model_fnn_2, fixed_perm)
    scramble_test(model_fnn_2, fixed_perm)
Training on  cuda:0
Number of parameters: 183770
Train Epoch: 0 [0/60000 (0%)]	Loss: 2.313312
Train Epoch: 0 [6400/60000 (11%)]	Loss: 2.184037
Train Epoch: 0 [12800/60000 (21%)]	Loss: 1.399919
Train Epoch: 0 [19200/60000 (32%)]	Loss: 0.641634
Train Epoch: 0 [25600/60000 (43%)]	Loss: 0.427648
Train Epoch: 0 [32000/60000 (53%)]	Loss: 0.735366
Train Epoch: 0 [38400/60000 (64%)]	Loss: 0.300395
Train Epoch: 0 [44800/60000 (75%)]	Loss: 0.339064
Train Epoch: 0 [51200/60000 (85%)]	Loss: 0.508439
Train Epoch: 0 [57600/60000 (96%)]	Loss: 0.271234

Test set: Average loss: 0.2958, Accuracy: 9120/10000 (91%)

Train Epoch: 1 [0/60000 (0%)]	Loss: 0.219611
Train Epoch: 1 [6400/60000 (11%)]	Loss: 0.141608
Train Epoch: 1 [12800/60000 (21%)]	Loss: 0.402851
Train Epoch: 1 [19200/60000 (32%)]	Loss: 0.232927
Train Epoch: 1 [25600/60000 (43%)]	Loss: 0.297120
Train Epoch: 1 [32000/60000 (53%)]	Loss: 0.332926
Train Epoch: 1 [38400/60000 (64%)]	Loss: 0.262555
Train Epoch: 1 [44800/60000 (75%)]	Loss: 0.299975
Train Epoch: 1 [51200/60000 (85%)]	Loss: 0.257360
Train Epoch: 1 [57600/60000 (96%)]	Loss: 0.175640

Test set: Average loss: 0.1975, Accuracy: 9432/10000 (94%)

Train Epoch: 2 [0/60000 (0%)]	Loss: 0.170990
Train Epoch: 2 [6400/60000 (11%)]	Loss: 0.247841
Train Epoch: 2 [12800/60000 (21%)]	Loss: 0.189010
Train Epoch: 2 [19200/60000 (32%)]	Loss: 0.282354
Train Epoch: 2 [25600/60000 (43%)]	Loss: 0.429647
Train Epoch: 2 [32000/60000 (53%)]	Loss: 0.141841
Train Epoch: 2 [38400/60000 (64%)]	Loss: 0.183931
Train Epoch: 2 [44800/60000 (75%)]	Loss: 0.198016
Train Epoch: 2 [51200/60000 (85%)]	Loss: 0.075450
Train Epoch: 2 [57600/60000 (96%)]	Loss: 0.162376

Test set: Average loss: 0.1636, Accuracy: 9519/10000 (95%)

Train Epoch: 3 [0/60000 (0%)]	Loss: 0.372585
Train Epoch: 3 [6400/60000 (11%)]	Loss: 0.097617
Train Epoch: 3 [12800/60000 (21%)]	Loss: 0.189798
Train Epoch: 3 [19200/60000 (32%)]	Loss: 0.145662
Train Epoch: 3 [25600/60000 (43%)]	Loss: 0.077773
Train Epoch: 3 [32000/60000 (53%)]	Loss: 0.106603
Train Epoch: 3 [38400/60000 (64%)]	Loss: 0.144856
Train Epoch: 3 [44800/60000 (75%)]	Loss: 0.241724
Train Epoch: 3 [51200/60000 (85%)]	Loss: 0.128318
Train Epoch: 3 [57600/60000 (96%)]	Loss: 0.065174

Test set: Average loss: 0.1243, Accuracy: 9631/10000 (96%)

Train Epoch: 4 [0/60000 (0%)]	Loss: 0.065198
Train Epoch: 4 [6400/60000 (11%)]	Loss: 0.128174
Train Epoch: 4 [12800/60000 (21%)]	Loss: 0.033851
Train Epoch: 4 [19200/60000 (32%)]	Loss: 0.065818
Train Epoch: 4 [25600/60000 (43%)]	Loss: 0.069995
Train Epoch: 4 [32000/60000 (53%)]	Loss: 0.160783
Train Epoch: 4 [38400/60000 (64%)]	Loss: 0.147025
Train Epoch: 4 [44800/60000 (75%)]	Loss: 0.288517
Train Epoch: 4 [51200/60000 (85%)]	Loss: 0.102338
Train Epoch: 4 [57600/60000 (96%)]	Loss: 0.102671

Test set: Average loss: 0.1099, Accuracy: 9679/10000 (97%)

Train Epoch: 5 [0/60000 (0%)]	Loss: 0.086134
Train Epoch: 5 [6400/60000 (11%)]	Loss: 0.054850
Train Epoch: 5 [12800/60000 (21%)]	Loss: 0.053114
Train Epoch: 5 [19200/60000 (32%)]	Loss: 0.094749
Train Epoch: 5 [25600/60000 (43%)]	Loss: 0.120391
Train Epoch: 5 [32000/60000 (53%)]	Loss: 0.069578
Train Epoch: 5 [38400/60000 (64%)]	Loss: 0.052634
Train Epoch: 5 [44800/60000 (75%)]	Loss: 0.171941
Train Epoch: 5 [51200/60000 (85%)]	Loss: 0.147429
Train Epoch: 5 [57600/60000 (96%)]	Loss: 0.029881

Test set: Average loss: 0.0999, Accuracy: 9688/10000 (97%)

Train Epoch: 6 [0/60000 (0%)]	Loss: 0.071289
Train Epoch: 6 [6400/60000 (11%)]	Loss: 0.103085
Train Epoch: 6 [12800/60000 (21%)]	Loss: 0.024409
Train Epoch: 6 [19200/60000 (32%)]	Loss: 0.020256
Train Epoch: 6 [25600/60000 (43%)]	Loss: 0.010267
Train Epoch: 6 [32000/60000 (53%)]	Loss: 0.074697
Train Epoch: 6 [38400/60000 (64%)]	Loss: 0.092260
Train Epoch: 6 [44800/60000 (75%)]	Loss: 0.065542
Train Epoch: 6 [51200/60000 (85%)]	Loss: 0.104234
Train Epoch: 6 [57600/60000 (96%)]	Loss: 0.044166

Test set: Average loss: 0.0921, Accuracy: 9712/10000 (97%)

Train Epoch: 7 [0/60000 (0%)]	Loss: 0.033976
Train Epoch: 7 [6400/60000 (11%)]	Loss: 0.021100
Train Epoch: 7 [12800/60000 (21%)]	Loss: 0.008134
Train Epoch: 7 [19200/60000 (32%)]	Loss: 0.017355
Train Epoch: 7 [25600/60000 (43%)]	Loss: 0.084864
Train Epoch: 7 [32000/60000 (53%)]	Loss: 0.052808
Train Epoch: 7 [38400/60000 (64%)]	Loss: 0.028968
Train Epoch: 7 [44800/60000 (75%)]	Loss: 0.022372
Train Epoch: 7 [51200/60000 (85%)]	Loss: 0.081928
Train Epoch: 7 [57600/60000 (96%)]	Loss: 0.059890

Test set: Average loss: 0.0946, Accuracy: 9696/10000 (97%)

Train Epoch: 8 [0/60000 (0%)]	Loss: 0.036753
Train Epoch: 8 [6400/60000 (11%)]	Loss: 0.128112
Train Epoch: 8 [12800/60000 (21%)]	Loss: 0.059585
Train Epoch: 8 [19200/60000 (32%)]	Loss: 0.078037
Train Epoch: 8 [25600/60000 (43%)]	Loss: 0.088705
Train Epoch: 8 [32000/60000 (53%)]	Loss: 0.089082
Train Epoch: 8 [38400/60000 (64%)]	Loss: 0.064373
Train Epoch: 8 [44800/60000 (75%)]	Loss: 0.029573
Train Epoch: 8 [51200/60000 (85%)]	Loss: 0.060114
Train Epoch: 8 [57600/60000 (96%)]	Loss: 0.028512

Test set: Average loss: 0.0807, Accuracy: 9748/10000 (97%)

Train Epoch: 9 [0/60000 (0%)]	Loss: 0.015103
Train Epoch: 9 [6400/60000 (11%)]	Loss: 0.122875
Train Epoch: 9 [12800/60000 (21%)]	Loss: 0.108078
Train Epoch: 9 [19200/60000 (32%)]	Loss: 0.042668
Train Epoch: 9 [25600/60000 (43%)]	Loss: 0.058161
Train Epoch: 9 [32000/60000 (53%)]	Loss: 0.081582
Train Epoch: 9 [38400/60000 (64%)]	Loss: 0.273529
Train Epoch: 9 [44800/60000 (75%)]	Loss: 0.051074
Train Epoch: 9 [51200/60000 (85%)]	Loss: 0.016274
Train Epoch: 9 [57600/60000 (96%)]	Loss: 0.150063

Test set: Average loss: 0.0895, Accuracy: 9728/10000 (97%)

In [15]:
# Visualize the predictions of model_fnn_2 for 10 pixel-permuted test images.
# The model is moved to the CPU because the batch from test_loader lives there.
model_fnn_2.to('cpu')

# Fetch a test batch and apply the same fixed permutation used for training.
image_batch, label_batch = next(iter(test_loader))
image_batch_scramble = image_batch.view(-1, 28*28)[:, fixed_perm].view(-1, 1, 28, 28)

# Gradients are not needed for inference.
with torch.no_grad():
    log_pred_prob_batch = model_fnn_2(image_batch_scramble)

# The network outputs log-probabilities; exponentiate to obtain probabilities.
pred_prob_batch = log_pred_prob_batch.exp().numpy()

# The permuted image is displayed, i.e. what the model was given as input.
for i in range(10):
    visualize_pred(image_batch_scramble[i], pred_prob_batch[i], label_batch[i].item())