AML21: 03 Deep Neural Network for CIFAR-10¶

Based on https://github.com/Atcold/pytorch-Deep-Learning

Data and Libraries¶

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, utils
import matplotlib.pyplot as plt
import numpy

# Every model and every batch is moved to this device before training/evaluation.
# Prefer the first CUDA GPU; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)
cuda:0

Load the CIFAR-10 dataset¶

Observe that we set shuffle=True, which means that the samples are drawn in a new random order every time a loader is iterated.

In [2]:
# Problem dimensions
input_size = 32 * 32 * 3   # images are 32x32 pixels with 3 channels
output_size = 10           # there are 10 classes

# Preprocessing shared by both splits: convert to a tensor, then normalise every
# channel with mean 0.5 and std 0.5 (imshow() undoes this with `img / 2 + 0.5`).
cifar_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Only the training split requests a download.
train_set = datasets.CIFAR10('../data', train=True, download=True,
                             transform=cifar_transform)
test_set = datasets.CIFAR10('../data', train=False, transform=cifar_transform)

# Both loaders shuffle, so `next(iter(loader))` yields a random batch each time.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=True)

# Human-readable class names, indexed by the integer label.
classNames = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz
Extracting ../data/cifar-10-python.tar.gz to ../data
In [8]:
# show some training images
def imshow(img, plot):
    """Display a normalized image tensor on a matplotlib-like target.

    Args:
        img: rank-3 image tensor in channel-first layout whose values were
            normalized with mean 0.5 and std 0.5.
        plot: any object exposing ``imshow(array)``, e.g. ``matplotlib.pyplot``
            or an ``Axes`` instance.
    """
    # undo the normalization: x * 0.5 + 0.5
    img = img / 2 + 0.5
    # detach()/cpu() make the conversion safe for tensors that require grad or
    # live on the GPU; both are no-ops for plain CPU tensors
    npimg = img.detach().cpu().numpy()
    # move the channel axis last, which is the layout imshow expects
    plot.imshow(numpy.transpose(npimg, (1, 2, 0)))
    

plt.figure(figsize=(8,3), dpi=200)

# one batch of training images; random because the loader shuffles
image_batch, label_batch = next(iter(train_loader))

# lay the first 20 images of the batch out on a 2 x 10 grid, titled by class name
n_rows, n_cols = 2, 10
for idx in range(n_rows * n_cols):
    plt.subplot(n_rows, n_cols, idx + 1)
    imshow(image_batch[idx], plt)
    plt.axis('off')
    plt.title(classNames[label_batch[idx].item()])
plt.show()

A Fully Connected Neural Network with 3 hidden layers¶

Helper functions for training and testing¶

In [4]:
def get_n_params(model):
    """Return the total number of scalar entries over all parameters of ``model``."""
    return sum(param.nelement() for param in model.parameters())

# test accuracies in percent, one entry appended per call to test()
accuracy_list = []


def train(epoch, model):
    """Train ``model`` for one pass over ``train_loader``.

    Relies on the notebook-level ``train_loader``, ``optimizer`` and ``device``.
    Prints the loss of the current batch every 100 batches.

    Args:
        epoch: epoch index; only used in the progress message.
        model: network whose output is passed to ``F.nll_loss``.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        # move the batch to the device the model lives on
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # report progress only on every 100th batch
        if step % 100 != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(inputs), len(train_loader.dataset),
            100. * step / len(train_loader), loss.item()))
            
def test(model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        # send to device
        data, target = data.to(device), target.to(device)
        
        output = model(data)
        test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability                                                                 
        correct += pred.eq(target.data.view_as(pred)).cpu().sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    accuracy_list.append(accuracy)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        accuracy))

Defining the Fully Connected Network¶

In [5]:
class FC2Layer(nn.Module):
    """Fully connected classifier: input -> 200 -> 100 -> 60 -> output.

    Every hidden ``Linear`` layer is followed by a ``ReLU``; the final layer is
    followed by ``LogSoftmax`` over dim 1, so the forward pass returns
    log-probabilities.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.input_size = input_size

        # layer widths from the flattened input through the three hidden layers
        widths = [input_size, 200, 100, 60]
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(widths[-1], output_size))
        layers.append(nn.LogSoftmax(dim=1))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        # flatten every sample to a vector of length input_size
        flat = x.view(-1, self.input_size)
        return self.network(flat)

Train the Network¶

In [6]:
print("Training on ", device)

# Build the network on the target device. `optimizer` is read as a global by
# train(), so it has to be bound to this model's parameters before training.
model_fnn = FC2Layer(input_size, output_size).to(device)
optimizer = optim.SGD(model_fnn.parameters(), lr=0.01, momentum=0.5)
print('Number of parameters: {}'.format(get_n_params(model_fnn)))

# alternate one training pass with one evaluation on the test set
n_epochs = 10
for epoch in range(n_epochs):
    train(epoch, model_fnn)
    test(model_fnn)
Training on  cuda:0
Number of parameters: 641370
Train Epoch: 0 [0/50000 (0%)]	Loss: 2.303929
Train Epoch: 0 [6400/50000 (13%)]	Loss: 2.267314
Train Epoch: 0 [12800/50000 (26%)]	Loss: 2.180724
Train Epoch: 0 [19200/50000 (38%)]	Loss: 2.186977
Train Epoch: 0 [25600/50000 (51%)]	Loss: 2.059186
Train Epoch: 0 [32000/50000 (64%)]	Loss: 1.987120
Train Epoch: 0 [38400/50000 (77%)]	Loss: 1.890560
Train Epoch: 0 [44800/50000 (90%)]	Loss: 1.905007

Test set: Average loss: 1.8331, Accuracy: 3490/10000 (35%)

Train Epoch: 1 [0/50000 (0%)]	Loss: 1.672973
Train Epoch: 1 [6400/50000 (13%)]	Loss: 1.868309
Train Epoch: 1 [12800/50000 (26%)]	Loss: 1.717875
Train Epoch: 1 [19200/50000 (38%)]	Loss: 1.718698
Train Epoch: 1 [25600/50000 (51%)]	Loss: 1.730582
Train Epoch: 1 [32000/50000 (64%)]	Loss: 1.926668
Train Epoch: 1 [38400/50000 (77%)]	Loss: 1.602338
Train Epoch: 1 [44800/50000 (90%)]	Loss: 1.619077

Test set: Average loss: 1.6301, Accuracy: 4187/10000 (42%)

Train Epoch: 2 [0/50000 (0%)]	Loss: 1.618124
Train Epoch: 2 [6400/50000 (13%)]	Loss: 1.759828
Train Epoch: 2 [12800/50000 (26%)]	Loss: 1.556684
Train Epoch: 2 [19200/50000 (38%)]	Loss: 1.563781
Train Epoch: 2 [25600/50000 (51%)]	Loss: 1.596017
Train Epoch: 2 [32000/50000 (64%)]	Loss: 1.572987
Train Epoch: 2 [38400/50000 (77%)]	Loss: 1.592451
Train Epoch: 2 [44800/50000 (90%)]	Loss: 1.670992

Test set: Average loss: 1.5415, Accuracy: 4527/10000 (45%)

Train Epoch: 3 [0/50000 (0%)]	Loss: 1.692432
Train Epoch: 3 [6400/50000 (13%)]	Loss: 1.691754
Train Epoch: 3 [12800/50000 (26%)]	Loss: 1.639725
Train Epoch: 3 [19200/50000 (38%)]	Loss: 1.656701
Train Epoch: 3 [25600/50000 (51%)]	Loss: 1.476262
Train Epoch: 3 [32000/50000 (64%)]	Loss: 1.318958
Train Epoch: 3 [38400/50000 (77%)]	Loss: 1.341206
Train Epoch: 3 [44800/50000 (90%)]	Loss: 1.353117

Test set: Average loss: 1.4736, Accuracy: 4766/10000 (48%)

Train Epoch: 4 [0/50000 (0%)]	Loss: 1.320711
Train Epoch: 4 [6400/50000 (13%)]	Loss: 1.415912
Train Epoch: 4 [12800/50000 (26%)]	Loss: 1.468164
Train Epoch: 4 [19200/50000 (38%)]	Loss: 1.474726
Train Epoch: 4 [25600/50000 (51%)]	Loss: 1.457999
Train Epoch: 4 [32000/50000 (64%)]	Loss: 1.593989
Train Epoch: 4 [38400/50000 (77%)]	Loss: 1.182956
Train Epoch: 4 [44800/50000 (90%)]	Loss: 1.296540

Test set: Average loss: 1.4491, Accuracy: 4846/10000 (48%)

Train Epoch: 5 [0/50000 (0%)]	Loss: 1.332552
Train Epoch: 5 [6400/50000 (13%)]	Loss: 1.242660
Train Epoch: 5 [12800/50000 (26%)]	Loss: 1.366395
Train Epoch: 5 [19200/50000 (38%)]	Loss: 1.037882
Train Epoch: 5 [25600/50000 (51%)]	Loss: 1.436885
Train Epoch: 5 [32000/50000 (64%)]	Loss: 1.367385
Train Epoch: 5 [38400/50000 (77%)]	Loss: 1.334145
Train Epoch: 5 [44800/50000 (90%)]	Loss: 1.379413

Test set: Average loss: 1.4078, Accuracy: 4955/10000 (50%)

Train Epoch: 6 [0/50000 (0%)]	Loss: 1.336982
Train Epoch: 6 [6400/50000 (13%)]	Loss: 1.118001
Train Epoch: 6 [12800/50000 (26%)]	Loss: 1.176043
Train Epoch: 6 [19200/50000 (38%)]	Loss: 1.249017
Train Epoch: 6 [25600/50000 (51%)]	Loss: 1.191949
Train Epoch: 6 [32000/50000 (64%)]	Loss: 1.438644
Train Epoch: 6 [38400/50000 (77%)]	Loss: 1.145287
Train Epoch: 6 [44800/50000 (90%)]	Loss: 1.426292

Test set: Average loss: 1.4301, Accuracy: 4930/10000 (49%)

Train Epoch: 7 [0/50000 (0%)]	Loss: 1.373312
Train Epoch: 7 [6400/50000 (13%)]	Loss: 1.139284
Train Epoch: 7 [12800/50000 (26%)]	Loss: 1.397779
Train Epoch: 7 [19200/50000 (38%)]	Loss: 1.292989
Train Epoch: 7 [25600/50000 (51%)]	Loss: 1.316021
Train Epoch: 7 [32000/50000 (64%)]	Loss: 1.170644
Train Epoch: 7 [38400/50000 (77%)]	Loss: 1.063934
Train Epoch: 7 [44800/50000 (90%)]	Loss: 1.223836

Test set: Average loss: 1.4108, Accuracy: 5075/10000 (51%)

Train Epoch: 8 [0/50000 (0%)]	Loss: 1.183261
Train Epoch: 8 [6400/50000 (13%)]	Loss: 1.380959
Train Epoch: 8 [12800/50000 (26%)]	Loss: 1.162247
Train Epoch: 8 [19200/50000 (38%)]	Loss: 1.126126
Train Epoch: 8 [25600/50000 (51%)]	Loss: 1.488702
Train Epoch: 8 [32000/50000 (64%)]	Loss: 0.950843
Train Epoch: 8 [38400/50000 (77%)]	Loss: 1.254712
Train Epoch: 8 [44800/50000 (90%)]	Loss: 1.313643

Test set: Average loss: 1.3756, Accuracy: 5177/10000 (52%)

Train Epoch: 9 [0/50000 (0%)]	Loss: 1.245108
Train Epoch: 9 [6400/50000 (13%)]	Loss: 1.422112
Train Epoch: 9 [12800/50000 (26%)]	Loss: 1.163222
Train Epoch: 9 [19200/50000 (38%)]	Loss: 1.063460
Train Epoch: 9 [25600/50000 (51%)]	Loss: 1.185204
Train Epoch: 9 [32000/50000 (64%)]	Loss: 1.444041
Train Epoch: 9 [38400/50000 (77%)]	Loss: 1.347754
Train Epoch: 9 [44800/50000 (90%)]	Loss: 1.164156

Test set: Average loss: 1.3720, Accuracy: 5157/10000 (52%)

Show some predictions of the trained network on test images¶

In [14]:
def visualize_pred(img, pred_prob, real_label):
    """Show an image next to a bar chart of its predicted class probabilities.

    Args:
        img: normalized image tensor, forwarded to ``imshow``.
        pred_prob: 1-D array of per-class probabilities, ordered like ``classNames``.
        real_label: integer index of the true class in ``classNames``.
    """
    # derive the number of bars from the prediction instead of hard-coding 10
    n_classes = len(pred_prob)
    class_positions = numpy.arange(n_classes)

    fig, (ax1, ax2) = plt.subplots(figsize=(6,9), ncols=2)

    # left panel: the image, titled with the true and the predicted class
    imshow(img, ax1)
    ax1.axis('off')
    pred_label = int(numpy.argmax(pred_prob))
    # build an explicit string; passing a list would render as a Python list repr
    ax1.set_title('true: {} / predicted: {}'.format(
        classNames[real_label], classNames[pred_label]))

    # right panel: one horizontal bar per class
    ax2.barh(class_positions, pred_prob)
    ax2.set_aspect(0.1)
    ax2.set_yticks(class_positions)
    ax2.set_yticklabels(classNames[:n_classes])
    ax2.set_title('Prediction Probability')
    ax2.set_xlim(0, 1.1)
    plt.tight_layout()
In [15]:
# the test batch below stays on the CPU, so bring the model back from the training device
model_fnn.to('cpu')

# one batch of test images; random because the loader shuffles
image_batch, label_batch = next(iter(test_loader))

# inference only: turn off gradient tracking
with torch.no_grad():
    log_pred_prob_batch = model_fnn(image_batch)

# the network outputs log-probabilities; exponentiate once to get probabilities
pred_prob_batch = torch.exp(log_pred_prob_batch).numpy()

# show the first 10 images of the batch with their predictions
for i in range(10):
    visualize_pred(image_batch[i], pred_prob_batch[i], label_batch[i].item())

Does the Fully Connected Network use "Visual Information" ?¶

In [23]:
# One fixed random permutation of the 32*32*3 = 3072 values of an image;
# the same permutation is applied to every image.
fixed_perm = torch.randperm(3072)

plt.figure(figsize=(8, 8))

# one batch of training images; random because the loader shuffles
image_batch, label_batch = next(iter(train_loader))

for i in range(6):
    image = image_batch[i]
    # flatten, reorder the values with the fixed permutation, restore (3, 32, 32)
    image_perm = image.view(-1, 32*32*3)[:, fixed_perm].view(3, 32, 32)
    title = classNames[label_batch[i].item()]

    # 3 x 4 grid, two panels per sample: the original, then its scrambled version
    for offset, picture in ((1, image), (2, image_perm)):
        plt.subplot(3, 4, 2*i + offset)
        imshow(picture, plt)
        plt.axis('off')
        plt.title(title)
In [24]:
# start a fresh accuracy log for the scrambled-pixel experiment
accuracy_list = []


def scramble_train(epoch, model, perm=torch.arange(0, 3072).long()):
    """Train ``model`` for one pass over ``train_loader`` on pixel-permuted images.

    Relies on the notebook-level ``train_loader``, ``optimizer`` and ``device``.
    Prints the loss of the current batch every 100 batches.

    Args:
        epoch: epoch index; only used in the progress message.
        model: network whose output is passed to ``F.nll_loss``.
        perm: index tensor of length 3072 used to reorder the flattened values
            of every image; the default is the identity ordering.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        # move the batch to the device the model lives on
        inputs, labels = inputs.to(device), labels.to(device)

        # flatten each image, reorder its values by `perm`, restore (N, 3, 32, 32)
        scrambled = inputs.view(-1, 32*32*3)[:, perm].view(-1, 3, 32, 32)

        optimizer.zero_grad()
        loss = F.nll_loss(model(scrambled), labels)
        loss.backward()
        optimizer.step()

        # report progress only on every 100th batch
        if step % 100 != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(scrambled), len(train_loader.dataset),
            100. * step / len(train_loader), loss.item()))
            
def scramble_test(model, perm=torch.arange(0, 3072).long()):
    """Evaluate ``model`` on pixel-permuted test images and record its accuracy.

    Relies on the notebook-level ``test_loader`` and ``device``. Prints the
    average NLL loss and the accuracy, and appends the accuracy (in percent)
    to ``accuracy_list``.

    Args:
        model: network returning per-class log-probabilities along dim 1.
        perm: index tensor of length 3072 used to reorder the flattened values
            of every image; the default is the identity ordering.
    """
    model.eval()
    test_loss = 0
    correct = 0
    # evaluation needs no gradients: skip building the autograd graph
    with torch.no_grad():
        for data, target in test_loader:
            # send to device
            data, target = data.to(device), target.to(device)

            # permute pixels: flatten, reorder by `perm`, restore (N, 3, 32, 32)
            data = data.view(-1, 32*32*3)
            data = data[:, perm]
            data = data.view(-1, 3, 32, 32)

            output = model(data)
            # sum (not mean) per batch so the division by the dataset size below is exact
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # index of the max log-probability is the predicted class
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    n_samples = len(test_loader.dataset)
    test_loss /= n_samples
    accuracy = 100. * correct / n_samples
    accuracy_list.append(accuracy)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, n_samples,
        accuracy))
In [25]:
print("Training on ", device)

# A fresh network for the scrambled-pixel experiment. `optimizer` is read as a
# global by scramble_train(), so it is re-bound to the new model's parameters.
model_fnn_2 = FC2Layer(input_size, output_size).to(device)
optimizer = optim.SGD(model_fnn_2.parameters(), lr=0.01, momentum=0.5)
print('Number of parameters: {}'.format(get_n_params(model_fnn_2)))

# train and evaluate with the same fixed pixel permutation
n_epochs = 10
for epoch in range(n_epochs):
    scramble_train(epoch, model_fnn_2, fixed_perm)
    scramble_test(model_fnn_2, fixed_perm)
Training on  cuda:0
Number of parameters: 641370
Train Epoch: 0 [0/50000 (0%)]	Loss: 2.301718
Train Epoch: 0 [6400/50000 (13%)]	Loss: 2.272872
Train Epoch: 0 [12800/50000 (26%)]	Loss: 2.278932
Train Epoch: 0 [19200/50000 (38%)]	Loss: 2.219936
Train Epoch: 0 [25600/50000 (51%)]	Loss: 2.091192
Train Epoch: 0 [32000/50000 (64%)]	Loss: 1.987986
Train Epoch: 0 [38400/50000 (77%)]	Loss: 1.881270
Train Epoch: 0 [44800/50000 (90%)]	Loss: 1.931318

Test set: Average loss: 1.8751, Accuracy: 3298/10000 (33%)

Train Epoch: 1 [0/50000 (0%)]	Loss: 1.857532
Train Epoch: 1 [6400/50000 (13%)]	Loss: 1.824420
Train Epoch: 1 [12800/50000 (26%)]	Loss: 1.654686
Train Epoch: 1 [19200/50000 (38%)]	Loss: 1.627458
Train Epoch: 1 [25600/50000 (51%)]	Loss: 1.880438
Train Epoch: 1 [32000/50000 (64%)]	Loss: 1.642655
Train Epoch: 1 [38400/50000 (77%)]	Loss: 1.648239
Train Epoch: 1 [44800/50000 (90%)]	Loss: 1.727985

Test set: Average loss: 1.6556, Accuracy: 4097/10000 (41%)

Train Epoch: 2 [0/50000 (0%)]	Loss: 1.604434
Train Epoch: 2 [6400/50000 (13%)]	Loss: 1.741402
Train Epoch: 2 [12800/50000 (26%)]	Loss: 1.855572
Train Epoch: 2 [19200/50000 (38%)]	Loss: 1.608704
Train Epoch: 2 [25600/50000 (51%)]	Loss: 1.605843
Train Epoch: 2 [32000/50000 (64%)]	Loss: 1.501398
Train Epoch: 2 [38400/50000 (77%)]	Loss: 1.303432
Train Epoch: 2 [44800/50000 (90%)]	Loss: 1.546939

Test set: Average loss: 1.5533, Accuracy: 4514/10000 (45%)

Train Epoch: 3 [0/50000 (0%)]	Loss: 1.505469
Train Epoch: 3 [6400/50000 (13%)]	Loss: 1.646354
Train Epoch: 3 [12800/50000 (26%)]	Loss: 1.355112
Train Epoch: 3 [19200/50000 (38%)]	Loss: 1.652500
Train Epoch: 3 [25600/50000 (51%)]	Loss: 1.584958
Train Epoch: 3 [32000/50000 (64%)]	Loss: 1.411315
Train Epoch: 3 [38400/50000 (77%)]	Loss: 1.648783
Train Epoch: 3 [44800/50000 (90%)]	Loss: 1.457495

Test set: Average loss: 1.4804, Accuracy: 4753/10000 (48%)

Train Epoch: 4 [0/50000 (0%)]	Loss: 1.433928
Train Epoch: 4 [6400/50000 (13%)]	Loss: 1.395763
Train Epoch: 4 [12800/50000 (26%)]	Loss: 1.217118
Train Epoch: 4 [19200/50000 (38%)]	Loss: 1.647619
Train Epoch: 4 [25600/50000 (51%)]	Loss: 1.531113
Train Epoch: 4 [32000/50000 (64%)]	Loss: 1.368085
Train Epoch: 4 [38400/50000 (77%)]	Loss: 1.544607
Train Epoch: 4 [44800/50000 (90%)]	Loss: 1.518020

Test set: Average loss: 1.4243, Accuracy: 4992/10000 (50%)

Train Epoch: 5 [0/50000 (0%)]	Loss: 1.251877
Train Epoch: 5 [6400/50000 (13%)]	Loss: 1.458064
Train Epoch: 5 [12800/50000 (26%)]	Loss: 1.358247
Train Epoch: 5 [19200/50000 (38%)]	Loss: 1.159697
Train Epoch: 5 [25600/50000 (51%)]	Loss: 1.435400
Train Epoch: 5 [32000/50000 (64%)]	Loss: 1.464651
Train Epoch: 5 [38400/50000 (77%)]	Loss: 1.053899
Train Epoch: 5 [44800/50000 (90%)]	Loss: 1.542430

Test set: Average loss: 1.3917, Accuracy: 5052/10000 (51%)

Train Epoch: 6 [0/50000 (0%)]	Loss: 1.550522
Train Epoch: 6 [6400/50000 (13%)]	Loss: 1.583268
Train Epoch: 6 [12800/50000 (26%)]	Loss: 1.279493
Train Epoch: 6 [19200/50000 (38%)]	Loss: 1.292409
Train Epoch: 6 [25600/50000 (51%)]	Loss: 1.334699
Train Epoch: 6 [32000/50000 (64%)]	Loss: 1.646645
Train Epoch: 6 [38400/50000 (77%)]	Loss: 1.272052
Train Epoch: 6 [44800/50000 (90%)]	Loss: 1.383600

Test set: Average loss: 1.3845, Accuracy: 5127/10000 (51%)

Train Epoch: 7 [0/50000 (0%)]	Loss: 1.247640
Train Epoch: 7 [6400/50000 (13%)]	Loss: 1.379684
Train Epoch: 7 [12800/50000 (26%)]	Loss: 1.260978
Train Epoch: 7 [19200/50000 (38%)]	Loss: 1.238247
Train Epoch: 7 [25600/50000 (51%)]	Loss: 1.188706
Train Epoch: 7 [32000/50000 (64%)]	Loss: 1.324507
Train Epoch: 7 [38400/50000 (77%)]	Loss: 1.186184
Train Epoch: 7 [44800/50000 (90%)]	Loss: 1.358905

Test set: Average loss: 1.3687, Accuracy: 5209/10000 (52%)

Train Epoch: 8 [0/50000 (0%)]	Loss: 1.102417
Train Epoch: 8 [6400/50000 (13%)]	Loss: 1.096017
Train Epoch: 8 [12800/50000 (26%)]	Loss: 1.079066
Train Epoch: 8 [19200/50000 (38%)]	Loss: 1.248799
Train Epoch: 8 [25600/50000 (51%)]	Loss: 1.157211
Train Epoch: 8 [32000/50000 (64%)]	Loss: 1.370102
Train Epoch: 8 [38400/50000 (77%)]	Loss: 1.142574
Train Epoch: 8 [44800/50000 (90%)]	Loss: 1.095979

Test set: Average loss: 1.4101, Accuracy: 5068/10000 (51%)

Train Epoch: 9 [0/50000 (0%)]	Loss: 1.026252
Train Epoch: 9 [6400/50000 (13%)]	Loss: 1.002404
Train Epoch: 9 [12800/50000 (26%)]	Loss: 1.101393
Train Epoch: 9 [19200/50000 (38%)]	Loss: 1.252599
Train Epoch: 9 [25600/50000 (51%)]	Loss: 1.354900
Train Epoch: 9 [32000/50000 (64%)]	Loss: 1.175568
Train Epoch: 9 [38400/50000 (77%)]	Loss: 1.141156
Train Epoch: 9 [44800/50000 (90%)]	Loss: 1.178337

Test set: Average loss: 1.3499, Accuracy: 5252/10000 (53%)

In [26]:
# the test batch below stays on the CPU, so bring the model back from the training device
model_fnn_2.to('cpu')

# one batch of test images; random because the loader shuffles
image_batch, label_batch = next(iter(test_loader))

# apply the permutation used during training: flatten, reorder, restore (N, 3, 32, 32)
image_batch_scramble = image_batch.view(-1, 32*32*3)[:, fixed_perm].view(-1, 3, 32, 32)

# inference only: turn off gradient tracking
with torch.no_grad():
    log_pred_prob_batch = model_fnn_2(image_batch_scramble)

# the network outputs log-probabilities; exponentiate once to get probabilities
pred_prob_batch = torch.exp(log_pred_prob_batch).numpy()

# show the first 10 scrambled images (what the network actually sees) with their predictions
for i in range(10):
    visualize_pred(image_batch_scramble[i], pred_prob_batch[i], label_batch[i].item())