AML21: 04 Convolutional Neural Network for CIFAR-10¶

Based on https://github.com/Atcold/pytorch-Deep-Learning

Data and Libraries¶

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, utils
import matplotlib.pyplot as plt
import numpy

# Select the compute device once: the first CUDA GPU when one is available,
# otherwise the CPU. The training and evaluation helpers further down move
# every batch to this device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
cuda:0

Load the CIFAR10 dataset¶

Note that we set shuffle=True, so the samples are drawn in a new random order on every pass over the data.

In [3]:
# Problem dimensions: every image is 32x32 pixels with 3 channels,
# and there are 10 classes.
input_size = 32 * 32 * 3
output_size = 10

# Both splits share the same preprocessing: convert the image to a tensor,
# then normalise each of the 3 channels with mean 0.5 and std 0.5.
cifar_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split: downloaded into ../data when requested, served in
# shuffled batches of 64 images.
train_set = datasets.CIFAR10('../data', train=True, download=True,
                             transform=cifar_transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)

# Test split: read from the same directory, served in shuffled batches of 1000.
test_set = datasets.CIFAR10('../data', train=False, transform=cifar_transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=True)

# Human-readable class names, indexed by the integer label.
classNames = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz
Extracting ../data/cifar-10-python.tar.gz to ../data
In [8]:
# show some training images
def imshow(img, plot):
    """Draw a normalised image tensor on a matplotlib target.

    Args:
        img: image tensor in channel-first layout (C, H, W) whose values were
            normalised with mean 0.5 and std 0.5. It may live on any device
            and may track gradients.
        plot: any object exposing ``imshow`` -- this notebook passes either
            the ``pyplot`` module or an ``Axes``.
    """
    img = img / 2 + 0.5  # unnormalize
    # detach() and cpu() make the conversion valid for tensors that require
    # gradients or live on the GPU; plain CPU tensors pass through unchanged.
    npimg = img.detach().cpu().numpy()
    # reorder the axes from (C, H, W) to (H, W, C) before plotting
    plot.imshow(numpy.transpose(npimg, (1, 2, 0)))
    

# one wide figure that will hold a 2 x 10 grid of thumbnails
plt.figure(figsize=(16,4))

# fetch a batch of train images; the loader shuffles, so this is a RANDOM sample
image_batch, label_batch = next(iter(train_loader))
for i in range(20):
    image = image_batch[i]
    # label_batch holds integer class indices; look up the readable name
    label = classNames[label_batch[i].item()]
    plt.subplot(2, 10, i + 1)
    # imshow (defined above) takes the pyplot module itself as drawing target
    imshow(image, plt)
    plt.axis('off')
    plt.title(label)
plt.show()

A 2-hidden layer Fully Connected Neural Network¶

Helper functions for training and testing¶

In [9]:
# function to count number of parameters
def get_n_params(model):
    """Return the total number of scalar values across all parameters of ``model``."""
    return sum(param.nelement() for param in model.parameters())

# test accuracy (in percent), appended by test() after every evaluation pass
accuracy_list = []


def train(epoch, model):
    """Train ``model`` for one pass over ``train_loader``.

    Uses the notebook-level ``train_loader``, ``device`` and ``optimizer``;
    the optimizer must already have been created for this model's parameters.

    Args:
        epoch: epoch index, used only in the progress message.
        model: network whose output is fed to the negative log-likelihood
            loss, i.e. it is expected to return log-probabilities.
    """
    model.train()
    n_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    for step, (inputs, labels) in enumerate(train_loader):
        # send to device
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # report progress only on every 100th batch
        if step % 100 != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(inputs), n_samples,
            100. * step / n_batches, loss.item()))
            
def test(model):
    """Evaluate ``model`` on ``test_loader`` and record its accuracy.

    Uses the notebook-level ``test_loader`` and ``device``. Prints the average
    negative log-likelihood loss and the accuracy over the whole test set, and
    appends the accuracy (in percent) to the notebook-level ``accuracy_list``.

    Args:
        model: network whose output is fed to the negative log-likelihood
            loss, i.e. it is expected to return log-probabilities.
    """
    model.eval()
    test_loss = 0
    correct = 0
    # Evaluation never calls backward(), so disable gradient tracking: this
    # avoids building an autograd graph for every batch.
    with torch.no_grad():
        for data, target in test_loader:
            # send to device
            data, target = data.to(device), target.to(device)

            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.max(1, keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    accuracy_list.append(accuracy)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        accuracy))

Defining the Convolutional Neural Network¶

In [34]:
class CNN(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    Architecture: conv(3x3) -> ReLU -> conv(6x6) -> ReLU -> max-pool(2)
    -> conv(6x6) -> ReLU -> max-pool(2) -> fc(288 -> 200) -> ReLU
    -> fc(200 -> output_size) -> log-softmax.

    Args:
        input_size: number of values per input image. Accepted so the
            constructor matches how the notebook calls it, but not used: the
            layer sizes below are fixed for 3x32x32 inputs.
        output_size: number of classes, i.e. the width of the final layer.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, padding=0)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=6, padding=0)
        self.conv3 = nn.Conv2d(in_channels=24, out_channels=32, kernel_size=6, padding=0)
        # after the second max-pool a 32x32 input has become 32 maps of 3x3
        self.fc1 = nn.Linear(32*3*3, 200)
        # honour the requested number of classes instead of hard-coding 10
        self.fc2 = nn.Linear(200, output_size)

    def forward(self, x, verbose=False):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: batch of images shaped (N, 3, 32, 32).
            verbose: when True, print the tensor shape after every stage.

        Returns:
            Tensor of shape (N, output_size) holding log-probabilities.
        """
        def trace(stage, tensor):
            # shape logging is opt-in so that training output stays readable
            if verbose:
                print(stage, tensor.shape)

        trace("input", x)

        x = F.relu(self.conv1(x))
        trace("after conv1", x)

        x = F.relu(self.conv2(x))
        trace("after conv2", x)

        x = F.max_pool2d(x, kernel_size=2)
        trace("after 1st maxpool", x)

        x = F.relu(self.conv3(x))
        trace("after conv3", x)

        x = F.max_pool2d(x, kernel_size=2)
        trace("after 2nd maxpool", x)

        # Flatten everything except the batch dimension. Keeping the batch
        # size fixed means an input of the wrong spatial size fails loudly in
        # fc1 instead of being silently regrouped into a different batch size.
        x = x.view(x.size(0), -1)
        trace("after tensor shape change", x)

        x = F.relu(self.fc1(x))
        trace("after fc1", x)

        x = self.fc2(x)
        trace("after fc2", x)

        return F.log_softmax(x, dim=1)

Sample output of the tensor shapes when the print statements in forward() above are enabled¶

Training on  cuda:0
Number of parameters: 98218
input torch.Size([64, 3, 32, 32])
after conv1 torch.Size([64, 12, 30, 30])
after conv2 torch.Size([64, 24, 25, 25])
after 1st maxpool torch.Size([64, 24, 12, 12])
after conv3 torch.Size([64, 32, 7, 7])
after 2nd maxpool torch.Size([64, 32, 3, 3])
after tensor shape change torch.Size([64, 288])
after fc1 torch.Size([64, 200])
after fc2 torch.Size([64, 10])
Train Epoch: 0 [0/50000 (0%)]   Loss: 2.310683
input torch.Size([64, 3, 32, 32])
after conv1 torch.Size([64, 12, 30, 30])
after conv2 torch.Size([64, 24, 25, 25])
after 1st maxpool torch.Size([64, 24, 12, 12])
after conv3 torch.Size([64, 32, 7, 7])
after 2nd maxpool torch.Size([64, 32, 3, 3])
after tensor shape change torch.Size([64, 288])
after fc1 torch.Size([64, 200])
after fc2 torch.Size([64, 10])

Train the Network¶

In [35]:
print("Training on ", device)

# Build the network directly on the target device and give it a plain SGD
# optimizer. train() reads this notebook-level `optimizer`, so it has to be
# (re)created for every new model.
model_cnn = CNN(input_size, output_size).to(device)
optimizer = optim.SGD(model_cnn.parameters(), lr=0.01, momentum=0.5)
print('Number of parameters: {}'.format(get_n_params(model_cnn)))

# 10 epochs, with an evaluation on the test set after each one
for epoch in range(10):
    train(epoch, model_cnn)
    test(model_cnn)
Training on  cuda:0
Number of parameters: 98218
Train Epoch: 0 [0/50000 (0%)]	Loss: 2.304080
Train Epoch: 0 [6400/50000 (13%)]	Loss: 2.301495
Train Epoch: 0 [12800/50000 (26%)]	Loss: 2.276738
Train Epoch: 0 [19200/50000 (38%)]	Loss: 2.131609
Train Epoch: 0 [25600/50000 (51%)]	Loss: 2.069105
Train Epoch: 0 [32000/50000 (64%)]	Loss: 1.823697
Train Epoch: 0 [38400/50000 (77%)]	Loss: 1.953569
Train Epoch: 0 [44800/50000 (90%)]	Loss: 1.827818

Test set: Average loss: 1.7805, Accuracy: 3571/10000 (36%)

Train Epoch: 1 [0/50000 (0%)]	Loss: 1.798204
Train Epoch: 1 [6400/50000 (13%)]	Loss: 1.719861
Train Epoch: 1 [12800/50000 (26%)]	Loss: 1.729588
Train Epoch: 1 [19200/50000 (38%)]	Loss: 1.702609
Train Epoch: 1 [25600/50000 (51%)]	Loss: 1.649322
Train Epoch: 1 [32000/50000 (64%)]	Loss: 1.819955
Train Epoch: 1 [38400/50000 (77%)]	Loss: 1.695602
Train Epoch: 1 [44800/50000 (90%)]	Loss: 1.660032

Test set: Average loss: 1.5666, Accuracy: 4297/10000 (43%)

Train Epoch: 2 [0/50000 (0%)]	Loss: 1.504375
Train Epoch: 2 [6400/50000 (13%)]	Loss: 1.601314
Train Epoch: 2 [12800/50000 (26%)]	Loss: 1.561499
Train Epoch: 2 [19200/50000 (38%)]	Loss: 1.326506
Train Epoch: 2 [25600/50000 (51%)]	Loss: 1.455495
Train Epoch: 2 [32000/50000 (64%)]	Loss: 1.493736
Train Epoch: 2 [38400/50000 (77%)]	Loss: 1.724569
Train Epoch: 2 [44800/50000 (90%)]	Loss: 1.451514

Test set: Average loss: 1.4577, Accuracy: 4688/10000 (47%)

Train Epoch: 3 [0/50000 (0%)]	Loss: 1.521510
Train Epoch: 3 [6400/50000 (13%)]	Loss: 1.308649
Train Epoch: 3 [12800/50000 (26%)]	Loss: 1.295653
Train Epoch: 3 [19200/50000 (38%)]	Loss: 1.278879
Train Epoch: 3 [25600/50000 (51%)]	Loss: 1.474922
Train Epoch: 3 [32000/50000 (64%)]	Loss: 1.673646
Train Epoch: 3 [38400/50000 (77%)]	Loss: 1.655927
Train Epoch: 3 [44800/50000 (90%)]	Loss: 1.471884

Test set: Average loss: 1.5268, Accuracy: 4589/10000 (46%)

Train Epoch: 4 [0/50000 (0%)]	Loss: 1.423143
Train Epoch: 4 [6400/50000 (13%)]	Loss: 1.156515
Train Epoch: 4 [12800/50000 (26%)]	Loss: 1.316086
Train Epoch: 4 [19200/50000 (38%)]	Loss: 1.136011
Train Epoch: 4 [25600/50000 (51%)]	Loss: 1.120775
Train Epoch: 4 [32000/50000 (64%)]	Loss: 1.401012
Train Epoch: 4 [38400/50000 (77%)]	Loss: 1.456116
Train Epoch: 4 [44800/50000 (90%)]	Loss: 1.440089

Test set: Average loss: 1.2987, Accuracy: 5298/10000 (53%)

Train Epoch: 5 [0/50000 (0%)]	Loss: 1.451904
Train Epoch: 5 [6400/50000 (13%)]	Loss: 1.257001
Train Epoch: 5 [12800/50000 (26%)]	Loss: 1.156041
Train Epoch: 5 [19200/50000 (38%)]	Loss: 1.188339
Train Epoch: 5 [25600/50000 (51%)]	Loss: 1.295001
Train Epoch: 5 [32000/50000 (64%)]	Loss: 1.444516
Train Epoch: 5 [38400/50000 (77%)]	Loss: 1.031989
Train Epoch: 5 [44800/50000 (90%)]	Loss: 1.361738

Test set: Average loss: 1.3239, Accuracy: 5278/10000 (53%)

Train Epoch: 6 [0/50000 (0%)]	Loss: 1.172487
Train Epoch: 6 [6400/50000 (13%)]	Loss: 1.273679
Train Epoch: 6 [12800/50000 (26%)]	Loss: 1.330631
Train Epoch: 6 [19200/50000 (38%)]	Loss: 1.318222
Train Epoch: 6 [25600/50000 (51%)]	Loss: 1.031817
Train Epoch: 6 [32000/50000 (64%)]	Loss: 1.196307
Train Epoch: 6 [38400/50000 (77%)]	Loss: 1.100376
Train Epoch: 6 [44800/50000 (90%)]	Loss: 0.898120

Test set: Average loss: 1.3057, Accuracy: 5327/10000 (53%)

Train Epoch: 7 [0/50000 (0%)]	Loss: 1.026688
Train Epoch: 7 [6400/50000 (13%)]	Loss: 1.349544
Train Epoch: 7 [12800/50000 (26%)]	Loss: 0.979226
Train Epoch: 7 [19200/50000 (38%)]	Loss: 1.315894
Train Epoch: 7 [25600/50000 (51%)]	Loss: 1.169268
Train Epoch: 7 [32000/50000 (64%)]	Loss: 0.873879
Train Epoch: 7 [38400/50000 (77%)]	Loss: 1.066666
Train Epoch: 7 [44800/50000 (90%)]	Loss: 1.059463

Test set: Average loss: 1.2506, Accuracy: 5641/10000 (56%)

Train Epoch: 8 [0/50000 (0%)]	Loss: 1.341056
Train Epoch: 8 [6400/50000 (13%)]	Loss: 1.366830
Train Epoch: 8 [12800/50000 (26%)]	Loss: 1.194088
Train Epoch: 8 [19200/50000 (38%)]	Loss: 1.158925
Train Epoch: 8 [25600/50000 (51%)]	Loss: 1.118372
Train Epoch: 8 [32000/50000 (64%)]	Loss: 1.141856
Train Epoch: 8 [38400/50000 (77%)]	Loss: 1.103572
Train Epoch: 8 [44800/50000 (90%)]	Loss: 0.993673

Test set: Average loss: 1.1718, Accuracy: 5871/10000 (59%)

Train Epoch: 9 [0/50000 (0%)]	Loss: 1.060610
Train Epoch: 9 [6400/50000 (13%)]	Loss: 0.979271
Train Epoch: 9 [12800/50000 (26%)]	Loss: 1.169945
Train Epoch: 9 [19200/50000 (38%)]	Loss: 1.143954
Train Epoch: 9 [25600/50000 (51%)]	Loss: 0.985557
Train Epoch: 9 [32000/50000 (64%)]	Loss: 1.412940
Train Epoch: 9 [38400/50000 (77%)]	Loss: 0.991871
Train Epoch: 9 [44800/50000 (90%)]	Loss: 1.182572

Test set: Average loss: 1.0681, Accuracy: 6248/10000 (62%)

Show some predictions of the trained network on test images¶

In [36]:
def visualize_pred(img, pred_prob, real_label):
    '''Show an image next to a bar chart of its predicted class probabilities.

    Args:
        img: normalised image tensor, drawn through the notebook's imshow helper.
        pred_prob: array with one probability per entry of classNames.
        real_label: integer index of the true class.
    '''
    _, (image_ax, prob_ax) = plt.subplots(figsize=(6,9), ncols=2)

    # left panel: the image, titled with [true class, predicted class]
    imshow(img, image_ax)
    image_ax.axis('off')
    pred_label = numpy.argmax(pred_prob)
    image_ax.set_title([classNames[real_label], classNames[pred_label]])

    # right panel: one horizontal bar per class
    class_positions = numpy.arange(10)
    prob_ax.barh(class_positions, pred_prob)
    prob_ax.set_aspect(0.1)
    prob_ax.set_yticks(class_positions)
    prob_ax.set_yticklabels(classNames)
    prob_ax.set_title('Prediction Probability')
    prob_ax.set_xlim(0, 1.1)

    plt.tight_layout()
In [37]:
# The loader yields CPU tensors and the model is applied to them directly
# below, so move the model to the CPU first.
model_cnn.to('cpu') 

# fetch a batch of test images
image_batch, label_batch = next(iter(test_loader))

# Turn off gradients to speed up this part
with torch.no_grad():
    log_pred_prob_batch = model_cnn(image_batch)
# visualise the first 10 images of the batch
for i in range(10):
    img = image_batch[i]
    real_label = label_batch[i].item()
    log_pred_prob = log_pred_prob_batch[i]
    # Output of the network are log-probabilities, need to take exponential for probabilities
    pred_prob = torch.exp(log_pred_prob).data.numpy().squeeze()
    visualize_pred(img, pred_prob, real_label)

Does the Convolutional Network use "Visual Information"?¶

In [40]:
# 3072 = 32*32*3: one index for every value of a flattened image
fixed_perm = torch.randperm(3072) # Fix a permutation of the image pixels; We apply the same permutation to all images

# show some training images
plt.figure(figsize=(8, 8))

# fetch a batch of train images; RANDOM
image_batch, label_batch = next(iter(train_loader))

for i in range(6):
    image = image_batch[i]
    # flatten to a single row of 3072 values, reorder them with the fixed
    # permutation, then fold the result back into image shape
    image_perm = image.view(-1, 32*32*3).clone()
    image_perm = image_perm[:, fixed_perm]
    image_perm = image_perm.view(3, 32, 32)
    
    label = label_batch[i].item()
    # 3 x 4 grid: odd slots hold the original image, the following even slot
    # holds its permuted version
    plt.subplot(3,4 , 2*i + 1)
    imshow(image, plt)
    plt.axis('off')
    plt.title(classNames[label])
    plt.subplot(3, 4, 2*i+2)
    imshow(image_perm, plt)
    plt.axis('off')
    plt.title(classNames[label])
In [42]:
# start a fresh accuracy history for the scrambled-pixel experiment
accuracy_list = []

def scramble_train(epoch, model, perm=torch.arange(0, 3072).long()):
    """Train ``model`` for one epoch on pixel-permuted training images.

    Uses the notebook-level ``train_loader``, ``device`` and ``optimizer``;
    the optimizer must already have been created for this model's parameters.

    Args:
        epoch: epoch index, used only in the progress message.
        model: network whose output is fed to the negative log-likelihood
            loss, i.e. it is expected to return log-probabilities.
        perm: 1-D index tensor of length 3072 applied to every flattened
            image. The default is the identity, which leaves images unchanged.
    """
    model.train()
    # The permutation is identical for every batch, so place it on the
    # training device once here rather than indexing device-resident data
    # with an index tensor that lives elsewhere on every iteration.
    perm = perm.to(device)
    for batch_idx, (data, target) in enumerate(train_loader):
        # send to device
        data, target = data.to(device), target.to(device)

        # permute pixels: flatten each image, reorder, restore image shape
        data = data.view(-1, 32*32*3)
        data = data[:, perm]
        data = data.view(-1, 3, 32, 32)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            
def scramble_test(model, perm=torch.arange(0, 3072).long()):
    """Evaluate ``model`` on pixel-permuted test images and record its accuracy.

    Uses the notebook-level ``test_loader`` and ``device``. Prints the average
    negative log-likelihood loss and the accuracy over the whole test set, and
    appends the accuracy (in percent) to the notebook-level ``accuracy_list``.

    Args:
        model: network whose output is fed to the negative log-likelihood
            loss, i.e. it is expected to return log-probabilities.
        perm: 1-D index tensor of length 3072 applied to every flattened
            image. The default is the identity, which leaves images unchanged.
    """
    model.eval()
    test_loss = 0
    correct = 0
    # the permutation is the same for every batch: move it to the device once
    perm = perm.to(device)
    # Evaluation never calls backward(), so disable gradient tracking: this
    # avoids building an autograd graph for every batch.
    with torch.no_grad():
        for data, target in test_loader:
            # send to device
            data, target = data.to(device), target.to(device)

            # permute pixels: flatten each image, reorder, restore image shape
            data = data.view(-1, 32*32*3)
            data = data[:, perm]
            data = data.view(-1, 3, 32, 32)

            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.max(1, keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    accuracy_list.append(accuracy)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        accuracy))
In [43]:
print("Training on ", device)

# A fresh network for the scrambled-pixel experiment. scramble_train() reads
# the notebook-level `optimizer`, so it is rebound to the new model here.
model_cnn_2 = CNN(input_size, output_size).to(device)
optimizer = optim.SGD(model_cnn_2.parameters(), lr=0.01, momentum=0.5)
print('Number of parameters: {}'.format(get_n_params(model_cnn_2)))

# 10 epochs; training and evaluation both apply the same fixed permutation
for epoch in range(10):
    scramble_train(epoch, model_cnn_2, fixed_perm)
    scramble_test(model_cnn_2, fixed_perm)
Training on  cuda:0
Number of parameters: 98218
Train Epoch: 0 [0/50000 (0%)]	Loss: 2.309004
Train Epoch: 0 [6400/50000 (13%)]	Loss: 2.304964
Train Epoch: 0 [12800/50000 (26%)]	Loss: 2.299511
Train Epoch: 0 [19200/50000 (38%)]	Loss: 2.285993
Train Epoch: 0 [25600/50000 (51%)]	Loss: 2.282612
Train Epoch: 0 [32000/50000 (64%)]	Loss: 2.226867
Train Epoch: 0 [38400/50000 (77%)]	Loss: 2.283500
Train Epoch: 0 [44800/50000 (90%)]	Loss: 2.271716

Test set: Average loss: 2.2129, Accuracy: 1793/10000 (18%)

Train Epoch: 1 [0/50000 (0%)]	Loss: 2.219131
Train Epoch: 1 [6400/50000 (13%)]	Loss: 2.219992
Train Epoch: 1 [12800/50000 (26%)]	Loss: 2.226165
Train Epoch: 1 [19200/50000 (38%)]	Loss: 2.236556
Train Epoch: 1 [25600/50000 (51%)]	Loss: 2.144730
Train Epoch: 1 [32000/50000 (64%)]	Loss: 1.922335
Train Epoch: 1 [38400/50000 (77%)]	Loss: 2.130401
Train Epoch: 1 [44800/50000 (90%)]	Loss: 1.945175

Test set: Average loss: 2.0361, Accuracy: 2393/10000 (24%)

Train Epoch: 2 [0/50000 (0%)]	Loss: 1.939512
Train Epoch: 2 [6400/50000 (13%)]	Loss: 2.150975
Train Epoch: 2 [12800/50000 (26%)]	Loss: 1.921154
Train Epoch: 2 [19200/50000 (38%)]	Loss: 1.911588
Train Epoch: 2 [25600/50000 (51%)]	Loss: 1.928235
Train Epoch: 2 [32000/50000 (64%)]	Loss: 1.866194
Train Epoch: 2 [38400/50000 (77%)]	Loss: 1.949905
Train Epoch: 2 [44800/50000 (90%)]	Loss: 1.803150

Test set: Average loss: 1.9075, Accuracy: 3068/10000 (31%)

Train Epoch: 3 [0/50000 (0%)]	Loss: 1.989698
Train Epoch: 3 [6400/50000 (13%)]	Loss: 1.897873
Train Epoch: 3 [12800/50000 (26%)]	Loss: 1.903903
Train Epoch: 3 [19200/50000 (38%)]	Loss: 1.783972
Train Epoch: 3 [25600/50000 (51%)]	Loss: 1.855665
Train Epoch: 3 [32000/50000 (64%)]	Loss: 1.906661
Train Epoch: 3 [38400/50000 (77%)]	Loss: 1.879070
Train Epoch: 3 [44800/50000 (90%)]	Loss: 1.724755

Test set: Average loss: 1.8891, Accuracy: 3124/10000 (31%)

Train Epoch: 4 [0/50000 (0%)]	Loss: 1.917282
Train Epoch: 4 [6400/50000 (13%)]	Loss: 1.730572
Train Epoch: 4 [12800/50000 (26%)]	Loss: 1.802114
Train Epoch: 4 [19200/50000 (38%)]	Loss: 1.686005
Train Epoch: 4 [25600/50000 (51%)]	Loss: 1.688759
Train Epoch: 4 [32000/50000 (64%)]	Loss: 1.652777
Train Epoch: 4 [38400/50000 (77%)]	Loss: 1.789474
Train Epoch: 4 [44800/50000 (90%)]	Loss: 1.635420

Test set: Average loss: 1.8157, Accuracy: 3519/10000 (35%)

Train Epoch: 5 [0/50000 (0%)]	Loss: 1.753247
Train Epoch: 5 [6400/50000 (13%)]	Loss: 1.649330
Train Epoch: 5 [12800/50000 (26%)]	Loss: 1.787187
Train Epoch: 5 [19200/50000 (38%)]	Loss: 1.525436
Train Epoch: 5 [25600/50000 (51%)]	Loss: 1.653703
Train Epoch: 5 [32000/50000 (64%)]	Loss: 1.573170
Train Epoch: 5 [38400/50000 (77%)]	Loss: 1.471550
Train Epoch: 5 [44800/50000 (90%)]	Loss: 1.597702

Test set: Average loss: 1.6948, Accuracy: 3891/10000 (39%)

Train Epoch: 6 [0/50000 (0%)]	Loss: 1.829852
Train Epoch: 6 [6400/50000 (13%)]	Loss: 1.541618
Train Epoch: 6 [12800/50000 (26%)]	Loss: 1.536928
Train Epoch: 6 [19200/50000 (38%)]	Loss: 1.272416
Train Epoch: 6 [25600/50000 (51%)]	Loss: 1.593219
Train Epoch: 6 [32000/50000 (64%)]	Loss: 1.861212
Train Epoch: 6 [38400/50000 (77%)]	Loss: 1.742154
Train Epoch: 6 [44800/50000 (90%)]	Loss: 1.622967

Test set: Average loss: 1.6574, Accuracy: 4063/10000 (41%)

Train Epoch: 7 [0/50000 (0%)]	Loss: 1.500977
Train Epoch: 7 [6400/50000 (13%)]	Loss: 1.577944
Train Epoch: 7 [12800/50000 (26%)]	Loss: 1.596491
Train Epoch: 7 [19200/50000 (38%)]	Loss: 1.393886
Train Epoch: 7 [25600/50000 (51%)]	Loss: 1.566372
Train Epoch: 7 [32000/50000 (64%)]	Loss: 1.557666
Train Epoch: 7 [38400/50000 (77%)]	Loss: 1.477905
Train Epoch: 7 [44800/50000 (90%)]	Loss: 1.642988

Test set: Average loss: 1.5630, Accuracy: 4326/10000 (43%)

Train Epoch: 8 [0/50000 (0%)]	Loss: 1.722551
Train Epoch: 8 [6400/50000 (13%)]	Loss: 1.660142
Train Epoch: 8 [12800/50000 (26%)]	Loss: 1.422044
Train Epoch: 8 [19200/50000 (38%)]	Loss: 1.511889
Train Epoch: 8 [25600/50000 (51%)]	Loss: 1.567470
Train Epoch: 8 [32000/50000 (64%)]	Loss: 1.655049
Train Epoch: 8 [38400/50000 (77%)]	Loss: 1.296456
Train Epoch: 8 [44800/50000 (90%)]	Loss: 1.715541

Test set: Average loss: 1.5366, Accuracy: 4434/10000 (44%)

Train Epoch: 9 [0/50000 (0%)]	Loss: 1.430948
Train Epoch: 9 [6400/50000 (13%)]	Loss: 1.650957
Train Epoch: 9 [12800/50000 (26%)]	Loss: 1.558538
Train Epoch: 9 [19200/50000 (38%)]	Loss: 1.667261
Train Epoch: 9 [25600/50000 (51%)]	Loss: 1.551630
Train Epoch: 9 [32000/50000 (64%)]	Loss: 1.283138
Train Epoch: 9 [38400/50000 (77%)]	Loss: 1.368059
Train Epoch: 9 [44800/50000 (90%)]	Loss: 1.406550

Test set: Average loss: 1.5869, Accuracy: 4382/10000 (44%)

In [44]:
# The loader yields CPU tensors and the model is applied to them directly
# below, so move the model to the CPU first.
model_cnn_2.to('cpu') 

# fetch a batch of test images
image_batch, label_batch = next(iter(test_loader))
# apply the training-time permutation: flatten each image, reorder its
# values with fixed_perm, then restore the image shape
image_batch_scramble = image_batch.view(-1, 32*32*3)
image_batch_scramble = image_batch_scramble[:, fixed_perm]
image_batch_scramble = image_batch_scramble.view(-1, 3, 32, 32)
# Turn off gradients to speed up this part
with torch.no_grad():
    log_pred_prob_batch = model_cnn_2(image_batch_scramble)
# visualise the first 10 images of the batch
for i in range(10):
    # original (unscrambled) image; assigned but not used below -- only the
    # permuted version is displayed
    img = image_batch[i]
    img_perm = image_batch_scramble[i]
    real_label = label_batch[i].item()
    log_pred_prob = log_pred_prob_batch[i]
    # Output of the network are log-probabilities, need to take exponential for probabilities
    pred_prob = torch.exp(log_pred_prob).data.numpy().squeeze()
    visualize_pred(img_perm, pred_prob, real_label)