# AML 05: Adversarial Examples for CIFAR-10

Data and Libraries

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, utils
import matplotlib.pyplot as plt
import numpy, random


# Seed every pseudo-random generator in play (Python, NumPy and PyTorch) with
# the same value so that repeated runs are as reproducible as possible.
# Background: https://pytorch.org/docs/stable/notes/randomness.html
random.seed(99)
numpy.random.seed(99)
torch.manual_seed(99)

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
# The model and every batch are moved to this 'device'.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0

Load the CIFAR10 dataset

Observe that we set shuffle=True for both loaders, which means the order of the samples is randomized every time a loader is iterated.

# Each image is 32x32 pixels with 3 channels; there are 10 target classes.
input_size = 32 * 32 * 3
output_size = 10

# Not referenced by the transforms below, which normalize with 0.5 / 0.5.
# NOTE(review): these look like ImageNet statistics rather than CIFAR-10
# ones -- confirm before using them anywhere.
mean_cifar10 = [0.485, 0.456, 0.406]
std_cifar10 = [0.229, 0.224, 0.225]

# Identical preprocessing for both splits: convert to a tensor, then shift
# and scale every channel with mean 0.5 and std 0.5 (undone later by
# `img / 2 + 0.5` when images are displayed).
_cifar_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split: downloaded on demand, served in shuffled batches of 128.
_train_set = datasets.CIFAR10(
    '../data', train=True, download=True, transform=_cifar_transform)
train_loader = torch.utils.data.DataLoader(
    _train_set, batch_size=128, shuffle=True)

# Test split: served in shuffled batches of 1000.
_test_set = datasets.CIFAR10(
    '../data', train=False, transform=_cifar_transform)
test_loader = torch.utils.data.DataLoader(
    _test_set, batch_size=1000, shuffle=True)

# Human-readable name for each class index.
classNames = [
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

Files already downloaded and verified

# show some training images
def imshow(img, plot):
    """Draw a normalized image tensor on a matplotlib target.

    Args:
        img: image tensor laid out channels-first (it is transposed with
            ``(1, 2, 0)`` before display) and normalized such that
            ``img / 2 + 0.5`` restores the displayable values.
        plot: any object exposing ``imshow(array)``, e.g. the ``pyplot``
            module or an ``Axes`` instance.
    """
    img = img / 2 + 0.5  # unnormalize
    # detach() and cpu() let this also accept tensors that track gradients or
    # live on the GPU; a bare .numpy() raises for both.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects channels last
    plot.imshow(numpy.transpose(npimg, (1, 2, 0)))
    

plt.figure(figsize=(16, 4))

# Pull one batch from the shuffled training loader (so the selection is
# random) and lay out its first 20 images on a 2 x 10 grid, each titled with
# its class name.
image_batch, label_batch = next(iter(train_loader))
for i in range(20):
    plt.subplot(2, 10, i + 1)
    image = image_batch[i]
    imshow(image, plt)
    plt.axis('off')
    label = classNames[label_batch[i].item()]
    plt.title(label)
plt.show()

CNN for CIFAR-10

Helper functions for training and testing

# function to count number of parameters
def get_n_params(model):
    """Return the total number of scalar elements over all parameters of *model*.

    Args:
        model: object whose ``parameters()`` yields tensors.

    Returns:
        Sum of ``nelement()`` over every parameter (0 when there are none).
    """
    return sum(param.nelement() for param in model.parameters())

# history of test accuracies; test() appends one entry per call
accuracy_list = []
# we pass a model object to this trainer, and it trains this model for one epoch
def train(epoch, model):
    """Train *model* for one epoch over the module-level ``train_loader``.

    Uses the module-level ``optimizer`` and ``device``. A progress line with
    the current loss is printed every 100 batches.

    Args:
        epoch: epoch number; only used in the progress messages.
        model: network whose output is passed to ``F.nll_loss``.
    """
    model.train()
    for step, batch in enumerate(train_loader):
        # move the inputs and the labels to the training device
        inputs, labels = (tensor.to(device) for tensor in batch)

        # one optimization step on this batch
        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # only report on every 100th batch
        if step % 100:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(inputs), len(train_loader.dataset),
            100. * step / len(train_loader), loss.item()))
            
def test(model):
    """Evaluate *model* on the module-level ``test_loader``.

    Prints the average loss and the accuracy, and appends the accuracy to the
    module-level ``accuracy_list``.

    Args:
        model: network producing log-probabilities (its output is passed to
            ``F.nll_loss``).

    Returns:
        Accuracy over the whole test set, in percent.
    """
    model.eval()
    test_loss = 0
    correct = 0
    # Evaluation never calls backward(), so skip building autograd graphs;
    # this saves memory and time on every batch.
    with torch.no_grad():
        for data, target in test_loader:
            # send to device
            data, target = data.to(device), target.to(device)

            output = model(data)
            # sum up batch loss; it is averaged over the dataset below
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    n_samples = len(test_loader.dataset)
    test_loss /= n_samples
    accuracy = 100. * correct / n_samples
    accuracy_list.append(accuracy)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, n_samples,
        accuracy))
    return accuracy

Defining the Convolutional Neural Network

# FROM: https://github.com/boazbk/mltheoryseminar/blob/main/code/hw0/simple_train.ipynb
## 5-Layer CNN for CIFAR
## This is the Myrtle5 network by David Page (https://myrtle.ai/learn/how-to-train-your-resnet-4-architecture/)

class PrintShape(nn.Module):
    """Debugging layer: prints the shape of its input and passes it through."""

    def forward(self, x):
        shape = x.shape
        print(shape)
        # the input itself is returned, untouched
        return x

class Flatten(nn.Module):
    """Collapse every dimension after the first (batch) dimension into one.

    An input of shape ``(N, C, 1, 1)`` becomes ``(N, C)``, exactly as before.
    Inputs whose trailing dimensions are larger than 1 are now flattened to
    ``(N, C*H*W)`` instead of raising.
    """

    def forward(self, x):
        # torch.flatten keeps the batch dimension (also when it is empty) and
        # merges all remaining dimensions, whatever their sizes.
        return torch.flatten(x, start_dim=1)

class CNN(nn.Module):
    """Convolutional classifier: four conv blocks followed by a linear head.

    Each block is Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU; blocks 1-3
    are followed by a 2x2 max-pool. The channel count goes
    3 -> C -> 2C -> 4C -> 8C with ``C = 64``. A final 4x4 max-pool, a flatten
    and a linear layer produce ``output_size`` log-probabilities
    (``LogSoftmax``).

    Args:
        input_size: stored on the instance; not otherwise used by the network.
        output_size: number of output classes.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.C = 64
        width = self.C

        # (in_channels, out_channels, followed by a 2x2 max-pool?) for layers 0-3
        block_specs = (
            (3, width, False),
            (width, width * 2, True),
            (width * 2, width * 4, True),
            (width * 4, width * 8, True),
        )

        # Modules are created strictly in network order so that parameter
        # initialization and the Sequential indices match the layer order.
        layers = []
        for in_channels, out_channels, pooled in block_specs:
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3,
                                    stride=1, padding=1, bias=True))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU())
            if pooled:
                layers.append(nn.MaxPool2d(2))

        # Layer 4: pool, flatten, classify
        layers.append(nn.MaxPool2d(4))
        layers.append(Flatten())
        layers.append(nn.Linear(width * 8, output_size, bias=True))
        layers.append(nn.LogSoftmax(dim=1))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the log-probabilities the network computes for batch *x*."""
        return self.network(x)

Train

print("Training on ", device)

# Build the network on the target device and give it a plain SGD optimizer;
# train() picks up the module-level `optimizer`.
model_cnn = CNN(input_size, output_size).to(device)
optimizer = optim.SGD(model_cnn.parameters(), lr=0.01, momentum=0.5)
print('Number of parameters: {}'.format(get_n_params(model_cnn)))

# 10 epochs, evaluating on the test set after each one
for epoch in range(10):
    train(epoch, model_cnn)
    test(model_cnn)

Training on  cuda:0
Number of parameters: 1558026
Train Epoch: 0 [0/50000 (0%)]	Loss: 3.306556
Train Epoch: 0 [6400/50000 (13%)]	Loss: 1.633615
Train Epoch: 0 [12800/50000 (26%)]	Loss: 1.668104
Train Epoch: 0 [19200/50000 (38%)]	Loss: 1.414279
Train Epoch: 0 [25600/50000 (51%)]	Loss: 1.193950
Train Epoch: 0 [32000/50000 (64%)]	Loss: 1.394766
Train Epoch: 0 [38400/50000 (77%)]	Loss: 0.997717
Train Epoch: 0 [44800/50000 (90%)]	Loss: 0.952614

Test set: Average loss: 1.5874, Accuracy: 4522/10000 (45%)

Train Epoch: 1 [0/50000 (0%)]	Loss: 1.604738
Train Epoch: 1 [6400/50000 (13%)]	Loss: 0.844859
Train Epoch: 1 [12800/50000 (26%)]	Loss: 1.423655
Train Epoch: 1 [19200/50000 (38%)]	Loss: 0.850720
Train Epoch: 1 [25600/50000 (51%)]	Loss: 0.758922
Train Epoch: 1 [32000/50000 (64%)]	Loss: 0.817945
Train Epoch: 1 [38400/50000 (77%)]	Loss: 0.783381
Train Epoch: 1 [44800/50000 (90%)]	Loss: 0.630755

Test set: Average loss: 0.8847, Accuracy: 6918/10000 (69%)

Train Epoch: 2 [0/50000 (0%)]	Loss: 0.752373
Train Epoch: 2 [6400/50000 (13%)]	Loss: 0.731634
Train Epoch: 2 [12800/50000 (26%)]	Loss: 0.608046
Train Epoch: 2 [19200/50000 (38%)]	Loss: 0.598536
Train Epoch: 2 [25600/50000 (51%)]	Loss: 0.590004
Train Epoch: 2 [32000/50000 (64%)]	Loss: 0.554549
Train Epoch: 2 [38400/50000 (77%)]	Loss: 0.736073
Train Epoch: 2 [44800/50000 (90%)]	Loss: 0.630011

Test set: Average loss: 1.0483, Accuracy: 6613/10000 (66%)

Train Epoch: 3 [0/50000 (0%)]	Loss: 0.672622
Train Epoch: 3 [6400/50000 (13%)]	Loss: 0.584814
Train Epoch: 3 [12800/50000 (26%)]	Loss: 0.349948
Train Epoch: 3 [19200/50000 (38%)]	Loss: 0.351503
Train Epoch: 3 [25600/50000 (51%)]	Loss: 0.642610
Train Epoch: 3 [32000/50000 (64%)]	Loss: 0.517426
Train Epoch: 3 [38400/50000 (77%)]	Loss: 0.593251
Train Epoch: 3 [44800/50000 (90%)]	Loss: 0.594903

Test set: Average loss: 0.8794, Accuracy: 6965/10000 (70%)

Train Epoch: 4 [0/50000 (0%)]	Loss: 0.567474
Train Epoch: 4 [6400/50000 (13%)]	Loss: 0.420103
Train Epoch: 4 [12800/50000 (26%)]	Loss: 0.465384
Train Epoch: 4 [19200/50000 (38%)]	Loss: 0.202093
Train Epoch: 4 [25600/50000 (51%)]	Loss: 0.448628
Train Epoch: 4 [32000/50000 (64%)]	Loss: 0.275538
Train Epoch: 4 [38400/50000 (77%)]	Loss: 0.400846
Train Epoch: 4 [44800/50000 (90%)]	Loss: 0.431203

Test set: Average loss: 0.8723, Accuracy: 7364/10000 (74%)

Train Epoch: 5 [0/50000 (0%)]	Loss: 0.303260
Train Epoch: 5 [6400/50000 (13%)]	Loss: 0.357428
Train Epoch: 5 [12800/50000 (26%)]	Loss: 0.258938
Train Epoch: 5 [19200/50000 (38%)]	Loss: 0.163297
Train Epoch: 5 [25600/50000 (51%)]	Loss: 0.336538
Train Epoch: 5 [32000/50000 (64%)]	Loss: 0.297979
Train Epoch: 5 [38400/50000 (77%)]	Loss: 0.388629
Train Epoch: 5 [44800/50000 (90%)]	Loss: 0.238212

Test set: Average loss: 0.8128, Accuracy: 7471/10000 (75%)

Train Epoch: 6 [0/50000 (0%)]	Loss: 0.241707
Train Epoch: 6 [6400/50000 (13%)]	Loss: 0.095612
Train Epoch: 6 [12800/50000 (26%)]	Loss: 0.248210
Train Epoch: 6 [19200/50000 (38%)]	Loss: 0.290752
Train Epoch: 6 [25600/50000 (51%)]	Loss: 0.205558
Train Epoch: 6 [32000/50000 (64%)]	Loss: 0.186609
Train Epoch: 6 [38400/50000 (77%)]	Loss: 0.259217
Train Epoch: 6 [44800/50000 (90%)]	Loss: 0.238990

Test set: Average loss: 0.5931, Accuracy: 8123/10000 (81%)

Train Epoch: 7 [0/50000 (0%)]	Loss: 0.105691
Train Epoch: 7 [6400/50000 (13%)]	Loss: 0.106844
Train Epoch: 7 [12800/50000 (26%)]	Loss: 0.116927
Train Epoch: 7 [19200/50000 (38%)]	Loss: 0.137428
Train Epoch: 7 [25600/50000 (51%)]	Loss: 0.071577
Train Epoch: 7 [32000/50000 (64%)]	Loss: 0.102095
Train Epoch: 7 [38400/50000 (77%)]	Loss: 0.079355
Train Epoch: 7 [44800/50000 (90%)]	Loss: 0.144805

Test set: Average loss: 0.5551, Accuracy: 8230/10000 (82%)

Train Epoch: 8 [0/50000 (0%)]	Loss: 0.089565
Train Epoch: 8 [6400/50000 (13%)]	Loss: 0.058174
Train Epoch: 8 [12800/50000 (26%)]	Loss: 0.030545
Train Epoch: 8 [19200/50000 (38%)]	Loss: 0.029895
Train Epoch: 8 [25600/50000 (51%)]	Loss: 0.076099
Train Epoch: 8 [32000/50000 (64%)]	Loss: 0.053621
Train Epoch: 8 [38400/50000 (77%)]	Loss: 0.036008
Train Epoch: 8 [44800/50000 (90%)]	Loss: 0.145845

Test set: Average loss: 0.8346, Accuracy: 7590/10000 (76%)

Train Epoch: 9 [0/50000 (0%)]	Loss: 0.069439
Train Epoch: 9 [6400/50000 (13%)]	Loss: 0.027135
Train Epoch: 9 [12800/50000 (26%)]	Loss: 0.020947
Train Epoch: 9 [19200/50000 (38%)]	Loss: 0.027904
Train Epoch: 9 [25600/50000 (51%)]	Loss: 0.029173
Train Epoch: 9 [32000/50000 (64%)]	Loss: 0.035999
Train Epoch: 9 [38400/50000 (77%)]	Loss: 0.032374
Train Epoch: 9 [44800/50000 (90%)]	Loss: 0.024280

Test set: Average loss: 0.5405, Accuracy: 8359/10000 (84%)

Show some predictions of the test network

def visualize_pred(img, pred_prob, real_label):
    """Show an image next to a bar chart of its predicted class probabilities.

    Args:
        img: image passed on to ``imshow`` for display.
        pred_prob: the 10 class probabilities, one per entry of ``classNames``.
        real_label: index of the true class in ``classNames``.
    """
    fig, (image_ax, bars_ax) = plt.subplots(figsize=(6,9), ncols=2)

    # left panel: the image, titled with [true class, predicted class]
    predicted = numpy.argmax(pred_prob)
    imshow(img, image_ax)
    image_ax.axis('off')
    image_ax.set_title([classNames[real_label], classNames[predicted]])

    # right panel: one horizontal bar per class
    positions = numpy.arange(10)
    bars_ax.barh(positions, pred_prob)
    bars_ax.set_aspect(0.1)
    bars_ax.set_yticks(positions)
    bars_ax.set_yticklabels(classNames)
    bars_ax.set_title('Prediction Probability')
    bars_ax.set_xlim(0, 1.1)

    plt.tight_layout()

model_cnn.to('cpu')
# Inference only: make sure BatchNorm uses its running statistics, so the
# prediction for one image does not depend on what else is in the batch.
model_cnn.eval()

# visualize the prediction for 10 randomly drawn test images
for i in range(10):
    # A fresh iterator over the shuffled test_loader yields a new random batch
    # each time. Only the first image of that batch is displayed, so only that
    # image is sent through the network (the loader's batches hold many images).
    image_batch, label_batch = next(iter(test_loader))
    img = image_batch[0]
    real_label = label_batch[0].item()
    with torch.no_grad():
        log_pred_prob_batch = model_cnn(image_batch[:1])
    log_pred_prob = log_pred_prob_batch[0]
    # Output of the network are log-probabilities, need to take exponential for probabilities
    pred_prob = torch.exp(log_pred_prob).numpy().squeeze()
    visualize_pred(img, pred_prob, real_label)

Does the CNN really see the way we do?

We build adversarial examples with the Fast Gradient Sign Attack (FGSM); see: https://pytorch.org/tutorials/beginner/fgsm_tutorial.html#fast-gradient-sign-attack

# perturbation sizes evaluated by the attack sweep
epsilons = [0, 0.001, 0.002, 0.004, 0.008, 0.01, 0.02, 0.04, 0.08, 0.1, 0.2]
# FGSM attack code
def fgsm_attack(image, epsilon, data_grad):
    """Return *image* moved by *epsilon* along the sign of *data_grad*.

    Args:
        image: input tensor to perturb.
        epsilon: size of the step applied to every element.
        data_grad: gradient with respect to *image*; only its element-wise
            sign is used.

    Returns:
        The perturbed tensor, clipped to the [-1, 1] range.
    """
    # every element moves by exactly +epsilon, -epsilon or 0
    step = epsilon * data_grad.sign()
    # clipping keeps the result inside the [-1, 1] range
    return (image + step).clamp(-1, 1)

def fgsm_test( model, device, test_loader, epsilon ):
    """Measure the accuracy of *model* under an FGSM attack of size *epsilon*.

    A sample counts as correct only if the model classifies it correctly both
    before and after the perturbation; samples that are misclassified to begin
    with are never counted. Batches of any size are supported.

    Args:
        model: network producing log-probabilities; it is moved to *device*
            and switched to eval mode.
        device: device on which the attack is run.
        test_loader: iterable of ``(data, target)`` batches.
        epsilon: perturbation size handed to ``fgsm_attack``.

    Returns:
        ``(final_acc, adv_examples)`` where ``final_acc`` is the fraction of
        all samples still classified correctly and ``adv_examples`` holds up
        to 10 tuples ``(initial prediction, adversarial prediction,
        adversarial image, original image)`` with the images as numpy arrays.
    """
    correct = 0   # samples still classified correctly after the attack
    total = 0     # samples seen so far
    adv_examples = []

    model.to(device)
    model.eval()

    for data, target in test_loader:
        target = target.to(device)
        # Work on a detached leaf so that gradients can be taken with respect
        # to the input without touching the tensor owned by the loader.
        data = data.to(device).detach().requires_grad_(True)
        batch_size = len(data)
        total += batch_size

        # Zero all existing gradients
        model.zero_grad()

        # Forward pass the data through the model
        output = model(data)
        init_pred = output.argmax(dim=1)  # index of the max log-probability
        labels = target.view_as(init_pred)

        # A sample the model already gets wrong cannot become an adversarial
        # example, so only the initially correct ones are attacked.
        attackable = init_pred.eq(labels)

        if attackable.any():
            # The summed loss gives every sample its own, unscaled gradient.
            loss = F.nll_loss(output, target, reduction='sum')
            loss.backward()

            # Call FGSM Attack
            perturbed_data = fgsm_attack(data, epsilon, data.grad)

            # Re-classify the perturbed images; no gradients needed here
            with torch.no_grad():
                final_pred = model(perturbed_data).argmax(dim=1)

            still_right = final_pred.eq(labels)
            correct += int((attackable & still_right).sum().item())

            # Save some adv examples for visualization later
            if len(adv_examples) < 10:
                fooled = (attackable & ~still_right).nonzero().flatten().tolist()
                for idx in fooled[:10 - len(adv_examples)]:
                    adv_ex = perturbed_data[idx].squeeze().detach().cpu().numpy()
                    real_im = data[idx].squeeze().detach().cpu().numpy()
                    adv_examples.append(
                        (init_pred[idx].item(), final_pred[idx].item(), adv_ex, real_im))

        # Report each time another 1000 samples have been processed, whether
        # or not this batch contained anything to attack.
        if total // 1000 > (total - batch_size) // 1000:
            print('FGSM Attack Iteration: {}'.format(total))

    # Calculate final accuracy for this epsilon over all samples seen
    final_acc = correct / float(total) if total else 0.0
    print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, total, final_acc))

    # Return the accuracy and the saved adversarial examples
    return final_acc, adv_examples

# Results of the sweep, index-aligned with `epsilons`: the accuracy under
# attack and the adversarial samples saved for each perturbation size.
accuracies, adv_examples = [], []

# Attack the trained model once per epsilon
for eps in epsilons:
    acc, ex = fgsm_test(model_cnn, device, test_loader, eps)
    accuracies.append(acc)
    adv_examples.append(ex)

FGSM Attack Iteration: 2000
FGSM Attack Iteration: 3000
FGSM Attack Iteration: 4000
FGSM Attack Iteration: 5000
FGSM Attack Iteration: 6000
FGSM Attack Iteration: 9000
FGSM Attack Iteration: 10000
Epsilon: 0	Test Accuracy = 8169 / 10000 = 0.8169
FGSM Attack Iteration: 1000
FGSM Attack Iteration: 3000
FGSM Attack Iteration: 4000
FGSM Attack Iteration: 5000
FGSM Attack Iteration: 7000
FGSM Attack Iteration: 8000
FGSM Attack Iteration: 9000
FGSM Attack Iteration: 10000
Epsilon: 0.001	Test Accuracy = 7672 / 10000 = 0.7672
FGSM Attack Iteration: 2000
FGSM Attack Iteration: 3000
FGSM Attack Iteration: 4000
FGSM Attack Iteration: 5000
FGSM Attack Iteration: 7000
FGSM Attack Iteration: 9000
FGSM Attack Iteration: 10000
Epsilon: 0.002	Test Accuracy = 7103 / 10000 = 0.7103
FGSM Attack Iteration: 1000
FGSM Attack Iteration: 2000
FGSM Attack Iteration: 5000
FGSM Attack Iteration: 6000
FGSM Attack Iteration: 8000
FGSM Attack Iteration: 9000
FGSM Attack Iteration: 10000
Epsilon: 0.004	Test Accuracy = 5902 / 10000 = 0.5902
FGSM Attack Iteration: 1000
FGSM Attack Iteration: 2000
FGSM Attack Iteration: 3000
FGSM Attack Iteration: 4000
FGSM Attack Iteration: 5000
FGSM Attack Iteration: 6000
FGSM Attack Iteration: 7000
FGSM Attack Iteration: 8000
FGSM Attack Iteration: 9000
Epsilon: 0.008	Test Accuracy = 3849 / 10000 = 0.3849
FGSM Attack Iteration: 1000
FGSM Attack Iteration: 3000
FGSM Attack Iteration: 4000
FGSM Attack Iteration: 5000
FGSM Attack Iteration: 6000
FGSM Attack Iteration: 7000
FGSM Attack Iteration: 9000
FGSM Attack Iteration: 10000
Epsilon: 0.01	Test Accuracy = 3069 / 10000 = 0.3069
FGSM Attack Iteration: 1000
FGSM Attack Iteration: 2000
FGSM Attack Iteration: 3000
FGSM Attack Iteration: 4000
FGSM Attack Iteration: 5000
FGSM Attack Iteration: 7000
FGSM Attack Iteration: 8000
FGSM Attack Iteration: 9000
FGSM Attack Iteration: 10000
Epsilon: 0.02	Test Accuracy = 1037 / 10000 = 0.1037
FGSM Attack Iteration: 1000
FGSM Attack Iteration: 3000
FGSM Attack Iteration: 4000
FGSM Attack Iteration: 5000
FGSM Attack Iteration: 6000
FGSM Attack Iteration: 7000
FGSM Attack Iteration: 8000
FGSM Attack Iteration: 9000
FGSM Attack Iteration: 10000
Epsilon: 0.04	Test Accuracy = 248 / 10000 = 0.0248
FGSM Attack Iteration: 1000
FGSM Attack Iteration: 2000
FGSM Attack Iteration: 3000
FGSM Attack Iteration: 4000
FGSM Attack Iteration: 5000
FGSM Attack Iteration: 6000
FGSM Attack Iteration: 7000
FGSM Attack Iteration: 9000
FGSM Attack Iteration: 10000
Epsilon: 0.08	Test Accuracy = 144 / 10000 = 0.0144
FGSM Attack Iteration: 2000
FGSM Attack Iteration: 3000
FGSM Attack Iteration: 5000
FGSM Attack Iteration: 6000
FGSM Attack Iteration: 7000
FGSM Attack Iteration: 8000
FGSM Attack Iteration: 9000
Epsilon: 0.1	Test Accuracy = 137 / 10000 = 0.0137
FGSM Attack Iteration: 1000
FGSM Attack Iteration: 2000
FGSM Attack Iteration: 3000
FGSM Attack Iteration: 4000
FGSM Attack Iteration: 5000
FGSM Attack Iteration: 6000
FGSM Attack Iteration: 7000
FGSM Attack Iteration: 8000
FGSM Attack Iteration: 10000
Epsilon: 0.2	Test Accuracy = 308 / 10000 = 0.0308

# Accuracy of the attacked model as a function of the perturbation size.
plt.figure(figsize=(5,5))
plt.plot(epsilons, accuracies, "*-")
plt.yticks(numpy.arange(0, 1.1, step=0.1))
# Ticks every 0.05 across the whole epsilon range. Half a step of margin keeps
# the last tick in despite floating-point rounding.
plt.xticks(numpy.arange(0, max(epsilons) + 0.025, step=0.05))
plt.title("Accuracy vs Epsilon")
plt.xlabel("Epsilon")
plt.ylabel("Accuracy")
plt.show()

# Plot several examples of adversarial samples at each epsilon
def imshow2(img, plot):
    """Draw a normalized, channels-first image array on a matplotlib target.

    Args:
        img: array transposed with ``(1, 2, 0)`` before display and
            normalized such that ``img / 2 + 0.5`` restores the displayable
            values.
        plot: any object exposing ``imshow(array)``, e.g. an ``Axes``.
    """
    unnormalized = 0.5 + img / 2
    # matplotlib expects channels last
    channels_last = numpy.transpose(unnormalized, axes=(1, 2, 0))
    plot.imshow(channels_last)

# One figure of four panels per saved adversarial example: the original image,
# the adversarial image, the perturbation at its true scale, and the
# perturbation divided by epsilon.
for eps, examples in zip(epsilons, adv_examples):
    for orig, adv, ex, img in examples:
        noise = ex - img

        fig, (ax1, ax2, ax3, ax4) = plt.subplots(figsize=(6,9), ncols=4)

        imshow2(img, ax1)
        ax1.axis('off')
        ax1.set_title(classNames[orig])

        imshow2(ex, ax2)
        ax2.axis('off')
        ax2.set_title(classNames[adv])

        # imshow2 maps 0 to mid-gray, so passing the noise as-is keeps both
        # its positive and its negative part inside the displayable range.
        imshow2(noise, ax3)
        ax3.axis('off')
        ax3.set_title("noise * {}".format(eps))

        # Rescale to unit size; with eps == 0 the noise is all zeros and is
        # shown unchanged rather than dividing by zero.
        imshow2(noise / eps if eps else noise, ax4)
        ax4.axis('off')
        ax4.set_title("noise")

        plt.tight_layout()
        # Display and release each figure right away; otherwise pyplot keeps
        # every one of them open until the end.
        plt.show()
        plt.close(fig)

/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:14: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

<Figure size 576x1440 with 0 Axes>