AML 07 Autoencoders and variants¶

based on https://github.com/Atcold/pytorch-Deep-Learning/blob/master/10-autoencoder.ipynb

# Import some libraries

import torch
import torchvision
from torch import nn
from torchvision import transforms, datasets
from matplotlib import pyplot as plt

# Run on the first CUDA GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0

Utility functions¶

# Convert vector to image
def to_img(x):
    """Reshape a batch of flattened digits into 28x28 images.

    Values are shifted and scaled with 0.5 * (x + 1), so -1 maps to 0 and
    1 maps to 1 (this undoes the Normalize((0.5,), (0.5,)) step of the
    data-loading transform).

    x: tensor whose first dimension is the batch and which holds
       28 * 28 values per sample.
    Returns a tensor of shape (batch, 28, 28).
    """
    batch_size = x.size(0)
    rescaled = (x + 1) * 0.5
    return rescaled.view(batch_size, 28, 28)


# Display images, n=max number of images to show
def _show_image_row(batch, n):
    """Draw at most n images from batch side by side in a new figure."""
    pics = to_img(batch[:n].detach().cpu())
    # The batch may hold fewer than n samples; size the grid by what is
    # actually there instead of indexing past the end.
    count = pics.size(0)
    plt.figure(figsize=(18, 6))
    for i in range(count):
        plt.subplot(1, count, i + 1)
        plt.imshow(pics[i])
        plt.axis('off')


def display_images(in_raw, out_raw, n=1):
    """Show up to n input images and up to n output images.

    in_raw:  batch of flattened input images, or None to skip the input row.
    out_raw: batch of flattened output images.
    n:       maximum number of images drawn per row; if a batch contains
             fewer than n images, only the available ones are drawn.

    Each row is drawn in its own figure and both are shown with a single
    plt.show() call.
    """
    if in_raw is not None:
        _show_image_row(in_raw, n)
    _show_image_row(out_raw, n)
    plt.show()

Data loader functions¶

# Define data loading step

# Convert each image to a tensor, then normalise with mean 0.5 and std 0.5
# (i.e. (x - 0.5) / 0.5); to_img() applies the inverse mapping for display.
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Both splits live under the same root; only the training split requests
# a download, and it is constructed first.
mnist_root = '../data'
train_set = datasets.MNIST(mnist_root, train=True, download=True,
                           transform=img_transform)
test_set = datasets.MNIST(mnist_root, train=False, transform=img_transform)

# Large shuffled batches for training; small shuffled batches for testing so
# that each next(iter(test_loader)) yields a different sample of digits.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=256,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=32,
                                          shuffle=True)

Simple autoencoder¶

# Define model architecture
class Autoencoder(nn.Module):
    """Fully connected autoencoder with one hidden layer on each side.

    The encoder maps input_size -> hidden_size -> code_size with a ReLU in
    between; the decoder mirrors it back to input_size and ends in Tanh, so
    reconstructions lie in [-1, 1].

    input_size:  number of values per (flattened) input sample.
    hidden_size: width of the hidden layer in encoder and decoder.
    code_size:   dimension of the latent code.
    """

    def __init__(self, input_size, hidden_size=50, code_size=10):
        super().__init__()
        self.hidden_layer_size = hidden_size
        self.code_size = code_size

        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, code_size),
            # Single-layer alternative (no hidden layer):
            # nn.Linear(input_size, code_size),
        )
        self.decoder = nn.Sequential(
            nn.Linear(code_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size),
            # Single-layer alternative (no hidden layer):
            # nn.Linear(code_size, input_size),
            nn.Tanh()
        )

    def forward(self, x):
        """Encode x and decode it again.

        Returns a tuple (reconstruction, code): the decoder output and the
        latent code produced by the encoder.
        """
        code = self.encoder(x)
        reconstruction = self.decoder(code)
        return reconstruction, code

    def generate(self, N=10):
        """Decode N latent vectors sampled from a standard normal.

        The latent samples are created on the device and with the dtype of
        the decoder's own parameters, so this works wherever the model has
        been moved rather than depending on a module-level ``device``.

        Returns a tensor of shape (N, input_size).
        """
        reference = next(self.decoder.parameters())
        z = torch.randn(
            (N, self.code_size),
            device=reference.device,
            dtype=reference.dtype,
        )
        return self.decoder(z)

Training the Autoencoder¶

# Model dimensions
n = 784  # input size: 28*28
h = 20   # size of hidden_layer
l = 20   # latent space dimension

model_1 = Autoencoder(n, h, l).to(device)

# Adam optimiser over all parameters of the model
learning_rate = 1e-3
optimizer = torch.optim.Adam(model_1.parameters(), lr=learning_rate)

# Reconstruction loss: mean squared error between output and input
loss_fn = nn.MSELoss()

# Train
num_epochs = 20
model_1.train()
for epoch in range(num_epochs):
    for img, _ in train_loader:
        # move the batch to the device and flatten every image to a vector
        img = img.to(device).view(img.size(0), -1)
        # clear gradients left over from the previous step
        optimizer.zero_grad()
        # forward step: the target of the reconstruction is the input itself
        output, code = model_1(img)
        loss = loss_fn(output, img.detach())
        # backward step
        loss.backward()
        optimizer.step()
    # After the first epoch and every fifth one, report the loss of the last
    # mini-batch and show a few of its reconstructions.
    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.item():.4f}')
        display_images(None, output, 4)

epoch [1/20], loss:0.2286

epoch [5/20], loss:0.1615

epoch [10/20], loss:0.1488

epoch [15/20], loss:0.1551

epoch [20/20], loss:0.1537

Try out some examples from the Test Set¶

# Test: reconstruct one (shuffled) batch of test images with the simple AE
model_1.eval()
test_batches = iter(test_loader)
img, _ = next(test_batches)
# move to the device and flatten every image to a vector
img = img.to(device).view(img.size(0), -1)
output, code = model_1(img)
loss = loss_fn(output, img.detach())
# ===================log========================
print(f'loss:{loss.item():.4f}')
# originals in the first figure, reconstructions in the second
display_images(img, output, 10)

loss:0.1349

# Decode a few latent vectors z sampled at random and show the results
N = 5
generated = model_1.generate(N)
display_images(None, generated, N)

Denoising Autoencoder¶

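This uses the same model architecture and loss function as before, but during training the input is corrupted with noise while the loss is still computed against the clean image. The noise is a random dropout mask (each pixel is zeroed with probability 0.5); a Gaussian-noise alternative is left commented out in the code.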

# define the model
n = 784  # 28*28 input size
h = 40  # hidden layer size; larger for denoising AE
l = 20  # latent space dimension

model_2 = Autoencoder(n, h, l).to(device)

# Configure the optimiser
learning_rate = 1e-3
optimizer = torch.optim.Adam(
    model_2.parameters(),
    lr=learning_rate,
)

# loss function
loss_fn = nn.MSELoss()

# Train
model_2.train()
num_epochs = 20
# A freshly constructed Dropout module is in training mode, so applying it to
# a tensor of ones yields a random mask: each entry is 0 with probability p
# and 1 / (1 - p) = 2 otherwise (dropout rescales the entries it keeps).
# NOTE: the corrupted image therefore has its surviving pixels doubled.
do = nn.Dropout(p=0.5)
for epoch in range(num_epochs):
    for img, _ in train_loader:
        img = img.to(device)
        img = img.view(img.size(0), -1)
        # Gaussian alternative:
        # noise = torch.normal(0.0, 1.0, size=img.shape).to(device)
        # Build the mask directly on the image's device instead of creating
        # it on the CPU and copying it over for every batch.
        noise = do(torch.ones_like(img))
        img_bad = img * noise
        # ===================forward=====================
        output, code = model_2(img_bad)
        # the reconstruction loss is still wrt the clean image
        loss = loss_fn(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # After the first epoch and every fifth one, report the loss of the last
    # mini-batch and show corrupted inputs next to their reconstructions.
    if epoch == 0 or (epoch + 1) % 5 == 0:
        print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.item():.4f}')
        display_images(img_bad, output, 4)

epoch [1/20], loss:0.1820

epoch [5/20], loss:0.1165

epoch [10/20], loss:0.1095

epoch [15/20], loss:0.0942

epoch [20/20], loss:0.0949

Test images¶

# Test: denoise one (shuffled) batch of corrupted test images
model_2.eval()
img, _ = next(iter(test_loader))
img = img.to(device)
img = img.view(img.size(0), -1)
# Gaussian alternative:
# noise = torch.normal(0.0, 1.0, size=img.shape).to(device)
# A fresh Dropout module is in training mode, so on a tensor of ones it gives
# a mask whose entries are 0 (probability 0.5) or 1 / (1 - 0.5) = 2.
do = nn.Dropout(p=0.5)
noise = do(torch.ones_like(img))
# Corrupt the batch exactly as the training loop does (img * noise, with no
# extra rescaling) so the model is evaluated on the same kind of input it
# was trained on.
img_bad = img * noise
output, code = model_2(img_bad)
# the loss compares the reconstruction with the clean image
loss = loss_fn(output, img)
# ===================log========================
print(f'loss:{loss.item():.4f}')
# clean and corrupted images first, then the reconstructions
display_images(img, img_bad, 10)
display_images(None, output, 10)

loss:0.1257

# Decode a few latent vectors z sampled at random and show the results
N = 5
generated = model_2.generate(N)
display_images(None, generated, N)

Compare Simple Autoencoder with Denoising Autoencoder¶

Both models are fed the same noisy inputs, drawn from the test set.

# Compare both models on the same corrupted test batch
model_1.eval()  # simple AE
model_2.eval()  # denoising AE

img, _ = next(iter(test_loader))
# move to the device and flatten every image to a vector
img = img.to(device).view(img.size(0), -1)

# Gaussian alternative:
# noise = torch.normal(0.0, 0.5, size=img.shape).to(device)
# Random dropout mask applied to a tensor of ones, then moved to the device
do = nn.Dropout(p=0.5)
mask_source = torch.ones(img.shape)
noise = do(mask_source).to(device)

img_bad = (img * noise).to(device)
output_1, code = model_1(img_bad)
output_2, code = model_2(img_bad)

# One titled row of images per entry, in this order
panels = [
    ("Noisy Images", img_bad),
    ("Reconstruction using simple autoencoder", output_1),
    ("Reconstruction using denoising autoencoder", output_2),
]
for title, batch in panels:
    print(title)
    display_images(None, batch, 10)

Noisy Images

Reconstruction using simple autoencoder

Reconstruction using denoising autoencoder