# Based on https://github.com/Atcold/pytorch-Deep-Learning/blob/master/10-autoencoder.ipynb
# Import some libraries
import torch
import torchvision
from torch import nn
from torchvision import transforms, datasets
from matplotlib import pyplot as plt
# Run on the first CUDA GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)
# Convert vector to image
def to_img(x):
    """Turn a batch of flattened images into a batch of 28x28 images.

    Values are shifted and scaled with ``0.5 * (x + 1)``, which maps the
    range [-1, 1] onto [0, 1]. The first dimension is kept as the batch
    dimension; everything else is viewed as 28 x 28.
    """
    rescaled = (x + 1) * 0.5
    batch_size = rescaled.size(0)
    return rescaled.view(batch_size, 28, 28)
def _plot_image_row(batch, n):
    """Draw at most ``n`` images from ``batch`` side by side in a new figure."""
    # Slice first so only the images that will be shown are copied to the CPU.
    pics = to_img(batch[:max(n, 0)].detach().cpu())
    count = pics.size(0)
    if count == 0:
        return  # nothing to draw; avoid an empty figure / zero-column grid
    plt.figure(figsize=(18, 6))
    # Use the number of images actually available so a batch shorter than
    # ``n`` does not raise IndexError.
    for i in range(count):
        plt.subplot(1, count, i + 1)
        plt.imshow(pics[i])
        plt.axis('off')


# Display images, n=max number of images to show
def display_images(in_raw, out_raw, n=1):
    """Show a row of input images (optional) and a row of output images.

    Args:
        in_raw: Batch of flattened input images, or ``None`` to skip the
            input row.
        out_raw: Batch of flattened output images.
        n: Maximum number of images to show per row. If a batch holds
            fewer than ``n`` images, only those are shown.

    Each row is drawn in its own figure; ``plt.show()`` is called once at
    the end.
    """
    if in_raw is not None:
        _plot_image_row(in_raw, n)
    _plot_image_row(out_raw, n)
    plt.show()
# Define data loading step
# Images are converted to tensors and then normalised with mean 0.5 and
# std 0.5 (single channel).
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
img_transform = transforms.Compose(preprocessing_steps)

# Training split: downloaded to ../data if missing, served in shuffled batches of 256.
train_set = datasets.MNIST('../data', train=True, download=True, transform=img_transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=256, shuffle=True)

# Test split: shuffled batches of 32.
test_set = datasets.MNIST('../data', train=False, transform=img_transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=True)
# Define model architecture
class Autoencoder(nn.Module):
    """Fully connected autoencoder with one hidden layer on each side.

    The encoder maps ``input_size -> hidden_size -> code_size`` and the
    decoder maps ``code_size -> hidden_size -> input_size``. The decoder
    ends in ``Tanh``, so reconstructions lie in [-1, 1].

    Args:
        input_size: Length of each flattened input vector.
        hidden_size: Width of the hidden layer in encoder and decoder.
        code_size: Dimension of the latent code.
    """

    def __init__(self, input_size, hidden_size=50, code_size=10):
        super().__init__()
        self.hidden_layer_size = hidden_size
        self.code_size = code_size
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, code_size),
        )
        self.decoder = nn.Sequential(
            nn.Linear(code_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size),
            nn.Tanh(),
        )

    def forward(self, x):
        """Encode then decode ``x``; return ``(reconstruction, code)``."""
        code = self.encoder(x)
        reconstruction = self.decoder(code)
        return reconstruction, code

    def generate(self, N=10):
        """Decode ``N`` latent codes sampled from a standard normal.

        Returns a tensor of shape ``(N, input_size)``.
        """
        # Follow the model's own parameters rather than a module-level
        # ``device`` global, so the method works wherever the model lives.
        ref = next(self.parameters())
        z = torch.randn((N, self.code_size)).to(device=ref.device, dtype=ref.dtype)
        return self.decoder(z)
# define the model
n = 28 * 28  # input size: one flattened 28x28 image
h = 20  # size of hidden_layer
l = 20  # latent space dimension
model_1 = Autoencoder(input_size=n, hidden_size=h, code_size=l).to(device)

# loss function: mean squared error
loss_fn = nn.MSELoss()

# Configure the optimiser
learning_rate = 1e-3
optimizer = torch.optim.Adam(model_1.parameters(), lr=learning_rate)
# Train
model_1.train()
num_epochs = 20
for epoch in range(num_epochs):
    for img, _ in train_loader:  # labels are unused: the target is the input itself
        img = img.to(device)
        img = img.view(img.size(0), -1)  # flatten every image to one vector
        # forward step
        # The latent code is discarded here. Binding it to ``h`` would
        # overwrite the hidden-size hyperparameter of the same name.
        output, _ = model_1(img)
        loss = loss_fn(output, img)
        # backward step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # print some statistics
    # ``loss`` and ``output`` are those of the last mini-batch of the epoch.
    if epoch == 0 or (epoch + 1) % 5 == 0:
        print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.item():.4f}')
        display_images(None, output, 4)
# Test
model_1.eval()
img, _ = next(iter(test_loader))  # one batch from the test loader
img = img.to(device)
img = img.view(img.size(0), -1)
# Evaluation only: skip building the autograd graph.
with torch.no_grad():
    # The latent code is discarded so the hyperparameter ``h`` is not overwritten.
    output, _ = model_1(img)
    loss = loss_fn(output, img)
# ===================log========================
print(f'loss:{loss.item():.4f}')
display_images(img, output, 10)
# Generating a few images from randomly sampled z
N = 5
with torch.no_grad():
    generated = model_1.generate(N)
display_images(None, generated, N)
# This uses the same model and loss function as before, but the training process adds noise to the input. The noise is a random dropout mask (p=0.5) that zeroes roughly half of the input pixels.
# define the model
n = 28 * 28  # 28*28 input size
h = 40  # hidden layer size; larger for denoising AE
l = 20  # latent space dimension
model_2 = Autoencoder(input_size=n, hidden_size=h, code_size=l).to(device)

# loss function: mean squared error
loss_fn = nn.MSELoss()

# Configure the optimiser
learning_rate = 1e-3
optimizer = torch.optim.Adam(model_2.parameters(), lr=learning_rate)
# Train
model_2.train()
num_epochs = 20
# We use a dropout layer to apply random noise. A freshly constructed
# nn.Dropout is in training mode: it zeroes entries with probability p and
# scales the survivors by 1 / (1 - p), i.e. by 2 for p=0.5.
do = nn.Dropout(p=0.5)
for epoch in range(num_epochs):
    for img, _ in train_loader:  # labels are unused
        img = img.to(device)
        img = img.view(img.size(0), -1)
        # Build the mask directly on the images' device instead of on the
        # CPU followed by a host-to-device copy every batch.
        noise = do(torch.ones_like(img))  # entries are 0 or 2
        img_bad = img * noise  # random noise applied to image
        # ===================forward=====================
        # The latent code is discarded so the hyperparameter ``h`` is not overwritten.
        output, _ = model_2(img_bad)
        loss = loss_fn(output, img)  # the reconstruction loss is still wrt the normal image
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # print some statistics
    # ``loss``, ``img_bad`` and ``output`` are those of the last mini-batch of the epoch.
    if epoch == 0 or (epoch + 1) % 5 == 0:
        print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.item():.4f}')
        display_images(img_bad, output, 4)
# Test
model_2.eval()
img, _ = next(iter(test_loader))  # one batch from the test loader
img = img.to(device)
img = img.view(img.size(0), -1)
# A freshly constructed nn.Dropout is in training mode, so it zeroes entries
# with probability 0.5 and scales the survivors by 1 / (1 - p) = 2.
do = nn.Dropout(p=0.5)
noise = do(torch.ones_like(img))
# Corrupt the input exactly as the training loop does (img * noise, with no
# extra rescaling) so the reported loss is measured on the same kind of
# input the model was trained on.
img_bad = img * noise
# Evaluation only: skip building the autograd graph.
with torch.no_grad():
    # The latent code is discarded so the hyperparameter ``h`` is not overwritten.
    output, _ = model_2(img_bad)
    loss = loss_fn(output, img)  # loss is measured against the clean image
# ===================log========================
print(f'loss:{loss.item():.4f}')
display_images(img, img_bad, 10)
display_images(None, output, 10)
# Generating a few images from randomly sampled z
N = 5
with torch.no_grad():
    generated = model_2.generate(N)
display_images(None, generated, N)
# Comparison of the simple and the denoising autoencoder: both are fed the same noisy inputs drawn from the test set.
# Test
model_1.eval()  # simple AE
model_2.eval()  # denoising AE
img, _ = next(iter(test_loader))  # one batch from the test loader
img = img.to(device)
img = img.view(img.size(0), -1)
# A freshly constructed nn.Dropout is in training mode, so it zeroes entries
# with probability 0.5 and scales the survivors by 1 / (1 - p) = 2.
do = nn.Dropout(p=0.5)
noise = do(torch.ones_like(img))  # built directly on the images' device
img_bad = img * noise
# Evaluation only: skip building the autograd graph. The latent codes are
# discarded so the hyperparameter ``h`` is not overwritten.
with torch.no_grad():
    output_1, _ = model_1(img_bad)
    output_2, _ = model_2(img_bad)
print("Noisy Images")
display_images(None, img_bad, 10)
print("Reconstruction using simple autoencoder")
display_images(None, output_1, 10)
print("Reconstruction using denoising autoencoder")
display_images(None, output_2, 10)