Generate realistic looking images of celebrity faces
Based on: and
First, we import the libraries we need. These are the standard imports for this kind of project.
import glob
import os
import pickle as pkl
import random
import warnings

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from IPython.display import display
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torchvision.utils import make_grid

%matplotlib inline
We will use this function throughout the notebook to make our experiments deterministic. After calling it, the random functions of NumPy and pandas behave deterministically. To learn more about deterministic neural networks, please check out this notebook.
def seed_everything(seed=1234):
    """Seed every random number generator the experiments rely on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic
    mode so repeated runs produce the same numbers.

    :param seed: Integer seed shared by all generators
    """
    # NOTE: changing PYTHONHASHSEED at runtime only affects child processes;
    # the hash seed of the running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe to call without a GPU: it is silently ignored when CUDA is absent.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different kernels from run to run.
    torch.backends.cudnn.benchmark = False
A DataLoader provides an iterable over the specified dataset by combining the dataset with a sampler.
# Let's define the path to the image folder.
# NOTE(review): this looks like the Kaggle CelebA input directory -- adjust it for your environment.
data_dir = '../input/celeba-dataset/img_align_celeba/'
# defining the image transform rules
The augmentations are as follows:
1. Resize the image to 32x32, since the smaller the image, the faster we can train
2. Crop a 32x32 region from the center
3. Convert the image to a tensor
We won't normalize here because we will have to do that manually later for the tanh activation function
# Image preprocessing pipeline: resize, center-crop to 32x32, convert to tensor.
# No Normalize step here -- rescaling for the tanh generator is done manually later.
transform_img = transforms.Compose([
    transforms.Resize(32),      # 1. resize so the image fits the 32 px target
    transforms.CenterCrop(32),  # 2. keep the central 32x32 square
    transforms.ToTensor(),      # 3. convert the PIL image to a tensor
])
def get_dataloader(batch_size, data_dir, transforms):
    """Batch the neural network data using DataLoader.

    :param batch_size: The size of each batch; the number of images in a batch
    :param data_dir: Directory where image data is located
    :param transforms: data augmentations applied to every image
    :return: DataLoader with batched, shuffled data
    """
    # NOTE: the ``transforms`` argument shadows the torchvision module of the
    # same name inside this function; the name is kept for existing callers.
    # Use the transform that was passed in rather than a module-level global.
    ds = datasets.ImageFolder(data_dir, transforms)
    data_loader = torch.utils.data.DataLoader(
        ds, batch_size=batch_size, num_workers=4, shuffle=True
    )
    return data_loader
# Instead of individual samples, the data loader produces batched samples of the given size.
batch_size = 32
train_loader = get_dataloader(batch_size, data_dir, transform_img)

# Let's see if the train loader is working and sending us iterable images :)
# The images were converted to tensors with ToTensor(), so we convert them
# back to NumPy arrays to plot them.

# Obtain one batch of training images, which means 32 images.
dataiter = iter(train_loader)
# The loader yields (images, labels) pairs; we don't need the labels, hence `_`.
# The built-in next() is the supported way to pull one batch from the iterator.
img, _ = next(dataiter)

# Plot the images in the batch on a 3 x 10 grid.
fig = plt.figure(figsize=(30, 7))
plot_size = 30  # plot only the first 30 images of the batch
for idx in np.arange(plot_size):
    # add_subplot expects integer grid dimensions, so use floor division.
    ax = fig.add_subplot(3, plot_size // 3, idx + 1, xticks=[], yticks=[])
    npimg = img[idx].numpy()
    # Move the channel axis last (C, H, W) -> (H, W, C) for imshow.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
Now we need to scale our images as the output of a tanh activated generator will contain pixel values in a range from -1 to 1. So, we need to rescale our training images to a range of -1 to 1.
def scale_images(x, max=1.00, min=-1.00):
    """Linearly map ``x`` onto the interval ``[min, max]``.

    A value of 0 is sent to ``min`` and a value of 1 is sent to ``max``, so
    inputs are expected to lie in [0, 1] for the result to stay in range.

    :param x: Image tensor (or number) to rescale
    :param max: Upper end of the target range
    :param min: Lower end of the target range
    :return: The rescaled value
    """
    span = max - min
    return x * span + min
# Let's check the scaling on a single image: index 5 of the current `img` batch.
# Note that `img` is rebound here from the batch to that one image.
img = img[5]
# Value range before rescaling.
print('Before scaling min: ', img.min())
print('Before scaling max: ', img.max())
# Rescale with the default target range of scale_images.
scaled_img = scale_images(img)
# Value range after rescaling.
print('After Scaling Min: ', scaled_img.min())
print('After Scaling Max: ', scaled_img.max())
Now our min and max are within the range [-1, 1].
The discriminator is a classifier that detects if the input samples are genuine or fabricated. As a result, the discriminator in a GAN is essentially a classifier. It attempts to discern between genuine data and data generated by the generator. It could utilize any network architecture suitable for the type of data it's classifying. Here I will be using convolutional classifier, only without any maxpooling layers. It is recommended to employ a deep network with normalization to deal with this kind of difficult data.
The inputs to the discriminator are 32x32x3 tensor images
The output would be a single value that will indicate whether a given image is real or fake
def conv(in_channels, out_channels, kernel_size=4, stride=2, padding=1, batch_norm=True, bias=False):
    """Build a convolutional layer with optional batch normalization.

    :param in_channels: Number of channels of the input feature map
    :param out_channels: Number of channels produced by the convolution
    :param kernel_size: Size of the convolution kernel
    :param stride: Stride of the convolution
    :param padding: Zero-padding added to both sides of the input
    :param batch_norm: Whether to append a BatchNorm2d layer after the convolution
    :param bias: Whether the convolution learns an additive bias
    :return: nn.Sequential containing the convolution and, optionally, batch norm
    """
    layers = []
    # Forward ``bias`` to the layer so the parameter is actually honored.
    conv_layer = nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                           kernel_size=kernel_size, stride=stride, padding=padding,
                           bias=bias)
    # appending convolutional layer
    layers.append(conv_layer)
    # appending batch norm layer
    if batch_norm:
        layers.append(nn.BatchNorm2d(out_channels))
    return nn.Sequential(*layers)
class Discriminator(nn.Module):
    """Convolutional classifier that scores an image as real or fake.

    Four ``conv`` blocks (the first without batch norm) are followed by one
    fully connected layer that produces a single logit per image.
    """

    def __init__(self, conv_dim):
        """Initialize the Discriminator module.

        :param conv_dim: The depth of the first convolutional layer; every
            following convolution doubles the depth of the previous one
        """
        super().__init__()
        self.conv_dim = conv_dim

        # Depth grows conv_dim -> 2x -> 4x -> 8x across the four blocks.
        self.conv1 = conv(3, conv_dim, batch_norm=False)
        self.conv2 = conv(conv_dim, conv_dim * 2)
        self.conv3 = conv(conv_dim * 2, conv_dim * 4)
        self.conv4 = conv(conv_dim * 4, conv_dim * 8)

        # conv_dim * 32 flattened features feed the final classifier
        # (presumably 8 * conv_dim channels on a 2x2 map for 32x32 inputs -- TODO confirm).
        self.fc = nn.Linear(conv_dim * 32, 1)

    def forward(self, x):
        """Forward propagation of the neural network.

        :param x: The input to the neural network
        :return: Discriminator logits; the output of the neural network
        """
        for block in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = F.leaky_relu(block(x), 0.2)
        flat = x.view(-1, self.conv_dim * 32)
        return self.fc(flat)
The name is self-explanatory. The generator component of a GAN learns to generate fake data by incorporating feedback from the discriminator. It learns to fool the discriminator so that its output is classified as real. The generator should upsample an input and create a new image with the same dimensions as our training data (32x32x3). It should mostly consist of transpose convolutional layers with normalized output.
def deconv(in_channels, out_channels, kernel_size=4, stride=2, padding=1, batch_norm=True, bias=False):
    """Build a transpose-convolutional layer with optional batch normalization.

    :param in_channels: Number of channels of the input feature map
    :param out_channels: Number of channels produced by the transpose convolution
    :param kernel_size: Size of the convolution kernel
    :param stride: Stride of the transpose convolution (upsampling factor)
    :param padding: Padding argument forwarded to ConvTranspose2d
    :param batch_norm: Whether to append a BatchNorm2d layer after the layer
    :param bias: Whether the transpose convolution learns an additive bias
    :return: nn.Sequential containing the layer and, optionally, batch norm
    """
    layers = []
    # append transpose conv layer -- we are not using bias terms in conv layers,
    # so ``bias`` (False by default) is forwarded explicitly.
    layers.append(nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding,
                                     bias=bias))
    # optional batch norm layer
    if batch_norm:
        layers.append(nn.BatchNorm2d(out_channels))
    return nn.Sequential(*layers)
class Generator(nn.Module):
    """Upsampling network that turns a latent vector into an image.

    A fully connected layer (followed by dropout) is reshaped into a 4x4
    feature map, which three ``deconv`` blocks then upsample; the last block
    has 3 output channels and its output is squashed with tanh.
    """

    def __init__(self, z_size, conv_dim):
        """Initialize the Generator module.

        :param z_size: The length of the input latent vector, z
        :param conv_dim: The depth of the inputs to the *last* transpose convolutional layer
        """
        super().__init__()
        self.conv_dim = conv_dim

        # Projects z to conv_dim * 4 channels on a 4x4 grid (see the view in forward).
        self.fc = nn.Linear(z_size, conv_dim * 4 * 4 * 4)

        # Channel depth shrinks 4x -> 2x -> 1x -> 3 (RGB) across the blocks.
        self.de_conv1 = deconv(conv_dim * 4, conv_dim * 2)
        self.de_conv2 = deconv(conv_dim * 2, conv_dim)
        self.de_conv3 = deconv(conv_dim, 3, 4, batch_norm=False)

        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Forward propagation of the neural network.

        :param x: The input to the neural network
        :return: A 32x32x3 Tensor image as output
        """
        projected = self.dropout(self.fc(x))
        feature_map = projected.view(-1, self.conv_dim * 4, 4, 4)

        feature_map = F.relu(self.de_conv1(feature_map))
        feature_map = F.relu(self.de_conv2(feature_map))

        # tanh keeps the generated pixel values within [-1, 1].
        return F.tanh(self.de_conv3(feature_map))
To help the models converge, we should initialize the weights of the convolutional and linear layers in our models. The original DCGAN paper says:
All weights were initialized from a zero-centered Normal distribution with standard deviation 0.02.
#Initializing the weights to a normal distribution, centered around 0, with a standard deviation of 0.02.
def weights_init_normal(m):
    """Initialize convolutional and linear layers as described in the DCGAN paper.

    Weights are drawn from a zero-centered normal distribution with a standard
    deviation of 0.02 and biases, when present, are set to zero. Modules of
    any other type are left untouched. Intended for ``module.apply(...)``.

    :param m: A module or layer in a network
    """
    # classname will be something like `Conv`, `BatchNorm2d`, `Linear`, etc.
    classname = m.__class__.__name__

    # Only convolutional and linear layers are re-initialized.
    is_target = classname.find('Conv') != -1 or classname.find('Linear') != -1
    if is_target and hasattr(m, 'weight'):
        # In-place (underscore) initializers replace the deprecated
        # nn.init.normal / nn.init.constant; std is 0.02 as in the paper.
        nn.init.normal_(m.weight, 0.0, 0.02)
        if hasattr(m, 'bias') and m.bias is not None:
            nn.init.constant_(m.bias, 0.0)
# model hyperparameters
d_conv_dim = 64    # depth of the discriminator's first convolutional layer
g_conv_dim = 128   # depth of the inputs to the generator's last transpose convolution
z_size = 100       # length of the latent vector fed to the generator

# building discriminator and generator from the classes defined above
discriminator = Discriminator(d_conv_dim)
generator = Generator(z_size=z_size, conv_dim=g_conv_dim)

# initialize model weights (apply() visits every sub-module recursively)
discriminator.apply(weights_init_normal)
generator.apply(weights_init_normal)

# let's look at our discriminator model
print(discriminator)

# let's look at our generator model
print(generator)

# train on the GPU whenever one is available
use_gpu = torch.cuda.is_available()
First, let's define the essentials needed to train the model.
lr = 0.0002  # learning rate
# Adam momentum terms: beta1 = 0.5 is the value recommended for DCGAN training,
# beta2 = 0.999 is Adam's default.
beta1 = 0.5
beta2 = 0.999

# optimizers for the discriminator D and generator G
discriminator_optimizer = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=(beta1, beta2))  # for discriminator
generator_optimizer = torch.optim.Adam(generator.parameters(), lr=lr, betas=(beta1, beta2))  # for generator
The generator's goal is to get the discriminator to think its generated images are real.
def real_loss(D_out, smooth=False):
    '''Calculates how close discriminator outputs are to being real.

    param, D_out: discriminator logits, one per sample
    param, smooth: if True, use a target of 0.9 instead of 1.0 (label smoothing)
    return: real loss'''
    batch_size = D_out.size(0)
    # Smoothed and hard targets are mutually exclusive.
    target = 0.9 if smooth else 1.0
    # Create the labels on the same device/dtype as the logits instead of
    # relying on a global GPU flag.
    labels = torch.full((batch_size,), target, device=D_out.device, dtype=D_out.dtype)
    criterion = nn.BCEWithLogitsLoss()
    # reshape(-1) keeps a 1-D tensor even for a batch of one, where squeeze()
    # would collapse to a scalar and no longer match the labels' shape.
    loss = criterion(D_out.reshape(-1), labels)
    return loss
def fake_loss(D_out):
    '''Calculates how close discriminator outputs are to being fake.

    param, D_out: discriminator logits, one per sample
    return: fake loss'''
    batch_size = D_out.size(0)
    # Create the all-zero labels on the same device/dtype as the logits
    # instead of relying on a global GPU flag.
    labels = torch.zeros(batch_size, device=D_out.device, dtype=D_out.dtype)
    criterion = nn.BCEWithLogitsLoss()
    # reshape(-1) keeps a 1-D tensor even for a batch of one, where squeeze()
    # would collapse to a scalar and no longer match the labels' shape.
    loss = criterion(D_out.reshape(-1), labels)
    return loss
This train function was provided in the Udacity Deep Learning Nanodegree course. It's great.
We train the discriminator by alternating between real and fake images, and then train the generator, which tries to trick the discriminator and therefore has an opposing loss function.
def train(D, G, n_epochs, train_on_gpu, print_every=50):
    '''Trains adversarial networks for some number of epochs.

    Relies on the notebook-level names train_loader, z_size, scale_images,
    real_loss, fake_loss, discriminator_optimizer and generator_optimizer.
    After every epoch a fixed batch of latent vectors is pushed through G;
    the collected samples are pickled to 'train_samples.pkl' at the end.

    param, D: the discriminator network
    param, G: the generator network
    param, n_epochs: number of epochs to train for
    param, train_on_gpu: whether to move the models and the data to the GPU
    param, print_every: when to print and record the models' losses
    return: D and G losses'''

    # move models to GPU
    if train_on_gpu:
        D.cuda()
        G.cuda()

    # keep track of loss and generated, "fake" samples
    samples = []
    losses = []

    # Get some fixed data for sampling. These are images that are held
    # constant throughout training, and allow us to inspect the model's performance
    sample_size = 16  # matches the 2 x 8 grid drawn by view_samples
    fixed_z = np.random.uniform(-1, 1, size=(sample_size, z_size))
    fixed_z = torch.from_numpy(fixed_z).float()
    # move z to GPU if available
    if train_on_gpu:
        fixed_z = fixed_z.cuda()

    # epoch training loop
    for epoch in range(n_epochs):

        # batch training loop
        for batch_i, (real_images, _) in enumerate(train_loader):

            batch_size = real_images.size(0)
            # rescale the images to the generator's tanh output range
            real_images = scale_images(real_images)

            # 1. Train the discriminator on real and fake images
            discriminator_optimizer.zero_grad()

            if train_on_gpu:
                real_images = real_images.cuda()

            D_real = D(real_images)
            d_real_loss = real_loss(D_real)

            z = np.random.uniform(-1, 1, size=(batch_size, z_size))
            z = torch.from_numpy(z).float()
            if train_on_gpu:
                z = z.cuda()
            # detach: the discriminator step must not back-propagate into G
            fake_images = G(z).detach()

            D_fake = D(fake_images)
            d_fake_loss = fake_loss(D_fake)

            d_loss = d_real_loss + d_fake_loss
            d_loss.backward()
            discriminator_optimizer.step()

            # 2. Train the generator with an adversarial loss
            generator_optimizer.zero_grad()

            z = np.random.uniform(-1, 1, size=(batch_size, z_size))
            z = torch.from_numpy(z).float()
            if train_on_gpu:
                z = z.cuda()
            fake_images = G(z)

            # flipped labels: the generator wants its fakes to be scored as real
            D_fake = D(fake_images)
            g_loss = real_loss(D_fake)
            g_loss.backward()
            generator_optimizer.step()

            # Print some loss stats
            if batch_i % print_every == 0:
                # append discriminator loss and generator loss
                losses.append((d_loss.item(), g_loss.item()))
                # print discriminator and generator loss
                print('Epoch [{:5d}/{:5d}] | d_loss: {:6.4f} | g_loss: {:6.4f}'.format(
                    epoch+1, n_epochs, d_loss.item(), g_loss.item()))

        # after each epoch: generate and save sample, fake images
        G.eval()  # for generating samples
        with torch.no_grad():  # no autograd graph is needed for stored samples
            samples_z = G(fixed_z)
        samples.append(samples_z)
        G.train()  # back to training mode

    # Save training generator samples
    with open('train_samples.pkl', 'wb') as f:
        pkl.dump(samples, f)

    # finally return losses
    return losses
# Train both networks and collect the recorded (discriminator, generator) loss pairs.
n_epochs = 10
losses = train(discriminator, generator, n_epochs=n_epochs, train_on_gpu=use_gpu)

# Plot both loss curves; column 0 holds the discriminator loss, column 1 the generator loss.
fig, ax = plt.subplots()
losses = np.array(losses)
plt.plot(losses.T[0], label='Discriminator', alpha=0.5)
plt.plot(losses.T[1], label='Generator', alpha=0.5)
plt.title("Training Losses")
# Without a legend the labels given above are never displayed.
plt.legend()
def view_samples(epoch, samples):
    """Show the generator samples stored for one training epoch on a 2 x 8 grid.

    :param epoch: Index into ``samples``; negative values count from the end
    :param samples: Sequence of image batches, one entry per epoch. Each image
        is presumably a CxHxW tensor with values in [-1, 1] (the generator's
        tanh range) -- confirm against the code that produced the samples
    """
    fig, axes = plt.subplots(figsize=(16, 4), nrows=2, ncols=8, sharey=True, sharex=True)
    for ax, img in zip(axes.flatten(), samples[epoch]):
        img = img.detach().cpu().numpy()
        # Move the channel axis last (C, H, W) -> (H, W, C) for imshow.
        img = np.transpose(img, (1, 2, 0))
        # Map [-1, 1] onto the 8-bit pixel range [0, 255].
        img = ((img + 1) * 255 / 2).astype(np.uint8)
        # The transposed array already has the layout imshow expects, so no
        # fixed-size reshape is needed and any image resolution works.
        ax.imshow(img)
# Read back the pickled generator samples and display the last entry.
# NOTE: pickle must only be used on trusted files -- never unpickle a file
# obtained from an untrusted source.
with open('train_samples.pkl', 'rb') as f:
    samples = pkl.load(f)

_ = view_samples(-1, samples)