Based on https://github.com/karpathy/char-rnn
! wget "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt" -c -P {'data/'}
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical
import numpy as np
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# Load the data into memory
data_file = "./data/input.txt"
#data_file = "./data/sherlock.txt"
## Open the text file
with open(data_file, 'r') as f:
    data = f.read(20000)  ## read only the first ~20KB; training on the full file takes much longer
chars = sorted(list(set(data)))
## NOTE: vocab_size is determined by the data and sets the input/output size of our models
data_size, vocab_size = len(data), len(chars)
print("Data has {} characters, {} unique".format(data_size, vocab_size))
## char to index and index to char maps
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }
## convert data from chars to indices
data = torch.tensor([char_to_ix[ch] for ch in data]).to(device)
## add a batch dimension: shape (data_size, 1) = (seq, batch), the default layout for nn.RNN/nn.LSTM
data = torch.unsqueeze(data, dim=1)
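As a quick sanity check (not part of the original notebook), we can confirm the tensor layout and that the index mapping round-trips back to text:
## Illustrative sanity check: data is (data_size, 1), i.e. (seq, batch) for nn.RNN/nn.LSTM
print(data.shape)
## decode the first 50 indices back to characters
print("".join(ix_to_char[ix.item()] for ix in data[:50, 0]))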
class myRNN(nn.Module):
def __init__(self, input_size, output_size, hidden_size=512, num_layers=3, do_dropout=False):
super(myRNN, self).__init__()
self.input_size = input_size
self.output_size = output_size
self.hidden_size = hidden_size
self.num_layers = num_layers
self.do_dropout = do_dropout
self.dropout = nn.Dropout(0.5)
self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
self.decoder = nn.Linear(hidden_size, output_size)
self.hidden_state = None # the hidden state of the RNN
def forward(self, input_seq):
x = nn.functional.one_hot(input_seq, self.input_size).float()
if self.do_dropout:
x = self.dropout(x)
x, new_hidden_state = self.rnn(x, self.hidden_state)
output = self.decoder(x)
        # save the hidden state for the next batch; detach() cuts the autograd graph
        # so gradients do not flow across batch boundaries (truncated BPTT)
self.hidden_state = new_hidden_state.detach()
return output
def save_model(self, path):
torch.save(self.state_dict(), path)
def load_model(self, path):
try:
self.load_state_dict(torch.load(path))
except Exception as err:
print("Error loading model from file", path)
print(err)
print("Initializing model weights to default")
self.__init__(self.input_size, self.output_size, self.hidden_size, self.num_layers)
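A minimal shape check for myRNN (illustrative; demo_rnn and demo_in are hypothetical names, not part of the original code). The model produces one logit vector per input character:
demo_rnn = myRNN(vocab_size, vocab_size, hidden_size=64, num_layers=1).to(device)
demo_in = data[:10]                   # (10, 1): ten characters, batch size 1
demo_out = demo_rnn(demo_in)
print(demo_in.shape, demo_out.shape)  # torch.Size([10, 1]) and torch.Size([10, 1, vocab_size])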
class myLSTM(nn.Module):
def __init__(self, input_size, output_size, hidden_size=512, num_layers=3, do_dropout=False):
super(myLSTM, self).__init__()
self.input_size = input_size
self.output_size = output_size
self.hidden_size = hidden_size
self.num_layers = num_layers
self.do_dropout = do_dropout
self.dropout = nn.Dropout(0.5)
self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
self.decoder = nn.Linear(hidden_size, output_size)
        self.internal_state = None # the internal state of the LSTM:
                                   # the hidden state (short-term memory) and
                                   # the cell state (long-term memory)
def forward(self, input_seq):
x = nn.functional.one_hot(input_seq, self.input_size).float()
if self.do_dropout:
x = self.dropout(x)
x, new_internal_state = self.lstm(x, self.internal_state)
output = self.decoder(x)
self.internal_state = (new_internal_state[0].detach(), new_internal_state[1].detach())
return output
def save_model(self, path):
torch.save(self.state_dict(), path)
def load_model(self, path):
try:
self.load_state_dict(torch.load(path))
except Exception as err:
print("Error loading model from file", path)
print(err)
print("Initializing model weights to default")
self.__init__(self.input_size, self.output_size, self.hidden_size, self.num_layers)
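The only structural difference from myRNN is the state: nn.LSTM carries a (hidden state, cell state) tuple rather than a single tensor. A quick illustrative check (demo_lstm is hypothetical):
demo_lstm = myLSTM(vocab_size, vocab_size, hidden_size=64, num_layers=1).to(device)
_ = demo_lstm(data[:10])
h, c = demo_lstm.internal_state       # short-term (hidden) and long-term (cell) state
print(h.shape, c.shape)               # both (num_layers, batch, hidden) = torch.Size([1, 1, 64])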
# function to count number of parameters
def get_n_params(model):
    n_params = 0  # avoid calling this "np", which would shadow the numpy import
    for p in model.parameters():
        n_params += p.nelement()
    return n_params
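As a cross-check of get_n_params, the count can be derived by hand from the nn.RNN and nn.Linear parameter shapes. A sketch (expected_rnn_params is a hypothetical helper, not in the original):
def expected_rnn_params(input_size, output_size, hidden_size, num_layers):
    ## first RNN layer: W_ih (hidden x input), W_hh (hidden x hidden), biases b_ih and b_hh
    n = hidden_size * input_size + hidden_size * hidden_size + 2 * hidden_size
    ## deeper layers take the previous layer's hidden state as input
    n += (num_layers - 1) * (2 * hidden_size * hidden_size + 2 * hidden_size)
    ## linear decoder: weight (output x hidden) plus bias
    n += output_size * hidden_size + output_size
    return n
## should match: get_n_params(myRNN(vocab_size, vocab_size, 512, 3).to(device))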
def train(rnn_model, epoch, seq_len = 200):
    # seq_len is the length of each training chunk
rnn_model.train()
# loss function
loss_fn = nn.CrossEntropyLoss()
test_output_len = 200 # total num of characters in output test sequence
    ## random starting point in [0, seq_len-1] to partition the data into chunks of length seq_len
    ## This is Truncated Backpropagation Through Time (TBPTT)
    data_ptr = np.random.randint(seq_len)
running_loss = 0
    n = 0
    if epoch % 10 == 0 or epoch in (1, 2, 3):
print("\n\n\n\nStart of Epoch: {0}".format(epoch))
while True:
input_seq = data[data_ptr : data_ptr+seq_len]
target_seq = data[data_ptr+1 : data_ptr+seq_len+1]
        # .to() returns a new tensor and must be reassigned (a no-op here, since data is already on device)
        input_seq = input_seq.to(device)
        target_seq = target_seq.to(device)
optimizer.zero_grad()
output = rnn_model(input_seq)
loss = loss_fn(torch.squeeze(output), torch.squeeze(target_seq))
loss.backward()
optimizer.step()
        running_loss += loss.item()
        n = n + 1  # count the chunk now, so the final chunk's loss is included in the average
        # update the data pointer
        data_ptr += seq_len
        # if at end of data then stop
        if data_ptr + seq_len + 1 > data_size:
            break
# print loss and a sample of generated text periodically
    if epoch % 10 == 0 or epoch in (1, 2, 3):
# sample / generate a text sequence after every epoch
rnn_model.eval()
data_ptr = 0
# random character from data to begin
rand_index = np.random.randint(data_size-1)
input_seq = data[rand_index : rand_index+1]
test_output = ""
while True:
# forward pass
output = rnn_model(input_seq)
# construct categorical distribution and sample a character
output = F.softmax(torch.squeeze(output), dim=0)
#output.to("cpu")
dist = Categorical(output)
index = dist.sample().item()
# append the sampled character to test_output
test_output += ix_to_char[index]
            # next input is the sampled character; build a fresh tensor instead of writing
            # into input_seq, which starts out as a view into the data tensor
            input_seq = torch.tensor([[index]], device=device)
data_ptr += 1
if data_ptr > test_output_len:
break
print("TRAIN Sample")
print(test_output)
print("End of Epoch: {0} \t Loss: {1:.8f}".format(epoch, running_loss / n))
return running_loss / n
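One common refinement for RNN training (not used above) is gradient clipping, which guards against exploding gradients. A sketch of how the backward/step pair inside train() could be wrapped (clipped_step is a hypothetical helper):
def clipped_step(model, loss, optimizer, max_norm=5.0):
    ## drop-in replacement for the loss.backward(); optimizer.step() pair in train()
    loss.backward()
    ## rescale gradients in place so their global norm is at most max_norm
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    optimizer.step()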
def test(rnn_model, output_len=1000):
rnn_model.eval()
    # initialize variables
    data_ptr = 0
    # randomly select 10 consecutive characters from the data:
    # 9 to warm up the hidden state, then 1 as the first input
    rand_index = np.random.randint(data_size - 11)
    input_seq = data[rand_index : rand_index + 9]
# compute last hidden state of the sequence
output = rnn_model(input_seq)
# next element is the input to rnn
input_seq = data[rand_index + 9 : rand_index + 10]
# generate remaining sequence
# NOTE: We generate one character at a time
test_output=""
while True:
# forward pass
output = rnn_model(input_seq)
# construct categorical distribution and sample a character
output = F.softmax(torch.squeeze(output), dim=0)
dist = Categorical(output)
index = dist.sample().item()
# append the sampled character to test_output
test_output += ix_to_char[index]
        # next input is the sampled character; build a fresh tensor instead of writing
        # into input_seq, which is a view into the data tensor
        input_seq = torch.tensor([[index]], device=device)
data_ptr += 1
if data_ptr > output_len:
break
print("\n\nTEST -------------------------------------------------")
print(test_output)
print("----------------------------------------")
hidden_size = 512 + 200 # size of hidden state (712) for this larger model
num_layers = 6 # num of layers in RNN layer stack
lr = 0.002 # learning rate
model_save_file = "./model_data.pth"
model_rnn = myRNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
optimizer = torch.optim.Adam(model_rnn.parameters(), lr=lr)
best_model_rnn = myRNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
best_rnn_loss = 10000
for epoch in range(0, 101): # epochs 0 to 100
#model_rnn.load_model(model_save_file)
epoch_loss = train(model_rnn, epoch)
if epoch_loss < best_rnn_loss:
best_rnn_loss = epoch_loss
best_model_rnn.load_state_dict(model_rnn.state_dict())
#if epoch % 10 == 0:
# model_rnn.save_model(model_save_file)
Some sample generated text appears in the training output above.
print("best loss", best_rnn_loss)
print("Model size", get_n_params(best_model_rnn))
test(best_model_rnn)
# smaller rnn
hidden_size = 512 # size of hidden state
num_layers = 3 # num of layers in RNN layer stack
lr = 0.002 # learning rate
model_rnn_3 = myRNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
optimizer = torch.optim.Adam(model_rnn_3.parameters(), lr=lr)
best_model_rnn_3 = myRNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
best_rnn_loss_3 = 10000
for epoch in range(0, 101): # epochs 0 to 100
#model_rnn.load_model(model_save_file)
epoch_loss = train(model_rnn_3, epoch)
if epoch_loss < best_rnn_loss_3:
best_rnn_loss_3 = epoch_loss
best_model_rnn_3.load_state_dict(model_rnn_3.state_dict())
#if epoch % 10 == 0:
# model_rnn.save_model(model_save_file)
print("best loss", best_rnn_loss_3)
print("Model size", get_n_params(best_model_rnn_3))
test(best_model_rnn_3)
Some sample generated text from the smaller RNN appears in the output above.
hidden_size = 512 # size of hidden state
seq_len = 100 # length of LSTM sequence
num_layers = 3 # num of layers in LSTM layer stack
lr = 0.002 # learning rate
model_save_file = "./model_data.pth"
model_lstm = myLSTM(vocab_size, vocab_size, hidden_size, num_layers).to(device)
optimizer = torch.optim.Adam(model_lstm.parameters(), lr=lr)
best_model_lstm = myLSTM(vocab_size, vocab_size, hidden_size, num_layers).to(device)
best_lstm_loss = 10000
for epoch in range(0, 101): # values from 0 to 100
#model_lstm.load_model(model_save_file)
    epoch_loss = train(model_lstm, epoch, seq_len)  # pass seq_len so the setting above takes effect
if epoch_loss < best_lstm_loss:
best_lstm_loss = epoch_loss
best_model_lstm.load_state_dict(model_lstm.state_dict())
#if epoch % 10 == 0:
# model_lstm.save_model(model_save_file)
print("Model size", get_n_params(best_model_lstm), "best loss", best_lstm_loss)
## draw several independent samples from the best LSTM
test(best_model_lstm)
test(best_model_lstm)
test(best_model_lstm)
test(best_model_lstm)
The generated text is still quite rough. This is probably because we used a small dataset (only ~20KB of text).
For comparison, from https://karpathy.github.io/2015/05/21/rnn-effectiveness/: training an LSTM network for a few hours on the full 4.5MB text of Shakespeare's works produces samples like this:
PANDARUS:
Alas, I think he shall be come approached and the day
When little srain would be attain'd into being never fed,
And who is but a chain and subjects of his death,
I should not sleep.
Second Senator:
They are away this miseries, produced upon my soul,
Breaking and strongly should be buried, when I perish
The earth and thoughts of many states.
DUKE VINCENTIO:
Well, your wit is in the care of side and that.
Second Lord:
They would be ruled after this chamber, and
my fair nues begun out of the fact, to be conveyed,
Whose noble souls I'll have the heart of the wars.
Clown:
Come, sir, I will make did behold your worship.
VIOLA:
I'll drink it.
/*
* Increment the size file of the new incorrect UI_FILTER group information
* of the size generatively.
*/
static int indicate_policy(void)
{
int error;
if (fd == MARN_EPT) {
/*
* The kernel blank will coeld it to userspace.
*/
if (ss->segment < mem_total)
unblock_graph_and_set_blocked();
else
ret = 1;
goto bail;
}
segaddr = in_SB(in.addr);
selector = seg / 16;
setup_works = true;
for (i = 0; i < blocks; i++) {
seq = buf[i++];
bpf = bd->bd.next + i * search;
if (fd) {
current = blocked;
}
}
rw->name = "Getjbbregs";
bprm_self_clearl(&iv->version);
regs->new = blocks[(BPF_STATS << info->historidac)] | PFMR_CLOBATHINC_SECONDS << 12;
return segtable;
}
Note: some syntax errors were corrected in the generated text, which then compiled.