# AML: 06 RNNs and LSTMs for Text Generation
Based on https://github.com/karpathy/char-rnn

## Download Data

In [1]:
! wget "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt" -c -P {'data/'}

--2022-09-13 06:06:10--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘data/input.txt’


2022-09-13 06:06:11 (199 MB/s) - ‘data/input.txt’ saved [1115394/1115394]



## Libraries etc.

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


## Load the data

In [3]:
# Load the data into memory
data_file = "./data/input.txt" 
#data_file = "./data/sherlock.txt" 

## Open the text file
data = open(data_file, 'r').read(20000) ## Read only ~20KB of data; full data takes long time in training
chars = sorted(list(set(data))) 
## NOTE: vocab_size is a hyperparameter of our models
data_size, vocab_size = len(data), len(chars) 

print("Data has {} characters, {} unique".format(data_size, vocab_size))

## char to index and index to char maps
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

## convert data from chars to indices
data = list(data)
for i, ch in enumerate(data):
    data[i] = char_to_ix[ch]

## data tensor on device
data = torch.tensor(data).to(device)
data = torch.unsqueeze(data, dim=1)


Data has 20000 characters, 58 unique


## The RNN and LSTM models

In [4]:
class myRNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_size=512, num_layers=3, do_dropout=False):
        super(myRNN, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.do_dropout = do_dropout
        
        self.dropout = nn.Dropout(0.5)
        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
        
        self.hidden_state = None # the hidden state of the RNN
    
    def forward(self, input_seq):
        x = nn.functional.one_hot(input_seq, self.input_size).float()
        if self.do_dropout:
            x = self.dropout(x)
        x, new_hidden_state = self.rnn(x, self.hidden_state)
        output = self.decoder(x)
        # save the hidden state for the next batch; detach removes extra datastructures for backprop etc.
        self.hidden_state = new_hidden_state.detach() 
        return output
    
    def save_model(self, path):
        torch.save(self.state_dict(), path)
    
    def load_model(self, path):
        try:
            self.load_state_dict(torch.load(path))
        except Exception as err:
            print("Error loading model from file", path)
            print(err)
            print("Initializing model weights to default")
            self.__init__(self.input_size, self.output_size, self.hidden_size, self.num_layers)

class myLSTM(nn.Module):
    def __init__(self, input_size, output_size, hidden_size=512, num_layers=3, do_dropout=False):
        super(myLSTM, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.do_dropout = do_dropout
        
        self.dropout = nn.Dropout(0.5)
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
        
        self.internal_state = None # the internal state of the LTSM, 
                                   # consists of the short term memory or hidden state and 
                                   # the long term memory or cell state
    
    def forward(self, input_seq):
        x = nn.functional.one_hot(input_seq, self.input_size).float()
        if self.do_dropout:
            x = self.dropout(x)
        x, new_internal_state = self.lstm(x, self.internal_state)
        output = self.decoder(x)
        self.internal_state = (new_internal_state[0].detach(), new_internal_state[1].detach())
        return output
    
    def save_model(self, path):
        torch.save(self.state_dict(), path)
    
    def load_model(self, path):
        try:
            self.load_state_dict(torch.load(path))
        except Exception as err:
            print("Error loading model from file", path)
            print(err)
            print("Initializing model weights to default")
            self.__init__(self.input_size, self.output_size, self.hidden_size, self.num_layers)

## Helper Functions for Training and Testing

In [5]:
# function to count number of parameters
def get_n_params(model):
    np=0
    for p in list(model.parameters()):
        np += p.nelement()
    return np

In [6]:
def train(rnn_model, epoch, seq_len = 200):
    # seq_length is length of training data sequence
    
    rnn_model.train()
    # loss function 
    loss_fn = nn.CrossEntropyLoss()
    
    
    test_output_len = 200    # total num of characters in output test sequence
    
    ## random starting point in [0,seq_len-1] to partition data into chunks of length seq_len
    ## This is Truncated Backpropogation Through Time
    data_ptr = np.random.randint(seq_len)
    running_loss = 0
    n = 0;
    
    if epoch % 10 == 0 or epoch == 1 or epoch == 2 or epoch == 3:
        print("\n\n\n\nStart of Epoch: {0}".format(epoch))
        
    while True:
        input_seq = data[data_ptr : data_ptr+seq_len]
        target_seq = data[data_ptr+1 : data_ptr+seq_len+1]
        input_seq.to(device)
        target_seq.to(device)
        
        optimizer.zero_grad()
        output = rnn_model(input_seq)
        loss = loss_fn(torch.squeeze(output), torch.squeeze(target_seq))
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

        # update the data pointer
        data_ptr += seq_len
        # if at end of data then stop
        if data_ptr + seq_len + 1 > data_size:
            break
        
        n = n+1
            
    # print loss and a sample of generated text periodically
    if epoch % 10 == 0 or epoch == 1 or epoch == 2 or epoch == 3:
        # sample / generate a text sequence after every epoch
        rnn_model.eval()
        data_ptr = 0

        # random character from data to begin
        rand_index = np.random.randint(data_size-1)
        input_seq = data[rand_index : rand_index+1]

        
        test_output = ""
        while True:
            # forward pass
            output = rnn_model(input_seq)

            # construct categorical distribution and sample a character
            output = F.softmax(torch.squeeze(output), dim=0)
            #output.to("cpu")
            dist = Categorical(output)
            index = dist.sample().item()
            

            # append the sampled character to test_output
            test_output += ix_to_char[index]

            # next input is current output
            input_seq[0][0] = index
            data_ptr += 1

            if data_ptr > test_output_len:
                break
        print("TRAIN Sample")
        print(test_output)
        print("End of Epoch: {0} \t Loss: {1:.8f}".format(epoch, running_loss / n))
    
    return running_loss / n

      

In [7]:
def test(rnn_model, output_len=1000): 
    rnn_model.eval()
    
    # initialize variables
    data_ptr = 0
    hidden_state = None    
    
    # randomly select an initial string from the data of 10 characters
    rand_index = np.random.randint(data_size - 11)
    input_seq = data[rand_index : rand_index + 9]
    
    # compute last hidden state of the sequence 
    output = rnn_model(input_seq)
    
    # next element is the input to rnn
    input_seq = data[rand_index + 9 : rand_index + 10]
    
    # generate remaining sequence
    # NOTE: We generate one character at a time
    test_output=""
    while True:
        # forward pass
        output = rnn_model(input_seq)
        
        # construct categorical distribution and sample a character
        output = F.softmax(torch.squeeze(output), dim=0)
        dist = Categorical(output)
        index = dist.sample().item()
        
        # append the sampled character to test_output
        test_output += ix_to_char[index]
        
        # next input is current output
        input_seq[0][0] = index
        data_ptr += 1
        
        if data_ptr  > output_len:
            break

    print("\n\nTEST -------------------------------------------------")
    print(test_output)
    print("----------------------------------------")

## Creating and Training an instance

## First, plain RNNs

In [8]:
hidden_size = 512 + 200  # size of hidden state
num_layers = 6     # num of layers in RNN layer stack
lr = 0.002          # learning rate

model_save_file = "./model_data.pth"

model_rnn = myRNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
optimizer = torch.optim.Adam(model_rnn.parameters(), lr=lr) 

best_model_rnn =  myRNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
best_rnn_loss = 10000

for epoch in range(0, 101): # values from 1 to 100
    #model_rnn.load_model(model_save_file)
    epoch_loss = train(model_rnn, epoch)
    if epoch_loss < best_rnn_loss:
        best_rnn_loss = epoch_loss
        best_model_rnn.load_state_dict(model_rnn.state_dict())
    #if epoch % 10 == 0:
    #    model_rnn.save_model(model_save_file)






Start of Epoch: 0
TRAIN Sample
lpNm,irnsl,eueu,obudIAewtrethmraset ecr mVueub
e;g i  uhhee iNRa An gI  n eIisr sinue
dis ihyCpdwAegieassit ugo aaeeurVieie t-v daiseOI
c:euse inw 
 Gryr Ep
rsegs ouwdn.deI:Een   aV
r
s  nst  
h ntAca 
End of Epoch: 0 	 Loss: 3.45862553




Start of Epoch: 1
TRAIN Sample
erVr e,ma,c 
er seneiyitryG  muAuwnotvte
rrsdsnonrdR owotn ;h.dn   tI
 eb l,pavai m rd  ,bn,eo

nn,Il ?prsGo Tue i,N :dw goieus, grrymc  e eeerawoiad
dsoi?dsaeusbc o eriegi hnmmWoaheIn
ssh  wAsIha huWt
End of Epoch: 1 	 Loss: 3.40872579




Start of Epoch: 2
TRAIN Sample
b n
i
V, ee:r.rdoAeds . svoned eer i  tRye wreaeCdeeohnhhni L-ty.dmu
 eee

h
 msyrAhir,eg
ce:
I
eggteen Eee a E ou motltnanIae
Ifro,ehiaeeufsoln
dg nnei,vLn gdvaee
n  
dUu,;aaee
nhieam th ineR, vlaymU 
End of Epoch: 2 	 Loss: 3.40666338




Start of Epoch: 3
TRAIN Sample
:igdbs 
niee;mpe

ne i nia.p:svi  fLe,edtItmne eemdo ae.
leR fo
wtra erm twouuGrdh? goetAitAdn
owyaeaselRureb

 apae,r

LGwu eo
ul:Rhiue .ueVate. no,

Some sample generated text

In [9]:
print("best loss", best_rnn_loss)
print("Model size", get_n_params(best_model_rnn))
test(best_model_rnn)

best loss 3.405305341798432
Model size 5667578


TEST -------------------------------------------------

taisoegeo lseyl o,yedoeykeme
,bxrWl: gewuUuh  eousr taauCe
reuuiur
eyeoyttne t htA dofsTameeeieayoryo,
an slocr
eV, euoe
ega
b Ventddner nirl,sr,ora:heh s lmtemge, Gey.pr,gaIg.o,euscaro
aLa eln,nr Tda :r g gaLrdypo
sero:dGdd a
wAherruaAVew:til lIvo rAgEgd! srwe e Aeyomths;h C
Adeetaenas
om ploidod.taebE  . ,ohaa rotEuheAre,gee.ole
s obneeuihebtos e ioo .e
   :s P a:uhG
eucgsui,itIAoiLimuwhs  oibuyewyeaEayp iuseauan r
Irremwest  aeh
mGu:sdbsueeaeadhat Iilena :dealta,oloetuin,tndneua eVroa bIe ri anitrnIuuoi souedssIC.tnlauuieen  Erg aI:sr hy Lcteeia-aihrwdLo,
 A   v,sioe ,
esced, meehat g',ete Lide
nRee :aeu 
 reryya
.sna V CCGudmR,,ui  AeEetneee: donesIo e .:uetGre gwgaae d
.t wupioeu,y,amIan ra dto.p or
 c.ragrtgepa 
rbineih nPemfs V, V ,uVe apaLghAgeuodoym yoiU
l mtonho
ow;ragreVsnnht ddvnaletaEm mmun hpn,ef  :.Teu:rrVpe ahi o. ls,dhr
e,nm'Ilos,othen,s Pa
uefmg.dg  w,eavE  enoIriy

In [10]:
# smaller rnn
hidden_size = 512  # size of hidden state
num_layers = 3     # num of layers in RNN layer stack
lr = 0.002          # learning rate

model_rnn_3 = myRNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
optimizer = torch.optim.Adam(model_rnn_3.parameters(), lr=lr) 

best_model_rnn_3 =  myRNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
best_rnn_loss_3 = 10000

for epoch in range(0, 101): # values from 1 to 100
    #model_rnn.load_model(model_save_file)
    epoch_loss = train(model_rnn_3, epoch)
    if epoch_loss < best_rnn_loss_3:
        best_rnn_loss_3 = epoch_loss
        best_model_rnn_3.load_state_dict(model_rnn_3.state_dict())
    #if epoch % 10 == 0:
    #    model_rnn.save_model(model_save_file)





Start of Epoch: 0
TRAIN Sample
oUdraueton
teys ydrusfuEoo AVA o
w
wtNti
:Im resp Un
m
I Ae siwV
ot tsuLati  chthanAC arCslo Aa
 hsei ad ,Ieooie  hrm: aoo.ur 
rt ab
nanowaueLn.
h.uoo.
n
e,IW ysr :vUInanihihsrrgna
at tlnsenAstA:rVloa 
End of Epoch: 0 	 Loss: 3.42865025




Start of Epoch: 1
TRAIN Sample
 seaetie yeri
esL m .i tbeee y.n  L ed.iol  g
 
 lo  
eexmhenreW   g.snum  mwin
Am
aeitInntT cTye pe isa u ,hew s,
firi ashynrib nteorocorIdsiuracl 
:nA ti Lw
smw.i e
iuetaiit'i l L n oL  ogse.Melt . t
End of Epoch: 1 	 Loss: 3.38732375




Start of Epoch: 2
TRAIN Sample
mo;gtem :oeocwtEeroisve
mhohlowehumg ,:usioy Ltit eierrw ,g hg
 i roamG 
o tu,
 
sleohr a.ds 
otLnneaeyEsbAeie: r?:u,Giaais s t snl weegmee.i. ema aod n tulILtry ng.yIEeeuiga.i':
catsonLNs syoaoAtiaIdu
End of Epoch: 2 	 Loss: 3.38590028




Start of Epoch: 3
TRAIN Sample
nnuloe:i eamorao eiae
erseeuni.u
,  mlNeLVwnirAht,eo
:l iggteTA rArisrst, woooEamVeae:sot t   molemeL tiG
iem,A  etalih agpph gAu.,t
cu u;t
 Ieieedi,

In [11]:
print("best loss", best_rnn_loss_3)
print("Model size", get_n_params(best_model_rnn_3))
test(best_model_rnn_3)

best loss 3.3815823306842727
Model size 1373242


TEST -------------------------------------------------
r.hol  yiwEtegt.Gsco it hwalem
o Ceoybh eb  PhrorIhicyeEee ee  na Aure?E i  lVtr eht d  e : sCR i 
Ctucrnex,
eegLe hyIt  irmre h tvgtrtax e t hIie   tn,ori  s,wi ar
uwioovveaEiyl nhwloI ihoAIna itG g m l sawieeIinrpeo:i aeu ol  ,
whgUeioVre timw ru iEmo
yi  elvIm raiNserrteCmi
ittLrs we pi . g   ei.aerAasensfcumit
 nftontsi: 
yvVm
 sniauraa ,ooo ; ean r
LbA  
 ioa  essueewpe, toIiWet .n sn apeinmbn
eroio ie em IL:om  shed tn,  irn   E is bos u clE
 i:msntVoeo lieb ,  eVirarAtiftmm,,Cauiom g og r euIIo e  m
et tla
e eu   Mdir eiem.laoa  e ie
n o:l.euawnneAeAf.sIeum
egnmreo  c  rmhh yiL ep
toeeNamrVoua rw'bmg eo    Isno e ne VTmrut ta rslerafr r ahuiVeg est aiomaehnoeesmt eoteehotu n u
G  md,lr ieidiwUmrelroir iyp itfm,p 
lem lr : soxyI vo oVwtahelmiaydmwtxou
Ehowfhoreo oih nse   V i  piL:et l
tosnt
aA:ei.ewiiedeeig.   mo
 tu.e,.e h,rwi Au.obwafarreyrfrmnvo,,aeieemeelpinTroib,e,etuyem

## Next, LSTMs

Some sample generated text

In [12]:
hidden_size = 512   # size of hidden state
seq_len = 100       # length of LSTM sequence
num_layers = 3      # num of layers in LSTM layer stack
lr = 0.002          # learning rate

model_save_file = "./model_data.pth"

model_lstm = myLSTM(vocab_size, vocab_size, hidden_size, num_layers).to(device)
optimizer = torch.optim.Adam(model_lstm.parameters(), lr=lr)

best_model_lstm = myLSTM(vocab_size, vocab_size, hidden_size, num_layers).to(device)
best_lstm_loss = 10000
 
for epoch in range(0, 101): # values from 0 to 100
    #model_lstm.load_model(model_save_file)
    epoch_loss = train(model_lstm, epoch)
    if epoch_loss < best_lstm_loss:
        best_lstm_loss = epoch_loss
        best_model_lstm.load_state_dict(model_lstm.state_dict())
    #if epoch % 10 == 0:
    #    model_lstm.save_model(model_save_file)
    





Start of Epoch: 0
TRAIN Sample
o:
Rul bnlp  u:l n,uausmktiOu:e!V t sot
: e,U 
oaeernge a:hntsAVAdytsaItM rr!miotI
 .n wuI  etao htilnsir
Tdv  e Iita ynu ier,ea,s  Nf  .uslItyVdfrg: VLu,;gWtrnVil lluq s; n , aOtu ws reooh.nr  anrn tt
End of Epoch: 0 	 Loss: 3.37958429




Start of Epoch: 1
TRAIN Sample
eeI eeAa d'rwodceAaE un
atteolapL sI fsoi os ge.ana
id,rleahiri ,mVfnioeuomLyyIuO rLt lila
Cd TyoS.u
cA eh Lte tLosMCyVF
iVe
Uialyf
ti
t  e huh!ucg.:w s
Asvolob 
e p ha iW om,oh
t I:dom   h tesot Ahiot
End of Epoch: 1 	 Loss: 3.33129447




Start of Epoch: 2
TRAIN Sample
ns.t rdfrdaohugnna 
dtyow t
potuy gngucr LhdpieaOlt 
yeq.L,tV
.ma Ado Vhd;f,nerAGlr ottL' ! ehey,n tmL 
fyhtauaodonAbtrl y 
ll,yhupiwdrJrtl ir
,Vs
;hsIn
 lmlsi hl.l iea aod
bawh bo rr, mRreI:bGal':ltSm
End of Epoch: 2 	 Loss: 3.33349757




Start of Epoch: 3
TRAIN Sample
a  uI gmth'ti. rtIwAer hor
ab?iai It  iItrlIeh wb
rwlr.Nw wot
nrnleo L
hTemt aI,rhoer da lsyb,r, ry 
ireRny
metlb wgAu Aa
ut
ii 
a''yiiDolyrIo;
d y r

In [13]:
print("Model size", get_n_params(best_model_lstm), "best loss", best_lstm_loss)
test(best_model_lstm)

Model size 5403706 best loss 3.326620026510589


TEST -------------------------------------------------
e uo, hsn.btItd
sa niaeccaatakhruem:ms u e rI : n
n nee hsw rtttd,h  E's-l
UyIe
'
dsnmncdI
 nId;ipym  mmtln IgVgte i
  ggoA 
te etasAnd cfu n,Msnsltrib
g,hkIo hhc g  Irh  iuo rvIfdl,uti  oedw leClneot oM W
wesrw l: teo  t r e   wIbafnrs
h ahnlsd::conre ,ca.Oi At: dtfIao r
In;eo A tlu Fenu i;ugoeaIef
lT SIio ul Rl,ityyob,,hIu  t
fb c  ot
Iisganho ynsbHh  ar t,,Iii: so
,eRtSo  idI
inmhuhlOm inhu yo.n-a h
h 'i IcsRrh ,eo  ed caee  av aTyhfsgn,hEoTm   twng,fihaa ovVh: e. Edlh alm: dhe'o r
nHdh
f: tt ofsadiopeht aispb oC 
W   ouuierc
eltkiudt mewaaioiigo
 
 oryatmh  rwe
Ioyv l 
.x rIe:; h  df
r.t,-eaaItoOefaroEh Tf  yMeySeAdstgeowsioU Ie
p:.  loiuI.o,,eie ghe::.maoesfesnbditiyonarhcyIaemlorg h:on  tdeI ,fharae b:atrhi b .d  :va
saeewg
havtd A?evi
rwhe 
aesVLswinndwfram ygh cf,rast
nIdIate i:i esnnrdteIteVl,hwl :l okcAk .alacote
 
rr,de 
dtet ukn y tpneu' r rbals
A:ohloaIeebd r M m
rnm  t,

In [14]:
test(best_model_lstm)



TEST -------------------------------------------------
ly
 y, ofseroaaa  smuo 'nc,oo.nhAlye 
,nttr aSwAamotaniem
n.nld   etmFhE saoaIrit,hGhgcchtea;l'yNen.th
halnoVfn rusare udsaI cunte, b.f  ani a
 
 ue;niuntA  h hdlrtwonbne mpl;racU e  aoaGfohthsehaff
 tk  Rmheudhab  ut viio,oI 'm,eontecGlnyu ah no ;n.e ,tee IgaurIeelVur,efo IhI S nBOIAw  msLgAalleyr
dtI l:do
 usShltEooyo.rn tARe
!y  eslgcyrtIbtlfhnw'uh

toioIiCahsuet  t  sd:I dtc gnnwaRusWrdSothe hMlfdAendtoahriC
othd btnamnnh
oCidd,aailcoi ehomhiyapc t au   tdkd lvrtiantiR,'t  lswasnti
uyAyao hoaWd:c,acdnid yeyo,e,h e t ,os Belc 
ftmieeeoler
dbwhdordy wfeteanUrt w'lc  hdIdai,u  Rae  oomm:w plna eel y 
ai
ia tatsoIll.ooh'sih fs':erate dsidailmatLoodahaanni rmrA,t
,a Iehnd r 
oI:le rm didelsc ,dI:hedold o lbwnneI naowniyebfhIa sadolee  m g :
e tmcFertaeG 
hesh'horefs, triouwto
e.aetoUytt fdalMeloAS  poyun otIth toroyito  inIrenl,
cIme,tnluotmn.wtwrniU
s   u
f.aWkituitcwnaIosolt

nnie
isfb hrAthrou
l.dala
 g-oglwb alS euin tIIteoo;t

In [15]:
test(best_model_lstm)



TEST -------------------------------------------------
aIofie hiLb.tUrIa  Tm b.abei
wAuhratt er ,nynaai.aw th: en w
r sesveB
mta slct
t h 
:n.Isolfflw
rb
e ng aooowty,votr
 
t yds
r
wTtaeero   fth?!y   
eavUNfnha
.doarrutlsue'o
soon ositFAhaamc eh
,ag e
nly L;
uyinooa.I
I
ehraewRrob
rehu.n lopi aos,eaanVeo
eiaF ytrU, .hdaoNte :liee
Biea  ho' uy:
aRoa  lAu

eYtI,vrrnawholae aTgt
ehpIe.G anos
r?e bdIda csre.raees
stih inVdm
   aw,n;rmRs fohW.yl  aieh stt oavd;  e   kiasdbnwie lhaiss ho-oCmlI ps.lmot   beb V  loS anot's usu lyatredsaEn hna

u Li.egSIdioof  ds mhctiltdR pA;u mvyIhhhr VIe ro ef fCpo NIlINpde  h,a:elreaw, eakn aeb  yoreoT ieeLai
natm
ify aFdb rwI
h nutn do oNCam
 msAnykvaeawe ualedts?nu idtlAu Ncs hoLmdR
ge uyttdyTn
 n gltRAa?rnE.owe  t wtyeOd c hrtwedss nIomheeemno ot d aeohefoyhootrs nh    d rSalt hr;ett b n l dtg,Ii nobp rty
us rl lIstdnaoi
lte id n
as, ?sno iwsrcaBeieeotnb yd  ohd,lihn wot  E  wsfdUtl oat ewnw H tittwyl?o
   tw gCeny IntmaAIu o saIggm
   ewh rteahioait

In [16]:
test(best_model_lstm)



TEST -------------------------------------------------
  oNAdmelesHtd wam bnnfh Nnoiu DdLtna  ilittfh
Asdwaoehuv
mteumyhdodn
 ro trlnata
Uuh
:
bwutomII.m,n.fliior n
trr:raer, oIsof  u,dM
:ole, IumMeel aon
homrg
RD dnspe' g u. t t'eIwe 
sdor tbadCluhhas'Nh eN 

bmNnfigtEe :dno ioti:gilMneehrtihIepme;stn e 'Og whrntfhp pndr
oITl annAAI:hdhe.'wterlrh soofata :IieGn',,u,L;te
a sed r.e Iae Ado onna   heist  s lhmo wolds'awaaaty ote
!cS rdI dnwpr GsSrrwltoIrtsasv mS uhflsndtuod ldaoRI, rda thy hy
ofltei e nipdy
nEatk hhthlaU't aeonudlondnduuLei os
 Md
Ne ,klLAa hlrnl
wmbr  mdal, iloyLhoWerbmoRpela s, de:eio  
Ir .uubId 
ecRaL 

l  mlfu d .Fee r fVlerew:Ndec egu 
n rlseotaew eieAeakthaWAmacrsnh hy v Ittwoe:lyuuls hB n  o u b  nt iLVyslthonaHoe i, Ae a!
 otddAtstd   slmlantSI : iI om'  
lf Iubr  .e'hbeoa,o,ogetmO 
t hei,h t ctI
IAcomaiyod: 
 iti c iJwl 
niiIvo
meyooomr,Eu vkhtit Nt bodm: mts

eof   :
oyphIunahou tte  ayeds-:
ineltay h. aAwe s'thsc
 Rs haedaS,tbIhrs 
oao .atIuRn:
aert
e et Nh

### We can see that the LSTM network has actually memorized the text.
This is probably because we used a small dataset (20KB)

# Further examples
from https://karpathy.github.io/2015/05/21/rnn-effectiveness/

## Shakespere Generator
Training a LSTM network for a few hours on 4.5MB Text of Shakespeare's wroks
```
PANDARUS:
Alas, I think he shall be come approached and the day
When little srain would be attain'd into being never fed,
And who is but a chain and subjects of his death,
I should not sleep.

Second Senator:
They are away this miseries, produced upon my soul,
Breaking and strongly should be buried, when I perish
The earth and thoughts of many states.

DUKE VINCENTIO:
Well, your wit is in the care of side and that.

Second Lord:
They would be ruled after this chamber, and
my fair nues begun out of the fact, to be conveyed,
Whose noble souls I'll have the heart of the wars.

Clown:
Come, sir, I will make did behold your worship.

VIOLA:
I'll drink it.
```

## Training on Linux Source Code
```
/*
 * Increment the size file of the new incorrect UI_FILTER group information
 * of the size generatively.
 */
static int indicate_policy(void)
{
  int error;
  if (fd == MARN_EPT) {
    /*
     * The kernel blank will coeld it to userspace.
     */
    if (ss->segment < mem_total)
      unblock_graph_and_set_blocked();
    else
      ret = 1;
    goto bail;
  }
  segaddr = in_SB(in.addr);
  selector = seg / 16;
  setup_works = true;
  for (i = 0; i < blocks; i++) {
    seq = buf[i++];
    bpf = bd->bd.next + i * search;
    if (fd) {
      current = blocked;
    }
  }
  rw->name = "Getjbbregs";
  bprm_self_clearl(&iv->version);
  regs->new = blocks[(BPF_STATS << info->historidac)] | PFMR_CLOBATHINC_SECONDS << 12;
  return segtable;
}
```

## 

## Training on Latex Source of an Algebraic Geometry Book
Note: some syntax errors ware corrected in the generated text and then complied
![](https://karpathy.github.io/assets/rnn/latex4.jpeg)
![](https://karpathy.github.io/assets/rnn/latex3.jpeg)