import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import string
import unicodedata
## Data creation:
LETTRES = string.ascii_letters + string.punctuation + string.digits + ' '
id2lettre = dict(zip(range(1, len(LETTRES) + 1), LETTRES))
id2lettre[0] = ''  # null character
lettre2id = dict(zip(id2lettre.values(), id2lettre.keys()))
def normalize(s):
    # Strip accents and drop any character outside LETTRES
    return ''.join(c for c in unicodedata.normalize('NFD', s) if c in LETTRES)

def string2code(s):
    # Encode a string as a 1-D tensor of character ids
    return torch.tensor([lettre2id[c] for c in normalize(s)])

def code2string(t):
    # Decode a tensor (or list) of ids back into a string
    if not isinstance(t, list):
        t = t.tolist()
    return ''.join(id2lettre[i] for i in t)
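
# Illustrative round-trip check (the sample string is arbitrary; any string
# whose characters all belong to LETTRES should survive unchanged):
# assert code2string(string2code("Make speeches great again")) == "Make speeches great again"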
with open('trump_full_speech.txt', 'r') as f:
    data_trump = string2code(f.read())
# Model:
class RNN(nn.Module):
    def __init__(self, latent, in_, out_):
        super().__init__()
        self.latent = latent
        self.linear1 = nn.Linear(in_, self.latent, bias=False)         # input -> hidden
        self.linear2 = nn.Linear(self.latent, self.latent, bias=True)  # hidden -> hidden
        self.linear_out = nn.Linear(self.latent, out_)                 # hidden -> logits
        self.tanh = nn.Tanh()

    def one_step(self, x, h):
        # One recurrence step: h_t = tanh(W_in x_t + W_h h_{t-1} + b)
        return self.tanh(self.linear1(x) + self.linear2(h))
    def forward(self, x, h):
        # Apply one_step to each element of the sequence.
        # x: (batch, seq, in_), h: (batch, latent)
        h_list = []
        for x_i in x.transpose(0, 1):  # x_i: (batch, in_)
            h = self.one_step(x_i, h)
            h_list.append(h)
        # Return the last hidden state and all hidden states, (batch, seq, latent)
        return h, torch.stack(h_list, dim=1)

    def decode(self, h):
        # Project hidden states onto vocabulary logits
        return self.linear_out(h)
class SequenceGenerator(nn.Module):
    def __init__(self, inDim, embedDim, hidenDim, outDim):
        super().__init__()
        self.inDim, self.embedDim, self.hidenDim, self.outDim = inDim, embedDim, hidenDim, outDim
        self.embedding = nn.Embedding(self.inDim, self.embedDim)
        self.rnn = RNN(self.hidenDim, self.embedDim, self.outDim)

    def forward(self, x):
        # x: (batch, seq) of character ids
        h0 = -torch.ones(x.size(0), self.hidenDim)  # initial hidden state, built per batch
        x = self.embedding(x)                       # (batch, seq, embedDim)
        h, h_seq = self.rnn(x, h0)
        return h, self.rnn.decode(h_seq)            # logits: (batch, seq, outDim)
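
# Illustrative shape check on a hypothetical dummy batch of random ids:
# a forward pass should return logits of shape (batch, seq, outDim).
# _, logits = SequenceGenerator(96, 50, 96, 96)(torch.randint(96, (4, 20)))
# assert logits.shape == (4, 20, 96)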
# Parameters
batch_size = 10
nb_epoch = 5000
lr = 0.001
seq_length = 200
# inDim = dictionary size (96 characters), embedDim is arbitrary
inDim, embedDim, hidenDim, outDim = 96, 50, 96, 96
# Initialization
net = SequenceGenerator(inDim, embedDim, hidenDim, outDim)
loss_function = nn.CrossEntropyLoss()
# weight_decay is Adam's L2 penalty coefficient; it must stay small,
# otherwise it shrinks the weights toward zero faster than training can learn
optim2 = torch.optim.Adam(params=net.parameters(), lr=lr, weight_decay=1e-5)
loss_train = []
for k in range(nb_epoch):
    if k % 50 == 0:
        print(k)
    # Train
    ## Build the batch + targets: random windows over the corpus, with the
    ## target shifted one character to the right
    index = [np.random.randint(len(data_trump) - seq_length - 1) for i in range(batch_size)]
    x = torch.stack([data_trump[i:i + seq_length] for i in index], 0)
    target = torch.stack([data_trump[i + 1:i + seq_length + 1] for i in index], 0)
    ## RNN training step
    optim2.zero_grad()
    h, logits = net(x)
    # CrossEntropyLoss expects logits of shape (batch, classes, seq)
    loss = loss_function(logits.permute(0, 2, 1), target)
    loss.backward()
    optim2.step()
    loss_train.append(loss.item())
plt.plot(loss_train, label='loss_train')
plt.legend()
plt.show()
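
# A minimal sampling sketch for the trained SequenceGenerator (not part of the
# training script above; the seed character, the multinomial sampling and the
# 100-character length are arbitrary choices): starting from a seed, feed each
# sampled character back into the network while carrying the hidden state.
def generate(net, start='T', length=100):
    with torch.no_grad():
        h = -torch.ones(1, net.hidenDim)        # same initialization as in forward()
        x = string2code(start).unsqueeze(0)     # (1, len(start))
        out = start
        for _ in range(length):
            _, h_seq = net.rnn(net.embedding(x), h)
            h = h_seq[:, -1]                    # keep the last hidden state
            probs = torch.softmax(net.rnn.decode(h), dim=-1)
            next_id = torch.multinomial(probs, 1)  # sample the next character id
            out += code2string(next_id.squeeze(0))
            x = next_id                         # feed the prediction back in
        return out

# print(generate(net))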