Advertisement
CamolaZ

IMO

Aug 11th, 2023
902
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.95 KB | None | 0 0
  1. import pandas as pd
  2. from sklearn.model_selection import train_test_split
  3. from sklearn.metrics import mean_absolute_percentage_error
  4. import numpy as np
  5. import torch
  6. import torch.nn as nn
  7. import torch.optim as optim
  8. from keras.preprocessing.text import Tokenizer
  9. from keras.preprocessing.sequence import pad_sequences
  10. from IPython.display import clear_output
  11.  
  12. # Initialize lists to store data
  13.  
  14.  
  15. # Split the data into train, valid and validation sets
  16. train_df, valid_df, test_df = train_valid_test_split(label_df, target = 'PRECO_VENDA_OBSERVADO', train_size = 0.6, valid_size = 0.2,  test_size=0.2, shuffle = True, random_state=42)
  17.  
  18. # Split X y
  19. # ------------------------
  20. X_train = train_df['DESCRICAO_TEXTO'].values # texts
  21. y_train = train_df['PRECO_VENDA_OBSERVADO'].astype(float).values # labels
  22.  
  23. # Text and label columns
  24. X_valid = valid_df['DESCRICAO_TEXTO'].values # texts
  25. y_valid = valid_df['PRECO_VENDA_OBSERVADO'].astype(float).values # labels
  26.  
  27. # Text and label columns
  28. X_test = test_df['DESCRICAO_TEXTO'].values # texts
  29. y_test = test_df['PRECO_VENDA_OBSERVADO'].astype(float).values # labels
  30.  
  31. # Tokenization
  32. # -----------------------------------
  33.  
  34. #
  35. tokenizer = Tokenizer()
  36. tokenizer.fit_on_texts(X_train)
  37. word_index = tokenizer.word_index
  38.  
  39. # Train
  40. train_sequences = tokenizer.texts_to_sequences(X_train)
  41. max_sequence_length = max(len(seq) for seq in train_sequences)
  42. X_train_padded_sequences = pad_sequences(train_sequences, maxlen=max_sequence_length)
  43.  
  44. # Validation
  45. valid_sequences = tokenizer.texts_to_sequences(X_valid)
  46. max_sequence_length = max(len(seq) for seq in train_sequences)
  47. X_valid_padded_sequences = pad_sequences(valid_sequences, maxlen=max_sequence_length)
  48.  
  49. # Test
  50. test_sequences = tokenizer.texts_to_sequences(X_test)
  51. max_sequence_length = max(len(seq) for seq in test_sequences)
  52. X_test_padded_sequences = pad_sequences(test_sequences, maxlen=max_sequence_length)
  53.  
  54.  
  55. # Deep Learning Network configuration
  56. # =========================================================================
  57. # Define the model architecture
  58. class LSTMModel(nn.Module):
  59.     def __init__(self, vocab_size, embedding_dim, hidden_dim):
  60.         super(LSTMModel, self).__init__()
  61.         self.embedding = nn.Embedding(vocab_size, embedding_dim)
  62.         self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
  63.         self.linear = nn.Linear(hidden_dim, 1)
  64.        
  65.     def forward(self, x):
  66.         x = self.embedding(x)
  67.         output, _ = self.lstm(x)
  68.         output = output[:, -1, :]  # Use only the last output from the LSTM
  69.         output = self.linear(output)
  70.         return output
  71.  
  72. model = LSTMModel(vocab_size=len(word_index) + 1, embedding_dim=100, hidden_dim=128)
  73.  
  74. # Define the loss function and optimizer
  75. criterion = nn.MSELoss()
  76. optimizer = optim.Adam(model.parameters(), lr=0.01)
  77.  
  78. # Convert labels to torch tensor
  79. y_train = torch.tensor(y_train,dtype=torch.float32)
  80. y_valid = torch.tensor(y_valid,dtype=torch.float32)
  81.  
  82. # Train the model
  83. batch_size = 1
  84. train_data = list(zip(X_train_padded_sequences,y_train))
  85. train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
  86.  
  87.  
  88. # TRAIN
  89. #========================================================================
  90. mapes_train_list = [];  mapes_valid_list = []
  91. for epoch in range(1000):
  92.     total_loss = 0
  93.     for batch_x, batch_y in train_loader:
  94.         optimizer.zero_grad()
  95.         output = model(batch_x)
  96.         loss = criterion(output, batch_y.view(-1, 1))
  97.         loss.backward()
  98.         optimizer.step()
  99.         total_loss += loss.item()
  100.    
  101.     # Calculate MAPE for each epoch
  102.     with torch.no_grad():
  103.  
  104.         y_train_hat = model(torch.tensor(X_train_padded_sequences, dtype=torch.long))
  105.         mape_train = mean_absolute_percentage_error(y_train,y_train_hat.view(-1))
  106.  
  107.         y_valid_hat = model(torch.tensor(X_valid_padded_sequences, dtype=torch.long))
  108.         mape_valid = mean_absolute_percentage_error(y_valid,y_valid_hat.view(-1))
  109.  
  110.  
  111.     # Print the MAPE for each epoch
  112.     print(f"Epoch {epoch + 1}, Loss: {total_loss}, MAPE_VALID: {mape_valid.item()} , MAPE_TRAIN: {mape_train.item()}  ")
  113.     # Save data for each epoch
  114.  
  115.     mapes_train_list.append(mape_train)
  116.     mapes_valid_list.append(mape_valid)
  117.  
  118.     #clear_output(wait=True)
  119.     # plt.plot(mapes_train_list);  plt.plot(mapes_valid_list)
  120.     # plt.ylabel('data') #set the label for y axis
  121.     # plt.xlabel('index') #set the label for x-axis
  122.     # plt.title("Plotting a list") #set the title of the graph
  123.  
  124. # PLOTS
  125. # ===================================================================
  126.  
  127. # Test map computation
  128. y_test_hat = model(torch.tensor(X_test_padded_sequences, dtype=torch.long))
  129. mape_test = mean_absolute_percentage_error(y_test, y_test_hat.view(-1))
  130. print(mape_test)
  131.  
  132. plt.plot(mapes_train_list);  plt.plot(mapes_valid_list)
  133. plt.ylabel('data') #set the label for y axis
  134. plt.xlabel('index') #set the label for x-axis
  135. plt.title("Plotting a list") #set the title of the graph
  136.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement