Guest User

Untitled

a guest
Jul 22nd, 2024
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.98 KB | None | 0 0
  1. import pandas as pd
  2. import torch
  3. import torch.nn as nn
  4. import torch.optim as optim
  5. from torch.utils.data import DataLoader, Dataset
  6. from sklearn.preprocessing import LabelEncoder
  7. from sklearn.model_selection import train_test_split
  8.  
  9. # Carica i dati
  10. data = pd.read_csv(r"C:/Users/crucillf/OneDrive - STMicroelectronics/Documents/CSV/329/dsrv.rule.diags.aut.2024-02-01_03_55_56.506428.csv")
  11.  
  12. # Stampa i nomi delle colonne per verificare
  13. print("Colonne del file CSV:", data.columns)
  14.  
  15. # La colonna da visualizzare è 'P'
  16. column_to_display = 'P'
  17.  
  18. # Assicurati che la colonna esista nel DataFrame
  19. if column_to_display in data.columns:
  20. # Visualizza la colonna
  21. print(data[column_to_display])
  22. else:
  23. print(f"Errore: '{column_to_display}' non esiste nel DataFrame.")
  24. exit()
  25.  
  26. # Prepara i dati per l'addestramento
  27. class LogDataset(Dataset):
  28. def __init__(self, logs, labels):
  29. self.logs = logs
  30. self.labels = labels
  31.  
  32. def __len__(self):
  33. return len(self.logs)
  34.  
  35. def __getitem__(self, idx):
  36. log = self.logs[idx]
  37. label = self.labels[idx]
  38. return log, label
  39.  
  40. logs = data['P'].astype(str).tolist() # Converti tutti i valori in stringhe
  41. labels = data['P'].astype(str).tolist() # Converti tutti i valori in stringhe
  42.  
  43. # Codifica le etichette
  44. label_encoder = LabelEncoder()
  45. labels = label_encoder.fit_transform(labels)
  46.  
  47. # Dividi i dati in training e test set
  48. logs_train, logs_test, labels_train, labels_test = train_test_split(logs, labels, test_size=0.2, random_state=42)
  49.  
  50. # Trova la lunghezza massima delle sequenze
  51. max_length = max(len(log) for log in logs)
  52.  
  53. # Funzione per pad le sequenze
  54. def pad_sequence(seq, max_length):
  55. return seq + ' ' * (max_length - len(seq))
  56.  
  57. # Pad le sequenze nei dataset
  58. logs_train = [pad_sequence(log, max_length) for log in logs_train]
  59. logs_test = [pad_sequence(log, max_length) for log in logs_test]
  60.  
  61. # Crea un set di tutti i caratteri unici
  62. unique_chars = set(''.join(logs_train + logs_test))
  63. char_to_index = {char: idx for idx, char in enumerate(unique_chars)}
  64.  
  65. # Crea i dataset e i dataloader
  66. train_dataset = LogDataset(logs_train, labels_train)
  67. test_dataset = LogDataset(logs_test, labels_test) # Correzione: aggiunto labels_test
  68. train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
  69. test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)
  70.  
  71. # Definisci il modello RNN
  72. class RNNModel(nn.Module):
  73. def __init__(self, input_size, hidden_size, output_size, num_layers=1):
  74. super(RNNModel, self).__init__()
  75. self.hidden_size = hidden_size
  76. self.num_layers = num_layers
  77. self.embedding = nn.Embedding(input_size, hidden_size)
  78. self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
  79. self.fc = nn.Linear(hidden_size, output_size)
  80.  
  81. def forward(self, x):
  82. x = self.embedding(x)
  83. h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
  84. c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
  85. out, _ = self.lstm(x, (h0, c0))
  86. out = self.fc(out[:, -1, :])
  87. return out
  88.  
  89. # Parametri del modello
  90. input_size = len(unique_chars) # Numero di caratteri unici
  91. hidden_size = 64
  92. output_size = len(set(labels))
  93. num_layers = 1
  94.  
  95. # Inizializza il modello, la loss function e l'optimizer
  96. device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  97. model = RNNModel(input_size, hidden_size, output_size, num_layers).to(device)
  98. criterion = nn.CrossEntropyLoss()
  99. optimizer = optim.AdamW(model.parameters(), lr=0.001)
  100.  
  101. # Addestramento del modello
  102. model.train()
  103. num_epochs = 3 # Numero di epoche
  104. for epoch in range(num_epochs):
  105. epoch_loss = 0
  106. for logs, labels in train_dataloader:
  107. logs = torch.tensor([[char_to_index[char] for char in log] for log in logs], dtype=torch.long).to(device)
  108. labels = torch.tensor(labels, dtype=torch.long).clone().detach().to(device)
  109.  
  110. optimizer.zero_grad()
  111. outputs = model(logs)
  112. loss = criterion(outputs, labels)
  113. loss.backward()
  114. optimizer.step()
  115.  
  116. epoch_loss += loss.item()
  117.  
  118. avg_loss = epoch_loss / len(train_dataloader)
  119. print(f'Epoca [{epoch+1}/{num_epochs}], Perdita: {avg_loss:.4f}')
  120.  
  121. # Valutazione del modello
  122. model.eval()
  123. correct = 0
  124. total = 0
  125. with torch.no_grad():
  126. for logs, labels in test_dataloader:
  127. logs = torch.tensor([[char_to_index[char] for char in log] for log in logs], dtype=torch.long).to(device)
  128. labels = torch.tensor(labels, dtype=torch.long).clone().detach().to(device)
  129.  
  130. outputs = model(logs)
  131. _, predicted = torch.max(outputs.data, 1)
  132. total += labels.size(0)
  133. correct += (predicted == labels).sum().item()
  134.  
  135. print(f'Accuracy: {100 * correct / total}%')
  136.  
  137. # Filtra i dati a partire dal 12 gennaio 2024 e con soglia minima di 1.0
  138. data['P'] = pd.to_numeric(data['P'], errors='coerce')
  139. data['Date'] = pd.to_datetime(data['P'], errors='coerce') # Usa la colonna 'P' come data
  140. start_date = pd.to_datetime('2024-01-12')
  141. filtered_data = data[(data['Date'] >= start_date) & (data['P'] > 1.0)]
  142.  
  143. # Crea un nuovo DataFrame con i dati filtrati
  144. filtered_data = filtered_data[['P']]
  145.  
  146. # Prepara i nuovi log per la predizione
  147. new_logs = filtered_data['P'].astype(str).tolist()
  148. new_logs = [pad_sequence(log, max_length) for log in new_logs]
  149.  
  150. # Gestione dei caratteri sconosciuti
  151. def char_to_index_safe(char):
  152. return char_to_index.get(char, char_to_index[' ']) # Sostituisci con spazio se il carattere non è trovato
  153.  
  154. new_logs_tensor = torch.tensor([[char_to_index_safe(char) for char in log] for log in new_logs], dtype=torch.long).to(device)
  155.  
  156. # Aggiungi una dimensione batch se necessario
  157. if new_logs_tensor.dim() == 2:
  158. new_logs_tensor = new_logs_tensor.unsqueeze(1) # Aggiungi una dimensione batch
  159.  
  160. outputs = model(new_logs_tensor)
  161. predictions = torch.argmax(outputs, dim=1)
  162. print(predictions)
  163.  
Advertisement
Add Comment
Please, Sign In to add comment