import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Dropout, CuDNNLSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing
import matplotlib.pyplot as plt
from collections import deque
import numpy as np
import pandas as pd
import random
import time
from pandas import DataFrame, Series

'''
# Alternative normalization function (z-score), kept for reference
def norm(x):
    return (x - train_stats['mean']) / train_stats['std']
'''

# Normalization function (min-max scaling to [0, 1])
def norm(x):
    return (x - train_stats['min']) / (train_stats['max'] - train_stats['min'])

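# Illustrative example (hypothetical values): with train_stats['min'] == 0 and
# train_stats['max'] == 40 for temp_C, norm(10) yields 0.25, so all inputs
# land in the [0, 1] range.
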
# One-hot encoding function (assumes integer values in the range 1..quantity)
def one_hot(dataset, quantity):
    data = dataset.values.ravel().astype(int)
    onehot_encoded = []

    for value in data:
        vector = [0 for _ in range(quantity)]
        vector[value - 1] = 1
        onehot_encoded.append(vector)
    return onehot_encoded

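# Illustrative example: one_hot(df[['work_pos']], 4) for values [1, 3] yields
# [[1, 0, 0, 0], [0, 0, 1, 0]] -- the values are assumed to be 1-based.
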
# Function merging the feature matrices (column-wise concatenation)
def concentrate(temp, week_nr, work_pos, target_power):
    temp = np.asarray(temp)
    week_nr = np.asarray(week_nr)
    work_pos = np.asarray(work_pos)
    # Note: the target column goes last so it can be split off later
    x = np.concatenate((temp, work_pos, week_nr, target_power), axis=1)

    return x

# Function normalizing the data and assembling the sequences
def normalize_df(df):
    # df = df.drop('future_power', 1)

    # Min-max normalization of the temperature column
    normalized_temp = norm(df[['temp_C']]).values

    # One-hot encoding
    # Week number of the year
    encoded_week_nr = one_hot(df[['week_nr']], 52)

    # Operating state
    encoded_work_pos = one_hot(df[['work_pos']], 4)

    # Assemble the matrix of training examples together with the target column
    data = concentrate(normalized_temp, encoded_week_nr, encoded_work_pos, df[['p_3fh']].values)

    sequential_data = []
    prev_days = deque(maxlen=SEQUENCE_LENGTH)

    for row in data:
        prev_days.append(list(row[:-1]))
        if len(prev_days) == SEQUENCE_LENGTH:
            sequential_data.append([np.array(prev_days), row[-1]])

    # Shuffle to balance the data
    random.shuffle(sequential_data)

    X = []
    y = []

    for seq, target in sequential_data:  # iterate over the sequential data
        X.append(seq)       # X holds the input sequences
        y.append(target)    # y holds the target power values

    return np.array(X), np.array(y)

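# Each timestep carries 1 + 52 + 4 = 57 features (temperature, week-of-year
# one-hot, operating-state one-hot), so X returned by normalize_df has shape
# (n_samples, SEQUENCE_LENGTH, 57).
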
# Helper variables
SEQUENCE_LENGTH = 8  # Sequence length
EPOCHS = 1
BATCH_SIZE = 16
NAME = f"{SEQUENCE_LENGTH}-sequence-{int(time.time())}"

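# Note (assumption about intent): smooth_plot and plot_from_epoch below skip
# the first 10 epochs, so EPOCHS should be well above 10 for those plot
# panels to contain data.
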
# Dataset preparation
file_to_open = "db_csv_f.csv"
csv_dataset = pd.read_csv(file_to_open, sep=',', header=0, index_col=False, skipinitialspace=True)
main_df = DataFrame(csv_dataset)

main_df = main_df[['unix_time', 'week_nr', 'work_pos', 'temp_C', 'p_3fh']].dropna()
main_df.set_index('unix_time', inplace=True)

# Normalization statistics (min/max of temp_C); note they are computed
# before the train/validation split, so the validation data influences them
train_stats = main_df.copy()
train_stats.pop('p_3fh')
train_stats.pop('week_nr')
train_stats.pop('work_pos')
train_stats = train_stats.describe()
train_stats = train_stats.transpose()

# Adding information about the power change (currently disabled)
# main_df['future_power'] = main_df['p_3fh'].shift(-PERIOD_TO_PREDICT)
# main_df['target'] = list(map(classify, main_df['p_3fh'], main_df['future_power']))
main_df.dropna(inplace=True)

# Split into training and validation sets (last 15% of the timeline for validation)
unix_times = sorted(main_df.index.values)
last_pct_unix_time = unix_times[-int(0.15 * len(unix_times))]

validation_main_df = main_df[(main_df.index >= last_pct_unix_time)]
main_df = main_df[(main_df.index < last_pct_unix_time)]

train_x, train_y = normalize_df(main_df)
validation_x, validation_y = normalize_df(validation_main_df)
print(train_x.shape[1:])

# Model definition
def build_model():
    model = Sequential()
    model.add(CuDNNLSTM(32, input_shape=train_x.shape[1:], return_sequences=True))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

    model.add(CuDNNLSTM(16))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

    model.add(Dense(8, activation='relu'))
    model.add(Dropout(0.1))

    model.add(Dense(4, activation='relu'))
    model.add(Dropout(0.1))

    # Single linear output for power regression
    model.add(Dense(1, activation='linear'))

    opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)
    model.compile(optimizer=opt, loss='mae', metrics=['mae', 'mse'])
    return model

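# CuDNNLSTM runs only on a CUDA-capable GPU. A sketch of a CPU-compatible
# substitution (assuming otherwise identical hyperparameters):
#
#     from tensorflow.keras.layers import LSTM
#     model.add(LSTM(32, input_shape=train_x.shape[1:], return_sequences=True))
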
# k-fold cross-validation
folds = 4
num_val_samples = len(train_x) // folds
val_mean_abs_error = []

for x in range(folds):
    print("Processing fold #", x + 1)
    val_data = train_x[x * num_val_samples: (x + 1) * num_val_samples]
    val_target = train_y[x * num_val_samples: (x + 1) * num_val_samples]

    partial_train_data = np.concatenate(
                         [train_x[:x * num_val_samples],
                          train_x[(x + 1) * num_val_samples:]],
                         axis=0)
    partial_train_target = np.concatenate(
                         [train_y[:x * num_val_samples],
                          train_y[(x + 1) * num_val_samples:]],
                         axis=0)
    model = build_model()
    model.summary()
    # Train on the remaining folds and validate on the held-out fold
    history = model.fit(partial_train_data, partial_train_target,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        verbose=1,
                        validation_data=(val_data, val_target),
                        callbacks=[])
    val_mean_abs_error.append(history.history['val_mean_absolute_error'])

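# Note: only the model from the last fold is saved below; the per-fold
# validation errors are kept in val_mean_abs_error for plotting.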
model.save('model')

# Smoothed validation curve (exponential moving average, skipping the first 10 epochs)
def smooth_plot(points, factor=0.9):
    if not isinstance(points, list):
        points = points.ravel()
    smooth_points = []
    for x in range(10, len(points)):
        if smooth_points:
            previous = smooth_points[-1]
            smooth_points.append(previous * factor + points[x] * (1 - factor))
        else:
            smooth_points.append(points[x])
    return smooth_points

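# Illustrative example: for points[10:] == [1, 2, 2] and factor == 0.9 the
# smoothed values are [1, 1*0.9 + 2*0.1, 1.1*0.9 + 2*0.1] == [1, 1.1, 1.19],
# i.e. an exponential moving average of the raw error curve.
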
# Plot values from a given epoch onward (default: the 10th epoch)
def plot_from_epoch(points, epoch=10):
    if not isinstance(points, list):
        points = points.ravel()
    points_from_epoch = []
    for x in range(epoch, len(points)):
        points_from_epoch.append(points[x])
    return points_from_epoch

# Mean error as a function of the number of folds in the k-fold cross-validation
def plot_error(mean_abs, folds):
    fig, axs = plt.subplots(3, 1)
    line_labels = []

    # Average the error curves over all folds, epoch by epoch
    average_mean_abs = [
        np.mean([x[i] for x in mean_abs]) for i in range(EPOCHS)]
    smooth_average_mean_abs = smooth_plot(average_mean_abs)

    for i in range(len(mean_abs)):
        axs[0].plot(range(1, EPOCHS + 1), mean_abs[i])
        axs[1].plot(range(10, EPOCHS), plot_from_epoch(mean_abs[i]))
        axs[2].plot(range(10, EPOCHS), smooth_plot(mean_abs[i]))
        line_labels.append(f'Fold-{i}')
    axs[0].plot(range(1, EPOCHS + 1), average_mean_abs, marker=".", label='Mean value')
    axs[1].plot(range(10, EPOCHS), plot_from_epoch(average_mean_abs), marker=".")
    axs[2].plot(range(10, EPOCHS), smooth_average_mean_abs, marker=".")
    line_labels.append('Mean value')

    fig.text(0.05, 0.5, 'Mean absolute error', horizontalalignment='center',
             rotation='vertical', verticalalignment='center', fontsize=16)
    fig.text(0.5, 0.06, 'Epoch', fontsize=16)

    axs[0].set_title('Absolute error plot', fontsize=16)
    axs[2].set_title('Smoothed plot', fontsize=16)

    # Create the legend from the line objects on the first panel
    fig.legend(axs[0].get_lines(),     # The line objects
               labels=line_labels,     # The labels for each line
               loc='upper right',      # Position of the legend
               borderaxespad=0.1,      # Small spacing around the legend box
               title='Legend')         # Title for the legend

    plt.subplots_adjust(hspace=0.5)
    plt.show()

val_mean_abs_error = np.asarray(val_mean_abs_error)
plot_error(val_mean_abs_error, folds)

# TensorBoard and ModelCheckpoint configuration (disabled)
# tensorboard = TensorBoard(log_dir=f'logs/{NAME}')

# filepath = "RNN_-{EPOCHS:02d}-{val_acc:.3f}"
# checkpoint = ModelCheckpoint("models/{}.model".format(filepath), monitor='val_acc', verbose=1, save_best_only=True, mode='max')

# Prediction plot
# Forecast for the final version of the model
model = models.load_model('model')
predictions = model.predict(validation_x).flatten()
plt.plot(validation_y, label='Test data')
plt.plot(predictions, label='Prediction')
plt.title("Prediction vs. test data")
plt.legend()
plt.show()
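
# Optional sanity check (suggested addition, not part of the original script):
# mean_squared_error is already imported from sklearn above
mse = mean_squared_error(validation_y, predictions)
print(f"Validation MSE: {mse:.4f}")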