CGC_Codes

NN time prediction

Nov 11th, 2018
750
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.88 KB | None | 0 0
  1. import math
  2. import numpy as np
  3. import pandas as pd
  4.  
  5. class DataLoader():
  6.    
  7.  
  8.     def __init__(self, filename, split, cols):
  9.         dataframe = pd.read_csv(filename)
  10.         i_split = int(len(dataframe) * split)
  11.         self.data_train = dataframe.get(cols).values[:i_split]
  12.         self.data_test  = dataframe.get(cols).values[i_split:]
  13.         self.len_train  = len(self.data_train)
  14.         self.len_test   = len(self.data_test)
  15.         self.len_train_windows = None
  16.  
  17.     def get_test_data(self, seq_len, normalise):
  18.        
  19.         data_windows = []
  20.         for i in range(self.len_test - seq_len):
  21.             data_windows.append(self.data_test[i:i+seq_len])
  22.  
  23.         data_windows = np.array(data_windows).astype(float)
  24.         data_windows = self.normalise_windows(data_windows, single_window=False) if normalise else data_windows
  25.  
  26.         x = data_windows[:, :-1]
  27.         y = data_windows[:, -1, [0]]
  28.         return x,y
  29.  
  30.     def get_train_data(self, seq_len, normalise):
  31.        
  32.         data_x = []
  33.         data_y = []
  34.         for i in range(self.len_train - seq_len):
  35.             x, y = self._next_window(i, seq_len, normalise)
  36.             data_x.append(x)
  37.             data_y.append(y)
  38.         return np.array(data_x), np.array(data_y)
  39.  
  40.     def generate_train_batch(self, seq_len, batch_size, normalise):
  41.        
  42.         i = 0
  43.         while i < (self.len_train - seq_len):
  44.             x_batch = []
  45.             y_batch = []
  46.             for b in range(batch_size):
  47.                 if i >= (self.len_train - seq_len):
  48.                     # stop-condition for a smaller final batch if data doesn't divide evenly
  49.                     yield np.array(x_batch), np.array(y_batch)
  50.                     i = 0
  51.                 x, y = self._next_window(i, seq_len, normalise)
  52.                 x_batch.append(x)
  53.                 y_batch.append(y)
  54.                 i += 1
  55.             yield np.array(x_batch), np.array(y_batch)
  56.  
  57.     def _next_window(self, i, seq_len, normalise):
  58.        
  59.         window = self.data_train[i:i+seq_len]
  60.         window = self.normalise_windows(window, single_window=True)[0] if normalise else window
  61.         x = window[:-1]
  62.         y = window[-1, [0]]
  63.         return x, y
  64.  
  65.     def normalise_windows(self, window_data, single_window=False):
  66.        
  67.         normalised_data = []
  68.         window_data = [window_data] if single_window else window_data
  69.         for window in window_data:
  70.             normalised_window = []
  71.             for col_i in range(window.shape[1]):
  72.                 normalised_col = [((float(p) / float(window[0, col_i])) - 1) for p in window[:, col_i]]
  73.                 normalised_window.append(normalised_col)
  74.             normalised_window = np.array(normalised_window).T # reshape and transpose array back into original multidimensional format
  75.             normalised_data.append(normalised_window)
  76. return np.array(normalised_data)
Advertisement
Add Comment
Please, Sign In to add comment