SHARE
TWEET

No edits

a guest Jan 24th, 2020 74 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import math
  2. import os
  3. import numpy as np
  4. import pandas as pd
  5. import matplotlib.pyplot as plt
  6.  
  7. from numpy.testing import assert_allclose
  8.  
  9. from keras.models import Sequential, load_model
  10. from keras.layers.recurrent import LSTM
  11. from keras.layers.core import Dense, Activation, Dropout
  12. from keras.callbacks import EarlyStopping
  13.  
  14. from keras.callbacks import ModelCheckpoint
  15.  
  16. from sklearn.preprocessing import MinMaxScaler
  17. from sklearn.metrics import mean_squared_error
  18. from sklearn.utils import shuffle
  19.  
  20. import pickle
  21.  
  22. # Original data set retrieved from here:
  23. # https://datamarket.com/data/set/22u3/international-airline-passengers-monthly-totals-in-thousands-jan-49-dec-60#!ds=22u3&display=line
  24.  
  25. INPUT_PATH = "/content/drive/My Drive/VIP"
  26. OUTPUT_PATH = "/content/drive/My Drive/output"
  27.  
  28. data = pd.read_csv(os.path.join(INPUT_PATH, "GOOG.csv"),
  29.                       usecols = [1],
  30.                       engine = "python",
  31.                       skipfooter = 3)
  32. data.head()
  33.  
  34. plt.figure(figsize = (15, 5))
  35. plt.plot(data, label = "Stock Price")
  36. plt.xlabel("Time")
  37. plt.ylabel("Price")
  38. plt.title("Google stock price 2010-2020")
  39. plt.legend()
  40. plt.show()
  41.  
  42. # Get the raw data values from the pandas data frame.
  43. data_raw = data.values.astype("float32")
  44.  
  45. # We apply the MinMax scaler from sklearn
  46. # to normalize data in the (0, 1) interval.
  47. scaler = MinMaxScaler(feature_range = (0, 1))
  48. dataset = scaler.fit_transform(data_raw)
  49.  
  50. # Print a few values.
  51. dataset[0:5]
  52.  
  53. # Using 60% of data for training, 40% for validation.
  54. TRAIN_SIZE = 0.90
  55.  
  56. train_size = int(len(dataset) * TRAIN_SIZE)
  57. test_size = len(dataset) - train_size
  58. train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
  59. print("Number of entries (training set, test set): " + str((len(train), len(test))))
  60.  
  61.  
  62. #Saving model params
  63. vec_size = 100
  64. n_units = 10
  65.  
  66. x_train = np.random.rand(500, 10, vec_size)
  67. y_train = np.random.rand(500, vec_size)
  68.  
  69. model = Sequential()
  70. model.add(LSTM(n_units, input_shape=(None, vec_size), return_sequences=True))
  71. model.add(Dropout(0.2))
  72. model.add(LSTM(n_units, return_sequences=True))
  73. model.add(Dropout(0.2))
  74. model.add(LSTM(n_units))
  75. model.add(Dropout(0.2))
  76. model.add(Dense(vec_size, activation='linear'))
  77. model.compile(loss='mean_squared_error', optimizer='adam')
  78.  
  79.  
  80. # FIXME: This helper function should be rewritten using numpy's shift function. See below.
  81. def create_dataset(dataset, window_size = 1):
  82.     data_X, data_Y = [], []
  83.     for i in range(len(dataset) - window_size - 1):
  84.         a = dataset[i:(i + window_size), 0]
  85.         data_X.append(a)
  86.         data_Y.append(dataset[i + window_size, 0])
  87.     return(np.array(data_X), np.array(data_Y))
  88.  
  89. # Create test and training sets for one-step-ahead regression.
  90. window_size = 60
  91. train_X, train_Y = create_dataset(train, window_size)
  92. test_X, test_Y = create_dataset(test, window_size)
  93. print("Original training data shape:")
  94. print(train_X.shape)
  95.  
  96. # Reshape the input data into appropriate form for Keras.
  97. train_X = np.reshape(train_X, (train_X.shape[0], 1, train_X.shape[1]))
  98. test_X = np.reshape(test_X, (test_X.shape[0], 1, test_X.shape[1]))
  99. print("New training data shape:")
  100. print(train_X.shape)
  101.  
  102.  
  103. def fit_model(train_X, train_Y, window_size = 1):
  104.     model = Sequential()
  105.    
  106.     model.add(LSTM(4,
  107.                    input_shape = (1, window_size)))
  108.     model.add(Dense(1))
  109.     model.compile(loss = "mean_squared_error",
  110.                   optimizer = "adam")
  111.     model.fit(train_X,
  112.               train_Y,
  113.               epochs = 100,
  114.               batch_size = 60,
  115.               verbose = 2)
  116.    
  117.     return(model)
  118.  
  119.  
  120.  
  121. # Fit the first model.
  122. model1 = fit_model(train_X, train_Y, window_size)
  123.  
  124.  
  125. def predict_and_score(model, X, Y):
  126.     # Make predictions on the original scale of the data.
  127.     pred = scaler.inverse_transform(model.predict(X))
  128.     # Prepare Y data to also be on the original scale for interpretability.
  129.     orig_data = scaler.inverse_transform([Y])
  130.     # Calculate RMSE.
  131.     score = math.sqrt(mean_squared_error(orig_data[0], pred[:, 0]))
  132.     return(score, pred)
  133.  
  134. rmse_train, train_predict = predict_and_score(model1, train_X, train_Y)
  135. rmse_test, test_predict = predict_and_score(model1, test_X, test_Y)
  136.  
  137. print("Training data score: %.2f RMSE" % rmse_train)
  138. print("Test data score: %.2f RMSE" % rmse_test)
  139.  
  140.  
  141. # Start with training predictions.
  142. train_predict_plot = np.empty_like(dataset)
  143. train_predict_plot[:, :] = np.nan
  144. train_predict_plot[window_size:len(train_predict) + window_size, :] = train_predict
  145.  
  146. # Add test predictions.
  147. test_predict_plot = np.empty_like(dataset)
  148. test_predict_plot[:, :] = np.nan
  149. test_predict_plot[len(train_predict) + (window_size * 2) + 1:len(dataset) - 1, :] = test_predict
  150.  
  151. model = model1
  152. model.summary()
  153.  
  154.  
  155.  
  156. # Create the plot.
  157. plt.figure(figsize = (15, 5))
  158. plt.plot(scaler.inverse_transform(dataset), label = "True value")
  159. plt.plot(train_predict_plot, label = "Training set prediction")
  160. plt.plot(test_predict_plot, label = "Test set prediction")
  161. plt.xlabel("Price")
  162. plt.ylabel("Time")
  163. plt.title("Comparison true vs. predicted training / test")
  164. plt.legend()
  165. plt.show()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Top