Advertisement
Guest User

Untitled

a guest
Nov 22nd, 2018
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.48 KB | None | 0 0
  1. # load and clean-up data
  2. from keras import Sequential, Input, Model
  3. from keras.callbacks import ModelCheckpoint, TensorBoard
  4. from keras.engine.saving import load_model
  5. from keras.layers import LSTM, RepeatVector, TimeDistributed, Dense, Masking
  6. from keras.utils import plot_model
  7. import matplotlib.pyplot as plt
  8. from numpy import nan, zeros
  9. from numpy import isnan
  10. from pandas import read_csv
  11. from pandas import to_numeric
  12. import pandas as pd
  13. import datetime as dt
  14. import string
  15. import numpy as np
  16. from numpy import array
  17. import os
  18.  
  19. from sklearn.model_selection import train_test_split
  20.  
  21. os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
  22.  
  23.  
  24. def str_2_date(date):
  25. return pd.to_datetime(date, format='%m/%d/%Y')
  26.  
  27.  
  28. def build_data(data_file, sequence_length=60):
  29. frame = read_csv(data_file, sep="\t")
  30. mask = frame['Rate Class'] == 'Basic Water and Sewer'
  31. frame = frame[mask]
  32. del mask
  33. delta = str_2_date(frame['Service End Date']) - str_2_date(frame['Service Start Date'])
  34. frame['time_window'] = [diff.days + 1 for diff in delta]
  35. frame.drop(labels=['Rate Class', 'Service End Date', 'Service Start Date'], axis=1, inplace=True)
  36. frame.rename(index=str, columns={"Meter Number": "id", "Consumption (HCF)": "qty", "time_window": "win"},
  37. inplace=True)
  38.  
  39. groups = frame.groupby(['id'])
  40. frame.to_csv('ny_todatetime.csv')
  41. idx = []
  42. data = zeros((len(groups), sequence_length, 2))
  43. # the last dimension represents the pair [observation_days, consumption]
  44. loop = 0
  45. # max_x = -1
  46. # min_x = 10000
  47.  
  48. for group in groups:
  49. gr_frame = group[1]
  50. inv_size = -len(gr_frame)
  51. # asd = len(gr_frame)
  52.  
  53. idx.append(group[0])
  54. data[loop, range(inv_size, 0), 0] = gr_frame['win'].values # left 0-padding
  55. data[loop, range(inv_size, 0), 1] = gr_frame['qty'].values # left 0-padding
  56.  
  57. # if asd > max_x:
  58. # max_x = asd
  59. #
  60. # if asd < min_x:
  61. # min_x = asd
  62.  
  63. loop += 1
  64.  
  65. return idx, data
  66.  
  67.  
  68. idx, data = build_data('ny.txt', 60)
  69. print(data.shape)
  70.  
  71. from sklearn.model_selection import train_test_split
  72. X_train, X_test = train_test_split(data, test_size=0.2, random_state=40)
  73.  
  74.  
  75. mu, sigma = 0, 0.1
  76. noise = np.random.normal(mu, sigma, [130, 60, 2])
  77.  
  78. y_test = X_test + noise
  79.  
  80. y_test.to_csv('test_with_noise.csv')
  81. X_train.shape
  82.  
  83.  
  84. def build_network(n_cells, n_factors=16, sequence_length=60, n_features=2):
  85. input_layer = Input(shape=(sequence_length, n_features))
  86. mask_layer = Masking(mask_value=0., input_shape=(sequence_length, n_features))(input_layer)
  87. recurrent = LSTM(n_cells, return_sequences=True)(mask_layer)
  88. encoded = Dense(128, activation='relu')(recurrent)
  89. encoded = Dense(64, activation='relu')(encoded)
  90. latent = Dense(n_factors, activation='relu')(encoded)
  91. decoded = Dense(128, activation='relu')(latent)
  92. decoded = Dense(64, activation='relu')(decoded)
  93. output_layer = Dense(n_features, activation='relu')(decoded)
  94.  
  95. sequence_autoencoder = Model(input_layer, output_layer)
  96. sequence_autoencoder.compile(loss='mse', optimizer='adam', metrics=['mse'])
  97.  
  98. sequence_autoencoder.summary()
  99.  
  100. return sequence_autoencoder
  101.  
  102.  
  103. sequence_autoencoder = build_network(11, n_factors=16, sequence_length=60, n_features=2)
  104. checkpointer = ModelCheckpoint(filepath="model.h5",
  105. verbose=0,
  106. save_best_only=True)
  107. tensorboard = TensorBoard(log_dir='./logs',
  108. histogram_freq=0,
  109. write_graph=True,
  110. write_images=True)
  111. history = sequence_autoencoder.fit(X_train, X_train,
  112. epochs=50,
  113. batch_size=128,
  114. shuffle=True,
  115. validation_data=(X_test, X_test),
  116. verbose=1,
  117. callbacks=[checkpointer, tensorboard]).history
  118.  
  119. sequence_autoencoder = load_model('model.h5')
  120. plt.plot(history['loss'])
  121. plt.plot(history['val_loss'])
  122. plt.plot(history['val_mean_squared_error'])
  123. plt.title('model loss')
  124. plt.ylabel('loss')
  125. plt.xlabel('epoch')
  126. plt.legend(['train', 'test'], loc='upper right')
  127. plt.show()
  128.  
  129. predictions = sequence_autoencoder.predict(X_test)
  130. mse = np.mean(np.power(X_test - predictions, 2), axis=1)
  131. error_df = pd.DataFrame({'reconstruction_error': mse,
  132. 'true_class': y_test})
  133. error_df.describe()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement