Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # load and clean-up data
- from keras import Sequential, Input, Model
- from keras.callbacks import ModelCheckpoint, TensorBoard
- from keras.engine.saving import load_model
- from keras.layers import LSTM, RepeatVector, TimeDistributed, Dense, Masking
- from keras.utils import plot_model
- import matplotlib.pyplot as plt
- from numpy import nan, zeros
- from numpy import isnan
- from pandas import read_csv
- from pandas import to_numeric
- import pandas as pd
- import datetime as dt
- import string
- import numpy as np
- from numpy import array
- import os
- from sklearn.model_selection import train_test_split
# Prepend the Graphviz binaries to PATH so keras.utils.plot_model can find
# `dot` on Windows (pydot shells out to it when rendering model diagrams).
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
def str_2_date(date):
    """Parse a 'MM/DD/YYYY' string (or a Series of them) into pandas datetimes."""
    fmt = '%m/%d/%Y'
    return pd.to_datetime(date, format=fmt)
def build_data(data_file, sequence_length=60):
    """Load the water-consumption extract and build left-padded sequences.

    Reads a tab-separated file, keeps only 'Basic Water and Sewer' rows,
    derives each row's billing-window length in days, and groups rows by
    meter id into a dense array.

    Side effect: writes the cleaned frame to 'ny_todatetime.csv'.

    Args:
        data_file: path to the tab-separated source file.  Must contain the
            columns 'Rate Class', 'Service Start Date', 'Service End Date',
            'Meter Number' and 'Consumption (HCF)'.
        sequence_length: fixed length of each meter's sequence.  Shorter
            histories are left-padded with zeros; longer ones keep only
            their last `sequence_length` rows (the original indexing raised
            on overlong groups).

    Returns:
        idx: list of meter ids, one per row of `data`.
        data: float array of shape (n_meters, sequence_length, 2) whose last
            axis holds the pair [observation_days, consumption].
    """
    frame = read_csv(data_file, sep="\t")
    frame = frame[frame['Rate Class'] == 'Basic Water and Sewer']

    # Billing-window length in days, inclusive of both endpoints.
    delta = str_2_date(frame['Service End Date']) - str_2_date(frame['Service Start Date'])
    frame['time_window'] = delta.dt.days + 1

    frame.drop(labels=['Rate Class', 'Service End Date', 'Service Start Date'],
               axis=1, inplace=True)
    frame.rename(index=str,
                 columns={"Meter Number": "id", "Consumption (HCF)": "qty",
                          "time_window": "win"},
                 inplace=True)
    frame.to_csv('ny_todatetime.csv')  # kept for downstream inspection

    # Group by the scalar key: groupby(['id']) yields tuple keys on
    # pandas >= 2.0, which would pollute `idx`.
    groups = frame.groupby('id')
    idx = []
    data = zeros((len(groups), sequence_length, 2))
    for row, (meter_id, gr_frame) in enumerate(groups):
        idx.append(meter_id)
        gr_frame = gr_frame.tail(sequence_length)  # clip overlong histories
        start = sequence_length - len(gr_frame)
        data[row, start:, 0] = gr_frame['win'].values  # left 0-padding
        data[row, start:, 1] = gr_frame['qty'].values  # left 0-padding
    return idx, data
idx, data = build_data('ny.txt', 60)
print(data.shape)

# Hold out 20% of the meters for evaluation.
X_train, X_test = train_test_split(data, test_size=0.2, random_state=40)

# Noisy copy of the test set for robustness probing.  Sizing the noise from
# X_test.shape replaces the hard-coded [130, 60, 2], which only worked for
# one particular dataset size.
mu, sigma = 0, 0.1
noise = np.random.normal(mu, sigma, X_test.shape)
y_test = X_test + noise

# y_test is a numpy ndarray (no .to_csv method); flatten each
# (sequence_length, 2) sample to one CSV row instead.
pd.DataFrame(y_test.reshape(len(y_test), -1)).to_csv('test_with_noise.csv')
print(X_train.shape)
def build_network(n_cells, n_factors=16, sequence_length=60, n_features=2):
    """Build and compile the LSTM autoencoder.

    The encoder is an LSTM (returning the full sequence) followed by two
    dense layers that funnel into an `n_factors`-wide bottleneck; the
    decoder mirrors the dense stack and reconstructs `n_features` values
    per timestep.  Zero timesteps are masked so left-padding is ignored.

    Args:
        n_cells: number of LSTM units.
        n_factors: width of the latent bottleneck layer.
        sequence_length: number of timesteps per sample.
        n_features: values per timestep.

    Returns:
        A compiled Keras Model (mse loss, adam optimizer).
    """
    inputs = Input(shape=(sequence_length, n_features))
    masked = Masking(mask_value=0., input_shape=(sequence_length, n_features))(inputs)
    hidden = LSTM(n_cells, return_sequences=True)(masked)

    # Encoder funnel down to the latent representation.
    hidden = Dense(128, activation='relu')(hidden)
    hidden = Dense(64, activation='relu')(hidden)
    bottleneck = Dense(n_factors, activation='relu')(hidden)

    # Decoder expands back out and reconstructs the per-timestep features.
    hidden = Dense(128, activation='relu')(bottleneck)
    hidden = Dense(64, activation='relu')(hidden)
    outputs = Dense(n_features, activation='relu')(hidden)

    model = Model(inputs, outputs)
    model.compile(loss='mse', optimizer='adam', metrics=['mse'])
    model.summary()
    return model
# Train the autoencoder to reconstruct its own input: X_train serves as both
# input and target, and the noise-free X_test pair is used for validation.
sequence_autoencoder = build_network(11, n_factors=16, sequence_length=60, n_features=2)
# Persist only the best weights seen during training (ModelCheckpoint's
# default monitor is val_loss when save_best_only=True).
checkpointer = ModelCheckpoint(filepath="model.h5",
                               verbose=0,
                               save_best_only=True)
tensorboard = TensorBoard(log_dir='./logs',
                          histogram_freq=0,
                          write_graph=True,
                          write_images=True)
history = sequence_autoencoder.fit(X_train, X_train,
                                   epochs=50,
                                   batch_size=128,
                                   shuffle=True,
                                   validation_data=(X_test, X_test),
                                   verbose=1,
                                   callbacks=[checkpointer, tensorboard]).history
# Reload the checkpointed best model rather than keeping last-epoch weights.
sequence_autoencoder = load_model('model.h5')
# Training curves: train loss, validation loss, and the validation MSE metric.
plt.plot(history['loss'])
plt.plot(history['val_loss'])
# The metric key name differs across Keras versions
# ('val_mean_squared_error' in older releases, 'val_mse' in newer ones).
val_mse_key = 'val_mean_squared_error' if 'val_mean_squared_error' in history else 'val_mse'
plt.plot(history[val_mse_key])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val', 'val_mse'], loc='upper right')  # one label per curve
plt.show()

predictions = sequence_autoencoder.predict(X_test)
# Per-sample reconstruction error: average the squared error over the whole
# (sequence_length, n_features) sample.  The original axis=1 produced an
# (n_samples, 2) array, which cannot be stored as a single DataFrame column.
mse = np.mean(np.power(X_test - predictions, 2), axis=(1, 2))
# NOTE(review): y_test is the 3-D noisy test set, not a class label, so it
# cannot be a DataFrame column; keep only the reconstruction error.
error_df = pd.DataFrame({'reconstruction_error': mse})
print(error_df.describe())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement