Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Wed Aug 16 10:18:24 2017
- @author: thanvaf
- """
- ### Libraries ###
- from influxdb import InfluxDBClient
- import pandas as pd
- import datetime
- from datetime import datetime, timedelta
- from datetime import timedelta
- import numpy as np
- import matplotlib.pyplot as plt
- import math
- from keras.models import Sequential
- from keras.layers import Dense
- from keras.layers import LSTM
- from sklearn.preprocessing import MinMaxScaler
- from sklearn.metrics import mean_squared_error
- import time
- ### Modules ###
- def roundTime(dt=None, roundTo=60):
- """Round a datetime object to any time laps in seconds
- dt : datetime.datetime object, default now.
- roundTo : Closest number of seconds to round to, default 1 minute.
- Author: Thierry Husson 2012 - Use it as you want but don't blame me.
- """
- if dt == None : dt = datetime.datetime.now()
- seconds = (dt.replace(tzinfo=None) - dt.min).seconds
- rounding = (seconds+roundTo/2) // roundTo * roundTo
- return dt + timedelta(0,rounding-seconds,-dt.microsecond)
- def datetime_range(start, end, delta):
- current = start
- while current < end:
- yield current
- current += delta
- # convert an array of values into a dataset matrix
- def create_dataset(dataset, look_back=1):
- dataX, dataY = [], []
- for i in range(len(dataset)-look_back-1):
- a = dataset[i:(i+look_back), 0]
- dataX.append(a)
- dataY.append(dataset[i + look_back, 0])
- return np.array(dataX), np.array(dataY)
- #==============================================================================
- # Database parameters
- #==============================================================================
- localhost = 'hydra.iti.gr'
- port = 8086
- username = 'root'
- password = 'root'
- databaseN = '90676b79_3273_400e_8b57_368ca37e9f8c'
- assignmentToken0 = '948a55d9-722e-4ec6-a176-8d1ea9c04c83'
- assignmentToken1 = '8962cf27-704d-4f32-8d44-ae67762ae87b'
- assignmentToken2 = '79c122e9-14ec-4ae4-a932-3efd208256cc'
- assignmentToken3 = '721b460c-ad4f-4921-a71f-0f87cfb69ef0'
- measurement = 'mx:kwhCons'
- time0 = 'time'
- #==============================================================================
- # Various parameters (Initialization)
- #==============================================================================
- listamin = [];listamax = []; listamedian = []
- #==============================================================================
- # Dates parameters
- #==============================================================================
- N = 31
- currentDate=datetime.utcnow()
- endDate=(currentDate - timedelta(days = 1)).strftime('%Y-%m-%dT23:59:59.999Z')
- startDate=(currentDate - timedelta(days = N)).strftime('%Y-%m-%dT00:00:00.000Z')
- #==============================================================================
- # Retrieve data from database based on specified parameters
- #==============================================================================
- client = InfluxDBClient(host = localhost, port = 8086, username = 'root', password = 'root', database = databaseN)
- q = ("""SELECT "{0}","{1}" FROM events WHERE assignment = '{2}' AND time>='{3}' AND time<='{4}'""".format(time0, measurement, assignmentToken0,startDate,endDate))
- #q = ("""SELECT "{0}" FROM events WHERE assignment = '{1}' AND time>='{2}T07:00:00Z' AND time<='{3}T13:51:00Z'""".format(measurement,assignmentToken1,start_date,end_date))
- df = pd.DataFrame(client.query(q, chunked=True).get_points())
- size = df.shape
- #==============================================================================
- # Time series of interest
- #==============================================================================
- # fix random seed for reproducibility
- np.random.seed(7)
- timeSeries = df[measurement].astype(float)
- timeSeries = [[timeSeries[i]] for i in range(len(timeSeries)-500,len(timeSeries))]
- dataset = np.array(timeSeries)
- dataset = dataset.astype('float32')
- # normalize the dataset
- scaler = MinMaxScaler(feature_range=(0, 1))
- dataset = scaler.fit_transform(dataset)
- # split into train and test sets
- train_size = int(len(dataset) * (1.0-0.192))
- test_size = len(dataset) - train_size
- train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
- # reshape into X=t and Y=t+1
- look_back = 4
- trainX, trainY = create_dataset(train, look_back)
- testX, testY = create_dataset(test, look_back)
- # reshape input to be [samples, time steps, features]
- trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
- testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
- start = time.time()
- hidden_neurons = 30
- epochs = 1500
- # create and fit the LSTM network
- model = Sequential()
- model.add(LSTM(hidden_neurons, input_shape=(1, look_back))) # 3 is the number of LSTM blocks or neurons
- model.add(Dense(1))
- model.compile(loss='mean_squared_error', optimizer='adam', metrics = ['accuracy'])
- history = model.fit(trainX, trainY, nb_epoch=epochs, batch_size=1, verbose=2)
- end = time.time()
- overall_time = end-start
- print overall_time
- # make predictions
- trainPredict = model.predict(trainX)
- testPredict = model.predict(testX)
- # invert predictions
- trainPredict = scaler.inverse_transform(trainPredict)
- trainY = scaler.inverse_transform([trainY])
- testPredict = scaler.inverse_transform(testPredict)
- testY = scaler.inverse_transform([testY])
- # calculate root mean squared error
- trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
- print('Train Score: %.2f RMSE' % (trainScore))
- testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
- print('Test Score: %.2f RMSE' % (testScore))
- # shift train predictions for plotting
- trainPredictPlot = np.empty_like(dataset)
- trainPredictPlot[:, :] = np.nan
- trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
- # shift test predictions for plotting
- testPredictPlot = np.empty_like(dataset)
- testPredictPlot[:, :] = np.nan
- testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
- # plot baseline and predictions
- plt.plot(scaler.inverse_transform(dataset))
- plt.plot(trainPredictPlot)
- plt.plot(testPredictPlot)
- plt.legend(['Initial','Train','Test'],loc='best')
- plt.title('Epochs: '+str(epochs)+' Neurons: '+str(hidden_neurons)+' Lag: '+str(look_back))
- plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement