# Untitled

# -*- coding: utf-8 -*-
"""
Created on Mon Sep 11 11:55:08 2017

@author: thanvaf
"""

### Libraries ###
from influxdb import InfluxDBClient
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import matplotlib.pyplot as plt
from itertools import islice
import time
import math
from pytz import all_timezones
import glob, os
from collections import Counter
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report

#import statsmodels.api as sm
#from statsmodels.sandbox.regression.predstd import wls_prediction_std

### Modules ###
def roundTime(dt=None, roundTo=60):
    """Round a datetime object to any time lapse in seconds.

    dt : datetime.datetime object, default now.
    roundTo : Closest number of seconds to round to, default 1 minute.
    Returns a new datetime with the microseconds removed.
    Author: Thierry Husson 2012 - Use it as you want but don't blame me.
    """
    if dt is None:
        # `datetime` is the class itself here (from datetime import datetime),
        # so the call must be datetime.now(), not datetime.datetime.now().
        dt = datetime.now()
    # Whole seconds elapsed since midnight of dt's own day
    seconds = (dt.replace(tzinfo=None) - dt.min).seconds
    # Nearest multiple of roundTo (half a step is added before flooring)
    rounding = (seconds + roundTo / 2) // roundTo * roundTo
    return dt + timedelta(0, rounding - seconds, -dt.microsecond)

def datetime_range(start, end, delta):
    """Yield successive values from ``start`` up to, but excluding, ``end``.

    Each yielded value is ``delta`` after the previous one. Nothing is
    yielded when ``start`` is not strictly less than ``end``.
    """
    moment = start
    while True:
        if not moment < end:
            return
        yield moment
        moment += delta
#==============================================================================
# Database parameters
#==============================================================================
# InfluxDB endpoint
localhost, port = 'smarthome.iti.gr', 8086
# NOTE(review): credentials are kept in plain text in this script.
username = password = 'root'
databaseN = 'e32dc40831f441bc94f0f3830b1954a0'
# One [assignment token, measurement field] pair per queried series
assignmentToken0 = [
    ['76080e05-a7c1-46fe-a6c8-28349165c996', 'mx:W_S'],
    ['67ade86d-deea-40a9-84e9-d16c968fedc9', 'mx:W_L'],
    ['7d4e8313-f362-481d-b030-1865c2c02b27', 'mx:consumption'],
    ['e9210e17-a691-4bab-b878-69a278c185b9', 'mx:consumption'],
    ['5b7387c0-b8a6-48be-a74d-7e497e6bdaa4', 'mx:consumption'],
]
#measurement = 'mx:consumption'
# Name of the timestamp field.
# NOTE(review): this rebinds `time`, hiding the `time` module imported above.
time = 'time'
#==============================================================================
# Dates parameters
#==============================================================================
N = 10  # number of days to look back from the current date
currentDate = datetime.utcnow()
#endDate=(currentDate - timedelta(days = 1)).strftime('%Y-%m-%dT23:59:59.999Z')
# Query window: from 00:00:00.000 of the day N days ago
# to 23:59:59.999 of the current day.
startDate, endDate = (
    (currentDate - timedelta(days=N)).strftime('%Y-%m-%dT00:00:00.000Z'),
    currentDate.strftime('%Y-%m-%dT23:59:59.999Z'),
)
#==============================================================================
# Retrieve data from database based on specified parameters
#==============================================================================
# A single client serves every query. Its settings come from the
# "Database parameters" section instead of being repeated as literals.
client = InfluxDBClient(host=localhost, port=port, username=username,
                        password=password, database=databaseN)

_values = []      # measurement column of each assignment, cast to float
_timestamps = []  # raw timestamp column of each assignment
for i, (_token, _field) in enumerate(assignmentToken0):
    q = ("""SELECT "{0}","{1}" FROM events WHERE assignment = '{2}' AND time>='{3}' AND time<='{4}'""".format(time, _field, _token, startDate, endDate))
    #q = ("""SELECT "{0}" FROM events WHERE assignment = '{1}' AND time>='{2}T07:00:00Z' AND time<='{3}T13:51:00Z'""".format(measurement,assignmentToken1,start_date,end_date))
    df = pd.DataFrame(client.query(q, chunked=True).get_points())
    size = df.shape
    _values.append(df[_field].astype(float))
    _timestamps.append(df[time])

#==============================================================================
# Time series of interest
#==============================================================================
# Unpacked in the order of assignmentToken0:
# ground floor total, oven, fridge, dish washer, cooking hood.
gftTimeSeries, oTimeSeries, fTimeSeries, dwTimeSeries, chTimeSeries = _values
gftTimeStamp, oTimeStamp, fTimeStamp, dwTimeStamp, chTimeStamp = _timestamps


# =============================================================================
#                A C T I V I T Y    M O N I T O R I N G
# =============================================================================


# =============================================================================
#         Change the timestamp to the Europe/Athens timezone
# =============================================================================
# Parse the raw timestamps as UTC and express them in Europe/Athens time.
def _to_athens(stamps):
    """Return ``stamps`` parsed as UTC datetimes, converted to Europe/Athens.

    ``utc=True`` gives a UTC-aware result whether or not the parsed values
    already carry a timezone, so no separate ``tz_localize('UTC')`` step is
    needed (``tz_localize`` raises on values that are already tz-aware).
    """
    return pd.to_datetime(stamps, utc=True).dt.tz_convert('Europe/Athens')


groundfl_date = _to_athens(gftTimeStamp)
oven_date = _to_athens(oTimeStamp)
fridge_date = _to_athens(fTimeStamp)
dishwasher_date = _to_athens(dwTimeStamp)
cooking_hood_date = _to_athens(chTimeStamp)


# Pair each converted timestamp column with its measurement column
ground_floor = pd.concat([groundfl_date, gftTimeSeries], axis=1)
oven = pd.concat([oven_date, oTimeSeries], axis=1)
fridge = pd.concat([fridge_date, fTimeSeries], axis=1)
dishwasher = pd.concat([dishwasher_date, dwTimeSeries], axis=1)
cooking_hood = pd.concat([cooking_hood_date, chTimeSeries], axis=1)

# Write each frame to a tab-separated txt file (no header row, no index column)
ground_floor.to_csv(r'C:\Users\anasvaf\Desktop\Energy_SmartHome\total_consumption.txt', header=None, index=None, sep='\t')
oven.to_csv(r'C:\Users\anasvaf\Desktop\Energy_SmartHome\oven_consumption.txt', header=None, index=None, sep='\t')
fridge.to_csv(r'C:\Users\anasvaf\Desktop\Energy_SmartHome\fridge_consumption.txt', header=None, index=None, sep='\t')
dishwasher.to_csv(r'C:\Users\anasvaf\Desktop\Energy_SmartHome\dishwasher_consumption.txt', header=None, index=None, sep='\t')
cooking_hood.to_csv(r'C:\Users\anasvaf\Desktop\Energy_SmartHome\cooking_hood_consumption.txt', header=None, index=None, sep='\t')

# =============================================================================
#               Select particular series for processing
# =============================================================================
# Directory the consumption files are exported to earlier in this script.
# Reading from it (rather than the current working directory) makes sure the
# files loaded here are the ones that were just written.
_DATA_DIR = r'C:\Users\anasvaf\Desktop\Energy_SmartHome'


def _load_consumption(file_name, value_column):
    """Load one exported tab-separated consumption file.

    file_name : name of the file inside ``_DATA_DIR``.
    value_column : label given to the second (measurement) column.
    Returns ``(frame, stamps)``: ``frame`` holds the columns ``'time'`` and
    ``value_column``; ``stamps`` is a NumPy array of the ``'time'`` values
    as read from the file.
    """
    frame = pd.read_csv(os.path.join(_DATA_DIR, file_name), header=None, sep='\t')
    frame['time'] = frame[0]
    stamps = np.array(frame.time)
    frame[value_column] = frame[1]
    # Drop the two unnamed source columns now that labelled copies exist
    frame.drop(frame.columns[:2], axis=1, inplace=True)
    return frame, stamps


# Total power consumption
ts, tmpstmp = _load_consumption('total_consumption.txt', 'mx:W_S')

# Oven power consumption
ts1, tmpstmp1 = _load_consumption('oven_consumption.txt', 'mx:W_L')

# Fridge power consumption
ts2, tmpstmp2 = _load_consumption('fridge_consumption.txt', 'mx:consumption')

# Dishwasher power consumption
ts3, tmpstmp3 = _load_consumption('dishwasher_consumption.txt', 'mx:consumption')

# Cooking hood power consumption
ts4, tmpstmp4 = _load_consumption('cooking_hood_consumption.txt', 'mx:consumption')


# =============================================================================
#                         Process for the time
# =============================================================================
# Each frame together with the label its measurement column currently has.
# DO NOT FORGET TO CHANGE THE LABEL FOR EACH TOKEN (depends on the appliance)
_frames_and_columns = (
    (ts, 'mx:W_S'),           # ground floor total
    (ts1, 'mx:W_L'),          # oven
    (ts2, 'mx:consumption'),  # fridge
    (ts3, 'mx:consumption'),  # dishwasher
    (ts4, 'mx:consumption'),  # cooking hood
)

# Parse the time column, make it the index and print a summary of each frame
for _frame, _ in _frames_and_columns:
    _frame['time'] = pd.to_datetime(_frame['time'])
    _frame.set_index(keys='time', inplace=True)
    _frame.info()   # display info for the time series

# Give every measurement column the common name energy_consumed
for _frame, _raw_column in _frames_and_columns:
    _frame.rename(columns={_raw_column: 'energy_consumed'}, inplace=True)

# =============================================================================
#            Plot power consumptions for one day
# =============================================================================
# Window shared by all appliances except the cooking hood
_one_day = slice(datetime(2017, 9, 5), datetime(2017, 9, 6))

ts_day1 = ts[_one_day]     # total ground floor
ts1_day1 = ts1[_one_day]   # oven
ts2_day1 = ts2[_one_day]   # fridge
ts3_day1 = ts3[_one_day]   # dishwasher
# Cooking hood (plotting for the September 11 since there is no data before)
ts4_day1 = ts4[datetime(2017, 9, 11):datetime(2017, 9, 12)]

# One figure per appliance, all with the same axis labels
for _day_frame, _day_title in (
    (ts_day1, 'Total Ground Floor Energy Consumed vs. Time'),
    (ts1_day1, 'Oven Energy Consumed vs. Time'),
    (ts2_day1, 'Fridge Energy Consumed vs. Time'),
    (ts3_day1, 'Dishwasher Energy Consumed vs. Time'),
    (ts4_day1, 'Cooking Hood Energy Consumed vs. Time'),
):
    _day_frame.plot(figsize=(10,8), color='r')
    plt.xlabel('Time (s)')
    plt.ylabel('Energy Consumed (Watts)')
    plt.title(_day_title)

# =============================================================================
#            Plot power consumption for the first nine days
# =============================================================================
# The same nine-day window is applied to every appliance
_nine_days = slice(datetime(2017, 9, 5), datetime(2017, 9, 14))

ts_week1 = ts[_nine_days]     # total ground floor
ts1_week1 = ts1[_nine_days]   # oven
ts2_week1 = ts2[_nine_days]   # fridge
ts3_week1 = ts3[_nine_days]   # dishwasher
ts4_week1 = ts4[_nine_days]   # cooking hood

# One figure per appliance, all with the same axis labels
for _week_frame, _week_title in (
    (ts_week1, 'Total Ground Floor Energy Consumed vs. Time'),
    (ts1_week1, 'Oven Energy Consumed vs. Time'),
    (ts2_week1, 'Fridge Energy Consumed vs. Time'),
    (ts3_week1, 'Dishwasher Energy Consumed vs. Time'),
    (ts4_week1, 'Cooking Hood Energy Consumed vs. Time'),
):
    _week_frame.plot(figsize=(10,8), color='r')
    plt.xlabel('Time (s)')
    plt.ylabel('Energy Consumed (Watts)')
    plt.title(_week_title)

# =============================================================================
#                     Power consumption difference (daily)
# =============================================================================
# First-order differences of the one-day slices. The leading row is dropped
# because diff() has no previous sample to subtract there.
delta_ts_day1 = ts_day1.diff(1)[1:]     # total ground floor
delta_ts1_day1 = ts1_day1.diff(1)[1:]   # oven
delta_ts2_day1 = ts2_day1.diff(1)[1:]   # fridge
delta_ts3_day1 = ts3_day1.diff(1)[1:]   # dishwasher
delta_ts4_day1 = ts4_day1.diff(1)[1:]   # cooking hood

# One figure per appliance, all with the same axis labels
for _delta_frame, _delta_title in (
    (delta_ts_day1, 'Total Ground Floor Energy Consumed vs. Time'),
    (delta_ts1_day1, 'Oven Energy Consumed vs. Time'),
    (delta_ts2_day1, 'Fridge Energy Consumed vs. Time'),
    (delta_ts3_day1, 'Dishwasher Energy Consumed vs. Time'),
    (delta_ts4_day1, 'Cooking Hood Energy Consumed vs. Time'),
):
    _delta_frame.plot(figsize=(10,8), color='r')
    plt.xlabel('Time')
    plt.ylabel('Energy Consumed (Watts)')
    plt.title(_delta_title)


# =============================================================================
#   Define a sliding window for data processing (advances one sample per step)
# =============================================================================
def window(seq, n=2):
    """Yield every run of ``n`` consecutive items of ``seq`` as a tuple.

    Consecutive windows are shifted by one item. Nothing is yielded when
    ``seq`` has fewer than ``n`` items.
    """
    stream = iter(seq)
    frame = tuple(islice(stream, n))
    if len(frame) < n:
        # Input ran out before the first window could be filled
        return
    yield frame
    for newest in stream:
        # Drop the oldest item and append the newest one
        frame = (*frame[1:], newest)
        yield frame

# =============================================================================
#                Create a matrix for the energy_consumed
# =============================================================================
# Every 30-sample sliding window of the total consumption, one window per row
matrix = np.array(list(window(ts.energy_consumed, 30)))

# =============================================================================
#                    Create a matrix for the dates
# =============================================================================
# Index values (timestamps) of the same windows. Built without a loop variable
# named `time`, so the module-level `time` field name is not overwritten.
dates_matrix = np.array(list(window(ts.index, 30)))

## =============================================================================
##      Define the labels for disaggregation (based on the total consumption)
## =============================================================================
#devices = ['fridge', 'cooking hood', 'oven', 'dishwasher', 'unknown']
#labels = []
#for ts_i in matrix:
#    delta_ts_i = np.diff(ts_i)
#    if all(ts_i < 1000): #fridge
#        label = 1
#    elif all((ts_i > 1001) & (ts_i < 1500)): #Oven
#        label = 2
#    elif any((delta_ts_i <= -1500) | (delta_ts_i >= 1500)) and all(ts_i < 3000): #dishwasher
#        label = 3
#    elif any(ts_i > 2000): #cooking hood
#        label = 4
#    else: #Other Appliances (Unknown)
#        label = 5
#    labels.append(label)
#labels = np.array(labels)
#
#
#
## Count instances for each label
#d = Counter(labels)
#print (d)

# =============================================================================
#                            Normalize the data
# =============================================================================
# Z-normalization
def znormalization(ts):
    """Return ``ts`` standardized along axis 0.

    The axis-0 mean is subtracted from ``ts`` and the result is divided by
    the axis-0 standard deviation, both as computed by ``ts`` itself.
    """
    centered = ts - ts.mean(axis=0)
    return centered / ts.std(axis=0)

# Z-normalized copy of the total-consumption frame
zzzz = znormalization(ts)

# Activity label names
LABELS = ["COOKING", "WASHING DISHES", "USING THE FRIDGE"]