#!/usr/bin/python3
# Untitled
#
# One-time pyenv shell setup (run these in a shell; they are not part of this module):
#   echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc
#   echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc
#   echo 'eval "$(pyenv init -)"' >> ~/.bashrc
#   source ~/.bashrc
import numpy as np
import holidays

from datetime import date
from pymongo import MongoClient
from numpy import concatenate
from keras.models import model_from_yaml
from pandas import DataFrame, concat

from sklearn.preprocessing import MinMaxScaler


def generate_holidays_weekends(timeseries_keys):
    """Build US-holiday and weekend indicator lists for a sequence of date keys.

    Args:
        timeseries_keys: iterable of date strings in ``DD-MM-YYYY`` form
            (day first), e.g. ``"05-12-2017"`` for 5 December 2017.

    Returns:
        A tuple ``(holidays_list, weekends_list)``; both lists are aligned with
        ``timeseries_keys`` and contain ``1`` when the day is a US holiday /
        falls on Saturday or Sunday, and ``0`` otherwise.

    Raises:
        IndexError, ValueError: if a key is not a valid ``DD-MM-YYYY`` date.
    """
    us_holidays = holidays.CountryHoliday('US')

    holidays_list = []
    weekends_list = []

    for key in timeseries_keys:
        key_split = key.split('-')
        day = date(int(key_split[2]), int(key_split[1]), int(key_split[0]))

        # Look up the parsed date, not the raw string: the holidays package
        # parses string keys itself and reads ambiguous values month-first,
        # so "05-12-2017" would be checked as 12 May instead of 5 December.
        holidays_list.append(1 if day in us_holidays else 0)

        # date.weekday(): Monday is 0, so 5 and 6 are Saturday and Sunday.
        weekends_list.append(1 if day.weekday() >= 5 else 0)

    return holidays_list, weekends_list

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Turn a (multivariate) series into a table of lagged and future columns.

    Args:
        data: a list of observations (treated as one variable) or a 2-D
            array-like whose ``shape[1]`` is the number of variables.
        n_in: number of lag steps ``t-n_in .. t-1`` used as input columns.
        n_out: number of steps ``t .. t+n_out-1`` used as output columns.
        dropnan: when true, rows containing NaN (introduced by the shifting)
            are removed.

    Returns:
        A pandas DataFrame whose columns are named ``varK(t-i)``, ``varK(t)``
        and ``varK(t+i)``, ordered from the oldest lag to the furthest step.
    """
    if type(data) is list:
        var_count = 1
    else:
        var_count = data.shape[1]
    var_ids = range(1, var_count + 1)

    frame = DataFrame(data)
    shifted = []
    labels = []

    # Input part: oldest lag first (t-n_in), ending with t-1.
    for lag in reversed(range(1, n_in + 1)):
        shifted.append(frame.shift(lag))
        labels.extend('var%d(t-%d)' % (var_id, lag) for var_id in var_ids)

    # Forecast part: t, t+1, ... t+n_out-1.
    for step in range(n_out):
        shifted.append(frame.shift(-step))
        if step:
            labels.extend('var%d(t+%d)' % (var_id, step) for var_id in var_ids)
        else:
            labels.extend('var%d(t)' % var_id for var_id in var_ids)

    table = concat(shifted, axis=1)
    table.columns = labels

    return table.dropna() if dropnan else table


# define a class
class CCUPredictor:
    """Predict concurrent-user (CCU) counts for a game with pre-trained Keras models.

    One model exists per (cluster id, prediction horizon) pair; they are stored
    as attributes named ``model_c<cluster>_<days>to<days>``. Game metadata and
    CCU time series are read from the ``sherlock`` MongoDB database.
    """

    # Cluster ids and prediction horizons (in days) for which a model is loaded.
    _CLUSTER_IDS = (0, 1, 2)
    _SUPPORTED_DAYS = (1, 7, 21)

    def __init__(self):
        """Set the dataset window parameters, connect to MongoDB and load all models."""
        # Values per day in the dataset: (ccu, holiday flag, weekend flag).
        self.len_features = 3
        # Offset (in entries, counted from the end of the series) where the
        # predicted part of the window starts.
        self.pred_start = 365
        # Number of entries before ``pred_start`` that are included as history.
        self.days_in_past = 60
        # First date key (DD-MM-YYYY) that is treated as "future".
        self.future_trigger = "05-12-2017"
        self.game_details, self.ts_ccu = self.init_mongo_connection()
        self.load_models()

    def init_mongo_connection(self):
        """Connect to the local MongoDB and return the collections used here.

        Returns:
            A tuple ``(game_details, steam_player_timeseries)`` of collections
            from the ``sherlock`` database.
        """
        # SECURITY NOTE(review): the credentials are hard-coded in source.
        # They should come from configuration (an earlier revision read
        # MONGO_USER / MONGO_USER_PASSWORD / MONGO_AUTH_DATABASE from the
        # environment); left unchanged here to keep deployments working.
        client = MongoClient('localhost', 27017, username='root', password='sherlockRoot2019')
        db = client.sherlock                # if the database is not present, it will be created
        coll_gt = db.game_details        # if collection not present, it will be created
        coll_ccu = db.steam_player_timeseries

        return coll_gt, coll_ccu

    def _load_model(self, name):
        """Build the model described by app/models/<name>.yaml and load <name>.h5 weights."""
        with open('app/models/' + name + '.yaml', 'r') as yaml_file:
            loaded_model_yaml = yaml_file.read()

        # NOTE(review): model_from_yaml has been removed from newer
        # TensorFlow/Keras releases because YAML deserialization is unsafe;
        # only load trusted files and consider migrating to the JSON format.
        model = model_from_yaml(loaded_model_yaml)
        model.load_weights('app/models/' + name + '.h5')
        return model

    def load_models(self):
        """Load every (cluster, horizon) model and store it as ``self.model_c<c>_<d>to<d>``.

        Horizons are 1 day, 7 days and 21 days; for each horizon the models of
        clusters 0, 1 and 2 are loaded in that order.
        """
        for days in self._SUPPORTED_DAYS:
            for cluster in self._CLUSTER_IDS:
                name = 'model_c%d_%dto%d' % (cluster, days, days)
                setattr(self, name, self._load_model(name))
                print("Loaded model c%d%d from disk" % (cluster, days))

    def get_prediction(self, appID, prediction_days):
        """Predict the CCU of one game for the given horizon.

        Args:
            appID: value of the ``appID`` field identifying the game.
            prediction_days: prediction horizon; 1, 7 and 21 are supported.

        Returns:
            A tuple ``(ts_past, ts_future, accuracy)`` where ``ts_past`` maps
            date -> ccu, ``ts_future`` maps date -> (predicted ccu, actual ccu)
            and ``accuracy`` is the value of ``get_prediction_accuracy``.
            ``({}, {}, 0)`` is returned when the game is unknown, has no
            cluster, the horizon or cluster has no model, or no data is
            available for the prediction window.
        """
        # find_one replaces the cursor.count() check, which no longer exists
        # in current PyMongo versions.
        game = self.game_details.find_one({"appID": appID})
        if game is None:
            print("appID not found in database!")
            return {}, {}, 0

        cluster_id = game.get('cluster_id')
        if cluster_id is None:
            print("This game has no ccu")
            return {}, {}, 0

        # Validate before touching the time series so unsupported requests
        # cost no further database work.
        if prediction_days not in self._SUPPORTED_DAYS:
            print("Prediction Type not supported!")
            return {}, {}, 0

        # Without this guard an unknown cluster would skip the prediction and
        # compute an "accuracy" on the raw (ccu, holiday, weekend) tuples.
        if cluster_id not in self._CLUSTER_IDS:
            print("Cluster not supported!")
            return {}, {}, 0

        model = getattr(self, 'model_c%d_%dto%d' % (cluster_id, prediction_days, prediction_days))

        ds_past, ds_future = self.create_dataset(appID, prediction_days)
        if not ds_past or not ds_future:
            print("Not enough timeseries data for prediction!")
            return {}, {}, 0

        ts_past, ts_future = self.calculate_prediction(model, ds_past, ds_future, prediction_days)
        return ts_past, ts_future, self.get_prediction_accuracy(ts_future)

    def create_dataset(self, appID, prediction_days):
        """Split the stored CCU series of a game into a past and a future part.

        The window is cut from the keys of ``cleaned_timeseries``: it starts
        ``pred_start + days_in_past`` entries before the end and stops
        ``pred_start - prediction_days`` entries before the end. Entries from
        ``future_trigger`` onwards go to the future part, earlier ones to the
        past part.

        Args:
            appID: value of the ``appID`` field identifying the game.
            prediction_days: number of entries to include after ``pred_start``.

        Returns:
            A tuple ``(ds_past, ds_future)`` of dicts mapping a date key to
            ``(ccu, holiday flag, weekend flag)``. Both are empty when no time
            series document exists for ``appID``.
        """
        ds_past = {}
        ds_future = {}

        ccu_timeseries = self.ts_ccu.find_one({"appID": appID})
        if ccu_timeseries is None:
            print("No timeseries found for appID!")
            return ds_past, ds_future

        print("appID: ", appID)

        cleaned = ccu_timeseries['cleaned_timeseries']
        window_start = -(self.pred_start + self.days_in_past)
        window_end = -(self.pred_start - prediction_days)
        ts_dates = list(cleaned.keys())[window_start:window_end]
        holidays_list, weekends_list = generate_holidays_weekends(ts_dates)

        trigger = False

        for index, day_key in enumerate(ts_dates):
            # The switch to the future part is found by comparing date strings.
            if day_key == self.future_trigger:
                trigger = True

            entry = (cleaned[day_key], holidays_list[index], weekends_list[index])
            if trigger:
                ds_future[day_key] = entry
            else:
                ds_past[day_key] = entry

        return ds_past, ds_future

    def calculate_prediction(self, model, ds_past, ds_future, prediction_days):
        """Run ``model`` on the most recent past values and pair the result with the actuals.

        Args:
            model: object with a ``predict`` method (one of the loaded models).
            ds_past: dict date -> (ccu, holiday flag, weekend flag) used as input.
            ds_future: dict date -> (ccu, holiday flag, weekend flag) holding
                the actual values for the predicted dates.
            prediction_days: prediction horizon; also the number of input steps.

        Returns:
            A tuple ``(ts_past, ts_future)``: ``ts_past`` maps date -> ccu and
            ``ts_future`` maps date -> (predicted ccu, actual ccu).
        """
        ds = np.array(list(ds_past.values()))

        scaler = MinMaxScaler(feature_range=(0, 1))

        print("ds shape", ds.shape)
        # Only the last 2 * prediction_days rows are scaled and framed.
        scaled = scaler.fit_transform(ds[-(prediction_days * 2):, :])
        reframed = series_to_supervised(scaled, prediction_days, prediction_days)

        print("scaled shape: ", scaled.shape)
        print("reframed shape: ", reframed.shape)

        # In the forecast part of the frame (after the input columns) keep only
        # var1 (the ccu): collect every column whose name contains var2..varN.
        column_names = list(reframed)
        unwanted = ['var' + str(j) for j in range(2, self.len_features + 1)]
        clmns = []
        for i in range((prediction_days * self.len_features) + 1, reframed.shape[1]):
            for marker in unwanted:
                if marker in column_names[i]:
                    clmns.append(i)

        # drop columns we don't want to predict
        reframed.drop(reframed.columns[clmns], axis=1, inplace=True)

        print("reframed head: ", reframed.head())
        print("first X shape: ", reframed.shape)

        # The last prediction_days columns are the targets; the rest is input,
        # reshaped to (rows, time steps, values per step).
        X = reframed.values[:, :-prediction_days]
        X = X.reshape((X.shape[0], prediction_days, X.shape[1] // prediction_days))

        pred = model.predict(X)

        print("X shape: ", X.shape)
        print("pred shape: ", pred.shape)
        X = X.reshape((X.shape[0] * X.shape[1], X.shape[2]))
        pred = pred.reshape(pred.shape[0] * pred.shape[1], 1)

        # The scaler was fitted on all feature columns, so the prediction is
        # padded with the remaining input columns before inverting the
        # scaling; only the first (ccu) column is kept afterwards.
        pred = concatenate((pred, X[:, 1:]), axis=1)
        pred = scaler.inverse_transform(pred)
        pred = pred[:, 0]

        ts_past = {key: values[0] for key, values in ds_past.items()}
        ts_future = {
            key: (pred[index], values[0])
            for index, (key, values) in enumerate(ds_future.items())
        }

        return ts_past, ts_future

    def get_prediction_accuracy(self, ts_future):
        """Return the forecast accuracy as ``100 - MAPE`` (in percent).

        The calculation is based on the mean absolute percentage error (MAPE),
        which tells by how many percentage points the forecasts are off on
        average.

        https://www.relexsolutions.com/measuring-forecast-accuracy/#fa-chapter-three

        Forecast bias is unsuitable because it only looks at the sums of the
        forecast and of the ccu, so the deviations per day can still be high.

        Mean percentage error (MPE) is unsuitable as well, because positive
        and negative deviations cancel each other out.

        Args:
            ts_future: dict date -> (predicted ccu, actual ccu).

        Returns:
            ``100 - MAPE``. Days whose actual ccu is 0 are skipped because the
            percentage error is undefined for them; ``0`` is returned when no
            day can be evaluated (including an empty ``ts_future``).
        """
        errors = [
            abs(entry[0] - entry[1]) / entry[1]
            for entry in ts_future.values()
            if entry[1] != 0
        ]

        if not errors:
            return 0

        return 100 - sum(errors) / len(errors) * 100