Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc
- #echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc
- #echo 'eval "$(pyenv init -)"' >> ~/.bashrc
- #source ~/.bashrc
- #!/usr/bin/python3
- import numpy as np
- import holidays
- from datetime import date
- from pymongo import MongoClient
- from numpy import concatenate
- from keras.models import model_from_yaml
- from pandas import DataFrame, concat
- from sklearn.preprocessing import MinMaxScaler
def generate_holidays_weekends(timeseries_keys):
    """Flag every date key as US public holiday and/or weekend day.

    Args:
        timeseries_keys: Iterable of date strings in ``DD-MM-YYYY`` order
            (day first), e.g. ``"05-12-2017"``.

    Returns:
        Tuple ``(holidays_list, weekends_list)``. Both lists contain one
        ``1``/``0`` flag per key, in the same order as the input.

    Raises:
        IndexError: If a key has fewer than three ``-`` separated parts.
        ValueError: If the parts are not integers or do not form a valid date.
    """
    us_holidays = holidays.CountryHoliday('US')
    holidays_list = []
    weekends_list = []
    for key in timeseries_keys:
        # for key in doc_TV['timeseries'].keys():
        key_split = key.split('-')
        day = date(int(key_split[2]), int(key_split[1]), int(key_split[0]))
        # Look the holiday up with the parsed date object, not the raw string:
        # handing the string over would leave its interpretation to the
        # holidays library's own string parsing, which is not guaranteed to
        # read ambiguous keys such as "05-12-2017" day-first like we do here.
        holidays_list.append(1 if day in us_holidays else 0)
        # date.weekday(): Monday == 0 ... Saturday == 5, Sunday == 6.
        weekends_list.append(1 if day.weekday() >= 5 else 0)
    return holidays_list, weekends_list
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a time series as a supervised-learning table.

    Every output row holds ``n_in`` lagged observations followed by ``n_out``
    current/future observations of all variables.

    Args:
        data: Observations, one row per time step. Anything ``DataFrame``
            accepts: a flat list, a list of rows, a 1-D or 2-D array, ...
        n_in: Number of lag steps (t-n_in ... t-1) used as input columns.
        n_out: Number of forecast steps (t ... t+n_out-1) used as output
            columns.
        dropnan: When true, rows containing NaN (created by the shifting at
            both ends of the series) are removed.

    Returns:
        DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
        and ``var<j>(t+<i>)``.
    """
    df = DataFrame(data)
    # Take the variable count from the frame itself so that lists of rows and
    # 1-D arrays get the right number of column names as well.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
- # define a class
class CCUPredictor:
    """Predict CCU values for a game with pre-trained per-cluster models.

    Game metadata and CCU time series are read from the ``sherlock`` MongoDB
    database. One model per (cluster 0-2, horizon 1/7/21 days) combination is
    loaded from ``app/models`` when the predictor is constructed.
    """

    def __init__(self):
        """Connect to MongoDB and load all prediction models."""
        # Features per time step: (ccu, holiday flag, weekend flag),
        # see create_dataset().
        self.len_features = 3
        # Offset (in entries, counted from the end of the stored series) at
        # which the "future" part of the window starts.
        self.pred_start = 365
        # Number of entries before the prediction start that form the past.
        self.days_in_past = 60
        # Date key (DD-MM-YYYY) from which on entries count as "future".
        self.future_trigger = "05-12-2017"
        self.game_details, self.ts_ccu = self.init_mongo_connection()
        self.load_models()

    def init_mongo_connection(self):
        """Open the MongoDB connection.

        Returns:
            Tuple ``(game_details, steam_player_timeseries)`` of collections
            from the ``sherlock`` database.
        """
        import os

        # NOTE(review): credentials should not live in source control. The
        # environment variables take precedence; the legacy values are only
        # kept as fallback so existing deployments keep working. Rotate the
        # password and drop the fallback once every deployment sets them.
        username = os.environ.get('MONGO_USER', 'root')
        password = os.environ.get('MONGO_USER_PASSWORD', 'sherlockRoot2019')
        client = MongoClient('localhost', 27017, username=username, password=password)
        db = client.sherlock
        coll_gt = db.game_details
        coll_ccu = db.steam_player_timeseries
        return coll_gt, coll_ccu

    def load_models(self):
        """Load every cluster/horizon model and store it on the instance.

        The models end up in attributes named ``model_c<cluster>_<n>to<n>``
        (for example ``model_c0_7to7``) for clusters 0-2 and horizons of
        1, 7 and 21 days.
        """
        # Same order as before: all clusters of the one-day models first,
        # then the seven-day models, then the three-week models.
        for days in (1, 7, 21):
            for cluster in (0, 1, 2):
                name = 'model_c%d_%dto%d' % (cluster, days, days)
                setattr(self, name, self._load_model(name))
                print("Loaded model c%d%d from disk" % (cluster, days))

    @staticmethod
    def _load_model(name):
        """Build the model ``app/models/<name>.yaml`` and load its weights."""
        with open('app/models/%s.yaml' % name, 'r') as yaml_file:
            loaded_model_yaml = yaml_file.read()
        # NOTE(review): model_from_yaml is reported as removed from newer
        # TensorFlow/Keras releases for security reasons - confirm the pinned
        # version and only ever load trusted model files here.
        model = model_from_yaml(loaded_model_yaml)
        # load weights into new model
        model.load_weights('app/models/%s.h5' % name)
        return model

    def get_prediction(self, appID, prediction_days):
        """Predict the CCU of a game for the requested horizon.

        Args:
            appID: Value of the ``appID`` field of the game document.
            prediction_days: Forecast horizon; 1, 7 and 21 are supported.

        Returns:
            Tuple ``(ts_past, ts_future, accuracy)`` as produced by
            calculate_prediction() and get_prediction_accuracy(). When the
            game is unknown, has no cluster, or the horizon/cluster is not
            supported, ``({}, {}, 0)`` is returned.
        """
        game = self.game_details.find_one({"appID": appID})
        if game is None:
            print("appID not found in database!")
            return {}, {}, 0
        cluster_id = game.get('cluster_id')
        if cluster_id is None:
            print("This game has no ccu")
            return {}, {}, 0
        # Validate before touching the time series so an unsupported request
        # does not trigger any dataset work.
        if prediction_days not in (1, 7, 21):
            print("Prediction Type not supported!")
            return {}, {}, 0
        if cluster_id not in (0, 1, 2):
            # Without a matching model there is no prediction to score.
            print("Cluster not supported!")
            return {}, {}, 0
        model = getattr(self, 'model_c%d_%dto%d' % (cluster_id, prediction_days, prediction_days))
        ds_past, ds_future = self.create_dataset(appID, prediction_days)
        if prediction_days == 1 and cluster_id == 0:
            print("correct blq")
        ds_past, ds_future = self.calculate_prediction(model, ds_past, ds_future, prediction_days)
        return ds_past, ds_future, self.get_prediction_accuracy(ds_future)

    def create_dataset(self, appID, prediction_days):
        """Split the stored time series of a game into past and future.

        The window covers ``days_in_past`` entries before the prediction
        start plus ``prediction_days`` entries after it. Entries are assigned
        to the past until the key equal to ``future_trigger`` is seen; that
        entry and all following ones are assigned to the future.

        Args:
            appID: Value of the ``appID`` field of the time-series document.
            prediction_days: Number of future entries to include.

        Returns:
            Tuple ``(ds_past, ds_future)`` of dicts mapping the date key to
            ``(ccu, holiday_flag, weekend_flag)``.

        Raises:
            LookupError: If no time-series document exists for ``appID``.
        """
        ccu_timeseries = self.ts_ccu.find_one({"appID": appID})
        if ccu_timeseries is None:
            raise LookupError("no CCU timeseries stored for appID %r" % (appID,))
        print("appID: ", appID)
        cleaned = ccu_timeseries['cleaned_timeseries']
        # Negative offsets: the window is counted from the end of the series.
        window_start = -(self.pred_start + self.days_in_past)
        window_end = -(self.pred_start - prediction_days)
        ts_dates = list(cleaned.keys())[window_start:window_end]
        holidays_list, weekends_list = generate_holidays_weekends(ts_dates)
        ds_past = {}
        ds_future = {}
        in_future = False
        for index, day_key in enumerate(ts_dates):
            # a bit hacky: the switch is a plain string comparison of keys
            if day_key == self.future_trigger:
                in_future = True
            target = ds_future if in_future else ds_past
            target[day_key] = (cleaned[day_key], holidays_list[index], weekends_list[index])
        return ds_past, ds_future

    def calculate_prediction(self, model, ds_past, ds_future, prediction_days):
        """Run ``model`` on the most recent past entries.

        Args:
            model: Object with a ``predict`` method (one of the loaded models).
            ds_past: Dict of date key -> ``(ccu, holiday, weekend)``.
            ds_future: Dict with the same layout for the days to predict.
            prediction_days: Horizon the model was built for.

        Returns:
            Tuple ``(ts_past, ts_future)``: ``ts_past`` maps each past key to
            its CCU value, ``ts_future`` maps each future key to
            ``(predicted_ccu, actual_ccu)``.
        """
        ds = np.array(list(ds_past.values()))
        scaler = MinMaxScaler(feature_range=(0, 1))
        print("ds shape", ds.shape)
        # Only the last 2 * prediction_days entries are scaled and reframed.
        scaled = scaler.fit_transform(ds[-(prediction_days * 2):, :])
        reframed = series_to_supervised(scaled, prediction_days, prediction_days)
        print("scaled shape: ", scaled.shape)
        print("reframed shape: ", reframed.shape)
        # Columns after the lagged inputs belong to the forecast part. Of
        # those, only the first variable is a prediction target, so every
        # column of variables 2..len_features is dropped.
        first_candidate = (prediction_days * self.len_features) + 1
        dropped_vars = ['var' + str(j) for j in range(2, self.len_features + 1)]
        clmns = [
            i for i in range(first_candidate, reframed.shape[1])
            if any(var in reframed.columns[i] for var in dropped_vars)
        ]
        # drop columns we don't want to predict
        reframed.drop(reframed.columns[clmns], axis=1, inplace=True)
        print("reframed head: ", reframed.head())
        print("first X shape: ", reframed.shape)
        # Everything except the last prediction_days (target) columns is input.
        X = reframed.values[:, :-prediction_days]
        X = X.reshape((X.shape[0], prediction_days, X.shape[1] // prediction_days))
        pred = model.predict(X)
        print("X shape: ", X.shape)
        print("pred shape: ", pred.shape)
        # Put the prediction into the first column next to the remaining input
        # features so that the scaler fitted above can be inverted.
        X = X.reshape((X.shape[0] * X.shape[1], X.shape[2]))
        pred = pred.reshape(pred.shape[0] * pred.shape[1], 1)
        pred = concatenate((pred, X[:, 1:]), axis=1)
        pred = scaler.inverse_transform(pred)
        pred = pred[:, 0]
        # create timeseries past
        ts_past = {key: values[0] for key, values in ds_past.items()}
        # create timeseries future
        ts_future = {}
        for index, key in enumerate(ds_future):
            ts_future[key] = (pred[index], ds_future[key][0])
        return ts_past, ts_future

    def get_prediction_accuracy(self, ts_future):
        """Return the forecast accuracy in percent (``100 - MAPE``).

        This calculation is based on the mean absolute percentage error
        (MAPE), which tells you by how many percentage points your forecasts
        are off, on average.
        https://www.relexsolutions.com/measuring-forecast-accuracy/#fa-chapter-three

        Forecast bias is unsuitable because it only looks at the totals of
        forecast and CCU, so the deviation per day can still be large. The
        mean percentage error (MPE) is unsuitable as well because positive
        and negative deviations cancel each other out.

        Args:
            ts_future: Dict mapping a date key to ``(predicted, actual)``.

        Returns:
            ``100 - MAPE``; negative when the forecasts are off by more than
            100 % on average. Days whose actual value is 0 are skipped because
            the percentage error is undefined for them. ``0`` is returned when
            no day can be evaluated.
        """
        errors = []
        for values in ts_future.values():
            predicted, actual = values[0], values[1]
            if actual == 0:
                continue
            errors.append(abs(predicted - actual) / actual)
        if not errors:
            return 0
        mape = sum(errors) / len(errors) * 100
        return 100 - mape
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement