Advertisement
Guest User

Untitled

a guest
Feb 7th, 2019
160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 12.87 KB | None | 0 0
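# One-time pyenv shell setup (shell commands, kept here as comments only):
#   echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc
#   echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc
#   echo 'eval "$(pyenv init -)"' >> ~/.bashrc
#   source ~/.bashrc
# Intended interpreter: /usr/bin/python3 (the original "#!/usr/bin/python3"
# shebang had been fused onto the "source ~/.bashrc" line above).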
  5. import numpy as np
  6. import holidays
  7.  
  8. from datetime import date
  9. from pymongo import MongoClient
  10. from numpy import concatenate
  11. from keras.models import model_from_yaml
  12. from pandas import DataFrame, concat
  13.  
  14. from sklearn.preprocessing import MinMaxScaler
  15.  
  16.  
  17. def generate_holidays_weekends(timeseries_keys):
  18.  
  19.     holidays_list = []
  20.     weekends_list = []
  21.     us_holidays = holidays.CountryHoliday('US')
  22.  
  23.     for key in timeseries_keys:
  24.     #for key in doc_TV['timeseries'].keys():
  25.         if key in us_holidays:
  26.             holidays_list.append(1)
  27.         else:
  28.             holidays_list.append(0)
  29.  
  30.         key_split = key.split('-')
  31.         day = date((int)(key_split[2]), (int)(key_split[1]), (int)(key_split[0]))
  32.  
  33.         if(day.weekday() == 5 or day.weekday() == 6):
  34.             weekends_list.append(1)
  35.         else:
  36.             weekends_list.append(0)
  37.  
  38.     return holidays_list, weekends_list
  39.  
  40. def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
  41.     n_vars = 1 if type(data) is list else data.shape[1]
  42.     df = DataFrame(data)
  43.     cols, names = list(), list()
  44.  
  45.     # input sequence (t-n, ... t-1)
  46.     for i in range(n_in, 0, -1):
  47.         cols.append(df.shift(i))
  48.         names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
  49.  
  50.     # forecast sequence (t, t+1, ... t+n)
  51.     for i in range(0, n_out):
  52.         cols.append(df.shift(-i))
  53.         if i == 0:
  54.             names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
  55.         else:
  56.             names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
  57.  
  58.     # put it all together
  59.     agg = concat(cols, axis=1)
  60.     agg.columns = names
  61.  
  62.     # drop rows with NaN values
  63.     if dropnan:
  64.         agg.dropna(inplace=True)
  65.  
  66.     return agg
  67.  
  68.  
  69. # define a class
  70. class CCUPredictor:
  71.  
  72.     def __init__(self):
  73.         self.len_features = 3
  74.         self.pred_start = 365
  75.         self.days_in_past = 60
  76.         self.future_trigger = "05-12-2017"
  77.         self.game_details, self.ts_ccu = self.init_mongo_connection()
  78.         self.load_models()
  79.  
  80.     def init_mongo_connection(self):
  81.         #client = MongoClient('localhost', 27017)
  82.         client = MongoClient('localhost', 27017, username='root', password='sherlockRoot2019')
  83.         db = client.sherlock                # if the database is not present, it will be created
  84.         #db.authenticate(os.environ['MONGO_USER'], os.environ['MONGO_USER_PASSWORD'], source=os.environ['MONGO_AUTH_DATABASE'])
  85.         coll_gt = db.game_details        # if collection not present, it will be created
  86.         coll_ccu = db.steam_player_timeseries
  87.  
  88.         return coll_gt, coll_ccu
  89.  
  90.     def load_models(self):
  91.  
  92.         #One day prediction
  93.  
  94.         # load YAML and create model
  95.         yaml_file = open('app/models/model_c0_1to1.yaml', 'r')
  96.         loaded_model_yaml = yaml_file.read()
  97.         yaml_file.close()
  98.         self.model_c0_1to1 = model_from_yaml(loaded_model_yaml)
  99.  
  100.         # load weights into new model
  101.         self.model_c0_1to1.load_weights("app/models/model_c0_1to1.h5")
  102.         print("Loaded model c01 from disk")
  103.  
  104.         yaml_file = open('app/models/model_c1_1to1.yaml', 'r')
  105.         loaded_model_yaml = yaml_file.read()
  106.         yaml_file.close()
  107.         self.model_c1_1to1 = model_from_yaml(loaded_model_yaml)
  108.  
  109.         self.model_c1_1to1.load_weights("app/models/model_c1_1to1.h5")
  110.         print("Loaded model c11 from disk")
  111.  
  112.         yaml_file = open('app/models/model_c2_1to1.yaml', 'r')
  113.         loaded_model_yaml = yaml_file.read()
  114.         yaml_file.close()
  115.         self.model_c2_1to1 = model_from_yaml(loaded_model_yaml)
  116.  
  117.         self.model_c2_1to1.load_weights("app/models/model_c2_1to1.h5")
  118.         print("Loaded model c21 from disk")
  119.  
  120.         # Three day prediction
  121.  
  122.         # load YAML and create model
  123.         yaml_file = open('app/models/model_c0_7to7.yaml', 'r')
  124.         loaded_model_yaml = yaml_file.read()
  125.         yaml_file.close()
  126.         self.model_c0_7to7 = model_from_yaml(loaded_model_yaml)
  127.  
  128.         # load weights into new model
  129.         self.model_c0_7to7.load_weights("app/models/model_c0_7to7.h5")
  130.         print("Loaded model c07 from disk")
  131.  
  132.         yaml_file = open('app/models/model_c1_7to7.yaml', 'r')
  133.         loaded_model_yaml = yaml_file.read()
  134.         yaml_file.close()
  135.         self.model_c1_7to7 = model_from_yaml(loaded_model_yaml)
  136.  
  137.         self.model_c1_7to7.load_weights("app/models/model_c1_7to7.h5")
  138.         print("Loaded model c17 from disk")
  139.  
  140.         yaml_file = open('app/models/model_c2_7to7.yaml', 'r')
  141.         loaded_model_yaml = yaml_file.read()
  142.         yaml_file.close()
  143.         self.model_c2_7to7 = model_from_yaml(loaded_model_yaml)
  144.  
  145.         self.model_c2_7to7.load_weights("app/models/model_c2_7to7.h5")
  146.         print("Loaded model c27 from disk")
  147.  
  148.         # Three weeks prediction
  149.  
  150.         # load YAML and create model
  151.         yaml_file = open('app/models/model_c0_21to21.yaml', 'r')
  152.         loaded_model_yaml = yaml_file.read()
  153.         yaml_file.close()
  154.         self.model_c0_21to21 = model_from_yaml(loaded_model_yaml)
  155.  
  156.         # load weights into new model
  157.         self.model_c0_21to21.load_weights("app/models/model_c0_21to21.h5")
  158.         print("Loaded model c021 from disk")
  159.  
  160.         yaml_file = open('app/models/model_c1_21to21.yaml', 'r')
  161.         loaded_model_yaml = yaml_file.read()
  162.         yaml_file.close()
  163.         self.model_c1_21to21 = model_from_yaml(loaded_model_yaml)
  164.  
  165.         self.model_c1_21to21.load_weights("app/models/model_c1_21to21.h5")
  166.         print("Loaded model c121 from disk")
  167.  
  168.         yaml_file = open('app/models/model_c2_21to21.yaml', 'r')
  169.         loaded_model_yaml = yaml_file.read()
  170.         yaml_file.close()
  171.         self.model_c2_21to21 = model_from_yaml(loaded_model_yaml)
  172.  
  173.         self.model_c2_21to21.load_weights("app/models/model_c2_21to21.h5")
  174.         print("Loaded model c221 from disk")
  175.  
  176.  
  177.     def get_prediction(self, appID, prediction_days):
  178.  
  179.         # appID ?, cluster=
  180.         cursor = self.game_details.find({"appID" : appID})
  181.  
  182.         if(cursor.count() > 0):
  183.             game = cursor.next()
  184.  
  185.             if(game['cluster_id'] == None):
  186.                 print("This game has no ccu")
  187.                 return {}, {}, 0
  188.  
  189.  
  190.             ds_past, ds_future = self.create_dataset(appID, prediction_days)
  191.  
  192.             if(prediction_days == 1):
  193.                 if(game['cluster_id'] == 0):
  194.                     print("correct blq")
  195.                     #model = self.model_c0_1to1
  196.                     ds_past, ds_future = self.calculate_prediction(self.model_c0_1to1, ds_past, ds_future, prediction_days)
  197.                     #ds_past, ds_future = self.calculate_prediction(amina, ds_past, ds_future, prediction_days)
  198.                 elif(game['cluster_id'] == 1):
  199.                     ds_past, ds_future = self.calculate_prediction(self.model_c1_1to1, ds_past, ds_future, prediction_days)
  200.                 elif(game['cluster_id'] == 2):
  201.                     ds_past, ds_future = self.calculate_prediction(self.model_c2_1to1, ds_past, ds_future, prediction_days)
  202.  
  203.                 return ds_past, ds_future, self.get_prediction_accuracy(ds_future)
  204.  
  205.             elif(prediction_days == 7):
  206.                 if(game['cluster_id'] == 0):
  207.                     ds_past, ds_future = self.calculate_prediction(self.model_c0_7to7, ds_past, ds_future, prediction_days)
  208.                 elif(game['cluster_id'] == 1):
  209.                     ds_past, ds_future = self.calculate_prediction(self.model_c1_7to7, ds_past, ds_future, prediction_days)
  210.                 elif(game['cluster_id'] == 2):
  211.                     ds_past, ds_future = self.calculate_prediction(self.model_c2_7to7, ds_past, ds_future, prediction_days)
  212.  
  213.                 return ds_past, ds_future, self.get_prediction_accuracy(ds_future)
  214.  
  215.             elif(prediction_days == 21):
  216.                 if(game['cluster_id'] == 0):
  217.                     ds_past, ds_future  = self.calculate_prediction(self.model_c0_21to21, ds_past, ds_future, prediction_days)
  218.                 elif(game['cluster_id'] == 1):
  219.                     ds_past, ds_future  = self.calculate_prediction(self.model_c1_21to21, ds_past, ds_future, prediction_days)
  220.                 elif(game['cluster_id'] == 2):
  221.                     ds_past, ds_future  = self.calculate_prediction(self.model_c2_21to21, ds_past, ds_future, prediction_days)
  222.  
  223.                 return ds_past, ds_future, self.get_prediction_accuracy(ds_future)
  224.  
  225.             else:
  226.                 print("Prediction Type not supported!")
  227.                 return {}, {}, 0
  228.  
  229.         else:
  230.             print("appID not found in database!")
  231.             return {}, {}, 0
  232.  
  233.  
  234.     def create_dataset(self, appID, prediction_days):
  235.  
  236.         ds_past = {}
  237.         ds_future = {}
  238.  
  239.         ccu_timeseries = self.ts_ccu.find({"appID" : appID}).next()
  240.         print("appID: ", appID)
  241.         # create time series from 'future_trigger' (04.12.2017)
  242.         # - 2 month in past (04.10.2017)
  243.         # + prediction days in future
  244.         ts_dates = list(ccu_timeseries['cleaned_timeseries'].keys())[-(self.pred_start + self.days_in_past):-(self.pred_start - prediction_days)]
  245.         holidays_list, weekends_list = generate_holidays_weekends(ts_dates)
  246.  
  247.         trigger = False
  248.  
  249.         for date, index in zip(ts_dates, range(len(ts_dates))):
  250.  
  251.             # a bit hacky, because two strings
  252.             if(date == self.future_trigger):
  253.                 trigger = True
  254.  
  255.             if(trigger):
  256.                 ds_future[date] = (ccu_timeseries['cleaned_timeseries'][date], holidays_list[index], weekends_list[index])
  257.             else:
  258.                 ds_past[date] = (ccu_timeseries['cleaned_timeseries'][date], holidays_list[index], weekends_list[index])
  259.  
  260.         return ds_past, ds_future
  261.  
  262.  
  263.     def calculate_prediction(self, model, ds_past, ds_future, prediction_days):
  264.  
  265.         ts_past = {}
  266.         ts_future = {}
  267.  
  268.         #tf.keras.backend.clear_session()
  269.  
  270.         ds = np.array(list(ds_past.values()))
  271.  
  272.         scaler = MinMaxScaler(feature_range=(0,1))
  273.  
  274.         print("ds shape", ds.shape)
  275.         scaled = scaler.fit_transform(ds[-(prediction_days * 2):, :])
  276.         reframed = series_to_supervised(scaled, prediction_days, prediction_days)
  277.  
  278.         print("scaled shape: ", scaled.shape)
  279.         print("reframed shape: ", reframed.shape)
  280.         clmns = []
  281.  
  282.         # get coloumns for every variable from prediction (except ccu) for 'prediction_days'
  283.         # train_days * columns - last column (skip 'train' columns)
  284.         for i in range((prediction_days * self.len_features) + 1, reframed.shape[1]):
  285.             # append columns (to delete) which have "var'j'" in them
  286.             for j in range(2, (self.len_features + 1)):
  287.                 if('var'+str(j) in list(reframed)[i]):
  288.                     clmns.append(i)
  289.  
  290.         # drop columns we don't want to predict
  291.         reframed.drop(reframed.columns[clmns], axis=1, inplace=True)
  292.  
  293.         print("reframed head: ", reframed.head())
  294.         print("first X shape: ", reframed.shape)
  295.  
  296.         X = reframed.values[:, :-prediction_days]
  297.         X = X.reshape((X.shape[0], prediction_days, (int)(X.shape[1] / prediction_days)))
  298.  
  299.         #K.clear_session()
  300.         #graph = tf.get_default_graph()
  301.  
  302.         pred = model.predict(X)
  303.  
  304.         #with graph.as_default():
  305.             #pred = model._make_predict_function(X)
  306.  
  307.         print("X shape: ", X.shape)
  308.         print("pred shape: ", pred.shape)
  309.         X = X.reshape((X.shape[0]*X.shape[1], X.shape[2]))
  310.         pred = pred.reshape(pred.shape[0]*pred.shape[1], 1)
  311.  
  312.         pred = concatenate((pred, X[:, 1:]), axis=1)
  313.         pred = scaler.inverse_transform(pred)
  314.         pred = pred[:,0]
  315.  
  316.         # create timeseries past
  317.         for key in ds_past.keys():
  318.             ts_past[key] = ds_past[key][0]
  319.  
  320.         # create timeseries future
  321.         for index, key in zip(range(len(ds_future)), ds_future.keys()):
  322.             ts_future[key] = (pred[index], ds_future[key][0])
  323.  
  324.         return ts_past, ts_future
  325.  
  326.     def get_prediction_accuracy(self, ts_future):
  327.         '''
  328.        This calculation is based on Mean absolute percentage error (MAPE)
  329.        tells you by how many percentage points your forecasts are off, on average.
  330.  
  331.        https://www.relexsolutions.com/measuring-forecast-accuracy/#fa-chapter-three
  332.  
  333.  
  334.        Forecast Bias ungeeignet, weil nur die summe des forecasts und der ccu betrachtet wird
  335.            -> abweichungen pro tag kΓΆnnen hoch sein
  336.  
  337.        Mean Percentage Error (MPE) auch ungeignet, weil positive und negative Abweichungen sich gegenseitig aufheben.
  338.        '''
  339.         sum = 0
  340.         for key in ts_future.keys():
  341.             sum += (abs(ts_future[key][0] - ts_future[key][1]) / ts_future[key][1])
  342.  
  343.         sum = sum / len(ts_future) * 100
  344.  
  345.         return 100 - sum
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement