Advertisement
Sandbird

Untitled

Jun 16th, 2022
1,390
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.80 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Fri May 22 12:15:10 2020
  4.  
  5. @author: rugve
  6. """
  7. # import codecs
  8. # import csv
  9. # import urllib
  10. from datetime import date
  11. import itertools
  12. import pandas as pd
  13. import matplotlib.pyplot as plt
  14. from sklearn.metrics import mean_squared_error
  15. from math import sqrt
  16. #from statsmodels.tsa.api import ExponentialSmoothing, Holt
  17. import statsmodels.api as sm
  18. import warnings
  19. warnings.filterwarnings("ignore")
  20.  
  21.  
  22. def data_for_country(country,data,column):
  23.     data = data[["location","date",column]]
  24.     data = data[data["location"] == country]
  25.     data = data[data[column] != 0]
  26.     data.reset_index(inplace = True)
  27.     data.Timestamp = pd.to_datetime(data.date,format='%Y-%m-%d')
  28.     data.index = data.Timestamp
  29.     data=data.drop('index',1)
  30.     data=data.drop('location',1)
  31.     data=data.drop('date',1)
  32.     data = data.resample('D').mean()
  33.     data.iloc[:,0]=  data.iloc[:,0].fillna(method='bfill').fillna(method='ffill')
  34.     return data
  35.    
  36. def plot_Data(df):
  37.     ts = df.iloc[:,0]
  38.     ts.plot(figsize=(15,8), title= 'Daily Cases', fontsize=14)
  39.     plt.show()
  40.    
  41.    
  42. # def make_predictions_HL(df):
  43. #     no_of_days = int(input("Please enter the number of days you want to predict for HL:"))
  44. #     ts = df['total_cases']
  45. #     fit1 = Holt(ts).fit(smoothing_level = 0.4,smoothing_slope = 0.8)
  46. #     prediction= fit1.forecast(no_of_days).astype(int)
  47. #     df2 = pd.DataFrame({'prediction': prediction })
  48. #     return df2
  49.    
  50. # def make_predictions_HW_add(df):
  51. #     no_of_days = int(input("Please enter the number of days you want to predict for HL:"))
  52. #     ts = df['total_cases']
  53. #     fit1 = ExponentialSmoothing(ts ,trend='add').fit()
  54. #     prediction= fit1.forecast(no_of_days).astype(int)
  55. #     df2 = pd.DataFrame({'prediction': prediction })
  56. #     return df2
  57.    
  58.    
  59. # def make_predictions_HW_mul(df):
  60. #     no_of_days = int(input("Please enter the number of days you want to predict for HL:"))
  61. #     ts = df['total_cases']
  62. #     fit1 = ExponentialSmoothing(ts ,trend='mul').fit()
  63. #     prediction= fit1.forecast(no_of_days).astype(int)
  64. #     df2 = pd.DataFrame({'prediction': prediction })
  65. #     return df2
  66.    
  67. def select_prams_for_arima(train,test):
  68.     p = d = q = range(0, 4)
  69.     pdq = list(itertools.product(p, d, q))
  70.     seasonal_pdq = [(0,0,0,0)]
  71.     params = []
  72.     rms_arimas =[]
  73.     for param in pdq:
  74.         params.append(param)  
  75.         for param_seasonal in seasonal_pdq:
  76.             try:
  77.                 y_hat_avg = test.copy()
  78.                 mod = sm.tsa.statespace.SARIMAX(train.iloc[:,0],order=param,
  79.                                                 seasonal_order=param_seasonal,
  80.                                                 enforce_stationarity=False,
  81.                                                 enforce_invertibility=False)
  82.                 results = mod.fit()
  83.                 y_hat_avg['SARIMA'] = results.predict(start=test.index[0],
  84.                                                       end=test.index[-1], dynamic=True)
  85.                 rms_arimas.append(sqrt(mean_squared_error(test.iloc[:,0], y_hat_avg.SARIMA)))
  86.             except:
  87.                 continue  
  88.     data_tuples = list(zip(params,rms_arimas))
  89.     rms = pd.DataFrame(data_tuples, columns=['Parameters','RMS value'])
  90.     minimum = int(rms[['RMS value']].idxmin())
  91.     parameters = params[minimum]
  92.     return parameters
  93.  
  94. #df = data
  95. def make_predictions_arima(df,parameters,colummn):    
  96.     no_of_days = int(input("Please enter the number of days you want to predict :"))
  97.     fit1 = sm.tsa.statespace.SARIMAX(df.iloc[:,0], order=parameters).fit()
  98.     prediction = fit1.forecast(no_of_days).astype(int)
  99.     df2 = pd.DataFrame({'prediction': prediction })
  100.  
  101.     plt.figure(figsize=(16,8))
  102.     plt.plot(df.iloc[:,0], label='Original Data')
  103.     #plt.plot(df2['prediction'], label='Predicted data')
  104.  
  105.     #calc the confidence intervals
  106.     fcast = fit1.get_forecast(no_of_days)
  107.     pred_ci = fcast.conf_int()        
  108.     print(fcast.predicted_mean)
  109.     print('Confidence intervals:')
  110.     print(pred_ci)
  111.  
  112.     # Make as pandas series
  113.     upper_series = pd.Series(pred_ci.iloc[:, 1])
  114.     lower_series = pd.Series(pred_ci.iloc[:, 0])
  115.     # lower series shouldnt be less than original series
  116.     # if colummn is 'total_deaths', then lower series shouldnt be less than the max value of original series
  117.     if colummn == 'total_deaths':
  118.         last_value = df.iloc[:,0].iat[-1] #Basically get the last value of df series (lower_series[lower_series < 30057] = 30057)
  119.         lower_series = lower_series.clip(lower=last_value)
  120.  
  121.     # join the two series
  122.     forecast = pd.concat([lower_series, fcast.predicted_mean, upper_series], axis=1)
  123.     forecast.columns = ['lower_series', 'predicted_mean', 'upper_series']
  124.     # plot the forecast
  125.     plt.plot(df2['prediction'], label='Forecast')  # or plt.plot(forecast.predicted_mean, label='Forecast')   to get the mean value
  126.     plt.fill_between(forecast.index, forecast.lower_series, forecast.upper_series, color='k', alpha=.15)
  127.     plt.title('Forecast of '+str(colummn)+' for '+str(no_of_days)+' days')
  128.     plt.legend(loc='best')
  129.     plt.show()
  130.     print("ARIMAX model prediction")
  131.     print(df2)
  132.    
  133.     #pd.DataFrame(df2, columns=['Date','prediction']).to_csv(r'C:\Users\rugve\Desktop\Rucha\3k\Covid19Data\Result1s.csv',index=True)
  134.    
  135. def train_test_split(data, colummn):
  136.     today = date.today()
  137.     today = str(today)
  138.     today = today.replace(today[:8], '')
  139.     today = int(today)
  140.     split_index = len(data) - today
  141.     train=data[0:split_index]
  142.     test=data[split_index:]
  143.     parameters = select_prams_for_arima(train,test)
  144.     make_predictions_arima(data,parameters, colummn)
  145.    
  146.    
  147. # def get_data_from_url(url):
  148. #     ftpstream = urllib.request.urlopen(url)
  149. #     csvfile = csv.reader(codecs.iterdecode(ftpstream, 'utf-8'))
  150. #     data = [ ]
  151. #     for line in csvfile:
  152. #         data.append(line)
  153. #     column_names = data.pop(0)
  154. #     full_data = pd.DataFrame(data,columns=column_names)
  155. #     return full_data
  156.    
  157. def main():
  158.     #url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv'
  159.     url = r"owid-covid-data.csv"
  160.     full_data = pd.read_csv(url)
  161.     coutries = (list(set(full_data["location"])))
  162.     print("Predictions for COVID19")
  163.     print(coutries)
  164.     #cont = input("Enter a Country for which You want to make predictions from the above list:")
  165.     cont = "Greece"
  166.     columns  = ["total_cases","new_cases","total_deaths","new_deaths","hosp_patients", "icu_patients"]
  167.     print(columns)
  168.     colummn = input("Print the one value from above list for which  you want to  make prediction: ")
  169.     data = data_for_country(cont,full_data,colummn)
  170.     #plot_Data(data)
  171.     #data = data.iloc[:103]
  172.     train_test_split(data, colummn)
  173.    
  174.    
  175. if __name__ == '__main__':
  176.     main()
  177.  
  178.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement