Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Fri May 22 12:15:10 2020
- @author: rugve
- """
- # import codecs
- # import csv
- # import urllib
- from datetime import date
- import itertools
- import pandas as pd
- import matplotlib.pyplot as plt
- from sklearn.metrics import mean_squared_error
- from math import sqrt
- #from statsmodels.tsa.api import ExponentialSmoothing, Holt
- import statsmodels.api as sm
- import warnings
- warnings.filterwarnings("ignore")
def data_for_country(country, data, column):
    """Build a gap-free daily series of ``column`` for one country.

    Args:
        country: Value matched against the ``location`` column of ``data``.
        data: DataFrame containing at least ``location``, ``date`` (strings
            in ``%Y-%m-%d`` format) and ``column``.
        column: Name of the column to extract.

    Returns:
        A single-column DataFrame indexed by a daily ``DatetimeIndex`` named
        ``date``. Rows whose value equals 0 are discarded before resampling;
        days left without a value are back-filled, then forward-filled.
    """
    rows = data.loc[data["location"] == country, ["date", column]]
    rows = rows[rows[column] != 0]

    # Select only the value column instead of dropping the others:
    # ``drop(label, 1)`` with a positional axis is rejected by pandas >= 2.0.
    frame = rows[[column]].copy()
    timestamps = pd.to_datetime(rows["date"], format="%Y-%m-%d")
    frame.index = pd.DatetimeIndex(timestamps, name="date")

    daily = frame.resample("D").mean()
    # ``fillna(method=...)`` is deprecated; bfill()/ffill() are the
    # equivalent supported calls.
    daily.iloc[:, 0] = daily.iloc[:, 0].bfill().ffill()
    return daily
def plot_Data(df):
    """Show a line plot of the first column of ``df``, titled 'Daily Cases'."""
    df.iloc[:, 0].plot(title='Daily Cases', fontsize=14, figsize=(15, 8))
    plt.show()
- # def make_predictions_HL(df):
- # no_of_days = int(input("Please enter the number of days you want to predict for HL:"))
- # ts = df['total_cases']
- # fit1 = Holt(ts).fit(smoothing_level = 0.4,smoothing_slope = 0.8)
- # prediction= fit1.forecast(no_of_days).astype(int)
- # df2 = pd.DataFrame({'prediction': prediction })
- # return df2
- # def make_predictions_HW_add(df):
- # no_of_days = int(input("Please enter the number of days you want to predict for HL:"))
- # ts = df['total_cases']
- # fit1 = ExponentialSmoothing(ts ,trend='add').fit()
- # prediction= fit1.forecast(no_of_days).astype(int)
- # df2 = pd.DataFrame({'prediction': prediction })
- # return df2
- # def make_predictions_HW_mul(df):
- # no_of_days = int(input("Please enter the number of days you want to predict for HL:"))
- # ts = df['total_cases']
- # fit1 = ExponentialSmoothing(ts ,trend='mul').fit()
- # prediction= fit1.forecast(no_of_days).astype(int)
- # df2 = pd.DataFrame({'prediction': prediction })
- # return df2
def select_prams_for_arima(train, test):
    """Pick the non-seasonal (p, d, q) order with the lowest hold-out RMSE.

    Every combination of p, d, q in 0..3 is fitted on the first column of
    ``train`` with SARIMAX (no seasonal component), then used to predict
    dynamically from ``test.index[0]`` to ``test.index[-1]``. The prediction
    is scored against the first column of ``test``.

    Args:
        train: DataFrame whose first column is the series to fit.
        test: DataFrame whose first column is the hold-out series; its index
            supplies the prediction start and end.

    Returns:
        The ``(p, d, q)`` tuple with the smallest RMSE (first one on ties).

    Raises:
        ValueError: If no candidate order could be fitted and scored.
    """
    seasonal_order = (0, 0, 0, 0)
    endog = train.iloc[:, 0]
    observed = test.iloc[:, 0]

    best_order = None
    best_rmse = float("inf")
    for order in itertools.product(range(4), repeat=3):
        try:
            model = sm.tsa.statespace.SARIMAX(
                endog,
                order=order,
                seasonal_order=seasonal_order,
                enforce_stationarity=False,
                enforce_invertibility=False,
            )
            fitted = model.fit()
            predicted = fitted.predict(
                start=test.index[0], end=test.index[-1], dynamic=True
            )
            # Align on the test index, as a column assignment would.
            predicted = predicted.reindex(test.index)
            rmse = sqrt(mean_squared_error(observed, predicted))
        except Exception:
            # Best-effort grid search: an order that cannot be fitted or
            # scored is skipped. The order and its score are only recorded
            # together below, so a failure can never shift the pairing.
            continue
        if rmse < best_rmse:
            best_order, best_rmse = order, rmse

    if best_order is None:
        raise ValueError("No ARIMA order could be fitted on the supplied data")
    return best_order
- #df = data
def make_predictions_arima(df, parameters, colummn):
    """Fit SARIMAX on the first column of ``df`` and plot/print a forecast.

    The number of days to forecast is read interactively from stdin. The
    function prints the predicted mean, the confidence intervals and the
    integer-truncated forecast, and shows a plot of the original data, the
    forecast and the shaded confidence band.

    Args:
        df: DataFrame whose first column is the series to model.
        parameters: ``(p, d, q)`` order passed to SARIMAX.
        colummn: Name of the forecast quantity; used in the plot title, and
            when it equals ``'total_deaths'`` the lower confidence bound is
            clipped to the last observed value.
    """
    no_of_days = int(input("Please enter the number of days you want to predict :"))
    observed = df.iloc[:, 0]
    fitted = sm.tsa.statespace.SARIMAX(observed, order=parameters).fit()
    point_forecast = pd.DataFrame(
        {'prediction': fitted.forecast(no_of_days).astype(int)}
    )

    plt.figure(figsize=(16, 8))
    plt.plot(observed, label='Original Data')

    # Confidence intervals for the same horizon.
    fcast = fitted.get_forecast(no_of_days)
    bounds = fcast.conf_int()
    print(fcast.predicted_mean)
    print('Confidence intervals:')
    print(bounds)

    lower = pd.Series(bounds.iloc[:, 0])
    upper = pd.Series(bounds.iloc[:, 1])
    if colummn == 'total_deaths':
        # The lower bound is not allowed to fall below the last observed value.
        lower = lower.clip(lower=observed.iat[-1])

    band = pd.concat([lower, fcast.predicted_mean, upper], axis=1)
    band.columns = ['lower_series', 'predicted_mean', 'upper_series']

    plt.plot(point_forecast['prediction'], label='Forecast')
    plt.fill_between(
        band.index, band.lower_series, band.upper_series, color='k', alpha=.15
    )
    plt.title('Forecast of ' + str(colummn) + ' for ' + str(no_of_days) + ' days')
    plt.legend(loc='best')
    plt.show()

    print("ARIMAX model prediction")
    print(point_forecast)
def train_test_split(data, colummn):
    """Hold out the most recent rows, select an ARIMA order, then forecast.

    The hold-out size is today's day of the month: the last that many rows of
    ``data`` form the test set and everything before them the training set.
    The order chosen on that split is then used to forecast from the full
    ``data``.

    Args:
        data: Time-indexed DataFrame whose first column is the series.
        colummn: Name of the forecast quantity, forwarded to
            ``make_predictions_arima``.

    Raises:
        ValueError: If ``data`` does not have more rows than the hold-out
            size, so no training rows would remain.
    """
    holdout = date.today().day
    split_index = len(data) - holdout
    if split_index <= 0:
        # A zero or negative index would slice from the wrong end and give an
        # empty or mis-sized training set.
        raise ValueError(
            "Need more than %d rows to hold out a test set, got %d"
            % (holdout, len(data))
        )
    train = data[:split_index]
    test = data[split_index:]
    parameters = select_prams_for_arima(train, test)
    make_predictions_arima(data, parameters, colummn)
- # def get_data_from_url(url):
- # ftpstream = urllib.request.urlopen(url)
- # csvfile = csv.reader(codecs.iterdecode(ftpstream, 'utf-8'))
- # data = [ ]
- # for line in csvfile:
- # data.append(line)
- # column_names = data.pop(0)
- # full_data = pd.DataFrame(data,columns=column_names)
- # return full_data
def main():
    """Load the local OWID CSV, ask for a metric and forecast it for Greece.

    Prints the available locations and the supported metric names, reads the
    chosen metric from stdin, then builds that country's daily series and
    passes it to the train/test split and forecast step.
    """
    # Remote source: 'https://covid.ourworldindata.org/data/owid-covid-data.csv'
    csv_path = r"owid-covid-data.csv"
    full_data = pd.read_csv(csv_path)

    print("Predictions for COVID19")
    print(list(set(full_data["location"])))

    # The country is fixed; interactive selection is disabled.
    cont = "Greece"

    columns = [
        "total_cases",
        "new_cases",
        "total_deaths",
        "new_deaths",
        "hosp_patients",
        "icu_patients",
    ]
    print(columns)
    colummn = input("Print the one value from above list for which you want to make prediction: ")

    country_series = data_for_country(cont, full_data, colummn)
    train_test_split(country_series, colummn)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement