Advertisement
Guest User

Untitled

a guest
Apr 27th, 2017
150
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.64 KB | None | 0 0
  1. # import the datareader
  2. import pandas_datareader.data as web
  3. # import the date conversion function
  4. import datetime
  5.  
  6. # initialize the start date
  7. start = datetime.datetime(2014, 1, 1)
  8. # initialize the end date
  9. end = datetime.datetime(2016, 12, 30)
  10. # get the Alibaba stock data for a given period from Yahoo Finance
  11. data = web.DataReader("AMZN", 'yahoo',start,end)
  12. # show the first 5 observations
  13. data.head()
  14.  
  15. # save the data for future usage
  16. data.to_csv("AMZN_data.csv")
  17.  
  18. # import the plotting library
  19. import matplotlib.pyplot as plt
  20. %matplotlib inline
  21.  
  22. # plot the opening and closing stock prices for 2015 only
  23. plt.plot(data['Open']['2015'])
  24. plt.plot(data['Close']['2015'])
  25.  
  26. # libraries used by the time-series analysis below
  27. import pandas as pd  # Series/DataFrame handling (concat, to_frame)
  28. import numpy as np  # log, exp and sqrt
  29. import matplotlib.pyplot as plt  # same import as on line 19, repeated here
  30. %matplotlib inline
  31.  
  32. from statsmodels.tsa.arima_model import ARIMA # ARIMA model; NOTE(review): this module path is deprecated in newer statsmodels releases - confirm the installed version
  33. from statsmodels.tsa.stattools import acf, pacf # autocorrelation (ACF) and partial autocorrelation (PACF) estimators
  34.  
  35. data.head()  # show the first rows of the downloaded price table again
  36.  
  37. # apply logarithmic transformation to the dataset
  38. log_data = np.log(data)
  39. # calculate the one-period difference
  40. log_data_lagged = log_data - log_data.shift()
  41. # drop the missing values
  42. log_data_lagged.dropna(inplace=True)
  43.  
  44. plt.plot(log_data_lagged['Open']['2015'])
  45. plt.plot(log_data_lagged['Close']['2015'])
  46.  
  47. # Plot ACF for Stock: Open with confidence treshholds
  48. acf = acf(log_data_lagged['Open']['2015'])
  49. plt.plot(acf)
  50. plt.axhline(y=0,linestyle='--',color='gray')
  51. plt.axhline(y=-1.96/np.sqrt(len(log_data_lagged['Open']['2015'])),linestyle='--',color='gray')
  52. plt.axhline(y=1.96/np.sqrt(len(log_data_lagged['Open']['2015'])),linestyle='--',color='gray')
  53. plt.title('Autocorrelation Function')
  54. # take q = 1
  55.  
  56. # Plot PACF for Stock: Open with confidence bounds
  57. log_pacf = pacf(log_data_lagged['Open']['2015'])
  58. plt.plot(log_pacf)
  59. plt.axhline(y=0,linestyle='--',color='gray')
  60. plt.axhline(y=-1.96/np.sqrt(len(log_data_lagged['Open']['2015'])),linestyle='--',color='gray')
  61. plt.axhline(y=1.96/np.sqrt(len(log_data_lagged['Open']['2015'])),linestyle='--',color='gray')
  62. plt.title('Partial Autocorrelation Function')
  63. # take p = 1
  64.  
  65. # ARIMA on Stock: Open
  66. model = ARIMA(log_data['Open']['2015'], order=(1, 1, 1))
  67. results_ARIMA = model.fit(disp=-1)
  68. plt.plot(log_data_lagged['Open']['2015'])
  69. plt.plot(results_ARIMA.fittedvalues, color='red')
  70.  
  71.  
  72. from statsmodels.tsa.arima_model import ARIMA # ARIMA model
  73. from statsmodels.tsa.stattools import acf, pacf #ACF and PACF
  74.  
  75. # Plot ACF for Stock: Close with confidence treshholds
  76. acf2 = acf(log_data_lagged['Close']['2015'])
  77. plt.plot(acf2)
  78. plt.axhline(y=0,linestyle='--',color='gray')
  79. plt.axhline(y=-1.96/np.sqrt(len(log_data_lagged['Close']['2015'])),linestyle='--',color='gray')
  80. plt.axhline(y=1.96/np.sqrt(len(log_data_lagged['Close']['2015'])),linestyle='--',color='gray')
  81. plt.title('Autocorrelation Function')
  82. # take q = 2 in case of Stock: Close
  83.  
  84. # Plot PACF for Stock: Close with confidence bounds
  85. log_pacf2 = pacf(log_data_lagged['Close']['2015'])
  86. plt.plot(log_pacf2)
  87. plt.axhline(y=0,linestyle='--',color='gray')
  88. plt.axhline(y=-1.96/np.sqrt(len(log_data_lagged['Close']['2015'])),linestyle='--',color='gray')
  89. plt.axhline(y=1.96/np.sqrt(len(log_data_lagged['Close']['2015'])),linestyle='--',color='gray')
  90. plt.title('Partial Autocorrelation Function')
  91. # take p = 2 in case of Stock: Close
  92.  
  93. # ARIMA on Stock: Close
  94. model2 = ARIMA(log_data['Close']['2015'], order=(2, 1, 2))
  95. results_ARIMA2 = model.fit(disp=-1)
  96. plt.plot(log_data_lagged['Close']['2015'])
  97. plt.plot(results_ARIMA2.fittedvalues, color='red')
  98.  
  99. plt.plot(log_data_lagged['Open']['2015'])
  100. plt.plot(results_ARIMA.fittedvalues, color='red')
  101. plt.plot(log_data_lagged['Close']['2015'])
  102. plt.plot(results_ARIMA2.fittedvalues, color='red')
  103.  
  104. # save the fittedvalues to a new variables
  105. predictions_log_lag = results_ARIMA.fittedvalues # stock: open
  106. predictions_log_lag2 = results_ARIMA2.fittedvalues # stock: close
  107. # To undo the differencing (shifting) we have to calculate the cumulative sum
  108. predictions_almost_log = predictions_log_lag.cumsum()
  109. predictions_almost_log2 = predictions_log_lag2.cumsum()
  110.  
  111. # check the type, to be sure it is a Series
  112. type(predictions_almost_log)# series
  113. type(predictions_almost_log2)# series
  114.  
  115. # Convert this series to DataFrame
  116. predictions_almost_log = pd.Series.to_frame(predictions_almost_log)
  117. predictions_almost_log2 = pd.Series.to_frame(predictions_almost_log2)
  118. # check the type to make sure conversion was correct
  119. type(predictions_almost_log)# frame now
  120. type(predictions_almost_log2)# frame now
  121.  
  122. predictions_almost_log.columns=["OPEN"]
  123. predictions_almost_log.head()
  124.  
  125. predictions_almost_log2.columns=["CLOSE"]
  126. predictions_almost_log2.head()
  127.  
  128. # add that value to all rows
  129. predictions_almost_log = predictions_almost_log + log_data['Open']['2015'].ix[0]
  130. # first 5 observations
  131. predictions_almost_log.head()
  132.  
  133. # add that value to all rows
  134. predictions_almost_log2 = predictions_almost_log2 + log_data['Close']['2015'].ix[0]
  135. # first 5 observations
  136. predictions_almost_log2.head()
  137.  
  138. # Concatenate the very first observation and the above dataframe
  139. predictions_log = pd.concat([log_data['Open']['2015'][0:1],predictions_almost_log])
  140. predictions_log.head()
  141.  
  142. # Concatenate the very first observation and the above dataframe
  143. predictions_log2 = pd.concat([log_data['Close']['2015'][0:1],predictions_almost_log2])
  144. predictions_log2.head()
  145.  
  146. # transform back
  147. predictions = np.exp(predictions_log)
  148. predictions.head()
  149.  
  150. # transform back
  151. predictions2 = np.exp(predictions_log2)
  152. predictions2.head()
  153.  
  154. # plot real data
  155. plt.plot(data['Open']['2015'])
  156. # plot predictions in red circles
  157. plt.plot(predictions,'ro')
  158.  
  159. # plot real data
  160. plt.plot(data['Close']['2015'])
  161. # plot predictions in green circles
  162. plt.plot(predictions2,'go')
  163.  
  164. plt.plot(data['Open']['2015'])
  165. plt.plot(predictions,'ro')
  166. plt.plot(data['Close']['2015'])
  167. plt.plot(predictions2,'go')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement