Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Forecast GOOGL adjusted closing prices with support-vector regression.

The script downloads the ``WIKI/GOOGL`` table from Quandl, derives two
percentage features, trains an ``svm.SVR`` model to predict the adjusted
close ``forecast_out`` rows ahead, prints the model score and the predicted
values, and plots the price history together with the forecast.
"""
import datetime
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import quandl
from matplotlib import style
from sklearn import model_selection, preprocessing, svm
from sklearn.linear_model import LinearRegression  # not used by this script

style.use('ggplot')
quandl.ApiConfig.api_key = 'Not gonna show the API key'

# Get data from quandl
df = quandl.get('WIKI/GOOGL')

# Keep only the adjusted columns. ``.copy()`` makes the selection an
# independent frame so the column assignments below do not trigger
# SettingWithCopyWarning.
df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']].copy()

# High/low spread as a percentage of the low
df['HL_PCT'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Low'] * 100.0
# Open-to-close move as a percentage of the open
df['PCT_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0

# Final feature set
df = df[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']].copy()
forecast_col = 'Adj. Close'

# Replace missing values with a sentinel instead of dropping the rows
df.fillna(-99999, inplace=True)

# Forecast horizon: 1% of the number of rows, rounded up
forecast_out = math.ceil(0.01 * len(df))

# The label of a row is the forecast column ``forecast_out`` rows later.
# The last ``forecast_out`` rows therefore have no label (NaN).
df['label'] = df[forecast_col].shift(-forecast_out)

# Define X: scale every feature row, including the unlabelled tail
X = np.array(df.drop(columns=['label']))
X = preprocessing.scale(X)
# Slice the unlabelled tail BEFORE truncating X. These are the most recent
# rows, whose future close is unknown and is what we want to predict.
X_lately = X[-forecast_out:]
X = X[:-forecast_out]

# Define y from the labelled rows only. ``df`` itself is left intact so the
# plot shows the full history and the forecast starts after the real last
# date.
labelled = df.dropna(subset=['label'])
y = np.array(labelled['label'])

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2
)

# Choosing algorithm
clf = svm.SVR()
# Fitting the data
clf.fit(X_train, y_train)

# ``score`` on a regressor is the coefficient of determination (R^2), not a
# classification accuracy. The printed label is kept as it was.
accuracy = clf.score(X_test, y_test)
print('Accuracy: ', (accuracy*100.00), '%')

# Predicting values for the unlabelled tail
forecast_predict = clf.predict(X_lately)
# Printing predicted values
print(forecast_predict, forecast_out)

# 'Forecast' is added last, so it is the final column. The row assignment
# in the loop below relies on that ordering.
df['Forecast'] = np.nan

# Date calculation for visualisation: one calendar day per prediction,
# starting the day after the last real row. Plain timedelta arithmetic
# avoids a round trip through local-time Unix timestamps.
last_date = df.index[-1]
for day_offset, predicted_close in enumerate(forecast_predict, start=1):
    next_date = last_date + datetime.timedelta(days=day_offset)
    # NaN for every existing column, the prediction in 'Forecast'
    df.loc[next_date] = [np.nan] * (len(df.columns) - 1) + [predicted_close]

# Displaying the graph
df['Adj. Close'].plot()
df['Forecast'].plot()
plt.legend(loc=4)
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement