Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pickle
from collections import Counter

import numpy as np
import pandas as pd
from sklearn import svm, model_selection as cross_validation, neighbors
from sklearn.ensemble import RandomForestClassifier, VotingClassifier

def process_data_for_lables(ticker, hm_days=7):
    """Load the joined close prices and add forward-change columns for ``ticker``.

    Reads ``sp500_joined_closes.csv`` (first column used as the index), drops
    every row that contains a missing value, and appends one column per
    horizon ``i = 1..hm_days`` named ``'<ticker>_<i>d'``.  Each new column
    holds the fractional change from the current row's value to the value
    ``i`` rows later: ``(future - current) / current``.

    Args:
        ticker: Name of the CSV column to compute forward changes for.
        hm_days: Number of look-ahead horizons to generate.  Defaults to 7.

    Returns:
        A ``(tickers, df)`` tuple.  ``tickers`` is the list of column names
        found in the CSV (captured before the new columns are added) and
        ``df`` is the augmented DataFrame.  The last ``i`` rows of each
        ``'<ticker>_<i>d'`` column are NaN because no later row exists.
    """
    df = pd.read_csv('sp500_joined_closes.csv', index_col=0)
    df.dropna(inplace=True)
    tickers = df.columns.values.tolist()

    # NOTE: earlier revisions called ``df.fillna(0)`` here and after the loop
    # without keeping the result, which had no effect.  The calls are removed
    # rather than "fixed": filling the trailing NaNs with 0 would make an
    # unknown future change indistinguishable from a real 0% change.
    current = df[ticker]
    for i in range(1, hm_days + 1):
        df['{}_{}d'.format(ticker, i)] = (current.shift(-i) - current) / current

    return tickers, df


# Example: process_data_for_lables('XOM')
def buy_sell_hold(*args):
    """Classify a sequence of fractional price changes as buy, sell or hold.

    The changes are examined in the order given, and the first one that lies
    strictly outside the +/-2% band decides the result.

    Args:
        *args: Fractional changes (e.g. ``0.03`` for +3%).

    Returns:
        ``1`` if the first out-of-band change is above ``+0.02``, ``-1`` if it
        is below ``-0.02``, and ``0`` when no change leaves the band (which
        includes being called with no arguments).
    """
    requirement = 0.02
    for change in args:
        if change > requirement:
            return 1
        if change < -requirement:
            return -1
    return 0
def extract_featuresets(ticker):
    """Build the feature matrix and buy/sell/hold labels for ``ticker``.

    Labels are produced by applying ``buy_sell_hold`` row by row to the seven
    forward-change columns ``'<ticker>_1d'`` .. ``'<ticker>_7d'`` returned by
    ``process_data_for_lables``.  Features are the row-over-row percentage
    changes of every ticker column.  The label distribution is printed as a
    side effect.

    Args:
        ticker: Column name of the ticker to label.

    Returns:
        ``(X, y, df)`` where ``X`` is the 2-D array of percentage changes with
        NaN and infinite values replaced by 0, ``y`` is the array of labels
        taken from the ``'<ticker>_target'`` column (row-aligned with ``X``),
        and ``df`` is the cleaned DataFrame that includes that column.
    """
    # Must match the number of horizons process_data_for_lables generates
    # when called with its defaults.
    hm_days = 7

    tickers, df = process_data_for_lables(ticker)
    # Rows without a value for every column cannot be labelled.
    df.dropna(inplace=True)

    target_col = '{}_target'.format(ticker)
    horizon_cols = ['{}_{}d'.format(ticker, i) for i in range(1, hm_days + 1)]
    df[target_col] = list(map(buy_sell_hold, *[df[col] for col in horizon_cols]))

    vals = df[target_col].values.tolist()
    print('Data spread:', Counter(str(v) for v in vals))

    # Discard rows that contain an infinite value in any column.
    df = df.replace([np.inf, -np.inf], np.nan)
    df.dropna(inplace=True)

    # pct_change() leaves NaN in the first row and may yield inf for a zero
    # base value.  Both must actually be replaced: the previous code called
    # fillna()/nan_to_num() without keeping the results, so NaNs reached the
    # classifier.
    df_vals = df[tickers].pct_change()
    df_vals = df_vals.replace([np.inf, -np.inf], 0).fillna(0)

    X = np.nan_to_num(df_vals.values)
    y = df[target_col].values
    return X, y, df


# Example: extract_featuresets('XOM')
def do_ml(ticker):
    """Train a k-nearest-neighbours classifier for ``ticker`` and score it.

    Obtains features and labels from ``extract_featuresets``, holds out 25% of
    the rows as a test set, fits a ``KNeighborsClassifier`` with default
    settings on the remainder, and prints the distribution of the labels
    predicted for the test set.

    No ``random_state`` is passed to ``train_test_split``, so the split (and
    therefore the result) can differ from run to run.

    Args:
        ticker: Column name of the ticker to model.

    Returns:
        The value of ``clf.score`` on the held-out test set.
    """
    # The DataFrame returned as the third element is not needed here.
    X, y, _ = extract_featuresets(ticker)

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.25)

    clf = neighbors.KNeighborsClassifier()
    clf.fit(X_train, y_train)

    confidence = clf.score(X_test, y_test)
    predictions = clf.predict(X_test)
    print('Predicted spread:', Counter(predictions))

    return confidence
# Run the full pipeline for the 'BAC' column.  The value returned by do_ml is
# not captured; only the printed label distributions are shown.
do_ml('BAC')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement