SHARE
TWEET

Untitled

a guest Jun 16th, 2019 58 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import numpy as np
  2. import pandas as pd
  3. import pickle
  4. from collections import Counter
  5. from sklearn import svm, model_selection as cross_validation, neighbors
  6. from sklearn.ensemble import RandomForestClassifier, VotingClassifier
  7.  
  8. def process_data_for_lables(ticker):
  9.     hm_days = 7
  10.     df = pd.read_csv('sp500_joined_closes.csv', index_col=0)
  11.     df.dropna(inplace=True)
  12.     tickers = df.columns.values.tolist()
  13.     df.fillna(0)
  14.  
  15.     for i in range (1,hm_days+1):
  16.         df['{}_{}d'.format(ticker, i)] = (df[ticker].shift(-i)-df[ticker])/ df[ticker]
  17.  
  18.     df.fillna(0)
  19.     return tickers, df
  20. #process_data_for_lables('XOM')
  21.  
  22. def buy_sell_hold(*args):
  23.     cols = [c for c in args]
  24.     requirement = 0.02
  25.     for col in cols:
  26.         if col > requirement:
  27.             return 1
  28.         if col < -requirement:
  29.             return-1
  30.     return 0
  31.  
  32. def extract_featuresets(ticker):
  33.     tickers, df = process_data_for_lables(ticker)
  34.     df.dropna(inplace=True)
  35.     df['{}_target'.format(ticker)] = list(map(buy_sell_hold,
  36.                                               df['{}_1d'.format(ticker)],
  37.                                               df['{}_2d'.format(ticker)],
  38.                                               df['{}_3d'.format(ticker)],
  39.                                               df['{}_4d'.format(ticker)],
  40.                                               df['{}_5d'.format(ticker)],
  41.                                               df['{}_6d'.format(ticker)],
  42.                                               df['{}_7d'.format(ticker)]))
  43.  
  44.     vals = df['{}_target'.format(ticker)].values.tolist()
  45.     str_vals = [str(i) for i in vals]
  46.     print('Data spread:', Counter(str_vals))
  47.     df.fillna(0)
  48.  
  49.     df = df.replace([np.inf, -np.inf], np.nan)
  50.     df.dropna(inplace=True)
  51.  
  52.     df_vals = df[[ticker for ticker in tickers]].pct_change()
  53.     df_vals = df_vals.replace([np.inf, -np.inf], 0)
  54.     df_vals.fillna(0)
  55.  
  56.     X = df_vals.values
  57.     y = df['{}_target'.format(ticker)].values
  58.     np.nan_to_num(X)
  59.     return X, y, df
  60. #extract_featuresets('XOM')  
  61.  
  62. def do_ml(ticker):
  63.  
  64.     X, y, df = extract_featuresets(ticker)
  65.     X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,
  66.                                                                     y,
  67.                                                                     test_size = 0.25)
  68.     clf = neighbors.KNeighborsClassifier()
  69.     clf.fit(X_train, y_train)
  70.     confidence = clf.score(X_test, y_test)
  71.     predictions = clf.predict(X_test)
  72.     print('Predicted spread:', Counter(predictions))
  73.  
  74.     return confidence
  75.  
# Module-level call: runs the whole pipeline for the 'BAC' ticker whenever this
# file is executed or imported.  The value returned by do_ml is not stored or
# printed here.
do_ml('BAC')
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top