Advertisement
Guest User

Untitled

a guest
Jun 16th, 2019
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.54 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. import pickle
  4. from collections import Counter
  5. from sklearn import svm, model_selection as cross_validation, neighbors
  6. from sklearn.ensemble import RandomForestClassifier, VotingClassifier
  7.  
  8. def process_data_for_lables(ticker):
  9. hm_days = 7
  10. df = pd.read_csv('sp500_joined_closes.csv', index_col=0)
  11. df.dropna(inplace=True)
  12. tickers = df.columns.values.tolist()
  13. df.fillna(0)
  14.  
  15. for i in range (1,hm_days+1):
  16. df['{}_{}d'.format(ticker, i)] = (df[ticker].shift(-i)-df[ticker])/ df[ticker]
  17.  
  18. df.fillna(0)
  19. return tickers, df
  20. #process_data_for_lables('XOM')
  21.  
  22. def buy_sell_hold(*args):
  23. cols = [c for c in args]
  24. requirement = 0.02
  25. for col in cols:
  26. if col > requirement:
  27. return 1
  28. if col < -requirement:
  29. return-1
  30. return 0
  31.  
  32. def extract_featuresets(ticker):
  33. tickers, df = process_data_for_lables(ticker)
  34. df.dropna(inplace=True)
  35. df['{}_target'.format(ticker)] = list(map(buy_sell_hold,
  36. df['{}_1d'.format(ticker)],
  37. df['{}_2d'.format(ticker)],
  38. df['{}_3d'.format(ticker)],
  39. df['{}_4d'.format(ticker)],
  40. df['{}_5d'.format(ticker)],
  41. df['{}_6d'.format(ticker)],
  42. df['{}_7d'.format(ticker)]))
  43.  
  44. vals = df['{}_target'.format(ticker)].values.tolist()
  45. str_vals = [str(i) for i in vals]
  46. print('Data spread:', Counter(str_vals))
  47. df.fillna(0)
  48.  
  49. df = df.replace([np.inf, -np.inf], np.nan)
  50. df.dropna(inplace=True)
  51.  
  52. df_vals = df[[ticker for ticker in tickers]].pct_change()
  53. df_vals = df_vals.replace([np.inf, -np.inf], 0)
  54. df_vals.fillna(0)
  55.  
  56. X = df_vals.values
  57. y = df['{}_target'.format(ticker)].values
  58. np.nan_to_num(X)
  59. return X, y, df
  60. #extract_featuresets('XOM')
  61.  
  62. def do_ml(ticker):
  63.  
  64. X, y, df = extract_featuresets(ticker)
  65. X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,
  66. y,
  67. test_size = 0.25)
  68. clf = neighbors.KNeighborsClassifier()
  69. clf.fit(X_train, y_train)
  70. confidence = clf.score(X_test, y_test)
  71. predictions = clf.predict(X_test)
  72. print('Predicted spread:', Counter(predictions))
  73.  
  74. return confidence
  75.  
  76. do_ml('BAC')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement