Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2019
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.44 KB | None | 0 0
  1. def rfstockpicker_backtest(database,prediction_date,rolling_window,
  2. nb_months,n_estimators,n_jobs):
  3. database = database
  4. results={}
  5. for i in range(0,nb_months):
  6. d = datetime.datetime.strptime(prediction_date, "%Y-%m-%d")
  7.  
  8. d2 = d - dateutil.relativedelta.relativedelta(months=i)
  9. d3 = d - dateutil.relativedelta.relativedelta(months=i+1)
  10. d4 = d3 - dateutil.relativedelta.relativedelta(years=rolling_window) ##Creating a rolling window interval
  11.  
  12. test_set = database[database['decision_date'].isin(pd.date_range(d2, d2))]
  13. training_set=database[database['decision_date'].isin(pd.date_range(d4, d3))]
  14.  
  15. # Labels are the values we want to predict
  16. train_labels = np.array(training_set['win'])
  17. # Remove the labels from the features
  18. columns_to_drop=['win','sprtrn','gvkey','datadate','conm','trt1m','decision_date','tic']
  19. train_features= training_set.drop(columns_to_drop, axis = 1)
  20. # Saving feature names for later use
  21. train_feature_list = list(train_features.columns)
  22. # Convert to numpy array
  23. train_features = np.array(train_features)
  24.  
  25. test_labels = np.array(test_set['win'])
  26. # Remove the labels from the test features
  27. test_features= test_set.drop(columns_to_drop, axis = 1)
  28. # Convert to numpy array
  29. test_features = np.array(test_features)
  30.  
  31. # Import the model we are using
  32. from sklearn.ensemble import RandomForestRegressor
  33. # Instantiate model
  34. rf = RandomForestRegressor(n_estimators = n_estimators, random_state = 42,n_jobs=n_jobs)
  35. # Train the model on training data
  36. rf.fit(train_features, train_labels);
  37.  
  38. # Use the forest's predict method on the test data
  39. stock_predictions = rf.predict(test_features)
  40. result_set=test_set
  41. result_set['predictions']=stock_predictions
  42. results[i]=result_set
  43.  
  44. # Get numerical feature importances
  45. importances = list(rf.feature_importances_)
  46. # List of tuples with variable and importance
  47. feature_importances = [(feature, round(importance, 2)) for feature, importance in zip(train_feature_list, importances)]
  48. # Sort the feature importances by most important first
  49. feature_importances = sorted(feature_importances, key = lambda x: x[1], reverse = True)
  50.  
  51. return results,feature_importances,importances,train_feature_list
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement