Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def rfstockpicker_backtest(database, prediction_date, rolling_window,
                           nb_months, n_estimators, n_jobs):
    """Backtest a random-forest stock picker over a series of monthly windows.

    For each of the ``nb_months`` months ending at ``prediction_date``, a
    RandomForestRegressor is trained on the trailing ``rolling_window`` years
    of data and used to predict the ``'win'`` column for that held-out month.

    Parameters
    ----------
    database : pandas.DataFrame
        Must contain a ``'decision_date'`` column, the label column ``'win'``,
        and the metadata columns listed in ``columns_to_drop`` below; every
        remaining column is treated as a model feature.
    prediction_date : str
        Most recent prediction month, formatted ``"%Y-%m-%d"``.
    rolling_window : int
        Length of the training window, in years.
    nb_months : int
        Number of monthly backtest iterations to run.
    n_estimators, n_jobs : int
        Forwarded to sklearn's ``RandomForestRegressor``.

    Returns
    -------
    tuple
        ``(results, feature_importances, importances, train_feature_list)``
        where ``results`` maps iteration index -> copy of that month's test
        set with a ``'predictions'`` column added, and the three importance
        outputs come from the LAST model fitted (empty lists when
        ``nb_months == 0`` — the original raised NameError in that case).
    """
    # Hoisted out of the loop: the original re-imported sklearn on every
    # iteration.
    from sklearn.ensemble import RandomForestRegressor

    # Identifier / label / leakage columns that must not be used as features.
    columns_to_drop = ['win', 'sprtrn', 'gvkey', 'datadate', 'conm',
                       'trt1m', 'decision_date', 'tic']

    # Parsing the anchor date is loop-invariant: do it once.
    anchor = datetime.datetime.strptime(prediction_date, "%Y-%m-%d")

    results = {}
    # Defaults so the return is well-defined even when the loop never runs.
    feature_importances = []
    importances = []
    train_feature_list = []

    for i in range(nb_months):
        # Test month, and the trailing training interval that precedes it.
        test_date = anchor - dateutil.relativedelta.relativedelta(months=i)
        train_end = anchor - dateutil.relativedelta.relativedelta(months=i + 1)
        train_start = train_end - dateutil.relativedelta.relativedelta(years=rolling_window)

        test_set = database[database['decision_date'].isin(pd.date_range(test_date, test_date))]
        training_set = database[database['decision_date'].isin(pd.date_range(train_start, train_end))]

        # Labels are the values we want to predict; everything else (minus
        # the dropped metadata columns) is a feature.
        train_labels = np.array(training_set['win'])
        train_features = training_set.drop(columns_to_drop, axis=1)
        # Saved for the feature-importance report returned to the caller.
        train_feature_list = list(train_features.columns)
        train_features = np.array(train_features)

        test_features = np.array(test_set.drop(columns_to_drop, axis=1))

        # random_state pinned for reproducible backtests.
        rf = RandomForestRegressor(n_estimators=n_estimators,
                                   random_state=42, n_jobs=n_jobs)
        rf.fit(train_features, train_labels)
        stock_predictions = rf.predict(test_features)

        # .copy() so we don't write through a view of the caller's DataFrame
        # (the original aliased test_set and triggered SettingWithCopy
        # behaviour, silently mutating the filtered slice).
        result_set = test_set.copy()
        result_set['predictions'] = stock_predictions
        results[i] = result_set

        # Importances are recomputed each pass; the values returned are those
        # of the last fitted model, matching the original behaviour.
        importances = list(rf.feature_importances_)
        feature_importances = sorted(
            ((feature, round(importance, 2))
             for feature, importance in zip(train_feature_list, importances)),
            key=lambda x: x[1],
            reverse=True,
        )

    return results, feature_importances, importances, train_feature_list
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement