Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Predictor and response columns used by the fits below.
# NOTE(review): '201701' is plotted under the axis label 'n_crimes' further
# down, so it presumably holds crime counts -- confirm against df_aux.
x = df_aux["201701"]
y = df_aux["n_visits"]
def fit_poly(degree):
    """Fit a polynomial of the given degree to the module-level x/y and plot it.

    Side effects: stores the fitted values in ``df_aux['fit']``, draws a
    scatter of the raw points and the fitted curve with matplotlib/seaborn,
    and prints the R squared of the fit.

    Args:
        degree: Degree of the polynomial handed to ``np.polyfit``.

    Returns:
        Whatever ``plt.plot`` returns for the fitted curve.
    """
    coeffs = np.polyfit(x, y, deg=degree)
    df_aux['fit'] = np.polyval(coeffs, x)

    # Pass x/y by keyword: seaborn >= 0.12 makes them keyword-only, and older
    # releases accept the keyword form as well.
    sns.regplot(x=x, y=y, fit_reg=False)

    # Order the (x, fitted) pairs by x so the curve is drawn left to right
    # rather than in the row order of the data.
    pairs = sorted(zip(x, df_aux['fit']))
    new_x, new_y = zip(*pairs)

    r2 = r2_score(y, df_aux['fit'])
    print("R squared: ", r2)

    plt.xlabel('n_crimes')
    plt.title('N_visits/N_crimes per Borough')
    return plt.plot(new_x, new_y, color='r')
def get_rmse(y, y_fit):
    """Return the root of ``metrics.mean_squared_error(y, y_fit)``.

    Args:
        y: Observed values.
        y_fit: Fitted/predicted values compared against ``y``.
    """
    mse = metrics.mean_squared_error(y, y_fit)
    return np.sqrt(mse)
# Hold out 20% of the observations so train and test error can be compared;
# the fixed random_state keeps the split reproducible.
train_X, test_X, train_y, test_y = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=100,
)

# RMSE on both splits for polynomial degrees 1 through 14. Rows are collected
# first and the frame is built once instead of growing it row by row.
rmse_rows = []
for i in range(1, 15):
    p = np.polyfit(train_X, train_y, deg=i)
    rmse_rows.append([
        i,
        get_rmse(train_y, np.polyval(p, train_X)),
        get_rmse(test_y, np.polyval(p, test_X)),
    ])
rmse_df = pd.DataFrame(rmse_rows, columns=["degree", "rmse_train", "rmse_test"])

plt.plot(rmse_df.degree, rmse_df.rmse_train, label='train', color='r')
plt.plot(rmse_df.degree, rmse_df.rmse_test, label='test', color='g')
# The label= arguments above are only displayed once a legend is drawn.
plt.legend()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement