Not a member of Pastebin yet?
Sign up:
it unlocks many cool features!
# Prepare a single-feature dataset for a classifier: split into train/test
# sets, reshape the feature to 2-D, and standardize it.

import numpy as np  # required by the np.array(...) calls below
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23. Prefer the standalone package; fall back to the vendored copy only
# on old installations where standalone joblib is not available.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# NOTE(review): `finalDf` is not defined in this snippet; presumably a pandas
# DataFrame built earlier with these two columns -- confirm against the caller.
X = finalDf['size_in_mb']
y = finalDf['pop_categories']

# Hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=21
)

# Only one feature is used, so the 1-D values are reshaped into a single
# column (n_samples, 1) before being handed to the scaler.
npX_train = np.array(X_train).reshape(-1, 1)
npX_test = np.array(X_test).reshape(-1, 1)

# Fit the scaler on the training data only, then apply the same
# transformation to the test data so no test statistics leak into training.
scaler = StandardScaler()
npX_train = scaler.fit_transform(npX_train)
npX_test = scaler.transform(npX_test)
Add Comment
Please sign in to add a comment.