# Untitled

# Importing the tasty stuff
import numpy as np
from sklearn.ensemble import RandomForestClassifier
# NOTE(review): sklearn.externals.joblib was removed in scikit-learn 0.23;
# on newer versions this line must become `import joblib`.
from sklearn.externals import joblib
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Build a single-feature classification dataset from `finalDf` (defined
# elsewhere): the feature is the `size_in_mb` column and the target is the
# `pop_categories` column.
X = finalDf['size_in_mb']
y = finalDf['pop_categories']

# Hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=21
)

# scikit-learn transformers/estimators expect 2-D input of shape
# (n_samples, n_features). There is only one feature here, so each split is
# turned into a single-column array.
npX_train = np.array(X_train).reshape(-1, 1)
npX_test = np.array(X_test).reshape(-1, 1)

# Standardize the feature. The scaler is fitted on the training split only and
# then reused on the test split, so no test-set statistics leak into training.
# NOTE(review): tree ensembles such as the imported RandomForestClassifier are
# generally insensitive to feature scaling -- confirm this step is needed.
scaler = StandardScaler()
npX_train = scaler.fit_transform(npX_train)
npX_test = scaler.transform(npX_test)