Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def model_training_xgb(training_data, testing_data):
    """Train an XGBoost churn classifier and build a submission DataFrame.

    Parameters
    ----------
    training_data : pd.DataFrame
        Must contain an 'msno' id column and a binary 'is_churn' target;
        every remaining column is used as a feature.
    testing_data : pd.DataFrame
        Same column layout as ``training_data``; its 'is_churn' values are
        not used for prediction, only dropped from the feature matrix.

    Returns
    -------
    model : xgb.XGBClassifier
        The fitted classifier.
    model_val_score : float
        Log-loss on the 20% hold-out validation split (E_val).
    submission : pd.DataFrame
        Columns 'msno' and 'is_churn', where 'is_churn' is the predicted
        probability of churn (class 1).
    """
    # Hold out 20% of the training data as a validation set; fixed
    # random_state keeps the split reproducible across runs.
    X = training_data.drop(labels=['msno', 'is_churn'], axis=1)
    Y = training_data['is_churn']
    X_train, X_val, Y_train, Y_val = train_test_split(
        X, Y, test_size=0.2, random_state=2)

    # Watch both splits so early stopping can monitor validation log-loss.
    xgb_watchlist = [(X_train, Y_train), (X_val, Y_val)]
    model = xgb.XGBClassifier(learning_rate=0.08, max_depth=4,
                              n_estimators=300, subsample=0.5, seed=2,
                              missing=-1)
    model.fit(X_train, Y_train, eval_set=xgb_watchlist, eval_metric='logloss',
              early_stopping_rounds=20, verbose=70)

    # Calculating E_val: log-loss on the hold-out set.
    # [:, 1] selects the predicted probability that is_churn == 1.
    model_probs = model.predict_proba(X_val)
    model_val_score = log_loss(Y_val, model_probs[:, 1])

    # Predict churn probability on the testing set and assemble the
    # submission frame (column order: msno, is_churn).
    model_pred_testing_set = model.predict_proba(
        testing_data.drop(labels=['msno', 'is_churn'], axis=1))
    model_pred_testing_set = model_pred_testing_set[:, 1]
    submission = pd.DataFrame({"msno": testing_data.msno})
    submission.insert(1, column='is_churn', value=model_pred_testing_set)
    return model, model_val_score, submission
Add Comment
Please sign in to add a comment