Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import math
- import pandas as pd
- from sklearn.cross_validation import train_test_split
- from sklearn.ensemble import GradientBoostingClassifier
- from sklearn.grid_search import GridSearchCV
- from sklearn.ensemble import GradientBoostingClassifier
def score_model(model, datax, datay):
    """Mean per-sample log-likelihood of the true classes (higher is better).

    Scorer with the ``(estimator, X, y)`` signature expected by
    GridSearchCV's ``scoring`` parameter.  Predicted probabilities are
    clipped to ``[1e-15, 1 - 1e-15]`` before taking the log so a confident
    wrong prediction cannot yield ``-inf`` (the same clipping convention
    the original code applied via min/max).

    Parameters
    ----------
    model : fitted classifier exposing ``classes_`` and ``predict_proba``
    datax : feature matrix accepted by ``model.predict_proba``
    datay : array-like of true labels, aligned row-by-row with ``datax``

    Returns
    -------
    float
        Average log-probability assigned to the correct class (<= 0).
    """
    # Map each class label to its column in the predict_proba output.
    class_index = {label: i for i, label in enumerate(model.classes_)}
    proba = np.asarray(model.predict_proba(datax))
    # Convert to a plain array for positional access: the original
    # `datay[i]` breaks when datay is a pandas Series with a non-default
    # index (e.g. a slice produced by a train/test split).
    labels = np.asarray(datay)
    cols = np.array([class_index[y] for y in labels])
    picked = np.clip(proba[np.arange(len(labels)), cols], 1e-15, 1 - 1e-15)
    return float(np.mean(np.log(picked)))
if __name__ == '__main__':
    # Paths to the Kaggle Otto Group challenge data; hoisted out of the
    # read calls so they are adjusted in one obvious place per machine.
    train_path = r'D:\Kaggle\ottogroup\train.csv'
    test_path = r'D:\Kaggle\ottogroup\test.csv'

    data = pd.read_csv(train_path)
    # NOTE(review): the test set is loaded but never used in this script;
    # presumably kept for a later prediction step — confirm it is needed.
    toscore = pd.read_csv(test_path)

    # Features are the anonymised feat_* columns; the label is `target`.
    xcolumns = [c for c in data.columns if c.startswith('feat_')]
    trainx, trainy = data[xcolumns], data.target

    # Small grid over the depth / subsampling knobs of gradient boosting.
    paramlist = [{
        'learning_rate': [.15],
        'max_depth': [7, 10],
        'n_estimators': [100],
        'subsample': [0.5, 0.85, 1],
    }]
    # score_model follows the (estimator, X, y) scorer signature, so it is
    # passed directly as `scoring`; GridSearchCV maximises its value.
    grid_gbm = GridSearchCV(
        GradientBoostingClassifier(),
        paramlist,
        scoring=score_model,
        verbose=1,
        n_jobs=2,
    )
    grid_gbm.fit(trainx, trainy)
    print(grid_gbm.best_params_)
    print(grid_gbm.best_score_)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement