Advertisement
Guest User

Untitled

a guest
Mar 31st, 2015
358
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.07 KB | None | 0 0
  1. import numpy as np
  2. import math
  3. import pandas as pd
  4.  
  5. from sklearn.cross_validation import train_test_split
  6. from sklearn.ensemble import GradientBoostingClassifier
  7. from sklearn.grid_search import GridSearchCV
  8. from sklearn.ensemble import GradientBoostingClassifier
  9.  
  10. def score_model(model,datax,datay):
  11.     class_index={}
  12.     for i,x in enumerate(model.classes_):
  13.         class_index[x]=i
  14.     score_matrix=model.predict_proba(datax)
  15.     return sum(math.log(max(min(scores[class_index[datay[i]]],1-1e-15),1e-15)) for i,scores in enumerate(score_matrix))/len(datay)
  16.  
  17. if __name__ == '__main__':
  18.     data=pd.read_csv(r'D:\Kaggle\ottogroup\train.csv')
  19.     toscore=pd.read_csv(r'D:\Kaggle\ottogroup\test.csv')
  20.     xcolumns=[x for x in data.columns if x.startswith('feat_')]
  21.     trainx,trainy=data[xcolumns],data.target
  22.     paramlist=[{'learning_rate':[.15],'max_depth':[7,10],'n_estimators':[100],'subsample':[0.5,0.85,1]}]
  23.     grid_gbm=GridSearchCV(GradientBoostingClassifier(),paramlist,scoring=score_model,verbose=1,n_jobs=2)
  24.     grid_gbm.fit(trainx,trainy)
  25.     print(grid_gbm.best_params_)
  26.     print(grid_gbm.best_score_)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement