Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # make prediction
- import sys
- import numpy as np
- # add path of xgboost python module
- sys.path.append('../../python/')
- import xgboost as xgb
- # path to where the data lies
- dpath = 'data'
- modelfile = 'higgs.model'
- outfile = 'higgs.pred.csv'
- # make top 15% as positive
- threshold_ratio = 0.148
- # load in training data, directly use numpy
- dtest = np.loadtxt( dpath+'/test.csv', delimiter=',', skiprows=1 )
- data = dtest[:,1:31]
- idx = dtest[:,0]
- print ('finish loading from csv ')
- xgmat = xgb.DMatrix( data, missing = -999.0 )
- bst = xgb.Booster({'nthread':32})
- bst.load_model( modelfile )
- ypred = bst.predict( xgmat )
- res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
- rorder = {}
- for k, v in sorted( res, key = lambda x:-x[1] ):
- rorder[ k ] = len(rorder) + 1
- # write out predictions
- ntop = int( threshold_ratio * len(rorder ) )
- fo = open(outfile, 'w')
- nhit = 0
- ntot = 0
- fo.write('EventId,RankOrder,Class\n')
- for k, v in res:
- if rorder[k] <= ntop:
- lb = 's'
- nhit += 1
- else:
- lb = 'b'
- # change output rank order to follow Kaggle convention
- fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) )
- ntot += 1
- fo.close()
- print ('finished writing into prediction file')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement