Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import f_classif
from sklearn.feature_selection import SelectKBest

# Feature-selection stage: drop zero-variance columns, then keep the
# top-scoring columns according to the ANOVA F-test (f_classif).
#
# Relies on three names defined earlier in the script:
#   trainingX     - 2-D training matrix (indexed via .shape[1] below)
#   trainingY     - targets passed to the supervised selector
#   validationSet - held-out matrix; presumably has the same columns as
#                   trainingX (TODO confirm against the loading code)
# Both selectors are fitted on the training data only and then merely
# applied to validationSet, so the validation data never influences which
# features are kept.

# Upper bound on the number of features kept by SelectKBest.
KBESTNUM = 17000

# Single-argument print(...) calls produce the same output on Python 2
# and Python 3, unlike the original print statements.
print("ORIGINAL #FEATURES %s" % trainingX.shape[1])

print("removing features with zero variance")
# Default threshold (0.0) removes only constant columns. The original
# carried a commented-out stricter alternative of threshold=.000005.
sel = VarianceThreshold()
trainingX = sel.fit_transform(trainingX)
validationSet = sel.transform(validationSet)
print("AFTER VARIANCE REMOVAL %s" % trainingX.shape[1])

# Clamp k to the number of columns that survived variance filtering:
# asking SelectKBest for more features than exist raises ValueError on
# older scikit-learn releases (newer ones only warn).
k_effective = min(KBESTNUM, trainingX.shape[1])
print("feature selecting using " + str(k_effective))
kBest = SelectKBest(f_classif, k=k_effective)
trainingX = kBest.fit_transform(trainingX, trainingY)
validationSet = kBest.transform(validationSet)
print("done selecting features")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement