Advertisement
Guest User

Untitled

a guest
Apr 16th, 2014
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.83 KB | None | 0 0
  # Train a random forest on bag-of-words features and write the predicted
  # label for every test document to output.csv (one prediction per row).
  #
  # NOTE(review): train_file, train_targets and test_file are not defined in
  # this snippet. Presumably train_file/test_file are iterables of text lines
  # and train_targets holds one label per training line -- confirm against
  # the code that loads them.
  import csv

  from sklearn.ensemble import RandomForestClassifier
  from sklearn.feature_extraction.text import CountVectorizer

  # Build word-occurrence vectors for each line. `decode_error` is the current
  # name of the former `charset_error` option; 'ignore' skips undecodable
  # bytes instead of raising.
  vectorizer = CountVectorizer(min_df=1, decode_error='ignore')
  X_train = vectorizer.fit_transform(train_file)

  # The forest accepts the sparse matrix directly, so the features are no
  # longer densified (todense() returned a numpy.matrix, which current
  # scikit-learn rejects, and a dense bag-of-words matrix wastes memory).
  # `compute_importances` was dropped from the constructor: feature
  # importances are always available through clf.feature_importances_.
  clf = RandomForestClassifier(n_estimators=100)
  clf = clf.fit(X_train, train_targets)

  # Transform the test data with the vocabulary learned from the training set.
  testdata = vectorizer.transform(test_file)

  # Export. On Python 3 the csv module needs a text-mode file opened with
  # newline='' (the original 'wb' mode only worked on Python 2).
  with open('output.csv', 'w', newline='') as csvfile:
      spamwriter = csv.writer(csvfile)
      for item in clf.predict(testdata):
          spamwriter.writerow([item])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement