Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Bag-of-words text classification script (Python 3, current scikit-learn):
# vectorize the training lines into word counts, fit a random forest on them,
# then write one predicted label per row to output.csv.
#
# NOTE(review): `train_file`, `train_targets` and `test_file` are not defined
# in this snippet; they are assumed to be iterables of text lines / labels
# created earlier in the script -- confirm against the caller.
import csv

from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer

# Create word occurrence vectors for each line.
# `decode_error` is the current spelling of the removed `charset_error`
# keyword; 'ignore' skips bytes that cannot be decoded instead of raising.
vectorizer = CountVectorizer(min_df=1, decode_error='ignore')
X_train = vectorizer.fit_transform(train_file)

# Convert the sparse count matrix to a dense array. `.toarray()` yields a plain
# ndarray; `.todense()` yields numpy.matrix, which current scikit-learn
# estimators refuse as input.
X_train = X_train.toarray()

# Train the random forest. The old `compute_importances=True` flag has been
# removed: `clf.feature_importances_` is available after fitting without it.
clf = RandomForestClassifier(n_estimators=100)
clf = clf.fit(X_train, train_targets)

# Transform the test data with the vocabulary learned from the training set
# (transform, not fit_transform, so the columns line up with X_train).
testdata = vectorizer.transform(test_file)
testdata = testdata.toarray()

# Export one prediction per row. On Python 3 the csv module needs a text-mode
# handle opened with newline='' (binary 'wb' mode raises TypeError on write).
with open('output.csv', 'w', newline='') as csvfile:
    spamwriter = csv.writer(csvfile)
    for item in clf.predict(testdata):
        spamwriter.writerow([item])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement