Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def NB_func():
    """Train a Multinomial Naive Bayes text classifier and pickle it to disk.

    Reads the ``content`` and ``cat_id`` columns of ``df``, converts the
    training documents to token counts and then to TF-IDF weights, fits a
    ``MultinomialNB`` on them, prints the fitted estimator, writes it to
    ``../dataanalysis/models/Naive_Bayes.sav`` and finally prints a banner
    followed by the most informative features.

    ``df``, ``tfidf`` and ``most_informative_feature_for_binary_classification``
    are not defined here and must be provided by the enclosing scope.

    Returns:
        None.
    """
    # The held-out split is not used in this function; the underscore names
    # make that explicit while keeping the same split (random_state=0).
    X_train, _X_test, y_train, _y_test = train_test_split(
        df['content'], df['cat_id'], random_state=0
    )

    count_vect = CountVectorizer()
    X_train_counts = count_vect.fit_transform(X_train)

    tfidf_transformer = TfidfTransformer()
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

    clf_NB = MultinomialNB().fit(X_train_tfidf, y_train)
    print(clf_NB)

    # Save the model to disk. The path is relative, so it resolves against
    # the current working directory. The context manager guarantees the file
    # is flushed and closed even if pickling raises.
    filename = '../dataanalysis/models/Naive_Bayes.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(clf_NB, model_file)
    print()

    # TODO: print the predicted category for an unknown document. Not yet
    # accurate due to insufficient sample data. The classifier was fit on
    # TF-IDF features, so the input must go through both fitted transformers:
    #   clf_NB.predict(tfidf_transformer.transform(count_vect.transform([text])))
    print()
    print("===============================================")
    print("================= NAIVE BAYES =================")
    print("===============================================")
    # NOTE(review): `tfidf` is not the vectorizer fitted above (`count_vect`).
    # If its vocabulary differs, the feature names printed here will not line
    # up with clf_NB's coefficients -- confirm which vectorizer is intended.
    most_informative_feature_for_binary_classification(tfidf, clf_NB, n=10)
- ===============================================
- ================= NAIVE BAYES =================
- ===============================================
- 3 -7.788372938139329 abato
- 3 -7.788372938139329 abdome abdome erythema
- 3 -7.788372938139329 abdome erythema redness
- 3 -7.788372938139329 abdomen distended complaint
- 3 -7.788372938139329 abdomen ex drain
- 3 -7.788372938139329 abdomen hbs
- 3 -7.788372938139329 abdomen hbs kidneys
- 3 -7.788372938139329 abdomen insitu
- 3 -7.788372938139329 abdomen insitu pinkish
- 3 -7.788372938139329 abdomen pelvis
- 4 -6.221523090721391 bloods operationprocedure
- 4 -6.4003419545043165 absence peripheral oedema
- 4 -6.5928517017851505 assessmentreassessment indicated indian
- 4 -6.677711285491995 afternnon cardiovascular presence
- 4 -6.698249643881156 bread resting
- 4 -6.781485612022626 assessmentreassessment indicated braden
- 4 -6.784706622916945 bardia ordered
- 4 -6.785386746502815 access lips inspect
- 4 -6.7946948984778395 alzheimers disease gcs
- 4 -6.849375428834153 alarmdate cohort
Add Comment
Please, Sign In to add comment