Guest User

Untitled

a guest
Oct 17th, 2018
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.00 KB | None | 0 0
  def NB_func():
      """Train a Multinomial Naive Bayes text classifier, save it, and report on it.

      Steps, in order:

      1. Split ``df['content']`` / ``df['cat_id']`` with ``train_test_split``
         (``random_state=0``); only the training portion is used here.
      2. Turn the training text into token counts (``CountVectorizer``) and
         then into TF-IDF weights (``TfidfTransformer``).
      3. Fit ``MultinomialNB`` on the TF-IDF matrix and print the estimator.
      4. Pickle the fitted classifier to
         ``../dataanalysis/models/Naive_Bayes.sav``.
      5. Print a banner and call
         ``most_informative_feature_for_binary_classification`` with ``n=10``.

      Relies on names defined outside this function: ``df``, ``tfidf``,
      ``train_test_split``, ``CountVectorizer``, ``TfidfTransformer``,
      ``MultinomialNB``, ``pickle`` and
      ``most_informative_feature_for_binary_classification``.

      Returns:
          None. All results are printed or written to disk.
      """
      # The held-out split is not used in this function, so it is discarded.
      X_train, _, y_train, _ = train_test_split(
          df['content'], df['cat_id'], random_state=0
      )

      count_vect = CountVectorizer()
      X_train_counts = count_vect.fit_transform(X_train)

      tfidf_transformer = TfidfTransformer()
      X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

      clf_NB = MultinomialNB().fit(X_train_tfidf, y_train)
      print(clf_NB)

      # Save the model to disk. The context manager guarantees the file handle
      # is flushed and closed even if pickling raises.
      filename = '../dataanalysis/models/Naive_Bayes.sav'
      with open(filename, 'wb') as model_file:
          pickle.dump(clf_NB, model_file)

      print()
      # Print the prediction of the category from the unknown document.
      # For now it's not accurate due to insufficient sample data.
      # print("NAIVE BAYES CLASSIFIER: ", clf_NB.predict(count_vect.transform([""])))
      print()

      print("===============================================")
      print("================= NAIVE BAYES =================")
      print("===============================================")
      # NOTE(review): the classifier above was fitted on features produced by the
      # local `count_vect`/`tfidf_transformer`, while `tfidf` comes from outside
      # this function -- confirm both share the same vocabulary, otherwise the
      # feature names reported below will not line up with the coefficients.
      most_informative_feature_for_binary_classification(tfidf, clf_NB, n=10)
  24.  
  25. ===============================================
  26. ================= NAIVE BAYES =================
  27. ===============================================
  28. 3 -7.788372938139329 abato
  29. 3 -7.788372938139329 abdome abdome erythema
  30. 3 -7.788372938139329 abdome erythema redness
  31. 3 -7.788372938139329 abdomen distended complaint
  32. 3 -7.788372938139329 abdomen ex drain
  33. 3 -7.788372938139329 abdomen hbs
  34. 3 -7.788372938139329 abdomen hbs kidneys
  35. 3 -7.788372938139329 abdomen insitu
  36. 3 -7.788372938139329 abdomen insitu pinkish
  37. 3 -7.788372938139329 abdomen pelvis
  38.  
  39. 4 -6.221523090721391 bloods operationprocedure
  40. 4 -6.4003419545043165 absence peripheral oedema
  41. 4 -6.5928517017851505 assessmentreassessment indicated indian
  42. 4 -6.677711285491995 afternnon cardiovascular presence
  43. 4 -6.698249643881156 bread resting
  44. 4 -6.781485612022626 assessmentreassessment indicated braden
  45. 4 -6.784706622916945 bardia ordered
  46. 4 -6.785386746502815 access lips inspect
  47. 4 -6.7946948984778395 alzheimers disease gcs
  48. 4 -6.849375428834153 alarmdate cohort
Add Comment
Please, Sign In to add comment