fake_world

ml6

Dec 3rd, 2020
498
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import pandas as pd
  2. from sklearn.model_selection import train_test_split
  3. from sklearn.feature_extraction.text import CountVectorizer
  4. from sklearn.naive_bayes import MultinomialNB
  5. from sklearn import metrics
  6.  
  7. msg=pd.read_csv('data6.csv',names=['message','label'])
  8. print('The dimensions of the dataset',msg.shape)
  9. msg['labelnum']=msg.label.map({'pos':1,'neg':0})
  10. X=msg.message
  11. y=msg.labelnum
  12.  
  13. #splitting the dataset into train and test data  
  14. xtrain,xtest,ytrain,ytest=train_test_split(X,y)
  15. print ('\n The total number of Training Data :',ytrain.shape)
  16. print ('\n The total number of Test Data :',ytest.shape)
  17.  
  18. #output of count vectoriser is a sparse matrix   
  19. cv = CountVectorizer()
  20. xtrain_dtm = cv.fit_transform(xtrain)
  21. xtest_dtm=cv.transform(xtest)
  22. print('\n The words or Tokens in the text documents \n')
  23. print(cv.get_feature_names())
  24.  
  25. df=pd.DataFrame(xtrain_dtm.toarray(),columns=cv.get_feature_names())
  26.  
  27. # Training Naive Bayes (NB) classifier on training data.     
  28. clf = MultinomialNB().fit(xtrain_dtm,ytrain)
  29. predicted = clf.predict(xtest_dtm)
  30.  
  31. #printing accuracy, Confusion matrix, Precision and Recall   
  32.  
  33.  
  34. print('\nAccuracy metrics')
  35. print('Accuracy of the classifer is',metrics.accuracy_score(ytest,predicted))
  36. print('Recall :',metrics.recall_score(ytest,predicted), '\nPrecison :',metrics.precision_score(ytest,predicted))
  37. print('Confusion matrix')
  38. print(metrics.confusion_matrix(ytest,predicted))
  39.  
RAW Paste Data