Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Train and evaluate a multinomial Naive Bayes classifier on labelled text
# messages read from 'data6.csv', using bag-of-words token counts as features.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# names= assigns the two column names; because names are supplied, pandas
# reads the first line of the file as data rather than as a header.
msg = pd.read_csv('data6.csv', names=['message', 'label'])
print('The dimensions of the dataset', msg.shape)

# Encode the text labels numerically. Any label other than 'pos'/'neg' maps
# to NaN.
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum

# Splitting the dataset into train and test data. No random_state is passed,
# so the split (and therefore every metric below) varies from run to run.
xtrain, xtest, ytrain, ytest = train_test_split(X, y)
print ('\n The total number of Training Data :', ytrain.shape)
print ('\n The total number of Test Data :', ytest.shape)

# Output of CountVectorizer is a sparse document-term matrix. The vocabulary
# is learned from the training split only and then reused for the test split.
cv = CountVectorizer()
xtrain_dtm = cv.fit_transform(xtrain)
xtest_dtm = cv.transform(xtest)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back only on older releases.
# .tolist() keeps the printed output a plain Python list, as before.
if hasattr(cv, 'get_feature_names_out'):
    feature_names = cv.get_feature_names_out().tolist()
else:
    feature_names = cv.get_feature_names()

print('\n The words or Tokens in the text documents \n')
print(feature_names)

# Dense view of the training matrix, one column per token. It is not used
# again in this script; kept so the name `df` remains available for inspection.
df = pd.DataFrame(xtrain_dtm.toarray(), columns=feature_names)

# Training Naive Bayes (NB) classifier on training data.
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

# Printing accuracy, confusion matrix, precision and recall.
print('\nAccuracy metrics')
print('Accuracy of the classifer is', metrics.accuracy_score(ytest, predicted))
print('Recall :', metrics.recall_score(ytest, predicted),
      '\nPrecison :', metrics.precision_score(ytest, predicted))
print('Confusion matrix')
print(metrics.confusion_matrix(ytest, predicted))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement