Advertisement
Guest User

Untitled

a guest
Jun 19th, 2019
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.54 KB | None | 0 0
  # -*- coding: utf-8 -*-
"""
Created on Tue May 28 15:04:39 2019

@author: Ishwor Bhusal

SMS spam classification walkthrough.

Reads the tab-separated file 'TextFiles/smsspamcollection.tsv' (the 'message'
column holds the text, the 'label' column holds the class), demonstrates the
individual text-vectorisation steps on the training split, then trains a
TF-IDF + linear SVM pipeline, evaluates it on the held-out split and tries it
on a few hand-written messages.
"""

# Importing basic packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Importing the scikit-learn pieces used below
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

# Importing dataset to the environment
data = pd.read_csv('TextFiles/smsspamcollection.tsv', sep='\t')
print(data.head())

# Checking the total count of the null values
print(data.isnull().sum())

# Counting the values of the label
print(data['label'].value_counts())

# Splitting the data for train and test set
X_data = data['message']
y_data = data['label']

# Running the train test split model (fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.25, random_state=508)

# Running count vectorizer: fit_transform learns the vocabulary and builds the
# document-term count matrix in a single pass, so no separate fit() is needed
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)

# Checking the shape of the data
print(X_train_counts.shape)

# Fitting the tf-idf transformer into training data counts and looking at the
# shape of the data
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
print(X_train_tfidf.shape)

# Fitting the tf-idf vectorizer, which goes from raw text to tf-idf features
# directly (it combines the count and tf-idf steps shown above)
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)

# Fitting linear support vector classifier in vectorized training data
clf = LinearSVC()
clf.fit(X_train_tfidf, y_train)

# Building a pipeline so that raw text can be passed straight to fit/predict
text_clf = Pipeline([('tfidf', TfidfVectorizer()), ('clf', LinearSVC())])

# Fitting pipeline module in training data
text_clf.fit(X_train, y_train)

# Running prediction model into test data
predictions = text_clf.predict(X_test)

# Preparing and printing confusion matrix to see the performance of the model
print(confusion_matrix(y_test, predictions))
# Looking at classification report
print(classification_report(y_test, predictions))

# Looking at the overall accuracy of the predictions on the test data
print(metrics.accuracy_score(y_test, predictions))

# Practically checking the predictions on real examples: each sentence below is
# classified to see whether the model calls it spam or legit (ham)
sample_messages = [
    "Congratulations! for your achievement what you have got!",
    "Mr. Customer, you are eligible to get 50% discount on your purchase of your clothings in your next visit.",
    "Mom, come home as soon as possible, someone wants to see you soon!",
    "Dad, come home as soon as possible, someone wants to see you soon!",
    "Brother, come home as soon as possible, someone wants to see you soon!",
    "Dear Customer, come home as soon as possible, someone wants to see you soon!",
    "Dear Mr. Bhusal, come home as soon as possible, someone wants to see you soon!",
    "Mrs. Kala, you are one of those people who have been choose for next round of interviews, congratulations!",
]
for message, predicted_label in zip(sample_messages, text_clf.predict(sample_messages)):
    print(predicted_label, '<-', message)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement