Guest User

Untitled

a guest
Jan 16th, 2019
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.52 KB | None | 0 0
  1. import nltk
  2. import pickle
  3.  
  4.  
  5. class SpamChecker:
  6. def __init__(self):
  7. try:
  8. nltk.data.find('tokenizers/punkt')
  9. except:
  10. nltk.download('punkt')
  11.  
  12. self.spam_data = []
  13. self.ham_data = []
  14. self.classifier = None
  15.  
  16. def __text_to_data(self, text):
  17. return dict([(token, True) for token in nltk.word_tokenize(text)])
  18.  
  19. def add_spam(self, text):
  20. self.spam_data.append((self.__text_to_data(text), 'spam'))
  21.  
  22. def add_ham(self, text):
  23. self.ham_data.append((self.__text_to_data(text), 'ham'))
  24.  
  25. def load_spam_data(self, pickle_file):
  26. self.spam_data = pickle.load(open(pickle_file, 'rb'))
  27.  
  28. def load_ham_data(self, pickle_file):
  29. self.ham_data = pickle.load(open(pickle_file, 'rb'))
  30.  
  31. def save_spam_data(self, pickle_file):
  32. pickle.dump(self.spam_data, open(pickle_file, 'wb'))
  33.  
  34. def save_ham_data(self, pickle_file):
  35. pickle.dump(self.ham_data, open(pickle_file, 'wb'))
  36.  
  37. def train(self):
  38. self.classifier = nltk.NaiveBayesClassifier.train(self.spam_data + self.ham_data)
  39.  
  40. def save_classifier(self, pickle_file):
  41. pickle.dump(self.classifier, open(pickle_file, 'wb'))
  42.  
  43. def load_classifier(self, pickle_file):
  44. self.classifier = pickle.load(open(pickle_file, 'rb'))
  45.  
  46. def is_spam(self, email):
  47. if self.classifier is None:
  48. raise Exception('classifier not trained')
  49.  
  50. if self.classifier.classify(self.__text_to_data(email)) == 'spam':
  51. return True
  52.  
  53. return False
Add Comment
Please, Sign In to add comment