Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import nltk
- import pickle
class SpamChecker:
    """Spam/ham text classifier backed by NLTK's Naive Bayes implementation.

    Training examples are kept in two lists, ``spam_data`` and ``ham_data``.
    Each entry is a ``(features, label)`` tuple where ``features`` maps every
    token of the text to ``True`` and ``label`` is ``'spam'`` or ``'ham'``.
    ``classifier`` stays ``None`` until :meth:`train` or
    :meth:`load_classifier` has been called.
    """

    def __init__(self):
        """Make sure the ``punkt`` tokenizer data exists and start with no data."""
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            # nltk.data.find signals a missing resource with LookupError;
            # anything else (e.g. KeyboardInterrupt) must not be swallowed.
            nltk.download('punkt')
        self.spam_data = []
        self.ham_data = []
        self.classifier = None

    @staticmethod
    def _read_pickle(pickle_file):
        """Return the object stored in *pickle_file*, closing the file afterwards."""
        # NOTE: unpickling can execute arbitrary code. Only load files that
        # come from a trusted source.
        with open(pickle_file, 'rb') as handle:
            return pickle.load(handle)

    @staticmethod
    def _write_pickle(obj, pickle_file):
        """Pickle *obj* into *pickle_file*, closing the file afterwards."""
        with open(pickle_file, 'wb') as handle:
            pickle.dump(obj, handle)

    def __text_to_data(self, text: str) -> dict:
        """Turn *text* into a feature dict mapping each token to ``True``."""
        return {token: True for token in nltk.word_tokenize(text)}

    def add_spam(self, text: str) -> None:
        """Append *text* to the training data, labelled ``'spam'``."""
        self.spam_data.append((self.__text_to_data(text), 'spam'))

    def add_ham(self, text: str) -> None:
        """Append *text* to the training data, labelled ``'ham'``."""
        self.ham_data.append((self.__text_to_data(text), 'ham'))

    def load_spam_data(self, pickle_file) -> None:
        """Replace ``spam_data`` with the object unpickled from *pickle_file*."""
        self.spam_data = self._read_pickle(pickle_file)

    def load_ham_data(self, pickle_file) -> None:
        """Replace ``ham_data`` with the object unpickled from *pickle_file*."""
        self.ham_data = self._read_pickle(pickle_file)

    def save_spam_data(self, pickle_file) -> None:
        """Pickle the current ``spam_data`` into *pickle_file*."""
        self._write_pickle(self.spam_data, pickle_file)

    def save_ham_data(self, pickle_file) -> None:
        """Pickle the current ``ham_data`` into *pickle_file*."""
        self._write_pickle(self.ham_data, pickle_file)

    def train(self) -> None:
        """Fit a Naive Bayes classifier on the combined spam and ham examples."""
        self.classifier = nltk.NaiveBayesClassifier.train(self.spam_data + self.ham_data)

    def save_classifier(self, pickle_file) -> None:
        """Pickle the current classifier into *pickle_file*."""
        self._write_pickle(self.classifier, pickle_file)

    def load_classifier(self, pickle_file) -> None:
        """Replace the classifier with the object unpickled from *pickle_file*."""
        self.classifier = self._read_pickle(pickle_file)

    def is_spam(self, email: str) -> bool:
        """Return ``True`` when the classifier labels *email* as ``'spam'``.

        Raises:
            RuntimeError: if no classifier has been trained or loaded yet.
        """
        if self.classifier is None:
            # RuntimeError is a subclass of Exception, so callers that
            # catch Exception keep working.
            raise RuntimeError('classifier not trained')
        return self.classifier.classify(self.__text_to_data(email)) == 'spam'
Add Comment
Please, Sign In to add comment