Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- import praw
- import pdb
- import re
- import os
- import nltk
- import sys
- import pickle
- import datetime
class TWSS:
    """Naive Bayes "that's what she said" (TWSS) detector.

    Trains an nltk.NaiveBayesClassifier on two line-per-sentence corpus
    files (positive.txt / negative.txt next to this module by default) and
    classifies phrases via ``instance(phrase)``.
    """

    # Class-level defaults kept for backward compatibility with any code
    # reading them off the class; __init__ shadows them with per-instance
    # values so instances no longer share one mutable list.
    training_data = []  # [("sentence 1", bool), ("sentence 2", bool), ... ]
    classifier = None

    def __init__(self, sentence=None, training_data=None,
                 positive_corpus_file=None, negative_corpus_file=None):
        """Optionally load training data and/or classify *sentence* at once."""
        self.training_data = []
        self.classifier = None
        if training_data:
            self.training_data = training_data
        if positive_corpus_file and negative_corpus_file:
            self.import_training_data(positive_corpus_file, negative_corpus_file)
        if sentence:
            self.__call__(sentence)

    def __call__(self, phrase):
        """Classify *phrase*, lazily training the classifier on first use."""
        if not self.classifier:
            self.train()
        return self.is_twss(phrase)

    def import_training_data(self,
                             positive_corpus_file=os.path.join(os.path.dirname(__file__),
                                                               "positive.txt"),
                             negative_corpus_file=os.path.join(os.path.dirname(__file__),
                                                               "negative.txt")
                             ):
        """
        This method imports the positive and negative training data from the
        two corpus files and creates the training data list.
        """
        # Context managers close both handles (the original leaked them).
        with open(positive_corpus_file) as positive_corpus:
            positive_training_data = [(line, True) for line in positive_corpus]
        with open(negative_corpus_file) as negative_corpus:
            negative_training_data = [(line, False) for line in negative_corpus]
        self.training_data = positive_training_data + negative_training_data

    def train(self):
        """
        This method generates the classifier. This method assumes that the
        training data has been loaded
        """
        if not self.training_data:
            self.import_training_data()
        # Decode only when the line is actually a byte string; under Python 3
        # (or caller-supplied unicode data) lines are already str and the
        # original unconditional .decode('utf-8') would raise AttributeError.
        training_feature_set = [
            (self.extract_features(line.decode('utf-8')
                                   if isinstance(line, bytes) else line), label)
            for (line, label) in self.training_data
        ]
        self.classifier = nltk.NaiveBayesClassifier.train(training_feature_set)

    def extract_features(self, phrase):
        """
        This function will extract features from the phrase being used.
        Currently, the feature we are extracting are unigrams of the text corpus.
        """
        words = nltk.word_tokenize(phrase)
        features = {}
        for word in words:
            # Bag-of-words presence features: every token of the phrase maps
            # to True (word is trivially in its own token list); what matters
            # is WHICH 'contains(...)' keys exist in the feature dict.
            features['contains(%s)' % word] = (word in words)
        return features

    def is_twss(self, phrase):
        """
        The magic function- this accepts a phrase and tells you if it
        classifies as an entendre
        """
        featureset = self.extract_features(phrase)
        return self.classifier.classify(featureset)

    def save(self, filename='classifier.dump'):
        """
        Pickles the classifier and dumps it into a file
        """
        # Binary mode: pickle output is bytes; the original text-mode 'w+'
        # corrupts the dump on Python 3 and on Windows.
        with open(filename, 'wb') as ofile:
            pickle.dump(self.classifier, ofile)

    def load(self, filename='classifier.dump'):
        """
        Unpickles the classifier used
        """
        with open(filename, 'rb') as ifile:
            self.classifier = pickle.load(ifile)
# --- Script entry: connect to Reddit and warm up the classifier. ---
# NOTE(review): Python 2 print-statement syntax throughout this script.
print "Starting bot..."
user_agent = ("TWSS 0")
# Credentials are redacted placeholders ("****"); fill in before running.
r = praw.Reddit(username = "_TWSSBot_",password = "****",user_agent=user_agent,client_id = "****", client_secret = "****")
twss = TWSS()
print "Training..."
# First call triggers lazy training of the classifier (see TWSS.__call__).
twss("That was hard")
print "Loading files..."
# ---------------------------------------------------------------------------
# Persistent state files, one entry per line:
#   TWSSyes.txt / TWSSno.txt   - user-suggested positive/negative training
#   TWSSreplied.txt            - ids of comments already replied to
#   TWSSuserIgnores.txt        - usernames that opted out (!ignoreme)
#   TWSSsubIgnores.txt         - subreddits that opted out (!ignoresubreddit)
# ---------------------------------------------------------------------------


def load_lines(path):
    """Return the non-empty lines of *path* as a list, or [] if it is missing.

    Replaces five copy-pasted load blocks.  Also fixes a bug in the
    original: when TWSSyes.txt was absent it initialised ``no = []``
    instead of ``yes = []``, leaving ``yes`` undefined and crashing the
    first "!train yes" command with a NameError.
    """
    if not os.path.isfile(path):
        return []
    with open(path, "r") as f:
        # Drop the empty strings produced by blank/trailing newlines
        # (equivalent to the original filter(None, ...)).
        return [line for line in f.read().split("\n") if line]


yes = load_lines("TWSSyes.txt")
replied = load_lines("TWSSreplied.txt")
no = load_lines("TWSSno.txt")
uIgnores = load_lines("TWSSuserIgnores.txt")
sIgnores = load_lines("TWSSsubIgnores.txt")
def getParent(comment):
    """Fetch the parent comment of *comment* via the global praw client.

    ``parent_id`` is a fullname like ``t1_abc123``; the type prefix is
    stripped before the lookup.
    """
    parent_fullname = comment.parent_id
    bare_id = parent_fullname.rsplit('_', 1)[1]
    return r.comment(bare_id)
def responds():
    """Process every unread inbox message and act on bot commands.

    Recognised commands (first match wins): !info, !ignoreme,
    !ignoresubreddit, !train yes, !train no.  ``rs``/``rf`` count
    successes/failures per command, indexed in that order.
    """
    print "REPLYING TO COMMANDS..."
    rf = [0,0,0,0,0]  # failure count per command
    rs = [0,0,0,0,0]  # success count per command
    for message in r.inbox.unread(limit = None):
        if "!info" in message.body.lower():
            try:
                # Static help text describing all commands.
                message.reply("**TWSSBot**\n-\n\nThis bot uses a [Naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) to detect the classic [double entendre](https://en.wikipedia.org/wiki/Double_entendre) of \"That's what she said\" jokes. I have several commands available:\n\n**!info**: Displays this message\n\n**!ignoreme**: Will ignore your comments in the future\n\n**!ignoresubreddit**: Will ignore all comments from this subreddit in the future. Requires moderator status.\n\n**!train yes**: Will add the comment to positive training\n\n**!train no**: Will add the comment to negative training\n\n^(Note: all suggestions made by the **!train** commands will be reviewed by the creator. Do not use them as a reply to this message, instead reply to the bot's original reply.)")
                message.mark_read()
                print "Replied to !info command by ",message.author
                rs[0] = rs[0] + 1
            except Exception as e:
                print e
                rf[0] = rf[0] + 1
        elif "!ignoreme" in message.body.lower():
            try:
                message.reply("**Thank you**, I will ignore your posts in the future.")
                message.mark_read()
                # Persist the whole ignore list after each addition.
                uIgnores.append(message.author.name)
                with open("TWSSuserIgnores.txt", "w") as f:
                    for u in uIgnores:
                        f.write(u + "\n")
                print "Ignored user ",message.author.name
                rs[1] = rs[1] + 1
            except Exception as e:
                print e
                rf[1] = rf[1] + 1
        elif "!ignoresubreddit" in message.body.lower():
            # Only a moderator of the subreddit may opt the subreddit out.
            mods = []
            for mod in r.subreddit(message.subreddit.display_name).moderator:
                mods.append(mod)
            try:
                if message.author in mods:
                    message.reply("**Thank you**, I will ignore this subreddit in the future.")
                    message.mark_read()
                    sIgnores.append(message.subreddit.display_name)
                    with open("TWSSsubIgnores.txt", "w") as f:
                        for u in sIgnores:
                            f.write(u + "\n")
                    print "Ignored subreddit ",message.subreddit
                    rs[2] = rs[2] + 1
                # NOTE(review): a non-moderator request is never marked read,
                # so it will be reprocessed on every call of responds().
            except Exception as e:
                print e
                rf[2] = rf[2] + 1
        elif "!train yes" in message.body.lower():
            try:
                # Only accept training replies made directly under the bot's
                # canonical reply (exact body match); the grandparent is then
                # the comment the bot originally classified.
                if getParent(message).body == "##That's what she said!\n\n-\nI am a bot that uses a [Naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) to detect \"That's what she said\" jokes. Reply with **!info** to get more information.\n\n**PLEASE READ !INFO BEFORE DOWNVOTING!**\n\nThe more downvotes this bot gets, the longer it takes me to reply to your commands! Also, visit /r/TWSSBot":
                    bod = getParent(getParent(message)).body
                    message.reply("**Thank you**, my creator will review your suggestion to train \""+bod+"\" as a TWSS joke.")
                    message.mark_read()
                    print "Trained yes: ",bod
                    yes.append(getParent(getParent(message)).body)
                    with open("TWSSyes.txt", "w") as f:
                        for u in yes:
                            f.write(u + "\n")
                    rs[3] = rs[3] + 1
            except Exception as e:
                # NOTE(review): unlike the other branches, the exception is
                # swallowed here without being printed.
                rf[3] = rf[3] + 1
        elif "!train no" in message.body.lower():
            try:
                # Mirror of "!train yes" for negative training suggestions.
                if getParent(message).body == "##That's what she said!\n\n-\nI am a bot that uses a [Naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) to detect \"That's what she said\" jokes. Reply with **!info** to get more information.\n\n**PLEASE READ !INFO BEFORE DOWNVOTING!**\n\nThe more downvotes this bot gets, the longer it takes me to reply to your commands! Also, visit /r/TWSSBot":
                    bod = getParent(getParent(message)).body
                    message.reply("**Thank you**, my creator will review your suggestion to train \""+bod+"\" as not a TWSS joke.")
                    message.mark_read()
                    print "Trained no: ",bod
                    no.append(getParent(getParent(message)).body)
                    with open("TWSSno.txt", "w") as f:
                        for u in no:
                            f.write(u + "\n")
                    rs[4] = rs[4] + 1
            except Exception as e:
                print e
                rf[4] = rf[4] + 1
    # Summary of successes (rs) and failures (rf) for this pass.
    print "Done replying!\n !info: ",rs[0]," : ",rf[0],"\n !ignoreme: ",rs[1]," : ",rf[1],"\n !ignoresubreddit: ",rs[2]," : ",rf[2],"\n !train yes: ",rs[3]," : ",rf[3],"\n !train no: ",rs[4]," : ",rf[4]
    print rs,rf
# --- Main loop: scan r/all, reply to detected TWSS comments, and service the
# inbox every 10 reply attempts. Runs forever. ---
responds()
print "Comment Loop:"
c = 0  # reply attempts (success or failure) since startup
while True:
    for comment in r.subreddit('all').comments():
        mlen = len(nltk.word_tokenize(comment.body))
        # Reply only to short (3-14 token) comments the classifier flags,
        # skipping anything already replied to and opted-out users/subreddits.
        if not comment.id in replied and twss(comment.body) and mlen > 2 and mlen<15 and not comment.author.name in uIgnores and not comment.subreddit.display_name in sIgnores:
            try:
                comment.reply("##That's what she said!\n\n-\nI am a bot that uses a [Naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) to detect \"That's what she said\" jokes. Reply with **!info** to get more information.\n\n**PLEASE READ !INFO BEFORE DOWNVOTING!**\n\nThe more downvotes this bot gets, the longer it takes me to reply to your commands! Also, visit /r/TWSSBot")
                replied.append(comment.id)
                print "\n==================================\n",comment.body
                # Rewrite the full replied-id list so a restart doesn't
                # double-reply.
                with open("TWSSreplied.txt", "w") as f:
                    for u in replied:
                        f.write(u + "\n")
                c = c + 1
            except Exception as e:
                # NOTE(review): failures are counted but silently discarded.
                c = c + 1
            # Every 10th attempt, pause scanning to answer inbox commands.
            # NOTE(review): indentation reconstructed from a flattened paste;
            # this check is placed after each reply attempt — confirm against
            # the original layout.
            if c % 10 == 0:
                responds()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement