Advertisement
Guest User

TWSSBot

a guest
Nov 25th, 2016
121
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.95 KB | None | 0 0
  1. #!/usr/bin/python
  2. import praw
  3. import pdb
  4. import re
  5. import os
  6. import nltk
  7. import sys
  8. import pickle
  9. import datetime
  10.  
  11. class TWSS:
  12.     training_data = [] # [("sentence 1", bool), ("sentence 2", bool), ... ]
  13.     classifier = None
  14.  
  15.     def __init__(self, sentence=None, training_data=None, positive_corpus_file=None, negative_corpus_file=None):
  16.         if training_data:
  17.             self.training_data = training_data
  18.         if positive_corpus_file and negative_corpus_file:
  19.             self.import_training_data(positive_corpus_file, negative_corpus_file)
  20.         if sentence:
  21.             self.__call__(sentence)
  22.  
  23.     def __call__(self, phrase):
  24.         if not self.classifier:
  25.             self.train()
  26.         return self.is_twss(phrase)
  27.  
  28.     def import_training_data(self,
  29.             positive_corpus_file=os.path.join(os.path.dirname(__file__),
  30.                 "positive.txt"),
  31.             negative_corpus_file=os.path.join(os.path.dirname(__file__),
  32.                 "negative.txt")
  33.             ):
  34.         """
  35.        This method imports the positive and negative training data from the
  36.        two corpus files and creates the training data list.
  37.        """
  38.  
  39.         positive_corpus = open(positive_corpus_file)
  40.         negative_corpus = open(negative_corpus_file)
  41.  
  42.         # for line in positive_corpus:
  43.         #     self.training_data.append((line, True))
  44.  
  45.         # for line in negative_corpus:
  46.         #     self.training_data.append((line, False))
  47.  
  48.         # The following code works. Need to profile this to see if this is an
  49.         # improvement over the code above.
  50.         positive_training_data = list(map(lambda x: (x, True), positive_corpus))
  51.         negative_training_data = list(map(lambda x: (x, False), negative_corpus))
  52.         self.training_data = positive_training_data + negative_training_data
  53.  
  54.     def train(self):
  55.         """
  56.        This method generates the classifier. This method assumes that the
  57.        training data has been loaded
  58.        """
  59.         if not self.training_data:
  60.             self.import_training_data()
  61.         training_feature_set = [(self.extract_features(line.decode('utf-8')), label)
  62.                                     for (line, label) in self.training_data]
  63.         self.classifier = nltk.NaiveBayesClassifier.train(training_feature_set)
  64.  
  65.     def extract_features(self, phrase):
  66.         """
  67.        This function will extract features from the phrase being used.
  68.        Currently, the feature we are extracting are unigrams of the text corpus.
  69.        """
  70.        
  71.         words = nltk.word_tokenize(phrase)
  72.         features = {}
  73.         for word in words:
  74.             features['contains(%s)' % word] = (word in words)
  75.         return features
  76.  
  77.     def is_twss(self, phrase):
  78.         """
  79.        The magic function- this accepts a phrase and tells you if it
  80.        classifies as an entendre
  81.        """
  82.         featureset = self.extract_features(phrase)
  83.         return self.classifier.classify(featureset)
  84.  
  85.     def save(self, filename='classifier.dump'):
  86.         """
  87.        Pickles the classifier and dumps it into a file
  88.        """
  89.         ofile = open(filename,'w+')
  90.         pickle.dump(self.classifier, ofile)
  91.         ofile.close()
  92.        
  93.     def load(self, filename='classifier.dump'):
  94.         """
  95.        Unpickles the classifier used
  96.        """
  97.         ifile = open(filename, 'r+')
  98.         self.classifier = pickle.load(ifile)
  99.         ifile.close()
  100. print "Starting bot..."
  101. user_agent = ("TWSS 0")
  102. r = praw.Reddit(username = "_TWSSBot_",password = "****",user_agent=user_agent,client_id = "****", client_secret = "****")
  103. twss = TWSS()
  104. print "Training..."
  105. twss("That was hard")
  106. print "Loading files..."
  107. if not os.path.isfile("TWSSyes.txt"):
  108.     no = []
  109. else:
  110.     with open("TWSSyes.txt", "r") as f:
  111.         yes = f.read()
  112.         yes = yes.split("\n")
  113.         yes = filter(None, yes)
  114. if not os.path.isfile("TWSSreplied.txt"):
  115.     replied = []
  116. else:
  117.     with open("TWSSreplied.txt", "r") as f:
  118.         replied = f.read()
  119.         replied = replied.split("\n")
  120.         replied = filter(None, replied)
  121. if not os.path.isfile("TWSSno.txt"):
  122.     no = []
  123. else:
  124.     with open("TWSSno.txt", "r") as f:
  125.         no = f.read()
  126.         no = no.split("\n")
  127.         no = filter(None, no)
  128. if not os.path.isfile("TWSSuserIgnores.txt"):
  129.     uIgnores = []
  130. else:
  131.     with open("TWSSuserIgnores.txt", "r") as f:
  132.         uIgnores = f.read()
  133.         uIgnores = uIgnores.split("\n")
  134.         uIgnores = filter(None, uIgnores)
  135. if not os.path.isfile("TWSSsubIgnores.txt"):
  136.     sIgnores = []
  137. else:
  138.     with open("TWSSsubIgnores.txt", "r") as f:
  139.         sIgnores = f.read()
  140.         sIgnores = sIgnores.split("\n")
  141.         sIgnores = filter(None, sIgnores)
  142. def getParent(comment):
  143.     return r.comment(comment.parent_id.rsplit('_', 1)[1])
  144. def responds():
  145.     print "REPLYING TO COMMANDS..."
  146.     rf = [0,0,0,0,0]
  147.     rs = [0,0,0,0,0]
  148.     for message in r.inbox.unread(limit = None):
  149.         if "!info" in message.body.lower():
  150.             try:
  151.                 message.reply("**TWSSBot**\n-\n\nThis bot uses a [Naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) to detect the classic [double entendre](https://en.wikipedia.org/wiki/Double_entendre) of \"That's what she said\" jokes. I have several commands available:\n\n**!info**: Displays this message\n\n**!ignoreme**: Will ignore your comments in the future\n\n**!ignoresubreddit**: Will ignore all comments from this subreddit in the future. Requires moderator status.\n\n**!train yes**: Will add the comment to positive training\n\n**!train no**: Will add the comment to negative training\n\n^(Note: all suggestions made by the **!train** commands will be reviewed by the creator. Do not use them as a reply to this message, instead reply to the bot's original reply.)")
  152.                 message.mark_read()
  153.                 print "Replied to !info command by ",message.author
  154.                 rs[0] = rs[0] + 1
  155.             except Exception as e:
  156.                 print e
  157.                 rf[0] = rf[0] + 1
  158.         elif "!ignoreme" in message.body.lower():
  159.             try:
  160.                 message.reply("**Thank you**, I will ignore your posts in the future.")
  161.                 message.mark_read()
  162.                 uIgnores.append(message.author.name)
  163.                 with open("TWSSuserIgnores.txt", "w") as f:
  164.                     for u in uIgnores:
  165.                         f.write(u + "\n")
  166.                 print "Ignored user ",message.author.name
  167.                 rs[1] = rs[1] + 1
  168.             except Exception as e:
  169.                 print e
  170.                 rf[1] = rf[1] + 1
  171.         elif "!ignoresubreddit" in message.body.lower():
  172.             mods = []
  173.             for mod in r.subreddit(message.subreddit.display_name).moderator:
  174.                 mods.append(mod)
  175.             try:
  176.                 if message.author in mods:
  177.                     message.reply("**Thank you**, I will ignore this subreddit in the future.")
  178.                     message.mark_read()
  179.                     sIgnores.append(message.subreddit.display_name)
  180.                     with open("TWSSsubIgnores.txt", "w") as f:
  181.                         for u in sIgnores:
  182.                             f.write(u + "\n")
  183.                     print "Ignored subreddit ",message.subreddit
  184.                     rs[2] = rs[2] + 1
  185.             except Exception as e:
  186.                 print e
  187.                 rf[2] = rf[2] + 1
  188.         elif "!train yes" in message.body.lower():
  189.             try:
  190.                 if getParent(message).body == "##That's what she said!\n\n-\nI am a bot that uses a [Naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) to detect \"That's what she said\" jokes. Reply with **!info** to get more information.\n\n**PLEASE READ !INFO BEFORE DOWNVOTING!**\n\nThe more downvotes this bot gets, the longer it takes me to reply to your commands! Also, visit /r/TWSSBot":
  191.                     bod = getParent(getParent(message)).body
  192.                     message.reply("**Thank you**, my creator will review your suggestion to train \""+bod+"\" as a TWSS joke.")
  193.                     message.mark_read()
  194.                     print "Trained yes: ",bod
  195.                     yes.append(getParent(getParent(message)).body)
  196.                     with open("TWSSyes.txt", "w") as f:
  197.                         for u in yes:
  198.                             f.write(u + "\n")
  199.                     rs[3] = rs[3] + 1
  200.             except Exception as e:
  201.                 rf[3] = rf[3] + 1
  202.         elif "!train no" in message.body.lower():
  203.             try:
  204.                 if getParent(message).body == "##That's what she said!\n\n-\nI am a bot that uses a [Naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) to detect \"That's what she said\" jokes. Reply with **!info** to get more information.\n\n**PLEASE READ !INFO BEFORE DOWNVOTING!**\n\nThe more downvotes this bot gets, the longer it takes me to reply to your commands! Also, visit /r/TWSSBot":
  205.                     bod = getParent(getParent(message)).body
  206.                     message.reply("**Thank you**, my creator will review your suggestion to train \""+bod+"\" as not a TWSS joke.")
  207.                     message.mark_read()
  208.                     print "Trained no: ",bod
  209.                     no.append(getParent(getParent(message)).body)
  210.                     with open("TWSSno.txt", "w") as f:
  211.                         for u in no:
  212.                             f.write(u + "\n")
  213.                     rs[4] = rs[4] + 1
  214.             except Exception as e:
  215.                 print e
  216.                 rf[4] = rf[4] + 1
  217.     print "Done replying!\n    !info: ",rs[0]," : ",rf[0],"\n    !ignoreme: ",rs[1]," : ",rf[1],"\n    !ignoresubreddit: ",rs[2]," : ",rf[2],"\n    !train yes: ",rs[3]," : ",rf[3],"\n    !train no: ",rs[4]," : ",rf[4]
  218.     print rs,rf
  219.    
  220. responds()
  221. print "Comment Loop:"
  222. c = 0
  223. while True:
  224.     for comment in r.subreddit('all').comments():
  225.         mlen = len(nltk.word_tokenize(comment.body))
  226.         if not comment.id in replied and twss(comment.body) and mlen > 2 and mlen<15 and not comment.author.name in uIgnores and not comment.subreddit.display_name in sIgnores:
  227.             try:
  228.                 comment.reply("##That's what she said!\n\n-\nI am a bot that uses a [Naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) to detect \"That's what she said\" jokes. Reply with **!info** to get more information.\n\n**PLEASE READ !INFO BEFORE DOWNVOTING!**\n\nThe more downvotes this bot gets, the longer it takes me to reply to your commands! Also, visit /r/TWSSBot")
  229.                 replied.append(comment.id)
  230.                 print "\n==================================\n",comment.body
  231.                 with open("TWSSreplied.txt", "w") as f:
  232.                     for u in replied:
  233.                         f.write(u + "\n")
  234.                 c = c + 1
  235.             except Exception as e:
  236.                 c = c + 1
  237.             if c % 10 == 0:
  238.                 responds()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement