Advertisement
Luke_Username

respectthread_bot_sql.py

Aug 6th, 2019
173
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 9.86 KB | None | 0 0
  1. import praw         # Interface with Reddit's API
  2. import psycopg2     # Interface with PostgreSQL
  3. import config       # Login details
  4. import time         # To make an interval for the bot to wait
  5. import os           # To check if a file exists
  6. import re           # Regular expressions
  7. import unicodedata  # To strip accents
  8.  
  9. subreddit_list = ["respectthread_bot"]
  10. posts_list = []
  11. blacklist = []
  12. respectthread_list = []
  13.  
  14. class Character:
  15.     def __init__(self, name, default_name, version, respectthreads):
  16.         self.name = name
  17.         self.default_name = default_name
  18.         self.version = version
  19.         self.respectthreads = respectthreads
  20.  
  21. def bot_login():
  22.     print("Logging in...")
  23.     r = praw.Reddit(username = config.r_username,
  24.                 password = config.r_password,
  25.                 client_id = config.client_id,
  26.                 client_secret = config.client_secret,
  27.                 user_agent = "respectthread responder v0.2")
  28.     print("Logged in")
  29.     with open("saved_posts.txt", "a") as f:
  30.         f.write('\n')
  31.     return r
  32.  
  33. def get_saved_posts():
  34.     # Make sure the file exists.
  35.     if not os.path.isfile("saved_posts.txt"):
  36.         posts_list = []
  37.     else:
  38.         # "r" is to read from saved_posts.txt as the variable f
  39.         with open("saved_posts.txt", "r") as f:
  40.             posts_list = f.read().split("\n")
  41.     return posts_list
  42.  
  43. def get_blacklist():
  44.     if not os.path.isfile("blacklist.txt"):
  45.         blacklist = []
  46.     else:
  47.         with open("blacklist.txt", "r") as f:
  48.             blacklist = f.read().split("\n")
  49.     return blacklist
  50.  
  51. def run_bot(r):
  52.     print("Connecting to database...")
  53.     con = psycopg2.connect(
  54.         host = config.host,
  55.         database = config.database,
  56.         user = config.d_user,
  57.         password = config.d_password
  58.     )
  59.     print("Connected to database")
  60.     cur = con.cursor()
  61.  
  62.     for sub in subreddit_list:
  63.         print("Obtaining new posts from r/{}".format(sub))
  64.         submissions = r.subreddit(sub).new(limit=7)
  65.         for submission in submissions:
  66.             if submission.id not in posts_list and submission.author.name not in blacklist:
  67.                 title = strip_accents(submission.title)
  68.                 post = title + " " + strip_accents(submission.selftext)
  69.                 character_list = search_characters(title, post, cur)
  70.                 if character_list:
  71.                     generate_reply(submission, cur, character_list)
  72.  
  73.     # Close the cursor and connection
  74.     cur.close()
  75.     con.close()
  76.     print("Disconnected from database")
  77.     sleep_time = 30
  78.     print("Sleeping for {} seconds...".format(sleep_time))
  79.     time.sleep(sleep_time)
  80.  
  81. def search_characters(title, post, cur):
  82.     character_list = []
  83.     characters_checked = []
  84.     respectthread_list.clear()
  85.     cur.execute("SELECT * FROM character_name ORDER BY length(name) DESC;")
  86.     names = cur.fetchall()
  87.     for n in names:
  88.         found_char = False
  89.         name = n[0]
  90.         default_name = n[1]
  91.         if default_name not in characters_checked and post_contains(name, post, cur):
  92.             found_char = True
  93.             char_added = False
  94.             cur.execute("SELECT * FROM character WHERE default_name = '{}';".format(default_name))
  95.             characters = cur.fetchall()
  96.             for c in characters:
  97.                 version = c[1]
  98.                 respectthread_ids = c[3]
  99.                 verse_name = c[4]
  100.                 if check_version_array(version, post, cur):                                                             # Check if the post contains the character's verse-name
  101.                     add_character(name, default_name, verse_name, respectthread_ids, title, post, cur, character_list)
  102.                     char_added = True
  103.  
  104.             if not char_added:                                                                                          # If the post doesn't mention the character's version,
  105.                 for c in characters:                                                                                    # use the default version
  106.                     is_default = c[2]
  107.                     if is_default:
  108.                         add_character(name, default_name, c[4], c[3], title, post, cur, character_list)
  109.  
  110.         if found_char:                                                                                                  # Prevents redundant character checks
  111.             characters_checked.append(default_name)
  112.     return character_list
  113.  
  114. def check_version_array(version, post, cur):
  115.     for string in version:
  116.         if not post_contains(string, post, cur):
  117.             return False
  118.     return True
  119.  
  120. def post_contains(name, post, cur):
  121.     regex = re.compile(r"\b%s\b" % name, re.IGNORECASE)
  122.     if re.search(regex, post) is not None:
  123.         cur.execute("SELECT COUNT(*) FROM name_conflict WHERE LOWER(name) = '{}'".format(name.lower()))
  124.         row_count = cur.fetchone()[0]
  125.         if row_count == 0:
  126.             return True
  127.         else:
  128.             cur.execute("SELECT conflict, first_char FROM name_conflict WHERE LOWER(name) = '{}'".format(name.lower()))  # For all matches, check if the name doesn't mean something else
  129.             rows = cur.fetchall()
  130.             name_locations = [m.start() for m in re.finditer(regex, post)]
  131.             for n in name_locations:
  132.                 non_matches = 0
  133.                 for row in rows:
  134.                     conflict = row[0].lower()
  135.                     first_char = n + row[1]
  136.                     last_char = first_char + len(conflict)
  137.                     substring = post[first_char : last_char].lower()
  138.                     if substring != conflict:
  139.                         non_matches += 1
  140.                 if non_matches == row_count:
  141.                     return True
  142.     return False
  143.  
  144. def strip_accents(text):
  145.     try:
  146.         text = unicode(text, 'utf-8')
  147.     except NameError: # unicode is a default on python 3
  148.         #print("NameError")
  149.         pass
  150.  
  151.     text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode("utf-8")
  152.     return str(text)
  153.  
  154. def add_character(name, default_name, verse_name, respectthread_ids, title, post, cur, character_list):
  155.     if post_contains(default_name, title, cur):                                                                         # The bot prefers names found in the title
  156.         add_to_reply(default_name, default_name, verse_name, respectthread_ids, character_list, post, cur)              # and prefers the character's "default name"
  157.     elif post_contains(name, title, cur):
  158.         add_to_reply(name, default_name, verse_name, respectthread_ids, character_list, post, cur)
  159.     elif post_contains(default_name, post, cur):
  160.         add_to_reply(default_name, default_name, verse_name, respectthread_ids, character_list, post, cur)
  161.     else:
  162.         add_to_reply(name, default_name, verse_name, respectthread_ids, character_list, post, cur)
  163.  
  164. def add_to_reply(name, default_name, verse_name, respectthread_ids, character_list, post, cur):
  165.     for id in respectthread_ids:
  166.         if is_rt_in_post(id, post, cur):
  167.             respectthread_list.append(id)
  168.         if id not in respectthread_list:
  169.             respectthread_list.append(id)                                                                               # To prevent linking duplicates
  170.         else:
  171.             respectthread_ids.remove(id)
  172.  
  173.     if respectthread_ids:
  174.         character_list.append(Character(name, default_name, verse_name, respectthread_ids))
  175.  
  176. def is_rt_in_post(id, post, cur):                                                                                       # Check if the post already linked that RT
  177.     cur.execute("SELECT link FROM respectthread WHERE id = {} LIMIT 1;".format(id))
  178.     link = cur.fetchone()[0]
  179.     regex = re.compile(r"https://redd\.it/([a-zA-A0-9]{6})")
  180.     match_shortlink = regex.search(link)
  181.     if match_shortlink is not None:
  182.         post_id = match_shortlink.group(1)
  183.         regex = re.compile(r"\b{}\b".format(post_id))
  184.         if re.search(regex, post) is not None:
  185.             return True
  186.     else:
  187.         regex = re.compile(r"comments/([a-zA-A0-9]{6})")
  188.         match_permalink = regex.search(link)
  189.         if match_permalink is not None:
  190.             post_id = match_permalink.group(1)
  191.             regex = re.compile(r"\b{}\b".format(post_id))
  192.             if re.search(regex, post) is not None:
  193.                 return True
  194.     return False
  195.  
  196. def generate_reply(submission, cur, character_list):
  197.     reply_text = ""
  198.     sorted_list = sorted(character_list, key = lambda character: (character.default_name, character.version))
  199.  
  200.     for character in sorted_list:
  201.         if character.respectthreads:
  202.             reply_text += "**" + character.name
  203.             if character.version != "":
  204.                 reply_text += " ({})".format(character.version)
  205.             reply_text += "**\n\n"
  206.             rt_query = "SELECT * FROM respectthread WHERE id IN ("
  207.             rt_query += str(character.respectthreads).lstrip("[").rstrip("]") + ");"
  208.             cur.execute(rt_query)
  209.             respectthreads = cur.fetchall()
  210.             for row in respectthreads:
  211.                 reply_text += "- [{}]({})\n\n".format(row[1], row[2])
  212.  
  213.     if reply_text != "":
  214.         reply_text += "***\n\n"
  215.         reply_text += "^(I am a bot) ^| "
  216.         reply_text += "[^(About)](https://redd.it/clz2f8) ^| "
  217.         reply_text += "[^(Code)](https://redd.it/covp0m) ^| "
  218.         reply_text += "[^(Opt-out)](https://redd.it/cnqm1e) ^| "
  219.         reply_text += "^(Missing or wrong characters?) [^(Report here)](https://redd.it/co8whe)"
  220.  
  221.         submission.reply(reply_text)
  222.         print(reply_text)
  223.     with open("saved_posts.txt", "a") as f:
  224.         f.write(submission.id + '\n')
  225.     posts_list.append(submission.id)
  226.  
  227. posts_list = get_saved_posts()
  228. blacklist = get_blacklist()
  229. r = bot_login()
  230. while True:
  231.     run_bot(r)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement