Advertisement
Guest User

Untitled

a guest
Oct 10th, 2017
4,839
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 42.60 KB | None | 0 0
  1. #!/usr/bin/python3
  2. # -*- coding: utf-8 -*-
  3.  
  4. """
  5. USAGE: Set up as described below, then just run the script somewhere in the
  6.       background, screen works nicely for this.
  7.  
  8. Non-standard dependencies: sqlite3, tld
  9.  
  10. It is recommended to wrap this in the following script:
  11. #!/bin/bash
  12. RET=1
  13. while [ $RET -eq 1 ]
  14. do
  15.        removalbot
  16.        RET=$?
  17. done
  18.  
  19. Ctrl+C is handled as success, expected failures have exit code 2.
  20. Likely programming errors have exit code 3.
  21.  
  22. Files:
  23. - CONFDIR, defined below
  24. - CONFDIR/maintainer, your reddit username
  25. - CONFDIR/secrets in the format "user\npass\nclient_id\nclient_secret"
  26. - CONFDIR/version
  27. - CONFDIR/safedomains, empty or newline-separated domains
  28. - CONFDIR/unknowndomains, empty
  29. - CONFDIR/posts.db, sqlite3 database
  30.  
  31. Be aware that you need to set up oauth to use this and gather the client ID and secret for the login process.
  32.  
  33. DB schema:
  34. CREATE TABLE notify
  35.    (author TEXT UNIQUE);
  36. CREATE TABLE comments
  37.    (name TEXT UNIQUE PRIMARY KEY,
  38.     subreddit TEXT,
  39.     author TEXT,
  40.     created_utc INTEGER,
  41.     link_id TEXT,
  42.     body TEXT);
  43. CREATE TABLE comments_deleted
  44.    (name TEXT UNIQUE PRIMARY KEY,
  45.     subreddit TEXT,
  46.     author TEXT,
  47.     created_utc INTEGER,
  48.     link_id TEXT,
  49.     body TEXT,
  50.     spotted INTEGER);
  51. CREATE TABLE flairs
  52.    (name TEXT UNIQUE PRIMARY KEY,
  53.     flair_template_id TEXT UNIQUE);
  54. CREATE TABLE times
  55.    (name TEXT UNIQUE PRIMARY KEY,
  56.     time INTEGER);
  57. CREATE TABLE submissions
  58.    (name TEXT UNIQUE PRIMARY KEY,
  59.     subreddit TEXT,
  60.     author TEXT,
  61.     created_utc INTEGER,
  62.     link_flair_text TEXT,
  63.     title TEXT,
  64.     domain TEXT,
  65.     selftext TEXT);
  66. CREATE TABLE submissions_deleted
  67.    (name TEXT UNIQUE PRIMARY KEY,
  68.     subreddit TEXT,
  69.     author TEXT,
  70.     created_utc INTEGER,
  71.     link_flair_text TEXT,
  72.     title TEXT,
  73.     domain TEXT,
  74.     selftext TEXT,
  75.     spotted INTEGER);
  76.  
  77. # TODO: shadowbanned vs deleted users
  78. # TODO: log.db
  79. # TODO: deletion.db
  80. """
  81.  
# Runtime switches
LOGGING = True          # write per-run log files under LOGDIR (see newlog())
DUMMY = False           # when True, skip every write action against reddit (flairs, PMs)
SUBMIT_ERRORS = False   # not referenced in this part of the file -- TODO confirm use
DEBUG = True            # extra console output (e.g. names of deleted posts)
  86.  
  87. import os
  88. import sys
  89. import time
  90. import json
  91. import urllib.request, urllib.parse, urllib.error
  92. import ssl
  93. import socket
  94. import re
  95. import traceback
  96. import sqlite3
  97. import tld
  98. from random import sample
  99. from datetime import datetime
  100. from contextlib import closing
  101. from sys import stderr
  102. from html.parser import HTMLParser
  103.  
# Time units, in seconds
MINUTE = 60
HOUR = MINUTE * 60
DAY = HOUR * 24

# Abort any hung network operation after 10s; reddit() retries on timeout
socket.setdefaulttimeout(10)

# Threads whose comment authors receive deletion notifications
NOTIFY_THREADS = ['3rmc4v']
# all in seconds
NEW_SPACING = 10
DELETION_SPACING = 10 * MINUTE
FLAIR_SPACING = 24 * HOUR
SUBSCRIBER_SPACING = 12 * HOUR
ALLOWED_TITLE_LENGTH = 300  # reddit's maximum submission title length
INTROLEN = 100              # not referenced in this part of the file -- TODO confirm use

CONFDIR = '/etc/removalbot'
PIDFILE = "/tmp/removalbot.pid"
LOGDIR = os.path.join(CONFDIR, 'log')
# Authors (mostly well-known bots, plus '[deleted]') whose posts are never tracked
IGNORE = ["godwins_law_bot", "totes_meta_bot", "redditbots", "ttumblrbots",
          "autowikibot", "SRScreenshot", "MRSPArchiver", "AutoModerator",
          "image_linker_bot", "SmallSubBot", "autourbanbot",
          "note-to-self-bot", "ObamaRobot", "TotesMessenger",
          "TweetsInCommentsBot", "TweetPoster", "JoeBidenBot",
          "smilesbot", "DailMail_Bot", "TrollaBot", "TotesHuman",
          "youtubefactsbot", "imgurtranscriber", "isreactionary_bot",
          "iscuck_bot", "author", "reginaldtato", "NotTheOnionBot",
          "rSGSpolice", "hwsbot", "yes_it_is_weird", "r_PictureGame",
          "prairiechicken2", "domoarigatobtfcboto", "SkydivingHaylz",
          "I_Like_Spaghetti", "STEALTHM0UNTAIN", "Google_Panda",
          "AakashMasani", "Forestl", "lurkattwork", "drgoku282",
          "texasmommie", "Really_Like_Pancakes", "BlaineWolfe",
          "Blassie098", "ghort98765", "GustavoFrings", "WritingPromptsRobot",
          "sontato", "ramsesniblick3rd", "300BlackoutSober",
          "flair_your_post_bot", "GoomyTooOP", "arbutus_", "foamed",
          "DumbCollegeStudent", "[deleted]", "GOTradeRuleBot",
          "ShadowBanCheckBot", "ShadowBannedBot", "Shiny_Sylveon",
          "PaidBot", "xbamsod", "enriquepaz13", "Moskau50", "PornOverlord",
          "ConvertsToMetric", "removalbot"]

# Broad URL-matching pattern used to strip links out of removed-comment
# bodies before building titles/bodies.
URLREGEX = r'''(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9
   .\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(
   ([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))'''

# Link domains never added to the unknowndomains counter file
DOMAIN_BLACKLIST = ["malformed.domain", "goo.gl", "tinyurl.com"]

# (type, field-name) pairs describing the columns stored per post kind.
# Order matches the DB schema given in the module docstring.
COMMENTMODEL = ((str, 'name'),
                (str, 'subreddit'),
                (str, 'author'),
                (float, 'created_utc'),
                (str, 'link_id'),
                (str, 'body'))

SUBMISSIONMODEL = ((str, 'name'),
                   (str, 'subreddit'),
                   (str, 'author'),
                   (float, 'created_utc'),
                   (str, 'link_flair_text'),
                   (str, 'title'),
                   (str, 'domain'),
                   (str, 'selftext'))

MODELS = { 'comments': COMMENTMODEL, 'submissions': SUBMISSIONMODEL }

# Run everything in UTC regardless of the host's timezone
os.environ['TZ'] = 'UTC'
time.tzset()

# Record our pid so the wrapper script / operator can find and kill the bot
pid = str(os.getpid())
with open(PIDFILE, 'w') as f:
    f.write(pid)
  173.  
  174.  
  175. ALSO_FATAL = ['SyntaxError', 'NameError', 'IndexError', 'TypeError',
  176.               'ValueError', 'UnboundLocalError']
  177.  
  178.  
  179. def errorhook(extype, value, trace):
  180.     os.unlink(PIDFILE)
  181.     print(traceback.print_exception(extype, value, trace))
  182.     if extype.__name__ == "KeyboardInterrupt":
  183.         exit(0)
  184.     elif extype.__name__ == "FatalError":
  185.         send_pm(maintainer, "REMOVALBOT CRASHED", "Reason: " +
  186.                 extype.__name__+": " + value.message)
  187.         writefile(conf('error'), extype.__name__+": " + value.message)
  188.         exit(2)
  189.     elif extype.__name__ in ALSO_FATAL:
  190.         send_pm(maintainer, "REMOVALBOT CRASHED", "Reason: " +
  191.                 extype.__name__+": " + value.message)
  192.         writefile(conf('error'), extype.__name__+": " + value.message)
  193.         exit(3)
  194.  
  195. sys.excepthook = errorhook
  196.  
  197.  
  198. class FatalError(Exception):
  199.     def __init__(self, message):
  200.         self.message = message
  201.         Exception.__init__(self, message)
  202.  
  203.  
  204. def current_epoch():
  205.     return (datetime.now() - datetime.utcfromtimestamp(0)).total_seconds()
  206.  
  207.  
  208. def epoch_to_string(epoch=None, tech=False, short=False):
  209.     if epoch is None:
  210.         epoch = current_epoch()
  211.     try:
  212.         epoch = float(epoch)
  213.     except:
  214.         epoch = 0
  215.     if tech:
  216.         model = "%y%m%d-%H%M"
  217.     elif short:
  218.         model = "%m-%d %H:%M"
  219.     else:
  220.         model = "%Y-%m-%d %H:%M %Z"
  221.     return time.strftime(model, time.localtime(epoch))
  222.  
  223.  
  224. def conf(name):
  225.     return str(os.path.join(CONFDIR, name))
  226.  
  227.  
  228. def newlog(name):
  229.     if LOGGING:
  230.         name = str(os.path.join(LOGDIR, name))
  231.         with closing(open(name, 'w')):
  232.             pass
  233.         return name
  234.     else:
  235.         return '/dev/null'
  236.  
  237.  
  238. def readfile(f):
  239.     with closing(open(f)) as f:
  240.         return f.read()
  241.  
  242.  
  243. def writefile(f, data):
  244.     with closing(open(f, 'w')) as f:
  245.         f.write(data)
  246.  
  247.  
  248. def censor(s, fraction):
  249.     num = int(round(fraction * len(s)))
  250.     change_locs = set(sample(list(range(len(s))), num))
  251.     changed = ('*' if i in change_locs else c for i, c in enumerate(s))
  252.     return ''.join(changed)
  253.  
  254.  
  255. def getv(query, args=()):
  256.     return (c.execute(query, args).fetchone() or (None,))[0]
  257.  
  258.  
  259. def getlast(what):
  260.     return getv('SELECT time FROM times WHERE name=?', (what,))
  261.  
  262.  
  263. def setlast(what, utc):
  264.     c.execute('INSERT OR REPLACE INTO times VALUES (?, ?)', (what, utc))
  265.     db.commit()
  266.  
  267.  
  268. def login():
  269.     print("> Logging in ", end=' ')
  270.     sys.stdout.flush()
  271.     secrets = readfile(conf('secrets')).split()
  272.     username = secrets[0]
  273.     password = secrets[1]
  274.     client_id = secrets[2]
  275.     client_secret = secrets[3]
  276.  
  277.     post_data = {"grant_type": "password",
  278.                  "username": username,
  279.                  "password": password}
  280.     headers = {"User-Agent": USERAGENT}
  281.     url = "https://www.reddit.com/api/v1/access_token"
  282.  
  283.     password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
  284.     password_mgr.add_password(None, url, client_id, client_secret)
  285.     handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
  286.     opener = urllib.request.build_opener(handler)
  287.  
  288.     response = reddit(url, opener=opener.open,
  289.                       post_data=post_data, headers=headers,
  290.                       raw=True)
  291.  
  292.     if not ('token_type' in list(response.keys()) and
  293.             'access_token' in list(response.keys())):
  294.         print(response, file=stderr)
  295.         raise FatalError("Authorization Failed")
  296.     token = response['token_type']+" "+response['access_token']
  297.     print("- done")
  298.     return {"Authorization": token, "User-Agent": USERAGENT}
  299.  
  300.  
  301. def mkrequest(url, headers=None, post_data=None):
  302.     if not post_data:
  303.         return urllib.request.Request(url, None, headers)
  304.     else:
  305.         for k, v in post_data.items():
  306.             if isinstance(v, str):
  307.                 v = v.encode('utf8')
  308.             post_data[k] = v
  309.         post_data = urllib.parse.urlencode(post_data).encode('utf-8')
  310.         return urllib.request.Request(url, post_data, headers)
  311.  
  312.  
  313. def errordir(e):
  314.     for attr in dir(e):
  315.         print(attr, getattr(e, attr), file=stderr)
  316.  
  317.  
def reddit(url, opener=urllib.request.urlopen,
           headers=None, post_data=None,
           raw=False, catch_also=[]):
    """Request *url* from reddit, retrying until a parseable response arrives.

    Honors the x-ratelimit-* response headers by sleeping proportionally
    when the remaining request budget runs low. HTTP errors whose code is
    in *catch* (plus anything in *catch_also*), network errors, and JSON
    parse errors sleep 5s and retry; a 401 re-authenticates first. Other
    errors propagate. With raw=False the 'data' member of the JSON is
    returned, otherwise the whole document.
    NOTE(review): catch_also is a mutable default argument, but it is only
    read here, so the usual aliasing bug cannot trigger.
    """
    global auth
    global requests_used
    global requests_remaining
    global requests_reset
    # Retryable HTTP status codes (auth, transient server, CDN errors)
    catch = [400, 401, 500, 502, 503, 504, 521] + catch_also
    while True:
        try:
            request = mkrequest(url, headers=headers, post_data=post_data)
            response = opener(request)
            # Rate-limit bookkeeping published as globals for other code
            requests_remaining = response.headers.get('x-ratelimit-remaining')
            requests_used = response.headers.get('x-ratelimit-used')
            requests_reset = response.headers.get('x-ratelimit-reset')
            if requests_remaining and requests_reset:
                remaining = float(requests_remaining)
                reset = int(requests_reset)
                # Fewer requests left than seconds until reset: slow down
                if remaining < reset:
                    time.sleep(float(reset - remaining) /
                               remaining)
            j = json.loads(response.read().decode('utf-8'))
            if not raw:
                j = j['data']
            break
        except (ValueError, KeyError,
                socket.timeout, socket.error,
                ssl.SSLError,
                urllib.error.HTTPError, urllib.error.URLError) as e:
            print('<' + type(e).__name__ + ': ', end=' ')
            if type(e).__name__ == 'HTTPError':
                print(str(e.code) + '!>', end=' ')
                if e.code not in catch:
                    raise
                elif e.code == 401:
                    # OAuth token expired: log in again and retry with new headers
                    print("(Token expired)", end=' ')
                    auth = login()
                    headers = auth
            elif type(e).__name__ == 'URLError':
                # Only TLS-handshake failures are considered transient
                if "handshake operation" not in str(e.reason):
                    raise
            else:
                print('!>', end=' ')
            sys.stdout.flush()
            time.sleep(5)
        except Exception as e:
            # Unexpected error: dump everything we know, then propagate
            print(file=stderr)
            print("ERROR!", file=stderr)
            print(file=stderr)
            errordir(e)
            raise
    return j
  370.  
  371.  
def fetch(query, lastseen, model, kind, catch_also=[]):
    """Yield posts from listing *query* newer than *lastseen*, paging via 'after'.

    Iteration stops once a post is older than *lastseen* or more than 7
    days old (listings are newest-first, so everything after is older).
    Every 10th page, the newest posts are re-polled via fetch_posts_since
    so long backfills don't miss fresh activity; kind='noupdate' disables
    that. Each yielded post is a dict of fields coerced according to
    *model* ((type, name) pairs); bool fields are stored as 1/0.
    """
    pagecount = 1
    newest = 0      # created_utc of the newest post seen so far
    after = ''      # pagination query fragment

    # Local sentinel used to break out of the nested loops
    class Done(Exception):
        pass
    try:
        while True:
            # Interleave a quick poll for brand-new posts on long fetches
            if pagecount % 10 == 0 and kind != 'noupdate':
                print('u(', end=' ')
                sys.stdout.flush()
                for post in fetch_posts_since(kind, lastcheck=newest, quiet=True):
                    yield post
                print(')', end=' ')
                sys.stdout.flush()

            print('p'+str(pagecount), end=' ')
            sys.stdout.flush()

            response = reddit(query+after, headers=auth, catch_also=catch_also)

            for child in [c['data'] for c in response['children']]:

                # Listing is newest-first: first too-old post ends the fetch
                if child['created_utc'] <= lastseen or \
                   current_epoch() - child['created_utc'] > DAY * 7:
                    raise Done
                if child['author'] in IGNORE:
                    continue

                fields = {}
                for field in model:
                    fieldtype = field[0]
                    fieldname = field[1]
                    if fieldtype is bool:
                        # store booleans as 1/0 for sqlite
                        fields[fieldname] = child[fieldname] and 1 or 0
                    else:
                        fields[fieldname] = fieldtype(child[fieldname])
                if fields['created_utc'] > newest:
                    newest = fields['created_utc']

                yield fields

            if not response['after']:
                raise Done
            else:
                after = '&after='+response['after']
            pagecount += 1
    except Done:
        pass
  425.  
  426. def fetch_posts_since(kind, lastcheck=0, quiet=False):
  427.     url = 'https://oauth.reddit.com/user/removalbot/m/monitor/' + \
  428.             (kind if kind == 'comments' else 'new') + \
  429.             '/.json?sort=new&limit=100'
  430.     count = 0
  431.     if not quiet:
  432.         start = current_epoch()
  433.         print("Reading " + kind + " -", end=' ')
  434.         sys.stdout.flush()
  435.     for post in fetch(url,
  436.                         lastcheck,
  437.                         MODELS[kind],
  438.                         kind,
  439.                         catch_also=[403]):
  440.         count += 1
  441.         yield post
  442.     if not quiet:
  443.         print("- "+str(count)+" "+kind+" new")
  444.         print("Read in " + str(current_epoch() - start))
  445.  
  446.  
  447. def model_to_tuple(item, model):
  448.     tup = ()
  449.     for field in model:
  450.         tup += (item[field[1]],)
  451.     return tup
  452.  
  453.  
  454. def insert_tuple(post, kind):
  455.     c.execute('INSERT OR IGNORE INTO ' + kind + ' VALUES ' +
  456.                 ('(' + ','.join(['?'] * len(post)) + ')'),
  457.               post)
  458.  
  459.  
  460. def get_new_of(kind):
  461.     lastcheck = getv('SELECT MAX(created_utc) FROM ' + kind)
  462.     for post in fetch_posts_since(kind, lastcheck=lastcheck):
  463.         post = model_to_tuple(post, MODELS[kind])
  464.         insert_tuple(post, kind)
  465.     db.commit()
  466.  
  467.  
  468. def get_new():
  469.     print("> Checking for new posts,", end=' ')
  470.     nextdelcheck = str(DELETION_SPACING - int(current_epoch() -
  471.                        last_deletion_check * DELETION_SPACING))
  472.     print("next comparison in "+nextdelcheck+"s                               ")
  473.     get_new_of('comments')
  474.     get_new_of('submissions')
  475.  
  476.  
  477. def recurse_into_get_authors(subthread):
  478.     subthread = subthread['data']['children']
  479.     notify = []
  480.     for post in subthread:
  481.         post = post['data']
  482.         if post['author'] != '[deleted]':
  483.             notify += [post['author']]
  484.         if 'replies' in list(post.keys()) and post['replies']:
  485.             notify += recurse_into_get_authors(post['replies'])
  486.     return notify
  487.  
  488.  
  489. def update_notify():
  490.     global notify
  491.     print("> Updating users to notify", end=' ')
  492.     sys.stdout.flush()
  493.     notify = []
  494.     for thread in NOTIFY_THREADS:
  495.         thread = 'https://oauth.reddit.com/r/removalbot/comments/'+thread
  496.         pm_thread = reddit(thread+'.json',
  497.                            headers=auth, raw=True)
  498.         notify += recurse_into_get_authors(pm_thread[1])
  499.     notify += [maintainer]
  500.     notify = list(set(notify))
  501.     c.execute('DELETE FROM notify')
  502.     for user in notify:
  503.         c.execute('INSERT INTO notify VALUES (?)', (user,))
  504.     db.commit()
  505.     print("- done (" + str(len(notify)) + " users: " + ", ".join(notify) + ")")
  506.  
  507.  
  508. def update_flairs():
  509.     print("> Fetching flairs", end=' ')
  510.     sys.stdout.flush()
  511.     latestposturl = 'https://oauth.reddit.com/r/removalbot/new/.json?limit=1'
  512.     latestpostname = reddit(latestposturl, headers=auth)
  513.     latestpostname = latestpostname['children'][0]['data']['name']
  514.     flairurl = 'https://oauth.reddit.com/r/removalbot/api/flairselector'
  515.     post_data = {'link': latestpostname}
  516.     flairchoices = reddit(flairurl,
  517.                           post_data=post_data, headers=auth,
  518.                           raw=True)['choices']
  519.     c.execute('DELETE FROM flairs')
  520.     for flair in flairchoices:
  521.         c.execute('INSERT INTO flairs VALUES (?,?)',
  522.                   (flair['flair_text'], flair['flair_template_id']))
  523.     db.commit()
  524.     print("- done")
  525.  
  526.  
  527. def get_fullname_new(response):
  528.     response = response['jquery']
  529.     is_redirect = False
  530.     fullname_new = None
  531.     for line in response:
  532.         if is_redirect:
  533.             fullname_new = 't3_'+line[3][0].split('/')[-3]
  534.             break
  535.         if line[2] == 'attr' and line[3] == 'redirect':
  536.             is_redirect = True
  537.     if fullname_new:
  538.         return fullname_new
  539.     else:
  540.         print(response)
  541.         raise FatalError("Malformed response from reddit")
  542.  
  543.  
  544. def setflair(fullname, text):
  545.     fid = getv('SELECT flair_template_id FROM flairs WHERE name=?', (text,))
  546.     if not fid:
  547.         return False
  548.     post_data = {'link': fullname,
  549.                  'api_type': 'json',
  550.                  'flair_template_id': fid}
  551.     if not DUMMY:
  552.         reddit('https://oauth.reddit.com/r/removalbot/api/selectflair',
  553.                post_data=post_data, headers=auth, raw=True)
  554.     return True
  555.  
  556.  
  557. def send_pm(to, subject, text):
  558.     post_data = {'api_type': 'json',
  559.                  'subject': subject,
  560.                  'text': text,
  561.                  'to': to}
  562.     print("Sending PM '"+subject+"' to /u/"+to, end=' ')
  563.     sys.stdout.flush()
  564.     if not DUMMY:
  565.         reddit('https://oauth.reddit.com/api/compose',
  566.                post_data=post_data, headers=auth, raw=True)
  567.     print("- sent")
  568.  
  569.  
  570. #def timestring(span):
  571. #    spanstring = ''
  572. #    times = ((60.0, 'min'), (60.0, 'h'), (24.0, 'd'))
  573. #    for t in times:
  574. #        span /= t[0]
  575. #        if span > 5:
  576. #            spanstring = str(round(span, 1)) + t[1]
  577. #    return spanstring
  578.  
  579.  
def compare_update(kind, newposts):
    """Diff freshly fetched *kind* posts against the stored table.

    Rows that vanished from the live listing are copied into
    <kind>_deleted with a 'spotted' timestamp, then the fresh snapshot
    replaces the stored table. Prints deleted/new/fell-out/total counts.
    """
    # Scratch table shaped like *kind*, initially empty (WHERE 0)
    c.execute('CREATE TABLE IF NOT EXISTS ' + kind + '_new \
               AS SELECT * FROM ' + kind + ' WHERE 0')

    for post in newposts:
        post = model_to_tuple(post, MODELS[kind])
        insert_tuple(post, kind + '_new')

    # Drop stored rows older than the oldest fresh row per subreddit: they
    # fell out of the 100-post listing window and can no longer be compared,
    # so their absence would otherwise look like a deletion.
    q = '''DELETE FROM ''' + kind + ''' WHERE name IN (
            SELECT name FROM (
              SELECT subreddit, MIN(created_utc) AS mintime
              FROM ''' + kind + '''_new GROUP BY subreddit
            ) AS mintimes INNER JOIN ''' + kind + '''
            ON ''' + kind + '''.subreddit = mintimes.subreddit
            AND ''' + kind + '''.created_utc < mintimes.mintime
          )'''

    oldcount = getv('SELECT COUNT(name) FROM ' + kind)
    c.execute(q)
    newcount = getv('SELECT COUNT(name) FROM ' + kind)
    fell = oldcount - newcount

    # Anything still stored but absent from the fresh listing was removed;
    # stamp it with the moment we noticed (the extra 'spotted' column).
    q = '''INSERT OR IGNORE INTO ''' + kind + '''_deleted
            SELECT *, STRFTIME("%s", "now") FROM ''' + kind + '''
            WHERE name NOT IN (
              SELECT name FROM ''' + kind + '''_new
            )'''

    c.execute(q)
    if DEBUG:
        deleted = tuple(c.execute('SELECT name FROM ' + kind + '_deleted'))
        print("Deleted " + kind + ": " + str([item[0] for item in deleted]))
    deleted = getv('SELECT COUNT(*) FROM ' + kind + '_deleted')

    # Fresh rows newer than anything previously stored, per subreddit
    q = '''SELECT COUNT(*) FROM (
            SELECT subreddit, MAX(created_utc) AS maxtime
            FROM ''' + kind + ''' GROUP BY subreddit
          ) AS maxtimes INNER JOIN ''' + kind + '''_new
          ON ''' + kind + '''_new.subreddit = maxtimes.subreddit
          AND ''' + kind + '''_new.created_utc > maxtimes.maxtime'''
    new = getv(q)

    # Promote the fresh snapshot to be the canonical table
    c.execute('DROP TABLE ' + kind)
    c.execute('ALTER TABLE ' + kind + '_new RENAME TO ' + kind)

    db.commit()

    total = getv('SELECT COUNT(name) FROM ' + kind)

    print(str(deleted) + ' ' + kind + ' deleted,', end=' ')
    print(str(new) + ' new,', end=' ')
    print(str(fell) + ' fell out -', end=' ')
    print(str(total) + ' new total')
  633.  
  634.  
  635. def check_deletions():
  636.  
  637.     print("> Checking for deletions at " + str(int(current_epoch())) + "                   ")
  638.  
  639.     compare_update('comments', fetch_posts_since('comments', 0))
  640.     compare_update('submissions', fetch_posts_since('submissions', 0))
  641.  
  642. def check_user_deletion(post, kind):
  643.     safe_domains = readfile(conf('safedomains')).strip().split()
  644.     u = readfile(conf('unknowndomains')).strip().split('\n')
  645.     unknown_domains = {}
  646.     for d in u:
  647.         if not d:
  648.             continue
  649.         d = d.strip().split()
  650.         unknown_domains[d[0]] = int(d[1])
  651.  
  652.     print("> Checking for user deletion of " + kind + " " + post[0] + \
  653.         (" from " + post[4] if kind == 'comment' else '') + \
  654.         " in " + post[1] + ",", end=' ')
  655.     left = getv('SELECT COUNT(*) FROM (SELECT name FROM comments_deleted \
  656.                     UNION SELECT name FROM submissions_deleted)')-1
  657.     print(str(left or 'no') + " more left to check -", end=' ')
  658.     sys.stdout.flush()
  659.  
  660.     spotted = post[-1]
  661.  
  662.     name = post[0].split('_')[1]
  663.     sub = post[1]
  664.     author = post[2]
  665.     posted = post[3]
  666.  
  667.     compare = tuple(comment[0] for comment in
  668.                     tuple(c.execute('SELECT created_utc FROM ' + kind + 's \
  669.                                    WHERE subreddit=? \
  670.                                    ORDER BY created_utc ASC', (sub,))))
  671.     mincompare = min(compare) if compare else current_epoch() - DAY*7
  672.     compare = compare[-int(len(compare)*0.95)] if compare \
  673.         else current_epoch() - DAY*7
  674.  
  675.     print(str(round((current_epoch() - posted) / HOUR, 2)) + "h vs cutoff " + \
  676.         str(round((current_epoch() - compare) / HOUR, 2)) + "h, oldest " + \
  677.         str(round((current_epoch() - mincompare) / HOUR, 2)) + "h -", end=' ')
  678.     sys.stdout.flush()
  679.     if posted <= compare:
  680.         print("too old")
  681.         return False
  682.  
  683.     title = ''
  684.     if kind == 'comment':
  685.         link_id = post[4].split('_')[1]
  686.         content = post[5]
  687.         baseurl = 'https://oauth.reddit.com/user/'+author
  688.         usercomments = {}
  689.         shadowbanned = False
  690.         try:
  691.             usercomments = fetch(baseurl+'/comments/.json?sort=new&limit=100',
  692.                                  posted - 30,
  693.                                  ((str, 'name'),
  694.                                   (str, 'subreddit'),
  695.                                   (str, 'author'),
  696.                                   (float, 'created_utc'),
  697.                                   (str, 'link_id'),
  698.                                   (str, 'body')),
  699.                                  kind='noupdate')
  700.         except urllib.error.HTTPError as e:
  701.             if e.code in [403, 404]:
  702.                 print("- " + author + " shadowbanned or deleted", end=' ')
  703.                 url = 'https://oauth.reddit.com/r/' + sub + '/comments/' + \
  704.                       link_id + '/comment/' + name + '/.json'
  705.                 try:
  706.                     reddit(url, headers=auth, raw=True)
  707.                     print("- deleted")
  708.                     return False
  709.                 except urllib.error.HTTPError as e:
  710.                     if e.code in [403, 404]:
  711.                         print("- shadowbanned", end=' ')
  712.                         shadowbanned = True
  713.                     else:
  714.                         raise
  715.             else:
  716.                 raise
  717.         usercomments = {comment['name']: model_to_tuple(comment, MODELS['comments'])
  718.                         for comment in usercomments}
  719.         if post[0] not in list(usercomments.keys()) and not shadowbanned:
  720.             print("- deleted by "+author+" ("+((content[:47]+"...")
  721.                                                if len(content) > 50
  722.                                                else content).replace('\n',
  723.                                                                      ' / ')+")")
  724.             return False
  725.         else:
  726.             if not shadowbanned:
  727.                 post = usercomments[post[0]]
  728.                 name = post[0].split('_')[1]
  729.                 sub = post[1]
  730.                 author = post[2]
  731.                 posted = post[3]
  732.                 link_id = post[4].split('_')[1]
  733.                 content = post[5]
  734.             print("- deleted by mods")
  735.             title1 = str(epoch_to_string(short=True) + " - '")
  736.             title2 = str("' by /u/" + author +
  737.                              " removed from /r/" + sub)
  738.             lower_frame_boundary = spotted - DELETION_SPACING
  739.             upper_frame_boundary = spotted
  740.             frame_lower = round((float(lower_frame_boundary) - posted) / 60)
  741.             if frame_lower < 0:
  742.                 frame_lower = 0
  743.             frame_upper = round((float(upper_frame_boundary) - posted) / 60)
  744.             if frame_lower != frame_upper:
  745.                 frame = str(int(frame_lower)) + "-" + str(int(frame_upper))
  746.             else:
  747.                 frame = str(int(frame_lower))
  748.             title2 += " within " + frame + "min"
  749.             if shadowbanned:
  750.                 title2 += " (user shadowbanned)"
  751.             restlen = ALLOWED_TITLE_LENGTH - (len(title1) + len(title2))
  752.             intro = re.sub(r'&gt;.*\n', '[quote]', content)
  753.  
  754.             intro = re.sub(r'\[([^\]]*)\]\([^\)]*\)', r'[\1]', intro)
  755.             intro = re.sub(URLREGEX, '[link]', intro)
  756.  
  757.             intro = intro.replace('/r/', 'r/')
  758.             intro = intro.replace('/u/', 'u/')
  759.             intro = re.sub(r' +', ' ', intro)
  760.             intro = re.sub(r'[ \n/][ \n/]+', ' / ', intro)
  761.             intro = intro.strip(' \n/')
  762.  
  763.             links = []
  764.             for url in re.finditer(r'\[([^\]]*)\][ \n]?\(([^\)]*)\)', content):
  765.                 links += [url.group(2)]
  766.                 content = content.replace(url.group(0),
  767.                                           '[' + url.group(1) + ']^^' +
  768.                                           str(len(links)) + ' ')
  769.             for url in re.finditer(URLREGEX, content):
  770.                 links += [url.group(0)]
  771.                 content = content.replace(url.group(0),
  772.                                           '[link]^^' + str(len(links)) + ' ')
  773.  
  774.             if len(intro) > restlen:
  775.                 intro = str(intro[:restlen-3].strip(' ./,') + "...")
  776.             else:
  777.                 intro = str(intro)
  778.             title = title1 + intro + title2
  779.             title = title[:ALLOWED_TITLE_LENGTH]
  780.             body = "'''\n\n"+content+"\n\n'''\n\n"
  781.             if post[4] == 'None':
  782.                 body = "No link could be determined."
  783.                 link = "Unknown"
  784.             else:
  785.                 linkbase = "/r/" + sub + "/comments/" + link_id + \
  786.                            "/comment/" + name + "?context=999"
  787.                 link = "https://reddit.com" + linkbase
  788.                 goldfishlink = "http://r.go1dfish.me" + linkbase
  789.                 unredditlink = "https://unreddit.com" + linkbase
  790.                 body += "[Context Link](" + link + ")\n\n"
  791.                 body += "[Go1dfish undelete link](" + goldfishlink + ")\n\n"
  792.                 body += "[unreddit undelete link](" + unredditlink + ")"
  793.             body += "\n\nAuthor: /u/" + author
  794.             if links:
  795.                 body += "\n\n"
  796.                 unknowns = False
  797.                 for l in range(len(links)):
  798.                     try:
  799.                         domain = tld.get_tld(links[l])
  800.                     except tld.exceptions.TldBadUrl:
  801.                         domain = 'reddit.com'
  802.                     except (tld.exceptions.TldDomainNotFound, ValueError):
  803.                         domain = 'malformed.domain'
  804.                         print("Malformed domain: " + links[l])
  805.                     if domain in safe_domains:
  806.                         body += str(l+1) + ': ' + links[l] + '  \n'
  807.                     else:
  808.                         unknowns = True
  809.                         if domain not in DOMAIN_BLACKLIST:
  810.                             if domain in list(unknown_domains.keys()):
  811.                                 unknown_domains[domain] += 1
  812.                             else:
  813.                                 unknown_domains[domain] = 1
  814.                             with closing(open(conf('unknowndomains'),
  815.                                          'w')) as f:
  816.                                 for d in unknown_domains:
  817.                                     f.write(d+' '+str(unknown_domains[d])+'\n')
  818.                         oblink = re.sub(r'.*://', '', links[l])
  819.                         if domain != "maldormed.domain":
  820.                             oblink = censor(oblink, 0.25)
  821.                         body += str(l+1) + ': `' + oblink + '`  \n'
  822.                 if unknowns:
  823.                     body += "\nUnknown links are censored to prevent \
  824.                        spreading illicit content."
  825.             print(title, end=' ')
  826.  
  827.     elif kind == 'submission':
  828.         reason = post[4]
  829.         subject = post[5]
  830.         domain = post[6]
  831.         selftext = post[7]
  832.         baseurl = 'https://oauth.reddit.com/user/'+author
  833.         shadowbanned = False
  834.         usersubmissions = {}
  835.         try:
  836.             usersubmissions = fetch(baseurl +
  837.                                     '/submitted/.json?sort=new&limit=100',
  838.                                     posted - 30,
  839.                                     ((str, 'name'),
  840.                                      (str, 'subreddit'),
  841.                                      (str, 'author'),
  842.                                      (float, 'created_utc'),
  843.                                      (str, 'link_flair_text'),
  844.                                      (str, 'title'),
  845.                                      (str, 'domain'),
  846.                                      (str, 'selftext')),
  847.                                     kind='noupdate')
  848.         except urllib.error.HTTPError as e:
  849.             if e.code in [403, 404]:
  850.                 print("- " + author + " shadowbanned or deleted", end=' ')
  851.                 url = 'https://oauth.reddit.com/r/' + sub + '/comments/' + \
  852.                       name + '/.json'
  853.                 try:
  854.                     reddit(url, headers=auth, raw=True)
  855.                     print("- deleted")
  856.                     return False
  857.                 except urllib.error.HTTPError as e:
  858.                     if e.code in [403, 404]:
  859.                         print("- shadowbanned", end=' ')
  860.                         shadowbanned = True
  861.                     else:
  862.                         raise
  863.             else:
  864.                 raise
  865.         usersubmissions = {submission['name']: model_to_tuple(submission, MODELS['submissions'])
  866.                            for submission in usersubmissions}
  867.         if post[0] not in list(usersubmissions.keys()) and not shadowbanned:
  868.             print("- deleted by "+author+" ("+((subject[:47]+"...")
  869.                                                if len(subject) > 50
  870.                                                else subject)+")")
  871.             return False
  872.         else:
  873.             reason = 'None'
  874.             if not shadowbanned and not post[7]:
  875.                 post = usersubmissions[post[0]]
  876.                 name = post[0].split('_')[1]
  877.                 sub = post[1]
  878.                 author = post[2]
  879.                 posted = post[3]
  880.                 reason = post[4]
  881.                 subject = post[5]
  882.                 domain = post[6]
  883.                 selftext = post[7]
  884.             reason = reason.lower()
  885.             if 'removed' in reason:
  886.                 reason = reason.replace('removed', '').strip(' -|—')
  887.                 reason = reason.capitalize() or 'None'
  888.                 reason = " - reason: "+reason
  889.             else:
  890.                 reason = ''
  891.             print("- deleted by mods" + reason)
  892.             lower_frame_boundary = spotted - DELETION_SPACING
  893.             upper_frame_boundary = spotted
  894.             frame_lower = round((float(lower_frame_boundary) - posted) / 60)
  895.             if frame_lower < 0:
  896.                 frame_lower = 0
  897.             frame_upper = round((float(upper_frame_boundary) - posted) / 60)
  898.             if frame_lower != frame_upper:
  899.                 frame = str(int(frame_lower)) + "-" + str(int(frame_upper))
  900.             else:
  901.                 frame = str(int(frame_lower))
  902.             title1 = str(epoch_to_string(short=True) + " - '")
  903.             title2 = str("' (" + domain + ") by /u/" + author +
  904.                              " removed from /r/" + sub +
  905.                              " within " + frame + "min" + reason)
  906.             if shadowbanned:
  907.                 title2 += " (user shadowbanned)"
  908.             restlen = ALLOWED_TITLE_LENGTH - (len(title1) + len(title2))
  909.             if len(subject) > restlen:
  910.                 intro = str(subject[:restlen-3].strip(' ./') + "...")
  911.             else:
  912.                 intro = str(subject)
  913.             title = title1 + intro + title2
  914.             title = title[:ALLOWED_TITLE_LENGTH]
  915.             linkbase = "/r/" + sub + "/comments/" + name
  916.             link = "https://reddit.com/" + linkbase
  917.             goldfishlink = "http://r.go1dfish.me" + linkbase
  918.             unredditlink = "https://unreddit.com" + linkbase
  919.             body = ""
  920.             links = []
  921.             if selftext:
  922.                 links = []
  923.                 for url in re.finditer(r'\[([^\]]*)\][ \n]?\(([^\)]*)\)',
  924.                                        selftext):
  925.                     links += [url.group(2)]
  926.                     selftext = selftext.replace(url.group(0),
  927.                                                 '[' + url.group(1) + ']^^' +
  928.                                                 str(len(links)) + ' ')
  929.                 for url in re.finditer(URLREGEX, selftext):
  930.                     links += [url.group(0)]
  931.                     selftext = selftext.replace(url.group(0),
  932.                                                 '[link]^^' +
  933.                                                 str(len(links)) + ' ')
  934.                 body = "'''\n\n" + selftext + "\n\n'''\n\n"
  935.             body += "[" + subject + "](" + link + ")\n\n"
  936.             body += "[Go1dfish undelete link](" + goldfishlink + ")\n\n"
  937.             body += "[unreddit undelete link](" + unredditlink + ")\n\n"
  938.             body += "Author: /u/" + author
  939.             if links:
  940.                 body += "\n\n"
  941.                 unknowns = False
  942.                 for l in range(len(links)):
  943.                     try:
  944.                         domain = tld.get_tld(links[l])
  945.                     except tld.exceptions.TldBadUrl:
  946.                         domain = 'reddit.com'
  947.                     except (tld.exceptions.TldDomainNotFound, ValueError):
  948.                         domain = 'malformed.domain'
  949.                         print("Malformed domain: " + links[l])
  950.                     if domain in safe_domains:
  951.                         body += str(l+1) + ': ' + links[l] + '  \n'
  952.                     else:
  953.                         unknowns = True
  954.                         if domain not in DOMAIN_BLACKLIST:
  955.                             if domain in list(unknown_domains.keys()):
  956.                                 unknown_domains[domain] += 1
  957.                             else:
  958.                                 unknown_domains[domain] = 1
  959.                             with closing(open(conf('unknowndomains'),
  960.                                          'w')) as f:
  961.                                 for d in unknown_domains:
  962.                                     f.write(d+' '+str(unknown_domains[d])+'\n')
  963.                         oblink = re.sub(r'.*://', '', links[l])
  964.                         if domain != "maldormed.domain":
  965.                             oblink = censor(oblink, 0.25)
  966.                         body += str(l+1) + ': `' + oblink + '`  \n'
  967.                 if unknowns:
  968.                     body += "\nUnknown links are censored to prevent \
  969.                        spreading illicit content."
  970.             print(title, end=' ')
  971.  
  972.     h = HTMLParser()
  973.     title = h.unescape(title)
  974.     body = h.unescape(body)
  975.     if len(body) > 40000:
  976.         body = body[:39900] + '[... post size limit of 40,000 characters reached]'
  977.     post_data = {'sr': 'removalbot', 'title': title,
  978.                  'kind': 'self', 'text': body}
  979.     if not DUMMY:
  980.         response = reddit('https://oauth.reddit.com/api/submit',
  981.                           post_data=post_data, headers=auth, raw=True)
  982.     print("- submitted", end=' ')
  983.     sys.stdout.flush()
  984.     if not DUMMY:
  985.         fullname_new = get_fullname_new(response)
  986.         if setflair(fullname_new, kind+'-'+sub.lower()):
  987.             print("- flaired")
  988.         else:
  989.             print("- no flair")
  990.  
  991.     if not shadowbanned and getv('SELECT author FROM notify WHERE author=?',
  992.                                  (author,)):
  993.         pm_subject = "Your "+kind+" was deleted from /r/"+sub
  994.         if kind == 'comment':
  995.             content = content.strip(' \n')
  996.             if '\n\n' in content:
  997.                 content = '\n\n'+content+'\n\n'
  998.         else:
  999.             content = '  \n&nbsp;**Reason**: '+str(reason)
  1000.         content = h.unescape(content)
  1001.         pm_body = 'Hello, **'+author+'**!\n\n&nbsp;\n\n'
  1002.         pm_body += 'Your '+kind+' appears to have been deleted '
  1003.         pm_body += 'from **/r/' + sub + '** by the moderators, '
  1004.         pm_body += '/u/AutoModerator or the administrators.\n\n'
  1005.         pm_body += '&nbsp;**'+kind.capitalize()+'**: '+content+'  \n'
  1006.         pm_body += '&nbsp;**Posted at**: '+epoch_to_string(posted)+'  \n'
  1007.         pm_body += '&nbsp;**Delay until deletion**: '+frame+'min  \n'
  1008.         pm_body += '&nbsp;**Link**: '+str(link)+'\n\n&nbsp;\n\n'
  1009.         pm_body += 'Have a nice day!  \n'
  1010.         pm_body += '/u/removalbot\n\n'
  1011.         pm_body += '----\n\n'
  1012.         pm_body += '^(Note that the deletion may have been accidental '
  1013.         pm_body += 'or its detection a false positive caused by heavy load '
  1014.         pm_body += 'on reddit\'s servers.)  \n'
  1015.         pm_body += '^^^This ^^^is ^^^an ^^^automated ^^^message ^^^from '
  1016.         pm_body += '^^^/r/removalbot.'
  1017.         send_pm(author, pm_subject, pm_body)
  1018.  
  1019.     return True
  1020.  
# Rate-limit bookkeeping, presumably filled in from reddit's
# X-Ratelimit-* response headers by the request helper defined elsewhere
# in this file; None until the first API call. -- TODO confirm against
# the `reddit()` helper.
requests_used = None
requests_remaining = None
requests_reset = None

# Identify the bot to reddit: maintainer username and version string are
# read from plain files in CONFDIR (see file header for the layout).
maintainer = readfile(conf('maintainer')).strip()
version = readfile(conf('version')).strip()
USERAGENT = 'removalbot by /u/'+maintainer+', v'+version

# Work out of the config directory so relative paths resolve there.
os.chdir(CONFDIR)

# Persistent post-tracking database (schema documented in the file header).
db = sqlite3.connect(conf('posts.db'))
c = db.cursor()

# OAuth login; `auth` holds the headers used on every subsequent request.
auth = login()

update_flairs()

# Crash-report handling: a previous run that died writes its reason into
# CONFDIR/error. On restart, PM the maintainer and (depending on config)
# post the error publicly. A clean Ctrl+C ('KeyboardInterrupt') is ignored.
if os.path.isfile(conf('error')) and \
   readfile(conf('error')) != 'KeyboardInterrupt':
    # Timestamp of the crash, taken from the error file's creation time.
    failed = os.path.getctime(conf('error'))
    failed = epoch_to_string(epoch=failed)
    reason = readfile(conf('error'))
    send_pm(maintainer, "REMOVALBOT CRASHED", "Reason: "+reason)
    # First colon-separated token is the exception class name.
    e = reason.split(':')[0]
    print(e, e in ALSO_FATAL)
    # Publicly submit the error only when configured to, or when it is
    # fatal (FatalError / listed in ALSO_FATAL), and never in DUMMY mode.
    if not DUMMY and (SUBMIT_ERRORS or e == "FatalError" or e in ALSO_FATAL):
        print("> Submitting error "+reason, end=' ')
        sys.stdout.flush()
        title = "[!] Bot encountered an error at " + failed + \
            ", reason: " + reason
        post_data = {'sr': 'removalbot', 'kind': 'self', 'title': title}
        if reason != "FatalError":
            post_data['text'] = "Such errors usually indicate that reddit is \
                                overloaded or in maintenance mode, i. e. \
                                they are unavoidable.\n\nA database is used \
                                to minimize the impact but cannot negate it."
        else:
            post_data['text'] = "The bot encountered a fatal error. This \
                                should not happen. The maintainer has been \
                                notified; until further action, the bot is \
                                suspended."
        response = reddit('https://oauth.reddit.com/api/submit',
                          post_data=post_data, headers=auth, raw=True)
        # Tag the error post with the 'error' flair.
        setflair(get_fullname_new(response), 'error')
        print("- done")
    else:
        print("> Encountered error "+reason)
    # Remove the marker so the next restart does not re-report it.
    os.remove(conf('error'))

update_notify()
  1071.  
# Main scheduler loop. Each periodic task fires when the current epoch,
# divided by that task's spacing, differs from the bucket recorded at its
# last run (via getlast/setlast). At most ONE periodic task runs per
# iteration (if/elif chain); only when none is due does the loop drain the
# *_deleted work queues, one row per iteration.
while True:
    now = int(current_epoch())
    last_deletion_check = getlast('deletion')
    last_new_check = getlast('new')
    last_subscriber_check = getlast('subscribers')
    last_flair_check = getlast('flair')
    changed = False
    if int(now / DELETION_SPACING) != last_deletion_check:
        check_deletions()
        setlast('deletion', int(now / DELETION_SPACING))
    elif int(now / NEW_SPACING) != last_new_check:
        get_new()
        setlast('new', int(now / NEW_SPACING))
    elif int(now / SUBSCRIBER_SPACING) != last_subscriber_check:
        update_notify()
        setlast('subscribers', int(now / SUBSCRIBER_SPACING))
    elif int(now / FLAIR_SPACING) != last_flair_check:
        update_flairs()
        setlast('flair', int(now / FLAIR_SPACING))
    else:
        # No periodic task due: process one queued deletion, submissions
        # taking priority over comments.
        deleted_submission = \
            c.execute('SELECT * FROM submissions_deleted LIMIT 1').fetchone()
        deleted_comment = \
            c.execute('SELECT * FROM comments_deleted LIMIT 1').fetchone()
        # Items remaining across both queues AFTER this one is handled.
        left = getv('SELECT COUNT(*) FROM (SELECT name FROM comments_deleted \
                UNION SELECT name FROM submissions_deleted)')-1
        if deleted_submission:
            check_user_deletion(deleted_submission, 'submission')
            c.execute('DELETE FROM submissions_deleted WHERE name=?',
                      (deleted_submission[0],))
        elif deleted_comment:
            check_user_deletion(deleted_comment, 'comment')
            c.execute('DELETE FROM comments_deleted WHERE name=?',
                      (deleted_comment[0],))
        # Ask sqlite to release unused page-cache memory.
        c.execute('PRAGMA shrink_memory')
        if not deleted_submission and not deleted_comment:
            # Both queues empty: idle briefly and skip the status report.
            time.sleep(0.1)
            continue
        elif left == 0:
            # Queue just drained: report how many unknown domains were
            # logged to CONFDIR/unknowndomains during this batch.
            c_unknown = readfile(conf('unknowndomains')).strip().split('\n')
            print("> Done undeleting, " + str(len(c_unknown)) + \
                " unknown domains logged")

    print()
    # Rate-limit status line, printed only once all three counters have
    # been populated by a prior API response.
    if requests_remaining and requests_reset and requests_used:
        # Header value may be a float string; normalize to an int string.
        requests_remaining = str(int(float(requests_remaining)))
        print("Used: " + requests_used + ", remaining: " + \
              requests_remaining + " in " + requests_reset + "s", end=' ')
        remaining = float(requests_remaining)
        reset = int(requests_reset)
        # Sustainable request budget in requests per second.
        print("(" + str(round(remaining / reset, 2)) + "/s)", end=' ')
        if remaining < reset:
            # Fewer requests left than seconds until reset: compute a
            # backoff in milliseconds. NOTE(review): this only prints the
            # sleep time; no time.sleep() is visible here -- presumably
            # throttling happens in the request helper, confirm.
            sleeptime = str(int(round(float(reset - remaining) /
                                remaining, 3) * 1000))
            print("- OVERLOAD, sleep " + sleeptime + "ms")
        else:
            # '\r' keeps the OK status on one self-overwriting line.
            print("- OK\r", end=' ')
        sys.stdout.flush()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement