- #!/usr/bin/python3
- # -*- coding: utf-8 -*-
- """
- USAGE: Set up as described below, then run the script somewhere in the
- background; a detached screen session works nicely for this.
- Non-standard dependencies: sqlite3, tld
- It is recommended to wrap this in the following script:
- #!/bin/bash
- RET=1
- while [ $RET -eq 1 ]
- do
- removalbot
- RET=$?
- done
- Ctrl+C is handled as success; expected failures have exit code 2.
- Likely programming errors have exit code 3.
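- The wrapper can then be kept alive in a detached screen session, for example
- (wrapper path illustrative): screen -dmS removalbot /usr/local/bin/removalbot-wrapper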
- Files:
- - CONFDIR, defined below
- - CONFDIR/maintainer, your reddit username
- CONFDIR/secrets in the format "user\npass\nclient_id\nclient_secret"
- - CONFDIR/version
- - CONFDIR/safedomains, empty or newline-separated domains
- - CONFDIR/unknowndomains, empty
- - CONFDIR/posts.db, sqlite3 database
- Be aware that you need to register a reddit OAuth app for the bot and gather the client ID and secret for the login process.
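- An example CONFDIR/secrets (one value per line, all values placeholders):
- mybotaccount
- hunter2
- AbCdEfGhIjKlMn
- oPqRsTuVwXyZ0123456789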
- DB schema:
- CREATE TABLE notify
- (author TEXT UNIQUE);
- CREATE TABLE comments
- (name TEXT UNIQUE PRIMARY KEY,
- subreddit TEXT,
- author TEXT,
- created_utc INTEGER,
- link_id TEXT,
- body TEXT);
- CREATE TABLE comments_deleted
- (name TEXT UNIQUE PRIMARY KEY,
- subreddit TEXT,
- author TEXT,
- created_utc INTEGER,
- link_id TEXT,
- body TEXT,
- spotted INTEGER);
- CREATE TABLE flairs
- (name TEXT UNIQUE PRIMARY KEY,
- flair_template_id TEXT UNIQUE);
- CREATE TABLE times
- (name TEXT UNIQUE PRIMARY KEY,
- time INTEGER);
- CREATE TABLE submissions
- (name TEXT UNIQUE PRIMARY KEY,
- subreddit TEXT,
- author TEXT,
- created_utc INTEGER,
- link_flair_text TEXT,
- title TEXT,
- domain TEXT,
- selftext TEXT);
- CREATE TABLE submissions_deleted
- (name TEXT UNIQUE PRIMARY KEY,
- subreddit TEXT,
- author TEXT,
- created_utc INTEGER,
- link_flair_text TEXT,
- title TEXT,
- domain TEXT,
- selftext TEXT,
- spotted INTEGER);
- # TODO: shadowbanned vs deleted users
- # TODO: log.db
- # TODO: deletion.db
- """
- LOGGING = True
- DUMMY = False
- SUBMIT_ERRORS = False
- DEBUG = True
- import os
- import sys
- import time
- import json
- import urllib.request, urllib.parse, urllib.error
- import ssl
- import socket
- import re
- import traceback
- import sqlite3
- import tld
- from random import sample
- from datetime import datetime
- from contextlib import closing
- from sys import stderr
- import html
- MINUTE = 60
- HOUR = MINUTE * 60
- DAY = HOUR * 24
- socket.setdefaulttimeout(10)
- NOTIFY_THREADS = ['3rmc4v']
- # all in seconds
- NEW_SPACING = 10
- DELETION_SPACING = 10 * MINUTE
- FLAIR_SPACING = 24 * HOUR
- SUBSCRIBER_SPACING = 12 * HOUR
- ALLOWED_TITLE_LENGTH = 300
- INTROLEN = 100
- CONFDIR = '/etc/removalbot'
- PIDFILE = "/tmp/removalbot.pid"
- LOGDIR = os.path.join(CONFDIR, 'log')
- IGNORE = ["godwins_law_bot", "totes_meta_bot", "redditbots", "ttumblrbots",
- "autowikibot", "SRScreenshot", "MRSPArchiver", "AutoModerator",
- "image_linker_bot", "SmallSubBot", "autourbanbot",
- "note-to-self-bot", "ObamaRobot", "TotesMessenger",
- "TweetsInCommentsBot", "TweetPoster", "JoeBidenBot",
- "smilesbot", "DailMail_Bot", "TrollaBot", "TotesHuman",
- "youtubefactsbot", "imgurtranscriber", "isreactionary_bot",
- "iscuck_bot", "author", "reginaldtato", "NotTheOnionBot",
- "rSGSpolice", "hwsbot", "yes_it_is_weird", "r_PictureGame",
- "prairiechicken2", "domoarigatobtfcboto", "SkydivingHaylz",
- "I_Like_Spaghetti", "STEALTHM0UNTAIN", "Google_Panda",
- "AakashMasani", "Forestl", "lurkattwork", "drgoku282",
- "texasmommie", "Really_Like_Pancakes", "BlaineWolfe",
- "Blassie098", "ghort98765", "GustavoFrings", "WritingPromptsRobot",
- "sontato", "ramsesniblick3rd", "300BlackoutSober",
- "flair_your_post_bot", "GoomyTooOP", "arbutus_", "foamed",
- "DumbCollegeStudent", "[deleted]", "GOTradeRuleBot",
- "ShadowBanCheckBot", "ShadowBannedBot", "Shiny_Sylveon",
- "PaidBot", "xbamsod", "enriquepaz13", "Moskau50", "PornOverlord",
- "ConvertsToMetric", "removalbot"]
- URLREGEX = r'''(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))'''
- DOMAIN_BLACKLIST = ["malformed.domain", "goo.gl", "tinyurl.com"]
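- # Domains in DOMAIN_BLACKLIST are never added to the unknowndomains tally
- # (see check_user_deletion); their links are still censored like any other
- # unknown domain.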
- COMMENTMODEL = ((str, 'name'),
- (str, 'subreddit'),
- (str, 'author'),
- (float, 'created_utc'),
- (str, 'link_id'),
- (str, 'body'))
- SUBMISSIONMODEL = ((str, 'name'),
- (str, 'subreddit'),
- (str, 'author'),
- (float, 'created_utc'),
- (str, 'link_flair_text'),
- (str, 'title'),
- (str, 'domain'),
- (str, 'selftext'))
- MODELS = { 'comments': COMMENTMODEL, 'submissions': SUBMISSIONMODEL }
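- # Run the whole process in UTC so epoch arithmetic and strftime output match
- # reddit's created_utc timestamps.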
- os.environ['TZ'] = 'UTC'
- time.tzset()
- pid = str(os.getpid())
- with open(PIDFILE, 'w') as f:
- f.write(pid)
- ALSO_FATAL = ['SyntaxError', 'NameError', 'IndexError', 'TypeError',
- 'ValueError', 'UnboundLocalError']
- def errorhook(extype, value, trace):
- os.unlink(PIDFILE)
- traceback.print_exception(extype, value, trace)
- if extype.__name__ == "KeyboardInterrupt":
- exit(0)
- elif extype.__name__ == "FatalError":
- send_pm(maintainer, "REMOVALBOT CRASHED", "Reason: " +
- extype.__name__+": " + value.message)
- writefile(conf('error'), extype.__name__+": " + value.message)
- exit(2)
- elif extype.__name__ in ALSO_FATAL:
- send_pm(maintainer, "REMOVALBOT CRASHED", "Reason: " +
- extype.__name__+": " + value.message)
- writefile(conf('error'), extype.__name__+": " + value.message)
- exit(3)
- sys.excepthook = errorhook
- class FatalError(Exception):
- def __init__(self, message):
- self.message = message
- Exception.__init__(self, message)
- def current_epoch():
- return (datetime.now() - datetime.utcfromtimestamp(0)).total_seconds()
- def epoch_to_string(epoch=None, tech=False, short=False):
- if epoch is None:
- epoch = current_epoch()
- try:
- epoch = float(epoch)
- except (TypeError, ValueError):
- epoch = 0
- if tech:
- model = "%y%m%d-%H%M"
- elif short:
- model = "%m-%d %H:%M"
- else:
- model = "%Y-%m-%d %H:%M %Z"
- return time.strftime(model, time.localtime(epoch))
- def conf(name):
- return str(os.path.join(CONFDIR, name))
- def newlog(name):
- if LOGGING:
- name = str(os.path.join(LOGDIR, name))
- with closing(open(name, 'w')):
- pass
- return name
- else:
- return '/dev/null'
- def readfile(f):
- with closing(open(f)) as f:
- return f.read()
- def writefile(f, data):
- with closing(open(f, 'w')) as f:
- f.write(data)
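- # censor() replaces a random fraction of characters with '*', e.g.
- # censor("example.com/page", 0.25) might return "exa*p*e.co*/*age".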
- def censor(s, fraction):
- num = int(round(fraction * len(s)))
- change_locs = set(sample(list(range(len(s))), num))
- changed = ('*' if i in change_locs else c for i, c in enumerate(s))
- return ''.join(changed)
- def getv(query, args=()):
- return (c.execute(query, args).fetchone() or (None,))[0]
- def getlast(what):
- return getv('SELECT time FROM times WHERE name=?', (what,))
- def setlast(what, utc):
- c.execute('INSERT OR REPLACE INTO times VALUES (?, ?)', (what, utc))
- db.commit()
- def login():
- print("> Logging in ", end=' ')
- sys.stdout.flush()
- secrets = readfile(conf('secrets')).split()
- username = secrets[0]
- password = secrets[1]
- client_id = secrets[2]
- client_secret = secrets[3]
- post_data = {"grant_type": "password",
- "username": username,
- "password": password}
- headers = {"User-Agent": USERAGENT}
- url = "https://www.reddit.com/api/v1/access_token"
- password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
- password_mgr.add_password(None, url, client_id, client_secret)
- handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
- opener = urllib.request.build_opener(handler)
- response = reddit(url, opener=opener.open,
- post_data=post_data, headers=headers,
- raw=True)
- if not ('token_type' in list(response.keys()) and
- 'access_token' in list(response.keys())):
- print(response, file=stderr)
- raise FatalError("Authorization Failed")
- token = response['token_type']+" "+response['access_token']
- print("- done")
- return {"Authorization": token, "User-Agent": USERAGENT}
- def mkrequest(url, headers=None, post_data=None):
- if not post_data:
- return urllib.request.Request(url, None, headers)
- else:
- for k, v in post_data.items():
- if isinstance(v, str):
- v = v.encode('utf8')
- post_data[k] = v
- post_data = urllib.parse.urlencode(post_data).encode('utf-8')
- return urllib.request.Request(url, post_data, headers)
- def errordir(e):
- for attr in dir(e):
- print(attr, getattr(e, attr), file=stderr)
- def reddit(url, opener=urllib.request.urlopen,
- headers=None, post_data=None,
- raw=False, catch_also=[]):
- global auth
- global requests_used
- global requests_remaining
- global requests_reset
- catch = [400, 401, 500, 502, 503, 504, 521] + catch_also
- while True:
- try:
- request = mkrequest(url, headers=headers, post_data=post_data)
- response = opener(request)
- requests_remaining = response.headers.get('x-ratelimit-remaining')
- requests_used = response.headers.get('x-ratelimit-used')
- requests_reset = response.headers.get('x-ratelimit-reset')
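- # Throttle proportionally: if fewer requests remain than seconds until the
- # rate-limit window resets, sleep long enough to spread the remaining
- # requests across the window.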
- if requests_remaining and requests_reset:
- remaining = float(requests_remaining)
- reset = int(requests_reset)
- if remaining < reset:
- time.sleep(float(reset - remaining) /
- remaining)
- j = json.loads(response.read().decode('utf-8'))
- if not raw:
- j = j['data']
- break
- except (ValueError, KeyError,
- socket.timeout, socket.error,
- ssl.SSLError,
- urllib.error.HTTPError, urllib.error.URLError) as e:
- print('<' + type(e).__name__ + ': ', end=' ')
- if type(e).__name__ == 'HTTPError':
- print(str(e.code) + '!>', end=' ')
- if e.code not in catch:
- raise
- elif e.code == 401:
- print("(Token expired)", end=' ')
- auth = login()
- headers = auth
- elif type(e).__name__ == 'URLError':
- if "handshake operation" not in str(e.reason):
- raise
- else:
- print('!>', end=' ')
- sys.stdout.flush()
- time.sleep(5)
- except Exception as e:
- print(file=stderr)
- print("ERROR!", file=stderr)
- print(file=stderr)
- errordir(e)
- raise
- return j
- def fetch(query, lastseen, model, kind, catch_also=[]):
- pagecount = 1
- newest = 0
- after = ''
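- # Reddit listings are paginated; 'after' carries the fullname of the last
- # item of the previous page so each request resumes where the last stopped.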
- class Done(Exception):
- pass
- try:
- while True:
- if pagecount % 10 == 0 and kind != 'noupdate':
- print('u(', end=' ')
- sys.stdout.flush()
- for post in fetch_posts_since(kind, lastcheck=newest, quiet=True):
- yield post
- print(')', end=' ')
- sys.stdout.flush()
- print('p'+str(pagecount), end=' ')
- sys.stdout.flush()
- response = reddit(query+after, headers=auth, catch_also=catch_also)
- for child in [c['data'] for c in response['children']]:
- if child['created_utc'] <= lastseen or \
- current_epoch() - child['created_utc'] > DAY * 7:
- raise Done
- if child['author'] in IGNORE:
- continue
- fields = {}
- for field in model:
- fieldtype = field[0]
- fieldname = field[1]
- if fieldtype is bool:
- fields[fieldname] = child[fieldname] and 1 or 0
- else:
- fields[fieldname] = fieldtype(child[fieldname])
- if fields['created_utc'] > newest:
- newest = fields['created_utc']
- yield fields
- if not response['after']:
- raise Done
- else:
- after = '&after='+response['after']
- pagecount += 1
- except Done:
- #print(sum([len(field) for post in log for field in post if type(field) is str]))
- #sys.stdout.flush()
- #return tuple(sorted(list(log), key=lambda post: post[3], reverse=True))
- pass
- def fetch_posts_since(kind, lastcheck=0, quiet=False):
- url = 'https://oauth.reddit.com/user/removalbot/m/monitor/' + \
- (kind if kind == 'comments' else 'new') + \
- '/.json?sort=new&limit=100'
- count = 0
- if not quiet:
- start = current_epoch()
- print("Reading " + kind + " -", end=' ')
- sys.stdout.flush()
- for post in fetch(url,
- lastcheck,
- MODELS[kind],
- kind,
- catch_also=[403]):
- count += 1
- yield post
- if not quiet:
- print("- "+str(count)+" "+kind+" new")
- print("Read in " + str(current_epoch() - start))
- def model_to_tuple(item, model):
- tup = ()
- for field in model:
- tup += (item[field[1]],)
- return tup
- def insert_tuple(post, kind):
- c.execute('INSERT OR IGNORE INTO ' + kind + ' VALUES ' +
- ('(' + ','.join(['?'] * len(post)) + ')'),
- post)
- def get_new_of(kind):
- lastcheck = getv('SELECT MAX(created_utc) FROM ' + kind)
- for post in fetch_posts_since(kind, lastcheck=lastcheck):
- post = model_to_tuple(post, MODELS[kind])
- insert_tuple(post, kind)
- db.commit()
- def get_new():
- print("> Checking for new posts,", end=' ')
- nextdelcheck = str(DELETION_SPACING - int(current_epoch() -
- last_deletion_check * DELETION_SPACING))
- print("next comparison in "+nextdelcheck+"s ")
- get_new_of('comments')
- get_new_of('submissions')
- def recurse_into_get_authors(subthread):
- subthread = subthread['data']['children']
- notify = []
- for post in subthread:
- post = post['data']
- if post['author'] != '[deleted]':
- notify += [post['author']]
- if 'replies' in list(post.keys()) and post['replies']:
- notify += recurse_into_get_authors(post['replies'])
- return notify
- def update_notify():
- global notify
- print("> Updating users to notify", end=' ')
- sys.stdout.flush()
- notify = []
- for thread in NOTIFY_THREADS:
- thread = 'https://oauth.reddit.com/r/removalbot/comments/'+thread
- pm_thread = reddit(thread+'.json',
- headers=auth, raw=True)
- notify += recurse_into_get_authors(pm_thread[1])
- notify += [maintainer]
- notify = list(set(notify))
- c.execute('DELETE FROM notify')
- for user in notify:
- c.execute('INSERT INTO notify VALUES (?)', (user,))
- db.commit()
- print("- done (" + str(len(notify)) + " users: " + ", ".join(notify) + ")")
- def update_flairs():
- print("> Fetching flairs", end=' ')
- sys.stdout.flush()
- latestposturl = 'https://oauth.reddit.com/r/removalbot/new/.json?limit=1'
- latestpostname = reddit(latestposturl, headers=auth)
- latestpostname = latestpostname['children'][0]['data']['name']
- flairurl = 'https://oauth.reddit.com/r/removalbot/api/flairselector'
- post_data = {'link': latestpostname}
- flairchoices = reddit(flairurl,
- post_data=post_data, headers=auth,
- raw=True)['choices']
- c.execute('DELETE FROM flairs')
- for flair in flairchoices:
- c.execute('INSERT INTO flairs VALUES (?,?)',
- (flair['flair_text'], flair['flair_template_id']))
- db.commit()
- print("- done")
- def get_fullname_new(response):
- response = response['jquery']
- is_redirect = False
- fullname_new = None
- for line in response:
- if is_redirect:
- fullname_new = 't3_'+line[3][0].split('/')[-3]
- break
- if line[2] == 'attr' and line[3] == 'redirect':
- is_redirect = True
- if fullname_new:
- return fullname_new
- else:
- print(response)
- raise FatalError("Malformed response from reddit")
- def setflair(fullname, text):
- fid = getv('SELECT flair_template_id FROM flairs WHERE name=?', (text,))
- if not fid:
- return False
- post_data = {'link': fullname,
- 'api_type': 'json',
- 'flair_template_id': fid}
- if not DUMMY:
- reddit('https://oauth.reddit.com/r/removalbot/api/selectflair',
- post_data=post_data, headers=auth, raw=True)
- return True
- def send_pm(to, subject, text):
- post_data = {'api_type': 'json',
- 'subject': subject,
- 'text': text,
- 'to': to}
- print("Sending PM '"+subject+"' to /u/"+to, end=' ')
- sys.stdout.flush()
- if not DUMMY:
- reddit('https://oauth.reddit.com/api/compose',
- post_data=post_data, headers=auth, raw=True)
- print("- sent")
- #def timestring(span):
- # spanstring = ''
- # times = ((60.0, 'min'), (60.0, 'h'), (24.0, 'd'))
- # for t in times:
- # span /= t[0]
- # if span > 5:
- # spanstring = str(round(span, 1)) + t[1]
- # return spanstring
- def compare_update(kind, newposts):
- c.execute('CREATE TABLE IF NOT EXISTS ' + kind + '_new \
- AS SELECT * FROM ' + kind + ' WHERE 0')
- for post in newposts:
- post = model_to_tuple(post, MODELS[kind])
- insert_tuple(post, kind + '_new')
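- # Drop posts older than the oldest post per subreddit in the fresh snapshot:
- # they have fallen out of the listing window and can no longer be compared,
- # so their absence does not count as a deletion.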
- q = '''DELETE FROM ''' + kind + ''' WHERE name IN (
- SELECT name FROM (
- SELECT subreddit, MIN(created_utc) AS mintime
- FROM ''' + kind + '''_new GROUP BY subreddit
- ) AS mintimes INNER JOIN ''' + kind + '''
- ON ''' + kind + '''.subreddit = mintimes.subreddit
- AND ''' + kind + '''.created_utc < mintimes.mintime
- )'''
- oldcount = getv('SELECT COUNT(name) FROM ' + kind)
- c.execute(q)
- newcount = getv('SELECT COUNT(name) FROM ' + kind)
- fell = oldcount - newcount
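- # Anything still inside the window but missing from the fresh snapshot is
- # treated as a potential removal; 'spotted' records when it disappeared.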
- q = '''INSERT OR IGNORE INTO ''' + kind + '''_deleted
- SELECT *, STRFTIME("%s", "now") FROM ''' + kind + '''
- WHERE name NOT IN (
- SELECT name FROM ''' + kind + '''_new
- )'''
- c.execute(q)
- if DEBUG:
- deleted = tuple(c.execute('SELECT name FROM ' + kind + '_deleted'))
- print("Deleted " + kind + ": " + str([item[0] for item in deleted]))
- deleted = getv('SELECT COUNT(*) FROM ' + kind + '_deleted')
- q = '''SELECT COUNT(*) FROM (
- SELECT subreddit, MAX(created_utc) AS maxtime
- FROM ''' + kind + ''' GROUP BY subreddit
- ) AS maxtimes INNER JOIN ''' + kind + '''_new
- ON ''' + kind + '''_new.subreddit = maxtimes.subreddit
- AND ''' + kind + '''_new.created_utc > maxtimes.maxtime'''
- new = getv(q)
- c.execute('DROP TABLE ' + kind)
- c.execute('ALTER TABLE ' + kind + '_new RENAME TO ' + kind)
- db.commit()
- total = getv('SELECT COUNT(name) FROM ' + kind)
- print(str(deleted) + ' ' + kind + ' deleted,', end=' ')
- print(str(new) + ' new,', end=' ')
- print(str(fell) + ' fell out -', end=' ')
- print(str(total) + ' new total')
- def check_deletions():
- print("> Checking for deletions at " + str(int(current_epoch())) + " ")
- compare_update('comments', fetch_posts_since('comments', 0))
- compare_update('submissions', fetch_posts_since('submissions', 0))
- def check_user_deletion(post, kind):
- safe_domains = readfile(conf('safedomains')).strip().split()
- u = readfile(conf('unknowndomains')).strip().split('\n')
- unknown_domains = {}
- for d in u:
- if not d:
- continue
- d = d.strip().split()
- unknown_domains[d[0]] = int(d[1])
- print("> Checking for user deletion of " + kind + " " + post[0] + \
- (" from " + post[4] if kind == 'comment' else '') + \
- " in " + post[1] + ",", end=' ')
- left = getv('SELECT COUNT(*) FROM (SELECT name FROM comments_deleted \
- UNION SELECT name FROM submissions_deleted)')-1
- print(str(left or 'no') + " more left to check -", end=' ')
- sys.stdout.flush()
- spotted = post[-1]
- name = post[0].split('_')[1]
- sub = post[1]
- author = post[2]
- posted = post[3]
- compare = tuple(comment[0] for comment in
- tuple(c.execute('SELECT created_utc FROM ' + kind + 's \
- WHERE subreddit=? \
- ORDER BY created_utc ASC', (sub,))))
- mincompare = min(compare) if compare else current_epoch() - DAY*7
- compare = compare[-int(len(compare)*0.95)] if compare \
- else current_epoch() - DAY*7
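- # Heuristic cutoff: skip posts older than ~95% of the posts currently
- # tracked for this subreddit, since the listing window no longer reliably
- # covers them.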
- print(str(round((current_epoch() - posted) / HOUR, 2)) + "h vs cutoff " + \
- str(round((current_epoch() - compare) / HOUR, 2)) + "h, oldest " + \
- str(round((current_epoch() - mincompare) / HOUR, 2)) + "h -", end=' ')
- sys.stdout.flush()
- if posted <= compare:
- print("too old")
- return False
- title = ''
- if kind == 'comment':
- link_id = post[4].split('_')[1]
- content = post[5]
- baseurl = 'https://oauth.reddit.com/user/'+author
- usercomments = {}
- shadowbanned = False
- try:
- usercomments = fetch(baseurl+'/comments/.json?sort=new&limit=100',
- posted - 30,
- ((str, 'name'),
- (str, 'subreddit'),
- (str, 'author'),
- (float, 'created_utc'),
- (str, 'link_id'),
- (str, 'body')),
- kind='noupdate')
- except urllib.error.HTTPError as e:
- if e.code in [403, 404]:
- print("- " + author + " shadowbanned or deleted", end=' ')
- url = 'https://oauth.reddit.com/r/' + sub + '/comments/' + \
- link_id + '/comment/' + name + '/.json'
- try:
- reddit(url, headers=auth, raw=True)
- print("- deleted")
- return False
- except urllib.error.HTTPError as e:
- if e.code in [403, 404]:
- print("- shadowbanned", end=' ')
- shadowbanned = True
- else:
- raise
- else:
- raise
- usercomments = {comment['name']: model_to_tuple(comment, MODELS['comments'])
- for comment in usercomments}
- if post[0] not in list(usercomments.keys()) and not shadowbanned:
- print("- deleted by "+author+" ("+((content[:47]+"...")
- if len(content) > 50
- else content).replace('\n',
- ' / ')+")")
- return False
- else:
- if not shadowbanned:
- post = usercomments[post[0]]
- name = post[0].split('_')[1]
- sub = post[1]
- author = post[2]
- posted = post[3]
- link_id = post[4].split('_')[1]
- content = post[5]
- print("- deleted by mods")
- title1 = str(epoch_to_string(short=True) + " - '")
- title2 = str("' by /u/" + author +
- " removed from /r/" + sub)
- lower_frame_boundary = spotted - DELETION_SPACING
- upper_frame_boundary = spotted
- frame_lower = round((float(lower_frame_boundary) - posted) / 60)
- if frame_lower < 0:
- frame_lower = 0
- frame_upper = round((float(upper_frame_boundary) - posted) / 60)
- if frame_lower != frame_upper:
- frame = str(int(frame_lower)) + "-" + str(int(frame_upper))
- else:
- frame = str(int(frame_lower))
- title2 += " within " + frame + "min"
- if shadowbanned:
- title2 += " (user shadowbanned)"
- restlen = ALLOWED_TITLE_LENGTH - (len(title1) + len(title2))
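- # Condense the comment body into a title-sized intro: strip quoted lines,
- # collapse markdown links and bare URLs, and normalise whitespace.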
- intro = re.sub(r'>.*\n', '[quote]', content)
- intro = re.sub(r'\[([^\]]*)\]\([^\)]*\)', r'[\1]', intro)
- intro = re.sub(URLREGEX, '[link]', intro)
- intro = intro.replace('/r/', 'r/')
- intro = intro.replace('/u/', 'u/')
- intro = re.sub(r' +', ' ', intro)
- intro = re.sub(r'[ \n/][ \n/]+', ' / ', intro)
- intro = intro.strip(' \n/')
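- # Replace every link in the full body with a numbered superscript marker;
- # the targets are listed (and, for unknown domains, censored) further below.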
- links = []
- for url in re.finditer(r'\[([^\]]*)\][ \n]?\(([^\)]*)\)', content):
- links += [url.group(2)]
- content = content.replace(url.group(0),
- '[' + url.group(1) + ']^^' +
- str(len(links)) + ' ')
- for url in re.finditer(URLREGEX, content):
- links += [url.group(0)]
- content = content.replace(url.group(0),
- '[link]^^' + str(len(links)) + ' ')
- if len(intro) > restlen:
- intro = str(intro[:restlen-3].strip(' ./,') + "...")
- else:
- intro = str(intro)
- title = title1 + intro + title2
- title = title[:ALLOWED_TITLE_LENGTH]
- body = "'''\n\n"+content+"\n\n'''\n\n"
- if post[4] == 'None':
- body = "No link could be determined."
- link = "Unknown"
- else:
- linkbase = "/r/" + sub + "/comments/" + link_id + \
- "/comment/" + name + "?context=999"
- link = "https://reddit.com" + linkbase
- goldfishlink = "http://r.go1dfish.me" + linkbase
- unredditlink = "https://unreddit.com" + linkbase
- body += "[Context Link](" + link + ")\n\n"
- body += "[Go1dfish undelete link](" + goldfishlink + ")\n\n"
- body += "[unreddit undelete link](" + unredditlink + ")"
- body += "\n\nAuthor: /u/" + author
- if links:
- body += "\n\n"
- unknowns = False
- for l in range(len(links)):
- try:
- domain = tld.get_tld(links[l])
- except tld.exceptions.TldBadUrl:
- domain = 'reddit.com'
- except (tld.exceptions.TldDomainNotFound, ValueError):
- domain = 'malformed.domain'
- print("Malformed domain: " + links[l])
- if domain in safe_domains:
- body += str(l+1) + ': ' + links[l] + ' \n'
- else:
- unknowns = True
- if domain not in DOMAIN_BLACKLIST:
- if domain in list(unknown_domains.keys()):
- unknown_domains[domain] += 1
- else:
- unknown_domains[domain] = 1
- with closing(open(conf('unknowndomains'),
- 'w')) as f:
- for d in unknown_domains:
- f.write(d+' '+str(unknown_domains[d])+'\n')
- oblink = re.sub(r'.*://', '', links[l])
- if domain != "maldormed.domain":
- oblink = censor(oblink, 0.25)
- body += str(l+1) + ': `' + oblink + '` \n'
- if unknowns:
- body += "\nUnknown links are censored to prevent \
- spreading illicit content."
- print(title, end=' ')
- elif kind == 'submission':
- reason = post[4]
- subject = post[5]
- domain = post[6]
- selftext = post[7]
- baseurl = 'https://oauth.reddit.com/user/'+author
- shadowbanned = False
- usersubmissions = {}
- try:
- usersubmissions = fetch(baseurl +
- '/submitted/.json?sort=new&limit=100',
- posted - 30,
- ((str, 'name'),
- (str, 'subreddit'),
- (str, 'author'),
- (float, 'created_utc'),
- (str, 'link_flair_text'),
- (str, 'title'),
- (str, 'domain'),
- (str, 'selftext')),
- kind='noupdate')
- except urllib.error.HTTPError as e:
- if e.code in [403, 404]:
- print("- " + author + " shadowbanned or deleted", end=' ')
- url = 'https://oauth.reddit.com/r/' + sub + '/comments/' + \
- name + '/.json'
- try:
- reddit(url, headers=auth, raw=True)
- print("- deleted")
- return False
- except urllib.error.HTTPError as e:
- if e.code in [403, 404]:
- print("- shadowbanned", end=' ')
- shadowbanned = True
- else:
- raise
- else:
- raise
- usersubmissions = {submission['name']: model_to_tuple(submission, MODELS['submissions'])
- for submission in usersubmissions}
- if post[0] not in list(usersubmissions.keys()) and not shadowbanned:
- print("- deleted by "+author+" ("+((subject[:47]+"...")
- if len(subject) > 50
- else subject)+")")
- return False
- else:
- reason = 'None'
- if not shadowbanned and not post[7]:
- post = usersubmissions[post[0]]
- name = post[0].split('_')[1]
- sub = post[1]
- author = post[2]
- posted = post[3]
- reason = post[4]
- subject = post[5]
- domain = post[6]
- selftext = post[7]
- reason = reason.lower()
- if 'removed' in reason:
- reason = reason.replace('removed', '').strip(' -|—')
- reason = reason.capitalize() or 'None'
- reason = " - reason: "+reason
- else:
- reason = ''
- print("- deleted by mods" + reason)
- lower_frame_boundary = spotted - DELETION_SPACING
- upper_frame_boundary = spotted
- frame_lower = round((float(lower_frame_boundary) - posted) / 60)
- if frame_lower < 0:
- frame_lower = 0
- frame_upper = round((float(upper_frame_boundary) - posted) / 60)
- if frame_lower != frame_upper:
- frame = str(int(frame_lower)) + "-" + str(int(frame_upper))
- else:
- frame = str(int(frame_lower))
- title1 = str(epoch_to_string(short=True) + " - '")
- title2 = str("' (" + domain + ") by /u/" + author +
- " removed from /r/" + sub +
- " within " + frame + "min" + reason)
- if shadowbanned:
- title2 += " (user shadowbanned)"
- restlen = ALLOWED_TITLE_LENGTH - (len(title1) + len(title2))
- if len(subject) > restlen:
- intro = str(subject[:restlen-3].strip(' ./') + "...")
- else:
- intro = str(subject)
- title = title1 + intro + title2
- title = title[:ALLOWED_TITLE_LENGTH]
- linkbase = "/r/" + sub + "/comments/" + name
- link = "https://reddit.com/" + linkbase
- goldfishlink = "http://r.go1dfish.me" + linkbase
- unredditlink = "https://unreddit.com" + linkbase
- body = ""
- links = []
- if selftext:
- links = []
- for url in re.finditer(r'\[([^\]]*)\][ \n]?\(([^\)]*)\)',
- selftext):
- links += [url.group(2)]
- selftext = selftext.replace(url.group(0),
- '[' + url.group(1) + ']^^' +
- str(len(links)) + ' ')
- for url in re.finditer(URLREGEX, selftext):
- links += [url.group(0)]
- selftext = selftext.replace(url.group(0),
- '[link]^^' +
- str(len(links)) + ' ')
- body = "'''\n\n" + selftext + "\n\n'''\n\n"
- body += "[" + subject + "](" + link + ")\n\n"
- body += "[Go1dfish undelete link](" + goldfishlink + ")\n\n"
- body += "[unreddit undelete link](" + unredditlink + ")\n\n"
- body += "Author: /u/" + author
- if links:
- body += "\n\n"
- unknowns = False
- for l in range(len(links)):
- try:
- domain = tld.get_tld(links[l])
- except tld.exceptions.TldBadUrl:
- domain = 'reddit.com'
- except (tld.exceptions.TldDomainNotFound, ValueError):
- domain = 'malformed.domain'
- print("Malformed domain: " + links[l])
- if domain in safe_domains:
- body += str(l+1) + ': ' + links[l] + ' \n'
- else:
- unknowns = True
- if domain not in DOMAIN_BLACKLIST:
- if domain in list(unknown_domains.keys()):
- unknown_domains[domain] += 1
- else:
- unknown_domains[domain] = 1
- with closing(open(conf('unknowndomains'),
- 'w')) as f:
- for d in unknown_domains:
- f.write(d+' '+str(unknown_domains[d])+'\n')
- oblink = re.sub(r'.*://', '', links[l])
- if domain != "maldormed.domain":
- oblink = censor(oblink, 0.25)
- body += str(l+1) + ': `' + oblink + '` \n'
- if unknowns:
- body += "\nUnknown links are censored to prevent \
- spreading illicit content."
- print(title, end=' ')
- title = html.unescape(title)
- body = html.unescape(body)
- if len(body) > 40000:
- body = body[:39900] + '[... post size limit of 40,000 characters reached]'
- post_data = {'sr': 'removalbot', 'title': title,
- 'kind': 'self', 'text': body}
- if not DUMMY:
- response = reddit('https://oauth.reddit.com/api/submit',
- post_data=post_data, headers=auth, raw=True)
- print("- submitted", end=' ')
- sys.stdout.flush()
- if not DUMMY:
- fullname_new = get_fullname_new(response)
- if setflair(fullname_new, kind+'-'+sub.lower()):
- print("- flaired")
- else:
- print("- no flair")
- if not shadowbanned and getv('SELECT author FROM notify WHERE author=?',
- (author,)):
- pm_subject = "Your "+kind+" was deleted from /r/"+sub
- if kind == 'comment':
- content = content.strip(' \n')
- if '\n\n' in content:
- content = '\n\n'+content+'\n\n'
- else:
- content = ' \n **Reason**: '+str(reason)
- content = html.unescape(content)
- pm_body = 'Hello, **'+author+'**!\n\n \n\n'
- pm_body += 'Your '+kind+' appears to have been deleted '
- pm_body += 'from **/r/' + sub + '** by the moderators, '
- pm_body += '/u/AutoModerator or the administrators.\n\n'
- pm_body += ' **'+kind.capitalize()+'**: '+content+' \n'
- pm_body += ' **Posted at**: '+epoch_to_string(posted)+' \n'
- pm_body += ' **Delay until deletion**: '+frame+'min \n'
- pm_body += ' **Link**: '+str(link)+'\n\n \n\n'
- pm_body += 'Have a nice day! \n'
- pm_body += '/u/removalbot\n\n'
- pm_body += '----\n\n'
- pm_body += '^(Note that the deletion may have been accidental '
- pm_body += 'or its detection a false positive caused by heavy load '
- pm_body += 'on reddit\'s servers.) \n'
- pm_body += '^^^This ^^^is ^^^an ^^^automated ^^^message ^^^from '
- pm_body += '^^^/r/removalbot.'
- send_pm(author, pm_subject, pm_body)
- return True
- requests_used = None
- requests_remaining = None
- requests_reset = None
- maintainer = readfile(conf('maintainer')).strip()
- version = readfile(conf('version')).strip()
- USERAGENT = 'removalbot by /u/'+maintainer+', v'+version
- os.chdir(CONFDIR)
- db = sqlite3.connect(conf('posts.db'))
- c = db.cursor()
- auth = login()
- update_flairs()
- if os.path.isfile(conf('error')) and \
- readfile(conf('error')) != 'KeyboardInterrupt':
- failed = os.path.getctime(conf('error'))
- failed = epoch_to_string(epoch=failed)
- reason = readfile(conf('error'))
- send_pm(maintainer, "REMOVALBOT CRASHED", "Reason: "+reason)
- e = reason.split(':')[0]
- print(e, e in ALSO_FATAL)
- if not DUMMY and (SUBMIT_ERRORS or e == "FatalError" or e in ALSO_FATAL):
- print("> Submitting error "+reason, end=' ')
- sys.stdout.flush()
- title = "[!] Bot encountered an error at " + failed + \
- ", reason: " + reason
- post_data = {'sr': 'removalbot', 'kind': 'self', 'title': title}
- if e != "FatalError":
- post_data['text'] = "Such errors usually indicate that reddit is \
- overloaded or in maintenance mode, i. e. \
- they are unavoidable.\n\nA database is used \
- to minimize the impact but cannot negate it."
- else:
- post_data['text'] = "The bot encountered a fatal error. This \
- should not happen. The maintainer has been \
- notified; until further action, the bot is \
- suspended."
- response = reddit('https://oauth.reddit.com/api/submit',
- post_data=post_data, headers=auth, raw=True)
- setflair(get_fullname_new(response), 'error')
- print("- done")
- else:
- print("> Encountered error "+reason)
- os.remove(conf('error'))
- update_notify()
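- # Main scheduler loop: each task runs when its interval counter (epoch
- # divided by the task's spacing) changes; otherwise one pending deletion is
- # verified per pass.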
- while True:
- now = int(current_epoch())
- last_deletion_check = getlast('deletion')
- last_new_check = getlast('new')
- last_subscriber_check = getlast('subscribers')
- last_flair_check = getlast('flair')
- changed = False
- if int(now / DELETION_SPACING) != last_deletion_check:
- check_deletions()
- setlast('deletion', int(now / DELETION_SPACING))
- elif int(now / NEW_SPACING) != last_new_check:
- get_new()
- setlast('new', int(now / NEW_SPACING))
- elif int(now / SUBSCRIBER_SPACING) != last_subscriber_check:
- update_notify()
- setlast('subscribers', int(now / SUBSCRIBER_SPACING))
- elif int(now / FLAIR_SPACING) != last_flair_check:
- update_flairs()
- setlast('flair', int(now / FLAIR_SPACING))
- else:
- deleted_submission = \
- c.execute('SELECT * FROM submissions_deleted LIMIT 1').fetchone()
- deleted_comment = \
- c.execute('SELECT * FROM comments_deleted LIMIT 1').fetchone()
- left = getv('SELECT COUNT(*) FROM (SELECT name FROM comments_deleted \
- UNION SELECT name FROM submissions_deleted)')-1
- if deleted_submission:
- check_user_deletion(deleted_submission, 'submission')
- c.execute('DELETE FROM submissions_deleted WHERE name=?',
- (deleted_submission[0],))
- elif deleted_comment:
- check_user_deletion(deleted_comment, 'comment')
- c.execute('DELETE FROM comments_deleted WHERE name=?',
- (deleted_comment[0],))
- c.execute('PRAGMA shrink_memory')
- if not deleted_submission and not deleted_comment:
- time.sleep(0.1)
- continue
- elif left == 0:
- c_unknown = readfile(conf('unknowndomains')).strip().split('\n')
- print("> Done undeleting, " + str(len(c_unknown)) + \
- " unknown domains logged")
- print()
- if requests_remaining and requests_reset and requests_used:
- requests_remaining = str(int(float(requests_remaining)))
- print("Used: " + requests_used + ", remaining: " + \
- requests_remaining + " in " + requests_reset + "s", end=' ')
- remaining = float(requests_remaining)
- reset = int(requests_reset)
- print("(" + str(round(remaining / reset, 2)) + "/s)", end=' ')
- if remaining < reset:
- sleeptime = str(int(round(float(reset - remaining) /
- remaining, 3) * 1000))
- print("- OVERLOAD, sleep " + sleeptime + "ms")
- else:
- print("- OK\r", end=' ')
- sys.stdout.flush()