import praw
import time
import re
import urllib.request
import warnings
from datetime import datetime

from numpy import interp

warnings.filterwarnings("ignore")
# PRAW 3.x: the first positional argument to praw.Reddit() is the user agent.
USER_AGENT = "Scans for karma bots on popular subreddits. /r/ScanBot"
r = praw.Reddit(USER_AGENT)
me = r.get_redditor("ScanBot")
# Scan list.
subreddits = [
    "aww",
    "funny",
    "wallpaper",
    "freekarma",
    "gifs",
    "pics",
    "gardening",
    "gaming",
    "adviceanimals"
]

domains = [
    "imgur.com",
    "facebook.com",
    "www.dailymail.co.uk",
    "twitter.com",
    "google.com",
    "youtube.com",
    "wordpress.com"
]
# Special flags: [key (case-insensitive), description, term type,
#                 exact match (True) / substring match (False),
#                 subreddit/domain whitelist (empty list = no whitelist)].
# Term types: L = submission link, U = username, T = submission title,
#             F = username format, C = latest comment.
special = [
    ["freetoide", "[FreeToide URL](https://www.reddit.com/r/TheseFuckingAccounts/comments/3sqizy/discovered_a_spam_ring_today_they_all_follow_the/)", "L", False, []],  # FreeToide spam ring URL checker.
    ["freetoide", "[FreeToide Title](https://www.reddit.com/r/TheseFuckingAccounts/comments/3sqizy/discovered_a_spam_ring_today_they_all_follow_the/)", "T", False, []],  # FreeToide spam ring title checker.
    ["WWNNN", "[FreeToide Format](https://www.reddit.com/r/TheseFuckingAccounts/comments/3sqizy/discovered_a_spam_ring_today_they_all_follow_the/)", "F", True, []],  # FreeToide spam ring format checker.
    ["WNNN", "Generic Bot Format", "F", True, []],   # Name format often used by bots.
    ["WNNNN", "Generic Bot Format", "F", True, []],  # Name format often used by bots.
    # Common comments made by bots on their own posts.
    ["best", "Generic Comment", "C", True, []],
    ["awesome", "Generic Comment", "C", True, []],
    ["wao", "Generic Comment", "C", True, []],
    ["wow", "Generic Comment", "C", True, []],
    ["good", "Generic Comment", "C", True, []],
    ["nice", "Generic Comment", "C", True, []],
    ["funny", "Generic Comment", "C", True, []]
]
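
# For reference, a minimal sketch (hypothetical helper, not used below) of how
# one special-flag row is matched; the real logic is inlined in flagger().
# sp[3] picks exact match over substring match, and sp[0] is lowered because
# keys are case-insensitive.
def _flag_matches(sp, term):
    if sp[3]:
        return sp[0].lower() == term
    return sp[0].lower() in term
# e.g. _flag_matches(special[0], "http://freetoide.example/x".lower()) is True.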

# Returns an account's trophies by scraping the profile page; does not use PRAW.
def user_trophies(n):
    aurl = "http://www.reddit.com/user/" + n
    req = urllib.request.Request(aurl, data=None, headers={"User-Agent": USER_AGENT})
    data = str(urllib.request.urlopen(req).read(), encoding="utf-8")
    num = len(re.findall("trophy-name", data))
    trophies = []
    if num == 0:
        trophies.append("None")
    else:
        for i in range(1, num + 1):
            trophies.append(data.split('trophy-name">')[i].split("<")[0])
    return "\n".join(trophies)

# Checks an account's comment history and returns the average time in hours
# between comments, based on the last 5 comments. Returns zero if there is an
# insufficient number of comments.
def comment_rate(u):
    total = 0
    prev = None
    count = 0
    # Comments come back newest first, so prev is always the more recent one.
    for comment in u.get_comments(limit=5):
        cur = datetime.fromtimestamp(comment.created_utc)
        if prev is not None:
            total += (prev - cur).total_seconds() / 3600
            count += 1
        prev = cur
    if count < 4:  # Fewer than 4 gaps, i.e. fewer than 5 comments.
        return 0
    return total / count
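
# Worked example: five comments at t = 0, 1, 2, 3 and 4 hours give four
# one-hour gaps, so comment_rate() returns (1 + 1 + 1 + 1) / 4 = 1.0.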

# Same as above, but returns the average comment score instead.
def comment_average(u):
    scr = 0
    count = 0
    for c in u.get_comments(limit=5):
        scr += c.score
        count += 1
    if count < 5:
        return 0
    return scr / count

# Gets the account age in days.
def user_age(u):
    return (datetime.now() - datetime.fromtimestamp(r.get_redditor(u).created_utc)).days

# Counts digits among the last five characters of an account's name
# (trailing numbers are a common bot tell).
def name_scan(n):
    return sum(c.isdigit() for c in n[-5:])
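# e.g. name_scan("user1234") returns 4, name_scan("alice") returns 0.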

# Flagger algorithm: inspects a submission's author and returns a row of user
# info with a confidence rating of how bot-like the account looks.
def flagger(s, targ, perm, source):
    u = s.author
    n = u.name
    userinfo = []
    age = user_age(u)
    karma = u.comment_karma
    trophies = user_trophies(u.name)
    if (karma <= 50 and age <= 365 * 3 and "Verified Email" not in trophies) or targ:
        # Gather pertinent information.
        userinfo.append(False)
        userinfo.append("/u/" + n)
        userinfo.append(str(age))
        userinfo.append(str(karma))
        if targ:
            userinfo.append("[Requested analysis](" + perm + ") using [first post found](" + s.short_link + ").")
        else:
            userinfo.append("[/r/" + s.subreddit.display_name + " post](" + s.short_link + ")")
        # Calculate the confidence rating.
        cnf = (365 - age) / 300        # Age.
        cnf += name_scan(u.name)       # Trailing numbers in the name.
        cnf -= comment_average(u) - 2  # Average comment karma.
        cnf -= comment_rate(u)         # Average comment rate.
        cnf -= karma / 25              # Total comment karma.
        # Parse the name format. Final alphabet: W = Word, N = Number,
        # L = Letter (isolated), S = Symbol. The intermediate pass also uses
        # U for uppercase and L for lowercase letters.
        f = ""
        out = ""
        word = False
        for c in n:
            if c.isalpha() and c.islower():
                f += "L"
            elif c.isalpha() and c.isupper():
                f += "U"
            elif c.isdigit():
                f += "N"
            else:
                f += "S"
        # Collapse runs of letters into words, keeping isolated letters,
        # numbers and symbols as-is.
        for i in range(len(f)):
            c = f[i]
            p = f[i - 1] if i > 0 else ""
            l = f[i + 1] if i < len(f) - 1 else ""
            if c in "SN":
                if word:
                    word = False
                    out += "W"
                out += c
            if c == "U" and word and not (i == len(f) - 1 or p == "U"):
                out += "W"  # Word boundary at an internal capital (CamelCase).
            if c in "UL":
                if p in "SN" and l in "SN":
                    out += "L"  # A single letter between symbols/numbers.
                else:
                    word = True
            if i == len(f) - 1 and word:
                out += "W"
        # Check the user against the special flags.
        spfl = []
        for sp in special:
            if source in sp[4] or sp[4] == []:
                term = ""
                if sp[2] == "L":
                    term = s.url.lower()
                elif sp[2] == "U":
                    term = n.lower()
                elif sp[2] == "T":
                    term = s.title.lower()
                elif sp[2] == "F":
                    term = out.lower()
                elif sp[2] == "C":
                    for c in u.get_comments(limit=1):
                        term = c.body.lower()
                if term != "":
                    if (not sp[3]) and sp[0].lower() in term:
                        spfl.append(sp[1])
                        cnf += 1
                    elif sp[3] and sp[0].lower() == term:
                        spfl.append(sp[1])
                        cnf += 1
        if spfl == []:
            spfl.append("None")
        # Map the raw score onto a 0-100 confidence percentage.
        cnf = round(interp(cnf, [-20, 5.5], [0, 100]))
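        # e.g. a raw score of -7.25 is halfway along [-20, 5.5], so interp
        # returns 50; scores outside the range clamp to 0 or 100.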
        # Append the extrapolated data.
        userinfo.append(str(cnf))
        userinfo.append(", ".join(spfl))
        userinfo.append(out)
    else:
        userinfo.append(True)
    return userinfo

# Checks whether a user exists.
def exists(n):
    try:
        r.get_redditor(n, fetch=True)
        return True
    except Exception:
        return False

# Formats userdump data as a markdown table.
def frmt(userdump):
    out = ("[How to interpret these lists.](https://www.reddit.com/r/ScanBot/wiki/index#wiki_interpreting_a_user_list.)\n\n"
           "Deleted|Username|Account Age|Comment Karma|From|Confidence|Special Flags|Name Format\n"
           ":--|:--|:--|:--|:--|:--|:--|:--\n")
    for ui in userdump:
        out += str(ui[0]) + "|" + "|".join(ui[1:8]) + "\n"
    return out
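
# Illustrative row (hypothetical values):
# False|/u/example|12|3|[/r/pics post](https://redd.it/xxxxxx)|87|None|WNNN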

# Strips the formatting from userdump data and converts it back to a nested
# list, refreshing each account's deletion status along the way.
def strp(udtext):
    out = []
    lines = udtext.split("\n")
    for count, l in enumerate(lines):
        # Skip the wiki link, blank line, table header and separator (the
        # first four lines) as well as the trailing empty line.
        if 3 < count < len(lines) - 1:
            args = l.split("|")
            f_args = [not exists(args[1][3:])]  # args[1][3:] drops "/u/".
            f_args.extend(args[1:8])
            out.append(f_args)
    return out
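
# frmt() and strp() are inverses apart from the first column: strp(frmt(x))
# rebuilds x but refreshes each row's deletion flag via exists().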

while True:
    try:
        print("Initializing.")
        # Log in and send the user agent. Too dumb to use OAuth :(
        # The password was redacted in the original paste; supply your own.
        r.login(username="ScanBot", password="", disable_warning=True)
        # Update the /r/ScanBot sidebar.
        try:
            r.update_settings(r.get_subreddit("ScanBot"), description="\n\nUsername dump for accounts automatically determined to be bots, spammers, farmers and/or shills by /u/ScanBot. Novelty accounts will be ignored for the most part.\n\nScan list: \n\n/r/" + "\n\n/r/".join(subreddits) + "\n\nwww.reddit.com/domain/" + "\n\nwww.reddit.com/domain/".join(domains) + "\n\n-----\n\nMessage /u/GregTJ if you want your subreddit or domain removed from the scan list.")
        except Exception:
            pass  # The sidebar update is non-critical.
        print("Initialized.")
        print("")
        # Main loop.
        while True:
            # Load the latest userlist and update user deletion statuses.
            UD_Post = next(me.get_submitted(limit=1))
            flagged = strp(UD_Post.selftext)
            # Create a new post if the current one is full.
            if len(flagged) >= 30:
                r.submit("ScanBot", "Flagged Users " + time.strftime("%m-%d-%Y %H:%M"), text="")
                UD_Post = next(me.get_submitted(limit=1))
                flagged = strp(UD_Post.selftext)
            print("Scanning:")
            # Scan subreddits and domains, skipping authors already flagged.
            for sr in subreddits:
                print(" /r/" + sr)
                for s in r.get_subreddit(sr).get_new(limit=5):
                    unq = True
                    for ui in flagged:
                        if s.author.name in ui[1]:
                            unq = False
                    if unq:
                        uinf = flagger(s, False, "", sr)
                        if not uinf[0]:
                            flagged.append(uinf)
            for do in domains:
                print(" /domain/" + do)
                for s in r.get_domain_listing(do, sort="new", limit=5):
                    unq = True
                    for ui in flagged:
                        if s.author.name in ui[1]:
                            unq = False
                    if unq:
                        uinf = flagger(s, False, "", do)
                        if not uinf[0]:
                            flagged.append(uinf)
- print("")
- print("Replying to new username mentions.")
- # Reply to analysis requests.
- for um in filter(lambda x: x.new, r.get_mentions()):
- um.mark_as_read()
- if len(um.body.split(" ")) > 1 and exists(um.body.split(" ")[1][3:]):
- request = flagger(next(r.get_redditor(um.body.split(" ")[1][3:]).get_submitted(limit=1)), True, um.permalink, "request")
- flagged.append(request)
- verdict = ""
- cnf = int(request[5])
- if cnf < 20:
- verdict = "Probably not a bot."
- if cnf >= 20:
- verdict = "Slightly suspicious."
- if cnf > 49:
- verdict = "Very suspicious."
- if cnf > 92:
- verdict = "Probably a bot."
- request[0] = "N/A"
- um.reply(frmt([request]) +"\n\nVerdict: "+verdict)
            # Upload the data.
            UD_Post.edit(frmt(flagged))
            UD_Post.approve()
            print("")
            print("Uploaded new data.")
            print("")
    except Exception as e:
        print("Fatal error, restarting: " + str(e))