Advertisement
Guest User

ScanBot

a guest
Nov 15th, 2015
7
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.89 KB | None | 0 0
import operator
import os
import re
import time
import urllib
import urllib.request
import warnings
from datetime import datetime

import praw
from numpy import interp

# Silence library deprecation warnings so the console log stays readable.
warnings.filterwarnings("ignore")

# PRAW session; the string is the user-agent sent with every request.
r = praw.Reddit("Scans for karma bots on popular subreddits. /r/ScanBot")
# Handle to the bot's own account, used later to find its latest dump post.
me = r.get_redditor("ScanBot")

  15. # Scan list.
  16. subreddits = [
  17.  
  18. "aww",
  19. "funny",
  20. "wallpaper",
  21. "freekarma",
  22. "gifs",
  23. "pics",
  24. "gardening",
  25. "gaming",
  26. "adviceanimals"
  27.  
  28. ]
  29.  
  30. domains = [
  31.  
  32. "imgur.com",
  33. "facebook.com",
  34. "www.dailymail.co.uk",
  35. "twitter.com",
  36. "google.com",
  37. "youtube.com",
  38. "wordpress.com"
  39.  
  40. ]
  41.  
  42. # Special flags [Key (not case sensitive), Description, Term Type, Term Equals or Contains (Boolean), Subreddit/Domain Whitelist (leave empty for no whitelist)]
  43. # Types: L = Submission Link, U = Username, T = Submission Title, F = Username Format, C = Latest Comment.
  44. special = [
  45.  
  46. ["freetoide", "[FreeToide URL](https://www.reddit.com/r/TheseFuckingAccounts/comments/3sqizy/discovered_a_spam_ring_today_they_all_follow_the/)", "L", False, []], # Freetoide spam ring URL checker.
  47. ["freetoide", "[FreeToide Title](https://www.reddit.com/r/TheseFuckingAccounts/comments/3sqizy/discovered_a_spam_ring_today_they_all_follow_the/)", "T", False, []], # Freetoide spam ring title checker.
  48. ["WWNNN", "[FreeToide Format](https://www.reddit.com/r/TheseFuckingAccounts/comments/3sqizy/discovered_a_spam_ring_today_they_all_follow_the/)", "F", True, []], # Freetoide spam ring format checker.
  49.  
  50. ["WNNN", "Generic Bot Format", "F", True, []], # Name format often used by bots.
  51. ["WNNNN", "Generic Bot Format", "F", True, []], # Name format often used by bots.
  52.  
  53. # Common comments made by bots on their own posts.
  54. ["best", "Generic Comment", "C", True, []],
  55. ["awesome", "Generic Comment", "C", True, []],
  56. ["wao", "Generic Comment", "C", True, []],
  57. ["wow", "Generic Comment", "C", True, []],
  58. ["good", "Generic Comment", "C", True, []],
  59. ["nice", "Generic Comment", "C", True, []],
  60. ["funny", "Generic Comment", "C", True, []],
  61. ["wow", "Generic Comment", "C", True, []]
  62.  
  63. ]
  64.  
  65. # Returns an account's trophies, does not use PRAW.
  66. def user_trophies (n):
  67. aurl = "http://www.reddit.com/user/" + n
  68. req = urllib.request.Request(aurl, data=None, headers={'User-Agent': "Scans for karma bots on popular subreddits. /r/ScanBot"})
  69. data = str(urllib.request.urlopen(req).read(), encoding="utf-8")
  70. trophies_matches = re.findall("trophy-name", data)
  71. trophies = []
  72. num = len(trophies_matches)
  73. if num == 0:
  74. trophies.append("None")
  75. else:
  76. for i in range(1, num + 1):
  77. trophies.append(data.split("trophy-name\">")[i].split("<")[0])
  78.  
  79. return "\n".join(trophies)
  80.  
  81. # Checks an account's comment history and returns average time in minutes between comments based on the last 5 comments. Returns zero if there are an insuffienct amount of comments.
  82. def comment_rate(u):
  83. time = 0
  84. b = ""
  85. a = ""
  86. count = 0
  87. for comment in u.get_comments(limit=5):
  88. b = a
  89. a = datetime.fromtimestamp(comment.created_utc)
  90. if not b == "":
  91. time += (a-b).seconds / 60 / 60
  92. count += 1
  93.  
  94. if count < 4:
  95. time = 0
  96. else:
  97. time /= count
  98.  
  99. return time
  100.  
  101. # Same as above but returns average comment score instead.
  102. def comment_average(u):
  103. scr = 0
  104. count = 0
  105. for c in u.get_comments(limit=5):
  106. scr += c.score
  107. count += 1
  108.  
  109. if count < 5:
  110. scr = 0
  111. else:
  112. scr /= 5
  113.  
  114. return scr
  115.  
  116. # Gets account age in days.
  117. def user_age(u):
  118. return (datetime.now() - datetime.fromtimestamp(r.get_redditor(u).created_utc)).days
  119.  
  120. # Checks an account's name for trailing numbers.
  121. def name_scan(n):
  122. match = 0
  123. for i in range(0,5):
  124. if n[len(n)-1-i:len(n)-i].isdigit():
  125. match += 1
  126. return match
  127.  
  128. # Flagger algo.
  129. def flagger(s, targ, perm, source):
  130. u = s.author
  131. n = u.name
  132. userinfo = []
  133. age = user_age(u)
  134. karma = u.comment_karma
  135. trophies = user_trophies(u.name)
  136.  
  137. if (karma <= 50 and age <= 365*3 and not "Verified Email" in trophies) or targ:
  138. # Gather pertinent information.
  139. userinfo.append(False)
  140. userinfo.append("/u/"+n)
  141. userinfo.append(str(age))
  142. userinfo.append(str(karma))
  143. if targ:
  144. userinfo.append("[Requested analysis]("+perm+") using [first post found]("+s.short_link+").")
  145. else:
  146. userinfo.append("[/r/" + s.subreddit.display_name+" post]("+s.short_link+")")
  147.  
  148. # Calculate confidence rating.
  149. cnf = (356 - age) / 300 # Age.
  150. cnf += name_scan(u.name) # Trailing numbers in name.
  151. cnf -= comment_average(u)-2 # Average comment karma.
  152. cnf -= comment_rate(u) # Average comment rate.
  153. cnf -= karma / 25 # Total comment karma.
  154.  
  155. # Parse name format. W = Word, N = Number, L = Letter, S = Symbol
  156. f = ""
  157. out = ""
  158. word = False
  159. for i in range(0, len(n)):
  160. c = n[i:i+1]
  161. if c.islower() and c.isalpha():
  162. f += "L"
  163. elif c.isupper() and c.isalpha():
  164. f += "U"
  165. elif c.isdigit():
  166. f += "N"
  167. else:
  168. f += "S"
  169. for i in range(0, len(f)):
  170. p = ""
  171. c = f[i:i+1]
  172. l = ""
  173. if i > 0:
  174. p = f[i-1:i]
  175. if i < len(n)-1:
  176. l = f[i+1:i+2]
  177. if c in "SN":
  178. if word:
  179. word = False
  180. out += "W"
  181. out+=c
  182. if c == "U" and word and not (i == len(n)-1 or p == "U"):
  183. out += "W"
  184. if c in "UL":
  185. if p in "SN" and l in "SN":
  186. out += "L"
  187. else:
  188. word = True
  189. if i == len(n)-1 and word:
  190. out += "W"
  191.  
  192. # Check user for special flags.
  193. spfl = []
  194. for sp in special:
  195. if source in sp[4] or sp[4] == []:
  196. term = ""
  197. if sp[2] == "L":
  198. term = n.lower()
  199. elif sp[2] == "U":
  200. term = s.url.lower()
  201. elif sp[2] == "T":
  202. term = s.title.lower()
  203. elif sp[2] == "F":
  204. term = out.lower()
  205. elif sp[2] == "C":
  206. for c in u.get_comments(limit=1):
  207. term = c.body
  208. if not term == "":
  209. if (not sp[3]) and sp[0].lower() in term:
  210. spfl.append(sp[1])
  211. cnf += 1
  212. if sp[3] and sp[0].lower() == term:
  213. spfl.append(sp[1])
  214. cnf += 1
  215. if spfl == []:
  216. spfl.append("None")
  217.  
  218. cnf = round(interp(cnf, [-20,5.5],[0,100]))
  219.  
  220. # Append extrapolated data.
  221. userinfo.append(str(cnf))
  222. userinfo.append(", ".join(spfl))
  223. userinfo.append(out)
  224.  
  225. else:
  226. userinfo.append(True)
  227.  
  228. return userinfo
  229.  
  230. # Checks if user exists.
  231. def exists(n):
  232. try:
  233. r.get_redditor(n, fetch = True)
  234. return True
  235. except:
  236. return False
  237.  
  238. # Formats userdump data.
  239. def frmt(userdump):
  240. out = "[How to interpret these lists.](https://www.reddit.com/r/ScanBot/wiki/index#wiki_interpreting_a_user_list.)\n\nDeleted|Username|Account Age|Comment Karma|From|Confidence|Special Flags|Name Format\n:--|:--|:--|:--|:--|:--|:--|:--\n"
  241. for ui in userdump:
  242. out += str(ui[0]) + "|"
  243. for i in range(1,7):
  244. out += ui[i] + "|"
  245. out += ui[7] + "\n"
  246. return out
  247.  
  248. # Strips formatting from userdump data and converts it back to a nested list. Also updates account deletion statuses.
  249. def strp(udtext):
  250. out = []
  251. lines = udtext.split("\n")
  252. count = 0
  253. for l in lines:
  254. if count > 3 and count < len(lines) - 1:
  255. f_args = []
  256. args = l.split("|")
  257. f_args.append(not exists(args[1][3:]))
  258. for i in range(1,8):
  259. f_args.append(args[i])
  260. out.append(f_args)
  261.  
  262. count += 1
  263.  
  264. return out
  265.  
  266. while True:
  267. try:
  268. print("Initializing.")
  269.  
  270. # Log in and send user agent then update /r/ScanBot sidebar. Too dumb to use OAuth :(
  271. r.login(username = "ScanBot", password = , disable_warning=True)
  272.  
  273. # Update /u/ScanBot sidebar.
  274. try:
  275. r.update_settings(r.get_subreddit("ScanBot"), description="\n\nUsername dump for accounts automatically determined to be bots, spammers, farmers and/or shills by /u/ScanBot. Novelty accounts will be ignored for the most part.\n\nScan list: \n\n/r/"+('\n\n/r/'.join(subreddits))+"\n\nwww.reddit.com/domain/"+'\n\nwww.reddit.com/domain/'.join(domains)+"\n\n-----\n\nMessage /u/GregTJ if you want your subreddit or domain removed from the scan list.")
  276. except Exception:
  277. Exception = ""
  278.  
  279. print("Initialized.")
  280. print("")
  281.  
  282. # Main loop
  283. while True:
  284.  
  285. # Load latest userlist and update user deletion status.
  286. UD_Post = next(me.get_submitted(limit = 1))
  287. flagged = strp(UD_Post.selftext)
  288.  
  289. # Create new post if current one is full.
  290. if len(flagged) >= 30:
  291. r.submit("ScanBot","Flagged Users "+time.strftime("%m-%d-%Y %H:%M"), text = "")
  292. UD_Post = next(me.get_submitted(limit = 1))
  293. flagged = strp(UD_Post.selftext)
  294.  
  295. print("Scanning:")
  296.  
  297. # Scan subreddits and domains.
  298. for sr in subreddits:
  299. print(" /r/"+sr)
  300. for s in r.get_subreddit(sr).get_new(limit=5):
  301. unq = True
  302. for ui in flagged:
  303. if s.author.name in ui[1]:
  304. unq = False
  305. if unq:
  306. uinf = flagger(s, False, "", sr)
  307. if not uinf[0]:
  308. flagged.append(uinf)
  309.  
  310. for do in domains:
  311. print(" /domain/"+do)
  312. for s in r.get_domain_listing(do, sort = 'new', limit=5):
  313. unq = True
  314. for ui in flagged:
  315. if s.author.name in ui[1]:
  316. unq = False
  317. if unq:
  318. uinf = flagger(s, False, "", do)
  319. if not uinf[0]:
  320. flagged.append(uinf)
  321.  
  322. print("")
  323. print("Replying to new username mentions.")
  324.  
  325. # Reply to analysis requests.
  326. for um in filter(lambda x: x.new, r.get_mentions()):
  327. um.mark_as_read()
  328. if len(um.body.split(" ")) > 1 and exists(um.body.split(" ")[1][3:]):
  329. request = flagger(next(r.get_redditor(um.body.split(" ")[1][3:]).get_submitted(limit=1)), True, um.permalink, "request")
  330. flagged.append(request)
  331. verdict = ""
  332. cnf = int(request[5])
  333. if cnf < 20:
  334. verdict = "Probably not a bot."
  335. if cnf >= 20:
  336. verdict = "Slightly suspicious."
  337. if cnf > 49:
  338. verdict = "Very suspicious."
  339. if cnf > 92:
  340. verdict = "Probably a bot."
  341. request[0] = "N/A"
  342. um.reply(frmt([request]) +"\n\nVerdict: "+verdict)
  343.  
  344. # Upload data.
  345. UD_Post.edit(frmt(flagged))
  346. UD_Post.approve()
  347.  
  348. print("")
  349. print("Uploaded new data.")
  350. print("")
  351. except:
  352. print("Fatal error, restarting.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement