Advertisement
Guest User

ScanBot

a guest
Nov 15th, 2015
7
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.89 KB | None | 0 0
import operator
import os
import re
import time
import urllib
import urllib.request
import warnings
from datetime import datetime

import praw
from numpy import interp

# Silence library deprecation warnings so the console log stays readable.
warnings.filterwarnings("ignore")

# PRAW session; the string is the user-agent sent with every request.
r = praw.Reddit("Scans for karma bots on popular subreddits. /r/ScanBot")
# Handle to the bot's own account, used later to find its latest dump post.
me = r.get_redditor("ScanBot")

  15. # Scan list.
  16. subreddits = [
  17.  
  18. "aww",
  19. "funny",
  20. "wallpaper",
  21. "freekarma",
  22. "gifs",
  23. "pics",
  24. "gardening",
  25. "gaming",
  26. "adviceanimals"
  27.  
  28. ]
  29.  
  30. domains = [
  31.  
  32. "imgur.com",
  33. "facebook.com",
  34. "www.dailymail.co.uk",
  35. "twitter.com",
  36. "google.com",
  37. "youtube.com",
  38. "wordpress.com"
  39.  
  40. ]
  41.  
  42. # Special flags [Key (not case sensitive), Description, Term Type, Term Equals or Contains (Boolean), Subreddit/Domain Whitelist (leave empty for no whitelist)]
  43. # Types: L = Submission Link, U = Username, T = Submission Title, F = Username Format, C = Latest Comment.
  44. special = [
  45.  
  46. ["freetoide", "[FreeToide URL](https://www.reddit.com/r/TheseFuckingAccounts/comments/3sqizy/discovered_a_spam_ring_today_they_all_follow_the/)", "L", False, []], # Freetoide spam ring URL checker.
  47. ["freetoide", "[FreeToide Title](https://www.reddit.com/r/TheseFuckingAccounts/comments/3sqizy/discovered_a_spam_ring_today_they_all_follow_the/)", "T", False, []], # Freetoide spam ring title checker.
  48. ["WWNNN", "[FreeToide Format](https://www.reddit.com/r/TheseFuckingAccounts/comments/3sqizy/discovered_a_spam_ring_today_they_all_follow_the/)", "F", True, []], # Freetoide spam ring format checker.
  49.  
  50. ["WNNN", "Generic Bot Format", "F", True, []], # Name format often used by bots.
  51. ["WNNNN", "Generic Bot Format", "F", True, []], # Name format often used by bots.
  52.  
  53. # Common comments made by bots on their own posts.
  54. ["best", "Generic Comment", "C", True, []],
  55. ["awesome", "Generic Comment", "C", True, []],
  56. ["wao", "Generic Comment", "C", True, []],
  57. ["wow", "Generic Comment", "C", True, []],
  58. ["good", "Generic Comment", "C", True, []],
  59. ["nice", "Generic Comment", "C", True, []],
  60. ["funny", "Generic Comment", "C", True, []],
  61. ["wow", "Generic Comment", "C", True, []]
  62.  
  63. ]
  64.  
  65. # Returns an account's trophies, does not use PRAW.
  66. def user_trophies (n):
  67. aurl = "http://www.reddit.com/user/" + n
  68. req = urllib.request.Request(aurl, data=None, headers={'User-Agent': "Scans for karma bots on popular subreddits. /r/ScanBot"})
  69. data = str(urllib.request.urlopen(req).read(), encoding="utf-8")
  70. trophies_matches = re.findall("trophy-name", data)
  71. trophies = []
  72. num = len(trophies_matches)
  73. if num == 0:
  74. trophies.append("None")
  75. else:
  76. for i in range(1, num + 1):
  77. trophies.append(data.split("trophy-name\">")[i].split("<")[0])
  78.  
  79. return "\n".join(trophies)
  80.  
  81. # Checks an account's comment history and returns average time in minutes between comments based on the last 5 comments. Returns zero if there are an insuffienct amount of comments.
  82. def comment_rate(u):
  83. time = 0
  84. b = ""
  85. a = ""
  86. count = 0
  87. for comment in u.get_comments(limit=5):
  88. b = a
  89. a = datetime.fromtimestamp(comment.created_utc)
  90. if not b == "":
  91. time += (a-b).seconds / 60 / 60
  92. count += 1
  93.  
  94. if count < 4:
  95. time = 0
  96. else:
  97. time /= count
  98.  
  99. return time
  100.  
  101. # Same as above but returns average comment score instead.
  102. def comment_average(u):
  103. scr = 0
  104. count = 0
  105. for c in u.get_comments(limit=5):
  106. scr += c.score
  107. count += 1
  108.  
  109. if count < 5:
  110. scr = 0
  111. else:
  112. scr /= 5
  113.  
  114. return scr
  115.  
  116. # Gets account age in days.
  117. def user_age(u):
  118. return (datetime.now() - datetime.fromtimestamp(r.get_redditor(u).created_utc)).days
  119.  
  120. # Checks an account's name for trailing numbers.
  121. def name_scan(n):
  122. match = 0
  123. for i in range(0,5):
  124. if n[len(n)-1-i:len(n)-i].isdigit():
  125. match += 1
  126. return match
  127.  
  128. # Flagger algo.
  129. def flagger(s, targ, perm, source):
  130. u = s.author
  131. n = u.name
  132. userinfo = []
  133. age = user_age(u)
  134. karma = u.comment_karma
  135. trophies = user_trophies(u.name)
  136.  
  137. if (karma <= 50 and age <= 365*3 and not "Verified Email" in trophies) or targ:
  138. # Gather pertinent information.
  139. userinfo.append(False)
  140. userinfo.append("/u/"+n)
  141. userinfo.append(str(age))
  142. userinfo.append(str(karma))
  143. if targ:
  144. userinfo.append("[Requested analysis]("+perm+") using [first post found]("+s.short_link+").")
  145. else:
  146. userinfo.append("[/r/" + s.subreddit.display_name+" post]("+s.short_link+")")
  147.  
  148. # Calculate confidence rating.
  149. cnf = (356 - age) / 300 # Age.
  150. cnf += name_scan(u.name) # Trailing numbers in name.
  151. cnf -= comment_average(u)-2 # Average comment karma.
  152. cnf -= comment_rate(u) # Average comment rate.
  153. cnf -= karma / 25 # Total comment karma.
  154.  
  155. # Parse name format. W = Word, N = Number, L = Letter, S = Symbol
  156. f = ""
  157. out = ""
  158. word = False
  159. for i in range(0, len(n)):
  160. c = n[i:i+1]
  161. if c.islower() and c.isalpha():
  162. f += "L"
  163. elif c.isupper() and c.isalpha():
  164. f += "U"
  165. elif c.isdigit():
  166. f += "N"
  167. else:
  168. f += "S"
  169. for i in range(0, len(f)):
  170. p = ""
  171. c = f[i:i+1]
  172. l = ""
  173. if i > 0:
  174. p = f[i-1:i]
  175. if i < len(n)-1:
  176. l = f[i+1:i+2]
  177. if c in "SN":
  178. if word:
  179. word = False
  180. out += "W"
  181. out+=c
  182. if c == "U" and word and not (i == len(n)-1 or p == "U"):
  183. out += "W"
  184. if c in "UL":
  185. if p in "SN" and l in "SN":
  186. out += "L"
  187. else:
  188. word = True
  189. if i == len(n)-1 and word:
  190. out += "W"
  191.  
  192. # Check user for special flags.
  193. spfl = []
  194. for sp in special:
  195. if source in sp[4] or sp[4] == []:
  196. term = ""
  197. if sp[2] == "L":
  198. term = n.lower()
  199. elif sp[2] == "U":
  200. term = s.url.lower()
  201. elif sp[2] == "T":
  202. term = s.title.lower()
  203. elif sp[2] == "F":
  204. term = out.lower()
  205. elif sp[2] == "C":
  206. for c in u.get_comments(limit=1):
  207. term = c.body
  208. if not term == "":
  209. if (not sp[3]) and sp[0].lower() in term:
  210. spfl.append(sp[1])
  211. cnf += 1
  212. if sp[3] and sp[0].lower() == term:
  213. spfl.append(sp[1])
  214. cnf += 1
  215. if spfl == []:
  216. spfl.append("None")
  217.  
  218. cnf = round(interp(cnf, [-20,5.5],[0,100]))
  219.  
  220. # Append extrapolated data.
  221. userinfo.append(str(cnf))
  222. userinfo.append(", ".join(spfl))
  223. userinfo.append(out)
  224.  
  225. else:
  226. userinfo.append(True)
  227.  
  228. return userinfo
  229.  
  230. # Checks if user exists.
  231. def exists(n):
  232. try:
  233. r.get_redditor(n, fetch = True)
  234. return True
  235. except:
  236. return False
  237.  
  238. # Formats userdump data.
  239. def frmt(userdump):
  240. out = "[How to interpret these lists.](https://www.reddit.com/r/ScanBot/wiki/index#wiki_interpreting_a_user_list.)\n\nDeleted|Username|Account Age|Comment Karma|From|Confidence|Special Flags|Name Format\n:--|:--|:--|:--|:--|:--|:--|:--\n"
  241. for ui in userdump:
  242. out += str(ui[0]) + "|"
  243. for i in range(1,7):
  244. out += ui[i] + "|"
  245. out += ui[7] + "\n"
  246. return out
  247.  
  248. # Strips formatting from userdump data and converts it back to a nested list. Also updates account deletion statuses.
  249. def strp(udtext):
  250. out = []
  251. lines = udtext.split("\n")
  252. count = 0
  253. for l in lines:
  254. if count > 3 and count < len(lines) - 1:
  255. f_args = []
  256. args = l.split("|")
  257. f_args.append(not exists(args[1][3:]))
  258. for i in range(1,8):
  259. f_args.append(args[i])
  260. out.append(f_args)
  261.  
  262. count += 1
  263.  
  264. return out
  265.  
  266. while True:
  267. try:
  268. print("Initializing.")
  269.  
  270. # Log in and send user agent then update /r/ScanBot sidebar. Too dumb to use OAuth :(
  271. r.login(username = "ScanBot", password = , disable_warning=True)
  272.  
  273. # Update /u/ScanBot sidebar.
  274. try:
  275. r.update_settings(r.get_subreddit("ScanBot"), description="\n\nUsername dump for accounts automatically determined to be bots, spammers, farmers and/or shills by /u/ScanBot. Novelty accounts will be ignored for the most part.\n\nScan list: \n\n/r/"+('\n\n/r/'.join(subreddits))+"\n\nwww.reddit.com/domain/"+'\n\nwww.reddit.com/domain/'.join(domains)+"\n\n-----\n\nMessage /u/GregTJ if you want your subreddit or domain removed from the scan list.")
  276. except Exception:
  277. Exception = ""
  278.  
  279. print("Initialized.")
  280. print("")
  281.  
  282. # Main loop
  283. while True:
  284.  
  285. # Load latest userlist and update user deletion status.
  286. UD_Post = next(me.get_submitted(limit = 1))
  287. flagged = strp(UD_Post.selftext)
  288.  
  289. # Create new post if current one is full.
  290. if len(flagged) >= 30:
  291. r.submit("ScanBot","Flagged Users "+time.strftime("%m-%d-%Y %H:%M"), text = "")
  292. UD_Post = next(me.get_submitted(limit = 1))
  293. flagged = strp(UD_Post.selftext)
  294.  
  295. print("Scanning:")
  296.  
  297. # Scan subreddits and domains.
  298. for sr in subreddits:
  299. print(" /r/"+sr)
  300. for s in r.get_subreddit(sr).get_new(limit=5):
  301. unq = True
  302. for ui in flagged:
  303. if s.author.name in ui[1]:
  304. unq = False
  305. if unq:
  306. uinf = flagger(s, False, "", sr)
  307. if not uinf[0]:
  308. flagged.append(uinf)
  309.  
  310. for do in domains:
  311. print(" /domain/"+do)
  312. for s in r.get_domain_listing(do, sort = 'new', limit=5):
  313. unq = True
  314. for ui in flagged:
  315. if s.author.name in ui[1]:
  316. unq = False
  317. if unq:
  318. uinf = flagger(s, False, "", do)
  319. if not uinf[0]:
  320. flagged.append(uinf)
  321.  
  322. print("")
  323. print("Replying to new username mentions.")
  324.  
  325. # Reply to analysis requests.
  326. for um in filter(lambda x: x.new, r.get_mentions()):
  327. um.mark_as_read()
  328. if len(um.body.split(" ")) > 1 and exists(um.body.split(" ")[1][3:]):
  329. request = flagger(next(r.get_redditor(um.body.split(" ")[1][3:]).get_submitted(limit=1)), True, um.permalink, "request")
  330. flagged.append(request)
  331. verdict = ""
  332. cnf = int(request[5])
  333. if cnf < 20:
  334. verdict = "Probably not a bot."
  335. if cnf >= 20:
  336. verdict = "Slightly suspicious."
  337. if cnf > 49:
  338. verdict = "Very suspicious."
  339. if cnf > 92:
  340. verdict = "Probably a bot."
  341. request[0] = "N/A"
  342. um.reply(frmt([request]) +"\n\nVerdict: "+verdict)
  343.  
  344. # Upload data.
  345. UD_Post.edit(frmt(flagged))
  346. UD_Post.approve()
  347.  
  348. print("")
  349. print("Uploaded new data.")
  350. print("")
  351. except:
  352. print("Fatal error, restarting.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement