ScanBot (Nov 15th, 2015)

import praw
import time
import operator
import warnings
import re
import urllib.request  # urllib.request is used below, so import the submodule explicitly.
from datetime import datetime
from numpy import interp

warnings.filterwarnings("ignore")

r = praw.Reddit("Scans for karma bots on popular subreddits. /r/ScanBot")
me = r.get_redditor("ScanBot")

# Scan list.
subreddits = [
    "aww",
    "funny",
    "wallpaper",
    "freekarma",
    "gifs",
    "pics",
    "gardening",
    "gaming",
    "adviceanimals"
]

domains = [
    "imgur.com",
    "facebook.com",
    "www.dailymail.co.uk",
    "twitter.com",
    "google.com",
    "youtube.com",
    "wordpress.com"
]

# Special flags: [Key (case-insensitive), Description, Term Type,
# Exact Match (True = term must equal the key, False = term need only contain it),
# Subreddit/Domain Whitelist (leave empty for no whitelist)].
# Term Types: L = Submission Link, U = Username, T = Submission Title, F = Username Format, C = Latest Comment.
special = [

    ["freetoide", "[FreeToide URL](https://www.reddit.com/r/TheseFuckingAccounts/comments/3sqizy/discovered_a_spam_ring_today_they_all_follow_the/)", "L", False, []],  # FreeToide spam ring URL checker.
    ["freetoide", "[FreeToide Title](https://www.reddit.com/r/TheseFuckingAccounts/comments/3sqizy/discovered_a_spam_ring_today_they_all_follow_the/)", "T", False, []],  # FreeToide spam ring title checker.
    ["WWNNN", "[FreeToide Format](https://www.reddit.com/r/TheseFuckingAccounts/comments/3sqizy/discovered_a_spam_ring_today_they_all_follow_the/)", "F", True, []],  # FreeToide spam ring format checker.

    ["WNNN", "Generic Bot Format", "F", True, []],   # Name format often used by bots.
    ["WNNNN", "Generic Bot Format", "F", True, []],  # Name format often used by bots.

    # Common comments made by bots on their own posts.
    ["best", "Generic Comment", "C", True, []],
    ["awesome", "Generic Comment", "C", True, []],
    ["wao", "Generic Comment", "C", True, []],
    ["wow", "Generic Comment", "C", True, []],
    ["good", "Generic Comment", "C", True, []],
    ["nice", "Generic Comment", "C", True, []],
    ["funny", "Generic Comment", "C", True, []]

]

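# A hypothetical whitelisted entry, for illustration only (not part of the live list):
# ["buy followers", "Spam Phrase", "T", False, ["funny"]] would flag a title containing
# the substring "buy followers" only for posts in /r/funny (or scans sourced from it).
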
# Returns an account's trophies by scraping the profile page; does not use PRAW.
def user_trophies(n):
    aurl = "http://www.reddit.com/user/" + n
    req = urllib.request.Request(aurl, data=None, headers={'User-Agent': "Scans for karma bots on popular subreddits. /r/ScanBot"})
    data = str(urllib.request.urlopen(req).read(), encoding="utf-8")
    trophies_matches = re.findall("trophy-name", data)
    trophies = []
    num = len(trophies_matches)
    if num == 0:
        trophies.append("None")
    else:
        for i in range(1, num + 1):
            trophies.append(data.split("trophy-name\">")[i].split("<")[0])

    return "\n".join(trophies)

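# Usage sketch (output shape follows from the parsing above; the names are examples):
# user_trophies("example_user") returns the trophy names joined by newlines, e.g.
# "Verified Email\nOne-Year Club", or "None" when the profile lists no trophies.
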
# Checks an account's comment history and returns the average time in hours between
# the last 5 comments. Returns zero if there is an insufficient number of comments.
def comment_rate(u):
    time = 0
    b = ""
    a = ""
    count = 0
    for comment in u.get_comments(limit=5):
        b = a
        a = datetime.fromtimestamp(comment.created_utc)
        if b != "":
            # get_comments yields newest-first, so b is the newer timestamp of the pair.
            time += (b - a).total_seconds() / 60 / 60
            count += 1

    if count < 4:
        time = 0
    else:
        time /= count

    return time

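# Worked example: five comments posted exactly one hour apart give four gaps of
# 1.0 hour each, so comment_rate returns (4 * 1.0) / 4 = 1.0.
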
# Same as above but returns the average score of the last 5 comments instead.
def comment_average(u):
    scr = 0
    count = 0
    for c in u.get_comments(limit=5):
        scr += c.score
        count += 1

    if count < 5:
        scr = 0
    else:
        scr /= 5

    return scr

# Gets account age in days.
def user_age(u):
    return (datetime.now() - datetime.fromtimestamp(r.get_redditor(u).created_utc)).days

# Counts the digits among the last five characters of an account's name
# (used to spot trailing numbers).
def name_scan(n):
    match = 0
    for i in range(0, min(5, len(n))):  # Clamp so short names are not re-scanned via negative slices.
        if n[len(n)-1-i:len(n)-i].isdigit():
            match += 1
    return match

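# e.g. name_scan("user123") == 3, and name_scan("a1b2c") == 2: each of the last five
# characters is tested individually, so the digits need not be consecutive.
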
# Flagging algorithm. s = submission, targ = True for a requested analysis,
# perm = permalink of the request, source = subreddit/domain the scan came from.
def flagger(s, targ, perm, source):
    u = s.author
    n = u.name
    userinfo = []
    age = user_age(u)
    karma = u.comment_karma
    trophies = user_trophies(u.name)

    if (karma <= 50 and age <= 365*3 and "Verified Email" not in trophies) or targ:
        # Gather pertinent information.
        userinfo.append(False)  # Deleted status.
        userinfo.append("/u/" + n)
        userinfo.append(str(age))
        userinfo.append(str(karma))
        if targ:
            userinfo.append("[Requested analysis](" + perm + ") using [first post found](" + s.short_link + ").")
        else:
            userinfo.append("[/r/" + s.subreddit.display_name + " post](" + s.short_link + ")")

        # Calculate confidence rating.
        cnf = (365 - age) / 300        # Age (younger accounts score higher).
        cnf += name_scan(u.name)       # Trailing numbers in name.
        cnf -= comment_average(u) - 2  # Average comment karma.
        cnf -= comment_rate(u)         # Average comment rate.
        cnf -= karma / 25              # Total comment karma.

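        # Worked example (hypothetical account): 30 days old, name ending in three
        # digits, five comments averaging score 2 and spaced about an hour apart,
        # 10 comment karma: cnf = (365 - 30)/300 + 3 - (2 - 2) - 1.0 - 10/25,
        # roughly 2.72, which the interp() call below maps to a confidence of about 89.
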
        # Parse name format. W = Word, N = Number, L = Letter, S = Symbol.
        # First pass: classify every character (L = lowercase, U = uppercase,
        # N = digit, S = symbol).
        f = ""
        out = ""
        word = False
        for i in range(0, len(n)):
            c = n[i:i+1]
            if c.islower() and c.isalpha():
                f += "L"
            elif c.isupper() and c.isalpha():
                f += "U"
            elif c.isdigit():
                f += "N"
            else:
                f += "S"
        # Second pass: collapse letter runs (including camel-case words) into W,
        # keep isolated letters as L, and pass digits and symbols through.
        for i in range(0, len(f)):
            p = ""
            c = f[i:i+1]
            l = ""
            if i > 0:
                p = f[i-1:i]
            if i < len(f)-1:
                l = f[i+1:i+2]
            if c in "SN":
                if word:
                    word = False
                    out += "W"
                out += c
            if c == "U" and word and not (i == len(f)-1 or p == "U"):
                out += "W"
            if c in "UL":
                if p in "SN" and l in "SN":
                    out += "L"
                else:
                    word = True
            if i == len(f)-1 and word:
                out += "W"

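        # Worked example: n = "JohnDoe123" classifies to f = "ULLLULLNNN", which
        # the second pass collapses to out = "WWNNN" (two words, then three digits),
        # the same shape the FreeToide format flag above matches.
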
        # Check user for special flags.
        spfl = []
        for sp in special:
            if source in sp[4] or s.subreddit.display_name in sp[4] or sp[4] == []:
                term = ""
                if sp[2] == "L":    # Submission link.
                    term = s.url.lower()
                elif sp[2] == "U":  # Username.
                    term = n.lower()
                elif sp[2] == "T":  # Submission title.
                    term = s.title.lower()
                elif sp[2] == "F":  # Username format.
                    term = out.lower()
                elif sp[2] == "C":  # Latest comment.
                    for c in u.get_comments(limit=1):
                        term = c.body.lower()
                if term != "":
                    if (not sp[3]) and sp[0].lower() in term:
                        spfl.append(sp[1])
                        cnf += 1
                    if sp[3] and sp[0].lower() == term:
                        spfl.append(sp[1])
                        cnf += 1
        if spfl == []:
            spfl.append("None")

        # Map the raw score onto a 0-100 scale; numpy's interp clamps input
        # outside the [-20, 5.5] range to the endpoints.
        cnf = round(interp(cnf, [-20, 5.5], [0, 100]))

        # Append extrapolated data.
        userinfo.append(str(cnf))
        userinfo.append(", ".join(spfl))
        userinfo.append(out)

    else:
        userinfo.append(True)

    return userinfo

# Checks if a user exists.
def exists(n):
    try:
        r.get_redditor(n, fetch=True)
        return True
    except Exception:
        return False

# Formats userdump data as a markdown table.
def frmt(userdump):
    out = "[How to interpret these lists.](https://www.reddit.com/r/ScanBot/wiki/index#wiki_interpreting_a_user_list.)\n\nDeleted|Username|Account Age|Comment Karma|From|Confidence|Special Flags|Name Format\n:--|:--|:--|:--|:--|:--|:--|:--\n"
    for ui in userdump:
        out += str(ui[0]) + "|"
        for i in range(1, 7):
            out += ui[i] + "|"
        out += ui[7] + "\n"
    return out

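# Each row carries the eight flagger() fields in order; a sample row (values
# hypothetical) looks like:
# False|/u/example_user|30|10|[/r/funny post](https://redd.it/abcdef)|89|None|WWNNN
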
# Strips the table formatting from userdump data and converts it back to a nested
# list. Also updates account deletion statuses.
def strp(udtext):
    out = []
    lines = udtext.split("\n")
    count = 0
    for l in lines:
        # Lines 0-3 are the wiki link, a blank line, the header and the separator
        # row; the last element is the empty string left by the trailing newline.
        if count > 3 and count < len(lines) - 1:
            f_args = []
            args = l.split("|")
            f_args.append(not exists(args[1][3:]))  # args[1][3:] strips the "/u/" prefix.
            for i in range(1, 8):
                f_args.append(args[i])
            out.append(f_args)

        count += 1

    return out

while True:
    try:
        print("Initializing.")

        # Log in. The user agent was already set when the PRAW session was created.
        # The password is not in this paste; the placeholder below must be replaced.
        r.login(username="ScanBot", password="<REDACTED>", disable_warning=True)

        # Update the /r/ScanBot sidebar.
        try:
            r.update_settings(r.get_subreddit("ScanBot"), description="\n\nUsername dump for accounts automatically determined to be bots, spammers, farmers and/or shills by /u/ScanBot. Novelty accounts will be ignored for the most part.\n\nScan list: \n\n/r/"+('\n\n/r/'.join(subreddits))+"\n\nwww.reddit.com/domain/"+'\n\nwww.reddit.com/domain/'.join(domains)+"\n\n-----\n\nMessage /u/GregTJ if you want your subreddit or domain removed from the scan list.")
        except Exception:
            pass

        print("Initialized.")
        print("")

        # Main loop.
        while True:

            # Load the latest userlist and update user deletion statuses.
            UD_Post = next(me.get_submitted(limit=1))
            flagged = strp(UD_Post.selftext)

            # Create a new post if the current one is full.
            if len(flagged) >= 30:
                r.submit("ScanBot", "Flagged Users " + time.strftime("%m-%d-%Y %H:%M"), text="")
                UD_Post = next(me.get_submitted(limit=1))
                flagged = strp(UD_Post.selftext)

            print("Scanning:")

            # Scan subreddits and domains, skipping authors already in the list.
            for sr in subreddits:
                print(" /r/" + sr)
                for s in r.get_subreddit(sr).get_new(limit=5):
                    unq = True
                    for ui in flagged:
                        if s.author.name in ui[1]:
                            unq = False
                    if unq:
                        uinf = flagger(s, False, "", sr)
                        if not uinf[0]:
                            flagged.append(uinf)

            for do in domains:
                print(" /domain/" + do)
                for s in r.get_domain_listing(do, sort='new', limit=5):
                    unq = True
                    for ui in flagged:
                        if s.author.name in ui[1]:
                            unq = False
                    if unq:
                        uinf = flagger(s, False, "", do)
                        if not uinf[0]:
                            flagged.append(uinf)

            print("")
            print("Replying to new username mentions.")

            # Reply to analysis requests. Mentions are expected to look like
            # "/u/ScanBot /u/SomeUser"; the [3:] slice strips the "/u/" prefix.
            for um in filter(lambda x: x.new, r.get_mentions()):
                um.mark_as_read()
                if len(um.body.split(" ")) > 1 and exists(um.body.split(" ")[1][3:]):
                    request = flagger(next(r.get_redditor(um.body.split(" ")[1][3:]).get_submitted(limit=1)), True, um.permalink, "request")
                    flagged.append(request)
                    verdict = ""
                    cnf = int(request[5])
                    if cnf < 20:
                        verdict = "Probably not a bot."
                    if cnf >= 20:
                        verdict = "Slightly suspicious."
                    if cnf > 49:
                        verdict = "Very suspicious."
                    if cnf > 92:
                        verdict = "Probably a bot."
                    request[0] = "N/A"
                    um.reply(frmt([request]) + "\n\nVerdict: " + verdict)

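            # The thresholds above bucket the confidence score as: 0-19 "Probably not
            # a bot.", 20-49 "Slightly suspicious.", 50-92 "Very suspicious.",
            # 93-100 "Probably a bot."
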
            # Upload data.
            UD_Post.edit(frmt(flagged))
            UD_Post.approve()

            print("")
            print("Uploaded new data.")
            print("")
    except Exception:
        print("Fatal error, restarting.")