Guest User

Untitled

a guest
May 23rd, 2016
48
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.21 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. # qtcreeper.py
  4. # https://github.com/anonimousse12345/qtcreeper
  5. # Based on interpals-autovisit.py by Hexalyse, https://github.com/Hexalyse
  6. # Requires python 2.7 and the python requests module
  7.  
  8. def show_exception_and_exit(exc_type, exc_value, tb):
  9. if exc_type != KeyboardInterrupt:
  10. print "*** ERROR ***\n"
  11. import traceback
  12. traceback.print_exception(exc_type, exc_value, tb)
  13. raw_input("\nPress key to exit.")
  14. sys.exit(-1)
  15.  
# Install the exception hook as early as possible, so that even a failure in
# the imports below still pauses for a keypress before the window closes.
import sys
sys.excepthook = show_exception_and_exit

import os
import json
import random
import time
import requests
import re

# Used only to pull the CSRF token out of the login page's <meta> tag.
from lxml.html import fromstring
  27.  
  28.  
  29. # Number of users shown by interpals per search page
  30. MATCHES_PER_SEARCH = 20
  31.  
  32. # Masquerade as a random one of these web browsers
  33. USER_AGENTS = [
  34. "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/601.5.17 (KHTML, like Gecko) Version/9.1 Safari/601.5.17",
  35. "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36",
  36. "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36",
  37. "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0"
  38. ]
  39.  
  40. CONTINENTS = [
  41. ("AF", "Africa"),
  42. ("AS", "Asia"),
  43. ("EU", "Europe"),
  44. ("NA", "North America"),
  45. ("OC", "Oceania"),
  46. ("SA", "South America")
  47. ]
  48.  
  49. DEFAULT_CONFIG = {
  50. "continents" : [ x[0] for x in CONTINENTS ],
  51. "countries" : [],
  52. "keywords" : [],
  53. "age1" : 40,
  54. "age2" : 80,
  55. "sex" : ["MALE"],
  56. "email" : "",
  57. "password" : "",
  58. "creepspeed" : 1,
  59. "useragent" : random.choice(USER_AGENTS)
  60. }
  61.  
  62.  
  63. DATA_DIR = os.path.join( os.path.expanduser("~"), ".qtcreeper" )
  64. CONFIG_FILE = os.path.join( DATA_DIR, "config.json" )
  65. USERS_VISITED_FILE = os.path.join( DATA_DIR, "users_visited.txt" )
  66.  
  67. if not os.path.exists(DATA_DIR):
  68. os.makedirs(DATA_DIR)
  69.  
  70.  
  71. def get_number(promptText):
  72. print promptText
  73.  
  74. while True:
  75. try:
  76. r = int(raw_input("> "))
  77. return r
  78. except ValueError:
  79. pass
  80.  
  81. print "Invalid selection, try again!"
  82.  
  83. def get_number_from_list(promptText, allowedOptions):
  84. print promptText
  85.  
  86. while True:
  87. try:
  88. r = int(raw_input("> "))
  89.  
  90. if r in allowedOptions:
  91. return r
  92. except ValueError:
  93. pass
  94.  
  95. print "Invalid selection, try again!"
  96.  
  97. def get_iso_codes(promptText, allowedOptions = None):
  98. print promptText
  99.  
  100. while True:
  101. r = raw_input("> ")
  102.  
  103. if r == "":
  104. return []
  105.  
  106. isoCodes = [x.strip().upper() for x in r.split(",")]
  107.  
  108. fail = False
  109.  
  110. # We just check they are two characters
  111. for isoCode in isoCodes:
  112. if len(isoCode) != 2 or (allowedOptions and isoCode not in allowedOptions):
  113. fail = True
  114. break
  115.  
  116. if not fail:
  117. return isoCodes
  118.  
  119. print "Input invalid, try again!"
  120.  
  121. def get_word_list(promptText):
  122. print promptText
  123.  
  124. r = raw_input("> ")
  125.  
  126. if r == "":
  127. return []
  128.  
  129. words = [x.strip().lower() for x in r.split(",")]
  130. return [x for x in words if len(x) > 0]
  131.  
  132.  
  133. config = {}
  134.  
  135. print "Welcome to qtcreeper!"
  136. print "--> https://github.com/anonimousse12345/qtcreeper"
  137.  
  138. if os.path.exists(CONFIG_FILE):
  139. with open(CONFIG_FILE, "r") as f:
  140. config = json.loads(f.read())
  141.  
  142. # Ensure any later added default keys exist
  143. for k, v in DEFAULT_CONFIG.iteritems():
  144. if not k in config:
  145. config[k] = v
  146. else:
  147. # Default config
  148. config = DEFAULT_CONFIG
  149.  
  150.  
  151. usersVisited = set()
  152.  
  153. # Load users already visited
  154. if os.path.exists(USERS_VISITED_FILE):
  155. with open(USERS_VISITED_FILE, "r") as f:
  156. for line in f:
  157. usersVisited.add(line.strip())
  158.  
  159.  
  160. while True:
  161. command = get_number_from_list("\nPlease select an option and press enter:"
  162. + "\n 1 - set account email and password (%s)" % (config["email"] or "NOT SET!")
  163. + "\n 2 - set gender and age range (%s, %d to %d)" % (",".join(config["sex"]), config["age1"], config["age2"])
  164. + "\n 3 - set continents (%s)" % (",".join(config["continents"]))
  165. + "\n 4 - set countries (%s)" % (",".join(config["countries"] or ["All"]))
  166. + "\n 5 - set keywords (%s)" % (",".join(config["keywords"] or ["None"]))
  167. + "\n 6 - set creeper speed (%d)" % (config["creepspeed"])
  168. + "\n 7 - clear users already visited file (%d users visited)" % len(usersVisited)
  169. + "\n 8 - run creeper!"
  170. ,[1,2,3,4,5,6,7,8])
  171.  
  172. if command == 1:
  173. print "\nEnter email address:"
  174. config["email"] = raw_input("> ").strip().lower()
  175. print "\nEnter password:"
  176. config["password"] = raw_input("> ")
  177.  
  178. elif command == 2:
  179. # Get genders
  180. genders = get_number_from_list("\nWhat genders to crawl? 1 = female, 2 = male, 3 = both", [1,2,3])
  181. config["sex"] = {1 : ["FEMALE"], 2 : ["MALE"], 3 : ["MALE", "FEMALE"]}[genders]
  182.  
  183. # Get age range
  184. config["age1"] = get_number("\nMinimum age?")
  185. config["age2"] = get_number("\nMaximum age?")
  186.  
  187. elif command == 3:
  188. # Continents
  189. config["continents"] = get_iso_codes("\nEnter a comma separated list of any of these continent codes, or nothing for all continents:\n"
  190. + "\n".join([(x[0] + " - " + x[1] + " ") for x in CONTINENTS]), [ x[0] for x in CONTINENTS ])
  191.  
  192. if len(config["continents"]) == 0:
  193. config["continents"] = [ x[0] for x in CONTINENTS ]
  194.  
  195. elif command == 4:
  196. # Countries
  197. config["countries"] = get_iso_codes("\nEnter a comma separated list of two letter country codes, or nothing for all countries:")
  198.  
  199. elif command == 5:
  200. # Keywords
  201. config["keywords"] = get_word_list("\nEnter a comma separated list of keywords, or nothing to clear:")
  202.  
  203. elif command == 6:
  204. # Set creep speed
  205. config["creepspeed"] = get_number_from_list("\nEnter a speed between 1 and 10 (1 = slow and realistic, 10 = stupid fast):",
  206. range(1,11))
  207.  
  208. elif command == 7:
  209. # Clear users visited
  210. if os.path.exists(USERS_VISITED_FILE):
  211. os.remove(USERS_VISITED_FILE)
  212. usersVisited = set()
  213.  
  214. elif command == 8:
  215. if config["email"] == "" or config["password"] == "":
  216. print "\nSet email and password first!"
  217. else:
  218. print "\nRunning creeper..."
  219. break
  220.  
  221. # Save any changes
  222. with open(CONFIG_FILE, "w") as f:
  223. f.write( json.dumps(config, indent=4) )
  224.  
  225. print "\n* Changes saved..."
  226.  
  227.  
  228. # File to log users already visited
  229. usersVisitedFp = open(USERS_VISITED_FILE, "a")
  230.  
  231. def record_user_visited(username):
  232. username = username.strip()
  233. usersVisited.add(username)
  234. usersVisitedFp.write(username + "\n")
  235. usersVisitedFp.flush()
  236.  
  237.  
  238. # Main crawler code below
  239.  
  240.  
  241. # Short pause between regular pageloads
  242. def default_wait():
  243. print "\nWaiting..."
  244. time.sleep(random.uniform(2,5))
  245.  
  246. # Longer(?) pause between user views
  247. def user_view_wait():
  248. sleepTime = random.uniform(5,15) / config["creepspeed"]
  249. print "\nWaiting %f seconds..." % sleepTime
  250. time.sleep(sleepTime)
  251.  
  252.  
  253. # Start a session
  254. client = requests.Session()
  255. client.headers["Host"] = "www.interpals.net"
  256. client.headers["User-Agent"] = config["useragent"]
  257.  
  258. print "\nVisiting main page..."
  259.  
  260. r = client.get("https://www.interpals.net/")
  261. client.headers["Referer"] = "https://www.interpals.net/"
  262.  
  263. tree = fromstring(r.text)
  264. csrf_token = tree.xpath('//meta[@name="csrf-token"]/@content')[0]
  265.  
  266. print "\n* Got CSRF Token: %s" % csrf_token
  267.  
  268. default_wait()
  269.  
  270. print "\nAttempting login..."
  271.  
  272. params = {
  273. "username": config["email"],
  274. "auto_login": "1",
  275. "password": config["password"],
  276. "csrf_token" : csrf_token
  277. }
  278.  
  279. r = client.post("https://www.interpals.net/app/auth/login", data=params)
  280. client.headers["Referer"] = "https://www.interpals.net/account.php"
  281.  
  282. #print "\n", r.request.headers
  283.  
  284. if r.text.find("My Profile") == -1:
  285. print "\nError: login failed. Either email/password incorrect or qtcreeper needs updating."
  286.  
  287. #with open("debug.txt", "w") as f:
  288. # f.write(r.text)
  289.  
  290. exit(1)
  291. else:
  292. print "\n* Successfully logged in!"
  293.  
  294. default_wait()
  295.  
  296. print "\nVisiting search page..."
  297. r = client.get("https://www.interpals.net/app/search")
  298. client.headers["Referer"] = "https://www.interpals.net/app/search"
  299.  
  300. default_wait()
  301.  
  302.  
  303. def build_search_url(previousPageNum, desiredPageNum, onlineOnly):
  304. # Age
  305. url = "https://www.interpals.net/app/search?age1=%d&age2=%d" % (config["age1"], config["age2"])
  306.  
  307. # Gender(s)
  308. for i in range(0, len(config["sex"])):
  309. url += "&sex[%d]=%s" % (i, config["sex"][i])
  310.  
  311. # Sorting method
  312. url += "&sort=last_login"
  313.  
  314. # Continents
  315. for i in range(0, len(config["continents"])):
  316. url += "&continents[%d]=%s" % (i, config["continents"][i])
  317.  
  318. # "Looking for"
  319. url += "&lfor[0]=lfor_email&lfor[1]=lfor_snail&lfor[2]=lfor_langex&lfor[3]=lfor_friend&lfor[4]=lfor_flirt&lfor[5]=lfor_relation"
  320.  
  321. # First offset, the previous offset/page we were on
  322. url += "&offset=%d" % (previousPageNum * MATCHES_PER_SEARCH)
  323.  
  324. # Keywords?
  325. if len(config["keywords"]) > 0:
  326. url += "&keywords=" + "+".join(config["keywords"])
  327.  
  328. # Online?
  329. if onlineOnly:
  330. url += "&online=on"
  331.  
  332. # Countries (some strange variable length array)
  333. url += "&countries[0]=---"
  334.  
  335. for i in range(0, len(config["countries"])):
  336. url += "&countries[%d]=%s" % (i+1, config["countries"][i])
  337.  
  338. if len(config["countries"]) > 0:
  339. url += "&countries[%d]=---" % (len(config["countries"])+1)
  340.  
  341. # Second offset, the actual offset/page to get
  342. url += "&offset=%d" % (desiredPageNum * MATCHES_PER_SEARCH)
  343.  
  344. return url
  345.  
  346.  
  347. currentSearchPage = 0
  348. onlineOnly = True # online only by default, but disabled automatically if no users found???
  349. totalViewedCount = 0
  350. totalSkippedCount = 0
  351. ranOutOfUsers = False
  352.  
  353. while True:
  354. # Query search page
  355.  
  356. userSearchUrl = build_search_url(max(0,currentSearchPage-1), currentSearchPage, onlineOnly)
  357. print "\nQuerying search page %d using search URL: %s" % (currentSearchPage, userSearchUrl)
  358.  
  359. r = client.get(userSearchUrl)
  360. client.headers["Referer"] = userSearchUrl
  361.  
  362. # Extract usernames
  363. usernames = re.findall(r'Report ([a-zA-Z0-9\-_]+) to moderators', r.text, re.M)
  364. print "\nFound %d users on search page %d." % (len(usernames), currentSearchPage)
  365.  
  366. default_wait()
  367.  
  368. # No users were found?
  369. if len(usernames) == 0:
  370. print "\n!!!!!!! NO MORE USERS FOUND !!!!!!!"
  371. print "\nMay have reached end of users. Will now start again including offline users in search."
  372. print "\n(Otherwise, try using broader search terms.)"
  373. currentSearchPage = 0
  374. onlineOnly = False
  375. ranOutOfUsers = True
  376. default_wait()
  377. continue
  378.  
  379. # Through users
  380. viewedCount = 0
  381. skippedCount = 0
  382.  
  383. for username in usernames:
  384. if username not in usersVisited:
  385. print "\nVisiting user %s" % username
  386. client.get("https://www.interpals.net/" + username)
  387.  
  388. record_user_visited(username)
  389. viewedCount += 1
  390. totalViewedCount += 1
  391.  
  392. user_view_wait()
  393. else:
  394. print "\nAlready visited user %s, skipping..." % username
  395. skippedCount += 1
  396. totalSkippedCount += 1
  397.  
  398. print "\n*** RESULTS SO FAR ***\n"
  399. print " Search page #%d" % currentSearchPage
  400. print " Visited %d new users this page, %d were already visited." % (viewedCount, skippedCount)
  401. print " Visited %d new users in total, %d were already visited." % (totalViewedCount, totalSkippedCount)
  402.  
  403. if ranOutOfUsers:
  404. print "\n!!! WARNING: At one point the script ran out of online users, and started including offline users."
  405.  
  406. # Next page of search
  407. currentSearchPage += 1
  408. default_wait()
  409.  
  410.  
  411. # Close users visited file??
  412. usersVisitedFp.close()
Add Comment
Please, Sign In to add comment