Advertisement
m3t4l0v3r

Untitled

Apr 19th, 2021
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.03 KB | None | 0 0
  1.  
  2. #coding: utf-8
  3. import requests,time
  4. from stem import Signal
  5. from stem.control import Controller
  6. from twocaptcha import TwoCaptcha
  7. from selenium.webdriver.common.keys import Keys
  8. from selenium.webdriver.common.by import By
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.support import expected_conditions as EC
  11. import requests,io,sys,time,os
  12. from selenium.webdriver.common.action_chains import ActionChains
  13. from selenium.webdriver.chrome.options import Options
  14. from selenium import webdriver
  15. import time,os
  16. import json
  17. import random
  18. from bs4 import BeautifulSoup
  19.  
# Local port used as the SOCKS5 proxy endpoint by curl, requests and Chrome
# throughout this script.
PORT_1 = 9050
# Not referenced anywhere in the visible code.
# NOTE(review): 9051 is conventionally Tor's control port — confirm intent.
PORT_2 = 9051
  22.  
  23. def iper():
  24. global PORT_1
  25. os.system("curl -sx socks5://127.0.0.1:%s ifconfig.co | grep -oP '(?<=Your IP</span>: ).*(?=</span>)'"%PORT_1)
  26. def newId():
  27. os.system("killall -HUP tor")
  28. time.sleep(10)
  29.  
  30. def grb(cdo):
  31. with io.open("ax.html","w+",encoding="utf-8") as ox:
  32. ox.write(cdo)
  33. soup = BeautifulSoup(cdo,"html.parser")
  34. cd = str(list(soup.find_all("script"))[-1]).split("\n")
  35. cd = cd[-20:-1]
  36. gt = ""
  37. chal = ""
  38. for line in cd:
  39. if "gt" in line:
  40. gt = line.split(":")[1].replace("'",'').replace("\n","").replace(",","").replace(" ","")
  41. elif "challenge" in line:
  42. chal = line.split(":")[1].replace("'",'').replace("\n","").replace(",","").replace(" ","")
  43. if (gt == "") or (chal == ""):
  44. return(False,False)
  45. return(gt,chal)
  46.  
  47. def cpres(gt,chl,ur,api):
  48. global PORT_1
  49. #nu = requests.get("https://api-na.geetest.com/reset.php?gt=%s&challenge=%s&lang=fr-fr&pt=0&client_type=web&callback=geetest_1618213037183"%(gt,chl))
  50. #nch = json.loads(str(nu.text).replace("geetest_1618213037183(","")[:-1])
  51. #nch = nch["data"]["challenge"]
  52. #driver.refresh()
  53. sv = "api-na.geetest.com"
  54. prx = {"http":"socks5://localhost:%s"%PORT_1,"https":"socks5://localhost:%s"%PORT_1}
  55. x = requests.get("http://2captcha.com/in.php?key=%s&method=geetest&gt=%s&challenge=%s&pageurl=%s&api_server=%s"%(api,gt,chl,ur,sv),proxies=prx)
  56. _id = x.text.split("|")[1].replace("\n","")
  57. while True:
  58. try:
  59. xx = requests.get("http://2captcha.com/res.php?key=%s&action=get&id=%s"%(api,_id),timeout=20,proxies=prx)
  60. if "OK" in xx.text:
  61. return(xx.text.replace("OK|",""))
  62. time.sleep(5)
  63. except Exception as e:
  64. return(False)
# --- Script body -----------------------------------------------------------
# For each location code in `cde`, walk the paginated search results of the
# chosen category in a Chrome session routed through the SOCKS5 proxy and save
# each page's embedded JSON to output/<location>-Page<n>.json.
# NOTE(review): the paste lost all indentation; the nesting below is a
# reconstruction — confirm against the author's original.
chrome_options = Options()
chrome_options.add_argument('--lang=fr')
chrome_options.add_argument('--proxy-server=socks5://localhost:%s'%PORT_1)
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument('--no-sandbox')
driver = webdriver.Chrome(options=chrome_options)
# Values substituted into the `locations` query parameter, one full crawl each.
cde = ["67000","64000","68000","78000"]
# NOTE(review): hard-coded API credential — move to an environment variable or
# config file and rotate this key, since it has been published.
ap = "d2780fa5046f5b658915e0fae91085e1"
cat = input("Category Number: ")
i = 1
s = 0
px = -100
# Show the current proxy exit IP before starting.
iper()
for zp in cde:
    i = 1      # current page number
    s = 0      # 0 until max_pages has been read for this location
    px = -100  # placeholder so `i == px+1` cannot match before max_pages is known
    while True:
        if i == px+1:
            print("\n[+] Scraping Completed For %s."%zp)
            break
        driver.get("https://www.leboncoin.fr/recherche?category=%s&locations=%s&owner_type=pro&page=%s"%(cat,zp,i))
        # The try body never completes normally: it either `continue`s (IP
        # ban) or raises, and the `except` below is where scraping happens.
        try:
            # NOTE(review): find_element_by_* helpers were removed in
            # Selenium 4.3; `By` is already imported if this needs porting.
            ux = driver.find_element_by_tag_name("iframe").get_attribute("src")
            driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))
            if u"Vous avez été bloqué" in driver.page_source:
                driver.quit()
                # NOTE(review): message says 60s but newId() sleeps 10s.
                print("[-] IP Ban .. Changing IP in 60s")
                newId()
                iper()
                # Start a fresh browser with the same options, then retry the
                # same page (i is not advanced).
                chrome_options = Options()
                chrome_options.add_argument('--lang=fr')
                chrome_options.add_argument('--proxy-server=socks5://localhost:%s'%PORT_1)
                chrome_options.add_argument("--start-maximized")
                chrome_options.add_argument('--no-sandbox')
                driver = webdriver.Chrome(options=chrome_options)
                continue
            if u"On s'assure qu'on s'adresse bien à vous, et non pas à un robot." in driver.page_source:
                print("[+] Captcha Detected ..")
                hd = {"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36","accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9","accept-encoding": "gzip, deflate, br","accept-language": "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7"}
                # Fetch the iframe document directly (this request does not
                # pass `proxies`) and pull the captcha parameters out of it.
                rt = requests.get(ux,headers=hd)
                gt,chl = grb(rt.text)
                rsp = ""
                # cpres() returns False on failure; repeat until it yields an answer.
                while True:
                    rsp = cpres(gt,chl,driver.current_url,ap)
                    if rsp != False:
                        break
                # Hand the answer to the page's own script, then invoke its callback.
                exed = 'geetestResponse = %s'%rsp
                driver.execute_script(exed)
                time.sleep(1)
                driver.execute_script("captchaCallback();")
                print("[+] Captcha Bypassed")
                time.sleep(10)
                driver.switch_to.default_content()
                # Deliberate jump into the except block below.
                raise ValueError
            else:
                driver.switch_to.default_content()
                # Deliberate jump into the except block below.
                raise ValueError
        except Exception as e:
            # Reached via the explicit ValueError above or via any other error
            # in the try body (presumably including the iframe lookup failing
            # on a normal results page — confirm).
            driver.switch_to.default_content()
            data = driver.find_element_by_xpath("//script[contains(text(),'logo') and contains(@type, 'json')]").get_attribute('text')
            if s == 0:
                # First page of this location: learn how many pages exist.
                px = json.loads(data)["props"]["pageProps"]["listingData"]["max_pages"]
                s = 1
            sys.stdout.write("[+] Scraped %s|%s \r"%(i,px))
            sys.stdout.flush()
            # NOTE(review): the output/ directory must already exist; opening
            # a file does not create it.
            with io.open("output/%s-Page%s.json"%(zp,i),"w+",encoding="utf-8") as op:
                op.write(data)
            i = i + 1
  134.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement