import time

import scrapy
from testtest.items import TesttestItem  # item class from the enclosing Scrapy project (unused below)
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


class MySpider(scrapy.Spider):
    name = "gardeningTips"

    # allowed_domains takes bare domain names, not full URLs
    allowed_domains = ['shodan.io']
    start_urls = ['https://account.shodan.io/login']

    def parse(self, response):
        url = 'https://account.shodan.io/login'
        browser = webdriver.Firefox()

        usernameStr = 'daveyman321'
        passwordStr = '2ZNf+Uy}'

        browser.get(url)

        # fill in the username field
        username = browser.find_element_by_name('username')
        username.send_keys(usernameStr)

        # alternative flow: hit the next button first, then wait for the
        # transition before filling in the password field
        #nextButton = browser.find_element_by_name('login_submit')
        #nextButton.click()
        #password = WebDriverWait(browser, 10).until(
        #    EC.presence_of_element_located((By.NAME, "password")))
        #password.send_keys(passwordStr)

        # fill in the password field and submit the login form
        password = browser.find_element_by_name('password')
        password.send_keys(passwordStr)

        signInButton = browser.find_element_by_name('login_submit')
        signInButton.click()

        # run the search query (country: US, city: Los Angeles)
        browser.get("https://www.shodan.io/search?query=Us+country%3A%22US%22+city%3A%22Los+Angeles%22")

        # initial parse of the first results page
        html = browser.page_source
        soup = BeautifulSoup(html, 'html.parser')

        sizeTagz = soup.findAll('div', {"class": "search-result"})
        specialTagz = soup.findAll('div', {"class": "ip"})

        # walk through up to 1000 result pages
        for n in range(1000):
            html = browser.page_source
            soup = BeautifulSoup(html, 'html.parser')

            sizeTagz = soup.findAll('div', {"class": "search-result"})
            specialTagz = soup.findAll('div', {"class": "ip"})

            # yield one item per search result, containing the cleaned-up IP text
            for a in range(len(sizeTagz)):
                y = specialTagz[a].get_text()
                y = y.replace(' ', '')
                y = y.strip('\n')
                y = y.replace('\n', '')

                yield {
                    "special": y,
                }

            # follow every pagination link to load the next pages of results
            atags = soup.find('div', {'class': 'pagination'}).findAll('a')
            for i in atags:
                link = i.get('href')
                browser.get("https://www.shodan.io" + link)
                time.sleep(3)
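
To try the spider, it has to live inside a Scrapy project so the crawler can import it (the imports above reference a project called testtest), and Firefox plus geckodriver must be installed for webdriver.Firefox() to start. The snippet below is a minimal sketch of running it programmatically and exporting the yielded items to JSON; the output path results.json is an assumption, and the usual alternative is simply running "scrapy crawl gardeningTips -o results.json" from the project directory.

from scrapy.crawler import CrawlerProcess

# results.json is an assumed output path; FEED_URI/FEED_FORMAT tell Scrapy
# where and in what format to export the yielded items
process = CrawlerProcess(settings={
    "FEED_URI": "results.json",
    "FEED_FORMAT": "json",
})
process.crawl(MySpider)   # schedule the spider defined above
process.start()           # blocks until the crawl finishes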