Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- import scrapy
- from scrapy.selector import Selector
- from testtest.items import TesttestItem
- import datetime
- from selenium import webdriver
- from bs4 import BeautifulSoup
- from HTMLParser import HTMLParser
- import re
- import time
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
class MySpider(scrapy.Spider):
    """Log in to Shodan with a Selenium-driven Firefox and scrape result IPs.

    The spider authenticates at account.shodan.io, runs a fixed search
    (US / Los Angeles hosts), and yields one item per result IP while
    following pagination links.

    NOTE(review): credentials are hard-coded below — move them to spider
    settings or environment variables; they should never live in source.
    """

    name = "gardeningTips"
    # allowed_domains entries must be bare domain names, not URLs; a URL
    # here silently disables Scrapy's OffsiteMiddleware filtering.
    allowed_domains = ['shodan.io']
    start_urls = ['https://account.shodan.io/login']

    def parse(self, response):
        """Drive the login + scrape flow.

        Yields:
            dict: ``{"special": <ip text>}`` for each ``div.ip`` element
            found on each visited search-result page.
        """
        url = 'https://account.shodan.io/login'
        browser = webdriver.Firefox()
        username_str = 'daveyman321'
        password_str = '2ZNf+Uy}'
        try:
            browser.get(url)
            # find_element_by_name() was removed in Selenium 4; the
            # By-based locator API is the supported form.
            username = browser.find_element(By.NAME, 'username')
            username.send_keys(username_str)
            password = browser.find_element(By.NAME, 'password')
            password.send_keys(password_str)
            sign_in_button = browser.find_element(By.NAME, 'login_submit')
            sign_in_button.click()

            browser.get("https://www.shodan.io/search?query=Us+country%3A%22US%22+city%3A%22Los+Angeles%22")
            # Bounded loop keeps the original's hard cap on pages visited.
            for _ in range(1000):
                html = browser.page_source
                soup = BeautifulSoup(html, 'html.parser')
                size_tags = soup.findAll('div', {"class": "search-result"})
                special_tags = soup.findAll('div', {"class": "ip"})
                # Assumes one "ip" div per search result — TODO confirm
                # against the live page markup.
                for idx in range(len(size_tags)):
                    text = special_tags[idx].get_text()
                    # Collapse all spaces and newlines into a bare IP string.
                    text = text.replace(' ', '').replace('\n', '')
                    yield {
                        "special": text,
                    }
                pagination = soup.find('div', {'class': 'pagination'})
                if pagination is None:
                    # No pagination block on this page -> nothing more to
                    # crawl (the original crashed with AttributeError here).
                    break
                # NOTE(review): this visits EVERY pagination link on each
                # page, so earlier pages get revisited; presumably only the
                # "next" link is intended — confirm desired crawl order.
                for anchor in pagination.findAll('a'):
                    link = anchor.get('href')
                    browser.get("https://www.shodan.io" + link)
                    time.sleep(3)
        finally:
            # Always release the browser/driver process — the original
            # leaked a Firefox + geckodriver pair on every run.
            browser.quit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement