gos_spider
TShiva, Jun 8th, 2017
import re

import requests
from bs4 import BeautifulSoup
from selenium import webdriver

# Tender links whose buyer info has already been printed
accepted = set()

# Load the search results in a headless PhantomJS browser so the
# JavaScript-rendered markup is present in page_source
browser = webdriver.PhantomJS()
browser.get("https://star.fintender.ru/Search/Index/?id=6f571c46-c6fb-4454-af68-32f5831ca7f9&backinfo=1|l0116200007917004246-1")
soup = BeautifulSoup(browser.page_source, "html.parser")

print(soup)
print(soup.findAll('table'))

'''def request_response(url):  # Send the request
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0'
    }
    response = requests.get(url, headers=headers)
    return response.text

def buyer_parse(url):  # Collect info about the buyer
    text = request_response(url)
    soup = BeautifulSoup(text, "lxml")
    customer_list = soup.findAll('span', 'customer')
    # Print the first three customer fields
    i = 0
    while i < 3:
        print(customer_list[i].text)
        i += 1

def link_creater(htmlpage):  # Collect tender links from a saved results page
    base = 'https://star.fintender.ru'
    file = open(str(htmlpage) + '.html', 'r', encoding='utf-8').read()
    links = re.findall(r'(/Search/GetActual/(\w|\?|\=|\-)+)', file)
    my_links = set()
    for link in links:
        my_links.add(link[0])
    for link in my_links:
        if base + link not in accepted:
            print(base + link)
            buyer_parse(base + link)
            accepted.add(base + link)
            print('    ')
        else:
            print('repeat')

# Walk the locally saved result pages 1.html .. 15.html
htmlpage = 1
while htmlpage < 16:
    link_creater(htmlpage)
    htmlpage += 1'''
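
# PhantomJS support has since been removed from Selenium, so the fetch above
# will not start on current releases. A minimal sketch of the same fetch with
# headless Chrome, assuming a matching chromedriver/Chrome install is on PATH:
'''
from selenium.webdriver.chrome.options import Options

opts = Options()
opts.add_argument("--headless")          # render without a visible window
browser = webdriver.Chrome(options=opts)
browser.get("https://star.fintender.ru/Search/Index/"
            "?id=6f571c46-c6fb-4454-af68-32f5831ca7f9"
            "&backinfo=1|l0116200007917004246-1")
soup = BeautifulSoup(browser.page_source, "html.parser")
print(soup.findAll('table'))             # same table dump as the original script
browser.quit()
'''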