Advertisement
karlicoss

goszakupki

Aug 15th, 2012
351
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.03 KB | None | 0 0
  1. #!/usr/bin/env python2
  2. # -*- coding: utf-8 -*-
  3. from BeautifulSoup import BeautifulSoup
  4. import sys
  5. import urllib
  6. import re
  7.  
# URL template of the simple search on zakupki.gov.ru; "{}" receives the
# order-name text to search for.
SEARCH_QUERY = "http://zakupki.gov.ru/pgz/public/action/search/simple/run?orderName={}"
# Matches the relative link to a notification's common-info page and captures
# the numeric notificationId in group 1.
LINK_REGEX = r"/pgz/public/action/orders/info/common_info/show\?notificationId=(\d+)"
# URL template of a notification's common-info page; "{}" receives the
# notificationId captured by LINK_REGEX.
NOTIFICATION_PAGE = "http://zakupki.gov.ru/pgz/public/action/orders/info/common_info/show?notificationId={}"

# Search terms fed to get_search_results, one request per entry.
# NOTE(review): these look like short Russian words in which one Cyrillic
# letter has been swapped for a Latin look-alike -- confirm against the raw
# bytes, and do not retype the literals by hand.
CHEAT_CANDIDATES = ['нa', 'нe', 'пo', 'нy']
  13.  
  14. def get_search_results(query):
  15.     link = SEARCH_QUERY.format(query)
  16.     soup = BeautifulSoup(urllib.urlopen(link))
  17.  
  18.     container = soup.find(id = "searchResultContainer")
  19.     items = container.findAll('a', {'class': "iceOutLnk"})
  20.     result = []
  21.     for item in items:
  22.         notification_id = re.search(LINK_REGEX, str(item)).group(1)
  23.         result.append(NOTIFICATION_PAGE.format(notification_id))
  24.     return result
  25.  
  26. result = []
  27. for query in CHEAT_CANDIDATES:
  28.     result.extend(get_search_results(query))
  29.  
  30. result = sorted(list(set(result))) # making links unique
  31.  
  32. for link in result:
  33.     print(link)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement