Advertisement
Typhoon

Get ORSR data

Aug 26th, 2015
181
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.52 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3.  
  4. # Install BS4 with pip3 : sudo pip3 install -U beautifulsoup4
  5. from bs4 import BeautifulSoup
  6. # Install Requests with pip3 : sudo pip3 install -U requests
  7. import urllib.request as urllib2
  8.  
  9. # Starting URL ID
  10. urlid = 8000
  11. # Ending URL ID
  12. end_urlid = urlid + 20
  13.  
  14. # Iterate through URL ID and scrape data
  15. while urlid <= end_urlid:
  16.     # Define URL for data scraping
  17.     check_url = ("http://www.orsr.sk/vypis.asp?ID=" + str(urlid) + "&SID=3")
  18.     # Open URL in URLlib
  19.     check_page = urllib2.urlopen(check_url)
  20.     # Read and parse html data
  21.     check_soup = BeautifulSoup(check_page.read(), "html.parser")
  22.     print ("\n###############\nURL :" , check_url)
  23.  
  24.     try:
  25.         # Get values with BS
  26.         obch_meno = check_soup.find_all('td')[11].find('span').text[2:]
  27.         sidlo_1 = check_soup.find_all('td')[15].find_all('span')[0].text[2:]
  28.         sidlo_2 = check_soup.find_all('td')[15].find_all('span')[1].text[2:]
  29.         sidlo_3 = check_soup.find_all('td')[15].find_all('span')[2].text[2:]
  30.         sidlo_4 = check_soup.find_all('td')[15].find_all('span')[3].text[2:].replace(" ","")
  31.         ico = check_soup.find_all('td')[19].find('span').text[2:].replace(" ","")
  32.        
  33.         # Print values
  34.         print(obch_meno)
  35.         print(ico)
  36.         # print(sidlo_1)
  37.         # print(sidlo_2)
  38.         # print(sidlo_3)
  39.         # print(sidlo_4)
  40.         print(sidlo_1, sidlo_2, sidlo_3, sidlo_4)
  41.  
  42.     except:
  43.         print("Neplatne ID")
  44.  
  45.     urlid = urlid + 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement