Advertisement
Typhoon

Scrape Z*** Faktury Loop

Feb 3rd, 2015
276
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.80 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import sys
  4. from bs4 import BeautifulSoup
  5. from bs4 import NavigableString
  6. import re
  7. import urllib2
  8. import json
  9.  
  10. number = 119000  # first invoice id to request; it is appended to the URL's id= parameter below
  11. while number<119500:  # processes ids 119000 through 119499, one page fetch per id
  12.         print number  # Python 2 print statement: shows the id about to be fetched
  13.         number = str(number)  # made a string only so it can be concatenated into the URL; turned back into int near the end of the loop
  14.         url= 'http://www.zvjs.sk/index.php?fa_obj&type=fa&id=' + number
  15.         page = urllib2.urlopen(url)  # no try/except here: an error raised by the fetch stops the whole run
  16.         soup = BeautifulSoup(page.read(), from_encoding="windows-1252")  # tells BeautifulSoup to decode the response bytes as windows-1252
  17.  
  18.         evcislo = soup.find_all('td')[3].text  # fields are read by fixed position among all <td> cells; only odd indexes 3..31 are used (presumably the even cells hold labels - confirm against the page)
  19.         evcisloval = evcislo.strip()  # whitespace-trimmed copy, used when building the output line
  20.         prinalezi = soup.find_all('td')[5].text  # NOTE(review): find_all('td') is re-run for every field; an index past the last cell raises IndexError
  21.         prinalezival = prinalezi.strip()
  22.         popisfaplnenia = soup.find_all('td')[7].text
  23.         hodnotafaplnenia = soup.find_all('td')[9].text
  24.         hodnotafaplneniaval = hodnotafaplnenia.encode("utf-8")  # UTF-8 encoded byte string of the cell text
  25.         datumdfa = soup.find_all('td')[11].text
  26.         datumzfa = soup.find_all('td')[13].text
  27.         formazaplatenia = soup.find_all('td')[15].text
  28.         if not formazaplatenia:  # an empty cell is replaced by the literal string "NULL"
  29.                 formazaplatenia="NULL"
  30.         obchmenonazov = soup.find_all('td')[17].text
  31.         sidlofirmy = soup.find_all('td')[19].text
  32.         pravnaforma = soup.find_all('td')[21].text
  33.         sudregistracie = soup.find_all('td')[23].text
  34.         ico = soup.find_all('td')[25].text
  35.         dic = soup.find_all('td')[27].text
  36.         cislouctu = soup.find_all('td')[29].text
  37.         doplnujuceinfo = soup.find_all('td')[31].text
  38.         for link in soup.find_all("a", limit=1):  # limit=1 means this body runs at most once, for the first <a> on the page
  39.                 urlfa = (link.get('href'))  # NOTE(review): if the page has no <a>, urlfa keeps the previous iteration's value (or is undefined on the first one)
  40.  # NOTE(review): the next line is cut off in this paste (it ends in "$"), so the full expression building `output` is not visible here
  41.         output = "{""\"Evidencne cislo Faktury\"" ":" "\""+evcisloval.encode("utf-8") + "\"," "\"Prinalezi k\"" ":" "\""+prinalezival.encode$
  42.         outfile = open('/home/pi/scrapy/1.txt', 'a')  # append mode; NOTE(review): reopened on every iteration and no close() is visible in this paste
  43.         outfile.write(output)
  44.         number = int(number)  # back to int so the increment and the while comparison work on a number
  45.         number = number +1
  46.         print number  # prints the next id; the top of the loop prints it again on the following pass
  47.         print output
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement