Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- import sys
- from bs4 import BeautifulSoup
- from bs4 import NavigableString
- import re
- import urllib2
- import json
# Scraper script body: walks invoice ids starting at 119000 while the id is
# below 119500, downloads the zvjs.sk detail page for each id, reads text out
# of fixed <td> positions and appends a hand-assembled JSON-like string to
# /home/pi/scrapy/1.txt.
# NOTE(review): this copy has lost its indentation and every line carries a
# leading "- " marker, so the exact extent of the while-loop body cannot be
# read off here -- presumably it runs through `number = number +1`; confirm
# against the original file.
- number = 119000
- while number<119500:
- print number
# The id is turned into a string only so it can be concatenated into the URL;
# it is converted back to an int further down before being incremented.
- number = str(number)
- url= 'http://www.zvjs.sk/index.php?fa_obj&type=fa&id=' + number
- page = urllib2.urlopen(url)
# The response bytes are handed to BeautifulSoup with an explicit
# windows-1252 source encoding.
- soup = BeautifulSoup(page.read(), from_encoding="windows-1252")
# Field values are taken from the odd-numbered <td> cells 3, 5, ..., 31.
# Presumably the even-numbered cells hold the matching labels -- TODO confirm
# against the page markup.
# NOTE(review): soup.find_all('td') is re-evaluated for every field, and a
# page with fewer than 32 <td> cells would fail on the index lookup.
- evcislo = soup.find_all('td')[3].text
- evcisloval = evcislo.strip()
- prinalezi = soup.find_all('td')[5].text
- prinalezival = prinalezi.strip()
- popisfaplnenia = soup.find_all('td')[7].text
- hodnotafaplnenia = soup.find_all('td')[9].text
- hodnotafaplneniaval = hodnotafaplnenia.encode("utf-8")
- datumdfa = soup.find_all('td')[11].text
- datumzfa = soup.find_all('td')[13].text
- formazaplatenia = soup.find_all('td')[15].text
# An empty cell 15 is replaced by the literal string "NULL".
- if not formazaplatenia:
- formazaplatenia="NULL"
- obchmenonazov = soup.find_all('td')[17].text
- sidlofirmy = soup.find_all('td')[19].text
- pravnaforma = soup.find_all('td')[21].text
- sudregistracie = soup.find_all('td')[23].text
- ico = soup.find_all('td')[25].text
- dic = soup.find_all('td')[27].text
- cislouctu = soup.find_all('td')[29].text
- doplnujuceinfo = soup.find_all('td')[31].text
# limit=1 restricts the search to at most one <a> element, so urlfa ends up
# holding the href of the first anchor found.
# NOTE(review): if a page has no <a> element, urlfa is not assigned here.
- for link in soup.find_all("a", limit=1):
- urlfa = (link.get('href'))
# The output record is assembled by plain string concatenation of quoted
# labels and UTF-8 encoded values; the visible part applies no escaping to the
# values, so a value containing a double quote would break the JSON syntax.
# NOTE(review): the next line is cut off in this copy (it ends with "$"), so
# the remaining labels, fields and any trailing newline are unknown here.
- output = "{""\"Evidencne cislo Faktury\"" ":" "\""+evcisloval.encode("utf-8") + "\"," "\"Prinalezi k\"" ":" "\""+prinalezival.encode$
# The output file is opened in append mode; no matching close() is visible in
# this copy.
- outfile = open('/home/pi/scrapy/1.txt', 'a')
- outfile.write(output)
# Convert the id back to an int and advance to the next one.
- number = int(number)
- number = number +1
- print number
- print output
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement