Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- import sys
- from bs4 import BeautifulSoup
- from bs4 import NavigableString
- import re
- import urllib2
- import json
- url= 'http://www.zvjs.sk/index.php?fa_obj&type=fa&id=110478'
- page = urllib2.urlopen(url)
- soup = BeautifulSoup(page.read(), from_encoding="windows-1252")
- evcislo = soup.find_all('td')[3].text
- evcisloval = evcislo.strip()
- prinalezi = soup.find_all('td')[5].text
- prinalezival = prinalezi.strip()
- popisfaplnenia = soup.find_all('td')[7].text
- hodnotafaplnenia = soup.find_all('td')[9].text
- hodnotafaplneniaval = hodnotafaplnenia.encode("utf-8")
- datumdfa = soup.find_all('td')[11].text
- datumzfa = soup.find_all('td')[13].text
- formazaplatenia = soup.find_all('td')[15].text
- if not formazaplatenia:
- formazaplatenia="NULL"
- obchmenonazov = soup.find_all('td')[17].text
- sidlofirmy = soup.find_all('td')[19].text
- pravnaforma = soup.find_all('td')[21].text
- sudregistracie = soup.find_all('td')[23].text
- ico = soup.find_all('td')[25].text
- dic = soup.find_all('td')[27].text
- cislouctu = soup.find_all('td')[29].text
- doplnujuceinfo = soup.find_all('td')[31].text
- for link in soup.find_all("a", limit=1):
- urlfa = (link.get('href'))
- print urlfa, "\n"
- print evcisloval, prinalezival, popisfaplnenia, hodnotafaplnenia, datumdfa, datumzfa, formazaplatenia, obchmenonazov, sidlofirmy, pravnaforma, sudregistracie, ico, dic, cislouctu, doplnujuceinfo, hodnotafaplneniaval
- print "####################################"
- print json.dumps({
- "Evidencne cislo Faktury" : evcisloval,
- "Prinalezi k" : prinalezival,
- "Popis fakturovaneho plnenia" : popisfaplnenia,
- "Hodnota fakturovaneho plnenia s DPH" : hodnotafaplnenia,
- "Datum dorucenia faktury" : datumdfa,
- "Datum zaplatenia faktury" : datumzfa,
- "Forma Zaplatenia" : formazaplatenia,
- "Obchodne meno - Nazov Dodavatela" : obchmenonazov,
- "Sidlo pravnickej osoby - Miesto podnikania" : sidlofirmy,
- "Pravna forma" : pravnaforma,
- "Sud registracie" : sudregistracie,
- "ICO" : ico,
- "DIC" : dic,
- "Cislo Uctu" : cislouctu,
- "Doplnuja informacia" : doplnujuceinfo,
- "URL Stranky Faktury" : url,
- "URL Faktura PDF" : "http://www.zvjs.sk" + urlfa
- }, sort_keys=False, encoding="windows-1250", ensure_ascii=False, separators=(',', ':'))
- print "####################################"
- output = "{""\"Evidencne cislo Faktury\"" ":" "\""+evcisloval.encode("utf-8") + "\"," "\"Prinalezi k\"" ":" "\""+prinalezival.encode("utf-8") + "\"," "\"Popis fakturovaneho plnenia\"" ":" "\""+popisfaplnenia.encode("utf-8") + "\"," "\"Hodnota fakturovaneho plnenia s DPH\"" ":" "\""+prinalezival.encode("utf-8") + "\"," "\"Datum dorucenia faktury\"" ":" "\""+datumdfa.encode("utf-8") + "\"," "\"Datum zaplatenia faktury\"" ":" "\""+datumzfa.encode("utf-8") + "\"," "\"Forma Zaplatenia\"" ":" "\""+formazaplatenia.encode("utf-8") + "\"," "\"Obchodne meno - Nazov Dodavatela\"" ":" "\""+obchmenonazov.encode("utf-8") + "\"," "\"Sidlo pravnickej osoby - Miesto podnikania\"" ":" "\""+sidlofirmy.encode("utf-8") + "\"," "\"Pravna forma\"" ":" "\""+pravnaforma.encode("utf-8") + "\"," "\"Sud registracie\"" ":" "\""+sudregistracie.encode("utf-8") + "\"," "\"ICO\"" ":" "\""+ico.encode("utf-8") + "\"," "\"DIC\"" ":" "\""+dic.encode("utf-8") + "\"," "\"Cislo Uctu\"" ":" "\""+cislouctu.encode("utf-8") + "\"," "\"Doplnuja informacia\"" ":" "\""+doplnujuceinfo.encode("utf-8") + "\"," "\"URL Stranky Faktury\"" ":" "\""+url.encode("utf-8") + "\"," "\"URL Faktura PDF\"" ":" "\"""http://www.zvjs.sk" + urlfa.encode("utf-8") +"\"""}" +"\n"
- outfile = open('/home/pi/scrapy/1.txt', 'a')
- outfile.write(output)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement