Advertisement
Typhoon

Scrape Z*** Faktury

Feb 3rd, 2015
271
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.54 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import sys
  4. from bs4 import BeautifulSoup
  5. from bs4 import NavigableString
  6. import re
  7. import urllib2
  8. import json
  9.  
  10. url= 'http://www.zvjs.sk/index.php?fa_obj&type=fa&id=110478'
  11. page = urllib2.urlopen(url)
  12. soup = BeautifulSoup(page.read(), from_encoding="windows-1252")
  13.  
  14. evcislo = soup.find_all('td')[3].text
  15. evcisloval = evcislo.strip()
  16. prinalezi = soup.find_all('td')[5].text
  17. prinalezival = prinalezi.strip()
  18. popisfaplnenia = soup.find_all('td')[7].text
  19. hodnotafaplnenia = soup.find_all('td')[9].text
  20. hodnotafaplneniaval = hodnotafaplnenia.encode("utf-8")
  21. datumdfa = soup.find_all('td')[11].text
  22. datumzfa = soup.find_all('td')[13].text
  23. formazaplatenia = soup.find_all('td')[15].text
  24. if not formazaplatenia:
  25.     formazaplatenia="NULL"
  26. obchmenonazov = soup.find_all('td')[17].text
  27. sidlofirmy = soup.find_all('td')[19].text
  28. pravnaforma = soup.find_all('td')[21].text
  29. sudregistracie = soup.find_all('td')[23].text
  30. ico = soup.find_all('td')[25].text
  31. dic = soup.find_all('td')[27].text
  32. cislouctu = soup.find_all('td')[29].text
  33. doplnujuceinfo = soup.find_all('td')[31].text
  34. for link in soup.find_all("a", limit=1):
  35.     urlfa = (link.get('href'))
  36.  
  37. print urlfa, "\n"
  38.  
  39. print evcisloval, prinalezival, popisfaplnenia, hodnotafaplnenia, datumdfa, datumzfa, formazaplatenia, obchmenonazov, sidlofirmy, pravnaforma, sudregistracie, ico, dic, cislouctu, doplnujuceinfo, hodnotafaplneniaval
  40.  
  41. print "####################################"
  42.  
  43. print json.dumps({
  44. "Evidencne cislo Faktury" : evcisloval,
  45. "Prinalezi k" : prinalezival,
  46. "Popis fakturovaneho plnenia" : popisfaplnenia,
  47. "Hodnota fakturovaneho plnenia s DPH" : hodnotafaplnenia,
  48. "Datum dorucenia faktury" : datumdfa,
  49. "Datum zaplatenia faktury" : datumzfa,
  50. "Forma Zaplatenia" : formazaplatenia,
  51. "Obchodne meno - Nazov Dodavatela" : obchmenonazov,
  52. "Sidlo pravnickej osoby - Miesto podnikania" : sidlofirmy,
  53. "Pravna forma" : pravnaforma,
  54. "Sud registracie" : sudregistracie,
  55. "ICO" : ico,
  56. "DIC" : dic,
  57. "Cislo Uctu" : cislouctu,
  58. "Doplnuja informacia" : doplnujuceinfo,
  59. "URL Stranky Faktury" : url,
  60. "URL Faktura PDF" : "http://www.zvjs.sk" + urlfa
  61. }, sort_keys=False, encoding="windows-1250", ensure_ascii=False, separators=(',', ':'))
  62.  
  63.  
  64. print "####################################"
  65. output = "{""\"Evidencne cislo Faktury\"" ":" "\""+evcisloval.encode("utf-8") + "\"," "\"Prinalezi k\"" ":" "\""+prinalezival.encode("utf-8") + "\","  "\"Popis fakturovaneho plnenia\"" ":" "\""+popisfaplnenia.encode("utf-8") + "\"," "\"Hodnota fakturovaneho plnenia s DPH\"" ":" "\""+prinalezival.encode("utf-8") + "\"," "\"Datum dorucenia faktury\"" ":" "\""+datumdfa.encode("utf-8") + "\"," "\"Datum zaplatenia faktury\"" ":" "\""+datumzfa.encode("utf-8") + "\"," "\"Forma Zaplatenia\"" ":" "\""+formazaplatenia.encode("utf-8") + "\"," "\"Obchodne meno - Nazov Dodavatela\"" ":" "\""+obchmenonazov.encode("utf-8") + "\"," "\"Sidlo pravnickej osoby - Miesto podnikania\"" ":" "\""+sidlofirmy.encode("utf-8") + "\"," "\"Pravna forma\"" ":" "\""+pravnaforma.encode("utf-8") + "\"," "\"Sud registracie\"" ":" "\""+sudregistracie.encode("utf-8") + "\"," "\"ICO\"" ":" "\""+ico.encode("utf-8") + "\"," "\"DIC\"" ":" "\""+dic.encode("utf-8") + "\"," "\"Cislo Uctu\"" ":" "\""+cislouctu.encode("utf-8") + "\"," "\"Doplnuja informacia\"" ":" "\""+doplnujuceinfo.encode("utf-8") + "\"," "\"URL Stranky Faktury\"" ":" "\""+url.encode("utf-8") + "\"," "\"URL Faktura PDF\"" ":" "\"""http://www.zvjs.sk" + urlfa.encode("utf-8") +"\"""}" +"\n"
  66. outfile = open('/home/pi/scrapy/1.txt', 'a')
  67. outfile.write(output)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement