xdenisx

parsing id meteo stations

Apr 26th, 2013
121
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.00 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. from lxml import etree
  3. import urllib
  4. import sqlite3
  5.  
  6. def url2db(db_name,st_id,st_name):
  7.     ''' INSERT DATA INTO DB '''
  8.     try:
  9.         conn = sqlite3.connect(db_name)
  10.         c = conn.cursor()
  11.         c.execute("INSERT OR FAIL INTO stations VALUES (?,?)", (st_id,st_name))
  12.         print 'a new data added to database!'
  13.         conn.commit()
  14.         conn.close()
  15.     except:
  16.         print "INSERT error!"
  17.  
  18. web = urllib.urlopen("http://meteocenter.net/_world_weather_stations.htm")
  19. s = web.read()
  20.  
  21. html = etree.HTML(s)
  22.  
  23. ## Get all 'tr'
  24. tr_nodes = html.xpath('//table/tr')
  25.  
  26. ## 'th' is inside first 'tr'
  27. header = [i[0].text for i in tr_nodes[0].xpath("th")]
  28.  
  29. ## Get text from rest all 'tr'
  30. td_content = [[td.text for td in tr.xpath('td')] for tr in tr_nodes[1:]]
  31.  
  32. for istation in td_content:
  33.     if (istation[-1]==u"Российская Федерация"):    
  34.         print istation[0], istation[1]
  35.         url2db('/home/www-data/db/meteo',istation[0], istation[1])
Advertisement
Add Comment
Please, Sign In to add comment