Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scrape the RuneScape item database "top 100" page into two dicts that map
# item name -> amount and item id -> amount.

from contextlib import closing

try:  # Python 2, which this script was originally written for
    from urllib2 import urlopen
except ImportError:  # Python 3
    from urllib.request import urlopen

# http://love-python.blogspot.nl/2008/02/get-html-source-of-url.html
url = 'http://services.runescape.com/m=itemdb_rs/top100.ws'


def parse_amount(text):
    """Convert the text of one amount cell into a number.

    The last character of ``text`` is treated as a unit suffix and dropped.

    * Suffix ``'b'``: the remaining number is returned unscaled, as an
      ``int`` when it is written with a trailing ``.0`` and as a ``float``
      otherwise.
    * Any other last character: the remaining number is multiplied by 1000
      and returned as an ``int``.

    Raises ``IndexError`` for an empty string and ``ValueError`` when the
    part before the suffix is not a number.
    """
    number = text[:-1]
    if text[-1] == 'b':
        # NOTE(review): 'b' values are returned unscaled while every other
        # suffix is scaled by 1000 -- confirm this is the intended unit.
        if number.endswith('.0'):
            # int('1.0') raises ValueError, so go through float first.
            return int(float(number))
        return float(number)
    # round() before int() so a product that lands a hair below a whole
    # number because of float representation is not truncated down by one.
    return int(round(float(number) * 1000))


def fetch(address):
    """Return the raw body served at ``address``.

    The connection is closed even when reading fails.
    """
    with closing(urlopen(address)) as usock:
        return usock.read()


def parse_top100(html):
    """Extract ``(itnames, itids)`` from the top-100 page markup.

    Every ``<tr>`` of the first ``<tbody>`` contributes one entry: the first
    link in its first cell supplies the item name (link text) and the item
    id (everything after ``obj=`` in the link's ``href``), and its sixth
    cell supplies the amount (see ``parse_amount``).
    """
    # Imported here so the helpers above remain usable where bs4 is not
    # installed.
    from bs4 import BeautifulSoup

    tbody = BeautifulSoup(html).find_all('tbody')[0]
    itnames = {}
    itids = {}
    for row in tbody.find_all('tr'):
        cells = row.find_all('td')
        link = cells[0].find_all('a')[0]
        href = link['href']
        item_id = href[href.index('obj=') + 4:]
        amount = parse_amount(cells[5].string)
        itnames[link.string] = amount
        itids[item_id] = amount
    return itnames, itids


if __name__ == '__main__':
    itnames, itids = parse_top100(fetch(url))
    # The original printed an undefined name (``obj``); print the results.
    print(itids)
    print(itnames)
    # itids now contains all data in the format {"2":107.6,...}
    # itnames now contains all data in the format {"Cannonball":107.6,...}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement