Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scrape the RuneScape item database "top 100" page and build a mapping from
# each row's data-item-id attribute to the text of that row's sixth <td> cell.
from contextlib import closing

from bs4 import BeautifulSoup

try:
    import urllib2  # Python 2; see http://love-python.blogspot.nl/2008/02/get-html-source-of-url.html
except ImportError:
    # Python 3 moved urlopen() into urllib.request; alias it so the rest of
    # the script can keep calling urllib2.urlopen().
    import urllib.request as urllib2

url = 'http://services.runescape.com/m=itemdb_rs/top100.ws'

# closing() guarantees the connection is released even if read() raises, and
# works on Python 2 where the urlopen() result is not a context manager.
with closing(urllib2.urlopen(url)) as usock:
    data = usock.read()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(data, "html.parser")

# "data-item-id" is not a valid Python keyword-argument name, so the filter
# has to be passed through attrs= instead of as a keyword.
# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#the-keyword-arguments
tr = soup.find_all("tr", attrs={"data-item-id": True})

obj = {}
for thistr in tr:
    # Each result is already a Tag; query it directly rather than re-parsing.
    td = thistr.find_all('td')
    if len(td) <= 5:
        # Row does not have a sixth cell; skip it instead of raising IndexError.
        continue
    # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#contents-and-children
    # - you can keep working with bs4 objects after indexing with [n].
    obj[thistr['data-item-id']] = td[5].string

# obj now contains all data in the format {"2":"107.6m",...}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement