View difference between Paste ID: <a href="/A32n4mug">A32n4mug</a> and <a href="/FbCjTpys">FbCjTpys</a>

from bs4 import BeautifulSoup
1		from bs4 import BeautifulSoup
2
3		import urllib2 #http://love-python.blogspot.nl/2008/02/get-html-source-of-url.html
4
5		url = 'http://services.runescape.com/m=itemdb_rs/top100.ws'
6
7		usock = urllib2.urlopen(url)
8		data = usock.read()
9		usock.close()
10
11	-	soup = BeautifulSoup(BeautifulSoup(data).find_all('tbody')[0])
11	+	soup = BeautifulSoup(data)
12
13	-	tr = soup.find_all("tr")
13	+	tr = soup.find_all("tr", attrs={"data-item-id": True}) # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#the-keyword-arguments - data-* names are not valid Python keyword arguments, so the filter goes through attrs; matches every tr that has a data-item-id
14		obj = {}
15		for thistr in tr:
16	-	bstr = BeautifulSoup(thistr)
17	-	td = bstr.find_all('td')
17	+	td = thistr.find_all('td') # thistr is already a bs4 Tag: no need to re-parse it with BeautifulSoup(), and the tr's attributes are read from the Tag itself
18	-	obj[bstr['data-item-id']] = BeautifulSoup(td[5]).string
18	+	obj[thistr['data-item-id']] = td[5].string # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#contents-and-children - You can just keep working with bs after the [n]
19
20		#obj now contains all data in the format {"2":"107.6m",...}