View difference between Paste ID: TDYZ92ED and QUqDBwLU
SHOW: | | - or go back to the newest paste.
1
from bs4 import BeautifulSoup
2
3
import urllib2 #http://love-python.blogspot.nl/2008/02/get-html-source-of-url.html
4
5
url = 'http://services.runescape.com/m=itemdb_rs/top100.ws'
6
7
usock = urllib2.urlopen(url)
8
data = usock.read()
9
usock.close()
10
11
soup = BeautifulSoup(data)
12
13
tr = soup.find_all("tr")
14
obj = {}
15
for thistr in tr:
16-
	BStr = BeautifulSoup(thistr)
16+
	if 'data-item-id' in thistr:
17-
	if 'data-item-id' in BStr:
17+
		td = thistr.find_all('td')
18-
		td = BStr.find_all('td')
18+
		obj[thistr['data-item-id']] = td[5].string
19-
		obj[BStr['data-item-id']] = td[5].string
19+
20
print obj
21
22
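# obj is meant to map each row's data-item-id attribute to the text of its sixth <td>, in the format {"2":"107.6m",...}
# NOTE(review): `'data-item-id' in thistr` tests the tag's children, not its attributes, so obj likely stays empty; thistr.has_attr('data-item-id') is the attribute check -- confirm against the bs4 docs.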