SHOW:
|
|
- or go back to the newest paste.
"""Fetch the RuneScape item-database top-100 page and map item ids to a cell value.

The script downloads the page at ``url``, walks every ``<tr>`` element and,
for each row that carries a ``data-item-id`` attribute, stores the text of
the row's sixth ``<td>`` under that id in ``obj``.
"""
from bs4 import BeautifulSoup

try:
    import urllib2  # Python 2; see http://love-python.blogspot.nl/2008/02/get-html-source-of-url.html
except ImportError:
    # Python 3 moved urlopen to urllib.request; keep the same local name.
    import urllib.request as urllib2

url = 'http://services.runescape.com/m=itemdb_rs/top100.ws'

usock = urllib2.urlopen(url)
try:
    data = usock.read()
finally:
    # Close the connection even if the read fails.
    usock.close()

# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(data, "html.parser")

tr = soup.find_all("tr")

obj = {}

for thistr in tr:
    # `'data-item-id' in thistr` would test the tag's children, not its
    # attributes, so the attribute has to be checked with has_attr().
    if not thistr.has_attr('data-item-id'):
        continue
    td = thistr.find_all('td')
    if len(td) <= 5:
        # Row does not have a sixth cell; skip it rather than crash.
        continue
    obj[thistr['data-item-id']] = td[5].string

print(obj)

#obj now contains all data in the format {"2":"107.6m",...}