SHOW:
|
|
- or go back to the newest paste.
# Fetch a RuneScape news article and print its body converted from HTML to
# wiki markup.

import re

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen; keep the old module name so call sites match.
    import urllib.request as urllib2

url = "http://services.runescape.com/m=news/behind-the-scenes-video-30-player-owned-ports-"

# (pattern, replacement) pairs applied in order by html_to_wikitext().
# re.sub() already replaces every match, so no "global" flag is needed, and
# group references in the replacement are written \1, \2 (not $1, $2).
_SUBSTITUTIONS = (
    (r"</?b>", "'''"),
    (r"</?i>", "''"),
    (r"</?li>", "\n"),
    (r"</?ul>", "\n"),
    # Links with a double-quoted href.
    (r'<a.*?href="(.*?)".*?>(.*?)</a>', r"[\1, \2]"),
    # Remaining links, i.e. those whose href is not quoted.
    (r'<a.*?href="?([^ "]*)"?.*?>(.*?)</a>', r"[\1, \2]"),
    (r"<hr.*?>", "\n----\n"),
    # Embedded player: keep only the video id that follows "embed/".
    (r'<iframe.*?embed/(.*?)".*?</iframe>', r"<youtube>\1</youtube>"),
)


def extract_content(page):
    """Return the article body found in the raw bytes of *page*.

    The result starts at the opening ``<div class="Content">`` tag (the tag
    itself is included) and stops just before the first ``</div>`` that
    follows it. The slice is taken on the raw bytes and only then decoded as
    windows-1252, so bytes elsewhere in the page are never decoded.

    Raises ValueError if either marker is missing.
    """
    start = page.index(b'<div class="Content">')
    end = page.index(b"</div>", start)
    return page[start:end].decode("windows-1252")


def html_to_wikitext(html):
    """Rewrite the handful of HTML tags used by the article as wiki markup.

    ``<p>`` becomes a blank line; the remaining rewrites are listed in
    ``_SUBSTITUTIONS`` and are applied in that order. Anything not matched
    by one of those rules is left untouched.
    """
    html = html.replace("<p>", "\n\n")
    for pattern, replacement in _SUBSTITUTIONS:
        html = re.sub(pattern, replacement, html)
    return html


def main():
    """Download ``url`` and print the converted article body."""
    print("Printing stuffs...")
    response = urllib2.urlopen(url)
    try:
        page = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()
    print(html_to_wikitext(extract_content(page)))


if __name__ == "__main__":
    main()