Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Fetch a RuneScape news article and print its content <div> converted from
# HTML to wiki markup (bold/italic quotes, bracketed links, ---- rules and
# <youtube> tags).
import re

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

url = "http://services.runescape.com/m=news/behind-the-scenes-video-30-player-owned-ports-"

# Byte markers delimiting the article body inside the raw page.
_CONTENT_START = b'<div class="Content">'
_CONTENT_END = b'</div>'

# Ordered (pattern, replacement) pairs.  re.sub already replaces every
# occurrence, so no JavaScript-style "g" flag is needed, and groups are
# referenced as \1, \2 (not $1, $2).
_REPLACEMENTS = (
    (r"<p>", "\n\n"),
    (r"</?b>", "'''"),
    (r"</?i>", "''"),
    (r"</?li>", "\n"),
    (r"</?ul>", "\n"),
    # Anchors with a quoted href first, then whatever is left (unquoted href).
    (r'<a.*?href="(.*?)".*?>(.*?)</a>', r"[\1, \2]"),
    (r'<a.*?href="?([^ "]*)"?.*?>(.*?)</a>', r"[\1, \2]"),
    (r"<hr.*?>", "\n----\n"),
    (r'<iframe.*?embed/(.*?)".*?</iframe>', r"<youtube>\1</youtube>"),
)


def extract_content(page):
    """Return the article body of *page* as decoded text.

    *page* is the raw page as bytes.  The result starts at the opening
    ``<div class="Content">`` tag (inclusive) and stops just before the first
    ``</div>`` that follows it, decoded as windows-1252.

    Raises ValueError if either marker is missing.
    """
    start = page.index(_CONTENT_START)
    end = page.index(_CONTENT_END, start)
    return page[start:end].decode("windows-1252")


def html_to_wikitext(html):
    """Apply the HTML-to-wiki-markup substitutions to *html*, in order."""
    for pattern, replacement in _REPLACEMENTS:
        html = re.sub(pattern, replacement, html)
    return html


def main():
    """Download the page at ``url`` and print its converted content."""
    print("Printing stuffs...")
    page = urlopen(url).read()
    print(html_to_wikitext(extract_content(page)))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement