Advertisement
Joeytje50

Untitled

Dec 20th, 2012
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import urllib2
  2. import re
  3. url = "http://services.runescape.com/m=news/behind-the-scenes-video-30-player-owned-ports-"
  4. print "Printing stuffs..."
  5. a = urllib2.urlopen(url).read()
  6. b = a[a.index('<div class=\"Content\">'):]
  7. c = b[:b.index('</div>')]
  8. cc = c.decode("windows-1252")
  9. html = cc.replace("<p>", "\n\n")
  10. html = re.sub(r"</?b>", "\'\'\'", html, flags = "g")
  11. html = re.sub(r"</?i>", "\'\'", html, flags = "g")
  12. html = re.sub(r"</?li>", "\n", html, flags = "g")
  13. html = re.sub(r"</?ul>", "\n", html, flags = "g")
  14. html = re.sub(r'<a.*?href="(.*?)".*?>(.*?)<\/a>', '[$1, $2]', html, flags = "g")
  15. html = re.sub(r'<a.*?href="?([^ "]*)"?.*?>(.*?)<\/a>', "[$1, $2]", html, flags = "g")
  16. html = re.sub(r'<hr.*?>', "\n----\n", html, flags = "g")
  17. html = re.sub(r'<iframe.*?embed\/(.*?)".*?<\/iframe>', '<youtube>$1</youtube>', html, flags = "g")
  18. print html
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement