Advertisement
Joeytje50

Untitled

Dec 20th, 2012
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Fetch a RuneScape news article and print its body converted from HTML to
  # MediaWiki-style wikitext.  Written to run unchanged on Python 2 and 3.
  import re

  try:
      import urllib2  # Python 2
  except ImportError:
      # Python 3 moved urlopen(); keep the old module name so the rest of the
      # script reads the same on both versions.
      import urllib.request as urllib2

  url = "http://services.runescape.com/m=news/behind-the-scenes-video-30-player-owned-ports-"

  # Byte markers that delimit the article body inside the downloaded page.
  CONTENT_OPEN = b'<div class="Content">'
  CONTENT_CLOSE = b'</div>'

  # Encoding the article fragment is decoded with.
  PAGE_ENCODING = "windows-1252"

  # `<a\s` (instead of `<a.*?`) keeps tags such as <abbr> from being mistaken
  # for links, and the `[^>]` classes stop a match from leaking out of the
  # opening tag when the href is unquoted.
  LINK_RE = re.compile(r'<a\s[^>]*?href="?([^ ">]*)"?[^>]*>(.*?)</a>')
  RULE_RE = re.compile(r'<hr\b[^>]*>')
  EMBED_RE = re.compile(r'<iframe.*?embed/(.*?)".*?</iframe>')


  def fetch_page(address, timeout=30):
      """Download *address* and return the raw response body as bytes.

      The response is always closed, and *timeout* (seconds) keeps the script
      from hanging forever on an unresponsive server.
      """
      response = urllib2.urlopen(address, timeout=timeout)
      try:
          return response.read()
      finally:
          response.close()


  def extract_content(page):
      """Return the bytes between the content <div> and the next </div>.

      *page* is the raw (undecoded) page.  Raises ValueError with a
      descriptive message when either marker is missing.

      NOTE: the cut is made at the first closing </div> after the opening
      marker, so a <div> nested inside the article truncates the result.
      """
      start = page.find(CONTENT_OPEN)
      if start == -1:
          raise ValueError("content marker %r not found in page" % (CONTENT_OPEN,))
      start += len(CONTENT_OPEN)
      end = page.find(CONTENT_CLOSE, start)
      if end == -1:
          raise ValueError("closing %r not found after content marker" % (CONTENT_CLOSE,))
      return page[start:end]


  def html_to_wikitext(fragment):
      """Convert a decoded HTML *fragment* to wikitext and return it.

      Handles paragraphs, bold/italic, unordered lists, links, horizontal
      rules and embedded-video iframes; any other markup is left untouched.
      """
      # Source line breaks carry no meaning in HTML; drop them so that every
      # newline in the result comes from a rule below.
      text = fragment.replace('\n', '')
      text = text.replace("</p>", "\n\n").replace('<p>', '')
      text = re.sub(r"</?b>", "'''", text)
      text = re.sub(r"</?i>", "''", text)
      text = text.replace('<li>', '*').replace('</li>', '\n')
      text = re.sub(r"</?ul>", "\n", text)
      text = LINK_RE.sub(r"[\1 \2]", text)
      text = RULE_RE.sub("\n----\n", text)
      text = EMBED_RE.sub(r'<youtube>\1</youtube>\n\n', text)
      # Collapse runs of four or more newlines (e.g. from empty paragraphs).
      # NOTE(review): runs of exactly three are left alone, as in the
      # original script -- confirm whether {3,} was intended.
      return re.sub(r'\n{4,}', '\n\n', text)


  def main():
      """Fetch the article at ``url`` and print it as wikitext."""
      print("Printing stuffs...")
      # Slice first, decode second: only the article fragment has to be
      # valid in the page encoding.
      fragment = extract_content(fetch_page(url)).decode(PAGE_ENCODING)
      print(html_to_wikitext(fragment))


  if __name__ == "__main__":
      main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement