Advertisement
Guest User

Untitled

a guest
Oct 18th, 2017
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.87 KB | None | 0 0
  1. internet_archive = 'InternetArchive'
  2.  
  3.  
  4. ################ PUT YOUR SOLUTION HERE #################
  5. rss_link = 'http://feeds.bbci.co.uk/news/rss.xml?edition=uk'
  6. html_file = open('18October.html','w')
  7. rss_feed_data = urlopen(rss_link)
  8. rss_contents = rss_feed_data.read().decode('UTF-8')
  9.  
  10. #extract title
  11. title = findall('<title><!\[CDATA\[(.*)\]\]></title>', rss_contents)
  12. title = title[2:] # skip the first 2 <title> tags, these are the webpage title
  13.  
  14. #extract the description
  15. description = findall('<description><!\[CDATA\[(.*)\]\]></description>', rss_contents)
  16. description = description[1:]
  17.  
  18. #extract the link
  19. link = findall('<link>(.*)</link', rss_contents)
  20. link = link[2:]
  21.  
  22. #extract the publication date
  23. date = findall('<pubDate>(.*)</pubDate>', rss_contents)
  24.  
  25.  
  26. #extract the image
  27. image = findall('<media:thumbnail width="976" height="549"(.*)/>', rss_contents)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement