Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
internet_archive = 'InternetArchive'

################ PUT YOUR SOLUTION HERE #################

# Download the BBC News RSS feed and pull the per-story fields out of the raw
# XML text with regular expressions.
# NOTE(review): `urlopen` and `findall` are not defined in this fragment --
# presumably `urllib.request.urlopen` and `re.findall` imported elsewhere;
# confirm.
rss_link = 'http://feeds.bbci.co.uk/news/rss.xml?edition=uk'

# Mode 'w' creates/truncates the output file as soon as this line runs.
# NOTE(review): nothing in this fragment writes to or closes html_file; the
# code that finishes the page must call html_file.close().
html_file = open('18October.html','w')

# Fetch the feed. The with-block closes the HTTP response once the body has
# been read (assumes a Python 3 urlopen, whose response is a context manager).
with urlopen(rss_link) as rss_feed_data:
    rss_contents = rss_feed_data.read().decode('UTF-8')

# All patterns below are raw strings, so backslashes reach the regex engine
# untouched, and use non-greedy groups (.*?) so that two elements sharing a
# line are matched separately instead of being merged into one capture.

#extract title
title = findall(r'<title><!\[CDATA\[(.*?)\]\]></title>', rss_contents)
title = title[2:] # skip the first 2 <title> tags, these are the webpage title

#extract the description
description = findall(r'<description><!\[CDATA\[(.*?)\]\]></description>', rss_contents)
# Drop the first match -- presumably the feed-level description; verify
# against the live feed.
description = description[1:]

#extract the link
link = findall(r'<link>(.*?)</link>', rss_contents)
# Drop the first two matches -- presumably the feed-level links; verify
# against the live feed.
link = link[2:]

#extract the publication date
date = findall(r'<pubDate>(.*?)</pubDate>', rss_contents)

#extract the image
# Captures whatever follows the fixed width/height attributes up to the
# closing "/>" of the tag (presumably the url="..." attribute, including its
# leading space).
image = findall(r'<media:thumbnail width="976" height="549"(.*?)/>', rss_contents)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement