Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Print product names and prices scraped from the TigerDirect deal-of-the-day RSS feed."""
import re
from urllib.request import urlopen

FEED_URL = 'http://www.tigerdirect.com/sectors/category/deal-of-the-day-rss.asp'

# Raw strings keep the regex escapes intact; the non-raw originals relied on
# invalid string escapes (e.g. "\<", "\d"), which newer Pythons warn about.
# Only the square brackets and the dot actually need escaping.
TITLE_PATTERN = re.compile(r"<title><!\[CDATA\[(.*?)\]\]></title>")
LIST_PRICE_PATTERN = re.compile(r"List Price: (.*?)<br /><strong>")
# \u20ac is the euro sign. The previous pattern escaped the "[" of its
# character classes ("\[\d]+"), so it looked for literal brackets around a
# single digit and could never match an amount such as 12.50.
EURO_COST_PATTERN = re.compile(r"<!\[CDATA\[\u20ac\d+\.\d+\]\]>")


def fetch_feed(url: str = FEED_URL) -> str:
    """Download *url* and return its body decoded as UTF-8.

    The response is used as a context manager so the connection is closed
    even if reading or decoding raises.
    """
    with urlopen(url) as response:
        return response.read().decode('UTF-8')


def extract_product_names(feed_text: str) -> list:
    """Return the text of every ``<title><![CDATA[...]]></title>`` element."""
    return TITLE_PATTERN.findall(feed_text)


def extract_list_prices(feed_text: str) -> list:
    """Return the text between each ``List Price: `` and ``<br /><strong>``."""
    return LIST_PRICE_PATTERN.findall(feed_text)


def extract_euro_costs(feed_text: str) -> list:
    """Return every CDATA section that holds only a euro amount.

    The pattern has no capture group, so each result is the whole match,
    including the surrounding ``<![CDATA[`` and ``]]>`` markers.
    """
    return EURO_COST_PATTERN.findall(feed_text)


def main() -> None:
    """Fetch the feed and print product names, list prices and euro costs."""
    feed_text = fetch_feed()
    print(extract_product_names(feed_text))
    print(extract_list_prices(feed_text))
    print(extract_euro_costs(feed_text))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement