Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- from articles_crawler.items import SubjectItem, AuthorItem, ArticleItem
class ArticlesSpider(scrapy.spiders.XMLFeedSpider):
    """Feed spider that emits one ``AuthorItem`` per ``<item>`` of the feed.

    The feed listed in ``start_urls`` is parsed with the ``'xml'`` iterator
    and ``parse_node`` is invoked once for every ``<item>`` element.
    """

    name = "articles"
    start_urls = ['http://gizmodo.uol.com.br/feed/']
    iterator = 'xml'
    itertag = 'item'

    def parse_node(self, response, node):
        """Build an ``AuthorItem`` from a single feed entry.

        Args:
            response: The response the feed was downloaded in (unused here).
            node: Selector positioned on the current ``<item>`` element.

        Returns:
            An ``AuthorItem`` whose ``name`` field is the list of creator
            strings found inside this entry only.
        """
        # Drop namespace prefixes so the creator element can be addressed
        # by its bare local name in the XPath below.
        node.remove_namespaces()

        author_item = AuthorItem()
        # The leading "." keeps the query relative to this entry. A bare
        # "//creator" is evaluated from the document root and would return
        # the creators of every entry in the feed for each item.
        author_item['name'] = node.xpath('.//creator/text()').extract()
        return author_item
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement