Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- from articles_crawler.items import SubjectItem, AuthorItem, ArticleItem
class ArticlesSpider(scrapy.spiders.XMLFeedSpider):
    """Feed spider that emits one AuthorItem per ``item`` element of the feed."""

    name = "articles"
    start_urls = ['http://gizmodo.uol.com.br/feed/']
    # Iterate the feed with the XML iterator, calling parse_node per <item>.
    iterator = 'xml'
    itertag = 'item'

    def parse_node(self, response, node):
        """Build an AuthorItem from a single feed entry.

        Args:
            response: The feed response currently being iterated.
            node: Selector for the current ``itertag`` element.

        Returns:
            An AuthorItem whose ``name`` field is the list of ``creator``
            text values found inside this node only.
        """
        # Strip namespaces so the creator element can be matched by its
        # bare tag name (presumably it is namespace-prefixed in the feed).
        node.remove_namespaces()

        author_item = AuthorItem()
        # The leading "." keeps the query relative to this node. An absolute
        # "//creator" searches the whole document, so each item would also
        # collect the creators of other entries.
        author_item['name'] = node.xpath('.//creator/text()').extract()
        return author_item
- -------------------------------------
- {'name': ['Rae Paoletta']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta', 'Zach Ezer']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta', 'Zach Ezer', 'Ryan F. Mandelbaum']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta', 'Zach Ezer', 'Ryan F. Mandelbaum', 'Matt Novak']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes',
- 'Alessandro Junior']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes',
- 'Alessandro Junior',
- 'George Dvorsky']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes',
- 'Alessandro Junior',
- 'George Dvorsky',
- 'Rae Paoletta']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes',
- 'Alessandro Junior',
- 'George Dvorsky',
- 'Rae Paoletta',
- 'George Dvorsky']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes',
- 'Alessandro Junior',
- 'George Dvorsky',
- 'Rae Paoletta',
- 'George Dvorsky',
- 'George Dvorsky']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes',
- 'Alessandro Junior',
- 'George Dvorsky',
- 'Rae Paoletta',
- 'George Dvorsky',
- 'George Dvorsky',
- 'Kristen V. Brown']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes',
- 'Alessandro Junior',
- 'George Dvorsky',
- 'Rae Paoletta',
- 'George Dvorsky',
- 'George Dvorsky',
- 'Kristen V. Brown',
- 'Kate Conger']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes',
- 'Alessandro Junior',
- 'George Dvorsky',
- 'Rae Paoletta',
- 'George Dvorsky',
- 'George Dvorsky',
- 'Kristen V. Brown',
- 'Kate Conger',
- 'Matt Novak']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes',
- 'Alessandro Junior',
- 'George Dvorsky',
- 'Rae Paoletta',
- 'George Dvorsky',
- 'George Dvorsky',
- 'Kristen V. Brown',
- 'Kate Conger',
- 'Matt Novak',
- 'Rae Paoletta']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes',
- 'Alessandro Junior',
- 'George Dvorsky',
- 'Rae Paoletta',
- 'George Dvorsky',
- 'George Dvorsky',
- 'Kristen V. Brown',
- 'Kate Conger',
- 'Matt Novak',
- 'Rae Paoletta',
- 'Kristen V. Brown']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes',
- 'Alessandro Junior',
- 'George Dvorsky',
- 'Rae Paoletta',
- 'George Dvorsky',
- 'George Dvorsky',
- 'Kristen V. Brown',
- 'Kate Conger',
- 'Matt Novak',
- 'Rae Paoletta',
- 'Kristen V. Brown',
- 'Leo Escudeiro']}
- +++++++++++++++++++++++++++++++++++++
- 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
- None
- -------------------------------------
- {'name': ['Rae Paoletta',
- 'Zach Ezer',
- 'Ryan F. Mandelbaum',
- 'Matt Novak',
- 'Ryan F. Mandelbaum',
- 'Alessandro Junior',
- 'Matt Novak',
- 'Adam Clark Estes',
- 'Alessandro Junior',
- 'George Dvorsky',
- 'Rae Paoletta',
- 'George Dvorsky',
- 'George Dvorsky',
- 'Kristen V. Brown',
- 'Kate Conger',
- 'Matt Novak',
- 'Rae Paoletta',
- 'Kristen V. Brown',
- 'Leo Escudeiro',
- 'Kristen V. Brown']}
- +++++++++++++++++++++++++++++++++++++
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement