Advertisement
Guest User

Untitled

a guest
Jun 22nd, 2017
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.82 KB | None | 0 0
  1. import scrapy
  2. from articles_crawler.items import SubjectItem, AuthorItem, ArticleItem
  3.  
  4. class ArticlesSpider(scrapy.spiders.XMLFeedSpider):
  5. name = "articles"
  6. start_urls = ['http://gizmodo.uol.com.br/feed/']
  7. iterator = 'xml'
  8. itertag = 'item'
  9.  
  10. def parse_node(self, response, node):
  11. node.remove_namespaces()
  12. authorItem = AuthorItem()
  13. authorItem['name'] = node.xpath('//creator/text()').extract()
  14. return authorItem
  15.  
  16.  
  17. -------------------------------------
  18. {'name': ['Rae Paoletta']}
  19. +++++++++++++++++++++++++++++++++++++
  20. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  21. None
  22. -------------------------------------
  23. {'name': ['Rae Paoletta', 'Zach Ezer']}
  24. +++++++++++++++++++++++++++++++++++++
  25. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  26. None
  27. -------------------------------------
  28. {'name': ['Rae Paoletta', 'Zach Ezer', 'Ryan F. Mandelbaum']}
  29. +++++++++++++++++++++++++++++++++++++
  30. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  31. None
  32. -------------------------------------
  33. {'name': ['Rae Paoletta', 'Zach Ezer', 'Ryan F. Mandelbaum', 'Matt Novak']}
  34. +++++++++++++++++++++++++++++++++++++
  35. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  36. None
  37. -------------------------------------
  38. {'name': ['Rae Paoletta',
  39. 'Zach Ezer',
  40. 'Ryan F. Mandelbaum',
  41. 'Matt Novak',
  42. 'Ryan F. Mandelbaum']}
  43. +++++++++++++++++++++++++++++++++++++
  44. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  45. None
  46. -------------------------------------
  47. {'name': ['Rae Paoletta',
  48. 'Zach Ezer',
  49. 'Ryan F. Mandelbaum',
  50. 'Matt Novak',
  51. 'Ryan F. Mandelbaum',
  52. 'Alessandro Junior']}
  53. +++++++++++++++++++++++++++++++++++++
  54. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  55. None
  56. -------------------------------------
  57. {'name': ['Rae Paoletta',
  58. 'Zach Ezer',
  59. 'Ryan F. Mandelbaum',
  60. 'Matt Novak',
  61. 'Ryan F. Mandelbaum',
  62. 'Alessandro Junior',
  63. 'Matt Novak']}
  64. +++++++++++++++++++++++++++++++++++++
  65. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  66. None
  67. -------------------------------------
  68. {'name': ['Rae Paoletta',
  69. 'Zach Ezer',
  70. 'Ryan F. Mandelbaum',
  71. 'Matt Novak',
  72. 'Ryan F. Mandelbaum',
  73. 'Alessandro Junior',
  74. 'Matt Novak',
  75. 'Adam Clark Estes']}
  76. +++++++++++++++++++++++++++++++++++++
  77. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  78. None
  79. -------------------------------------
  80. {'name': ['Rae Paoletta',
  81. 'Zach Ezer',
  82. 'Ryan F. Mandelbaum',
  83. 'Matt Novak',
  84. 'Ryan F. Mandelbaum',
  85. 'Alessandro Junior',
  86. 'Matt Novak',
  87. 'Adam Clark Estes',
  88. 'Alessandro Junior']}
  89. +++++++++++++++++++++++++++++++++++++
  90. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  91. None
  92. -------------------------------------
  93. {'name': ['Rae Paoletta',
  94. 'Zach Ezer',
  95. 'Ryan F. Mandelbaum',
  96. 'Matt Novak',
  97. 'Ryan F. Mandelbaum',
  98. 'Alessandro Junior',
  99. 'Matt Novak',
  100. 'Adam Clark Estes',
  101. 'Alessandro Junior',
  102. 'George Dvorsky']}
  103. +++++++++++++++++++++++++++++++++++++
  104. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  105. None
  106. -------------------------------------
  107. {'name': ['Rae Paoletta',
  108. 'Zach Ezer',
  109. 'Ryan F. Mandelbaum',
  110. 'Matt Novak',
  111. 'Ryan F. Mandelbaum',
  112. 'Alessandro Junior',
  113. 'Matt Novak',
  114. 'Adam Clark Estes',
  115. 'Alessandro Junior',
  116. 'George Dvorsky',
  117. 'Rae Paoletta']}
  118. +++++++++++++++++++++++++++++++++++++
  119. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  120. None
  121. -------------------------------------
  122. {'name': ['Rae Paoletta',
  123. 'Zach Ezer',
  124. 'Ryan F. Mandelbaum',
  125. 'Matt Novak',
  126. 'Ryan F. Mandelbaum',
  127. 'Alessandro Junior',
  128. 'Matt Novak',
  129. 'Adam Clark Estes',
  130. 'Alessandro Junior',
  131. 'George Dvorsky',
  132. 'Rae Paoletta',
  133. 'George Dvorsky']}
  134. +++++++++++++++++++++++++++++++++++++
  135. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  136. None
  137. -------------------------------------
  138. {'name': ['Rae Paoletta',
  139. 'Zach Ezer',
  140. 'Ryan F. Mandelbaum',
  141. 'Matt Novak',
  142. 'Ryan F. Mandelbaum',
  143. 'Alessandro Junior',
  144. 'Matt Novak',
  145. 'Adam Clark Estes',
  146. 'Alessandro Junior',
  147. 'George Dvorsky',
  148. 'Rae Paoletta',
  149. 'George Dvorsky',
  150. 'George Dvorsky']}
  151. +++++++++++++++++++++++++++++++++++++
  152. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  153. None
  154. -------------------------------------
  155. {'name': ['Rae Paoletta',
  156. 'Zach Ezer',
  157. 'Ryan F. Mandelbaum',
  158. 'Matt Novak',
  159. 'Ryan F. Mandelbaum',
  160. 'Alessandro Junior',
  161. 'Matt Novak',
  162. 'Adam Clark Estes',
  163. 'Alessandro Junior',
  164. 'George Dvorsky',
  165. 'Rae Paoletta',
  166. 'George Dvorsky',
  167. 'George Dvorsky',
  168. 'Kristen V. Brown']}
  169. +++++++++++++++++++++++++++++++++++++
  170. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  171. None
  172. -------------------------------------
  173. {'name': ['Rae Paoletta',
  174. 'Zach Ezer',
  175. 'Ryan F. Mandelbaum',
  176. 'Matt Novak',
  177. 'Ryan F. Mandelbaum',
  178. 'Alessandro Junior',
  179. 'Matt Novak',
  180. 'Adam Clark Estes',
  181. 'Alessandro Junior',
  182. 'George Dvorsky',
  183. 'Rae Paoletta',
  184. 'George Dvorsky',
  185. 'George Dvorsky',
  186. 'Kristen V. Brown',
  187. 'Kate Conger']}
  188. +++++++++++++++++++++++++++++++++++++
  189. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  190. None
  191. -------------------------------------
  192. {'name': ['Rae Paoletta',
  193. 'Zach Ezer',
  194. 'Ryan F. Mandelbaum',
  195. 'Matt Novak',
  196. 'Ryan F. Mandelbaum',
  197. 'Alessandro Junior',
  198. 'Matt Novak',
  199. 'Adam Clark Estes',
  200. 'Alessandro Junior',
  201. 'George Dvorsky',
  202. 'Rae Paoletta',
  203. 'George Dvorsky',
  204. 'George Dvorsky',
  205. 'Kristen V. Brown',
  206. 'Kate Conger',
  207. 'Matt Novak']}
  208. +++++++++++++++++++++++++++++++++++++
  209. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  210. None
  211. -------------------------------------
  212. {'name': ['Rae Paoletta',
  213. 'Zach Ezer',
  214. 'Ryan F. Mandelbaum',
  215. 'Matt Novak',
  216. 'Ryan F. Mandelbaum',
  217. 'Alessandro Junior',
  218. 'Matt Novak',
  219. 'Adam Clark Estes',
  220. 'Alessandro Junior',
  221. 'George Dvorsky',
  222. 'Rae Paoletta',
  223. 'George Dvorsky',
  224. 'George Dvorsky',
  225. 'Kristen V. Brown',
  226. 'Kate Conger',
  227. 'Matt Novak',
  228. 'Rae Paoletta']}
  229. +++++++++++++++++++++++++++++++++++++
  230. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  231. None
  232. -------------------------------------
  233. {'name': ['Rae Paoletta',
  234. 'Zach Ezer',
  235. 'Ryan F. Mandelbaum',
  236. 'Matt Novak',
  237. 'Ryan F. Mandelbaum',
  238. 'Alessandro Junior',
  239. 'Matt Novak',
  240. 'Adam Clark Estes',
  241. 'Alessandro Junior',
  242. 'George Dvorsky',
  243. 'Rae Paoletta',
  244. 'George Dvorsky',
  245. 'George Dvorsky',
  246. 'Kristen V. Brown',
  247. 'Kate Conger',
  248. 'Matt Novak',
  249. 'Rae Paoletta',
  250. 'Kristen V. Brown']}
  251. +++++++++++++++++++++++++++++++++++++
  252. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  253. None
  254. -------------------------------------
  255. {'name': ['Rae Paoletta',
  256. 'Zach Ezer',
  257. 'Ryan F. Mandelbaum',
  258. 'Matt Novak',
  259. 'Ryan F. Mandelbaum',
  260. 'Alessandro Junior',
  261. 'Matt Novak',
  262. 'Adam Clark Estes',
  263. 'Alessandro Junior',
  264. 'George Dvorsky',
  265. 'Rae Paoletta',
  266. 'George Dvorsky',
  267. 'George Dvorsky',
  268. 'Kristen V. Brown',
  269. 'Kate Conger',
  270. 'Matt Novak',
  271. 'Rae Paoletta',
  272. 'Kristen V. Brown',
  273. 'Leo Escudeiro']}
  274. +++++++++++++++++++++++++++++++++++++
  275. 2017-06-22 23:58:53 [scrapy.core.scraper] DEBUG: Scraped from <200 http://gizmodo.uol.com.br/feed/>
  276. None
  277. -------------------------------------
  278. {'name': ['Rae Paoletta',
  279. 'Zach Ezer',
  280. 'Ryan F. Mandelbaum',
  281. 'Matt Novak',
  282. 'Ryan F. Mandelbaum',
  283. 'Alessandro Junior',
  284. 'Matt Novak',
  285. 'Adam Clark Estes',
  286. 'Alessandro Junior',
  287. 'George Dvorsky',
  288. 'Rae Paoletta',
  289. 'George Dvorsky',
  290. 'George Dvorsky',
  291. 'Kristen V. Brown',
  292. 'Kate Conger',
  293. 'Matt Novak',
  294. 'Rae Paoletta',
  295. 'Kristen V. Brown',
  296. 'Leo Escudeiro',
  297. 'Kristen V. Brown']}
  298. +++++++++++++++++++++++++++++++++++++
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement