Advertisement
Guest User

Untitled

a guest
Feb 1st, 2021
24
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.00 KB | None | 0 0
  1. import re
  2. import scrapy
  3. import time
  4. from scrapy.loader import ItemLoader
  5. from xs2.items import PersonItem
  6. from itemloaders.processors import Join, MapCompose, TakeFirst
  7.  
  8.  
  9.  
  10.  
  11.  
  12.  
  13.  
  14. class XmasSpider(scrapy.Spider):
  15. name = 'ebay'
  16. start_urls = [myurl]
  17.  
  18.  
  19. def parse(self,response):
  20. page_links = response.css('h2.text-module-begin a.ellipsis')
  21.  
  22.  
  23. yield from response.follow_all(page_links, self.parse_item)
  24.  
  25.  
  26. pagination_links = response.css('a.pagination-next')
  27. yield from response.follow_all(pagination_links, self.parse)
  28.  
  29.  
  30.  
  31. def parse_item(self, response):
  32. email = response.css('p#viewad-imprint-text').re_first(r'''([a-zA-Z0-9._%+-:]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4})''')
  33. address = response.css('span#street-address')
  34.  
  35. if email is not None:
  36. i = ItemLoader(item=PersonItem(),response=response)
  37. #i.add_xpath('name', '//p[@id="viewad-imprint-text"]',TakeFirst(), re =r"[^()0-9-]+" ),
  38. i.add_xpath('name', '//span[@class="text-bold text-bigger text-force-linebreak"]',TakeFirst(), ),
  39. i.add_xpath('link', '//head//link[@rel="canonical"]/@href',TakeFirst(), ),
  40. i.add_xpath('email', '//p[@id="viewad-imprint-text"]',TakeFirst(), re =r'''([a-zA-Z0-9._%+-:]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4})''' ),
  41. i.add_xpath('data', '//p[@id="viewad-imprint-text"]'),
  42. i.add_xpath('city', '//div[@class="boxedarticle--details--full"]//span[@id="viewad-locality"]'),
  43. #i.add_xpath('zipcode', '//div[@class="boxedarticle--details--full"]',TakeFirst(), re =r'\D(\d{5})\D'),
  44. #i.add_xpath('street', '//div[@class="boxedarticle--details--full"]')
  45. if address :
  46. i.add_css('street', 'span#street-address')
  47. else:
  48. i.add_value('street', ' ')
  49.  
  50.  
  51. return i.load_item()
  52. else:
  53.  
  54. print('No email, passing...')
  55.  
  56. pass
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement