Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import scrapy
- import time
- from scrapy.loader import ItemLoader
- from xs2.items import PersonItem
- from itemloaders.processors import Join, MapCompose, TakeFirst
class XmasSpider(scrapy.Spider):
    """Spider that walks paginated listing pages and scrapes contact data.

    ``parse`` follows every ad link on a listing page and the "next page"
    link; ``parse_item`` builds a ``PersonItem`` for each ad whose imprint
    paragraph (``p#viewad-imprint-text``) contains an e-mail address.
    """

    name = 'ebay'
    # NOTE(review): ``myurl`` is not defined in the visible part of the file;
    # it must be provided at module level before this class is created.
    start_urls = [myurl]

    # E-mail pattern shared by the pre-check and the item loader.
    # The ``-`` sits last in the local-part character class so it is a literal
    # hyphen; the previous ``+-:`` formed a range that also accepted ',', '/'
    # and ':' (so "mailto:foo@bar.de" in the markup was captured whole).
    # The TLD is no longer capped at 4 letters, which truncated long TLDs.
    EMAIL_PATTERN = r'([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})'

    def parse(self, response):
        """Follow all ad links and the pagination link of a listing page.

        Yields requests for each ad (handled by ``parse_item``) and for the
        next listing page (handled by ``parse`` again).
        """
        page_links = response.css('h2.text-module-begin a.ellipsis')
        yield from response.follow_all(page_links, self.parse_item)

        pagination_links = response.css('a.pagination-next')
        yield from response.follow_all(pagination_links, self.parse)

    def parse_item(self, response):
        """Build a ``PersonItem`` from a single ad page.

        Returns the loaded item when the imprint paragraph contains an e-mail
        address; otherwise logs a message and returns ``None`` so the ad is
        skipped.
        """
        imprint = response.css('p#viewad-imprint-text')
        email = imprint.re_first(self.EMAIL_PATTERN)
        if email is None:
            # Ads without a contact e-mail are not of interest.
            self.logger.info('No email, passing...')
            return None

        loader = ItemLoader(item=PersonItem(), response=response)
        loader.add_xpath(
            'name',
            '//span[@class="text-bold text-bigger text-force-linebreak"]',
            TakeFirst(),
        )
        loader.add_xpath(
            'link',
            '//head//link[@rel="canonical"]/@href',
            TakeFirst(),
        )
        loader.add_xpath(
            'email',
            '//p[@id="viewad-imprint-text"]',
            TakeFirst(),
            re=self.EMAIL_PATTERN,
        )
        # The full imprint element is kept as raw data next to the e-mail.
        loader.add_xpath('data', '//p[@id="viewad-imprint-text"]')
        loader.add_xpath(
            'city',
            '//div[@class="boxedarticle--details--full"]'
            '//span[@id="viewad-locality"]',
        )
        # NOTE: zipcode extraction (r'\D(\d{5})\D' over the details box) was
        # disabled in the original code and remains disabled here.

        # The street is optional on the page; fall back to a single space so
        # the field is always populated.
        if response.css('span#street-address'):
            loader.add_css('street', 'span#street-address')
        else:
            loader.add_value('street', ' ')

        return loader.load_item()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement