Guest User

Untitled

a guest
Jan 19th, 2019
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.89 KB | None | 0 0
  1. class exhibitors_spider(scrapy.Spider):
  2. name = "exhibitors"
  3.  
  4. url = "some url"
  5.  
  6. def _create_item_class(self, class_name, field_list):
  7. field_dict = {}
  8. for field_name in field_list:
  9. field_dict[field_name] = Field()
  10. return type(str(class_name), (DictItem,), {'fields': field_dict})
  11.  
  12. def start_requests(self):
  13. yield Request(url=self.url, callback=self.parse_page, dont_filter=True)
  14.  
  15. def parse_page(self, response):
  16. Contact_Persons = {}
  17. Contact_Persons_blocks = response.selector.xpath("//h2[contains(text(),'Contact person')]/..//..//div/.//li")
  18. if Contact_Persons_blocks:
  19. for i in xrange(1, len(Contact_Persons_blocks) + 1):
  20. cp_name = Contact_Persons_blocks[i - 1].xpath(".//a[@itemprop='name']/bdi/text()").extract_first()
  21. if cp_name:
  22. cp_name = capwords(cp_name.encode('utf-8'))
  23. else:
  24. cp_name = 0
  25. Contact_Persons.update({"Contact_Person_Name_{}".format(i): cp_name})
  26.  
  27. cp_title = Contact_Persons_blocks[i - 1].xpath(".//div[@itemprop='jobTitle']/text()").extract_first()
  28. if cp_title:
  29. cp_title = capwords(cp_title.encode('utf-8'))
  30. else:
  31. cp_title = 0
  32. Contact_Persons.update({"Contact_Person_Title_{}".format(i): cp_title})
  33.  
  34. cp_link = Contact_Persons_blocks[i - 1].xpath(".//a[@class='ngn-mail-link']/@href").extract_first()
  35. if cp_link:
  36. cp_link = self.domain + cp_link
  37. else:
  38. cp_link = 0
  39. Contact_Persons.update({"Contact_Person_Link{}".format(i): cp_link})
  40.  
  41. ExhibitorsItem = self._create_item_class('ExhibitorsItem', Contact_Persons.keys())
  42.  
  43. for cp_key in Contact_Persons.keys():
  44. item[cp_key] = Contact_Persons[cp_key]
  45. yield item
Add Comment
Please, Sign In to add comment