Guest User

Untitled

a guest
Dec 14th, 2018
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.94 KB | None | 0 0
  1. import scrapy
  2. from scrapy.crawler import CrawlerProcess
  3.  
  class PythonEventsSpider(scrapy.Spider):
      """Spider that collects the events listed on the python.org events page.

      Every event found while parsing is stored in ``found_events`` as a dict
      with the keys ``name``, ``location`` and ``time``.
      """

      name = 'pythoneventsspider'

      start_urls = ['https://www.python.org/events/pythonevents/']

      # Declared on the class, so this one list object is shared by the class
      # and all of its instances.
      found_events = []

      def parse(self, response):
          """Append one dict per event list item in ``response`` to ``found_events``."""
          event_nodes = response.xpath('//ul[contains(@class, "list-recent-events")]/li')
          for node in event_nodes:
              # XPaths below are relative to the current <li> node.
              title = node.xpath('h3[@class="event-title"]/a/text()').extract_first()
              place = node.xpath('p/span[@class="event-location"]/text()').extract_first()
              when = node.xpath('p/time/text()').extract_first()
              self.found_events.append(
                  {'name': title, 'location': place, 'time': when}
              )
  17.  
  18.  
  if __name__ == "__main__":
      # Run the spider in-process with the log level set to ERROR.
      process = CrawlerProcess({'LOG_LEVEL': 'ERROR'})
      process.crawl(PythonEventsSpider)

      # Keep a reference to the spider before starting the crawl; the
      # collected events are read back from it afterwards.
      crawler = next(iter(process.crawlers))
      spider = crawler.spider
      process.start()

      # Print every event dict the spider gathered, one per line.
      for event in spider.found_events:
          print(event)
Add Comment
Please, Sign In to add comment