Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- forum_loader = ItemLoader(item=forumItem(), selector=threadSel)
- forum_loader.default_output_processor = TakeFirst()  # keep only the first non-empty value scraped for each field, not the whole list
- forum_loader.add_xpath('thread_name', ".//td[@class='cell-topic js-cell-topic']/div[@class='topic-wrapper js-topic-wrapper h-wordwrap']/a[@class='topic-title js-topic-title']/text()")
- forum_loader.add_xpath('url',".//td[@class='cell-topic js-cell-topic']/div[@class='topic-wrapper js-topic-wrapper h-wordwrap']/a[@class='topic-title js-topic-title']/@href")
- forum_loader.add_xpath('url_id',".//td[@class='cell-topic js-cell-topic']/div[@class='topic-wrapper js-topic-wrapper h-wordwrap']/a[@class='topic-title js-topic-title']/@href")
- forum_loader.add_xpath('responses',".//td[@class='cell-count']/div[@class='posts-count']/text()")
- forum_loader.add_xpath('dateStarted',".//td[@class='cell-topic js-cell-topic']/div[@class='topic-info h-clear h-hide-on-small h-hide-on-narrow-column']/span[@class='date']/text()")
- forum_loader.add_xpath('dateLastUpdated',".//td[@class='cell-lastpost']/span[@class='post-date']/text()")
- forum_loader.add_xpath('lastPostBy',".//td[@class='cell-lastpost']/div[@class='lastpost-by']/a/text()")
- forum_loader.add_xpath('forum_section',".//td[@class='cell-topic js-cell-topic']/div[@class='topic-info h-clear h-hide-on-small h-hide-on-narrow-column']/span[@class='f-title']/a/text()")
- # yield forum_loader.load_item()
- # print('>>>%s' % url)
- yield scrapy.Request(url, callback=self.parse_thread_details, meta={'itemLoader': forum_loader.load_item()})
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement