Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def parse_reports(self, response):
    """Yield one follow-up request per talk cell found on the schedule page.

    For every matching schedule cell, the talk's complexity, material
    links and tags are collected into a dict that is handed to
    ``parse_authors`` through the request ``meta`` under the
    ``report_dict`` key.

    Cells whose link contains ``bof`` or ``party`` are skipped.  A cell
    that raises ``TypeError`` while being processed (for instance when the
    link lookup does not produce a string to join) is reported with
    ``print`` and skipped as well.

    Args:
        response: Response for the schedule page; only its ``xpath``
            method is used here.

    Yields:
        ``Request`` objects pointing at the individual talk pages.
    """
    # Raw string: ``\d`` has to reach the XPath regex untouched instead of
    # being parsed as an (invalid, deprecated) Python string escape.
    talk_cells = response.xpath(
        r'//div[re:test(@class,"schedule__cell schedule__cell--talk col-\d-\d-\d")]'
    )
    for frame in talk_cells:
        try:
            report_link = ''.join((
                'https://2019.jokerconf.com',
                frame.xpath('.//a[@class="link schedule__link"]/@href').get(),
                '/',
            ))
            if 'bof' in report_link or 'party' in report_link:
                continue

            complexity_text = frame.xpath(
                './/div[@class="schedule__helper"]//img/@title'
            ).get()
            material_links = [
                self.materials_dict_form(material_link)
                for material_link in frame.xpath('.//a/@href').getall()
            ]
            print(material_links)

            # Build a fresh dict for every cell.  Sharing one dict across
            # all requests would make every callback see the data of the
            # cell processed last, because ``meta`` holds a reference.
            # NOTE(review): a title missing from COMPLEXITY_VALUES raises
            # KeyError, which is not caught below - confirm that is intended.
            report_dict = {
                'complexity': {
                    'value': self.COMPLEXITY_VALUES[complexity_text],
                    'name': complexity_text,
                },
                'source': material_links,
                # Each tag is stripped and its first character dropped
                # (presumably a leading marker such as '#').
                'tags': [
                    tag.strip()[1:]
                    for tag in frame.xpath(
                        './/i[@class="schedule__tags"]//nobr/text()'
                    ).getall()
                ],
            }
            yield Request(
                report_link,
                callback=self.parse_authors,
                meta={'report_dict': report_dict},
            )
        except TypeError as typeErr:
            print(f'exception {typeErr} raised')
            continue
def parse_authors(self, response):
    """Assemble the report for one talk page and yield the conference.

    The complexity, source and tags values are taken from the
    ``report_dict`` stored in ``response.meta``; title, description and
    speaker details are extracted from the talk page itself.  The finished
    report is stored under ``self.conference['report']`` and
    ``self.conference`` is yielded.

    Args:
        response: Response for a talk page; its ``meta`` must contain a
            ``report_dict`` entry with ``complexity``, ``source`` and
            ``tags`` keys.

    Yields:
        ``self.conference`` with its ``report`` entry set to the new report.
    """
    schedule_data = response.meta['report_dict']

    report = Reports()
    report['complexity'] = schedule_data['complexity']
    report['source'] = schedule_data['source']
    report['tags'] = schedule_data['tags']
    report['title'] = response.xpath('//h1[@class="talk_title"]/text()').get()
    report['description'] = response.xpath(
        '//main[@class="talk-main"]//p/text()'
    ).get()

    # First four-digit group in the conference name, or '' when there is
    # none.  ``str(*re.findall(...))`` gave the same result for zero or one
    # match but raised TypeError as soon as the name held two or more
    # groups.  The value does not depend on the speaker, so it is computed
    # once, before the loop.
    year_match = re.search(r'\d\d\d\d', self.conference['name'])
    conference_year = year_match.group() if year_match else ''

    speakers_list = []
    for speaker_sec in response.xpath('//div[@class="talk-speaker"]'):
        speaker = Speakers()
        contact_info = ContactInfo()
        speaker['name'] = speaker_sec.xpath(
            './/h5[@class="speaker-info_name"]/text()'
        ).get()
        speaker['avatar'] = speaker_sec.xpath(
            './/img[@class="img-fluid"]/@src'
        ).get()
        speaker['bio'] = speaker_sec.xpath(
            './/div[@class="speaker-info_bio"]//p/text()'
        ).get()
        # Company is stored as a (company text, year string) pair.
        contact_info['company'] = (
            speaker_sec.xpath(
                './/h6[@class="speaker-info_company"]/text()'
            ).get(),
            conference_year,
        )
        contact_info['twitterUsername'] = speaker_sec.xpath(
            './/div[@class="speaker_profiles"]//a[@class="twitter_link"]/@href'
        ).get()
        speaker['contactInfo'] = contact_info
        speakers_list.append(speaker)

    report['speakers'] = speakers_list
    # NOTE(review): the same ``self.conference`` object is mutated and
    # yielded for every talk; if downstream consumers keep a reference,
    # later talks will overwrite its 'report' entry - confirm this is safe.
    self.conference['report'] = report
    yield self.conference
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement