Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- from scrapy.spiders import CrawlSpider, Rule
- from scrapy.linkextractors import LinkExtractor
class SolidSpider(CrawlSpider):
    """Spider for the film/video/TV production branch listing on solidinfo.se.

    Starting from one listing page, it follows every link whose URL contains
    ``/&fP=<number>`` and passes each fetched page to :meth:`parse_item`.
    """

    name = "solid"
    allowed_domains = ["solidinfo.se"]
    start_urls = ['http://www.solidinfo.se/bransch/=Film-Video-TV-program-produktion-inspelning-12100/&fP=2']

    # Listing URLs noted while developing the spider:
    #   http://www.solidinfo.se/bransch/Film-Video-TV-program-produktion-inspelning-12100
    #   http://www.solidinfo.se/bransch/=Film-Video-TV-program-produktion-inspelning-12100/&fP=2
    #   http://www.solidinfo.se/bransch/=Film-Video-TV-program-produktion-inspelning-12100/&fP=3

    rules = [
        Rule(
            LinkExtractor(
                # Only follow links that carry an ``fP=<digits>`` component.
                allow=([r'/&fP=\d+']),
            ),
            callback='parse_item',
            follow=True,
        ),
    ]

    def parse_item(self, response):
        """Print the link text and href found in each matched block.

        For every node matched by the CSS selector, the anchor inside the
        node's first child ``div`` is read: its text nodes and its ``href``
        attribute are extracted as lists and printed.

        Args:
            response: the response handed to this callback by the crawl rule.
        """
        # NOTE(review): 'div0.ftlk' selects an element *named* ``div0`` with
        # class ``ftlk``. The XPath notes below refer to id="div0", so a
        # selector such as 'div#div0' may have been intended -- confirm
        # against the live page before changing it.
        divs = response.css('div0.ftlk')
        for div in divs:
            business_title = div.xpath('div[1]/a/text()').extract()
            business_link = div.xpath('div[1]/a/@href').extract()
            # Single-argument print() calls produce the same output on
            # Python 2 and Python 3; the bare ``print x`` statement is a
            # SyntaxError on Python 3.
            print(business_title)
            print(business_link)

        # Selector notes kept from development (not used by the code above):
        #   //*[@id="div0"]/div[1]/a
        #   //*[@id="ctl00_ContentPlaceHolder1_ucOversikt_pnlNyckeltal"]
        #   //*[@id="ctl00_ContentPlaceHolder1_ucOversikt_pnlNyckeltal"]/div/table
        #   //*[@id="ctl00_ContentPlaceHolder1_ucOversikt_pnlNyckeltal"]/div/table/tbody/tr[2]
        #   #ctl00_ContentPlaceHolder1_ucOversikt_pnlNyckeltal > div > table
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement