Advertisement
Guest User

Untitled

a guest
Feb 23rd, 2017
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.45 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. from scrapy.spiders import CrawlSpider, Rule
  4. from scrapy.linkextractors import LinkExtractor
  5.  
  6.  
  7. class SolidSpider(CrawlSpider):
  8.     name = "solid"
  9.     allowed_domains = ["solidinfo.se"]
  10.     start_urls = ['http://www.solidinfo.se/bransch/=Film-Video-TV-program-produktion-inspelning-12100/&fP=2']
  11.  
  12.     #http://www.solidinfo.se/bransch/Film-Video-TV-program-produktion-inspelning-12100
  13.     #http://www.solidinfo.se/bransch/=Film-Video-TV-program-produktion-inspelning-12100/&fP=2
  14.     #http://www.solidinfo.se/bransch/=Film-Video-TV-program-produktion-inspelning-12100/&fP=3
  15.  
  16.     rules = [
  17.  
  18.         Rule(
  19.             LinkExtractor(
  20.                 allow=([r'/&fP=\d+']),
  21.             ),
  22.             callback='parse_item',
  23.             follow=True,
  24.         ),
  25.     ]
  26.  
  27.  
  28.     def parse_item(self, response):
  29.  
  30.         divs = response.css('div0.ftlk')
  31.  
  32.         for div in divs:
  33.             business_title = div.xpath('div[1]/a/text()').extract()
  34.             business_link = div.xpath ('div[1]/a/@href').extract()
  35.             print business_title
  36.             print business_link
  37.  
  38.  
  39. #//*[@id="div0"]/div[1]/a
  40. #//*[@id="div0"]/div[1]/a
  41.  
  42.  
  43.  
  44. #//*[@id="ctl00_ContentPlaceHolder1_ucOversikt_pnlNyckeltal"]
  45. #//*[@id="ctl00_ContentPlaceHolder1_ucOversikt_pnlNyckeltal"]/div/table
  46. #//*[@id="ctl00_ContentPlaceHolder1_ucOversikt_pnlNyckeltal"]/div/table/tbody/tr[2]
  47. ##ctl00_ContentPlaceHolder1_ucOversikt_pnlNyckeltal > div > table
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement