Guest User

Untitled

a guest
Jun 18th, 2018
52
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.55 KB | None | 0 0
  1. class VoteSpider(scrapy.Spider):
  2. name = "test"
  3.  
  4. def start_requests(self):
  5.  
  6. self.start_url = [
  7. "http://www.domain.de/URI.html?get=1&getX=2",
  8. "http://www.domain.de/URI.html?get=2&getX=3",
  9. "http://www.domain.de/URI.html?get=3&getX=4",
  10. "http://www.domain.de/URI.html?get=4&getX=5"
  11. ]
  12.  
  13. for url in self.start_url:
  14. self.a = 0
  15. self.url = url
  16. self.page = self.url.split("/")[-1]
  17. self.filename = '%s.csv' % self.page
  18. with open(self.filename, 'w') as f:
  19. f.write('URL:;'+self.url+'n')
  20.  
  21. yield scrapy.Request(url=self.url,callback=self.parse,dont_filter = True)
  22.  
  23. def parse(self, response):
  24. sel = Selector(response)
  25.  
  26. votes = sel.xpath('//div[contains(@class,"ratings")]/ul')
  27.  
  28. with open(self.filename, 'a') as f:
  29. for vote in votes:
  30. self.a+=1
  31. f.write(str(self.a)+';'+vote.xpath('./li/text()').extract())
  32.  
  33. if len(votes.xpath('//a[contains(@class,"next")]/@href').extract()) != 0:
  34. next_page = votes.xpath('//a[contains(@class,"next")]/@href').extract()[0]
  35. if next_page is not None:
  36. yield response.follow(next_page, callback=self.parse, dont_filter=True)
  37.  
  38. URI.html?get=1&getX=2.csv
  39.  
  40. for url in self.start_url:
  41. self.a = 0
  42. self.url = url
  43. self.page = self.url.split("/")[-1]
  44. self.filename = '%s.csv' % self.page
  45. with open(self.filename, 'w') as f:
  46. f.write('URL:;'+self.url+'n')
Add Comment
Please, Sign In to add comment