Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class VoteSpider(scrapy.Spider):
    """Scrape rating lists into one CSV file per start URL.

    For every start URL a ``<last-path-segment>.csv`` file is created with a
    ``URL:;<url>`` header line.  Each ``<ul>`` found inside a ``div`` whose
    class contains ``ratings`` becomes one semicolon-separated row prefixed
    with a running number.  Pagination links (``a`` elements whose class
    contains ``next``) are followed and their rows are appended to the same
    file.

    Scrapy handles responses asynchronously, so the target filename and the
    running row counter travel with each request in ``Request.meta`` instead
    of being stored on the spider instance, where requests for different
    start URLs would overwrite each other's values.
    """

    name = "test"

    # Keys used in Request.meta to carry per-start-URL state between callbacks.
    _META_FILENAME = 'vote_filename'
    _META_COUNT = 'vote_count'

    def start_requests(self):
        """Create the CSV file for each start URL and yield its first request."""
        self.start_url = [
            "http://www.domain.de/URI.html?get=1&getX=2",
            "http://www.domain.de/URI.html?get=2&getX=3",
            "http://www.domain.de/URI.html?get=3&getX=4",
            "http://www.domain.de/URI.html?get=4&getX=5",
        ]
        for url in self.start_url:
            # The file is named after the last path segment of the URL,
            # query string included (e.g. "URI.html?get=1&getX=2.csv").
            filename = '%s.csv' % url.split("/")[-1]
            # Mode 'w' truncates any file left over from a previous run.
            with open(filename, 'w') as f:
                f.write('URL:;' + url + '\n')
            yield scrapy.Request(
                url=url,
                callback=self.parse,
                dont_filter=True,
                meta={self._META_FILENAME: filename, self._META_COUNT: 0},
            )

    def parse(self, response):
        """Append one numbered row per ratings list, then follow the next page.

        Reads the target filename and the current row counter from
        ``response.meta`` and hands the updated counter on to the request for
        the next page, so numbering continues across pages of the same start
        URL.
        """
        filename = response.meta[self._META_FILENAME]
        count = response.meta[self._META_COUNT]

        votes = response.xpath('//div[contains(@class,"ratings")]/ul')
        with open(filename, 'a') as f:
            for vote in votes:
                count += 1
                # extract() returns a list of strings; join it so it can be
                # written as a single semicolon-separated row.
                texts = vote.xpath('./li/text()').extract()
                f.write(str(count) + ';' + ';'.join(texts) + '\n')

        # extract_first() returns None when the page has no "next" link.
        next_page = response.xpath(
            '//a[contains(@class,"next")]/@href'
        ).extract_first()
        if next_page is not None:
            yield response.follow(
                next_page,
                callback=self.parse,
                dont_filter=True,
                meta={self._META_FILENAME: filename, self._META_COUNT: count},
            )
- URI.html?get=1&getX=2.csv
# Excerpt: per-URL setup loop (references `self`, so it belongs inside a method).
for url in self.start_url:
    self.a = 0
    self.url = url
    # Last path segment of the URL, query string included.
    self.page = self.url.split("/")[-1]
    self.filename = '%s.csv' % self.page
    # Mode 'w' creates or truncates the file; the header line must end with
    # a real newline ('\n'), not the letter 'n'.
    with open(self.filename, 'w') as f:
        f.write('URL:;' + self.url + '\n')
Add Comment
Please, Sign In to add comment