Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import scrapy


class QuotesSpider(scrapy.Spider):
    """Walk the TUH EEG download index and emit every link it contains.

    Each crawled page produces one item per ``<a href>`` found on it, and
    every link that points to a sub-directory of the current page is
    scheduled for crawling with the same callback.
    """

    name = "tuh"

    # Credentials picked up by Scrapy's HttpAuthMiddleware (HTTP basic auth).
    http_user = 'nedc_tuh_eeg'
    http_pass = 'nedc_tuh_eeg'
    # Scrapy >= 2.5.1 uses this to restrict the credentials above to a single
    # host; older versions ignore the attribute.
    http_auth_domain = 'www.isip.piconepress.com'

    def start_requests(self):
        """Yield the initial request for the root of the download tree."""
        urls = [
            'https://www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg/'
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Emit every link on the page and descend into sub-directories.

        Yields:
            ``{'text': href}`` for each ``href`` attribute on the page (the
            raw attribute value, not resolved), followed by a request for
            each link that resolves to a directory below ``response.url``.
        """
        # ``getall()`` rather than ``get()``: ``get()`` returns only the first
        # match, which left every other link on the page unreported.
        for href in response.css('a::attr(href)').getall():
            yield {'text': href}

            # NOTE(review): assumes the server renders a directory index in
            # which sub-directory links end with '/' -- confirm against the
            # live listing.
            if not href.endswith('/'):
                continue

            target = response.urljoin(href)
            # Only go deeper: skip the page itself and anything that is not
            # underneath the current URL (e.g. a parent-directory link).
            if target == response.url or not target.startswith(response.url):
                continue

            # ``follow`` resolves relative URLs itself, so the raw href is
            # passed straight through.
            yield response.follow(href, callback=self.parse)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement