Advertisement
Guest User

Untitled

a guest
Apr 4th, 2020
173
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.73 KB | None | 0 0
  1. import scrapy
  2.  
  3. class QuotesSpider(scrapy.Spider):
  4.     name = "tuh"
  5.     http_user = 'nedc_tuh_eeg'
  6.     http_pass = 'nedc_tuh_eeg'
  7.    
  8.     def start_requests(self):
  9.         urls = [
  10.             'https://www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg/'
  11.         ]
  12.         for url in urls:
  13.             yield scrapy.Request(url=url, callback=self.parse)
  14.  
  15.     def parse(self, response):
  16.         page = response.url.split("/")[-2]
  17.         next_page = response.css('a::attr(href)').get()
  18.         yield {
  19.             'text': response.css('a::attr(href)').get()
  20.         }
  21.         if next_page is not None:
  22.             next_page = response.urljoin(next_page)
  23.             yield response.follow(next_page, callback=self.parse)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement