Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from time import time

import scrapy
from scrapy.crawler import CrawlerProcess
def isAscii(s):
    """Return True if every character of *s* has a code point below 128.

    An empty string yields True, because there is no character that could
    fail the check.
    """
    return all(ord(c) < 128 for c in s)
# Crawl parameters are read interactively. int() raises ValueError when a
# page prompt is answered with something that is not an integer.
region = input('Region? ')
start = int(input('Start page? '))
end = int(input('End page? '))
# Reference timestamp for the elapsed-time report printed at the end of the
# script.
startTime = time()
class LolSpider(scrapy.Spider):
    """Spider that harvests names from lolprofile.net leaderboard pages.

    One URL is generated per page number for the module-level ``region``,
    and the accepted names from each downloaded page are appended to
    ``lolusernames.txt``.
    """

    name = 'lolspider'
    # NOTE(review): range() excludes ``end``, so the page number entered at
    # the "End page?" prompt is not itself crawled -- confirm this is intended.
    start_urls = [
        'http://lolprofile.net/leaderboards/{}/{}'.format(region, page)
        for page in range(start, end)
    ]

    def parse(self, response):
        """Append the accepted names found in *response* to lolusernames.txt.

        Candidates are the text nodes of every ``<span>`` that has no
        ``class`` attribute. A candidate is written, one per line, only if it
        contains no space, is not the literal string ``'LoL'``, and passes
        ``isAscii``.
        """
        candidates = response.xpath('//span[not(@class)]/text()').extract()
        # Open the output file once per page instead of once per name.
        with open('lolusernames.txt', 'a') as f:
            for name in candidates:
                if (' ' not in name) and (name != 'LoL') and isAscii(name):
                    # A real newline terminates each entry; the pasted code
                    # wrote a literal 'n' because the backslash was lost.
                    f.write(name + '\n')
# Run the spider inside this process using default (empty) settings.
# process.start() blocks until crawling has finished, so the timestamp taken
# afterwards covers the whole crawl.
process = CrawlerProcess({})
process.crawl(LolSpider)
process.start()
endTime = time()
print('Completed in {:.2f} seconds'.format(endTime-startTime))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement