Advertisement
Guest User

Untitled

a guest
Jan 18th, 2017
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.83 KB | None | 0 0
  1. import scrapy
  2. from scrapy.crawler import CrawlerProcess
  3. from time import time
  4.  
  5. def isAscii(s):
  6. return all(ord(c) < 128 for c in s)
  7.  
  8. region = input('Region? ')
  9. start = int(input('Start page? '))
  10. end = int(input('End page? '))
  11.  
  12. startTime = time()
  13.  
  14. class LolSpider(scrapy.Spider):
  15. name = 'lolspider'
  16. start_urls = ['http://lolprofile.net/leaderboards/{}/{}'.format(region, page) for page in range(start, end)]
  17. def parse(self, response):
  18. for name in response.xpath('//span[not(@class)]/text()').extract():
  19. with open('lolusernames.txt', 'a') as f:
  20. if (' ' not in name) and (name != 'LoL') and isAscii(name):
  21. f.write(name+'n')
  22.  
  23.  
  24. process = CrawlerProcess({})
  25.  
  26. process.crawl(LolSpider)
  27. process.start()
  28.  
  29. endTime = time()
  30. print('Completed in {:.2f} seconds'.format(endTime-startTime))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement