Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- from multiprocessing.pool import Pool
- from signal import signal, SIGINT
- from sys import stdout
- from urllib.request import urlopen
- from pymongo import MongoClient
# Connect with MongoClient's default connection settings and use the
# `sites` collection of the `sites` database.
mongo = MongoClient()
db = mongo.sites

# Cursor over every document in the collection; each one is handed to load().
sites = db.sites.find({})

# Number of documents still to be processed (counted down as results are
# stored).  Cursor.count() is deprecated and was removed in PyMongo 4, so the
# count is taken from the collection instead.
c = db.sites.count_documents({})

# Number of documents whose page was fetched successfully.
successes = 0
def load(site):
    """Fetch the page for one site document and record the outcome on it.

    Args:
        site: Mapping whose ``'site'`` key holds the URL to fetch.  It is
            modified in place.

    Returns:
        The same ``site`` mapping.  On success ``'site_data'`` holds the raw
        response body (bytes); on any failure ``'exception'`` holds the
        error text instead.
    """
    # Local import: Request is not among the names the file imports at the top.
    from urllib.request import Request, urlopen

    try:
        # Headers belong to the outgoing request.  The object returned by
        # urlopen() is the response and has no add_header(), so they must be
        # attached before the URL is opened.
        request = Request(
            site['site'],
            headers={
                'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0',
                'Accept': 'text/html',
            },
        )
        with urlopen(request, timeout=15) as response:
            site['site_data'] = response.read()
    except Exception as ex:
        # Deliberate best effort: a failed fetch is stored on the document
        # rather than raised, so one bad site does not abort the whole run.
        site['exception'] = str(ex)
    return site
def sighandle(signum, frame):
    """SIGINT handler: stop the worker pool, if there is one, and exit.

    Args:
        signum: Signal number passed in by the signal module (unused).
        frame: Interrupted stack frame passed in by the signal module (unused).

    Raises:
        SystemExit: Always, to end the process.
    """
    # Local import: only `stdout` is imported from sys at the top of the file.
    import sys

    # The handler can fire before the module-level `pool` has been assigned
    # (it is registered first), and in processes that inherited the handler
    # but never define `pool`.  Treat a missing pool as "nothing to stop".
    try:
        active_pool = pool
    except NameError:
        active_pool = None

    if active_pool is not None:
        active_pool.terminate()

    # sys.exit() rather than the site-provided exit(), which is meant for
    # interactive sessions and is not guaranteed to exist.
    sys.exit()
def _store_results(pending, remaining, succeeded):
    """Wait for each pending fetch, write it back to MongoDB, report progress.

    Args:
        pending: Iterable of AsyncResult objects returned by
            ``pool.apply_async(load, ...)``.
        remaining: Number of documents not yet stored before this call.
        succeeded: Number of successful fetches before this call.

    Returns:
        Tuple ``(remaining, succeeded)`` updated for the results stored here.
    """
    for result in pending:
        site = result.get()
        if 'site_data' in site:
            succeeded += 1
        # Whole-document replacement; Collection.update() was removed in
        # PyMongo 4 and replace_one() is its equivalent for this use.
        db.sites.replace_one({'_id': site['_id']}, site)
        remaining -= 1
        stdout.write('%s/%s\r' % (remaining, succeeded))
        # '\r' does not trigger a line-buffered flush, so push it out explicitly.
        stdout.flush()
    return remaining, succeeded


# Guarded so that worker processes which import this module (spawn and
# forkserver start methods) do not start pools of their own.
if __name__ == "__main__":
    from signal import SIG_IGN

    threads_count = 90
    # Workers ignore Ctrl-C; the parent's handler terminates them instead.
    pool = Pool(
        processes=threads_count,
        initializer=signal,
        initargs=(SIGINT, SIG_IGN),
    )
    # Registered only after `pool` exists, since the handler uses it.
    signal(SIGINT, sighandle)

    results = []
    for site in sites:
        results.append(pool.apply_async(load, (site,)))
        # Drain in batches so finished pages are written back while the
        # cursor is still being read, instead of piling up in memory.
        if len(results) > threads_count / 1.5:
            c, successes = _store_results(results, c, successes)
            results = []

    # The last batch is usually smaller than the threshold; store it too.
    c, successes = _store_results(results, c, successes)

    pool.close()
    pool.join()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement