Advertisement
Guest User

Untitled

a guest
Dec 18th, 2014
130
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.22 KB | None | 0 0
  1. #!/usr/bin/python3
  2. from multiprocessing.pool import Pool
  3. from signal import signal, SIGINT
  4. from sys import stdout
  5. from urllib.request import urlopen
  6. from pymongo import MongoClient
  7.  
  # Module-level state shared by the rest of the script.
  # MongoClient() is called without arguments, so the driver's default
  # connection settings are used.
  mongo = MongoClient()
  db = mongo['sites']
  # Cursor over every document of the "sites" collection (no filter).
  sites = db['sites'].find()
  # Number of documents still to be processed; decremented per handled result.
  c = sites.count()
  # Number of sites whose page body was fetched successfully.
  successes = 0
  14.  
  15.  
  def load(site):
      """Download the page of one site record and store the outcome on it.

      Args:
          site: Mapping holding the URL to fetch under the ``'site'`` key.

      Returns:
          The same ``site`` object. On success the response body is stored
          under ``'site_data'``; on any failure the stringified exception is
          stored under ``'exception'`` instead. The function never raises for
          ordinary errors, so it is safe to run as a pool task.
      """
      # Local import keeps this block self-contained; the file only imports
      # urlopen at the top.
      from urllib.request import Request

      try:
          # Headers belong on the outgoing Request. The response object
          # returned by urlopen() has no add_header(), so setting them there
          # raised AttributeError and turned every fetch into a failure.
          request = Request(
              site['site'],
              headers={
                  'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0',
                  'Accept': 'text/html',
              },
          )
          with urlopen(request, timeout=15) as f:
              site['site_data'] = f.read()
      except Exception as ex:
          # Deliberately broad: one bad site must not abort the whole crawl;
          # the error text is recorded on the record instead.
          site['exception'] = str(ex)
      return site
  25.  
  26.  
  def sighandle(signum, frame):
      """Signal handler: stop the worker pool (if any) and exit the process.

      Args:
          signum: Number of the received signal (unused).
          frame: Interrupted stack frame (unused).

      Raises:
          SystemExit: Always, to end the process.
      """
      # Local import keeps this block self-contained; the file only imports
      # stdout from sys.
      import sys

      # The module-level ``pool`` may not be bound yet: the signal can arrive
      # before the pool is assigned, or (presumably, with fork-started workers)
      # inside a child that inherited this handler. Look it up defensively
      # instead of failing with NameError.
      active_pool = globals().get('pool')
      if active_pool is not None:
          active_pool.terminate()
      # sys.exit() rather than the interactive-shell helper exit().
      sys.exit()
  31.  
  # Install the Ctrl-C handler in the parent process.
  signal(SIGINT, sighandle)

  # Local import: SIG_IGN is only needed for the worker initializer below.
  from signal import SIG_IGN

  # Despite the name, these are worker *processes* (multiprocessing Pool).
  threads_count = 90
  # Workers ignore SIGINT so only the parent reacts to Ctrl-C and tears the
  # pool down via sighandle(); otherwise every worker would run the inherited
  # handler as well.
  pool = Pool(processes=threads_count, initializer=signal,
              initargs=(SIGINT, SIG_IGN))
  # Collect results once more than two thirds of the pool size is in flight.
  batch_limit = threads_count / 1.5


  def _store_results(pending):
      """Wait for each pending task, save its record and update the counters."""
      global c, successes
      for result in pending:
          site = result.get()
          if 'site_data' in site:
              successes += 1
          # NOTE(review): Collection.update() matches the driver API this file
          # already relies on (Cursor.count()); newer PyMongo releases use
          # replace_one() instead -- confirm the installed version.
          db.sites.update({'_id': site['_id']}, site)
          c -= 1
          # Progress line: "<remaining>/<successes>", rewritten in place.
          stdout.write('%s/%s\r' % (c, successes))
      stdout.flush()


  results = []
  for site in sites:
      results.append(pool.apply_async(load, (site,)))
      if len(results) > batch_limit:
          _store_results(results)
          results = []

  # Flush the final partial batch; without this the last (up to 60) sites
  # were fetched but never collected or written back to the database.
  _store_results(results)
  results = []

  # All work is done: let the workers exit and wait for them.
  pool.close()
  pool.join()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement