Advertisement
slookin

threadpool

Nov 29th, 2011
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.07 KB | None | 0 0
  1. #@author: SL
  2. #@license: GPLv3
  3.  
  4. # http client
  5. import urllib2
  6. # cookie handler
  7. import cookielib
  8. # time module for performance metrics
  9. import time
  10. # re module for simple replace
  11. import re
  12.  
  13. # synchronized queue
  14. from Queue import Queue
  15. # thread
  16. from threading import Thread
  17.  
  18. # single thread worker
  19. class Worker(Thread):
  20.     def __init__(self, tasks):
  21.         Thread.__init__(self)
  22.         self.tasks = tasks
  23.         self.daemon = True
  24.         self.start()
  25.    
  26.     def run(self):
  27.         while True:
  28.             func, args, kargs = self.tasks.get()
  29.             try: func(*args, **kargs)
  30.             except Exception, e: print e
  31.             self.tasks.task_done()
  32.  
  33. class ThreadPool:
  34.     def __init__(self, num_threads):
  35.         self.tasks = Queue(num_threads)
  36.         for _ in range(num_threads): Worker(self.tasks)
  37.  
  38.     def add_task(self, func, *args, **kargs):
  39.         self.tasks.put((func, args, kargs))
  40.  
  41.     def wait_completion(self):
  42.         self.tasks.join()
  43.  
  44.  
  45. def getUrl(url):
  46.     fname = url
  47.     # Replace specific symblos
  48.     fname = re.sub("[\?=\/]","_",fname);
  49.     cookie = cookielib.CookieJar()  
  50.     opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
  51.     urllib2.install_opener(opener)
  52.     headers = {"User-Agent" : "Mozilla/4.0 (compatible; MSIE 5.5; WindowsNT)" }
  53.     # Exception handling (because some url doesn't work)
  54.     try:
  55.        out=open("".join(fname[7:])+".txt", "w")
  56.        site = urllib2.urlopen(url)
  57.        out.write(site.read())
  58.        out.close()
  59.     except urllib2.HTTPError, e:
  60.        print e.code
  61.  
  62.  
  63. start = time.time()
  64.    
  65.     # 1) Init a Thread pool with the desired number of threads
  66. pool = ThreadPool(20)
  67.  
  68.  
  69. inp = open("input.txt", "r")
  70. str = inp.readlines()
  71.  
  72. #@TODO no limit register
  73. threadsRegister=[];
  74.  
  75. for a in str:
  76.   try:
  77.     t=Thread(None,getUrl,None,(a[:-1],));
  78.     pool.add_task(getUrl, a[:-1])
  79.   except Exception as errtxt:
  80.     print errtxt
  81.  
  82. end = time.time()
  83. # 3) Wait for completion
  84. pool.wait_completion()
  85.  
  86. print "Elapsed Time: %s" % (end - start)
  87.    
  88.    
  89.  
  90.    
  91.  
  92.  
  93.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement