Advertisement
slookin

MultiThread

Nov 29th, 2011
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.31 KB | None | 0 0
  1. #@author: Mihver1 & SL
  2.  
  3. import urllib2
  4. import cookielib
  5.  
  6. # time module for performance metrics
  7. import time
  8. # re module for simple replace
  9. import re
  10. # exception type raised for HTTP error statuses (404/401/403, etc.)
  11. from urllib2 import HTTPError
  12.  
  13. # simple threads
  14. from threading import Thread
  15.  
  16.  
  17. start = time.time()  # wall-clock timestamp taken before the URL list is read and any thread is started
  18.  
  19.  
  20. def getUrl(url):
  21.     fname = url
  22.     # Replace specific symblos
  23.     fname = re.sub("[\?=\/]","_",fname);
  24.     cookie = cookielib.CookieJar()  
  25.     opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
  26.     urllib2.install_opener(opener)
  27.     headers = {"User-Agent" : "Mozilla/4.0 (compatible; MSIE 5.5; WindowsNT)" }
  28.     # Exception handling (because some url doesn't work)
  29.     try:
  30.        out=open("".join(fname[7:])+".txt", "w")
  31.        site = urllib2.urlopen(url)
  32.        out.write(site.read())
  33.        out.close()
  34.     except HTTPError, e:
  35.        print e.code
  36.  
  37. inp = open("input.txt", "r")
  38. str = inp.readlines()
  39.  
  40. #@TODO no limit register @see threadpool implementation
  41. threadsRegister=[];
  42.  
  43. for a in str:
  44.   try:
  45.     print a
  46.     t=Thread(None,getUrl,None,(a[:-1],));
  47.     # set explicitly deamonic property  
  48.     t.setDaemon(True);
  49.     t.start();
  50.     threadsRegister.append(t);
  51.   except Exception as errtxt:
  52.     print errtxt
  53.  
  54. for t in threadsRegister:
  55.     t.join();
  56.  
  57. end = time.time()
  58.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement