Advertisement
roman_gemini

Untitled

Sep 14th, 2016
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.86 KB | None | 0 0
import re
import timeit
import urllib
import urllib.request

from multiprocessing import Pool
  7. def get_url_content(url):
  8. return urllib.urlopen(url).read()
  9.  
  10. def get_links_from_content(content):
  11. return re.findall(r'href=\"(http.+?)\"', content)
  12.  
  13. def get_links_from_url(url):
  14. return get_links_from_content(get_url_content(url))
  15.  
  16. if __name__ == '__main__':
  17. urls = [
  18. "http://google.com",
  19. "http://ukr.net",
  20. "http://i.ua",
  21. "http://mail.ru",
  22. "http://yahoo.com",
  23. "http://habrahabr.ru",
  24. ]
  25.  
  26. """Do without threads"""
  27. def without_threads():
  28. list(map(get_links_from_url, urls))
  29.  
  30. print(timeit.timeit(stmt=without_threads, number=1))
  31.  
  32.  
  33. """Do with threads"""
  34. p = Pool(8)
  35. def with_threads():
  36. list(p.map(get_links_from_url, urls))
  37.  
  38. print(timeit.timeit(stmt=with_threads, number=1))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement