Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

Python async hardwork

By: a guest on Oct 15th, 2012  |  syntax: Python  |  size: 1.74 KB  |  views: 31  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. import re
  2. from concurrent.futures import ThreadPoolExecutor
  3.  
  4. thread_pool = ThreadPoolExecutor(4)
  5.  
  6. def load_words(filename):
  7.     '''Returns a list containing every word in `filename`.'''
  8.     word_list = []
  9.     with open(filename, 'r') as f:
  10.         for line in f:
  11.             word_list.extend(line.split(' '))
  12.     return word_list
  13.  
  14. def clean_words(words):
  15.     '''Returns a list containing only words and all lowercased.'''
  16.  
  17.     clean_list = []
  18.     for word in words:
  19.         match = re.search('[a-z]+', word, re.IGNORECASE)
  20.         if match:
  21.             clean_list.append(match.group(0).lower())
  22.  
  23.     return clean_list
  24.  
  25. def count_words(words):
  26.     '''Returns a dictionary mapping each word to the number of times
  27.    it appears.'''
  28.     word_count = {}
  29.     for word in words:
  30.         c = word_count.get(word, 0)
  31.         word_count[word] = c + 1
  32.  
  33.     return word_count
  34.  
  35. def get_most_common(word_count, n=10):
  36.     '''Returns the `n` most common words based on the count.'''
  37.     return [i[0] for i in sorted(word_count.items(), key=lambda i: i[1], reverse=True)][:n]
  38.  
  39.  
  40.  
  41.  
  42. def load_words_async(filename):
  43.     '''Returns a future for a list containing every word in `filename`.'''
  44.     return thread_pool.submit(load_words, filename)
  45.  
  46. def clean_words_async(words):
  47.     '''Returns a future for a list of only words and all lowercased.'''
  48.     return thread_pool.submit(clean_words, words)
  49.  
  50. def count_words_async(words):
  51.     '''Returns a future for a dictionary mapping words to the number of
  52.    times it appears.'''
  53.     return thread_pool.submit(count_words, words)
  54.  
  55. def get_most_common_async(word_count, n=10):
  56.     '''Returns the `n` most common words based on the count.'''
  57.     return thread_pool.submit(get_most_common, word_count, n)