Guest User

Untitled

a guest
Jun 17th, 2018
217
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.62 KB | None | 0 0
  1. ## Python 3.6 Script
  2.  
  3. ## Created by Lakshmanan
  4.  
  5. ## Modules needed
  6. from multiprocessing import Pool
  7. from multiprocessing.dummy import Pool as ThreadPool
  8. import threading
  9.  
  10. pool = ThreadPool(8) ## Desired number of Threads
  11.  
  12. lck = threading.Lock() ## Resource locking mechanism for Threads
  13.  
  14. ## Worker function: in this case, it appends one line to each of several files so that the same
  15. ## line number in every file refers to the same coupled record (like an SQL row).
  16. def generate_one_line_entry(plain_topic, paragraph, query_id, sim):
  17. lck.acquire() ## Lock to block all other threads to while writing a single line to multiple files
  18.  
  19. with open(get_filepath(castor_directory, castor_topics), 'a') as file:
  20. file.write('{}\n'.format(plain_topic))
  21.  
  22. with open(get_filepath(castor_directory, castor_paragraphs), 'a') as file:
  23. file.write('{}\n'.format(paragraph))
  24.  
  25. with open(get_filepath(castor_directory, castor_topic_ids), 'a') as file:
  26. file.write('{}\n'.format(query_id))
  27.  
  28. with open(get_filepath(castor_directory, castor_samples), 'a') as file:
  29. file.write('{}\n'.format(sim))
  30.  
  31. lck.release() ## Release the lock as a line is written to all the necessary files
  32.  
  33. ## A helper function for data preparation
  34. def generate_castor_files(topic):
  35. ### Data generation for file writing
  36. plain_topic = pre_process_topic(topic)
  37. para = get_para(docid)
  38. relevance = get_relevance(topic, docid)
  39.  
  40. ## Call to worker
  41. generate_one_line_entry(plain_topic, para, topic, relevance)
  42.  
  43. pool.map(generate_castor_files, list(topics)) ## Topics data is put into threadpool for threading
  44.  
  45. pool.close()
  46. pool.join()
  47.  
  48. print("(**END**)")
Add Comment
Please, Sign In to add comment