Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
## Python 3.6 Script
## Created by Lakshmanan
## Modules needed
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
import threading

## Shared thread pool used to fan work out; 8 is the desired number of threads.
pool = ThreadPool(8)
## Lock shared by all worker threads; serialises access to a shared resource.
lck = threading.Lock()
## Worker function: appends one line to each of several files, where the same
## line number in every file represents coupled data (like a SQL row).
def generate_one_line_entry(plain_topic, paragraph, query_id, sim):
    """Append one coupled record, one field per file, while holding ``lck``.

    Each value is formatted with ``'{}\\n'.format(value)`` and appended to
    the file located by ``get_filepath(castor_directory, <name>)``.

    Args:
        plain_topic: value appended to the ``castor_topics`` file.
        paragraph: value appended to the ``castor_paragraphs`` file.
        query_id: value appended to the ``castor_topic_ids`` file.
        sim: value appended to the ``castor_samples`` file.

    Returns:
        None.

    Raises:
        Any exception raised by ``open``/``write`` propagates to the caller.
        The lock is still released, so other threads are not blocked forever.
    """
    ## (target file name, value) pairs, in the order the files are written.
    outputs = (
        (castor_topics, plain_topic),
        (castor_paragraphs, paragraph),
        (castor_topic_ids, query_id),
        (castor_samples, sim),
    )
    ## Hold the lock for the whole record so no other thread can interleave a
    ## line between these writes. ``with`` guarantees the lock is released
    ## even if a write fails (a bare acquire()/release() pair would leave it
    ## held and deadlock every other worker).
    with lck:
        ## NOTE: a failure part-way through still leaves the earlier files
        ## one line longer than the later ones.
        for filename, value in outputs:
            with open(get_filepath(castor_directory, filename), 'a') as file:
                file.write('{}\n'.format(value))
## A helper function for data preparation
def generate_castor_files(topic):
    """Build the fields for one topic and hand them to the file writer.

    Args:
        topic: the topic item to process; it is passed to
            ``pre_process_topic`` and ``get_relevance`` and is also written
            out unchanged as the query id.

    Returns:
        None.
    """
    ### Data generation for file writing
    plain_topic = pre_process_topic(topic)
    ## NOTE(review): ``docid`` is not defined in this function or anywhere in
    ## the visible snippet; presumably a module-level name -- confirm.
    para = get_para(docid)
    relevance = get_relevance(topic, docid)
    ## Call to worker
    generate_one_line_entry(plain_topic, para, topic, relevance)
## Topics data is put into the thread pool; map() blocks until every topic
## has been handled by generate_castor_files.
try:
    pool.map(generate_castor_files, list(topics))
finally:
    ## Shut the pool down and wait for its threads even if a worker raised.
    pool.close()
    pool.join()
print("(**END**)")
Add Comment
Please, Sign In to add comment