Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import subprocess
- from multiprocessing import Pool
- from tqdm import tqdm
def _numeric_pdf_ids(directory):
    """Return the sorted integer stems of the ``<integer>.pdf`` entries in *directory*.

    Entries that are not PDFs, or whose stem is not an integer (hidden files,
    notes, sub-directories, ...), are skipped instead of aborting the whole
    run with a ``ValueError``.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of ints in ascending order.

    Raises:
        OSError: If *directory* cannot be listed (e.g. it does not exist).
    """
    ids = []
    for entry in os.listdir(directory):
        stem, ext = os.path.splitext(entry)
        if ext.lower() != '.pdf':
            continue
        try:
            ids.append(int(stem))
        except ValueError:
            # Non-numeric name: nothing the converter could address by id.
            continue
    return sorted(ids)


# Integer ids of the PDFs to convert; sorted numerically so 2 comes before 10.
pdfs = _numeric_pdf_ids('pdfs')
def process(pdf):
    """Convert ``pdfs/<pdf>.pdf`` to ``txts/<pdf>.txt`` using ``pdftotext``.

    The output directory is not created here; it must already exist.

    Args:
        pdf: Identifier interpolated into both the input and output file names.

    Returns:
        None. A non-zero exit status from ``pdftotext`` is reported on stderr
        together with the input path, so a failed conversion can be traced
        back to its file; it is not raised as an exception.

    Raises:
        OSError: If the ``pdftotext`` executable cannot be started.
    """
    import sys  # local import: only needed to report failures

    source_path = f'pdfs/{pdf}.pdf'
    command = ['pdftotext', source_path, f'txts/{pdf}.txt']
    exit_status = subprocess.call(command)
    if exit_status != 0:
        print(
            f'pdftotext exited with status {exit_status} for {source_path}',
            file=sys.stderr,
        )
if __name__ == '__main__':
    # The workers write into txts/, so make sure it exists before they start.
    os.makedirs('txts', exist_ok=True)

    worker_count = 30
    with Pool(worker_count) as pool:
        # imap yields one result per input in order; draining the iterator
        # is what drives the conversions and advances the progress bar.
        conversions = pool.imap(process, pdfs)
        for _ in tqdm(conversions, total=len(pdfs)):
            pass
Advertisement
Add Comment
Please, Sign In to add comment