Advertisement
nicuf

Convert DOCX to TXT

Jun 17th, 2023
810
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.72 KB | None | 0 0
  """Convert every .docx file in a folder to a UTF-8 plain-text .txt file.

  For each Word document found directly inside ``input_folder`` a file with
  the same base name and a ``.txt`` extension is written to ``output_folder``.
  Only the text of ``doc.paragraphs`` is exported, one paragraph per line;
  content that is not part of that list (for example table cells) is not
  written. An existing .txt file with the same name is overwritten.
  """
  import os
  from docx import Document

  # Path to the folder containing .docx files
  input_folder = "d:/doc"

  # Path to the folder where .txt files will be saved
  output_folder = "d:/doc"

  # Get a list of all .docx files in the input folder.
  # - The extension is compared case-insensitively so "REPORT.DOCX" is found.
  # - Names starting with "~$" are the owner/lock files Word creates next to
  #   an open document; they are not real documents and cannot be opened.
  # - Directories that merely end in ".docx" are ignored.
  files = [
      f
      for f in os.listdir(input_folder)
      if f.lower().endswith(".docx")
      and not f.startswith("~$")
      and os.path.isfile(os.path.join(input_folder, f))
  ]

  # Create the destination only after the input folder was listed, so a
  # missing input folder still raises instead of being silently created
  # when both settings point at the same path.
  os.makedirs(output_folder, exist_ok=True)

  # Loop through each .docx file and convert it to .txt
  for file in files:
      docx_path = os.path.join(input_folder, file)
      txt_path = os.path.join(output_folder, os.path.splitext(file)[0] + ".txt")

      doc = Document(docx_path)
      content = [p.text for p in doc.paragraphs]

      # Write UTF-8 explicitly so the result does not depend on the
      # platform's default encoding.
      with open(txt_path, "w", encoding="utf-8") as txt_file:
          txt_file.write("\n".join(content))

  print("Conversion complete!")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement