Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ---------------
- ROMANIAN: https://neculaifantanaru.com/python-how-to-convert-txt-files-to-word-docx.html
- ENGLISH: https://neculaifantanaru.com/en/python-how-to-convert-txt-files-to-word-docx.html
- ---------------
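- # The legacy "docx" package (pip install docx) does not work with Python 3.
- # If it is installed, remove it first:
- #   pip uninstall docx
- # Then install the maintained package, which provides the "docx" module imported below:
- #   pip install python-docx
- # pathlib is also required; it ships with Python 3.4 and later, so
- #   pip install pathlib
- # is only needed on older interpreters.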
import os
import re
import sys
from pathlib import Path

from docx import Document
# Folder that holds the text files to convert.
input_path = r'c:\Folder7\input'
# Folder where the generated .docx files are written.
output_path = r'c:\Folder7\output'

# Characters that XML 1.0 does not allow in document text: the C0 control
# characters other than tab, line feed and carriage return, plus the
# non-characters U+FFFE and U+FFFF. The XML layer under python-docx rejects
# strings containing them, so they must be removed before add_paragraph().
XML_ILLEGAL_CHARS = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\ufffe\uffff]")


def sanitize_xml_text(text):
    """Return *text* with every XML-incompatible character replaced by a space.

    Only characters that cannot be stored in XML are touched; non-ASCII text
    (for example Romanian diacritics) is preserved unchanged.
    """
    return XML_ILLEGAL_CHARS.sub(" ", text)


def convert_file(file_path, output_dir):
    """Convert one UTF-8 text file into a .docx file inside *output_dir*.

    The document gets the source file name as its title heading and the whole
    file content as a single paragraph.

    Args:
        file_path: ``pathlib.Path`` of the text file to read.
        output_dir: Folder in which ``<file name>.docx`` is saved.

    Returns:
        The path of the written .docx file, as a string.

    Raises:
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    document = Document()
    # file_path.name is the bare file name (a str), without its folder.
    document.add_heading(file_path.name, 0)
    file_content = sanitize_xml_text(file_path.read_text(encoding='UTF-8'))
    document.add_paragraph(file_content)
    # The original extension is kept, so "a.txt" becomes "a.txt.docx".
    output_file_path = os.path.join(output_dir, file_path.name + ".docx")
    document.save(output_file_path)
    return output_file_path


def main():
    """Convert every regular file found in ``input_path`` to .docx.

    Exits with status 1 when ``input_path`` is not an existing directory.
    """
    # Create the output folder structure if it does not exist yet.
    os.makedirs(output_path, exist_ok=True)

    # Make sure the input folder is really there before iterating over it.
    directory_path = Path(input_path)
    if directory_path.exists() and directory_path.is_dir():
        print(directory_path, "exists")
    else:
        print(directory_path, "is invalid")
        sys.exit(1)

    for file_path in directory_path.glob("*"):
        # glob("*") also yields sub-directories, which cannot be read as text.
        if not file_path.is_file():
            continue
        print("Procesez fisierul:", file_path)
        output_file_path = convert_file(file_path, output_path)
        print("Am convertit urmatorul fisier:", file_path, "in: ", output_file_path)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement