Advertisement
nicuf

Detect and save fonts and paragraphs from DOCX to HTML

Oct 3rd, 2023
993
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.62 KB | None | 0 0
  1. import docx
  2.  
  3. def run_get_style(run) -> str:
  4.     if run.bold:
  5.         return "bold"
  6.     elif run.italic:
  7.         return "italic"
  8.     else:
  9.         return "normal"
  10.  
  11. def convert_docx_to_html_with_styles(document: docx.Document, output_file: str) -> None:
  12.     with open(output_file, "w") as f:
  13.         f.write("<html>\n<body>\n")
  14.         current_style = None
  15.  
  16.         for paragraph in document.paragraphs:
  17.             runs = paragraph.runs
  18.             if not runs:
  19.                 continue
  20.  
  21.             for run in runs:
  22.                 run_style = run_get_style(run)
  23.  
  24.                 if run_style == current_style:
  25.                     f.write(run.text)
  26.                 else:
  27.                     if current_style:
  28.                         if current_style == "bold":
  29.                             f.write("</b>")
  30.                         elif current_style == "italic":
  31.                             f.write("</em>")
  32.                     if run_style == "bold":
  33.                         f.write("<b>")
  34.                     elif run_style == "italic":
  35.                         f.write("<em>")
  36.  
  37.                     f.write(run.text)
  38.                     current_style = run_style
  39.  
  40.             f.write("<br>\n")  # Adăugăm un salt de linie între paragrafe
  41.  
  42.         if current_style:
  43.             if current_style == "bold":
  44.                 f.write("</b>")
  45.             elif current_style == "italic":
  46.                 f.write("</em>")
  47.  
  48.         f.write("</body>\n</html>")
  49.  
  50. def main():
  51.     document = docx.Document("bebe.docx")
  52.     convert_docx_to_html_with_styles(document, "bebe.html")
  53.  
  54. if __name__ == "__main__":
  55.     main()
  56.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement