Advertisement
nicuf

detect fonts in .docx files

Oct 1st, 2023
893
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.79 KB | None | 0 0
import html
import itertools

import docx
  2.  
  3. def run_get_style(run) -> str:
  4.     if run.bold and run.italic:
  5.         return "bold-italic"
  6.     elif run.bold:
  7.         return "bold"
  8.     elif run.italic:
  9.         return "italic"
  10.     else:
  11.         return "normal"
  12.  
  13. def convert_docx_to_html_style(para):
  14.     result = ""
  15.     if para.runs:
  16.         html_para = '<p>'
  17.         current_style = None
  18.         current_text = ""
  19.  
  20.         for run in para.runs:
  21.             run_style = run_get_style(run)
  22.             if run_style == current_style:
  23.                 current_text += run.text
  24.             else:
  25.                 if current_style:
  26.                     if "bold" in current_style:
  27.                         html_para += '<b>'
  28.                     if "italic" in current_style:
  29.                         html_para += '<em>'
  30.                     html_para += current_text
  31.                     if "italic" in current_style:
  32.                         html_para += '</em>'
  33.                     if "bold" in current_style:
  34.                         html_para += '</b>'
  35.                 current_style = run_style
  36.                 current_text = run.text
  37.  
  38.         if current_style:
  39.             if "bold" in current_style:
  40.                 html_para += '<b>'
  41.             if "italic" in current_style:
  42.                 html_para += '<em>'
  43.             html_para += current_text
  44.             if "italic" in current_style:
  45.                 html_para += '</em>'
  46.             if "bold" in current_style:
  47.                 html_para += '</b>'
  48.  
  49.         html_para += '</p>\n'
  50.         result += html_para
  51.     return result
  52.  
  53. # Exemplu de utilizare:
  54. document = docx.Document("bebe.docx")  # Înlocuiți cu numele fișierului DOCX
  55. for paragraph in document.paragraphs:
  56.     converted_paragraph = convert_docx_to_html_style(paragraph)
  57.     print(converted_paragraph)
  58.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement