Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import html
import itertools

import docx
def run_get_style(run) -> str:
    """Classify a run by its ``bold`` and ``italic`` flags.

    Args:
        run: Object exposing ``bold`` and ``italic`` attributes; any falsy
            value (including ``None``) counts as "not set".

    Returns:
        "bold-italic", "bold", "italic", or "normal" when neither flag
        is truthy.
    """
    flags = (("bold", run.bold), ("italic", run.italic))
    active = [label for label, enabled in flags if enabled]
    # No active flag yields an empty join, which falls back to "normal".
    return "-".join(active) or "normal"
def convert_docx_to_html_style(para):
    """Render one paragraph as an HTML ``<p>`` element.

    Adjacent runs that ``run_get_style`` classifies identically are merged
    into a single span. A span whose style contains "bold" is wrapped in
    ``<b>``, one whose style contains "italic" in ``<em>`` (nested inside
    ``<b>`` when both apply). Run text is HTML-escaped so that ``&``, ``<``
    and ``>`` in the document cannot corrupt the generated markup.

    Args:
        para: Object exposing ``runs``, a sequence of runs that each have
            ``text``, ``bold`` and ``italic`` attributes (presumably a
            python-docx paragraph; confirm against the caller).

    Returns:
        The paragraph markup followed by a newline, or an empty string
        when the paragraph has no runs.
    """
    runs = para.runs
    if not runs:
        return ""

    pieces = ["<p>"]
    for style, same_style_runs in itertools.groupby(runs, key=run_get_style):
        merged_text = "".join(run.text for run in same_style_runs)
        # quote=False: the text is placed in element content, never inside
        # an attribute value, so quotes can stay as they are.
        text = html.escape(merged_text, quote=False)

        opening, closing = "", ""
        if "bold" in style:
            opening, closing = "<b>", "</b>"
        if "italic" in style:
            # <em> nests inside <b>, so it opens last and closes first.
            opening, closing = opening + "<em>", "</em>" + closing
        pieces.append(opening + text + closing)

    pieces.append("</p>\n")
    return "".join(pieces)
# Usage example: print the HTML rendering of every paragraph in the document.
document = docx.Document("bebe.docx")  # Replace with the name of your DOCX file
for paragraph in document.paragraphs:
    print(convert_docx_to_html_style(paragraph))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement