Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import html
import itertools
import re

import docx
def run_get_style(run) -> str:
    """Classify a run as ``"bold"``, ``"italic"`` or ``"normal"``.

    Bold is checked first, so a run that is both bold and italic is
    reported as ``"bold"``. Any falsy ``bold``/``italic`` value (including
    ``None``) counts as "not set".

    Args:
        run: Object exposing ``bold`` and ``italic`` attributes.

    Returns:
        One of the strings ``"bold"``, ``"italic"`` or ``"normal"``.
    """
    if run.bold:
        return "bold"
    return "italic" if run.italic else "normal"
def _render_segment(style: str, text: str) -> str:
    """Return *text* HTML-escaped and wrapped in the tag matching *style*."""
    # Escape so that literal "<", ">" and "&" in the document cannot be
    # mistaken for markup in the generated file.
    escaped = html.escape(text, quote=False)
    if style == "bold":
        return f"<b>{escaped}</b>"
    if style == "italic":
        return f"<em>{escaped}</em>"
    return escaped


def detect_fonts(
    document: "docx.document.Document",
    output_path: str = "bebe.html",
) -> None:
    """Write the paragraphs of *document* to an HTML file.

    Each paragraph that has at least one run becomes one
    ``<p>...</p>`` line. Consecutive runs that ``run_get_style`` classifies
    the same way are merged; bold segments are wrapped in ``<b>``, italic
    segments in ``<em>``, and all other text is written unwrapped.
    Paragraphs without runs are skipped.

    Args:
        document: Object exposing ``paragraphs``, each with ``runs`` whose
            items have ``text`` (plus whatever ``run_get_style`` reads).
        output_path: Destination file; defaults to ``"bebe.html"``. The file
            is overwritten and written as UTF-8.
    """
    # Explicit encoding: document text is arbitrary Unicode and must not
    # depend on the platform's locale encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        for paragraph in document.paragraphs:
            runs = paragraph.runs
            if not runs:
                continue
            # groupby merges adjacent runs sharing a style, so each styled
            # stretch is emitted inside a single tag.
            body = "".join(
                _render_segment(style, "".join(run.text for run in group))
                for style, group in itertools.groupby(runs, key=run_get_style)
            )
            # Emit the opening tag here instead of patching it in afterwards
            # with a line-anchored regex: that approach also tagged every
            # line break inside a paragraph and the empty position after the
            # final newline.
            f.write(f"<p>{body}</p>\n")
def main():
    """Load ``bebe.docx`` and pass the document to ``detect_fonts``."""
    detect_fonts(docx.Document("bebe.docx"))
# Run the conversion only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement