Guest User

Untitled

a guest
Jul 30th, 2016
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.67 KB | None | 0 0
  1. import bs4
  2.  
  3. html_string = """
  4. <html>
  5. <head>
  6. <title></title>
  7. </head>
  8. <body>
  9. <p align="center">
  10. This is before.
  11. <div style="page-break-after:always">
  12. </div>
  13. This is after.
  14. </p>
  15. </body>
  16. </html>
  17. """
  18.  
  19. html_element = bs4.BeautifulSoup(html_string, features="xml")
  20.  
  21. style = {'style': 'page-break-after:always'}
  22.  
  23. page_break_elements = html_element.findAll('div', style)
  24.  
  25. for page_break_element in page_break_elements:
  26. current = page_break_element
  27. while True:
  28. parent = current.parent
  29. if parent is None:
  30. break
  31. if parent.name == 'body':
  32. current.insert_before(page_break_element)
  33. break
  34. current = parent
Add Comment
Please, Sign In to add comment