Advertisement
simeonshopov

html stuff

Feb 26th, 2020
187
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.63 KB | None | 0 0
  1. #!/usr/local/bin/python3.7
  2. # -*- coding: utf-8 -*import
  3. import re
  4.  
  5. text = input()
  6.  
  7. pattern = r'(?<=<title>)(?P<title>.+)(?=<\/title>)'
  8. match = re.search(pattern, text)
  9.  
  10. title = match.group('title')
  11.  
  12. pattern_content = r'(?<=<body>).+(?=<\/body>)'
  13.  
  14. content_match = re.search(pattern_content, text)
  15. content = content_match.group(0)
  16.  
  17. cleaner = re.compile('<[^>]*>|\\[rnt]')
  18. clean_text = re.sub(cleaner, ' ', content)
  19.  
  20.  
  21. search = r'<[^>]*>|\[rnt]'
  22.  
  23. m = re.findall(search, content)
  24. print(m)
  25.  
  26. # cln = re.compile(r'\\[rnt]')
  27. # cln_text = re.sub(cln, ' ', clean_text)
  28.  
  29.  
  30. print(f'Title: {title}')
  31. print(f'Content: {clean_text}')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement