Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python3
"""Print the paragraph text and the image sources found in an HTML file.

Usage: pass the path of the HTML file as the first command-line argument.
The text of every <p>...</p> element is printed first (with nested tags
removed), followed by the quoted ``src`` value of every <img> tag, one
item per ``print`` call.

This is a regex-based scraper, not an HTML parser: attribute values that
contain ``>`` and unquoted ``src`` values are not supported.
"""
import re
import os
import sys

# "<p>" or "<p" + whitespace + attributes.  Requiring whitespace or ">" right
# after the "p" keeps <pre>, <param>, <path>, ... from being taken for
# paragraphs.
PARAGRAPH_RE = re.compile(r'<p(?:\s[^>]*)?>(.*?)</p>', re.IGNORECASE | re.DOTALL)

# Any tag; "[^>]" also matches newlines, so no DOTALL flag is needed.
TAG_RE = re.compile(r'<[^>]*>')

# An <img> tag followed by any whitespace (space, tab, newline) and its
# attribute text.
IMG_RE = re.compile(r'<img\s([^>]*)>', re.IGNORECASE)

# A quoted src attribute.  The lookbehind rejects names that merely end in
# "src" (e.g. data-src), and the backreference makes the value end at the
# same kind of quote that opened it.
SRC_RE = re.compile(r'''(?<![\w-])src\s*=\s*(["'])(.*?)\1''',
                    re.IGNORECASE | re.DOTALL)


def extract_paragraphs(text):
    """Return the contents of each <p> element in *text*, nested tags removed."""
    return [TAG_RE.sub("", match.group(1))
            for match in PARAGRAPH_RE.finditer(text)]


def extract_image_sources(text):
    """Return the first quoted ``src`` value of each <img> tag in *text*.

    Tags without a quoted ``src`` attribute are skipped.
    """
    sources = []
    for match in IMG_RE.finditer(text):
        src = SRC_RE.search(match.group(1))
        if src is not None:
            sources.append(src.group(2))
    return sources


def main(argv):
    """Run the script on ``argv[1]``; return the process exit status."""
    if len(argv) < 2:
        program = os.path.basename(argv[0]) if argv else "script"
        print("usage: {} FILE".format(program), file=sys.stderr)
        return 2

    # The context manager closes the file even if reading fails.
    with open(argv[1], "r") as html_file:
        text = html_file.read()

    for paragraph in extract_paragraphs(text):
        print(paragraph)
    for source in extract_image_sources(text):
        print(source)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement