Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- EXPLANATION:
- ROMANIAN: https://neculaifantanaru.com/python-sterge-spatiile-goale-duble-din-tagurile-html.html
- ENGLISH: https://neculaifantanaru.com/en/python-delete-double-empty-spaces-in-html-tags.html
- ----------------------------------
import os
import re
def read_text_from_file(file_path) -> str:
    """Return the whole content of a file decoded as UTF-8.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file content as a single string.
    """
    with open(file_path, encoding='utf8') as source:
        return source.read()
def write_to_file(text: str, file_path) -> None:
    """Write a string to a file as UTF-8, replacing any previous content.

    Args:
        text: The text to write.
        file_path: Path of the file to write to.
    """
    # The 'ignore' handler drops characters that cannot be encoded as UTF-8
    # instead of raising UnicodeEncodeError.
    payload = text.encode('utf8', 'ignore')
    # Binary mode writes the encoded bytes exactly as they are, with no
    # newline translation.
    with open(file_path, 'wb') as target:
        target.write(payload)
# Span between the article markers; only this part of a file is rewritten.
_ARTICLE_RE = re.compile(r'<!-- ARTICOL START -->[\s\S]*?<!-- ARTICOL FINAL -->')
# An <em> element on a single line ('.' does not match newlines here).
_EM_RE = re.compile(r'<em>(.*?)</em>')


def _trim_em(em_match):
    """Return the matched <em> element with its inner text stripped."""
    return '<em>' + em_match.group(1).strip() + '</em>'


def _normalize_tag_body(body: str) -> str:
    """Strip the text inside each <em> and collapse whitespace runs to one space."""
    body = _EM_RE.sub(_trim_em, body)
    # NOTE(review): the pasted source lost its indentation, so it is unclear
    # whether the collapsing step originally ran only for tags containing
    # <em>; it is applied to every tag here -- confirm against the original.
    return " ".join(body.split())


def replace_white_spaces(tag_name, file_path):
    """Normalize whitespace inside ``<tag_name class="...">`` elements of a file.

    Only the part of the file between ``<!-- ARTICOL START -->`` and
    ``<!-- ARTICOL FINAL -->`` is processed. For every single-line
    ``<tag_name class="...">...</tag_name>`` element found there, leading and
    trailing whitespace is removed from the element text and from the text of
    each ``<em>`` inside it, and every run of whitespace is collapsed to one
    space. The file is rewritten in place only when something changed.

    Each element is rewritten where it was matched. A global string replace
    would also change unrelated text that equals the captured text; for
    example, a tag holding only a space would delete every space in the
    article.

    Args:
        tag_name: Tag name to process, e.g. ``'p'``. It is inserted into a
            regular expression without escaping.
        file_path: Path of the HTML file to modify.

    Returns:
        None. If the article markers are missing, a message is printed and
        the file is left untouched.
    """
    text = read_text_from_file(file_path)

    article = _ARTICLE_RE.search(text)
    if article is None:
        print("Fisierul nu are structura corecta: ", file_path)
        return

    # Named groups keep the opening and closing tags intact while only the
    # text between them is rewritten.
    tag_pattern = re.compile(
        r'(?P<open><{0} class=".*?">)(?P<body>.*?)(?P<close></{0}>)'.format(tag_name)
    )

    def _rewrite(tag_match):
        return (
            tag_match.group('open')
            + _normalize_tag_body(tag_match.group('body'))
            + tag_match.group('close')
        )

    new_article = tag_pattern.sub(_rewrite, article.group(0))
    new_text = text[:article.start()] + new_article + text[article.end():]

    # Skip the write when nothing changed so untouched files are not rewritten.
    if new_text != text:
        write_to_file(new_text, file_path)
def replace_white_spaces_only_html_php(tag_name, directory_name):
    """Run ``replace_white_spaces`` on every matching file under a directory.

    The directory tree is walked recursively with ``os.walk``; each file
    whose name ends with ``'html'`` is processed in place.

    Args:
        tag_name: Tag name forwarded to ``replace_white_spaces``.
        directory_name: Root directory of the tree to process.
    """
    for root, _dirs, files in os.walk(directory_name):
        for file_name in files:
            # NOTE(review): despite the "_php" in the function name, only
            # names ending in 'html' are handled; the filter is kept as-is --
            # confirm whether .php files were meant to be included.
            if file_name.endswith('html'):
                replace_white_spaces(tag_name, os.path.join(root, file_name))
if __name__ == '__main__':
    # Root directory to process and the tag whose text is normalized.
    directory_name = 'c:\\Folder1'
    tag_name = 'p'
    replace_white_spaces_only_html_php(tag_name, directory_name)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement