Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#-------------------------------------------------------------------------------
# Author: Neculai Fantanaru Ioan
#
# Created: 22/01/2022
# Copyright: (c) Neculai Fantanaru Ioan
# EXPLANATION:
# ROMANIAN: https://neculaifantanaru.com/python-copiaza-continutul-fisierelor-text-in-corpul-fisierelor-html.html
# ENGLISH: https://neculaifantanaru.com/en/python-copy-the-contents-of-text-files-in-the-body-of-html-files.html
#-------------------------------------------------------------------------------
import os
import re
def read_text_from_file(file_path):
    """Return the entire content of a text file decoded as UTF-8.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file content as a single string (empty string for an empty file).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    with open(file_path, encoding='utf8') as f:
        return f.read()
def write_to_file(text, file_path):
    """Write a string to a file as UTF-8, overwriting any existing content.

    Args:
        text: The string to write.
        file_path: Path of the destination file. The parent directory must
            already exist.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # Binary mode: the encoded bytes are written exactly as produced, with no
    # text-layer processing in between.
    with open(file_path, 'wb') as f:
        # 'ignore' drops characters that cannot be encoded (e.g. lone
        # surrogates) instead of raising UnicodeEncodeError.
        f.write(text.encode('utf8', 'ignore'))
def copiaza_continut_txt_html(cale_fisier_txt, cale_fisier_html):
    """Insert a text file's content into an HTML template and save the result.

    The template must contain the markers ``<!-- ARTICOL START -->`` and
    ``<!-- ARTICOL FINAL -->``. Whatever lies between the first such pair is
    replaced by the text file's content, surrounded by blank lines. The
    result is written to ``fisiere_html/<txt base name>.html`` inside the
    folder that holds the text file; the template file itself is not
    modified. A status message is printed in either case.

    Args:
        cale_fisier_txt: Path of the text file whose content is inserted.
        cale_fisier_html: Path of the HTML template file.

    Returns:
        None. If the template has no marker pair, nothing is written.
    """
    text_txt = read_text_from_file(cale_fisier_txt)
    text_html = read_text_from_file(cale_fisier_html)

    # Raw string so \s and \S reach the regex engine untouched; the lazy
    # group captures everything up to the first closing marker.
    articol_pattern = re.compile(
        r'<!-- ARTICOL START -->([\s\S]*?)<!-- ARTICOL FINAL -->'
    )
    potrivire = articol_pattern.search(text_html)
    if potrivire is None:
        print("Fisier html fara ARTICOL START/FINAL.")
        return

    # Splice by position rather than str.replace(): replace() would also
    # rewrite any other occurrence of the captured text elsewhere in the
    # page, and with an empty capture it would insert the new text between
    # every character of the document.
    inceput, sfarsit = potrivire.span(1)
    text_html = (
        text_html[:inceput] + '\n\n' + text_txt + '\n\n' + text_html[sfarsit:]
    )

    folder_iesire = os.path.join(os.path.dirname(cale_fisier_txt), "fisiere_html")
    # The output folder may not exist yet on the first run.
    os.makedirs(folder_iesire, exist_ok=True)
    nume_html = os.path.splitext(os.path.basename(cale_fisier_txt))[0] + '.html'
    write_to_file(text_html, os.path.join(folder_iesire, nume_html))
    print("Scriere efectuata cu succes.")
def creare_fisiere_html(cale_folder_txt, cale_fisier_html):
    """Create one HTML file for every ``.txt`` file found in a folder.

    Each directory entry whose name ends in ``.txt`` is passed, together with
    the template path, to ``copiaza_continut_txt_html``. Other entries are
    skipped. Sub-folders are not searched.

    Args:
        cale_folder_txt: Folder containing the text files.
        cale_fisier_html: Path of the HTML template used for every file.

    Raises:
        OSError: If the folder cannot be listed.
    """
    for nume_fisier in os.listdir(cale_folder_txt):
        # Require the dot so names that merely end in the letters "txt"
        # (e.g. "footxt") are not mistaken for text files.
        if not nume_fisier.endswith('.txt'):
            continue
        cale_fisier_txt = os.path.join(cale_folder_txt, nume_fisier)
        copiaza_continut_txt_html(cale_fisier_txt, cale_fisier_html)
def main(cale_folder_txt="c:\\Folder1", cale_fisier_html="c:\\Folder1\\oana.html"):
    """Generate the HTML files for a folder of text files.

    Called without arguments it uses the script's original hard-coded
    locations.

    Args:
        cale_folder_txt: Folder containing the ``.txt`` files to convert.
        cale_fisier_html: Path of the HTML template file.
    """
    creare_fisiere_html(cale_folder_txt, cale_fisier_html)
# Run the conversion only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement