# Delete All Files With Less Than 250 Characters - part.2
#
# ----------------------
# EXPLANATION:
#
# ENGLISH: https://neculaifantanaru.com/en/python-delete-all-files-with-less-than-250-characters.html
#
# ROMANIAN: https://neculaifantanaru.com/python-sterge-toate-fisierele-cu-mai-putin-de-250-de-caractere.html
# ----------------------


import os
import re
import random
import unidecode
import nltk
from nltk import tokenize
# nltk.download('punkt')
import requests
from usp.tree import sitemap_tree_for_homepage

def read_text_from_file(file_path):
    """
    Return the entire content of a text file.

    The file is decoded as UTF-8 with the default (strict) error handling,
    so a file that is not valid UTF-8 raises UnicodeDecodeError.

    file_path: path of the file to read
    """
    # The with-statement closes the file on exit (even on error), so no
    # explicit close() call is needed.
    with open(file_path, encoding='utf8') as f:
        return f.read()


def write_to_file(text, file_path):
    """
    Write a text to a file, encoded as UTF-8.

    An existing file is overwritten. Characters that cannot be encoded as
    UTF-8 (e.g. lone surrogates) are silently dropped, because the text is
    encoded with the 'ignore' error handler. The file is opened in binary
    mode, so line endings are written exactly as they appear in text.

    text: the text to write
    file_path: path of the file to write to
    """
    # Encode before opening, so that a bad text argument fails without
    # truncating an existing file.
    data = text.encode('utf8', 'ignore')
    # The with-statement closes the file on exit; no explicit close() needed.
    with open(file_path, 'wb') as f:
        f.write(data)

# Delete every .html/.htm file in FOLDER_LOCAL whose text between the
# "<-- START -->" and "<-- FINAL -->" markers is shorter than
# MIN_PAGE_TEXT_LENGTH characters. Files without both markers are kept.
FOLDER_LOCAL = 'd:\\Folder1'

# Files whose marked text has fewer characters than this are deleted.
# NOTE(review): the title of this file mentions 250 characters, but the
# threshold used by the code is 1500 - confirm which one is intended.
MIN_PAGE_TEXT_LENGTH = 1500

# Raw string: \s and \S must reach the regex engine unchanged; in a normal
# string literal they are invalid escape sequences and trigger a warning.
# [\s\S]*? matches any characters (including newlines), as few as possible.
page_text_pattern = re.compile(r'<-- START -->([\s\S]*?)<-- FINAL -->')
counter_sterse = 0  # number of files deleted so far

for f in os.listdir(FOLDER_LOCAL):
    # Only HTML files are considered (the check is case-sensitive).
    if not f.endswith(('.html', '.htm')):
        continue

    filepath = os.path.join(FOLDER_LOCAL, f)
    page_html = read_text_from_file(filepath)

    # Only the first START/FINAL section of the page is measured.
    found = page_text_pattern.search(page_html)
    if found is None:
        # No marked section in this file: leave it untouched.
        continue

    page_text = found.group(1)
    if len(page_text) < MIN_PAGE_TEXT_LENGTH:
        os.remove(filepath)
        counter_sterse += 1

print("S-au sters {} fisiere".format(counter_sterse))