Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import re
- # import sys
def filter_filename_list_by_format(filenames: list[str], format: str = ".py") -> list[str]:
    """Return the names in ``filenames`` that end with the suffix ``format``.

    Args:
        filenames: Candidate file names.
        format: Suffix to keep, e.g. ``".py"``. An empty suffix keeps every
            name.

    Returns:
        A new list with the matching names, in their original order.
    """
    # ``format`` shadows the builtin, but it is part of the public signature,
    # so the name is kept for backward compatibility.
    return [name for name in filenames if name.endswith(format)]
def metadata_from_filename(filename: str) -> str:
    """Build a "task - mail - datetime" summary from a submission file name.

    File names are expected to look like
    ``01_-_actividad-mail_gmail.com-nombredearchivo.py-AAAA-MM-DD_HH.MM.SS.py``;
    the parsing is hard-coded for that very particular shape.

    Because the activity "predador-presa" contains a dash of its own, taking
    ``filename.split("-")[2]`` is not quite enough: when the third segment has
    no underscore, the mail is read from the fourth segment instead. This
    assumes mail addresses never contain "-" (generally true; according to
    the original author, Gmail and Yahoo forbid it).

    Args:
        filename: Bare file name (no directory) in the format above.

    Returns:
        ``"<task> - <mail> - <datetime>"``, e.g.
        ``"01-actividad - mail@gmail.com - AAAA-MM-DD HH:MM:SS"``.

    Raises:
        IndexError: If the name has too few "-"-separated segments.
    """
    segments = filename.split("-")
    # The mail segment carries "_" in place of "@", so a third segment without
    # "_" is not the mail and the next segment is used.
    mail_index = 2 if "_" in segments[2] else 3
    # Replace only the last "_" of the segment with "@".
    mail = "@".join(segments[mail_index].rsplit("_", 1))
    task = "-".join(segments[:mail_index]).replace("_", "")
    # The name ends with "AAAA-MM-DD_HH.MM.SS.py": 19 timestamp characters
    # followed by the 3-character extension.
    timestamp = filename[-22:-3].replace("_", " ").replace(".", ":")
    return " - ".join([task, mail, timestamp])
def lines_from_file(full_file_path: str) -> list[str]:
    """Read ``full_file_path`` as latin1 text and return its lines.

    Each returned line keeps its trailing newline, when it has one.
    """
    with open(full_file_path, mode="r", encoding="latin1") as source:
        return list(source)
def pattern_from_function_name(function_name: str) -> str:
    """Build a loose regex that matches a ``def`` line for ``function_name``.

    The name is split on "_" and the pattern is "def.*" followed by every
    word with ".*" appended, so arbitrary text is tolerated after ``def``,
    between the words and after the last word.

    Args:
        function_name: Target function name, e.g. ``"hay_10mil"``.

    Returns:
        The pattern string, e.g. ``"def.*hay.*10mil.*"``.
    """
    # The words are inserted verbatim (they are not regex-escaped).
    return "def.*" + "".join(word + ".*" for word in function_name.split("_"))
def function_def_lines_from_file(full_file_path: str, function_name: str) -> list[str]:
    """Extract the source lines of a function definition from a file.

    The function is located by the first line that matches, from its first
    character and ignoring case, the loose pattern built by
    ``pattern_from_function_name``. The returned slice runs from that line
    through the last line containing ``return`` that is seen before the body
    ends; the body is considered finished at the first non-blank line that
    starts at column 0 and does not contain ``return``.

    Args:
        full_file_path: Path of the Python file to scan.
        function_name: Name of the function to look for.

    Returns:
        The matching lines (newlines included), or an empty list when the
        function is not found or no ``return`` line is seen after its ``def``.
    """
    # Lines are lower-cased before matching, so the pattern must be built from
    # the lower-cased name too; otherwise a name with capitals never matches.
    def_pattern = pattern_from_function_name(function_name.lower())
    return_pattern = ".*return"
    lines = lines_from_file(full_file_path)

    def_line_id = None
    end_line_id = 0
    for i, line in enumerate(lines):
        if def_line_id is None:
            if re.match(def_pattern, line.lower()):
                def_line_id = i
        elif re.match(return_pattern, line):
            # Keep extending the slice: the last "return" line wins.
            end_line_id = i + 1
        elif line.strip() and line[0] not in " \t":
            # A non-blank, unindented line ends the body (tabs count as
            # indentation too); nothing after it can matter, so stop here.
            break

    if def_line_id is None:
        return []
    return lines[def_line_id:end_line_id]
def create_empty_file(full_save_path: str) -> None:
    """Create ``full_save_path`` as an empty file, truncating it if it exists."""
    with open(full_save_path, "w"):
        # Opening in "w" mode already leaves the file empty.
        pass
def append_lines_to_file(lines: list[str], full_save_path: str, metadata: str = "") -> None:
    """Append ``lines`` to ``full_save_path`` (latin1), followed by a newline.

    When ``metadata`` is non-empty it is written on its own line before the
    lines. When ``lines`` is empty nothing is written and the file is not
    opened.
    """
    if not lines:
        return
    with open(full_save_path, "a", encoding="latin1") as sink:
        chunks = [metadata + "\n"] if metadata else []
        chunks.extend(lines)
        # Trailing newline separates this entry from the next one appended.
        chunks.append("\n")
        sink.writelines(chunks)
if __name__ == "__main__":
    ### PARAMETERS ###
    files_load_dir = "/home/tomo/Documents/Playground/Python/pyDefScrapper/targetFolder/"
    function_name = "hay_10mil"
    include_metadata = True

    ### MAIN PROGRAM ###
    # The output file lives next to this script and is emptied on every run.
    full_save_path = os.path.join(os.path.dirname(__file__), "output.py")
    create_empty_file(full_save_path)
    # os.listdir returns entries in arbitrary order; sort them so the output
    # is reproducible from run to run.
    py_filename_list = filter_filename_list_by_format(sorted(os.listdir(files_load_dir)), ".py")
    for filename in py_filename_list:
        full_file_path = os.path.join(files_load_dir, filename)
        # Parse the file name only when the header is wanted, so a name that
        # does not follow the expected format cannot fail with metadata off.
        metadata = "# " + metadata_from_filename(filename) if include_metadata else ""
        def_lines = function_def_lines_from_file(full_file_path, function_name)
        append_lines_to_file(def_lines, full_save_path, metadata)
- # Probando re
- # test = "01_-_figuritas-mail_gmail.com-nombredearchivo.py-AAAA-MM-DD_HH.MM.SS.py"
- # p = re.compile("-*-") # Crear patron
- # m = re.match(p, test) # Buscar match (re.match si desde principio, re.search si a partir de cualquier parte del string)
- # print(m.group(0)) # Extraer string del match
- # TODO setear los parámetros desde la terminal con sys.argv
- # TODO filtrar por actividad hardcodeando los nombres de cada una
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement