Advertisement
Toumo

py_def_scrapper.py

Apr 6th, 2023 (edited)
879
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.99 KB | None | 0 0
  1. import os
  2. import re
  3. # import sys
  4.  
  5. def filter_filename_list_by_format(filenames: list[str], format: str = ".py") -> list[str]:
  6.     return [name for name in filenames if name[-len(format):] == format]
  7.  
  8. def metadata_from_filename(filename: str) -> str:
  9.     """
  10.    Los nombres de archivo son de la forma 01_-_actividad-mail_gmail.com-nombredearchivo.py-AAAA-MM-DD_HH.MM.SS.py,
  11.    esta función está "hardcodeada" pensando en esa forma tan particular de nombre de archivo.
  12.    Gracias a la existencia de "predador-presa" y su guión, esto no es tan fácil como hacer filename.split("-")[2].
  13.    Pero casi. Esta función asume que las direcciones de mail no incluyen "-" (en general se cumple, gmail y yahoo lo prohiben).
  14.    """
  15.     split_on_dash = filename.split("-")
  16.     mail_index = 2 + int("_" not in split_on_dash[2])
  17.     mail = split_on_dash[mail_index][::-1].replace("_", "@", 1)[::-1]
  18.     task = "-".join(split_on_dash[:mail_index]).replace("_", "")
  19.     datetime = filename[-22:-3].replace("_", " ").replace(".", ":")
  20.     metadata = " - ".join([task, mail, datetime])
  21.     return metadata
  22.  
  23. def lines_from_file(full_file_path: str) -> list[str]:
  24.     with open(full_file_path, "r", encoding = "latin1") as file:
  25.         lines = file.readlines()
  26.     return lines
  27.  
  28. def pattern_from_function_name(function_name: str) -> str:
  29.     pattern = ""
  30.     split_on_underscore = function_name.split("_")
  31.     for word in split_on_underscore:
  32.         pattern += word + ".*"
  33.     return "def.*" + pattern
  34.  
  35. def function_def_lines_from_file(full_file_path: str, function_name: str) -> list[str]:
  36.     def_pattern = pattern_from_function_name(function_name)
  37.     return_pattern = ".*return"
  38.     lines = lines_from_file(full_file_path)
  39.     function_def_start_not_reached, function_def_end_not_reached = True, True
  40.     def_match_line_id, return_match_line_id = 0, 0
  41.  
  42.     for i, line in enumerate(lines):
  43.  
  44.         if function_def_start_not_reached:
  45.             if re.match(def_pattern, line.lower()):
  46.                 function_def_start_not_reached = False
  47.                 def_match_line_id = i
  48.  
  49.         elif function_def_end_not_reached:
  50.             if re.match(return_pattern, line):
  51.                 return_match_line_id = i + 1
  52.             elif line[0] != " " and line != "\n":
  53.                 function_def_end_not_reached = False
  54.    
  55.     return lines[def_match_line_id:return_match_line_id]
  56.  
  57. def create_empty_file(full_save_path: str) -> None:
  58.     with open(full_save_path, 'w') as file:
  59.         file.write("")
  60.  
  61. def append_lines_to_file(lines: list[str], full_save_path: str, metadata: str = "") -> None:
  62.     if lines:
  63.         with open(full_save_path, 'a', encoding = "latin1") as file:
  64.             if metadata:
  65.                 file.write(metadata + "\n")
  66.             file.writelines(lines)
  67.             file.write("\n")
  68.  
  69. if __name__ == "__main__":
  70.     ### PARAMETROS ###
  71.     files_load_dir = "/home/tomo/Documents/Playground/Python/pyDefScrapper/targetFolder/"
  72.     function_name = "hay_10mil"
  73.     include_metadata = True
  74.  
  75.     ### PROGRAMA PRINCIPAL ###
  76.     full_save_path = os.path.join(os.path.dirname(__file__), "output.py")
  77.     create_empty_file(full_save_path)
  78.     py_filename_list = filter_filename_list_by_format(os.listdir(files_load_dir), ".py")
  79.     for filename in py_filename_list:
  80.         full_file_path = os.path.join(files_load_dir, filename)
  81.         metadata = include_metadata * ("# " + metadata_from_filename(filename))
  82.         def_lines = function_def_lines_from_file(full_file_path, function_name)
  83.         append_lines_to_file(def_lines, full_save_path, metadata)
  84.  
  85. # Probando re
  86. # test = "01_-_figuritas-mail_gmail.com-nombredearchivo.py-AAAA-MM-DD_HH.MM.SS.py"
  87. # p = re.compile("-*-") # Crear patron
  88. # m = re.match(p, test) # Buscar match (re.match si desde principio, re.search si a partir de cualquier parte del string)
  89. # print(m.group(0)) # Extraer string del match
  90.  
  91. # TODO setear los parámetros desde la terminal con sys.argv
  92. # TODO filtrar por actividad hardcodeando los nombres de cada una
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement