Not a member of Pastebin yet?
                        Sign Up,
                        it unlocks many cool features!                    
                - import os
 - import fitz
 - from datetime import datetime
 - BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 - LOG_FILE = os.path.join(BASE_DIR, "skipped_files_log.txt")
 - rename_mapping = {
 - "formw2": ("0010", "W-2"),
 - "w2statement": ("0010", "W-2"),
 - "w2g": ("0015", "W-2G"),
 - "ssa1099": ("0030", "SSA"),
 - "form1099r": ("0020", "1099-R"),
 - "1099r": ("0020", "1099-R"),
 - "k-1": ("0050", "K-1"),
 - "form1095c": ("0065", "HEALTH"),
 - "1095c": ("0065", "HEALTH"),
 - "form1095a": ("0065", "HEALTH"),
 - "1095a": ("0065", "HEALTH"),
 - "form1099int": ("0040", "INT DIV SCH D"),
 - "form1099div": ("0040", "INT DIV SCH D"),
 - "form1099b": ("0040", "INT DIV SCH D"),
 - "interestincome": ("0040", "INT DIV SCH D"),
 - "ordinarydividends": ("0040", "INT DIV SCH D"),
 - "proceedsfrombroker": ("0040", "INT DIV SCH D"),
 - "healthequity": ("0060", "HSA"),
 - "hsastatement": ("0060", "HSA"),
 - "1099sa": ("0060", "HSA"),
 - "form1099sa": ("0060", "HSA"),
 - "form5498sa": ("0060", "HSA"),
 - "1098t": ("0070", "EDUCATION"),
 - "1099q": ("0070", "EDUCATION"),
 - "1098e": ("0070", "EDUCATION"),
 - "mortgageinterest": ("1000", "SCH A - MORTGAGE"),
 - "formptc": ("0080", "NE PTC"),
 - "registration": ("1010", "SCH A - VEHICLE"),
 - "ptclookup": ("0080", "NE PTC"),
 - "donationreceipt": ("1020", "SCH A - DONATIONS"),
 - "giftstatement": ("1020", "SCH A - DONATIONS"),
 - "donation": ("1020", "SCH A - DONATIONS"),
 - "charitable": ("1020", "SCH A - DONATIONS"),
 - "501c": ("1020", "SCH A - DONATIONS"),
 - }
 - file_counters = {}
 - renamed_total = 0
 - skipped_total = 0
 - skipped_files = []
 - with open(LOG_FILE, "w") as log:
 - log.write(f"Skipped Files Log - {datetime.now()}\n")
 - log.write("=" * 40 + "\n")
 - for filename in os.listdir(BASE_DIR):
 - if not filename.lower().endswith(".pdf"):
 - continue
 - full_path = os.path.join(BASE_DIR, filename)
 - print(f"\nš Checking: {filename}")
 - try:
 - doc = fitz.open(full_path)
 - full_text = "".join([page.get_text() for page in doc])
 - doc.close()
 - except Exception as e:
 - reason = f"Error reading PDF: {e}"
 - print(f"ā {reason}")
 - skipped_files.append((filename, reason))
 - skipped_total += 1
 - continue
 - normalized_text = full_text.lower().replace(" ", "").replace("-", "").replace("\n", "")
 - renamed = False
 - for keyword, (prefix, label) in rename_mapping.items():
 - if keyword in normalized_text:
 - file_counters.setdefault(prefix, 0)
 - file_counters[prefix] += 1
 - new_filename = f"{prefix}.{file_counters[prefix]} {label}.pdf"
 - new_path = os.path.join(BASE_DIR, new_filename)
 - if os.path.exists(new_path):
 - reason = "Target file already exists"
 - print(f"ā ļø Skipped {filename}: {reason}")
 - skipped_files.append((filename, reason))
 - skipped_total += 1
 - renamed = True
 - break
 - os.rename(full_path, new_path)
 - print(f"ā Renamed: {filename} ā {new_filename}")
 - renamed = True
 - renamed_total += 1
 - break
 - if not renamed:
 - reason = "No keyword match"
 - print(f"ā ļø Skipped: {filename} - {reason}")
 - skipped_files.append((filename, reason))
 - skipped_total += 1
 - with open(LOG_FILE, "a") as log:
 - for file, reason in skipped_files:
 - log.write(f"{file}: {reason}\n")
 - print(f"\nā Done. Renamed: {renamed_total} | Skipped: {skipped_total}")
 - print(f"š Log saved to: {LOG_FILE}")
 
Advertisement
 
                    Add Comment                
                
                        Please, Sign In to add comment