Advertisement
Guest User

Tax File Renamer

a guest
Apr 17th, 2025
46
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.62 KB | Money | 0 0
import os
from datetime import datetime

import fitz

  5. BASE_DIR = os.path.dirname(os.path.abspath(__file__))
  6. LOG_FILE = os.path.join(BASE_DIR, "skipped_files_log.txt")
  7.  
  8. rename_mapping = {
  9. "formw2": ("0010", "W-2"),
  10. "w2statement": ("0010", "W-2"),
  11. "w2g": ("0015", "W-2G"),
  12. "ssa1099": ("0030", "SSA"),
  13. "form1099r": ("0020", "1099-R"),
  14. "1099r": ("0020", "1099-R"),
  15. "k-1": ("0050", "K-1"),
  16. "form1095c": ("0065", "HEALTH"),
  17. "1095c": ("0065", "HEALTH"),
  18. "form1095a": ("0065", "HEALTH"),
  19. "1095a": ("0065", "HEALTH"),
  20. "form1099int": ("0040", "INT DIV SCH D"),
  21. "form1099div": ("0040", "INT DIV SCH D"),
  22. "form1099b": ("0040", "INT DIV SCH D"),
  23. "interestincome": ("0040", "INT DIV SCH D"),
  24. "ordinarydividends": ("0040", "INT DIV SCH D"),
  25. "proceedsfrombroker": ("0040", "INT DIV SCH D"),
  26. "healthequity": ("0060", "HSA"),
  27. "hsastatement": ("0060", "HSA"),
  28. "1099sa": ("0060", "HSA"),
  29. "form1099sa": ("0060", "HSA"),
  30. "form5498sa": ("0060", "HSA"),
  31. "1098t": ("0070", "EDUCATION"),
  32. "1099q": ("0070", "EDUCATION"),
  33. "1098e": ("0070", "EDUCATION"),
  34. "mortgageinterest": ("1000", "SCH A - MORTGAGE"),
  35. "formptc": ("0080", "NE PTC"),
  36. "registration": ("1010", "SCH A - VEHICLE"),
  37. "ptclookup": ("0080", "NE PTC"),
  38. "donationreceipt": ("1020", "SCH A - DONATIONS"),
  39. "giftstatement": ("1020", "SCH A - DONATIONS"),
  40. "donation": ("1020", "SCH A - DONATIONS"),
  41. "charitable": ("1020", "SCH A - DONATIONS"),
  42. "501c": ("1020", "SCH A - DONATIONS"),
  43. }
  44.  
  45. file_counters = {}
  46. renamed_total = 0
  47. skipped_total = 0
  48. skipped_files = []
  49.  
  50. with open(LOG_FILE, "w") as log:
  51. log.write(f"Skipped Files Log - {datetime.now()}\n")
  52. log.write("=" * 40 + "\n")
  53.  
  54. for filename in os.listdir(BASE_DIR):
  55. if not filename.lower().endswith(".pdf"):
  56. continue
  57.  
  58. full_path = os.path.join(BASE_DIR, filename)
  59. print(f"\nšŸ“„ Checking: {filename}")
  60.  
  61. try:
  62. doc = fitz.open(full_path)
  63. full_text = "".join([page.get_text() for page in doc])
  64. doc.close()
  65. except Exception as e:
  66. reason = f"Error reading PDF: {e}"
  67. print(f"āŒ {reason}")
  68. skipped_files.append((filename, reason))
  69. skipped_total += 1
  70. continue
  71.  
  72. normalized_text = full_text.lower().replace(" ", "").replace("-", "").replace("\n", "")
  73.  
  74. renamed = False
  75. for keyword, (prefix, label) in rename_mapping.items():
  76. if keyword in normalized_text:
  77. file_counters.setdefault(prefix, 0)
  78. file_counters[prefix] += 1
  79. new_filename = f"{prefix}.{file_counters[prefix]} {label}.pdf"
  80. new_path = os.path.join(BASE_DIR, new_filename)
  81.  
  82. if os.path.exists(new_path):
  83. reason = "Target file already exists"
  84. print(f"āš ļø Skipped {filename}: {reason}")
  85. skipped_files.append((filename, reason))
  86. skipped_total += 1
  87. renamed = True
  88. break
  89.  
  90. os.rename(full_path, new_path)
  91. print(f"āœ… Renamed: {filename} → {new_filename}")
  92. renamed = True
  93. renamed_total += 1
  94. break
  95.  
  96. if not renamed:
  97. reason = "No keyword match"
  98. print(f"āš ļø Skipped: {filename} - {reason}")
  99. skipped_files.append((filename, reason))
  100. skipped_total += 1
  101.  
  102. with open(LOG_FILE, "a") as log:
  103. for file, reason in skipped_files:
  104. log.write(f"{file}: {reason}\n")
  105.  
  106. print(f"\nāœ… Done. Renamed: {renamed_total} | Skipped: {skipped_total}")
  107. print(f"šŸ“ Log saved to: {LOG_FILE}")
Tags: TAX
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement