Guest User

Untitled

a guest
Mar 21st, 2024
124
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.87 KB | None | 0 0
  1. #Script by Ian R. Kelly
  2. #12/17/2023
  3. import mailbox
  4. import os
  5. import re
  6. import logging
  7. from email import policy
  8. from email.utils import parsedate
  9. from email.generator import BytesGenerator
  10. from datetime import datetime
  11. from io import BytesIO
  12.  
  13. # Configuration
  14. #Use email1, email2 and target_names to filter the targets. In the case this script was written for, the targets are specific individuals whose correspondence is sought by the court.
  15. #mbox_file = 'YourMBOXFile'
  16. base_output_folder = 'SortedMessages'
  17. #email1 = '[email protected]'
  18. #email2 = '[email protected]'
  19. #target_names = ['Smith', 'Jones', 'White', 'Doe', '[email protected]']
  20.  
  21. email_pattern = re.compile(re.escape(email1) + '|' + re.escape(email2), re.IGNORECASE)
  22. name_patterns = [re.compile(re.escape(name), re.IGNORECASE) for name in target_names]
  23.  
  24. logging.basicConfig(level=logging.INFO,
  25. format='%(message)s',
  26. handlers=[logging.FileHandler("export.log"), logging.StreamHandler()])
  27.  
  28. def sanitize_subject(subject):
  29. subject = re.sub(r'[^\w\s]', '', subject)
  30. subject = subject.replace(' ', '_')
  31. return subject
  32.  
  33. def parse_email_date(date_str):
  34. if date_str is None:
  35. return None
  36. date_str = re.sub(r'\s+\(.*\)$', '', date_str)
  37. try:
  38. return datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S %z')
  39. except ValueError:
  40. logging.error(f"Failed to parse date: {date_str}")
  41. return None
  42.  
  43. def get_message_body(message):
  44. if message.is_multipart():
  45. for part in message.walk():
  46. content_type = part.get_content_type()
  47. content_disposition = part.get("Content-Disposition")
  48. if content_disposition is None:
  49. if content_type == 'text/plain' or content_type == 'text/html':
  50. return part.get_payload(decode=True).decode('utf-8', errors='ignore')
  51. else:
  52. return message.get_payload(decode=True).decode('utf-8', errors='ignore')
  53.  
  54.  
  55. def create_directory_structure(year, month):
  56. path = os.path.join(base_output_folder, str(year), month.zfill(2))
  57. if not os.path.exists(path):
  58. os.makedirs(path)
  59. return path
  60.  
  61. print(f"Searching {mbox_file} for specific emails. This could take a while...")
  62. mbox = mailbox.mbox(mbox_file)
  63. total_messages = len(mbox)
  64. print(f"Loaded {total_messages} messages.")
  65.  
  66. for i, message in enumerate(mbox, start=1):
  67. print(f"Processing message {i} of {total_messages}...", end="\r")
  68. try:
  69. email_date = parse_email_date(message['Date'])
  70. subject = sanitize_subject(message['Subject'] if message['Subject'] is not None else "No Subject")
  71. date_str = email_date.strftime('%b-%d-%Y') if email_date else "Date Corrupted or Unreadable."
  72. filename = f"{date_str}.{subject}.txt"
  73.  
  74. if email_pattern.search(message.as_string()) and any(name_pattern.search(message.as_string()) for name_pattern in name_patterns):
  75. if email_date is None or not (2022 <= email_date.year <= 2022):
  76. continue
  77.  
  78. from_header = message['From']
  79. to_header = message['To']
  80. body = get_message_body(message)
  81.  
  82. year_month_path = create_directory_structure(email_date.year, email_date.strftime('%m'))
  83. filepath = os.path.join(year_month_path, filename)
  84.  
  85. #Write the email out as a text file.
  86. with open(filepath, 'w', encoding='utf-8') as text_file:
  87. text_file.write(f"Date: {date_str}\nFrom: {from_header}\nTo: {to_header}\nSubject: {subject}\n\n{body}")
  88.  
  89. #Capture the attachements and export them in their original format (.pdf, .png, .doc, etc)
  90. if message.is_multipart():
  91. attachments_folder = os.path.join(year_month_path, f"{filename}.ATTACHMENTS")
  92. if not os.path.exists(attachments_folder):
  93. os.makedirs(attachments_folder)
  94. for part in message.walk():
  95. if part.get_content_maintype() == 'multipart':
  96. continue
  97. content_disposition = part.get('Content-Disposition')
  98. if content_disposition is None:
  99. continue
  100. attachment_filename = part.get_filename()
  101. if attachment_filename:
  102. file_path = os.path.join(attachments_folder, attachment_filename)
  103. with open(file_path, 'wb') as f:
  104. f.write(part.get_payload(decode=True))
  105. else:
  106. print(f'{i} - No relevant addresses found, moving on.')
  107. except UnicodeEncodeError as e:
  108. logging.error(f"UnicodeEncodeError processing message {i}: {e}")
  109. except Exception as e:
  110. logging.error(f"Unexpected error processing message {i}: {e}")
  111.  
  112. print(f"Job's finished.")
  113.  
Advertisement
Add Comment
Please, Sign In to add comment