Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import email
- import email.header
- import email.message
- import email.utils
- # import collections
- # import pprint
# Read the mail file in binary mode and parse it into a single Message
# object; the rest of the script walks its payload.
with open('haengermails.mail', 'rb') as mail_file:
    super_message = email.message_from_binary_file(mail_file)
def parse_header(v):
    """Decode an RFC 2047 encoded header value into readable text.

    Parameters:
        v: the raw header value as returned by ``message[name]`` (a ``str``
            or ``email.header.Header``), or ``None`` when the header is
            absent.

    Returns:
        The decoded header as ``str``.  A missing header (``None``) yields
        ``''`` rather than a ``TypeError`` from ``decode_header``.

    If the decoded text can be re-encoded as Latin-1 and those bytes form
    valid UTF-8 that differs from the decoded text, the value is treated as
    double-encoded: the repaired text is reported on stdout and returned.
    """
    if v is None:
        return ''
    decoded = str(email.header.make_header(email.header.decode_header(v)))
    try:
        repaired = decoded.encode('latin1').decode('utf8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Not representable as Latin-1, or the bytes are not valid UTF-8:
        # the value was decoded correctly the first time.
        return decoded
    if repaired != decoded:
        print("Double-encoded %r" % (repaired,))
    # When nothing changed, ``repaired`` equals ``decoded``.
    return repaired
# Names of transport/spam-filter headers to drop.  The pattern is laid out
# across several lines, so it is only valid when compiled with re.X
# (verbose mode ignores the embedded newlines).
STRIP_HEADERS = r'''
^(Authentication-Results|Delivered-To|Received|Received-SPF|Return-Path|
X-Received|X-OriginalArrivalTime|X-Sieve|X-NFIT.*|X-Scanned-By|X-Sim|
X-SPF-Fail|X-Forefront-.*|X-SpamScore|X-BigFish|X-OriginatorOrg|
X-NilSimsa-Score|X-FOPE-CONNECTOR|X-FFO-Routing-Override)$
'''

# Compiled once with the flags the pattern requires, instead of passing the
# pattern string and flags to re.match for every header.
_STRIP_HEADERS_RE = re.compile(STRIP_HEADERS, re.X | re.I)


def strip_headers(items):
    """Return the ``(name, value)`` pairs whose name is not a stripped header.

    Parameters:
        items: iterable of ``(name, value)`` pairs, e.g. ``message.items()``.

    Returns:
        A new list with the pairs in their original order, omitting every
        pair whose name matches ``STRIP_HEADERS`` (case-insensitively).
    """
    return [item for item in items if not _STRIP_HEADERS_RE.match(item[0])]
# header_counts = collections.Counter()

# Origins that the last Received header of each message is allowed to name.
_EXPECTED_ORIGINS = ('userid 13', 'prodekanus.auitdrift.client.au.dk')
# Raw string: \d, \S and \w are regex classes, not string escapes.
_ORIGIN_RE = re.compile(r'from (userid \d+|\S+\w)')


def _require(condition, description):
    """Raise ``ValueError(description)`` unless ``condition`` is true.

    Used instead of ``assert`` so the checks still run under ``python -O``.
    """
    if not condition:
        raise ValueError(description)


# The first part of the container must be text; every later part is
# expected to be an attached message/rfc822 wrapping one text/plain message.
_require(super_message.get_payload(0).get_content_maintype() == 'text',
         'first part of the container message is not text')
attachments = super_message.get_payload()[1:]
for attachment in attachments:  # type: email.message.Message
    _require(attachment.get_content_type() == 'message/rfc822',
             'attachment is %s, expected message/rfc822'
             % (attachment.get_content_type(),))
    message = attachment.get_payload(0)
    _require(message.get_content_type() == 'text/plain',
             'attached message is %s, expected text/plain'
             % (message.get_content_type(),))
    body = message.get_payload()

    receiveds = message.get_all('Received')
    if not receiveds:
        # Dump the headers so the offending message can be identified, then
        # stop with a clear error instead of failing on receiveds[-1].
        print(message.items())
        raise ValueError('attached message has no Received header')
    # Only the last Received header is checked (normally the earliest hop,
    # since relays prepend their own header).
    mo = _ORIGIN_RE.search(receiveds[-1])
    origin = mo.group(1) if mo else None
    _require(origin in _EXPECTED_ORIGINS,
             'unexpected origin in Received header: %r' % (receiveds[-1],))

    date = email.utils.parsedate_to_datetime(message['Date'])
    from_name, from_email = email.utils.parseaddr(
        parse_header(message['From']))
    subject = parse_header(message['Subject'])
    # Only consumed by the disabled header census below.
    headers = strip_headers(message.items())
    print(date, from_name, from_email, subject, len(body))
    # header_counts += collections.Counter(k for k, v in headers)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement