Advertisement
Guest User

Untitled

a guest
Apr 24th, 2017
122
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.92 KB | None | 0 0
  1. import re
  2.  
  3. import email
  4. import email.header
  5. import email.message
  6. import email.utils
  7. # import collections
  8. # import pprint
  9.  
  10.  
  11. with open('haengermails.mail', 'rb') as fp:
  12. super_message = email.message_from_binary_file(fp)
  13.  
  14.  
  15. def parse_header(v):
  16. v = str(email.header.make_header(email.header.decode_header(v)))
  17. try:
  18. v2 = v.encode('latin1').decode('utf8')
  19. except (UnicodeEncodeError, UnicodeDecodeError):
  20. pass
  21. else:
  22. if v != v2:
  23. print("Double-encoded %r" % (v2,))
  24. return v2
  25. return v
  26.  
  27. STRIP_HEADERS = r'''
  28. ^(Authentication-Results|Delivered-To|Received|Received-SPF|Return-Path|
  29. X-Received|X-OriginalArrivalTime|X-Sieve|X-NFIT.*|X-Scanned-By|X-Sim|
  30. X-SPF-Fail|X-Forefront-.*|X-SpamScore|X-BigFish|X-OriginatorOrg|
  31. X-NilSimsa-Score|X-FOPE-CONNECTOR|X-FFO-Routing-Override)$
  32. '''
  33.  
  34. def strip_headers(items):
  35. return [x for x in items if not re.match(STRIP_HEADERS, x[0], re.X|re.I)]
  36.  
  37.  
  38. # header_counts = collections.Counter()
  39. assert super_message.get_payload(0).get_content_maintype() == 'text'
  40. attachments = super_message.get_payload()[1:]
  41. for attachment in attachments: # type: email.message.Message
  42. assert attachment.get_content_type() == 'message/rfc822'
  43. message = attachment.get_payload(0)
  44. assert message.get_content_type() == 'text/plain'
  45. body = message.get_payload()
  46. receiveds = message.get_all('Received')
  47. if receiveds is None:
  48. print(message.items())
  49. mo = re.search('from (userid \d+|\S+\w)', receiveds[-1])
  50. assert mo.group(1) in ('userid 13', 'prodekanus.auitdrift.client.au.dk')
  51. date = email.utils.parsedate_to_datetime(message['Date'])
  52. from_name, from_email = email.utils.parseaddr(
  53. parse_header(message['From']))
  54. subject = parse_header(message['Subject'])
  55.  
  56. headers = strip_headers(message.items())
  57.  
  58. print(date, from_name, from_email, subject, len(body))
  59. # header_counts += collections.Counter(k for k, v in headers)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement