eudemonics

UPDATED! v1.π! script to download all emails and attachments

Aug 4th, 2014
295
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 12.91 KB | None | 0 0
  1. #!/usr/bin/python
  2. #
  3. ##### EMAIL2FILE v1.1π
  4. ##### download or copy & paste into blank text file and save as "email2file.py"
  5. ##### to run, open terminal to script location and enter "python email2file.py"
  6. ##### all files are written to unique email user folder within 'output' subdirectory
  7. ##### attachments saved either in user folder or user's 'attachments' subfolder
  8. ##### questions? bugs? suggestions? contact vvn at:
  9. ##### vvn (at) eudemonics (dot) org
  10. ##### support my work - buy my music! http://dreamcorp.bandcamp.com
  11.  
  12. from __future__ import print_function
  13. import email, base64, getpass, imaplib
  14. import re, sys, os, os.path, datetime, socket
  15.  
# Start-up banner: script name, author and support link, coloured with
# ANSI escape sequences (\033[..m) and reset to the default colour at the end.
print('''
\033[34m
------------------------------------- \033[33m
----------\033[36m EMAIL2FILE v1.1 \033[33m----------
-----------\033[35m author : vvn \033[33m------------
-------------------------------------
----\033[37m support my work: buy my EP! \033[33m----
---\033[37m http://dreamcorp.bandcamp.com \033[33m---
\033[34m-------------------------------------\n\033[0m
''')
  26.  
  27. emailaddr = raw_input('please enter email --> ')
  28.  
  29. #VALIDATE EMAIL USING REGEX
  30. match = re.search(r'^[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9-]+)*(\.[a-z]{2,4})$', emailaddr)
  31.  
  32. if match:
  33.    print('\033[32m\nemail is valid\033[0m\n\n')
  34.    atdomain = re.search("@.*", emailaddr).group()
  35.    emaildomain = atdomain[1:]
  36.  
  37. else:
  38.    tries = 5
  39.  
  40.    while not match and tries > 0:
  41.    
  42.       print('\033[31minvalid email format\033[0m\n')
  43.       print('bad attempts: \033[33m' + str(6 - tries) + '\033[0m\n')
  44.       print('\033[36myou have ' + str(tries) + ' attempts remaining.\033[0m\n\n')
  45.       emailaddr = raw_input('please enter email again --> ')
  46.      
  47.       match = re.search(r'^[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9-]+)*(\.[a-z]{2,4})$', emailaddr)
  48.      
  49.       if match:
  50.          tries = -1
  51.          break
  52.          
  53.       else:
  54.          tries = tries - 1
  55.        
  56.    if match:
  57.       print('\n\033[32m email is valid \033[0m\n\n')
  58.       atdomain = re.search("@.*", emailaddr).group()
  59.       emaildomain = atdomain[1:]
  60.      
  61.    else:
  62.       print('\033[31munhandled exception. aborting..\033[0m\n')
  63.       sys.exit()
  64.    
  65.    if tries is 0:
  66.       print('\033[31m too many bad attempts using invalid format! \033[0m\n')
  67.       print('aborting..')
  68.       sys.exit()
  69.                
  70. def decode_email(msgbody):
  71.    
  72.    msg = email.message_from_string(msgbody)
  73.    
  74.    if msg is None:
  75.       decoded = msg
  76.    
  77.    decoded = msg
  78.  
  79.    text = ""
  80.    
  81.    att = False
  82.    
  83.    if msg.is_multipart():
  84.       html = None
  85.    
  86.       for part in msg.get_payload():
  87.    
  88.          print("\033[31m%s, %s\033[0m" % (part.get_content_type(), part.get_content_charset()))
  89.  
  90.          if part.get_content_charset() is None:
  91.             text = part.get_payload(decode=True)
  92.             continue
  93.  
  94.          charset = part.get_content_charset()
  95.  
  96.          if part.get_content_type() == 'text/plain':
  97.             text = unicode(part.get_payload(decode=True), str(charset), "ignore").encode('utf8', 'replace')
  98.             enc = part['Content-Transfer-Encoding']
  99.             if enc == "base64":
  100.                text = part.get_payload()
  101.                text = base64.decodestring(text)
  102.  
  103.          if part.get_content_type() == 'text/html':
  104.             html = unicode(part.get_payload(decode=True), str(charset), "ignore").encode('utf8', 'replace')
  105.                    
  106.          if part.get_content_maintype() == 'multipart':
  107.             continue
  108.            
  109.          elif part.get('Content-Disposition') is None:
  110.             continue
  111.            
  112.          elif part.get_content_type() == "multipart/alternative":
  113.             text = part.get_payload(decode=True)
  114.             enc = part['Content-Transfer-Encoding']
  115.             if part.get_content_type() == "text/plain":
  116.                 text = part.get_payload()
  117.                 if enc == "base64":
  118.                     text = base64.decodestring(text)
  119.            
  120.          filename = part.get_filename()
  121.          
  122.          if bool(filename):
  123.        
  124.             rootdir = 'output'
  125.             atdomain = re.search("@.*", emailaddr).group()
  126.             emaildomain = atdomain[1:]
  127.             i = len(emailaddr) - len(atdomain)
  128.             user_savename = emailaddr[:i]
  129.             # user_savename = emailaddr.rstrip(atdomain)
  130.             subdir = user_savename+"_"+emaildomain
  131.        
  132.             detach_dir = os.path.join(rootdir, subdir)
  133.            
  134.             if not os.path.exists(detach_dir):
  135.                os.makedirs(detach_dir, 0755)
  136.        
  137.             att_path = os.path.join(detach_dir, 'attachments', filename)
  138.            
  139.             if 'attachments' not in os.listdir(detach_dir):
  140.                os.makedirs(detach_dir + '/attachments', 0755)
  141.          
  142.             att = True
  143.  
  144.             if not os.path.isfile(att_path):
  145.                attfile = open(att_path, 'wb+')
  146.                attfile.write(part.get_payload(decode=True))
  147.                attfile.close()
  148.                decoded = attfile
  149.    
  150.       if att is False:
  151.          decoded = msg
  152.                
  153.          if html is None and text is not None:
  154.             decoded = text.strip()
  155.          
  156.          elif html is None and text is None:
  157.             decoded = msg
  158.          
  159.          else:
  160.             decoded = html.strip()
  161.          
  162.    else:
  163.       decoded = msg
  164.          
  165.    return decoded
  166.                
  167. def getimap(emailaddr, emailpass):
  168.  
  169.    atdomain = re.search("@.*", emailaddr).group()
  170.    emaildomain = atdomain[1:]
  171.    
  172.    imap_server = 'imap.' + emaildomain
  173.    imap_port = 993
  174.    
  175.    server = imaplib.IMAP4_SSL(imap_server, imap_port)
  176.    attempts = 5
  177.  
  178.    while True and attempts > 0:
  179.  
  180.       try:
  181.  
  182.          loginstatus, logindata = server.login(emailaddr, emailpass)
  183.    
  184.          if loginstatus == 'OK':
  185.    
  186.             select_info = server.select('INBOX')
  187.  
  188.             status, unseen = server.search(None, 'UNSEEN')
  189.      
  190.             print("\n\033[35m%d UNREAD MESSAGES\033[0m" % len(unseen))
  191.  
  192.             print()
  193.                                            
  194.             typ, listdata = server.list()
  195.          
  196.             print('Response code: \n\n\033[32m', typ)
  197.             print('\033[0m\nFOLDERS:\n\n\033[33m', listdata)
  198.      
  199.             print('\033[34m\n\nlogin successful, fetching emails.. \033[0m\n\n')
  200.          
  201.             # server.list()
  202.          
  203.             server.select()
  204.  
  205.             result, msgs = server.search(None, 'ALL')
  206.          
  207.             ids = msgs[0]
  208.             id_list = ids.split()
  209.      
  210.             print(id_list)
  211.    
  212.             print('\033[37m------------------------------------------------------------\n\033[0m')
  213.          
  214.             rootdir = 'output'
  215.      
  216.             printdate = str(datetime.date.today())
  217.  
  218.             prev_file_name = emailaddr+"-headerlist-"+printdate+".txt"
  219.             prev_complete_name = os.path.join(rootdir, prev_file_name)
  220.          
  221.             for email_uid in id_list:
  222.  
  223.                result, rawdata = server.fetch(email_uid, '(RFC822)')
  224.  
  225.                rawbody = rawdata[0][1]
  226.          
  227.                m = email.message_from_string(rawbody)
  228.                
  229.                msgfrom = m['From'].replace('/', '-')
  230.            
  231.                body = decode_email(rawbody)
  232.          
  233.                emaildomain = atdomain[1:]
  234.                j = len(emailaddr) - len(atdomain)
  235.                user_save = emailaddr[:j]
  236.      
  237.                subdir =  user_save + "_" + emaildomain
  238.                save_path = os.path.join(rootdir, subdir)
  239.          
  240.                if not os.path.exists(save_path):
  241.                   os.makedirs(save_path)
  242.            
  243.                mbody = email.message_from_string(rawbody)
  244.          
  245.                if mbody.is_multipart():
  246.          
  247.                   ext = ".txt"
  248.          
  249.                   for mpart in mbody.get_payload():
  250.            
  251.                      if 'text' in mpart.get_content_type():
  252.                         ext = ".txt"
  253.                         isattach = False
  254.                  
  255.                         if mpart.get_content_type() == 'text/html':
  256.                            ext = ".htm"
  257.                            isattach = False
  258.                  
  259.                      else:
  260.                         file_name = mpart.get_filename()
  261.                         isattach = True
  262.                
  263.                else:
  264.                   isattach = False
  265.                   ext = ".txt"
  266.                  
  267.                if isattach is False:
  268.                   file_name = user_save + "-" + email_uid + "-" + msgfrom[:25] + ext
  269.        
  270.                if file_name is None:
  271.                   file_name = user_save + "-" + msgfrom[:25] + "-" + email_uid + ext
  272.        
  273.                complete_name = os.path.join(save_path, file_name)
  274.                
  275.                dtnow = datetime.datetime.now()
  276.                dtyr = str(dtnow.year)
  277.                dtmo = str(dtnow.month)
  278.                dtday = str(dtnow.day)
  279.                dthr = str(dtnow.hour)
  280.                dtmin = str(dtnow.minute)
  281.                
  282.                dtdate = str(dtyr + "-" + dtmo + "-" + dtday)
  283.                dttime = str(dthr + "." + dtmin)
  284.                              
  285.                if os.path.isfile(complete_name):
  286.          
  287.                   print('\n\033[33m' + complete_name + '\033[0m already exists, skipping.. \n\n')
  288.            
  289.                else:
  290.            
  291.                   if type(body) is str or type(body) is buffer and isattach is True:
  292.                      print('\n\033[34mdownloading file: \033[33m' + str(file_name) + '\033[0m\n\n')
  293.                      bodyfile = open(complete_name, 'wb+')
  294.                      # bodyfile.seek(0)
  295.                      bodyfile.write(body)
  296.                      bodyfile.close()
  297.                
  298.                   else:
  299.                      bodyfile = open(complete_name, 'wb+')
  300.                      bodyfile.write("SENDER: \n")
  301.                      bodyfile.write(msgfrom)
  302.                      bodyfile.write('\n\n')
  303.                      # bodyfile.write('Decoded:\n\n')
  304.                      bodyfile.write(str(body))
  305.                      bodyfile.write('\n\nRAW MESSAGE DATA:\n\n')
  306.                      bodyfile.write(rawbody)
  307.                      bodyfile.write('\n\n')
  308.                      bodyfile.write('file saved: ' + dtdate + ', ' + dttime)
  309.                      bodyfile.write('\n\n')
  310.                      bodyfile.close()
  311.      
  312.                   print('\033[36m\033[1mmessage data saved to new file: \033[35m' + complete_name + '\033[0m\n')
  313.                
  314.                print('\033[37m------------------------------------------------------------\033[0m\n')
  315.            
  316.                resp, data = server.fetch(email_uid, '(UID FLAGS BODY.PEEK[HEADER.FIELDS (FROM SUBJECT DATE)])')
  317.                print('\033[35m' + email_uid + '\033[0m\n')
  318.                print(data[0][1] + '\n\n')
  319.                msgpreview = data[0][1]
  320.                
  321.                if not os.path.isfile(prev_complete_name):
  322.                   prevfile = open(prev_complete_name, 'wb+')
  323.                #   prevfile.write('Email headers for: ' + emailaddr + '\n\n')
  324.                #   prevfile.close()
  325.                  
  326.                with open(prev_complete_name, 'a+b') as prevfile:  
  327.                   prevfile.write(email_uid)
  328.                   prevfile.write("\n\n")
  329.                   prevfile.write(msgpreview)
  330.                   prevfile.write("\n\n")
  331.                   # prevfile.close()
  332.                            
  333.             print('\033[32minbox contents successfully saved to file. YAY! \033[0m\n')
  334.          
  335.          print('list of message previews saved as: \033[31m'+prev_complete_name+'\033[0m \n')
  336.          
  337.          print('logging out..\n')
  338.            
  339.          server.logout()
  340.            
  341.          print('logout successful. exiting..\n')
  342.          attempts = -1
  343.          break
  344.  
  345.       except server.error, e:
  346.      
  347.          pass
  348.          print('\033[35mfailed connecting to IMAP server.\033[0m\n')
  349.          print('\033[31merror: \033[33m' + str(e) + '\033[0m\n\n')
  350.  
  351.          attempts = attempts - 1              
  352.  
  353.          emailaddr = raw_input('please enter email again --> ')
  354.          emailpass = getpass.getpass('please enter password --> ')
  355.      
  356.          matchaddy = re.search(r'^[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9-]+)*(\.[a-z]{2,4})$', emailaddr)
  357.      
  358.          while not matchaddy and attempts > 1:
  359.             print('\033[31m invalid email format \033[0m\n')
  360.             attempts = attempts - 1
  361.            
  362.       continue
  363.          
  364.    if attempts is 0:
  365.       print('too many logon failures. unable to log onto IMAP server.')  
  366.      
  367. emailpass = getpass.getpass('please enter password --> ')
  368.            
  369. try:
  370.    print('\ntrying IMAP connection to server: \033[36mimap.' + emaildomain + '\033[0m' )
  371.    getimap(emailaddr, emailpass)
  372.    
  373. except socket.error, e:
  374.    print("Error establishing IMAP connection: ", e)
  375.    sys.exit()
  376.    
  377. sys.exit()
Add Comment
Please, Sign In to add comment