pymen

Tool to extract emails from body and FROM field

Oct 9th, 2012
138
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.22 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. # Python Tool to extract emails from body and FROM field from gmail (imap) account
  3. # author: pymen # [email protected]
  4. # don't forget to enter your login and password in the user variables 'username' and 'userpass'
  5.  
  6. import sys
  7. import re
  8. import poplib
  9. import email
  10. import imaplib
  11.  
  12. in_server = 'pop.gmail.com'
  13. username = '[email protected]'
  14. userpass = ""
  15.  
  16. class gmail():
  17.     def __init__(self):
  18.         self.emaildata_from = []
  19.         self.emaildata_body = []
  20.         self.regex_from = re.compile(r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}')
  21.         self.regex_html = re.compile(r""".*<a href="mailto:(.*)">""")
  22.         self.regex_txt = re.compile(r"""(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])""")
  23.  
  24.     def connect(self):
  25.         try:
  26.             self.m = imaplib.IMAP4_SSL(in_server)
  27.             self.m.login(username, userpass)
  28.             self.m.select()
  29.             print ("connecting to account %s" %username)
  30.         except imaplib.IMAP4.error:
  31.             print ("Error. Please check your login and passors in user vaviables 'userlogin' and 'userpass' above")
  32.             sys.exit()
  33.  
  34.     def receive_imap(self):
  35.         typ, data = self.m.search(None, 'ALL')
  36.         number = data[0].split()[-1]
  37.         print ("There %s number of messages in this account " %number)
  38.         print ("Processing message: "),
  39.         for num in data[0].split():
  40.             print num,
  41.             typ, data = self.m.fetch(num, '(RFC822)')
  42.             raw_email_string = data[0][1]
  43.             self.msg = email.message_from_string(raw_email_string)
  44.             self.get_email_address_from()
  45.             for part in self.msg.walk():
  46.                 if part.get_content_type() == "text/plain":
  47.                     self.body = part.get_payload(decode=True)
  48.                     self.get_email_address_html()
  49.                 if part.get_content_type() == "text/html":
  50.                     self.body = part.get_payload(decode=True)
  51.                     self.get_email_address_txt()
  52.  
  53.     def get_email_address_html(self):
  54.         self.email_address = self.regex_html.findall(self.body)
  55.         self.emaildata_body += self.email_address
  56.     def get_email_address_txt(self):
  57.         self.email_address = self.regex_txt.findall(self.body)
  58.         self.emaildata_body += self.email_address
  59.     def get_email_address_from(self):
  60.         email_address = self.msg.get("From")
  61.         self.email_address =  self.regex_from.findall(email_address)
  62.         self.emaildata_from += self.email_address
  63.  
  64.     def print_results(self):
  65.         self.m.close()
  66.         self.m.logout()
  67.         print ("\n\nResult emails from body:")
  68.         print (list(set(self.emaildata_body)))
  69.         print ("\nResult emails from FROM:")
  70.         print (list(set(self.emaildata_from)))
  71.  
  72. if __name__ == "__main__":
  73.     gm = gmail()
  74.     gm.connect()
  75.     gm.receive_imap()
  76.     gm.print_results()
Advertisement
Add Comment
Please, Sign In to add comment