Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- # Python Tool to extract emails from body and FROM field from gmail (imap) account
- # author: pymen # [email protected]
- # don't forget to enter your login and password in the user variables 'username' and 'userpass'
- import sys
- import re
- import poplib
- import email
- import imaplib
# Connection settings -- edit these before running the script.
#
# The script talks IMAP (imaplib.IMAP4_SSL, default port 993), so the host
# must be Gmail's IMAP endpoint, not the POP3 one.
in_server = 'imap.gmail.com'
# Account name passed to IMAP LOGIN.
# NOTE(review): this value looks like it was masked by the hosting page's
# e-mail obfuscation -- replace it with the real account address.
username = '[email protected]'
# Account password passed to IMAP LOGIN; empty by default, so the login will
# fail until it is filled in.  Avoid committing a real password to source
# control.
userpass = ""
class gmail():
    """Harvest e-mail addresses from every message of an IMAP mailbox.

    Addresses are collected from two places in each message:

    * the ``From`` header  -> ``emaildata_from``
    * the ``text/plain`` and ``text/html`` body parts -> ``emaildata_body``

    Typical use is ``connect()``, then ``receive_imap()``, then
    ``print_results()``.  The connection parameters are read from the
    module-level names ``in_server``, ``username`` and ``userpass``.

    The code is written to run on both Python 2 and Python 3: output that
    must stay on one line goes through ``sys.stdout.write`` and every value
    coming back from the server is normalised to the native ``str`` type
    before a regular expression is applied to it.
    """

    def __init__(self):
        """Create empty result lists and compile the extraction patterns."""
        # Accumulated matches; duplicates are removed only when printing.
        self.emaildata_from = []
        self.emaildata_body = []
        # Loose address pattern applied to the raw ``From`` header.
        self.regex_from = re.compile(
            r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}')
        # Address inside <a ... href="mailto:...">.  The capture stops at the
        # closing quote or at '?' so neither trailing attributes nor
        # "?subject=..." parameters leak into the result, and several links
        # on one line are all found.
        self.regex_html = re.compile(
            r'<a\b[^>]*\bhref="mailto:([^"?]+)', re.IGNORECASE)
        # RFC 5322 style address pattern for free text.  It is written with
        # lowercase classes only, so it has to be compiled case-insensitively
        # or mixed-case addresses are truncated or missed.
        self.regex_txt = re.compile(
            r"""(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*"""
            r"""|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]"""
            r"""|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")"""
            r"""@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?"""
            r"""|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}"""
            r"""(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:"""
            r"""(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])""",
            re.IGNORECASE)

    @staticmethod
    def _as_text(value, charset=None):
        """Return *value* as a native ``str``.

        ``None`` becomes ``''``; bytes are decoded with *charset* (UTF-8 when
        it is missing or unknown), replacing undecodable bytes; any other
        object is passed through ``str()``.
        """
        if value is None:
            return ''
        if isinstance(value, str):
            return value
        if isinstance(value, bytes):
            try:
                return value.decode(charset or 'utf-8', 'replace')
            except LookupError:
                # The message declared a charset Python does not know.
                return value.decode('utf-8', 'replace')
        return str(value)

    @staticmethod
    def _parse_message(raw):
        """Parse a raw RFC 822 message given as ``str`` or as bytes."""
        if isinstance(raw, str):
            return email.message_from_string(raw)
        return email.message_from_bytes(raw)

    def connect(self):
        """Open an SSL IMAP connection, log in and select the default mailbox.

        On an IMAP error (for example rejected credentials) a hint is printed
        and the process exits with a non-zero status.
        """
        try:
            self.m = imaplib.IMAP4_SSL(in_server)
            self.m.login(username, userpass)
            self.m.select()
            print("connecting to account %s" % username)
        except imaplib.IMAP4.error:
            print("Error. Please check your login and password in the user "
                  "variables 'username' and 'userpass' above")
            sys.exit(1)

    def receive_imap(self):
        """Fetch every message of the selected mailbox and extract addresses.

        Results are appended to ``emaildata_from`` and ``emaildata_body``.
        An empty mailbox is reported and skipped; a message whose fetch does
        not return a payload is skipped.
        """
        typ, data = self.m.search(None, 'ALL')
        message_ids = []
        if typ == 'OK' and data and data[0]:
            message_ids = data[0].split()
        print("There %s number of messages in this account " % len(message_ids))
        if not message_ids:
            return

        sys.stdout.write("Processing message: ")
        for num in message_ids:
            # Progress indicator, kept on a single line.
            sys.stdout.write("%s " % self._as_text(num))
            sys.stdout.flush()

            typ, fetched = self.m.fetch(num, '(RFC822)')
            if typ != 'OK' or not fetched or not isinstance(fetched[0], tuple):
                continue
            self.msg = self._parse_message(fetched[0][1])
            self.get_email_address_from()

            for part in self.msg.walk():
                content_type = part.get_content_type()
                if content_type not in ("text/plain", "text/html"):
                    continue
                self.body = self._as_text(part.get_payload(decode=True),
                                          part.get_content_charset())
                if content_type == "text/html":
                    self.get_email_address_html()
                # The free-text pattern is applied to HTML as well, so
                # addresses that appear outside mailto links are still found.
                self.get_email_address_txt()

    def get_email_address_html(self):
        """Collect ``mailto:`` link targets from ``self.body``."""
        self.email_address = self.regex_html.findall(self._as_text(self.body))
        self.emaildata_body += self.email_address

    def get_email_address_txt(self):
        """Collect addresses written as plain text in ``self.body``."""
        self.email_address = self.regex_txt.findall(self._as_text(self.body))
        self.emaildata_body += self.email_address

    def get_email_address_from(self):
        """Collect addresses from the ``From`` header of ``self.msg``.

        A message without a ``From`` header contributes nothing.
        """
        header = self.msg.get("From")
        if header is None:
            self.email_address = []
        else:
            self.email_address = self.regex_from.findall(str(header))
        self.emaildata_from += self.email_address

    def print_results(self):
        """Close the connection and print the de-duplicated, sorted results."""
        try:
            self.m.close()
        finally:
            # Always log out, even when closing the mailbox fails.
            self.m.logout()
        print("\n\nResult emails from body:")
        print(sorted(set(self.emaildata_body)))
        print("\nResult emails from FROM:")
        print(sorted(set(self.emaildata_from)))
if __name__ == "__main__":
    # Script entry point: log in, scan every message, then report what was
    # found (print_results also closes the connection).
    gm = gmail()
    for step in (gm.connect, gm.receive_imap, gm.print_results):
        step()
Advertisement
Add Comment
Please, Sign In to add comment