Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def extract_first_email(s):
    """
    Split the first email address off the front of `s`.

    Assumes `s` is a list of email addresses split by (mixed) spaces or commas.
    An address may begin with a double-quoted display name; any separators or
    '@' characters inside that quoted name are skipped when looking for the
    end of the address.

    Returns (first_email, rest) where first_email + rest == s,
    or (None, None) if we don't think this is an email address: there is no
    '@' at all, a leading quote is never closed, or no '@' follows a leading
    quoted name.
    """
    # Local import keeps this helper self-contained.
    import re

    first_at = s.find('@')
    if first_at == -1:
        return (None, None)

    first_quote = s.find('"')
    if first_quote != -1 and first_quote < first_at:
        # The first '@' may sit inside a quoted display name, so restart the
        # search for '@' after the closing quote.
        second_quote = s.find('"', first_quote + 1)
        if second_quote == -1:
            return (None, None)
        first_at = s.find('@', second_quote)
        if first_at == -1:
            # Without this check, s[first_at:] below would slice from the
            # last character and the whole string would be misreported as
            # an address.
            return (None, None)

    # The address ends at the first space or comma following its '@'.
    next_separator = re.search(r'[ ,]', s[first_at:])
    if not next_separator:
        return (s, '')
    first_email_end = first_at + next_separator.start()
    return (s[:first_email_end], s[first_email_end:])
def split_email_line_by_spaces_or_commas(s):
    """
    Break one line of text into individual email address strings.

    Addresses are peeled off the front of `s` one at a time with
    extract_first_email; surrounding spaces and commas are stripped from
    each piece. As soon as the remaining text cannot be recognised as an
    address, that whole remainder is recorded as invalid and parsing stops.

    Returns a pair of ([maybe_valid_emails], [invalid_emails]) in s.
    """
    separators = ' ,'
    accepted, rejected = [], []
    remaining = s.strip(separators)
    while remaining:
        head, tail = extract_first_email(remaining)
        if head is None:
            # Nothing further can be parsed; give up on the rest of the line.
            rejected.append(remaining)
            break
        accepted.append(head.strip(separators))
        remaining = tail.strip(separators)
    return accepted, rejected
def lenient_email_extractor(text):
    """
    Extract email addresses from free-form, multi-line text.

    Each line is split with split_email_line_by_spaces_or_commas and every
    candidate is then parsed with email.utils.parseaddr. Candidates that
    parseaddr rejects (it returns ('', '')) are reported as invalid.

    Returns a pair (address_pairs, invalid_addresses),
    where address_pairs is of the kind returned by email.utils.parseaddr.

    Test case:
    '''
    16@example.com, "Andrew, Esq." <17@example.com>,
    "Mr. Bob Ross" <foo@example.com> test@foo.bar,23432@example.com,
    2235233432@example.com,dsfjkadls@example.com "Full Name with quotes and <weird@chars.com>" <weird@example.com>
    sdflkadsjfkdalfds@example.com
    1@example.com 2@example.com 3@example.com
    '''
    (note: spaces, commas, blank lines)
    """
    from email.utils import parseaddr

    address_pairs = []
    invalid_addresses = []
    for line in text.strip().splitlines():
        emails, invalid = split_email_line_by_spaces_or_commas(line)
        invalid_addresses.extend(invalid)
        for candidate in emails:
            name, real = parseaddr(candidate)
            if name == real == '':
                # parseaddr signals a parse failure with a pair of empty strings.
                invalid_addresses.append(candidate)
            else:
                address_pairs.append((name, real))
    # The original fell off the end here and returned None despite the
    # documented return value.
    return address_pairs, invalid_addresses
Add Comment
Please, Sign In to add comment