Guest User

Untitled

a guest
Jan 28th, 2019
530
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.21 KB | None | 0 0
  1. def extract_first_email(s):
  2. """
  3. Assumes `s` is a list of email addresses split by (mixed) spaces or commas.
  4. Returns (first_email, rest) where first_email + rest == s,
  5. or (None, None) if we don't think this is an email address.
  6. """
  7. first_quote = s.find('"')
  8. first_at = s.find('@')
  9. if first_at == -1:
  10. return (None, None)
  11. if first_quote != -1 and first_quote < first_at:
  12. second_quote = s.find('"', first_quote+1)
  13. if second_quote == -1:
  14. return (None, None)
  15. first_at = s.find('@', second_quote)
  16. next_separator = re.search('[ ,]', s[first_at:])
  17. if not next_separator:
  18. return (s, '')
  19. first_email_end = first_at + next_separator.start()
  20. return (s[:first_email_end], s[first_email_end:])
  21.  
  22.  
  23. def split_email_line_by_spaces_or_commas(s):
  24. """
  25. Returns a pair of ([maybe_valid_emails], [invalid_emails]) in s.
  26. """
  27. emails = []
  28. invalid = []
  29. s = s.strip(' ,')
  30. while s:
  31. first, rest = extract_first_email(s)
  32. if first is None:
  33. invalid.append(s)
  34. break
  35. emails.append(first.strip(' ,'))
  36. s = rest.strip(' ,')
  37. return emails, invalid
  38.  
  39.  
  40. def lenient_email_extractor(text):
  41. """
  42. Returns a pair (address_pairs, invalid_addresses),
  43. where address_pairs is of the kind returned by email.utils.parseaddr.
  44.  
  45.  
  46. Test case:
  47. '''
  48.  
  49. 16@example.com, "Andrew, Esq." <17@example.com>,
  50.  
  51. "Mr. Bob Ross" <foo@example.com> test@foo.bar,23432@example.com,
  52.  
  53. 2235233432@example.com,dsfjkadls@example.com "Full Name with quotes and <weird@chars.com>" <weird@example.com>
  54.  
  55. sdflkadsjfkdalfds@example.com
  56.  
  57. 1@example.com 2@example.com 3@example.com
  58.  
  59. '''
  60. (note: spaces, commas, blank lines)
  61. """
  62. from email.utils import parseaddr
  63. address_pairs = []
  64. invalid_addresses = []
  65. lines = text.strip().splitlines()
  66. for l in lines:
  67. emails, invalid = split_email_line_by_spaces_or_commas(l)
  68. invalid_addresses.extend(invalid)
  69. for e in emails:
  70. name, real = parseaddr(e)
  71. if name == real == '':
  72. invalid_addresses.append(e)
  73. else:
  74. address_pairs.append((name, real))
Add Comment
Please, Sign In to add comment