Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def get_document_emails(pdf_format_content):
    """Extract every distinct email address found in the document text.

    Args:
        pdf_format_content: Document text to scan.

    Returns:
        A sorted list of the unique email addresses found; an empty list
        when the text contains none.
    """
    # The local part allows word characters, dots, plus and hyphen; the
    # domain needs at least one dot-separated label after the first one.
    # Because every label must contain a character, a match can never end
    # in '.', so an address followed by a sentence-ending period is kept
    # (without the period) instead of being discarded.
    found = re.findall(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+', pdf_format_content)
    # A set removes duplicates in linear time; sorted() gives stable order.
    return sorted(set(found))
def get_document_provider(pdf_format_content):
    """Locate the provider line in the document text.

    Args:
        pdf_format_content: Document text to search.

    Returns:
        The ``re.Match`` object for the first ``PROVEEDOR:`` label that is
        followed by a newline, or ``None`` when there is no such line.
        Group 2 holds the text after the label up to and including the
        newline, so callers extract the name with ``match.group(2)``.
    """
    # re.search instead of re.match: re.match only succeeds when the label
    # sits at position 0 of the text, which misses a provider line that
    # appears anywhere after the start of the document. When the text does
    # start with the label, both calls return the same match.
    return re.search(r'(PROVEEDOR:)+(.*?\n)', pdf_format_content)
Add Comment
Please, Sign In to add comment