Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import argparse
import csv
import re
import sys
from time import sleep

import requests
from bs4 import BeautifulSoup
# Command-line interface: every option is optional and takes a value.
parser = argparse.ArgumentParser(
    description='Scrape haveibeenpwned for email breach tools',
)
parser.add_argument('-e', '--email', help='Email to search')
parser.add_argument('-s', '--search', help='Search databases ')
parser.add_argument('-l', '--list')
parser.add_argument('-o', '--output', help='Output results to csv')
args = parser.parse_args()

# Started without any command-line argument: show the usage line and
# exit with a non-zero status instead of running with nothing to do.
if not sys.argv[1:]:
    parser.print_usage()
    sys.exit(1)
# Module-level state: the addresses queued for lookup, two result lists,
# and the mapping of account -> list of titles found for that account.
emails_to_check, email_not_pwned, email_pwned = [], [], []
pwned_email_info = dict()

# Short aliases for the parsed command-line options.
target_email = args.email
email_input_list = args.list
search_bol = args.search
output_csv_file = args.output
def read_from_file(input_list_file):
    """Read e-mail addresses from a text file, one per line, and queue the valid ones.

    Each line is stripped of surrounding whitespace (including the trailing
    newline) and checked against a lower-case e-mail pattern. Matching
    addresses are appended to the module-level ``emails_to_check`` list;
    non-matching lines are reported on stdout and skipped. Blank lines are
    ignored.

    Args:
        input_list_file: Path of the text file to read.

    Returns:
        The module-level ``emails_to_check`` list, including every address
        appended by this call.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Compiled once, outside the loop; raw string so "\." is a regex escape
    # rather than an (invalid) string escape.
    email_pattern = re.compile(
        r'^[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9-]+)*(\.[a-z]{2,})$'
    )
    with open(input_list_file, "r") as input_file:
        print("starting search with these emails!")
        for current_email_line in input_file:
            # Drop the line terminator so it never ends up in the stored
            # address (and later in the request URL).
            candidate = current_email_line.strip()
            if not candidate:
                continue
            print(candidate)
            if email_pattern.match(candidate) is None:
                print(f"{candidate} dosnt match email syntax")
            else:
                print("Email matched regex: {}".format(candidate))
                emails_to_check.append(candidate)
    return emails_to_check
def get_source(account):
    """Fetch the haveibeenpwned account page for *account* and record what it lists.

    Downloads ``https://haveibeenpwned.com/account/<account>``, collects the
    text of every ``<span class="pwnedCompanyTitle">`` element with colons
    removed, and stores that list in the module-level ``pwned_email_info``
    dict under ``account``. An account whose page contains no such element
    is stored with an empty list.

    Args:
        account: The e-mail address / account name to look up.

    Returns:
        The module-level ``pwned_email_info`` dict (account -> list of
        collected titles), including the entry added by this call.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            times out.
    """
    print(f'starting search for: {account}')
    url = f"https://haveibeenpwned.com/account/{account}"
    # Without a timeout a single stalled connection blocks the run forever.
    page_html = requests.get(url, timeout=30).text
    soup = BeautifulSoup(page_html, "lxml")
    database_list = [
        title.text.replace(":", "")
        for title in soup.find_all("span", class_="pwnedCompanyTitle")
    ]
    pwned_email_info[account] = database_list
    return pwned_email_info
def main():
    """Look up the single e-mail or the e-mail list given on the command line.

    A single address (``--email``) takes precedence over a list file
    (``--list``). When a list file is used, each valid address from it is
    looked up in turn with a 3-second pause after every lookup. The
    collected ``pwned_email_info`` dict is printed at the end in either case.
    """
    if target_email is not None:
        get_source(target_email)
    elif email_input_list is not None:
        for address in read_from_file(email_input_list):
            get_source(address)
            # Pause after each lookup; presumably to avoid hammering the
            # site with back-to-back requests.
            sleep(3)
    print(pwned_email_info)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement