Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests, re
- from requests_html import HTMLSession
- from os import system
# Load the domains to scan, one per line of domains.txt.
# Blank / whitespace-only lines are skipped: the scan loop prefixes every
# entry with 'https://', so an empty entry would turn into a request for the
# bare URL 'https://' and an empty row in the output table.
# Entries keep their trailing newline; the scan loop strips each one.
with open('domains.txt') as file:
    domains = [line for line in file if line.strip()]
def ismail(mail):
    """Return True unless *mail* contains one of the known false-positive markers.

    Candidates such as ``logo@2x.png`` have the shape of an address but are
    really asset names or placeholders, so anything containing an image
    extension, a slash, or the words 'example' / 'domain' is rejected.

    The comparison is case-insensitive, so ``logo@2x.PNG`` and
    ``John@Example.com`` are rejected as well.

    Args:
        mail: candidate string to check.

    Returns:
        False if any marker occurs in the candidate, True otherwise
        (including for the empty string).
    """
    exceptions = ('.png', '.jpg', '.gif', '.webp', '/', 'example', 'domain', '.jpeg', '.svg')
    # Lower-case once so upper/mixed-case extensions and placeholders match too.
    candidate = mail.lower()
    return not any(marker in candidate for marker in exceptions)
def renderPage(r):
    """Call ``r.html.render`` and always close ``r.session`` afterwards.

    Args:
        r: response object exposing ``html.render(...)`` and
           ``session.close()`` (presumably a requests_html response as
           returned by getResponse -- confirm against callers).

    Raises:
        Whatever ``r.html.render`` raises; the session is closed first.
    """
    try:
        r.html.render(retries=1, timeout=16)
    finally:
        # Close even when rendering fails, so a failed render does not
        # leave the session (and whatever it holds open) behind.
        r.session.close()
def getResponse(link):
    """Fetch *link* with a fresh HTMLSession and report the outcome.

    Args:
        link: URL to request (16 second timeout).

    Returns:
        ``[status_code, response]`` on success, or ``[0, 0]`` if creating
        the session or performing the request raised; the error is printed
        in that case.
    """
    session = None
    try:
        session = HTMLSession()
        r = session.get(link, timeout=16)
    except Exception as e:
        print(e)
        return [0, 0]
    finally:
        # Close on both the success and the failure path so a failed
        # request does not leak the session.
        # NOTE(review): r.session is assumed to be this same session object,
        # so a single close covers it -- confirm against requests_html.
        if session is not None:
            session.close()
    return [r.status_code, r]
def writeTable(domain, to_table):
    """Append one tab-separated row for *domain* to ``table.txt``.

    The row label is taken from the *domain* argument rather than from a
    module-level variable, so the function works wherever it is called.
    Callers pass either the bare domain or its ``https://`` URL; a leading
    ``https://`` is stripped so both forms produce the same row label.

    Args:
        domain: bare domain name, or the same domain prefixed with
            ``https://``.
        to_table: text to place after the domain column (may be empty).
    """
    scheme = 'https://'
    label = domain[len(scheme):] if domain.startswith(scheme) else domain
    with open('table.txt', 'a') as table:
        table.write(f'{label}\t{to_table}\n')
# Substrings that mark a link as a likely contact/info page worth scanning.
sublinks = ['enquiry', 'contact', 'write', 'support', 'about', 'help', 'privacy', 'policy', 'advert', 'author']

# Compiled once for the whole run.  Raw string, so "\." is a regex escape
# instead of an invalid string escape sequence.
_MAIL_RE = re.compile(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z.]+)")


def _fetch_rendered(link):
    """Fetch and render *link*; return the response, or None on any failure."""
    status, r = getResponse(link)
    if status == 0:
        # getResponse returns [0, 0] (and prints the error) when the request
        # failed, so there is no response object to render.
        print('CANT OPEN')
        return None
    print('CONNECTED')
    try:
        renderPage(r)
    except Exception as e:
        print(e)
        print('CANT RENDER')
        return None
    finally:
        # Make sure the session is closed whether or not rendering worked.
        r.session.close()
    print('RENDERED')
    return r


for domain in domains:
    domain = domain.strip()
    # clear_domain stays a module-level name on purpose: other code in this
    # file may read it as a global.
    clear_domain = domain
    domain = f'https://{domain}'
    database = []   # everything already recorded for this domain
    to_table = ''   # tab-terminated cells for this domain's table row
    print('#'*20)
    print('#'*20)
    print(domain)

    # One request for the homepage; its result gives both status and page.
    status, r = getResponse(domain)
    if status != 200:
        print('NOT WORKING\n\n')
        writeTable(clear_domain, to_table)
        continue
    print('WORKING')

    try:
        links = r.html.absolute_links
    except Exception:
        writeTable(clear_domain, to_table)
        continue

    # Homepage first, then every same-site link that mentions a sublink.
    # NOTE(review): 'https://' is 8 characters, so an offset of +6 starts the
    # slice two characters before the path of an https link on this domain;
    # kept as in the original -- confirm whether +8 was intended.
    offset = len(clear_domain) + 6
    to_parse = [domain]
    for link in links:
        if (clear_domain in link and link not in to_parse
                and any(sublink in link[offset:] for sublink in sublinks)):
            to_parse.append(link)

    for link in to_parse:
        print(link)
        r = _fetch_rendered(link)
        if r is None:
            continue
        # errors='replace' keeps one badly encoded page from aborting the run.
        mails = _MAIL_RE.findall(r.html.raw_html.decode(errors='replace'))
        if not mails and link != domain:
            # No address on a sub-page: fall back to noting a contact form.
            mails = re.findall('<form', r.html.html)
        for mail in mails:
            if ismail(mail) and mail not in database:
                database.append(mail)
                if mail == '<form':
                    mail = 'Form'
                print(f'\nFOUND: {mail}\n')
                to_table += f'{mail}\t'

    writeTable(clear_domain, to_table)
    print('\n\n')
Add Comment
Please, Sign In to add comment