Guest User

Untitled

a guest
Oct 15th, 2020
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.82 KB | None | 0 0
  1. import requests, re
  2. from requests_html import HTMLSession
  3. from os import system
  4.  
  5.  
  6.  
  7.  
  8. with open('domains.txt') as file:
  9.     domains = file.readlines()
  10.  
  11.  
  12. def ismail(mail):
  13.     exceptions = ['.png', '.jpg', '.gif','.webp', '/', 'example', 'domain', '.jpeg', '.svg']
  14.     for e in exceptions:
  15.         if e in mail:
  16.             return False    
  17.     return True
  18.  
  19.  
  20. def renderPage(r):
  21.     r.html.render(retries=1, timeout=16)
  22.     r.session.close()
  23.  
  24.  
  25. def getResponse(link):
  26.     try:
  27.         session = HTMLSession()
  28.         r = session.get(link, timeout=16)
  29.         r.session.close()
  30.         session.close()
  31.     except Exception as e:
  32.         print(e)
  33.        
  34.         return [0, 0]
  35.  
  36.     return [r.status_code, r]
  37.  
  38.  
  39. def writeTable(domain, to_table):
  40.     with open('table.txt', 'a') as table:      
  41.         table.write(f'{clear_domain}\t{to_table}\n')
  42.  
  43.  
  44. sublinks = ['enquiry', 'contact', 'write', 'support', 'about', 'help', 'privacy', 'policy', 'advert', 'author']
  45.  
  46.  
  47.  
  48.  
  49. for domain in domains:
  50.     domain = domain.strip()
  51.     clear_domain = domain
  52.     domain = f'https://{domain}'
  53.     database = []
  54.     to_table = ''
  55.  
  56.     print('#'*20)
  57.     print('#'*20)
  58.     print(domain)
  59.    
  60.  
  61.     if getResponse(domain)[0] == 200:
  62.         print('WORKING')
  63.         r = getResponse(domain)[1]      
  64.     else:
  65.         print('NOT WORKING\n\n')
  66.         writeTable(domain, to_table)
  67.         continue
  68.  
  69.  
  70.     to_parse = [domain]
  71.     try:
  72.         links = r.html.absolute_links
  73.     except:
  74.         writeTable(domain, to_table)
  75.         continue
  76.     for link in links:
  77.         for sublink in sublinks:
  78.             if sublink in link[len(clear_domain)+6:] and link not in to_parse and clear_domain in link:
  79.                 to_parse.append(link)
  80.  
  81.     for link in to_parse:
  82.         print(link)
  83.         try:
  84.             r = getResponse(link)[1]
  85.             print('CONNECTED')
  86.             try:      
  87.                 renderPage(r)
  88.                 print('RENDERED')
  89.                 r.session.close()
  90.             except Exception as e:
  91.                 r.session.close()
  92.                 print(e)
  93.                 print('CANT RENDER')
  94.                 continue                
  95.         except Exception as e:
  96.             print(e)
  97.             print('CANT OPEN')
  98.             continue
  99.  
  100.  
  101.         mails = re.findall("([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z.]+)", r.html.raw_html.decode())
  102.         if not mails:
  103.             if link != domain:
  104.                 mails = re.findall('<form', r.html.html)
  105.  
  106.         for mail in mails:
  107.             if ismail(mail) and mail not in database:
  108.                 database.append(mail)
  109.                 if mail == '<form':
  110.                     mail = 'Form'
  111.                 print(f'\nFOUND: {mail}\n')
  112.                 to_table += f'{mail}\t'
  113.  
  114.     writeTable(clear_domain, to_table)
  115.  
  116.     print('\n\n')
  117.  
Add Comment
Please, Sign In to add comment