sirnon

emingoo.py

Aug 16th, 2013
155
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.28 KB | None | 0 0
  1. #!/usr/bin/python3
  2. # -*- coding: utf-8 -*-
  3. # Name: emingoo.py
  4. # Version: 1.0
  5. # Author: pantuts
  6. # Description: Extract emails from Google search results.
  7. # Agreement: You can use, modify, or redistribute this tool under the terms of GNU General Public License (GPLv3).
  8. # This tool is for educational purposes only. Any damage you make will not affect the author.
  9.  
  10. import sys, re, time
  11. from urllib.request import Request, urlopen
  12. import urllib.error
  13.  
  14. def helper():
  15. print('Usage:./emingoo.py -d domainToSearch -c maxResults[int]')
  16. print('Default min google search result is 100 and max is 10000.')
  17. print()
  18.  
  19. def extract(res, d):
  20. em_sub = re.sub('<[^<]+?>', '', res) # strip all html tags like <em>
  21. tmp_emails = re.findall(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9._%+-]+' + d, em_sub)
  22. tmp_emails1 = re.findall(r'[a-zA-Z0-9._%+-]+@' + d, em_sub)
  23. tmp_emails2 = set(tmp_emails + tmp_emails1)
  24. emails = [x for x in tmp_emails2]
  25. if len(emails) == 0: print('Sorry, no results found.')
  26. else:
  27. for i in emails: print(i); time.sleep(0.01)
  28. print()
  29.  
  30. def crawl(d, c):
  31. if c > 10000:
  32. print('Sorry your argument -c exceeded on its max search result.')
  33. sys.exit(0)
  34. print('Searching emails for ' + d + ' ...')
  35. print()
  36. try:
  37. q = 'http://www.google.com/search?hl=en&num=' + str(c) + '&q=intext%3A%40' + d + '&ie=utf-8'
  38. req = Request(q)
  39. req.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:19.0) Gecko/20100101 Firefox/19.0')
  40. res = urlopen(req).read().decode('utf-8')
  41. except (urllib.error.URLError, urllib.error.HTTPError) as e:
  42. print(e)
  43. sys.exit(1)
  44.  
  45. extract(res, d)
  46.  
  47. def main():
  48. if len(sys.argv) < 3 or len(sys.argv) > 5:
  49. helper()
  50. sys.exit(0)
  51. else:
  52. if len(sys.argv) == 5:
  53. domain = sys.argv[2]
  54. max_res = int(sys.argv[4])
  55. elif len(sys.argv) == 3:
  56. domain = sys.argv[2]
  57. max_res = 100
  58. else:
  59. helper()
  60. sys.exit(0)
  61.  
  62. crawl(domain, max_res)
  63.  
  64. if __name__=='__main__':
  65. print(' ____ __ __ ____ _ _ ___ _____ _____ ')
  66. print('( ___)( \/ )(_ _)( \( )/ __)( _ )( _ )')
  67. print(' )__) ) ( _)(_ ) (( (_-. )(_)( )(_)( ')
  68. print('(____)(_/\/\_)(____)(_)\_)\___/(_____)(_____)')
  69. print()
  70. try:
  71. main()
  72. except KeyboardInterrupt:
  73. print('\rKeyboardInterrupt')
  74. sys.exit(0)
Add Comment
Please, Sign In to add comment