Advertisement
moften

googlepythonmail.py

Jan 17th, 2012
4,737
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.26 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. import sys
  4. import re
  5. import string
  6. import httplib
  7. import urllib2
  8. import re
  9. def StripTags(text):
  10.     finished = 0
  11.     while not finished:
  12.         finished = 1
  13.         start = text.find("<")
  14.         if start >= 0:
  15.             stop = text[start:].find(">")
  16.             if stop >= 0:
  17.                 text = text[:start] + text[start+stop+1:]
  18.                 finished = 0
  19.     return text
  20. if len(sys.argv) != 2:
  21.         print "\nExtracts emails from google results.\n"
  22.         print "\nUsage : ./goog-mail.py <domain-name>\n"
  23.         sys.exit(1)
  24.  
  25. domain_name=sys.argv[1]
  26. d={}
  27. page_counter = 0
  28. try:
  29.     while page_counter < 50 :
  30.         results = 'http://groups.google.com/groups?q=' + str(domain_name)+'&hl=en&lr=&ie=UTF-8&start=' + repr(page_counter) + '&sa=N'
  31.         request = urllib2.Request(results)
  32.         request.add_header('User-Agent','Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)')
  33.         opener = urllib2.build_opener()
  34.         text = opener.open(request).read()
  35.         emails = (re.findall('([\w\.\-]+@'+domain_name+')',StripTags(text)))
  36.         for email in emails:
  37.             d[email]=1
  38.             uniq_emails=d.keys()
  39.         page_counter = page_counter +10
  40. except IOError:
  41.     print "Can't connect to Google Groups!"+""
  42.  
  43. page_counter_web=0
  44. try:
  45.     print "\n\n+++++++++++++++++++++++++++++++++++++++++++++++++++++"+""
  46.     print "+ Google Web & Group Results:"+""
  47.     print "+++++++++++++++++++++++++++++++++++++++++++++++++++++\n\n"+""
  48.  
  49.     while page_counter_web < 50 :
  50.         results_web = 'http://www.google.com/search?q=%40'+str(domain_name)+'&hl=en&lr=&ie=UTF-8&start='+ repr(page_counter_web) + '&sa=N'
  51.         request_web = urllib2.Request(results_web)
  52.         request_web.add_header('User-Agent','Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)')
  53.         opener_web = urllib2.build_opener()
  54.         text = opener_web.open(request_web).read()
  55.         emails_web = (re.findall('([\w\.\-]+@'+domain_name+')',StripTags(text)))
  56.         for email_web in emails_web:
  57.             d[email_web]=1
  58.             uniq_emails_web=d.keys()
  59.         page_counter_web = page_counter_web +10
  60.  
  61. except IOError:
  62.     print "Can't connect to Google Web!"+""
  63. for uniq_emails_web in d.keys():
  64.     print uniq_emails_web+""
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement