Advertisement
Guest User

Untitled

a guest
May 11th, 2010
361
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.09 KB | None | 0 0
  1. #!/usr/bin/env python
  2. #google
  3.  
  4. import sys
  5. from optparse import OptionParser
  6. parser = OptionParser(
  7.     version=" %prog V0.1 Ben Holroyd",  
  8.     description = "%prog - google results in the terminal.",
  9.     usage = "%prog [-vr] [--view] [--results][--help] [--version]")
  10. parser.add_option("-r","--results",action="store",type = "int",dest="results",help="display specified number of results, default is 5." )
  11. parser.add_option("-v","--view",action="store",type = "int",dest="view",help="view page by index number." )    
  12. (options,args) = parser.parse_args()
  13.    
  14. try:  # if arg is an int assume its a page selection
  15.     options.view=int(' '.join(args))
  16. except ValueError:
  17.     pass
  18.  
  19. if options.view != None: #opens selected entry in browser
  20.     file = open("/tmp/google",'r')
  21.     text = file.readlines()
  22.     for counter,lines in enumerate(text,start=1):
  23.         if counter == options.view:
  24.             from subprocess import call
  25.             call("w3m %s" % lines, shell = True)
  26.             sys.exit(0)
  27.              
  28. print "loading..."
  29. option = ""
  30. for arg in args: option += arg+"+"
  31. if len(option) == 0: parser.error("please enter a search term")
  32.    
  33. from urllib import FancyURLopener
  34. class MyOpener(FancyURLopener): version = '' #google doesn't like the user agent so send it blank
  35. try:  
  36.         #s = MyOpener().open('http://www.google.com/search?q=%s'% option).read()
  37.     s = MyOpener().open('http://www.google.com/cse?cx=partner-pub-3417583613261068%%3Adg71g8k18b4&q=%s' % option).read()
  38. except IOError:
  39.     print 'could not connect to google, check your connection'
  40.     sys.exit(1)
  41. import re
  42. dict = {}
  43. entry = re.split('<div class=g><h2 class=r>',s)[1:] #split page by results
  44.  
  45. for counter, s in enumerate(entry,start=1):
  46.     s = s.replace('&#39;',"'")                 #replace bits like this
  47.     s = s.replace('&amp;',"&")
  48.     s = s.replace('&middot;',"\302\267")
  49.     s = s.replace('&quot;','"')
  50.     s = s.replace('&lt;','<')
  51.     s = s.replace('&gt;','>')
  52.     s = s.replace('&#26412;','\346\234\254')
  53.     s = s.replace('&#26085;','\346\227\245')
  54.     s = s.replace('&nbsp;',' ')    
  55.        
  56.     entry = re.split('<br>',s)[0]# gets rid of last bits #cached and similar pages  etc
  57.        
  58.     a = s.find("<a href")     #get web address
  59.     b = s.find("</a>")
  60.     address = s[a:b]
  61.     title = s[a:b]            # get title
  62.     a = address.find('"')
  63.     address = address[a+1:]
  64.     b = address.find('"')
  65.     address = address[:b]
  66.    
  67.     a = s.find("</a>")
  68.     b = s.find("<span class=a>")
  69.     s = s[a:b]                  #clean up main description string
  70.     s = re.sub('<.*?>','',s)
  71.     title = re.sub('<.*?>','',title)                               
  72.        
  73.     print "\033[34;04m[%d] %s\033[00;00m" % (counter , title)
  74.     print s
  75.     print "\033[32;01m%s\033[00;00m" % address
  76.     print "-----------------------------------------------------------"
  77.     dict[counter] = address
  78.     if options.results == None and counter == 5: break
  79.     if options.results == counter: break
  80.  
  81. file = open("/tmp/google",'w')
  82. for entries in range(1,len(dict)+1): file.write(dict[entries]+'\n')
  83. file.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement