Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
# google - show Google search results in the terminal.
import sys
from optparse import OptionParser

# Command-line interface: -r limits how many results are printed and -v
# selects a previously listed result by its index number. Neither option
# has a parser-level default, so both are None unless given.
parser = OptionParser(
    usage="%prog [-vr] [--view] [--results][--help] [--version]",
    description="%prog - google results in the terminal.",
    version=" %prog V0.1 Ben Holroyd",
)
parser.add_option(
    "-r", "--results",
    action="store",
    type="int",
    dest="results",
    help="display specified number of results, default is 5.",
)
parser.add_option(
    "-v", "--view",
    action="store",
    type="int",
    dest="view",
    help="view page by index number.",
)

# Positional arguments (the search terms, or a bare result index) end up
# in args; recognised switches end up as attributes of options.
options, args = parser.parse_args()
# A bare integer argument is shorthand for "--view N"; anything that does
# not parse as an integer is left alone and treated as a search term later.
try:
    options.view = int(' '.join(args))
except ValueError:
    pass

if options.view is not None:
    # View mode: open the N-th address saved in /tmp/google in w3m.
    from subprocess import call

    try:
        with open("/tmp/google", 'r') as saved:
            addresses = saved.read().splitlines()
    except IOError:
        print('no saved results found, run a search first')
        sys.exit(1)

    # Entries are numbered from 1, one address per line of the file.
    if 1 <= options.view <= len(addresses):
        # Hand the address to w3m as its own argument instead of building a
        # shell command line, so characters such as & ; | inside the URL are
        # neither interpreted by a shell nor able to truncate the address.
        call(["w3m", addresses[options.view - 1]])

    # NOTE(review): the original indentation was lost; exiting whenever a
    # view index was given (even an out-of-range one) is assumed - confirm.
    sys.exit(0)
from urllib import FancyURLopener, quote_plus


class MyOpener(FancyURLopener):
    """URL opener that identifies itself with a blank version string."""
    version = ''  # google doesn't like the user agent so send it blank


# Validate before announcing any work: with no search terms there is
# nothing to load.
if not args:
    parser.error("please enter a search term")
print("loading...")

# Percent-encode every term so characters such as & # or spaces inside a
# quoted argument cannot corrupt the query string, then join with "+".
option = "+".join(quote_plus(arg) for arg in args)

try:
    # "%%3A" is a literal, already-encoded ":" inside the cx parameter.
    s = MyOpener().open('http://www.google.com/cse?cx=partner-pub-3417583613261068%%3Adg71g8k18b4&q=%s' % option).read()
except IOError:
    print('could not connect to google, check your connection')
    sys.exit(1)
import re

# HTML entities replaced in each result before it is printed, paired with
# their plain-text form (UTF-8 byte sequences for the non-ASCII ones).
# "&amp;" is handled last so text such as "&amp;lt;" is not decoded twice.
# NOTE(review): the entity spellings are reconstructed - the literals in
# the pasted source had already been decoded by the page it was copied
# from. Confirm them against real result markup.
ENTITIES = (
    ('&#39;', "'"),
    ('&middot;', "\302\267"),
    ('&quot;', '"'),
    ('&lt;', '<'),
    ('&gt;', '>'),
    ('&#26412;', '\346\234\254'),
    ('&#26085;', '\346\227\245'),
    ('&nbsp;', ' '),
    ('&amp;', "&"),
)
TAG = re.compile('<.*?>')

# Stop after this many results; 5 applies when -r/--results was not given.
limit = 5 if options.results is None else options.results

addresses = []
# Everything before the first result marker is dropped by the [1:] slice.
results = re.split('<div class=g><h2 class=r>', s)[1:]  # split page by results
for counter, result in enumerate(results, start=1):
    for entity, text in ENTITIES:
        result = result.replace(entity, text)

    # The first link of a result carries both its address and its title.
    link = result[result.find("<a href"):result.find("</a>")]
    title = TAG.sub('', link)
    address = link[link.find('"') + 1:]  # text between the first pair of quotes
    address = address[:address.find('"')]

    # The description runs from the end of the link to the <span class=a>
    # marker; any markup left inside it is stripped.
    description = result[result.find("</a>"):result.find("<span class=a>")]
    description = TAG.sub('', description)

    print("\033[34;04m[%d] %s\033[00;00m" % (counter, title))
    print(description)
    print("\033[32;01m%s\033[00;00m" % address)
    print("-----------------------------------------------------------")
    addresses.append(address)

    if counter == limit:
        break

# Save the addresses, one per line in display order, to /tmp/google.
with open("/tmp/google", 'w') as saved:
    saved.writelines(address + '\n' for address in addresses)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement