Algabe

Google

Jul 11th, 2012
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 15.85 KB | None | 0 0
  1. ###
  2. # Copyright (c) 2002-2004, Jeremiah Fincher
  3. # Copyright (c) 2008-2010, James Vega
  4. # All rights reserved.
  5. #
  6. # Redistribution and use in source and binary forms, with or without
  7. # modification, are permitted provided that the following conditions are met:
  8. #
  9. #   * Redistributions of source code must retain the above copyright notice,
  10. #     this list of conditions, and the following disclaimer.
  11. #   * Redistributions in binary form must reproduce the above copyright notice,
  12. #     this list of conditions, and the following disclaimer in the
  13. #     documentation and/or other materials provided with the distribution.
  14. #   * Neither the name of the author of this software nor the name of
  15. #     contributors to this software may be used to endorse or promote products
  16. #     derived from this software without specific prior written consent.
  17. #
  18. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. # ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28. # POSSIBILITY OF SUCH DAMAGE.
  29. ###
  30.  
  31. import re
  32. import cgi
  33. import time
  34. import socket
  35. import urllib
  36.  
  37. import supybot.conf as conf
  38. import supybot.utils as utils
  39. import supybot.world as world
  40. from supybot.commands import *
  41. import supybot.ircmsgs as ircmsgs
  42. import supybot.ircutils as ircutils
  43. import supybot.callbacks as callbacks
  44.  
  45. simplejson = None
  46.  
  47. try:
  48.     simplejson = utils.python.universalImport('json')
  49. except ImportError:
  50.     pass
  51.  
  52. try:
  53.     # The 3rd party simplejson module was included in Python 2.6 and renamed to
  54.     # json.  Unfortunately, this conflicts with the 3rd party json module.
  55.     # Luckily, the 3rd party json module has a different interface so we test
  56.     # to make sure we aren't using it.
  57.     if simplejson is None or hasattr(simplejson, 'read'):
  58.         simplejson = utils.python.universalImport('simplejson',
  59.                                                   'local.simplejson')
  60. except ImportError:
  61.     raise callbacks.Error, \
  62.             'You need Python2.6 or the simplejson module installed to use ' \
  63.             'this plugin.  Download the module at ' \
  64.             '<http://undefined.org/python/#simplejson>.'
  65.  
  66. class Google(callbacks.PluginRegexp):
  67.     threaded = True
  68.     callBefore = ['Web']
  69.     regexps = ['googleSnarfer']
  70.  
  71.     _colorGoogles = {}
  72.     def _getColorGoogle(self, m):
  73.         s = m.group(1)
  74.         ret = self._colorGoogles.get(s)
  75.         if not ret:
  76.             L = list(s)
  77.             L[0] = ircutils.mircColor(L[0], 'blue')[:-1]
  78.             L[1] = ircutils.mircColor(L[1], 'red')[:-1]
  79.             L[2] = ircutils.mircColor(L[2], 'yellow')[:-1]
  80.             L[3] = ircutils.mircColor(L[3], 'blue')[:-1]
  81.             L[4] = ircutils.mircColor(L[4], 'green')[:-1]
  82.             L[5] = ircutils.mircColor(L[5], 'red')
  83.             ret = ''.join(L)
  84.             self._colorGoogles[s] = ret
  85.         return ircutils.bold(ret)
  86.  
  87.     _googleRe = re.compile(r'\b(google)\b', re.I)
  88.     def outFilter(self, irc, msg):
  89.         if msg.command == 'PRIVMSG' and \
  90.            self.registryValue('colorfulFilter', msg.args[0]):
  91.             s = msg.args[1]
  92.             s = re.sub(self._googleRe, self._getColorGoogle, s)
  93.             msg = ircmsgs.privmsg(msg.args[0], s, msg=msg)
  94.         return msg
  95.  
  96.     _gsearchUrl = 'http://ajax.googleapis.com/ajax/services/search/web'
  97.     def search(self, query, channel, options={}):
  98.         """Perform a search using Google's AJAX API.
  99.        search("search phrase", options={})
  100.  
  101.        Valid options are:
  102.            smallsearch - True/False (Default: False)
  103.            filter - {active,moderate,off} (Default: "moderate")
  104.            language - Restrict search to documents in the given language
  105.                       (Default: "lang_en")
  106.        """
  107.         ref = self.registryValue('referer')
  108.         if not ref:
  109.             ref = 'http://%s/%s' % (dynamic.irc.server,
  110.                                     dynamic.irc.nick)
  111.         headers = utils.web.defaultHeaders
  112.         headers['Referer'] = ref
  113.         opts = {'q': query, 'v': '1.0'}
  114.         for (k, v) in options.iteritems():
  115.             if k == 'smallsearch':
  116.                 if v:
  117.                     opts['rsz'] = 'small'
  118.                 else:
  119.                     opts['rsz'] = 'large'
  120.             elif k == 'filter':
  121.                 opts['safe'] = v
  122.             elif k == 'language':
  123.                 opts['lr'] = v
  124.         defLang = self.registryValue('defaultLanguage', channel)
  125.         if 'lr' not in opts and defLang:
  126.             opts['lr'] = defLang
  127.         if 'safe' not in opts:
  128.             opts['safe'] = self.registryValue('searchFilter', dynamic.channel)
  129.         if 'rsz' not in opts:
  130.             opts['rsz'] = 'large'
  131.  
  132.         fd = utils.web.getUrlFd('%s?%s' % (self._gsearchUrl,
  133.                                            urllib.urlencode(opts)),
  134.                                 headers)
  135.         json = simplejson.load(fd)
  136.         fd.close()
  137.         if json['responseStatus'] != 200:
  138.             raise callbacks.Error, 'We broke The Google!'
  139.         return json
  140.  
  141.     def formatData(self, data, bold=True, max=0):
  142.         if isinstance(data, basestring):
  143.             return data
  144.         results = []
  145.         if max:
  146.             data = data[:max]
  147.         for result in data:
  148.             title = utils.web.htmlToText(result['titleNoFormatting']\
  149.                                          .encode('utf-8'))
  150.             url = result['unescapedUrl'].encode('utf-8')
  151.             if title:
  152.                 if bold:
  153.                     title = ircutils.bold(title)
  154.                 results.append(format('%s: %u', title, url))
  155.             else:
  156.                 results.append(url)
  157.         if not results:
  158.             return format('No matches found.')
  159.         else:
  160.             return format('; '.join(results))
  161.  
  162.     def lucky(self, irc, msg, args, opts, text):
  163.         """[--snippet] <search>
  164.  
  165.        Does a google search, but only returns the first result.
  166.        If option --snippet is given, returns also the page text snippet.
  167.        """
  168.         opts = dict(opts)
  169.         data = self.search(text, msg.args[0], {'smallsearch': True})
  170.         if data['responseData']['results']:
  171.             url = data['responseData']['results'][0]['unescapedUrl'].encode('utf-8')
  172.             if opts.has_key('snippet'):
  173.                 snippet = data['responseData']['results'][0]['content'].encode('utf-8')
  174.                 snippet = " | " + utils.web.htmlToText(snippet, tagReplace='')
  175.             else:
  176.                 snippet = ""
  177.             result = url + snippet
  178.             irc.reply(result)
  179.         else:
  180.             irc.reply('Google found nothing.')
  181.     lucky = wrap(lucky, [getopts({'snippet':'',}), 'text'])
  182.  
  183.     def google(self, irc, msg, args, optlist, text):
  184.         """<search> [--{filter,language} <value>]
  185.  
  186.        Searches google.com for the given string.  As many results as can fit
  187.        are included.  --language accepts a language abbreviation; --filter
  188.        accepts a filtering level ('active', 'moderate', 'off').
  189.        """
  190.         if 'language' in optlist and optlist['language'].lower() not in \
  191.            conf.supybot.plugins.Google.safesearch.validStrings:
  192.             irc.errorInvalid('language')
  193.         data = self.search(text, msg.args[0], dict(optlist))
  194.         if data['responseStatus'] != 200:
  195.             irc.reply('We broke The Google!')
  196.             return
  197.         bold = self.registryValue('bold', msg.args[0])
  198.         max = self.registryValue('maximumResults', msg.args[0])
  199.         irc.reply(self.formatData(data['responseData']['results'],
  200.                                   bold=bold, max=max))
  201.     google = wrap(google, [getopts({'language':'something',
  202.                                     'filter':''}),
  203.                            'text'])
  204.  
  205.     def cache(self, irc, msg, args, url):
  206.         """<url>
  207.  
  208.        Returns a link to the cached version of <url> if it is available.
  209.        """
  210.         data = self.search(url, msg.args[0], {'smallsearch': True})
  211.         if data['responseData']['results']:
  212.             m = data['responseData']['results'][0]
  213.             if m['cacheUrl']:
  214.                 url = m['cacheUrl'].encode('utf-8')
  215.                 irc.reply(url)
  216.                 return
  217.         irc.error('Google seems to have no cache for that site.')
  218.     cache = wrap(cache, ['url'])
  219.  
  220.     def fight(self, irc, msg, args):
  221.         """<search string> <search string> [<search string> ...]
  222.  
  223.        Returns the results of each search, in order, from greatest number
  224.        of results to least.
  225.        """
  226.         channel = msg.args[0]
  227.         results = []
  228.         for arg in args:
  229.             data = self.search(arg, channel, {'smallsearch': True})
  230.             count = data['responseData']['cursor'].get('estimatedResultCount',
  231.                                                        0)
  232.             results.append((int(count), arg))
  233.         results.sort()
  234.         results.reverse()
  235.         if self.registryValue('bold', msg.args[0]):
  236.             bold = ircutils.bold
  237.         else:
  238.             bold = repr
  239.         s = ', '.join([format('%s: %i', bold(s), i) for (i, s) in results])
  240.         irc.reply(s)
  241.  
  242.     _gtranslateUrl='http://ajax.googleapis.com/ajax/services/language/translate'
  243.     def translate(self, irc, msg, args, fromLang, toLang, text):
  244.         """<from-language> [to] <to-language> <text>
  245.  
  246.        Returns <text> translated from <from-language> into <to-language>.
  247.        Beware that translating to or from languages that use multi-byte
  248.        characters may result in some very odd results.
  249.        """
  250.         channel = msg.args[0]
  251.         ref = self.registryValue('referer')
  252.         if not ref:
  253.             ref = 'http://%s/%s' % (dynamic.irc.server,
  254.                                     dynamic.irc.nick)
  255.         headers = utils.web.defaultHeaders
  256.         headers['Referer'] = ref
  257.         opts = {'q': text, 'v': '1.0'}
  258.         lang = conf.supybot.plugins.Google.defaultLanguage
  259.         if fromLang.capitalize() in lang.transLangs:
  260.             fromLang = lang.transLangs[fromLang.capitalize()]
  261.         elif lang.normalize('lang_'+fromLang)[5:] \
  262.                 not in lang.transLangs.values():
  263.             irc.errorInvalid('from language', fromLang,
  264.                              format('Valid languages are: %L',
  265.                                     lang.transLangs.keys()))
  266.         else:
  267.             fromLang = lang.normalize('lang_'+fromLang)[5:]
  268.         if toLang.capitalize() in lang.transLangs:
  269.             toLang = lang.transLangs[toLang.capitalize()]
  270.         elif lang.normalize('lang_'+toLang)[5:] \
  271.                 not in lang.transLangs.values():
  272.             irc.errorInvalid('to language', toLang,
  273.                              format('Valid languages are: %L',
  274.                                     lang.transLangs.keys()))
  275.         else:
  276.             toLang = lang.normalize('lang_'+toLang)[5:]
  277.         if fromLang == 'auto':
  278.             fromLang = ''
  279.         if toLang == 'auto':
  280.             irc.error("Destination language cannot be 'auto'.")
  281.             return
  282.         opts['langpair'] = '%s|%s' % (fromLang, toLang)
  283.         fd = utils.web.getUrlFd('%s?%s' % (self._gtranslateUrl,
  284.                                            urllib.urlencode(opts)),
  285.                                 headers)
  286.         json = simplejson.load(fd)
  287.         fd.close()
  288.         if json['responseStatus'] != 200:
  289.             raise callbacks.Error, 'Google says: Response Status %s: %s.' % \
  290.                     (json['responseStatus'], json['responseDetails'],)
  291.         if fromLang != '':
  292.             irc.reply(json['responseData']['translatedText'].encode('utf-8'))
  293.         else:
  294.             detected_language = json['responseData']['detectedSourceLanguage'].encode('utf-8')
  295.             translation = json['responseData']['translatedText'].encode('utf-8')
  296.             try:
  297.                 long_lang_name = [k for k,v in lang.transLangs.iteritems() if v == detected_language][0]
  298.             except IndexError: #just in case google adds langs we don't know about
  299.                 long_lang_name = detected_language
  300.             responsestring = "(Detected source language: %s) %s" % \
  301.                 (long_lang_name, translation)
  302.             irc.reply(responsestring)
  303.     translate = wrap(translate, ['something', 'to', 'something', 'text'])
  304.  
  305.     def googleSnarfer(self, irc, msg, match):
  306.         r"^google\s+(.*)$"
  307.         if not self.registryValue('searchSnarfer', msg.args[0]):
  308.             return
  309.         searchString = match.group(1)
  310.         data = self.search(searchString, msg.args[0], {'smallsearch': True})
  311.         if data['responseData']['results']:
  312.             url = data['responseData']['results'][0]['unescapedUrl']
  313.             irc.reply(url.encode('utf-8'), prefixNick=False)
  314.     googleSnarfer = urlSnarfer(googleSnarfer)
  315.  
  316.     def _googleUrl(self, s):
  317.         s = s.replace('+', '%2B')
  318.         s = s.replace(' ', '+')
  319.         url = r'http://google.com/search?q=' + s
  320.         return url
  321.  
  322.     def _googleUrlIG(self, s):
  323.         s = s.replace('+', '%2B')
  324.         s = s.replace(' ', '+')
  325.         url = r'http://www.google.com/ig/calculator?hl=en&q=' + s
  326.         return url
  327.  
  328.     _calcRe1 = re.compile(r'<table.*class="?obcontainer"?[^>]*>(.*?)</table>', re.I)
  329.     _calcRe2 = re.compile(r'<h\d class="?r"?[^>]*>(?:<b>)?(.*?)(?:</b>)?</h\d>', re.I | re.S)
  330.     _calcSupRe = re.compile(r'<sup>(.*?)</sup>', re.I)
  331.     _calcFontRe = re.compile(r'<font size=-2>(.*?)</font>')
  332.     _calcTimesRe = re.compile(r'&(?:times|#215);')
  333.     def calc(self, irc, msg, args, expr):
  334.         """<expression>
  335.  
  336.        Uses Google's calculator to calculate the value of <expression>.
  337.        """
  338.         urlig = self._googleUrlIG(expr)
  339.         js = utils.web.getUrl(urlig)
  340.         # fix bad google json
  341.         js = js.replace('lhs:','"lhs":').replace('rhs:','"rhs":').replace('error:','"error":').replace('icc:','"icc":')
  342.         js = simplejson.loads(js)
  343.        
  344.         if js['error'] == '':
  345.             irc.reply("%s = %s" % (js['lhs'], js['rhs'],))
  346.             return
  347.        
  348.         url = self._googleUrl(expr)
  349.         html = utils.web.getUrl(url)
  350.         match = self._calcRe1.search(html)
  351.         if match is None:
  352.             match = self._calcRe2.search(html)
  353.         if match is not None:
  354.             s = match.group(1)
  355.             s = self._calcSupRe.sub(r'^(\1)', s)
  356.             s = self._calcFontRe.sub(r',', s)
  357.             s = self._calcTimesRe.sub(r'*', s)
  358.             s = utils.web.htmlToText(s)
  359.             irc.reply(s)
  360.         else:
  361.             irc.reply('Google\'s calculator didn\'t come up with anything.')
  362.     calc = wrap(calc, ['text'])
  363.  
  364.     _phoneRe = re.compile(r'Phonebook.*?<font size=-1>(.*?)<a href')
  365.     def phonebook(self, irc, msg, args, phonenumber):
  366.         """<phone number>
  367.  
  368.        Looks <phone number> up on Google.
  369.        """
  370.         url = self._googleUrl(phonenumber)
  371.         html = utils.web.getUrl(url)
  372.         m = self._phoneRe.search(html)
  373.         if m is not None:
  374.             s = m.group(1)
  375.             s = s.replace('<b>', '')
  376.             s = s.replace('</b>', '')
  377.             s = utils.web.htmlToText(s)
  378.             irc.reply(s)
  379.         else:
  380.             irc.reply('Google\'s phonebook didn\'t come up with anything.')
  381.     phonebook = wrap(phonebook, ['text'])
  382.  
  383.  
  384. Class = Google
  385.  
  386.  
  387. # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
Add Comment
Please, Sign In to add comment