Advertisement
genBTC

googlesearch code from node.js (bot)

Nov 7th, 2015
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. var request = require('request')
  2. var cheerio = require('cheerio')
  3. var querystring = require('querystring')
  4. var util = require('util')
  5.  
  // CSS selectors applied to the downloaded results page.
  var linkSel = 'h3.r a' // anchor inside a result; supplies the title text and the href
  var descSel = 'div.s' // element inside a result whose text becomes the description
  var itemSel = 'li.g' // one match per search result
  var nextSel = 'td.b a span' // text of the last match is compared with google.nextText

  // util.format template. Placeholders, in order: tld, language, escaped query,
  // start offset, results per page.
  var URL = 'http://www.google.%s/search?hl=%s&q=%s&start=%s&sa=N&num=%s&ie=UTF-8&oe=UTF-8'

  // Warning printed when a non-English language is configured while
  // google.nextText still holds its English default.
  var nextTextErrorMsg = 'Translate `google.nextText` option to selected language to detect next results link.'

  /**
   * Public entry point: searches for `query` starting at the first result.
   *
   * @param {string} query - search terms; escaped before being placed in the URL.
   * @param {Function} callback - invoked as `callback(err, next, links)`.
   *   On success `err` is null, `links` is the array of parsed results and
   *   `next` is either null or a function that fetches the following page
   *   and invokes the same callback again. On failure `err` is an Error and
   *   the other two arguments are null.
   */
  function google (query, callback) {
    igoogle(query, 0, callback)
  }

  // Tunable defaults, exposed as properties of the exported function.
  google.resultsPerPage = 10 // sent as `num`; values above 100 are clamped when a search runs
  google.tld = 'com' // domain suffix: www.google.<tld>
  google.lang = 'en' // sent as the `hl` parameter
  google.requestOptions = {} // extra options copied onto every request (can override url/method)
  google.nextText = 'Next' // label that marks the "next page" link; translate when changing lang

  /**
   * Builds the result object for one element matched by `itemSel`.
   *
   * @param {Function} $ - cheerio instance loaded with the results page.
   * @param {Object} elem - the result element.
   * @returns {{title: string, link: ?string, description: string, href: ?string}}
   *   `link` and `href` stay null when the anchor's href has no `/url?q` part.
   */
  function parseResultItem ($, elem) {
    var linkElem = $(elem).find(linkSel)
    var descElem = $(elem).find(descSel)
    var item = {
      title: $(linkElem).first().text(),
      link: null,
      description: null,
      href: null
    }

    // The whole href is parsed as a query string, so for an href of the form
    // `/url?q=<target>&...` the target ends up under the key '/url?q'.
    var qsObj = querystring.parse($(linkElem).attr('href'))
    if (qsObj['/url?q']) {
      item.link = qsObj['/url?q']
      item.href = item.link
    }

    // Nested <div>s are dropped before the description text is read.
    $(descElem).find('div').remove()
    item.description = $(descElem).text()

    return item
  }

  /**
   * Fetches a single results page and reports it through `callback`.
   *
   * @param {string} query - search terms (escaped with querystring.escape).
   * @param {number} start - offset of the first result to request.
   * @param {Function} callback - `callback(err, next, links)`:
   *   success -> `(null, nextFunc | null, links)`, where `nextFunc()` requests
   *   the page starting at `start + google.resultsPerPage`;
   *   failure -> `(Error, null, null)`. When the request itself failed, the
   *   underlying error is available as `err.cause`.
   */
  var igoogle = function (query, start, callback) {
    if (google.resultsPerPage > 100) google.resultsPerPage = 100 // Google won't allow greater than 100 anyway
    if (google.lang !== 'en' && google.nextText === 'Next') console.warn(nextTextErrorMsg)

    var newUrl = util.format(URL, google.tld, google.lang, querystring.escape(query), start, google.resultsPerPage)
    var requestOptions = {
      url: newUrl,
      method: 'GET'
    }

    // User options are copied last, so they may override `url` and `method`.
    for (var k in google.requestOptions) {
      requestOptions[k] = google.requestOptions[k]
    }

    request(requestOptions, function (err, resp, body) {
      // `!resp` guards against a missing response object without an error,
      // which would otherwise throw instead of reaching the callback.
      if (err != null || !resp || resp.statusCode !== 200) {
        var failure = new Error('Error on response' + (resp ? ' (' + resp.statusCode + ')' : '') + ':' + err + ' : ' + body)
        if (err != null) failure.cause = err // keep the original error (code, stack) reachable
        callback(failure, null, null)
        return
      }

      var $ = cheerio.load(body)
      var links = []

      $(itemSel).each(function (i, elem) {
        links.push(parseResultItem($, elem))
      })

      // A further page exists only when the last pagination label equals nextText.
      var nextFunc = null
      if ($(nextSel).last().text() === google.nextText) {
        nextFunc = function () {
          igoogle(query, start + google.resultsPerPage, callback)
        }
      }

      callback(null, nextFunc, links)
    })
  }

  80. module.exports = google
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement