Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- var request = require('request')
- var cheerio = require('cheerio')
- var querystring = require('querystring')
- var util = require('util')
- var linkSel = 'h3.r a' // anchor inside a result item; igoogle reads its text as the title and its href for the link
- var descSel = 'div.s' // element inside a result item whose text becomes the description
- var itemSel = 'li.g' // one match per search result; igoogle iterates over these
- var nextSel = 'td.b a span' // the text of the last match is compared against google.nextText to detect a next page
- var URL = 'http://www.google.%s/search?hl=%s&q=%s&start=%s&sa=N&num=%s&ie=UTF-8&oe=UTF-8' // util.format template; igoogle fills in tld, lang, escaped query, start, resultsPerPage (in that order)
- var nextTextErrorMsg = 'Translate `google.nextText` option to selected language to detect next results link.' // warned by igoogle when google.lang is not 'en' but google.nextText is still 'Next'
/**
 * Public entry point: run a search for `query`, starting at the first result.
 *
 * All work is delegated to `igoogle`; this wrapper only supplies the
 * starting offset of zero.
 *
 * @param {string} query - Search terms, forwarded unchanged.
 * @param {Function} callback - Forwarded unchanged to `igoogle`, which
 *   invokes it with the outcome of the search.
 */
function google (query, callback) {
  var firstResultOffset = 0
  igoogle(query, firstResultOffset, callback)
}

// Default settings, stored as properties on the exported function so that
// callers can override them before searching.

// Value sent as the `num` URL parameter.
google.resultsPerPage = 10
// Top-level domain substituted into the search host name.
google.tld = 'com'
// Value sent as the `hl` URL parameter.
google.lang = 'en'
// Extra options copied over the default request options on every search.
google.requestOptions = {}
// Text expected in the "next page" element; compared verbatim when
// deciding whether another page of results exists.
google.nextText = 'Next'
/**
 * Fetch one page of search results and pass the scraped items to `callback`.
 *
 * The URL is built from `google.tld`, `google.lang`, the escaped `query`,
 * `start` and `google.resultsPerPage`. The request defaults to a GET of that
 * URL, with every key of `google.requestOptions` copied over the defaults.
 * The response body is parsed with cheerio using the module-level selectors.
 *
 * @param {string} query - Search terms; escaped with `querystring.escape`.
 * @param {number} start - Offset of the first result to request.
 * @param {Function} callback - Called as `callback(null, next, links)` when
 *   the request succeeds with status 200. `links` is an array of
 *   `{ title, link, description, href }` objects; `link`/`href` stay `null`
 *   when the anchor's href carries no `/url?q` value. `next` is `null`, or a
 *   zero-argument function that fetches the following page with the same
 *   query and callback. On a request error or a non-200 status it is called
 *   as `callback(error, null, null)`; when the request itself failed, the
 *   original error is available as `error.cause`.
 */
var igoogle = function (query, start, callback) {
  if (google.resultsPerPage > 100) google.resultsPerPage = 100 // Google won't allow greater than 100 anyway
  if (google.lang !== 'en' && google.nextText === 'Next') console.warn(nextTextErrorMsg)

  // Snapshot the page size used for THIS request. The next-page offset must
  // be based on the size of the page just fetched, even if
  // google.resultsPerPage is changed before `next` is invoked.
  var pageSize = google.resultsPerPage

  var newUrl = util.format(URL, google.tld, google.lang, querystring.escape(query), start, pageSize)
  var requestOptions = {
    url: newUrl,
    method: 'GET'
  }
  // Caller-supplied options win over the defaults above.
  for (var k in google.requestOptions) {
    requestOptions[k] = google.requestOptions[k]
  }

  request(requestOptions, function (err, resp, body) {
    if (err != null || resp.statusCode !== 200) {
      var failure = new Error('Error on response' + (resp ? ' (' + resp.statusCode + ')' : '') + ':' + err + ' : ' + body)
      // Keep the underlying error object (stack, code, ...) reachable
      // instead of only its string form embedded in the message.
      if (err != null) failure.cause = err
      callback(failure, null, null)
      return
    }

    var $ = cheerio.load(body)
    var links = []

    $(itemSel).each(function (i, elem) {
      var linkElem = $(elem).find(linkSel)
      var descElem = $(elem).find(descSel)
      var item = {
        title: $(linkElem).first().text(),
        link: null,
        description: null,
        href: null
      }

      // The href is parsed as a query string, so a redirect-style href
      // ("/url?q=<target>&...") exposes the target under the literal
      // key '/url?q'.
      var qsObj = querystring.parse($(linkElem).attr('href'))
      if (qsObj['/url?q']) {
        item.link = qsObj['/url?q']
        item.href = item.link
      }

      // Drop nested <div>s so only the description's own text is kept.
      $(descElem).find('div').remove()
      item.description = $(descElem).text()

      links.push(item)
    })

    // A further page exists only when the last pager element carries the
    // configured "next" label.
    var nextFunc = null
    if ($(nextSel).last().text() === google.nextText) {
      nextFunc = function () {
        igoogle(query, start + pageSize, callback)
      }
    }

    callback(null, nextFunc, links)
  })
}
- module.exports = google // the module's sole export is the google function itself; its settings are properties on it
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement