Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * Copyright (c) 2006, Opera Software ASA
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Opera Software ASA nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY OPERA SOFTWARE ASA AND CONTRIBUTORS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL OPERA SOFTWARE ASA AND CONTRIBUTORS BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- /**
- * Scraper class
- *
- * This class performs XMLHttpRequests and uses a callback function to scrape content from the response
- *
- * @author Mathieu HENRI, Opera Software ASA
- * @author Magnus Kristiansen, Opera Software ASA
- * @version 0.9
- */
/**
 * Fetches a document with XMLHttpRequest, hands it to a "scraper" function and
 * passes the scraper's result to a "return" (callback) function. Results are
 * cached per data id for a configurable number of minutes.
 *
 * All state (URL, credentials, functions, cache) is private to the instance.
 *
 * @constructor
 */
function Scraper() {
    /** @private the URL of the document to scrape */
    var URL = '';
    /** @private the username required to access the document */
    var username = null;
    /** @private the password required to access the document */
    var password = null;
    /** @private the scraper function that will be called back to scrape the document */
    var scraperFunction = null;
    /** @private the callback function that will be called back after scraping */
    var returnFunction = null;
    /** @private scraped data by data id: { url, timestamp, data } */
    var data = {};

    /**
     * Writes a diagnostic message to Opera's error console when available and
     * falls back to console.error elsewhere, so that logging can never throw
     * a ReferenceError outside Opera.
     * @private
     * @param {String} message the text to log
     */
    function logMessage( message ) {
        if ( typeof opera !== 'undefined' && opera && typeof opera.postError === 'function' ) {
            opera.postError( message );
        } else if ( typeof console !== 'undefined' && console && typeof console.error === 'function' ) {
            console.error( message );
        }
    }

    /**
     * sets the URL and authentication parameters
     * @param {String} newURL URL of the document to scrape.
     * @param {String} newUsername [OPTIONAL] the username required to access the document. NULL by default.
     * @param {String} newPassword [OPTIONAL] the password required to access the document. NULL by default.
     * @return a flag indicating if the setting was successful ( if newURL is a string )
     * @type boolean
     */
    this.setURLAndAuthentication = function( newURL, newUsername, newPassword ) {
        if ( typeof newURL !== 'string' ) {
            return false;
        }
        username = typeof newUsername === 'string' ? newUsername : null;
        password = typeof newPassword === 'string' ? newPassword : null;
        URL = newURL;
        return true;
    };

    /**
     * sets the scraper function
     * @param {Function} newScraperFunction the handle of the scraper function. It will be called with an 'xml' and 'txt' arguments.
     * @return a flag indicating if the setting was successful ( if newScraperFunction is actually a function )
     * @type boolean
     */
    this.setScraperFunction = function( newScraperFunction ) {
        if ( typeof newScraperFunction !== 'function' ) {
            return false;
        }
        scraperFunction = newScraperFunction;
        return true;
    };

    /**
     * sets the callback function
     * @param {Function} newReturnFunction the handle of the callback function. It will be called with the return value of the scraper function.
     * @return a flag indicating if the setting was successful ( if newReturnFunction is actually a function )
     * @type boolean
     */
    this.setReturnFunction = function( newReturnFunction ) {
        if ( typeof newReturnFunction !== 'function' ) {
            return false;
        }
        returnFunction = newReturnFunction;
        return true;
    };

    /**
     * triggers the request of a document and scraping of some data
     * @param {String} dataId the id of the data to scrape
     * @param {Number} dataMaxAge [OPTIONAL] maximum age ( in minutes ) of the data below which the cache will be used. 1 minute by default ( and at minimum ).
     * @param {Function} newScraperFunc [OPTIONAL] temporary override of scraperFunction
     * @param {Function} newReturnFunc [OPTIONAL] temporary override of returnFunction
     * @return FALSE if dataId, the URL, the scraper or the callback is missing,
     *         the cached data if a fresh enough entry exists for dataId and the current URL
     *         ( in that case no request is made and the callback is NOT invoked ),
     *         UNDEFINED otherwise: the request is asynchronous and the callback receives
     *         whatever the scraper function returns, or NULL if the request failed.
     * @see #setURLAndAuthentication
     * @see #setScraperFunction
     * @see #setReturnFunction
     */
    this.scrapeData = function( dataId, dataMaxAge, newScraperFunc, newReturnFunc ) {
        var localScraperFunc = typeof newScraperFunc === 'function' ? newScraperFunc : scraperFunction;
        var localReturnFunc = typeof newReturnFunc === 'function' ? newReturnFunc : returnFunction;

        // Every check runs; when several fail, the last failing one is reported.
        var error = null;
        if ( dataId == null ) { error = 'dataId missing'; }
        if ( URL === '' ) { error = 'no URL defined'; }
        if ( ! localScraperFunc ) { error = 'no scraper defined'; }
        if ( ! localReturnFunc ) { error = 'no callback defined'; }
        if ( error ) {
            logMessage( 'Scraper.scrapeData: ' + error );
            return false;
        }

        // Freeze the URL for this request: the instance URL may be changed
        // while the request is still in flight.
        var requestURL = URL;

        // Minutes -> milliseconds, never less than one minute.
        var maxAgeMs = Math.max( 1, dataMaxAge || 1 ) * 60 * 1000;
        var cached = data[dataId];
        // An entry only counts as a hit if it was scraped from the current URL,
        // otherwise a URL change would keep serving the previous document's data.
        if ( cached && cached.url === requestURL && cached.timestamp + maxAgeMs > new Date().getTime() ) {
            logMessage( 'Using cached data for request ' + dataId );
            return cached.data;
        }

        var XHR = new XMLHttpRequest();
        if ( username && password ) {
            XHR.open( 'get', requestURL, true, username, password );
        } else {
            XHR.open( 'get', requestURL, true );
        }
        XHR.onreadystatechange = function() {
            if ( XHR.readyState !== 4 ) {
                return;
            }
            // NOTE(review): status 0 is accepted alongside 200/304, presumably for
            // documents that are not served over HTTP - confirm.
            if ( XHR.status === 200 || XHR.status === 304 || XHR.status === 0 ) {
                var xml = XHR.responseXML;
                var text = XHR.responseText;
                if ( ! ( xml && xml.documentElement ) ) {
                    // No usable XML document: build one from the response text instead.
                    logMessage( 'No responseXML from ' + requestURL + '\nFalling back to DOMParser' );
                    xml = new DOMParser().parseFromString( text, 'text/html' );
                }
                var scraped = localScraperFunc( xml, text );
                data[dataId] = {
                    url: requestURL,
                    timestamp: new Date().getTime(),
                    data: scraped
                };
                localReturnFunc( scraped );
            } else {
                localReturnFunc( null );
            }
        };
        XHR.send( null );
    };
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement