// Untitled

/*
 * Copyright (c) 2006, Opera Software ASA
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Opera Software ASA nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY OPERA SOFTWARE ASA AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL OPERA SOFTWARE ASA AND CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * Scraper class
 *
 * This class performs XMLHttpRequests and uses a callback function to scrape content from the response
 *
 *  @author Mathieu HENRI, Opera Software ASA
 *  @author Magnus Kristiansen, Opera Software ASA
 *  @version    0.9
 */

/**
 *  Fetches a document with XMLHttpRequest, hands it to a scraper function and
 *  passes the scraped result to a callback. Results are cached per data id.
 *
 *  @constructor
 */
function Scraper() {

    /** @private    the URL of the document to scrape   */
    var URL             = '';
    /** @private    the username required to access the document    */
    var username        = null;
    /** @private    the password required to access the document    */
    var password        = null;
    /** @private    the scraper function that will be called back to scrape the document    */
    var scraperFunction = null;
    /** @private    the callback function that will be called back after scraping   */
    var returnFunction  = null;
    /** @private    cache of scraped data: dataId -> { timestamp, url, data }   */
    var data            = {};


    /**
     *  reports a diagnostic message
     *
     *  Uses opera.postError when that function exists and falls back to
     *  console.log otherwise, so that reporting a problem never throws a
     *  ReferenceError in an environment without the 'opera' global.
     *  @private
     *  @param  {String}    message             the text to report
     */
    function log( message )
    {
        if ( typeof opera != 'undefined' && opera && typeof opera.postError == 'function' ) {
            opera.postError( message );
        } else if ( typeof console != 'undefined' && console && typeof console.log == 'function' ) {
            console.log( message );
        }
    }


    /**
     *  sets the URL and authentication parameters
     *  @param  {String}    newURL              URL of the document to scrape.
     *  @param  {String}    newUsername         [OPTIONAL] the username required to access the document. NULL by default.
     *  @param  {String}    newPassword         [OPTIONAL] the password required to access the document. NULL by default.
     *  @return a flag indicating if the setting was successful ( FALSE, with nothing changed, if newURL is not a string )
     *  @type   boolean
     */
    this.setURLAndAuthentication = function( newURL, newUsername, newPassword )
    {
        if ( typeof newURL != 'string' ) {
            return false;
        }

        // any non-string credential resets the stored one to NULL
        username = typeof newUsername == 'string' ? newUsername : null;
        password = typeof newPassword == 'string' ? newPassword : null;

        URL = newURL;

        return true;
    };


    /**
     *  sets the scraper function
     *  @param  {Function}  newScraperFunction  the handle of the scraper function. It will be called with an 'xml' and 'txt' arguments.
     *  @return a flag indicating if the setting was successful ( if newScraperFunction is actually a function )
     *  @type   boolean
     */
    this.setScraperFunction = function( newScraperFunction )
    {
        if ( typeof newScraperFunction != 'function' ) {
            return false;
        }

        scraperFunction = newScraperFunction;

        return true;
    };

    /**
     *  sets the callback function
     *  @param  {Function}  newReturnFunction   the handle of the callback function. It will be called with the return value of the scraper function.
     *  @return a flag indicating if the setting was successful ( if newReturnFunction is actually a function )
     *  @type   boolean
     */
    this.setReturnFunction = function( newReturnFunction )
    {
        if ( typeof newReturnFunction != 'function' ) {
            return false;
        }

        returnFunction = newReturnFunction;

        return true;
    };

    /**
     *  triggers the request of a document and scraping of some data
     *  @param  {String}    dataId              the id of the data to scrape
     *  @param  {Number}    dataMaxAge          [OPTIONAL] maximum age ( in minutes ) of the data below which the cache will be used. 1 minute by default ( and at least ).
     *  @param  {Function}  newScraperFunc      [OPTIONAL] temporary override of scraperFunction
     *  @param  {Function}  newReturnFunc       [OPTIONAL] temporary override of returnFunction
     *  @return FALSE if dataId, the URL, the scraper function or the callback function is missing,
     *          the cached data if a fresh enough entry exists for dataId and the current URL
     *          ( the callback function is NOT called in that case ),
     *          undefined once a request has been started. The callback function then receives
     *          whatever the scraper function returns, or NULL if the request of the document failed.
     *  @see #setURLAndAuthentication
     *  @see #setScraperFunction
     *  @see #setReturnFunction
     */
    this.scrapeData = function( dataId, dataMaxAge, newScraperFunc, newReturnFunc ) {

        var localScraperFunc = typeof newScraperFunc == 'function' ? newScraperFunc : scraperFunction;
        var localReturnFunc  = typeof newReturnFunc == 'function' ? newReturnFunc : returnFunction;

        // all checks run; when several fail, the last failing one is reported
        var error = null;
        if ( dataId == undefined ) { error = 'dataId missing'; }
        if ( URL == '' ) { error = 'no URL defined'; }
        if ( ! localScraperFunc ) { error = 'no scraper defined'; }
        if ( ! localReturnFunc ) { error = 'no callback defined'; }

        if ( error ) {
            log( 'Scraper.scrapeData: ' + error );
            return false;
        }

        // snapshot the URL: the asynchronous handler below must describe and cache
        // the document that was actually requested, even if the URL is changed meanwhile
        var requestURL = URL;

        var maxAgeMs = Math.max( 1, dataMaxAge || 1 ) * 60 * 1000;
        var cached = data[dataId];

        // an entry scraped from another URL is never served for the current one
        if ( cached && cached.url === requestURL && cached.timestamp + maxAgeMs > new Date().getTime() ) {
            log( 'Using cached data for request ' + dataId );
            return cached.data;
        }

        var XHR = new XMLHttpRequest();

        // an empty string is a valid password, only NULL means 'no password set'
        if ( username && password !== null ) {
            XHR.open( 'get', requestURL, true, username, password );
        } else {
            XHR.open( 'get', requestURL, true );
        }

        XHR.onreadystatechange = function() {
            // 4 == request completed
            if ( XHR.readyState != 4 ) { return; }

            // NOTE(review): status 0 is accepted as success, presumably for
            // documents loaded through a non-HTTP scheme -- confirm
            if ( XHR.status == 200 || XHR.status == 304 || XHR.status == 0 ) {

                var xml = XHR.responseXML;
                var text = XHR.responseText;

                if ( ! ( xml && xml.documentElement ) ) {
                    log( 'No responseXML from ' + requestURL + '\nFalling back to DOMParser' );
                    xml = new DOMParser().parseFromString( text, 'text/html' );
                }

                var timestamp = new Date().getTime();
                var scraped = localScraperFunc( xml, text );

                data[dataId] = {
                    timestamp: timestamp,
                    url: requestURL,
                    data: scraped
                };

                localReturnFunc( scraped );
            } else {
                localReturnFunc( null );
            }
        };

        XHR.send( null );

    };

}