Advertisement
Guest User

Untitled

a guest
May 5th, 2017
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /*
  2.  * Copyright (c) 2006, Opera Software ASA
  3.  * All rights reserved.
  4.  * Redistribution and use in source and binary forms, with or without
  5.  * modification, are permitted provided that the following conditions are met:
  6.  *
  7.  *     * Redistributions of source code must retain the above copyright
  8.  *       notice, this list of conditions and the following disclaimer.
  9.  *     * Redistributions in binary form must reproduce the above copyright
  10.  *       notice, this list of conditions and the following disclaimer in the
  11.  *       documentation and/or other materials provided with the distribution.
  12.  *     * Neither the name of Opera Software ASA nor the
  13.  *       names of its contributors may be used to endorse or promote products
  14.  *       derived from this software without specific prior written permission.
  15.  *
  16.  * THIS SOFTWARE IS PROVIDED BY OPERA SOFTWARE ASA AND CONTRIBUTORS ``AS IS'' AND ANY
  17.  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18.  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19.  * DISCLAIMED. IN NO EVENT SHALL OPERA SOFTWARE ASA AND CONTRIBUTORS BE LIABLE FOR ANY
  20.  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21.  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22.  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23.  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24.  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25.  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26.  */
  27.  
  28. /**
  29.  * Scraper class
  30.  *
  31.  * This class performs XMLHttpRequests and uses a callback function to scrape some content
  32.  *
  33.  *  @author Mathieu HENRI, Opera Software ASA
  34.  *  @author Magnus Kristiansen, Opera Software ASA
  35.  *  @version    0.9
  36.  */
  37.  
  38. /**  
  39.  *  @constructor
  40.  */
  41. function Scraper() {
  42.  
  43.     /** @private    */
  44.     /** the URL of the document to scrape   */
  45.     var URL             = ''
  46.     /** @private    */
  47.     /** the username required to access the document    */
  48.     var username        = null
  49.     /** @private    */
  50.     /** the password required to access the document    */
  51.     var password        = null
  52.     /** @private    */
  53.     /** the scraper function that will be called back to scrape the document    */
  54.     var scraperFunction = null;
  55.     /** @private    */
  56.     /** the callback function that will be called back after scraping   */
  57.     var returnFunction  = null;
  58.     /** @private    */
  59.     /** the list of data scraped    */
  60.     var data            = {};
  61.  
  62.  
  63.     /**
  64.      *  sets the URL and authentication parameters
  65.      *  @param  {String}    newURL              URL of the document to scrape.
  66.      *  @param  {String}    newUsername         [OPTIONAL] the username required to access the document. NULL by default.
  67.      *  @param  {String}    newPassword         [OPTIONAL] the password required to access the document. NULL by default.
  68.      *  @return a flag indicating if the setting was succesful
  69.      *  @type   boolean
  70.      */
  71.     this.setURLAndAuthentication = function( newURL, newUsername, newPassword )
  72.     {
  73.         if ( typeof newURL != 'string' ) {
  74.             return false;
  75.         }
  76.  
  77.         username = typeof newUsername == 'string' ? newUsername : null;
  78.         password = typeof newPassword == 'string' ? newPassword : null;
  79.  
  80.         URL = newURL;
  81.  
  82.         return true;
  83.     }
  84.  
  85.  
  86.     /**
  87.      *  sets the scraper function
  88.      *  @param  {Function}  newScraperFunction  the handle of the scraper function. It will be called with an 'xml' and 'txt' arguments.
  89.      *  @return a flag indicating if the setting was succesful ( if newScraperFunction is actually a function )
  90.      *  @type   boolean
  91.      */
  92.     this.setScraperFunction = function( newScraperFunction )
  93.     {
  94.         if ( typeof newScraperFunction != 'function' ) {
  95.             return false;
  96.         }
  97.  
  98.         scraperFunction = newScraperFunction;
  99.          
  100.         return true;
  101.     }
  102.  
  103.     /**
  104.      *  sets the callback function
  105.      *  @param  {Function}  newReturnFunction   the handle of the callback function. It will be called with the return value of the scraper function.
  106.      *  @return a flag indicating if the setting was succesful ( if newScraperFunction is actually a function )
  107.      *  @type   boolean
  108.      */
  109.     this.setReturnFunction = function( newReturnFunction )
  110.     {
  111.         if ( typeof newReturnFunction != 'function' ) {
  112.             return false;
  113.         }
  114.  
  115.         returnFunction = newReturnFunction;
  116.          
  117.         return true;
  118.     }
  119.  
  120.     /**
  121.      *  triggers the request of a document and scraping of some data
  122.      *  @param  {String}    dataId              the id of the data to scrape
  123.      *  @param  {Number}    dataMaxAge          [OPTIONAL] maximum age ( in minutes ) of the data below which the cache will be used. 1 minute by default.
  124.      *  @param  {Function}  newScraperFunc      [OPTIONAL] temporary override of scraperFunction
  125.      *  @param  {Function}  newReturnFunc       [OPTIONAL] temporary override of returnFunction
  126.      *  @return FALSE if the optional parameters are invalid,
  127.      *          NULL if the request of the document failed,
  128.      *          whatever the scaperFunction returns ( should be an array or an object )
  129.      *  @see #setURLAndAuthentication
  130.      *  @see #setScraperFunction
  131.      */
  132.     this.scrapeData = function( dataId, dataMaxAge, newScraperFunc, newReturnFunc ) {
  133.          
  134.         var localScraperFunc = (typeof newScraperFunc == 'function' && newScraperFunc) || scraperFunction;
  135.         var localReturnFunc = (typeof newReturnFunc == 'function' && newReturnFunc) || returnFunction;
  136.          
  137.         var error = null;
  138.         if ( dataId == undefined ) { error = 'dataId missing'; }
  139.         if ( URL == '' ) { error = 'no URL defined'; }
  140.         if ( ! localScraperFunc ) { error = 'no scraper defined'; }
  141.         if ( ! localReturnFunc ) { error = 'no callback defined'; }
  142.              
  143.         if ( error ) {
  144.             opera.postError( 'Scraper.scrapeData: ' + error );
  145.             return false;
  146.         }
  147.  
  148.         var dataMaxAge = Math.max( 1, dataMaxAge || 1 ) * 60 * 1000;
  149.         if ( data[dataId] && data[dataId].timestamp + dataMaxAge > new Date().getTime() ) {
  150.             opera.postError( 'Using cached data for request ' + dataId );
  151.             return data[dataId].data;
  152.         }
  153.  
  154.         var XHR = new XMLHttpRequest()
  155.  
  156.         if ( username && password ) {
  157.             XHR.open( 'get', URL, true, username, password );
  158.         } else {
  159.             XHR.open( 'get', URL, true );
  160.         }
  161.          
  162.         XHR.onreadystatechange = function() {
  163.             if ( XHR.readyState != 4) return;
  164.              
  165.             if ( XHR.status==200 || XHR.status==304 || XHR.status==0 ) {
  166.                  
  167.                 var xml = XHR.responseXML;
  168.                 var text = XHR.responseText;
  169.                  
  170.                 if (xml && xml.documentElement) {
  171.                     // valid XML, yay
  172.                 } else {
  173.                     opera.postError( 'No responseXML from ' + URL + '\nFalling back to DOMParser' );
  174.                     var parser = new DOMParser();
  175.                     xml = parser.parseFromString( text, 'text/html' );
  176.                 }
  177.      
  178.                 data[dataId] = {
  179.                     timestamp: new Date().getTime(),
  180.                     data: localScraperFunc( xml, text )
  181.                 };
  182.                  
  183.                 localReturnFunc( data[dataId].data );
  184.             } else {
  185.                 localReturnFunc( null );
  186.             }
  187.         }
  188.  
  189.         XHR.send(null);
  190.  
  191.     }
  192.  
  193. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement