Advertisement
Guest User

Untitled

a guest
Oct 10th, 2011
576
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 23.87 KB | None | 0 0
  1. <?php
  2. /* 9 April 2008. version 1.1
  3.  *
  4.  * This is the php version of the Dean Edwards JavaScript's Packer,
  5.  * Based on :
  6.  *
  7.  * ParseMaster, version 1.0.2 (2005-08-19) Copyright 2005, Dean Edwards
  8.  * a multi-pattern parser.
  9.  * KNOWN BUG: erroneous behavior when using escapeChar with a replacement
  10.  * value that is a function
  11.  *
  12.  * packer, version 2.0.2 (2005-08-19) Copyright 2004-2005, Dean Edwards
  13.  *
  14.  * License: http://creativecommons.org/licenses/LGPL/2.1/
  15.  *
  16.  * Ported to PHP by Nicolas Martin.
  17.  *
  18.  * ----------------------------------------------------------------------
  19.  * changelog:
  20.  * 1.1 : correct a bug, '\0' packed then unpacked becomes '\'.
  21.  * ----------------------------------------------------------------------
  22.  *
  23.  * examples of usage :
  24.  * $myPacker = new JavaScriptPacker($script, 62, true, false);
  25.  * $packed = $myPacker->pack();
  26.  *
  27.  * or
  28.  *
  29.  * $myPacker = new JavaScriptPacker($script, 'Normal', true, false);
  30.  * $packed = $myPacker->pack();
  31.  *
  32.  * or (default values)
  33.  *
  34.  * $myPacker = new JavaScriptPacker($script);
  35.  * $packed = $myPacker->pack();
  36.  *
  37.  *
  38.  * params of the constructor :
  39.  * $script:       the JavaScript to pack, string.
  40.  * $encoding:     level of encoding, int or string :
  41.  *                0,10,62,95 or 'None', 'Numeric', 'Normal', 'High ASCII'.
  42.  *                default: 62.
  43.  * $fastDecode:   include the fast decoder in the packed result, boolean.
  44.  *                default : true.
  45.  * $specialChars: set to true if you have flagged your private and local
  46.  *                variables in the script, boolean.
  47.  *                default: false.
  48.  *
  49.  * The pack() method returns the compressed JavaScript, as a string.
  50.  *
  51.  * see http://dean.edwards.name/packer/usage/ for more information.
  52.  *
  53.  * Notes :
  54.  * # need PHP 5 . Tested with PHP 5.1.2, 5.1.3, 5.1.4, 5.2.3
  55.  *
  56.  * # The packed result may differ from the one produced by Dean Edwards'
  57.  *   version, while having the same length. The reason is that the PHP
  58.  *   function usort does not necessarily preserve the original order of
  59.  *   two equal members. The JavaScript sort function does in practice
  60.  *   preserve this order (although that is not required by the
  61.  *   ECMAScript standard). So the order of the encoded keywords can
  62.  *   differ between the two results.
  63.  *
  64.  * # Be careful with the 'High ASCII' Level encoding if you use
  65.  *   UTF-8 in your files...
  66.  */
  67.  
  68.  
  69. class JavaScriptPacker {
  70.     // constants
  71.     const IGNORE = '$1';
  72.  
  73.     // validate parameters
  74.     private $_script = '';
  75.     private $_encoding = 62;
  76.     private $_fastDecode = true;
  77.     private $_specialChars = false;
  78.    
  79.     private $LITERAL_ENCODING = array(
  80.         'None' => 0,
  81.         'Numeric' => 10,
  82.         'Normal' => 62,
  83.         'High ASCII' => 95
  84.     );
  85.    
  86.     public function __construct($_script, $_encoding = 62, $_fastDecode = true, $_specialChars = false)
  87.     {
  88.         $this->_script = $_script . "\n";
  89.         if (array_key_exists($_encoding, $this->LITERAL_ENCODING))
  90.             $_encoding = $this->LITERAL_ENCODING[$_encoding];
  91.         $this->_encoding = min((int)$_encoding, 95);
  92.         $this->_fastDecode = $_fastDecode; 
  93.         $this->_specialChars = $_specialChars;
  94.     }
  95.    
  96.     public function pack() {
  97.         $this->_addParser('_basicCompression');
  98.         if ($this->_specialChars)
  99.             $this->_addParser('_encodeSpecialChars');
  100.         if ($this->_encoding)
  101.             $this->_addParser('_encodeKeywords');
  102.        
  103.         // go!
  104.         return $this->_pack($this->_script);
  105.     }
  106.    
  107.     // apply all parsing routines
  108.     private function _pack($script) {
  109.         for ($i = 0; isset($this->_parsers[$i]); $i++) {
  110.             $script = call_user_func(array(&$this,$this->_parsers[$i]), $script);
  111.         }
  112.         return $script;
  113.     }
  114.    
  115.     // keep a list of parsing functions, they'll be executed all at once
  116.     private $_parsers = array();
  117.     private function _addParser($parser) {
  118.         $this->_parsers[] = $parser;
  119.     }
  120.    
  121.     // zero encoding - just removal of white space and comments
  122.     private function _basicCompression($script) {
  123.         $parser = new ParseMaster();
  124.         // make safe
  125.         $parser->escapeChar = '\\';
  126.         // protect strings
  127.         $parser->add('/\'[^\'\\n\\r]*\'/', self::IGNORE);
  128.         $parser->add('/"[^"\\n\\r]*"/', self::IGNORE);
  129.         // remove comments
  130.         $parser->add('/\\/\\/[^\\n\\r]*[\\n\\r]/', ' ');
  131.         $parser->add('/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//', ' ');
  132.         // protect regular expressions
  133.         $parser->add('/\\s+(\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?)/', '$2'); // IGNORE
  134.         $parser->add('/[^\\w\\x24\\/\'"*)\\?:]\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?/', self::IGNORE);
  135.         // remove: ;;; doSomething();
  136.         if ($this->_specialChars) $parser->add('/;;;[^\\n\\r]+[\\n\\r]/');
  137.         // remove redundant semi-colons
  138.         $parser->add('/\\(;;\\)/', self::IGNORE); // protect for (;;) loops
  139.         $parser->add('/;+\\s*([};])/', '$2');
  140.         // apply the above
  141.         $script = $parser->exec($script);
  142.  
  143.         // remove white-space
  144.         $parser->add('/(\\b|\\x24)\\s+(\\b|\\x24)/', '$2 $3');
  145.         $parser->add('/([+\\-])\\s+([+\\-])/', '$2 $3');
  146.         $parser->add('/\\s+/', '');
  147.         // done
  148.         return $parser->exec($script);
  149.     }
  150.    
  151.     private function _encodeSpecialChars($script) {
  152.         $parser = new ParseMaster();
  153.         // replace: $name -> n, $$name -> na
  154.         $parser->add('/((\\x24+)([a-zA-Z$_]+))(\\d*)/',
  155.                      array('fn' => '_replace_name')
  156.         );
  157.         // replace: _name -> _0, double-underscore (__name) is ignored
  158.         $regexp = '/\\b_[A-Za-z\\d]\\w*/';
  159.         // build the word list
  160.         $keywords = $this->_analyze($script, $regexp, '_encodePrivate');
  161.         // quick ref
  162.         $encoded = $keywords['encoded'];
  163.        
  164.         $parser->add($regexp,
  165.             array(
  166.                 'fn' => '_replace_encoded',
  167.                 'data' => $encoded
  168.             )
  169.         );
  170.         return $parser->exec($script);
  171.     }
  172.    
  173.     private function _encodeKeywords($script) {
  174.         // escape high-ascii values already in the script (i.e. in strings)
  175.         if ($this->_encoding > 62)
  176.             $script = $this->_escape95($script);
  177.         // create the parser
  178.         $parser = new ParseMaster();
  179.         $encode = $this->_getEncoder($this->_encoding);
  180.         // for high-ascii, don't encode single character low-ascii
  181.         $regexp = ($this->_encoding > 62) ? '/\\w\\w+/' : '/\\w+/';
  182.         // build the word list
  183.         $keywords = $this->_analyze($script, $regexp, $encode);
  184.         $encoded = $keywords['encoded'];
  185.        
  186.         // encode
  187.         $parser->add($regexp,
  188.             array(
  189.                 'fn' => '_replace_encoded',
  190.                 'data' => $encoded
  191.             )
  192.         );
  193.         if (empty($script)) return $script;
  194.         else {
  195.             //$res = $parser->exec($script);
  196.             //$res = $this->_bootStrap($res, $keywords);
  197.             //return $res;
  198.             return $this->_bootStrap($parser->exec($script), $keywords);
  199.         }
  200.     }
  201.    
  202.     private function _analyze($script, $regexp, $encode) {
  203.         // analyse
  204.         // retreive all words in the script
  205.         $all = array();
  206.         preg_match_all($regexp, $script, $all);
  207.         $_sorted = array(); // list of words sorted by frequency
  208.         $_encoded = array(); // dictionary of word->encoding
  209.         $_protected = array(); // instances of "protected" words
  210.         $all = $all[0]; // simulate the javascript comportement of global match
  211.         if (!empty($all)) {
  212.             $unsorted = array(); // same list, not sorted
  213.             $protected = array(); // "protected" words (dictionary of word->"word")
  214.             $value = array(); // dictionary of charCode->encoding (eg. 256->ff)
  215.             $this->_count = array(); // word->count
  216.             $i = count($all); $j = 0; //$word = null;
  217.             // count the occurrences - used for sorting later
  218.             do {
  219.                 --$i;
  220.                 $word = '$' . $all[$i];
  221.                 if (!isset($this->_count[$word])) {
  222.                     $this->_count[$word] = 0;
  223.                     $unsorted[$j] = $word;
  224.                     // make a dictionary of all of the protected words in this script
  225.                     //  these are words that might be mistaken for encoding
  226.                     //if (is_string($encode) && method_exists($this, $encode))
  227.                     $values[$j] = call_user_func(array(&$this, $encode), $j);
  228.                     $protected['$' . $values[$j]] = $j++;
  229.                 }
  230.                 // increment the word counter
  231.                 $this->_count[$word]++;
  232.             } while ($i > 0);
  233.             // prepare to sort the word list, first we must protect
  234.             //  words that are also used as codes. we assign them a code
  235.             //  equivalent to the word itself.
  236.             // e.g. if "do" falls within our encoding range
  237.             //      then we store keywords["do"] = "do";
  238.             // this avoids problems when decoding
  239.             $i = count($unsorted);
  240.             do {
  241.                 $word = $unsorted[--$i];
  242.                 if (isset($protected[$word]) /*!= null*/) {
  243.                     $_sorted[$protected[$word]] = substr($word, 1);
  244.                     $_protected[$protected[$word]] = true;
  245.                     $this->_count[$word] = 0;
  246.                 }
  247.             } while ($i);
  248.            
  249.             // sort the words by frequency
  250.             // Note: the javascript and php version of sort can be different :
  251.             // in php manual, usort :
  252.             // " If two members compare as equal,
  253.             // their order in the sorted array is undefined."
  254.             // so the final packed script is different of the Dean's javascript version
  255.             // but equivalent.
  256.             // the ECMAscript standard does not guarantee this behaviour,
  257.             // and thus not all browsers (e.g. Mozilla versions dating back to at
  258.             // least 2003) respect this.
  259.             usort($unsorted, array(&$this, '_sortWords'));
  260.             $j = 0;
  261.             // because there are "protected" words in the list
  262.             //  we must add the sorted words around them
  263.             do {
  264.                 if (!isset($_sorted[$i]))
  265.                     $_sorted[$i] = substr($unsorted[$j++], 1);
  266.                 $_encoded[$_sorted[$i]] = $values[$i];
  267.             } while (++$i < count($unsorted));
  268.         }
  269.         return array(
  270.             'sorted'  => $_sorted,
  271.             'encoded' => $_encoded,
  272.             'protected' => $_protected);
  273.     }
  274.    
  275.     private $_count = array();
  276.     private function _sortWords($match1, $match2) {
  277.         return $this->_count[$match2] - $this->_count[$match1];
  278.     }
  279.    
  280.     // build the boot function used for loading and decoding
  281.     private function _bootStrap($packed, $keywords) {
  282.         $ENCODE = $this->_safeRegExp('$encode\\($count\\)');
  283.  
  284.         // $packed: the packed script
  285.         $packed = "'" . $this->_escape($packed) . "'";
  286.  
  287.         // $ascii: base for encoding
  288.         $ascii = min(count($keywords['sorted']), $this->_encoding);
  289.         if ($ascii == 0) $ascii = 1;
  290.  
  291.         // $count: number of words contained in the script
  292.         $count = count($keywords['sorted']);
  293.  
  294.         // $keywords: list of words contained in the script
  295.         foreach ($keywords['protected'] as $i=>$value) {
  296.             $keywords['sorted'][$i] = '';
  297.         }
  298.         // convert from a string to an array
  299.         ksort($keywords['sorted']);
  300.         $keywords = "'" . implode('|',$keywords['sorted']) . "'.split('|')";
  301.  
  302.         $encode = ($this->_encoding > 62) ? '_encode95' : $this->_getEncoder($ascii);
  303.         $encode = $this->_getJSFunction($encode);
  304.         $encode = preg_replace('/_encoding/','$ascii', $encode);
  305.         $encode = preg_replace('/arguments\\.callee/','$encode', $encode);
  306.         $inline = '\\$count' . ($ascii > 10 ? '.toString(\\$ascii)' : '');
  307.  
  308.         // $decode: code snippet to speed up decoding
  309.         if ($this->_fastDecode) {
  310.             // create the decoder
  311.             $decode = $this->_getJSFunction('_decodeBody');
  312.             if ($this->_encoding > 62)
  313.                 $decode = preg_replace('/\\\\w/', '[\\xa1-\\xff]', $decode);
  314.             // perform the encoding inline for lower ascii values
  315.             elseif ($ascii < 36)
  316.                 $decode = preg_replace($ENCODE, $inline, $decode);
  317.             // special case: when $count==0 there are no keywords. I want to keep
  318.             //  the basic shape of the unpacking funcion so i'll frig the code...
  319.             if ($count == 0)
  320.                 $decode = preg_replace($this->_safeRegExp('($count)\\s*=\\s*1'), '$1=0', $decode, 1);
  321.         }
  322.  
  323.         // boot function
  324.         $unpack = $this->_getJSFunction('_unpack');
  325.         if ($this->_fastDecode) {
  326.             // insert the decoder
  327.             $this->buffer = $decode;
  328.             $unpack = preg_replace_callback('/\\{/', array(&$this, '_insertFastDecode'), $unpack, 1);
  329.         }
  330.         $unpack = preg_replace('/"/', "'", $unpack);
  331.         if ($this->_encoding > 62) { // high-ascii
  332.             // get rid of the word-boundaries for regexp matches
  333.             $unpack = preg_replace('/\'\\\\\\\\b\'\s*\\+|\\+\s*\'\\\\\\\\b\'/', '', $unpack);
  334.         }
  335.         if ($ascii > 36 || $this->_encoding > 62 || $this->_fastDecode) {
  336.             // insert the encode function
  337.             $this->buffer = $encode;
  338.             $unpack = preg_replace_callback('/\\{/', array(&$this, '_insertFastEncode'), $unpack, 1);
  339.         } else {
  340.             // perform the encoding inline
  341.             $unpack = preg_replace($ENCODE, $inline, $unpack);
  342.         }
  343.         // pack the boot function too
  344.         $unpackPacker = new JavaScriptPacker($unpack, 0, false, true);
  345.         $unpack = $unpackPacker->pack();
  346.        
  347.         // arguments
  348.         $params = array($packed, $ascii, $count, $keywords);
  349.         if ($this->_fastDecode) {
  350.             $params[] = 0;
  351.             $params[] = '{}';
  352.         }
  353.         $params = implode(',', $params);
  354.        
  355.         // the whole thing
  356.         return 'eval(' . $unpack . '(' . $params . "))\n";
  357.     }
  358.    
  359.     private $buffer;
  360.     private function _insertFastDecode($match) {
  361.         return '{' . $this->buffer . ';';
  362.     }
  363.     private function _insertFastEncode($match) {
  364.         return '{$encode=' . $this->buffer . ';';
  365.     }
  366.    
  367.     // mmm.. ..which one do i need ??
  368.     private function _getEncoder($ascii) {
  369.         return $ascii > 10 ? $ascii > 36 ? $ascii > 62 ?
  370.                '_encode95' : '_encode62' : '_encode36' : '_encode10';
  371.     }
  372.    
  373.     // zero encoding
  374.     // characters: 0123456789
  375.     private function _encode10($charCode) {
  376.         return $charCode;
  377.     }
  378.    
  379.     // inherent base36 support
  380.     // characters: 0123456789abcdefghijklmnopqrstuvwxyz
  381.     private function _encode36($charCode) {
  382.         return base_convert($charCode, 10, 36);
  383.     }
  384.    
  385.     // hitch a ride on base36 and add the upper case alpha characters
  386.     // characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
  387.     private function _encode62($charCode) {
  388.         $res = '';
  389.         if ($charCode >= $this->_encoding) {
  390.             $res = $this->_encode62((int)($charCode / $this->_encoding));
  391.         }
  392.         $charCode = $charCode % $this->_encoding;
  393.        
  394.         if ($charCode > 35)
  395.             return $res . chr($charCode + 29);
  396.         else
  397.             return $res . base_convert($charCode, 10, 36);
  398.     }
  399.    
  400.     // use high-ascii values
  401.     // characters: ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ
  402.     private function _encode95($charCode) {
  403.         $res = '';
  404.         if ($charCode >= $this->_encoding)
  405.             $res = $this->_encode95($charCode / $this->_encoding);
  406.        
  407.         return $res . chr(($charCode % $this->_encoding) + 161);
  408.     }
  409.    
  410.     private function _safeRegExp($string) {
  411.         return '/'.preg_replace('/\$/', '\\\$', $string).'/';
  412.     }
  413.    
  414.     private function _encodePrivate($charCode) {
  415.         return "_" . $charCode;
  416.     }
  417.    
  418.     // protect characters used by the parser
  419.     private function _escape($script) {
  420.         return preg_replace('/([\\\\\'])/', '\\\$1', $script);
  421.     }
  422.    
  423.     // protect high-ascii characters already in the script
  424.     private function _escape95($script) {
  425.         return preg_replace_callback(
  426.             '/[\\xa1-\\xff]/',
  427.             array(&$this, '_escape95Bis'),
  428.             $script
  429.         );
  430.     }
  431.     private function _escape95Bis($match) {
  432.         return '\x'.((string)dechex(ord($match)));
  433.     }
  434.    
  435.    
  436.     private function _getJSFunction($aName) {
  437.         if (defined('self::JSFUNCTION'.$aName))
  438.             return constant('self::JSFUNCTION'.$aName);
  439.         else
  440.             return '';
  441.     }
  442.    
  443.     // JavaScript Functions used.
  444.     // Note : In Dean's version, these functions are converted
  445.     // with 'String(aFunctionName);'.
  446.     // This internal conversion complete the original code, ex :
  447.     // 'while (aBool) anAction();' is converted to
  448.     // 'while (aBool) { anAction(); }'.
  449.     // The JavaScript functions below are corrected.
  450.    
  451.     // unpacking function - this is the boot strap function
  452.     //  data extracted from this packing routine is passed to
  453.     //  this function when decoded in the target
  454.     // NOTE ! : without the ';' final.
  455.     const JSFUNCTION_unpack =
  456.  
  457. 'function($packed, $ascii, $count, $keywords, $encode, $decode) {
  458.    while ($count--) {
  459.        if ($keywords[$count]) {
  460.            $packed = $packed.replace(new RegExp(\'\\\\b\' + $encode($count) + \'\\\\b\', \'g\'), $keywords[$count]);
  461.        }
  462.    }
  463.    return $packed;
  464. }';
  465. /*
  466. 'function($packed, $ascii, $count, $keywords, $encode, $decode) {
  467.     while ($count--)
  468.         if ($keywords[$count])
  469.             $packed = $packed.replace(new RegExp(\'\\\\b\' + $encode($count) + \'\\\\b\', \'g\'), $keywords[$count]);
  470.     return $packed;
  471. }';
  472. */
  473.    
  474.     // code-snippet inserted into the unpacker to speed up decoding
  475.     const JSFUNCTION_decodeBody =
  476. //_decode = function() {
  477. // does the browser support String.replace where the
  478. //  replacement value is a function?
  479.  
  480. '    if (!\'\'.replace(/^/, String)) {
  481.        // decode all the values we need
  482.        while ($count--) {
  483.            $decode[$encode($count)] = $keywords[$count] || $encode($count);
  484.        }
  485.        // global replacement function
  486.        $keywords = [function ($encoded) {return $decode[$encoded]}];
  487.        // generic match
  488.        $encode = function () {return \'\\\\w+\'};
  489.        // reset the loop counter -  we are now doing a global replace
  490.        $count = 1;
  491.    }
  492. ';
  493. //};
  494. /*
  495. '   if (!\'\'.replace(/^/, String)) {
  496.         // decode all the values we need
  497.         while ($count--) $decode[$encode($count)] = $keywords[$count] || $encode($count);
  498.         // global replacement function
  499.         $keywords = [function ($encoded) {return $decode[$encoded]}];
  500.         // generic match
  501.         $encode = function () {return\'\\\\w+\'};
  502.         // reset the loop counter -  we are now doing a global replace
  503.         $count = 1;
  504.     }';
  505. */
  506.    
  507.      // zero encoding
  508.      // characters: 0123456789
  509.      const JSFUNCTION_encode10 =
  510. 'function($charCode) {
  511.    return $charCode;
  512. }';//;';
  513.    
  514.      // inherent base36 support
  515.      // characters: 0123456789abcdefghijklmnopqrstuvwxyz
  516.      const JSFUNCTION_encode36 =
  517. 'function($charCode) {
  518.    return $charCode.toString(36);
  519. }';//;';
  520.    
  521.     // hitch a ride on base36 and add the upper case alpha characters
  522.     // characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
  523.     const JSFUNCTION_encode62 =
  524. 'function($charCode) {
  525.    return ($charCode < _encoding ? \'\' : arguments.callee(parseInt($charCode / _encoding))) +
  526.    (($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
  527. }';
  528.    
  529.     // use high-ascii values
  530.     // characters: ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ
  531.     const JSFUNCTION_encode95 =
  532. 'function($charCode) {
  533.    return ($charCode < _encoding ? \'\' : arguments.callee($charCode / _encoding)) +
  534.        String.fromCharCode($charCode % _encoding + 161);
  535. }';
  536.    
  537. }
  538.  
  539.  
  540. class ParseMaster {
  541.     public $ignoreCase = false;
  542.     public $escapeChar = '';
  543.    
  544.     // constants
  545.     const EXPRESSION = 0;
  546.     const REPLACEMENT = 1;
  547.     const LENGTH = 2;
  548.    
  549.     // used to determine nesting levels
  550.     private $GROUPS = '/\\(/';//g
  551.     private $SUB_REPLACE = '/\\$\\d/';
  552.     private $INDEXED = '/^\\$\\d+$/';
  553.     private $TRIM = '/([\'"])\\1\\.(.*)\\.\\1\\1$/';
  554.     private $ESCAPE = '/\\\./';//g
  555.     private $QUOTE = '/\'/';
  556.     private $DELETED = '/\\x01[^\\x01]*\\x01/';//g
  557.    
  558.     public function add($expression, $replacement = '') {
  559.         // count the number of sub-expressions
  560.         //  - add one because each pattern is itself a sub-expression
  561.         $length = 1 + preg_match_all($this->GROUPS, $this->_internalEscape((string)$expression), $out);
  562.        
  563.         // treat only strings $replacement
  564.         if (is_string($replacement)) {
  565.             // does the pattern deal with sub-expressions?
  566.             if (preg_match($this->SUB_REPLACE, $replacement)) {
  567.                 // a simple lookup? (e.g. "$2")
  568.                 if (preg_match($this->INDEXED, $replacement)) {
  569.                     // store the index (used for fast retrieval of matched strings)
  570.                     $replacement = (int)(substr($replacement, 1)) - 1;
  571.                 } else { // a complicated lookup (e.g. "Hello $2 $1")
  572.                     // build a function to do the lookup
  573.                     $quote = preg_match($this->QUOTE, $this->_internalEscape($replacement))
  574.                              ? '"' : "'";
  575.                     $replacement = array(
  576.                         'fn' => '_backReferences',
  577.                         'data' => array(
  578.                             'replacement' => $replacement,
  579.                             'length' => $length,
  580.                             'quote' => $quote
  581.                         )
  582.                     );
  583.                 }
  584.             }
  585.         }
  586.         // pass the modified arguments
  587.         if (!empty($expression)) $this->_add($expression, $replacement, $length);
  588.         else $this->_add('/^$/', $replacement, $length);
  589.     }
  590.    
  591.     public function exec($string) {
  592.         // execute the global replacement
  593.         $this->_escaped = array();
  594.        
  595.         // simulate the _patterns.toSTring of Dean
  596.         $regexp = '/';
  597.         foreach ($this->_patterns as $reg) {
  598.             $regexp .= '(' . substr($reg[self::EXPRESSION], 1, -1) . ')|';
  599.         }
  600.         $regexp = substr($regexp, 0, -1) . '/';
  601.         $regexp .= ($this->ignoreCase) ? 'i' : '';
  602.        
  603.         $string = $this->_escape($string, $this->escapeChar);
  604.         $string = preg_replace_callback(
  605.             $regexp,
  606.             array(
  607.                 &$this,
  608.                 '_replacement'
  609.             ),
  610.             $string
  611.         );
  612.         $string = $this->_unescape($string, $this->escapeChar);
  613.        
  614.         return preg_replace($this->DELETED, '', $string);
  615.     }
  616.        
  617.     public function reset() {
  618.         // clear the patterns collection so that this object may be re-used
  619.         $this->_patterns = array();
  620.     }
  621.  
  622.     // private
  623.     private $_escaped = array();  // escaped characters
  624.     private $_patterns = array(); // patterns stored by index
  625.    
  626.     // create and add a new pattern to the patterns collection
  627.     private function _add() {
  628.         $arguments = func_get_args();
  629.         $this->_patterns[] = $arguments;
  630.     }
  631.    
  632.     // this is the global replace function (it's quite complicated)
  633.     private function _replacement($arguments) {
  634.         if (empty($arguments)) return '';
  635.        
  636.         $i = 1; $j = 0;
  637.         // loop through the patterns
  638.         while (isset($this->_patterns[$j])) {
  639.             $pattern = $this->_patterns[$j++];
  640.             // do we have a result?
  641.             if (isset($arguments[$i]) && ($arguments[$i] != '')) {
  642.                 $replacement = $pattern[self::REPLACEMENT];
  643.                
  644.                 if (is_array($replacement) && isset($replacement['fn'])) {
  645.                    
  646.                     if (isset($replacement['data'])) $this->buffer = $replacement['data'];
  647.                     return call_user_func(array(&$this, $replacement['fn']), $arguments, $i);
  648.                    
  649.                 } elseif (is_int($replacement)) {
  650.                     return $arguments[$replacement + $i];
  651.                
  652.                 }
  653.                 $delete = ($this->escapeChar == '' ||
  654.                            strpos($arguments[$i], $this->escapeChar) === false)
  655.                         ? '' : "\x01" . $arguments[$i] . "\x01";
  656.                 return $delete . $replacement;
  657.            
  658.             // skip over references to sub-expressions
  659.             } else {
  660.                 $i += $pattern[self::LENGTH];
  661.             }
  662.         }
  663.     }
  664.    
  665.     private function _backReferences($match, $offset) {
  666.         $replacement = $this->buffer['replacement'];
  667.         $quote = $this->buffer['quote'];
  668.         $i = $this->buffer['length'];
  669.         while ($i) {
  670.             $replacement = str_replace('$'.$i--, $match[$offset + $i], $replacement);
  671.         }
  672.         return $replacement;
  673.     }
  674.    
  675.     private function _replace_name($match, $offset){
  676.         $length = strlen($match[$offset + 2]);
  677.         $start = $length - max($length - strlen($match[$offset + 3]), 0);
  678.         return substr($match[$offset + 1], $start, $length) . $match[$offset + 4];
  679.     }
  680.    
  681.     private function _replace_encoded($match, $offset) {
  682.         return $this->buffer[$match[$offset]];
  683.     }
  684.    
  685.    
  686.     // php : we cannot pass additional data to preg_replace_callback,
  687.     // and we cannot use &$this in create_function, so let's go to lower level
  688.     private $buffer;
  689.    
  690.     // encode escaped characters
  691.     private function _escape($string, $escapeChar) {
  692.         if ($escapeChar) {
  693.             $this->buffer = $escapeChar;
  694.             return preg_replace_callback(
  695.                 '/\\' . $escapeChar . '(.)' .'/',
  696.                 array(&$this, '_escapeBis'),
  697.                 $string
  698.             );
  699.            
  700.         } else {
  701.             return $string;
  702.         }
  703.     }
  704.     private function _escapeBis($match) {
  705.         $this->_escaped[] = $match[1];
  706.         return $this->buffer;
  707.     }
  708.    
  709.     // decode escaped characters
  710.     private function _unescape($string, $escapeChar) {
  711.         if ($escapeChar) {
  712.             $regexp = '/'.'\\'.$escapeChar.'/';
  713.             $this->buffer = array('escapeChar'=> $escapeChar, 'i' => 0);
  714.             return preg_replace_callback
  715.             (
  716.                 $regexp,
  717.                 array(&$this, '_unescapeBis'),
  718.                 $string
  719.             );
  720.            
  721.         } else {
  722.             return $string;
  723.         }
  724.     }
  725.     private function _unescapeBis() {
  726.         if (isset($this->_escaped[$this->buffer['i']])
  727.             && $this->_escaped[$this->buffer['i']] != '')
  728.         {
  729.              $temp = $this->_escaped[$this->buffer['i']];
  730.         } else {
  731.             $temp = '';
  732.         }
  733.         $this->buffer['i']++;
  734.         return $this->buffer['escapeChar'] . $temp;
  735.     }
  736.    
  737.     private function _internalEscape($string) {
  738.         return preg_replace($this->ESCAPE, '', $string);
  739.     }
  740. }
  741. ?>
  742.  
  743.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement