Guest User

Joshua Thompson

a guest
Jul 31st, 2007
1,901
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 8.19 KB | None | 0 0
  1. <?php
  2. /**
  3.  * Diff and patch functions created using the documentation from the Wikipedia
  4.  * article at http://en.wikipedia.org/wiki/Longest_common_subsequence_problem
  5.  *
  6.  * Copyright (c) 2007, Joshua Thompson <http://www.schmalls.com>
  7.  * All rights reserved.
  8.  *
  9.  * Redistribution and use in source and binary forms, with or without
  10.  * modification, are permitted provided that the following conditions
  11.  * are met:
  12.  *
  13.  *   * Redistributions of source code must retain the above copyright
  14.  *     notice, this list of conditions and the following disclaimer.
  15.  *
  16.  *   * Redistributions in binary form must reproduce the above copyright
  17.  *     notice, this list of conditions and the following disclaimer in
  18.  *     the documentation and/or other materials provided with the
  19.  *     distribution.
  20.  *
  21.  *   * Neither the name of Joshua Thompson nor the names of its
  22.  *     contributors may be used to endorse or promote products derived
  23.  *     from this software without specific prior written permission.
  24.  *
  25.  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26.  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27.  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  28.  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  29.  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  30.  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  31.  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  32.  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  33.  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRIC
  34.  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  35.  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36.  * POSSIBILITY OF SUCH DAMAGE.
  37.  *
  38.  * @copyright  2007 Joshua Thompson <http://www.schmalls.com>
  39.  * @license    http://www.opensource.org/licenses/bsd-license.php BSD License
  40.  * @author     Joshua Thompson <[email protected]>
  41.  * @version    1.0.0
  42.  * @link       http://www.schmalls.com
  43.  */
  44.  
  45. /**
  46.  * Finds the Least Common Subsequence of the two inputs, then uses it to
  47.  * generate the diff, and returns the diff as a string.
  48.  *
  49.  * @param   string $original
  50.  * @param   string $updated
  51.  * @return  array
  52.  */
  53. function diff( $original, $updated )
  54. {
  55.     /**
  56.      * Returns the diff as a string.
  57.      *
  58.      * @param   array $c the LCS
  59.      * @param   array $x the original
  60.      * @param   array $y the updated
  61.      * @param   int $i
  62.      * @param   int $j
  63.      * @param   string $last
  64.      * @param   boolean $change
  65.      * @param   int $end_i
  66.      * @param   int $end_j
  67.      * @param   int $add_i
  68.      * @param   int $add_j
  69.      * @return  string
  70.      */
  71.     function print_diff( $c, $x, $y, $i, $j, $last = '', $change = false, $end_i = 0, $end_j = 0, $add_i = 0, $add_j = 0 )
  72.     {
  73.         $patch = '';
  74.         if ( ( $i >= 0 ) && ( $j >= 0 ) && ( $x[$i] == $y[$j] ) ) {
  75.             $patch .= print_diff( $c, $x, $y, $i - 1, $j - 1 );
  76.             if ( $last != '' ) {
  77.                 $i_text = ( ( $i + $add_i ) == ( $end_i + 1 ) ) ? ( $i + $add_i ) : ( $i + $add_i ) . ',' . ( $end_i + 1 );
  78.                 $j_text = ( ( $j + $add_j ) == ( $end_j + 1 ) ) ? ( $j + $add_j ) : ( $j + $add_j ) . ',' . ( $end_j + 1 );
  79.                 $last = ( $change ) ? 'c' : $last;
  80.                 $patch .= $i_text . $last . $j_text . "\n";
  81.             }
  82.         } elseif ( ( $i == -1) && ( $j == -1 ) ) {
  83.             $i_text = ( ( $i + $add_i ) == ( $end_i + 1 ) ) ? ( $i + $add_i ) : ( $i + $add_i ) . ',' . ( $end_i + 1 );
  84.             $j_text = ( ( $j + $add_j ) == ( $end_j + 1 ) ) ? ( $j + $add_j ) : ( $j + $add_j ) . ',' . ( $end_j + 1 );
  85.             $last = ( $change ) ? 'c' : $last;
  86.             $patch .= $i_text . $last . $j_text . "\n";
  87.         } else {
  88.             $end_i = ( $end_i == 0 ) ? $i : $end_i;
  89.             $end_j = ( $end_j == 0 ) ? $j : $end_j;
  90.             if ( ($j >= 0 ) && ( ( $i == -1 ) || ( $c[$i][$j - 1] >= $c[$i - 1][$j] ) ) ) {
  91.                 $change = ( ( $last == 'd' ) || ( $change ) );
  92.                 $patch .= print_diff( $c, $x, $y, $i, $j - 1, 'a', $change, $end_i, $end_j, 1, 2 );
  93.                 $patch .= '> ' . $y[$j] . "\n";
  94.             } elseif ( ( $i >= 0 ) && ( ( $j == -1 ) || ( $c[$i][$j - 1] < $c[$i - 1][$j] ) ) ) {
  95.                 $change = ( ( $last == 'a' ) || ( $change ) );
  96.                 $patch .= print_diff( $c, $x, $y, $i - 1, $j, 'd', $change, $end_i, $end_j, 2, 2 );
  97.             }
  98.         }
  99.        
  100.         return $patch;
  101.     }
  102.    
  103.     $x = explode( "\n", str_replace( "\r\n", "\n", $original ) );
  104.     $y = explode( "\n", str_replace( "\r\n", "\n", $updated ) );
  105.    
  106.     $m_start = 0;
  107.     $m_end = count( $x ) - 1;
  108.     $n_start = 0;
  109.     $n_end = count( $y ) - 1;
  110.    
  111.     // trim off matching items at the beginning
  112.     while ( ( $m_start < $m_end ) && ( $n_start < $n_end ) && ( $x[$m_start] == $y[$n_start] ) ) {
  113.         $m_start++;
  114.         $n_start++;
  115.     }
  116.     // trim off matching items at the end
  117.     while ( ( $m_start < $m_end ) && ( $n_start < $n_end ) && ( $x[$m_end] == $y[$n_end] ) ) {
  118.         $m_end--;
  119.         $n_end--;
  120.     }
  121.     // now the LCS magic
  122.     $c = array();
  123.     for ( $a = -1; $a <= $m_end; $a++ ) {
  124.         $c[$a] = array();
  125.         for ( $b = -1; $b <= $n_end; $b++ ) {
  126.             $c[$a][$b] = 0;
  127.         }
  128.     }
  129.     for ( $i = $m_start; $i <= $m_end; $i++ ) {
  130.         for ( $j = $n_start; $j <= $n_end; $j++ ) {
  131.             if ( $x[$i] == $y[$j] ) {
  132.                 $c[$i][$j] = $c[$i - 1][$j - 1] + 1;
  133.             } else {
  134.                 $c[$i][$j] = max( $c[$i][$j - 1], $c[$i - 1][$j] );
  135.             }
  136.         }
  137.     }
  138.    
  139.     return print_diff( $c, $x, $y, count( $x ) - 1, count( $y ) - 1 );
  140. }
  141.  
  142. /**
  143.  * Applies the patch to the original document and returns the patched document.
  144.  *
  145.  * @param   string $original
  146.  * @param   string $patch
  147.  * @return  string
  148.  */
  149. function patch( $original, $patch )
  150. {
  151.     $new = array();
  152.     $original_array = explode( "\n", str_replace( "\r\n", "\n", $original ) );
  153.     $patch_array = explode( "\n", $patch );
  154.     $i = 0;
  155.     foreach ( $patch_array as $line ) {
  156.         if ( ( !empty( $line ) ) && ( $line[0] == '>' ) ) {
  157.             $new[] = substr( $line, 2 );
  158.         } elseif ( !empty( $line ) ) {
  159.             $pos = ( strpos( $line, 'a' ) !== false ) ? strpos( $line, 'a' ) : ( ( strpos( $line, 'c' ) !== false ) ? strpos( $line, 'c' ) : ( ( strpos( $line, 'd' ) !== false ) ? strpos( $line, 'd' ) : false ) );
  160.             $type = $line[$pos];
  161.             list( $i_half, $j_half ) = explode( $type, $line );
  162.             list( $i_start, $i_end ) = explode( ',', $i_half . ',' . $i_half );
  163.             $sub = ( $type == 'a' ) ? 0 : 1;
  164.             for ( $a = $i; $a < ( $i_start - $sub ); $a++ ) {
  165.                 $new[] = $original_array[$a];
  166.             }
  167.             $i = $i_end;
  168.         }
  169.     }
  170.     for ( $a = $i; $a < count( $original_array ); $a++ ) {
  171.         $new[] = $original_array[$a];
  172.     }
  173.    
  174.     return implode( "\n", $new );
  175. }
  176.  
  177. $original = 'This part of the
  178. document has stayed the
  179. same from version to
  180. version.  It shouldn\'t
  181. be shown if it doesn\'t
  182. change.  Otherwise, that
  183. would not be helping to
  184. compress the size of the
  185. changes.
  186.  
  187. This paragraph contains
  188. text that is outdated.
  189. It will be deleted in the
  190. near future.
  191.  
  192. It is important to spell
  193. check this dokument. On
  194. the other hand, a
  195. misspelled word isn\'t
  196. the end of the world.
  197. Nothing in the rest of
  198. this paragraph needs to
  199. be changed. Things can
  200. be added after it.';
  201.  
  202. $updated = 'This is an important
  203. notice! It should
  204. therefore be located at
  205. the beginning of this
  206. document!
  207.  
  208. This part of the
  209. document has stayed the
  210. same from version to
  211. version.  It shouldn\'t
  212. be shown if it doesn\'t
  213. change.  Otherwise, that
  214. would not be helping to
  215. compress anything.
  216.  
  217. It is important to spell
  218. check this document. On
  219. the other hand, a
  220. misspelled word isn\'t
  221. the end of the world.
  222. Nothing in the rest of
  223. this paragraph needs to
  224. be changed. Things can
  225. be added after it.
  226.  
  227. This paragraph contains
  228. important new additions
  229. to this document.';
  230.  
  231. $patch = diff( $original, $updated );
  232. $new = patch( $original, $patch );
  233. echo $patch , "\n", $new;
Advertisement
Add Comment
Please, Sign In to add comment