Advertisement
Guest User

StackOverflow Question - dunc

a guest
Jun 15th, 2012
594
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 6.46 KB | None | 0 0
  1. <?php
  2.  
  3.     set_time_limit(3000);
  4.     ini_set('max_execution_time', 3000);
  5.  
  6. function matchTerms($text, $terms) {
  7.  
  8.     // Each dynamically constructed regexp will contain at most 70 subpatterns
  9. define('GROUPS_PER_REGEXPS', 70);
  10.  
  11.   $result = array();
  12.   $t = 0;
  13.   $termCount = count($terms);
  14.   reset($terms);
  15.   while ($t < $termCount) {
  16.     // Maps capturing group identifiers to term ids
  17.     $termMapping = array();
  18.  
  19.     // Dynamically construct regexp
  20.     $groups = '';
  21.     $c = 1;
  22.     while (list($termId, $termPattern) = each($terms)) {
  23.       if (!empty($groups)) {
  24.         $groups .= '|';
  25.       }
  26.       // Match word boundaries, so we don't capture "B. tricotisomeramblingstring"
  27.       $groups .= '(\b' . $termPattern . '\b)';
  28.       $termMapping[$c++] = $termId;
  29.       if (++$t % GROUPS_PER_REGEXPS == 0) {
  30.         break;
  31.       }
  32.     }
  33.     $regexp = "/$groups/m";
  34.  
  35.     preg_match_all($regexp, $text, $matches, PREG_OFFSET_CAPTURE);
  36.     for ($i = 1; $i < $c; $i++) {
  37.       foreach ($matches[$i] as $matchData) {
  38.         // matchData[0] holds matched string, e.g. Benthochromis tricoti
  39.         // matchData[1] holds offset, e.g. 15
  40.         if (isset($matchData[0]) && !empty($matchData[0])) {
  41.           $result[] = array(
  42.             'text' => $matchData[0],
  43.             'offset' => $matchData[1],
  44.             'id' => $termMapping[$i],
  45.           );
  46.         }
  47.       }
  48.     }
  49.   }
  50.   // Sort by offset in descending order
  51.   usort($result, function($a, $b) {
  52.     return $a['offset'] > $b['offset'] ? -1 : 1;
  53.   });
  54.   return $result;
  55. }
  56.  
  57. function filter( $html, $terms, $type ) {
  58.  
  59. $doc = DOMDocument::loadHTML($html);
  60.  
  61. // Stack will be used to avoid recursive functions
  62. $stack = new SplStack;
  63. $stack->push($doc);
  64. while (!$stack->isEmpty()) {
  65.   $node = $stack->pop();
  66.   if ($node->nodeType == XML_TEXT_NODE && $node->parentNode instanceof DOMElement) {
  67.     // $node represents text node
  68.     //  and it's inside a tag (second condition in the statement above)
  69.  
  70.     // Check that this text is not wrapped in <a> tag
  71.     //  as we don't want to wrap it twice
  72.     if ($node->parentNode->tagName != 'a') {
  73.       $matches = matchTerms($node->wholeText, $terms);
  74.       foreach ($matches as $match) {
  75.         // Create new link element in the DOM
  76.         $link = $doc->createElement('a', $match['text']);
  77.         if ( $type == "species" ) {
  78.             $link->setAttribute('href', '/species/' . $match['id'] . '/');
  79.             $link->setAttribute('rel', '/species/' . $match['id'] . '/?hover=true');
  80.             $link->setAttribute('class', 'link_species');
  81.         } else {
  82.             $link->setAttribute('href', '/glossary/' . $match['id'] . '/');
  83.             $link->setAttribute('rel', '/glossary/' . $match['id'] . '/?hover=true');
  84.             $link->setAttribute('class', 'link_glossary');
  85.         }
  86.  
  87.         // Save the text after the link
  88.         $remainingText = $node->splitText($match['offset'] + strlen($match['text']));
  89.         // Save the text before the link
  90.         $linkText = $node->splitText($match['offset']);
  91.  
  92.         // Replace $linkText with $link node
  93.         //  i.e. 'something' becomes '<a href="..">something</a>'
  94.         $node->parentNode->replaceChild($link, $linkText);
  95.       }
  96.     }
  97.   }
  98.   if ($node->hasChildNodes()) {
  99.     foreach ($node->childNodes as $childNode) {
  100.       $stack->push($childNode);
  101.     }
  102.   }
  103. }
  104.  
  105. $body = $doc->getElementsByTagName('body');
  106. return $doc->saveHTML($body->item(0));
  107.  
  108. }
  109.  
  110.  
  111. function convert( $string ) {
  112.     $search = array( "&#039;", "&quot;", "&nbsp;" );
  113.     $replace = array( '"', '"', ' ' );
  114.    
  115.     return str_replace( $search, $replace, $string );
  116. }
  117.  
  118.  
  119. /* #########################################################
  120.    process the post... */
  121. function run_filter( $post_id ) {
  122.     global $wpdb;
  123.  
  124.     /* get the glossary terms */
  125.     $results = $wpdb->get_results( 'SELECT post_title AS list, post_name AS slug FROM wp_posts WHERE post_status="publish" AND post_type="glossary" AND post_parent>0' );
  126.    
  127.     $glossary_terms = array();
  128.    
  129.     foreach ( $results as $row )
  130.         $glossary_terms[$row->slug] = '(?:' . preg_quote( trim( strip_tags ( convert( $row->list ) ) ), '/' ) . ')';
  131.    
  132.     /* get the species terms */
  133.     $results = $wpdb->get_results( 'SELECT posts.post_name AS slug, posts.id AS post_id, meta1.meta_value AS genus, meta2.meta_value AS species
  134.         FROM wp_posts posts
  135.         LEFT OUTER JOIN wp_postmeta meta1 ON posts.id = meta1.post_id
  136.         AND meta1.meta_key =  "genus"
  137.         LEFT OUTER JOIN wp_postmeta meta2 ON posts.id = meta2.post_id
  138.         AND meta2.meta_key =  "species"
  139.         WHERE posts.post_type =  "species"' );
  140.    
  141.     $species_terms = array();
  142.    
  143.     foreach ( $results as $row ) {
  144.         if ( isset( $row->genus ) && isset( $row->species ) ) {
  145.             $genus = preg_quote( trim( strip_tags( convert( $row->genus ) ) ), '/' );
  146.             $species = preg_quote( trim( strip_tags ( convert( $row->species ) ) ), '/' );
  147.             $genus_initial = preg_quote( substr( trim( strip_tags( convert( $row->genus ) ) ), 0, 1 ) );
  148.             $id = $row->slug;
  149.            
  150.             if ( ctype_alpha( $genus_initial ) )
  151.                 $pattern = '(?:' . $genus . '|' . $genus_initial . '\.) ' . $species;
  152.             else
  153.                 $pattern = '(?:' . $genus . ') ' . $species;
  154.            
  155.             $species_terms[$id] = $pattern;
  156.         }
  157.     }
  158.    
  159.     $species_terms = array_unique( $species_terms );
  160.  
  161.     $categories = get_category_by_slug('article');
  162.     $category_id = $categories->term_id;
  163.    
  164.     $post = wp_get_single_post( $post_id );
  165.     $updated = $post;
  166.    
  167.     if ( $post->post_type == "species" || ( $post->post_type == "post" && in_array( $category_id, get_the_category( $post_id ), true ) ) ) {
  168.        
  169.         $updated->post_content = filter( $post->post_content, $species_terms, "species" );
  170.         $updated->post_content = filter( $post->post_content, $glossary_terms, "glossary" );
  171.        
  172.         wp_update_post( $updated );
  173.  
  174.         if ( $post->post_type == "species" ) {
  175.             $meta = get_post_custom( $post_id );
  176.            
  177.             $values_to_filter = array( "distribution", "habitat", "max_size", "aquarium_size", "maintenance", "water_chemistry", "diet", "behaviour", "dimorphism", "reproduction", "misc_notes" );
  178.            
  179.             foreach ( $meta as $key => $val ) {
  180.                 if ( isset( $val ) && isset( $val[0] ) && !empty( $val[0] ) ) {
  181.                     if ( in_array( $key, $values_to_filter ) ) {
  182.                         $updated_meta = filter( $val[0], $species_terms, "species" );
  183.                         $updated_meta = filter( $updated_meta, $glossary_terms, "glossary" );
  184.                         update_post_meta( $post_id, $key, $updated_meta );
  185.                     }
  186.                 }
  187.             }
  188.            
  189.         }
  190.     } else if ( $post->post_type == "post" ) {     
  191.         $updated->post_content = filter_species( $post->post_content );
  192.         wp_update_post( $updated );
  193.     }
  194.  
  195. }
  196.  
  197. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement