Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

StackOverflow Question - dunc

By: a guest on Jun 15th, 2012  |  syntax: PHP  |  size: 6.46 KB  |  views: 56  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. <?php
  2.  
  // Raise the script's execution ceiling to 3000 seconds. Both calls target
  // the same max_execution_time setting; the order of the original is kept.
  set_time_limit( 3000 );
  ini_set( 'max_execution_time', 3000 );
  5.  
  /**
   * Finds every occurrence of the given term patterns in a piece of text.
   *
   * The terms are compiled into alternation regexps holding at most
   * GROUPS_PER_REGEXPS capturing groups each, one group per term, so the
   * number of the group that matched identifies the term.
   *
   * @param string $text  Text to search.
   * @param array  $terms Map of term id => regexp fragment. Fragments are
   *                      embedded between "/" delimiters as-is, so they must
   *                      already be escaped for that delimiter.
   * @return array List of array( 'text' => matched string,
   *               'offset' => byte offset of the match in $text,
   *               'id' => term id ), ordered from highest to lowest offset.
   */
  function matchTerms($text, $terms) {

    // Each dynamically constructed regexp will contain at most 70 subpatterns.
    // The guard matters: this function is called repeatedly and defining a
    // constant twice raises an error.
    if (!defined('GROUPS_PER_REGEXPS')) {
      define('GROUPS_PER_REGEXPS', 70);
    }

    $result = array();

    // array_chunk() with preserved keys does the batching that used to be
    // driven by each(), which no longer exists in PHP 8.
    foreach (array_chunk($terms, GROUPS_PER_REGEXPS, true) as $batch) {
      // Maps capturing group numbers to term ids
      $termMapping = array();
      $groups = array();
      $c = 1;
      foreach ($batch as $termId => $termPattern) {
        // Match word boundaries, so we don't capture "B. tricotisomeramblingstring"
        $groups[] = '(\b' . $termPattern . '\b)';
        $termMapping[$c++] = $termId;
      }
      $regexp = '/' . implode('|', $groups) . '/m';

      // false (pattern error) and 0 (no match) both mean there is nothing to
      // collect from this batch.
      if (!preg_match_all($regexp, $text, $matches, PREG_OFFSET_CAPTURE)) {
        continue;
      }

      for ($i = 1; $i < $c; $i++) {
        foreach ($matches[$i] as $matchData) {
          // matchData[0] holds matched string, e.g. Benthochromis tricoti
          // matchData[1] holds offset, e.g. 15
          // Groups that did not take part in a match are reported as an empty
          // string; compare against '' rather than empty() so "0" survives.
          if (isset($matchData[0]) && $matchData[0] !== '') {
            $result[] = array(
              'text' => $matchData[0],
              'offset' => $matchData[1],
              'id' => $termMapping[$i],
            );
          }
        }
      }
    }

    // Sort by offset in descending order. Equal offsets must compare as 0 or
    // the comparator contradicts itself when its arguments are swapped.
    usort($result, function($a, $b) {
      if ($a['offset'] == $b['offset']) {
        return 0;
      }
      return $a['offset'] > $b['offset'] ? -1 : 1;
    });
    return $result;
  }
  56.  
  /**
   * Wraps every occurrence of the given terms found in an HTML fragment in a
   * link to the term's page.
   *
   * Text that already sits inside an <a> element (at any depth) is left
   * alone. Each link gets href "/<section>/<id>/", rel
   * "/<section>/<id>/?hover=true" and class "link_<section>", where <section>
   * is "species" when $type is "species" and "glossary" otherwise.
   *
   * @param string $html  HTML fragment to process. It is parsed as UTF-8.
   * @param array  $terms Map of term id => regexp fragment, as accepted by
   *                      matchTerms().
   * @param string $type  "species" for species links; any other value yields
   *                      glossary links.
   * @return string The serialized <body> element of the parsed fragment
   *                (wrapper tag included). Empty or unparsable input is
   *                returned unchanged.
   */
  function filter( $html, $terms, $type ) {

    // loadHTML() rejects empty input, and there is nothing to link in it.
    if ( trim( (string) $html ) === '' ) {
      return $html;
    }

    $section = ( $type == "species" ) ? 'species' : 'glossary';

    // loadHTML() is an instance method; the static call is fatal on PHP 8.
    $doc = new DOMDocument();

    // Real-world markup makes libxml emit warnings; collect them quietly and
    // restore the caller's error mode afterwards.
    $previousErrorMode = libxml_use_internal_errors( true );
    // Without an encoding hint the parser treats the input as ISO-8859-1 and
    // mangles multibyte characters. Assumes the stored content is UTF-8.
    $loaded = $doc->loadHTML( '<?xml encoding="UTF-8">' . $html );
    libxml_clear_errors();
    libxml_use_internal_errors( $previousErrorMode );

    if ( !$loaded ) {
      return $html;
    }

    // Stack will be used to avoid recursive functions
    $stack = new SplStack;
    $stack->push( $doc );
    while ( !$stack->isEmpty() ) {
      $node = $stack->pop();

      if ( $node->nodeType == XML_TEXT_NODE ) {
        if ( !( $node->parentNode instanceof DOMElement ) ) {
          continue;
        }

        // Use the node's own data: the match offsets below index into exactly
        // this string.
        $text = $node->data;
        $matches = matchTerms( $text, $terms );
        if ( empty( $matches ) ) {
          continue;
        }

        // Walk the matches left to right; at equal offsets prefer the longer
        // match. Sorting here keeps this loop independent of the order in
        // which matchTerms() hands the matches over.
        usort( $matches, function( $a, $b ) {
          if ( $a['offset'] != $b['offset'] ) {
            return $a['offset'] < $b['offset'] ? -1 : 1;
          }
          return strlen( $b['text'] ) - strlen( $a['text'] );
        } );

        // Rebuild the text node as plain-text / link / plain-text pieces.
        // All slicing happens on the PHP string in bytes, the same unit the
        // regexp offsets use, so multibyte text cannot shift the cut points.
        $fragment = $doc->createDocumentFragment();
        $cursor = 0;
        foreach ( $matches as $match ) {
          // Skip a match that overlaps one that was already linked.
          if ( $match['offset'] < $cursor ) {
            continue;
          }
          if ( $match['offset'] > $cursor ) {
            $fragment->appendChild( $doc->createTextNode( substr( $text, $cursor, $match['offset'] - $cursor ) ) );
          }

          // Create new link element in the DOM. The label goes in as a text
          // node because createElement()'s value argument does not escape "&".
          $link = $doc->createElement( 'a' );
          $link->appendChild( $doc->createTextNode( $match['text'] ) );
          $link->setAttribute( 'href', '/' . $section . '/' . $match['id'] . '/' );
          $link->setAttribute( 'rel', '/' . $section . '/' . $match['id'] . '/?hover=true' );
          $link->setAttribute( 'class', 'link_' . $section );
          $fragment->appendChild( $link );

          $cursor = $match['offset'] + strlen( $match['text'] );
        }
        if ( $cursor < strlen( $text ) ) {
          $fragment->appendChild( $doc->createTextNode( substr( $text, $cursor ) ) );
        }

        // Swap the original text node for the rebuilt pieces,
        //  i.e. 'something' becomes '<a href="..">something</a>'
        $node->parentNode->replaceChild( $fragment, $node );
        continue;
      }

      // Do not descend into existing links, as we don't want to wrap text in
      // <a> twice, however deeply it is nested inside the link.
      if ( $node instanceof DOMElement && strtolower( $node->tagName ) == 'a' ) {
        continue;
      }

      if ( $node->hasChildNodes() ) {
        foreach ( $node->childNodes as $childNode ) {
          $stack->push( $childNode );
        }
      }
    }

    $body = $doc->getElementsByTagName( 'body' );
    return $doc->saveHTML( $body->item( 0 ) );

  }
  109.  
  110.  
  /**
   * Replaces three HTML entities with the literal characters they stand for:
   * "&#039;" (apostrophe), "&quot;" (double quote) and "&nbsp;" (written back
   * as a regular space).
   *
   * @param string $string Text that may contain those entities.
   * @return string The text with the entities replaced.
   */
  function convert( $string ) {
          // "&#039;" is the apostrophe entity, so it maps to ' and not to ".
          $search = array( "&#039;", "&quot;", "&nbsp;" );
          $replace = array( "'", '"', ' ' );

          return str_replace( $search, $replace, $string );
  }
  117.  
  118.  
  /* #########################################################
     process the post... */
  /**
   * Links glossary and species terms inside a post and saves the result.
   *
   * - "species" posts and "post" posts in the "article" category get species
   *   and glossary links in their content; species posts additionally get
   *   both kinds of links in a fixed list of meta fields.
   * - Any other "post" has its content passed through the species filter.
   * - Other post types are left untouched.
   *
   * @param int $post_id ID of the post to process.
   * @return void
   */
  function run_filter( $post_id ) {
          global $wpdb;

          $post = get_post( $post_id );
          if ( !$post ) {
                  return;
          }

          /* get the glossary terms */
          // $wpdb->posts / $wpdb->postmeta carry the site's real table prefix.
          $results = $wpdb->get_results(
                  "SELECT post_title AS list, post_name AS slug
                  FROM {$wpdb->posts}
                  WHERE post_status = 'publish' AND post_type = 'glossary' AND post_parent > 0"
          );

          $glossary_terms = array();

          foreach ( $results as $row ) {
                  $title = trim( strip_tags( convert( $row->list ) ) );
                  // An empty title would compile to a pattern that matches nothing useful.
                  if ( $title === '' )
                          continue;
                  $glossary_terms[$row->slug] = '(?:' . preg_quote( $title, '/' ) . ')';
          }

          /* get the species terms */
          $results = $wpdb->get_results(
                  "SELECT posts.post_name AS slug, posts.ID AS post_id, meta1.meta_value AS genus, meta2.meta_value AS species
                  FROM {$wpdb->posts} posts
                  LEFT OUTER JOIN {$wpdb->postmeta} meta1 ON posts.ID = meta1.post_id
                  AND meta1.meta_key = 'genus'
                  LEFT OUTER JOIN {$wpdb->postmeta} meta2 ON posts.ID = meta2.post_id
                  AND meta2.meta_key = 'species'
                  WHERE posts.post_type = 'species'"
          );

          $species_terms = array();

          foreach ( $results as $row ) {
                  if ( isset( $row->genus ) && isset( $row->species ) ) {
                          $genus_name = trim( strip_tags( convert( $row->genus ) ) );
                          $species_name = trim( strip_tags( convert( $row->species ) ) );
                          if ( $genus_name === '' || $species_name === '' )
                                  continue;

                          $genus = preg_quote( $genus_name, '/' );
                          $species = preg_quote( $species_name, '/' );
                          $genus_initial = substr( $genus_name, 0, 1 );
                          $id = $row->slug;

                          // Also accept the abbreviated form, e.g. "B. tricoti",
                          // when the genus starts with a letter.
                          if ( ctype_alpha( $genus_initial ) )
                                  $pattern = '(?:' . $genus . '|' . $genus_initial . '\.) ' . $species;
                          else
                                  $pattern = '(?:' . $genus . ') ' . $species;

                          $species_terms[$id] = $pattern;
                  }
          }

          $species_terms = array_unique( $species_terms );

          // get_the_category() returns term objects, so a strict in_array() of
          // the numeric id against it can never succeed; in_category() performs
          // the intended membership test. get_category_by_slug() returns false
          // when the category does not exist.
          $category = get_category_by_slug( 'article' );
          $is_article = $post->post_type == "post" && $category && in_category( (int) $category->term_id, $post );

          if ( $post->post_type == "species" || $is_article ) {

                  // Feed the species-linked markup into the glossary pass so
                  // that both sets of links end up in the saved content.
                  $content = filter( $post->post_content, $species_terms, "species" );
                  $post->post_content = filter( $content, $glossary_terms, "glossary" );

                  wp_update_post( $post );

                  if ( $post->post_type == "species" ) {
                          $meta = get_post_custom( $post_id );

                          $values_to_filter = array( "distribution", "habitat", "max_size", "aquarium_size", "maintenance", "water_chemistry", "diet", "behaviour", "dimorphism", "reproduction", "misc_notes" );

                          foreach ( $meta as $key => $val ) {
                                  if ( !empty( $val[0] ) && in_array( $key, $values_to_filter ) ) {
                                          $updated_meta = filter( $val[0], $species_terms, "species" );
                                          $updated_meta = filter( $updated_meta, $glossary_terms, "glossary" );
                                          update_post_meta( $post_id, $key, $updated_meta );
                                  }
                          }

                  }
          } else if ( $post->post_type == "post" ) {
                  // NOTE(review): filter_species() is not defined in this file.
                  // It is used when some other file provides it; otherwise the
                  // species pass of filter() stands in, which is presumably
                  // what was intended. Confirm.
                  if ( function_exists( 'filter_species' ) )
                          $post->post_content = filter_species( $post->post_content );
                  else
                          $post->post_content = filter( $post->post_content, $species_terms, "species" );
                  wp_update_post( $post );
          }

  }
  196.  
  197. ?>
clone this paste RAW Paste Data