Guest User

parser.php

a guest
Jan 31st, 2019
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 48.76 KB | None | 0 0
  1. <?php
  2.  
  3. /*
  4.  * Transposh v1.0.4.1
  5.  * http://transposh.org/
  6.  *
  7.  * Copyright 2018, Team Transposh
  8.  * Licensed under the GPL Version 2 or higher.
  9.  * http://transposh.org/license
  10.  *
  11.  * Date: Mon, 31 Dec 2018 13:56:20 +0200
  12.  */
  13.  
  14. require_once("shd/simple_html_dom.php");
  15. require_once("constants.php");
  16. require_once("logging.php");
  17. require_once("utils.php");
  18.  
  19. /**
  20.  * parserstats class - holds parser statistics
  21.  */
  22. class tp_parserstats {
  23.  
  24.     /** @var int Holds the total phrases the parser encountered */
  25.     public $total_phrases;
  26.  
  27.     /** @var int Holds the number of phrases that had translation */
  28.     public $translated_phrases;
  29.  
  30.     /** @var int Holds the number of phrases that had human translation */
  31.     public $human_translated_phrases;
  32.  
  33.     /** @var int Holds the number of phrases that are hidden - yet still somewhat viewable (such as the title attribure) */
  34.     public $hidden_phrases;
  35.  
  36.     /** @var int Holds the number of phrases that are hidden and translated */
  37.     public $hidden_translated_phrases;
  38.  
  39.     /** @var int Holds the amounts of hidden spans created for translation */
  40.     public $hidden_translateable_phrases;
  41.  
  42.     /** @var int Holds the number of phrases that are hidden and probably won't be viewed - such as meta keys */
  43.     public $meta_phrases;
  44.  
  45.     /** @var int Holds the number of translated phrases that are hidden and probably won't be viewed - such as meta keys */
  46.     public $meta_translated_phrases;
  47.  
  48.     /** @var float Holds the time translation took */
  49.     public $time;
  50.  
  51.     /** @var int Holds the time translation started */
  52.     private $start_time;
  53.  
  54.     /**
  55.      * This function is when the object is initialized, which is a good time to start ticking.
  56.      */
  57.     function __construct() {
  58.         $this->start_time = microtime(true);
  59.     }
  60.  
  61.     /**
  62.      * Calculated values - computer translated phrases
  63.      * @return int How many phrases were auto-translated
  64.      */
  65.     function get_computer_translated_phrases() {
  66.         return $this->translated_phrases - $this->human_translated_phrases;
  67.     }
  68.  
  69.     /**
  70.      * Calculated values - missing phrases
  71.      * @return int How many phrases are missing
  72.      */
  73.     function get_missing_phrases() {
  74.         return $this->total_phrases - $this->translated_phrases;
  75.     }
  76.  
  77.     /**
  78.      * Start the timer
  79.      */
  80.     function start_timing() {
  81.         $this->start_time = microtime(true);
  82.     }
  83.  
  84.     /**
  85.      * Stop timing, store time for reference
  86.      */
  87.     function stop_timing() {
  88.         $this->time = number_format(microtime(true) - $this->start_time, 3);
  89.     }
  90.  
  91. }
  92.  
  93. /**
  94.  * Parser class - allows phrase marking and translation with callback functions
  95.  */
  96. class tp_parser {
  97.  
    /** @var boolean When true, parsetext() splits phrases on sentence breakers (punctuation) */
    private $punct_breaks = true;

    /** @var boolean When true, parsetext() splits phrases on numbers */
    private $num_breaks = true;

    /** @var boolean When true, parsetext() splits phrases on html entities */
    private $ent_breaks = true;
    // functions that need to be defined... //
    /** @var callable|null Called by translate_tagging() with an iframe src, its return value becomes the new src */
    public $url_rewrite_func = null;

    /** @var callable|null Callback - not called in the part of the class shown here; presumably fetches a translation, confirm against caller */
    public $fetch_translate_func = null;

    /** @var callable|null Callback - not called in the part of the class shown here; presumably prefetches translations, confirm against caller */
    public $prefetch_translate_func = null;

    /** @var callable|null Callback - not called in the part of the class shown here */
    public $split_url_func = null;

    /** @var callable|null Callback - not called in the part of the class shown here */
    public $fix_src_tag_func = null;

    /** @var int stores the number of the last used span_id (incremented by create_edit_span()) */
    private $span_id = 0;

    /** @var simple_html_dom_node Contains the current node (set by translate_tagging(), phrase nodes are appended to it) */
    private $currentnode;

    /** @var simple_html_dom Contains the document dom model */
    private $html;

    /** @var mixed NOTE(review): presumably marks a right-to-left target language - not used in the code shown here, confirm */
    public $dir_rtl;

    /** @var string Contains the iso of the target language */
    public $lang;

    /** @var boolean Contains the fact that this language is the default one (only parse other language spans) */
    public $default_lang = false;

    /** @var string Contains the iso of the source language - if a lang attribute is found, assumed to be en by default */
    public $srclang;

    /** @var boolean Set to true once translate_tagging() reaches the body tag */
    private $inbody = false;

    /** @var boolean hold fact that we are in select or other similar elements (select, textarea, noscript) */
    private $inselect = false;

    /** @var mixed Not used in the code shown here */
    public $is_edit_mode;

    /** @var mixed Not used in the code shown here */
    public $is_auto_translate;

    /** @var mixed When truthy, parsetext() skips a CDATA wrapper around the text */
    public $feed_fix;

    /** @var boolean should we attempt to handle page as json */
    public $might_json = false;

    /** @var boolean Not used in the code shown here */
    public $allow_ad = false;
    //first three are html, later 3 come from feeds xml (link is problematic...)
    /** @var array Tag names (as keys) whose nodes translate_tagging() does not process */
    protected $ignore_tags = array('script' => 1, 'style' => 1, 'code' => 1, 'wfw:commentrss' => 1, 'comments' => 1, 'guid' => 1);

    /** @var tp_parserstats Contains parsing statistics */
    private $stats;

    /** @var boolean|int Are we inside a translated gettext (parsetext() adds/subtracts 1 at gettext markers, so this acts as a depth) */
    private $in_get_text = false;

    /** @var boolean|int Are we inside an inner text %s in gettext (same counting scheme as $in_get_text) */
    private $in_get_text_inner = false;

    /** @var string Additional header information */
    public $added_header;

    /** @var array Contains reference to changeable a tags (collected by translate_tagging()) */
    private $atags = array();

    /** @var array Contains reference to changeable option values (collected by translate_tagging()) */
    private $otags = array();

    /** @var boolean Set to true by create_edit_span() once at least one span was created */
    public $edit_span_created = false;

    /** @var array store all values that may be prefetched (phrase => true, filled by tag_phrase()) */
    private $prefetch_phrases = array();
  171.  
  172.     /**
  173.      * Determine if the current position in buffer is a white space.
  174.      * @param char $char
  175.      * @return boolean true if current position marks a white space
  176.      */
  177.     function is_white_space($char) {
  178.         if (!$char)
  179.             return TRUE;
  180.         return strspn($char, " \t\r\n\0\x0B");
  181.     }
  182.  
  183.     /**
  184.      * Determine if the current position in page points to a character in the
  185.      * range of a-z (case insensetive).
  186.      * @return boolean true if a-z
  187.      */
  188.     function is_a_to_z_character($char) {
  189.         return (($char >= 'a' && $char <= 'z') || ($char >= 'A' && $char <= 'Z')) ? true : false;
  190.     }
  191.  
  192.     /**
  193.      * Determine if the current position is a digit.
  194.      * @return boolean true if a digit
  195.      */
  196.     function is_digit($char) {
  197.         return (($char >= '0' && $char <= '9')) ? true : false;
  198.     }
  199.  
  200.     /**
  201.      * Determine if the current position is an html entity - such as &amp; or &#8220;.
  202.      * @param string $string string to evalute
  203.      * @param int $position where to check for entities
  204.      * @return int length of entity
  205.      */
  206.     function is_html_entity($string, $position) {
  207.         if ($string[$position] == '&') {
  208.             $end_pos = $position + 1;
  209.             while ($string[$end_pos] == '#' || $this->is_digit($string[$end_pos]) || $this->is_a_to_z_character($string[$end_pos]))
  210.                 ++$end_pos;
  211.             if ($string[$end_pos] == ';')
  212.                 return $end_pos - $position + 1;
  213.         }
  214.         return 0;
  215.     }
  216.  
  217.     /**
  218.      * Some entities will not cause a break if they don't have whitespace after them
  219.      * such as Jack`s apple.
  220.      * `uncatagorized` will break on the later entity
  221.      * Added " quotes to this claim, as it is used in some languages in a similar fashion
  222.      * @param string $entity - html entity to check
  223.      * @return boolean true if not a breaker (apostrophy)
  224.      */
  225.     function is_entity_breaker($entity) { // &#8216;&#8217;??
  226.         return !(stripos('&#8216;&#8217;&apos;&quot;&#039;&#39;&rsquo;&lsquo;&rdquo;&ldquo;', $entity) !== FALSE);
  227.     }
  228.  
  229.     /**
  230.      * Some entities are to be regarded as simple letters in most cases
  231.       &Agrave;    &#192;    À      À      latin capital letter A with grave
  232.       &Aacute;    &#193;    Á  Á  latin capital letter A with acute
  233.       &Acirc;     &#194;    Â  Â  latin capital letter A with circumflex
  234.       &Atilde;    &#195;    Ã  Ã  latin capital letter A with tilde
  235.       &Auml;      &#196;    Ä  Ä  latin capital letter A with diaeresis
  236.       &Aring;     &#197;    Å  Å  latin capital letter A with ring above
  237.       &AElig;     &#198;    Æ  Æ  latin capital letter AE
  238.       &Ccedil;    &#199;    Ç  Ç  latin capital letter C with cedilla
  239.       &Egrave;    &#200;    È  È  latin capital letter E with grave
  240.       &Eacute;    &#201;    É  É  latin capital letter E with acute
  241.       &Ecirc;     &#202;    Ê  Ê  latin capital letter E with circumflex
  242.       &Euml;      &#203;    Ë  Ë  latin capital letter E with diaeresis
  243.       &Igrave;    &#204;    Ì  Ì  latin capital letter I with grave
  244.       &Iacute;    &#205;    Í  Í  latin capital letter I with acute
  245.       &Icirc;     &#206;    Î  Î  latin capital letter I with circumflex
  246.       &Iuml;      &#207;    Ï  Ï  latin capital letter I with diaeresis
  247.       &ETH;       &#208;    Ð  Ð  latin capital letter ETH
  248.       &Ntilde;    &#209;    Ñ  Ñ  latin capital letter N with tilde
  249.       &Ograve;    &#210;    Ò  Ò  latin capital letter O with grave
  250.       &Oacute;    &#211;    Ó  Ó  latin capital letter O with acute
  251.       &Ocirc;     &#212;    Ô  Ô  latin capital letter O with circumflex
  252.       &Otilde;    &#213;    Õ  Õ  latin capital letter O with tilde
  253.       &Ouml;      &#214;    Ö  Ö  latin capital letter O with diaeresis
  254.       //&times;     &#215;  ×  ×  multiplication sign
  255.       &Oslash;    &#216;    Ø  Ø  latin capital letter O with stroke
  256.       &Ugrave;    &#217;    Ù  Ù  latin capital letter U with grave
  257.       &Uacute;    &#218;    Ú  Ú  latin capital letter U with acute
  258.       &Ucirc;     &#219;    Û  Û  latin capital letter U with circumflex
  259.       &Uuml;      &#220;    Ü  Ü  latin capital letter U with diaeresis
  260.       &Yacute;    &#221;    Ý  Ý  latin capital letter Y with acute
  261.       &THORN;     &#222;    Þ  Þ  latin capital letter THORN
  262.       &szlig;     &#223;    ß  ß  latin small letter sharp s
  263.       &agrave;    &#224;    à  à  latin small letter a with grave
  264.       &aacute;    &#225;    á  á  latin small letter a with acute
  265.       &acirc;     &#226;    â  â  latin small letter a with circumflex
  266.       &atilde;    &#227;    ã  ã  latin small letter a with tilde
  267.       &auml;      &#228;    ä  ä  latin small letter a with diaeresis
  268.       &aring;     &#229;    å  å  latin small letter a with ring above
  269.       &aelig;     &#230;    æ  æ  latin small letter ae
  270.       &ccedil;    &#231;    ç  ç  latin small letter c with cedilla
  271.       &egrave;    &#232;    è  è  latin small letter e with grave
  272.       &eacute;    &#233;    é  é  latin small letter e with acute
  273.       &ecirc;     &#234;    ê  ê  latin small letter e with circumflex
  274.       &euml;      &#235;    ë  ë  latin small letter e with diaeresis
  275.       &igrave;    &#236;    ì  ì  latin small letter i with grave
  276.       &iacute;    &#237;    í  í  latin small letter i with acute
  277.       &icirc;     &#238;    î  î  latin small letter i with circumflex
  278.       &iuml;      &#239;    ï  ï  latin small letter i with diaeresis
  279.       &eth;       &#240;    ð  ð  latin small letter eth
  280.       &ntilde;    &#241;    ñ  ñ  latin small letter n with tilde
  281.       &ograve;    &#242;    ò  ò  latin small letter o with grave
  282.       &oacute;    &#243;    ó  ó  latin small letter o with acute
  283.       &ocirc;     &#244;    ô  ô  latin small letter o with circumflex
  284.       &otilde;    &#245;    õ  õ  latin small letter o with tilde
  285.       &ouml;      &#246;    ö  ö  latin small letter o with diaeresis
  286.       //&divide;  &#247;    ÷  ÷  division sign
  287.       &oslash;    &#248;    ø  ø  latin small letter o with stroke
  288.       &ugrave;    &#249;    ù  ù  latin small letter u with grave
  289.       &uacute;    &#250;    ú  ú  latin small letter u with acute
  290.       &ucirc;     &#251;    û  û  latin small letter u with circumflex
  291.       &uuml;      &#252;    ü  ü  latin small letter u with diaeresis
  292.       &yacute;    &#253;    ý  ý  latin small letter y with acute
  293.       &thorn;     &#254;    þ  þ  latin small letter thorn
  294.       &yuml;      &#255;    ÿ  ÿ  latin small letter y with diaeresis
  295.  
  296.       Latin-1 extended
  297.       &OElig;     &#338;                        latin capital ligature OE
  298.       &oelig;     &#339;                        latin small ligature oe
  299.       &Scaron;    &#352;                        latin capital letter S with caron
  300.       &scaron;    &#353;                        latin small letter s with caron
  301.       &Yuml;      &#376;                        latin capital letter Y with diaeresis
  302.      */
  303.     function is_entity_letter($entity) {
  304.         tp_logger("checking ($entity) - " . htmlentities($entity), 4);
  305.         $entnum = (int) substr($entity, 2);
  306.         // skip multiply and divide (215, 247)
  307.         if (($entnum >= 192 && $entnum <= 214) || ($entnum >= 216 && $entnum <= 246) || ($entnum >= 248 && $entnum <= 696)) {
  308.             return true;
  309.         }
  310.         $entities = '&Agrave;&Aacute;&Acirc;&Atilde;&Auml;&Aring;&AElig;&Ccedil;&Egrave;&Eacute;&Ecirc;&Euml;&Igrave;&Iacute;&Icirc;&Iuml;&ETH;' .
  311.                 '&Ntilde;&Ograve;&Oacute;&Ocirc;&Otilde;&Ouml;&Oslash;&Ugrave;&Uacute;&Ucirc;&Uuml;&Yacute;&THORN;&szlig;' .
  312.                 '&oslash;&ugrave;&yuml;&oelig;&scaron;&nbsp;';
  313.         return (stripos($entities, $entity) !== FALSE);
  314.     }
  315.  
  316.     /**
  317.      * Determine if the current position in buffer is a sentence breaker, e.g. '.' or ',' .
  318.      * Note html markups are not considered sentence breaker within the scope of this function.
  319.      * @param char $char charcter checked if breaker
  320.      * @param char $nextchar needed for checking if . or - breaks
  321.      * @return int length of breaker if current position marks a break in sentence
  322.      */
  323.     function is_sentence_breaker($char, $nextchar, $nextnextchar) {
  324.         if (($char == '.' || $char == '-') && ($this->is_white_space($nextchar)))
  325.             return 1;
  326.         //,
  327.         if (ord($char) == 239 && ord($nextchar) == 188 && ord($nextnextchar) == 140)
  328.             return 3;
  329.         //。
  330.         if (ord($char) == 227 && ord($nextchar) == 128 && ord($nextnextchar) == 130)
  331.             return 3;
  332.         //、
  333.         if (ord($char) == 227 && ord($nextchar) == 128 && ord($nextnextchar) == 129)
  334.             return 3;
  335.         //;
  336.         if (ord($char) == 239 && ord($nextchar) == 188 && ord($nextnextchar) == 155)
  337.             return 3;
  338.         //:
  339.         if (ord($char) == 239 && ord($nextchar) == 188 && ord($nextnextchar) == 154)
  340.             return 3;
  341.         //∙
  342.         if (ord($char) == 226 && ord($nextchar) == 136 && ord($nextnextchar) == 153)
  343.             return 3;
  344.         //·
  345.         if (ord($char) == 194 && ord($nextchar) == 183)
  346.             return 2;
  347.         return (strpos(',?()[]{}"!:|;' . TP_GTXT_BRK . TP_GTXT_BRK_CLOSER . TP_GTXT_IBRK . TP_GTXT_IBRK_CLOSER, $char) !== false) ? 1 : 0; // TODO: might need to add < and > here
  348.     }
  349.  
  350.     /**
  351.      * Determines if the current position marks the begining of a number, e.g. 123 050-391212232
  352.      * @return int length of number.
  353.      */
  354.     function is_number($page, $position) {
  355.         return strspn($page, '0123456789-+$%#*,.\\/', $position);
  356.     }
  357.  
  358.     /**
  359.      * Create a phrase tag in the html dom tree
  360.      * @param int $start - beginning of phrase in element
  361.      * @param int $end - end of phrase in element
  362.      */
  363.     function tag_phrase($string, $start, $end) {
  364.         $phrase = trim(substr($string, $start, $end - $start));
  365.         $phrasefixed = trim(str_replace('&nbsp;', ' ', $phrase));
  366. //        $logstr = str_replace(array(chr(1),chr(2),chr(3),chr(4)), array('[1]','[2]','[3]','[4]'), $string);
  367. //        tp_logger ("p:$phrasefixed, s:$logstr, st:$start, en:$end, gt:{$this->in_get_text}, gti:{$this->in_get_text_inner}");
  368.         if ($this->in_get_text > $this->in_get_text_inner) {
  369.             tp_logger('not tagging ' . $phrase . ' assumed gettext translated', 4);
  370.             return;
  371.         }
  372.         if ($phrase) {
  373.             tp_logger('tagged phrase: ' . $phrase, 4);
  374.             $node = new simple_html_dom_node($this->html);
  375.             $node->tag = 'phrase';
  376.             $node->parent = $this->currentnode;
  377.             $this->currentnode->nodes[] = $node;
  378.             $node->_[HDOM_INFO_OUTER] = '';
  379.             $node->phrase = $phrasefixed;
  380.             $this->prefetch_phrases[$phrasefixed] = true;
  381.             $node->start = $start;
  382.             $node->len = strlen($phrase);
  383.             if ($this->srclang)
  384.                 $node->srclang = $this->srclang;
  385.             if ($this->inbody)
  386.                 $node->inbody = $this->inbody;
  387.             if ($this->inselect)
  388.                 $node->inselect = true;
  389.         }
  390.     }
  391.  
  392.     /**
  393.      * Breaks strings into substring according to some rules and common sense
  394.      * @param string $string - the string which is "broken" into smaller strings
  395.      */
  396.     function parsetext($string) {
  397.         $pos = 0;
  398.         //  $pos = skip_white_space($string, $pos);
  399.         // skip CDATA in feed_fix mode
  400.         if ($this->feed_fix) {
  401.             if (strpos($string, '<![CDATA[') === 0) {
  402.                 $pos = 9; // CDATA length
  403.                 $string = substr($string, 0, -3); // chop the last ]]>;
  404.             }
  405.         }
  406.  
  407.         $start = $pos;
  408.  
  409.         while ($pos < strlen($string)) {
  410.             // Some HTML entities make us break, almost all but apostrophies
  411.             if ($this->ent_breaks && $len_of_entity = $this->is_html_entity($string, $pos)) {
  412.                 $entity = substr($string, $pos, $len_of_entity);
  413.                 if (($this->is_white_space(@$string[$pos + $len_of_entity]) || $this->is_entity_breaker($entity)) && !$this->is_entity_letter($entity)) {
  414.                     tp_logger("entity ($entity) breaks", 4);
  415.                     $this->tag_phrase($string, $start, $pos);
  416.                     $start = $pos + $len_of_entity;
  417.                 }
  418.                 // skip nbsp starting a phrase
  419.                 tp_logger("entity ($entity)", 4);
  420.                 if ($entity === '&nbsp;' && $start === $pos) {
  421.                     $start = $pos + $len_of_entity;
  422.                 }
  423.                 //skip past entity
  424.                 $pos += $len_of_entity;
  425.             }
  426.             // we have a special case for <> tags which might have came to us (maybe in xml feeds) (we'll skip them...)
  427.             elseif ($string[$pos] == '<') {
  428.                 $this->tag_phrase($string, $start, $pos);
  429.                 while ($string[$pos] != '>' && $pos < strlen($string))
  430.                     $pos++;
  431.                 $pos++;
  432.                 $start = $pos;
  433.             } elseif ($string[$pos] == TP_GTXT_BRK || $string[$pos] == TP_GTXT_BRK_CLOSER) {
  434. //                $logstr = str_replace(array(chr(1),chr(2),chr(3),chr(4)), array('[1]','[2]','[3]','[4]'), $string);
  435. //                $closers = ($string[$pos] == TP_GTXT_BRK) ? '': 'closer';
  436. //                tp_logger(" $closers TEXT breaker $logstr start:$start pos:$pos gt:" . $this->in_get_text, 3);
  437.                 $this->tag_phrase($string, $start, $pos);
  438.                 ($string[$pos] == TP_GTXT_BRK) ? $this->in_get_text += 1 : $this->in_get_text -= 1;
  439.                 $pos++;
  440.                 $start = $pos;
  441.                 // reset state based on string start, no need to flip
  442.                 //$this->in_get_text = ($pos == 1);
  443.                 //if (!$this->in_get_text) $this->in_get_text_inner = false;
  444.             } elseif ($string[$pos] == TP_GTXT_IBRK || $string[$pos] == TP_GTXT_IBRK_CLOSER) {
  445. //                $logstr = str_replace(array(chr(1),chr(2),chr(3),chr(4)), array('[1]','[2]','[3]','[4]'), $string);
  446. //                $closers = ($string[$pos] == TP_GTXT_IBRK) ? '': 'closer';
  447. //                tp_logger("   $closers INNER text breaker $logstr start:$start pos:$pos gt:" . $this->in_get_text_inner, 3);
  448.                 //tp_logger("inner text breaker $start $pos $string " . (($this->in_get_text_inner) ? 'true' : 'false'), 5);
  449.                 $this->tag_phrase($string, $start, $pos);
  450.                 if ($this->in_get_text)
  451.                     ($string[$pos] == TP_GTXT_IBRK) ? $this->in_get_text_inner += 1 : $this->in_get_text_inner -= 1;
  452.                 $pos++;
  453.                 $start = $pos;
  454.                 //$this->in_get_text_inner = !$this->in_get_text_inner;
  455.             }
  456.             // will break translation unit when there's a breaker ",.[]()..."
  457.             elseif ($this->punct_breaks && $senb_len = $this->is_sentence_breaker($string[$pos], @$string[$pos + 1], @$string[$pos + 2])) {
  458. //                logger ("sentence breaker...");
  459.                 $this->tag_phrase($string, $start, $pos);
  460.                 $pos += $senb_len;
  461.                 $start = $pos;
  462.             }
  463.             // Numbers also break, if they are followed by whitespace (or a sentence breaker) (don't break 42nd) // TODO: probably by breaking entities too...
  464.             // also prefixed by whitespace?
  465.             elseif ($this->num_breaks && $num_len = $this->is_number($string, $pos)) {
  466. //                logger ("numnum... $num_len");
  467.                 // this is the case of B2 or B2,
  468.                 if (($start == $pos) || ($this->is_white_space($string[$pos - 1]) || ($this->is_sentence_breaker(@$string[$pos + $num_len - 1], @$string[$pos + $num_len], @$string[$pos + $num_len + 1]))) &&
  469.                         ($this->is_white_space(@$string[$pos + $num_len]) || $this->is_sentence_breaker(@$string[$pos + $num_len], @$string[$pos + $num_len + 1], @$string[$pos + $num_len + 2]))) {
  470.                     // we will now compensate on the number followed by breaker case, if we need to
  471. //                            logger ("compensate part1?");
  472.                     if (!(($start == $pos) || $this->is_white_space($string[$pos - 1]))) {
  473. //                            logger ("compensate part2?");
  474.                         if ($this->is_sentence_breaker($string[$pos + $num_len - 1], @$string[$pos + $num_len], @$string[$pos + $num_len + 1])) {
  475. //                            logger ("compensate 3?");
  476.                             $num_len--; //this makes the added number shorter by one, and the pos will be at a sentence breaker next so we don't have to compensate
  477.                         }
  478.                         $pos += $num_len;
  479.                         $num_len = 0; // we have already added this
  480.                     }
  481.                     $this->tag_phrase($string, $start, $pos);
  482.                     $start = $pos + $num_len /* +1 */;
  483.                 }
  484.                 $pos += $num_len/* + 1 */;
  485. //                logger ("numnumpos... $pos");
  486.             } else {
  487.                 // smarter marking of start location
  488.                 if ($start == $pos && $this->is_white_space($string[$pos]))
  489.                     $start++;
  490.                 $pos++;
  491.             }
  492.         }
  493.  
  494.         // the end is also some breaker
  495.         if ($pos > $start) {
  496.             $this->tag_phrase($string, $start, $pos);
  497.         }
  498.     }
  499.  
  500.     /**
  501.      * This recursive function works on the $html dom and adds phrase nodes to translate as needed
  502.      * it currently also rewrites urls, and should consider if this is smart
  503.      * @param simple_html_dom_node $node
  504.      */
  505.     function translate_tagging($node, $level = 0) {
  506.         $this->currentnode = $node;
  507.         // we don't want to translate non-translatable classes
  508.         if (stripos($node->class, NO_TRANSLATE_CLASS) !== false || stripos($node->class, NO_TRANSLATE_CLASS_GOOGLE) !== false)
  509.             return;
  510.  
  511.         // the node lang is the current node lang or its parent lang
  512.         if ($node->lang) {
  513.             // allow nesting of srclang (again - local var)
  514.             $prevsrclang = $this->srclang;
  515.             $this->srclang = strtolower($node->lang);
  516.             // using a local variable scope for later
  517.             $src_set_here = true;
  518.             // eliminate the lang tag from the html, since we aim to translate it
  519.             unset($node->lang);
  520.         }
  521.  
  522.         // we can only do translation for elements which are in the body, not in other places, and this must
  523.         // move here due to the possibility of early recurse in default language
  524.         if ($node->tag == 'body') {
  525.             $this->inbody = true;
  526.         }
  527.  
  528.         // this again should be here, the different behaviour on select and textarea
  529.         // for now - we assume that they can't include each other
  530.         elseif ($node->tag == 'select' || $node->tag == 'textarea' || $node->tag == 'noscript') {
  531.             $this->inselect = true;
  532.             $inselect_set_here = true;
  533.         }
  534.  
  535.         //support only_thislanguage class, (nulling the node if it should not display)
  536.         if (isset($src_set_here) && $src_set_here && $this->srclang != $this->lang && stripos($node->class, ONLY_THISLANGUAGE_CLASS) !== false) {
  537.             $this->srclang = $prevsrclang; //we should return to the previous src lang or it will be kept and carried
  538.             $node->outertext = '';
  539.             return;
  540.         }
  541.  
  542.         // if we are in the default lang, and we have no foreign langs classes, we'll recurse from here
  543.         // we also avoid processing if the node lang is the target lang
  544.         if (($this->default_lang && !$this->srclang) || ($this->srclang === $this->lang)) {
  545.             foreach ($node->nodes as $c) {
  546.                 $this->translate_tagging($c, $level + 1);
  547.             }
  548.             if (isset($src_set_here) && $src_set_here)
  549.                 $this->srclang = $prevsrclang;
  550.             if (isset($inselect_set_here) && $inselect_set_here)
  551.                 $this->inselect = false;
  552.             return;
  553.         }
  554.  
  555.         if (isset($this->ignore_tags[$node->tag]))
  556.             return;
  557.  
  558.         if ($node->tag == 'text') {
  559.             // this prevents translation of a link that just surrounds its address
  560.             if ($node->parent->tag == 'a' && $node->parent->href == $node->outertext) {
  561.                 return;
  562.             }
  563.             // link tags inners are to be ignored
  564.             if ($node->parent->tag == 'link') {
  565.                 return;
  566.             }
  567.             if (trim($node->outertext)) {
  568.                 $this->parsetext($node->outertext);
  569.             }
  570.         }
  571.         // for anchors we will rewrite urls if we can
  572.         elseif ($node->tag == 'a') {
  573.             array_push($this->atags, $node);
  574.         }
  575.         // same for options, although normally not required (ticket #34)
  576.         elseif ($node->tag == 'option') {
  577.             array_push($this->otags, $node);
  578.         }
  579.         // in submit type inputs, we want to translate the value
  580.         elseif ($node->tag == 'input' && $node->type == 'submit') {
  581.             $this->parsetext($node->value);
  582.         }
  583.         // for iframes we will rewrite urls if we can
  584.         elseif ($node->tag == 'iframe') {
  585.             if ($this->url_rewrite_func) {
  586.                 $node->src = call_user_func_array($this->url_rewrite_func, array($node->src));
  587.                 tp_logger('iframe: ' . $node->src, 4);
  588.             }
  589.         }
  590.  
  591.         // titles and placeholders are also good places to translate, exist in a, img, abbr, acronym
  592.         if ($node->title) {
  593.             $this->parsetext($node->title);
  594.         }
  595.         if ($node->placeholder) {
  596.             $this->parsetext($node->placeholder);
  597.         }
  598.         if ($node->alt) {
  599.             $this->parsetext($node->alt);
  600.         }
  601.  
  602.         // Meta content (keywords, description) are also good places to translate (but not in robots... or http-equiv)
  603.         if ($node->tag == 'meta' && $node->content && ($node->name != 'robots') && ($node->name != 'viewport') && ($node->{'http-equiv'} != 'Content-Type'))
  604.             $this->parsetext($node->content);
  605.  
  606.         // recurse
  607.         foreach ($node->nodes as $c) {
  608.             $this->translate_tagging($c, $level + 1);
  609.         }
  610.         if (isset($src_set_here) && $src_set_here)
  611.             $this->srclang = $prevsrclang;
  612.         if (isset($inselect_set_here) && $inselect_set_here)
  613.             $this->inselect = false;
  614.     }
  615.  
  616.     /**
  617.      * Creates a span used in translation and editing
  618.      * @param string $original_text
  619.      * @param string $translated_text
  620.      * @param int $source (Either "0" for Human, "1" for Machine or "" for untouched)
  621.      * @param boolean $for_hidden_element
     * @param string $src_lang - if source lang of element is different than default (eg. wrapped in lang="xx" attr)
  623.      * @return string
  624.      */
  625.     function create_edit_span($original_text, $translated_text, $source, $for_hidden_element = false, $src_lang = '') {
  626.         // Use base64 encoding to make that when the page is translated (i.e. update_translation) we
  627.         // get back exactlly the same string without having the client decode/encode it in anyway.
  628.         $this->edit_span_created = true;
  629.         $span = '<span class ="' . SPAN_PREFIX . '" id="' . SPAN_PREFIX . $this->span_id . '" data-source="' . $source . '"';
  630.         //$span = '<span class ="' . SPAN_PREFIX . '" id="' . SPAN_PREFIX . $this->span_id . '" data-token="' . transposh_utils::base64_url_encode($original_text) . '" data-source="' . $source . '"';
  631.         // if we have a source language
  632.         if ($src_lang) {
  633.             $span .= ' data-srclang="' . $src_lang . '"';
  634.         }
  635.         // since orig replaces token too
  636.         $span .= ' data-orig="' . $original_text . '"';
  637.         // those are needed for hidden elements translations
  638.         if ($for_hidden_element) {
  639.             $span .= ' data-hidden="y"';
  640.             // hidden elements currently have issues figuring what they translated in the JS
  641.             if ($translated_text != null) {
  642.                 $span .= ' data-trans="' . $translated_text . '"';
  643.             }
  644.         }
  645.         $span .= '>';
  646.         if (!$for_hidden_element) {
  647.             if ($translated_text)
  648.                 $span .= $translated_text;
  649.             else
  650.                 $span .= $original_text;
  651.         }
  652.         $span .= '</span>';
  653.         ++$this->span_id;
  654.         return $span;
  655.     }
  656.  
  657.     /**
  658.      * This function does some ad replacement for transposh benefit
  659.      */
  660.     function do_ad_switch() {
  661.         if (isset($this->html->noise) && is_array($this->html->noise)) {
  662.             foreach ($this->html->noise as $key => $value) {
  663.                 if (strpos($value, 'google_ad_client') !== false) {
  664.                     $publoc = strpos($value, 'pub-');
  665.                     $sufloc = strpos($value, '"', $publoc);
  666.                     if (!$sufloc)
  667.                         $sufloc = strpos($value, "'", $publoc);
  668.                     echo $publoc . ' ' . $sufloc;
  669.                     if ($publoc && $sufloc)
  670.                         $this->html->noise[$key] = substr($value, 0, $publoc) . 'pub-7523823497771676' . substr($value, $sufloc);
  671.                 }
  672.             }
  673.         }
  674.         // INS TAGS
  675.         foreach ($this->html->find('ins') as $e) {
  676.             $e->{'data-ad-client'} = 'ca-pub-7523823497771676';
  677.         }
  678.     }
  679.  
  680.  
  681.     /**
  682.      * Allow changing of parsing rules, yeah, I caved
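     * @param mixed $puncts New value for the punctuation breaking rule (stored as punct_breaks)
     * @param mixed $numbers New value for the numbers breaking rule (stored as num_breaks)
     * @param mixed $entities New value for the entities breaking rule (stored as ent_breaks)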
  686.      */
  687.     function change_parsing_rules($puncts, $numbers, $entities) {
  688.         $this->punct_breaks = $puncts;
  689.         $this->num_breaks = $numbers;
  690.         $this->ent_breaks = $entities;
  691.     }
  692.  
  693.     /**
  694.      * Main function - actually translates a given HTML
  695.      * @param string $string containing HTML
  696.      * @return string Translated content is here
  697.      */
  698.     function fix_html($string) {
  699.         // ready our stats
  700.         $this->stats = new tp_parserstats();
  701.         // handler for possible json (buddypress)
  702.         if ($this->might_json) {
  703.             if ($string[0] == '{') {
  704.                 $jsoner = json_decode($string);
  705.                 if ($jsoner != null) {
  706.                     tp_logger("json detected (buddypress?)", 4);
  707.                     // currently we only handle contents (which buddypress heavily use)
  708.                     if ($jsoner->contents) {
  709.                         $jsoner->contents = $this->fix_html($jsoner->contents);
  710.                     }
  711.                     if ($jsoner->fragments->{'div.widget_shopping_cart_content'}) {
  712.                         $jsoner->fragments->{'div.widget_shopping_cart_content'} = $this->fix_html($jsoner->fragments->{'div.widget_shopping_cart_content'});
  713.                     }
  714.                     if ($jsoner->fragments->{'div.kt-header-mini-cart-refreash'}) {
  715.                         $jsoner->fragments->{'div.kt-header-mini-cart-refreash'} = $this->fix_html($jsoner->fragments->{'div.kt-header-mini-cart-refreash'});
  716.                     }
  717.                     if ($jsoner->fragments->{'a.cart-contents'}) {
  718.                         $jsoner->fragments->{'a.cart-contents'} = $this->fix_html($jsoner->fragments->{'a.cart-contents'});
  719.                     }
  720.                     if ($jsoner->fragments->{'.woocommerce-checkout-review-order-table'}) {
  721.                         $jsoner->fragments->{'.woocommerce-checkout-review-order-table'} = $this->fix_html($jsoner->fragments->{'.woocommerce-checkout-review-order-table'});
  722.                     }
  723.                     if ($jsoner->fragments->{'.woocommerce-checkout-payment'}) {
  724.                         $jsoner->fragments->{'.woocommerce-checkout-payment'} = $this->fix_html($jsoner->fragments->{'.woocommerce-checkout-payment'});
  725.                     }
  726.                     return json_encode($jsoner); // now any attempted json will actually return a json
  727.                 }
  728.             }
  729.         }
  730.  
  731.         // create our dom
  732.         $string = str_replace(chr(0xC2) . chr(0xA0), ' ', $string); // annoying NBSPs?
  733.         $this->html = str_get_html($string, false); // false for RSS?
  734.         //$this->stats->do_timing();
  735.         //Log::info("Stats Build dom:" . $this->stats->time);
  736.         // mark translateable elements
  737.         if ($this->html->find('html', 0))
  738.             $this->html->find('html', 0)->lang = ''; // Document defined lang may be preset to correct lang, but should be ignored TODO: Better?
  739.         $this->translate_tagging($this->html->root);
  740.         //$this->stats->do_timing();
  741.         //Log::info("Stats Done tagging:" . $this->stats->time);
  742.         // first fix the html tag itself - we might need to to the same for all such attributes with flipping
  743.         if ($this->html->find('html', 0)) {
  744.             if ($this->dir_rtl)
  745.                 $this->html->find('html', 0)->dir = 'rtl';
  746.             else
  747.                 $this->html->find('html', 0)->dir = 'ltr';
  748.         }
  749.  
  750.         if ($this->lang) {
  751.             if ($this->html->find('html', 0))
  752.                 $this->html->find('html', 0)->lang = $this->lang;
  753.             // add support for <meta name="language" content="<lang>">
  754.             if ($this->html->find('meta[name=language]')) {
  755.                 @$this->html->find('meta[name=language]')->content = $this->lang;
  756.             }
  757.         }
  758.  
  759.         // not much point in further processing if we don't have a function that does it
  760.         if ($this->fetch_translate_func == null) {
  761.             return $this->html;
  762.         }
  763.  
  764.         // fix feed
  765.         if ($this->feed_fix) {
  766.             // fix urls on feed
  767.             tp_logger('fixing rss feed', 3);
  768.             foreach (array('link', 'wfw:commentrss', 'comments') as $tag) {
  769.                 foreach ($this->html->find($tag) as $e) {
  770.                     $e->innertext = htmlspecialchars(call_user_func_array($this->url_rewrite_func, array($e->innertext)));
  771.                     // no need to translate anything here
  772.                     unset($e->nodes);
  773.                 }
  774.             }
  775.             // guid is not really a url -- in some future, we can check if permalink is true and probably falsify it
  776.             foreach ($this->html->find('guid') as $e) {
  777.                 $e->innertext = $e->innertext . '-' . $this->lang;
  778.                 unset($e->nodes);
  779.             }
  780.             // fix feed language
  781.             @$this->html->find('language', 0)->innertext = $this->lang;
  782.             unset($this->html->find('language', 0)->nodes);
  783.         } else {
  784.             // since this is not a feed, we might have references to such in the <link rel="alternate">
  785.             foreach ($this->html->find('link') as $e) {
  786.                 if (strcasecmp($e->rel, 'alternate') == 0 || strcasecmp($e->rel, 'canonical') == 0) {
  787.                     if (!$e->hreflang)
  788.                         $e->href = call_user_func_array($this->url_rewrite_func, array($e->href));
  789.                 }
  790.             }
  791.         }
  792.  
  793.         // try some prefetching... (//todo - maybe move directly to the phrase create)
  794. //        $originals = array();
  795.         if ($this->prefetch_translate_func != null) {
  796.             /*          foreach ($this->html->find('text') as $e) {
  797.               foreach ($e->nodes as $ep) {
  798.               if ($ep->phrase) $originals[$ep->phrase] = true;
  799.               }
  800.               }
  801.               foreach (array('title', 'value', 'placeholder', 'alt') as $title) {
  802.               foreach ($this->html->find('[' . $title . ']') as $e) {
  803.               if (isset($e->nodes))
  804.               foreach ($e->nodes as $ep) {
  805.               if ($ep->phrase) $originals[$ep->phrase] = true;
  806.               }
  807.               }
  808.               }
  809.               foreach ($this->html->find('[content]') as $e) {
  810.               foreach ($e->nodes as $ep) {
  811.               if ($ep->phrase) $originals[$ep->phrase] = true;
  812.               }
  813.               } */
  814.             // if we should split, we will split some urls for translation prefetching
  815.             if ($this->split_url_func != null) {
  816.                 foreach ($this->atags as $e) {
  817.                     foreach (call_user_func_array($this->split_url_func, array($e->href)) as $part) {
  818.                         $this->prefetch_phrases[$part] = true;
  819.                     }
  820.                 }
  821.                 foreach ($this->otags as $e) {
  822.                     foreach (call_user_func_array($this->split_url_func, array($e->value)) as $part) {
  823.                         $this->prefetch_phrases[$part] = true;
  824.                     }
  825.                 }
  826.             }
  827.             call_user_func_array($this->prefetch_translate_func, array($this->prefetch_phrases, $this->lang));
  828.         }
  829.  
  830.         //fix urls more
  831.         // WORK IN PROGRESS
  832.         /* foreach ($this->atags as $e) {
  833.           $hrefspans = '';
  834.           foreach (call_user_func_array($this->split_url_func, array($e->href)) as $part) {
  835.           // fix - not for dashes
  836.           list ($source, $translated_text) = call_user_func_array($this->fetch_translate_func, array($part, $this->lang));
  837.           $hrefspans .= $this->create_edit_span($part, $translated_text, $source, true);
  838.           }
  839.           $e->href = call_user_func_array($this->url_rewrite_func, array($e->href));
  840.           $e->outertext .= $hrefspans;
  841.           } */
  842.  
  843.         // fix src for items
  844.         if ($this->fix_src_tag_func !== null) {
  845.             foreach ($this->html->find('[src]') as $e) {
  846.                 $e->src = call_user_func_array($this->fix_src_tag_func, array($e->src));
  847.             }
  848.  
  849.             foreach ($this->html->find('link') as $e) {
  850.                 $e->href = call_user_func_array($this->fix_src_tag_func, array($e->href));
  851.             }
  852.         }
  853.  
  854.         // fix urls...
  855.         foreach ($this->atags as $e) {
  856.             if ($e->href)
  857.                 $e->href = call_user_func_array($this->url_rewrite_func, array($e->href));
  858.         }
  859.         foreach ($this->otags as $e) {
  860.             if ($e->value)
  861.                 $e->value = call_user_func_array($this->url_rewrite_func, array($e->value));
  862.         }
  863.  
  864.         // this is used to reserve spans we cannot add directly (out of body, metas, etc)
  865.         $hiddenspans = '';
  866.         $savedspan = '';
  867.  
  868.         // actually translate tags
  869.         // texts are first
  870.         foreach ($this->html->find('text') as $e) {
  871.             $replace = array();
  872.             foreach ($e->nodes as $ep) {
  873.                 list ($source, $translated_text) = call_user_func_array($this->fetch_translate_func, array($ep->phrase, $this->lang));
  874.                 //stats
  875.                 $this->stats->total_phrases++;
  876.                 if ($translated_text) {
  877.                     $this->stats->translated_phrases++;
  878.                     if ($source == 0)
  879.                         $this->stats->human_translated_phrases++;
  880.                 }
  881.                 if (($this->is_edit_mode || ($this->is_auto_translate && $translated_text == null))/* && $ep->inbody */) {
  882.                     if ($ep->inselect) {
  883.                         $savedspan .= $this->create_edit_span($ep->phrase, $translated_text, $source, true, $ep->srclang);
  884.                     } elseif (!$ep->inbody) {
  885.                         $hiddenspans .= $this->create_edit_span($ep->phrase, $translated_text, $source, true, $ep->srclang);
  886.                     } else {
  887.                         $translated_text = $this->create_edit_span($ep->phrase, $translated_text, $source, false, $ep->srclang);
  888.                     }
  889.                 }
  890.                 // store replacements
  891.                 if ($translated_text) {
  892.                     $replace[] = array($translated_text, $ep);
  893.                 }
  894.             }
  895.             // do replacements in reverse
  896.             foreach (array_reverse($replace) as $epag) {
  897.                 list($replacetext, $epg) = $epag;
  898.                 $e->outertext = substr_replace($e->outertext, $replacetext, $epg->start, $epg->len);
  899.             }
  900.  
  901.             // this adds saved spans to the first not in select element which is in the body
  902.             if ($e->nodes && !$ep->inselect && $savedspan && $ep->inbody) { // (TODO: might not be...?)
  903.                 $e->outertext = $savedspan . $e->outertext;
  904.                 $savedspan = '';
  905.             }
  906.         }
  907.  
  908.         // now we handle the title attributes (and the value of submit buttons)
  909.         $hidden_phrases = array();
  910.         foreach (array('title', 'value', 'placeholder', 'alt') as $title) {
  911.             foreach ($this->html->find('[' . $title . ']') as $e) {
  912.                 $replace = array();
  913.                 $span = '';
  914.                 // when we already have a parent outertext we'll have to update it directly
  915.                 if (isset($e->parent->_[HDOM_INFO_OUTER])) {
  916.                     $saved_outertext = $e->outertext;
  917.                 }
  918.                 tp_logger("$title-original: $e->$title}", 4);
  919.                 if (isset($e->nodes))
  920.                     foreach ($e->nodes as $ep) {
  921.                         if ($ep->tag == 'phrase') {
  922.                             list ($source, $translated_text) = call_user_func_array($this->fetch_translate_func, array($ep->phrase, $this->lang));
  923.                             // more stats
  924.                             $this->stats->total_phrases++;
  925.                             if ($ep->inbody)
  926.                                 $this->stats->hidden_phrases++;
  927.                             else
  928.                                 $this->stats->meta_phrases++;
  929.                             if ($translated_text) {
  930.                                 $this->stats->translated_phrases++;
  931.                                 if ($ep->inbody)
  932.                                     $this->stats->hidden_translated_phrases++;
  933.                                 else
  934.                                     $this->stats->meta_translated_phrases++;
  935.                                 if ($source == 0)
  936.                                     $this->stats->human_translated_phrases++;
  937.                             }
  938.                             if (($this->is_edit_mode || ($this->is_auto_translate && $translated_text == null)) && $ep->inbody) {
  939.                                 // prevent duplicate translation (title = text)
  940.                                 if (strpos($e->innertext, $ep->phrase /* Transposh_utils::base64_url_encode($ep->phrase) */) === false) {
  941. //                                if (strpos($e->innertext, transposh_utils::base64_url_encode($ep->phrase)) === false) {
  942.                                     //no need to translate span the same hidden phrase more than once
  943.                                     if (!in_array($ep->phrase, $hidden_phrases)) {
  944.                                         $this->stats->hidden_translateable_phrases++;
  945.                                         $span .= $this->create_edit_span($ep->phrase, $translated_text, $source, true, $ep->srclang);
  946.                                         //    logger ($span);
  947.                                         $hidden_phrases[] = $ep->phrase;
  948.                                     }
  949.                                 }
  950.                             }
  951.                             // if we need to replace, we store this
  952.                             if ($translated_text) {
  953.                                 $replace[$translated_text] = $ep;
  954.                             }
  955.                         }
  956.                     }
  957.                 // and later replace
  958.                 foreach (array_reverse($replace, true) as $replace => $epg) {
  959.                     $e->$title = substr_replace($e->$title, $replace, $epg->start, $epg->len);
  960.                 }
  961.  
  962.                 $e->outertext .= $span;
  963.                 // this is where we update in the outercase issue
  964.                 if (isset($e->parent->_[HDOM_INFO_OUTER])) {
  965.                     $e->parent->outertext = implode($e->outertext, explode($saved_outertext, $e->parent->outertext, 2));
  966.                 }
  967.             }
  968.         }
  969.  
  970.         // now we handle the meta content - which is simpler because they can't be edited or auto-translated in place
  971.         // we also don't expect any father modifications here
  972.         // so we now add all those spans right before the <body> tag end
  973.         foreach ($this->html->find('[content]') as $e) {
  974.             $right = '';
  975.             $newtext = '';
  976.  
  977.             foreach ($e->nodes as $ep) {
  978.                 if ($ep->tag == 'phrase') {
  979.                     // even more stats
  980.                     $this->stats->total_phrases++;
  981.                     $this->stats->meta_phrases++;
  982.                     list ($source, $translated_text) = call_user_func_array($this->fetch_translate_func, array($ep->phrase, $this->lang));
  983.                     if ($translated_text) {
  984.                         $this->stats->translated_phrases++;
  985.                         $this->stats->meta_translated_phrases++;
  986.                         if ($source == 0)
  987.                             $this->stats->human_translated_phrases++;
  988.                         list ($left, $right) = explode($ep->phrase, $e->content, 2);
  989.                         $newtext .= $left . $translated_text;
  990.                         $e->content = $right;
  991.                     }
  992.                     if ($this->is_edit_mode) {
  993.                         $hiddenspans .= $this->create_edit_span($ep->phrase, $translated_text, $source, true, $ep->srclang);
  994.                     }
  995.                     if (!$translated_text && $this->is_auto_translate && !$this->is_edit_mode) {
  996.                         tp_logger('untranslated meta for ' . $ep->phrase . ' ' . $this->lang);
  997.                         if ($this->is_edit_mode || $this->is_auto_translate) { // FIX
  998.                         }
  999.                     }
  1000.                 }
  1001.             }
  1002.             if ($newtext) {
  1003.                 $e->content = $newtext . $right;
  1004.                 tp_logger("content-phrase: $newtext", 4);
  1005.             }
  1006.         }
  1007.  
  1008.         if ($hiddenspans) {
  1009.             $body = $this->html->find('body', 0);
  1010.             if ($body != null)
  1011.                 $body->lastChild()->outertext .= $hiddenspans;
  1012.         }
  1013.        
  1014.             // we might show an ad for transposh in some cases
  1015.             if (($this->allow_ad && !$this->default_lang && mt_rand(1, 100) > 95) || // 5 of 100 for translated non default language pages
  1016.                     ($this->allow_ad && $this->default_lang && mt_rand(1, 100) > 99) || // 1 of 100 for translated default languages pages
  1017.                     (!$this->allow_ad && mt_rand(1, 1000) > 999)) { // 1 of 1000 otherwise
  1018.                 $this->do_ad_switch();
  1019.             }
  1020.        
  1021.         // This adds a meta tag with our statistics json-encoded inside...
  1022. //      $this->stats->do_timing();
  1023. //        Log::info("Stats Done:" . $this->stats->time);
  1024.  
  1025.         $head = $this->html->find('head', 0);
  1026.         if ($this->edit_span_created) {
  1027.             if ($head != null) {
  1028.                 $head->lastChild()->outertext .= $this->added_header;
  1029.             }
  1030.         }
  1031.         //exit;
  1032.         if ($head != null)
  1033.             $head->lastChild()->outertext .= "\n<meta name=\"translation-stats\" content='" . json_encode($this->stats) . "'/>";
  1034.  
  1035.         // we make sure that the result is clear from our shananigans
  1036.         return str_replace(array(TP_GTXT_BRK, TP_GTXT_IBRK, TP_GTXT_BRK_CLOSER, TP_GTXT_IBRK_CLOSER), '', $this->html->outertext);
  1037.         // Changed because of places where tostring failed
  1038.         //return $this->html;
  1039.         //return $this->html->outertext;
  1040.     }
  1041.  
  1042.     /**
     * This function returns a list of phrases from a given HTML string
  1044.      * @param string $string Html with phrases to extract
  1045.      * @return array List of phrases (or an empty one)
  1046.      * @since 0.3.5
  1047.      */
  1048.     function get_phrases_list($string) {
  1049.         $result = array();
  1050.         // create our dom
  1051.         $this->html = str_get_html('<span lang="xx">' . $string . '</span>');
  1052.         // mark translateable elements
  1053.         $this->translate_tagging($this->html->root);
  1054.         foreach ($this->html->nodes as $ep) {
  1055.             if ($ep->tag == 'phrase') {
  1056.                 $result[$ep->phrase] = $ep->phrase;
  1057.             }
  1058.         }
  1059.         return $result;
  1060.     }
  1061.  
  1062. }
  1063.  
  1064. ?>
Add Comment
Please, Sign In to add comment