Data hosted with ♥ by Pastebin.com - Download Raw - See Original
  /**
   * Checks to see if a string is well-formed UTF-8.
   *
   * Every byte sequence in $str must be one of the forms permitted by
   * RFC 3629: 1 to 4 bytes long, shortest-form only (no overlong
   * encodings), no UTF-16 surrogates (U+D800..U+DFFF) and no code points
   * above U+10FFFF. The obsolete 5- and 6-byte forms are rejected.
   *
   * An empty string contains no invalid sequence and therefore yields true.
   *
   * @author bmorel at ssi dot fr (modified)
   * @since 1.2.1
   *
   * @param string $str The string to be checked
   * @return bool True if $str is well-formed UTF-8, false otherwise.
   */
  function seems_utf8($str) {
      $length = strlen($str);
      for ($i = 0; $i < $length; $i++) {
          $c = ord($str[$i]);

          if ($c < 0x80) {
              continue; # 0bbbbbbb - plain ASCII, no continuation bytes
          }

          # Work out how many continuation bytes ($n) must follow the lead
          # byte, and which range ($min..$max) the FIRST continuation byte
          # is allowed to fall in. The first continuation byte is narrowed
          # for some lead bytes to exclude overlongs, surrogates and
          # values beyond U+10FFFF.
          $min = 0x80;
          $max = 0xBF;
          if ($c >= 0xC2 && $c <= 0xDF) {
              $n = 1; # 110bbbbb (0xC0 and 0xC1 would be overlong)
          } elseif ($c == 0xE0) {
              $n = 2; # 1110bbbb
              $min = 0xA0; # below 0xA0 would be an overlong encoding
          } elseif ($c == 0xED) {
              $n = 2; # 1110bbbb
              $max = 0x9F; # above 0x9F would encode a UTF-16 surrogate
          } elseif (($c & 0xF0) == 0xE0) {
              $n = 2; # 1110bbbb
          } elseif ($c == 0xF0) {
              $n = 3; # 11110bbb
              $min = 0x90; # below 0x90 would be an overlong encoding
          } elseif ($c >= 0xF1 && $c <= 0xF3) {
              $n = 3; # 11110bbb
          } elseif ($c == 0xF4) {
              $n = 3; # 11110bbb
              $max = 0x8F; # above 0x8F would exceed U+10FFFF
          } else {
              # Stray continuation byte, overlong 2-byte lead (0xC0/0xC1),
              # or a lead byte >= 0xF5 (beyond U+10FFFF / 5-6 byte forms).
              return false;
          }

          for ($j = 0; $j < $n; $j++) { # n continuation bytes must follow
              if (++$i == $length) {
                  return false; # sequence is truncated by end of string
              }
              $b = ord($str[$i]);
              if ($b < $min || $b > $max) {
                  return false;
              }
              # Only the first continuation byte is range-restricted; the
              # rest just have to match 10bbbbbb.
              $min = 0x80;
              $max = 0xBF;
          }
      }
      return true;
  }