Advertisement
roniewill

Migrando Dados Não-UTF-8 Para UTF-8

Sep 12th, 2017
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 1.04 KB | None | 0 0
  1. $regex = <<<'END'
  2. /
  3.   (
  4.     (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
  5.     |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
  6.     |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
  7.     |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
  8.     ){1,100}                      # ...one or more times
  9.   )
  10. | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
  11. | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
  12. /x
  13. END;
  14.  
  15. function utf8replacer($captures) {
  16.   if ($captures[1] != "") {
  17.     // Valid byte sequence. Return unmodified.
  18.     return $captures[1];
  19.   }
  20.   elseif ($captures[2] != "") {
  21.     // Invalid byte of the form 10xxxxxx.
  22.     // Encode as 11000010 10xxxxxx.
  23.     return "\xC2".$captures[2];
  24.   }
  25.   else {
  26.     // Invalid byte of the form 11xxxxxx.
  27.     // Encode as 11000011 10xxxxxx.
  28.     return "\xC3".chr(ord($captures[3])-64);
  29.   }
  30. }
  31. preg_replace_callback($regex, "utf8replacer", $text);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement