Advertisement
Guest User

Untitled

a guest
Mar 20th, 2017
266
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 1.01 KB | None | 0 0
  // Guarded so that running this fragment more than once (e.g. from an include
  // or inside a function body) does not trigger a "cannot redeclare" fatal error.
  if (!function_exists('repairInvalidUtf8')) {
      /**
       * Rewrites a byte string so that the result is well-formed UTF-8.
       *
       * Runs of well-formed UTF-8 are copied through untouched. Every other byte
       * is treated as an ISO-8859-1 (Latin-1) character and re-encoded as the
       * corresponding two-byte UTF-8 sequence:
       *   - 10xxxxxx (0x80-0xBF) becomes 0xC2 followed by the same byte;
       *   - 11xxxxxx (0xC0-0xFF) becomes 0xC3 followed by (byte - 0x40).
       *
       * Only sequences that are well-formed per the Unicode Standard count as
       * valid. Overlong forms, UTF-16 surrogates (U+D800..U+DFFF) and code points
       * above U+10FFFF are rejected, so their bytes are re-encoded individually
       * instead of being passed through as broken UTF-8.
       *
       * @param string|string[] $text Bytes to repair; handed to preg_replace_callback() as the subject.
       *
       * @return string|string[] The repaired value, as returned by preg_replace_callback().
       *
       * @throws RuntimeException If PCRE reports an error instead of a result.
       */
      function repairInvalidUtf8($text)
      {
          // Single-quoted on purpose: the \xNN escapes must reach PCRE verbatim.
          // The pattern runs in byte mode (no "u" flag) and extended mode ("x"),
          // so the layout whitespace and the "#" comments inside it are ignored.
          $pattern = '/
            (
              (?: [\x00-\x7F]                        # U+0000..U+007F      0xxxxxxx
              |   [\xC2-\xDF][\x80-\xBF]             # U+0080..U+07FF      C0 and C1 would be overlong
              |   \xE0[\xA0-\xBF][\x80-\xBF]         # U+0800..U+0FFF      second byte below A0 would be overlong
              |   [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # U+1000..U+CFFF and U+E000..U+FFFF
              |   \xED[\x80-\x9F][\x80-\xBF]         # U+D000..U+D7FF      second byte above 9F would be a surrogate
              |   \xF0[\x90-\xBF][\x80-\xBF]{2}      # U+10000..U+3FFFF    second byte below 90 would be overlong
              |   [\xF1-\xF3][\x80-\xBF]{3}          # U+40000..U+FFFFF
              |   \xF4[\x80-\x8F][\x80-\xBF]{2}      # U+100000..U+10FFFF  upper limit of Unicode
              ){1,100}                               # a run of 1 to 100 well-formed characters
            )
          | ( [\x80-\xBF] )                          # stray byte in range 10000000 - 10111111
          | ( [\xC0-\xFF] )                          # stray byte in range 11000000 - 11111111
          /x';

          $repaired = preg_replace_callback(
              $pattern,
              static function (array $captures) {
                  if ($captures[1] !== '') {
                      // Well-formed run: return it unmodified.
                      return $captures[1];
                  }

                  if ($captures[2] !== '') {
                      // Stray byte of the form 10xxxxxx: encode as 11000010 10xxxxxx.
                      return "\xC2" . $captures[2];
                  }

                  // Stray byte of the form 11xxxxxx: encode as 11000011 10xxxxxx.
                  return "\xC3" . chr(ord($captures[3]) - 64);
              },
              $text
          );

          if ($repaired === null) {
              // preg_replace_callback() signals failure with null; surfacing it
              // avoids silently replacing the caller's data with null.
              throw new RuntimeException(
                  'UTF-8 repair failed with PCRE error code ' . preg_last_error()
              );
          }

          return $repaired;
      }
  }

  $code = repairInvalidUtf8($code);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement