Advertisement
petschko

UTF-8 Function class

Jul 22nd, 2015
299
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 14.78 KB | None | 0 0
  1. <?php
  2. /**
  3.  * Author: Peter Dragicevic [peter-91@hotmail.de]
  4.  * Authors-Website: http://petschko.org/
  5.  * Licence: http://creativecommons.org/licenses/by-sa/4.0/
  6.  * Date: 14.07.2015
  7.  * Time: 10:38
  8.  * Update: 09.04.2016
  9.  * Version: 1.0.7 (Changed Class-Name & Website)
  10.  *
  11.  * Notice: -
  12.  */
  13.  
  14. // Initials this class on include - disable these lines if you want to check UserData by yourself
  15. header('Content-Type: text/html; charset=utf-8');
  16. mb_internal_encoding('UTF-8');
  17. new utf8();
  18. // ------------------------------
  19.  
  20. /**
  21.  * Class Utf8
  22.  *
  23.  * Single-Instance-Class
  24.  */
  25. class Utf8 {
  26.     private static $init = false; // Don't touch!
  27.     // Setup stuff here
  28.     private static $useEntities = false; // Use HTML-Entities instead of HTML-SpecialCharacters (Config does not affect, if check is turned off)
  29.     private static $checkUserInput = true; // Pre-check User-Input (Recommend!)
  30.  
  31.     /**
  32.      * Initial this Single-Instance Class and pre-checks/Escape all User-Input-Fields
  33.      */
  34.     public function __construct() {
  35.         if(! self::isInit()) {
  36.             // Check if all MultiByte (mb) functions are available
  37.             self::checkMultiByteFunctions();
  38.  
  39.             // Check User-Data?
  40.             if(self::isCheckUserInput()) {
  41.  
  42.                 if(self::isUseEntities()) {
  43.                     // Remove unknown chars from User-Data and convert html chars to HTML-Entities
  44.                     $_POST = self::arr_htmlentities(self::arr_removeNonUtf8($_POST));
  45.                     $_GET = self::arr_htmlentities(self::arr_removeNonUtf8($_GET));
  46.                     $_SERVER = self::arr_htmlentities(self::arr_removeNonUtf8($_SERVER));
  47.                     $_REQUEST = self::arr_htmlentities(self::arr_removeNonUtf8($_REQUEST));
  48.                     $_FILES = self::arr_htmlentities(self::arr_removeNonUtf8($_FILES));
  49.  
  50.                     // Cookies
  51.                     if(! class_exists('cookie'))
  52.                         $_COOKIE = self::arr_htmlentities(self::arr_removeNonUtf8($_COOKIE));
  53.                     else if(cookie::getIsAllowed())
  54.                         $_COOKIE = self::arr_htmlentities(self::arr_removeNonUtf8($_COOKIE));
  55.                     else {
  56.                         // Anyway check Master-Cookie
  57.                         if(! isset($_COOKIE[cookie::getMasterCookieName()]))
  58.                             $_COOKIE[cookie::getMasterCookieName()] = '';
  59.  
  60.                         $_COOKIE[cookie::getMasterCookieName()] = self::htmlentities(self::removeNonUtf8($_COOKIE[cookie::getMasterCookieName()]));
  61.                     }
  62.  
  63.                 } else {
  64.                     // Remove unknown chars from User-Data and convert html chars to HTML-SpecialCharacters
  65.                     $_POST = self::arr_htmlspecialchars(self::arr_removeNonUtf8($_POST));
  66.                     $_GET = self::arr_htmlspecialchars(self::arr_removeNonUtf8($_GET));
  67.                     $_SERVER = self::arr_htmlspecialchars(self::arr_removeNonUtf8($_SERVER));
  68.                     $_REQUEST = self::arr_htmlspecialchars(self::arr_removeNonUtf8($_REQUEST));
  69.                     $_FILES = self::arr_htmlspecialchars(self::arr_removeNonUtf8($_FILES));
  70.  
  71.                     // Cookies
  72.                     if(! class_exists('cookie'))
  73.                         $_COOKIE = self::arr_htmlspecialchars(self::arr_removeNonUtf8($_COOKIE));
  74.                     else if(cookie::getIsAllowed())
  75.                         $_COOKIE = self::arr_htmlspecialchars(self::arr_removeNonUtf8($_COOKIE));
  76.                     else {
  77.                         // Anyway check Master-Cookie
  78.                         if(! isset($_COOKIE[cookie::getMasterCookieName()]))
  79.                             $_COOKIE[cookie::getMasterCookieName()] = '';
  80.  
  81.                         $_COOKIE[cookie::getMasterCookieName()] = self::htmlspecialchars(self::removeNonUtf8($_COOKIE[cookie::getMasterCookieName()]));
  82.                     }
  83.                 }
  84.             }
  85.  
  86.             // Done. set initials to true
  87.             self::setInit(true);
  88.         }
  89.     }
  90.    
  91.     /**
  92.      * Show if class is initialed
  93.      *
  94.      * @return boolean - show if class is initialed | true = yes | false = no
  95.      */
  96.     public static function isInit() {
  97.         return self::$init;
  98.     }
  99.  
  100.     /**
  101.      * Set if class is initialed
  102.      *
  103.      * @param boolean $init - set if class is initialed | true = yes | false = no
  104.      */
  105.     private static function setInit($init) {
  106.         self::$init = $init;
  107.     }
  108.  
  109.     /**
  110.      * Shows if pre-check should use HTML-entities or HTML-SpecialCharacters
  111.      *
  112.      * @return boolean - use entities instead of special characters? true = yes | false = no
  113.      */
  114.     public static function isUseEntities() {
  115.         return self::$useEntities;
  116.     }
  117.  
  118.     /**
  119.      * Shows if pre-check is enabled
  120.      *
  121.      * @return boolean - check user input
  122.      */
  123.     public static function isCheckUserInput() {
  124.         return self::$checkUserInput;
  125.     }
  126.  
  127.     /**
  128.      * Like http://php.net/htmlentities (htmlentities) but with different default parameters
  129.      *
  130.      * @param string $string - The string to convert
  131.      * @param int $quote - Quotes, see the functions documentation itself ( http://php.net/htmlentities ) - Default: ENT_QUOTES
  132.      * @param string $charset - Character-Set-Encoding - Default: UTF-8
  133.      * @param bool $double_encode - Convert full string or ignore already converted entities - Default: true (convert full)
  134.      * @return string - The converted string
  135.      */
  136.     public static function htmlentities($string, $quote = ENT_QUOTES, $charset = 'UTF-8', $double_encode = true) {
  137.         return htmlentities($string, $quote, $charset, $double_encode);
  138.     }
  139.  
  140.     /**
  141.      * Like self::htmlentities but also work with arrays
  142.      *
  143.      * @param string|array $string - The array/string to convert
  144.      * @param int $quote - Quotes, see the functions documentation itself ( http://php.net/htmlentities ) - Default: ENT_QUOTES
  145.      * @param string $charset - Character-Set-Encoding - Default: UTF-8
  146.      * @param bool $double_encode - Convert full string or ignore already converted entities - Default: true (convert full)
  147.      * @return array|string - The converted string/array
  148.      */
  149.     public static function arr_htmlentities($string, $quote = ENT_QUOTES, $charset = 'UTF-8', $double_encode = true) {
  150.         if(is_array($string)) {
  151.             $tmp = array();
  152.             foreach($string as $key => $value) {
  153.                 $tmp[$key] = self::arr_htmlentities($value, $quote, $charset, $double_encode);
  154.             }
  155.             return $tmp;
  156.         }
  157.  
  158.         return self::htmlentities($string, $quote, $charset, $double_encode);
  159.     }
  160.  
  161.     /**
  162.      * Like http://php.net/htmlspecialchars (htmlspecialchars) but with different default parameters
  163.      *
  164.      * @param string $string - The string to convert
  165.      * @param int $flags - Quotes, see the functions documentation itself ( http://php.net/htmlspecialchars )- Default: ENT_QUOTES
  166.      * @param string $encoding - Character-Set-Encoding - Default: UTF-8
  167.      * @param bool $double_quote - Convert full string or ignore already converted entities - Default: true (convert full)
  168.      * @return string - The converted string
  169.      */
  170.     public static function htmlspecialchars($string, $flags = ENT_QUOTES, $encoding = 'UTF-8', $double_quote = true){
  171.         return htmlspecialchars($string, $flags, $encoding, $double_quote);
  172.     }
  173.  
  174.     /**
  175.      * Like self::htmlspecialchars but also work with arrays
  176.      *
  177.      * @param string|array $string - The array/string to convert
  178.      * @param int $flags - Quotes, see the functions documentation itself ( http://php.net/htmlspecialchars )- Default: ENT_QUOTES
  179.      * @param string $encoding - Character-Set-Encoding - Default: UTF-8
  180.      * @param bool $double_quote - Convert full string or ignore already converted entities - Default: true (convert full)
  181.      * @return array|string - The converted string/array
  182.      */
  183.     public static function arr_htmlspecialchars($string, $flags = ENT_QUOTES, $encoding = 'UTF-8', $double_quote = true) {
  184.         if(is_array($string)) {
  185.             $tmp = array();
  186.             foreach($string as $key => $value) {
  187.                 $tmp[$key] = self::arr_htmlspecialchars($value, $flags, $encoding, $double_quote);
  188.             }
  189.             return $tmp;
  190.         }
  191.  
  192.         return self::htmlspecialchars($string, $flags, $encoding, $double_quote);
  193.     }
  194.  
  195.     /**
  196.      * Like http://php.net/html_entity_decode (html_entity_decode) but with different default parameters
  197.      *
  198.      * @param string $string - The string to convert
  199.      * @param int $quote - Quotes, see the functions documentation itself ( http://php.net/html_entity_decode ) - Default: ENT_QUOTES
  200.      * @param string $charset - Character-Set-Encoding - Default: UTF-8
  201.      * @return string - The converted string
  202.      */
  203.     public static function html_entity_decode($string, $quote = ENT_QUOTES, $charset = 'UTF-8') {
  204.         return html_entity_decode($string, $quote, $charset);
  205.     }
  206.  
  207.     /**
  208.      * Like self::html_entity_decode but also work with arrays
  209.      *
  210.      * @param array|mixed $string - string/array to convert
  211.      * @param int $quote - Quotes, see the functions documentation itself ( http://php.net/html_entity_decode ) - Default: ENT_QUOTES
  212.      * @param string $charset - Character-Set-Encoding - Default: UTF-8
  213.      * @return array|mixed - The converted string/array
  214.      */
  215.     public static function arr_html_entity_decode($string, $quote = ENT_QUOTES, $charset = 'UTF-8') {
  216.         if(is_array($string)) {
  217.             $tmp = array();
  218.             foreach($string as $key => $value) {
  219.                 $tmp[$key] = self::arr_html_entity_decode($value, $quote, $charset);
  220.             }
  221.             return $tmp;
  222.         }
  223.  
  224.         if(is_string($string))
  225.             return self::html_entity_decode($string, $quote, $charset);
  226.         else
  227.             return $string;
  228.     }
  229.  
  230.     /**
  231.      * Like http://php.net/lcfirst (lcfirst) but compatible with MultiByte Characters
  232.      *
  233.      * @param string $string - String to convert, see also: http://php.net/lcfirst
  234.      * @return string - String with first char lower
  235.      */
  236.     public static function lcfirst($string) {
  237.         return mb_strtolower(mb_substr($string, 0, 1)) . mb_substr($string, 1);
  238.     }
  239.  
  240.     /**
  241.      * Like http://php.net/ucfirst (lcfirst) but compatible with MultiByte Characters
  242.      *
  243.      * @param string $string - String to convert, see also: http://php.net/ucfirst
  244.      * @return string - String with first char upper
  245.      */
  246.     public static function ucfirst($string) {
  247.         return mb_strtoupper(mb_substr($string, 0, 1)) . mb_substr($string, 1);
  248.     }
  249.  
  250.     /**
  251.      *
  252.      *
  253.      * @param array|string $string - encode array/string to url
  254.      * @return array|string - encoded array/string
  255.      */
  256.     public static function arr_urlencode($string) {
  257.         if(is_array($string)) {
  258.             $tmp = array();
  259.             foreach($string as $key => $value) {
  260.                 $tmp[$key] = self::arr_urlencode($value);
  261.             }
  262.             return $tmp;
  263.         }
  264.  
  265.         return urlencode($string);
  266.     }
  267.  
  268.     /**
  269.      * Like http://php.net/rawurldecode (rawurldecode) but works also on arrays
  270.      *
  271.      * @param array|string $string - encode array/string to url
  272.      * @return array|string - encoded array/string
  273.      */
  274.     public static function arr_rawurldecode($string) {
  275.         if(is_array($string)) {
  276.             $tmp = array();
  277.             foreach($string as $key => $value) {
  278.                 $tmp[$key] = self::arr_rawurldecode($value);
  279.             }
  280.             return $tmp;
  281.         }
  282.  
  283.         return rawurldecode($string);
  284.     }
  285.  
  286.     /**
  287.      * Encode an string to RFC - Like this: http://php.net/rawurlencode
  288.      * ----
  289.      * Improvement: Thanks to bolvaritamas@vipmail.hu on PHP.NET for FULL UTF-8 convert!
  290.      *
  291.      * @param array|string $string - the string/array which should converted
  292.      * @return array|string - the converted string/array
  293.      */
  294.     public static function arr_rawurlencode($string) {
  295.         if(is_array($string)) {
  296.             $tmp = array();
  297.             foreach($string as $key => $value) {
  298.                 $tmp[$key] = self::arr_rawurlencode($value);
  299.             }
  300.             return $tmp;
  301.         }
  302.  
  303.         // Function itself
  304.         $result = "";
  305.         $length = mb_strlen($string);
  306.  
  307.         for($i = 0; $i < $length; $i++)
  308.             $result .= '%' . wordwrap(bin2hex(mb_substr($string, $i, 1)), 2, '%', true);
  309.  
  310.         return $result;
  311.         // OLD return rawurlencode($string);
  312.     }
  313.  
  314.     /**
  315.      * Like http://php.net/urldecode
  316.      *
  317.      * @param string - $str unicode and ulrencoded string, see also: http://php.net/urldecode
  318.      * @return string - decoded string
  319.      */
  320.     public static function urldecode($string) {
  321.         $string = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', urldecode($string));
  322.         return self::html_entity_decode($string);
  323.     }
  324.  
  325.     /**
  326.      * Like self::urldecode but also work with arrays
  327.      *
  328.      * @param array|mixed $string -  string/array unicode and ulrencoded string, see also: http://php.net/urldecode
  329.      * @return array|string - decoded array/string
  330.      */
  331.     public static function arr_urldecode($string) {
  332.         if(is_array($string)) {
  333.             $tmp = array();
  334.             foreach($string as $key => $value) {
  335.                 $tmp[$key] = self::arr_urldecode($value);
  336.             }
  337.             return $tmp;
  338.         }
  339.  
  340.         return self::urldecode($string);
  341.     }
  342.  
  343.     /**
  344.      * Detect encoding and convert it to UTF-8 and remove not allowed Encoding
  345.      *
  346.      * @param $string - Dirty, may not UTF-8 String
  347.      * @return bool|mixed|string - Clean UTF-8 String or false on error
  348.      * @throws Exception - UTF-7 Warn
  349.      */
  350.     private static function removeNonUtf8($string) {
  351.         if(mb_detect_encoding($string) != 'UTF-8')
  352.             $string = mb_convert_encoding($string, 'UTF-8', mb_detect_encoding($string)); // Convert non UTF-8 String to UTF-8
  353.  
  354.         // Exit on UTF7
  355.         if(mb_detect_encoding($string) == 'UTF-7')
  356.             throw new Exception('UTF-7 is not allowed!');
  357.  
  358.         // Check if string is ascii or utf8 (utf-8 is ascii compatible, that is why this should be also checked)
  359.         if(mb_detect_encoding($string) != 'UTF-8' && mb_detect_encoding($string) != 'ASCII')
  360.             return false;
  361.         $string = preg_replace('/[\xF0-\xF7].../s', '', $string); // Remove UTF16 chars
  362.  
  363.         return $string;
  364.     }
  365.  
  366.     /**
  367.      * Same as self::removeNonUtf8 but it work also with arrays
  368.      *
  369.      * @param string|array $string - dirty array (may not utf8 encoded)
  370.      * @return array|mixed|string - clean array utf8
  371.      * @throws Exception - UTF-7 Warn
  372.      */
  373.     private static function arr_removeNonUtf8($string) {
  374.         if(is_array($string)) {
  375.             $tmp = array();
  376.             foreach($string as $key => $value) {
  377.                 $tmp[$key] = self::arr_removeNonUtf8($value);
  378.             }
  379.             return $tmp;
  380.         }
  381.  
  382.         return self::removeNonUtf8($string);
  383.     }
  384.  
  385.     /**
  386.      * Same as http://php.net/basename (basename) but work also with MultiBytes File/Directory-Names
  387.      * /!\ Usual not needed to use! There are no MultiBytes-File/Directory-Names yet, but may needed for some of you if your server has them Like Chine/Russian etc /!\
  388.      *
  389.      * @param string $path - Path to convert
  390.      * @param string|null $suffix - Remove this ending if exists
  391.      * @return string -  Filename without path (and may without suffix)
  392.      */
  393.     public static function mb_basename($path, $suffix = null) {
  394.         if(mb_stripos($path, DIRECTORY_SEPARATOR) !== false)
  395.             $basename = mb_substr($path, mb_strripos($path, DIRECTORY_SEPARATOR));
  396.         else
  397.             $basename = $path;
  398.  
  399.         if($suffix && mb_stripos($basename, $suffix)) {
  400.             $suffix_pos = mb_strlen($suffix) * -1;
  401.             $base_end = mb_substr($basename, $suffix_pos);
  402.  
  403.             if($base_end == $suffix)
  404.                 $basename = mb_substr($basename, 0, $suffix_pos);
  405.         }
  406.  
  407.         return $basename;
  408.     }
  409.  
  410.     /**
  411.      * Checks if MultiBytes (mb) functions exists - They are needed for this Class!
  412.      *
  413.      * @throws Exception - Warning MB-Functions not found
  414.      */
  415.     private static function checkMultiByteFunctions() {
  416.         if(! function_exists('mb_detect_encoding') || ! function_exists('mb_convert_encoding'))
  417.             throw new Exception('[Security]: Can\'t find mb_*_encoding functions, make sure that you have them in your PHP-Install');
  418.         if(! function_exists('mb_substr') || ! function_exists('mb_strlen') || ! function_exists('mb_strripos') || ! function_exists('mb_stripos'))
  419.             throw new Exception('[Security]: Can\'t find mb_str* functions, make sure that you have them in your PHP-Install');
  420.     }
  421. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement