Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
/**
 * Replaces every letter and digit in a string with a random character of the same kind.
 *
 * - ASCII uppercase letters, lowercase letters and digits are each replaced by a
 *   random member of the same class (A-Z, a-z, 0-9).
 * - Non-ASCII letters (the input is treated as UTF-8) are replaced by a random
 *   letter with the same "is uppercase" property, picked from the code points
 *   within 10 of the original. If no suitable letter is found after 10 attempts
 *   the original character is kept, so the output never loses a character or
 *   turns a letter into a non-letter.
 * - Everything else (whitespace, punctuation, symbols, emoji, and bytes that do
 *   not form a valid UTF-8 sequence) is copied through unchanged.
 *
 * Randomness comes from mt_rand(), so results are reproducible after mt_srand()
 * and are not cryptographically unpredictable.
 *
 * @param string $s Text to anonymise; non-string values are cast to string.
 * @return string The anonymised text, with the same number of characters as the input.
 */
function anonymize_letters($s) {
    $s = (string) $s;
    $out = '';
    $len = strlen($s);
    $i = 0;

    while ($i < $len) {
        $byte = ord($s[$i]);

        // Single-byte (ASCII) characters.
        if ($byte < 0x80) {
            if ($byte >= 65 && $byte <= 90) {          // A-Z
                $out .= chr(65 + \mt_rand(0, 25));
            } elseif ($byte >= 97 && $byte <= 122) {   // a-z
                $out .= chr(97 + \mt_rand(0, 25));
            } elseif ($byte >= 48 && $byte <= 57) {    // 0-9
                $out .= chr(48 + \mt_rand(0, 9));
            } else {
                $out .= $s[$i];
            }
            $i++;
            continue;
        }

        // Sequence width implied by the UTF-8 lead byte. Continuation bytes
        // (0x80-0xBF) and lead bytes that can never start a valid sequence
        // (0xC0, 0xC1, 0xF5-0xFF) get width 1 and are copied verbatim below.
        if ($byte >= 0xC2 && $byte <= 0xDF) {
            $width = 2;
        } elseif ($byte >= 0xE0 && $byte <= 0xEF) {
            $width = 3;
        } elseif ($byte >= 0xF0 && $byte <= 0xF4) {
            $width = 4;
        } else {
            $width = 1;
        }

        // substr() never reads past the end, so a truncated sequence simply
        // yields a shorter string that fails the length check.
        $char = substr($s, $i, $width);
        if ($width === 1 || strlen($char) !== $width || !mb_check_encoding($char, 'UTF-8')) {
            // Malformed input: pass one byte through and resynchronise on the next.
            $out .= $s[$i];
            $i++;
            continue;
        }
        $i += $width;

        // The "u" modifier is required so \p{L} tests the code point rather
        // than the individual bytes of the sequence.
        if (preg_match('/^\p{L}$/u', $char) !== 1) {
            $out .= $char;
            continue;
        }

        $codePoint = mb_ord($char, 'UTF-8');
        $isUpper = preg_match('/^\p{Lu}$/u', $char) === 1;
        $replacement = $char; // fallback when no nearby letter qualifies

        for ($attempt = 0; $attempt < 10; $attempt++) {
            $candidate = mb_chr($codePoint + \mt_rand(-10, 10), 'UTF-8');
            if ($candidate === false) {
                continue; // not a valid code point (e.g. a surrogate)
            }
            if (preg_match('/^\p{L}$/u', $candidate) !== 1) {
                continue;
            }
            if ((preg_match('/^\p{Lu}$/u', $candidate) === 1) === $isUpper) {
                $replacement = $candidate;
                break;
            }
        }

        $out .= $replacement;
    }

    return $out;
}
Add Comment
Please, Sign In to add comment