Advertisement
blistovmhz

Untitled

Oct 3rd, 2018
248
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.83 KB | None | 0 0
  1. <?php
  2.  
  3.  
  /**
   * Decode numeric HTML entities in a string into UTF-8 characters.
   *
   * Serves as the worker behind the preg_replace_callback() calls in this
   * script.
   *
   * @param string $entity Text containing numeric entities such as "&#233;".
   * @return string The text with every entity whose code point lies in the
   *                Unicode range replaced by its UTF-8 encoded character.
   */
  function utf8_entity_decode($entity){
      // The conversion map is [first code point, last code point, offset, mask].
      // It spans the whole Unicode range (U+0000..U+10FFFF); the former upper
      // bound of 0x10000 left supplementary-plane entities (for example
      // "&#128512;") undecoded.
      $convmap = array(0x0, 0x10FFFF, 0, 0x1FFFFF);
      return mb_decode_numericentity($entity, $convmap, 'UTF-8');
  }
  9.  
  // Read the entire document from standard input.
  $xmlDoc = file_get_contents("php://stdin");

  // Fixed numeric entities and the text each one is replaced with.
  // "&" and "<" stay escaped as named entities; the remaining entries are
  // flattened to plain ASCII. str_replace() walks the pairs in array order,
  // each pair operating on the result of the previous one.
  // NOTE(review): &#147;, &#8220; and &#8221; become a single quote (') —
  // confirm that is intended.
  // A middle dot (·) was previously considered as the replacement for &#149;.
  $literalEntities = array(
      "&#34;"   => '"',
      "&#38;"   => "&amp;",
      "&#39;"   => "'",
      "&#60;"   => "&lt;",
      "&#133;"  => "...",
      "&#145;"  => "'",
      "&#146;"  => "'",
      "&#147;"  => "'",
      "&#149;"  => "-",
      "&#151;"  => "-",
      "&#8212;" => "-",
      "&#8217;" => "'",
      "&#8220;" => "'",
      "&#8221;" => "'",
  );
  $xmlDoc = str_replace(
      array_keys($literalEntities),
      array_values($literalEntities),
      $xmlDoc
  );
  27.  
  // Decode decimal numeric entities (2 to 5 digits) into UTF-8 characters.
  // With the /u modifier preg_replace_callback() returns null when the subject
  // is not valid UTF-8; the result is therefore only kept on success so the
  // document is never silently replaced by null.
  $decimalDecoded = preg_replace_callback(
      '/&#\d{2,5};/u',
      function ($match) {
          return utf8_entity_decode($match[0]);
      },
      $xmlDoc
  );
  if ($decimalDecoded !== null) {
      $xmlDoc = $decimalDecoded;
  } else {
      trigger_error('Decimal entity decoding failed; document left unchanged', E_USER_WARNING);
  }

  // Decode hexadecimal numeric entities (2 to 8 hex digits).
  // The digit class is 0-9; it used to be 0-7, which skipped every entity
  // containing an 8 or a 9 (for example &#x2019; or &#xA9;).
  // NOTE(review): the decimal forms &#38; and &#60; are mapped to &amp; and
  // &lt; earlier in the script, but the hex forms &#x26; and &#x3c; are decoded
  // to raw characters here — confirm whether that is intended.
  $hexDecoded = preg_replace_callback(
      '/&#x([a-fA-F0-9]{2,8});/u',
      function ($match) {
          // Re-express the entity in decimal so the shared helper can decode it.
          return utf8_entity_decode('&#' . hexdec($match[1]) . ';');
      },
      $xmlDoc
  );
  if ($hexDecoded !== null) {
      $xmlDoc = $hexDecoded;
  } else {
      trigger_error('Hexadecimal entity decoding failed; document left unchanged', E_USER_WARNING);
  }
  47.  
  // Handle double-escaped entities of the form "&amp;#NNN;":
  // 160 becomes a plain space, 172 and 173 are removed outright.
  $xmlDoc = str_replace(
      array("&amp;#160;", "&amp;#172;", "&amp;#173;"),
      array(" ", "", ""),
      $xmlDoc
  );
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement