Advertisement
Guest User

Untitled

a guest
Apr 21st, 2015
185
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.23 KB | None | 0 0
  /**
   * Turn a markup string into a node tree owned by $dom.
   *
   * The content is first parsed as well-formed XML through
   * DOMDocumentFragment::appendXML(). When that fails, the content is handed
   * to $this->htmlToXml() so that malformed HTML can still be converted.
   * Because of that fallback this code has to run with an object bound to
   * $this.
   *
   * @param DOMDocument $dom     document used to create the returned fragment
   * @param mixed       $content markup to convert; empty values yield false,
   *                             but the string '0' is treated as real content
   *
   * @return DOMDocumentFragment|DOMNode|false the parsed fragment, the result
   *         of htmlToXml() when XML parsing failed, or false without content
   */
  function tryToXml($dom,$content) {
      // '0' is falsy in PHP but is perfectly valid text content, so only the
      // genuinely empty values are rejected here.
      if (!$content && $content !== '0') {
          return false;
      }

      // well-formed XML can be loaded straight into a fragment of the node tree
      $fragment = $dom->createDocumentFragment();

      // appendXML() fails on an XML declaration, so strip it together with
      // anything that sits between the declaration and the first tag after it.
      if (strncmp($content, '<?xml', 5) === 0) {
          $declarationEnd = strpos($content, '>');
          // An unterminated declaration is left untouched: appendXML() then
          // fails and the HTML fallback below takes over.
          if ($declarationEnd !== false) {
              $content = (string) substr($content, $declarationEnd + 1);
              $firstTag = strpos($content, '<');
              if ($firstTag !== false) {
                  $content = substr($content, $firstTag);
              }
          }
      }

      // Collect libxml parse errors internally instead of silencing every
      // error with "@", and restore the caller's setting afterwards.
      $previousSetting = libxml_use_internal_errors(true);
      try {
          $parsed = $fragment->appendXML($content);
      } finally {
          if (!$previousSetting) {
              // drop only the noise collected while we had switched it on
              libxml_clear_errors();
          }
          libxml_use_internal_errors($previousSetting);
      }

      // not well-formed XML: let htmlToXml() repair it as HTML
      if (!$parsed) {
          return $this->htmlToXml($dom, $content);
      }

      return $fragment;
  }
  23.  
  24.  
  25.  
  /**
   * Convert (possibly malformed) HTML into nodes that belong to $dom.
   *
   * The HTML is loaded into a scratch DOMDocument, cleaned by
   * $this->reworkDom(), and the result is imported into $dom. $dom itself is
   * only used to create and own the returned nodes; nothing is attached to it.
   *
   * @param DOMDocument $dom          document that will own the returned nodes
   * @param mixed       $content      HTML to convert; empty values yield false,
   *                                  but the string '0' is treated as content
   * @param bool        $needEncoding when true, a Content-Type meta tag naming
   *                                  $this->encoding is prepended to $content
   * @param bool        $bodyOnly     when true, return a fragment holding the
   *                                  children of /html/body; otherwise return
   *                                  the imported root element
   *
   * @return DOMDocumentFragment|DOMNode|false false when there is no content,
   *         or when the root element is requested but parsing produced none
   */
  function htmlToXml($dom, $content, $needEncoding=false, $bodyOnly=true) {

      // no xml when html is empty ('0' is falsy in PHP but is real content)
      if (!$content && $content !== '0') {
          return false;
      }

      if ($needEncoding) {
          // no character conversion needed: loadHTML() honours the charset
          // declared in a Content-Type meta tag
          $content = '<meta http-equiv="Content-Type" content="text/html;charset='.$this->encoding.'">' . $content;
      }

      // scratch document that receives the parsed HTML
      $domInject = new DOMDocument("1.0", "UTF-8");
      $domInject->preserveWhiteSpace = false;
      $domInject->formatOutput = true;

      // Bad HTML makes libxml report warnings, not exceptions, so collect
      // them internally and restore the caller's setting afterwards.
      $previousSetting = libxml_use_internal_errors(true);
      try {
          $domInject->loadHTML($content);
      } finally {
          if (!$previousSetting) {
              // drop only the noise collected while we had switched it on
              libxml_clear_errors();
          }
          libxml_use_internal_errors($previousSetting);
      }

      $this->reworkDom($domInject);

      // null when the parser could not build any element from the content
      $root = $domInject->documentElement;

      if (!$bodyOnly) {
          return $root === null ? false : $dom->importNode($root, true);
      }

      $fragment = $dom->createDocumentFragment();
      if ($root === null) {
          return $fragment;
      }

      // copy every node found within /html/body into the fragment
      foreach ($root->childNodes as $elementLevel1) {
          if ($elementLevel1->nodeType === XML_ELEMENT_NODE && $elementLevel1->nodeName === 'body') {
              foreach ($elementLevel1->childNodes as $elementInject) {
                  // importNode() deep-copies, so the source list is not modified while iterating
                  $fragment->appendChild($dom->importNode($elementInject, true));
              }
          }
      }

      return $fragment;
  }
  70.  
  71.  
  72.  
  /**
   * Recursively clean the children of $node in place.
   *
   * Element children are descended into, text and CDATA children are left
   * alone, every "-" inside a comment is replaced by "_", and document type
   * and processing-instruction children are removed from $node.
   *
   * @param DOMNode $node  node whose children are walked
   * @param int     $level recursion depth; only passed along, incremented by one per level
   *
   * @throws Exception when a child has any other node type
   */
  protected function reworkDom( $node, $level = 0 ) {
      for ($current = $node->firstChild; $current; $current = $following) {
          // remember the sibling up front: $current may be detached below
          $following = $current->nextSibling;
          $type = $current->nodeType;

          if ($type == XML_ELEMENT_NODE) {
              // walk the element's own children
              $this->reworkDom($current, $level + 1);
          } elseif ($type == XML_TEXT_NODE || $type == XML_CDATA_SECTION_NODE) {
              // text and cdata stay untouched
          } elseif ($type == XML_COMMENT_NODE) {
              // swap hyphens for underscores inside the comment text
              $current->nodeValue = str_replace("-", "_", $current->nodeValue);
          } elseif ($type == XML_DOCUMENT_TYPE_NODE || $type == XML_PI_NODE) {
              // doctype (10) and processing instructions (7) are dropped
              $node->removeChild($current);
          } else {
              // includes XML_DOCUMENT_NODE and XML_HTML_DOCUMENT_NODE, which
              // are not expected among the children
              throw new Exception("Engine: reworkDom type not declared [".$type. "]");
          }
      }
  }
  111.  
  // Demo: convert a deliberately malformed HTML snippet (unclosed <font>)
  // and print the resulting <info> element, HTML-escaped.
  // NOTE(review): tryToXml() reaches for $this->htmlToXml() when appendXML()
  // rejects the input, so calling it as a plain function presumably fails for
  // this snippet; confirm whether it should be called on an object instead.
  $c='<p>test<font>two</p>';
  $dom=new DOMDocument('1.0', 'UTF-8');

  $n=$dom->appendChild($dom->createElement('info')); // make a root element

  if( $valueXml=tryToXml($dom,$c) ) {
      $n->appendChild($valueXml);
  }
  // open the <pre> element properly; '<pre/>' self-closed it and left the
  // closing tag orphaned
  echo '<pre>'. htmlentities($dom->saveXml($n)). '</pre>';
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement