Published on 17 Dec 2010. Tagged with php, algorithmicadvent.
<?php
/**
 * Translates a numeric DOM node type into the name of the matching XML_* constant.
 *
 * The lookup covers every node type constant defined by PHP's DOM extension
 * (values 1 through 18), including the HTML document node and the DTD
 * declaration nodes.
 *
 * @param int $nodeType Value of a DOMNode's nodeType property.
 *
 * @return string The constant name, e.g. 'XML_ELEMENT_NODE', or 'UNKNOWN' when
 *                the value is not one of the listed node types.
 */
function nodeTypeToString($nodeType)
{
    // Built once and reused across calls.
    static $map = array(
        1  => 'XML_ELEMENT_NODE',
        2  => 'XML_ATTRIBUTE_NODE',
        3  => 'XML_TEXT_NODE',
        4  => 'XML_CDATA_SECTION_NODE',
        5  => 'XML_ENTITY_REFERENCE_NODE',
        6  => 'XML_ENTITY_NODE',
        7  => 'XML_PROCESSING_INSTRUCTION_NODE',
        8  => 'XML_COMMENT_NODE',
        9  => 'XML_DOCUMENT_NODE',
        10 => 'XML_DOCUMENT_TYPE_NODE',
        11 => 'XML_DOCUMENT_FRAGMENT_NODE',
        12 => 'XML_NOTATION_NODE',
        13 => 'XML_HTML_DOCUMENT_NODE',
        14 => 'XML_DTD_NODE',
        15 => 'XML_ELEMENT_DECL_NODE',
        16 => 'XML_ATTRIBUTE_DECL_NODE',
        17 => 'XML_ENTITY_DECL_NODE',
        18 => 'XML_NAMESPACE_DECL_NODE'
    );

    if (isset($map[$nodeType])) {
        return $map[$nodeType];
    }

    return 'UNKNOWN';
}
// Announce UTF-8 HTML. Skip the call once output has already started: header()
// can no longer take effect at that point and would only raise a warning.
if (!headers_sent()) {
    header('Content-Type: text/html; charset=UTF-8');
}

// Sample document that exercises several node types: a doctype with an
// internal subset, elements, an attribute, text, a comment and a CDATA section.
$doc = new DOMDocument();
// Ask the parser to drop whitespace-only text nodes so they do not clutter the dump.
$doc->preserveWhiteSpace = false;
// In the value of the xx entity the percent sign is written as the character
// reference &#37; because a literal parameter-entity reference inside a markup
// declaration is not well-formed in the internal DTD subset.
$loaded = $doc->loadXML('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!DOCTYPE html [
<!ELEMENT test (#PCDATA) >
<!ENTITY % xx "&#37;zz;">
<!ENTITY % zz "<">
]>
<html>
<div>Some text<!--a comment-->
<strong class="test">some <![CDATA[<tag>]]> more text</strong>
</div></html>');
if (!$loaded) {
    // Without a parsed document there is nothing meaningful to dump.
    exit('<p>The sample XML document could not be parsed.</p>');
}
/**
 * Prints one HTML table row for every descendant of $node, depth first.
 *
 * Each row has three cells: the node name (indented according to its depth),
 * the node type name produced by nodeTypeToString(), and the HTML-escaped
 * node value. Inside the value, line feeds, carriage returns and tabs are
 * shown as the literal sequences \n, \r and \t, and spaces are made
 * non-breaking so that whitespace stays visible. An empty value is shown as
 * {empty}; any other value is wrapped in double quotes.
 *
 * @param DOMNode $node   Node whose children are printed; $node itself is not.
 * @param int     $indent Number of non-breaking spaces placed before each
 *                        child's name; grows by 4 per nesting level.
 *
 * @return void
 */
function rec(DOMNode $node, $indent = 0)
{
    if (!$node->hasChildNodes()) {
        return;
    }

    foreach ($node->childNodes as $child) {
        // Runs of plain spaces collapse when HTML is rendered, so the
        // indentation is built from non-breaking spaces instead.
        $name = str_repeat('&nbsp;', $indent)
            . htmlspecialchars($child->nodeName, ENT_QUOTES, 'UTF-8');

        // nodeValue is null for some node types; the cast keeps
        // htmlspecialchars() from being handed null.
        $nv = htmlspecialchars((string) $child->nodeValue, ENT_QUOTES, 'UTF-8');
        // Replace whitespace after escaping so the inserted entity is not
        // escaped a second time.
        $nv = str_replace(
            array("\n", "\r", "\t", ' '),
            array('\n', '\r', '\t', '&nbsp;'),
            $nv
        );
        if ($nv === '') {
            $nv = '{empty}';
        } else {
            $nv = '"' . $nv . '"';
        }

        echo '<tr>';
        echo '<td>' . $name . '</td>';
        echo '<td>' . nodeTypeToString($child->nodeType) . '</td>';
        echo '<td>' . $nv . '</td>';
        echo '</tr>';

        rec($child, $indent + 4);
    }
}
// Dump the whole document as a three-column table: the opening tag and the
// header row first, then one row per node, then the closing tag.
echo '<table border="1">',
    '<tr><th>nodeName</th><th>nodeType</th><th>nodeValue</th></tr>';
rec($doc);
echo '</table>';