From 06bfb14e18b505707a06a5b42ea92545ba343e9d Mon Sep 17 00:00:00 2001 From: Rowan Collins Date: Thu, 9 Nov 2017 23:14:11 +0000 Subject: [PATCH] Fix issue #3: attributes with no namespace prefix not shown Oddly, the XML namespace spec leaves un-prefixed attributes as undefined behaviour, rather than inheriting the default namespace. While fixing this, I've also made most of the output ignore local aliases, because everyone should be using namespace URIs anyway. --- src/simplexml_dump.php | 146 ++++++++++++++-------------- src/simplexml_tree.php | 38 +++++--- tests/dump-output/basic | 3 +- tests/dump-output/basic-default-ns | 4 +- tests/dump-output/basic-multiple-ns | 13 +++ tests/dump-output/basic-ns | 6 +- tests/dump-output/issue-11 | 4 +- tests/dump-output/issue-3 | 15 +++ tests/dump-output/soap | 4 +- tests/input/basic-default-ns.xml | 2 +- tests/input/basic-multiple-ns.xml | 26 +++++ tests/input/basic-ns.xml | 8 +- tests/input/basic.xml | 2 +- tests/input/issue-3.xml | 4 + tests/tree-output/basic-default-ns | 6 +- tests/tree-output/basic-multiple-ns | 22 +++++ tests/tree-output/basic-ns | 8 +- tests/tree-output/issue-3 | 5 + 18 files changed, 201 insertions(+), 115 deletions(-) create mode 100644 tests/dump-output/basic-multiple-ns create mode 100644 tests/dump-output/issue-3 create mode 100644 tests/input/basic-multiple-ns.xml create mode 100644 tests/input/issue-3.xml create mode 100644 tests/tree-output/basic-multiple-ns create mode 100644 tests/tree-output/issue-3 diff --git a/src/simplexml_dump.php b/src/simplexml_dump.php index 3ad21a3..3a0094c 100644 --- a/src/simplexml_dump.php +++ b/src/simplexml_dump.php @@ -42,23 +42,23 @@ function simplexml_dump(SimpleXMLElement $sxml, $return=false) $dom_item = dom_import_simplexml($item); // To what namespace does this element or attribute belong? 
Returns array( alias => URI ) - $ns_prefix = $dom_item->prefix; - $ns_uri = $dom_item->namespaceURI; + $item_ns_alias = $dom_item->prefix; + $item_ns_uri = $dom_item->namespaceURI; if ( $dom_item instanceOf DOMAttr ) { $dump .= $indent . 'Attribute {' . PHP_EOL; - if ( ! is_null($ns_uri) ) + if ( ! is_null($item_ns_uri) ) { - $dump .= $indent . $indent . 'Namespace: \'' . $ns_uri . '\'' . PHP_EOL; - if ( $ns_prefix == '' ) + $dump .= $indent . $indent . 'Namespace: \'' . $item_ns_uri . '\'' . PHP_EOL; + if ( $item_ns_alias == '' ) { $dump .= $indent . $indent . '(Default Namespace)' . PHP_EOL; } else { - $dump .= $indent . $indent . 'Namespace Alias: \'' . $ns_prefix . '\'' . PHP_EOL; + $dump .= $indent . $indent . 'Namespace Alias: \'' . $item_ns_alias . '\'' . PHP_EOL; } } @@ -71,16 +71,16 @@ function simplexml_dump(SimpleXMLElement $sxml, $return=false) { $dump .= $indent . 'Element {' . PHP_EOL; - if ( ! is_null($ns_uri) ) + if ( ! is_null($item_ns_uri) ) { - $dump .= $indent . $indent . 'Namespace: \'' . $ns_uri . '\'' . PHP_EOL; - if ( $ns_prefix == '' ) + $dump .= $indent . $indent . 'Namespace: \'' . $item_ns_uri . '\'' . PHP_EOL; + if ( $item_ns_alias == '' ) { $dump .= $indent . $indent . '(Default Namespace)' . PHP_EOL; } else { - $dump .= $indent . $indent . 'Namespace Alias: \'' . $ns_prefix . '\'' . PHP_EOL; + $dump .= $indent . $indent . 'Namespace Alias: \'' . $item_ns_alias . '\'' . PHP_EOL; } } @@ -93,33 +93,32 @@ function simplexml_dump(SimpleXMLElement $sxml, $return=false) // This returns all namespaces used by this node and all its descendants, // whether declared in this node, in its ancestors, or in its descendants $all_ns = $item->getNamespaces(true); - // If the default namespace is never declared, it will never show up using the below code - if ( ! 
array_key_exists('', $all_ns) ) + $has_default_namespace = isset($all_ns['']); + + // If the default namespace is never declared, we need to add a dummy entry for it + // We also need to handle the odd fact that attributes are never assigned to the default namespace + // The spec basically leaves their meaning undefined: https://www.w3.org/TR/xml-names/#defaulting + if ( ! in_array(null, $all_ns, true) ) { - $all_ns[''] = NULL; + $all_ns[] = null; } - - foreach ( $all_ns as $ns_alias => $ns_uri ) + + // Prioritise "current" namespace by merging it onto the beginning of the list + // (it will be added to the beginning and the duplicate entry dropped) + $all_ns = array_unique(array_merge( + array($item_ns_uri), + $all_ns + )); + + foreach ( $all_ns as $ns_uri ) { $children = $item->children($ns_uri); $attributes = $item->attributes($ns_uri); - - // Somewhat confusingly, in the case where a parent element is missing the xmlns declaration, - // but a descendant adds it, SimpleXML will look ahead and fill $all_ns[''] incorrectly - if ( - $ns_alias == '' - && - ! is_null($ns_uri) - && - count($children) == 0 - && - count($attributes) == 0 - ) + + // Don't show children(null) if we have a default namespace defined + if ( $has_default_namespace && $ns_uri === null ) { - // Try looking for a default namespace without a known URI - $ns_uri = NULL; - $children = $item->children($ns_uri); - $attributes = $item->attributes($ns_uri); + $children = array(); } // Don't show zero-counts, as they're not that useful @@ -128,59 +127,56 @@ function simplexml_dump(SimpleXMLElement $sxml, $return=false) continue; } - $ns_label = (($ns_alias == '') ? 'Default Namespace' : "Namespace $ns_alias"); + $ns_label = ($ns_uri === null) ? 'Null Namespace' : "Namespace '$ns_uri'"; $dump .= $indent . $indent . 'Content in ' . $ns_label . PHP_EOL; - if ( ! is_null($ns_uri) ) - { - $dump .= $indent . $indent . $indent . 'Namespace URI: \'' . $ns_uri . '\'' . 
PHP_EOL; - } - - // Count occurrence of child element names, rather than listing them all out - $child_names = array(); - foreach ( $children as $sx_child ) - { - // Below is a rather clunky way of saying $child_names[ $sx_child->getName() ]++; - // which avoids Notices about unset array keys - $child_node_name = $sx_child->getName(); - if ( array_key_exists($child_node_name, $child_names) ) - { - $child_names[$child_node_name]++; - } - else - { - $child_names[$child_node_name] = 1; - } - } - ksort($child_names); - $child_name_output = array(); - foreach ( $child_names as $name => $count ) - { - $child_name_output[] = "$count '$name'"; - } - - $dump .= $indent . $indent . $indent . 'Children: ' . count($children); - // Don't output a trailing " - " if there are no children if ( count($children) > 0 ) { + // Count occurrence of child element names, rather than listing them all out + $child_names = array(); + foreach ( $children as $sx_child ) + { + // Below is a rather clunky way of saying $child_names[ $sx_child->getName() ]++; + // which avoids Notices about unset array keys + $child_node_name = $sx_child->getName(); + if ( array_key_exists($child_node_name, $child_names) ) + { + $child_names[$child_node_name]++; + } + else + { + $child_names[$child_node_name] = 1; + } + } + ksort($child_names); + $child_name_output = array(); + foreach ( $child_names as $name => $count ) + { + $child_name_output[] = "$count '$name'"; + } + + $dump .= $indent . $indent . $indent . 'Children: ' . count($children); $dump .= ' - ' . implode(', ', $child_name_output); + $dump .= PHP_EOL; } - $dump .= PHP_EOL; - - // Attributes can't be duplicated, but I'm going to put them in alphabetical order - $attribute_names = array(); - foreach ( $attributes as $sx_attribute ) - { - $attribute_names[] = "'" . $sx_attribute->getName() . "'"; - } - ksort($attribute_names); - $dump .= $indent . $indent . $indent . 'Attributes: ' . 
count($attributes); - // Don't output a trailing " - " if there are no attributes + if ( count($attributes) > 0 ) { - $dump .= ' - ' . implode(', ', $attribute_names); + // Attributes can't be duplicated, but I'm going to put them in alphabetical order + $attribute_names = array(); + foreach ( $attributes as $sx_attribute ) + { + $attribute_names[] = "'" . $sx_attribute->getName() . "'"; + } + ksort($attribute_names); + $dump .= $indent . $indent . $indent . 'Attributes: ' . count($attributes); + // Don't output a trailing " - " if there are no attributes + if ( count($attributes) > 0 ) + { + $dump .= ' - ' . implode(', ', $attribute_names); + } + $dump .= PHP_EOL; } - $dump .= PHP_EOL; } $dump .= $indent . '}' . PHP_EOL; diff --git a/src/simplexml_tree.php b/src/simplexml_tree.php index a80a865..6968e49 100644 --- a/src/simplexml_tree.php +++ b/src/simplexml_tree.php @@ -145,32 +145,44 @@ function _simplexml_tree_recursively_process_node($item, $depth, $include_string // This returns all namespaces used by this node and all its descendants, // whether declared in this node, in its ancestors, or in its descendants $all_ns = $item->getNamespaces(true); - // If the default namespace is never declared, it will never show up using the below code - if ( ! array_key_exists('', $all_ns) ) + $has_default_namespace = isset($all_ns['']); + + // If the default namespace is never declared, we need to add a dummy entry for it + // We also need to handle the odd fact that attributes are never assigned to the default namespace + // The spec basically leaves their meaning undefined: https://www.w3.org/TR/xml-names/#defaulting + if ( ! 
in_array(null, $all_ns, true) ) { - $all_ns[''] = NULL; + $all_ns[] = null; } // Prioritise "current" namespace by merging into onto the beginning of the list - // (it will be added to the beginning and the duplicate entry dropped) - $all_ns = array_merge( - array($item_ns_prefix => $item_ns_uri), + // (it will be added to the beginning and the duplicate entry dropped) + $all_ns = array_unique(array_merge( + array($item_ns_uri), $all_ns - ); + )); - foreach ( $all_ns as $ns_alias => $ns_uri ) + foreach ( $all_ns as $ns_uri ) { - $children = $item->children($ns_alias, true); - $attributes = $item->attributes($ns_alias, true); - + $children = $item->children($ns_uri); + $attributes = $item->attributes($ns_uri); + + // Don't show children(null) if we have a default namespace defined + if ( $has_default_namespace && $ns_uri === null ) + { + $children = array(); + } + // If things are in the current namespace, display them a bit differently $is_current_namespace = ( $ns_uri == $item_ns_uri ); + $force_attribute_namespace = ($ns_uri === null && $item_ns_uri !== null); + $ns_uri_quoted = (strlen($ns_uri) == 0 ? 'null' : "'$ns_uri'"); if ( count($attributes) > 0 ) { - if ( ! $is_current_namespace ) + if ( ! $is_current_namespace || $force_attribute_namespace ) { $dump .= str_repeat($indent, $depth) . "->attributes($ns_uri_quoted)" . PHP_EOL; @@ -179,7 +191,7 @@ function _simplexml_tree_recursively_process_node($item, $depth, $include_string foreach ( $attributes as $sx_attribute ) { // Output the attribute - if ( $is_current_namespace ) + if ( $is_current_namespace && ! $force_attribute_namespace ) { // In current namespace // e.g. 
['attribName'] diff --git a/tests/dump-output/basic b/tests/dump-output/basic index 510fe3b..a7359d1 100644 --- a/tests/dump-output/basic +++ b/tests/dump-output/basic @@ -5,8 +5,7 @@ SimpleXML object (1 item) String Content: ' ' - Content in Default Namespace + Content in Null Namespace Children: 1 - 1 'movie' - Attributes: 0 } ] diff --git a/tests/dump-output/basic-default-ns b/tests/dump-output/basic-default-ns index cf178ad..109c6a7 100644 --- a/tests/dump-output/basic-default-ns +++ b/tests/dump-output/basic-default-ns @@ -7,9 +7,7 @@ SimpleXML object (1 item) String Content: ' ' - Content in Default Namespace - Namespace URI: 'https://github.com/IMSoP/simplexml_debug' + Content in Namespace 'https://github.com/IMSoP/simplexml_debug' Children: 1 - 1 'movie' - Attributes: 0 } ] diff --git a/tests/dump-output/basic-multiple-ns b/tests/dump-output/basic-multiple-ns new file mode 100644 index 0000000..2f7a94d --- /dev/null +++ b/tests/dump-output/basic-multiple-ns @@ -0,0 +1,13 @@ +SimpleXML object (1 item) +[ + Element { + Namespace: 'https://github.com/IMSoP/simplexml_debug#movies' + Namespace Alias: 'movies' + Name: 'movies' + String Content: ' + +' + Content in Namespace 'https://github.com/IMSoP/simplexml_debug#movies' + Children: 1 - 1 'movie' + } +] diff --git a/tests/dump-output/basic-ns b/tests/dump-output/basic-ns index 34de153..1d399a7 100644 --- a/tests/dump-output/basic-ns +++ b/tests/dump-output/basic-ns @@ -1,13 +1,13 @@ SimpleXML object (1 item) [ Element { + Namespace: 'https://github.com/IMSoP/simplexml_debug' + Namespace Alias: 'test' Name: 'movies' String Content: ' ' - Content in Namespace test - Namespace URI: 'https://github.com/IMSoP/simplexml_debug' + Content in Namespace 'https://github.com/IMSoP/simplexml_debug' Children: 1 - 1 'movie' - Attributes: 0 } ] diff --git a/tests/dump-output/issue-11 b/tests/dump-output/issue-11 index 8a77221..77a71e5 100644 --- a/tests/dump-output/issue-11 +++ b/tests/dump-output/issue-11 @@ -3,9 +3,7 @@ 
SimpleXML object (1 item) Element { Name: 'notinnamespace' String Content: '' - Content in Namespace test - Namespace URI: 'http://example.com' - Children: 0 + Content in Namespace 'http://example.com' Attributes: 1 - 'isinnamespace' } ] diff --git a/tests/dump-output/issue-3 b/tests/dump-output/issue-3 new file mode 100644 index 0000000..9ea7468 --- /dev/null +++ b/tests/dump-output/issue-3 @@ -0,0 +1,15 @@ +SimpleXML object (1 item) +[ + Element { + Namespace: 'http://example.com' + (Default Namespace) + Name: 'Foo' + String Content: ' + +' + Content in Namespace 'http://example.com' + Children: 1 - 1 'MaskingChildElement' + Content in Null Namespace + Attributes: 1 - 'InvisibleAttribute' + } +] diff --git a/tests/dump-output/soap b/tests/dump-output/soap index e1e31a2..8c846e3 100644 --- a/tests/dump-output/soap +++ b/tests/dump-output/soap @@ -8,9 +8,7 @@ SimpleXML object (1 item) ' - Content in Namespace soap - Namespace URI: 'http://schemas.xmlsoap.org/soap/envelope/' + Content in Namespace 'http://schemas.xmlsoap.org/soap/envelope/' Children: 2 - 1 'Body', 1 'Header' - Attributes: 0 } ] diff --git a/tests/input/basic-default-ns.xml b/tests/input/basic-default-ns.xml index ce2b9df..68507cc 100644 --- a/tests/input/basic-default-ns.xml +++ b/tests/input/basic-default-ns.xml @@ -23,4 +23,4 @@ 7 5 - \ No newline at end of file + diff --git a/tests/input/basic-multiple-ns.xml b/tests/input/basic-multiple-ns.xml new file mode 100644 index 0000000..c1870ff --- /dev/null +++ b/tests/input/basic-multiple-ns.xml @@ -0,0 +1,26 @@ + + + + PHP: Behind the Parser + + + Ms. Coder + Onlivia Actora + + + Mr. Coder + El ActÓr + + + + So, this language. It's like, a programming language. + Or is it a scripting language? All is revealed in this + thrilling horror spoof of a documentary. 
+ + + PHP solves all my web problems + + 7 + 5 + + diff --git a/tests/input/basic-ns.xml b/tests/input/basic-ns.xml index 9b3afbd..f86c432 100644 --- a/tests/input/basic-ns.xml +++ b/tests/input/basic-ns.xml @@ -1,5 +1,5 @@ - + PHP: Behind the Parser @@ -20,7 +20,7 @@ PHP solves all my web problems - 7 - 5 + 7 + 5 - \ No newline at end of file + diff --git a/tests/input/basic.xml b/tests/input/basic.xml index a1d4621..2246039 100644 --- a/tests/input/basic.xml +++ b/tests/input/basic.xml @@ -23,4 +23,4 @@ 7 5 - \ No newline at end of file + diff --git a/tests/input/issue-3.xml b/tests/input/issue-3.xml new file mode 100644 index 0000000..a94a0b7 --- /dev/null +++ b/tests/input/issue-3.xml @@ -0,0 +1,4 @@ + + + + diff --git a/tests/tree-output/basic-default-ns b/tests/tree-output/basic-default-ns index 67aa4c3..88518ae 100644 --- a/tests/tree-output/basic-default-ns +++ b/tests/tree-output/basic-default-ns @@ -14,6 +14,8 @@ SimpleXML object (1 item) ->great-lines[0] ->line[0] ->rating[0] - ['type'] + ->attributes(null) + ->type ->rating[1] - ['type'] + ->attributes(null) + ->type diff --git a/tests/tree-output/basic-multiple-ns b/tests/tree-output/basic-multiple-ns new file mode 100644 index 0000000..faef5af --- /dev/null +++ b/tests/tree-output/basic-multiple-ns @@ -0,0 +1,22 @@ +SimpleXML object (1 item) +[0] // + ->children('https://github.com/IMSoP/simplexml_debug#movies') + ->movie[0] + ->title[0] + ->characters[0] + ->character[0] + ->name[0] + ->actor[0] + ->character[1] + ->name[0] + ->actor[0] + ->plot[0] + ->children('https://github.com/IMSoP/simplexml_debug#reviews') + ->great-lines[0] + ->line[0] + ->rating[0] + ->attributes(null) + ->type + ->rating[1] + ->attributes(null) + ->type diff --git a/tests/tree-output/basic-ns b/tests/tree-output/basic-ns index 88518ae..a459a21 100644 --- a/tests/tree-output/basic-ns +++ b/tests/tree-output/basic-ns @@ -1,5 +1,5 @@ SimpleXML object (1 item) -[0] // +[0] // 
->children('https://github.com/IMSoP/simplexml_debug') ->movie[0] ->title[0] @@ -14,8 +14,6 @@ SimpleXML object (1 item) ->great-lines[0] ->line[0] ->rating[0] - ->attributes(null) - ->type + ['type'] ->rating[1] - ->attributes(null) - ->type + ['type'] diff --git a/tests/tree-output/issue-3 b/tests/tree-output/issue-3 new file mode 100644 index 0000000..fe1fa55 --- /dev/null +++ b/tests/tree-output/issue-3 @@ -0,0 +1,5 @@ +SimpleXML object (1 item) +[0] // + ['InvisibleAttribute'] + ->children('http://example.com') + ->MaskingChildElement[0]