fix bug with XML matcher handling missing children

In the XMLMatches matcher used by assertXmlEqual(), it did not
correctly handle the case where a child element of an element <foo>
was expected but the actual element <foo> was totally empty.  In this
scenario, an error like the following would be generated:

    nova.tests.unit.test_matchers.TestXMLMatchesOrderedExtraChildren.test_describe_difference
    -----------------------------------------------------------------------------------------

    Captured traceback:
    ~~~~~~~~~~~~~~~~~~~
        Traceback (most recent call last):
          File ".../nova/.tox/py27/lib/python2.7/site-packages/testtools/tests/matchers/helpers.py", line 32, in test_describe_difference
            mismatch = matcher.match(matchee)
          File "nova/tests/unit/matchers.py", line 431, in match
            actual.getroot(), state, None)
          File "nova/tests/unit/matchers.py", line 520, in _compare_node
            state, actual_child_idx)
          File "nova/tests/unit/matchers.py", line 520, in _compare_node
            state, actual_child_idx)
          File "nova/tests/unit/matchers.py", line 534, in _compare_node
            actual_child_idx + 1)
        UnboundLocalError: local variable 'actual_child_idx' referenced before assignment

actual_child_idx was being used when constructing the XMLExpectedChild
XMLMismatch object, in order to indicate that the missing child was
expected at that position in the list of XML siblings.  However this
was only being set as a side-effect of the loop

    for actual_child_idx in range(...):
        ....

which is brittle in general, but also specifically is guaranteed to
break when the start and end parameters supplied to range() are equal
- in this case actual_child_idx will not get set.

While fixing this reference to the uninitialized variable, it became
apparent that the XMLExpectedChild subclass of XMLMismatch was
generating a slightly misleading error which claimed that the missing
child was expected at a certain index even when the matcher was set
with allow_mixed_nodes=True in order to not require strict ordering of
siblings.

It also became apparent that the use of a context manager for tracking
the XMLMatchState was not appropriate, because the state was stored in
a single object which got mutated as the XML tree was recursed, and
this did not allow for bubbling XMLMismatch objects up the stack which
would accurately preserve the details of the mismatch.

Bearing these issues in mind, make the following changes:

  - Avoid the use of the potentially uninitialized actual_child_idx
    variable.

  - Generate a new XMLMatchState object for each layer of recursion.

  - When an XMLExpectedChild or XMLUnexpectedChild mismatch is
    encountered, bubble that all the way up to the caller.

  - Allow the idx parameter to XMLExpectedChild to be None, when
    strict ordering is not required.

  - Add test cases to cover these scenarios.

Since comparison of XML trees is a generic task, in the future most
likely it would make sense to remove this code from nova, and use
an upstream alternative such as xmldiff:

    https://xmldiff.readthedocs.io/en/stable/

Change-Id: I2dc912d4e3059ab86d414937223f766a1b893900
This commit is contained in:
Adam Spiers 2019-03-01 12:21:07 +00:00
parent a8c065dea9
commit ffd81eb107
2 changed files with 229 additions and 82 deletions

View File

@ -16,6 +16,7 @@
"""Matcher classes to be used inside of the testtools assertThat framework."""
import copy
import pprint
from lxml import etree
@ -339,7 +340,11 @@ class XMLUnexpectedChild(XMLMismatch):
class XMLExpectedChild(XMLMismatch):
"""Expected child not present in XML."""
"""Expected child not present in XML.
idx indicates at which position the child was expected.
If idx is None, that indicates that strict ordering was not required.
"""
def __init__(self, state, tag, idx):
super(XMLExpectedChild, self).__init__(state)
@ -347,9 +352,15 @@ class XMLExpectedChild(XMLMismatch):
self.idx = idx
def describe(self):
return ("%(path)s: XML expected child element <%(tag)s> "
"not present at index %(idx)d" %
{'path': self.path, 'tag': self.tag, 'idx': self.idx})
s = ("%(path)s: XML expected child element <%(tag)s> "
"not present" %
{'path': self.path, 'tag': self.tag})
# If we are not requiring strict ordering then the child element
# can be expected at any index, so don't claim that it is expected
# at a particular one.
if self.idx is not None:
s += " at index %d" % self.idx
return s
class XMLMatchState(object):
@ -364,31 +375,30 @@ class XMLMatchState(object):
self.expected = expected
self.actual = actual
def __enter__(self):
pass
def __exit__(self, exc_type, exc_value, exc_tb):
self.path.pop()
return False
def __str__(self):
return '/' + '/'.join(self.path)
def node(self, tag, idx):
"""Adds tag and index to the path; they will be popped off when
the corresponding 'with' statement exits.
"""Returns a new state based on the current one, with tag and idx
appended to the path. We avoid appending in place and popping
on exit from the context of the comparison at this level in
the XML tree, because this would mutate state objects embedded
in XMLMismatch objects which are bubbled up through recursive
calls to _compare_nodes. This would result in a misleading
error by the time the XMLMismatch object surfaced at the top
of the assertThat() part of the stack.
:param tag: The element tag
:param idx: If not None, the integer index of the element
within its parent. Not included in the path
element if None.
"""
new_state = copy.deepcopy(self)
if idx is not None:
self.path.append("%s[%d]" % (tag, idx))
new_state.path.append("%s[%d]" % (tag, idx))
else:
self.path.append(tag)
return self
new_state.path.append(tag)
return new_state
class XMLMatches(object):
@ -463,77 +473,86 @@ class XMLMatches(object):
if expected.tag != actual.tag:
return XMLTagMismatch(state, idx, expected.tag, actual.tag)
with state.node(expected.tag, idx):
# Compare the attribute keys
expected_attrs = set(expected.attrib.keys())
actual_attrs = set(actual.attrib.keys())
if expected_attrs != actual_attrs:
expected_only = expected_attrs - actual_attrs
actual_only = actual_attrs - expected_attrs
return XMLAttrKeysMismatch(state, expected_only, actual_only)
new_state = state.node(expected.tag, idx)
# Compare the attribute values
for key in expected_attrs:
expected_value = expected.attrib[key]
actual_value = actual.attrib[key]
# Compare the attribute keys
expected_attrs = set(expected.attrib.keys())
actual_attrs = set(actual.attrib.keys())
if expected_attrs != actual_attrs:
expected_only = expected_attrs - actual_attrs
actual_only = actual_attrs - expected_attrs
return XMLAttrKeysMismatch(new_state, expected_only, actual_only)
if self.skip_values.intersection(
[expected_value, actual_value]):
continue
elif expected_value != actual_value:
return XMLAttrValueMismatch(state, key, expected_value,
actual_value)
# Compare the attribute values
for key in expected_attrs:
expected_value = expected.attrib[key]
actual_value = actual.attrib[key]
# Compare text nodes
text_nodes_mismatch = self._compare_text_nodes(
expected, actual, state)
if text_nodes_mismatch:
return text_nodes_mismatch
if self.skip_values.intersection(
[expected_value, actual_value]):
continue
elif expected_value != actual_value:
return XMLAttrValueMismatch(new_state, key, expected_value,
actual_value)
# Compare the contents of the node
matched_actual_child_idxs = set()
# first_actual_child_idx - pointer to next actual child
# used with allow_mixed_nodes=False ONLY
# prevent to visit actual child nodes twice
first_actual_child_idx = 0
for expected_child in expected:
if expected_child.tag in self.SKIP_TAGS:
continue
related_actual_child_idx = None
# Compare text nodes
text_nodes_mismatch = self._compare_text_nodes(
expected, actual, new_state)
if text_nodes_mismatch:
return text_nodes_mismatch
if self.allow_mixed_nodes:
first_actual_child_idx = 0
for actual_child_idx in range(
first_actual_child_idx, len(actual)):
if actual[actual_child_idx].tag in self.SKIP_TAGS:
first_actual_child_idx += 1
continue
if actual_child_idx in matched_actual_child_idxs:
continue
# Compare the nodes
result = self._compare_node(expected_child,
actual[actual_child_idx],
state, actual_child_idx)
# Compare the contents of the node
matched_actual_child_idxs = set()
# first_actual_child_idx - pointer to next actual child
# used with allow_mixed_nodes=False ONLY
# prevent to visit actual child nodes twice
first_actual_child_idx = 0
result = None
for expected_child in expected:
if expected_child.tag in self.SKIP_TAGS:
continue
related_actual_child_idx = None
if self.allow_mixed_nodes:
first_actual_child_idx = 0
for actual_child_idx in range(
first_actual_child_idx, len(actual)):
if actual[actual_child_idx].tag in self.SKIP_TAGS:
first_actual_child_idx += 1
if result is not True:
if self.allow_mixed_nodes:
continue
else:
return result
else: # nodes match
related_actual_child_idx = actual_child_idx
break
if related_actual_child_idx is not None:
matched_actual_child_idxs.add(actual_child_idx)
continue
if actual_child_idx in matched_actual_child_idxs:
continue
# Compare the nodes
result = self._compare_node(expected_child,
actual[actual_child_idx],
new_state, actual_child_idx)
first_actual_child_idx += 1
if result is not True:
if self.allow_mixed_nodes:
continue
else:
return result
else: # nodes match
related_actual_child_idx = actual_child_idx
break
if related_actual_child_idx is not None:
matched_actual_child_idxs.add(actual_child_idx)
else:
if isinstance(result, XMLExpectedChild) or \
isinstance(result, XMLUnexpectedChild):
return result
if self.allow_mixed_nodes:
expected_child_idx = None
else:
return XMLExpectedChild(state, expected_child.tag,
actual_child_idx + 1)
# Make sure we consumed all nodes in actual
for actual_child_idx, actual_child in enumerate(actual):
if (actual_child.tag not in self.SKIP_TAGS and
actual_child_idx not in matched_actual_child_idxs):
return XMLUnexpectedChild(state, actual_child.tag,
actual_child_idx)
expected_child_idx = first_actual_child_idx
return XMLExpectedChild(new_state, expected_child.tag,
expected_child_idx)
# Make sure we consumed all nodes in actual
for actual_child_idx, actual_child in enumerate(actual):
if (actual_child.tag not in self.SKIP_TAGS and
actual_child_idx not in matched_actual_child_idxs):
return XMLUnexpectedChild(new_state, actual_child.tag,
actual_child_idx)
# The nodes match
return True

View File

@ -452,7 +452,7 @@ class TestXMLMatchesUnorderedNodes(testtools.TestCase,
]
describe_examples = [
("/root: XML expected child element <text> not present at index 4",
("/root: XML expected child element <text> not present",
"""<?xml version="1.0"?>
<root>
<text>some text here</text>
@ -467,3 +467,131 @@ class TestXMLMatchesUnorderedNodes(testtools.TestCase,
]
str_examples = []
class TestXMLMatchesOrderedMissingChildren(testtools.TestCase,
helpers.TestMatchersInterface):
matches_matcher = matchers.XMLMatches("""<?xml version="1.0"?>
<root>
<children>
<child1 />
<child2>
<foo>subchild</foo>
</child2>
</children>
</root>""", allow_mixed_nodes=False)
matches_matches = []
matches_mismatches = []
describe_examples = [
("/root/children[0]/child2[1]: XML expected child element <foo> not "
"present at index 0",
"""<?xml version="1.0"?>
<root>
<children>
<child1 />
<child2 />
</children>
</root>""", matches_matcher),
]
str_examples = []
class TestXMLMatchesUnorderedMissingChildren(testtools.TestCase,
helpers.TestMatchersInterface):
matches_matcher = matchers.XMLMatches("""<?xml version="1.0"?>
<root>
<children>
<child1 />
<child2>
<foo>subchild</foo>
</child2>
</children>
</root>""", allow_mixed_nodes=True)
matches_matches = []
matches_mismatches = []
describe_examples = [
("/root/children[0]/child2[1]: XML expected child element <foo> not "
"present",
"""<?xml version="1.0"?>
<root>
<children>
<child1 />
<child2 />
</children>
</root>""", matches_matcher),
]
str_examples = []
class TestXMLMatchesOrderedExtraChildren(testtools.TestCase,
helpers.TestMatchersInterface):
matches_matcher = matchers.XMLMatches("""<?xml version="1.0"?>
<root>
<children>
<child1 />
<child2 />
</children>
</root>""", allow_mixed_nodes=False)
matches_matches = []
matches_mismatches = []
describe_examples = [
("/root/children[0]/child2[1]: XML unexpected child element <foo> "
"present at index 0",
"""<?xml version="1.0"?>
<root>
<children>
<child1 />
<child2>
<foo>subchild</foo>
</child2>
</children>
</root>""", matches_matcher),
]
str_examples = []
class TestXMLMatchesUnorderedExtraChildren(testtools.TestCase,
helpers.TestMatchersInterface):
matches_matcher = matchers.XMLMatches("""<?xml version="1.0"?>
<root>
<children>
<child1 />
<child2 />
</children>
</root>""", allow_mixed_nodes=True)
matches_matches = []
matches_mismatches = []
describe_examples = [
("/root/children[0]/child2[1]: XML unexpected child element <foo> "
"present at index 0",
"""<?xml version="1.0"?>
<root>
<children>
<child1 />
<child2>
<foo>subchild</foo>
</child2>
</children>
</root>""", matches_matcher),
]
str_examples = []