Expansion of matching XML strings logic

Extended class XMLMatches with the following features:
1. Ability to ignore ordering of XML nodes;
2. Matching version and encoding of XMLs;
3. Ability to skip empty text nodes (nodes which contain only whitespace).

Related-Bug: #1350287
Change-Id: I74b0660d2b09b3c6251ec0eabf37978fa5d03004
This commit is contained in:
Marian Horban 2015-01-02 10:33:02 -05:00 committed by Daniel P. Berrange
parent 3ae2ddd4a2
commit b8885fc6ec
2 changed files with 228 additions and 48 deletions

View File

@ -17,6 +17,7 @@
"""Matcher classes to be used inside of the testtools assertThat framework.""" """Matcher classes to be used inside of the testtools assertThat framework."""
import pprint import pprint
import StringIO
from lxml import etree from lxml import etree
from testtools import content from testtools import content
@ -233,6 +234,27 @@ class XMLMismatch(object):
} }
class XMLDocInfoMismatch(XMLMismatch):
    """Mismatch reported when XML version or encoding differ.

    Both ``expected_doc_info`` and ``actual_doc_info`` are mappings that
    provide the ``'version'`` and ``'encoding'`` keys.
    """

    def __init__(self, state, expected_doc_info, actual_doc_info):
        super(XMLDocInfoMismatch, self).__init__(state)
        self.expected_doc_info = expected_doc_info
        self.actual_doc_info = actual_doc_info

    def describe(self):
        """Return a one-line description of the differing declarations."""
        template = ("%(path)s: XML information mismatch(version, encoding) "
                    "expected version %(expected_version)s, "
                    "expected encoding %(expected_encoding)s; "
                    "actual version %(actual_version)s, "
                    "actual encoding %(actual_encoding)s")
        # NOTE(review): ``self.path`` is not set in this class; presumably
        # the XMLMismatch base class provides it -- confirm.
        values = {
            'path': self.path,
            'expected_version': self.expected_doc_info['version'],
            'expected_encoding': self.expected_doc_info['encoding'],
            'actual_version': self.actual_doc_info['version'],
            'actual_encoding': self.actual_doc_info['encoding'],
        }
        return template % values
class XMLTagMismatch(XMLMismatch): class XMLTagMismatch(XMLMismatch):
"""XML tags don't match.""" """XML tags don't match."""
@ -371,24 +393,61 @@ class XMLMatchState(object):
class XMLMatches(object): class XMLMatches(object):
"""Compare XML strings. More complete than string comparison.""" """Compare XML strings. More complete than string comparison."""
def __init__(self, expected): SKIP_TAGS = (etree.Comment, etree.ProcessingInstruction)
def __init__(self, expected, allow_mixed_nodes=False,
skip_empty_text_nodes=True, skip_values=('DONTCARE',)):
self.expected_xml = expected self.expected_xml = expected
self.expected = etree.fromstring(expected) self.expected = etree.parse(StringIO.StringIO(expected))
self.allow_mixed_nodes = allow_mixed_nodes
self.skip_empty_text_nodes = skip_empty_text_nodes
self.skip_values = set(skip_values)
def __str__(self): def __str__(self):
return 'XMLMatches(%r)' % self.expected_xml return 'XMLMatches(%r)' % self.expected_xml
def match(self, actual_xml): def match(self, actual_xml):
actual = etree.fromstring(actual_xml) actual = etree.parse(StringIO.StringIO(actual_xml))
state = XMLMatchState(self.expected_xml, actual_xml) state = XMLMatchState(self.expected_xml, actual_xml)
result = self._compare_node(self.expected, actual, state, None) expected_doc_info = self._get_xml_docinfo(self.expected)
actual_doc_info = self._get_xml_docinfo(actual)
if expected_doc_info != actual_doc_info:
return XMLDocInfoMismatch(state, expected_doc_info,
actual_doc_info)
result = self._compare_node(self.expected.getroot(),
actual.getroot(), state, None)
if result is False: if result is False:
return XMLMismatch(state) return XMLMismatch(state)
elif result is not True: elif result is not True:
return result return result
@staticmethod
def _get_xml_docinfo(xml_document):
return {'version': xml_document.docinfo.xml_version,
'encoding': xml_document.docinfo.encoding}
def _compare_text_nodes(self, expected, actual, state):
expected_text = [expected.text]
expected_text.extend(child.tail for child in expected)
actual_text = [actual.text]
actual_text.extend(child.tail for child in actual)
if self.skip_empty_text_nodes:
expected_text = [text for text in expected_text
if text and not text.isspace()]
actual_text = [text for text in actual_text
if text and not text.isspace()]
if self.skip_values.intersection(
expected_text + actual_text):
return
if self.allow_mixed_nodes:
# lets sort text nodes because they can be mixed
expected_text = sorted(expected_text)
actual_text = sorted(actual_text)
if expected_text != actual_text:
return XMLTextValueMismatch(state, expected_text, actual_text)
def _compare_node(self, expected, actual, state, idx): def _compare_node(self, expected, actual, state, idx):
"""Recursively compares nodes within the XML tree.""" """Recursively compares nodes within the XML tree."""
@ -410,57 +469,62 @@ class XMLMatches(object):
expected_value = expected.attrib[key] expected_value = expected.attrib[key]
actual_value = actual.attrib[key] actual_value = actual.attrib[key]
if 'DONTCARE' in (expected_value, actual_value): if self.skip_values.intersection(
[expected_value, actual_value]):
continue continue
elif expected_value != actual_value: elif expected_value != actual_value:
return XMLAttrValueMismatch(state, key, expected_value, return XMLAttrValueMismatch(state, key, expected_value,
actual_value) actual_value)
# Compare text nodes
text_nodes_mismatch = self._compare_text_nodes(
expected, actual, state)
if text_nodes_mismatch:
return text_nodes_mismatch
# Compare the contents of the node # Compare the contents of the node
if len(expected) == 0 and len(actual) == 0: matched_actual_child_idxs = set()
# No children, compare text values # first_actual_child_idx - pointer to next actual child
if ('DONTCARE' not in (expected.text, actual.text) and # used with allow_mixed_nodes=False ONLY
expected.text != actual.text): # prevent to visit actual child nodes twice
return XMLTextValueMismatch(state, expected.text, first_actual_child_idx = 0
actual.text) for expected_child in expected:
else: if expected_child.tag in self.SKIP_TAGS:
expected_idx = 0 continue
actual_idx = 0 related_actual_child_idx = None
while (expected_idx < len(expected) and
actual_idx < len(actual)): if self.allow_mixed_nodes:
# Ignore comments and processing instructions first_actual_child_idx = 0
# TODO(Vek): may interpret PIs in the future, to for actual_child_idx in range(
# allow for, say, arbitrary ordering of some first_actual_child_idx, len(actual)):
# elements if actual[actual_child_idx].tag in self.SKIP_TAGS:
if (expected[expected_idx].tag in first_actual_child_idx += 1
(etree.Comment, etree.ProcessingInstruction)): continue
expected_idx += 1 if actual_child_idx in matched_actual_child_idxs:
continue continue
# Compare the nodes # Compare the nodes
result = self._compare_node(expected[expected_idx], result = self._compare_node(expected_child,
actual[actual_idx], state, actual[actual_child_idx],
actual_idx) state, actual_child_idx)
first_actual_child_idx += 1
if result is not True: if result is not True:
return result if self.allow_mixed_nodes:
# Step on to comparing the next nodes...
expected_idx += 1
actual_idx += 1
# Make sure we consumed all nodes in actual
if actual_idx < len(actual):
return XMLUnexpectedChild(state, actual[actual_idx].tag,
actual_idx)
# Make sure we consumed all nodes in expected
if expected_idx < len(expected):
for node in expected[expected_idx:]:
if (node.tag in
(etree.Comment, etree.ProcessingInstruction)):
continue continue
else:
return XMLExpectedChild(state, node.tag, actual_idx) return result
else: # nodes match
related_actual_child_idx = actual_child_idx
break
if related_actual_child_idx is not None:
matched_actual_child_idxs.add(actual_child_idx)
else:
return XMLExpectedChild(state, expected_child.tag,
actual_child_idx + 1)
# Make sure we consumed all nodes in actual
for actual_child_idx, actual_child in enumerate(actual):
if (actual_child.tag not in self.SKIP_TAGS and
actual_child_idx not in matched_actual_child_idxs):
return XMLUnexpectedChild(state, actual_child.tag,
actual_child_idx)
# The nodes match # The nodes match
return True return True

View File

@ -156,7 +156,7 @@ class TestXMLMatches(testtools.TestCase, helpers.TestMatchersInterface):
<child3>DONTCARE</child3> <child3>DONTCARE</child3>
<?spam processing instruction?> <?spam processing instruction?>
</children> </children>
</root>""") </root>""", allow_mixed_nodes=False)
matches_matches = ["""<?xml version="1.0"?> matches_matches = ["""<?xml version="1.0"?>
<root> <root>
@ -247,6 +247,32 @@ class TestXMLMatches(testtools.TestCase, helpers.TestMatchersInterface):
<child3>child 3</child3> <child3>child 3</child3>
<child4>child 4</child4> <child4>child 4</child4>
</children> </children>
</root>""",
"""<?xml version="1.0"?>
<root>
<text>some text here</text>
<text>some other text here</text>
<attrs key1="spam" key2="DONTCARE"/>
<children>
<!--This is a comment-->
<child2>child 2</child2>
<child1>child 1</child1>
<child3>DONTCARE</child3>
<?spam processing instruction?>
</children>
</root>""",
"""<?xml version="1.1"?>
<root>
<text>some text here</text>
<text>some other text here</text>
<attrs key1="spam" key2="DONTCARE"/>
<children>
<!--This is a comment-->
<child1>child 1</child1>
<child2>child 2</child2>
<child3>DONTCARE</child3>
<?spam processing instruction?>
</children>
</root>""", </root>""",
] ]
@ -268,7 +294,7 @@ class TestXMLMatches(testtools.TestCase, helpers.TestMatchersInterface):
describe_examples = [ describe_examples = [
("/root/text[1]: XML text value mismatch: expected text value: " ("/root/text[1]: XML text value mismatch: expected text value: "
"'some other text here'; actual value: 'mismatch text'", "['some other text here']; actual value: ['mismatch text']",
"""<?xml version="1.0"?> """<?xml version="1.0"?>
<root> <root>
<text>some text here</text> <text>some text here</text>
@ -345,5 +371,95 @@ class TestXMLMatches(testtools.TestCase, helpers.TestMatchersInterface):
<child3>child 3</child3> <child3>child 3</child3>
<child4>child 4</child4> <child4>child 4</child4>
</children> </children>
</root>""", matches_matcher),
("/root/children[3]: XML tag mismatch at index 0: "
"expected tag <child1>; actual tag <child2>",
"""<?xml version="1.0"?>
<root>
<text>some text here</text>
<text>some other text here</text>
<attrs key1="spam" key2="quux"/>
<children>
<child2>child 2</child2>
<child1>child 1</child1>
<child3>child 3</child3>
</children>
</root>""", matches_matcher),
("/: XML information mismatch(version, encoding) "
"expected version 1.0, expected encoding UTF-8; "
"actual version 1.1, actual encoding UTF-8",
"""<?xml version="1.1"?>
<root>
<text>some text here</text>
<text>some other text here</text>
<attrs key1="spam" key2="DONTCARE"/>
<children>
<!--This is a comment-->
<child1>child 1</child1>
<child2>child 2</child2>
<child3>DONTCARE</child3>
<?spam processing instruction?>
</children>
</root>""", matches_matcher), </root>""", matches_matcher),
] ]
# Test data for XMLMatches created with allow_mixed_nodes=True.
# NOTE(review): the class attributes below are presumably consumed by
# helpers.TestMatchersInterface -- confirm against that helper.
class TestXMLMatchesUnorderedNodes(testtools.TestCase,
helpers.TestMatchersInterface):
# Matcher under test: the expected document, with allow_mixed_nodes=True.
matches_matcher = matchers.XMLMatches("""<?xml version="1.0"?>
<root>
<text>some text here</text>
<text>some other text here</text>
<attrs key1="spam" key2="DONTCARE"/>
<children>
<child3>DONTCARE</child3>
<!--This is a comment-->
<child2>child 2</child2>
<child1>child 1</child1>
<?spam processing instruction?>
</children>
</root>""", allow_mixed_nodes=True)
# Document listed as matching: same elements as the expected document but
# in a different order (second <text> after <children>, children reordered).
matches_matches = ["""<?xml version="1.0"?>
<root>
<text>some text here</text>
<attrs key2="spam" key1="spam"/>
<children>
<child1>child 1</child1>
<child2>child 2</child2>
<child3>child 3</child3>
</children>
<text>some other text here</text>
</root>""",
]
# Document listed as not matching: it has a <text> value ('mismatch text')
# that the expected document does not contain.
matches_mismatches = ["""<?xml version="1.0"?>
<root>
<text>some text here</text>
<text>mismatch text</text>
<attrs key1="spam" key2="quux"/>
<children>
<child1>child 1</child1>
<child2>child 2</child2>
<child3>child 3</child3>
</children>
</root>""",
]
# Tuples of (expected mismatch description, document, matcher).
describe_examples = [
("/root: XML expected child element <text> not present at index 4",
"""<?xml version="1.0"?>
<root>
<text>some text here</text>
<text>mismatch text</text>
<attrs key1="spam" key2="quux"/>
<children>
<child1>child 1</child1>
<child2>child 2</child2>
<child3>child 3</child3>
</children>
</root>""", matches_matcher),
]
# No str() examples are defined for this matcher configuration.
str_examples = []