Expansion of matching XML strings logic

Extended class XMLMatches with the following features: 1. Ability to ignore the ordering of XML nodes; 2. Matching of the version and encoding of XML documents; 3. Ability to skip empty text nodes (nodes that contain only whitespace). Related-Bug: #1350287 Change-Id: I74b0660d2b09b3c6251ec0eabf37978fa5d03004
2015-01-02 10:33:02 -05:00 · 2015-01-02 10:33:02 -05:00 · b8885fc6ec
parent 3ae2ddd4a2
commit b8885fc6ec
2 changed files with 228 additions and 48 deletions
--- a/nova/tests/unit/matchers.py
+++ b/nova/tests/unit/matchers.py
@ -17,6 +17,7 @@
 """Matcher classes to be used inside of the testtools assertThat framework."""
 import pprint
 import StringIO
 from lxml import etree
 from testtools import content
@ -233,6 +234,27 @@ class XMLMismatch(object):
        }
 class XMLDocInfoMismatch(XMLMismatch):
    """Mismatch reported when the XML version or encoding differs.

    Both ``expected_doc_info`` and ``actual_doc_info`` are mappings that
    are read through the keys ``'version'`` and ``'encoding'``.
    """

    def __init__(self, state, expected_doc_info, actual_doc_info):
        """Store the match state and both document-info mappings."""
        super(XMLDocInfoMismatch, self).__init__(state)
        self.expected_doc_info = expected_doc_info
        self.actual_doc_info = actual_doc_info

    def describe(self):
        """Return a one-line description of the differing doc info."""
        # NOTE(review): ``self.path`` is not assigned in this class;
        # presumably it is provided by XMLMismatch -- confirm.
        template = ("%(path)s: XML information mismatch(version, encoding) "
                    "expected version %(expected_version)s, "
                    "expected encoding %(expected_encoding)s; "
                    "actual version %(actual_version)s, "
                    "actual encoding %(actual_encoding)s")
        values = {
            'path': self.path,
            'expected_version': self.expected_doc_info['version'],
            'expected_encoding': self.expected_doc_info['encoding'],
            'actual_version': self.actual_doc_info['version'],
            'actual_encoding': self.actual_doc_info['encoding'],
        }
        return template % values
 class XMLTagMismatch(XMLMismatch):
    """XML tags don't match."""
@ -371,24 +393,61 @@ class XMLMatchState(object):
 class XMLMatches(object):
    """Compare XML strings.  More complete than string comparison."""
-    def __init__(self, expected):
+    SKIP_TAGS = (etree.Comment, etree.ProcessingInstruction)
    def __init__(self, expected, allow_mixed_nodes=False,
                 skip_empty_text_nodes=True, skip_values=('DONTCARE',)):
        self.expected_xml = expected
-        self.expected = etree.fromstring(expected)
+        self.expected = etree.parse(StringIO.StringIO(expected))
        self.allow_mixed_nodes = allow_mixed_nodes
        self.skip_empty_text_nodes = skip_empty_text_nodes
        self.skip_values = set(skip_values)
    def __str__(self):
        return 'XMLMatches(%r)' % self.expected_xml
    def match(self, actual_xml):
-        actual = etree.fromstring(actual_xml)
+        actual = etree.parse(StringIO.StringIO(actual_xml))
        state = XMLMatchState(self.expected_xml, actual_xml)
-        result = self._compare_node(self.expected, actual, state, None)
+        expected_doc_info = self._get_xml_docinfo(self.expected)
        actual_doc_info = self._get_xml_docinfo(actual)
        if expected_doc_info != actual_doc_info:
            return XMLDocInfoMismatch(state, expected_doc_info,
                                      actual_doc_info)
        result = self._compare_node(self.expected.getroot(),
                                    actual.getroot(), state, None)
        if result is False:
            return XMLMismatch(state)
        elif result is not True:
            return result
    @staticmethod
    def _get_xml_docinfo(xml_document):
        return {'version': xml_document.docinfo.xml_version,
                'encoding': xml_document.docinfo.encoding}
    def _compare_text_nodes(self, expected, actual, state):
        expected_text = [expected.text]
        expected_text.extend(child.tail for child in expected)
        actual_text = [actual.text]
        actual_text.extend(child.tail for child in actual)
        if self.skip_empty_text_nodes:
            expected_text = [text for text in expected_text
                             if text and not text.isspace()]
            actual_text = [text for text in actual_text
                           if text and not text.isspace()]
        if self.skip_values.intersection(
                        expected_text + actual_text):
            return
        if self.allow_mixed_nodes:
            # lets sort text nodes because they can be mixed
            expected_text = sorted(expected_text)
            actual_text = sorted(actual_text)
        if expected_text != actual_text:
            return XMLTextValueMismatch(state, expected_text, actual_text)
    def _compare_node(self, expected, actual, state, idx):
        """Recursively compares nodes within the XML tree."""
@ -410,57 +469,62 @@ class XMLMatches(object):
                expected_value = expected.attrib[key]
                actual_value = actual.attrib[key]
-                if 'DONTCARE' in (expected_value, actual_value):
+                if self.skip_values.intersection(
                        [expected_value, actual_value]):
                    continue
                elif expected_value != actual_value:
                    return XMLAttrValueMismatch(state, key, expected_value,
                                                actual_value)
            # Compare text nodes
            text_nodes_mismatch = self._compare_text_nodes(
                expected, actual, state)
            if text_nodes_mismatch:
                return text_nodes_mismatch
            # Compare the contents of the node
-            if len(expected) == 0 and len(actual) == 0:
+            matched_actual_child_idxs = set()
-                # No children, compare text values
+            # first_actual_child_idx - pointer to next actual child
-                if ('DONTCARE' not in (expected.text, actual.text) and
+            # used with allow_mixed_nodes=False ONLY
-                        expected.text != actual.text):
+            # prevent to visit actual child nodes twice
-                    return XMLTextValueMismatch(state, expected.text,
+            first_actual_child_idx = 0
-                                                actual.text)
+            for expected_child in expected:
-            else:
+                if expected_child.tag in self.SKIP_TAGS:
-                expected_idx = 0
+                    continue
-                actual_idx = 0
+                related_actual_child_idx = None
-                while (expected_idx < len(expected) and
+
-                       actual_idx < len(actual)):
+                if self.allow_mixed_nodes:
-                    # Ignore comments and processing instructions
+                    first_actual_child_idx = 0
-                    # TODO(Vek): may interpret PIs in the future, to
+                for actual_child_idx in range(
-                    # allow for, say, arbitrary ordering of some
+                        first_actual_child_idx, len(actual)):
-                    # elements
+                    if actual[actual_child_idx].tag in self.SKIP_TAGS:
-                    if (expected[expected_idx].tag in
+                        first_actual_child_idx += 1
-                            (etree.Comment, etree.ProcessingInstruction)):
+                        continue
-                        expected_idx += 1
+                    if actual_child_idx in matched_actual_child_idxs:
                        continue
                    # Compare the nodes
-                    result = self._compare_node(expected[expected_idx],
+                    result = self._compare_node(expected_child,
-                                                actual[actual_idx], state,
+                                                actual[actual_child_idx],
-                                                actual_idx)
+                                                state, actual_child_idx)
                    first_actual_child_idx += 1
                    if result is not True:
-                        return result
+                        if self.allow_mixed_nodes:
                    # Step on to comparing the next nodes...
                    expected_idx += 1
                    actual_idx += 1
                # Make sure we consumed all nodes in actual
                if actual_idx < len(actual):
                    return XMLUnexpectedChild(state, actual[actual_idx].tag,
                                              actual_idx)
                # Make sure we consumed all nodes in expected
                if expected_idx < len(expected):
                    for node in expected[expected_idx:]:
                        if (node.tag in
                                (etree.Comment, etree.ProcessingInstruction)):
                            continue
-
+                        else:
-                        return XMLExpectedChild(state, node.tag, actual_idx)
+                            return result
-
+                    else:  # nodes match
                        related_actual_child_idx = actual_child_idx
                        break
                if related_actual_child_idx is not None:
                    matched_actual_child_idxs.add(actual_child_idx)
                else:
                    return XMLExpectedChild(state, expected_child.tag,
                                            actual_child_idx + 1)
            # Make sure we consumed all nodes in actual
            for actual_child_idx, actual_child in enumerate(actual):
                if (actual_child.tag not in self.SKIP_TAGS and
                        actual_child_idx not in matched_actual_child_idxs):
                    return XMLUnexpectedChild(state, actual_child.tag,
                                              actual_child_idx)
        # The nodes match
        return True
--- a/nova/tests/unit/test_matchers.py
+++ b/nova/tests/unit/test_matchers.py
@ -156,7 +156,7 @@ class TestXMLMatches(testtools.TestCase, helpers.TestMatchersInterface):
    <child3>DONTCARE</child3>
    <?spam processing instruction?>
  </children>
-</root>""")
+</root>""", allow_mixed_nodes=False)
    matches_matches = ["""<?xml version="1.0"?>
 <root>
@ -247,6 +247,32 @@ class TestXMLMatches(testtools.TestCase, helpers.TestMatchersInterface):
    <child3>child 3</child3>
    <child4>child 4</child4>
  </children>
 </root>""",
                          """<?xml version="1.0"?>
 <root>
  <text>some text here</text>
  <text>some other text here</text>
  <attrs key1="spam" key2="DONTCARE"/>
  <children>
    <!--This is a comment-->
    <child2>child 2</child2>
    <child1>child 1</child1>
    <child3>DONTCARE</child3>
    <?spam processing instruction?>
  </children>
 </root>""",
                          """<?xml version="1.1"?>
 <root>
  <text>some text here</text>
  <text>some other text here</text>
  <attrs key1="spam" key2="DONTCARE"/>
  <children>
    <!--This is a comment-->
    <child1>child 1</child1>
    <child2>child 2</child2>
    <child3>DONTCARE</child3>
    <?spam processing instruction?>
  </children>
 </root>""",
    ]
@ -268,7 +294,7 @@ class TestXMLMatches(testtools.TestCase, helpers.TestMatchersInterface):
    describe_examples = [
        ("/root/text[1]: XML text value mismatch: expected text value: "
-         "'some other text here'; actual value: 'mismatch text'",
+         "['some other text here']; actual value: ['mismatch text']",
         """<?xml version="1.0"?>
 <root>
  <text>some text here</text>
@ -345,5 +371,95 @@ class TestXMLMatches(testtools.TestCase, helpers.TestMatchersInterface):
    <child3>child 3</child3>
    <child4>child 4</child4>
  </children>
 </root>""", matches_matcher),
        ("/root/children[3]: XML tag mismatch at index 0: "
         "expected tag <child1>; actual tag <child2>",
         """<?xml version="1.0"?>
 <root>
  <text>some text here</text>
  <text>some other text here</text>
  <attrs key1="spam" key2="quux"/>
  <children>
    <child2>child 2</child2>
    <child1>child 1</child1>
    <child3>child 3</child3>
  </children>
 </root>""", matches_matcher),
        ("/: XML information mismatch(version, encoding) "
         "expected version 1.0, expected encoding UTF-8; "
         "actual version 1.1, actual encoding UTF-8",
         """<?xml version="1.1"?>
 <root>
  <text>some text here</text>
  <text>some other text here</text>
  <attrs key1="spam" key2="DONTCARE"/>
  <children>
    <!--This is a comment-->
    <child1>child 1</child1>
    <child2>child 2</child2>
    <child3>DONTCARE</child3>
    <?spam processing instruction?>
  </children>
 </root>""", matches_matcher),
    ]
 class TestXMLMatchesUnorderedNodes(testtools.TestCase,
                                   helpers.TestMatchersInterface):
    """Fixtures for XMLMatches created with allow_mixed_nodes=True.

    The class body only defines data attributes; presumably they are
    consumed by helpers.TestMatchersInterface -- confirm against that
    helper.
    """
    # Matcher under test: node ordering is allowed to differ, and the
    # document uses 'DONTCARE' for one attribute and one text value.
    matches_matcher = matchers.XMLMatches("""<?xml version="1.0"?>
 <root>
  <text>some text here</text>
  <text>some other text here</text>
  <attrs key1="spam" key2="DONTCARE"/>
  <children>
    <child3>DONTCARE</child3>
    <!--This is a comment-->
    <child2>child 2</child2>
    <child1>child 1</child1>
    <?spam processing instruction?>
  </children>
 </root>""", allow_mixed_nodes=True)
    # Document listed as matching: the second <text> follows <children>,
    # child1..child3 appear in the opposite order to the matcher document,
    # and the comment and processing instruction are absent.
    matches_matches = ["""<?xml version="1.0"?>
 <root>
  <text>some text here</text>
  <attrs key2="spam" key1="spam"/>
  <children>
    <child1>child 1</child1>
    <child2>child 2</child2>
    <child3>child 3</child3>
  </children>
  <text>some other text here</text>
 </root>""",
    ]
    # Document listed as not matching: the second <text> element holds
    # 'mismatch text' instead of 'some other text here'.
    matches_mismatches = ["""<?xml version="1.0"?>
 <root>
  <text>some text here</text>
  <text>mismatch text</text>
  <attrs key1="spam" key2="quux"/>
  <children>
    <child1>child 1</child1>
    <child2>child 2</child2>
    <child3>child 3</child3>
  </children>
 </root>""",
    ]
    # Each entry is a (description string, XML document, matcher) tuple;
    # presumably the description is the expected mismatch text -- confirm.
    describe_examples = [
        ("/root: XML expected child element <text> not present at index 4",
         """<?xml version="1.0"?>
 <root>
  <text>some text here</text>
  <text>mismatch text</text>
  <attrs key1="spam" key2="quux"/>
  <children>
    <child1>child 1</child1>
    <child2>child 2</child2>
    <child3>child 3</child3>
  </children>
 </root>""", matches_matcher),
    ]
    # No string-representation examples are defined for this matcher.
    str_examples = []