From b8885fc6ec3160dc858dce7a21f83ea1d7458118 Mon Sep 17 00:00:00 2001
From: Marian Horban <mhorban@mirantis.com>
Date: Fri, 2 Jan 2015 10:33:02 -0500
Subject: [PATCH] Expansion of matching XML strings logic

Extended class XMLMatches with the following features:
1. Ability to ignore the ordering of XML nodes;
2. Matching of the XML version and encoding;
3. Ability to skip empty text nodes (nodes which contain whitespace only).

Related-Bug: #1350287
Change-Id: I74b0660d2b09b3c6251ec0eabf37978fa5d03004
---
 nova/tests/unit/matchers.py      | 156 ++++++++++++++++++++++---------
 nova/tests/unit/test_matchers.py | 120 +++++++++++++++++++++++-
 2 files changed, 228 insertions(+), 48 deletions(-)

diff --git a/nova/tests/unit/matchers.py b/nova/tests/unit/matchers.py
index b392e3e852c1..a8842a970877 100644
--- a/nova/tests/unit/matchers.py
+++ b/nova/tests/unit/matchers.py
@@ -17,6 +17,7 @@
 """Matcher classes to be used inside of the testtools assertThat framework."""
 
 import pprint
+import StringIO
 
 from lxml import etree
 from testtools import content
@@ -233,6 +234,27 @@ class XMLMismatch(object):
         }
 
 
+class XMLDocInfoMismatch(XMLMismatch):
+    """Mismatch reported when XML version or encoding differ."""
+
+    def __init__(self, state, expected_doc_info, actual_doc_info):
+        super(XMLDocInfoMismatch, self).__init__(state)
+        self.actual_doc_info = actual_doc_info
+        self.expected_doc_info = expected_doc_info
+
+    def describe(self):
+        exp, act = self.expected_doc_info, self.actual_doc_info
+        values = {'path': self.path, 'expected_version': exp['version'],
+                  'expected_encoding': exp['encoding'],
+                  'actual_version': act['version'],
+                  'actual_encoding': act['encoding']}
+        return ("%(path)s: XML information mismatch(version, encoding) "
+                "expected version %(expected_version)s, "
+                "expected encoding %(expected_encoding)s; "
+                "actual version %(actual_version)s, "
+                "actual encoding %(actual_encoding)s" % values)
+
+
 class XMLTagMismatch(XMLMismatch):
     """XML tags don't match."""
 
@@ -371,24 +393,61 @@ class XMLMatchState(object):
 class XMLMatches(object):
     """Compare XML strings.  More complete than string comparison."""
 
-    def __init__(self, expected):
+    SKIP_TAGS = (etree.Comment, etree.ProcessingInstruction)
+
+    def __init__(self, expected, allow_mixed_nodes=False,
+                 skip_empty_text_nodes=True, skip_values=('DONTCARE',)):
         self.expected_xml = expected
-        self.expected = etree.fromstring(expected)
+        self.expected = etree.parse(StringIO.StringIO(expected))
+        self.allow_mixed_nodes = allow_mixed_nodes
+        self.skip_empty_text_nodes = skip_empty_text_nodes
+        self.skip_values = set(skip_values)
 
     def __str__(self):
         return 'XMLMatches(%r)' % self.expected_xml
 
     def match(self, actual_xml):
-        actual = etree.fromstring(actual_xml)
+        actual = etree.parse(StringIO.StringIO(actual_xml))
 
         state = XMLMatchState(self.expected_xml, actual_xml)
-        result = self._compare_node(self.expected, actual, state, None)
+        expected_doc_info = self._get_xml_docinfo(self.expected)
+        actual_doc_info = self._get_xml_docinfo(actual)
+        if expected_doc_info != actual_doc_info:
+            return XMLDocInfoMismatch(state, expected_doc_info,
+                                      actual_doc_info)
+        result = self._compare_node(self.expected.getroot(),
+                                    actual.getroot(), state, None)
 
         if result is False:
             return XMLMismatch(state)
         elif result is not True:
             return result
 
+    @staticmethod
+    def _get_xml_docinfo(xml_document):
+        info = xml_document.docinfo  # read the docinfo attribute once
+        return {'version': info.xml_version, 'encoding': info.encoding}
+
+    def _compare_text_nodes(self, expected, actual, state):
+        """Compare text/tail strings; return a mismatch or None."""
+        def collect(node):
+            texts = [node.text] + [child.tail for child in node]
+            if self.skip_empty_text_nodes:
+                texts = [t for t in texts if t and not t.isspace()]
+            return texts
+
+        expected_text = collect(expected)
+        actual_text = collect(actual)
+        if self.skip_values.intersection(expected_text + actual_text):
+            return None
+        if self.allow_mixed_nodes:
+            # text nodes may appear in any order, so compare them sorted
+            expected_text.sort()
+            actual_text.sort()
+        if expected_text != actual_text:
+            return XMLTextValueMismatch(state, expected_text, actual_text)
+        return None
+
     def _compare_node(self, expected, actual, state, idx):
         """Recursively compares nodes within the XML tree."""
 
@@ -410,57 +469,62 @@ class XMLMatches(object):
                 expected_value = expected.attrib[key]
                 actual_value = actual.attrib[key]
 
-                if 'DONTCARE' in (expected_value, actual_value):
+                if self.skip_values.intersection(
+                        [expected_value, actual_value]):
                     continue
                 elif expected_value != actual_value:
                     return XMLAttrValueMismatch(state, key, expected_value,
                                                 actual_value)
 
+            # Compare text nodes
+            text_nodes_mismatch = self._compare_text_nodes(
+                expected, actual, state)
+            if text_nodes_mismatch:
+                return text_nodes_mismatch
+
             # Compare the contents of the node
-            if len(expected) == 0 and len(actual) == 0:
-                # No children, compare text values
-                if ('DONTCARE' not in (expected.text, actual.text) and
-                        expected.text != actual.text):
-                    return XMLTextValueMismatch(state, expected.text,
-                                                actual.text)
-            else:
-                expected_idx = 0
-                actual_idx = 0
-                while (expected_idx < len(expected) and
-                       actual_idx < len(actual)):
-                    # Ignore comments and processing instructions
-                    # TODO(Vek): may interpret PIs in the future, to
-                    # allow for, say, arbitrary ordering of some
-                    # elements
-                    if (expected[expected_idx].tag in
-                            (etree.Comment, etree.ProcessingInstruction)):
-                        expected_idx += 1
+            matched_actual_child_idxs = set()
+            # first_actual_child_idx - pointer to next actual child
+            # used with allow_mixed_nodes=False ONLY
+            # prevent to visit actual child nodes twice
+            first_actual_child_idx = 0
+            for expected_child in expected:
+                if expected_child.tag in self.SKIP_TAGS:
+                    continue
+                related_actual_child_idx = None
+
+                if self.allow_mixed_nodes:
+                    first_actual_child_idx = 0
+                for actual_child_idx in range(
+                        first_actual_child_idx, len(actual)):
+                    if actual[actual_child_idx].tag in self.SKIP_TAGS:
+                        first_actual_child_idx += 1
+                        continue
+                    if actual_child_idx in matched_actual_child_idxs:
                         continue
-
                     # Compare the nodes
-                    result = self._compare_node(expected[expected_idx],
-                                                actual[actual_idx], state,
-                                                actual_idx)
+                    result = self._compare_node(expected_child,
+                                                actual[actual_child_idx],
+                                                state, actual_child_idx)
+                    first_actual_child_idx += 1
                     if result is not True:
-                        return result
-
-                    # Step on to comparing the next nodes...
-                    expected_idx += 1
-                    actual_idx += 1
-
-                # Make sure we consumed all nodes in actual
-                if actual_idx < len(actual):
-                    return XMLUnexpectedChild(state, actual[actual_idx].tag,
-                                              actual_idx)
-
-                # Make sure we consumed all nodes in expected
-                if expected_idx < len(expected):
-                    for node in expected[expected_idx:]:
-                        if (node.tag in
-                                (etree.Comment, etree.ProcessingInstruction)):
+                        if self.allow_mixed_nodes:
                             continue
-
-                        return XMLExpectedChild(state, node.tag, actual_idx)
-
+                        else:
+                            return result
+                    else:  # nodes match
+                        related_actual_child_idx = actual_child_idx
+                        break
+                if related_actual_child_idx is None:
+                    # actual_child_idx is unbound when actual has no children
+                    return XMLExpectedChild(state, expected_child.tag,
+                                            len(actual))
+                matched_actual_child_idxs.add(related_actual_child_idx)
+            # Make sure we consumed all nodes in actual
+            for actual_child_idx, actual_child in enumerate(actual):
+                if (actual_child.tag not in self.SKIP_TAGS and
+                        actual_child_idx not in matched_actual_child_idxs):
+                    return XMLUnexpectedChild(state, actual_child.tag,
+                                              actual_child_idx)
         # The nodes match
         return True
diff --git a/nova/tests/unit/test_matchers.py b/nova/tests/unit/test_matchers.py
index 77fefafca86c..4aab81b1f15d 100644
--- a/nova/tests/unit/test_matchers.py
+++ b/nova/tests/unit/test_matchers.py
@@ -156,7 +156,7 @@ class TestXMLMatches(testtools.TestCase, helpers.TestMatchersInterface):
     <child3>DONTCARE</child3>
     <?spam processing instruction?>
   </children>
-</root>""")
+</root>""", allow_mixed_nodes=False)
 
     matches_matches = ["""<?xml version="1.0"?>
 <root>
@@ -247,6 +247,32 @@ class TestXMLMatches(testtools.TestCase, helpers.TestMatchersInterface):
     <child3>child 3</child3>
     <child4>child 4</child4>
   </children>
+</root>""",
+                          """<?xml version="1.0"?>
+<root>
+  <text>some text here</text>
+  <text>some other text here</text>
+  <attrs key1="spam" key2="DONTCARE"/>
+  <children>
+    <!--This is a comment-->
+    <child2>child 2</child2>
+    <child1>child 1</child1>
+    <child3>DONTCARE</child3>
+    <?spam processing instruction?>
+  </children>
+</root>""",
+                          """<?xml version="1.1"?>
+<root>
+  <text>some text here</text>
+  <text>some other text here</text>
+  <attrs key1="spam" key2="DONTCARE"/>
+  <children>
+    <!--This is a comment-->
+    <child1>child 1</child1>
+    <child2>child 2</child2>
+    <child3>DONTCARE</child3>
+    <?spam processing instruction?>
+  </children>
 </root>""",
     ]
 
@@ -268,7 +294,7 @@ class TestXMLMatches(testtools.TestCase, helpers.TestMatchersInterface):
 
     describe_examples = [
         ("/root/text[1]: XML text value mismatch: expected text value: "
-         "'some other text here'; actual value: 'mismatch text'",
+         "['some other text here']; actual value: ['mismatch text']",
          """<?xml version="1.0"?>
 <root>
   <text>some text here</text>
@@ -345,5 +371,95 @@ class TestXMLMatches(testtools.TestCase, helpers.TestMatchersInterface):
     <child3>child 3</child3>
     <child4>child 4</child4>
   </children>
+</root>""", matches_matcher),
+        ("/root/children[3]: XML tag mismatch at index 0: "
+         "expected tag <child1>; actual tag <child2>",
+         """<?xml version="1.0"?>
+<root>
+  <text>some text here</text>
+  <text>some other text here</text>
+  <attrs key1="spam" key2="quux"/>
+  <children>
+    <child2>child 2</child2>
+    <child1>child 1</child1>
+    <child3>child 3</child3>
+  </children>
+</root>""", matches_matcher),
+        ("/: XML information mismatch(version, encoding) "
+         "expected version 1.0, expected encoding UTF-8; "
+         "actual version 1.1, actual encoding UTF-8",
+         """<?xml version="1.1"?>
+<root>
+  <text>some text here</text>
+  <text>some other text here</text>
+  <attrs key1="spam" key2="DONTCARE"/>
+  <children>
+    <!--This is a comment-->
+    <child1>child 1</child1>
+    <child2>child 2</child2>
+    <child3>DONTCARE</child3>
+    <?spam processing instruction?>
+  </children>
 </root>""", matches_matcher),
     ]
+
+
+class TestXMLMatchesUnorderedNodes(testtools.TestCase,
+                                   helpers.TestMatchersInterface):
+    """Exercise XMLMatches with allow_mixed_nodes=True."""
+    matches_matcher = matchers.XMLMatches("""<?xml version="1.0"?>
+<root>
+  <text>some text here</text>
+  <text>some other text here</text>
+  <attrs key1="spam" key2="DONTCARE"/>
+  <children>
+    <child3>DONTCARE</child3>
+    <!--This is a comment-->
+    <child2>child 2</child2>
+    <child1>child 1</child1>
+    <?spam processing instruction?>
+  </children>
+</root>""", allow_mixed_nodes=True)
+
+    matches_matches = ["""<?xml version="1.0"?>
+<root>
+  <text>some text here</text>
+  <attrs key2="spam" key1="spam"/>
+  <children>
+    <child1>child 1</child1>
+    <child2>child 2</child2>
+    <child3>child 3</child3>
+  </children>
+  <text>some other text here</text>
+</root>""",
+    ]
+
+    matches_mismatches = ["""<?xml version="1.0"?>
+<root>
+  <text>some text here</text>
+  <text>mismatch text</text>
+  <attrs key1="spam" key2="quux"/>
+  <children>
+    <child1>child 1</child1>
+    <child2>child 2</child2>
+    <child3>child 3</child3>
+  </children>
+</root>""",
+    ]
+
+    describe_examples = [
+        ("/root: XML expected child element <text> not present at index 4",
+         """<?xml version="1.0"?>
+<root>
+  <text>some text here</text>
+  <text>mismatch text</text>
+  <attrs key1="spam" key2="quux"/>
+  <children>
+    <child1>child 1</child1>
+    <child2>child 2</child2>
+    <child3>child 3</child3>
+  </children>
+</root>""", matches_matcher),
+    ]
+
+    str_examples = []  # no __str__ examples are listed for this matcher