Add probetest for response with duplicate frags

When an SSYNC connection fails after shipping a fragment, but before
cleaning itself up, it can leave multiple replicas of the same
fragment index serviceable in the node_iter at the same time.  Perhaps
more likely, the same state arises when a partner node wins a race to
rebuild a fragment that was waiting on a handoff.  In either case, the
proxy need not fail to service a GET just because of the extra
information.  A similar guard was added to the reconstructor in a
related change [1].

The underlying bug is actually fixed by the related change [2]; this
probetest just encodes the failure to help with future maintenance.

1. Related-Change: I91f3d4af52cbc66c9f7ce00726f247b5462e66f9
2. Related-Change: I2310981fd1c4622ff5d1a739cbcc59637ffe3fc3

Closes-Bug: 1624176

Change-Id: I06fc381a25613585dd18916d50e7ca2c68d406b6
Clay Gerrard 2016-09-16 11:58:46 -07:00
parent ebcc3ac8fa
commit 116462d8a5
1 changed file with 121 additions and 1 deletion

@@ -14,12 +14,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from unittest import main
from uuid import uuid4
import random
from hashlib import md5
from collections import defaultdict
import os
import socket
import errno

from swiftclient import client
@@ -243,7 +247,7 @@ class TestObjectHandoff(ReplProbeTest):
            self.fail("Expected ClientException but didn't get it")


class TestECObjectHandoffOverwrite(ECProbeTest):
class TestECObjectHandoff(ECProbeTest):

    def get_object(self, container_name, object_name):
        headers, body = client.get_object(self.url, self.token,
@@ -340,5 +344,121 @@ class TestECObjectHandoffOverwrite(ECProbeTest):
        resp_etag = self.get_object(container_name, object_name)
        self.assertEqual(resp_etag, new_contents.etag)

    def _check_nodes(self, opart, onodes, container_name, object_name):
        found_frags = defaultdict(int)
        req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
        for node in onodes + list(self.object_ring.get_more_nodes(opart)):
            try:
                headers = direct_client.direct_head_object(
                    node, opart, self.account, container_name,
                    object_name, headers=req_headers)
            except socket.error as e:
                if e.errno != errno.ECONNREFUSED:
                    raise
            except direct_client.DirectClientException as e:
                if e.http_status != 404:
                    raise
            else:
                found_frags[headers['X-Object-Sysmeta-Ec-Frag-Index']] += 1
        return found_frags

    def test_ec_handoff_duplicate_available(self):
        container_name = 'container-%s' % uuid4()
        object_name = 'object-%s' % uuid4()

        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url, self.token, container_name,
                             headers=headers)

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(
            self.account, container_name, object_name)

        # find both primary servers that have both of their devices in
        # the primary node list
        group_nodes_by_config = defaultdict(list)
        for n in onodes:
            group_nodes_by_config[self.config_number(n)].append(n)
        double_disk_primary = []
        for config_number, node_list in group_nodes_by_config.items():
            if len(node_list) > 1:
                double_disk_primary.append((config_number, node_list))

        # sanity, in a 4+2 with 8 disks two servers will be doubled
        self.assertEqual(len(double_disk_primary), 2)

        # shutdown the first double primary
        primary0_config_number, primary0_node_list = double_disk_primary[0]
        Manager(['object-server']).stop(number=primary0_config_number)

        # PUT object
        contents = Body()
        client.put_object(self.url, self.token, container_name,
                          object_name, contents=contents)

        # sanity fetch two frags on handoffs
        handoff_frags = []
        for node in self.object_ring.get_more_nodes(opart):
            headers, data = direct_client.direct_get_object(
                node, opart, self.account, container_name, object_name,
                headers={'X-Backend-Storage-Policy-Index': int(self.policy)}
            )
            handoff_frags.append((node, headers, data))

        # bring the first double primary back, and fail the other one
        Manager(['object-server']).start(number=primary0_config_number)
        primary1_config_number, primary1_node_list = double_disk_primary[1]
        Manager(['object-server']).stop(number=primary1_config_number)

        # we can still GET the object
        resp_etag = self.get_object(container_name, object_name)
        self.assertEqual(resp_etag, contents.etag)

        # now start to "revert" the first handoff frag
        node = primary0_node_list[0]
        handoff_node, headers, data = handoff_frags[0]
        # N.B. object server api returns quoted ETag
        headers['ETag'] = headers['Etag'].strip('"')
        headers['X-Backend-Storage-Policy-Index'] = int(self.policy)
        direct_client.direct_put_object(
            node, opart,
            self.account, container_name, object_name,
            contents=data, headers=headers)

        # sanity - check available frags
        frag2count = self._check_nodes(opart, onodes,
                                       container_name, object_name)
        # ... five frags total
        self.assertEqual(sum(frag2count.values()), 5)
        # ... only 4 unique indexes
        self.assertEqual(len(frag2count), 4)

        # we can still GET the object
        resp_etag = self.get_object(container_name, object_name)
        self.assertEqual(resp_etag, contents.etag)
        # ... but we need both handoffs or we get an error
        for handoff_node, hdrs, data in handoff_frags:
            Manager(['object-server']).stop(
                number=self.config_number(handoff_node))
            with self.assertRaises(Exception) as cm:
                self.get_object(container_name, object_name)
            self.assertIn(cm.exception.http_status, (404, 503))
            Manager(['object-server']).start(
                number=self.config_number(handoff_node))

        # fix everything
        Manager(['object-server']).start(number=primary1_config_number)
        Manager(["object-reconstructor"]).once()

        # sanity - check available frags
        frag2count = self._check_nodes(opart, onodes,
                                       container_name, object_name)
        # ... six frags total
        self.assertEqual(sum(frag2count.values()), 6)
        # ... all six unique
        self.assertEqual(len(frag2count), 6)


if __name__ == '__main__':
    main()
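
Since the module wires up unittest's main() when run as a script, the
new test can be exercised on its own from a source checkout against an
SAIO-style probe-test environment.  Note that, like the rest of Swift's
probe tests, it stops and starts real object-server processes through
Manager, so it should only be pointed at a disposable test cluster.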