Fix: Incorrect replication status on clusters

When running a replicated service in a cluster, the replication_status
field in the Cluster DB table doesn't get updated in some cases, making
the cluster look like it's non-replicated:

- When starting a new clustered replicated service.
- When enabling replication on an already clustered service.
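
For illustration, the mismatch can be observed by loading both records
through cinder's versioned objects (a hypothetical snippet, not part of
the patch; the exact stale value depends on the driver and defaults):

    # Hypothetical reproduction sketch: service_id and cluster_id are
    # assumed to identify a clustered replicated c-vol service.
    from cinder import context
    from cinder import objects

    ctxt = context.get_admin_context()
    service = objects.Service.get_by_id(ctxt, service_id)
    cluster = objects.Cluster.get_by_id(ctxt, cluster_id)

    print(service.replication_status)  # 'enabled'
    print(cluster.replication_status)  # stale, so the cluster looks
                                       # non-replicated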

The reason is that the service and cluster DB entries are created in the
Service's __init__ method, while the replication status isn't known
until later, when the service's start method calls the manager's
init_host_with_rpc and the manager updates the service DB entry.
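
In outline, the ordering looks roughly like this (a simplified sketch;
only init_host_with_rpc is a real method name, the rest is illustrative):

    class Service(object):
        """Sketch of the startup ordering described above."""

        def __init__(self, manager):
            self.manager = manager
            # Service and Cluster DB rows are created here, before the
            # driver has reported whether replication is enabled.
            self._create_service_and_cluster_rows()

        def _create_service_and_cluster_rows(self):
            # Hypothetical stand-in for the real DB-entry creation.
            pass

        def start(self):
            # Only now does the manager learn the replication status
            # and update the Service DB row (and, with this patch, the
            # Cluster row too).
            self.manager.init_host_with_rpc()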

This patch updates the replication_status field in the cluster table in
that same place, making sure we store the right data.
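
Distilled, the added logic simply mirrors the service's status onto its
cluster (see the third hunk below for the full context):

    # Essence of the fix, extracted from the manager diff below.
    cluster = service.cluster
    if cluster and cluster.replication_status != service.replication_status:
        cluster.replication_status = service.replication_status
        cluster.save()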

Closes-Bug: #1758024
Change-Id: I0d2fded62372a2fc5f49538133c5b66694dccc77
commit 43b1209f2a (parent 1dbe0dcc03)
Author: Gorka Eguileor
Date:   2018-03-21 18:33:26 +01:00

3 changed files with 52 additions and 5 deletions


@@ -397,3 +397,11 @@ class FakeGateDriver(lvm.LVMVolumeDriver):
             snapshot_model_updates.append(snapshot_model_update)
 
         return model_update, snapshot_model_updates
+
+
+class FakeHAReplicatedLoggingVolumeDriver(FakeLoggingVolumeDriver):
+    SUPPORTS_ACTIVE_ACTIVE = True
+
+    @utils.trace_method
+    def failover_completed(self, context, active_backend_id=None):
+        pass


@@ -663,3 +663,33 @@ class ReplicationTestCase(base.BaseVolumeTestCase):
                                               out_volumes, out_snapshots,
                                               [], [], [],
                                               self.manager.FAILBACK_SENTINEL)
+
+    @mock.patch('cinder.utils.log_unsupported_driver_warning', mock.Mock())
+    @mock.patch('cinder.utils.require_driver_initialized', mock.Mock())
+    def test_init_host_with_rpc_clustered_replication(self):
+        # These are not OVOs but ORM instances
+        cluster = utils.create_cluster(self.context)
+        service = utils.create_service(self.context,
+                                       {'cluster_name': cluster.name,
+                                        'binary': cluster.binary})
+        self.assertNotEqual(fields.ReplicationStatus.ENABLED,
+                            cluster.replication_status)
+        self.assertNotEqual(fields.ReplicationStatus.ENABLED,
+                            service.replication_status)
+
+        vol_manager = manager.VolumeManager(
+            'cinder.tests.fake_driver.FakeHAReplicatedLoggingVolumeDriver',
+            host=service.host, cluster=cluster.name)
+        vol_manager.driver = mock.Mock()
+        vol_manager.driver.get_volume_stats.return_value = {
+            'replication_enabled': True
+        }
+
+        vol_manager.init_host_with_rpc()
+
+        cluster_ovo = objects.Cluster.get_by_id(self.context, cluster.id)
+        service_ovo = objects.Service.get_by_id(self.context, service.id)
+        self.assertEqual(fields.ReplicationStatus.ENABLED,
+                         cluster_ovo.replication_status)
+        self.assertEqual(fields.ReplicationStatus.ENABLED,
+                         service_ovo.replication_status)


@@ -522,14 +522,23 @@ class VolumeManager(manager.CleanableManager,
             with excutils.save_and_reraise_exception():
                 LOG.error("Service not found for updating replication_status.")
 
-        if service.replication_status != (
-                fields.ReplicationStatus.FAILED_OVER):
+        if service.replication_status != fields.ReplicationStatus.FAILED_OVER:
             if stats and stats.get('replication_enabled', False):
-                service.replication_status = fields.ReplicationStatus.ENABLED
+                replication_status = fields.ReplicationStatus.ENABLED
             else:
-                service.replication_status = fields.ReplicationStatus.DISABLED
-        service.save()
+                replication_status = fields.ReplicationStatus.DISABLED
+
+            if replication_status != service.replication_status:
+                service.replication_status = replication_status
+                service.save()
+
+        # Update the cluster replication status if necessary
+        cluster = service.cluster
+        if (cluster and
+                cluster.replication_status != service.replication_status):
+            cluster.replication_status = service.replication_status
+            cluster.save()
 
         LOG.info("Driver post RPC initialization completed successfully.",
                  resource={'type': 'driver',
                            'id': self.driver.__class__.__name__})