From 0db593d86c2566ee24af8f29b9f2cb2b805dc682 Mon Sep 17 00:00:00 2001
From: wangwei
Date: Thu, 8 Nov 2018 16:47:57 +0900
Subject: [PATCH] Fix the scheduler status during startup

During startup, cinder-scheduler sleeps for the time configured by the
periodic_interval option. If this value is large, such as one hour, the
cinder-scheduler service stays down for that whole period and volumes
cannot be created. This patch fixes this problem.

Change-Id: I932a725c1665add590f09fa8d26e84b79b06e159
Closes-bug: #1802249
---
 cinder/scheduler/driver.py                     |  9 ++
 cinder/scheduler/host_manager.py               | 14 ++++
 cinder/scheduler/manager.py                    |  5 +-
 .../tests/unit/scheduler/test_host_manager.py  | 83 ++++++++++++++++++-
 cinder/tests/unit/scheduler/test_scheduler.py  | 11 ++-
 5 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/cinder/scheduler/driver.py b/cinder/scheduler/driver.py
index 293ed953901..8a168b81eed 100644
--- a/cinder/scheduler/driver.py
+++ b/cinder/scheduler/driver.py
@@ -91,6 +91,15 @@ class Scheduler(object):
 
         return self.host_manager.has_all_capabilities()
 
+    def is_first_receive(self):
+        """Returns True if Scheduler receives the capabilities at startup.
+
+        This is to handle the problem of too long sleep time during scheduler
+        service startup process.
+        """
+
+        return self.host_manager.first_receive_capabilities()
+
     def update_service_capabilities(self, service_name, host, capabilities,
                                     cluster_name, timestamp):
         """Process a capability update from a service node."""
diff --git a/cinder/scheduler/host_manager.py b/cinder/scheduler/host_manager.py
index 725707623bd..ba005bb5844 100644
--- a/cinder/scheduler/host_manager.py
+++ b/cinder/scheduler/host_manager.py
@@ -509,6 +509,10 @@ class HostManager(object):
                       {'service_name': service_name, 'host': host})
             return
 
+        # Determine whether HostManager has just completed initialization, and
+        # has not received the rpc message returned by volume.
+        just_init = self._is_just_initialized()
+
         # TODO(geguileo): In P - Remove the next line since we receive the
         # timestamp
         timestamp = timestamp or timeutils.utcnow()
@@ -550,6 +554,8 @@
                        'cluster': cluster_msg})
 
         self._no_capabilities_backends.discard(backend)
+        if just_init:
+            self._update_backend_state_map(cinder_context.get_admin_context())
 
     def notify_service_capabilities(self, service_name, backend,
                                     capabilities, timestamp):
@@ -584,6 +590,14 @@
     def has_all_capabilities(self):
         return len(self._no_capabilities_backends) == 0
 
+    def _is_just_initialized(self):
+        return not self.service_states_last_update
+
+    def first_receive_capabilities(self):
+        return (not self._is_just_initialized() and
+                len(set(self.backend_state_map)) > 0 and
+                len(self._no_capabilities_backends) == 0)
+
     def _update_backend_state_map(self, context):
 
         # Get resource usage across the available volume nodes:
diff --git a/cinder/scheduler/manager.py b/cinder/scheduler/manager.py
index 9ee445fbb31..193cdda3630 100644
--- a/cinder/scheduler/manager.py
+++ b/cinder/scheduler/manager.py
@@ -104,7 +104,10 @@ class SchedulerManager(manager.CleanableManager, manager.Manager):
         ctxt = context.get_admin_context()
         self.request_service_capabilities(ctxt)
 
-        eventlet.sleep(CONF.periodic_interval)
+        for __ in range(CONF.periodic_interval):
+            if self.driver.is_first_receive():
+                break
+            eventlet.sleep(1)
         self._startup_delay = False
 
     def reset(self):
diff --git a/cinder/tests/unit/scheduler/test_host_manager.py b/cinder/tests/unit/scheduler/test_host_manager.py
index 7dd19c75bb3..37c198b663d 100644
--- a/cinder/tests/unit/scheduler/test_host_manager.py
+++ b/cinder/tests/unit/scheduler/test_host_manager.py
@@ -111,12 +111,16 @@ class HostManagerTestCase(test.TestCase):
         self.assertEqual(expected, mock_func.call_args_list)
         self.assertEqual(set(self.fake_backends), set(result))
 
+    @mock.patch(
+        'cinder.scheduler.host_manager.HostManager._is_just_initialized')
     @mock.patch('cinder.scheduler.host_manager.HostManager._get_updated_pools')
     @mock.patch('oslo_utils.timeutils.utcnow')
     def test_update_service_capabilities(self, _mock_utcnow,
-                                         _mock_get_updated_pools):
+                                         _mock_get_updated_pools,
+                                         _mock_is_just_initialized):
         service_states = self.host_manager.service_states
         self.assertDictEqual({}, service_states)
+        _mock_is_just_initialized.return_value = False
         _mock_utcnow.side_effect = [31338, 31339]
         _mock_get_updated_pools.return_value = []
 
@@ -156,14 +160,18 @@
                     'host3': host3_volume_capabs}
         self.assertDictEqual(expected, service_states)
 
+    @mock.patch(
+        'cinder.scheduler.host_manager.HostManager._is_just_initialized')
     @mock.patch(
         'cinder.scheduler.host_manager.HostManager.get_usage_and_notify')
     @mock.patch('oslo_utils.timeutils.utcnow')
     def test_update_and_notify_service_capabilities_case1(
             self, _mock_utcnow,
-            _mock_get_usage_and_notify):
+            _mock_get_usage_and_notify,
+            _mock_is_just_initialized):
 
         _mock_utcnow.side_effect = [31337, 31338, 31339]
+        _mock_is_just_initialized.return_value = False
 
         service_name = 'volume'
         capab1 = {'pools': [{
@@ -207,14 +215,18 @@
         self.assertDictEqual(dict(dict(timestamp=31339), **capab1),
                              self.host_manager_1.service_states['host1'])
 
+    @mock.patch(
+        'cinder.scheduler.host_manager.HostManager._is_just_initialized')
     @mock.patch(
         'cinder.scheduler.host_manager.HostManager.get_usage_and_notify')
     @mock.patch('oslo_utils.timeutils.utcnow')
     def test_update_and_notify_service_capabilities_case2(
             self, _mock_utcnow,
-            _mock_get_usage_and_notify):
+            _mock_get_usage_and_notify,
+            _mock_is_just_initialized):
 
         _mock_utcnow.side_effect = [31340, 31341, 31342]
+        _mock_is_just_initialized.return_value = False
 
         service_name = 'volume'
 
@@ -546,6 +558,71 @@
                                                       None, timestamp)
         self.assertTrue(self.host_manager.has_all_capabilities())
 
+    @mock.patch('cinder.objects.service.Service.is_up',
+                new_callable=mock.PropertyMock)
+    @mock.patch('cinder.db.service_get_all')
+    def test_first_receive_capabilities_case1(self, _mock_service_get_all,
+                                              _mock_service_is_up):
+        # No volume service startup
+        self.assertFalse(self.host_manager.first_receive_capabilities())
+        services = [
+            dict(id=1, host='host1', topic='volume', disabled=False,
+                 availability_zone='zone1', updated_at=timeutils.utcnow(),
+                 uuid='06acda71-b3b4-4f1b-8d87-db5c47e7ebd2', )
+        ]
+        _mock_service_get_all.return_value = services
+        _mock_service_is_up.return_value = True
+
+        timestamp = jsonutils.to_primitive(datetime.utcnow())
+        host1_volume_capabs = dict(free_capacity_gb=4321)
+
+        service_name = 'volume'
+        self.host_manager.update_service_capabilities(service_name, 'host1',
+                                                      host1_volume_capabs,
+                                                      None, timestamp)
+        self.assertTrue(self.host_manager.first_receive_capabilities())
+
+    @mock.patch('cinder.objects.service.Service.is_up',
+                new_callable=mock.PropertyMock)
+    @mock.patch('cinder.db.service_get_all')
+    def test_first_receive_capabilities_case2(self, _mock_service_get_all,
+                                              _mock_service_is_up):
+        _mock_service_is_up.return_value = True
+        services = [
+            dict(id=1, host='host1', topic='volume', disabled=False,
+                 availability_zone='zone1', updated_at=timeutils.utcnow(),
+                 uuid='36ede0e2-1b3c-41b0-9cd3-66e1f56dc959'),
+            dict(id=2, host='host2', topic='volume', disabled=False,
+                 availability_zone='zone1', updated_at=timeutils.utcnow(),
+                 uuid='b124e8dc-bf5f-4923-802d-27153ac7fe56'),
+            dict(id=3, host='host3', topic='volume', disabled=False,
+                 availability_zone='zone1', updated_at=timeutils.utcnow(),
+                 uuid='4d0b1c5e-ce3c-424e-b2f4-a09a0f54d328'),
+        ]
+        _mock_service_get_all.return_value = services
+        # Create host_manager again to let db.service_get_all mock run
+        self.host_manager = host_manager.HostManager()
+        self.assertFalse(self.host_manager.first_receive_capabilities())
+
+        timestamp = jsonutils.to_primitive(datetime.utcnow())
+        host1_volume_capabs = dict(free_capacity_gb=4321)
+        host2_volume_capabs = dict(free_capacity_gb=5432)
+        host3_volume_capabs = dict(free_capacity_gb=6543)
+
+        service_name = 'volume'
+        self.host_manager.update_service_capabilities(service_name, 'host1',
+                                                      host1_volume_capabs,
+                                                      None, timestamp)
+        self.assertFalse(self.host_manager.first_receive_capabilities())
+        self.host_manager.update_service_capabilities(service_name, 'host2',
+                                                      host2_volume_capabs,
+                                                      None, timestamp)
+        self.assertFalse(self.host_manager.first_receive_capabilities())
+        self.host_manager.update_service_capabilities(service_name, 'host3',
+                                                      host3_volume_capabs,
+                                                      None, timestamp)
+        self.assertTrue(self.host_manager.first_receive_capabilities())
+
     @mock.patch('cinder.db.service_get_all')
     @mock.patch('cinder.objects.service.Service.is_up',
                 new_callable=mock.PropertyMock)
diff --git a/cinder/tests/unit/scheduler/test_scheduler.py b/cinder/tests/unit/scheduler/test_scheduler.py
index ee167923e2a..3427c4c6ffc 100644
--- a/cinder/tests/unit/scheduler/test_scheduler.py
+++ b/cinder/tests/unit/scheduler/test_scheduler.py
@@ -66,13 +66,20 @@ class SchedulerManagerTestCase(test.TestCase):
         manager = self.manager
         self.assertIsInstance(manager.driver, self.driver_cls)
 
+    @mock.patch('cinder.scheduler.driver.Scheduler.is_first_receive')
     @mock.patch('eventlet.sleep')
     @mock.patch('cinder.volume.rpcapi.VolumeAPI.publish_service_capabilities')
-    def test_init_host_with_rpc(self, publish_capabilities_mock, sleep_mock):
+    def test_init_host_with_rpc_delay_after_3_tries(self,
+                                                    publish_capabilities_mock,
+                                                    sleep_mock,
+                                                    is_first_receive_mock):
         self.manager._startup_delay = True
+        is_first_receive_mock.side_effect = [False, False, True]
         self.manager.init_host_with_rpc()
         publish_capabilities_mock.assert_called_once_with(mock.ANY)
-        sleep_mock.assert_called_once_with(CONF.periodic_interval)
+        calls = [mock.call(1)] * 2
+        sleep_mock.assert_has_calls(calls)
+        self.assertEqual(2, sleep_mock.call_count)
         self.assertFalse(self.manager._startup_delay)
 
     @mock.patch('cinder.scheduler.driver.Scheduler.backend_passes_filters')