diff --git a/doc/source/cli/nova-manage.rst b/doc/source/cli/nova-manage.rst
index 53adecb7c9e9..e0d1c2bd09c5 100644
--- a/doc/source/cli/nova-manage.rst
+++ b/doc/source/cli/nova-manage.rst
@@ -259,17 +259,33 @@ Nova Cells v2
 ``nova-manage cell_v2 discover_hosts [--cell_uuid <cell_uuid>] [--verbose] [--strict] [--by-service]``
   Searches cells, or a single cell, and maps found hosts. This command will
   check the database for each cell (or a single one if passed in) and map any
-  hosts which are not currently mapped. If a host is already mapped nothing
-  will be done. You need to re-run this command each time you add more
+  hosts which are not currently mapped. If a host is already mapped, nothing
+  will be done. You need to re-run this command each time you add a batch of
   compute hosts to a cell (otherwise the scheduler will never place instances
-  there and the API will not list the new hosts). If the strict option is
-  provided the command will only be considered successful if an unmapped host
-  is discovered (exit code 0). Any other case is considered a failure (exit
-  code 1). If --by-service is specified, this command will look in the
-  appropriate cell(s) for any nova-compute services and ensure there are host
-  mappings for them. This is less efficient and is only necessary when using
-  compute drivers that may manage zero or more actual compute nodes at any
-  given time (currently only ironic).
+  there and the API will not list the new hosts). If --strict is specified,
+  the command will only return 0 if an unmapped host was discovered and
+  mapped successfully. If --by-service is specified, this command will look
+  in the appropriate cell(s) for any nova-compute services and ensure there
+  are host mappings for them. This is less efficient and is only necessary
+  when using compute drivers that may manage zero or more actual compute
+  nodes at any given time (currently only ironic).
+
+  This command should be run once after all compute hosts have been deployed
+  and should not be run in parallel. When run in parallel, the commands will
+  collide with each other trying to map the same hosts in the database at the
+  same time.
+
+  The meanings of the various exit codes returned by this command are
+  explained below:
+
+  * Returns 0 if hosts were successfully mapped or no hosts needed to be
+    mapped. If --strict is specified, returns 0 only if an unmapped host was
+    discovered and mapped.
+  * Returns 1 if --strict is specified and no unmapped hosts were found.
+    Also returns 1 if an exception was raised while running.
+  * Returns 2 if the command aborted because a duplicate host mapping was
+    found. This means the command collided with another running
+    discover_hosts command or scheduler periodic task and is safe to retry.
 
 ``nova-manage cell_v2 list_cells [--verbose]``
   By default the cell name, uuid, disabled state, masked transport URL and
diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py
index 0f9d55917756..441ca2646db5 100644
--- a/nova/cmd/manage.py
+++ b/nova/cmd/manage.py
@@ -1281,7 +1281,7 @@ class CellV2Commands(object):
     @args('--strict', action='store_true',
           help=_('Considered successful (exit code 0) only when an unmapped '
                  'host is discovered. Any other outcome will be considered a '
-                 'failure (exit code 1).'))
+                 'failure (non-zero exit code).'))
     @args('--by-service', action='store_true', default=False,
          dest='by_service',
          help=_('Discover hosts by service instead of compute node'))
@@ -1293,14 +1293,28 @@ class CellV2Commands(object):
         to the db it's configured to use.
This command will check the db for each cell, or a single one if passed in, and map any hosts which are not currently mapped. If a host is already mapped nothing will be done. + + This command should be run once after all compute hosts have been + deployed and should not be run in parallel. When run in parallel, + the commands will collide with each other trying to map the same hosts + in the database at the same time. """ def status_fn(msg): if verbose: print(msg) ctxt = context.RequestContext() - hosts = host_mapping_obj.discover_hosts(ctxt, cell_uuid, status_fn, - by_service) + try: + hosts = host_mapping_obj.discover_hosts(ctxt, cell_uuid, status_fn, + by_service) + except exception.HostMappingExists as exp: + print(_('ERROR: Duplicate host mapping was encountered. This ' + 'command should be run once after all compute hosts have ' + 'been deployed and should not be run in parallel. When ' + 'run in parallel, the commands will collide with each ' + 'other trying to map the same hosts in the database at ' + 'the same time. Error: %s') % exp) + return 2 # discover_hosts will return an empty list if no hosts are discovered if strict: return int(not hosts) diff --git a/nova/exception.py b/nova/exception.py index 227e865fd688..9d521d2ba23c 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -2089,6 +2089,10 @@ class HostMappingNotFound(Invalid): msg_fmt = _("Host '%(name)s' is not mapped to any cell") +class HostMappingExists(Invalid): + msg_fmt = _("Host '%(name)s' mapping already exists") + + class RealtimeConfigurationInvalid(Invalid): msg_fmt = _("Cannot set realtime policy in a non dedicated " "cpu pinning policy") diff --git a/nova/objects/host_mapping.py b/nova/objects/host_mapping.py index 04b4162491a9..bde342302b5b 100644 --- a/nova/objects/host_mapping.py +++ b/nova/objects/host_mapping.py @@ -10,6 +10,7 @@ # License for the specific language governing permissions and limitations # under the License. 
+from oslo_db import exception as db_exc from sqlalchemy.orm import joinedload from nova import context @@ -175,6 +176,13 @@ class HostMappingList(base.ObjectListBase, base.NovaObject): return base.obj_make_list(context, cls(), HostMapping, db_mappings) +def _create_host_mapping(host_mapping): + try: + host_mapping.create() + except db_exc.DBDuplicateEntry: + raise exception.HostMappingExists(name=host_mapping.host) + + def _check_and_create_node_host_mappings(ctxt, cm, compute_nodes, status_fn): host_mappings = [] for compute in compute_nodes: @@ -190,7 +198,7 @@ def _check_and_create_node_host_mappings(ctxt, cm, compute_nodes, status_fn): host_mapping = HostMapping( ctxt, host=compute.host, cell_mapping=cm) - host_mapping.create() + _create_host_mapping(host_mapping) host_mappings.append(host_mapping) compute.mapped = 1 compute.save() @@ -208,7 +216,7 @@ def _check_and_create_service_host_mappings(ctxt, cm, services, status_fn): host_mapping = HostMapping( ctxt, host=service.host, cell_mapping=cm) - host_mapping.create() + _create_host_mapping(host_mapping) host_mappings.append(host_mapping) return host_mappings diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py index dc6644f2bb24..67dcf3fc6bc8 100644 --- a/nova/scheduler/manager.py +++ b/nova/scheduler/manager.py @@ -25,6 +25,7 @@ from oslo_log import log as logging import oslo_messaging as messaging from oslo_serialization import jsonutils from oslo_service import periodic_task +import six from stevedore import driver import nova.conf @@ -44,6 +45,8 @@ CONF = nova.conf.CONF QUOTAS = quota.QUOTAS +HOST_MAPPING_EXISTS_WARNING = False + class SchedulerManager(manager.Manager): """Chooses a host to run instances on.""" @@ -67,13 +70,24 @@ class SchedulerManager(manager.Manager): spacing=CONF.scheduler.discover_hosts_in_cells_interval, run_immediately=True) def _discover_hosts_in_cells(self, context): - host_mappings = host_mapping_obj.discover_hosts(context) - if host_mappings: - LOG.info('Discovered %(count)i new hosts: %(hosts)s', - {'count': len(host_mappings), - 'hosts': ','.join(['%s:%s' % (hm.cell_mapping.name, - hm.host) - for hm in host_mappings])}) + global HOST_MAPPING_EXISTS_WARNING + try: + host_mappings = host_mapping_obj.discover_hosts(context) + if host_mappings: + LOG.info('Discovered %(count)i new hosts: %(hosts)s', + {'count': len(host_mappings), + 'hosts': ','.join(['%s:%s' % (hm.cell_mapping.name, + hm.host) + for hm in host_mappings])}) + except exception.HostMappingExists as exp: + msg = ('This periodic task should only be enabled on a single ' + 'scheduler to prevent collisions between multiple ' + 'schedulers: %s' % six.text_type(exp)) + if not HOST_MAPPING_EXISTS_WARNING: + LOG.warning(msg) + HOST_MAPPING_EXISTS_WARNING = True + else: + LOG.debug(msg) @periodic_task.periodic_task(spacing=CONF.scheduler.periodic_task_interval, run_immediately=True) diff --git a/nova/tests/unit/objects/test_host_mapping.py b/nova/tests/unit/objects/test_host_mapping.py index f82970806aa3..7cbd9ae9c90a 100644 --- a/nova/tests/unit/objects/test_host_mapping.py +++ b/nova/tests/unit/objects/test_host_mapping.py @@ -11,7 +11,9 @@ # under the License. 
import mock +from oslo_db import exception as db_exc from oslo_utils.fixture import uuidsentinel as uuids +import six from nova import context from nova import exception @@ -310,3 +312,41 @@ Creating host mapping for service host1 Found 1 unmapped computes in cell: %(cell)s""" % {'cell': uuids.cell1} self.assertEqual(expected, '\n'.join(lines)) + + @mock.patch('nova.objects.CellMappingList.get_all') + @mock.patch('nova.objects.HostMapping.create') + @mock.patch('nova.objects.HostMapping.get_by_host') + @mock.patch('nova.objects.ComputeNodeList.get_all_by_not_mapped') + def test_discover_hosts_duplicate(self, mock_cn_get, mock_hm_get, + mock_hm_create, mock_cm): + mock_cm.return_value = [objects.CellMapping(name='foo', + uuid=uuids.cm)] + mock_cn_get.return_value = [objects.ComputeNode(host='bar', + uuid=uuids.cn)] + mock_hm_get.side_effect = exception.HostMappingNotFound(name='bar') + mock_hm_create.side_effect = db_exc.DBDuplicateEntry() + + ctxt = context.get_admin_context() + exp = self.assertRaises(exception.HostMappingExists, + host_mapping.discover_hosts, ctxt) + expected = "Host 'bar' mapping already exists" + self.assertIn(expected, six.text_type(exp)) + + @mock.patch('nova.objects.CellMappingList.get_all') + @mock.patch('nova.objects.HostMapping.get_by_host') + @mock.patch('nova.objects.HostMapping.create') + @mock.patch('nova.objects.ServiceList.get_by_binary') + def test_discover_services_duplicate(self, mock_srv, mock_hm_create, + mock_hm_get, mock_cm): + mock_cm.return_value = [objects.CellMapping(name='foo', + uuid=uuids.cm)] + mock_srv.return_value = [objects.Service(host='bar')] + mock_hm_get.side_effect = exception.HostMappingNotFound(name='bar') + mock_hm_create.side_effect = db_exc.DBDuplicateEntry() + + ctxt = context.get_admin_context() + exp = self.assertRaises(exception.HostMappingExists, + host_mapping.discover_hosts, ctxt, + by_service=True) + expected = "Host 'bar' mapping already exists" + self.assertIn(expected, six.text_type(exp)) diff --git a/nova/tests/unit/scheduler/test_scheduler.py b/nova/tests/unit/scheduler/test_scheduler.py index 513ce28598c8..29351db3db70 100644 --- a/nova/tests/unit/scheduler/test_scheduler.py +++ b/nova/tests/unit/scheduler/test_scheduler.py @@ -22,6 +22,7 @@ import oslo_messaging as messaging from oslo_utils.fixture import uuidsentinel as uuids from nova import context +from nova import exception from nova import objects from nova.scheduler import filter_scheduler from nova.scheduler import host_manager @@ -335,6 +336,27 @@ class SchedulerManagerTestCase(test.NoDBTestCase): cell_mapping=cm2)] self.manager._discover_hosts_in_cells(mock.sentinel.context) + @mock.patch('nova.scheduler.manager.LOG.debug') + @mock.patch('nova.scheduler.manager.LOG.warning') + @mock.patch('nova.objects.host_mapping.discover_hosts') + def test_discover_hosts_duplicate_host_mapping(self, mock_discover, + mock_log_warning, + mock_log_debug): + # This tests the scenario of multiple schedulers running discover_hosts + # at the same time. + mock_discover.side_effect = exception.HostMappingExists(name='a') + self.manager._discover_hosts_in_cells(mock.sentinel.context) + msg = ("This periodic task should only be enabled on a single " + "scheduler to prevent collisions between multiple " + "schedulers: Host 'a' mapping already exists") + mock_log_warning.assert_called_once_with(msg) + mock_log_debug.assert_not_called() + # Second collision should log at debug, not warning. 
+ mock_log_warning.reset_mock() + self.manager._discover_hosts_in_cells(mock.sentinel.context) + mock_log_warning.assert_not_called() + mock_log_debug.assert_called_once_with(msg) + class SchedulerTestCase(test.NoDBTestCase): """Test case for base scheduler driver class.""" diff --git a/nova/tests/unit/test_nova_manage.py b/nova/tests/unit/test_nova_manage.py index b2128d3820d9..3651d91c7f69 100644 --- a/nova/tests/unit/test_nova_manage.py +++ b/nova/tests/unit/test_nova_manage.py @@ -1642,6 +1642,22 @@ class CellV2CommandsTestCase(test.NoDBTestCase): mock.ANY, True) + @mock.patch('nova.objects.host_mapping.discover_hosts') + def test_discover_hosts_mapping_exists(self, mock_discover_hosts): + mock_discover_hosts.side_effect = exception.HostMappingExists( + name='fake') + ret = self.commands.discover_hosts() + output = self.output.getvalue().strip() + self.assertEqual(2, ret) + expected = ("ERROR: Duplicate host mapping was encountered. This " + "command should be run once after all compute hosts " + "have been deployed and should not be run in parallel. " + "When run in parallel, the commands will collide with " + "each other trying to map the same hosts in the database " + "at the same time. Error: Host 'fake' mapping already " + "exists") + self.assertEqual(expected, output) + def test_validate_transport_url_in_conf(self): from_conf = 'fake://user:pass@host:5672/' self.flags(transport_url=from_conf)
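The exit codes documented above are intended to be consumed by deployment tooling. Below is a minimal sketch, not part of this change, of how an orchestration script might wrap ``nova-manage cell_v2 discover_hosts`` and retry only on exit code 2 (a collision with another discover_hosts run or the scheduler periodic task, which the documentation marks as safe to retry); the retry count, delay, and use of --verbose are illustrative assumptions::

    # Sketch only: wrap `nova-manage cell_v2 discover_hosts` in deployment
    # tooling and honour the exit codes documented above. The retry count,
    # delay and --verbose flag are illustrative assumptions.
    import subprocess
    import sys
    import time

    CMD = ['nova-manage', 'cell_v2', 'discover_hosts', '--verbose']


    def discover_hosts_with_retry(retries=3, delay=5):
        for attempt in range(1, retries + 1):
            # 0: hosts were mapped or nothing needed mapping (with --strict,
            #    only if an unmapped host was discovered and mapped).
            # 1: an error occurred, or --strict found nothing to map.
            # 2: collided with another discover_hosts run or the scheduler
            #    periodic task; documented above as safe to retry.
            rc = subprocess.call(CMD)
            if rc != 2:
                return rc
            print('discover_hosts collided with another run (attempt %d/%d), '
                  'retrying in %d seconds' % (attempt, retries, delay))
            time.sleep(delay)
        return 2


    if __name__ == '__main__':
        sys.exit(discover_hosts_with_retry())

Running the wrapper serially from a single deployment node keeps with the guidance above that discover_hosts should not be run in parallel.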