Merge "Warn for duplicate host mappings during discover_hosts"

This commit is contained in:
Zuul 2019-06-14 07:45:14 +00:00 committed by Gerrit Code Review
commit c4abea67f4
8 changed files with 156 additions and 22 deletions

View File

@ -259,17 +259,33 @@ Nova Cells v2
``nova-manage cell_v2 discover_hosts [--cell_uuid <cell_uuid>] [--verbose] [--strict] [--by-service]``
Searches cells, or a single cell, and maps found hosts. This command will
check the database for each cell (or a single one if passed in) and map any
hosts which are not currently mapped. If a host is already mapped nothing
will be done. You need to re-run this command each time you add more
hosts which are not currently mapped. If a host is already mapped, nothing
will be done. You need to re-run this command each time you add a batch of
compute hosts to a cell (otherwise the scheduler will never place instances
there and the API will not list the new hosts). If the strict option is
provided the command will only be considered successful if an unmapped host
is discovered (exit code 0). Any other case is considered a failure (exit
code 1). If --by-service is specified, this command will look in the
appropriate cell(s) for any nova-compute services and ensure there are host
mappings for them. This is less efficient and is only necessary when using
compute drivers that may manage zero or more actual compute nodes at any
given time (currently only ironic).
there and the API will not list the new hosts). If --strict is specified,
the command will only return 0 if an unmapped host was discovered and
mapped successfully. If --by-service is specified, this command will look
in the appropriate cell(s) for any nova-compute services and ensure there
are host mappings for them. This is less efficient and is only necessary
when using compute drivers that may manage zero or more actual compute
nodes at any given time (currently only ironic).
This command should be run once after all compute hosts have been deployed
and should not be run in parallel. When run in parallel, the commands will
collide with each other trying to map the same hosts in the database at the
same time.
The meaning of the various exit codes returned by this command are
explained below:
* Returns 0 if hosts were successfully mapped or no hosts needed to be
mapped. If --strict is specified, returns 0 only if an unmapped host was
discovered and mapped.
* Returns 1 if --strict is specified and no unmapped hosts were found.
Also returns 1 if an exception was raised while running.
* Returns 2 if the command aborted because of a duplicate host mapping
found. This means the command collided with another running
discover_hosts command or scheduler periodic task and is safe to retry.
``nova-manage cell_v2 list_cells [--verbose]``
By default the cell name, uuid, disabled state, masked transport URL and

View File

@ -1281,7 +1281,7 @@ class CellV2Commands(object):
@args('--strict', action='store_true',
help=_('Considered successful (exit code 0) only when an unmapped '
'host is discovered. Any other outcome will be considered a '
'failure (exit code 1).'))
'failure (non-zero exit code).'))
@args('--by-service', action='store_true', default=False,
dest='by_service',
help=_('Discover hosts by service instead of compute node'))
@ -1293,14 +1293,28 @@ class CellV2Commands(object):
to the db it's configured to use. This command will check the db for
each cell, or a single one if passed in, and map any hosts which are
not currently mapped. If a host is already mapped nothing will be done.
This command should be run once after all compute hosts have been
deployed and should not be run in parallel. When run in parallel,
the commands will collide with each other trying to map the same hosts
in the database at the same time.
"""
def status_fn(msg):
if verbose:
print(msg)
ctxt = context.RequestContext()
hosts = host_mapping_obj.discover_hosts(ctxt, cell_uuid, status_fn,
by_service)
try:
hosts = host_mapping_obj.discover_hosts(ctxt, cell_uuid, status_fn,
by_service)
except exception.HostMappingExists as exp:
print(_('ERROR: Duplicate host mapping was encountered. This '
'command should be run once after all compute hosts have '
'been deployed and should not be run in parallel. When '
'run in parallel, the commands will collide with each '
'other trying to map the same hosts in the database at '
'the same time. Error: %s') % exp)
return 2
# discover_hosts will return an empty list if no hosts are discovered
if strict:
return int(not hosts)

View File

@ -2089,6 +2089,10 @@ class HostMappingNotFound(Invalid):
msg_fmt = _("Host '%(name)s' is not mapped to any cell")
class HostMappingExists(Invalid):
msg_fmt = _("Host '%(name)s' mapping already exists")
class RealtimeConfigurationInvalid(Invalid):
msg_fmt = _("Cannot set realtime policy in a non dedicated "
"cpu pinning policy")

View File

@ -10,6 +10,7 @@
# License for the specific language governing permissions and limitations
# under the License.
from oslo_db import exception as db_exc
from sqlalchemy.orm import joinedload
from nova import context
@ -175,6 +176,13 @@ class HostMappingList(base.ObjectListBase, base.NovaObject):
return base.obj_make_list(context, cls(), HostMapping, db_mappings)
def _create_host_mapping(host_mapping):
try:
host_mapping.create()
except db_exc.DBDuplicateEntry:
raise exception.HostMappingExists(name=host_mapping.host)
def _check_and_create_node_host_mappings(ctxt, cm, compute_nodes, status_fn):
host_mappings = []
for compute in compute_nodes:
@ -190,7 +198,7 @@ def _check_and_create_node_host_mappings(ctxt, cm, compute_nodes, status_fn):
host_mapping = HostMapping(
ctxt, host=compute.host,
cell_mapping=cm)
host_mapping.create()
_create_host_mapping(host_mapping)
host_mappings.append(host_mapping)
compute.mapped = 1
compute.save()
@ -208,7 +216,7 @@ def _check_and_create_service_host_mappings(ctxt, cm, services, status_fn):
host_mapping = HostMapping(
ctxt, host=service.host,
cell_mapping=cm)
host_mapping.create()
_create_host_mapping(host_mapping)
host_mappings.append(host_mapping)
return host_mappings

View File

@ -25,6 +25,7 @@ from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_serialization import jsonutils
from oslo_service import periodic_task
import six
from stevedore import driver
import nova.conf
@ -44,6 +45,8 @@ CONF = nova.conf.CONF
QUOTAS = quota.QUOTAS
HOST_MAPPING_EXISTS_WARNING = False
class SchedulerManager(manager.Manager):
"""Chooses a host to run instances on."""
@ -67,13 +70,24 @@ class SchedulerManager(manager.Manager):
spacing=CONF.scheduler.discover_hosts_in_cells_interval,
run_immediately=True)
def _discover_hosts_in_cells(self, context):
host_mappings = host_mapping_obj.discover_hosts(context)
if host_mappings:
LOG.info('Discovered %(count)i new hosts: %(hosts)s',
{'count': len(host_mappings),
'hosts': ','.join(['%s:%s' % (hm.cell_mapping.name,
hm.host)
for hm in host_mappings])})
global HOST_MAPPING_EXISTS_WARNING
try:
host_mappings = host_mapping_obj.discover_hosts(context)
if host_mappings:
LOG.info('Discovered %(count)i new hosts: %(hosts)s',
{'count': len(host_mappings),
'hosts': ','.join(['%s:%s' % (hm.cell_mapping.name,
hm.host)
for hm in host_mappings])})
except exception.HostMappingExists as exp:
msg = ('This periodic task should only be enabled on a single '
'scheduler to prevent collisions between multiple '
'schedulers: %s' % six.text_type(exp))
if not HOST_MAPPING_EXISTS_WARNING:
LOG.warning(msg)
HOST_MAPPING_EXISTS_WARNING = True
else:
LOG.debug(msg)
@periodic_task.periodic_task(spacing=CONF.scheduler.periodic_task_interval,
run_immediately=True)

View File

@ -11,7 +11,9 @@
# under the License.
import mock
from oslo_db import exception as db_exc
from oslo_utils.fixture import uuidsentinel as uuids
import six
from nova import context
from nova import exception
@ -310,3 +312,41 @@ Creating host mapping for service host1
Found 1 unmapped computes in cell: %(cell)s""" % {'cell': uuids.cell1}
self.assertEqual(expected, '\n'.join(lines))
@mock.patch('nova.objects.CellMappingList.get_all')
@mock.patch('nova.objects.HostMapping.create')
@mock.patch('nova.objects.HostMapping.get_by_host')
@mock.patch('nova.objects.ComputeNodeList.get_all_by_not_mapped')
def test_discover_hosts_duplicate(self, mock_cn_get, mock_hm_get,
mock_hm_create, mock_cm):
mock_cm.return_value = [objects.CellMapping(name='foo',
uuid=uuids.cm)]
mock_cn_get.return_value = [objects.ComputeNode(host='bar',
uuid=uuids.cn)]
mock_hm_get.side_effect = exception.HostMappingNotFound(name='bar')
mock_hm_create.side_effect = db_exc.DBDuplicateEntry()
ctxt = context.get_admin_context()
exp = self.assertRaises(exception.HostMappingExists,
host_mapping.discover_hosts, ctxt)
expected = "Host 'bar' mapping already exists"
self.assertIn(expected, six.text_type(exp))
@mock.patch('nova.objects.CellMappingList.get_all')
@mock.patch('nova.objects.HostMapping.get_by_host')
@mock.patch('nova.objects.HostMapping.create')
@mock.patch('nova.objects.ServiceList.get_by_binary')
def test_discover_services_duplicate(self, mock_srv, mock_hm_create,
mock_hm_get, mock_cm):
mock_cm.return_value = [objects.CellMapping(name='foo',
uuid=uuids.cm)]
mock_srv.return_value = [objects.Service(host='bar')]
mock_hm_get.side_effect = exception.HostMappingNotFound(name='bar')
mock_hm_create.side_effect = db_exc.DBDuplicateEntry()
ctxt = context.get_admin_context()
exp = self.assertRaises(exception.HostMappingExists,
host_mapping.discover_hosts, ctxt,
by_service=True)
expected = "Host 'bar' mapping already exists"
self.assertIn(expected, six.text_type(exp))

View File

@ -22,6 +22,7 @@ import oslo_messaging as messaging
from oslo_utils.fixture import uuidsentinel as uuids
from nova import context
from nova import exception
from nova import objects
from nova.scheduler import filter_scheduler
from nova.scheduler import host_manager
@ -335,6 +336,27 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
cell_mapping=cm2)]
self.manager._discover_hosts_in_cells(mock.sentinel.context)
@mock.patch('nova.scheduler.manager.LOG.debug')
@mock.patch('nova.scheduler.manager.LOG.warning')
@mock.patch('nova.objects.host_mapping.discover_hosts')
def test_discover_hosts_duplicate_host_mapping(self, mock_discover,
mock_log_warning,
mock_log_debug):
# This tests the scenario of multiple schedulers running discover_hosts
# at the same time.
mock_discover.side_effect = exception.HostMappingExists(name='a')
self.manager._discover_hosts_in_cells(mock.sentinel.context)
msg = ("This periodic task should only be enabled on a single "
"scheduler to prevent collisions between multiple "
"schedulers: Host 'a' mapping already exists")
mock_log_warning.assert_called_once_with(msg)
mock_log_debug.assert_not_called()
# Second collision should log at debug, not warning.
mock_log_warning.reset_mock()
self.manager._discover_hosts_in_cells(mock.sentinel.context)
mock_log_warning.assert_not_called()
mock_log_debug.assert_called_once_with(msg)
class SchedulerTestCase(test.NoDBTestCase):
"""Test case for base scheduler driver class."""

View File

@ -1642,6 +1642,22 @@ class CellV2CommandsTestCase(test.NoDBTestCase):
mock.ANY,
True)
@mock.patch('nova.objects.host_mapping.discover_hosts')
def test_discover_hosts_mapping_exists(self, mock_discover_hosts):
mock_discover_hosts.side_effect = exception.HostMappingExists(
name='fake')
ret = self.commands.discover_hosts()
output = self.output.getvalue().strip()
self.assertEqual(2, ret)
expected = ("ERROR: Duplicate host mapping was encountered. This "
"command should be run once after all compute hosts "
"have been deployed and should not be run in parallel. "
"When run in parallel, the commands will collide with "
"each other trying to map the same hosts in the database "
"at the same time. Error: Host 'fake' mapping already "
"exists")
self.assertEqual(expected, output)
def test_validate_transport_url_in_conf(self):
from_conf = 'fake://user:pass@host:5672/'
self.flags(transport_url=from_conf)