Populate ComputeNode.service_id

The ComputeNode object already has a service_id field that we stopped
using a while ago. This moves us back to the point where we set it when
creating new ComputeNode records, and also migrates existing records
when they are loaded.

The resource tracker is created before we may have created the
service record, but is updated afterwards in the pre_start_hook().
So this adds a way for us to pass the service_ref to the resource
tracker during that hook so that it is present before the first time
we update all of our ComputeNode records. It also makes sure to pass
the Service through from the actual Service manager instead of looking
it up again to make sure we maintain the tight relationship and avoid
any name-based ambiguity.

Related to blueprint compute-object-ids

Change-Id: I5e060d674b6145c9797c2251a2822106fc6d4a71
This commit is contained in:
Dan Smith 2023-04-07 07:44:34 -07:00
parent 971809b4d4
commit afad847e4d
8 changed files with 100 additions and 9 deletions

View File

@ -675,6 +675,7 @@ class ComputeManager(manager.Manager):
# NOTE(russellb) Load the driver last. It may call back into the
# compute manager via the virtapi, so we want it to be fully
# initialized before that happens.
self.service_ref = None
self.driver = driver.load_compute_driver(self.virtapi, compute_driver)
self.rt = resource_tracker.ResourceTracker(
self.host, self.driver, reportclient=self.reportclient)
@ -1749,11 +1750,13 @@ class ComputeManager(manager.Manager):
instance_uuid=migration.instance_uuid)
self._waiting_live_migrations.clear()
def pre_start_hook(self):
def pre_start_hook(self, service_ref):
"""After the service is initialized, but before we fully bring
the service up by listening on RPC queues, make sure to update
our available resources (and indirectly our available nodes).
"""
self.service_ref = service_ref
self.rt.set_service_ref(service_ref)
self.update_available_resource(nova.context.get_admin_context(),
startup=True)

View File

@ -91,6 +91,7 @@ class ResourceTracker(object):
"""
def __init__(self, host, driver, reportclient=None):
self.service_ref = None
self.host = host
self.driver = driver
self.pci_tracker = None
@ -125,6 +126,18 @@ class ResourceTracker(object):
# smarter logging.
self.absent_providers = set()
def set_service_ref(self, service_ref):
# NOTE(danms): Neither of these should ever happen, but sanity check
# just in case
if self.service_ref and self.service_ref.id != service_ref.id:
raise exception.ComputeServiceInUse(
'Resource tracker re-initialized with a different service')
elif service_ref.host != self.host:
raise exception.ServiceNotUnique(
'Resource tracker initialized with service that does '
'not match host')
self.service_ref = service_ref
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def instance_claim(self, context, instance, nodename, allocations,
limits=None):
@ -704,6 +717,14 @@ class ResourceTracker(object):
# to initialize
if nodename in self.compute_nodes:
cn = self.compute_nodes[nodename]
if 'service_id' not in cn or cn.service_id is None:
LOG.debug('Setting ComputeNode %s service_id to %i',
cn.uuid, self.service_ref.id)
cn.service_id = self.service_ref.id
elif cn.service_id != self.service_ref.id:
LOG.warning('Moving ComputeNode %s from service %i to %i',
cn.uuid, cn.service_id, self.service_ref.id)
cn.service_id = self.service_ref.id
self._copy_resources(cn, resources)
self._setup_pci_tracker(context, cn, resources)
return False
@ -727,6 +748,7 @@ class ResourceTracker(object):
LOG.info("ComputeNode %(name)s moving from %(old)s to %(new)s",
{"name": nodename, "old": cn.host, "new": self.host})
cn.host = self.host
cn.service_id = self.service_ref.id
self._update(context, cn)
self.compute_nodes[nodename] = cn
@ -739,6 +761,7 @@ class ResourceTracker(object):
# to be initialized with resource values.
cn = objects.ComputeNode(context)
cn.host = self.host
cn.service_id = self.service_ref.id
self._copy_resources(cn, resources, initial=True)
try:
cn.create()

View File

@ -122,13 +122,15 @@ class Manager(PeriodicTasks, metaclass=ManagerMeta):
"""
pass
def pre_start_hook(self):
def pre_start_hook(self, service_ref):
"""Hook to provide the manager the ability to do additional
start-up work before any RPC queues/consumers are created. This is
called after other initialization has succeeded and a service
record is created.
Child classes should override this method.
:param service_ref: The nova.objects.Service for this
"""
pass

View File

@ -174,7 +174,7 @@ class Service(service.Service):
self.service_ref = objects.Service.get_by_host_and_binary(
ctxt, self.host, self.binary)
self.manager.pre_start_hook()
self.manager.pre_start_hook(self.service_ref)
if self.backdoor_port is not None:
self.manager.backdoor_port = self.backdoor_port
@ -443,9 +443,10 @@ class WSGIService(service.Service):
service_ref = objects.Service.get_by_host_and_binary(
ctxt, self.host, self.binary)
self.service_ref = service_ref
if self.manager:
self.manager.init_host()
self.manager.pre_start_hook()
self.manager.pre_start_hook(self.service_ref)
if self.backdoor_port is not None:
self.manager.backdoor_port = self.backdoor_port
self.server.start()

View File

@ -209,8 +209,10 @@ class BaseTestCase(test.TestCase):
self.stub_out('nova.db.main.api.compute_node_delete',
fake_compute_node_delete)
self.compute.update_available_resource(
context.get_admin_context())
self.service = objects.Service(id=7,
host=self.compute.host)
# NOTE(danms): This calls update_available_resource() for us
self.compute.pre_start_hook(self.service)
self.user_id = 'fake'
self.project_id = 'fake'

View File

@ -487,11 +487,13 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
"""Very simple test just to make sure update_available_resource is
called as expected.
"""
fake_service = objects.Service(id=123, host=self.compute.host)
with mock.patch.object(
self.compute, 'update_available_resource') as update_res:
self.compute.pre_start_hook()
self.compute.pre_start_hook(fake_service)
update_res.assert_called_once_with(
get_admin_context.return_value, startup=True)
self.assertEqual(fake_service, self.compute.rt.service_ref)
@mock.patch.object(objects.ComputeNodeList, 'get_all_by_host',
side_effect=exception.NotFound)

View File

@ -50,6 +50,7 @@ from nova.virt import driver
_HOSTNAME = 'fake-host'
_NODENAME = 'fake-node'
_SERVICE_ID = 123
CONF = cfg.CONF
_VIRT_DRIVER_AVAIL_RESOURCES = {
@ -82,6 +83,7 @@ _COMPUTE_NODE_FIXTURES = [
hypervisor_type='fake',
hypervisor_version=0,
hypervisor_hostname=_NODENAME,
service_id=_SERVICE_ID,
free_ram_mb=(_VIRT_DRIVER_AVAIL_RESOURCES['memory_mb'] -
_VIRT_DRIVER_AVAIL_RESOURCES['memory_mb_used']),
free_disk_gb=(_VIRT_DRIVER_AVAIL_RESOURCES['local_gb'] -
@ -505,6 +507,10 @@ def setup_rt(hostname, virt_resources=_VIRT_DRIVER_AVAIL_RESOURCES):
return_value=report_client_mock),
mock.patch('nova.rpc.get_notifier', return_value=notifier_mock)):
rt = resource_tracker.ResourceTracker(hostname, vd)
fake_service = objects.Service(id=_SERVICE_ID, host=hostname)
rt.set_service_ref(fake_service)
return (rt, query_client_mock, report_client_mock, vd)
@ -1424,6 +1430,7 @@ class TestInitComputeNode(BaseTestCase):
hypervisor_hostname=resources['hypervisor_hostname'],
# NOTE(sbauza): ResourceTracker adds host field
host=_HOSTNAME,
service_id=_SERVICE_ID,
# NOTE(sbauza): ResourceTracker adds CONF allocation ratios
ram_allocation_ratio=CONF.initial_ram_allocation_ratio,
cpu_allocation_ratio=CONF.initial_cpu_allocation_ratio,
@ -1441,6 +1448,8 @@ class TestInitComputeNode(BaseTestCase):
uuids.compute_node_uuid)
create_mock.assert_called_once_with()
self.assertEqual(obj_base.obj_to_primitive(expected_compute),
obj_base.obj_to_primitive(cn))
self.assertTrue(obj_base.obj_equal_prims(expected_compute, cn))
setup_pci.assert_called_once_with(mock.sentinel.ctx, cn, resources)
self.assertFalse(update_mock.called)
@ -1568,6 +1577,23 @@ class TestInitComputeNode(BaseTestCase):
mock.MagicMock,
resources)
def test_init_compute_node_updates_service_id(self):
unset = object()
# Make sure that we can go from unset, none, or some other service_id
# to the one in our RT's service_ref
for iv in [unset, None, 121]:
compute = objects.ComputeNode(uuid=uuids.fakenode, id=7)
if iv is not unset:
compute.service_id = iv
self._setup_rt()
resources = {'hypervisor_hostname': 'fake-node',
'uuid': uuids.fakenode}
self.rt.compute_nodes = {'fake-node': compute}
with mock.patch.object(self.rt, '_setup_pci_tracker'):
self.rt._init_compute_node(mock.MagicMock(), resources)
self.assertIn('service_id', compute)
self.assertEqual(self.rt.service_ref.id, compute.service_id)
@ddt.ddt
class TestUpdateComputeNode(BaseTestCase):
@ -4284,6 +4310,37 @@ class ResourceTrackerTestCase(test.NoDBTestCase):
self.assertRaises(AssertionError, _test_explict_unfair)
self.assertRaises(AssertionError, _test_implicit_unfair)
def test_set_service_ref(self):
rt = resource_tracker.ResourceTracker(
_HOSTNAME, mock.sentinel.driver, mock.sentinel.reportclient)
# The service ref should default to None and we should be able to set
# the service ref to anything from that state, as long as the host
# matches what the rt was initialized with
self.assertIsNone(rt.service_ref)
service = objects.Service(id=123, host=_HOSTNAME)
rt.set_service_ref(service)
self.assertEqual(service, rt.service_ref)
# We should be able to set the service again with the same attributes
# without raising
service = service.obj_clone()
rt.set_service_ref(service)
# A service with a different id is an error
service = objects.Service(id=456, host=_HOSTNAME)
self.assertRaises(exc.ComputeServiceInUse,
rt.set_service_ref, service)
# A service with a different host is an error
service = objects.Service(id=123, host='foo')
self.assertRaises(exc.ServiceNotUnique,
rt.set_service_ref, service)
# Make sure we haven't disturbed the stored ref with the above
self.assertEqual(123, rt.service_ref.id)
self.assertEqual(_HOSTNAME, rt.service_ref.host)
class ProviderConfigTestCases(BaseTestCase):
def setUp(self):

View File

@ -134,7 +134,8 @@ class ServiceTestCase(test.NoDBTestCase):
mock_create.assert_called_once_with()
# pre_start_hook is called after service record is created,
# but before RPC consumer is created
serv.manager.pre_start_hook.assert_called_once_with()
serv.manager.pre_start_hook.assert_called_once_with(
serv.service_ref)
# post_start_hook is called after RPC consumer is created.
serv.manager.post_start_hook.assert_called_once_with()
@ -221,7 +222,7 @@ class ServiceTestCase(test.NoDBTestCase):
self.host,
self.binary)
mock_create.assert_called_once_with()
serv.manager.pre_start_hook.assert_called_once_with()
serv.manager.pre_start_hook.assert_called_once_with(serv.service_ref)
serv.manager.post_start_hook.assert_called_once_with()
serv.stop()
mock_stop.assert_called_once_with()