Add nova-manage ironic-compute-node-move

When people transition from three ironic nova-compute processes down
to one process, we need a way to move the ironic nodes, and any
associated instances, between nova-compute processes.

For safety, a nova-compute process must first be forced_down via
the API, similar to when using evacuate, before moving the associated
ironic nodes to another nova-compute process. The destination
nova-compute process should ideally not be running yet, but it must
not be marked as forced down.

blueprint ironic-shards

Change-Id: I33034ec77b033752797bd679c6e61cef5af0a18f
John Garbutt 2023-06-26 17:32:38 +01:00 committed by Jay Faulkner
parent f1a4857d61
commit 08d7be1726
4 changed files with 141 additions and 1 deletions
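To make the forced-down precondition described in the commit message concrete, here is a minimal, hedged sketch of forcing down the source nova-compute service using Nova's internal objects, the same calls the functional tests in this change use. The host name is a placeholder, and in a real deployment the service would normally be forced down through the compute services API or CLI instead of direct object access.

from nova import context
from nova import objects

objects.register_all()
ctxt = context.get_admin_context()

# Placeholder host name for this sketch.
SOURCE_HOST = 'ironic-compute-old'

# Mark the source service as forced down, the precondition the new
# nova-manage command checks before it will move any ironic nodes.
source = objects.Service.get_by_compute_host(ctxt, SOURCE_HOST)
source.forced_down = True
source.save()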


@@ -465,6 +465,47 @@ command completed successfully with exit code 0.
   * - 127
     - Invalid input was provided.
db ironic_compute_node_move
---------------------------

.. program:: nova-manage db ironic_compute_node_move

.. code-block:: shell

    nova-manage db ironic_compute_node_move --ironic-node-uuid <uuid> --destination-host <host>

Move ironic nodes, along with any associated instances,
between nova-compute services.

This is useful when migrating away from using ``peer_list`` and multiple
hash ring balanced nova-compute servers to the new ironic shard system.

First, you must turn off the nova-compute service that currently manages
the Ironic host. Second, you mark that nova-compute service as forced down
via the Nova API. Third, you ensure the new nova-compute service is
correctly configured to target the appropriate shard (and optionally
also a conductor group). Finally, most Ironic nodes should now move to
the new service, but any Ironic nodes with instances on them
will need to be manually moved to their new nova-compute service
by using this nova-manage command.

.. versionadded:: 28.0.0 (2023.2 Bobcat)

.. rubric:: Options

.. option:: --ironic-node-uuid <uuid>

    Ironic node uuid to be moved (which is also the Nova compute node uuid
    and the uuid of the corresponding resource provider in Placement).

    The Nova compute service that currently manages this Ironic node
    must first be marked as "forced down" via the Nova API, in a similar
    way to a down hypervisor that is about to have its VMs evacuated to
    a replacement hypervisor.

.. option:: --destination-host <host>

    Destination ironic nova-compute service CONF.host.
API Database Commands
=====================
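As a hedged companion to the workflow documented above (not part of this change), the following Python sketch checks whether an Ironic node still has an instance recorded against its current nova-compute service, using the same object APIs the new command calls. The node UUID is a placeholder.

from nova import context
from nova import objects

objects.register_all()
ctxt = context.get_admin_context()

# Placeholder node UUID for this sketch.
NODE_UUID = '00000000-0000-0000-0000-000000000000'

node = objects.ComputeNode.get_by_uuid(ctxt, NODE_UUID)
instances = objects.InstanceList.get_by_host_and_node(
    ctxt, node.host, node.hypervisor_hostname)

if instances:
    # These instances only follow the node once
    # "nova-manage db ironic_compute_node_move" has been run.
    print('node %s still hosts: %s' % (
        node.hypervisor_hostname, ', '.join(i.uuid for i in instances)))
else:
    print('node %s has no instances recorded' % node.hypervisor_hostname)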


@@ -623,6 +623,47 @@ class DbCommands(object):
        # "there are more migrations, but not completable right now"
        return ran and 1 or 0
    @args('--ironic-node-uuid', metavar='<uuid>', dest='compute_node_uuid',
          help='UUID of Ironic node to be moved between services')
    @args('--destination-host', metavar='<host>',
          dest='destination_service_host',
          help='Destination ironic nova-compute service CONF.host')
    def ironic_compute_node_move(self, compute_node_uuid,
                                 destination_service_host):
        ctxt = context.get_admin_context()

        destination_service = objects.Service.get_by_compute_host(
            ctxt, destination_service_host)
        if destination_service.forced_down:
            raise exception.NovaException(
                "Destination compute is forced down!")

        target_compute_node = objects.ComputeNode.get_by_uuid(
            ctxt, compute_node_uuid)
        source_service = objects.Service.get_by_id(
            ctxt, target_compute_node.service_id)
        if not source_service.forced_down:
            raise exception.NovaException(
                "Source service is not yet forced down!")

        instances = objects.InstanceList.get_by_host_and_node(
            ctxt, target_compute_node.host,
            target_compute_node.hypervisor_hostname)
        if len(instances) > 1:
            raise exception.NovaException(
                "Found an ironic host with more than one instance! "
                "Please delete all Nova instances that do not match "
                "the instance uuid recorded on the Ironic node.")

        target_compute_node.service_id = destination_service.id
        target_compute_node.host = destination_service.host
        target_compute_node.save()

        for instance in instances:
            # this is a bit like evacuate, except no need to rebuild
            instance.host = destination_service.host
            instance.save()
class ApiDbCommands(object):
    """Class for managing the api database."""


@@ -163,7 +163,9 @@ class NovaManageDBIronicTest(test.TestCase):
                user_id=self.context.user_id,
                project_id=self.context.project_id,
                flavor=flavor,
                node=cn.hypervisor_hostname,
                host=cn.host,
                compute_id=cn.id)
            inst.create()
            self.insts.append(inst)
@@ -173,6 +175,57 @@ class NovaManageDBIronicTest(test.TestCase):
            if i.node == self.cn4.hypervisor_hostname]
class TestIronicComputeNodeMove(NovaManageDBIronicTest):
    """Functional tests for "nova-manage db ironic_compute_node_move" CLI."""
    api_major_version = 'v2.1'

    def setUp(self):
        super(TestIronicComputeNodeMove, self).setUp()
        self.enforce_fk_constraints()

        self.cli = manage.DbCommands()
        self.output = StringIO()
        self.useFixture(fixtures.MonkeyPatch('sys.stdout', self.output))

    def test_ironic_compute_node_move_success(self):
        self.service1.forced_down = True
        self.service1.save()
        self.assertEqual(self.service1.id, self.cn1.service_id)

        # move cn1 on service1 to service2
        node_uuid = self.cn1.uuid
        dest_host = self.service2.host
        self.commands.ironic_compute_node_move(node_uuid, dest_host)

        # check the compute node got moved to service 2
        updated_cn1 = objects.ComputeNode.get_by_id(self.context, self.cn1.id)
        self.assertEqual(self.service2.id, updated_cn1.service_id)
        self.assertEqual(self.service2.host, updated_cn1.host)

        # check the instance got moved too
        updated_instance = objects.Instance.get_by_id(
            self.context, self.insts[0].id)
        self.assertEqual(self.service2.host, updated_instance.host)

    def test_ironic_compute_node_move_raise_not_forced_down(self):
        node_uuid = self.cn1.uuid
        dest_host = self.service2.host

        self.assertRaises(exception.NovaException,
                          self.commands.ironic_compute_node_move,
                          node_uuid, dest_host)

    def test_ironic_compute_node_move_raise_forced_down(self):
        self.service1.forced_down = True
        self.service1.save()
        self.service2.forced_down = True
        self.service2.save()

        node_uuid = self.cn1.uuid
        dest_host = self.service2.host

        self.assertRaises(exception.NovaException,
                          self.commands.ironic_compute_node_move,
                          node_uuid, dest_host)
class NovaManageCellV2Test(test.TestCase):
    def setUp(self):
        super(NovaManageCellV2Test, self).setUp()


@@ -10,3 +10,8 @@ features:
    which ironic nodes are managed by each nova-compute service.
    Note that when you use ``[ironic]shard`` the ``[ironic]peer_list``
    is hard coded to a single nova-compute service.
  - |
    There is a new nova-manage command ``db ironic_compute_node_move`` that
    can be used to move ironic nodes, and the associated instances, between
    nova-compute services. This is useful when migrating from the legacy
    hash ring based HA towards the new sharding approach.
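Purely as a hedged illustration of the sharding approach the note refers to, the snippet below reads the ``[ironic]shard`` value through Nova's oslo.config entry point; treating an unset value as "sharding not in use" is an assumption for this sketch, not something this change asserts.

from nova import conf

CONF = conf.CONF

# [ironic]shard selects which ironic nodes this nova-compute service
# manages (assumption: unset means the legacy peer_list/hash ring path).
if CONF.ironic.shard:
    print('this service targets shard: %s' % CONF.ironic.shard)
else:
    print('no shard configured; legacy peer_list/hash ring behaviour')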