diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 63691ee4cd23..5e5bc7e9567d 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -492,6 +492,50 @@ class ResourceTracker(object): return (nodename not in self.compute_nodes or not self.driver.node_is_available(nodename)) + def _check_for_nodes_rebalance(self, context, resources, nodename): + """Check if nodes rebalance has happened. + + The ironic driver maintains a hash ring mapping bare metal nodes + to compute nodes. If a compute dies, the hash ring is rebuilt, and + some of its bare metal nodes (more precisely, those not in ACTIVE + state) are assigned to other computes. + + This method checks for this condition and adjusts the database + accordingly. + + :param context: security context + :param resources: initial values + :param nodename: node name + :returns: True if a suitable compute node record was found, else False + """ + if not self.driver.rebalances_nodes: + return False + + # Its possible ironic just did a node re-balance, so let's + # check if there is a compute node that already has the correct + # hypervisor_hostname. We can re-use that rather than create a + # new one and have to move existing placement allocations + cn_candidates = objects.ComputeNodeList.get_by_hypervisor( + context, nodename) + + if len(cn_candidates) == 1: + cn = cn_candidates[0] + LOG.info("ComputeNode %(name)s moving from %(old)s to %(new)s", + {"name": nodename, "old": cn.host, "new": self.host}) + cn.host = self.host + self.compute_nodes[nodename] = cn + self._copy_resources(cn, resources) + self._setup_pci_tracker(context, cn, resources) + self._update(context, cn) + return True + elif len(cn_candidates) > 1: + LOG.error( + "Found more than one ComputeNode for nodename %s. " + "Please clean up the orphaned ComputeNode records in your DB.", + nodename) + + return False + def _init_compute_node(self, context, resources): """Initialize the compute node if it does not already exist. @@ -527,6 +571,9 @@ class ResourceTracker(object): self._update(context, cn) return + if self._check_for_nodes_rebalance(context, resources, nodename): + return + # there was no local copy and none in the database # so we need to create a new compute node. This needs # to be initialized with resource values. diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index 2226919cf283..096bfccc99bc 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -435,6 +435,7 @@ def setup_rt(hostname, virt_resources=_VIRT_DRIVER_AVAIL_RESOURCES, vd.get_inventory.side_effect = NotImplementedError vd.get_host_ip_addr.return_value = _NODENAME vd.estimate_instance_overhead.side_effect = estimate_overhead + vd.rebalances_nodes = False with test.nested( mock.patch('nova.scheduler.client.SchedulerClient', @@ -1012,6 +1013,39 @@ class TestInitComputeNode(BaseTestCase): self.assertFalse(create_mock.called) self.assertTrue(update_mock.called) + @mock.patch('nova.objects.ComputeNodeList.get_by_hypervisor') + @mock.patch('nova.objects.PciDeviceList.get_by_compute_node', + return_value=objects.PciDeviceList()) + @mock.patch('nova.objects.ComputeNode.create') + @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename') + @mock.patch('nova.compute.resource_tracker.ResourceTracker.' + '_update') + def test_compute_node_rebalanced(self, update_mock, get_mock, create_mock, + pci_mock, get_by_hypervisor_mock): + self._setup_rt() + self.driver_mock.rebalances_nodes = True + cn = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0]) + cn.host = "old-host" + + def fake_get_all(_ctx, nodename): + return [cn] + + get_mock.side_effect = exc.NotFound + get_by_hypervisor_mock.side_effect = fake_get_all + resources = copy.deepcopy(_VIRT_DRIVER_AVAIL_RESOURCES) + + self.rt._init_compute_node(mock.sentinel.ctx, resources) + + get_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME, + _NODENAME) + get_by_hypervisor_mock.assert_called_once_with(mock.sentinel.ctx, + _NODENAME) + create_mock.assert_not_called() + update_mock.assert_called_once_with(mock.sentinel.ctx, cn) + + self.assertEqual(_HOSTNAME, self.rt.compute_nodes[_NODENAME].host) + + @mock.patch('nova.objects.ComputeNodeList.get_by_hypervisor') @mock.patch('nova.objects.PciDeviceList.get_by_compute_node', return_value=objects.PciDeviceList(objects=[])) @mock.patch('nova.objects.ComputeNode.create') @@ -1019,10 +1053,55 @@ class TestInitComputeNode(BaseTestCase): @mock.patch('nova.compute.resource_tracker.ResourceTracker.' '_update') def test_compute_node_created_on_empty(self, update_mock, get_mock, - create_mock, pci_tracker_mock): + create_mock, pci_tracker_mock, + get_by_hypervisor_mock): + get_by_hypervisor_mock.return_value = [] + self._test_compute_node_created(update_mock, get_mock, create_mock, + pci_tracker_mock, + get_by_hypervisor_mock) + + @mock.patch('nova.objects.ComputeNodeList.get_by_hypervisor') + @mock.patch('nova.objects.PciDeviceList.get_by_compute_node', + return_value=objects.PciDeviceList(objects=[])) + @mock.patch('nova.objects.ComputeNode.create') + @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename') + @mock.patch('nova.compute.resource_tracker.ResourceTracker.' + '_update') + def test_compute_node_created_on_empty_rebalance(self, update_mock, + get_mock, + create_mock, + pci_tracker_mock, + get_by_hypervisor_mock): + get_by_hypervisor_mock.return_value = [] + self._test_compute_node_created(update_mock, get_mock, create_mock, + pci_tracker_mock, + get_by_hypervisor_mock, + rebalances_nodes=True) + + @mock.patch('nova.objects.ComputeNodeList.get_by_hypervisor') + @mock.patch('nova.objects.PciDeviceList.get_by_compute_node', + return_value=objects.PciDeviceList(objects=[])) + @mock.patch('nova.objects.ComputeNode.create') + @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename') + @mock.patch('nova.compute.resource_tracker.ResourceTracker.' + '_update') + def test_compute_node_created_too_many(self, update_mock, get_mock, + create_mock, pci_tracker_mock, + get_by_hypervisor_mock): + get_by_hypervisor_mock.return_value = ["fake_node_1", "fake_node_2"] + self._test_compute_node_created(update_mock, get_mock, create_mock, + pci_tracker_mock, + get_by_hypervisor_mock, + rebalances_nodes=True) + + def _test_compute_node_created(self, update_mock, get_mock, + create_mock, pci_tracker_mock, + get_by_hypervisor_mock, + rebalances_nodes=False): self.flags(cpu_allocation_ratio=1.0, ram_allocation_ratio=1.0, disk_allocation_ratio=1.0) self._setup_rt() + self.driver_mock.rebalances_nodes = rebalances_nodes get_mock.side_effect = exc.NotFound @@ -1088,6 +1167,11 @@ class TestInitComputeNode(BaseTestCase): cn = self.rt.compute_nodes[_NODENAME] get_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME, _NODENAME) + if rebalances_nodes: + get_by_hypervisor_mock.assert_called_once_with( + mock.sentinel.ctx, _NODENAME) + else: + get_by_hypervisor_mock.assert_not_called() create_mock.assert_called_once_with() self.assertTrue(obj_base.obj_equal_prims(expected_compute, cn)) pci_tracker_mock.assert_called_once_with(mock.sentinel.ctx, diff --git a/nova/virt/driver.py b/nova/virt/driver.py index d4c4d0488189..c7f96eabde11 100644 --- a/nova/virt/driver.py +++ b/nova/virt/driver.py @@ -134,6 +134,11 @@ class ComputeDriver(object): requires_allocation_refresh = False + # Indicates if this driver will rebalance nodes among compute service + # hosts. This is really here for ironic and should not be used by any + # other driver. + rebalances_nodes = False + def __init__(self, virtapi): self.virtapi = virtapi self._compute_event_callback = None diff --git a/nova/virt/fake.py b/nova/virt/fake.py index 58f5f7d0a527..601eec07aa18 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -161,9 +161,12 @@ class FakeDriver(driver.ComputeDriver): self._mounts = {} self._interfaces = {} self.active_migrations = {} + self._nodes = self._init_nodes() + + def _init_nodes(self): if not _FAKE_NODES: set_nodes([CONF.host]) - self._nodes = copy.copy(_FAKE_NODES) + return copy.copy(_FAKE_NODES) def init_host(self, host): return diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py index 425a84461d93..8d20740682a2 100644 --- a/nova/virt/ironic/driver.py +++ b/nova/virt/ironic/driver.py @@ -141,6 +141,9 @@ class IronicDriver(virt_driver.ComputeDriver): # migration has been completed. requires_allocation_refresh = True + # This driver is capable of rebalancing nodes between computes. + rebalances_nodes = True + def __init__(self, virtapi, read_only=False): super(IronicDriver, self).__init__(virtapi) global ironic