Fix TypeError in _update_from_compute_node race
A compute node record is created in the database from the
_init_compute_node method in the resource tracker in the
nova-compute service. It is created without the updated_at
or free_disk_gb fields being set.
Shortly after it's created, update_resource_stats() is called
on the scheduler report client which calls compute_node.save()
which calls the update_compute_node() method in the DB API which
*always* updates the updated_at field even if nothing else
was changed.
So at this point, we have a compute node with updated_at set
but not free_disk_gb. The free_disk_gb field gets set by the
_update_usage_from_instances() method in the resource tracker
and then that value is later saved off when the compute node
is updated in the database on a second call to the
update_resource_stats() method in the scheduler report client.
At that point the free_disk_gb field is set on the compute
node record.
There is a race in between the compute node create and initial
update but before free_disk_gb is set where the scheduler host
manager can be attempting to update the HostState object from
the same compute node when selecting host destinations during
a server build. If that picks up the compute node before its
free_disk_gb field is set it will result in a TypeError when
trying to multiply None * 1024.
Change 36a0ba9c81
was an earlier
attempt at fixing this bug and shortened the race window but
didn't handle the problem where updated_at is set but free_disk_gb
is not yet.
This change builds on that by simply checking for the thing
the scheduler host manager actually cares about, which is the
free_disk_gb field.
Closes-Bug: #1654102
Closes-Bug: #1610679
Change-Id: I37b75dabb3ea7ec2d5678550d9aff30b1a0c29e6
This commit is contained in:
parent
1bcf3b553a
commit
2f1245a56c
|
@ -183,7 +183,9 @@ class HostState(object):
|
|||
"""Update information about a host from a ComputeNode object."""
|
||||
# NOTE(jichenjc): if the compute record is just created but not updated
|
||||
# some field such as free_disk_gb can be None
|
||||
if compute.updated_at is None:
|
||||
if 'free_disk_gb' not in compute or compute.free_disk_gb is None:
|
||||
LOG.debug('Ignoring compute node %s as its usage has not been '
|
||||
'updated yet.', compute.uuid)
|
||||
return
|
||||
|
||||
if (self.updated and compute.updated_at
|
||||
|
|
|
@ -21,6 +21,8 @@ This host manager will consume all cpu's, disk space, and
|
|||
ram from a host / node as it is supporting Baremetal hosts, which can not be
|
||||
subdivided into multiple instances.
|
||||
"""
|
||||
from oslo_log import log as logging
|
||||
|
||||
import nova.conf
|
||||
from nova import context as context_module
|
||||
from nova import objects
|
||||
|
@ -29,6 +31,8 @@ from nova.scheduler import host_manager
|
|||
|
||||
CONF = nova.conf.CONF
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IronicNodeState(host_manager.HostState):
|
||||
"""Mutable and immutable information tracked for a host.
|
||||
|
@ -38,6 +42,14 @@ class IronicNodeState(host_manager.HostState):
|
|||
|
||||
def _update_from_compute_node(self, compute):
|
||||
"""Update information about a host from a ComputeNode object."""
|
||||
|
||||
# NOTE(jichenjc): if the compute record is just created but not updated
|
||||
# some field such as free_disk_gb can be None
|
||||
if 'free_disk_gb' not in compute or compute.free_disk_gb is None:
|
||||
LOG.debug('Ignoring compute node %s as its usage has not been '
|
||||
'updated yet.', compute.uuid)
|
||||
return
|
||||
|
||||
self.vcpus_total = compute.vcpus
|
||||
self.vcpus_used = compute.vcpus_used
|
||||
|
||||
|
|
|
@ -1218,7 +1218,7 @@ class HostStateTestCase(test.NoDBTestCase):
|
|||
|
||||
def test_stat_consumption_from_compute_node_not_ready(self):
|
||||
compute = objects.ComputeNode(free_ram_mb=100,
|
||||
updated_at=None)
|
||||
uuid=uuids.compute_node_uuid)
|
||||
|
||||
host = host_manager.HostState("fakehost", "fakenode")
|
||||
host._update_from_compute_node(compute)
|
||||
|
|
|
@ -182,7 +182,8 @@ class IronicHostManagerChangedNodesTestCase(test.NoDBTestCase):
|
|||
hypervisor_version=1,
|
||||
hypervisor_hostname='fake_host',
|
||||
cpu_allocation_ratio=16.0, ram_allocation_ratio=1.5,
|
||||
disk_allocation_ratio=1.0)
|
||||
disk_allocation_ratio=1.0,
|
||||
uuid=uuids.compute_node_uuid)
|
||||
|
||||
@mock.patch.object(ironic_host_manager.IronicNodeState, '__init__')
|
||||
def test_create_ironic_node_state(self, init_mock):
|
||||
|
@ -265,6 +266,15 @@ class IronicHostManagerChangedNodesTestCase(test.NoDBTestCase):
|
|||
self.assertEqual(1, host.hypervisor_version)
|
||||
self.assertEqual('fake_host', host.hypervisor_hostname)
|
||||
|
||||
def test_update_from_compute_node_not_ready(self):
|
||||
"""Tests that we ignore a compute node that does not have its
|
||||
free_disk_gb field set yet from the compute resource tracker.
|
||||
"""
|
||||
host = ironic_host_manager.IronicNodeState("fakehost", "fakenode")
|
||||
self.compute_node.free_disk_gb = None
|
||||
host.update(compute=self.compute_node)
|
||||
self.assertEqual(0, host.free_disk_mb)
|
||||
|
||||
def test_consume_identical_instance_from_compute(self):
|
||||
host = ironic_host_manager.IronicNodeState("fakehost", "fakenode")
|
||||
host.update(compute=self.compute_node)
|
||||
|
|
Loading…
Reference in New Issue