libvirt: Ignore DiskNotFound during update_available_resource
There was a previous attempt to fix this in change Id687e11e235fd6c2f99bb647184310dfdce9a08d. However, there were 2 problems with the previous fix: 1. The handling of missing volumes and disks, while typically having the same cause, was inconsistent. 2. It failed to consider the very wide race opportunity in _get_disk_over_committed_size_total between initially fetching the instance list from the DB and later getting disk sizes. Because _get_disk_over_committed_size_total() can be a very long operation, we found that we were reliably hitting this race in CI. It might be possible to fix the race, but this would add unnecessary complication to code which isn't critical. It's far more robust just to log it and ignore it, which is also consistent with the handling of missing volumes. Closes-Bug: #1774249 Change-Id: I48719c02713113a41176b8f5cc3c5831f1284a39
This commit is contained in:
parent
36b582a55a
commit
6198f317be
|
@ -16289,30 +16289,6 @@ class LibvirtConnTestCase(test.NoDBTestCase,
|
|||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||
self.assertEqual(0, drvr._get_disk_over_committed_size_total())
|
||||
|
||||
@mock.patch('nova.virt.libvirt.host.Host.list_instance_domains')
|
||||
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
|
||||
'_get_instance_disk_info_from_config',
|
||||
side_effect=exception.DiskNotFound(location='/opt/stack/foo'))
|
||||
@mock.patch('nova.objects.BlockDeviceMappingList.bdms_by_instance_uuid',
|
||||
return_value=objects.BlockDeviceMappingList())
|
||||
@mock.patch('nova.objects.InstanceList.get_by_filters',
|
||||
return_value=objects.InstanceList(objects=[
|
||||
objects.Instance(uuid=uuids.instance,
|
||||
vm_state=vm_states.ACTIVE,
|
||||
task_state=None)]))
|
||||
def test_disk_over_committed_size_total_disk_not_found_reraise(
|
||||
self, mock_get, mock_bdms, mock_get_disk_info, mock_list_domains):
|
||||
"""Tests that we handle DiskNotFound gracefully for an instance that
|
||||
is NOT undergoing a task_state transition and the error is re-raised.
|
||||
"""
|
||||
mock_dom = mock.Mock()
|
||||
mock_dom.XMLDesc.return_value = "<domain/>"
|
||||
mock_dom.UUIDString.return_value = uuids.instance
|
||||
mock_list_domains.return_value = [mock_dom]
|
||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||
self.assertRaises(exception.DiskNotFound,
|
||||
drvr._get_disk_over_committed_size_total)
|
||||
|
||||
@mock.patch('nova.virt.libvirt.storage.lvm.get_volume_size')
|
||||
@mock.patch('nova.virt.disk.api.get_disk_size',
|
||||
new_callable=mock.NonCallableMock)
|
||||
|
|
|
@ -9576,35 +9576,18 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
{'i_name': guest.name})
|
||||
else:
|
||||
raise
|
||||
except exception.VolumeBDMPathNotFound as e:
|
||||
except (exception.VolumeBDMPathNotFound,
|
||||
exception.DiskNotFound) as e:
|
||||
if isinstance(e, exception.VolumeBDMPathNotFound):
|
||||
thing = 'backing volume block device'
|
||||
elif isinstance(e, exception.DiskNotFound):
|
||||
thing = 'backing disk storage'
|
||||
|
||||
LOG.warning('Periodic task is updating the host stats, '
|
||||
'it is trying to get disk info for %(i_name)s, '
|
||||
'but the backing volume block device was removed '
|
||||
'by concurrent operations such as resize. '
|
||||
'Error: %(error)s',
|
||||
{'i_name': guest.name, 'error': e})
|
||||
except exception.DiskNotFound:
|
||||
with excutils.save_and_reraise_exception() as err_ctxt:
|
||||
# If the instance is undergoing a task state transition,
|
||||
# like moving to another host or is being deleted, we
|
||||
# should ignore this instance and move on.
|
||||
if guest.uuid in local_instances:
|
||||
inst = local_instances[guest.uuid]
|
||||
# bug 1774249 indicated when instance is in RESIZED
|
||||
# state it might also can't find back disk
|
||||
if (inst.task_state is not None or
|
||||
inst.vm_state == vm_states.RESIZED):
|
||||
LOG.info('Periodic task is updating the host '
|
||||
'stats; it is trying to get disk info '
|
||||
'for %(i_name)s, but the backing disk '
|
||||
'was removed by a concurrent operation '
|
||||
'(task_state=%(task_state)s) and '
|
||||
'(vm_state=%(vm_state)s)',
|
||||
{'i_name': guest.name,
|
||||
'task_state': inst.task_state,
|
||||
'vm_state': inst.vm_state},
|
||||
instance=inst)
|
||||
err_ctxt.reraise = False
|
||||
'but the %(thing)s was removed by a concurrent '
|
||||
'operation such as resize. Error: %(error)s',
|
||||
{'i_name': guest.name, 'thing': thing, 'error': e})
|
||||
|
||||
# NOTE(gtt116): give other tasks a chance.
|
||||
greenthread.sleep(0)
|
||||
|
|
Loading…
Reference in New Issue