From 05f4bf0e6738093d79c6a8ffb9ca3ccb189c6658 Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Fri, 27 Sep 2019 16:51:02 +0100 Subject: [PATCH] libvirt: Ignore DiskNotFound during update_available_resource There was a previous attempt to fix this in change Id687e11e235fd6c2f99bb647184310dfdce9a08d. However, there were 2 problems with the previous fix: 1. The handling of missing volumes and disks, while typically having the same cause, was inconsistent. 2. It failed to consider the very wide race opportunity in _get_disk_over_committed_size_total between initially fetching the instance list from the DB and later getting disk sizes. Because _get_disk_over_committed_size_total() can be a very long operation, we found that we were reliably hitting this race in CI. It might be possible to fix the race, but this would add unnecessary complication to code which isn't critical. It's far more robust just to log it and ignore it, which is also consistent with the handling of missing volumes. Closes-Bug: #1774249 Change-Id: I48719c02713113a41176b8f5cc3c5831f1284a39 (cherry picked from commit 6198f317be549e6d2bd324a48f226b379556e945) (cherry picked from commit 73d9b6e5f622dc645ac6ad322c836ffbe4045072) (cherry picked from commit 4700b3658e5983a731d0da259365317e230c4a52) (cherry picked from commit 1962633328dc7227dd040c1cf3a9cbe97b36ea37) --- nova/tests/unit/virt/libvirt/test_driver.py | 24 ------------- nova/virt/libvirt/driver.py | 37 ++++++--------------- 2 files changed, 10 insertions(+), 51 deletions(-) diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index f983cc8a0d7b..dcdf35b62e53 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -14332,30 +14332,6 @@ class LibvirtConnTestCase(test.NoDBTestCase, drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) self.assertEqual(0, drvr._get_disk_over_committed_size_total()) - @mock.patch('nova.virt.libvirt.host.Host.list_instance_domains') - @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.' - '_get_instance_disk_info_from_config', - side_effect=exception.DiskNotFound(location='/opt/stack/foo')) - @mock.patch('nova.objects.BlockDeviceMappingList.bdms_by_instance_uuid', - return_value=objects.BlockDeviceMappingList()) - @mock.patch('nova.objects.InstanceList.get_by_filters', - return_value=objects.InstanceList(objects=[ - objects.Instance(uuid=uuids.instance, - vm_state=vm_states.ACTIVE, - task_state=None)])) - def test_disk_over_committed_size_total_disk_not_found_reraise( - self, mock_get, mock_bdms, mock_get_disk_info, mock_list_domains): - """Tests that we handle DiskNotFound gracefully for an instance that - is NOT undergoing a task_state transition and the error is re-raised. - """ - mock_dom = mock.Mock() - mock_dom.XMLDesc.return_value = "" - mock_dom.UUIDString.return_value = uuids.instance - mock_list_domains.return_value = [mock_dom] - drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) - self.assertRaises(exception.DiskNotFound, - drvr._get_disk_over_committed_size_total) - @mock.patch('nova.virt.libvirt.storage.lvm.get_volume_size') @mock.patch('nova.virt.disk.api.get_disk_size', new_callable=mock.NonCallableMock) diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 1f23267330c0..65aacf190e56 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -8057,35 +8057,18 @@ class LibvirtDriver(driver.ComputeDriver): {'i_name': guest.name}) else: raise - except exception.VolumeBDMPathNotFound as e: + except (exception.VolumeBDMPathNotFound, + exception.DiskNotFound) as e: + if isinstance(e, exception.VolumeBDMPathNotFound): + thing = 'backing volume block device' + elif isinstance(e, exception.DiskNotFound): + thing = 'backing disk storage' + LOG.warning('Periodic task is updating the host stats, ' 'it is trying to get disk info for %(i_name)s, ' - 'but the backing volume block device was removed ' - 'by concurrent operations such as resize. ' - 'Error: %(error)s', - {'i_name': guest.name, 'error': e}) - except exception.DiskNotFound: - with excutils.save_and_reraise_exception() as err_ctxt: - # If the instance is undergoing a task state transition, - # like moving to another host or is being deleted, we - # should ignore this instance and move on. - if guest.uuid in local_instances: - inst = local_instances[guest.uuid] - # bug 1774249 indicated when instance is in RESIZED - # state it might also can't find back disk - if (inst.task_state is not None or - inst.vm_state == vm_states.RESIZED): - LOG.info('Periodic task is updating the host ' - 'stats; it is trying to get disk info ' - 'for %(i_name)s, but the backing disk ' - 'was removed by a concurrent operation ' - '(task_state=%(task_state)s) and ' - '(vm_state=%(vm_state)s)', - {'i_name': guest.name, - 'task_state': inst.task_state, - 'vm_state': inst.vm_state}, - instance=inst) - err_ctxt.reraise = False + 'but the %(thing)s was removed by a concurrent ' + 'operation such as resize. Error: %(error)s', + {'i_name': guest.name, 'thing': thing, 'error': e}) # NOTE(gtt116): give other tasks a chance. greenthread.sleep(0)