From 14c38ac0f253036da79f9d07aedf7dfd5778fde8 Mon Sep 17 00:00:00 2001 From: Kashyap Chamarthy Date: Thu, 20 Jul 2017 19:01:23 +0200 Subject: [PATCH] libvirt: Post-migration, set cache value for Cinder volume(s) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was noticed in a downstream bug when a Nova instance with Cinder volume (in this case, both the Nova instance storage _and_ Cinder volume are located on Ceph) is migrated to a target Compute node, the disk cache value for the Cinder volume gets changed. I.e. the QEMU command-line for the Cinder volume stored on Ceph turns into the following: Pre-migration, QEMU command-line for the Nova instance: [...] -drive file=rbd:volumes/volume-[...],cache=writeback Post-migration, QEMU command-line for the Nova instance: [...] -drive file=rbd:volumes/volume-[...],cache=none Furthermore, Jason Dillaman from Ceph confirms RBD cache being enabled pre-migration: $ ceph --admin-daemon /var/run/qemu/ceph-client.openstack.[...] \ config get rbd_cache { "rbd_cache": "true" } And disabled, post-migration: $ ceph --admin-daemon /var/run/qemu/ceph-client.openstack.[...] \ config get rbd_cache { "rbd_cache": "false" } This change in cache value post-migration causes I/O latency on the Cinder volume. From a chat with Daniel Berrangé on IRC: Prior to live migration, Nova rewrites all the elements, and passes this updated guest XML across to target libvirt. And it is never calling _set_cache_mode() when doing this. So `nova.conf`'s `writeback` setting is getting lost, leaving the default `cache=none` setting. And this mistake (of leaving the default cache value to 'none') will of course be correct when you reboot the guest on the target later. So: - Call _set_cache_mode() in _get_volume_config() method -- because it is a callback function to _update_volume_xml() in nova/virt/libvirt/migration.py. - And remove duplicate calls to _set_cache_mode() in _get_guest_storage_config() and attach_volume(). - Fix broken unit tests; adjust test_get_volume_config() to reflect the disk cache mode. Thanks: Jason Dillaman of Ceph for observing the change in cache modes in a downstream bug analysis, Daniel Berrangé for help in analysis from a Nova libvirt driver POV, and Stefan Hajnoczi from QEMU for help on I/O latency instrumentation with `perf`. Closes-bug: 1706083 Change-Id: I4184382b49dd2193d6a21bfe02ea973d02d8b09f --- nova/tests/unit/virt/libvirt/test_driver.py | 16 +++++++--------- nova/virt/libvirt/driver.py | 8 +++----- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 543a3fc01006..c29b39693280 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -6355,7 +6355,9 @@ class LibvirtConnTestCase(test.NoDBTestCase, @mock.patch.object(volume_drivers.LibvirtFakeVolumeDriver, 'connect_volume') @mock.patch.object(volume_drivers.LibvirtFakeVolumeDriver, 'get_config') - def test_get_volume_config(self, get_config, connect_volume): + @mock.patch.object(libvirt_driver.LibvirtDriver, '_set_cache_mode') + def test_get_volume_config(self, _set_cache_mode, + get_config, connect_volume): drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) connection_info = {'driver_volume_type': 'fake', 'data': {'device_path': '/fake', @@ -6370,6 +6372,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, get_config.return_value = mock_config config = drvr._get_volume_config(connection_info, disk_info) get_config.assert_called_once_with(connection_info, disk_info) + _set_cache_mode.assert_called_once_with(config) self.assertEqual(mock_config, config) def test_attach_invalid_volume_type(self): @@ -6460,12 +6463,10 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock.patch.object(drvr, '_connect_volume'), mock.patch.object(drvr, '_get_volume_config', return_value=mock_conf), - mock.patch.object(drvr, '_set_cache_mode'), mock.patch.object(drvr, '_check_discard_for_attach_volume'), mock.patch.object(drvr, '_build_device_metadata'), mock.patch.object(objects.Instance, 'save') - ) as (mock_connect_volume, mock_get_volume_config, - mock_set_cache_mode, mock_check_discard, + ) as (mock_connect_volume, mock_get_volume_config, mock_check_discard, mock_build_metadata, mock_save): for state in (power_state.RUNNING, power_state.PAUSED): mock_dom.info.return_value = [state, 512, 512, 2, 1234, 5678] @@ -6486,7 +6487,6 @@ class LibvirtConnTestCase(test.NoDBTestCase, connection_info, disk_info, instance) mock_get_volume_config.assert_called_with( connection_info, disk_info) - mock_set_cache_mode.assert_called_with(mock_conf) mock_dom.attachDeviceFlags.assert_called_with( mock_conf.to_xml(), flags=flags) mock_check_discard.assert_called_with(mock_conf, instance) @@ -14797,9 +14797,8 @@ class LibvirtConnTestCase(test.NoDBTestCase, 'save'), mock.patch.object(drvr, '_connect_volume'), mock.patch.object(drvr, '_get_volume_config', - return_value=mock_conf), - mock.patch.object(drvr, '_set_cache_mode') - ) as (volume_save, connect_volume, get_volume_config, set_cache_mode): + return_value=mock_conf) + ) as (volume_save, connect_volume, get_volume_config): devices = drvr._get_guest_storage_config(instance, image_meta, disk_info, False, bdi, flavor, "hvm") @@ -14811,7 +14810,6 @@ class LibvirtConnTestCase(test.NoDBTestCase, get_volume_config.assert_called_with(bdm['connection_info'], {'bus': 'virtio', 'type': 'disk', 'dev': 'vdc'}) volume_save.assert_called_once_with() - self.assertEqual(3, set_cache_mode.call_count) def test_get_neutron_events(self): drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index df86cb851ef7..57285a6fc39a 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -1174,7 +1174,9 @@ class LibvirtDriver(driver.ComputeDriver): def _get_volume_config(self, connection_info, disk_info): vol_driver = self._get_volume_driver(connection_info) - return vol_driver.get_config(connection_info, disk_info) + conf = vol_driver.get_config(connection_info, disk_info) + self._set_cache_mode(conf) + return conf def _get_volume_encryptor(self, connection_info, encryption): root_helper = utils.get_root_helper() @@ -1233,7 +1235,6 @@ class LibvirtDriver(driver.ComputeDriver): disk_info['unit'] = self._get_scsi_controller_max_unit(guest) + 1 conf = self._get_volume_config(connection_info, disk_info) - self._set_cache_mode(conf) self._check_discard_for_attach_volume(conf, instance) @@ -3783,9 +3784,6 @@ class LibvirtDriver(driver.ComputeDriver): vol['connection_info'] = connection_info vol.save() - for d in devices: - self._set_cache_mode(d) - if scsi_controller: devices.append(scsi_controller)