libvirt: Post-migration, set cache value for Cinder volume(s)

This was noticed in a downstream bug: when a Nova instance with a Cinder
volume (in this case, both the Nova instance storage _and_ the Cinder
volume are located on Ceph) is live-migrated to a target Compute node,
the disk cache value for the Cinder volume gets changed.  I.e. the QEMU
command-line for the Cinder volume stored on Ceph changes as follows:

Pre-migration, QEMU command-line for the Nova instance:

    [...] -drive file=rbd:volumes/volume-[...],cache=writeback

Post-migration, QEMU command-line for the Nova instance:

    [...] -drive file=rbd:volumes/volume-[...],cache=none

Furthermore, Jason Dillaman from Ceph confirmed that the RBD cache is
enabled pre-migration:

    $ ceph --admin-daemon /var/run/qemu/ceph-client.openstack.[...] \
        config get rbd_cache
    {
        "rbd_cache": "true"
    }

And disabled, post-migration:

    $ ceph --admin-daemon /var/run/qemu/ceph-client.openstack.[...] \
        config get rbd_cache
    {
        "rbd_cache": "false"
    }

This change in cache value post-migration causes increased I/O latency
on the Cinder volume.

From a chat with Daniel Berrangé on IRC: prior to live migration, Nova
rewrites all the <disk> elements and passes the updated guest XML across
to the target libvirt, but it never calls _set_cache_mode() while doing
so.  As a result, the `writeback` setting from `nova.conf` is lost,
leaving the default `cache=none` setting.  This wrong default only gets
corrected later, when the guest is rebooted on the target and its XML is
regenerated.
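
To make the failure mode concrete, here is a small, self-contained
Python sketch -- not the actual Nova code; names such as DiskConfig and
migration_rewrite() are made up for illustration -- of why a cache mode
applied only in attach_volume() never survives the live-migration
rewrite, while one applied inside _get_volume_config() does:

    # Toy model, for illustration only -- not real Nova code.

    class DiskConfig(object):
        """Stand-in for Nova's LibvirtConfigGuestDisk."""
        def __init__(self):
            self.driver_cache = None   # maps to <driver cache='...'/>

    def set_cache_mode(conf, configured_mode="writeback"):
        """Stand-in for LibvirtDriver._set_cache_mode()."""
        conf.driver_cache = configured_mode

    def get_volume_config_old():
        """Old behaviour: return the volume driver's config as-is;
        attach_volume() used to apply the cache mode afterwards."""
        return DiskConfig()

    def get_volume_config_new():
        """New behaviour: apply the cache mode before returning."""
        conf = DiskConfig()
        set_cache_mode(conf)
        return conf

    def migration_rewrite(get_volume_config):
        """Models _update_volume_xml(): during live migration only this
        callback is consulted; nothing applies the cache mode later."""
        return get_volume_config()

    print(migration_rewrite(get_volume_config_old).driver_cache)  # None
    print(migration_rewrite(get_volume_config_new).driver_cache)  # writeback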

So:

  - Call _set_cache_mode() in the _get_volume_config() method -- because
    it is the callback passed to _update_volume_xml() in
    nova/virt/libvirt/migration.py, i.e. the live-migration code path.

  - And remove duplicate calls to _set_cache_mode() in
    _get_guest_storage_config() and attach_volume().

  - Fix broken unit tests; adjust test_get_volume_config() to reflect
    the disk cache mode.

Thanks: Jason Dillaman of Ceph for observing the change in cache modes
        in a downstream bug analysis, Daniel Berrangé for help in
        analysis from a Nova libvirt driver POV, and Stefan Hajnoczi
        from QEMU for help on I/O latency instrumentation with `perf`.

Closes-bug: 1706083
Change-Id: I4184382b49dd2193d6a21bfe02ea973d02d8b09f
Author:  Kashyap Chamarthy
Date:    2017-07-20 19:01:23 +02:00
Commit:  14c38ac0f2
Parent:  79f6920eac

2 changed files with 10 additions and 14 deletions

--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -6355,7 +6355,9 @@ class LibvirtConnTestCase(test.NoDBTestCase,
     @mock.patch.object(volume_drivers.LibvirtFakeVolumeDriver,
                        'connect_volume')
     @mock.patch.object(volume_drivers.LibvirtFakeVolumeDriver, 'get_config')
-    def test_get_volume_config(self, get_config, connect_volume):
+    @mock.patch.object(libvirt_driver.LibvirtDriver, '_set_cache_mode')
+    def test_get_volume_config(self, _set_cache_mode,
+                               get_config, connect_volume):
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
         connection_info = {'driver_volume_type': 'fake',
                            'data': {'device_path': '/fake',
@@ -6370,6 +6372,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
         get_config.return_value = mock_config
         config = drvr._get_volume_config(connection_info, disk_info)
         get_config.assert_called_once_with(connection_info, disk_info)
+        _set_cache_mode.assert_called_once_with(config)
         self.assertEqual(mock_config, config)

     def test_attach_invalid_volume_type(self):
@@ -6460,12 +6463,10 @@ class LibvirtConnTestCase(test.NoDBTestCase,
             mock.patch.object(drvr, '_connect_volume'),
             mock.patch.object(drvr, '_get_volume_config',
                               return_value=mock_conf),
-            mock.patch.object(drvr, '_set_cache_mode'),
             mock.patch.object(drvr, '_check_discard_for_attach_volume'),
             mock.patch.object(drvr, '_build_device_metadata'),
             mock.patch.object(objects.Instance, 'save')
-        ) as (mock_connect_volume, mock_get_volume_config,
-              mock_set_cache_mode, mock_check_discard,
+        ) as (mock_connect_volume, mock_get_volume_config, mock_check_discard,
               mock_build_metadata, mock_save):
             for state in (power_state.RUNNING, power_state.PAUSED):
                 mock_dom.info.return_value = [state, 512, 512, 2, 1234, 5678]
@@ -6486,7 +6487,6 @@ class LibvirtConnTestCase(test.NoDBTestCase,
                     connection_info, disk_info, instance)
                 mock_get_volume_config.assert_called_with(
                     connection_info, disk_info)
-                mock_set_cache_mode.assert_called_with(mock_conf)
                 mock_dom.attachDeviceFlags.assert_called_with(
                     mock_conf.to_xml(), flags=flags)
                 mock_check_discard.assert_called_with(mock_conf, instance)
@@ -14797,9 +14797,8 @@ class LibvirtConnTestCase(test.NoDBTestCase,
                               'save'),
             mock.patch.object(drvr, '_connect_volume'),
             mock.patch.object(drvr, '_get_volume_config',
-                              return_value=mock_conf),
-            mock.patch.object(drvr, '_set_cache_mode')
-        ) as (volume_save, connect_volume, get_volume_config, set_cache_mode):
+                              return_value=mock_conf)
+        ) as (volume_save, connect_volume, get_volume_config):
             devices = drvr._get_guest_storage_config(instance, image_meta,
                                                      disk_info, False, bdi, flavor, "hvm")

@@ -14811,7 +14810,6 @@ class LibvirtConnTestCase(test.NoDBTestCase,
             get_volume_config.assert_called_with(bdm['connection_info'],
                 {'bus': 'virtio', 'type': 'disk', 'dev': 'vdc'})
             volume_save.assert_called_once_with()
-            self.assertEqual(3, set_cache_mode.call_count)

     def test_get_neutron_events(self):
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)

--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -1174,7 +1174,9 @@ class LibvirtDriver(driver.ComputeDriver):

     def _get_volume_config(self, connection_info, disk_info):
         vol_driver = self._get_volume_driver(connection_info)
-        return vol_driver.get_config(connection_info, disk_info)
+        conf = vol_driver.get_config(connection_info, disk_info)
+        self._set_cache_mode(conf)
+        return conf

     def _get_volume_encryptor(self, connection_info, encryption):
         root_helper = utils.get_root_helper()
@@ -1233,7 +1235,6 @@ class LibvirtDriver(driver.ComputeDriver):
             disk_info['unit'] = self._get_scsi_controller_max_unit(guest) + 1

         conf = self._get_volume_config(connection_info, disk_info)
-        self._set_cache_mode(conf)

         self._check_discard_for_attach_volume(conf, instance)

@@ -3783,9 +3784,6 @@ class LibvirtDriver(driver.ComputeDriver):
                 vol['connection_info'] = connection_info
                 vol.save()

-        for d in devices:
-            self._set_cache_mode(d)
-
         if scsi_controller:
             devices.append(scsi_controller)