Fix guard for NVMeOF volumes

When detaching multiple NVMe-oF volumes from the same host we may end
up with an NVMe subsystem in the "connecting" state, and we'll see a
bunch of nvme errors in dmesg.

This happens on storage systems that share the same subsystem for
multiple volumes because Nova has not been updated to support the
tri-state "shared_targets" option that groups the detach and unmap of
volumes to prevent race conditions.

This is related to the issue mentioned in an os-brick commit message [1].

For the guard_connection method of os-brick to work as expected for
NVMe-oF volumes we need to use microversion 3.69 when retrieving the
cinder volume.

In microversion 3.69 we started reporting 3 states for shared_targets:
True, False, and None.

- True is to guard iSCSI volumes and will only be used if the iSCSI
  initiator running on the host doesn't have the manual scans feature.

- False is that no target/subsystem is being shared so no guard is
  necessary.

- None is to force guarding, and it's currently used for NVMe-oF volumes
  when sharing the subsystem.

[1]: https://review.opendev.org/c/openstack/os-brick/+/836062/12//COMMIT_MSG

Closes-Bug: #2035375
Change-Id: I4def1c0f20118d0b8eb7d3bbb09af2948ffd70e1
This commit is contained in:
Gorka Eguileor 2023-09-12 20:53:15 +02:00
parent 53012f1c55
commit 18163761d0
3 changed files with 40 additions and 9 deletions

View File

@ -15,6 +15,7 @@
from os_brick import encryptors
from unittest import mock
import ddt
from oslo_serialization import jsonutils
from oslo_utils.fixture import uuidsentinel as uuids
@ -35,6 +36,7 @@ from nova.volume import cinder
ATTACHMENT_ID = uuids.attachment_id
@ddt.ddt
class TestDriverBlockDevice(test.NoDBTestCase):
# os-brick>=5.1 now uses external file system locks instead of internal
# locks so we need to set up locking
@ -613,6 +615,7 @@ class TestDriverBlockDevice(test.NoDBTestCase):
# First call to get() fails because the API isn't new enough.
# So we fallback to the old call.
self.volume_api.get.side_effect = [
exception.CinderAPIVersionNotAvailable(version='3.69'),
exception.CinderAPIVersionNotAvailable(version='3.48'),
fake_volume]
@ -688,14 +691,17 @@ class TestDriverBlockDevice(test.NoDBTestCase):
if include_shared_targets:
self.volume_api.get.assert_called_once_with(
self.context, fake_volume['id'], microversion='3.48')
self.context, fake_volume['id'], microversion='3.69')
else:
# First call to get() fails because the API isn't new enough.
# So we fallback to the old call.
self.volume_api.get.assert_has_calls([
mock.call(self.context, fake_volume['id'],
microversion='3.69'),
mock.call(self.context, fake_volume['id'],
microversion='3.48'),
mock.call(self.context, fake_volume['id'])])
mock.call(self.context, fake_volume['id'],
microversion=None)])
try:
self.volume_api.check_availability_zone.assert_called_once_with(
@ -1557,6 +1563,24 @@ class TestDriverBlockDevice(test.NoDBTestCase):
self._test_boot_from_volume_source_snapshot_volume_type(
bdm, 'fake-lvm-1')
@ddt.data(['3.69'], ['3.69', '3.48'], ['3.69', '3.48', None])
def test__get_volume(self, microversions):
    """Check the microversion fallback chain of _get_volume.

    Every microversion before the last one in the scenario raises
    CinderAPIVersionNotAvailable; the last one succeeds.  The helper
    must try each version in order and return the first successful
    result.
    """
    fake_volume_api = mock.Mock()
    expected_volume = mock.Mock()
    not_available = exception.CinderAPIVersionNotAvailable
    # All attempts but the last fail with "version not available".
    fake_volume_api.get.side_effect = (
        [not_available(version=mv) for mv in microversions[:-1]]
        + [expected_volume])

    result = self.driver_classes['volume']._get_volume(
        self.context, fake_volume_api, mock.sentinel.volume_id)

    self.assertEqual(expected_volume, result)
    # One get() call per microversion in the scenario, in order.
    self.assertEqual(len(microversions), fake_volume_api.get.call_count)
    fake_volume_api.get.assert_has_calls(
        [mock.call(self.context, mock.sentinel.volume_id,
                   microversion=mv)
         for mv in microversions])
class TestDriverBlockDeviceNewFlow(TestDriverBlockDevice):
"""Virt block_device tests for the Cinder 3.44 volume attach flow

View File

@ -399,13 +399,14 @@ class DriverVolumeBlockDevice(DriverBlockDevice):
@staticmethod
def _get_volume(context, volume_api, volume_id):
# First try to get the volume at microversion 3.48 so we can get the
# shared_targets parameter exposed in that version. If that API version
# is not available, we just fallback.
try:
return volume_api.get(context, volume_id, microversion='3.48')
except exception.CinderAPIVersionNotAvailable:
return volume_api.get(context, volume_id)
# First try microversion for tri-state shared_targets, then older
# shared_targets, finally fallback to standard v3.
versions = ('3.69', '3.48', None)
for mv in versions:
try:
return volume_api.get(context, volume_id, microversion=mv)
except exception.CinderAPIVersionNotAvailable:
pass
def _create_volume(self, context, instance, volume_api, size,
wait_func=None, **create_kwargs):

View File

@ -0,0 +1,6 @@
---
fixes:
- |
`Bug #2035375 <https://bugs.launchpad.net/nova/+bug/2035375>`_: Fixed
leftover NVMe-oF subsystems when disconnecting multiple NVMe-oF volumes on
the same host from storage sharing the subsystem for different volumes.