Merge "Fix: Race between attachment and volume deletion"

Zuul 2021-11-16 09:50:34 +00:00 committed by Gerrit Code Review
commit 5b1578f472
8 changed files with 127 additions and 113 deletions

@@ -63,6 +63,9 @@ from cinder.volume import volume_utils
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
# Map with cases where attach status differs from volume status
ATTACH_STATUS_MAP = {'attached': 'in-use', 'detached': 'available'}
options.set_defaults(CONF, connection='sqlite:///$state_path/cinder.sqlite')
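
The new ATTACH_STATUS_MAP lists only the two cases where the volume status string differs from the attachment status; every other attach status ('attaching', 'detaching', 'reserved') doubles as the volume status, which is why the later lookup uses dict.get() with the attach status itself as the default. A standalone illustration (plain Python, not part of the commit):

ATTACH_STATUS_MAP = {'attached': 'in-use', 'detached': 'available'}

for attach_status in ('attached', 'attaching', 'detaching',
                      'reserved', 'detached'):
    # Statuses missing from the map fall through unchanged.
    print(attach_status, '->',
          ATTACH_STATUS_MAP.get(attach_status, attach_status))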
@@ -1727,74 +1730,83 @@ def volume_include_in_cluster(context, cluster, partial_rename=True,
partial_rename, filters)
def _get_statuses_from_attachments(context, session, volume_id):
"""Get volume status and attach_status based on existing attachments."""
# NOTE: Current implementation ignores attachments on error attaching,
# since they will not have been used by any consumer because os-brick's
# connect_volume has not been called yet. This leads to cases where a
# volume will be in 'available' state yet have attachments.
# If we sort status of attachments alphabetically, ignoring errors, the
# first element will be the attachment status for the volume:
# attached > attaching > detaching > reserved
attach_status = session.query(models.VolumeAttachment.attach_status).\
filter_by(deleted=False).\
filter_by(volume_id=volume_id).\
filter(~models.VolumeAttachment.attach_status.startswith('error_')).\
order_by(models.VolumeAttachment.attach_status.asc()).\
limit(1).\
scalar()
# No volume attachment records means the volume is detached.
attach_status = attach_status or 'detached'
# Check cases where volume status is different from attach status, and
# default to the same value if it's not one of those cases.
status = ATTACH_STATUS_MAP.get(attach_status, attach_status)
return (status, attach_status)
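
The alphabetical trick above works because the four non-error statuses happen to sort in priority order: 'attached' < 'attaching' < 'detaching' < 'reserved'. A database-free sketch of the same logic, where a plain list of status strings stands in for the VolumeAttachment rows and ATTACH_STATUS_MAP is the dict from the previous hunk:

def statuses_from_attachments(attachment_statuses):
    # Drop error_* rows, then the alphabetically smallest remaining
    # status is the highest-priority one.
    live = sorted(s for s in attachment_statuses
                  if not s.startswith('error_'))
    attach_status = live[0] if live else 'detached'
    return ATTACH_STATUS_MAP.get(attach_status, attach_status), attach_status

assert (statuses_from_attachments(['reserved', 'attached'])
        == ('in-use', 'attached'))
assert (statuses_from_attachments(['error_attaching'])
        == ('available', 'detached'))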
@require_admin_context
def volume_detached(context, volume_id, attachment_id):
"""This updates a volume attachment and marks it as detached.
"""Delete an attachment and update the volume accordingly.
This method also ensures that the volume entry is correctly
marked as either still attached/in-use or detached/available
if this was the last detachment made.
After marking the attachment as detached the method will decide the status
and attach_status values for the volume based on the current status and the
remaining attachments and their status.
Volume status may be changed to: in-use, attaching, detaching, reserved, or
available.
Volume attach_status will be changed to one of: attached, attaching,
detaching, reserved, or detached.
"""
# NOTE(jdg): This is a funky band-aid for the earlier attempts at
# multiattach, it's a bummer because these things aren't really being used
# but at the same time we don't want to break them until we work out the
# new proposal for multi-attach
remain_attachment = True
session = get_session()
with session.begin():
# Only load basic volume info necessary to check various status and use
# the volume row as a lock with the for_update.
volume = _volume_get(context, volume_id, session=session,
joined_load=False, for_update=True)
try:
attachment = _attachment_get(context, attachment_id,
session=session)
attachment_updates = attachment.delete(session)
except exception.VolumeAttachmentNotFound:
attachment_updates = None
attachment = None
if attachment:
now = timeutils.utcnow()
attachment_updates = {
'attach_status': fields.VolumeAttachStatus.DETACHED,
'detach_time': now,
'deleted': True,
'deleted_at': now,
'updated_at': attachment.updated_at,
}
attachment.update(attachment_updates)
attachment.save(session=session)
del attachment_updates['updated_at']
status, attach_status = _get_statuses_from_attachments(context,
session,
volume_id)
attachment_list = None
volume_ref = _volume_get(context, volume_id,
session=session)
volume_updates = {'updated_at': volume_ref.updated_at}
if not volume_ref.volume_attachment:
# NOTE(jdg): We kept the old arg style allowing session exclusively
# for this one call
attachment_list = volume_attachment_get_all_by_volume_id(
context, volume_id, session=session)
remain_attachment = False
if attachment_list and len(attachment_list) > 0:
remain_attachment = True
volume_updates = {'updated_at': volume.updated_at,
'attach_status': attach_status}
if not remain_attachment:
# Hide status update from user if we're performing volume migration
# or uploading it to image
if ((not volume_ref.migration_status and
not (volume_ref.status == 'uploading')) or
volume_ref.migration_status in ('success', 'error')):
volume_updates['status'] = 'available'
# Hide volume status update to available on volume migration or upload,
# as status is updated later on those flows.
if ((attach_status != 'detached')
or (not volume.migration_status and volume.status != 'uploading')
or volume.migration_status in ('success', 'error')):
volume_updates['status'] = status
volume_updates['attach_status'] = (
fields.VolumeAttachStatus.DETACHED)
else:
# Volume is still attached
volume_updates['status'] = 'in-use'
volume_updates['attach_status'] = (
fields.VolumeAttachStatus.ATTACHED)
volume_ref.update(volume_updates)
volume_ref.save(session=session)
volume.update(volume_updates)
volume.save(session=session)
del volume_updates['updated_at']
return (volume_updates, attachment_updates)
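
Because removed and added lines are interleaved above, the status decision in the merged code is easy to misread; it reduces to roughly the following sketch, assuming volume is the row locked with for_update and status/attach_status come from _get_statuses_from_attachments:

def volume_updates_after_detach(volume, status, attach_status):
    # Always record the computed attach_status, but leave the volume
    # status alone while a migration or upload-to-image flow owns it.
    updates = {'attach_status': attach_status}
    if (attach_status != 'detached'
            or (not volume.migration_status
                and volume.status != 'uploading')
            or volume.migration_status in ('success', 'error')):
        updates['status'] = status
    return updates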
@@ -1878,11 +1890,14 @@ def _volume_get_query(context, session=None, project_only=False,
@require_context
def _volume_get(context, volume_id, session=None, joined_load=True):
def _volume_get(context, volume_id, session=None, joined_load=True,
for_update=False):
result = _volume_get_query(context, session=session, project_only=True,
joined_load=joined_load)
if joined_load:
result = result.options(joinedload('volume_type.extra_specs'))
if for_update:
result = result.with_for_update()
result = result.filter_by(id=volume_id).first()
if not result:
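
The race fix hinges on this new for_update=True path: loading the volume row with SELECT ... FOR UPDATE turns it into a lock, so a concurrent volume delete or a second detach blocks until the transaction commits. A minimal SQLAlchemy sketch of the pattern (the engine and mapped Volume class are illustrative placeholders, not Cinder's; note that SQLite silently ignores FOR UPDATE):

from sqlalchemy import select
from sqlalchemy.orm import Session

def update_volume_locked(engine, Volume, volume_id, updates):
    with Session(engine) as session, session.begin():
        # Other transactions touching this row wait here until commit.
        volume = session.execute(
            select(Volume).filter_by(id=volume_id).with_for_update()
        ).scalar_one()
        for key, value in updates.items():
            setattr(volume, key, value)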

@@ -56,8 +56,10 @@ class CinderBase(models.TimestampMixin,
def delete(self, session):
"""Delete this object."""
updated_values = self.delete_values()
updated_values['updated_at'] = self.updated_at
self.update(updated_values)
self.save(session=session)
del updated_values['updated_at']
return updated_values
@@ -393,6 +395,14 @@ class VolumeAttachment(BASE, CinderBase):
# Stores a serialized json dict of host connector information from brick.
connector = sa.Column(sa.Text)
@staticmethod
def delete_values():
now = timeutils.utcnow()
return {'deleted': True,
'deleted_at': now,
'attach_status': 'detached',
'detach_time': now}
class VolumeType(BASE, CinderBase):
"""Represent possible volume_types of volumes offered."""

@@ -172,13 +172,13 @@ class AttachmentsAPITestCase(test.TestCase):
self.controller.delete(req, attachment.id)
volume2 = objects.Volume.get_by_id(self.ctxt, volume1.id)
if status == 'reserved':
self.assertEqual('detached', volume2.attach_status)
self.assertRaises(
exception.VolumeAttachmentNotFound,
objects.VolumeAttachment.get_by_id, self.ctxt, attachment.id)
else:
self.assertEqual('attached', volume2.attach_status)
# Volume and attachment status is changed on the API service
self.assertEqual('detached', volume2.attach_status)
self.assertEqual('available', volume2.status)
self.assertRaises(
exception.VolumeAttachmentNotFound,
objects.VolumeAttachment.get_by_id, self.ctxt, attachment.id)
if status != 'reserved':
mock_delete.assert_called_once_with(req.environ['cinder.context'],
attachment.id, mock.ANY)

@@ -18,7 +18,6 @@ from oslo_utils import importutils
from cinder import context
from cinder import db
from cinder.db.sqlalchemy import api as sqla_db
from cinder import exception
from cinder.objects import fields
from cinder.objects import volume_attachment
from cinder.tests.unit.api.v2 import fakes as v2_fakes
@@ -137,13 +136,21 @@ class AttachmentManagerTestCase(test.TestCase):
attachment_ref = db.volume_attachment_get(
self.context,
attachment_ref['id'])
vref.refresh()
expected_status = (vref.status, vref.attach_status,
attachment_ref.attach_status)
self.manager.attachment_delete(self.context,
attachment_ref['id'],
vref)
self.assertRaises(exception.VolumeAttachmentNotFound,
db.volume_attachment_get,
self.context,
attachment_ref.id)
# Manager doesn't change the resource status. It is changed on the API
attachment_ref = db.volume_attachment_get(self.context,
attachment_ref.id)
vref.refresh()
self.assertEqual(
expected_status,
(vref.status, vref.attach_status, attachment_ref.attach_status))
def test_attachment_delete_remove_export_fail(self):
"""attachment_delete removes attachment on remove_export failure."""
@@ -160,15 +167,18 @@
attach = db.volume_attach(self.context, values)
# Confirm the volume OVO has the attachment before the deletion
vref.refresh()
expected_vol_status = (vref.status, vref.attach_status)
self.assertEqual(1, len(vref.volume_attachment))
self.manager.attachment_delete(self.context, attach.id, vref)
# Attachment has been removed from the DB
self.assertRaises(exception.VolumeAttachmentNotFound,
db.volume_attachment_get, self.context, attach.id)
# Attachment has been removed from the volume OVO attachment list
self.assertEqual(0, len(vref.volume_attachment))
# Manager doesn't change the resource status. It is changed on the API
attachment = db.volume_attachment_get(self.context, attach.id)
self.assertEqual(attach.attach_status, attachment.attach_status)
vref = db.volume_get(self.context, vref.id)
self.assertEqual(expected_vol_status,
(vref.status, vref.attach_status))
def test_attachment_delete_multiple_attachments(self):
volume_params = {'status': 'available'}

@@ -644,11 +644,13 @@ class DBAPIVolumeTestCase(BaseTest):
'instance_uuid': instance_uuid,
'attach_status': fields.VolumeAttachStatus.ATTACHING, }
attachment = db.volume_attach(self.ctxt, values)
db.volume_attached(self.ctxt, attachment.id, instance_uuid,
None, '/tmp')
values2 = {'volume_id': volume.id,
'instance_uuid': fake.OBJECT_ID,
'attach_status': fields.VolumeAttachStatus.ATTACHING, }
db.volume_attach(self.ctxt, values2)
db.volume_attached(self.ctxt, attachment.id,
attachment2 = db.volume_attach(self.ctxt, values2)
db.volume_attached(self.ctxt, attachment2.id,
instance_uuid,
None, '/tmp')
volume_updates, attachment_updates = (

@@ -2261,53 +2261,26 @@ class API(base.Base):
volume = attachment.volume
if attachment.attach_status == fields.VolumeAttachStatus.RESERVED:
volume_utils.notify_about_volume_usage(ctxt, volume,
"detach.start")
volume.finish_detach(attachment.id)
do_notify = True
volume_utils.notify_about_volume_usage(ctxt,
volume, "detach.start")
else:
self.volume_rpcapi.attachment_delete(ctxt,
attachment.id,
volume)
do_notify = False
status_updates = {'status': 'available',
'attach_status': 'detached'}
remaining_attachments = AO_LIST.get_all_by_volume_id(ctxt, volume.id)
LOG.debug("Remaining volume attachments: %s", remaining_attachments,
resource=volume)
# Generate the detach.start notification on the volume service to
# include the host doing the operation.
self.volume_rpcapi.attachment_delete(ctxt, attachment.id, volume)
# NOTE(jdg): Try and figure out the > state we have left and set that
# attached > attaching > detaching > reserved
pending_status_list = []
for attachment in remaining_attachments:
pending_status_list.append(attachment.attach_status)
LOG.debug("Adding status of: %s to pending status list "
"for volume.", attachment.attach_status,
resource=volume)
# Trigger attachments lazy load (missing since volume was loaded in the
# attachment without joined tables). With them loaded the finish_detach
# call removes the detached one from the list, and the notification and
# return have the right attachment list.
volume.volume_attachment
volume.finish_detach(attachment.id)
LOG.debug("Pending status list for volume during "
"attachment-delete: %s",
pending_status_list, resource=volume)
if 'attached' in pending_status_list:
status_updates['status'] = 'in-use'
status_updates['attach_status'] = 'attached'
elif 'attaching' in pending_status_list:
status_updates['status'] = 'attaching'
status_updates['attach_status'] = 'attaching'
elif 'detaching' in pending_status_list:
status_updates['status'] = 'detaching'
status_updates['attach_status'] = 'detaching'
elif 'reserved' in pending_status_list:
status_updates['status'] = 'reserved'
status_updates['attach_status'] = 'reserved'
volume.status = status_updates['status']
volume.attach_status = status_updates['attach_status']
volume.save()
if do_notify:
volume_utils.notify_about_volume_usage(ctxt, volume, "detach.end")
return remaining_attachments
# Do end notification on the API so it comes after finish_detach.
# Doing it on the volume service leads to race condition from bug
# #1937084, and doing the notification there with the finish here leads
# to bug #1916980.
volume_utils.notify_about_volume_usage(ctxt, volume, "detach.end")
return volume.volume_attachment
class HostAPI(base.Base):
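
With the removed and added bodies interleaved above, the resulting API-side flow is hard to follow. Condensed, and with helper names simplified (a sketch, not the literal Cinder code), it is:

def attachment_delete(ctxt, attachment, volume, rpcapi, notify):
    if attachment.attach_status == 'reserved':
        # Nothing was ever connected on a host, so the API emits
        # detach.start itself.
        notify(ctxt, volume, 'detach.start')
    else:
        # The volume service emits detach.start so the notification
        # names the host doing the work.
        rpcapi.attachment_delete(ctxt, attachment.id, volume)
    volume.volume_attachment  # trigger the lazy load before detaching
    volume.finish_detach(attachment.id)
    # detach.end is sent from the API, after finish_detach, to avoid
    # the races from bugs #1937084 and #1916980.
    notify(ctxt, volume, 'detach.end')
    return volume.volume_attachment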

@@ -4951,8 +4951,6 @@ class VolumeManager(manager.CleanableManager,
# Failures on detach_volume and remove_export are not considered
# failures in terms of detaching the volume.
pass
vref.finish_detach(attachment_id)
self._notify_about_volume_usage(context, vref, "detach.end")
# Replication group API (Tiramisu)
def enable_replication(self,

@@ -0,0 +1,6 @@
---
fixes:
- |
`Bug #1937084 <https://bugs.launchpad.net/cinder/+bug/1937084>`_: Fixed
race condition between delete attachment and delete volume that can leave
deleted volumes stuck as attached to instances.