From 714894a7ff81e23f5ac7545a6b91ea484447a988 Mon Sep 17 00:00:00 2001
From: Maxim Monin <maximmonin@gmail.com>
Date: Wed, 1 Feb 2023 08:33:18 +0000
Subject: [PATCH] Server Rescue leads to Server ERROR state if original image
 is deleted

Server rescue in stable device mode has dependency on the original
image used to create or rebuild the server. If the original image
was deleted from Glance, the server could not be rescued. Nova has
server.image_ref reference to non-existent image. Rescue in stable
device rescue mode leads to driver error and Server in ERROR state.
Proposed fix use falling back to the instance image metadata if
the original image is not found in Glance.

Closes-Bug: #2002606
Change-Id: Ia0f747158721f82ef1803618b6cb30661b6252c9
---
 .../libvirt/test_rescue_deleted_base.py       | 182 ++++++++++++++++++
 nova/virt/libvirt/driver.py                   |  29 +--
 ...h-deleted-base-image-5143f1c1c914b8fe.yaml |  10 +
 3 files changed, 209 insertions(+), 12 deletions(-)
 create mode 100644 nova/tests/functional/libvirt/test_rescue_deleted_base.py
 create mode 100644 releasenotes/notes/rescue-stable-with-deleted-base-image-5143f1c1c914b8fe.yaml

diff --git a/nova/tests/functional/libvirt/test_rescue_deleted_base.py b/nova/tests/functional/libvirt/test_rescue_deleted_base.py
new file mode 100644
index 000000000000..437106711ef0
--- /dev/null
+++ b/nova/tests/functional/libvirt/test_rescue_deleted_base.py
@@ -0,0 +1,182 @@
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import datetime
+
+import fixtures
+from oslo_log import log as logging
+from oslo_utils.fixture import uuidsentinel as uuids
+
+from nova import conf
+from nova import context as nova_context
+from nova.tests import fixtures as nova_fixtures
+from nova.tests.functional.libvirt import base
+
+LOG = logging.getLogger(__name__)
+CONF = conf.CONF
+
+
+class RescueServerTestWithDeletedBaseImage(
+    base.ServersTestBase
+):
+
+    api_major_version = 'v2.1'
+    microversion = '2.87'
+
+    # BUG #2002606
+    # Rescue in stable rescue mode with deleted original image must be
+    # successful
+    def setUp(self):
+        super(RescueServerTestWithDeletedBaseImage, self).setUp()
+
+        self.ctxt = nova_context.get_admin_context()
+
+        # to create bfv server with libvirt driver
+        self.cinder = self.useFixture(nova_fixtures.CinderFixture(self))
+        self.useFixture(nova_fixtures.OSBrickFixture())
+        # disk.rescue image_create ignoring privsep
+        self.useFixture(fixtures.MockPatch(
+            'nova.virt.libvirt.imagebackend._update_utime_ignore_eacces'))
+
+        # dir to create 'unrescue.xml'
+        def fake_path(_self, *args, **kwargs):
+            return CONF.instances_path
+
+        self.useFixture(fixtures.MonkeyPatch(
+            'nova.virt.libvirt.utils.get_instance_path', fake_path))
+
+    def _create_test_images(self):
+        timestamp = datetime.datetime(2021, 1, 2, 3, 4, 5)
+        base_image = {
+            'id': uuids.base_image,
+            'name': 'base_image',
+            'created_at': timestamp,
+            'updated_at': timestamp,
+            'deleted_at': None,
+            'deleted': False,
+            'status': 'active',
+            'is_public': False,
+            'container_format': 'ova',
+            'disk_format': 'vhd',
+            'size': '74185822',
+            'min_ram': 0,
+            'min_disk': 0,
+            'protected': False,
+            'visibility': 'public',
+            'tags': [],
+            'properties': {}
+        }
+        rescue_stable_image = {
+            'id': uuids.rescue_stable_image,
+            'name': 'base_image',
+            'created_at': timestamp,
+            'updated_at': timestamp,
+            'deleted_at': None,
+            'deleted': False,
+            'status': 'active',
+            'is_public': False,
+            'container_format': 'ova',
+            'disk_format': 'vhd',
+            'size': '74185822',
+            'min_ram': 0,
+            'min_disk': 0,
+            'protected': False,
+            'visibility': 'public',
+            'tags': [],
+            'properties': {
+               'hw_rescue_bus': 'scsi',
+               'hw_rescue_device': 'cdrom'
+            }
+        }
+        self.glance.create(self.ctxt, base_image)
+        self.glance.create(self.ctxt, rescue_stable_image)
+        return base_image, rescue_stable_image
+
+    def test_stable_rescue_server_local_disk(self):
+        self.start_compute()
+
+        # create a local server with base image
+        base_image, rescue_stable_image = self._create_test_images()
+        server = self._create_server(image_uuid=uuids.base_image,
+            networks='none')
+        # instance.image_ref exists
+        self.assertEqual(base_image['id'], server['image']['id'])
+
+        # Delete base image
+        self.glance.delete(self.ctxt, base_image['id'])
+
+        rescue_req = {
+            'rescue': {
+                'rescue_image_ref': rescue_stable_image['id']
+            }
+        }
+        # Rescue in stable device mode successful
+        res = self.api.api_post('/servers/%s/action' % server['id'],
+            rescue_req)
+        self.assertEqual(200, res.status)
+
+        # BUG #2002606
+        # This test fails and server never reach RESCUE state.
+        # It is expected behavior when server in stable rescue mode cannot
+        # find original glance image.
+        #
+        # FIX with fallback:
+        # except exception.ImageNotFound:
+        #    image_meta = instance.image_meta
+        # leads to passing this test
+
+        self._wait_for_state_change(server, 'RESCUE')
+
+    def test_stable_rescue_server_bfv(self):
+        self.start_compute()
+
+        # create a boot from volume server with libvirt driver
+        base_image_not_used, rescue_stable_image = self._create_test_images()
+        server_request = self._build_server(networks=[])
+        server_request.pop('imageRef')
+        server_request['block_device_mapping_v2'] = [{
+            'boot_index': 0,
+            'uuid': nova_fixtures.CinderFixture.IMAGE_BACKED_VOL,
+            'source_type': 'volume',
+            'destination_type': 'volume'}]
+        server = self.api.post_server({'server': server_request})
+        server = self._wait_for_state_change(server, 'ACTIVE')
+        # instance.image_ref is missing, attached volume exists
+        self.assertEqual('', server['image'])
+        self.assertEqual(1,
+           len(server['os-extended-volumes:volumes_attached']))
+
+        # Delete all glance images except rescue image
+        ids = []
+        for o in self.glance.images.keys():
+            ids.append(o)
+        for o in ids:
+            if o != rescue_stable_image['id']:
+                self.glance.delete(self.ctxt, o)
+        self.assertEqual(1, len(self.glance.images.keys()))
+
+        rescue_req = {
+            'rescue': {
+                'rescue_image_ref': rescue_stable_image['id']
+            }
+        }
+        # Rescue in stable device mode successful
+        res = self.api.api_post('/servers/%s/action' % server['id'],
+            rescue_req)
+        self.assertEqual(200, res.status)
+
+        # BUG #2002606 does not affect server with bfv root
+        # disks unless server.image_ref defined for bfv server.
+
+        self._wait_for_state_change(server, 'RESCUE')
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index 4b34759bb7d2..c8073b2a5f22 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -4247,18 +4247,23 @@ class LibvirtDriver(driver.ComputeDriver):
             # new rescue_image_meta and block_device_info when calling
             # get_disk_info.
             rescue_image_meta = image_meta
-            if instance.image_ref:
-                image_meta = objects.ImageMeta.from_image_ref(
-                    context, self._image_api, instance.image_ref)
-            else:
-                # NOTE(lyarwood): If instance.image_ref isn't set attempt to
-                # lookup the original image_meta from the bdms. This will
-                # return an empty dict if no valid image_meta is found.
-                image_meta_dict = block_device.get_bdm_image_metadata(
-                    context, self._image_api, self._volume_api,
-                    block_device_info['block_device_mapping'],
-                    legacy_bdm=False)
-                image_meta = objects.ImageMeta.from_dict(image_meta_dict)
+
+            try:
+                if instance.image_ref:
+                    image_meta = objects.ImageMeta.from_image_ref(
+                        context, self._image_api, instance.image_ref)
+                else:
+                    # NOTE(lyarwood): If instance.image_ref isn't set attempt
+                    # to lookup the original image_meta from the bdms. This
+                    # will return an empty dict if no valid image_meta is
+                    # found.
+                    image_meta_dict = block_device.get_bdm_image_metadata(
+                        context, self._image_api, self._volume_api,
+                        block_device_info['block_device_mapping'],
+                        legacy_bdm=False)
+                    image_meta = objects.ImageMeta.from_dict(image_meta_dict)
+            except exception.ImageNotFound:
+                image_meta = instance.image_meta
 
         else:
             LOG.info("Attempting rescue", instance=instance)
diff --git a/releasenotes/notes/rescue-stable-with-deleted-base-image-5143f1c1c914b8fe.yaml b/releasenotes/notes/rescue-stable-with-deleted-base-image-5143f1c1c914b8fe.yaml
new file mode 100644
index 000000000000..83d59e9348fe
--- /dev/null
+++ b/releasenotes/notes/rescue-stable-with-deleted-base-image-5143f1c1c914b8fe.yaml
@@ -0,0 +1,10 @@
+---
+fixes:
+  - |
+    `Bug #2002606`_: Previously, server rescue in stable device mode had a
+    dependency on the original image used to create or rebuild the server.
+    If the original image was deleted from Glance, the server could not be
+    rescued. The issue has been fixed by falling back to the instance image
+    metadata if the original image is not found in Glance.
+
+    .. _Bug #2002606: https://bugs.launchpad.net/nova/+bug/2002606