Fix stale or missing ostree_repo bind mount in iso dir

This commit addresses the odd cases where the
/var/www/pages/iso/<rel>/ostree_repo bind mount
becomes missing or stale.

We add detection of missing content, and also detect a stale bind mount.

A stale bind mount is detected by comparing the inode number of the
bind-mounted /var/www/pages/iso/<rel>/ostree_repo directory with that of the
original /var/www/pages/feed/rel-<rel>/ostree_repo directory.

NOTES:
- The self.www_root variable is changed to self.www_iso_root to make
  it more obvious that this is the /var/www/pages/iso path, not the feed
  path.
- Now using the 'sh' python library for the mount commands, which is
  much more convenient and straightforward than the subprocess library.

Test Plan:
PASS:
- Unmount (but do not delete) the /var/www/pages/iso/<rel>/ostree_repo
  directory. When a subcloud add or deploy operation is done, the bind
  mount is recreated.
- Stale mount:
    # Replace the original
    sudo cp -a /var/www/pages/feed/rel-24.09/ostree_repo \
      /var/www/pages/feed/rel-24.09/ostree_repo.orig
    sudo rm -rf /var/www/pages/feed/rel-24.09/ostree_repo
    sudo cp -a /var/www/pages/feed/rel-24.09/ostree_repo.orig \
      /var/www/pages/feed/rel-24.09/ostree_repo
  When a subcloud add or deploy operation is done, the stale bind
  mount is detected. The /var/www/pages/iso/24.09/ostree_repo is
  unmounted, and the directory is removed.
  When the next subcloud add or deploy operation is done, the bind
  mount is recreated.

Closes-Bug: 2066411

Change-Id: I25911722b1e333cd352f142664526d7dfa73e9e8
Signed-off-by: Kyle MacLeod <kyle.macleod@windriver.com>
This commit is contained in:
Kyle MacLeod
2024-05-22 12:13:14 -04:00
parent 609cd7678e
commit a60ce81f26
4 changed files with 120 additions and 40 deletions

View File

@@ -0,0 +1,93 @@
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import os
from oslo_log import log as logging
import sh
from dcmanager.common import utils
# The 'sh' library is magical - it looks up CLI functions dynamically.
# Disable the pylint warnings here:
# pylint: disable=not-callable,no-member
LOG = logging.getLogger(__name__)
def check_stale_bindmount(mount_path, source_path, log_error=True):
    """Check if a bind mount has become stale, tearing it down if so.

    The check compares the inode numbers reported by ``stat --format %i``
    for the two directories. If the bind mount is valid, both directories
    have the same inode number; otherwise the original directory has been
    replaced and the mount no longer tracks the actual location of
    source_path. In that case the bind mount is unmounted and the mount
    point directory is removed.

    :param mount_path: directory at which the bind mount is expected
    :param source_path: directory that mount_path is expected to be bound to
    :param log_error: if True the stale mount is reported at error level,
        otherwise at warning level
    :returns: True if a stale mount was found and torn down, False if the
        inode numbers match
    :raises Exception: any failure from the stat, umount or rmdir calls is
        propagated; a teardown failure is logged before being re-raised
    """
    # Normalise both results to plain, stripped strings so the comparison
    # does not depend on the object type that 'sh' returns for a command.
    mount_path_inode = str(sh.stat("--format", "%i", mount_path)).strip()
    source_path_inode = str(sh.stat("--format", "%i", source_path)).strip()
    if mount_path_inode == source_path_inode:
        return False

    logmsg = f"Found stale bind mount: {mount_path}, unmounting"
    if log_error:
        LOG.error(logmsg)
    else:
        # LOG.warn is a deprecated alias of LOG.warning
        LOG.warning(logmsg)
    try:
        sh.umount(mount_path)
        # Remove the now-empty mount point so callers see it as missing
        os.rmdir(mount_path)
    except Exception:
        LOG.error(f"Failed to fix bind mount at {mount_path}")
        raise
    return True
# TODO(kmacleod): utils.synchronized should be moved into dccommon
@utils.synchronized("ostree-mount-subclouds", external=True)
def validate_ostree_iso_mount(www_iso_root, source_path):
    """Ensure the ostree_repo is properly mounted under the iso path.

    Validity check includes if the mount is stale.
    If stale, the bind mount is recreated.

    Note that ostree_repo is mounted in a location not specific to a subcloud.

    NOTE(review): the synchronized decorator presumably serialises callers
    on the "ostree-mount-subclouds" lock - confirm against utils.synchronized.

    :param www_iso_root: iso directory for the release; the mount point is
        the "ostree_repo" directory below it
    :param source_path: feed directory for the release; the bind mount source
        is the "ostree_repo" directory below it
    :raises Exception: failures from the stale-mount check, or from the final
        mount attempt after a rebuild, are propagated to the caller
    """
    ostree_repo_mount_path = os.path.join(www_iso_root, "ostree_repo")
    # The bind mount source is the ostree_repo directory inside source_path,
    # so that is the directory the mount point must be compared against.
    # Comparing against source_path itself would never match.
    ostree_repo_source_path = os.path.join(source_path, "ostree_repo")
    LOG.debug("Checking ostree_repo mount: %s", ostree_repo_mount_path)

    if os.path.exists(ostree_repo_mount_path) and check_stale_bindmount(
        ostree_repo_mount_path, ostree_repo_source_path
    ):
        # check_stale_bindmount has already unmounted and removed the mount
        # point. Repeating the umount here would fail on the missing path
        # and abort before the mount is recreated below.
        LOG.info(
            "Stale bind mount removed at %s, recreating", ostree_repo_mount_path
        )

    # Check for the config file inside the ostree_repo
    check_path = os.path.join(ostree_repo_mount_path, "config")
    if os.path.exists(check_path):
        return

    LOG.info("Mounting ostree_repo at %s", ostree_repo_mount_path)
    if not os.path.exists(ostree_repo_mount_path):
        os.makedirs(ostree_repo_mount_path, mode=0o755)
    mount_args = (
        "--bind",
        ostree_repo_source_path,
        ostree_repo_mount_path,
    )
    try:
        sh.mount(*mount_args)
    except Exception as exc:
        LOG.warning(
            f"Command 'mount {' '.join(mount_args)}' failed; "
            f"attempting to rebuild: {str(exc)}"
        )
        try:
            sh.umount(ostree_repo_mount_path)
        except Exception:
            # Best effort: the path may simply not be mounted
            LOG.exception("rebuild: umount failed, continuing")
        # Recreate the mount point from scratch, then retry once; a second
        # failure is propagated to the caller.
        os.rmdir(ostree_repo_mount_path)
        os.makedirs(ostree_repo_mount_path, mode=0o755)
        sh.mount(*mount_args)

View File

@@ -32,6 +32,7 @@ from dccommon import consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon import exceptions
from dccommon import ostree_mount
from dccommon import utils as dccommon_utils
from dcmanager.common import consts as dcmanager_consts
from dcmanager.common import utils
@@ -70,7 +71,7 @@ class SubcloudInstall(object):
session, endpoint=endpoint)
self.name = subcloud_name
self.input_iso = None
self.www_root = None
self.www_iso_root = None
self.https_enabled = None
self.ipmi_logger = None
@@ -235,10 +236,10 @@ class SubcloudInstall(object):
raise e
def update_iso(self, override_path, values):
if not os.path.isdir(self.www_root):
os.mkdir(self.www_root, 0o755)
LOG.debug("update_iso: www_root: %s, values: %s, override_path: %s",
self.www_root, str(values), override_path)
if not os.path.isdir(self.www_iso_root):
os.mkdir(self.www_iso_root, 0o755)
LOG.debug("update_iso: www_iso_root: %s, values: %s, override_path: %s",
self.www_iso_root, str(values), override_path)
path = None
software_version = str(values['software_version'])
try:
@@ -283,7 +284,7 @@ class SubcloudInstall(object):
update_iso_cmd = [
GEN_ISO_COMMAND,
"--input", self.input_iso,
"--www-root", self.www_root,
"--www-root", self.www_iso_root,
"--id", self.name,
"--boot-hostname", self.name,
"--timeout", BOOT_MENU_TIMEOUT,
@@ -292,7 +293,7 @@ class SubcloudInstall(object):
update_iso_cmd = [
GEN_ISO_COMMAND_CENTOS,
"--input", self.input_iso,
"--www-root", self.www_root,
"--www-root", self.www_iso_root,
"--id", self.name,
"--boot-hostname", self.name,
"--timeout", BOOT_MENU_TIMEOUT,
@@ -378,19 +379,19 @@ class SubcloudInstall(object):
os.path.exists(self.input_iso)):
os.remove(self.input_iso)
if (self.www_root is not None and os.path.isdir(self.www_root)):
if (self.www_iso_root is not None and os.path.isdir(self.www_iso_root)):
if dccommon_utils.is_debian(software_version):
cleanup_cmd = [
GEN_ISO_COMMAND,
"--id", self.name,
"--www-root", self.www_root,
"--www-root", self.www_iso_root,
"--delete"
]
else:
cleanup_cmd = [
GEN_ISO_COMMAND_CENTOS,
"--id", self.name,
"--www-root", self.www_root,
"--www-root", self.www_iso_root,
"--delete"
]
LOG.info("Running install cleanup: %s", self.name)
@@ -474,33 +475,6 @@ class SubcloudInstall(object):
subprocess.check_call(['umount', '-l', temp_bootimage_mnt_dir])
os.rmdir(temp_bootimage_mnt_dir)
def check_ostree_mount(self, source_path):
"""Mount the ostree_repo at ostree_repo_mount_path if necessary.
Note that ostree_repo is mounted in a location not specific to a
subcloud. We never unmount this directory once the mount path is
established.
"""
ostree_mount_dir = os.path.join(self.www_root, 'ostree_repo')
LOG.debug("Checking mount: %s", ostree_mount_dir)
check_path = os.path.join(ostree_mount_dir, 'config')
if not os.path.exists(check_path):
self._do_ostree_mount(ostree_mount_dir, check_path, source_path)
# TODO(kmacleod): utils.synchronized should be moved into dccommon
@utils.synchronized("ostree-mount-subclouds", external=True)
def _do_ostree_mount(self, ostree_repo_mount_path,
check_path, source_path):
# check again while locked:
if not os.path.exists(check_path):
LOG.info("Mounting ostree_repo at %s", ostree_repo_mount_path)
if not os.path.exists(ostree_repo_mount_path):
os.makedirs(ostree_repo_mount_path, mode=0o755)
subprocess.check_call( # pylint: disable=not-callable
["mount", "--bind",
"%s/ostree_repo" % source_path,
ostree_repo_mount_path])
@staticmethod
def is_serial_console(install_type):
return (install_type is not None
@@ -534,19 +508,21 @@ class SubcloudInstall(object):
if not os.path.isdir(override_path):
os.mkdir(override_path, 0o755)
self.www_root = os.path.join(SUBCLOUD_ISO_PATH, software_version)
self.www_iso_root = os.path.join(SUBCLOUD_ISO_PATH, software_version)
feed_path_rel_version = os.path.join(SUBCLOUD_FEED_PATH,
"rel-{version}".format(
version=software_version))
if dccommon_utils.is_debian(software_version):
self.check_ostree_mount(feed_path_rel_version)
ostree_mount.validate_ostree_iso_mount(
self.www_iso_root, feed_path_rel_version
)
# Clean up iso directory if it already exists
# This may happen if a previous installation attempt was abruptly
# terminated
iso_dir_path = os.path.join(self.www_root, 'nodes', self.name)
iso_dir_path = os.path.join(self.www_iso_root, 'nodes', self.name)
if os.path.isdir(iso_dir_path):
LOG.info("Found preexisting iso dir for subcloud %s, cleaning up",
self.name)

View File

@@ -40,6 +40,7 @@ from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack import dcmanager_v1
from dccommon.exceptions import PlaybookExecutionFailed
from dccommon import kubeoperator
from dccommon import ostree_mount
from dccommon import subcloud_enrollment
from dccommon import subcloud_install
from dccommon.utils import AnsiblePlaybook
@@ -370,6 +371,7 @@ class BaseTestSubcloudManager(base.DCManagerTestCase):
self._mock_os_path_isdir()
self._mock_os_path_exists()
self._mock_os_remove()
self._mock_ostree_mount_validate_ostree_iso_mount()
self._mock_get_local_system()
self.sm = subcloud_manager.SubcloudManager()
@@ -473,6 +475,14 @@ class BaseTestSubcloudManager(base.DCManagerTestCase):
self.mock_run_subcloud_install = mock_patch.start()
self.addCleanup(mock_patch.stop)
def _mock_ostree_mount_validate_ostree_iso_mount(self):
    """Patch ostree_mount.validate_ostree_iso_mount with a mock.

    The started mock is stored on self.mock_validate_ostree_iso_mount and
    the patch is stopped via addCleanup.
    """
    patcher = mock.patch.object(
        ostree_mount, "validate_ostree_iso_mount"
    )
    self.mock_validate_ostree_iso_mount = patcher.start()
    self.addCleanup(patcher.stop)
def _mock_subcloud_manager_create_intermediate_ca_cert(self):
"""Mock subcloud manager's _create_intermediate_ca_cert"""

View File

@@ -38,6 +38,7 @@ requests!=2.12.2,!=2.13.0,>=2.10.0 # Apache-2.0
requests_toolbelt # Apache-2.0
retrying!=1.3.0,>=1.2.3 # Apache-2.0
routes>=2.3.1 # MIT
sh # MIT
six>=1.9.0 # MIT
sqlalchemy!=1.1.5,!=1.1.6,!=1.1.7,!=1.1.8,>=1.0.10 # MIT
sqlalchemy-migrate>=0.11.0 # Apache-2.0