# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

import os

from oslo_log import log as logging
import sh

from dcmanager.common import utils

# The 'sh' library is magical - it looks up CLI functions dynamically.
# Disable the pylint warnings here:
# pylint: disable=not-callable,no-member

LOG = logging.getLogger(__name__)


def _directory_inode(path):
    """Return the inode number of path, as printed by stat(1), as text."""
    # Normalise to a stripped string so the comparison below does not depend
    # on the object type returned by sh or on trailing whitespace.
    return str(sh.stat("--format", "%i", path)).strip()


def check_stale_bindmount(mount_path, source_path, log_error=True):
    """Check if the mount has become stale, and tear it down if so.

    We do this by comparing the directory inodes. If the bind mount is
    valid, the two directories should have the same inode number; otherwise
    the original directory has been replaced and we are no longer tracking
    the actual location of source_path. In this case we teardown the bind
    mount (umount, then remove the mount point directory).

    :param mount_path: the bind mount target to validate
    :param source_path: the directory that was bound onto mount_path; this
        must be the exact directory given to 'mount --bind', not its parent
    :param log_error: log the stale mount at error level if True, otherwise
        at warning level
    :returns: True if a stale mount was found and removed, False if the
        mount is still valid
    :raises Exception: re-raises whatever umount/rmdir raised if the stale
        mount could not be removed
    """
    if _directory_inode(mount_path) == _directory_inode(source_path):
        return False

    report = LOG.error if log_error else LOG.warning
    report("Found stale bind mount: %s, unmounting", mount_path)
    try:
        sh.umount(mount_path)
        os.rmdir(mount_path)
    except Exception:
        LOG.error("Failed to fix bind mount at %s", mount_path)
        raise
    return True


# TODO(kmacleod): utils.synchronized should be moved into dccommon
@utils.synchronized("ostree-mount-subclouds", external=True)
def validate_ostree_iso_mount(www_iso_root, source_path):
    """Ensure the ostree_repo is properly mounted under the iso path.

    Validity check includes if the mount is stale.
    If stale, the bind mount is recreated.
    Note that ostree_repo is mounted in a location not specific to a subcloud.

    :param www_iso_root: directory under which 'ostree_repo' is mounted
    :param source_path: directory containing the 'ostree_repo' directory
        that is bind mounted
    :raises Exception: if a stale mount cannot be removed, or if the bind
        mount still fails after the mount point has been rebuilt
    """
    ostree_repo_mount_path = os.path.join(www_iso_root, "ostree_repo")
    # The mount is created from <source_path>/ostree_repo, so that is the
    # directory whose inode must match the mount point.
    ostree_repo_source_path = os.path.join(source_path, "ostree_repo")

    LOG.debug("Checking ostree_repo mount: %s", ostree_repo_mount_path)
    if os.path.exists(ostree_repo_mount_path):
        # check_stale_bindmount unmounts and removes a stale mount point
        # itself; repeating the teardown here would fail on the
        # already-unmounted path.
        check_stale_bindmount(ostree_repo_mount_path, ostree_repo_source_path)

    # Check for the config file inside the ostree_repo
    check_path = os.path.join(ostree_repo_mount_path, "config")
    if os.path.exists(check_path):
        return

    LOG.info("Mounting ostree_repo at %s", ostree_repo_mount_path)
    if not os.path.exists(ostree_repo_mount_path):
        os.makedirs(ostree_repo_mount_path, mode=0o755)
    mount_args = ("--bind", ostree_repo_source_path, ostree_repo_mount_path)
    try:
        sh.mount(*mount_args)
    except Exception as exc:
        LOG.warning(
            "Command 'mount %s' failed; attempting to rebuild: %s",
            " ".join(mount_args),
            exc,
        )
        # Best effort: the path may not be mounted at all, so a failing
        # umount is logged and the rebuild continues.
        try:
            sh.umount(ostree_repo_mount_path)
        except Exception:
            LOG.exception("rebuild: umount failed, continuing")
        os.rmdir(ostree_repo_mount_path)
        os.makedirs(ostree_repo_mount_path, mode=0o755)
        sh.mount(*mount_args)
endpoint=endpoint) self.name = subcloud_name self.input_iso = None - self.www_root = None + self.www_iso_root = None self.https_enabled = None self.ipmi_logger = None @@ -235,10 +236,10 @@ class SubcloudInstall(object): raise e def update_iso(self, override_path, values): - if not os.path.isdir(self.www_root): - os.mkdir(self.www_root, 0o755) - LOG.debug("update_iso: www_root: %s, values: %s, override_path: %s", - self.www_root, str(values), override_path) + if not os.path.isdir(self.www_iso_root): + os.mkdir(self.www_iso_root, 0o755) + LOG.debug("update_iso: www_iso_root: %s, values: %s, override_path: %s", + self.www_iso_root, str(values), override_path) path = None software_version = str(values['software_version']) try: @@ -283,7 +284,7 @@ class SubcloudInstall(object): update_iso_cmd = [ GEN_ISO_COMMAND, "--input", self.input_iso, - "--www-root", self.www_root, + "--www-root", self.www_iso_root, "--id", self.name, "--boot-hostname", self.name, "--timeout", BOOT_MENU_TIMEOUT, @@ -292,7 +293,7 @@ class SubcloudInstall(object): update_iso_cmd = [ GEN_ISO_COMMAND_CENTOS, "--input", self.input_iso, - "--www-root", self.www_root, + "--www-root", self.www_iso_root, "--id", self.name, "--boot-hostname", self.name, "--timeout", BOOT_MENU_TIMEOUT, @@ -378,19 +379,19 @@ class SubcloudInstall(object): os.path.exists(self.input_iso)): os.remove(self.input_iso) - if (self.www_root is not None and os.path.isdir(self.www_root)): + if (self.www_iso_root is not None and os.path.isdir(self.www_iso_root)): if dccommon_utils.is_debian(software_version): cleanup_cmd = [ GEN_ISO_COMMAND, "--id", self.name, - "--www-root", self.www_root, + "--www-root", self.www_iso_root, "--delete" ] else: cleanup_cmd = [ GEN_ISO_COMMAND_CENTOS, "--id", self.name, - "--www-root", self.www_root, + "--www-root", self.www_iso_root, "--delete" ] LOG.info("Running install cleanup: %s", self.name) @@ -474,33 +475,6 @@ class SubcloudInstall(object): subprocess.check_call(['umount', '-l', 
temp_bootimage_mnt_dir]) os.rmdir(temp_bootimage_mnt_dir) - def check_ostree_mount(self, source_path): - """Mount the ostree_repo at ostree_repo_mount_path if necessary. - - Note that ostree_repo is mounted in a location not specific to a - subcloud. We never unmount this directory once the mount path is - established. - """ - ostree_mount_dir = os.path.join(self.www_root, 'ostree_repo') - LOG.debug("Checking mount: %s", ostree_mount_dir) - check_path = os.path.join(ostree_mount_dir, 'config') - if not os.path.exists(check_path): - self._do_ostree_mount(ostree_mount_dir, check_path, source_path) - - # TODO(kmacleod): utils.synchronized should be moved into dccommon - @utils.synchronized("ostree-mount-subclouds", external=True) - def _do_ostree_mount(self, ostree_repo_mount_path, - check_path, source_path): - # check again while locked: - if not os.path.exists(check_path): - LOG.info("Mounting ostree_repo at %s", ostree_repo_mount_path) - if not os.path.exists(ostree_repo_mount_path): - os.makedirs(ostree_repo_mount_path, mode=0o755) - subprocess.check_call( # pylint: disable=not-callable - ["mount", "--bind", - "%s/ostree_repo" % source_path, - ostree_repo_mount_path]) - @staticmethod def is_serial_console(install_type): return (install_type is not None @@ -534,19 +508,21 @@ class SubcloudInstall(object): if not os.path.isdir(override_path): os.mkdir(override_path, 0o755) - self.www_root = os.path.join(SUBCLOUD_ISO_PATH, software_version) + self.www_iso_root = os.path.join(SUBCLOUD_ISO_PATH, software_version) feed_path_rel_version = os.path.join(SUBCLOUD_FEED_PATH, "rel-{version}".format( version=software_version)) if dccommon_utils.is_debian(software_version): - self.check_ostree_mount(feed_path_rel_version) + ostree_mount.validate_ostree_iso_mount( + self.www_iso_root, feed_path_rel_version + ) # Clean up iso directory if it already exists # This may happen if a previous installation attempt was abruptly # terminated - iso_dir_path = os.path.join(self.www_root, 
'nodes', self.name) + iso_dir_path = os.path.join(self.www_iso_root, 'nodes', self.name) if os.path.isdir(iso_dir_path): LOG.info("Found preexisting iso dir for subcloud %s, cleaning up", self.name) diff --git a/distributedcloud/dcmanager/tests/unit/manager/test_subcloud_manager.py b/distributedcloud/dcmanager/tests/unit/manager/test_subcloud_manager.py index 676e7e9e6..a743946de 100644 --- a/distributedcloud/dcmanager/tests/unit/manager/test_subcloud_manager.py +++ b/distributedcloud/dcmanager/tests/unit/manager/test_subcloud_manager.py @@ -40,6 +40,7 @@ from dccommon import consts as dccommon_consts from dccommon.drivers.openstack import dcmanager_v1 from dccommon.exceptions import PlaybookExecutionFailed from dccommon import kubeoperator +from dccommon import ostree_mount from dccommon import subcloud_enrollment from dccommon import subcloud_install from dccommon.utils import AnsiblePlaybook @@ -370,6 +371,7 @@ class BaseTestSubcloudManager(base.DCManagerTestCase): self._mock_os_path_isdir() self._mock_os_path_exists() self._mock_os_remove() + self._mock_ostree_mount_validate_ostree_iso_mount() self._mock_get_local_system() self.sm = subcloud_manager.SubcloudManager() @@ -473,6 +475,14 @@ class BaseTestSubcloudManager(base.DCManagerTestCase): self.mock_run_subcloud_install = mock_patch.start() self.addCleanup(mock_patch.stop) + def _mock_ostree_mount_validate_ostree_iso_mount(self): + """Mock ostree_mount validate_ostree_iso_mount""" + + mock_patch = mock.patch.object(ostree_mount, + 'validate_ostree_iso_mount') + self.mock_validate_ostree_iso_mount = mock_patch.start() + self.addCleanup(mock_patch.stop) + def _mock_subcloud_manager_create_intermediate_ca_cert(self): """Mock subcloud manager's _create_intermediate_ca_cert""" diff --git a/distributedcloud/requirements.txt b/distributedcloud/requirements.txt index 8bc5cd645..b1ca37ab4 100644 --- a/distributedcloud/requirements.txt +++ b/distributedcloud/requirements.txt @@ -38,6 +38,7 @@ 
requests!=2.12.2,!=2.13.0,>=2.10.0 # Apache-2.0 requests_toolbelt # Apache-2.0 retrying!=1.3.0,>=1.2.3 # Apache-2.0 routes>=2.3.1 # MIT +sh # MIT six>=1.9.0 # MIT sqlalchemy!=1.1.5,!=1.1.6,!=1.1.7,!=1.1.8,>=1.0.10 # MIT sqlalchemy-migrate>=0.11.0 # Apache-2.0