Fix stale or missing ostree_repo bind mount in iso dir
This commit addresses the odd cases where the /var/www/pages/iso/<rel>/ostree_repo bind mount becomes missing or stale. We add detection of missing content, and also detect a stale bind mount. A stale bind mount is detected by comparing the inode numbers of the bind-mounted /var/www/pages/iso/<rel>/ostree_repo directory and the original /var/www/pages/feed/rel-<rel>/ostree_repo directory.

NOTES:
- The self.www_root variable is renamed to self.www_iso_root to make it more obvious that this is the /var/www/pages/iso path, not the feed path.
- The 'sh' python library is now used for the mount commands; it is much more convenient and straightforward than the subprocess library.

Test Plan:
PASS:
- Unmount (but do not delete) the /var/www/pages/iso/<rel>/ostree_repo directory. When a subcloud add or deploy operation is done, the bind mount is recreated.
- Stale mount:
    # Replace the original
    sudo cp -a /var/www/pages/feed/rel-24.09/ostree_repo \
        /var/www/pages/feed/rel-24.09/ostree_repo.orig
    sudo rm -rf /var/www/pages/feed/rel-24.09/ostree_repo
    sudo cp -a /var/www/pages/feed/rel-24.09/ostree_repo.orig \
        /var/www/pages/feed/rel-24.09/ostree_repo
  When a subcloud add or deploy operation is done, the stale bind mount is detected. The /var/www/pages/iso/24.09/ostree_repo is unmounted, and the directory is removed. When a subcloud add or deploy operation is done, the bind mount is recreated.

Closes-Bug: 2066411
Change-Id: I25911722b1e333cd352f142664526d7dfa73e9e8
Signed-off-by: Kyle MacLeod <kyle.macleod@windriver.com>
This commit is contained in:
93
distributedcloud/dccommon/ostree_mount.py
Normal file
93
distributedcloud/dccommon/ostree_mount.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# Copyright (c) 2024 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
from oslo_log import log as logging
|
||||
import sh
|
||||
|
||||
from dcmanager.common import utils
|
||||
|
||||
# The 'sh' library is magical - it looks up CLI functions dynamically.
|
||||
# Disable the pylint warnings here:
|
||||
# pylint: disable=not-callable,no-member
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def check_stale_bindmount(mount_path, source_path, log_error=True):
    """Detect a stale bind mount and tear it down.

    While a bind mount is valid, the mount point and its source directory
    report the same inode number. If the numbers differ, the source
    directory has been replaced since the mount was made and mount_path is
    no longer tracking the actual location of source_path. In that case the
    mount is unmounted and the mount point directory is removed.

    :param mount_path: path of the bind mount point to check
    :param source_path: path of the directory expected to be mounted there
    :param log_error: when True the detection is logged at error level,
        otherwise at warning level
    :returns: True if the mount was stale and has been torn down, False if
        both paths report the same inode number
    :raises OSError: if either path cannot be stat'ed or the mount point
        cannot be removed
    :raises Exception: whatever sh.umount raises if the unmount fails
    """
    # Read the inode numbers directly instead of spawning two 'stat'
    # processes and comparing their output. os.stat follows symlinks,
    # which presumably matches how 'mount --bind' resolves its source.
    if os.stat(mount_path).st_ino == os.stat(source_path).st_ino:
        return False

    report = LOG.error if log_error else LOG.warning
    report("Found stale bind mount: %s, unmounting", mount_path)
    try:
        sh.umount(mount_path)
        os.rmdir(mount_path)
    except Exception:
        LOG.error("Failed to fix bind mount at %s", mount_path)
        raise
    return True
|
||||
|
||||
|
||||
# TODO(kmacleod): utils.synchronized should be moved into dccommon
@utils.synchronized("ostree-mount-subclouds", external=True)
def validate_ostree_iso_mount(www_iso_root, source_path):
    """Ensure the ostree_repo is properly mounted under the iso path.

    The bind mount of <source_path>/ostree_repo at
    <www_iso_root>/ostree_repo is checked for staleness and for the
    presence of the repo 'config' file. A stale or missing mount is
    (re)created.

    Note that ostree_repo is mounted in a location not specific to a
    subcloud.

    :param www_iso_root: iso directory that should contain the ostree_repo
        mount point
    :param source_path: directory containing the original ostree_repo
    :raises Exception: if a stale mount cannot be torn down, or if the
        mount still fails after the rebuild attempt
    """
    ostree_repo_mount_path = os.path.join(www_iso_root, "ostree_repo")
    # The bind mount is made from the ostree_repo directory inside
    # source_path, so that is the directory the mount must be compared with.
    ostree_repo_source_path = "%s/ostree_repo" % source_path
    LOG.debug("Checking ostree_repo mount: %s", ostree_repo_mount_path)

    if os.path.exists(ostree_repo_mount_path):
        # check_stale_bindmount unmounts and removes a stale mount point
        # itself (raising on failure). Tearing it down a second time here
        # would always fail, so just fall through and let the mount be
        # recreated below.
        check_stale_bindmount(ostree_repo_mount_path, ostree_repo_source_path)

    # Check for the config file inside the ostree_repo
    check_path = os.path.join(ostree_repo_mount_path, "config")
    if os.path.exists(check_path):
        return

    LOG.info("Mounting ostree_repo at %s", ostree_repo_mount_path)
    os.makedirs(ostree_repo_mount_path, mode=0o755, exist_ok=True)
    mount_args = (
        "--bind",
        ostree_repo_source_path,
        ostree_repo_mount_path,
    )
    try:
        sh.mount(*mount_args)
    except Exception as exc:
        LOG.warning(
            "Command 'mount %s' failed; attempting to rebuild: %s",
            " ".join(mount_args),
            exc,
        )
        # Best effort: the mount point may or may not be mounted at this
        # stage, so a failing umount is logged and the rebuild continues.
        try:
            sh.umount(ostree_repo_mount_path)
        except Exception:
            LOG.exception("rebuild: umount failed, continuing")
        os.rmdir(ostree_repo_mount_path)
        os.makedirs(ostree_repo_mount_path, mode=0o755)
        sh.mount(*mount_args)
|
@@ -32,6 +32,7 @@ from dccommon import consts
|
||||
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
|
||||
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
|
||||
from dccommon import exceptions
|
||||
from dccommon import ostree_mount
|
||||
from dccommon import utils as dccommon_utils
|
||||
from dcmanager.common import consts as dcmanager_consts
|
||||
from dcmanager.common import utils
|
||||
@@ -70,7 +71,7 @@ class SubcloudInstall(object):
|
||||
session, endpoint=endpoint)
|
||||
self.name = subcloud_name
|
||||
self.input_iso = None
|
||||
self.www_root = None
|
||||
self.www_iso_root = None
|
||||
self.https_enabled = None
|
||||
self.ipmi_logger = None
|
||||
|
||||
@@ -235,10 +236,10 @@ class SubcloudInstall(object):
|
||||
raise e
|
||||
|
||||
def update_iso(self, override_path, values):
|
||||
if not os.path.isdir(self.www_root):
|
||||
os.mkdir(self.www_root, 0o755)
|
||||
LOG.debug("update_iso: www_root: %s, values: %s, override_path: %s",
|
||||
self.www_root, str(values), override_path)
|
||||
if not os.path.isdir(self.www_iso_root):
|
||||
os.mkdir(self.www_iso_root, 0o755)
|
||||
LOG.debug("update_iso: www_iso_root: %s, values: %s, override_path: %s",
|
||||
self.www_iso_root, str(values), override_path)
|
||||
path = None
|
||||
software_version = str(values['software_version'])
|
||||
try:
|
||||
@@ -283,7 +284,7 @@ class SubcloudInstall(object):
|
||||
update_iso_cmd = [
|
||||
GEN_ISO_COMMAND,
|
||||
"--input", self.input_iso,
|
||||
"--www-root", self.www_root,
|
||||
"--www-root", self.www_iso_root,
|
||||
"--id", self.name,
|
||||
"--boot-hostname", self.name,
|
||||
"--timeout", BOOT_MENU_TIMEOUT,
|
||||
@@ -292,7 +293,7 @@ class SubcloudInstall(object):
|
||||
update_iso_cmd = [
|
||||
GEN_ISO_COMMAND_CENTOS,
|
||||
"--input", self.input_iso,
|
||||
"--www-root", self.www_root,
|
||||
"--www-root", self.www_iso_root,
|
||||
"--id", self.name,
|
||||
"--boot-hostname", self.name,
|
||||
"--timeout", BOOT_MENU_TIMEOUT,
|
||||
@@ -378,19 +379,19 @@ class SubcloudInstall(object):
|
||||
os.path.exists(self.input_iso)):
|
||||
os.remove(self.input_iso)
|
||||
|
||||
if (self.www_root is not None and os.path.isdir(self.www_root)):
|
||||
if (self.www_iso_root is not None and os.path.isdir(self.www_iso_root)):
|
||||
if dccommon_utils.is_debian(software_version):
|
||||
cleanup_cmd = [
|
||||
GEN_ISO_COMMAND,
|
||||
"--id", self.name,
|
||||
"--www-root", self.www_root,
|
||||
"--www-root", self.www_iso_root,
|
||||
"--delete"
|
||||
]
|
||||
else:
|
||||
cleanup_cmd = [
|
||||
GEN_ISO_COMMAND_CENTOS,
|
||||
"--id", self.name,
|
||||
"--www-root", self.www_root,
|
||||
"--www-root", self.www_iso_root,
|
||||
"--delete"
|
||||
]
|
||||
LOG.info("Running install cleanup: %s", self.name)
|
||||
@@ -474,33 +475,6 @@ class SubcloudInstall(object):
|
||||
subprocess.check_call(['umount', '-l', temp_bootimage_mnt_dir])
|
||||
os.rmdir(temp_bootimage_mnt_dir)
|
||||
|
||||
def check_ostree_mount(self, source_path):
    """Mount the ostree_repo under self.www_root if it is not there yet.

    The repo's 'config' file is used as the indicator: when it is
    missing below the mount directory, the mount is delegated to
    _do_ostree_mount.

    Note that ostree_repo is mounted in a location not specific to a
    subcloud. We never unmount this directory once the mount path is
    established.
    """
    mount_dir = os.path.join(self.www_root, 'ostree_repo')
    LOG.debug("Checking mount: %s", mount_dir)
    config_file = os.path.join(mount_dir, 'config')
    if os.path.exists(config_file):
        return
    self._do_ostree_mount(mount_dir, config_file, source_path)
|
||||
|
||||
# TODO(kmacleod): utils.synchronized should be moved into dccommon
@utils.synchronized("ostree-mount-subclouds", external=True)
def _do_ostree_mount(self, ostree_repo_mount_path,
                     check_path, source_path):
    """Bind-mount <source_path>/ostree_repo at ostree_repo_mount_path.

    Nothing is done when check_path already exists. The mount point
    directory is created first if it is missing.
    """
    # Re-test under the lock: the caller's check was made without it.
    if os.path.exists(check_path):
        return

    LOG.info("Mounting ostree_repo at %s", ostree_repo_mount_path)
    if not os.path.exists(ostree_repo_mount_path):
        os.makedirs(ostree_repo_mount_path, mode=0o755)

    bind_mount_cmd = [
        "mount",
        "--bind",
        "%s/ostree_repo" % source_path,
        ostree_repo_mount_path,
    ]
    subprocess.check_call(bind_mount_cmd)  # pylint: disable=not-callable
|
||||
|
||||
@staticmethod
|
||||
def is_serial_console(install_type):
|
||||
return (install_type is not None
|
||||
@@ -534,19 +508,21 @@ class SubcloudInstall(object):
|
||||
if not os.path.isdir(override_path):
|
||||
os.mkdir(override_path, 0o755)
|
||||
|
||||
self.www_root = os.path.join(SUBCLOUD_ISO_PATH, software_version)
|
||||
self.www_iso_root = os.path.join(SUBCLOUD_ISO_PATH, software_version)
|
||||
|
||||
feed_path_rel_version = os.path.join(SUBCLOUD_FEED_PATH,
|
||||
"rel-{version}".format(
|
||||
version=software_version))
|
||||
|
||||
if dccommon_utils.is_debian(software_version):
|
||||
self.check_ostree_mount(feed_path_rel_version)
|
||||
ostree_mount.validate_ostree_iso_mount(
|
||||
self.www_iso_root, feed_path_rel_version
|
||||
)
|
||||
|
||||
# Clean up iso directory if it already exists
|
||||
# This may happen if a previous installation attempt was abruptly
|
||||
# terminated
|
||||
iso_dir_path = os.path.join(self.www_root, 'nodes', self.name)
|
||||
iso_dir_path = os.path.join(self.www_iso_root, 'nodes', self.name)
|
||||
if os.path.isdir(iso_dir_path):
|
||||
LOG.info("Found preexisting iso dir for subcloud %s, cleaning up",
|
||||
self.name)
|
||||
|
@@ -40,6 +40,7 @@ from dccommon import consts as dccommon_consts
|
||||
from dccommon.drivers.openstack import dcmanager_v1
|
||||
from dccommon.exceptions import PlaybookExecutionFailed
|
||||
from dccommon import kubeoperator
|
||||
from dccommon import ostree_mount
|
||||
from dccommon import subcloud_enrollment
|
||||
from dccommon import subcloud_install
|
||||
from dccommon.utils import AnsiblePlaybook
|
||||
@@ -370,6 +371,7 @@ class BaseTestSubcloudManager(base.DCManagerTestCase):
|
||||
self._mock_os_path_isdir()
|
||||
self._mock_os_path_exists()
|
||||
self._mock_os_remove()
|
||||
self._mock_ostree_mount_validate_ostree_iso_mount()
|
||||
self._mock_get_local_system()
|
||||
self.sm = subcloud_manager.SubcloudManager()
|
||||
|
||||
@@ -473,6 +475,14 @@ class BaseTestSubcloudManager(base.DCManagerTestCase):
|
||||
self.mock_run_subcloud_install = mock_patch.start()
|
||||
self.addCleanup(mock_patch.stop)
|
||||
|
||||
def _mock_ostree_mount_validate_ostree_iso_mount(self):
    """Patch ostree_mount.validate_ostree_iso_mount for the test.

    The started mock is stored in self.mock_validate_ostree_iso_mount and
    the patch is stopped on test cleanup.
    """
    patcher = mock.patch.object(
        ostree_mount, 'validate_ostree_iso_mount'
    )
    started_mock = patcher.start()
    self.mock_validate_ostree_iso_mount = started_mock
    self.addCleanup(patcher.stop)
|
||||
|
||||
def _mock_subcloud_manager_create_intermediate_ca_cert(self):
|
||||
"""Mock subcloud manager's _create_intermediate_ca_cert"""
|
||||
|
||||
|
@@ -38,6 +38,7 @@ requests!=2.12.2,!=2.13.0,>=2.10.0 # Apache-2.0
|
||||
requests_toolbelt # Apache-2.0
|
||||
retrying!=1.3.0,>=1.2.3 # Apache-2.0
|
||||
routes>=2.3.1 # MIT
|
||||
sh # MIT
|
||||
six>=1.9.0 # MIT
|
||||
sqlalchemy!=1.1.5,!=1.1.6,!=1.1.7,!=1.1.8,>=1.0.10 # MIT
|
||||
sqlalchemy-migrate>=0.11.0 # Apache-2.0
|
||||
|
Reference in New Issue
Block a user