Merge "Fix resize of drbd filesystems"

This commit is contained in:
Zuul 2021-04-29 01:16:04 +00:00 committed by Gerrit Code Review
commit 7202d17175
1 changed files with 151 additions and 64 deletions

View File

@ -8757,17 +8757,25 @@ class ConductorManager(service.PeriodicService):
fs = [] fs = []
for row in output: for row in output:
# Check PausedSyncS as well as drbd sync is changed to serial # Check PausedSyncS as well as drbd sync is changed to serial
if "drbd-pgsql" in row and ("SyncSource" in row or "PausedSyncS" in row): # Check Connected because there are cases when drbd-overview
# showed Connected instead of PausedSyncS and SyncSource states
if "drbd-pgsql" in row and ("SyncSource" in row or "PausedSyncS" in row
or "Connected" in row):
fs.append(constants.DRBD_PGSQL) fs.append(constants.DRBD_PGSQL)
if "drbd-platform" in row and ("SyncSource" in row or "PausedSyncS" in row): if "drbd-platform" in row and ("SyncSource" in row or "PausedSyncS" in row
or "Connected" in row):
fs.append(constants.DRBD_PLATFORM) fs.append(constants.DRBD_PLATFORM)
if "drbd-extension" in row and ("SyncSource" in row or "PausedSyncS" in row): if "drbd-extension" in row and ("SyncSource" in row or "PausedSyncS" in row
or "Connected" in row):
fs.append(constants.DRBD_EXTENSION) fs.append(constants.DRBD_EXTENSION)
if "drbd-dc-vault" in row and ("SyncSource" in row or "PausedSyncS" in row): if "drbd-dc-vault" in row and ("SyncSource" in row or "PausedSyncS" in row
or "Connected" in row):
fs.append(constants.DRBD_DC_VAULT) fs.append(constants.DRBD_DC_VAULT)
if "drbd-etcd" in row and ("SyncSource" in row or "PausedSyncS" in row): if "drbd-etcd" in row and ("SyncSource" in row or "PausedSyncS" in row
or "Connected" in row):
fs.append(constants.DRBD_ETCD) fs.append(constants.DRBD_ETCD)
if "drbd-dockerdistribution" in row and ("SyncSource" in row or "PausedSyncS" in row): if "drbd-dockerdistribution" in row and ("SyncSource" in row or "PausedSyncS" in row
or "Connected" in row):
fs.append(constants.DRBD_DOCKER_DISTRIBUTION) fs.append(constants.DRBD_DOCKER_DISTRIBUTION)
return fs return fs
@ -8785,32 +8793,18 @@ class ConductorManager(service.PeriodicService):
for row in drbd_dict: for row in drbd_dict:
if "sync\'ed" not in row: if "sync\'ed" not in row:
try:
size = ([_f for _f in row.split(' ') if _f])[8]
except IndexError:
LOG.error("Skipping unexpected drbd-overview output: %s" % row)
continue
unit = size[-1]
size = float(size[:-1])
# drbd-overview can display the units in M or G
if unit == 'M':
size = size / 1024
elif unit == 'T':
size = size * 1024
if 'drbd-pgsql' in row: if 'drbd-pgsql' in row:
drbd_pgsql_size = size drbd_pgsql_size = self._get_drbd_fs_size("drbd0")[0]
if 'drbd-platform' in row: elif 'drbd-platform' in row:
drbd_platform_size = size drbd_platform_size = self._get_drbd_fs_size("drbd2")[0]
if 'drbd-extension' in row: elif 'drbd-extension' in row:
drbd_extension_size = size drbd_extension_size = self._get_drbd_fs_size("drbd5")[0]
if 'drbd-dc-vault' in row: elif 'drbd-dc-vault' in row:
drbd_patch_size = size drbd_patch_size = self._get_drbd_fs_size("drbd6")[0]
if 'drbd-etcd' in row: elif 'drbd-etcd' in row:
drbd_etcd_size = size drbd_etcd_size = self._get_drbd_fs_size("drbd7")[0]
if 'drbd-dockerdistribution' in row: elif 'drbd-dockerdistribution' in row:
dockerdistribution_size = size dockerdistribution_size = self._get_drbd_fs_size("drbd8")[0]
lvdisplay_dict = self.get_controllerfs_lv_sizes(context) lvdisplay_dict = self.get_controllerfs_lv_sizes(context)
if lvdisplay_dict.get('pgsql-lv', None): if lvdisplay_dict.get('pgsql-lv', None):
@ -8851,11 +8845,111 @@ class ConductorManager(service.PeriodicService):
return drbd_fs_updated return drbd_fs_updated
def _get_drbd_fs_size(self, drbd_dev):
    """Get the size of the filesystem that lives on a drbd device.

    Runs ``dumpe2fs -h`` against ``/dev/<drbd_dev>`` and multiplies the
    reported "Block count" by the reported "Block size".

    :param drbd_dev: drbd device name without the "/dev/" prefix,
                     e.g. "drbd0"
    :returns: tuple with (drbd_filesystem_size, return_code).
              drbd_filesystem_size is the value produced by
              cutils.bytes_to_GiB for the computed byte count; it is 0
              whenever return_code is non-zero. return_code is the exit
              status of dumpe2fs, or 1 when the command could not be
              started or its output could not be parsed.
    """
    total_size = 0
    # An argument list (no shell) is enough for a single command and keeps
    # the device name from ever being interpreted as shell syntax.
    cmd = ["dumpe2fs", "-h", "/dev/{}".format(drbd_dev)]
    try:
        # universal_newlines=True makes communicate() return text on both
        # Python 2 and Python 3; without it the output is bytes on
        # Python 3 and cannot be split on '\n'.
        dumpfs_proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       universal_newlines=True)
        dumpfs_out, dumpfs_err = dumpfs_proc.communicate()
    except OSError as ex:
        # Without a shell a missing binary raises instead of exiting 127.
        LOG.error("Failed to execute _get_drbd_fs_size: drbd_dev: {} "
                  "error: {}".format(drbd_dev, ex))
        return total_size, 1

    retcode = dumpfs_proc.returncode
    if retcode == 0:
        block_size = None
        block_count = None
        try:
            for row in dumpfs_out.splitlines():
                # Rows look like "Block size:   4096"; the value is the
                # third whitespace-separated token.
                if "Block size" in row:
                    block_size = int(row.split()[2])
                elif "Block count" in row:
                    block_count = int(row.split()[2])
        except (IndexError, ValueError):
            # Row too short or value not numeric: output is unusable.
            retcode = 1
        else:
            if block_size is None or block_count is None:
                # A size of 0 must not be reported as a successful read.
                retcode = 1
            else:
                total_size = cutils.bytes_to_GiB(block_count * block_size)

    log_msg = "Executed _get_drbd_fs_size: drbd_dev: {} return code: {}"\
        .format(drbd_dev, retcode)
    if retcode != 0:
        log_msg += "\nstdout={}\nstderr={}".format(dumpfs_out, dumpfs_err)
    LOG.info(log_msg)

    return total_size, retcode
def _get_drbd_dev_size(self, drbd_dev):
    """Get the size of a drbd block device.

    Asks ``blockdev --getsize64`` for the device size in bytes. The
    previous approach multiplied the sector count from
    /sys/block/<dev>/size by the *physical* block size; that sysfs value
    is always expressed in 512-byte units, so the result was too large on
    devices whose physical block size is not 512.

    :param drbd_dev: drbd device name without the "/dev/" prefix,
                     e.g. "drbd0"
    :returns: tuple with (drbd_device_size, return_code).
              drbd_device_size is the value produced by
              cutils.bytes_to_GiB for the device size in bytes; it is 0
              whenever return_code is non-zero. return_code is the exit
              status of blockdev, or 1 when the command could not be
              started or its output was not an integer.
    """
    total_size = 0
    # An argument list (no shell) is enough for a single command and keeps
    # the device name from ever being interpreted as shell syntax.
    cmd = ["blockdev", "--getsize64", "/dev/{}".format(drbd_dev)]
    try:
        # Text mode keeps stdout/stderr readable in the log on Python 3.
        blockdev_proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE,
                                         universal_newlines=True)
        blockdev_out, blockdev_err = blockdev_proc.communicate()
    except OSError as ex:
        # Without a shell a missing binary raises instead of exiting 127.
        LOG.error("Failed to execute _get_drbd_dev_size: drbd_dev: {} "
                  "error: {}".format(drbd_dev, ex))
        return total_size, 1

    retcode = blockdev_proc.returncode
    if retcode == 0:
        try:
            total_size = cutils.bytes_to_GiB(int(blockdev_out.strip()))
        except ValueError:
            # Output was not a plain integer byte count.
            retcode = 1

    log_msg = "Executed _get_drbd_dev_size: drbd_dev: {} return code: {}"\
        .format(drbd_dev, retcode)
    if retcode != 0:
        log_msg += "\nstdout={}\nstderr={}".format(blockdev_out, blockdev_err)
    LOG.info(log_msg)

    return total_size, retcode
def _verify_drbd_dev_resized(self, context, drbd_dev, drbd_lv):
    """Report whether the drbd block device has caught up with its LV.

    Thin wrapper around _verify_drbd_resized_generic that uses
    _get_drbd_dev_size as the size probe.

    :param context: request context, forwarded unchanged
    :param drbd_dev: drbd device name, e.g. "drbd0"
    :param drbd_lv: key of the backing logical volume, e.g. "pgsql-lv"
    :returns: whatever _verify_drbd_resized_generic returns
    """
    size_probe = self._get_drbd_dev_size
    return self._verify_drbd_resized_generic(
        context, drbd_dev, drbd_lv, size_probe)
def _verify_drbd_fs_resized(self, context, drbd_dev, drbd_lv):
    """Report whether the filesystem on a drbd device has caught up with its LV.

    Thin wrapper around _verify_drbd_resized_generic that uses
    _get_drbd_fs_size as the size probe.

    :param context: request context, forwarded unchanged
    :param drbd_dev: drbd device name, e.g. "drbd0"
    :param drbd_lv: key of the backing logical volume, e.g. "pgsql-lv"
    :returns: whatever _verify_drbd_resized_generic returns
    """
    size_probe = self._get_drbd_fs_size
    return self._verify_drbd_resized_generic(
        context, drbd_dev, drbd_lv, size_probe)
def _verify_drbd_resized_generic(self, context, drbd_dev, drbd_lv,
                                 get_actual_size_func, delay=15, max_retries=3):
    """Poll until a drbd size probe reports at least the LV size.

    On each attempt the logical volume sizes are re-read through
    get_controllerfs_lv_sizes and compared with the value returned by
    get_actual_size_func. Both values are rounded up with math.ceil
    before comparison, so differences below one unit are ignored.

    :param context: request context, passed to get_controllerfs_lv_sizes
    :param drbd_dev: drbd device name handed to get_actual_size_func
    :param drbd_lv: key to look up in the get_controllerfs_lv_sizes result
    :param get_actual_size_func: callable taking the device name and
                                 returning a (size, return_code) tuple
    :param delay: seconds to wait between two attempts
    :param max_retries: maximum number of attempts
    :returns: True as soon as one attempt succeeds, False otherwise
    """
    attempt = 0
    while attempt < max_retries:
        lvdisplay_dict = self.get_controllerfs_lv_sizes(context)
        drbd_actual_size, retcode = get_actual_size_func(drbd_dev)

        lv_size = lvdisplay_dict.get(drbd_lv, None)
        if retcode == 0 and lv_size:
            if math.ceil(drbd_actual_size) >= math.ceil(float(lv_size)):
                return True

        attempt += 1
        # Only wait when another attempt follows; sleeping after the last
        # failure would just delay the negative answer by `delay` seconds.
        if attempt < max_retries:
            time.sleep(delay)

    return False
def _resize2fs_drbd_dev(self, context, retry_attempts, drbd_dev, drbd_lv):
    """Grow the filesystem on a drbd device once the device itself has grown.

    The steps are: confirm the block device has been resized, run
    ``resize2fs`` on it, then confirm the filesystem has been resized.

    :param context: request context, forwarded to the verification helpers
    :param retry_attempts: value passed as ``attempts`` to cutils.execute
    :param drbd_dev: drbd device name, e.g. "drbd0"
    :param drbd_lv: key of the backing logical volume, e.g. "pgsql-lv"
    :returns: True when the filesystem was verified as resized, False when
              either the device or the filesystem is not resized yet
    """
    # Nothing to do until the underlying device reflects the new size.
    if not self._verify_drbd_dev_resized(context, drbd_dev, drbd_lv):
        LOG.warn("{} device not resized yet".format(drbd_dev))
        return False

    step_name = "resize2fs {}".format(drbd_dev)
    resize_cmd = ["resize2fs", "/dev/{}".format(drbd_dev)]
    _out, _err = cutils.execute(*resize_cmd, attempts=retry_attempts,
                                run_as_root=True)

    if not self._verify_drbd_fs_resized(context, drbd_dev, drbd_lv):
        LOG.warn("{} filesystem not resized yet".format(drbd_dev))
        return False

    LOG.info("Performed %s" % step_name)
    return True
def _config_resize_filesystems(self, context, standby_host): def _config_resize_filesystems(self, context, standby_host):
"""Resize the filesystems upon completion of storage config. """Resize the filesystems upon completion of storage config.
Retry in case of errors or racing issues when resizing fails.""" Retry in case of errors or racing issues when resizing fails."""
progress = ""
retry_attempts = 3 retry_attempts = 3
rc = False rc = False
with open(os.devnull, "w"): with open(os.devnull, "w"):
@ -8891,66 +8985,60 @@ class ConductorManager(service.PeriodicService):
(not standby_host or (standby_host and (not standby_host or (standby_host and
constants.DRBD_PGSQL in self._drbd_fs_sync()))): constants.DRBD_PGSQL in self._drbd_fs_sync()))):
# database_gib /var/lib/postgresql # database_gib /var/lib/postgresql
progress = "resize2fs drbd0" drbd_dev = "drbd0"
cmd = ["resize2fs", "/dev/drbd0"] drbd_lv = "pgsql-lv"
stdout, __ = cutils.execute(*cmd, attempts=retry_attempts, run_as_root=True) pgsql_resized = self._resize2fs_drbd_dev(context, retry_attempts,
LOG.info("Performed %s" % progress) drbd_dev, drbd_lv)
pgsql_resized = True
if constants.DRBD_PLATFORM in drbd_fs_updated: if constants.DRBD_PLATFORM in drbd_fs_updated:
if (not platform_resized and if (not platform_resized and
(not standby_host or (standby_host and (not standby_host or (standby_host and
constants.DRBD_PLATFORM in self._drbd_fs_sync()))): constants.DRBD_PLATFORM in self._drbd_fs_sync()))):
# platform_gib /opt/platform # platform_gib /opt/platform
progress = "resize2fs drbd2" drbd_dev = "drbd2"
cmd = ["resize2fs", "/dev/drbd2"] drbd_lv = "platform-lv"
stdout, __ = cutils.execute(*cmd, attempts=retry_attempts, run_as_root=True) platform_resized = self._resize2fs_drbd_dev(context, retry_attempts,
LOG.info("Performed %s" % progress) drbd_dev, drbd_lv)
platform_resized = True
if constants.DRBD_EXTENSION in drbd_fs_updated: if constants.DRBD_EXTENSION in drbd_fs_updated:
if (not extension_resized and if (not extension_resized and
(not standby_host or (standby_host and (not standby_host or (standby_host and
constants.DRBD_EXTENSION in self._drbd_fs_sync()))): constants.DRBD_EXTENSION in self._drbd_fs_sync()))):
# extension_gib /opt/extension # extension_gib /opt/extension
progress = "resize2fs drbd5" drbd_dev = "drbd5"
cmd = ["resize2fs", "/dev/drbd5"] drbd_lv = "extension-lv"
stdout, __ = cutils.execute(*cmd, attempts=retry_attempts, run_as_root=True) extension_resized = self._resize2fs_drbd_dev(context, retry_attempts,
LOG.info("Performed %s" % progress) drbd_dev, drbd_lv)
extension_resized = True
if constants.DRBD_DC_VAULT in drbd_fs_updated: if constants.DRBD_DC_VAULT in drbd_fs_updated:
if (not patch_resized and if (not patch_resized and
(not standby_host or (standby_host and (not standby_host or (standby_host and
constants.DRBD_DC_VAULT in self._drbd_fs_sync()))): constants.DRBD_DC_VAULT in self._drbd_fs_sync()))):
# patch_gib /opt/dc-vault # patch_gib /opt/dc-vault
progress = "resize2fs drbd6" drbd_dev = "drbd6"
cmd = ["resize2fs", "/dev/drbd6"] drbd_lv = "dc-vault-lv"
stdout, __ = cutils.execute(*cmd, attempts=retry_attempts, run_as_root=True) patch_resized = self._resize2fs_drbd_dev(context, retry_attempts,
LOG.info("Performed %s" % progress) drbd_dev, drbd_lv)
patch_resized = True
if constants.DRBD_ETCD in drbd_fs_updated: if constants.DRBD_ETCD in drbd_fs_updated:
if (not etcd_resized and if (not etcd_resized and
(not standby_host or (standby_host and (not standby_host or (standby_host and
constants.DRBD_ETCD in self._drbd_fs_sync()))): constants.DRBD_ETCD in self._drbd_fs_sync()))):
# patch_gib /opt/etcd # patch_gib /opt/etcd
progress = "resize2fs drbd7" drbd_dev = "drbd7"
cmd = ["resize2fs", "/dev/drbd7"] drbd_lv = "etcd-lv"
stdout, __ = cutils.execute(*cmd, attempts=retry_attempts, run_as_root=True) etcd_resized = self._resize2fs_drbd_dev(context, retry_attempts,
LOG.info("Performed %s" % progress) drbd_dev, drbd_lv)
etcd_resized = True
if constants.DRBD_DOCKER_DISTRIBUTION in drbd_fs_updated: if constants.DRBD_DOCKER_DISTRIBUTION in drbd_fs_updated:
if (not dockerdistribution_resized and if (not dockerdistribution_resized and
(not standby_host or (standby_host and (not standby_host or (standby_host and
constants.DRBD_DOCKER_DISTRIBUTION in self._drbd_fs_sync()))): constants.DRBD_DOCKER_DISTRIBUTION in self._drbd_fs_sync()))):
# patch_gib /var/lib/docker-distribution # patch_gib /var/lib/docker-distribution
progress = "resize2fs drbd8" drbd_dev = "drbd8"
cmd = ["resize2fs", "/dev/drbd8"] drbd_lv = "dockerdistribution-lv"
stdout, __ = cutils.execute(*cmd, attempts=retry_attempts, run_as_root=True) dockerdistribution_resized = self._resize2fs_drbd_dev(context, retry_attempts,
LOG.info("Performed %s" % progress) drbd_dev, drbd_lv)
dockerdistribution_resized = True
if not standby_host: if not standby_host:
rc = True rc = True
@ -8980,12 +9068,11 @@ class ConductorManager(service.PeriodicService):
time.sleep(1) time.sleep(1)
else: else:
LOG.warn("resizing filesystems not completed") LOG.warn("resizing filesystems not completed")
except exception.ProcessExecutionError as ex: except exception.ProcessExecutionError as ex:
LOG.warn("Failed to perform storage resizing (cmd: '%(cmd)s', " LOG.warn("Failed to perform storage resizing (cmd: '%(cmd)s', "
"return code: %(rc)s, stdout: '%(stdout)s).', " "return code: %(rc)s, stdout: '%(stdout)s).', "
"stderr: '%(stderr)s'" % "stderr: '%(stderr)s'" %
{"cmd": " ".join(cmd), "stdout": ex.stdout, {"cmd": ex.cmd, "stdout": ex.stdout,
"stderr": ex.stderr, "rc": ex.exit_code}) "stderr": ex.stderr, "rc": ex.exit_code})
return rc return rc