Fix resize of drbd filesystems

This commit adds the following modifications:

- The drbd filesystem sizes are now calculated using the 'dumpe2fs'
utility because it gives better results for larger filesystems.

- Added extra checks before and after executing 'resize2fs'.
Before running 'resize2fs', check that the drbd device has been resized,
using "/sys/block/{drbd_device}/size" and the sector size.
After running 'resize2fs', check that the filesystem has been resized,
using 'dumpe2fs'.

- The drbd filesystems were resized only if they were
in the SyncSource or PausedSyncS states. There are cases
when drbd-overview showed "Connected" instead of these sync states,
and the filesystems would never be resized. Now
the drbd filesystems will also be resized when they
are in the "Connected" state.

Closes-Bug: 1921896
Change-Id: I548300deb8916ce863bcd4bb70969cb9d51c9c2a
Signed-off-by: Mihnea Saracin <Mihnea.Saracin@windriver.com>
This commit is contained in:
Mihnea Saracin 2021-03-30 14:12:23 +03:00
parent c11892e146
commit b1c8d95f2c
1 changed files with 151 additions and 64 deletions

View File

@ -8687,17 +8687,25 @@ class ConductorManager(service.PeriodicService):
fs = [] fs = []
for row in output: for row in output:
# Check PausedSyncS as well as drbd sync is changed to serial # Check PausedSyncS as well as drbd sync is changed to serial
if "drbd-pgsql" in row and ("SyncSource" in row or "PausedSyncS" in row): # Check Connected because there are cases when drbd-overview
# showed Connected instead of PausedSyncS and SyncSource states
if "drbd-pgsql" in row and ("SyncSource" in row or "PausedSyncS" in row
or "Connected" in row):
fs.append(constants.DRBD_PGSQL) fs.append(constants.DRBD_PGSQL)
if "drbd-platform" in row and ("SyncSource" in row or "PausedSyncS" in row): if "drbd-platform" in row and ("SyncSource" in row or "PausedSyncS" in row
or "Connected" in row):
fs.append(constants.DRBD_PLATFORM) fs.append(constants.DRBD_PLATFORM)
if "drbd-extension" in row and ("SyncSource" in row or "PausedSyncS" in row): if "drbd-extension" in row and ("SyncSource" in row or "PausedSyncS" in row
or "Connected" in row):
fs.append(constants.DRBD_EXTENSION) fs.append(constants.DRBD_EXTENSION)
if "drbd-dc-vault" in row and ("SyncSource" in row or "PausedSyncS" in row): if "drbd-dc-vault" in row and ("SyncSource" in row or "PausedSyncS" in row
or "Connected" in row):
fs.append(constants.DRBD_DC_VAULT) fs.append(constants.DRBD_DC_VAULT)
if "drbd-etcd" in row and ("SyncSource" in row or "PausedSyncS" in row): if "drbd-etcd" in row and ("SyncSource" in row or "PausedSyncS" in row
or "Connected" in row):
fs.append(constants.DRBD_ETCD) fs.append(constants.DRBD_ETCD)
if "drbd-dockerdistribution" in row and ("SyncSource" in row or "PausedSyncS" in row): if "drbd-dockerdistribution" in row and ("SyncSource" in row or "PausedSyncS" in row
or "Connected" in row):
fs.append(constants.DRBD_DOCKER_DISTRIBUTION) fs.append(constants.DRBD_DOCKER_DISTRIBUTION)
return fs return fs
@ -8715,32 +8723,18 @@ class ConductorManager(service.PeriodicService):
for row in drbd_dict: for row in drbd_dict:
if "sync\'ed" not in row: if "sync\'ed" not in row:
try:
size = ([_f for _f in row.split(' ') if _f])[8]
except IndexError:
LOG.error("Skipping unexpected drbd-overview output: %s" % row)
continue
unit = size[-1]
size = float(size[:-1])
# drbd-overview can display the units in M or G
if unit == 'M':
size = size / 1024
elif unit == 'T':
size = size * 1024
if 'drbd-pgsql' in row: if 'drbd-pgsql' in row:
drbd_pgsql_size = size drbd_pgsql_size = self._get_drbd_fs_size("drbd0")[0]
if 'drbd-platform' in row: elif 'drbd-platform' in row:
drbd_platform_size = size drbd_platform_size = self._get_drbd_fs_size("drbd2")[0]
if 'drbd-extension' in row: elif 'drbd-extension' in row:
drbd_extension_size = size drbd_extension_size = self._get_drbd_fs_size("drbd5")[0]
if 'drbd-dc-vault' in row: elif 'drbd-dc-vault' in row:
drbd_patch_size = size drbd_patch_size = self._get_drbd_fs_size("drbd6")[0]
if 'drbd-etcd' in row: elif 'drbd-etcd' in row:
drbd_etcd_size = size drbd_etcd_size = self._get_drbd_fs_size("drbd7")[0]
if 'drbd-dockerdistribution' in row: elif 'drbd-dockerdistribution' in row:
dockerdistribution_size = size dockerdistribution_size = self._get_drbd_fs_size("drbd8")[0]
lvdisplay_dict = self.get_controllerfs_lv_sizes(context) lvdisplay_dict = self.get_controllerfs_lv_sizes(context)
if lvdisplay_dict.get('pgsql-lv', None): if lvdisplay_dict.get('pgsql-lv', None):
@ -8781,11 +8775,111 @@ class ConductorManager(service.PeriodicService):
return drbd_fs_updated return drbd_fs_updated
def _get_drbd_fs_size(self, drbd_dev):
    """Get drbd filesystem size.

    Runs ``dumpe2fs -h`` on ``/dev/<drbd_dev>`` and multiplies the
    reported "Block count" by the reported "Block size".

    :param drbd_dev: drbd device name, e.g. "drbd0"
    :returns: tuple with (drbd_filesystem_size, return_code). The size is
              converted with cutils.bytes_to_GiB(). It stays 0 and
              return_code is non-zero when the command fails or when a
              size field in its output cannot be parsed.
    """
    cmd = "dumpe2fs -h /dev/{}".format(drbd_dev)
    # universal_newlines=True makes communicate() return text rather than
    # bytes, so the str-based parsing below works on Python 2 and Python 3.
    dumpfs_proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE, shell=True,
                                   universal_newlines=True)
    dumpfs_out, dumpfs_err = dumpfs_proc.communicate()
    total_size = 0
    retcode = dumpfs_proc.returncode
    log_msg = "Executed _get_drbd_fs_size: drbd_dev: {} return code: {}"\
        .format(drbd_dev, retcode)

    if retcode == 0:
        block_size = 0
        block_count = 0
        try:
            for row in dumpfs_out.splitlines():
                # Matching rows are split on whitespace and the third
                # token is taken as the integer value.
                if "Block size" in row:
                    block_size = int(row.split()[2])
                elif "Block count" in row:
                    block_count = int(row.split()[2])
            total_size = cutils.bytes_to_GiB(block_count * block_size)
        except (IndexError, ValueError):
            # IndexError: the row has fewer than three tokens.
            # ValueError: the third token is not an integer.
            # Either way the output is unusable, so report a failure
            # instead of letting the exception escape to the caller.
            retcode = 1
            log_msg += "\nUnable to parse dumpe2fs output"
    else:
        log_msg += "\nstdout={}\nstderr={}".format(dumpfs_out, dumpfs_err)

    LOG.info(log_msg)
    return total_size, retcode
def _get_drbd_dev_size(self, drbd_dev):
    """Get drbd device size.

    :param drbd_dev: drbd device name, e.g. "drbd0"
    :returns: tuple with (drbd_device_size, return_code). The size is
              converted with cutils.bytes_to_GiB(). return_code is
              non-zero when the command fails or its output is not an
              integer.
    """
    # Query the size in bytes directly. It is deliberately NOT computed as
    # /sys/block/<dev>/size multiplied by the physical block size from
    # "blockdev --getpbsz": that sysfs value is always expressed in
    # 512-byte units, so a device with 4096-byte physical blocks would be
    # reported 8 times larger than it really is and the "device resized"
    # check would pass too early.
    cmd = "blockdev --getsize64 /dev/{}".format(drbd_dev)
    blockdev_proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE, shell=True,
                                     universal_newlines=True)
    blockdev_out, blockdev_err = blockdev_proc.communicate()
    total_size = 0
    retcode = blockdev_proc.returncode
    log_msg = "Executed _get_drbd_dev_size: drbd_dev: {} return code: {}"\
        .format(drbd_dev, retcode)

    if retcode == 0:
        size_in_bytes = 0
        try:
            size_in_bytes = int(blockdev_out.strip())
        except ValueError:
            # Unexpected (non-numeric) output: report failure to the caller.
            retcode = 1
        total_size = cutils.bytes_to_GiB(size_in_bytes)
    else:
        log_msg += "\nstdout={}\nstderr={}".format(blockdev_out, blockdev_err)

    LOG.info(log_msg)
    return total_size, retcode
def _verify_drbd_dev_resized(self, context, drbd_dev, drbd_lv):
return self._verify_drbd_resized_generic(context, drbd_dev,
drbd_lv, self._get_drbd_dev_size)
def _verify_drbd_fs_resized(self, context, drbd_dev, drbd_lv):
return self._verify_drbd_resized_generic(context, drbd_dev,
drbd_lv, self._get_drbd_fs_size)
def _verify_drbd_resized_generic(self, context, drbd_dev, drbd_lv,
get_actual_size_func, delay=15, max_retries=3):
retries = 0
resized = False
while retries < max_retries:
lvdisplay_dict = self.get_controllerfs_lv_sizes(context)
drbd_actual_size, retcode = get_actual_size_func(drbd_dev)
if retcode == 0 and lvdisplay_dict.get(drbd_lv, None):
drbd_lv_size = float(lvdisplay_dict[drbd_lv])
if math.ceil(drbd_actual_size) >= math.ceil(drbd_lv_size):
resized = True
break
retries += 1
time.sleep(delay)
return resized
def _resize2fs_drbd_dev(self, context, retry_attempts, drbd_dev, drbd_lv):
    """Run resize2fs on a drbd device and confirm the filesystem was resized.

    resize2fs is executed only after _verify_drbd_dev_resized() passes.
    The call counts as successful only if _verify_drbd_fs_resized() passes
    afterwards.

    :param context: request context forwarded to the verification helpers
    :param retry_attempts: value passed as ``attempts`` to cutils.execute()
    :param drbd_dev: drbd device name, used to build "/dev/<drbd_dev>"
    :param drbd_lv: key of the logical volume used by the verifications
    :returns: True if resize2fs ran and the filesystem check passed,
              False otherwise
    """
    # Guard: do not touch the filesystem until the device check passes.
    if not self._verify_drbd_dev_resized(context, drbd_dev, drbd_lv):
        LOG.warn("{} device not resized yet".format(drbd_dev))
        return False

    action = "resize2fs {}".format(drbd_dev)
    resize_cmd = ["resize2fs", "/dev/{}".format(drbd_dev)]
    _out, _err = cutils.execute(*resize_cmd, attempts=retry_attempts,
                                run_as_root=True)

    # Guard: the command ran, but the filesystem check must also pass.
    if not self._verify_drbd_fs_resized(context, drbd_dev, drbd_lv):
        LOG.warn("{} filesystem not resized yet".format(drbd_dev))
        return False

    LOG.info("Performed %s" % action)
    return True
def _config_resize_filesystems(self, context, standby_host): def _config_resize_filesystems(self, context, standby_host):
"""Resize the filesystems upon completion of storage config. """Resize the filesystems upon completion of storage config.
Retry in case of errors or racing issues when resizing fails.""" Retry in case of errors or racing issues when resizing fails."""
progress = ""
retry_attempts = 3 retry_attempts = 3
rc = False rc = False
with open(os.devnull, "w"): with open(os.devnull, "w"):
@ -8821,66 +8915,60 @@ class ConductorManager(service.PeriodicService):
(not standby_host or (standby_host and (not standby_host or (standby_host and
constants.DRBD_PGSQL in self._drbd_fs_sync()))): constants.DRBD_PGSQL in self._drbd_fs_sync()))):
# database_gib /var/lib/postgresql # database_gib /var/lib/postgresql
progress = "resize2fs drbd0" drbd_dev = "drbd0"
cmd = ["resize2fs", "/dev/drbd0"] drbd_lv = "pgsql-lv"
stdout, __ = cutils.execute(*cmd, attempts=retry_attempts, run_as_root=True) pgsql_resized = self._resize2fs_drbd_dev(context, retry_attempts,
LOG.info("Performed %s" % progress) drbd_dev, drbd_lv)
pgsql_resized = True
if constants.DRBD_PLATFORM in drbd_fs_updated: if constants.DRBD_PLATFORM in drbd_fs_updated:
if (not platform_resized and if (not platform_resized and
(not standby_host or (standby_host and (not standby_host or (standby_host and
constants.DRBD_PLATFORM in self._drbd_fs_sync()))): constants.DRBD_PLATFORM in self._drbd_fs_sync()))):
# platform_gib /opt/platform # platform_gib /opt/platform
progress = "resize2fs drbd2" drbd_dev = "drbd2"
cmd = ["resize2fs", "/dev/drbd2"] drbd_lv = "platform-lv"
stdout, __ = cutils.execute(*cmd, attempts=retry_attempts, run_as_root=True) platform_resized = self._resize2fs_drbd_dev(context, retry_attempts,
LOG.info("Performed %s" % progress) drbd_dev, drbd_lv)
platform_resized = True
if constants.DRBD_EXTENSION in drbd_fs_updated: if constants.DRBD_EXTENSION in drbd_fs_updated:
if (not extension_resized and if (not extension_resized and
(not standby_host or (standby_host and (not standby_host or (standby_host and
constants.DRBD_EXTENSION in self._drbd_fs_sync()))): constants.DRBD_EXTENSION in self._drbd_fs_sync()))):
# extension_gib /opt/extension # extension_gib /opt/extension
progress = "resize2fs drbd5" drbd_dev = "drbd5"
cmd = ["resize2fs", "/dev/drbd5"] drbd_lv = "extension-lv"
stdout, __ = cutils.execute(*cmd, attempts=retry_attempts, run_as_root=True) extension_resized = self._resize2fs_drbd_dev(context, retry_attempts,
LOG.info("Performed %s" % progress) drbd_dev, drbd_lv)
extension_resized = True
if constants.DRBD_DC_VAULT in drbd_fs_updated: if constants.DRBD_DC_VAULT in drbd_fs_updated:
if (not patch_resized and if (not patch_resized and
(not standby_host or (standby_host and (not standby_host or (standby_host and
constants.DRBD_DC_VAULT in self._drbd_fs_sync()))): constants.DRBD_DC_VAULT in self._drbd_fs_sync()))):
# patch_gib /opt/dc-vault # patch_gib /opt/dc-vault
progress = "resize2fs drbd6" drbd_dev = "drbd6"
cmd = ["resize2fs", "/dev/drbd6"] drbd_lv = "dc-vault-lv"
stdout, __ = cutils.execute(*cmd, attempts=retry_attempts, run_as_root=True) patch_resized = self._resize2fs_drbd_dev(context, retry_attempts,
LOG.info("Performed %s" % progress) drbd_dev, drbd_lv)
patch_resized = True
if constants.DRBD_ETCD in drbd_fs_updated: if constants.DRBD_ETCD in drbd_fs_updated:
if (not etcd_resized and if (not etcd_resized and
(not standby_host or (standby_host and (not standby_host or (standby_host and
constants.DRBD_ETCD in self._drbd_fs_sync()))): constants.DRBD_ETCD in self._drbd_fs_sync()))):
# patch_gib /opt/etcd # patch_gib /opt/etcd
progress = "resize2fs drbd7" drbd_dev = "drbd7"
cmd = ["resize2fs", "/dev/drbd7"] drbd_lv = "etcd-lv"
stdout, __ = cutils.execute(*cmd, attempts=retry_attempts, run_as_root=True) etcd_resized = self._resize2fs_drbd_dev(context, retry_attempts,
LOG.info("Performed %s" % progress) drbd_dev, drbd_lv)
etcd_resized = True
if constants.DRBD_DOCKER_DISTRIBUTION in drbd_fs_updated: if constants.DRBD_DOCKER_DISTRIBUTION in drbd_fs_updated:
if (not dockerdistribution_resized and if (not dockerdistribution_resized and
(not standby_host or (standby_host and (not standby_host or (standby_host and
constants.DRBD_DOCKER_DISTRIBUTION in self._drbd_fs_sync()))): constants.DRBD_DOCKER_DISTRIBUTION in self._drbd_fs_sync()))):
# patch_gib /var/lib/docker-distribution # patch_gib /var/lib/docker-distribution
progress = "resize2fs drbd8" drbd_dev = "drbd8"
cmd = ["resize2fs", "/dev/drbd8"] drbd_lv = "dockerdistribution-lv"
stdout, __ = cutils.execute(*cmd, attempts=retry_attempts, run_as_root=True) dockerdistribution_resized = self._resize2fs_drbd_dev(context, retry_attempts,
LOG.info("Performed %s" % progress) drbd_dev, drbd_lv)
dockerdistribution_resized = True
if not standby_host: if not standby_host:
rc = True rc = True
@ -8910,12 +8998,11 @@ class ConductorManager(service.PeriodicService):
time.sleep(1) time.sleep(1)
else: else:
LOG.warn("resizing filesystems not completed") LOG.warn("resizing filesystems not completed")
except exception.ProcessExecutionError as ex: except exception.ProcessExecutionError as ex:
LOG.warn("Failed to perform storage resizing (cmd: '%(cmd)s', " LOG.warn("Failed to perform storage resizing (cmd: '%(cmd)s', "
"return code: %(rc)s, stdout: '%(stdout)s).', " "return code: %(rc)s, stdout: '%(stdout)s).', "
"stderr: '%(stderr)s'" % "stderr: '%(stderr)s'" %
{"cmd": " ".join(cmd), "stdout": ex.stdout, {"cmd": ex.cmd, "stdout": ex.stdout,
"stderr": ex.stderr, "rc": ex.exit_code}) "stderr": ex.stderr, "rc": ex.exit_code})
return rc return rc