Migrate etcd after both controllers are upgraded

The etcd database can go out of sync if we swact back and forth between
controllers running different versions. This can happen if we need to
abort the upgrade after swacting to controller-1. Since we are not
upversioning etcd in this release, we will address this by deferring the
etcd data migration until both controllers are running the new release.
The migration will now take place during the swact to controller-0
before upgrade-activate.

This solution will present some problems when we do upgrade etcd, so
further development will be required at that time.

Story: 2008055
Task: 41630
Signed-off-by: David Sullivan <david.sullivan@windriver.com>
Change-Id: I02b82bfe1a4b4b69aaa85d5f0d20246b9cda5629
This commit is contained in:
David Sullivan
2021-01-20 10:25:33 -06:00
parent 30e0b0487f
commit 6d4d5e3847
4 changed files with 62 additions and 28 deletions

View File

@@ -980,18 +980,6 @@ def upgrade_controller(from_release, to_release):
except subprocess.CalledProcessError:
LOG.exception("Failed to remove file %s" % admin_conf)
# Prepare for swact
LOG.info("Prepare for swact to controller-1")
try:
subprocess.check_call(['/usr/bin/upgrade_swact_migration.py',
'prepare_swact',
from_release,
to_release],
stdout=devnull)
except subprocess.CalledProcessError:
LOG.exception("Failed upgrade_swact_migration prepare_swact")
raise
print("Shutting down upgrade processes...")
# Stop postgres service

View File

@@ -149,6 +149,14 @@ def prepare_upgrade(from_load, to_load, i_system, mgmt_address):
"config"))
raise
# Point N+1 etcd to N for now. We will migrate when both controllers are
# running N+1, during the swact back to controller-0. This solution will
# present some problems when we do upgrade etcd, so further development
# will be required at that time.
etcd_to_dir = os.path.join(tsc.ETCD_PATH, to_load)
etcd_from_dir = os.path.join(tsc.ETCD_PATH, from_load)
os.symlink(etcd_from_dir, etcd_to_dir)
# Copy /etc/kubernetes/admin.conf so controller-1 can access
# during its upgrade
try:
@@ -292,6 +300,13 @@ def abort_upgrade(from_load, to_load, upgrade):
except Exception:
LOG.exception("Failed to unexport filesystems")
# Depending on where we are in the upgrade we may need to remove the
# symlink to the etcd directory
etcd_to_dir = os.path.join(tsc.ETCD_PATH, to_load)
if os.path.islink(etcd_to_dir):
LOG.info("Unlinking destination etcd directory: %s " % etcd_to_dir)
os.unlink(etcd_to_dir)
# Remove upgrade directories
upgrade_dirs = [
os.path.join(tsc.PLATFORM_PATH, "config", to_load),
@@ -382,6 +397,7 @@ def complete_upgrade(from_load, to_load, upgrade):
os.path.join(tsc.PLATFORM_PATH, "sysinv", from_load),
os.path.join(tsc.PLATFORM_PATH, "armada", from_load),
os.path.join(tsc.PLATFORM_PATH, "helm", from_load),
os.path.join(tsc.ETCD_PATH, from_load)
]
for directory in upgrade_dirs:

View File

@@ -5,7 +5,7 @@
# SPDX-License-Identifier: Apache-2.0
#
# This script will perform upgrade preparation and migration operations for
# host-swact to controller-1.
# host-swact to controller-0.
#
import os
@@ -14,12 +14,12 @@ import subprocess
import sys
import yaml
from oslo_log import log
from controllerconfig.common import log
LOG = log.getLogger(__name__)
LOG = log.get_logger(__name__)
ETCD_PATH = "/opt/etcd"
UPGRADE_CONTROLLER_1_FILE = "/etc/platform/.upgrade_swact_controller_1"
UPGRADE_ETCD_FILE = os.path.join(ETCD_PATH, ".upgrade_etcd")
def main():
@@ -40,6 +40,11 @@ def main():
return 1
arg += 1
log.configure()
LOG.info("upgrade_swact_migration called with action: %s from_release: %s "
"to_release: %s" % (action, from_release, to_release))
if action == "migrate_etcd":
try:
migrate_etcd_on_swact()
@@ -48,6 +53,8 @@ def main():
return 1
elif action == "prepare_swact":
upgrade_prepare_swact(from_release, to_release)
LOG.info("upgrade_swact_migration complete")
return 0
@@ -56,12 +63,17 @@ def upgrade_prepare_swact(from_release, to_release):
'from_release': from_release,
'to_release': to_release
}
with open(UPGRADE_CONTROLLER_1_FILE, 'w') as f:
with open(UPGRADE_ETCD_FILE, 'w') as f:
yaml.dump(migrate_data, f, default_flow_style=False)
def migrate_etcd_on_swact():
with open(UPGRADE_CONTROLLER_1_FILE, 'r') as f:
if not os.path.isfile(UPGRADE_ETCD_FILE):
LOG.info("Skipping etcd migration, no request %s" %
UPGRADE_ETCD_FILE)
return
with open(UPGRADE_ETCD_FILE, 'r') as f:
document = yaml.safe_load(f)
from_release = document.get('from_release')
@@ -69,23 +81,22 @@ def migrate_etcd_on_swact():
dest_etcd = os.path.join(ETCD_PATH, to_release)
if os.path.exists(dest_etcd):
# The dest_etcd must not have already been created,
# however this can occur on a forced host-swact
LOG.info("skipping etcd migration %s already exists" %
dest_etcd)
return
if os.path.islink(dest_etcd):
LOG.info("Unlinking destination etcd directory: %s " % dest_etcd)
os.unlink(dest_etcd)
if not os.path.isfile(UPGRADE_CONTROLLER_1_FILE):
LOG.info("skipping etcd migration, no request %s" %
UPGRADE_CONTROLLER_1_FILE)
if os.path.exists(dest_etcd):
# The directory was already copied but somehow the upgrade file exists
LOG.info("Skipping etcd migration %s already exists" %
dest_etcd)
os.remove(UPGRADE_ETCD_FILE)
return
source_etcd = os.path.join(ETCD_PATH, from_release)
try:
shutil.copytree(os.path.join(source_etcd),
os.path.join(dest_etcd))
os.remove(UPGRADE_CONTROLLER_1_FILE)
os.remove(UPGRADE_ETCD_FILE)
except subprocess.CalledProcessError:
LOG.exception("Failed to migrate %s" % source_etcd)
raise