Migrate etcd after both controllers are upgraded
The etcd database can go out of sync if we swact back and forth between controllers running different versions. This can happen if we need to abort the upgrade after swacting to controller-1. Since we are not upversioning etcd in this release, we will address this by deferring the etcd data migration until both controllers are running the new release. The migration will now take place during the swact back to controller-0, before upgrade-activate. This solution will present some problems when we do upgrade etcd, so further development will be required at that time. Story: 2008055 Task: 41630 Signed-off-by: David Sullivan <david.sullivan@windriver.com> Change-Id: I02b82bfe1a4b4b69aaa85d5f0d20246b9cda5629
This commit is contained in:
@@ -980,18 +980,6 @@ def upgrade_controller(from_release, to_release):
|
||||
except subprocess.CalledProcessError:
|
||||
LOG.exception("Failed to remove file %s" % admin_conf)
|
||||
|
||||
# Prepare for swact
|
||||
LOG.info("Prepare for swact to controller-1")
|
||||
try:
|
||||
subprocess.check_call(['/usr/bin/upgrade_swact_migration.py',
|
||||
'prepare_swact',
|
||||
from_release,
|
||||
to_release],
|
||||
stdout=devnull)
|
||||
except subprocess.CalledProcessError:
|
||||
LOG.exception("Failed upgrade_swact_migration prepare_swact")
|
||||
raise
|
||||
|
||||
print("Shutting down upgrade processes...")
|
||||
|
||||
# Stop postgres service
|
||||
|
||||
@@ -149,6 +149,14 @@ def prepare_upgrade(from_load, to_load, i_system, mgmt_address):
|
||||
"config"))
|
||||
raise
|
||||
|
||||
# Point N+1 etcd to N for now. We will migrate when both controllers are
|
||||
# running N+1, during the swact back to controller-0. This solution will
|
||||
# present some problems when we do upgrade etcd, so further development
|
||||
# will be required at that time.
|
||||
etcd_to_dir = os.path.join(tsc.ETCD_PATH, to_load)
|
||||
etcd_from_dir = os.path.join(tsc.ETCD_PATH, from_load)
|
||||
os.symlink(etcd_from_dir, etcd_to_dir)
|
||||
|
||||
# Copy /etc/kubernetes/admin.conf so controller-1 can access
|
||||
# during its upgrade
|
||||
try:
|
||||
@@ -292,6 +300,13 @@ def abort_upgrade(from_load, to_load, upgrade):
|
||||
except Exception:
|
||||
LOG.exception("Failed to unexport filesystems")
|
||||
|
||||
# Depending on where we are in the upgrade we may need to remove the
|
||||
# symlink to the etcd directory
|
||||
etcd_to_dir = os.path.join(tsc.ETCD_PATH, to_load)
|
||||
if os.path.islink(etcd_to_dir):
|
||||
LOG.info("Unlinking destination etcd directory: %s " % etcd_to_dir)
|
||||
os.unlink(etcd_to_dir)
|
||||
|
||||
# Remove upgrade directories
|
||||
upgrade_dirs = [
|
||||
os.path.join(tsc.PLATFORM_PATH, "config", to_load),
|
||||
@@ -382,6 +397,7 @@ def complete_upgrade(from_load, to_load, upgrade):
|
||||
os.path.join(tsc.PLATFORM_PATH, "sysinv", from_load),
|
||||
os.path.join(tsc.PLATFORM_PATH, "armada", from_load),
|
||||
os.path.join(tsc.PLATFORM_PATH, "helm", from_load),
|
||||
os.path.join(tsc.ETCD_PATH, from_load)
|
||||
]
|
||||
|
||||
for directory in upgrade_dirs:
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# This script will perform upgrade preparation and migration operations for
|
||||
# host-swact to controller-1.
|
||||
# host-swact to controller-0.
|
||||
#
|
||||
|
||||
import os
|
||||
@@ -14,12 +14,12 @@ import subprocess
|
||||
import sys
|
||||
import yaml
|
||||
|
||||
from oslo_log import log
|
||||
from controllerconfig.common import log
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
LOG = log.get_logger(__name__)
|
||||
|
||||
ETCD_PATH = "/opt/etcd"
|
||||
UPGRADE_CONTROLLER_1_FILE = "/etc/platform/.upgrade_swact_controller_1"
|
||||
UPGRADE_ETCD_FILE = os.path.join(ETCD_PATH, ".upgrade_etcd")
|
||||
|
||||
|
||||
def main():
|
||||
@@ -40,6 +40,11 @@ def main():
|
||||
return 1
|
||||
arg += 1
|
||||
|
||||
log.configure()
|
||||
|
||||
LOG.info("upgrade_swact_migration called with action: %s from_release: %s "
|
||||
"to_release: %s" % (action, from_release, to_release))
|
||||
|
||||
if action == "migrate_etcd":
|
||||
try:
|
||||
migrate_etcd_on_swact()
|
||||
@@ -48,6 +53,8 @@ def main():
|
||||
return 1
|
||||
elif action == "prepare_swact":
|
||||
upgrade_prepare_swact(from_release, to_release)
|
||||
|
||||
LOG.info("upgrade_swact_migration complete")
|
||||
return 0
|
||||
|
||||
|
||||
@@ -56,12 +63,17 @@ def upgrade_prepare_swact(from_release, to_release):
|
||||
'from_release': from_release,
|
||||
'to_release': to_release
|
||||
}
|
||||
with open(UPGRADE_CONTROLLER_1_FILE, 'w') as f:
|
||||
with open(UPGRADE_ETCD_FILE, 'w') as f:
|
||||
yaml.dump(migrate_data, f, default_flow_style=False)
|
||||
|
||||
|
||||
def migrate_etcd_on_swact():
|
||||
with open(UPGRADE_CONTROLLER_1_FILE, 'r') as f:
|
||||
if not os.path.isfile(UPGRADE_ETCD_FILE):
|
||||
LOG.info("Skipping etcd migration, no request %s" %
|
||||
UPGRADE_ETCD_FILE)
|
||||
return
|
||||
|
||||
with open(UPGRADE_ETCD_FILE, 'r') as f:
|
||||
document = yaml.safe_load(f)
|
||||
|
||||
from_release = document.get('from_release')
|
||||
@@ -69,23 +81,22 @@ def migrate_etcd_on_swact():
|
||||
|
||||
dest_etcd = os.path.join(ETCD_PATH, to_release)
|
||||
|
||||
if os.path.exists(dest_etcd):
|
||||
# The dest_etcd must not have already been created,
|
||||
# however this can occur on a forced host-swact
|
||||
LOG.info("skipping etcd migration %s already exists" %
|
||||
dest_etcd)
|
||||
return
|
||||
if os.path.islink(dest_etcd):
|
||||
LOG.info("Unlinking destination etcd directory: %s " % dest_etcd)
|
||||
os.unlink(dest_etcd)
|
||||
|
||||
if not os.path.isfile(UPGRADE_CONTROLLER_1_FILE):
|
||||
LOG.info("skipping etcd migration, no request %s" %
|
||||
UPGRADE_CONTROLLER_1_FILE)
|
||||
if os.path.exists(dest_etcd):
|
||||
# The directory was already copied but somehow the upgrade file exists
|
||||
LOG.info("Skipping etcd migration %s already exists" %
|
||||
dest_etcd)
|
||||
os.remove(UPGRADE_ETCD_FILE)
|
||||
return
|
||||
|
||||
source_etcd = os.path.join(ETCD_PATH, from_release)
|
||||
try:
|
||||
shutil.copytree(os.path.join(source_etcd),
|
||||
os.path.join(dest_etcd))
|
||||
os.remove(UPGRADE_CONTROLLER_1_FILE)
|
||||
os.remove(UPGRADE_ETCD_FILE)
|
||||
except subprocess.CalledProcessError:
|
||||
LOG.exception("Failed to migrate %s" % source_etcd)
|
||||
raise
|
||||
|
||||
Reference in New Issue
Block a user