Fix race condition when copying default-registry-key

There is a race condition in the 'Copy default-registry-key to "rvmc"
namespace' task of the apply-rvmc-job playbook role. It can block or fail
some remote installations when they are executed in a large batch. The
solution is to move the related tasks from the install playbook to
subcloud_install.py and ensure that these tasks are executed in a
thread-safe manner.

Test Plan:
PASS: Delete the "rvmc" namespace and successfully run subcloud parallel
      installation without this race condition issue.
PASS: Ensure the "rvmc" namespace exists but contains no
      default-registry-key secret, then successfully run subcloud parallel
      installation without this race condition issue.
PASS: Ensure the default-registry-key secret exists in the "rvmc" namespace,
      then successfully run subcloud parallel installation without this
      race condition issue.

Closes-Bug: 2031846

Change-Id: I7dd1bf34e26bf5d56b3d412c1587337122a3821d
Signed-off-by: lzhu1 <li.zhu@windriver.com>
This commit is contained in:
Li Zhu 2023-08-17 23:37:25 -04:00
parent ccf55d7263
commit c0248e0bdb
2 changed files with 72 additions and 0 deletions

View File

@ -204,3 +204,49 @@ class KubeOperator(object):
except Exception as e:
LOG.error("Kubernetes exception in kube_delete_pod: %s" % e)
raise
def kube_get_namespace(self, namespace):
    """Report whether ``namespace`` can be read from the cluster.

    :param namespace: name of the Namespace to look up.
    :returns: True when the read succeeds, False when the API answers
        with NOT_FOUND.
    :raises ApiException: for any API status other than NOT_FOUND
        (logged first).
    :raises Exception: any other failure is logged and re-raised.
    """
    core_api = self._get_kubernetesclient_core()
    try:
        core_api.read_namespace(namespace)
    except ApiException as api_err:
        # A missing namespace is an expected answer, not an error.
        if api_err.status == httplib.NOT_FOUND:
            return False
        LOG.error("Failed to get Namespace %s: %s" % (namespace,
                                                      api_err.body))
        raise
    except Exception as err:
        LOG.error("Kubernetes exception in kube_get_namespace %s: %s"
                  % (namespace, err))
        raise
    else:
        return True
def kube_create_namespace(self, namespace):
    """Create ``namespace``, tolerating the case where it already exists.

    :param namespace: name of the Namespace to create.
    :raises ApiException: for any API status other than CONFLICT
        (logged first).
    :raises Exception: any other failure is logged and re-raised.
    """
    body = {'metadata': {'name': namespace}}

    c = self._get_kubernetesclient_core()
    try:
        c.create_namespace(body)
    except ApiException as e:
        if e.status == httplib.CONFLICT:
            # CONFLICT means the namespace is already there; this is
            # deliberately not treated as a failure.
            LOG.warning("Namespace %s already exist." % namespace)
        else:
            LOG.error("Failed to create Namespace %s: %s" % (namespace, e.body))
            raise
    except Exception as e:
        # Log under this method's real name so the message can be traced
        # back to the code that emitted it.
        LOG.error("Kubernetes exception in "
                  "kube_create_namespace %s: %s" % (namespace, e))
        raise
def kube_copy_secret(self, name, src_namespace, dst_namespace):
    """Duplicate Secret ``name`` from one namespace into another.

    The Secret is read from ``src_namespace``, its metadata is retargeted
    at ``dst_namespace`` and it is then created there.

    :param name: name of the Secret to copy.
    :param src_namespace: namespace the Secret is read from.
    :param dst_namespace: namespace the Secret is created in.
    :raises Exception: any failure while reading or creating is logged
        and re-raised unchanged.
    """
    core_api = self._get_kubernetesclient_core()
    try:
        secret = core_api.read_namespaced_secret(name, src_namespace)
        metadata = secret.metadata
        metadata.namespace = dst_namespace
        # NOTE(review): presumably cleared because a create request must
        # not carry the source object's resource version -- confirm.
        metadata.resource_version = None
        core_api.create_namespaced_secret(dst_namespace, secret)
    except Exception as err:
        failure = ("Failed to copy Secret %s from Namespace %s to Namespace "
                   "%s: %s" % (name, src_namespace, dst_namespace, err))
        LOG.error(failure)
        raise

View File

@ -23,6 +23,7 @@ import time
from eventlet.green import subprocess
import netaddr
from oslo_concurrency import lockutils
from oslo_log import log as logging
from six.moves.urllib import error as urllib_error
from six.moves.urllib import parse
@ -33,6 +34,7 @@ from dccommon import consts
from dccommon.drivers.openstack.keystone_v3 import KeystoneClient
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon import exceptions
from dccommon import kubeoperator
from dccommon import utils as dccommon_utils
from dcmanager.common import consts as dcmanager_consts
from dcmanager.common import utils
@ -66,6 +68,11 @@ REDFISH_HEADER = {'Content-Type': 'application/json',
REDFISH_SYSTEMS_URL = '/redfish/v1/Systems'
SUCCESSFUL_STATUS_CODES = [200, 202, 204]
# Lock name used to serialize copy_default_registry_key().
RVMC_LOCK_NAME = 'dc-rvmc-install'
# Namespace that receives the copied registry secret.
RVMC_NAMESPACE = 'rvmc'
# Namespace the registry secret is copied from.
KUBE_SYSTEM_NAMESPACE = 'kube-system'
# Name of the secret copied between the two namespaces above.
DEFAULT_REGISTRY_KEY = 'default-registry-key'
class SubcloudShutdown(object):
"""Sends a shutdown signal to a Redfish controlled subcloud
@ -272,6 +279,22 @@ class SubcloudInstall(object):
if k in consts.BMC_INSTALL_VALUES or k == 'image':
f_out_rvmc_config_file.write(k + ': ' + v + '\n')
@lockutils.synchronized(RVMC_LOCK_NAME)
def copy_default_registry_key(self):
    """Copy default-registry-key secret for pulling rvmc image.

    Runs under the ``RVMC_LOCK_NAME`` lock. If the secret is already
    present in the rvmc namespace nothing is done; otherwise the
    namespace is created when missing and the secret is copied into it
    from the kube-system namespace.

    :raises Exception: any failure is logged with its traceback and
        re-raised unchanged.
    """
    kube = kubeoperator.KubeOperator()
    try:
        # A non-None result means the secret is already in place.
        if kube.kube_get_secret(DEFAULT_REGISTRY_KEY,
                                RVMC_NAMESPACE) is not None:
            return

        if not kube.kube_get_namespace(RVMC_NAMESPACE):
            LOG.info("Creating rvmc namespace")
            kube.kube_create_namespace(RVMC_NAMESPACE)

        LOG.info("Copying default-registry-key secret to rvmc namespace")
        kube.kube_copy_secret(
            DEFAULT_REGISTRY_KEY, KUBE_SYSTEM_NAMESPACE, RVMC_NAMESPACE)
    except Exception:
        LOG.exception("Failed to copy default-registry-key secret")
        # Bare raise keeps the original traceback intact.
        raise
def create_install_override_file(self, override_path, payload):
LOG.debug("create install override file")
@ -664,6 +687,9 @@ class SubcloudInstall(object):
# create the rvmc config file
self.create_rvmc_config_file(override_path, payload)
# copy the default_registry_key secret to rvmc namespace
self.copy_default_registry_key()
# remove the bmc values from the payload
for k in consts.BMC_INSTALL_VALUES:
if k in payload: