Fix race condition when copying default-registry-key
There is a race condition in the "Copy default-registry-key to 'rvmc' namespace" task of the apply-rvmc-job playbook role. It can block or fail some remote installations when they are executed in a large batch. The solution is to move the related tasks from the install playbook to subcloud_install.py and ensure that these tasks are executed in a thread-safe manner. Test Plan: PASS: Delete the "rvmc" namespace and successfully run a parallel subcloud installation without hitting this race condition. PASS: Ensure the "rvmc" namespace exists but contains no default-registry-key secret, and successfully run a parallel subcloud installation without hitting this race condition. PASS: Ensure the default-registry-key secret exists in the "rvmc" namespace and successfully run a parallel subcloud installation without hitting this race condition. Closes-Bug: 2031846 Change-Id: I7dd1bf34e26bf5d56b3d412c1587337122a3821d Signed-off-by: lzhu1 <li.zhu@windriver.com>
This commit is contained in:
parent
ccf55d7263
commit
c0248e0bdb
@ -204,3 +204,49 @@ class KubeOperator(object):
|
||||
except Exception as e:
|
||||
LOG.error("Kubernetes exception in kube_delete_pod: %s" % e)
|
||||
raise
|
||||
|
||||
def kube_get_namespace(self, namespace):
    """Report whether ``namespace`` can be read from the cluster.

    :param namespace: name of the namespace to look up
    :returns: True when the namespace is read successfully, False when
              the API answers with NOT_FOUND
    :raises: any other ApiException, or any unexpected exception, is
             logged and re-raised
    """
    core_api = self._get_kubernetesclient_core()
    try:
        core_api.read_namespace(namespace)
    except ApiException as api_err:
        # NOT_FOUND is the expected "does not exist" answer, not a failure.
        if api_err.status == httplib.NOT_FOUND:
            return False
        LOG.error("Failed to get Namespace %s: %s"
                  % (namespace, api_err.body))
        raise
    except Exception as err:
        LOG.error("Kubernetes exception in "
                  "kube_get_namespace %s: %s" % (namespace, err))
        raise
    else:
        return True
|
||||
|
||||
def kube_create_namespace(self, namespace):
    """Create ``namespace``, tolerating the case where it already exists.

    :param namespace: name of the namespace to create
    :returns: None
    :raises: an ApiException whose status is not CONFLICT, or any
             unexpected exception, is logged and re-raised
    """
    body = {'metadata': {'name': namespace}}

    c = self._get_kubernetesclient_core()
    try:
        c.create_namespace(body)
    except ApiException as e:
        if e.status == httplib.CONFLICT:
            # The namespace is already there; only warn, do not fail.
            # LOG.warning is used because the "warn" alias is deprecated.
            LOG.warning("Namespace %s already exist." % namespace)
        else:
            LOG.error("Failed to create Namespace %s: %s"
                      % (namespace, e.body))
            raise
    except Exception as e:
        # Log under the method's real name so the message can be traced
        # back to this function.
        LOG.error("Kubernetes exception in "
                  "kube_create_namespace %s: %s" % (namespace, e))
        raise
|
||||
|
||||
def kube_copy_secret(self, name, src_namespace, dst_namespace):
    """Duplicate secret ``name`` from one namespace into another.

    The secret is read from ``src_namespace``, retargeted at
    ``dst_namespace`` and submitted as a new object.

    :param name: name of the secret to copy
    :param src_namespace: namespace the secret is read from
    :param dst_namespace: namespace the copy is created in
    :raises: any exception from the read or the create is logged and
             re-raised
    """
    core_api = self._get_kubernetesclient_core()
    try:
        secret = core_api.read_namespaced_secret(name, src_namespace)
        meta = secret.metadata
        # Drop the source object's resource version before re-creating it
        # (presumably the API rejects a create that carries one - confirm).
        meta.resource_version = None
        meta.namespace = dst_namespace
        core_api.create_namespaced_secret(dst_namespace, secret)
    except Exception as err:
        LOG.error("Failed to copy Secret %s from Namespace %s to Namespace "
                  "%s: %s" % (name, src_namespace, dst_namespace, err))
        raise
|
||||
|
@ -23,6 +23,7 @@ import time
|
||||
|
||||
from eventlet.green import subprocess
|
||||
import netaddr
|
||||
from oslo_concurrency import lockutils
|
||||
from oslo_log import log as logging
|
||||
from six.moves.urllib import error as urllib_error
|
||||
from six.moves.urllib import parse
|
||||
@ -33,6 +34,7 @@ from dccommon import consts
|
||||
from dccommon.drivers.openstack.keystone_v3 import KeystoneClient
|
||||
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
|
||||
from dccommon import exceptions
|
||||
from dccommon import kubeoperator
|
||||
from dccommon import utils as dccommon_utils
|
||||
from dcmanager.common import consts as dcmanager_consts
|
||||
from dcmanager.common import utils
|
||||
@ -66,6 +68,11 @@ REDFISH_HEADER = {'Content-Type': 'application/json',
|
||||
REDFISH_SYSTEMS_URL = '/redfish/v1/Systems'
|
||||
SUCCESSFUL_STATUS_CODES = [200, 202, 204]
|
||||
|
||||
# Lock name used to serialize copy_default_registry_key()
RVMC_LOCK_NAME = 'dc-rvmc-install'

# Secret that is copied, and the namespaces it is copied from and to
DEFAULT_REGISTRY_KEY = 'default-registry-key'
KUBE_SYSTEM_NAMESPACE = 'kube-system'
RVMC_NAMESPACE = 'rvmc'
|
||||
|
||||
|
||||
class SubcloudShutdown(object):
|
||||
"""Sends a shutdown signal to a Redfish controlled subcloud
|
||||
@ -272,6 +279,22 @@ class SubcloudInstall(object):
|
||||
if k in consts.BMC_INSTALL_VALUES or k == 'image':
|
||||
f_out_rvmc_config_file.write(k + ': ' + v + '\n')
|
||||
|
||||
@lockutils.synchronized(RVMC_LOCK_NAME)
def copy_default_registry_key(self):
    """Copy default-registry-key secret for pulling rvmc image.

    If the secret is not yet present in the rvmc namespace, the namespace
    is created when missing and the secret is copied over from the
    kube-system namespace. Calls are serialized through the
    RVMC_LOCK_NAME lock applied by the decorator.

    :raises: any exception from the Kubernetes operations is logged with
             its traceback and re-raised unchanged
    """
    kube = kubeoperator.KubeOperator()
    try:
        if kube.kube_get_secret(
                DEFAULT_REGISTRY_KEY, RVMC_NAMESPACE) is not None:
            # Secret already in place; nothing to do.
            return

        if not kube.kube_get_namespace(RVMC_NAMESPACE):
            LOG.info("Creating rvmc namespace")
            kube.kube_create_namespace(RVMC_NAMESPACE)

        LOG.info("Copying default-registry-key secret to rvmc namespace")
        kube.kube_copy_secret(
            DEFAULT_REGISTRY_KEY, KUBE_SYSTEM_NAMESPACE, RVMC_NAMESPACE)
    except Exception:
        LOG.exception("Failed to copy default-registry-key secret")
        # Bare raise keeps the original traceback intact.
        raise
|
||||
|
||||
def create_install_override_file(self, override_path, payload):
|
||||
|
||||
LOG.debug("create install override file")
|
||||
@ -664,6 +687,9 @@ class SubcloudInstall(object):
|
||||
# create the rvmc config file
|
||||
self.create_rvmc_config_file(override_path, payload)
|
||||
|
||||
# copy the default_registry_key secret to rvmc namespace
|
||||
self.copy_default_registry_key()
|
||||
|
||||
# remove the bmc values from the payload
|
||||
for k in consts.BMC_INSTALL_VALUES:
|
||||
if k in payload:
|
||||
|
Loading…
Reference in New Issue
Block a user