Implement powering EDPM nodes off and on in the podified topology

This can be done using the OCP APIs if the nodes are provisioned using the
BareMetal operator. Otherwise (e.g. in the default deployment done with
install_yamls) powering nodes on/off is not possible through Tobiko.

Change-Id: I18d3f35bbd03b7346b31302fd715b9bb0e9ac531
This commit is contained in:
Slawek Kaplonski
2024-02-08 14:06:02 +01:00
parent 5460bfd6b2
commit 76c5cbb78a
2 changed files with 99 additions and 6 deletions
+90 -6
View File
@@ -17,6 +17,7 @@ import netaddr
import openshift_client as oc
from oslo_log import log
import tobiko
from tobiko.shell import sh
LOG = log.getLogger(__name__)
@@ -24,6 +25,8 @@ LOG = log.getLogger(__name__)
OSP_CONTROLPLANE = 'openstackcontrolplane'
OSP_DP_NODESET = 'openstackdataplanenodeset'
DP_SSH_SECRET_NAME = 'secret/dataplane-ansible-ssh-private-key-secret'
OSP_BM_HOST = 'baremetalhost.metal3.io'
OSP_BM_CRD = 'baremetalhosts.metal3.io'
OCP_WORKERS = 'nodes'
OVN_DP_SERVICE_NAME = 'ovn'
@@ -34,13 +37,35 @@ EDPM_NETWORKER_GROUP = 'edpm-networker'
EDPM_OTHER_GROUP = 'edpm-other'
_IS_OC_CLIENT_AVAILABLE = None
_IS_BM_CRD_AVAILABLE = None
def _is_oc_client_available() -> bool:
    """Tell whether the ``oc`` command line client can be found.

    The outcome of the ``which oc`` lookup is stored in the module-level
    ``_IS_OC_CLIENT_AVAILABLE`` variable, so the shell command is executed
    only while that variable is still ``None``.

    :return: ``True`` when ``which oc`` exits with status 0, ``False``
        otherwise (including when the command raises
        ``sh.ShellCommandFailed``).
    """
    # pylint: disable=global-statement
    global _IS_OC_CLIENT_AVAILABLE
    if _IS_OC_CLIENT_AVAILABLE is None:
        try:
            _IS_OC_CLIENT_AVAILABLE = (
                sh.execute('which oc').exit_status == 0)
        except sh.ShellCommandFailed:
            # a failing lookup means the client is not installed
            _IS_OC_CLIENT_AVAILABLE = False
    return _IS_OC_CLIENT_AVAILABLE
def _is_baremetal_crd_available() -> bool:
    """Tell whether the BareMetalHost CRD is registered in the cluster.

    Without the ``oc`` client the answer is always ``False`` and nothing is
    cached. Otherwise the list of CRD names is queried only while
    ``_IS_BM_CRD_AVAILABLE`` is still ``None``; the answer is then stored
    there and reused by later calls.

    :return: ``True`` when any CRD qualified name contains ``OSP_BM_CRD``,
        ``False`` when none does or when the query raises
        ``oc.OpenShiftPythonException``.
    """
    # pylint: disable=global-statement
    global _IS_BM_CRD_AVAILABLE
    if not _is_oc_client_available():
        return False
    if _IS_BM_CRD_AVAILABLE is not None:
        # answer already known from a previous call
        return _IS_BM_CRD_AVAILABLE
    try:
        crd_names = oc.selector("crd").qnames()
        found = any(OSP_BM_CRD in crd_name for crd_name in crd_names)
    except oc.OpenShiftPythonException:
        found = False
    _IS_BM_CRD_AVAILABLE = found
    return found
def _get_group(services):
@@ -140,3 +165,62 @@ def list_ocp_workers():
'addresses': _get_ocp_worker_addresses(node_dict)
})
return ocp_workers
def power_on_edpm_node(nodename):
    """Request the EDPM node called ``nodename`` to be switched on.

    :param nodename: name of the node, forwarded unchanged to
        ``_set_edpm_node_online_status`` with ``online=True``.
    """
    _set_edpm_node_online_status(nodename=nodename, online=True)
def power_off_edpm_node(nodename):
    """Request the EDPM node called ``nodename`` to be switched off.

    :param nodename: name of the node, forwarded unchanged to
        ``_set_edpm_node_online_status`` with ``online=False``.
    """
    _set_edpm_node_online_status(nodename=nodename, online=False)
def _set_edpm_node_online_status(nodename, online):
    """Set ``spec['online']`` on the BareMetalHost CR of an EDPM node.

    The function never raises ``oc.OpenShiftPythonException``: every failure
    is logged and the function simply returns ``None``. When the change is
    applied successfully it waits for the node's ``poweredOn`` status to
    match ``online``.

    :param nodename: name of the ``OSP_BM_HOST`` resource to modify.
    :param online: boolean stored in ``spec['online']`` and then expected
        as the resulting ``poweredOn`` status.
    """
    if not _is_baremetal_crd_available():
        LOG.info("BareMetal operator is not available in the deployment. "
                 "Starting and stopping EDPM nodes is not supported.")
        return

    try:
        bm_node = oc.selector(f"{OSP_BM_HOST}/{nodename}").objects()[0]
    except oc.OpenShiftPythonException as err:
        # logged as an error, like every other failure in this function
        LOG.error(f"Error while trying to get BareMetal Node '{nodename}' "
                  f"from Openshift. Error: {err}")
        return
    except IndexError:
        # the selector matched no object at all
        LOG.error(f"Node {nodename} not found in the {OSP_BM_HOST} CRs.")
        return

    bm_node.model.spec['online'] = online
    try:
        # NOTE(slaweq): returned status is 0 when all operations were
        # finished successfully. Otherwise status will be different than
        # 0, like in the shell scripts
        apply_status = bm_node.apply().status()
        if apply_status:
            # do not wait for a power state change that was never requested
            LOG.error(f"Applying new online state: {online} for the node: "
                      f"{nodename} finished with status: {apply_status}.")
        else:
            _wait_for_poweredOn_status(nodename, online)
    except oc.OpenShiftPythonException as err:
        LOG.error(f"Error while applying new online state: {online} for "
                  f"the node: {nodename}. Error: {err}")
def _wait_for_poweredOn_status(nodename, expected_status,
                               timeout: tobiko.Seconds = None):
    """Poll the BareMetalHost CR until ``poweredOn`` has the wanted value.

    Each attempt of the ``tobiko.retry`` loop re-reads the
    ``status['poweredOn']`` field of the ``OSP_BM_HOST`` resource named
    ``nodename``. An ``oc.OpenShiftPythonException`` raised while reading is
    logged and the attempt counts as unsuccessful.

    :param nodename: name of the ``OSP_BM_HOST`` resource to watch.
    :param expected_status: value ``poweredOn`` is compared against.
    :param timeout: passed to ``tobiko.retry`` as ``timeout``.
    :return: ``True`` as soon as the status equals ``expected_status``.
    """
    for attempt in tobiko.retry(timeout=timeout,
                                count=10,
                                interval=5.,
                                default_timeout=30):
        LOG.debug(f"Checking power status of the '{nodename}'.")
        try:
            bm_hosts = oc.selector(f"{OSP_BM_HOST}/{nodename}").objects()
            powered_on = bm_hosts[0].model.status['poweredOn']
        except oc.OpenShiftPythonException as err:
            LOG.error("Error while trying to get 'poweredOn' state of "
                      f"the node {nodename}. Error: {err}")
        else:
            if powered_on != expected_status:
                LOG.debug(f"Actual poweredOn state is: '{powered_on}' != "
                          f" '{expected_status}'")
            else:
                LOG.debug(f"Actual poweredOn state of the node {nodename} "
                          f"is: '{powered_on}' which is as expected.")
                return True
        # NOTE(review): presumably raises once the retry limits are
        # exceeded - confirm against tobiko.retry
        attempt.check_limits()
+9
View File
@@ -24,6 +24,7 @@ from tobiko.podified import _edpm
from tobiko.podified import _openshift
from tobiko.podified import containers
from tobiko import rhosp
from tobiko.shell import sh
from tobiko.shell import ssh
LOG = log.getLogger(__name__)
@@ -169,9 +170,17 @@ class EdpmNode(rhosp.RhospNode):
def power_on_node(self):
    """Power on this EDPM node and read its hostname afterwards.

    The node is powered on through ``_openshift.power_on_edpm_node`` and
    then its hostname is fetched with ``sh.get_hostname`` using the node's
    SSH client; the hostname is only used for the final debug message.
    """
    LOG.debug(f"Ensuring EDPM node {self.name} power is on...")
    # NOTE(review): presumably closed so that no stale connection is reused
    # after the power state change - confirm
    self.ssh_client.close()
    _openshift.power_on_edpm_node(self.name)
    hostname = sh.get_hostname(ssh_client=self.ssh_client)
    # this is an EDPM node, so name it like the other messages of the class
    LOG.debug(f"EDPM node {self.name} power is on ("
              f"hostname={hostname})")
def power_off_node(self):
    """Power off this EDPM node.

    The node's SSH client is closed first, then the power off is requested
    through ``_openshift.power_off_edpm_node``.
    """
    node_name = self.name
    LOG.debug(f"Ensuring EDPM node {node_name} power is off...")
    self.ssh_client.close()
    _openshift.power_off_edpm_node(node_name)
    LOG.debug(f"EDPM node {self.name} power is off.")
class OcpWorkerNode(rhosp.RhospNode):