Merge "Improve test_reinstall_standby_host from weekly regression"

This commit is contained in:
Zuul
2025-12-04 20:18:21 +00:00
committed by Gerrit Code Review
6 changed files with 133 additions and 61 deletions

View File

@@ -2,6 +2,7 @@ import re
import time
from framework.logging.automation_logger import get_logger
from framework.ssh.ssh_connection import SSHConnection
from keywords.base_keyword import BaseKeyword
from keywords.cloud_platform.command_wrappers import source_openrc
from keywords.cloud_platform.fault_management.alarms.objects.alarm_list_object import AlarmListObject
@@ -13,23 +14,36 @@ class AlarmListKeywords(BaseKeyword):
Class for alarm list keywords
"""
def __init__(self, ssh_connection: SSHConnection) -> None:
    """
    Constructor.

    Args:
        ssh_connection (SSHConnection): Active SSH connection used for remote operations.
    """
    self._ssh_connection = ssh_connection
    # Defaults later returned by get_check_interval_in_seconds() and
    # get_timeout_in_seconds(); both values are expressed in seconds.
    self._check_interval_in_seconds = 3
    self._timeout_in_seconds = 600
def get_alarm_list(self) -> AlarmListOutput:
    """
    Keyword to get all alarms.

    Sends ``fm alarm-list --nowrap`` (wrapped by source_openrc) over the SSH
    connection, validates the command's return code, and wraps the raw output
    in an AlarmListOutput.

    Returns:
        AlarmListOutput: Output object built from the alarm list command.
    """
    output = self._ssh_connection.send(source_openrc("fm alarm-list --nowrap"))
    self.validate_success_return_code(self._ssh_connection)
    return AlarmListOutput(output)
def alarm_list(self) -> AlarmListObject:
"""
Keyword to get all alarms.
Returns:
AlarmListObject: List of alarm objects retrieved from the system.
"""
output = self._ssh_connection.send(source_openrc("fm alarm-list --nowrap"))
self.validate_success_return_code(self._ssh_connection)
@@ -37,20 +51,21 @@ class AlarmListKeywords(BaseKeyword):
return alarms.get_alarms()
def wait_for_all_alarms_cleared(self):
def wait_for_all_alarms_cleared(self) -> None:
"""
Wait for all alarms to be cleared.
This method waits for all alarms to be cleared in this SSH connection within the period defined by
'get_timeout_in_seconds()'. Otherwise, this method raises TimeoutError exception.
get_timeout_in_seconds(). Otherwise, this method raises TimeoutError exception.
Notes:
The alarms in this SSH connection are checked every 'get_check_interval_in_seconds()' seconds.
The alarms in this SSH connection are checked every get_check_interval_in_seconds() seconds.
Returns:
None
Returns: None
Raises:
TimeoutError: if some alarm can not be cleared within a period defined by
'get_timeout_in_seconds()' seconds; False otherwise.
TimeoutError: if some alarm can not be cleared within a period defined by
the `get_timeout_in_seconds()` seconds; False otherwise.
"""
# Retrieves the current alarms on this SSH connection
@@ -71,8 +86,10 @@ class AlarmListKeywords(BaseKeyword):
alarm_ids = ", ".join([alarm.get_alarm_id() for alarm in alarms])
raise TimeoutError(f"The alarms with the following IDs: {alarm_ids} could not be cleared within {self.get_timeout_in_seconds()} seconds.")
def wait_for_alarms_cleared(self, alarms: list[AlarmListObject]):
def wait_for_alarms_cleared(self, alarms: list[AlarmListObject]) -> None:
"""
Wait for alarms to be cleared.
This method waits for the alarms defined in 'alarms' to be cleared in this SSH connection within the period
defined by 'get_timeout_in_seconds()'. Otherwise, a TimeoutError exception is raised.
@@ -80,16 +97,14 @@ class AlarmListKeywords(BaseKeyword):
The alarms in this SSH connection are checked every 'get_check_interval_in_seconds()' seconds.
Args:
alarms (list[AlarmListObject]): The list of alarms to be checked to see if they have been cleared in this
SSH connection.
alarms (list[AlarmListObject]): The list of alarms to be checked to see if they have been cleared
in this SSH connection.
Returns:
None
Returns: None
Raises:
TimeoutError: if some alarm can not be cleared within a period defined by
'get_timeout_in_seconds()' seconds; False otherwise.
the `get_timeout_in_seconds()` seconds; False otherwise.
"""
current_alarms = self.alarm_list()
alarm_ids = ", ".join([alarm.get_alarm_id() for alarm in alarms])
@@ -118,6 +133,8 @@ class AlarmListKeywords(BaseKeyword):
def wait_for_alarms_to_appear(self, alarms: list[AlarmListObject]) -> None:
"""
Wait for an alarm to appear
Waits for the specified alarms to appear on the SSH connection within the timeout
period defined by 'get_timeout_in_seconds()'. Validates Alarm ID, Reason Text, and Entity ID.
@@ -157,8 +174,7 @@ class AlarmListKeywords(BaseKeyword):
def alarms_match(self, observed_alarm_object: AlarmListObject, expected_alarm_object: AlarmListObject) -> bool:
"""
Compares two AlarmListObject instances for equality based on
alarm ID, reason text, and entity ID.
Compares two AlarmListObject instances for equality based on alarm ID, reason text, and entity ID.
Args:
observed_alarm_object (AlarmListObject): The current alarm object to compare against.
@@ -187,17 +203,17 @@ class AlarmListKeywords(BaseKeyword):
def get_timeout_in_seconds(self) -> int:
    """
    Get the maximum time, in seconds, to wait for the alarms to be cleared.

    Default value: 600.

    Returns:
        int: The maximum number of seconds to wait for the alarms to be cleared.
    """
    timeout_in_seconds = self._timeout_in_seconds
    return timeout_in_seconds
def set_timeout_in_seconds(self, timeout_in_seconds: int):
"""
Sets the integer representation of the maximum time in seconds to wait for the alarms to be cleared.
Args:
timeout_in_seconds (int): An integer representing the maximum time to wait for the alarms to be cleared.
"""
@@ -205,29 +221,30 @@ class AlarmListKeywords(BaseKeyword):
def get_check_interval_in_seconds(self) -> int:
    """
    Get the interval, in seconds, at which this instance checks the alarms again.

    Default value: 3.

    Returns:
        int: The number of seconds between two consecutive alarm checks.
    """
    check_interval_in_seconds = self._check_interval_in_seconds
    return check_interval_in_seconds
def set_check_interval_in_seconds(self, check_interval_in_seconds: int) -> int:
    """
    Set the interval, in seconds, at which this instance checks the alarms again.

    Default value: 3.

    Args:
        check_interval_in_seconds (int): An integer representing the interval in seconds to check the alarms again.

    Returns:
        int: The interval in seconds now in effect for this instance.
    """
    # Store the argument before returning: the previous body only returned the
    # stored value, so calling the setter never changed the interval.
    self._check_interval_in_seconds = check_interval_in_seconds
    return self._check_interval_in_seconds
def get_ssh_connection(self):
def get_ssh_connection(self) -> SSHConnection:
"""
Gets the SSH connection of this AlarmListKeywords instance.
Returns:
SSHConnection: the SSH connection of this AlarmListKeywords instance.

View File

@@ -14,22 +14,51 @@ class AlarmListOutput:
for value in output_values:
alarm_list_object = AlarmListObject()
if 'Alarm ID' in value:
alarm_list_object.set_alarm_id(value['Alarm ID'])
if 'Reason Text' in value:
alarm_list_object.set_reason_text(value['Reason Text'])
if 'Entity ID' in value:
alarm_list_object.set_entity_id(value['Entity ID'])
if 'Severity' in value:
alarm_list_object.set_severity(value['Severity'])
if 'Time Stamp' in value:
alarm_list_object.set_time_stamp(value['Time Stamp'])
if "Alarm ID" in value:
alarm_list_object.set_alarm_id(value["Alarm ID"])
if "Reason Text" in value:
alarm_list_object.set_reason_text(value["Reason Text"])
if "Entity ID" in value:
alarm_list_object.set_entity_id(value["Entity ID"])
if "Severity" in value:
alarm_list_object.set_severity(value["Severity"])
if "Time Stamp" in value:
alarm_list_object.set_time_stamp(value["Time Stamp"])
self.alarms.append(alarm_list_object)
def get_alarms(self) -> list[AlarmListObject]:
    """
    Return the list of alarms.

    Returns:
        list[AlarmListObject]: The alarm objects held in ``self.alarms``.
    """
    return self.alarms
def alarms_id(self) -> list[str]:
    """
    Collect the alarm ID of every alarm held by this output object.

    Returns:
        list[str]: Alarm IDs, in the same order as ``self.alarms``.
    """
    collected_ids = []
    for alarm_entry in self.alarms:
        collected_ids.append(alarm_entry.get_alarm_id())
    return collected_ids
@staticmethod
def is_new_alarm_id_since(alarm_ids_before: list[str], alarm_ids_after: list[str]) -> bool:
    """
    Check if there are new alarms compared to a previous state.

    Only alarm IDs are compared: an ID counts as new when it appears in
    ``alarm_ids_after`` but not in ``alarm_ids_before``. Duplicates and
    ordering are ignored, and IDs that disappeared do not count.

    Args:
        alarm_ids_before (list[str]): Alarm IDs before the test.
        alarm_ids_after (list[str]): Alarm IDs after the test.

    Returns:
        bool: True if new alarms are present, False if no new alarms.
    """
    # A non-empty set difference is truthy, so no intermediate list or length check is needed.
    return bool(set(alarm_ids_after).difference(alarm_ids_before))

View File

@@ -1,7 +1,9 @@
from framework.ssh.ssh_connection import SSHConnection
from framework.validation.validation import validate_equals
from keywords.base_keyword import BaseKeyword
from keywords.cloud_platform.fault_management.alarms.alarm_list_keywords import AlarmListKeywords
from keywords.cloud_platform.system.application.system_application_list_keywords import SystemApplicationListKeywords
from keywords.cloud_platform.system.host.system_host_list_keywords import SystemHostListKeywords
from keywords.k8s.pods.kubectl_get_pods_keywords import KubectlGetPodsKeywords
@@ -45,3 +47,12 @@ class HealthKeywords(BaseKeyword):
"""Function to validate all apps are healthy and applied"""
healthy_status = ["applied", "uploaded"]
SystemApplicationListKeywords(self.ssh_connection).validate_all_apps_status(healthy_status)
def validate_hosts_health(self):
    """
    Validate that all hosts are healthy.

    Retrieves the system host list over this keyword's SSH connection and, for
    each host, validates that its availability is "available", its
    administrative state is "unlocked" and its operational state is "enabled".
    """
    host_list_keywords = SystemHostListKeywords(self.ssh_connection)
    hosts = host_list_keywords.get_system_host_list().get_hosts()
    for host in hosts:
        validate_equals(
            host.get_availability(),
            "available",
            f"The host {host.get_host_name()} availability is {host.get_availability()}",
        )
        validate_equals(
            host.get_administrative(),
            "unlocked",
            f"The host {host.get_host_name()} administrative is {host.get_administrative()}",
        )
        validate_equals(
            host.get_operational(),
            "enabled",
            f"The host {host.get_host_name()} operational is {host.get_operational()}",
        )

View File

@@ -156,7 +156,7 @@ class SystemHostLockKeywords(BaseKeyword):
except TimeoutError: # Alarm still exists, we can't unlock
raise KeywordException("Failed unlock pre-check. Application apply was in progress")
def wait_for_host_unlocked(self, host_name: str, unlock_wait_timeout: int = 1800) -> bool:
def wait_for_host_unlocked(self, host_name: str, unlock_wait_timeout: int = 2800) -> bool:
"""
Wait for the host to be unlocked

View File

@@ -91,7 +91,7 @@ class SystemHostReinstallKeywords(BaseKeyword):
"""
self.ssh_connection.send(source_openrc(f"system host-reinstall {host_name}"))
self.validate_success_return_code(self.ssh_connection)
validate_equals_with_retry(lambda: SystemHostListKeywords(self.ssh_connection).get_system_host_list().get_host(host_name).get_availability(), expected_value="offline", validation_description="Waiting for host to go offline")
validate_equals_with_retry(lambda: SystemHostListKeywords(self.ssh_connection).get_system_host_list().get_host(host_name).get_availability(), expected_value="offline", validation_description="Waiting for host to go offline", timeout=500)
is_host_reinstalled = self.wait_for_host_reinstall(host_name)
if not is_host_reinstalled:
host_value = SystemHostListKeywords(self.ssh_connection).get_system_host_list().get_host(host_name)

View File

@@ -4,11 +4,14 @@ from framework.logging.automation_logger import get_logger
from framework.validation.validation import validate_equals
from keywords.ceph.ceph_status_keywords import CephStatusKeywords
from keywords.cloud_platform.fault_management.alarms.alarm_list_keywords import AlarmListKeywords
from keywords.cloud_platform.fault_management.alarms.objects.alarm_list_output import AlarmListOutput
from keywords.cloud_platform.health.health_keywords import HealthKeywords
from keywords.cloud_platform.ssh.lab_connection_keywords import LabConnectionKeywords
from keywords.cloud_platform.system.host.system_host_list_keywords import SystemHostListKeywords
from keywords.cloud_platform.system.host.system_host_lock_keywords import SystemHostLockKeywords
from keywords.cloud_platform.system.host.system_host_reboot_keywords import SystemHostRebootKeywords
from keywords.cloud_platform.system.host.system_host_reinstall_keywords import SystemHostReinstallKeywords
from keywords.cloud_platform.system.host.system_host_swact_keywords import SystemHostSwactKeywords
from keywords.server.power_keywords import PowerKeywords
@@ -104,10 +107,13 @@ def test_reinstall_standby_host():
Test to validate standby controller reinstallation and ceph health.
Test Steps:
- Check the hosts healthy
- Check if controller-0 is the active controller
- Get the active alarms
- Lock standby controller
- Reinstall standby controller
- Unlock standby controller
- Checking if there are any active alarms
- Checking if there are any new active alarms
- Checking storage backend health after reinstall.
Args: None
@@ -115,28 +121,37 @@ def test_reinstall_standby_host():
ssh_connection = LabConnectionKeywords().get_active_controller_ssh()
system_host_list_keywords = SystemHostListKeywords(ssh_connection)
standby_controller = system_host_list_keywords.get_standby_controller().get_host_name()
system_host_lock_keywords = SystemHostLockKeywords(ssh_connection)
standby_controller = system_host_list_keywords.get_standby_controller().get_host_name()
system_host_reinstall_keywords = SystemHostReinstallKeywords(ssh_connection)
ceph_status_keywords = CephStatusKeywords(ssh_connection)
alarm_list_keyword = AlarmListKeywords(ssh_connection)
alarm_list_keywords = AlarmListKeywords(ssh_connection)
health_keywords = HealthKeywords(ssh_connection)
system_host_swact_keywords = SystemHostSwactKeywords(ssh_connection)
get_logger().log_test_case_step("Checking if there are any active alarms")
alarms = alarm_list_keyword.alarm_list()
validate_equals(alarms, [], "No active alarms")
get_logger().log_test_case_step("Check the hosts healthy")
health_keywords.validate_hosts_health()
get_logger().log_test_case_step("Lock standby controller")
get_logger().log_test_case_step("Check if controller-0 is the active controller")
if standby_controller == "controller-0":
system_host_swact_keywords.host_swact()
standby_controller = system_host_list_keywords.get_standby_controller().get_host_name()
get_logger().log_test_case_step("Get the active alarms")
initial_alarm_list_ids = alarm_list_keywords.get_alarm_list().alarms_id()
get_logger().log_test_case_step(f"Lock {standby_controller}")
system_host_lock_keywords.lock_host(standby_controller)
get_logger().log_test_case_step("Reinstall standby controller")
get_logger().log_test_case_step(f"Reinstall {standby_controller}")
system_host_reinstall_keywords.reinstall_host(standby_controller)
get_logger().log_test_case_step("Unlock standby controller")
get_logger().log_test_case_step(f"Unlock {standby_controller}")
system_host_lock_keywords.unlock_host(standby_controller)
get_logger().log_test_case_step("Checking if there are any active alarms")
alarms = alarm_list_keyword.alarm_list()
validate_equals(alarms, [], "No active alarms")
get_logger().log_test_case_step("Checking if there are any new active alarms")
final_alarm_list_ids = alarm_list_keywords.get_alarm_list().alarms_id()
validate_equals(AlarmListOutput.is_new_alarm_id_since(initial_alarm_list_ids, final_alarm_list_ids), False, "No new alarms should be present")
get_logger().log_test_case_step("Checking storage backend health after reinstall.")
ceph_status_keywords.wait_for_ceph_health_status(expect_health_status=True)