From b5611521571db2edde592bf18462d279fb3dee44 Mon Sep 17 00:00:00 2001
From: Chris Buggy
Date: Fri, 13 Jun 2025 15:40:59 +0100
Subject: [PATCH] Add Health Monitor test for the OVN provider

Add a health monitor test for the OVN provider to validate how the
operating status progresses when backend members in the load balancer
pool are reported as offline.

As this test is considered disruptive, it is added to the faults
folder.

The test validates the following operating status sequence:
ONLINE, DEGRADED, ERROR, DEGRADED, ONLINE.

Closes: OSPRH-14729
Assisted-by: cursor - claude-4.5-sonnet
Co-authored-by: Fernando Royo
Co-authored-by: Chris Buggy
Change-Id: Ifa03b8763f79892befe1e9bc79cacab0d4b37d9c
Signed-off-by: Fernando Royo
---
 tobiko/openstack/octavia/__init__.py       |   7 +
 tobiko/openstack/octavia/_client.py        |  20 +++
 tobiko/openstack/octavia/_constants.py     |  16 +-
 tobiko/openstack/octavia/_deployers.py     |  39 +++++
 .../faults/octavia/test_health_monitor.py  | 139 ++++++++++++++++++
 5 files changed, 217 insertions(+), 4 deletions(-)
 create mode 100644 tobiko/tests/faults/octavia/test_health_monitor.py

diff --git a/tobiko/openstack/octavia/__init__.py b/tobiko/openstack/octavia/__init__.py
index 380183233..fc4dc6ec4 100644
--- a/tobiko/openstack/octavia/__init__.py
+++ b/tobiko/openstack/octavia/__init__.py
@@ -38,6 +38,7 @@ OctaviaClientType = _client.OctaviaClientType
 list_members = _client.list_members
 list_load_balancers = _client.list_load_balancers
 find_load_balancer = _client.find_load_balancer
+get_load_balancer = _client.get_load_balancer
 create_load_balancer = _client.create_load_balancer
 find_listener = _client.find_listener
 create_listener = _client.create_listener
@@ -45,6 +46,9 @@ find_pool = _client.find_pool
 create_pool = _client.create_pool
 find_member = _client.find_member
 create_member = _client.create_member
+create_health_monitor = _client.create_health_monitor
+find_health_monitor = _client.find_health_monitor
+get_health_monitor = _client.get_health_monitor
 
 # Waiters
 wait_for_status = _waiters.wait_for_status
@@ -68,6 +72,7 @@ ACTIVE = _constants.ACTIVE
 ERROR = _constants.ERROR
 PENDING_UPDATE = _constants.PENDING_UPDATE
 ONLINE = _constants.ONLINE
+DEGRADED = _constants.DEGRADED
 PROTOCOL_HTTP = _constants.PROTOCOL_HTTP
 PROTOCOL_TCP = _constants.PROTOCOL_TCP
 LB_ALGORITHM_ROUND_ROBIN = _constants.LB_ALGORITHM_ROUND_ROBIN
@@ -95,7 +100,9 @@ LB_OVN_NAME = _constants.LB_OVN_NAME
 LISTENER_OVN_NAME = _constants.LISTENER_OVN_NAME
 POOL_OVN_NAME = _constants.POOL_OVN_NAME
 MEMBER_OVN_NAME_PREFIX = _constants.MEMBER_OVN_NAME_PREFIX
+HM_NAME = _constants.HM_NAME
 
 # Deployers
 deploy_ipv4_amphora_lb = _deployers.deploy_ipv4_amphora_lb
 deploy_ipv4_ovn_lb = _deployers.deploy_ipv4_ovn_lb
+deploy_hm = _deployers.deploy_hm
diff --git a/tobiko/openstack/octavia/_client.py b/tobiko/openstack/octavia/_client.py
index 888cbeff2..888d87ab7 100644
--- a/tobiko/openstack/octavia/_client.py
+++ b/tobiko/openstack/octavia/_client.py
@@ -125,3 +125,23 @@ def find_member(member_name: str, pool: str):
 def create_member(member_kwargs):
     os_sdk_client = openstacksdkclient.openstacksdk_client()
     return os_sdk_client.load_balancer.create_member(**member_kwargs)
+
+
+def create_health_monitor(hm_kwargs):
+    os_sdk_client = openstacksdkclient.openstacksdk_client()
+    return os_sdk_client.load_balancer.create_health_monitor(**hm_kwargs)
+
+
+def find_health_monitor(hm_name: str):
+    os_sdk_client = openstacksdkclient.openstacksdk_client()
+    return os_sdk_client.load_balancer.find_health_monitor(hm_name)
+
+
+def get_load_balancer(lb_id: str):
+    os_sdk_client = openstacksdkclient.openstacksdk_client()
+    return os_sdk_client.load_balancer.get_load_balancer(lb_id)
+
+
+def get_health_monitor(hm_id: str):
+    os_sdk_client = openstacksdkclient.openstacksdk_client()
+    return os_sdk_client.load_balancer.get_health_monitor(hm_id)
diff --git a/tobiko/openstack/octavia/_constants.py b/tobiko/openstack/octavia/_constants.py
index 4f28b3adc..7c05adf15 100644
--- a/tobiko/openstack/octavia/_constants.py
+++ b/tobiko/openstack/octavia/_constants.py
@@ -22,6 +22,7 @@ ACTIVE = 'ACTIVE'
 ERROR = 'ERROR'
 PENDING_UPDATE = 'PENDING_UPDATE'
 ONLINE = 'ONLINE'
+DEGRADED = 'DEGRADED'
 
 # Octavia protocols
 PROTOCOL_HTTP = 'HTTP'
@@ -59,22 +60,29 @@ LISTENER_OVN_NAME = 'tobiko_octavia_tcp_listener'
 POOL_OVN_NAME = 'tobiko_octavia_tcp_pool'
 MEMBER_OVN_NAME_PREFIX = 'tobiko_octavia_tcp_member'
 
+# Health Monitor resources
+HM_NAME = 'tobiko_octavia_hm'
+
 # Providers/lb-names dictionary
 OCTAVIA_PROVIDERS_NAMES = {
     'lb': {
         AMPHORA_PROVIDER: LB_AMP_NAME,
-        OVN_PROVIDER: LB_OVN_NAME
+        OVN_PROVIDER: LB_OVN_NAME,
     },
     'listener': {
         AMPHORA_PROVIDER: LISTENER_AMP_NAME,
-        OVN_PROVIDER: LISTENER_OVN_NAME
+        OVN_PROVIDER: LISTENER_OVN_NAME,
     },
     'pool': {
         AMPHORA_PROVIDER: POOL_AMP_NAME,
-        OVN_PROVIDER: POOL_OVN_NAME
+        OVN_PROVIDER: POOL_OVN_NAME,
     },
     'member': {
         AMPHORA_PROVIDER: MEMBER_AMP_NAME_PREFIX,
-        OVN_PROVIDER: MEMBER_OVN_NAME_PREFIX
+        OVN_PROVIDER: MEMBER_OVN_NAME_PREFIX,
+    },
+    'healthmonitor': {
+        AMPHORA_PROVIDER: HM_NAME,
+        OVN_PROVIDER: HM_NAME,
     }
 }
diff --git a/tobiko/openstack/octavia/_deployers.py b/tobiko/openstack/octavia/_deployers.py
index be5945e6a..f8707df25 100644
--- a/tobiko/openstack/octavia/_deployers.py
+++ b/tobiko/openstack/octavia/_deployers.py
@@ -147,6 +147,45 @@ def deploy_ipv4_lb(provider: str,
     return lb, listener, pool
 
 
+def deploy_hm(name: str,
+              pool_id: str,
+              delay: int = 3,
+              hm_timeout: int = 3,
+              hm_type: str = _constants.PROTOCOL_TCP,
+              max_retries: int = 2):
+    """Deploy a health monitor and attach it to the pool.
+
+    :param name: the health monitor name. For example: ovn health monitor
+    :param pool_id: the id of the pool to attach the hm to.
+    :param delay: time, in seconds, between sending probes to members
+    :param hm_timeout: time, in seconds, a probe waits before timing out
+    :param hm_type: type of probe sent to verify the member state.
+    :param max_retries: successful probes required to mark a member ONLINE
+    :return: the health monitor found or created
+    """
+    if pool_id is not None:
+        health_monitor_kwargs = {
+            'name': name,
+            'pool_id': pool_id,
+            'delay': delay,
+            'timeout': hm_timeout,
+            'max_retries': max_retries,
+            'type': hm_type,
+        }
+        hm = octavia.find_health_monitor(name)
+        if hm:
+            if pool_id in [pool['id'] for pool in hm.pools]:
+                LOG.debug(f'health_monitor {hm.name}: {hm.id} exists.')
+                return hm
+            else:
+                err_message = f'health_monitor {hm.name} used in another pool'
+                LOG.error(err_message)
+                tobiko.fail(err_message)
+        else:
+            hm = octavia.create_health_monitor(health_monitor_kwargs)
+            return hm
+
+
 @tobiko.interworker_synched('deploy_ipv4_amphora_lb')
 def deploy_ipv4_amphora_lb(protocol: str = _constants.PROTOCOL_HTTP,
                            protocol_port: int = 80,
diff --git a/tobiko/tests/faults/octavia/test_health_monitor.py b/tobiko/tests/faults/octavia/test_health_monitor.py
new file mode 100644
index 000000000..807fc6002
--- /dev/null
+++ b/tobiko/tests/faults/octavia/test_health_monitor.py
@@ -0,0 +1,139 @@
+# Copyright (c) 2025 Red Hat
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# mypy: disable-error-code="attr-defined"
+from __future__ import absolute_import
+
+import testtools
+from oslo_log import log
+
+import tobiko
+from tobiko.openstack import keystone
+from tobiko.openstack import neutron
+from tobiko.openstack import nova
+from tobiko.openstack import octavia
+from tobiko.openstack import stacks
+
+
+LOG = log.getLogger(__name__)
+
+
+@neutron.skip_unless_is_ovn()
+@keystone.skip_if_missing_service(name='octavia')
+class OctaviaOVNProviderHealthMonitorTest(testtools.TestCase):
+    # pylint: disable=no-member
+    """Octavia OVN provider health monitor test.
+
+    Create an OVN provider load balancer with 2 members.
+    Create a health monitor attached to the load balancer pool.
+    Stop the first backend server and validate the DEGRADED status,
+    stop the other backend server and validate the ERROR status,
+    then start both servers again and validate that the load balancer
+    goes back to ONLINE.
+    """
+    lb = None
+    listener = None
+    pool = None
+    health_monitor = None
+    server_stack = tobiko.required_fixture(
+        stacks.OctaviaServerStackFixture)
+    other_server_stack = tobiko.required_fixture(
+        stacks.OctaviaOtherServerStackFixture)
+
+    def setUp(self):
+        # pylint: disable=no-member
+        super(OctaviaOVNProviderHealthMonitorTest, self).setUp()
+
+        self.lb, self.listener, self.pool = octavia.deploy_ipv4_ovn_lb(
+            servers_stacks=[self.server_stack, self.other_server_stack]
+        )
+
+        self.health_monitor = octavia.deploy_hm(octavia.HM_NAME, self.pool.id)
+
+    def test_hm(self) -> None:
+        # Wait for health monitor to be ONLINE
+        octavia.wait_for_status(
+            object_id=self.health_monitor.id,
+            status_key=octavia.OPERATING_STATUS,
+            status=octavia.ONLINE,
+            get_client=octavia.get_health_monitor
+        )
+        LOG.info(f"Health monitor {self.health_monitor.name} is ONLINE")
+
+        # Wait for load balancer to be ONLINE
+        octavia.wait_for_status(
+            object_id=self.lb.id,
+            status_key=octavia.OPERATING_STATUS,
+            status=octavia.ONLINE,
+            get_client=octavia.get_load_balancer
+        )
+        LOG.info(f"Load balancer {self.lb.name} is ONLINE")
+
+        # Stop first server and wait for DEGRADED status
+        server_one = nova.find_server(
+            id=self.server_stack.outputs.server_id)
+        other_server = nova.find_server(
+            id=self.other_server_stack.outputs.server_id)
+
+        server_one.stop()
+        nova.wait_for_server_status(server=server_one.id, timeout=900,
+                                    status='SHUTOFF')
+        octavia.wait_for_status(
+            object_id=self.lb.id,
+            status_key=octavia.OPERATING_STATUS,
+            status=octavia.DEGRADED,
+            get_client=octavia.get_load_balancer
+        )
+        LOG.info(f"Load balancer {self.lb.name} is DEGRADED after stopping "
+                 "the first server")
+
+        # Stop second server and wait for ERROR status
+        other_server.stop()
+        nova.wait_for_server_status(server=other_server.id, timeout=900,
+                                    status='SHUTOFF')
+        octavia.wait_for_status(
+            object_id=self.lb.id,
+            status_key=octavia.OPERATING_STATUS,
+            status=octavia.ERROR,
+            get_client=octavia.get_load_balancer
+        )
+        LOG.info(f"Load balancer {self.lb.name} is ERROR after stopping both "
+                 "servers")
+
+        # Start second server and wait for DEGRADED status
+        other_server.start()
+        nova.wait_for_server_status(server=other_server.id, timeout=900,
+                                    status='ACTIVE')
+
+        octavia.wait_for_status(
+            object_id=self.lb.id,
+            status_key=octavia.OPERATING_STATUS,
+            status=octavia.DEGRADED,
+            get_client=octavia.get_load_balancer
+        )
+        LOG.info(f"Load balancer {self.lb.name} is DEGRADED after starting "
+                 "the second server")
+
+        # Start first server and wait for ONLINE status
+        server_one.start()
+        nova.wait_for_server_status(server=server_one.id, timeout=900,
+                                    status='ACTIVE')
+        octavia.wait_for_status(
+            object_id=self.lb.id,
+            status_key=octavia.OPERATING_STATUS,
+            status=octavia.ONLINE,
+            get_client=octavia.get_load_balancer
+        )
+        LOG.info(f"Load balancer {self.lb.name} is ONLINE after starting "
+                 "both servers")
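
For reviewers who want to reproduce the ONLINE, DEGRADED, ERROR, DEGRADED,
ONLINE progression outside of tobiko, here is a minimal sketch that polls the
load balancer operating status with openstacksdk directly. It is not part of
the patch: the helper name, polling values and the lb_id placeholder are
illustrative assumptions; only openstack.connect(),
conn.load_balancer.get_load_balancer() and the operating_status field come
from the SDK.

    # Minimal sketch (not part of the patch): poll a load balancer's
    # operating_status until it reaches an expected value. Assumes
    # openstacksdk credentials are available (clouds.yaml or OS_* env vars)
    # and that lb_id refers to an OVN load balancer with a TCP health
    # monitor attached, as deployed by the test above.
    import time

    import openstack

    def wait_for_operating_status(conn, lb_id, expected,
                                  timeout=300, interval=5):
        deadline = time.time() + timeout
        while time.time() < deadline:
            lb = conn.load_balancer.get_load_balancer(lb_id)
            if lb.operating_status == expected:
                return lb
            time.sleep(interval)
        raise TimeoutError(f'{lb_id} did not reach {expected} in {timeout}s')

    conn = openstack.connect()
    lb_id = 'REPLACE_WITH_LB_ID'  # placeholder, not taken from the patch
    # Stopping and starting the backend servers (e.g. with
    # `openstack server stop`) should drive the same status sequence
    # that this test asserts through octavia.wait_for_status().
    wait_for_operating_status(conn, lb_id, 'ONLINE')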