Merge "Migrate metadata rate limiting tests"

This commit is contained in:
Zuul 2024-04-10 09:32:27 +00:00 committed by Gerrit Code Review
commit 38e2beadbb
3 changed files with 340 additions and 1 deletion

View File

@@ -14,6 +14,7 @@
# under the License.
GLOBAL_IP = '1.1.1.1'
METADATA_SERVICE_IP = '169.254.169.254'
NCAT_PORT = 65000
NCAT_TIMEOUT = 30
IP_HEADER_LENGTH = 20
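# NOTE: METADATA_SERVICE_IP is the well-known link-local address of the
# metadata service; the rate limiting tests query it from guest VMs with,
# for example, 'curl http://169.254.169.254'.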

View File

@@ -384,12 +384,34 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
else:
return False
@classmethod
def is_service_on_node(cls, service_name, ssh_client):
"""Checks systemctl service existence on node using pattern.
Any non a-z char could be any char.
"""
# NOTE(mblue): regex used to fit podified/devstack/tripleo
# different service names.
service_regex = re.sub(r'[^a-zA-Z]', '.', service_name)
host_ip = ssh_client.host
LOG.debug("Checking for service '%s' (regex) existence on host '%s'.",
service_regex, host_ip)
catch = ssh_client.exec_command(
"systemctl --type=service | grep '{}'".format(
service_regex)).strip()
if catch:
LOG.debug("Service found on host '%s':\n%s",
host_ip, catch)
return True
LOG.debug("Service not found on host '%s' using glob '%s'.",
host_ip, service_regex)
return False
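# Illustrative example of the derived pattern (assuming the service name
# used by the rate limiting tests):
#   >>> re.sub(r'[^a-zA-Z]', '.', 'ovn metadata agent')
#   'ovn.metadata.agent'
# which matches unit names such as 'tripleo_ovn_metadata_agent.service'
# as well as the corresponding devstack/podified service names.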
@classmethod
def reset_node_service(cls, service_name, ssh_client,
wait_until_active=True, timeout=30):
# NOTE(mblue): Globbing works on podified/devstack/tripleo
service_glob = re.sub(r'[^a-zA-Z]', '?', service_name)
host_ip = ssh_client.host
LOG.debug("Restarting service '%s' on host '%s'.",
service_glob, host_ip)
ssh_client.exec_command(

View File

@@ -0,0 +1,316 @@
# Copyright 2024 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from time import sleep
from oslo_log import log
from tempest import config
from tempest.lib.common.utils import data_utils
from tempest.lib import decorators
from tempest.lib import exceptions
from whitebox_neutron_tempest_plugin.common import constants
from whitebox_neutron_tempest_plugin.tests.scenario import base as wb_base
CONF = config.CONF
WB_CONF = CONF.whitebox_neutron_plugin_options
LOG = log.getLogger(__name__)
class TestMetadataRateLimiting(wb_base.BaseTempestWhiteboxTestCase):
"""Test class for verifying the rate limiting of metadata service.
This class tests the functionality of the metadata service's
rate limiting feature on both ML2/OVS and ML2/OVN environments.
"""
@classmethod
def resource_setup(cls):
super(TestMetadataRateLimiting, cls).resource_setup()
cls.discover_nodes()
if cls.has_ovn_support:
cls.service_name = 'ovn metadata agent'
else:
cls.service_name = 'neutron l3 agent'
cls.metadata_nodes = [n for n in cls.nodes if cls.is_service_on_node(
cls.service_name, n['client'])]
if WB_CONF.openstack_type == 'devstack':
cls.metadata_conf_file = (
'/etc/neutron/neutron_ovn_metadata_agent.ini')
else:
cls.metadata_conf_file = WB_CONF.neutron_config
# OSP resources
cls.rand_name = data_utils.rand_name(
cls.__name__.rsplit('.', 1)[-1])
cls.keypair = cls.create_keypair(name=cls.rand_name)
cls.secgroup = cls.os_primary.network_client.create_security_group(
name=data_utils.rand_name('secgroup'))
cls.create_loginable_secgroup_rule(
cls.secgroup['security_group']['id'])
# enable metadata rate limiting feature
cls.cmd_base = 'sudo crudini --set ' + cls.metadata_conf_file\
+ ' metadata_rate_limiting'
LOG.debug("Enable the metadata rate limiting using configuration.")
cls._set_rate_limiting_config(rate_limit_enabled='true')
@classmethod
def run_cmd_on_nodes(cls, cmd):
"""Run a command on the given nodes.
Args:
- cmd (str): The command to run.
"""
for node in cls.metadata_nodes:
node['client'].exec_command(cmd)
@classmethod
def _restart_metadata_agent(cls):
"""Restart the metadata agent."""
# NOTE(mblue): podman inspect health/healthcheck output inconsistent
# in podified/tripleo envs, systemctl is-active used instead.
LOG.debug("Restarting the metadata agents")
for node in cls.metadata_nodes:
cls.reset_node_service(cls.service_name, node['client'])
@classmethod
def _set_rate_limiting_config(cls, **kwargs):
"""Set the metadata rate limiting configuration
with optional arguments.
Args:
- **kwargs: Configuration parameters as key-value pairs.
"""
# TODO(mblue): when conf change supported on computes in podified env,
# verify test fully instead of skipping.
for key, value in kwargs.items():
if value is not None:
LOG.debug(
'Setting metadata rate limiting configuration:\n'
f'File - {cls.metadata_conf_file}\n'
'Section - metadata_rate_limiting\n'
f'Parameter - {key}\n'
f'Value - {value}\n')
if WB_CONF.openstack_type == 'podified':
cls.set_service_setting(
'compute',
cls.metadata_conf_file,
'metadata_rate_limiting',
key, value)
else:
cls.run_cmd_on_nodes(f"{cls.cmd_base} {key} {value}")
cls._restart_metadata_agent()
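# Illustrative example (assuming a devstack node and the values used by
# the tests below): the command executed on each metadata node would be,
# e.g.,
#   sudo crudini --set /etc/neutron/neutron_ovn_metadata_agent.ini \
#       metadata_rate_limiting base_query_rate_limit 10
# followed by a restart of the metadata agents to apply the change.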
@classmethod
def resource_cleanup(cls):
super(TestMetadataRateLimiting, cls).resource_cleanup()
# disable the rate limiting configuration
cls._disable_metadata_rate_limiting_config()
@classmethod
def _reset_config(cls):
"""Reset the metadata rate limiting configuration."""
# NOTE(mblue): 'oc patch' can't remove values, so set the max query count
# over the min window duration instead
min_duration = 1
max_rate = 2 ** 16 - 2
cls._set_rate_limiting_config(
base_query_rate_limit=max_rate,
burst_window_duration=min_duration,
burst_query_rate_limit=max_rate,
base_window_duration=min_duration
)
LOG.debug(
"Set metadata rate limiting configuration permissive values.")
@classmethod
def _disable_metadata_rate_limiting_config(cls):
"""Disable the metadata rate limiting using configuration."""
LOG.debug("Disable the metadata rate limiting using configuration.")
cls._set_rate_limiting_config(rate_limit_enabled='false')
def _test_limiting(self, limit, machine, exceed=True):
"""send requests to the metadata service and check the responses.
The requests are sent from the given VM with retries mechanism.
Args:
- limit (int): The number of requests to send.
- machine (dict): The VM to send requests from.
- exceed (bool): Whether the requests should exceed the limit.
"""
if exceed:
requests = self._send_requests(machine, limit + 1)
for response in requests[:-1]:
self.assertIn("latest", response)
self.assertIn("Too Many Requests", requests[-1])
else:
requests = self._send_requests(machine, limit)
for response in requests:
self.assertIn("latest", response)
@staticmethod
def _send_requests(vm, limit):
"""Send requests to the metadata service.
The requests are sent from the given VM with a retry mechanism.
Args:
- vm (dict): The VM to send requests from.
- limit (int): The number of requests to send.
"""
responses = []
num_of_tries = 10
cmd = f"curl http://{constants.METADATA_SERVICE_IP}"
for i in range(num_of_tries):
try:
for _ in range(limit):
LOG.debug("Sending request to metadata service")
response = vm['ssh_client'].exec_command(cmd)
responses.append(response)
# All requests in this batch were successful.
return responses
except exceptions.SSHExecCommandFailed:
responses = []
if i < num_of_tries - 1:
sleep(3)
else:
raise exceptions.SSHExecCommandFailed(
"Failed to connect after multiple retries")
@decorators.idempotent_id('806fd684-af37-48a1-aa6f-a98524724142')
def test_metadata_rate_limiting(self):
"""Test the metadata service's rate limiting functionality.
Ensure that the metadata service correctly applies rate limits
to incoming requests and responds appropriately when those limits
are exceeded.
Steps:
1. Create a server with its network resources.
2. Send a series of requests to the metadata service from the server.
3. Check the responses to verify the rate limiting behavior.
"""
LOG.debug("Test the metadata service's rate limiting")
base_query_rate_limit = 10
base_window_duration = 15
# modify the configuration for limiting
self._set_rate_limiting_config(
base_query_rate_limit=base_query_rate_limit,
base_window_duration=base_window_duration
)
# create resources
vm = self._create_vms_by_topology(num_vms_created=1)
self.check_connectivity(host=vm['ssh_client'].host,
ssh_client=vm['ssh_client'])
# wait <base_window_duration> seconds for the metadata service to recover
# from the blocked status after the VM was booted
sleep(base_window_duration)
self._test_limiting(base_query_rate_limit, vm)
self._reset_config()
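# Illustrative timeline for the test above: with base_query_rate_limit=10
# and base_window_duration=15, the first 10 requests within the 15 second
# window are answered normally and the 11th is rejected with
# 'Too Many Requests'.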
@decorators.idempotent_id('16381121-8a23-41db-8167-390c7ba1fe77')
def test_metadata_burst_rate_limiting(self):
"""Test the metadata service's burst rate limiting.
Ensure that the metadata service correctly lets burst requests
pass through and responds appropriately when those limits
are exceeded.
Steps:
1. Create a server with its network resources.
2. Send burst requests to the metadata service from the server.
3. Check the responses to verify the rate limiting behavior.
"""
burst_query_rate_limit = 7
base_query_rate_limit = 15
burst_window_duration = 5
base_window_duration = 60
LOG.debug("Test the metadata service's burst rate limiting")
# modify the configuration for limiting
self._set_rate_limiting_config(
base_query_rate_limit=base_query_rate_limit,
burst_window_duration=burst_window_duration,
burst_query_rate_limit=burst_query_rate_limit,
base_window_duration=base_window_duration
)
# create resources
vm = self._create_vms_by_topology(num_vms_created=1)
self.check_connectivity(host=vm['ssh_client'].host,
ssh_client=vm['ssh_client'])
# wait <base_window_duration> seconds for the metadata service to recover
# from the blocked status after the VM was booted
sleep(base_window_duration)
# Since the number of metadata requests doesn't exceed the base or the
# burst query rate limit, all of them should get an "OK" response
self._test_limiting(burst_query_rate_limit, vm, exceed=False)
# Wait for haproxy to reset the burst window and then test it returns
# "Too Many Requests" after exceeding the burst query rate limit
sleep(burst_window_duration * 2)
self._test_limiting(burst_query_rate_limit, vm)
self._reset_config()
@decorators.idempotent_id('d564beda-5860-4c5f-96ac-13eb0995f7b7')
def test_metadata_base_and_burst_rate_limiting(self):
"""Test the metadata service's base and burst rate limiting.
Steps:
1. Create a server with its network resources.
2. Send burst requests to the metadata service from the server.
3. Wait for the burst window to reset.
4. Send additional requests to the metadata service from the server,
up to the base rate limit.
5. Check the responses to verify the rate limiting behavior.
"""
# Configuration values
base_query_rate_limit = 10
base_window_duration = 60
burst_query_rate_limit = 7
burst_window_duration = 5
LOG.debug("Test the metadata service's base and burst rate limiting")
# Set rate limiting configuration
self._set_rate_limiting_config(
base_query_rate_limit=base_query_rate_limit,
burst_window_duration=burst_window_duration,
burst_query_rate_limit=burst_query_rate_limit,
base_window_duration=base_window_duration
)
# Create resources
vm = self._create_vms_by_topology(num_vms_created=1)
self.check_connectivity(host=vm['ssh_client'].host,
ssh_client=vm['ssh_client'])
# Wait for base window duration to ensure metadata service is ready
sleep(base_window_duration)
# Test burst limit
self._test_limiting(burst_query_rate_limit, vm, exceed=False)
# Wait for burst window to reset, but still be under the base window
sleep(burst_window_duration * 2)
# Send the remaining requests allowed under the base limit plus one more,
# verifying that the base limit is enforced across burst windows
self._test_limiting(base_query_rate_limit - burst_query_rate_limit, vm)
self._reset_config()
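# Illustrative resulting configuration while the burst tests run (assuming
# a devstack node and the values above), e.g. in
# /etc/neutron/neutron_ovn_metadata_agent.ini:
#   [metadata_rate_limiting]
#   rate_limit_enabled = true
#   base_query_rate_limit = 10
#   base_window_duration = 60
#   burst_query_rate_limit = 7
#   burst_window_duration = 5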