diff --git a/whitebox_neutron_tempest_plugin/common/constants.py b/whitebox_neutron_tempest_plugin/common/constants.py
index 375d27b..0beff5a 100644
--- a/whitebox_neutron_tempest_plugin/common/constants.py
+++ b/whitebox_neutron_tempest_plugin/common/constants.py
@@ -14,6 +14,7 @@
 # under the License.
 
 GLOBAL_IP = '1.1.1.1'
+METADATA_SERVICE_IP = '169.254.169.254'
 NCAT_PORT = 65000
 NCAT_TIMEOUT = 30
 IP_HEADER_LENGTH = 20
diff --git a/whitebox_neutron_tempest_plugin/tests/scenario/base.py b/whitebox_neutron_tempest_plugin/tests/scenario/base.py
index 24b6138..433787f 100644
--- a/whitebox_neutron_tempest_plugin/tests/scenario/base.py
+++ b/whitebox_neutron_tempest_plugin/tests/scenario/base.py
@@ -384,12 +384,34 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
         else:
             return False
 
+    @classmethod
+    def is_service_on_node(cls, service_name, ssh_client):
+        """Check whether a systemd service exists on a node.
+        Any non-alphabetic character in the name matches any character.
+        """
+        # NOTE(mblue): regex used to fit podified/devstack/tripleo
+        # different service names.
+        service_regex = re.sub(r'[^a-zA-Z]', '.', service_name)
+        host_ip = ssh_client.host
+        LOG.debug("Checking for service '%s' (regex) existence on host '%s'.",
+                  service_regex, host_ip)
+        # '|| true' avoids a nonzero exit (and exception) on no match.
+        catch = ssh_client.exec_command(
+            "systemctl --type=service | grep '{}' || true".format(
+                service_regex)).strip()
+        if catch:
+            LOG.debug("Service found on host '%s':\n%s",
+                      host_ip, catch)
+            return True
+        LOG.debug("Service not found on host '%s' using regex '%s'.",
+                  host_ip, service_regex)
+        return False
+
     @classmethod
     def reset_node_service(cls, service_name, ssh_client,
                            wait_until_active=True, timeout=30):
         # NOTE(mblue): Globbing works on podified/devstack/tripleo
         service_glob = re.sub(r'[^a-zA-Z]', '?', service_name)
-        host_ip = ssh_client.__dict__['host']
+        host_ip = ssh_client.host
         LOG.debug("Restarting service '%s' on host '%s'.",
                   service_glob, host_ip)
         ssh_client.exec_command(
diff --git a/whitebox_neutron_tempest_plugin/tests/scenario/test_metadata_rate_limiting.py b/whitebox_neutron_tempest_plugin/tests/scenario/test_metadata_rate_limiting.py
new file mode 100644
index 0000000..0c0cc36
--- /dev/null
+++ b/whitebox_neutron_tempest_plugin/tests/scenario/test_metadata_rate_limiting.py
@@ -0,0 +1,316 @@
+# Copyright 2024 Red Hat, Inc.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+from time import sleep
+
+from oslo_log import log
+from tempest import config
+from tempest.lib.common.utils import data_utils
+from tempest.lib import decorators
+from tempest.lib import exceptions
+
+from whitebox_neutron_tempest_plugin.common import constants
+from whitebox_neutron_tempest_plugin.tests.scenario import base as wb_base
+
+CONF = config.CONF
+WB_CONF = CONF.whitebox_neutron_plugin_options
+LOG = log.getLogger(__name__)
+
+
+class TestMetadataRateLimiting(wb_base.BaseTempestWhiteboxTestCase):
+    """Test class for verifying rate limiting of the metadata service.
+
+    This class tests the functionality of the metadata service's
+    rate limiting feature in both ML2/OVS and ML2/OVN environments.
+    """
+
+    @classmethod
+    def resource_setup(cls):
+        super(TestMetadataRateLimiting, cls).resource_setup()
+        cls.discover_nodes()
+        if cls.has_ovn_support:
+            cls.service_name = 'ovn metadata agent'
+        else:
+            cls.service_name = 'neutron l3 agent'
+        cls.metadata_nodes = [n for n in cls.nodes if cls.is_service_on_node(
+            cls.service_name, n['client'])]
+        if WB_CONF.openstack_type == 'devstack':
+            cls.metadata_conf_file = (
+                '/etc/neutron/neutron_ovn_metadata_agent.ini')
+        else:
+            cls.metadata_conf_file = WB_CONF.neutron_config
+        # OSP resources
+        cls.rand_name = data_utils.rand_name(
+            cls.__name__.rsplit('.', 1)[-1])
+        cls.keypair = cls.create_keypair(name=cls.rand_name)
+        cls.secgroup = cls.os_primary.network_client.create_security_group(
+            name=data_utils.rand_name('secgroup'))
+        cls.create_loginable_secgroup_rule(
+            cls.secgroup['security_group']['id'])
+        # Enable the metadata rate limiting feature.
+        cls.cmd_base = ('sudo crudini --set ' + cls.metadata_conf_file +
+                        ' metadata_rate_limiting')
+        LOG.debug("Enabling metadata rate limiting via configuration.")
+        cls._set_rate_limiting_config(rate_limit_enabled='true')
+
+    @classmethod
+    def run_cmd_on_nodes(cls, cmd):
+        """Run a command on the metadata nodes.
+
+        Args:
+        - cmd (str): The command to run.
+        """
+        for node in cls.metadata_nodes:
+            node['client'].exec_command(cmd)
+
+    @classmethod
+    def _restart_metadata_agent(cls):
+        """Restart the metadata agents."""
+        # NOTE(mblue): podman inspect health/healthcheck output inconsistent
+        # in podified/tripleo envs, systemctl is-active used instead.
+        LOG.debug("Restarting the metadata agents")
+        for node in cls.metadata_nodes:
+            cls.reset_node_service(cls.service_name, node['client'])
+
+    @classmethod
+    def _set_rate_limiting_config(cls, **kwargs):
+        """Set the metadata rate limiting configuration.
+
+        Args:
+        - **kwargs: Configuration parameters as key-value pairs.
+        """
+        # TODO(mblue): when conf change supported on computes in podified env,
+        # verify test fully instead of skipping.
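+        # Illustrative example (not part of the patch's behavior beyond
+        # what the code below does): a call such as
+        #     cls._set_rate_limiting_config(rate_limit_enabled='true',
+        #                                   base_query_rate_limit=10)
+        # results in the following section in the metadata agent config:
+        #     [metadata_rate_limiting]
+        #     rate_limit_enabled = true
+        #     base_query_rate_limit = 10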
+        for key, value in kwargs.items():
+            if value is not None:
+                LOG.debug(
+                    'Setting metadata rate limiting configuration:\n'
+                    f'File - {cls.metadata_conf_file}\n'
+                    'Section - metadata_rate_limiting\n'
+                    f'Parameter - {key}\n'
+                    f'Value - {value}\n')
+                if WB_CONF.openstack_type == 'podified':
+                    cls.set_service_setting(
+                        'compute',
+                        cls.metadata_conf_file,
+                        'metadata_rate_limiting',
+                        key, value)
+                else:
+                    cls.run_cmd_on_nodes(f"{cls.cmd_base} {key} {value}")
+        cls._restart_metadata_agent()
+
+    @classmethod
+    def resource_cleanup(cls):
+        super(TestMetadataRateLimiting, cls).resource_cleanup()
+        # Disable the rate limiting configuration.
+        cls._disable_metadata_rate_limiting_config()
+
+    @classmethod
+    def _reset_config(cls):
+        """Reset the metadata rate limiting configuration."""
+        # NOTE(mblue): 'oc patch' can't remove values, so set a maximal
+        # request count over a minimal window instead.
+        min_duration = 1
+        max_rate = 2 ** 16 - 2
+        cls._set_rate_limiting_config(
+            base_query_rate_limit=max_rate,
+            burst_window_duration=min_duration,
+            burst_query_rate_limit=max_rate,
+            base_window_duration=min_duration
+        )
+        LOG.debug(
+            "Set metadata rate limiting configuration to permissive values.")
+
+    @classmethod
+    def _disable_metadata_rate_limiting_config(cls):
+        """Disable metadata rate limiting via configuration."""
+        LOG.debug("Disabling metadata rate limiting via configuration.")
+        cls._set_rate_limiting_config(rate_limit_enabled='false')
+
+    def _test_limiting(self, limit, machine, exceed=True):
+        """Send requests to the metadata service and check the responses.
+
+        Args:
+        - limit (int): The number of requests to send.
+        - machine (dict): The VM to send requests from.
+        - exceed (bool): Whether the requests should exceed the limit.
+        """
+        if exceed:
+            requests = self._send_requests(machine, limit + 1)
+            for response in requests[:-1]:
+                self.assertIn("latest", response)
+            self.assertIn("Too Many Requests", requests[-1])
+        else:
+            requests = self._send_requests(machine, limit)
+            for response in requests:
+                self.assertIn("latest", response)
+
+    @staticmethod
+    def _send_requests(vm, limit):
+        """Send requests to the metadata service.
+
+        The requests are sent from the given VM with a retry mechanism.
+
+        Args:
+        - vm (dict): The VM to send requests from.
+        - limit (int): The number of requests to send.
+        """
+        responses = []
+        num_of_tries = 10
+        cmd = f"curl http://{constants.METADATA_SERVICE_IP}"
+
+        for i in range(num_of_tries):
+            try:
+                for _ in range(limit):
+                    LOG.debug("Sending request to metadata service")
+                    response = vm['ssh_client'].exec_command(cmd)
+                    responses.append(response)
+                # All requests in this batch were successful.
+                return responses
+            except exceptions.SSHExecCommandFailed:
+                responses = []
+                if i < num_of_tries - 1:
+                    sleep(3)
+                else:
+                    raise exceptions.SSHExecCommandFailed(
+                        "Failed to connect after multiple retries")
+
+    @decorators.idempotent_id('806fd684-af37-48a1-aa6f-a98524724142')
+    def test_metadata_rate_limiting(self):
+        """Test the metadata service's rate limiting functionality.
+
+        Ensure that the metadata service correctly applies rate limits
+        to incoming requests and responds appropriately when those limits
+        are exceeded.
+
+        Steps:
+        1. Create a server with its network resources.
+        2. Send a series of requests to the metadata service from the server.
+        3. Check the responses to verify the rate limiting behavior.
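+
+        Expected behavior: with base_query_rate_limit N, the first N
+        requests return the metadata index (containing "latest") and
+        the next request is answered with "Too Many Requests".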
+ """ + + LOG.debug("Test the metadata service's rate limiting") + base_query_rate_limit = 10 + base_window_duration = 15 + + # modify the configuration for limiting + self._set_rate_limiting_config( + base_query_rate_limit=base_query_rate_limit, + base_window_duration=base_window_duration + ) + + # create resources + vm = self._create_vms_by_topology(num_vms_created=1) + self.check_connectivity(host=vm['ssh_client'].host, + ssh_client=vm['ssh_client']) + + # wait seconds for the metadata to recover from + # the blocked status after the VM was booted + sleep(base_window_duration) + + self._test_limiting(base_query_rate_limit, vm) + self._reset_config() + + @decorators.idempotent_id('16381121-8a23-41db-8167-390c7ba1fe77') + def test_metadata_burst_rate_limiting(self): + """Test the metadata service's burst rate limiting. + ensure that the metadata service correctly let burst requests + to pass through and responds appropriately when those limits + are exceeded. + + Steps: + 1. Create a server with its network resources. + 2. Send burst requests to the metadata service from the server. + 3. Check the responses to verify the rate limiting behavior. + """ + + burst_query_rate_limit = 7 + base_query_rate_limit = 15 + burst_window_duration = 5 + base_window_duration = 60 + + LOG.debug("Test the metadata service's burst rate limiting") + # modify the configuration for limiting + self._set_rate_limiting_config( + base_query_rate_limit=base_query_rate_limit, + burst_window_duration=burst_window_duration, + burst_query_rate_limit=burst_query_rate_limit, + base_window_duration=base_window_duration + ) + + # create resources + vm = self._create_vms_by_topology(num_vms_created=1) + self.check_connectivity(host=vm['ssh_client'].host, + ssh_client=vm['ssh_client']) + + # wait seconds for the metadata to recover from + # the blocked status after the VM was booted + sleep(base_window_duration) + + # Since the number of metadata requests don't exceed the base or the + # burst query rate limit, all of them should get "OK" response + self._test_limiting(burst_query_rate_limit, vm, exceed=False) + + # Wait for haproxy to reset the burst window and then test it returns + # "Too Many Requests" after exceeding the burst query rate limit + sleep(burst_window_duration * 2) + self._test_limiting(burst_query_rate_limit, vm) + self._reset_config() + + @decorators.idempotent_id('d564beda-5860-4c5f-96ac-13eb0995f7b7') + def test_metadata_base_and_burst_rate_limiting(self): + """Test the metadata service's base and burst rate limiting. + + Steps: + 1. Create a server with its network resources. + 2. Send burst requests to the metadata service from the server. + 3. Wait for the burst window to reset. + 4. Send additional requests to the metadata service from the server + to the base rate limit. + 3. Check the responses to verify the rate limiting behavior. 
+ """ + + # Configuration values + base_query_rate_limit = 10 + base_window_duration = 60 + burst_query_rate_limit = 7 + burst_window_duration = 5 + + LOG.debug("Test the metadata service's base and burst rate limiting") + + # Set rate limiting configuration + self._set_rate_limiting_config( + base_query_rate_limit=base_query_rate_limit, + burst_window_duration=burst_window_duration, + burst_query_rate_limit=burst_query_rate_limit, + base_window_duration=base_window_duration + ) + + # Create resources + vm = self._create_vms_by_topology(num_vms_created=1) + self.check_connectivity(host=vm['ssh_client'].host, + ssh_client=vm['ssh_client']) + + # Wait for base window duration to ensure metadata service is ready + sleep(base_window_duration) + + # Test burst limit + self._test_limiting(burst_query_rate_limit, vm, exceed=False) + + # Wait for burst window to reset, but still be under the base window + sleep(burst_window_duration * 2) + + # Send additional requests to verify they're within the base limit + # but not the burst limit + self._test_limiting(base_query_rate_limit - burst_query_rate_limit, vm) + self._reset_config()