From 5f4a41326d7b1da03e1929b4fd8bcdaf69da19ab Mon Sep 17 00:00:00 2001 From: Guillaume Espanel Date: Thu, 22 Sep 2022 11:27:04 +0200 Subject: [PATCH] Add rate-limiting to metadata agents Requests handled by the metadata-agents can now be rate-limited by source-ip. This is done to protect the OpenStack control plane against VMs querying the metadata endpoint in an overly enthusiastic way. Co-authored-by: Miguel Lavalle Related-Bug: #1989199 Change-Id: I748ccfa8b50496dcbcbe41fd22f84249a4d46b11 --- .../admin/config-metadata-rate-limiting.rst | 66 +++++++ doc/source/admin/config.rst | 1 + neutron/agent/l3_agent.py | 3 + neutron/agent/metadata/driver.py | 21 ++- neutron/agent/metadata_agent.py | 3 + neutron/agent/ovn/metadata/driver.py | 23 ++- neutron/agent/ovn/metadata_agent.py | 2 + neutron/common/metadata.py | 67 +++++++ neutron/conf/agent/dhcp.py | 5 +- neutron/conf/agent/metadata/config.py | 37 +++- neutron/tests/contrib/testing.filters | 2 +- .../agent/l3/test_metadata_proxy.py | 173 +++++++++++++++--- .../tests/unit/agent/metadata/test_driver.py | 52 +++++- .../unit/agent/ovn/metadata/test_driver.py | 44 ++++- ...tadata-rate-limiting-bf0c17a31f86ee16.yaml | 8 + 15 files changed, 459 insertions(+), 48 deletions(-) create mode 100644 doc/source/admin/config-metadata-rate-limiting.rst create mode 100644 releasenotes/notes/add-metadata-rate-limiting-bf0c17a31f86ee16.yaml diff --git a/doc/source/admin/config-metadata-rate-limiting.rst b/doc/source/admin/config-metadata-rate-limiting.rst new file mode 100644 index 00000000000..a5a61ce6dd9 --- /dev/null +++ b/doc/source/admin/config-metadata-rate-limiting.rst @@ -0,0 +1,66 @@ +.. _config-metadata-rate-limiting: + +==================================== +Metadata service query rate limiting +==================================== + +The OpenStack Networking service proxies the requests that VMs send to the +Compute service to obtain their metadata. 
The Networking service offers cloud +administrators the ability to limit the rate at which VMs query the Compute's +metadata service, in order to protect the OpenStack deployment from DoS or +misbehaved instances. + +Metadata requests rate limiting is configured through the following parameters +in the ``metadata_rate_limiting`` section of +``neutron.conf``: + +* ``rate_limit_enabled``: enables rate limiting of metadata requests. It is + a boolean that is set to ``False`` by default. +* ``ip_versions``: list of comma separated strings that specify the metadata + address versions (4 and/or 6) for which rate limiting must be enabled. The + default is to configure rate limiting only for the IPv4 address. +* ``base_window_duration``: defines in seconds the duration of the base time + sliding window in which query requests will be rate limited. The default + value is 10 seconds. +* ``base_query_rate_limit``: maximum number of requests to be allowed during + the base time window. The default value is 10 requests. +* ``burst_window_duration``: this parameter can be used to define, in seconds, + a shorter sliding window of time during which a requests rate higher than the + base one will be allowed. The default value is 10 seconds. +* ``burst_query_rate_limit``: maximum number of requests to be allowed during + the burst time window. The default value is 10 requests. + +.. note:: + These parameters are used to configure HAProxy servers to perform the rate + limiting. These servers run inside L3 routers and DHCP agents in the OVS + backend and the metadata agent in the OVN backend. + +.. note:: + At the moment, rate limiting can only be configured either for IPv4 or IPv6 + but not both at the same time, due to a limitation in the open source + version of HAProxy. + +.. note:: + From the point of view of the Networking services, the base and burst + windows are just two different sliding periods of time during which to + enforce two different metadata requests rate limits. 
The Networking service + doesn't enforce that the burst window should be shorter or that the burst + rate should be higher. It is recommended, though, that cloud administrators + use the burst window to allow, for shorter periods of time, a higher + requests rate than the one allowed during the base window, if there is a need + to do so. + +In the following ``neutron.conf`` snippet, the Networking service is configured +to allow VMs to query the IPv4 metadata service address 6 times over a 60 +seconds period, while allowing a higher rate of 2 queries during shorter +periods of 10 seconds each: + +.. code-block:: console + + [metadata_rate_limiting] + rate_limit_enabled = True + ip_versions = 4 + base_window_duration = 60 + base_query_rate_limit = 6 + burst_window_duration = 10 + burst_query_rate_limit = 2 diff --git a/doc/source/admin/config.rst b/doc/source/admin/config.rst index ddf57b2e37e..8b162129f81 100644 --- a/doc/source/admin/config.rst +++ b/doc/source/admin/config.rst @@ -24,6 +24,7 @@ Configuration config-ipv6 config-logging config-macvtap + config-metadata-rate-limiting config-mtu config-ndp-proxy config-network-segment-ranges diff --git a/neutron/agent/l3_agent.py b/neutron/agent/l3_agent.py index 0b8b704698e..1da309d1086 100644 --- a/neutron/agent/l3_agent.py +++ b/neutron/agent/l3_agent.py @@ -35,6 +35,9 @@ def register_opts(conf): l3_config.register_l3_agent_config_opts(l3_config.OPTS, conf) ha_conf.register_l3_agent_ha_opts(conf) meta_conf.register_meta_conf_opts(meta_conf.SHARED_OPTS, conf) + meta_conf.register_meta_conf_opts(meta_conf.METADATA_RATE_LIMITING_OPTS, + cfg=conf, + group=meta_conf.RATE_LIMITING_GROUP) config.register_interface_driver_opts_helper(conf) config.register_agent_state_opts_helper(conf) config.register_interface_opts(conf) diff --git a/neutron/agent/metadata/driver.py b/neutron/agent/metadata/driver.py index a4a62444e23..cd1fe0315be 100644 --- a/neutron/agent/metadata/driver.py +++ b/neutron/agent/metadata/driver.py @@ -47,21 
+47,24 @@ METADATA_SERVICE_NAME = 'metadata-proxy' HAPROXY_SERVICE = 'haproxy' PROXY_CONFIG_DIR = "ns-metadata-proxy" -_HAPROXY_CONFIG_TEMPLATE = comm_meta.METADATA_HAPROXY_GLOBAL + """ +_HEADER_CONFIG_TEMPLATE = """ + http-request del-header X-Neutron-%(res_type_del)s-ID + http-request set-header X-Neutron-%(res_type)s-ID %(res_id)s +""" + +_UNLIMITED_CONFIG_TEMPLATE = """ listen listener bind %(host)s:%(port)s %(bind_v6_line)s server metadata %(unix_socket_path)s - http-request del-header X-Neutron-%(res_type_del)s-ID - http-request set-header X-Neutron-%(res_type)s-ID %(res_id)s """ class HaproxyConfigurator(object): def __init__(self, network_id, router_id, unix_socket_path, host, port, - user, group, state_path, pid_file, host_v6=None, - bind_interface=None): + user, group, state_path, pid_file, rate_limiting_config, + host_v6=None, bind_interface=None): self.network_id = network_id self.router_id = router_id if network_id is None and router_id is None: @@ -76,6 +79,7 @@ class HaproxyConfigurator(object): self.state_path = state_path self.unix_socket_path = unix_socket_path self.pidfile = pid_file + self.rate_limiting_config = rate_limiting_config self.log_level = ( 'debug' if logging.is_debug_enabled(cfg.CONF) else 'info') # log-tag will cause entries to have the string pre-pended, so use @@ -133,7 +137,11 @@ class HaproxyConfigurator(object): cfg_info['res_id'] = self.router_id cfg_info['res_type_del'] = 'Network' - haproxy_cfg = _HAPROXY_CONFIG_TEMPLATE % cfg_info + haproxy_cfg = comm_meta.get_haproxy_config(cfg_info, + self.rate_limiting_config, + _HEADER_CONFIG_TEMPLATE, + _UNLIMITED_CONFIG_TEMPLATE) + LOG.debug("haproxy_cfg = %s", haproxy_cfg) cfg_dir = self.get_config_path(self.state_path) # uuid has to be included somewhere in the command line so that it can @@ -216,6 +224,7 @@ class MetadataDriver(object): group, conf.state_path, pid_file, + conf.metadata_rate_limiting, bind_address_v6, bind_interface) haproxy.create_config_file() diff --git 
a/neutron/agent/metadata_agent.py b/neutron/agent/metadata_agent.py index feee9917340..46be2e95983 100644 --- a/neutron/agent/metadata_agent.py +++ b/neutron/agent/metadata_agent.py @@ -33,6 +33,9 @@ def main(): meta.register_meta_conf_opts(meta.SHARED_OPTS) meta.register_meta_conf_opts(meta.UNIX_DOMAIN_METADATA_PROXY_OPTS) meta.register_meta_conf_opts(meta.METADATA_PROXY_HANDLER_OPTS) + meta.register_meta_conf_opts(meta.METADATA_RATE_LIMITING_OPTS, + cfg=cfg.CONF, + group=meta.RATE_LIMITING_GROUP) cache.register_oslo_configs(cfg.CONF) agent_conf.register_agent_state_opts_helper(cfg.CONF) service_conf.register_service_opts(service_conf.RPC_EXTRA_OPTS, cfg.CONF) diff --git a/neutron/agent/ovn/metadata/driver.py b/neutron/agent/ovn/metadata/driver.py index 2542fa60564..ec1abde1081 100644 --- a/neutron/agent/ovn/metadata/driver.py +++ b/neutron/agent/ovn/metadata/driver.py @@ -32,18 +32,22 @@ METADATA_SERVICE_NAME = 'metadata-proxy' HAPROXY_SERVICE = 'haproxy' PROXY_CONFIG_DIR = "ovn-metadata-proxy" -_HAPROXY_CONFIG_TEMPLATE = comm_meta.METADATA_HAPROXY_GLOBAL + """ +_HEADER_CONFIG_TEMPLATE = """ + http-request add-header X-OVN-%(res_type)s-ID %(res_id)s +""" + +_UNLIMITED_CONFIG_TEMPLATE = """ listen listener bind %(host)s:%(port)s server metadata %(unix_socket_path)s - http-request add-header X-OVN-%(res_type)s-ID %(res_id)s """ class HaproxyConfigurator(object): def __init__(self, network_id, router_id, unix_socket_path, host, - port, user, group, state_path, pid_file): + port, user, group, state_path, pid_file, + rate_limiting_config): self.network_id = network_id self.router_id = router_id if network_id is None and router_id is None: @@ -56,6 +60,7 @@ class HaproxyConfigurator(object): self.state_path = state_path self.unix_socket_path = unix_socket_path self.pidfile = pid_file + self.rate_limiting_config = rate_limiting_config self.log_level = ( 'debug' if logging.is_debug_enabled(cfg.CONF) else 'info') # log-tag will cause entries to have the string pre-pended, 
so use @@ -94,7 +99,8 @@ class HaproxyConfigurator(object): 'group': groupname, 'pidfile': self.pidfile, 'log_level': self.log_level, - 'log_tag': self.log_tag + 'log_tag': self.log_tag, + 'bind_v6_line': '', } if self.network_id: cfg_info['res_type'] = 'Network' @@ -103,7 +109,11 @@ class HaproxyConfigurator(object): cfg_info['res_type'] = 'Router' cfg_info['res_id'] = self.router_id - haproxy_cfg = _HAPROXY_CONFIG_TEMPLATE % cfg_info + haproxy_cfg = comm_meta.get_haproxy_config(cfg_info, + self.rate_limiting_config, + _HEADER_CONFIG_TEMPLATE, + _UNLIMITED_CONFIG_TEMPLATE) + LOG.debug("haproxy_cfg = %s", haproxy_cfg) cfg_dir = self.get_config_path(self.state_path) # uuid has to be included somewhere in the command line so that it can @@ -161,7 +171,8 @@ class MetadataDriver(object): user, group, conf.state_path, - pid_file) + pid_file, + conf.metadata_rate_limiting) haproxy.create_config_file() proxy_cmd = [HAPROXY_SERVICE, '-f', haproxy.cfg_path] diff --git a/neutron/agent/ovn/metadata_agent.py b/neutron/agent/ovn/metadata_agent.py index df188809bd7..d1c5d02fe1f 100644 --- a/neutron/agent/ovn/metadata_agent.py +++ b/neutron/agent/ovn/metadata_agent.py @@ -32,6 +32,8 @@ def main(): ovn_meta.register_meta_conf_opts(meta.SHARED_OPTS) ovn_meta.register_meta_conf_opts(meta.UNIX_DOMAIN_METADATA_PROXY_OPTS) ovn_meta.register_meta_conf_opts(meta.METADATA_PROXY_HANDLER_OPTS) + ovn_meta.register_meta_conf_opts(meta.METADATA_RATE_LIMITING_OPTS, + group=meta.RATE_LIMITING_GROUP) ovn_meta.register_meta_conf_opts(ovn_meta.OVS_OPTS, group='ovs') config.init(sys.argv[1:]) config.setup_logging() diff --git a/neutron/common/metadata.py b/neutron/common/metadata.py index cf57034cb30..72df3ad5da9 100644 --- a/neutron/common/metadata.py +++ b/neutron/common/metadata.py @@ -11,6 +11,11 @@ # License for the specific language governing permissions and limitations # under the License. 
+from neutron_lib import constants +from oslo_log import log as logging + +LOG = logging.getLogger(__name__) + PROXY_SERVICE_NAME = 'haproxy' PROXY_SERVICE_CMD = 'haproxy' @@ -44,3 +49,65 @@ defaults timeout server 32s timeout http-keep-alive 30s """ + +RATE_LIMITED_CONFIG_TEMPLATE = """ +backend base_rate_limiter + stick-table type %(ip_version)s size 10k expire %(stick_table_expire)ss store http_req_rate(%(base_window_duration)ss) + +backend burst_rate_limiter + stick-table type %(ip_version)s size 10k expire %(stick_table_expire)ss store http_req_rate(%(burst_window_duration)ss) + +listen listener + bind %(host)s:%(port)s + %(bind_v6_line)s + + http-request track-sc0 src table base_rate_limiter + http-request track-sc1 src table burst_rate_limiter + http-request deny deny_status 429 if { src_http_req_rate(base_rate_limiter) gt %(base_query_rate_limit)s } + http-request deny deny_status 429 if { src_http_req_rate(burst_rate_limiter) gt %(burst_query_rate_limit)s } + + server metadata %(unix_socket_path)s +""" # noqa: E501 line-length + + +def parse_ip_versions(ip_versions): + if not set(ip_versions).issubset({str(constants.IP_VERSION_4), + str(constants.IP_VERSION_6)}): + LOG.warning('Invalid metadata address IP versions: %s. Metadata rate ' + 'limiting will not be enabled.', ip_versions) + return + if len(ip_versions) != 1: + LOG.warning('Invalid metadata address IP versions: %s. Metadata rate ' + 'limiting cannot be enabled for IPv4 and IPv6 at the same ' + 'time. 
Metadata rate limiting will not be enabled.', + ip_versions) + return + return ip_versions[0] + + +def get_haproxy_config(cfg_info, rate_limiting_config, header_config_template, + unlimited_config_template): + ip_version = parse_ip_versions(rate_limiting_config.ip_versions) + if rate_limiting_config.rate_limit_enabled and ip_version: + cfg_info['ip_version'] = ( + 'ipv6' if ip_version == '6' else 'ip') + cfg_info['base_window_duration'] = ( + rate_limiting_config['base_window_duration']) + cfg_info['base_query_rate_limit'] = ( + rate_limiting_config['base_query_rate_limit']) + cfg_info['burst_window_duration'] = ( + rate_limiting_config['burst_window_duration']) + cfg_info['burst_query_rate_limit'] = ( + rate_limiting_config['burst_query_rate_limit']) + cfg_info['stick_table_expire'] = max( + rate_limiting_config['base_window_duration'], + rate_limiting_config['burst_window_duration']) + FINAL_CONFIG_TEMPLATE = (METADATA_HAPROXY_GLOBAL + + RATE_LIMITED_CONFIG_TEMPLATE + + header_config_template) + else: + FINAL_CONFIG_TEMPLATE = (METADATA_HAPROXY_GLOBAL + + unlimited_config_template + + header_config_template) + + return FINAL_CONFIG_TEMPLATE % cfg_info diff --git a/neutron/conf/agent/dhcp.py b/neutron/conf/agent/dhcp.py index cab642f1d90..be472bf235a 100644 --- a/neutron/conf/agent/dhcp.py +++ b/neutron/conf/agent/dhcp.py @@ -19,7 +19,7 @@ from oslo_config import cfg from neutron._i18n import _ from neutron.conf.agent import common - +from neutron.conf.agent.metadata import config as meta_conf DHCP_AGENT_OPTS = [ cfg.IntOpt('resync_interval', default=5, @@ -121,3 +121,6 @@ def register_agent_dhcp_opts(cfg=cfg.CONF): cfg.register_opts(DHCP_OPTS) cfg.register_opts(DNSMASQ_OPTS) cfg.register_opts(common.DHCP_PROTOCOL_OPTS) + meta_conf.register_meta_conf_opts(meta_conf.METADATA_RATE_LIMITING_OPTS, + cfg=cfg, + group=meta_conf.RATE_LIMITING_GROUP) diff --git a/neutron/conf/agent/metadata/config.py b/neutron/conf/agent/metadata/config.py index 1cb8f87121a..4de7260d6ff 
100644 --- a/neutron/conf/agent/metadata/config.py +++ b/neutron/conf/agent/metadata/config.py @@ -21,6 +21,7 @@ USER_MODE = 'user' GROUP_MODE = 'group' ALL_MODE = 'all' SOCKET_MODES = (DEDUCE_MODE, USER_MODE, GROUP_MODE, ALL_MODE) +RATE_LIMITING_GROUP = 'metadata_rate_limiting' SHARED_OPTS = [ cfg.StrOpt('metadata_proxy_socket', @@ -103,5 +104,37 @@ UNIX_DOMAIN_METADATA_PROXY_OPTS = [ ] -def register_meta_conf_opts(opts, cfg=cfg.CONF): - cfg.register_opts(opts) +METADATA_RATE_LIMITING_OPTS = [ + cfg.BoolOpt('rate_limit_enabled', + default=False, + help=_('Enable rate limiting on the metadata API.')), + cfg.ListOpt('ip_versions', + default=['4'], + help=_('Comma separated list of the metadata address IP ' + 'versions (4, 6) for which rate limiting will be ' + 'enabled. The default is to rate limit only for the ' + 'metadata IPv4 address. NOTE: at the moment, the open ' + 'source version of HAProxy only allows us to rate ' + 'limit for IPv4 or IPv6, but not both at the same ' + 'time.')), + cfg.IntOpt('base_window_duration', + default=10, + help=_("Duration (seconds) of the base window on the " + "metadata API.")), + cfg.IntOpt('base_query_rate_limit', + default=10, + help=_("Max number of queries to accept during the base " + "window.")), + cfg.IntOpt('burst_window_duration', + default=10, + help=_("Duration (seconds) of the burst window on the " + "metadata API.")), + cfg.IntOpt('burst_query_rate_limit', + default=10, + help=_("Max number of queries to accept during the burst " + "window.")), +] + + +def register_meta_conf_opts(opts, cfg=cfg.CONF, group=None): + cfg.register_opts(opts, group=group) diff --git a/neutron/tests/contrib/testing.filters b/neutron/tests/contrib/testing.filters index e345a975e3f..8a22e1d9e86 100644 --- a/neutron/tests/contrib/testing.filters +++ b/neutron/tests/contrib/testing.filters @@ -10,7 +10,7 @@ ping6_filter: CommandFilter, ping6, root ping_kill: KillFilter, root, ping, -2 # enable curl from namespace -curl_filter: RegExpFilter, 
/usr/bin/curl, root, curl, --max-time, \d+, -D-, http://[0-9a-z:./-]+ +curl_filter: RegExpFilter, /usr/bin/curl, root, curl, --max-time, \d+, -D-, http://[0-9a-z:./-\[\]\%]+ ncat_filter: CommandFilter, ncat, root ncat_kill: KillFilter, root, ncat, -9 ss_filter: CommandFilter, ss, root diff --git a/neutron/tests/functional/agent/l3/test_metadata_proxy.py b/neutron/tests/functional/agent/l3/test_metadata_proxy.py index a8737282035..a2835701d3a 100644 --- a/neutron/tests/functional/agent/l3/test_metadata_proxy.py +++ b/neutron/tests/functional/agent/l3/test_metadata_proxy.py @@ -15,6 +15,7 @@ import os.path import time +import netaddr from neutron_lib import constants import webob import webob.dec @@ -28,6 +29,7 @@ from neutron.tests.functional.agent.linux import helpers METADATA_REQUEST_TIMEOUT = 60 METADATA_REQUEST_SLEEP = 5 +TOO_MANY_REQUESTS_CODE = '429' class MetadataFakeProxyHandler(object): @@ -41,6 +43,23 @@ class MetadataFakeProxyHandler(object): class MetadataL3AgentTestCase(framework.L3AgentTestFramework): + """Test access to the l3-agent metadata proxy. + + The test cases in this class create: + * A l3-agent metadata service: + * A router (which creates a metadata proxy in the router namespace), + * A fake metadata server + * A "client" namespace (simulating a vm) with a port on router + internal subnet. + + The test cases query from the "client" namespace the metadata proxy on + http://169.254.169.254 or http://[fe80::a9fe:a9fe] and assert that the + metadata proxy forwarded successfully the http request to the fake metadata + server and a 200 (OK) response was sent to the "client" namespace. Some of + the test cases additionally test the metadata proxy rate limiting, by + asserting that, after a requests limit is exceeded, the "client" namespace + receives a 429 (Too Many Requests) response. 
+ """ SOCKET_MODE = 0o644 @@ -58,10 +77,31 @@ class MetadataL3AgentTestCase(framework.L3AgentTestFramework): self.agent.conf.metadata_proxy_socket, workers=0, backlog=4096, mode=self.SOCKET_MODE) - def _query_metadata_proxy(self, machine): - url = 'http://%(host)s:%(port)s' % {'host': constants.METADATA_V4_IP, - 'port': constants.METADATA_PORT} - cmd = 'curl', '--max-time', METADATA_REQUEST_TIMEOUT, '-D-', url + def _get_command(self, machine, ipv6=False, interface=None): + if ipv6: + params = {'host': constants.METADATA_V6_IP, + 'interface': interface, + 'port': constants.METADATA_PORT} + url = 'http://[%(host)s%%%(interface)s]:%(port)s' % params + else: + params = {'host': constants.METADATA_V4_IP, + 'port': constants.METADATA_PORT} + url = 'http://%(host)s:%(port)s' % params + return 'curl', '--max-time', METADATA_REQUEST_TIMEOUT, '-D-', url + + def _setup_for_ipv6(self, machine, qr_lla): + lla_info = (machine.port.addr.list(scope='link', + ip_version=6)[0]) + interface = lla_info['name'] + machine.port.addr.wait_until_address_ready( + lla_info['cidr'].split('/')[0]) + machine.execute(('ip', '-6', 'route', 'add', + constants.METADATA_V6_IP, 'via', qr_lla, 'dev', + interface,)) + return interface + + def _query_metadata_proxy(self, machine, ipv6=False, interface=None): + cmd = self._get_command(machine, ipv6, interface) i = 0 CONNECTION_REFUSED_TIMEOUT = METADATA_REQUEST_TIMEOUT // 2 while i <= CONNECTION_REFUSED_TIMEOUT: @@ -74,29 +114,15 @@ class MetadataL3AgentTestCase(framework.L3AgentTestFramework): i += METADATA_REQUEST_SLEEP else: self.fail('metadata proxy unreachable ' - 'on %s before timeout' % url) + 'on %s before timeout' % cmd[-1]) if i > CONNECTION_REFUSED_TIMEOUT: self.fail('Timed out waiting metadata proxy to become available') return raw_headers.splitlines()[0] - def test_access_to_metadata_proxy(self): - """Test access to the l3-agent metadata proxy. 
- - The test creates: - * A l3-agent metadata service: - * A router (which creates a metadata proxy in the router namespace), - * A fake metadata server - * A "client" namespace (simulating a vm) with a port on router - internal subnet. - - The test queries from the "client" namespace the metadata proxy on - http://169.254.169.254 and asserts that the metadata proxy added - the X-Forwarded-For and X-Neutron-Router-Id headers to the request - and forwarded the http request to the fake metadata server and the - response to the "client" namespace. - """ - router_info = self.generate_router_info(enable_ha=False) + def _create_resources(self): + router_info = self.generate_router_info(enable_ha=False, + dual_stack=True) router = self.manage_router(self.agent, router_info) self._create_metadata_fake_server(webob.exc.HTTPOk.code) @@ -110,13 +136,114 @@ class MetadataL3AgentTestCase(framework.L3AgentTestFramework): br_int, net_helpers.increment_ip_cidr(router_ip_cidr), router_ip_cidr.partition('/')[0])) + router_ifs = router_info[constants.INTERFACE_KEY] + qr_lla = str( + netaddr.EUI(router_ifs[0]['mac_address']).ipv6_link_local()) + return machine, qr_lla + + def _test_access_to_metadata_proxy(self, ipv6=False): + machine, qr_lla = self._create_resources() + interface = self._setup_for_ipv6(machine, qr_lla) if ipv6 else None # Query metadata proxy - firstline = self._query_metadata_proxy(machine) + firstline = self._query_metadata_proxy(machine, ipv6=ipv6, + interface=interface) # Check status code self.assertIn(str(webob.exc.HTTPOk.code), firstline.split()) + def _set_up_for_rate_limiting_test(self, ipv6=False): + self.conf.set_override('rate_limit_enabled', True, + 'metadata_rate_limiting') + if ipv6: + self.conf.set_override('ip_versions', ['6'], + 'metadata_rate_limiting') + machine, qr_lla = self._create_resources() + interface = self._setup_for_ipv6(machine, qr_lla) if ipv6 else None + return machine, interface + + def _test_rate_limiting(self, limit, machine, 
ipv6=False, interface=None, + exceed=True): + # The first "limit" requests should succeed + for _ in range(limit): + firstline = self._query_metadata_proxy(machine, ipv6=ipv6, + interface=interface) + self.assertIn(str(webob.exc.HTTPOk.code), firstline.split()) + + if exceed: + firstline = self._query_metadata_proxy(machine, ipv6=ipv6, + interface=interface) + self.assertIn(TOO_MANY_REQUESTS_CODE, firstline.split()) + + def test_access_to_metadata_proxy(self): + self._test_access_to_metadata_proxy() + + def test_access_to_metadata_proxy_ipv6(self): + self._test_access_to_metadata_proxy(ipv6=True) + + def test_metadata_proxy_rate_limiting(self): + self.conf.set_override('base_query_rate_limit', 2, + 'metadata_rate_limiting') + machine, _ = self._set_up_for_rate_limiting_test() + self._test_rate_limiting(2, machine) + + def test_metadata_proxy_rate_limiting_ipv6(self): + self.conf.set_override('base_query_rate_limit', 2, + 'metadata_rate_limiting') + machine, interface = self._set_up_for_rate_limiting_test(ipv6=True) + self._test_rate_limiting(2, machine, ipv6=True, interface=interface) + + def test_metadata_proxy_burst_rate_limiting(self): + self.conf.set_override('base_query_rate_limit', 10, + 'metadata_rate_limiting') + self.conf.set_override('base_window_duration', 60, + 'metadata_rate_limiting') + self.conf.set_override('burst_query_rate_limit', 2, + 'metadata_rate_limiting') + self.conf.set_override('burst_window_duration', 5, + 'metadata_rate_limiting') + machine, _ = self._set_up_for_rate_limiting_test() + + # Since the number of metadata requests doesn't exceed the base or the + # burst query rate limit, all of them should get "OK" response + self._test_rate_limiting(2, machine, exceed=False) + + # Wait for haproxy to reset the burst window and then test it returns + # "Too Many Requests" after exceeding the burst query rate limit + time.sleep(10) + self._test_rate_limiting(2, machine) + + def test_metadata_proxy_base_and_burst_rate_limiting(self): + 
self.conf.set_override('base_query_rate_limit', 3, + 'metadata_rate_limiting') + self.conf.set_override('base_window_duration', 60, + 'metadata_rate_limiting') + self.conf.set_override('burst_query_rate_limit', 2, + 'metadata_rate_limiting') + self.conf.set_override('burst_window_duration', 5, + 'metadata_rate_limiting') + machine, _ = self._set_up_for_rate_limiting_test() + + # Since the number of metadata requests doesn't exceed the base or the + # burst query rate limit, all of them should get "OK" response + self._test_rate_limiting(2, machine, exceed=False) + + # Wait for haproxy to reset the burst window and then test it returns + # "Too Many Requests" after exceeding the base query rate limit + time.sleep(10) + self._test_rate_limiting(1, machine) + + def test_metadata_proxy_rate_limiting_invalid_ip_versions(self): + self.conf.set_override('base_query_rate_limit', 2, + 'metadata_rate_limiting') + self.conf.set_override('ip_versions', ['4', '6'], + 'metadata_rate_limiting') + machine, _ = self._set_up_for_rate_limiting_test() + # Since we are passing an invalid ip_versions configuration, rate + # limiting will not be configured and more than 2 requests should + # succeed + self._test_rate_limiting(3, machine, exceed=False) + class UnprivilegedUserMetadataL3AgentTestCase(MetadataL3AgentTestCase): """Test metadata proxy with least privileged user. 
diff --git a/neutron/tests/unit/agent/metadata/test_driver.py b/neutron/tests/unit/agent/metadata/test_driver.py index e3b0b8ef6e3..60493670773 100644 --- a/neutron/tests/unit/agent/metadata/test_driver.py +++ b/neutron/tests/unit/agent/metadata/test_driver.py @@ -81,6 +81,12 @@ class TestMetadataDriverProcess(base.BaseTestCase): METADATA_PORT = 8080 METADATA_SOCKET = '/socket/path' PIDFILE = 'pidfile' + RATE_LIMIT_CONFIG = { + 'base_window_duration': 10, + 'base_query_rate_limit': 5, + 'burst_window_duration': 1, + 'burst_query_rate_limit': 10, + } def setUp(self): super(TestMetadataDriverProcess, self).setUp() @@ -101,6 +107,9 @@ class TestMetadataDriverProcess(base.BaseTestCase): l3_config.register_l3_agent_config_opts(l3_config.OPTS, cfg.CONF) ha_conf.register_l3_agent_ha_opts() meta_conf.register_meta_conf_opts(meta_conf.SHARED_OPTS, cfg.CONF) + meta_conf.register_meta_conf_opts( + meta_conf.METADATA_RATE_LIMITING_OPTS, cfg.CONF, + group=meta_conf.RATE_LIMITING_GROUP) def test_after_router_updated_called_on_agent_process_update(self): with mock.patch.object(metadata_driver, 'after_router_updated') as f,\ @@ -142,7 +151,7 @@ class TestMetadataDriverProcess(base.BaseTestCase): agent._process_updated_router(router) f.assert_not_called() - def _test_spawn_metadata_proxy(self, dad_failed=False): + def _test_spawn_metadata_proxy(self, dad_failed=False, rate_limited=False): router_id = _uuid() router_ns = 'qrouter-%s' % router_id service_name = 'haproxy' @@ -202,7 +211,8 @@ class TestMetadataDriverProcess(base.BaseTestCase): "-" + router_id) bind_v6_line = 'bind %s:%s interface %s' % ( self.METADATA_DEFAULT_IPV6, self.METADATA_PORT, 'fake-if') - cfg_contents = metadata_driver._HAPROXY_CONFIG_TEMPLATE % { + + expected_params = { 'user': self.EUNAME, 'group': self.EGNAME, 'host': self.METADATA_DEFAULT_IP, @@ -222,9 +232,25 @@ class TestMetadataDriverProcess(base.BaseTestCase): 'fake-if', namespace=router_ns) else: + if rate_limited: + 
expected_params.update(self.RATE_LIMIT_CONFIG, + stick_table_expire=10, + ip_version='ip') + expected_config_template = ( + comm_meta.METADATA_HAPROXY_GLOBAL + + comm_meta.RATE_LIMITED_CONFIG_TEMPLATE + + metadata_driver._HEADER_CONFIG_TEMPLATE) + else: + expected_config_template = ( + comm_meta.METADATA_HAPROXY_GLOBAL + + metadata_driver._UNLIMITED_CONFIG_TEMPLATE + + metadata_driver._HEADER_CONFIG_TEMPLATE) + mock_open.assert_has_calls([ mock.call(cfg_file, 'w'), - mock.call().write(cfg_contents)], any_order=True) + mock.call().write(expected_config_template % + expected_params)], + any_order=True) env = {ep.PROCESS_TAG: service_name + '-' + router_id} ip_mock.assert_has_calls([ @@ -241,6 +267,20 @@ class TestMetadataDriverProcess(base.BaseTestCase): def test_spawn_metadata_proxy(self): self._test_spawn_metadata_proxy() + def test_spawn_rate_limited_metadata_proxy(self): + cfg.CONF.set_override('rate_limit_enabled', True, + group=meta_conf.RATE_LIMITING_GROUP) + for k, v in self.RATE_LIMIT_CONFIG.items(): + cfg.CONF.set_override(k, v, group=meta_conf.RATE_LIMITING_GROUP) + + return self._test_spawn_metadata_proxy(rate_limited=True) + + def test_metadata_proxy_conf_parse_ip_versions(self): + self.assertEqual('4', comm_meta.parse_ip_versions(['4'])) + self.assertEqual('6', comm_meta.parse_ip_versions(['6'])) + self.assertIsNone(comm_meta.parse_ip_versions(['4', '6'])) + self.assertIsNone(comm_meta.parse_ip_versions(['5', '6'])) + def test_spawn_metadata_proxy_dad_failed(self): self._test_spawn_metadata_proxy(dad_failed=True) @@ -251,7 +291,8 @@ class TestMetadataDriverProcess(base.BaseTestCase): mock.ANY, mock.ANY, self.EUNAME, self.EGNAME, - mock.ANY, mock.ANY) + mock.ANY, mock.ANY, + mock.ANY) self.assertRaises(comm_meta.InvalidUserOrGroupException, config.create_config_file) @@ -264,7 +305,8 @@ class TestMetadataDriverProcess(base.BaseTestCase): mock.ANY, mock.ANY, self.EUNAME, self.EGNAME, - mock.ANY, mock.ANY) + mock.ANY, mock.ANY, + mock.ANY) 
self.assertRaises(comm_meta.InvalidUserOrGroupException, config.create_config_file) diff --git a/neutron/tests/unit/agent/ovn/metadata/test_driver.py b/neutron/tests/unit/agent/ovn/metadata/test_driver.py index 412d5eb5af2..aacee9a0e4c 100644 --- a/neutron/tests/unit/agent/ovn/metadata/test_driver.py +++ b/neutron/tests/unit/agent/ovn/metadata/test_driver.py @@ -41,15 +41,35 @@ class TestMetadataDriverProcess(base.BaseTestCase): METADATA_PORT = 8080 METADATA_SOCKET = '/socket/path' PIDFILE = 'pidfile' + RATE_LIMIT_CONFIG = { + 'base_window_duration': 10, + 'base_query_rate_limit': 5, + 'burst_window_duration': 1, + 'burst_query_rate_limit': 10, + } def setUp(self): super(TestMetadataDriverProcess, self).setUp() mock.patch('eventlet.spawn').start() ovn_meta_conf.register_meta_conf_opts(meta_conf.SHARED_OPTS, cfg.CONF) + ovn_meta_conf.register_meta_conf_opts( + meta_conf.METADATA_RATE_LIMITING_OPTS, cfg.CONF, + group=meta_conf.RATE_LIMITING_GROUP) ovn_conf.register_opts() def test_spawn_metadata_proxy(self): + return self._test_spawn_metadata_proxy(rate_limited=False) + + def test_spawn_rate_limited_metadata_proxy(self): + cfg.CONF.set_override('rate_limit_enabled', True, + group=meta_conf.RATE_LIMITING_GROUP) + for k, v in self.RATE_LIMIT_CONFIG.items(): + cfg.CONF.set_override(k, v, group=meta_conf.RATE_LIMITING_GROUP) + + return self._test_spawn_metadata_proxy(rate_limited=True) + + def _test_spawn_metadata_proxy(self, rate_limited=False): datapath_id = _uuid() metadata_ns = metadata_agent.NS_PREFIX + datapath_id ip_class_path = 'neutron.agent.linux.ip_lib.IPWrapper' @@ -93,7 +113,7 @@ class TestMetadataDriverProcess(base.BaseTestCase): service_name, metadata_driver.METADATA_SERVICE_NAME, datapath_id) - cfg_contents = metadata_driver._HAPROXY_CONFIG_TEMPLATE % { + expected_params = { 'user': self.EUNAME, 'group': self.EGNAME, 'host': self.METADATA_DEFAULT_IP, @@ -103,8 +123,24 @@ class TestMetadataDriverProcess(base.BaseTestCase): 'res_id': datapath_id, 'pidfile': 
self.PIDFILE, 'log_level': 'debug', - 'log_tag': log_tag} + 'log_tag': log_tag, + 'bind_v6_line': ''} + if rate_limited: + expected_params.update(self.RATE_LIMIT_CONFIG, + stick_table_expire=10, + ip_version='ip') + expected_config_template = ( + comm_meta.METADATA_HAPROXY_GLOBAL + + comm_meta.RATE_LIMITED_CONFIG_TEMPLATE + + metadata_driver._HEADER_CONFIG_TEMPLATE) + else: + expected_config_template = ( + comm_meta.METADATA_HAPROXY_GLOBAL + + metadata_driver._UNLIMITED_CONFIG_TEMPLATE + + metadata_driver._HEADER_CONFIG_TEMPLATE) + + cfg_contents = expected_config_template % expected_params mock_open.assert_has_calls([ mock.call(cfg_file, 'w'), mock.call().write(cfg_contents)], @@ -123,7 +159,7 @@ class TestMetadataDriverProcess(base.BaseTestCase): mock.ANY, mock.ANY, mock.ANY, self.EUNAME, self.EGNAME, mock.ANY, - mock.ANY) + mock.ANY, mock.ANY) self.assertRaises(comm_meta.InvalidUserOrGroupException, config.create_config_file) @@ -135,6 +171,6 @@ class TestMetadataDriverProcess(base.BaseTestCase): mock.ANY, mock.ANY, mock.ANY, self.EUNAME, self.EGNAME, mock.ANY, - mock.ANY) + mock.ANY, mock.ANY) self.assertRaises(comm_meta.InvalidUserOrGroupException, config.create_config_file) diff --git a/releasenotes/notes/add-metadata-rate-limiting-bf0c17a31f86ee16.yaml b/releasenotes/notes/add-metadata-rate-limiting-bf0c17a31f86ee16.yaml new file mode 100644 index 00000000000..a2c6beb2532 --- /dev/null +++ b/releasenotes/notes/add-metadata-rate-limiting-bf0c17a31f86ee16.yaml @@ -0,0 +1,8 @@ +--- +features: + - Neutron allows cloud administrators to limit the rate at which VMs query + the Nova metadata service in order to protect the OpenStack deployment + from DoS or misbehaved instances. This new feature can be configured in + the neutron.conf file. Please see the "Metadata service query rate + limiting" section under Neutron configuration in the documentation for + more details.