Ensures accurate quota calculation during the readiness checks

Current deployments of the OpenShift platform with Kuryr CNI
in real OpenStack installations (multi-project environments)
are crashing because the kuryr-controller cannot reach the
READY state.

This is due to inaccurate quota calculations in the readiness
process and an unscalable approach of fetching objects from the
Neutron API in order to count them and compare against the limits.

This commit ensures accurate quota calculation for the installation
project during the readiness checks and removes the heavy
Neutron API calls. This dramatically speeds up the readiness checks.

Change-Id: Ia5e90d6bd5a8d30d0596508abd541e1508dc23ec
Closes-Bug: 1864327
This commit is contained in:
ITD27M01 2020-02-24 22:26:58 +03:00
parent 0ff30ac053
commit 9cdd1c8112
8 changed files with 63 additions and 84 deletions

View File

@ -16,7 +16,6 @@ import eventlet
import time import time
from openstack import exceptions as os_exc from openstack import exceptions as os_exc
from oslo_cache import core as cache
from oslo_config import cfg as oslo_cfg from oslo_config import cfg as oslo_cfg
from oslo_log import log as logging from oslo_log import log as logging
from oslo_serialization import jsonutils from oslo_serialization import jsonutils
@ -35,21 +34,6 @@ LOG = logging.getLogger(__name__)
DEFAULT_CLEANUP_INTERVAL = 60 DEFAULT_CLEANUP_INTERVAL = 60
DEFAULT_CLEANUP_RETRIES = 10 DEFAULT_CLEANUP_RETRIES = 10
namespace_handler_caching_opts = [
oslo_cfg.BoolOpt('caching', default=True),
oslo_cfg.IntOpt('cache_time', default=120),
]
oslo_cfg.CONF.register_opts(namespace_handler_caching_opts,
"namespace_handler_caching")
cache.configure(oslo_cfg.CONF)
namespace_handler_cache_region = cache.create_region()
MEMOIZE = cache.get_memoization_decorator(
oslo_cfg.CONF, namespace_handler_cache_region, "namespace_handler_caching")
cache.configure_cache_region(oslo_cfg.CONF, namespace_handler_cache_region)
class NamespaceHandler(k8s_base.ResourceEventHandler): class NamespaceHandler(k8s_base.ResourceEventHandler):
OBJECT_KIND = constants.K8S_OBJ_NAMESPACE OBJECT_KIND = constants.K8S_OBJ_NAMESPACE
@ -172,18 +156,13 @@ class NamespaceHandler(k8s_base.ResourceEventHandler):
return False return False
return self._check_quota(quota) return self._check_quota(quota)
@MEMOIZE
def _check_quota(self, quota): def _check_quota(self, quota):
os_net = clients.get_network_client() resources = ('subnets', 'networks', 'security_groups')
resources = {'subnets': os_net.subnets,
'networks': os_net.networks,
'security_groups': os_net.security_groups}
for resource, network_func in resources.items(): for resource in resources:
resource_quota = quota[resource] resource_quota = quota[resource]
if utils.has_limit(resource_quota): if utils.has_limit(resource_quota):
if not utils.is_available(resource, resource_quota, if not utils.is_available(resource, resource_quota):
network_func):
return False return False
return True return True

View File

@ -13,7 +13,6 @@
# limitations under the License. # limitations under the License.
from openstack import exceptions as os_exc from openstack import exceptions as os_exc
from oslo_cache import core as cache
from oslo_config import cfg as oslo_cfg from oslo_config import cfg as oslo_cfg
from oslo_log import log as logging from oslo_log import log as logging
@ -27,21 +26,6 @@ from kuryr_kubernetes import utils
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
np_handler_caching_opts = [
oslo_cfg.BoolOpt('caching', default=True),
oslo_cfg.IntOpt('cache_time', default=120),
]
oslo_cfg.CONF.register_opts(np_handler_caching_opts,
"np_handler_caching")
cache.configure(oslo_cfg.CONF)
np_handler_cache_region = cache.create_region()
MEMOIZE = cache.get_memoization_decorator(
oslo_cfg.CONF, np_handler_cache_region, "np_handler_caching")
cache.configure_cache_region(oslo_cfg.CONF, np_handler_cache_region)
class NetworkPolicyHandler(k8s_base.ResourceEventHandler): class NetworkPolicyHandler(k8s_base.ResourceEventHandler):
"""NetworkPolicyHandler handles k8s Network Policies events""" """NetworkPolicyHandler handles k8s Network Policies events"""
@ -147,12 +131,9 @@ class NetworkPolicyHandler(k8s_base.ResourceEventHandler):
return False return False
return self._check_quota(quota) return self._check_quota(quota)
@MEMOIZE
def _check_quota(self, quota): def _check_quota(self, quota):
os_net = clients.get_network_client()
if utils.has_limit(quota.security_groups): if utils.has_limit(quota.security_groups):
return utils.is_available('security_groups', quota.security_groups, return utils.is_available('security_groups', quota.security_groups)
os_net.security_groups)
return True return True
def _is_service_affected(self, service, affected_pods): def _is_service_affected(self, service, affected_pods):

View File

@ -15,7 +15,6 @@
from neutronclient.common import exceptions as n_exc from neutronclient.common import exceptions as n_exc
from openstack import exceptions as os_exc from openstack import exceptions as os_exc
from oslo_cache import core as cache
from oslo_config import cfg as oslo_cfg from oslo_config import cfg as oslo_cfg
from oslo_log import log as logging from oslo_log import log as logging
from oslo_serialization import jsonutils from oslo_serialization import jsonutils
@ -32,22 +31,6 @@ from kuryr_kubernetes import utils
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
vif_handler_caching_opts = [
oslo_cfg.BoolOpt('caching', default=True),
oslo_cfg.IntOpt('cache_time', default=120),
]
oslo_cfg.CONF.register_opts(vif_handler_caching_opts,
"vif_handler_caching")
cache.configure(oslo_cfg.CONF)
vif_handler_cache_region = cache.create_region()
MEMOIZE = cache.get_memoization_decorator(
oslo_cfg.CONF, vif_handler_cache_region, "vif_handler_caching")
cache.configure_cache_region(oslo_cfg.CONF, vif_handler_cache_region)
class VIFHandler(k8s_base.ResourceEventHandler): class VIFHandler(k8s_base.ResourceEventHandler):
"""Controller side of VIF binding process for Kubernetes pods. """Controller side of VIF binding process for Kubernetes pods.
@ -212,11 +195,9 @@ class VIFHandler(k8s_base.ResourceEventHandler):
services = driver_utils.get_services() services = driver_utils.get_services()
self._update_services(services, crd_pod_selectors, project_id) self._update_services(services, crd_pod_selectors, project_id)
@MEMOIZE
def is_ready(self, quota): def is_ready(self, quota):
os_net = clients.get_network_client()
if utils.has_limit(quota.ports): if utils.has_limit(quota.ports):
return utils.is_available('ports', quota.ports, os_net.ports) return utils.is_available('ports', quota.ports)
return True return True
@staticmethod @staticmethod

View File

@ -61,7 +61,7 @@ class HealthServer(object):
def _components_ready(self): def _components_ready(self):
os_net = clients.get_network_client() os_net = clients.get_network_client()
project_id = config.CONF.neutron_defaults.project project_id = config.CONF.neutron_defaults.project
quota = os_net.get_quota(project_id) quota = os_net.get_quota(quota=project_id, details=True)
for component in self._registry: for component in self._registry:
if not component.is_ready(quota): if not component.is_ready(quota):

View File

@ -19,9 +19,6 @@ from kuryr_kubernetes import config
from kuryr_kubernetes.controller.drivers import namespace_subnet from kuryr_kubernetes.controller.drivers import namespace_subnet
from kuryr_kubernetes.controller.drivers import utils as driver_utils from kuryr_kubernetes.controller.drivers import utils as driver_utils
from kuryr_kubernetes.controller.drivers import vif_pool from kuryr_kubernetes.controller.drivers import vif_pool
from kuryr_kubernetes.controller.handlers import namespace
from kuryr_kubernetes.controller.handlers import policy
from kuryr_kubernetes.controller.handlers import vif
from kuryr_kubernetes.controller.managers import health from kuryr_kubernetes.controller.managers import health
from kuryr_kubernetes.controller.managers import pool from kuryr_kubernetes.controller.managers import pool
from kuryr_kubernetes import utils from kuryr_kubernetes import utils
@ -42,9 +39,6 @@ _kuryr_k8s_opts = [
('cni_health_server', cni_health.cni_health_server_opts), ('cni_health_server', cni_health.cni_health_server_opts),
('namespace_subnet', namespace_subnet.namespace_subnet_driver_opts), ('namespace_subnet', namespace_subnet.namespace_subnet_driver_opts),
('sriov', config.sriov_opts), ('sriov', config.sriov_opts),
('namespace_handler_caching', namespace.namespace_handler_caching_opts),
('np_handler_caching', policy.np_handler_caching_opts),
('vif_handler_caching', vif.vif_handler_caching_opts),
('pod_ip_caching', driver_utils.pod_ip_caching_opts), ('pod_ip_caching', driver_utils.pod_ip_caching_opts),
] ]

View File

@ -23,15 +23,51 @@ from oslo_config import cfg as oslo_cfg
def get_quota_obj(): def get_quota_obj():
return { return {
'quota': { 'quota': {
'subnet': 100, 'subnet': {
'network': 100, 'used': 50,
'floatingip': 50, 'limit': 100,
'subnetpool': -1, 'reserved': 0
'security_group_rule': 100, },
'security_group': 10, 'network': {
'router': 10, 'used': 50,
'rbac_policy': 10, 'limit': 100,
'port': 500 'reserved': 0
},
'floatingip': {
'used': 25,
'limit': 50,
'reserved': 0
},
'subnetpool': {
'used': 0,
'limit': -1,
'reserved': 0
},
'security_group_rule': {
'used': 50,
'limit': 100,
'reserved': 0
},
'security_group': {
'used': 5,
'limit': 10,
'reserved': 0
},
'router': {
'used': 5,
'limit': 10,
'reserved': 0
},
'rbac_policy': {
'used': 5,
'limit': 10,
'reserved': 0
},
'port': {
'used': 250,
'limit': 500,
'reserved': 0
}
} }
} }

View File

@ -217,12 +217,11 @@ def extract_pod_annotation(annotation):
def has_limit(quota): def has_limit(quota):
NO_LIMIT = -1 NO_LIMIT = -1
return quota != NO_LIMIT return quota['limit'] != NO_LIMIT
def is_available(resource, resource_quota, network_func): def is_available(resource, resource_quota):
qnt_resources = len(list(network_func())) availability = resource_quota['limit'] - resource_quota['used']
availability = resource_quota - qnt_resources
if availability <= 0: if availability <= 0:
LOG.error("Quota exceeded for resource: %s", resource) LOG.error("Quota exceeded for resource: %s", resource)
return False return False

View File

@ -0,0 +1,9 @@
---
deprecations:
- |
    Configuration sections ``[namespace_handler_caching]``, ``[np_handler_caching]``
    and ``[vif_handler_caching]`` have been deprecated because the quota usage
    calculation for readiness checks has been simplified. Instead of counting
    Neutron objects (ports, security groups, subnets, and networks), the
    ``quota_details`` extension is now used, which reports the used, limit and
    reserved counts per resource. This makes caching unnecessary.