Ensures accurate quota calculation during the readiness checks
Current deployments of the OpenShift platform with Kuryr CNI on real OpenStack installations (multi-project environments) crash because kuryr-controller cannot reach the READY state. This is caused by inaccurate quota calculations in the readiness process, and by unscalable fetching of objects from the Neutron API in order to count them and compare against the limits. This commit ensures accurate quota calculation for the installation project during the readiness checks and removes the heavy Neutron API calls, which dramatically speeds up the readiness checks. Change-Id: Ia5e90d6bd5a8d30d0596508abd541e1508dc23ec Closes-Bug: 1864327
commit 9cdd1c8112
parent 0ff30ac053
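In short, readiness used to list every Neutron object of each kind just to count it; now it reads the used/limit counters Neutron already tracks. A minimal standalone sketch of the idea (illustrative only, assuming a quota entry shaped like Neutron's quota_details payload; not the project's code verbatim):

# Old approach: O(number of objects) per readiness probe -- list all
# ports/subnets/networks via the Neutron API and count them, e.g.
#     used = len(list(os_net.ports()))
# New approach: one detailed-quota call; availability is plain arithmetic.
def is_available(resource, resource_quota):
    # resource_quota is a quota_details entry,
    # e.g. {'used': 250, 'limit': 500, 'reserved': 0}
    return resource_quota['limit'] - resource_quota['used'] > 0

print(is_available('ports', {'used': 250, 'limit': 500, 'reserved': 0}))  # True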
@@ -16,7 +16,6 @@ import eventlet
 import time
 
 from openstack import exceptions as os_exc
-from oslo_cache import core as cache
 from oslo_config import cfg as oslo_cfg
 from oslo_log import log as logging
 from oslo_serialization import jsonutils
@@ -35,21 +34,6 @@ LOG = logging.getLogger(__name__)
 DEFAULT_CLEANUP_INTERVAL = 60
 DEFAULT_CLEANUP_RETRIES = 10
 
-namespace_handler_caching_opts = [
-    oslo_cfg.BoolOpt('caching', default=True),
-    oslo_cfg.IntOpt('cache_time', default=120),
-]
-
-oslo_cfg.CONF.register_opts(namespace_handler_caching_opts,
-                            "namespace_handler_caching")
-
-cache.configure(oslo_cfg.CONF)
-namespace_handler_cache_region = cache.create_region()
-MEMOIZE = cache.get_memoization_decorator(
-    oslo_cfg.CONF, namespace_handler_cache_region, "namespace_handler_caching")
-
-cache.configure_cache_region(oslo_cfg.CONF, namespace_handler_cache_region)
-
 
 class NamespaceHandler(k8s_base.ResourceEventHandler):
     OBJECT_KIND = constants.K8S_OBJ_NAMESPACE
@@ -172,18 +156,13 @@ class NamespaceHandler(k8s_base.ResourceEventHandler):
             return False
         return self._check_quota(quota)
 
-    @MEMOIZE
     def _check_quota(self, quota):
-        os_net = clients.get_network_client()
-        resources = {'subnets': os_net.subnets,
-                     'networks': os_net.networks,
-                     'security_groups': os_net.security_groups}
+        resources = ('subnets', 'networks', 'security_groups')
 
-        for resource, network_func in resources.items():
+        for resource in resources:
            resource_quota = quota[resource]
            if utils.has_limit(resource_quota):
-                if not utils.is_available(resource, resource_quota,
-                                          network_func):
+                if not utils.is_available(resource, resource_quota):
                     return False
         return True
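For clarity, the simplified per-resource check boils down to the following sketch (a hypothetical standalone version with the utils helpers inlined; not the project's code verbatim):

def check_quota(quota):
    # Each quota entry is a quota_details mapping with 'used'/'limit'/'reserved'.
    for resource in ('subnets', 'networks', 'security_groups'):
        details = quota[resource]
        if details['limit'] != -1:  # -1 means the resource is unlimited
            if details['limit'] - details['used'] <= 0:
                return False  # quota exhausted, do not report READY
    return True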
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from openstack import exceptions as os_exc
-from oslo_cache import core as cache
 from oslo_config import cfg as oslo_cfg
 from oslo_log import log as logging
 
@@ -27,21 +26,6 @@ from kuryr_kubernetes import utils
 
 LOG = logging.getLogger(__name__)
 
-np_handler_caching_opts = [
-    oslo_cfg.BoolOpt('caching', default=True),
-    oslo_cfg.IntOpt('cache_time', default=120),
-]
-
-oslo_cfg.CONF.register_opts(np_handler_caching_opts,
-                            "np_handler_caching")
-
-cache.configure(oslo_cfg.CONF)
-np_handler_cache_region = cache.create_region()
-MEMOIZE = cache.get_memoization_decorator(
-    oslo_cfg.CONF, np_handler_cache_region, "np_handler_caching")
-
-cache.configure_cache_region(oslo_cfg.CONF, np_handler_cache_region)
-
 
 class NetworkPolicyHandler(k8s_base.ResourceEventHandler):
     """NetworkPolicyHandler handles k8s Network Policies events"""
@@ -147,12 +131,9 @@ class NetworkPolicyHandler(k8s_base.ResourceEventHandler):
             return False
         return self._check_quota(quota)
 
-    @MEMOIZE
     def _check_quota(self, quota):
-        os_net = clients.get_network_client()
         if utils.has_limit(quota.security_groups):
-            return utils.is_available('security_groups', quota.security_groups,
-                                      os_net.security_groups)
+            return utils.is_available('security_groups', quota.security_groups)
         return True
 
     def _is_service_affected(self, service, affected_pods):
@@ -15,7 +15,6 @@
 
 from neutronclient.common import exceptions as n_exc
 from openstack import exceptions as os_exc
-from oslo_cache import core as cache
 from oslo_config import cfg as oslo_cfg
 from oslo_log import log as logging
 from oslo_serialization import jsonutils
@@ -32,22 +31,6 @@ from kuryr_kubernetes import utils
 LOG = logging.getLogger(__name__)
 
 
-vif_handler_caching_opts = [
-    oslo_cfg.BoolOpt('caching', default=True),
-    oslo_cfg.IntOpt('cache_time', default=120),
-]
-
-oslo_cfg.CONF.register_opts(vif_handler_caching_opts,
-                            "vif_handler_caching")
-
-cache.configure(oslo_cfg.CONF)
-vif_handler_cache_region = cache.create_region()
-MEMOIZE = cache.get_memoization_decorator(
-    oslo_cfg.CONF, vif_handler_cache_region, "vif_handler_caching")
-
-cache.configure_cache_region(oslo_cfg.CONF, vif_handler_cache_region)
-
-
 
 class VIFHandler(k8s_base.ResourceEventHandler):
     """Controller side of VIF binding process for Kubernetes pods.
 
@@ -212,11 +195,9 @@ class VIFHandler(k8s_base.ResourceEventHandler):
         services = driver_utils.get_services()
         self._update_services(services, crd_pod_selectors, project_id)
 
-    @MEMOIZE
     def is_ready(self, quota):
-        os_net = clients.get_network_client()
         if utils.has_limit(quota.ports):
-            return utils.is_available('ports', quota.ports, os_net.ports)
+            return utils.is_available('ports', quota.ports)
         return True
 
     @staticmethod
@@ -61,7 +61,7 @@ class HealthServer(object):
     def _components_ready(self):
         os_net = clients.get_network_client()
         project_id = config.CONF.neutron_defaults.project
-        quota = os_net.get_quota(project_id)
+        quota = os_net.get_quota(quota=project_id, details=True)
 
         for component in self._registry:
             if not component.is_ready(quota):
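For context, fetching the detailed quota with openstacksdk looks roughly like this (a hedged sketch; the connection setup and project id are assumptions, not part of this commit):

import openstack

conn = openstack.connect(cloud='envvars')  # assumes OS_* environment variables

# details=True requests Neutron's quota_details extension, so each resource
# attribute becomes a mapping of used/limit/reserved counts rather than a
# bare limit number.
quota = conn.network.get_quota('PROJECT_ID', details=True)
print(quota.ports)  # e.g. {'used': 250, 'limit': 500, 'reserved': 0}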
@@ -19,9 +19,6 @@ from kuryr_kubernetes import config
 from kuryr_kubernetes.controller.drivers import namespace_subnet
 from kuryr_kubernetes.controller.drivers import utils as driver_utils
 from kuryr_kubernetes.controller.drivers import vif_pool
-from kuryr_kubernetes.controller.handlers import namespace
-from kuryr_kubernetes.controller.handlers import policy
-from kuryr_kubernetes.controller.handlers import vif
 from kuryr_kubernetes.controller.managers import health
 from kuryr_kubernetes.controller.managers import pool
 from kuryr_kubernetes import utils
@@ -42,9 +39,6 @@ _kuryr_k8s_opts = [
     ('cni_health_server', cni_health.cni_health_server_opts),
     ('namespace_subnet', namespace_subnet.namespace_subnet_driver_opts),
     ('sriov', config.sriov_opts),
-    ('namespace_handler_caching', namespace.namespace_handler_caching_opts),
-    ('np_handler_caching', policy.np_handler_caching_opts),
-    ('vif_handler_caching', vif.vif_handler_caching_opts),
     ('pod_ip_caching', driver_utils.pod_ip_caching_opts),
 ]
 
@@ -23,15 +23,51 @@ from oslo_config import cfg as oslo_cfg
 def get_quota_obj():
     return {
         'quota': {
-            'subnet': 100,
-            'network': 100,
-            'floatingip': 50,
-            'subnetpool': -1,
-            'security_group_rule': 100,
-            'security_group': 10,
-            'router': 10,
-            'rbac_policy': 10,
-            'port': 500
+            'subnet': {
+                'used': 50,
+                'limit': 100,
+                'reserved': 0
+            },
+            'network': {
+                'used': 50,
+                'limit': 100,
+                'reserved': 0
+            },
+            'floatingip': {
+                'used': 25,
+                'limit': 50,
+                'reserved': 0
+            },
+            'subnetpool': {
+                'used': 0,
+                'limit': -1,
+                'reserved': 0
+            },
+            'security_group_rule': {
+                'used': 50,
+                'limit': 100,
+                'reserved': 0
+            },
+            'security_group': {
+                'used': 5,
+                'limit': 10,
+                'reserved': 0
+            },
+            'router': {
+                'used': 5,
+                'limit': 10,
+                'reserved': 0
+            },
+            'rbac_policy': {
+                'used': 5,
+                'limit': 10,
+                'reserved': 0
+            },
+            'port': {
+                'used': 250,
+                'limit': 500,
+                'reserved': 0
+            }
         }
     }
@@ -217,12 +217,11 @@ def extract_pod_annotation(annotation):
 
 def has_limit(quota):
     NO_LIMIT = -1
-    return quota != NO_LIMIT
+    return quota['limit'] != NO_LIMIT
 
 
-def is_available(resource, resource_quota, network_func):
-    qnt_resources = len(list(network_func()))
-    availability = resource_quota - qnt_resources
+def is_available(resource, resource_quota):
+    availability = resource_quota['limit'] - resource_quota['used']
     if availability <= 0:
         LOG.error("Quota exceeded for resource: %s", resource)
         return False
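A quick usage sketch of the reworked helpers against quota_details-shaped entries (a standalone reimplementation for illustration, matching the test fixture above):

NO_LIMIT = -1  # Neutron reports -1 for unlimited quotas


def has_limit(quota):
    return quota['limit'] != NO_LIMIT


def is_available(resource, resource_quota):
    return resource_quota['limit'] - resource_quota['used'] > 0


ports = {'used': 250, 'limit': 500, 'reserved': 0}
subnetpools = {'used': 0, 'limit': -1, 'reserved': 0}
assert has_limit(ports) and is_available('ports', ports)
assert not has_limit(subnetpools)  # unlimited, nothing to check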
@@ -0,0 +1,9 @@
+---
+deprecations:
+  - |
+    The configuration sections ``[namespace_handler_caching]``,
+    ``[np_handler_caching]`` and ``[vif_handler_caching]`` have been
+    deprecated because the quota usage calculation for readiness checks has
+    been simplified. Instead of counting Neutron objects (ports, security
+    groups, subnets and networks), the quota_details extension is used, which
+    reports used, limit and reserved counts per resource, making caching unnecessary.
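As a consequence, operators can drop the deprecated sections from kuryr.conf; a section such as the following (option names taken from the removed code above, values illustrative) no longer has any effect on readiness checks:

[namespace_handler_caching]
caching = True
cache_time = 120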