From 9743f6b3c9c33078a2b45d8ac3bd3d2d3878326c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Dulko?=
Date: Fri, 4 Sep 2020 13:41:38 +0200
Subject: [PATCH] Civilize logging vol 2

This is another attempt at getting the useless tracebacks out of the
logs at any level higher than debug. In particular:

* All pool logs regarding "Kuryr-controller not yet ready to *" are now
  on debug level.
* The ugly ResourceNotReady raised from _populate_pool is now
  suppressed if the method is run from eventlet.spawn(), which prevents
  that exception from being logged by eventlet.
* ResourceNotReady will only print namespace/name or name, not the full
  resource representation.
* ConnectionResetError is suppressed at the Retry handler level just
  like any other K8sClientException.

Change-Id: Ic6e6ee556f36ef4fe3429e8e1e4a2ddc7e8251dc
---
 .../controller/drivers/network_policy.py      |  2 +
 .../controller/drivers/vif_pool.py            | 47 +++++++++++--------
 kuryr_kubernetes/controller/handlers/lbaas.py | 12 ++---
 kuryr_kubernetes/exceptions.py                | 14 +++++-
 kuryr_kubernetes/handlers/retry.py            |  5 +-
 .../unit/controller/drivers/test_vif_pool.py  | 21 +++++++++
 kuryr_kubernetes/utils.py                     |  9 ++--
 7 files changed, 78 insertions(+), 32 deletions(-)

diff --git a/kuryr_kubernetes/controller/drivers/network_policy.py b/kuryr_kubernetes/controller/drivers/network_policy.py
index 54412455d..ce2f74b33 100644
--- a/kuryr_kubernetes/controller/drivers/network_policy.py
+++ b/kuryr_kubernetes/controller/drivers/network_policy.py
@@ -676,6 +676,8 @@ class NetworkPolicyDriver(base.NetworkPolicyDriver):
                 constants.K8S_API_CRD_NAMESPACES, namespace)
             netpolicy_crd = self.kubernetes.post(url, netpolicy_crd)
+        except exceptions.K8sNamespaceTerminating:
+            raise
         except exceptions.K8sClientException:
             LOG.exception("Kubernetes Client Exception creating "
                           "KuryrNetworkPolicy CRD.")
diff --git a/kuryr_kubernetes/controller/drivers/vif_pool.py b/kuryr_kubernetes/controller/drivers/vif_pool.py
index 249d4d3d3..5f7a081a1 100644
--- a/kuryr_kubernetes/controller/drivers/vif_pool.py
+++ b/kuryr_kubernetes/controller/drivers/vif_pool.py
@@ -193,7 +193,7 @@ class BaseVIFPool(base.VIFPoolDriver, metaclass=abc.ABCMeta):
 
     def request_vif(self, pod, project_id, subnets, security_groups):
         if not self._recovered_pools:
-            LOG.info("Kuryr-controller not yet ready to handle new pods.")
+            LOG.debug("Kuryr-controller not yet ready to handle new pods.")
             raise exceptions.ResourceNotReady(pod)
         try:
             host_addr = self._get_host_addr(pod)
@@ -206,22 +206,29 @@ class BaseVIFPool(base.VIFPoolDriver, metaclass=abc.ABCMeta):
             return self._get_port_from_pool(pool_key, pod, subnets,
                                             tuple(sorted(security_groups)))
         except exceptions.ResourceNotReady:
-            LOG.debug("Ports pool does not have available ports: %s",
-                      pool_key)
+            LOG.debug("Ports pool does not have available ports: %s", pool_key)
+            # NOTE(dulek): We're passing raise_not_ready=False because this
+            #              will be run outside of handler's thread, so raising
+            #              it will only result in an ugly log from eventlet.
             eventlet.spawn(self._populate_pool, pool_key, pod, subnets,
-                           tuple(sorted(security_groups)))
+                           tuple(sorted(security_groups)),
+                           raise_not_ready=False)
             raise
 
     def _get_port_from_pool(self, pool_key, pod, subnets, security_groups):
         raise NotImplementedError()
 
-    def _populate_pool(self, pool_key, pod, subnets, security_groups):
+    def _populate_pool(self, pool_key, pod, subnets, security_groups,
+                       raise_not_ready=True):
         # REVISIT(ltomasbo): Drop the subnets parameter and get the information
         # from the pool_key, which will be required when multi-network is
         # supported
         if not self._recovered_pools:
-            LOG.info("Kuryr-controller not yet ready to populate pools.")
-            raise exceptions.ResourceNotReady(pod)
+            LOG.debug("Kuryr-controller not yet ready to populate pools.")
+            if raise_not_ready:
+                raise exceptions.ResourceNotReady(pod)
+            else:
+                return
         now = time.time()
         last_update = 0
         pool_updates = self._last_update.get(pool_key)
@@ -233,7 +240,7 @@ class BaseVIFPool(base.VIFPoolDriver, metaclass=abc.ABCMeta):
                LOG.info("Not enough time since the last pool update")
                return
        except AttributeError:
-            LOG.info("Kuryr-controller not yet ready to populate pools")
+            LOG.debug("Kuryr-controller not yet ready to populate pools.")
            return
        self._last_update[pool_key] = {security_groups: now}
 
@@ -258,7 +265,7 @@ class BaseVIFPool(base.VIFPoolDriver, metaclass=abc.ABCMeta):
     def release_vif(self, pod, vif, project_id, security_groups,
                     host_addr=None):
         if not self._recovered_pools:
-            LOG.info("Kuryr-controller not yet ready to remove pods.")
+            LOG.debug("Kuryr-controller not yet ready to remove pods.")
             raise exceptions.ResourceNotReady(pod)
         if not host_addr:
             host_addr = self._get_host_addr(pod)
@@ -271,7 +278,7 @@ class BaseVIFPool(base.VIFPoolDriver, metaclass=abc.ABCMeta):
             self._existing_vifs[vif.id] = vif
             self._recyclable_ports[vif.id] = pool_key
         except AttributeError:
-            LOG.info("Kuryr-controller is not ready to handle the pools yet.")
+            LOG.debug("Kuryr-controller is not ready to handle the pools yet.")
             raise exceptions.ResourceNotReady(pod)
 
     def _return_ports_to_pool(self):
@@ -522,8 +529,8 @@ class NeutronVIFPool(BaseVIFPool):
     @lockutils.synchronized('return_to_pool_baremetal')
     def _trigger_return_to_pool(self):
         if not self._recovered_pools:
-            LOG.info("Kuryr-controller not yet ready to return ports to "
-                     "pools.")
+            LOG.debug("Kuryr-controller not yet ready to return ports to "
+                      "pools.")
             return
         os_net = clients.get_network_client()
         sg_current = {}
@@ -624,8 +631,8 @@ class NeutronVIFPool(BaseVIFPool):
 
     def delete_network_pools(self, net_id):
         if not self._recovered_pools:
-            LOG.info("Kuryr-controller not yet ready to delete network "
-                     "pools.")
+            LOG.debug("Kuryr-controller not yet ready to delete network "
+                      "pools.")
             raise exceptions.ResourceNotReady(net_id)
         os_net = clients.get_network_client()
 
@@ -690,7 +697,7 @@ class NestedVIFPool(BaseVIFPool):
 
     def release_vif(self, pod, vif, project_id, security_groups):
         if not self._recovered_pools:
-            LOG.info("Kuryr-controller not yet ready to remove pods.")
+            LOG.debug("Kuryr-controller not yet ready to remove pods.")
             raise exceptions.ResourceNotReady(pod)
         try:
             host_addr = self._get_host_addr(pod)
@@ -775,8 +782,8 @@ class NestedVIFPool(BaseVIFPool):
     @lockutils.synchronized('return_to_pool_nested')
     def _trigger_return_to_pool(self):
         if not self._recovered_pools:
-            LOG.info("Kuryr-controller not yet ready to return ports to "
-                     "pools.")
+            LOG.debug("Kuryr-controller not yet ready to return ports to "
+                      "pools.")
             return
         os_net = clients.get_network_client()
         sg_current = {}
@@ -942,7 +949,7 @@ class NestedVIFPool(BaseVIFPool):
     @lockutils.synchronized('return_to_pool_nested')
     def populate_pool(self, trunk_ip, project_id, subnets, security_groups):
         if not self._recovered_pools:
-            LOG.info("Kuryr-controller not yet ready to populate pools.")
+            LOG.debug("Kuryr-controller not yet ready to populate pools.")
             raise exceptions.ResourceNotReady(trunk_ip)
         pool_key = self._get_pool_key(trunk_ip, project_id, None, subnets)
         pools = self._available_ports_pools.get(pool_key)
@@ -990,8 +997,8 @@
 
     def delete_network_pools(self, net_id):
         if not self._recovered_pools:
-            LOG.info("Kuryr-controller not yet ready to delete network "
-                     "pools.")
+            LOG.debug("Kuryr-controller not yet ready to delete network "
+                      "pools.")
             raise exceptions.ResourceNotReady(net_id)
         os_net = clients.get_network_client()
         # NOTE(ltomasbo): Note the pods should already be deleted, but their
diff --git a/kuryr_kubernetes/controller/handlers/lbaas.py b/kuryr_kubernetes/controller/handlers/lbaas.py
index 45f6dd62d..a04242695 100644
--- a/kuryr_kubernetes/controller/handlers/lbaas.py
+++ b/kuryr_kubernetes/controller/handlers/lbaas.py
@@ -160,10 +160,10 @@ class ServiceHandler(k8s_base.ResourceEventHandler):
                 loadbalancer_crd)
         except k_exc.K8sConflict:
             raise k_exc.ResourceNotReady(svc_name)
+        except k_exc.K8sNamespaceTerminating:
+            raise
         except k_exc.K8sClientException:
-            LOG.exception("Kubernetes Client Exception creating "
-                          "kuryrloadbalancer CRD. %s"
-                          % k_exc.K8sClientException)
+            LOG.exception("Exception when creating KuryrLoadBalancer CRD.")
             raise
 
         return loadbalancer_crd
@@ -354,10 +354,10 @@ class EndpointsHandler(k8s_base.ResourceEventHandler):
                 k_const.K8S_API_CRD_NAMESPACES, namespace), loadbalancer_crd)
         except k_exc.K8sConflict:
             raise k_exc.ResourceNotReady(loadbalancer_crd)
+        except k_exc.K8sNamespaceTerminating:
+            raise
         except k_exc.K8sClientException:
-            LOG.exception("Kubernetes Client Exception creating "
-                          "kuryrloadbalancer CRD. %s" %
-                          k_exc.K8sClientException)
+            LOG.exception("Exception when creating KuryrLoadBalancer CRD.")
             raise
 
     def _update_crd_spec(self, loadbalancer_crd, endpoints):
diff --git a/kuryr_kubernetes/exceptions.py b/kuryr_kubernetes/exceptions.py
index d42718121..b33db59eb 100644
--- a/kuryr_kubernetes/exceptions.py
+++ b/kuryr_kubernetes/exceptions.py
@@ -13,6 +13,8 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+from kuryr_kubernetes import utils
+
 
 class K8sClientException(Exception):
     pass
@@ -24,8 +26,16 @@ class IntegrityError(RuntimeError):
 
 class ResourceNotReady(Exception):
     def __init__(self, resource):
-        super(ResourceNotReady, self).__init__("Resource not ready: %r"
-                                                % resource)
+        msg = resource
+        if type(resource) == dict:
+            if resource.get('metadata', {}).get('name', None):
+                res_name = utils.get_res_unique_name(resource)
+                kind = resource.get('kind')
+                if kind:
+                    msg = f'{kind} {res_name}'
+                else:
+                    msg = res_name
+        super(ResourceNotReady, self).__init__("Resource not ready: %r" % msg)
 
 
 class K8sResourceNotFound(K8sClientException):
diff --git a/kuryr_kubernetes/handlers/retry.py b/kuryr_kubernetes/handlers/retry.py
index 76f15db37..a0255f341 100644
--- a/kuryr_kubernetes/handlers/retry.py
+++ b/kuryr_kubernetes/handlers/retry.py
@@ -16,6 +16,8 @@
 import itertools
 import time
 
+import requests
+
 from openstack import exceptions as os_exc
 from oslo_log import log as logging
 from oslo_utils import excutils
@@ -70,7 +72,8 @@ class Retry(base.EventHandler):
                               "retry as the object %s has already "
                               "been deleted.", obj_link)
                     return
-                except exceptions.K8sClientException:
+                except (exceptions.K8sClientException,
+                        requests.ConnectionError):
                     LOG.debug("Kubernetes client error getting the "
                               "object. Continuing with handler "
                               "execution.")
diff --git a/kuryr_kubernetes/tests/unit/controller/drivers/test_vif_pool.py b/kuryr_kubernetes/tests/unit/controller/drivers/test_vif_pool.py
index 52809852f..ba68cfe04 100644
--- a/kuryr_kubernetes/tests/unit/controller/drivers/test_vif_pool.py
+++ b/kuryr_kubernetes/tests/unit/controller/drivers/test_vif_pool.py
@@ -200,6 +200,27 @@ class BaseVIFPool(test_base.TestCase):
                           tuple(security_groups))
         m_driver._drv_vif.request_vifs.assert_not_called()
 
+    @ddt.data((neutron_vif.NeutronPodVIFDriver),
+              (nested_vlan_vif.NestedVlanPodVIFDriver))
+    def test__populate_pool_not_ready_dont_raise(self, m_vif_driver):
+        cls = vif_pool.BaseVIFPool
+        m_driver = mock.MagicMock(spec=cls)
+
+        cls_vif_driver = m_vif_driver
+        vif_driver = mock.MagicMock(spec=cls_vif_driver)
+        m_driver._drv_vif = vif_driver
+
+        pod = mock.sentinel.pod
+        project_id = str(uuid.uuid4())
+        subnets = mock.sentinel.subnets
+        security_groups = 'test-sg'
+        pool_key = (mock.sentinel.host_addr, project_id)
+        m_driver._recovered_pools = False
+
+        cls._populate_pool(m_driver, pool_key, pod, subnets,
+                           tuple(security_groups), raise_not_ready=False)
+        m_driver._drv_vif.request_vifs.assert_not_called()
+
     @mock.patch('time.time', return_value=0)
     def test__populate_pool_no_update(self, m_time):
         cls = vif_pool.BaseVIFPool
diff --git a/kuryr_kubernetes/utils.py b/kuryr_kubernetes/utils.py
index 5aeabf2f1..24777441f 100644
--- a/kuryr_kubernetes/utils.py
+++ b/kuryr_kubernetes/utils.py
@@ -101,11 +101,14 @@ def get_res_unique_name(resource):
     """Returns a unique name for the resource like pod or CRD.
 
     It returns a unique name for the resource composed of its name and the
-    namespace it is running on.
+    namespace it is created in, or just the name for cluster-scoped resources.
 
-    :returns: String with namespace/name of the resource
+    :returns: String with namespace/name or name of the resource
     """
-    return "%(namespace)s/%(name)s" % resource['metadata']
+    try:
+        return "%(namespace)s/%(name)s" % resource['metadata']
+    except KeyError:
+        return "%(name)s" % resource['metadata']
 
 
 def check_suitable_multi_pool_driver_opt(pool_driver, pod_driver):
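
A quick illustration (not part of the patch itself) of the messages the
reworked ResourceNotReady produces with the get_res_unique_name fallback
above. This is a hedged sketch: it assumes the kuryr_kubernetes package is
importable with this patch applied, and the resource dicts below are made up
for the example.

    # Illustration only; exercises the logic added in exceptions.py above.
    from kuryr_kubernetes import exceptions

    # Namespaced resource with a kind: message becomes "<kind> <namespace>/<name>".
    pod = {'kind': 'Pod',
           'metadata': {'name': 'nginx', 'namespace': 'default'}}
    print(exceptions.ResourceNotReady(pod))
    # Resource not ready: 'Pod default/nginx'

    # Cluster-scoped resource (no metadata.namespace): falls back to name only.
    ns = {'kind': 'Namespace', 'metadata': {'name': 'myproject'}}
    print(exceptions.ResourceNotReady(ns))
    # Resource not ready: 'Namespace myproject'

    # Non-dict resources (e.g. the trunk_ip or net_id passed by the pool
    # drivers) are printed as before.
    print(exceptions.ResourceNotReady('10.0.0.42'))
    # Resource not ready: '10.0.0.42'

Either way the full resource body no longer ends up in the log line, which is
what keeps the non-debug logs readable.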