From 67d7bd4b7e4eb05b5e72e95661eea40341ebe078 Mon Sep 17 00:00:00 2001 From: Jake Yip Date: Mon, 20 Jan 2020 21:53:59 +1100 Subject: [PATCH] Fix Field `health_status_reason[api]' cannot be None` If nodes in a cluster were deleted, periodic health checks for the cluster will time out. This will result in lots of logs like the following: ERROR oslo.service.loopingcall ValueError: Field `health_status_reason[api]' cannot be None The timeout is successfully caught by exception handling in this part of the code. However, it is thrown as a MaxRetryError exception, which does not have body or message attrs. E.g. MaxRetryError("HTTPSConnectionPool(host='115.146.81.72', port=6443): Max retries exceeded with url: /healthz (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 110] ETIMEDOUT',))",) This means health_status_reason will be a dict like `{'api': None}`. Saving this using oslo_versionedobjects will throw a ValueError, because although the dict itself is coerced as a `Dict(default=,nullable=True)`, the None value will be coerced as a `String(default=,nullable=False)` and that is not nullable. Task: 38316 Change-Id: I8fd8d363284b06cf0bfba45d5845ba8687a2c783 (cherry picked from commit 30436350afb88dc3e62af872aa2b39cf439e8eae) (cherry picked from commit 62c5de074391c3922504f0e185ec0c9d4af633bd) --- magnum/drivers/common/k8s_monitor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/magnum/drivers/common/k8s_monitor.py b/magnum/drivers/common/k8s_monitor.py index 7739569345..fe188418c2 100644 --- a/magnum/drivers/common/k8s_monitor.py +++ b/magnum/drivers/common/k8s_monitor.py @@ -233,6 +233,7 @@ class K8sMonitor(monitors.MonitorBase): if not api_status: api_status = (getattr(exp_api, 'body', None) or getattr(exp_api, 'message', None)) - health_status_reason['api'] = api_status + if api_status is not None: + health_status_reason['api'] = api_status return health_status, health_status_reason