From baf5356a4fb61590a95f64a63c0dcabfebb3baaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Suchomel?= Date: Tue, 9 Apr 2019 10:37:46 +0200 Subject: [PATCH] Use nova's ping method to find out if the service is alive Currently there is fake rpc call "pod_health_probe_method_ignore_errors" that is passed to the service, just to find out if it is responding. Because such method does not exist, it is needed to catch and handle the exception that is inevitably thrown by the service. While this is technically working correctly, the exceptions pollute the log files and make it harder for user to see possible real errors. This is how the error looks like: ERROR oslo_messaging.rpc.server [-] Exception during message handling: oslo_messaging.rpc.dispatcher.UnsupportedVersion: Endpoint does not support RPC version 1.0. Attempted method: pod_health_probe_method_ignore_errors ERROR oslo_messaging.rpc.server Traceback (most recent call last): ERROR oslo_messaging.rpc.server File "/var/lib/openstack/lib/python3.6/site-packages/oslo_messaging/rpc/server.py", line 163, in _process_incoming ERROR oslo_messaging.rpc.server res = self.dispatcher.dispatch(message) ERROR oslo_messaging.rpc.server File "/var/lib/openstack/lib/python3.6/site-packages/oslo_messaging/rpc/dispatcher.py", line 276, in dispatch ERROR oslo_messaging.rpc.server raise UnsupportedVersion(version, method=method) ERROR oslo_messaging.rpc.server oslo_messaging.rpc.dispatcher.UnsupportedVersion: Endpoint does not support RPC version 1.0. Attempted method: pod_health_probe_method_ignore_errors This situation is new since https://review.openstack.org/#/c/639711/ which (correctly) increased the default level of logging. Before 639711 error messages from oslo (both real and ones that could be ignored) were not present in nova logs at all. Fortunatelly, nova's BaseAPI class provides 'ping' method that is can be used for this basic purpose by all nova components. Change-Id: I0062e74bed399206becb8d9e00f9ec805da864a3 --- nova/templates/bin/_health-probe.py.tpl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/nova/templates/bin/_health-probe.py.tpl b/nova/templates/bin/_health-probe.py.tpl index 6434e45c9..4c1aa453f 100644 --- a/nova/templates/bin/_health-probe.py.tpl +++ b/nova/templates/bin/_health-probe.py.tpl @@ -17,8 +17,8 @@ """ Health probe script for OpenStack service that uses RPC/unix domain socket for communication. Check's the RPC tcp socket status on the process and send -message to service through rpc call method and expects a reply. It is expected -to receive failure from the service's RPC server as the method does not exist. +message to service through rpc call method and expects a reply. +Use nova's ping method that is designed just for such simple purpose. Script returns failure to Kubernetes only when a. TCP socket for the RPC communication are not established. @@ -28,7 +28,7 @@ Script returns failure to Kubernetes only when sys.stderr.write() writes to pod's events on failures. Usage example for Nova Compute: -# python health-probe-rpc.py --config-file /etc/nova/nova.conf \ +# python health-probe.py --config-file /etc/nova/nova.conf \ # --service-queue-name compute """ @@ -50,12 +50,15 @@ def check_service_status(transport): """Verify service status. Return success if service consumes message""" try: target = oslo_messaging.Target(topic=cfg.CONF.service_queue_name, - server=socket.gethostname()) + server=socket.gethostname(), + namespace='baseapi', + version="1.1") client = oslo_messaging.RPCClient(transport, target, timeout=60, retry=2) client.call(context.RequestContext(), - 'pod_health_probe_method_ignore_errors') + 'ping', + arg=None) except oslo_messaging.exceptions.MessageDeliveryFailure: # Log to pod events sys.stderr.write("Health probe unable to reach message bus")