From eef66c4128ebdbe8a4815d2f09af6c0af4a36b72 Mon Sep 17 00:00:00 2001
From: David Shrewsbury
Date: Wed, 24 Jul 2013 14:46:53 -0400
Subject: [PATCH] [WORKER] Return node status in STATS message

The message returned to the statsd daemon will now contain a list
of all nodes defined in the haproxy.cfg file and their status.
Status is currently as reported from HAProxy (UP/DOWN/no check/etc.)

Fields added to the returned JSON response:

  "nodes": [ { "id": "1234", "status": "UP" } ]

The 'id' part of the JSON response will contain either the unique
ID of the node (as defined during the UPDATE message), or "serverN"
for older configs that haven't been updated to use the node ID.

Change-Id: I794b4901f542d922ebccd9ece9d86236584d4b46
---
 libra/common/lbstats.py                         |  8 ++++++++
 libra/worker/controller.py                      |  9 +++++++--
 libra/worker/drivers/haproxy/driver.py          |  2 +-
 libra/worker/drivers/haproxy/query.py           | 17 +++++++++++++----
 libra/worker/drivers/haproxy/ubuntu_services.py |  8 ++++----
 5 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/libra/common/lbstats.py b/libra/common/lbstats.py
index 7b5b7f0e..faee48f7 100644
--- a/libra/common/lbstats.py
+++ b/libra/common/lbstats.py
@@ -22,6 +22,7 @@ class LBStatistics(object):
         self.stats = {}
         self.bytes_out = 0L
         self.bytes_in = 0L
+        self.nodes = dict()
         self.utc_timestamp = datetime.datetime.utcnow()
 
     @property
@@ -54,3 +55,10 @@ class LBStatistics(object):
         if not isinstance(value, datetime.datetime):
             raise TypeError("Must be a datetime.datetime: '%s'" % value)
         self._utc_ts = value
+
+    def add_node_status(self, node, status):
+        self.nodes[node] = status
+
+    def node_status_map(self):
+        """ Return a dictionary, indexed by node ID, of the node status """
+        return self.nodes
diff --git a/libra/worker/controller.py b/libra/worker/controller.py
index 5e82575b..d2c7078e 100644
--- a/libra/worker/controller.py
+++ b/libra/worker/controller.py
@@ -50,7 +50,6 @@ class LBaaSController(object):
             return self.msg
 
         action = self.msg[self.ACTION_FIELD].upper()
-        self.logger.info("Requested action: %s" % action)
 
         try:
             if action == 'UPDATE':
@@ -353,7 +352,7 @@ class LBaaSController(object):
 
         try:
             # TODO: Do something with the returned statistics
-            self.driver.get_stats(protocol=None)
+            stats = self.driver.get_stats()
         except NotImplementedError:
             error = "Selected driver does not support STATS action."
             self.logger.error(error)
@@ -368,5 +367,11 @@ class LBaaSController(object):
             self.msg[self.RESPONSE_FIELD] = self.RESPONSE_FAILURE
             self.msg[self.ERROR_FIELD] = str(e)
         else:
+            node_status = stats.node_status_map()
+            self.msg['nodes'] = []
+            for node in node_status.keys():
+                self.msg['nodes'].append({'id': node,
+                                          'status': node_status[node]})
             self.msg[self.RESPONSE_FIELD] = self.RESPONSE_SUCCESS
+
         return self.msg
diff --git a/libra/worker/drivers/haproxy/driver.py b/libra/worker/drivers/haproxy/driver.py
index 88fb247b..12024c02 100644
--- a/libra/worker/drivers/haproxy/driver.py
+++ b/libra/worker/drivers/haproxy/driver.py
@@ -278,7 +278,7 @@ class HAProxyDriver(LoadBalancerDriver):
         # restart, otherwise the log file will be kept open and not reappear.
         self.ossvc.syslog_restart()
 
-    def get_stats(self, protocol):
+    def get_stats(self, protocol=None):
         return self.ossvc.get_stats(protocol)
 
     def archive(self, method, params):
diff --git a/libra/worker/drivers/haproxy/query.py b/libra/worker/drivers/haproxy/query.py
index a4760d06..f1411043 100644
--- a/libra/worker/drivers/haproxy/query.py
+++ b/libra/worker/drivers/haproxy/query.py
@@ -71,21 +71,30 @@ class HAProxyQuery(object):
         list_results = results.split('\n')
         return list_results
 
-    def get_server_status(self, protocol):
+    def get_server_status(self, protocol=None):
         """
         Get status for each server for a protocol backend.
         Return a list of tuples containing server name and status.
         """
 
-        filter_string = protocol.lower() + "-servers"
+        if protocol:
+            filter_string = protocol.lower() + "-servers"
+
         results = self.show_stat(object_type=4)  # servers only
 
         final_results = []
         for line in results[1:]:
             elements = line.split(',')
-            if elements[0] != filter_string:
+            if protocol and elements[0] != filter_string:
                 next
             else:
                 # 1 - server name, 17 - status
-                final_results.append((elements[1], elements[17]))
+                # Here we look for the new server name form of "id-NNNN"
+                # where NNNN is the unique node ID. The old form could
+                # be "serverX", in which case we leave it alone.
+                if elements[1][0:3] == "id-":
+                    junk, node_id = elements[1].split('-')
+                else:
+                    node_id = elements[1]
+                final_results.append((node_id, elements[17]))
         return final_results
diff --git a/libra/worker/drivers/haproxy/ubuntu_services.py b/libra/worker/drivers/haproxy/ubuntu_services.py
index 267381e5..d11b8359 100644
--- a/libra/worker/drivers/haproxy/ubuntu_services.py
+++ b/libra/worker/drivers/haproxy/ubuntu_services.py
@@ -130,7 +130,7 @@ class UbuntuServices(ServicesBase):
         self.sudo_rm(self._config_file)
         self.sudo_rm(self._backup_config)
 
-    def get_stats(self, protocol):
+    def get_stats(self, protocol=None):
         """
         Query HAProxy socket for stats on the given protocol.
 
@@ -155,8 +155,8 @@ class UbuntuServices(ServicesBase):
         stats = LBStatistics()
         query = HAProxyQuery('/var/run/haproxy-stats.socket')
 
-        # TODO: Do something with the returned results. For now, we are
-        #       basically just treating this as a 'ping' to the process.
-        query.show_info()
+        node_status_list = query.get_server_status(protocol)
+        for node, status in node_status_list:
+            stats.add_node_status(node, status)
 
         return stats