Properly handle more states from HAProxy health messages

DRAIN / MAINT are handled now.
Also, transitional UP/DOWN states report correctly now.

Change-Id: I8b2bf54de6f79c98822689e48f6ec0b65c6296c5
Closes-Bug: #1708042
This commit is contained in:
Adam Harwell 2017-07-26 19:35:24 -07:00
parent 4bbc3fa885
commit 12135dcc92
8 changed files with 215 additions and 20 deletions

View File

@ -466,6 +466,8 @@ Operating Status Codes
| ONLINE | - Entity is operating normally |
| | - All pool members are healthy |
+------------+--------------------------------------------------------------+
| DRAINING | - The member is not accepting new connections |
+------------+--------------------------------------------------------------+
| OFFLINE | - Entity is administratively disabled |
+------------+--------------------------------------------------------------+
| DEGRADED | - One or more of the entity's components are in ERROR |

View File

@ -105,7 +105,7 @@ class HAProxyQuery(object):
{<pool-name>: {
'uuid': <uuid>,
'status': 'UP'|'DOWN',
'members': [<name>: 'UP'|'DOWN'] }}
'members': [<name>: 'UP'|'DOWN'|'DRAIN'|'no check'] }}
"""
results = self.show_stat(object_type=6) # servers + pool
@ -114,10 +114,10 @@ class HAProxyQuery(object):
for line in results:
# pxname: pool, svname: server_name, status: status
# All the way up is UP, otherwise call it DOWN
if (line['status'] != consts.UP and
line['status'] != consts.NO_CHECK):
line['status'] = consts.DOWN
# Due to a bug in some versions of HAProxy, DRAIN mode isn't
# calculated correctly, but we can spoof the correct value here.
if line['status'] == consts.UP and line['weight'] == 0:
line['status'] = consts.DRAIN
if line['pxname'] not in final_results:
final_results[line['pxname']] = dict(members={})

View File

@ -105,10 +105,12 @@ ONLINE = 'ONLINE'
OFFLINE = 'OFFLINE'
DEGRADED = 'DEGRADED'
ERROR = 'ERROR'
DRAINING = 'DRAINING'
NO_MONITOR = 'NO_MONITOR'
OPERATING_STATUS = 'operating_status'
PROVISIONING_STATUS = 'provisioning_status'
SUPPORTED_OPERATING_STATUSES = (ONLINE, OFFLINE, DEGRADED, ERROR, NO_MONITOR)
SUPPORTED_OPERATING_STATUSES = (ONLINE, OFFLINE, DEGRADED, ERROR, DRAINING,
NO_MONITOR)
AMPHORA_VM = 'VM'
SUPPORTED_AMPHORA_TYPES = (AMPHORA_VM,)
@ -345,9 +347,14 @@ DOWN = 'DOWN'
HAPROXY_BACKEND_STATUSES = (UP, DOWN)
DRAIN = 'DRAIN'
MAINT = 'MAINT'
NO_CHECK = 'no check'
HAPROXY_MEMBER_STATUSES = (UP, DOWN, NO_CHECK)
# DRAIN = member is weight 0 and is in draining mode
# MAINT = member is downed for maintenance? not sure when this happens
# NO_CHECK = no health monitor is enabled
HAPROXY_MEMBER_STATUSES = (UP, DOWN, DRAIN, MAINT, NO_CHECK)
# Quota Constants
QUOTA_UNLIMITED = -1

View File

@ -58,6 +58,8 @@ class UpdateHealthDb(object):
entity_type, entity_id, entity.operating_status,
new_op_status)
repo.update(session, entity_id, operating_status=new_op_status)
if new_op_status == constants.DRAINING:
new_op_status = constants.ONLINE
message.update({constants.OPERATING_STATUS: new_op_status})
if self.sync_prv_status:
LOG.debug("%s %s provisioning_status %s. Updating db and sending"
@ -167,14 +169,20 @@ class UpdateHealthDb(object):
for member_id, status in members.items():
member_status = None
if status == constants.UP:
# Member status can be "UP" or "UP #/#" (transitional)
if status.startswith(constants.UP):
member_status = constants.ONLINE
elif status == constants.DOWN:
# Member status can be "DOWN" or "DOWN #/#" (transitional)
elif status.startswith(constants.DOWN):
member_status = constants.ERROR
if pool_status == constants.ONLINE:
pool_status = constants.DEGRADED
if lb_status == constants.ONLINE:
lb_status = constants.DEGRADED
elif status == constants.DRAIN:
member_status = constants.DRAINING
elif status == constants.MAINT:
member_status = constants.OFFLINE
elif status == constants.NO_CHECK:
member_status = constants.NO_MONITOR
else:

View File

@ -0,0 +1,34 @@
# Copyright 2017 GoDaddy
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Add DRAINING operating status
Revision ID: 4aeb9e23ad43
Revises: e6672bda93bf
Create Date: 2017-07-27 00:54:07.128617
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '4aeb9e23ad43'
down_revision = 'e6672bda93bf'
def upgrade():
bind = op.get_bind()
md = sa.MetaData()
sa.Table('operating_status', md, autoload=True, autoload_with=bind)
op.bulk_insert(md.tables['operating_status'], [{'name': 'DRAINING'}])

View File

@ -18,6 +18,7 @@ import mock
import six
from octavia.amphorae.backends.utils import haproxy_query as query
from octavia.common import constants
import octavia.tests.unit.base as base
STATS_SOCKET_SAMPLE = (
@ -38,6 +39,10 @@ STATS_SOCKET_SAMPLE = (
"1,5,1,,0,,2,0,,0,L4TOUT,,30000,,,,,,,0,,,,0,0,,,,,-1,,,0,0,0,0,\n"
"tcp-servers,id-34836,0,0,0,0,,0,0,0,,0,,0,0,0,0,UP,1,1,0,1,1,552,552,,"
"1,5,2,,0,,2,0,,0,L4TOUT,,30001,,,,,,,0,,,,0,0,,,,,-1,,,0,0,0,0,\n"
"tcp-servers,id-34839,0,0,0,0,,0,0,0,,0,,0,0,0,0,DRAIN,0,1,0,0,0,552,0,,"
"1,5,2,,0,,2,0,,0,L7OK,,30001,,,,,,,0,,,,0,0,,,,,-1,,,0,0,0,0,\n"
"tcp-servers,id-34842,0,0,0,0,,0,0,0,,0,,0,0,0,0,MAINT,0,1,0,0,0,552,0,,"
"1,5,2,,0,,2,0,,0,L7OK,,30001,,,,,,,0,,,,0,0,,,,,-1,,,0,0,0,0,\n"
"tcp-servers,BACKEND,0,0,0,0,200,0,0,0,0,0,,0,0,0,0,UP,0,0,0,,1,552,552"
",,1,5,0,,0,,1,0,,0,,,,,,,,,,,,,,0,0,0,0,0,0,-1,,,0,0,0,0,"
)
@ -90,17 +95,19 @@ class QueryTestCase(base.TestCase):
query_mock.return_value = STATS_SOCKET_SAMPLE
self.assertEqual(
{'tcp-servers': {
'status': 'UP',
'status': constants.UP,
'uuid': 'tcp-servers',
'members':
{'id-34833': 'UP',
'id-34836': 'UP'}},
{'id-34833': constants.UP,
'id-34836': constants.UP,
'id-34839': constants.DRAIN,
'id-34842': constants.MAINT}},
'http-servers': {
'status': 'DOWN',
'status': constants.DOWN,
'uuid': 'http-servers',
'members':
{'id-34821': 'DOWN',
'id-34824': 'DOWN'}}},
{'id-34821': constants.DOWN,
'id-34824': constants.DOWN}}},
self.q.get_pool_status()
)

View File

@ -145,6 +145,120 @@ class TestUpdateHealthDb(base.TestCase):
self.hm.update_health(health)
def test_update_health_member_drain(self):
health = {
"id": self.FAKE_UUID_1,
"listeners": {
"listener-id-1": {
"status": constants.OPEN,
"pools": {
"pool-id-1": {
"status": constants.UP,
"members": {"member-id-1": constants.DRAIN}}}}}}
self.mock_session.return_value = 'blah'
self.hm.update_health(health)
self.assertTrue(self.amphora_health_repo.replace.called)
# test listener, member
for listener_id, listener in six.iteritems(
health.get('listeners', {})):
self.listener_repo.update.assert_any_call(
'blah', listener_id, operating_status=constants.ONLINE)
for pool_id, pool in six.iteritems(listener.get('pools', {})):
self.hm.pool_repo.update.assert_any_call(
'blah', pool_id, operating_status=constants.ONLINE)
for member_id, member in six.iteritems(
pool.get('members', {})):
self.member_repo.update.assert_any_call(
'blah', member_id,
operating_status=constants.DRAINING)
self.hm.listener_repo.count.return_value = 2
self.hm.update_health(health)
def test_update_health_member_maint(self):
health = {
"id": self.FAKE_UUID_1,
"listeners": {
"listener-id-1": {
"status": constants.OPEN,
"pools": {
"pool-id-1": {
"status": constants.UP,
"members": {"member-id-1": constants.MAINT}}}}}}
self.mock_session.return_value = 'blah'
self.hm.update_health(health)
self.assertTrue(self.amphora_health_repo.replace.called)
# test listener, member
for listener_id, listener in six.iteritems(
health.get('listeners', {})):
self.listener_repo.update.assert_any_call(
'blah', listener_id, operating_status=constants.ONLINE)
for pool_id, pool in six.iteritems(listener.get('pools', {})):
self.hm.pool_repo.update.assert_any_call(
'blah', pool_id, operating_status=constants.ONLINE)
for member_id, member in six.iteritems(
pool.get('members', {})):
self.member_repo.update.assert_any_call(
'blah', member_id,
operating_status=constants.OFFLINE)
self.hm.listener_repo.count.return_value = 2
self.hm.update_health(health)
def test_update_health_member_unknown(self):
health = {
"id": self.FAKE_UUID_1,
"listeners": {
"listener-id-1": {
"status": constants.OPEN,
"pools": {
"pool-id-1": {
"status": constants.UP,
"members": {"member-id-1": "blah"}}}}}}
self.mock_session.return_value = 'blah'
self.hm.update_health(health)
self.assertTrue(self.amphora_health_repo.replace.called)
# test listener, member
for listener_id, listener in six.iteritems(
health.get('listeners', {})):
self.listener_repo.update.assert_any_call(
'blah', listener_id, operating_status=constants.ONLINE)
for pool_id, pool in six.iteritems(listener.get('pools', {})):
self.hm.pool_repo.update.assert_any_call(
'blah', pool_id, operating_status=constants.ONLINE)
self.assertTrue(not self.member_repo.update.called)
self.hm.listener_repo.count.return_value = 2
self.hm.update_health(health)
def test_update_health_member_down(self):
health = {
@ -334,11 +448,23 @@ class TestUpdateHealthDb(base.TestCase):
}
}
},
"listener-id-4": {"status": "bogus", "pools": {
"pool-id-4": {"status": "bogus",
"members": {"member-id-4": "bogus"}
}
}
"listener-id-4": {
"status": constants.OPEN,
"pools": {
"pool-id-4": {
"status": constants.UP,
"members": {"member-id-4": constants.DRAINING}
}
}
},
"listener-id-5": {
"status": "bogus",
"pools": {
"pool-id-5": {
"status": "bogus",
"members": {"member-id-5": "bogus"}
}
}
}
}
}
@ -358,6 +484,8 @@ class TestUpdateHealthDb(base.TestCase):
'blah', "pool-id-2", operating_status=constants.ONLINE)
self.pool_repo.update.assert_any_call(
'blah', "pool-id-3", operating_status=constants.DEGRADED)
self.pool_repo.update.assert_any_call(
'blah', "pool-id-4", operating_status=constants.ONLINE)
# Test code paths where objects are not found in the database
def test_update_health_not_found(self):

View File

@ -0,0 +1,9 @@
---
fixes:
- |
Some versions of HAProxy incorrectly reported nodes in DRAIN status as
being UP, and Octavia code was written around this incorrect reporting.
This has been fixed in some versions of HAProxy and is now handled
properly in Octavia as well. Now it is possible for members to be in the
status DRAINING. Note that this is masked when statuses are forwarded to
neutron-lbaas in the eventstream, so no compatibility change is necessary.