Heat notifier: notify alarm without Octavia tags

This patch improves the heat notifier by removing the hard requirement for
Octavia tags. When Aodh is deployed in an environment where Octavia
doesn't support tags yet (before stable/stein), the heat notifier will
search for the resource by the Octavia member ID.

Note:

Both approaches need a specific Heat template definition:

1. Use Octavia tags.

    pool_member:
      type: OS::Octavia::PoolMember
      properties:
        address: {get_attr: [server, first_address]}
        pool: {get_param: pool_id}
        protocol_port: {get_param: webserver_port}
        tags: [{get_param: "OS::stack_id"}]

2. Resource search.

    outputs:
      OS::stack_id:
        value: { get_resource: pool_member }

Change-Id: Ic2328aa907d23c6b6f827661a3fb3cb26f8496fd
This commit is contained in:
Lingxian Kong 2019-08-09 15:15:11 +12:00
parent 4b93caca42
commit e2d2ebf44e
2 changed files with 99 additions and 34 deletions

View File

@ -39,12 +39,21 @@ class TrustHeatAlarmNotifier(notifier.AlarmNotifier):
member unhealthy, then update Heat stack in place. In order to do that, the
notifier needs to know:
- Heat stack ID.
- Heat top/root stack ID.
- Heat autoscaling group ID.
- The failed Octavia pool members.
The resource ID in the autoscaling group is saved in the Octavia member
tags. So, only Octavia stable/stein or later versions are supported.
In order to find which autoscaling group member the failed pool member
belongs to, two ways are supported (both require a specific definition
in the Heat template):
1. The autoscaling group member resource ID is saved in the Octavia member
tag; the user should define that using the 'tags' property of the
OS::Octavia::PoolMember resource. So, only Octavia stable/stein or later
versions are supported.
2. User customizes the autoscaling group member resource identifier
according to
https://docs.openstack.org/heat/latest/template_guide/composition.html#making-your-template-resource-more-transparent
"""
def __init__(self, conf):
@ -69,46 +78,61 @@ class TrustHeatAlarmNotifier(notifier.AlarmNotifier):
trust_id = action.username
stack_id = reason_data.get("stack_id")
asg_id = reason_data.get("asg_id")
unhealthy_members = reason_data.get("unhealthy_members", [])
unhealthy_resources = []
if not stack_id or not asg_id:
LOG.warning(
LOG.error(
"stack_id and asg_id must exist to notify alarm %s", alarm_id
)
return
resources = []
unhealthy_members = reason_data.get("unhealthy_members", [])
heat_client = aodh_keystone.get_heat_client_from_trust(
self.conf, trust_id
)
for member in unhealthy_members:
for tag in member.get("tags", []):
if uuidutils.is_uuid_like(tag):
resources.append(tag)
unhealthy_resources.append(tag)
if resources:
try:
heat_client = aodh_keystone.get_heat_client_from_trust(
self.conf, trust_id
if not unhealthy_resources:
# Fall back to search resource by the pool member ID.
for member in unhealthy_members:
target_resources = heat_client.resources.list(
stack_id, nested_depth=3, filters={"id": member["id"]})
if len(target_resources) > 0:
# There should be only one item.
unhealthy_resources.append(
target_resources[0].resource_name)
# If we still can't find expected resources, do nothing.
if not unhealthy_resources:
LOG.warning("No unhealthy resource found for the alarm %s",
alarm_id)
return
try:
for res in unhealthy_resources:
heat_client.resources.mark_unhealthy(
asg_id,
res,
True,
"unhealthy load balancer member"
)
LOG.info(
"Heat resource %(resource_id)s is marked as unhealthy "
"for alarm %(alarm_id)s",
{"resource_id": res, "alarm_id": alarm_id}
)
for res in resources:
heat_client.resources.mark_unhealthy(
asg_id,
res,
True,
"unhealthy load balancer member"
)
LOG.info(
"Heat resource %(resource_id)s is marked as unhealthy "
"for alarm %(alarm_id)s",
{"resource_id": res, "alarm_id": alarm_id}
)
heat_client.stacks.update(stack_id, existing=True)
LOG.info(
"Heat stack %(stack_id)s is updated for alarm "
"%(alarm_id)s",
{"stack_id": stack_id, "alarm_id": alarm_id}
)
except Exception as e:
LOG.exception("Failed to communicate with Heat service, "
"error: %s", six.text_type(e))
heat_client.stacks.update(stack_id, existing=True)
LOG.info(
"Heat stack %(stack_id)s is updated for alarm "
"%(alarm_id)s",
{"stack_id": stack_id, "alarm_id": alarm_id}
)
except Exception as e:
LOG.exception("Failed to communicate with Heat service for alarm "
"%s, error: %s",
alarm_id, six.text_type(e))

View File

@ -21,7 +21,7 @@ from aodh.tests.unit.notifier import base
class TestTrustHeatAlarmNotifier(base.TestNotifierBase):
@mock.patch("aodh.keystone_client.get_heat_client_from_trust")
def test_notify(self, mock_heatclient):
def test_notify_with_tags(self, mock_heatclient):
action = netutils.urlsplit("trust+autohealer://fake_trust_id:delete@")
alarm_id = "fake_alarm_id"
alarm_name = "fake_alarm_name"
@ -55,6 +55,47 @@ class TestTrustHeatAlarmNotifier(base.TestNotifierBase):
"fake_stack_id", existing=True
)
# Exercises notify() when the unhealthy pool member carries no "tags": the
# mocked Heat client's resources.list() supplies the resource whose
# resource_name is then expected to be marked unhealthy.
@mock.patch("aodh.keystone_client.get_heat_client_from_trust")
def test_notify_without_tags(self, mock_heatclient):
action = netutils.urlsplit("trust+autohealer://fake_trust_id:delete@")
alarm_id = "fake_alarm_id"
alarm_name = "fake_alarm_name"
severity = "low"
previous = "ok"
current = "alarm"
reason = "no good reason"
# Only the member "id" is given; there is deliberately no "tags" key.
reason_data = {
"stack_id": "fake_stack_id",
"asg_id": "fake_asg_id",
"unhealthy_members": [
{"id": "3bd8bc5a-7632-11e9-84cd-00224d6b7bc1"}
]
}
# Minimal stand-in for an item returned by resources.list(); it exposes only
# the resource_name attribute.
class FakeResource(object):
def __init__(self, resource_name):
self.resource_name = resource_name
mock_client = mock_heatclient.return_value
# The resource search returns exactly one match.
mock_client.resources.list.return_value = [
FakeResource("fake_resource_name")
]
notifier = heat_notifier.TrustHeatAlarmNotifier(self.conf)
notifier.notify(action, alarm_id, alarm_name, severity, previous,
current, reason, reason_data)
# The Heat client must be built once, from the trust ID in the action URL.
mock_heatclient.assert_called_once_with(self.conf, "fake_trust_id")
# The resource name found by the search is what gets marked unhealthy.
mock_client.resources.mark_unhealthy.assert_called_once_with(
"fake_asg_id",
"fake_resource_name",
True,
"unhealthy load balancer member"
)
# The stack is then updated in place (existing=True).
mock_client.stacks.update.assert_called_once_with(
"fake_stack_id", existing=True
)
@mock.patch("aodh.keystone_client.get_heat_client_from_trust")
def test_notify_stack_id_missing(self, mock_heatclient):
action = netutils.urlsplit("trust+autohealer://fake_trust_id:delete@")