diff --git a/aodh/keystone_client.py b/aodh/keystone_client.py
index 3cf10ca7d..4f3e02e7e 100644
--- a/aodh/keystone_client.py
+++ b/aodh/keystone_client.py
@@ -15,6 +15,7 @@
 
 import os
 
+from heatclient import client as heatclient
 from keystoneauth1 import exceptions as ka_exception
 from keystoneauth1.identity.generic import password
 from keystoneauth1 import loading as ka_loading
@@ -93,6 +94,28 @@ def url_for(conf, **kwargs):
     return sess.get_endpoint(**kwargs)
 
 
+def get_heat_client_from_trust(conf, trust_id):
+    """Build a Heat client authenticated through the given trust.
+
+    The orchestration endpoint is looked up on the trusted session using the
+    internal interface and the region configured in ``service_credentials``.
+
+    :param conf: Service configuration object.
+    :param trust_id: Trust ID handed to ``get_trusted_client``.
+    :returns: A Heat API version "1" client bound to the trusted session.
+    """
+    ks_client = get_trusted_client(conf, trust_id)
+    sess = ks_client.session
+
+    endpoint = sess.get_endpoint(
+        service_type='orchestration',
+        interface="internal",
+        region_name=conf.service_credentials.region_name
+    )
+
+    return heatclient.Client("1", endpoint=endpoint, session=sess)
+
+
 OPTS = [
     cfg.StrOpt('region-name',
                default=os.environ.get('OS_REGION_NAME'),
diff --git a/aodh/notifier/heat.py b/aodh/notifier/heat.py
new file mode 100644
index 000000000..157ad968f
--- /dev/null
+++ b/aodh/notifier/heat.py
@@ -0,0 +1,142 @@
+# Copyright 2019 Catalyst Cloud Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from oslo_log import log
+from oslo_utils import uuidutils
+import six
+
+from aodh import keystone_client as aodh_keystone
+from aodh import notifier
+
+LOG = log.getLogger(__name__)
+
+
+class TrustHeatAlarmNotifier(notifier.AlarmNotifier):
+    """Heat autohealing notifier.
+
+    The auto-healing notifier works together with loadbalancer_member_health
+    evaluator.
+
+    Presumably, the end user defines a Heat template which contains an
+    autoscaling group and all the members in the group are joined in an Octavia
+    load balancer in order to expose service to the outside, so that when the
+    stack scales up or scales down, Heat makes sure the new members are joining
+    the load balancer automatically and the old members are removed.
+
+    However, this notifier deals with the situation that when some member
+    fails, the stack could be recovered by marking the given autoscaling group
+    member unhealthy, then update Heat stack in place. In order to do that, the
+    notifier needs to know:
+
+    - Heat stack ID.
+    - Heat autoscaling group ID.
+    - The failed Octavia pool members.
+
+    The resource ID in the autoscaling group is saved in the Octavia member
+    tags. So, only Octavia stable/stein or later versions are supported.
+    """
+
+    def __init__(self, conf):
+        """Keep the service configuration for building Heat clients later.
+
+        :param conf: Service configuration object.
+        """
+        super(TrustHeatAlarmNotifier, self).__init__(conf)
+        self.conf = conf
+
+    def notify(self, action, alarm_id, alarm_name, severity, previous, current,
+               reason, reason_data):
+        """Mark failed group members unhealthy, then update the Heat stack.
+
+        :param action: Parsed action URL; its ``username`` is used as the
+                       trust ID when creating the Heat client.
+        :param alarm_id: ID of the alarm, used in log messages.
+        :param alarm_name: Name of the alarm, used in log messages.
+        :param severity: Alarm severity, used in log messages.
+        :param previous: Previous alarm state, used in log messages.
+        :param current: Current alarm state, used in log messages.
+        :param reason: Reason of the notification, used in log messages.
+        :param reason_data: Dict carrying ``stack_id``, ``asg_id`` and
+                            ``unhealthy_members``; each member is a dict
+                            whose ``tags`` list may hold resource IDs.
+
+        The notification is skipped with a warning when ``stack_id`` or
+        ``asg_id`` is missing. Any error raised while talking to Heat is
+        logged and not propagated to the caller.
+        """
+        # Hand the mapping to the logger instead of interpolating eagerly,
+        # so the message is only built when it is actually emitted.
+        LOG.info(
+            "Notifying alarm %(alarm_name)s %(alarm_id)s of %(severity)s "
+            "priority from %(previous)s to %(current)s with action %(action)s"
+            " because %(reason)s.",
+            {'alarm_name': alarm_name,
+             'alarm_id': alarm_id,
+             'severity': severity,
+             'previous': previous,
+             'current': current,
+             'action': action.geturl(),
+             'reason': reason}
+        )
+
+        trust_id = action.username
+        stack_id = reason_data.get("stack_id")
+        asg_id = reason_data.get("asg_id")
+
+        if not stack_id or not asg_id:
+            LOG.warning(
+                "stack_id and asg_id must exist to notify alarm %s", alarm_id
+            )
+            return
+
+        # UUID-like member tags are used as the Heat resource IDs. Only the
+        # first occurrence is kept so that a resource shared by several
+        # tags/members is not marked unhealthy more than once.
+        resources = []
+        for member in reason_data.get("unhealthy_members", []):
+            for tag in member.get("tags", []):
+                if uuidutils.is_uuid_like(tag) and tag not in resources:
+                    resources.append(tag)
+
+        if not resources:
+            return
+
+        try:
+            heat_client = aodh_keystone.get_heat_client_from_trust(
+                self.conf, trust_id
+            )
+
+            for res in resources:
+                heat_client.resources.mark_unhealthy(
+                    asg_id,
+                    res,
+                    True,
+                    "unhealthy load balancer member"
+                )
+                LOG.info(
+                    "Heat resource %(resource_id)s is marked as unhealthy "
+                    "for alarm %(alarm_id)s",
+                    {"resource_id": res, "alarm_id": alarm_id}
+                )
+
+            # Update the stack only after every resource has been marked.
+            heat_client.stacks.update(stack_id, existing=True)
+            LOG.info(
+                "Heat stack %(stack_id)s is updated for alarm "
+                "%(alarm_id)s",
+                {"stack_id": stack_id, "alarm_id": alarm_id}
+            )
+        except Exception as e:
+            LOG.exception("Failed to communicate with Heat service, "
+                          "error: %s", six.text_type(e))
diff --git a/aodh/tests/unit/notifier/__init__.py b/aodh/tests/unit/notifier/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/aodh/tests/unit/notifier/base.py b/aodh/tests/unit/notifier/base.py
new file mode 100644
index 000000000..74e87e45e
--- /dev/null
+++ b/aodh/tests/unit/notifier/base.py
@@ -0,0 +1,27 @@
+# Copyright 2019 Catalyst Cloud Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from oslo_config import fixture
+from oslotest import base
+
+from aodh import service
+
+
+class TestNotifierBase(base.BaseTestCase):
+    """Base test case that prepares ``self.conf`` via a Config fixture."""
+
+    def setUp(self):
+        super(TestNotifierBase, self).setUp()
+        service_conf = service.prepare_service(argv=[], config_files=[])
+        self.conf = self.useFixture(fixture.Config(service_conf)).conf
diff --git a/aodh/tests/unit/notifier/test_heat.py b/aodh/tests/unit/notifier/test_heat.py
new file mode 100644
index 000000000..c889e0fb3
--- /dev/null
+++ b/aodh/tests/unit/notifier/test_heat.py
@@ -0,0 +1,78 @@
+# Copyright 2019 Catalyst Cloud Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import mock
+from oslo_utils import netutils
+
+from aodh.notifier import heat as heat_notifier
+from aodh.tests.unit.notifier import base
+
+
+class TestTrustHeatAlarmNotifier(base.TestNotifierBase):
+    @mock.patch("aodh.keystone_client.get_heat_client_from_trust")
+    def test_notify(self, mock_heatclient):
+        """UUID-tagged members are marked unhealthy and the stack updated."""
+        action = netutils.urlsplit("trust+heat://fake_trust_id:delete@")
+        alarm_id = "fake_alarm_id"
+        alarm_name = "fake_alarm_name"
+        severity = "low"
+        previous = "ok"
+        current = "alarm"
+        reason = "no good reason"
+        reason_data = {
+            "stack_id": "fake_stack_id",
+            "asg_id": "fake_asg_id",
+            "unhealthy_members": [
+                {"tags": ["3bd8bc5a-7632-11e9-84cd-00224d6b7bc1"]}
+            ]
+        }
+        notifier = heat_notifier.TrustHeatAlarmNotifier(self.conf)
+        notifier.notify(action, alarm_id, alarm_name, severity, previous,
+                        current, reason, reason_data)
+
+        mock_heatclient.assert_called_once_with(self.conf, "fake_trust_id")
+
+        mock_client = mock_heatclient.return_value
+        mock_client.resources.mark_unhealthy.assert_called_once_with(
+            "fake_asg_id",
+            "3bd8bc5a-7632-11e9-84cd-00224d6b7bc1",
+            True,
+            "unhealthy load balancer member"
+        )
+
+        mock_client.stacks.update.assert_called_once_with(
+            "fake_stack_id", existing=True
+        )
+
+    @mock.patch("aodh.keystone_client.get_heat_client_from_trust")
+    def test_notify_stack_id_missing(self, mock_heatclient):
+        """No Heat client is created when reason_data lacks stack_id."""
+        action = netutils.urlsplit("trust+heat://fake_trust_id:delete@")
+        alarm_id = "fake_alarm_id"
+        alarm_name = "fake_alarm_name"
+        severity = "low"
+        previous = "ok"
+        current = "alarm"
+        reason = "no good reason"
+        reason_data = {
+            "asg_id": "fake_asg_id",
+            "unhealthy_members": [
+                {"tags": ["3bd8bc5a-7632-11e9-84cd-00224d6b7bc1"]}
+            ]
+        }
+        notifier = heat_notifier.TrustHeatAlarmNotifier(self.conf)
+        notifier.notify(action, alarm_id, alarm_name, severity, previous,
+                        current, reason, reason_data)
+
+        self.assertFalse(mock_heatclient.called)
diff --git a/releasenotes/notes/auto-healing-notifier-794b64de776811e9.yaml
b/releasenotes/notes/auto-healing-notifier-794b64de776811e9.yaml
new file mode 100644
index 000000000..cb84da46d
--- /dev/null
+++ b/releasenotes/notes/auto-healing-notifier-794b64de776811e9.yaml
@@ -0,0 +1,3 @@
+features:
+  - Added a new notifier (``trust+heat``) that works together with the
+    ``loadbalancer_member_health`` evaluator for auto-healing purposes.
diff --git a/requirements.txt b/requirements.txt
index 5745eeb2e..8e09a8f3b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -37,3 +37,4 @@ keystoneauth1>=2.1
 debtcollector>=1.2.0 # Apache-2.0
 python-octaviaclient>=1.8.0
 python-dateutil # BSD
+python-heatclient>=1.17.0
diff --git a/setup.cfg b/setup.cfg
index 62485c63d..9ca36fac3 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -91,6 +91,7 @@ aodh.notifier =
     trust+https = aodh.notifier.trust:TrustRestAlarmNotifier
     zaqar = aodh.notifier.zaqar:ZaqarAlarmNotifier
     trust+zaqar = aodh.notifier.zaqar:TrustZaqarAlarmNotifier
+    trust+heat = aodh.notifier.heat:TrustHeatAlarmNotifier
 
 wsgi_scripts =
     aodh-api = aodh.api.app:build_wsgi_app