Merge "Support Heat auto-healing notifier"
This commit is contained in:
@@ -15,6 +15,7 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from heatclient import client as heatclient
|
||||||
from keystoneauth1 import exceptions as ka_exception
|
from keystoneauth1 import exceptions as ka_exception
|
||||||
from keystoneauth1.identity.generic import password
|
from keystoneauth1.identity.generic import password
|
||||||
from keystoneauth1 import loading as ka_loading
|
from keystoneauth1 import loading as ka_loading
|
||||||
@@ -93,6 +94,19 @@ def url_for(conf, **kwargs):
|
|||||||
return sess.get_endpoint(**kwargs)
|
return sess.get_endpoint(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def get_heat_client_from_trust(conf, trust_id):
    """Return a Heat v1 client that reuses the trusted client's session.

    :param conf: service configuration; ``service_credentials.region_name``
                 is read to select the orchestration endpoint.
    :param trust_id: trust ID handed to ``get_trusted_client``.
    :returns: a ``heatclient`` client bound to the internal orchestration
              endpoint of the configured region.
    """
    session = get_trusted_client(conf, trust_id).session

    # Resolve the orchestration endpoint on its internal interface.
    endpoint_filter = {
        'service_type': 'orchestration',
        'interface': "internal",
        'region_name': conf.service_credentials.region_name,
    }
    heat_endpoint = session.get_endpoint(**endpoint_filter)

    return heatclient.Client("1", endpoint=heat_endpoint, session=session)
|
||||||
|
|
||||||
|
|
||||||
OPTS = [
|
OPTS = [
|
||||||
cfg.StrOpt('region-name',
|
cfg.StrOpt('region-name',
|
||||||
default=os.environ.get('OS_REGION_NAME'),
|
default=os.environ.get('OS_REGION_NAME'),
|
||||||
|
|||||||
114
aodh/notifier/heat.py
Normal file
114
aodh/notifier/heat.py
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
# Copyright 2019 Catalyst Cloud Ltd.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from oslo_log import log
|
||||||
|
from oslo_utils import uuidutils
|
||||||
|
import six
|
||||||
|
|
||||||
|
from aodh import keystone_client as aodh_keystone
|
||||||
|
from aodh import notifier
|
||||||
|
|
||||||
|
LOG = log.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class TrustHeatAlarmNotifier(notifier.AlarmNotifier):
    """Heat autohealing notifier.

    The auto-healing notifier works together with the
    loadbalancer_member_health evaluator.

    Presumably, the end user defines a Heat template which contains an
    autoscaling group and all the members in the group are joined in an Octavia
    load balancer in order to expose service to the outside, so that when the
    stack scales up or scales down, Heat makes sure the new members are joining
    the load balancer automatically and the old members are removed.

    However, this notifier deals with the situation that when some member
    fails, the stack could be recovered by marking the given autoscaling group
    member unhealthy, then updating the Heat stack in place. In order to do
    that, the notifier needs to know:

    - Heat stack ID.
    - Heat autoscaling group ID.
    - The failed Octavia pool members.

    The resource ID in the autoscaling group is saved in the Octavia member
    tags. So, only Octavia stable/stein or later versions are supported.
    """

    def __init__(self, conf):
        super(TrustHeatAlarmNotifier, self).__init__(conf)
        self.conf = conf

    @staticmethod
    def _get_unhealthy_resources(reason_data):
        """Collect the UUID-like tags of every unhealthy member.

        :param reason_data: dict that may carry an ``unhealthy_members`` list
                            of member dicts, each with an optional ``tags``
                            list.
        :returns: list of UUID-like tags, in encounter order.
        """
        resources = []
        # ``or []`` also covers keys that are present but set to None.
        for member in reason_data.get("unhealthy_members") or []:
            for tag in member.get("tags") or []:
                if uuidutils.is_uuid_like(tag):
                    resources.append(tag)
        return resources

    def notify(self, action, alarm_id, alarm_name, severity, previous, current,
               reason, reason_data):
        """Mark failed autoscaling group members unhealthy and update stack.

        :param action: split action URL; its ``username`` is used as the
                       trust ID.
        :param alarm_id: ID of the alarm being notified.
        :param alarm_name: name of the alarm (logged only).
        :param severity: alarm severity (logged only).
        :param previous: previous alarm state (logged only).
        :param current: current alarm state (logged only).
        :param reason: human readable transition reason (logged only).
        :param reason_data: dict expected to hold ``stack_id``, ``asg_id``
                            and ``unhealthy_members``.

        Nothing is raised for Heat failures; they are logged instead.
        """
        # NOTE(review): the action URL embeds the trust ID (read from
        # ``action.username`` below) - confirm it is acceptable to log it.
        # Arguments are passed to the logger instead of being interpolated
        # eagerly, so formatting only happens when the record is emitted.
        LOG.info(
            "Notifying alarm %(alarm_name)s %(alarm_id)s of %(severity)s "
            "priority from %(previous)s to %(current)s with action %(action)s"
            " because %(reason)s.",
            {'alarm_name': alarm_name,
             'alarm_id': alarm_id,
             'severity': severity,
             'previous': previous,
             'current': current,
             'action': action.geturl(),
             'reason': reason}
        )

        # A missing reason_data is treated like an empty one, so the warning
        # below is emitted instead of an AttributeError.
        reason_data = reason_data or {}

        trust_id = action.username
        stack_id = reason_data.get("stack_id")
        asg_id = reason_data.get("asg_id")

        if not stack_id or not asg_id:
            LOG.warning(
                "stack_id and asg_id must exist to notify alarm %s", alarm_id
            )
            return

        resources = self._get_unhealthy_resources(reason_data)
        if not resources:
            # Nothing to heal; avoid creating a Heat client needlessly.
            return

        try:
            heat_client = aodh_keystone.get_heat_client_from_trust(
                self.conf, trust_id
            )

            for res in resources:
                heat_client.resources.mark_unhealthy(
                    asg_id,
                    res,
                    True,
                    "unhealthy load balancer member"
                )
                LOG.info(
                    "Heat resource %(resource_id)s is marked as unhealthy "
                    "for alarm %(alarm_id)s",
                    {"resource_id": res, "alarm_id": alarm_id}
                )

            # Update the stack in place so the resources marked above are
            # acted upon.
            heat_client.stacks.update(stack_id, existing=True)
            LOG.info(
                "Heat stack %(stack_id)s is updated for alarm "
                "%(alarm_id)s",
                {"stack_id": stack_id, "alarm_id": alarm_id}
            )
        except Exception as e:
            # Best effort: failures are logged, never propagated.
            LOG.exception("Failed to communicate with Heat service, "
                          "error: %s", six.text_type(e))
|
||||||
0
aodh/tests/unit/notifier/__init__.py
Normal file
0
aodh/tests/unit/notifier/__init__.py
Normal file
27
aodh/tests/unit/notifier/base.py
Normal file
27
aodh/tests/unit/notifier/base.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Copyright 2019 Catalyst Cloud Ltd.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from oslo_config import fixture
|
||||||
|
from oslotest import base
|
||||||
|
|
||||||
|
from aodh import service
|
||||||
|
|
||||||
|
|
||||||
|
class TestNotifierBase(base.BaseTestCase):
    """Base test case exposing a service configuration as ``self.conf``."""

    def setUp(self):
        super(TestNotifierBase, self).setUp()

        # Prepare the service config without CLI arguments or config files,
        # then wrap it in a Config fixture.
        config_fixture = fixture.Config(
            service.prepare_service(argv=[], config_files=[])
        )
        self.conf = self.useFixture(config_fixture).conf
|
||||||
78
aodh/tests/unit/notifier/test_heat.py
Normal file
78
aodh/tests/unit/notifier/test_heat.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
# Copyright 2019 Catalyst Cloud Ltd.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import mock
|
||||||
|
from oslo_utils import netutils
|
||||||
|
|
||||||
|
from aodh.notifier import heat as heat_notifier
|
||||||
|
from aodh.tests.unit.notifier import base
|
||||||
|
|
||||||
|
|
||||||
|
class TestTrustHeatAlarmNotifier(base.TestNotifierBase):
    """Unit tests for ``TrustHeatAlarmNotifier.notify``."""

    # Use the ``trust+heat`` scheme under which the notifier is registered,
    # so the fixture looks like a real alarm action.
    ACTION_URL = "trust+heat://fake_trust_id:delete@"
    RESOURCE_ID = "3bd8bc5a-7632-11e9-84cd-00224d6b7bc1"

    def _notify(self, reason_data):
        """Run the notifier once with fixed alarm fields and reason_data."""
        notifier = heat_notifier.TrustHeatAlarmNotifier(self.conf)
        notifier.notify(
            netutils.urlsplit(self.ACTION_URL),
            "fake_alarm_id",
            "fake_alarm_name",
            "low",
            "ok",
            "alarm",
            "no good reason",
            reason_data
        )

    @mock.patch("aodh.keystone_client.get_heat_client_from_trust")
    def test_notify(self, mock_heatclient):
        self._notify({
            "stack_id": "fake_stack_id",
            "asg_id": "fake_asg_id",
            "unhealthy_members": [
                {"tags": [self.RESOURCE_ID]}
            ]
        })

        mock_heatclient.assert_called_once_with(self.conf, "fake_trust_id")

        mock_client = mock_heatclient.return_value
        mock_client.resources.mark_unhealthy.assert_called_once_with(
            "fake_asg_id",
            self.RESOURCE_ID,
            True,
            "unhealthy load balancer member"
        )

        mock_client.stacks.update.assert_called_once_with(
            "fake_stack_id", existing=True
        )

    @mock.patch("aodh.keystone_client.get_heat_client_from_trust")
    def test_notify_stack_id_missing(self, mock_heatclient):
        # Without a stack_id the notifier must bail out before touching Heat.
        self._notify({
            "asg_id": "fake_asg_id",
            "unhealthy_members": [
                {"tags": [self.RESOURCE_ID]}
            ]
        })

        self.assertFalse(mock_heatclient.called)
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
features:
|
||||||
|
- Added a new notifier (``trust+heat``) that works together with the
  ``loadbalancer_member_health`` evaluator for auto-healing purposes.
|
||||||
@@ -37,3 +37,4 @@ keystoneauth1>=2.1
|
|||||||
debtcollector>=1.2.0 # Apache-2.0
|
debtcollector>=1.2.0 # Apache-2.0
|
||||||
python-octaviaclient>=1.8.0
|
python-octaviaclient>=1.8.0
|
||||||
python-dateutil # BSD
|
python-dateutil # BSD
|
||||||
|
python-heatclient>=1.17.0
|
||||||
|
|||||||
@@ -91,6 +91,7 @@ aodh.notifier =
|
|||||||
trust+https = aodh.notifier.trust:TrustRestAlarmNotifier
|
trust+https = aodh.notifier.trust:TrustRestAlarmNotifier
|
||||||
zaqar = aodh.notifier.zaqar:ZaqarAlarmNotifier
|
zaqar = aodh.notifier.zaqar:ZaqarAlarmNotifier
|
||||||
trust+zaqar = aodh.notifier.zaqar:TrustZaqarAlarmNotifier
|
trust+zaqar = aodh.notifier.zaqar:TrustZaqarAlarmNotifier
|
||||||
|
trust+heat = aodh.notifier.heat:TrustHeatAlarmNotifier
|
||||||
|
|
||||||
wsgi_scripts =
|
wsgi_scripts =
|
||||||
aodh-api = aodh.api.app:build_wsgi_app
|
aodh-api = aodh.api.app:build_wsgi_app
|
||||||
|
|||||||
Reference in New Issue
Block a user