Merge "Support Heat auto-healing notifier"
This commit is contained in:
commit
c6f07bf92d
|
@ -15,6 +15,7 @@
|
|||
|
||||
import os
|
||||
|
||||
from heatclient import client as heatclient
|
||||
from keystoneauth1 import exceptions as ka_exception
|
||||
from keystoneauth1.identity.generic import password
|
||||
from keystoneauth1 import loading as ka_loading
|
||||
|
@ -93,6 +94,19 @@ def url_for(conf, **kwargs):
|
|||
return sess.get_endpoint(**kwargs)
|
||||
|
||||
|
||||
def get_heat_client_from_trust(conf, trust_id):
    """Build a Heat API v1 client that acts through a Keystone trust.

    :param conf: service configuration;
                 ``conf.service_credentials.region_name`` selects the region
                 used for the endpoint lookup.
    :param trust_id: ID of the trust handed to ``get_trusted_client``.
    :returns: a ``heatclient`` client bound to the trusted session and to
              the ``internal`` interface of the ``orchestration`` service.
    """
    trusted_session = get_trusted_client(conf, trust_id).session
    region = conf.service_credentials.region_name

    # Resolve the orchestration endpoint from the catalog of the trusted
    # session so that the client talks to Heat on behalf of the trustor.
    heat_endpoint = trusted_session.get_endpoint(
        service_type='orchestration',
        interface="internal",
        region_name=region,
    )
    return heatclient.Client(
        "1", endpoint=heat_endpoint, session=trusted_session
    )
|
||||
|
||||
|
||||
OPTS = [
|
||||
cfg.StrOpt('region-name',
|
||||
default=os.environ.get('OS_REGION_NAME'),
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
# Copyright 2019 Catalyst Cloud Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from oslo_log import log
|
||||
from oslo_utils import uuidutils
|
||||
import six
|
||||
|
||||
from aodh import keystone_client as aodh_keystone
|
||||
from aodh import notifier
|
||||
|
||||
# Module-level logger, named after this module.
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
class TrustHeatAlarmNotifier(notifier.AlarmNotifier):
    """Heat autohealing notifier.

    The auto-healing notifier works together with the
    loadbalancer_member_health evaluator.

    Presumably, the end user defines a Heat template which contains an
    autoscaling group, and all the members in the group are joined in an
    Octavia load balancer in order to expose the service to the outside, so
    that when the stack scales up or down, Heat makes sure the new members
    join the load balancer automatically and the old members are removed.

    This notifier deals with the situation where some member fails: the
    stack can be recovered by marking the given autoscaling group member
    unhealthy and then updating the Heat stack in place. In order to do
    that, the notifier needs to know:

    - Heat stack ID.
    - Heat autoscaling group ID.
    - The failed Octavia pool members.

    The resource ID in the autoscaling group is saved in the Octavia member
    tags. So, only Octavia stable/stein or later versions are supported.
    """

    def __init__(self, conf):
        super(TrustHeatAlarmNotifier, self).__init__(conf)
        self.conf = conf

    @staticmethod
    def _get_unhealthy_resources(reason_data):
        """Return the UUID-like tags of all unhealthy members, in order.

        A missing or ``None`` ``unhealthy_members`` / ``tags`` value is
        treated as an empty list.
        """
        return [
            tag
            for member in reason_data.get("unhealthy_members") or []
            for tag in member.get("tags") or []
            if uuidutils.is_uuid_like(tag)
        ]

    def notify(self, action, alarm_id, alarm_name, severity, previous, current,
               reason, reason_data):
        """Mark failed group members unhealthy and update the Heat stack.

        :param action: the split action URL; its ``username`` part is used
                       as the trust ID.
        :param alarm_id: the ID of the alarm being notified.
        :param alarm_name: the name of the alarm (only logged).
        :param severity: the severity of the alarm (only logged).
        :param previous: the previous alarm state (only logged).
        :param current: the current alarm state (only logged).
        :param reason: the reason for the transition (only logged).
        :param reason_data: a dict read for the keys ``stack_id``,
                            ``asg_id`` and ``unhealthy_members`` (a list of
                            dicts, each optionally carrying ``tags``).

        Nothing is sent to Heat when ``stack_id`` or ``asg_id`` is missing,
        or when no member tag looks like a UUID. Any exception raised while
        talking to Heat is logged and not propagated.
        """
        # Pass the mapping as a logging argument so that interpolation only
        # happens if the record is actually emitted.
        LOG.info(
            "Notifying alarm %(alarm_name)s %(alarm_id)s of %(severity)s "
            "priority from %(previous)s to %(current)s with action %(action)s"
            " because %(reason)s.",
            {'alarm_name': alarm_name,
             'alarm_id': alarm_id,
             'severity': severity,
             'previous': previous,
             'current': current,
             'action': action.geturl(),
             'reason': reason}
        )

        trust_id = action.username
        stack_id = reason_data.get("stack_id")
        asg_id = reason_data.get("asg_id")

        if not stack_id or not asg_id:
            LOG.warning(
                "stack_id and asg_id must exist to notify alarm %s", alarm_id
            )
            return

        resources = self._get_unhealthy_resources(reason_data)
        if not resources:
            return

        try:
            heat_client = aodh_keystone.get_heat_client_from_trust(
                self.conf, trust_id
            )

            for res in resources:
                heat_client.resources.mark_unhealthy(
                    asg_id,
                    res,
                    True,
                    "unhealthy load balancer member"
                )
                LOG.info(
                    "Heat resource %(resource_id)s is marked as unhealthy "
                    "for alarm %(alarm_id)s",
                    {"resource_id": res, "alarm_id": alarm_id}
                )

            # Update the stack in place, after every failed member has been
            # marked unhealthy.
            heat_client.stacks.update(stack_id, existing=True)
            LOG.info(
                "Heat stack %(stack_id)s is updated for alarm "
                "%(alarm_id)s",
                {"stack_id": stack_id, "alarm_id": alarm_id}
            )
        except Exception as e:
            LOG.exception("Failed to communicate with Heat service, "
                          "error: %s", six.text_type(e))
|
|
@ -0,0 +1,27 @@
|
|||
# Copyright 2019 Catalyst Cloud Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from oslo_config import fixture
|
||||
from oslotest import base
|
||||
|
||||
from aodh import service
|
||||
|
||||
|
||||
class TestNotifierBase(base.BaseTestCase):
    """Shared base class for notifier unit tests; exposes ``self.conf``."""

    def setUp(self):
        """Prepare a service config and wrap it in an oslo.config fixture."""
        super(TestNotifierBase, self).setUp()

        # No CLI arguments and no config files are passed in.
        prepared_conf = service.prepare_service(argv=[], config_files=[])
        conf_fixture = self.useFixture(fixture.Config(prepared_conf))
        self.conf = conf_fixture.conf
|
|
@ -0,0 +1,78 @@
|
|||
# Copyright 2019 Catalyst Cloud Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import mock
|
||||
from oslo_utils import netutils
|
||||
|
||||
from aodh.notifier import heat as heat_notifier
|
||||
from aodh.tests.unit.notifier import base
|
||||
|
||||
|
||||
class TestTrustHeatAlarmNotifier(base.TestNotifierBase):
    """Unit tests for the ``trust+heat`` auto-healing notifier."""

    # UUID-like member tag that the notifier picks up as a Heat resource ID.
    RESOURCE_ID = "3bd8bc5a-7632-11e9-84cd-00224d6b7bc1"

    def _notify(self, reason_data):
        """Run the notifier once with fixed alarm attributes.

        The action URL uses the ``trust+heat`` scheme, the name under which
        the notifier is registered; its username part is the trust ID.
        """
        action = netutils.urlsplit("trust+heat://fake_trust_id:delete@")
        notifier = heat_notifier.TrustHeatAlarmNotifier(self.conf)
        notifier.notify(action, "fake_alarm_id", "fake_alarm_name", "low",
                        "ok", "alarm", "no good reason", reason_data)

    @mock.patch("aodh.keystone_client.get_heat_client_from_trust")
    def test_notify(self, mock_heatclient):
        reason_data = {
            "stack_id": "fake_stack_id",
            "asg_id": "fake_asg_id",
            "unhealthy_members": [
                {"tags": [self.RESOURCE_ID]}
            ]
        }

        self._notify(reason_data)

        mock_heatclient.assert_called_once_with(self.conf, "fake_trust_id")

        mock_client = mock_heatclient.return_value
        mock_client.resources.mark_unhealthy.assert_called_once_with(
            "fake_asg_id",
            self.RESOURCE_ID,
            True,
            "unhealthy load balancer member"
        )

        mock_client.stacks.update.assert_called_once_with(
            "fake_stack_id", existing=True
        )

    @mock.patch("aodh.keystone_client.get_heat_client_from_trust")
    def test_notify_stack_id_missing(self, mock_heatclient):
        reason_data = {
            "asg_id": "fake_asg_id",
            "unhealthy_members": [
                {"tags": [self.RESOURCE_ID]}
            ]
        }

        self._notify(reason_data)

        self.assertFalse(mock_heatclient.called)

    @mock.patch("aodh.keystone_client.get_heat_client_from_trust")
    def test_notify_asg_id_missing(self, mock_heatclient):
        # Covers the other half of the notifier's stack_id/asg_id guard.
        reason_data = {
            "stack_id": "fake_stack_id",
            "unhealthy_members": [
                {"tags": [self.RESOURCE_ID]}
            ]
        }

        self._notify(reason_data)

        self.assertFalse(mock_heatclient.called)
|
|
@ -0,0 +1,3 @@
|
|||
features:
|
||||
- Added a new notifier (``trust+heat``) that works together with the
|
||||
``loadbalancer_member_health`` evaluator for auto-healing purposes.
|
|
@ -37,3 +37,4 @@ keystoneauth1>=2.1
|
|||
debtcollector>=1.2.0 # Apache-2.0
|
||||
python-octaviaclient>=1.8.0
|
||||
python-dateutil # BSD
|
||||
python-heatclient>=1.17.0
|
||||
|
|
|
@ -91,6 +91,7 @@ aodh.notifier =
|
|||
trust+https = aodh.notifier.trust:TrustRestAlarmNotifier
|
||||
zaqar = aodh.notifier.zaqar:ZaqarAlarmNotifier
|
||||
trust+zaqar = aodh.notifier.zaqar:TrustZaqarAlarmNotifier
|
||||
trust+heat = aodh.notifier.heat:TrustHeatAlarmNotifier
|
||||
|
||||
wsgi_scripts =
|
||||
aodh-api = aodh.api.app:build_wsgi_app
|
||||
|
|
Loading…
Reference in New Issue