Switch partitioned alarm evaluation to a hash-based approach
Short version: make use of the new distributed workload partitioning utilities in Ceilometer to simplify the alarm evaluation partitioning. Code is intentionally non-consolidated to enable easy deletion of 'singleton' and 'partitioned' services in the Kilo cycle.

Longer version: The assignment of alarms to individual partitioned alarm evaluators now follows the same pattern as the division of resources between scaled-out central agents. The evaluators each join a tooz group and emit a periodic heartbeat to tooz. Tooz provides distributed group membership information. Thus the set of evaluators share minimal knowledge, but this is sufficient to guide a hash-based approach to determining whether an individual alarm UUID falls under the responsibility of an individual evaluator.

The current RPC-fanout-based presence reporting and the master/slave division of responsibilities can be dropped in the next cycle. Also the rebalancing logic when a certain threshold of alarm deletion is crossed will no longer be required.

DocImpact
Change-Id: Ica8dae569f9ff1c2f8fe58be6ae2def66be0da54
Implements: blueprint hash-based-alarm-partitioning
This commit is contained in:
parent
9a2f8618de
commit
3571a607f2
@ -32,7 +32,10 @@ OPTS = [
|
|||||||
cfg.StrOpt('partition_rpc_topic',
|
cfg.StrOpt('partition_rpc_topic',
|
||||||
default='alarm_partition_coordination',
|
default='alarm_partition_coordination',
|
||||||
help='The topic that ceilometer uses for alarm partition '
|
help='The topic that ceilometer uses for alarm partition '
|
||||||
'coordination messages.'),
|
'coordination messages. DEPRECATED: RPC-based partitioned'
|
||||||
|
'alarm evaluation service will be removed in Kilo in '
|
||||||
|
'favour of the default alarm evaluation service using '
|
||||||
|
'tooz for partitioning.'),
|
||||||
]
|
]
|
||||||
|
|
||||||
cfg.CONF.register_opts(OPTS, group='alarm')
|
cfg.CONF.register_opts(OPTS, group='alarm')
|
||||||
|
@ -25,8 +25,9 @@ from oslo.utils import netutils
|
|||||||
import six
|
import six
|
||||||
from stevedore import extension
|
from stevedore import extension
|
||||||
|
|
||||||
from ceilometer.alarm.partition import coordination
|
from ceilometer.alarm.partition import coordination as alarm_coordination
|
||||||
from ceilometer.alarm import rpc as rpc_alarm
|
from ceilometer.alarm import rpc as rpc_alarm
|
||||||
|
from ceilometer import coordination as coordination
|
||||||
from ceilometer import messaging
|
from ceilometer import messaging
|
||||||
from ceilometer.openstack.common.gettextutils import _
|
from ceilometer.openstack.common.gettextutils import _
|
||||||
from ceilometer.openstack.common import log
|
from ceilometer.openstack.common import log
|
||||||
@ -48,6 +49,8 @@ cfg.CONF.import_opt('notifier_rpc_topic', 'ceilometer.alarm.rpc',
|
|||||||
group='alarm')
|
group='alarm')
|
||||||
cfg.CONF.import_opt('partition_rpc_topic', 'ceilometer.alarm.rpc',
|
cfg.CONF.import_opt('partition_rpc_topic', 'ceilometer.alarm.rpc',
|
||||||
group='alarm')
|
group='alarm')
|
||||||
|
cfg.CONF.import_opt('heartbeat', 'ceilometer.coordination',
|
||||||
|
group='coordination')
|
||||||
|
|
||||||
LOG = log.getLogger(__name__)
|
LOG = log.getLogger(__name__)
|
||||||
|
|
||||||
@ -109,6 +112,46 @@ class AlarmService(object):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@six.add_metaclass(abc.ABCMeta)
|
||||||
|
class AlarmEvaluationService(AlarmService, os_service.Service):
|
||||||
|
|
||||||
|
PARTITIONING_GROUP_NAME = "alarm_evaluator"
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
super(AlarmEvaluationService, self).__init__()
|
||||||
|
self._load_evaluators()
|
||||||
|
self.api_client = None
|
||||||
|
self.partition_coordinator = coordination.PartitionCoordinator()
|
||||||
|
|
||||||
|
def start(self):
|
||||||
|
super(AlarmEvaluationService, self).start()
|
||||||
|
self.partition_coordinator.start()
|
||||||
|
self.partition_coordinator.join_group(self.PARTITIONING_GROUP_NAME)
|
||||||
|
|
||||||
|
# allow time for coordination if necessary
|
||||||
|
delay_start = self.partition_coordinator.is_active()
|
||||||
|
|
||||||
|
if self.evaluators:
|
||||||
|
interval = cfg.CONF.alarm.evaluation_interval
|
||||||
|
self.tg.add_timer(
|
||||||
|
interval,
|
||||||
|
self._evaluate_assigned_alarms,
|
||||||
|
initial_delay=interval if delay_start else None)
|
||||||
|
if self.partition_coordinator.is_active():
|
||||||
|
heartbeat_interval = min(cfg.CONF.coordination.heartbeat,
|
||||||
|
cfg.CONF.alarm.evaluation_interval / 4)
|
||||||
|
self.tg.add_timer(heartbeat_interval,
|
||||||
|
self.partition_coordinator.heartbeat)
|
||||||
|
# Add a dummy thread to have wait() working
|
||||||
|
self.tg.add_timer(604800, lambda: None)
|
||||||
|
|
||||||
|
def _assigned_alarms(self):
|
||||||
|
all_alarms = self._client.alarms.list(q=[{'field': 'enabled',
|
||||||
|
'value': True}])
|
||||||
|
return self.partition_coordinator.extract_my_subset(
|
||||||
|
self.PARTITIONING_GROUP_NAME, all_alarms)
|
||||||
|
|
||||||
|
|
||||||
class SingletonAlarmService(AlarmService, os_service.Service):
|
class SingletonAlarmService(AlarmService, os_service.Service):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -142,7 +185,7 @@ class PartitionedAlarmService(AlarmService, os_service.Service):
|
|||||||
|
|
||||||
self._load_evaluators()
|
self._load_evaluators()
|
||||||
self.api_client = None
|
self.api_client = None
|
||||||
self.partition_coordinator = coordination.PartitionCoordinator()
|
self.partition_coordinator = alarm_coordination.PartitionCoordinator()
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
super(PartitionedAlarmService, self).start()
|
super(PartitionedAlarmService, self).start()
|
||||||
|
@ -24,8 +24,12 @@ from ceilometer import service
|
|||||||
|
|
||||||
|
|
||||||
OPTS = [
|
OPTS = [
|
||||||
cfg.StrOpt('evaluation_service', default='singleton',
|
cfg.StrOpt('evaluation_service', default='default',
|
||||||
help='Driver to use for alarm evaluation service.'),
|
help='Driver to use for alarm evaluation service. DEPRECATED: '
|
||||||
|
'"singleton" and "partitioned" alarm evaluator '
|
||||||
|
'services will be removed in Kilo in favour of the '
|
||||||
|
'default alarm evaluation service using tooz for '
|
||||||
|
'partitioning.'),
|
||||||
]
|
]
|
||||||
|
|
||||||
cfg.CONF.register_opts(OPTS, group='alarm')
|
cfg.CONF.register_opts(OPTS, group='alarm')
|
||||||
|
143
ceilometer/tests/alarm/test_alarm_svc.py
Normal file
143
ceilometer/tests/alarm/test_alarm_svc.py
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2013 Red Hat, Inc
|
||||||
|
#
|
||||||
|
# Author: Eoghan Glynn <eglynn@redhat.com>
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
"""Tests for ceilometer.alarm.service.SingletonAlarmService.
|
||||||
|
"""
|
||||||
|
import mock
|
||||||
|
from oslo.config import fixture as fixture_config
|
||||||
|
from stevedore import extension
|
||||||
|
|
||||||
|
from ceilometer.alarm import service
|
||||||
|
from ceilometer.tests import base as tests_base
|
||||||
|
|
||||||
|
|
||||||
|
class TestAlarmEvaluationService(tests_base.BaseTestCase):
|
||||||
|
def setUp(self):
|
||||||
|
super(TestAlarmEvaluationService, self).setUp()
|
||||||
|
self.CONF = self.useFixture(fixture_config.Config()).conf
|
||||||
|
self.setup_messaging(self.CONF)
|
||||||
|
|
||||||
|
self.threshold_eval = mock.Mock()
|
||||||
|
self.evaluators = extension.ExtensionManager.make_test_instance(
|
||||||
|
[
|
||||||
|
extension.Extension(
|
||||||
|
'threshold',
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
self.threshold_eval),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
self.api_client = mock.MagicMock()
|
||||||
|
self.svc = service.AlarmEvaluationService()
|
||||||
|
self.svc.tg = mock.Mock()
|
||||||
|
self.svc.partition_coordinator = mock.MagicMock()
|
||||||
|
p_coord = self.svc.partition_coordinator
|
||||||
|
p_coord.extract_my_subset.side_effect = lambda _, x: x
|
||||||
|
self.svc.evaluators = self.evaluators
|
||||||
|
self.svc.supported_evaluators = ['threshold']
|
||||||
|
|
||||||
|
def _do_test_start(self, test_interval=120,
|
||||||
|
coordination_heartbeat=1.0,
|
||||||
|
coordination_active=False):
|
||||||
|
self.CONF.set_override('evaluation_interval',
|
||||||
|
test_interval,
|
||||||
|
group='alarm')
|
||||||
|
self.CONF.set_override('heartbeat',
|
||||||
|
coordination_heartbeat,
|
||||||
|
group='coordination')
|
||||||
|
with mock.patch('ceilometerclient.client.get_client',
|
||||||
|
return_value=self.api_client):
|
||||||
|
p_coord_mock = self.svc.partition_coordinator
|
||||||
|
p_coord_mock.is_active.return_value = coordination_active
|
||||||
|
|
||||||
|
self.svc.start()
|
||||||
|
self.svc.partition_coordinator.start.assert_called_once_with()
|
||||||
|
self.svc.partition_coordinator.join_group.assert_called_once_with(
|
||||||
|
self.svc.PARTITIONING_GROUP_NAME)
|
||||||
|
|
||||||
|
initial_delay = test_interval if coordination_active else None
|
||||||
|
expected = [
|
||||||
|
mock.call(test_interval,
|
||||||
|
self.svc._evaluate_assigned_alarms,
|
||||||
|
initial_delay=initial_delay),
|
||||||
|
mock.call(604800, mock.ANY),
|
||||||
|
]
|
||||||
|
if coordination_active:
|
||||||
|
hb_interval = min(coordination_heartbeat, test_interval / 4)
|
||||||
|
hb_call = mock.call(hb_interval,
|
||||||
|
self.svc.partition_coordinator.heartbeat)
|
||||||
|
expected.insert(1, hb_call)
|
||||||
|
actual = self.svc.tg.add_timer.call_args_list
|
||||||
|
self.assertEqual(expected, actual)
|
||||||
|
|
||||||
|
def test_start_singleton(self):
|
||||||
|
self._do_test_start(coordination_active=False)
|
||||||
|
|
||||||
|
def test_start_coordinated(self):
|
||||||
|
self._do_test_start(coordination_active=True)
|
||||||
|
|
||||||
|
def test_start_coordinated_high_hb_interval(self):
|
||||||
|
self._do_test_start(coordination_active=True, test_interval=10,
|
||||||
|
coordination_heartbeat=5)
|
||||||
|
|
||||||
|
def test_evaluation_cycle(self):
|
||||||
|
alarm = mock.Mock(type='threshold')
|
||||||
|
self.api_client.alarms.list.return_value = [alarm]
|
||||||
|
with mock.patch('ceilometerclient.client.get_client',
|
||||||
|
return_value=self.api_client):
|
||||||
|
p_coord_mock = self.svc.partition_coordinator
|
||||||
|
p_coord_mock.extract_my_subset.return_value = [alarm]
|
||||||
|
|
||||||
|
self.svc._evaluate_assigned_alarms()
|
||||||
|
|
||||||
|
p_coord_mock.extract_my_subset.assert_called_once_with(
|
||||||
|
self.svc.PARTITIONING_GROUP_NAME, [alarm])
|
||||||
|
self.threshold_eval.evaluate.assert_called_once_with(alarm)
|
||||||
|
|
||||||
|
def test_unknown_extension_skipped(self):
|
||||||
|
alarms = [
|
||||||
|
mock.Mock(type='not_existing_type'),
|
||||||
|
mock.Mock(type='threshold')
|
||||||
|
]
|
||||||
|
|
||||||
|
self.api_client.alarms.list.return_value = alarms
|
||||||
|
with mock.patch('ceilometerclient.client.get_client',
|
||||||
|
return_value=self.api_client):
|
||||||
|
self.svc.start()
|
||||||
|
self.svc._evaluate_assigned_alarms()
|
||||||
|
self.threshold_eval.evaluate.assert_called_once_with(alarms[1])
|
||||||
|
|
||||||
|
def test_singleton_endpoint_types(self):
|
||||||
|
endpoint_types = ["internalURL", "publicURL"]
|
||||||
|
for endpoint_type in endpoint_types:
|
||||||
|
self.CONF.set_override('os_endpoint_type',
|
||||||
|
endpoint_type,
|
||||||
|
group='service_credentials')
|
||||||
|
with mock.patch('ceilometerclient.client.get_client') as client:
|
||||||
|
self.svc.api_client = None
|
||||||
|
self.svc._evaluate_assigned_alarms()
|
||||||
|
conf = self.CONF.service_credentials
|
||||||
|
expected = [mock.call(2,
|
||||||
|
os_auth_url=conf.os_auth_url,
|
||||||
|
os_region_name=conf.os_region_name,
|
||||||
|
os_tenant_name=conf.os_tenant_name,
|
||||||
|
os_password=conf.os_password,
|
||||||
|
os_username=conf.os_username,
|
||||||
|
os_cacert=conf.os_cacert,
|
||||||
|
os_endpoint_type=conf.os_endpoint_type,
|
||||||
|
insecure=conf.insecure)]
|
||||||
|
actual = client.call_args_list
|
||||||
|
self.assertEqual(expected, actual)
|
@ -130,7 +130,10 @@ class BinAlarmEvaluatorServiceTestCase(base.BaseTestCase):
|
|||||||
os.remove(self.tempfile)
|
os.remove(self.tempfile)
|
||||||
|
|
||||||
def test_default_config(self):
|
def test_default_config(self):
|
||||||
self._do_test(None, "SingletonAlarmService")
|
self._do_test(None, "AlarmEvaluationService")
|
||||||
|
|
||||||
|
def test_singleton_driver(self):
|
||||||
|
self._do_test('singleton', "SingletonAlarmService")
|
||||||
|
|
||||||
def test_backward_compat(self):
|
def test_backward_compat(self):
|
||||||
self._do_test("ceilometer.alarm.service.PartitionedAlarmService",
|
self._do_test("ceilometer.alarm.service.PartitionedAlarmService",
|
||||||
|
@ -225,6 +225,7 @@ ceilometer.alarm.evaluator =
|
|||||||
combination = ceilometer.alarm.evaluator.combination:CombinationEvaluator
|
combination = ceilometer.alarm.evaluator.combination:CombinationEvaluator
|
||||||
|
|
||||||
ceilometer.alarm.evaluator_service =
|
ceilometer.alarm.evaluator_service =
|
||||||
|
default = ceilometer.alarm.service:AlarmEvaluationService
|
||||||
singleton = ceilometer.alarm.service:SingletonAlarmService
|
singleton = ceilometer.alarm.service:SingletonAlarmService
|
||||||
partitioned = ceilometer.alarm.service:PartitionedAlarmService
|
partitioned = ceilometer.alarm.service:PartitionedAlarmService
|
||||||
# NOTE(sileht): for backward compatibility
|
# NOTE(sileht): for backward compatibility
|
||||||
|
Loading…
Reference in New Issue
Block a user