Implementation of deferred restarts

Add deferred event actions and config.

Change-Id: Ifbb15c0c04117a5a98672b2af4fd7203dae9a18e
This commit is contained in:
Liam Young 2021-03-29 16:04:27 +00:00
parent 19ee453fb7
commit 81c33953f9
13 changed files with 235 additions and 25 deletions

View File

@ -52,3 +52,30 @@ force-boot:
messages, in particular if the cluster received messages after the
unit was shut down.
See https://www.rabbitmq.com/clustering.html#restarting and LP: #1828988
restart-services:
description: |
Restarts services this charm manages.
params:
deferred-only:
type: boolean
default: false
description: |
Restart all deferred services.
services:
type: string
default: ""
description: |
List of services to restart.
run-hooks:
type: boolean
default: true
description: |
Run any hooks which have been deferred.
run-deferred-hooks:
description: |
Run deferrable hooks and restart services.
.
NOTE: Service will be restarted as needed irrespective of enable-auto-restarts
show-deferred-events:
description: |
Show the outstanding restarts

View File

@ -34,6 +34,9 @@ def _add_path(path):
_add_path(_root)
_add_path(_hooks)
import charmhelpers.contrib.openstack.deferred_events as deferred_events
import charmhelpers.contrib.openstack.utils as os_utils
from charmhelpers.core.host import (
service_start,
service_stop,
@ -54,6 +57,8 @@ from charmhelpers.core.host import (
cmp_pkgrevno,
)
import rabbitmq_server_relations
from hooks.rabbit_utils import (
ConfigRenderer,
CONFIG_FILES,
@ -214,6 +219,74 @@ def force_boot(args):
return False
def restart(args):
    """Restart services managed by this charm.

    Exactly one of the ``deferred-only`` and ``services`` action parameters
    has to be supplied; any other combination fails the action without
    restarting anything. When ``run-hooks`` is set the deferred hooks are
    run before the services are restarted.

    :param args: Unused
    :type args: List[str]
    """
    only_deferred = action_get("deferred-only")
    requested = action_get("services").split()
    # Reject ambiguous or empty requests before touching any service.
    if only_deferred and requested:
        action_fail("Cannot set deferred-only and services")
        return
    if not only_deferred and not requested:
        action_fail("Please specify deferred-only or services")
        return
    if action_get('run-hooks'):
        _run_deferred_hooks()
    # Restart either everything that was deferred or the explicit list.
    restart_kwargs = (
        {'deferred_only': True} if only_deferred
        else {'services': requested})
    os_utils.restart_services_action(**restart_kwargs)
    assess_status(ConfigRenderer(CONFIG_FILES))
def _run_deferred_hooks():
    """Run the deferred hooks this charm knows how to replay.

    Only config-changed and amqp-relation-changed are handled, so this
    function has to be extended whenever the charm learns to defer another
    hook. Nothing is done when restarts are currently permitted.
    """
    # NOTE(review): the nesting of this function was reconstructed from a
    # view that had lost its indentation - confirm that both hook checks and
    # the final log belong under the "restart not permitted" condition.
    if deferred_events.is_restart_permitted():
        return

    # The deferred hook list is re-read before each check because running a
    # hook may change it.
    if 'config-changed' in deferred_events.get_deferred_hooks():
        log("Running hook config-changed", level=INFO)
        rabbitmq_server_relations.config_changed(
            check_deferred_restarts=False)
        deferred_events.clear_deferred_hook('config-changed')

    if 'amqp-relation-changed' in deferred_events.get_deferred_hooks():
        log("Running hook amqp-relation-changed", level=INFO)
        # update_clients walks every amqp relation and runs the
        # amqp-relation-changed hook for each related unit.
        rabbitmq_server_relations.update_clients(
            check_deferred_restarts=False)
        deferred_events.clear_deferred_hook('amqp-relation-changed')

    remaining = deferred_events.get_deferred_hooks()
    log("Remaining hooks: {}".format(remaining), level=INFO)
def run_deferred_hooks(args):
    """Action entry point: run deferred hooks, then deferred restarts.

    :param args: Unused
    :type args: List[str]
    """
    # Replay the hooks first so the restarts below act on their results.
    _run_deferred_hooks()
    os_utils.restart_services_action(deferred_only=True)
    # Refresh the workload status once hooks and restarts are done.
    assess_status(ConfigRenderer(CONFIG_FILES))
def show_deferred_events(args):
    """Action entry point: show the outstanding deferred events.

    All of the work is delegated to the charmhelpers action helper.

    :param args: Unused
    :type args: List[str]
    """
    os_utils.show_deferred_events_action_helper()
# A dictionary of all the defined actions to callables (which take
# parsed arguments).
ACTIONS = {
@ -225,6 +298,9 @@ ACTIONS = {
"forget-cluster-node": forget_cluster_node,
"list-unconsumed-queues": list_unconsumed_queues,
"force-boot": force_boot,
"restart-services": restart,
"run-deferred-hooks": run_deferred_hooks,
"show-deferred-events": show_deferred_events,
}

1
actions/restart-services Symbolic link
View File

@ -0,0 +1 @@
actions.py

1
actions/run-deferred-hooks Symbolic link
View File

@ -0,0 +1 @@
actions.py

View File

@ -0,0 +1 @@
actions.py

View File

@ -309,3 +309,9 @@ options:
- client-local, Pick the node the client that declares the queue is connected to.
- random, Pick a random node.
This option is only available for RabbitMQ >= 3.6
enable-auto-restarts:
type: boolean
default: True
description: |
Allow the charm and packages to restart services automatically when
required.

View File

@ -24,7 +24,7 @@ import shutil
import socket
import yaml
from collections import OrderedDict
from collections import OrderedDict, defaultdict
from rabbitmq_context import (
RabbitMQSSLContext,
@ -34,6 +34,7 @@ from rabbitmq_context import (
)
from charmhelpers.contrib.charmsupport import nrpe
import charmhelpers.contrib.openstack.deferred_events as deferred_events
from charmhelpers.core.templating import render
from charmhelpers.contrib.openstack.utils import (
@ -930,7 +931,9 @@ def restart_on_change(restart_map, stopstart=False):
return pausable_restart_on_change(
restart_map,
stopstart=stopstart,
pre_restarts_wait_f=cluster_wait
pre_restarts_wait_f=cluster_wait,
can_restart_now_f=deferred_events.check_and_record_restart_request,
post_svc_restart_f=deferred_events.process_svc_restart
)
@ -944,6 +947,7 @@ def assess_status(configs):
@param configs: a templating.OSConfigRenderer() object
@returns None - this function is executed for its side-effect
"""
deferred_events.check_restart_timestamps()
assess_status_func(configs)()
rmq_version = get_upstream_version(VERSION_PACKAGE)
if rmq_version:
@ -983,6 +987,23 @@ def assess_status_func(configs):
# of the first unit.
if state == "waiting":
state = "active"
# Deferred restarts should be managed by _determine_os_workload_status
# but rabbits wlm code needs refactoring to make it consistent with
# other charms as any message returned by _determine_os_workload_status
# is currently dropped on the floor if: state == 'active'
events = defaultdict(set)
for e in deferred_events.get_deferred_events():
events[e.action].add(e.service)
for action, svcs in events.items():
svc_msg = "Services queued for {}: {}".format(
action, ', '.join(sorted(svcs)))
message = "{}. {}".format(message, svc_msg)
deferred_hooks = deferred_events.get_deferred_hooks()
if deferred_hooks:
svc_msg = "Hooks skipped due to disabled auto restarts: {}".format(
', '.join(sorted(deferred_hooks)))
message = "{}. {}".format(message, svc_msg)
status_set(state, message)
return _assess_status_func

View File

@ -41,12 +41,19 @@ from lib.utils import (
chown, chmod,
is_newer,
)
from charmhelpers.contrib.charmsupport import nrpe
from charmhelpers.contrib.hahelpers.cluster import (
is_clustered,
is_elected_leader,
)
from charmhelpers.contrib.openstack.deferred_events import (
configure_deferred_restarts,
get_deferred_restarts,
is_restart_permitted,
)
from charmhelpers.contrib.openstack.utils import (
is_hook_allowed,
is_unit_paused_set,
set_unit_upgrading,
clear_unit_paused,
@ -220,21 +227,48 @@ def configure_amqp(username, vhost, relation_id, admin=False):
return password
def update_clients():
def update_clients(check_deferred_restarts=True):
"""Update amqp client relation hooks
IFF leader node is ready. Client nodes are considered ready once the leader
has already run amqp_changed.
:param check_deferred_restarts: Whether to check if restarts are
permitted before running hook.
:type check_deferred_restarts: bool
"""
if check_deferred_restarts and get_deferred_restarts():
log("Not sendinfg client update as a restart is pending.", INFO)
return
if rabbit.leader_node_is_ready() or rabbit.client_node_is_ready():
for rid in relation_ids('amqp'):
for unit in related_units(rid):
amqp_changed(relation_id=rid, remote_unit=unit)
amqp_changed(
relation_id=rid,
remote_unit=unit,
check_deferred_restarts=check_deferred_restarts)
@validate_amqp_config_tracker
@hooks.hook('amqp-relation-changed')
def amqp_changed(relation_id=None, remote_unit=None):
def amqp_changed(relation_id=None, remote_unit=None,
check_deferred_restarts=True):
"""Update amqp relations.
:param relation_id: Relation id to update
:type relation_id: str
:param remote_unit: Remote unit on relation_id to update
:type remote_unit: str
:param check_deferred_restarts: Whether to check if restarts are
permitted before running hook.
:type check_deferred_restarts: bool
"""
allowed, reason = is_hook_allowed(
'amqp-relation-changed',
check_deferred_restarts=check_deferred_restarts)
if not allowed:
log(reason, "WARN")
return
singleset = set(['username', 'vhost'])
host_addr = ch_ip.get_relation_ip(
rabbit_net_utils.AMQP_INTERFACE,
@ -460,6 +494,8 @@ def update_cookie(leaders_cookie=None):
if cookie_local == cookie:
log('Cookie already synchronized with peer.')
return
elif not is_restart_permitted():
raise Exception("rabbitmq-server must be restarted but not permitted")
service_stop('rabbitmq-server')
with open(rabbit.COOKIE_PATH, 'wb') as out:
@ -616,12 +652,20 @@ MAN_PLUGIN = 'rabbitmq_management'
@hooks.hook('config-changed')
@rabbit.restart_on_change(rabbit.restart_map())
@harden()
def config_changed():
def config_changed(check_deferred_restarts=True):
"""Run config-changed hook.
if is_unit_paused_set():
log("Do not run config_changed while unit is paused", "WARNING")
:param check_deferred_restarts: Whether to check if restarts are
permitted before running hook.
:type check_deferred_restarts: bool
"""
configure_deferred_restarts(rabbit.services())
allowed, reason = is_hook_allowed(
'config-changed',
check_deferred_restarts=check_deferred_restarts)
if not allowed:
log(reason, "WARN")
return
# Update hosts with this unit's information
cluster_ip = ch_ip.get_relation_ip(
rabbit_net_utils.CLUSTER_INTERFACE,

View File

@ -18,17 +18,20 @@ import os
from charmhelpers.contrib.ssl.service import ServiceCA
from charmhelpers.core.hookenv import (
INFO,
config,
relation_ids,
relation_set,
relation_get,
local_unit,
log,
)
from charmhelpers.contrib.network.ip import (
get_hostname,
get_relation_ip,
)
import charmhelpers.contrib.openstack.cert_utils as ch_cert_utils
import charmhelpers.contrib.openstack.deferred_events as deferred_events
import rabbit_net_utils
@ -112,6 +115,9 @@ def configure_client_ssl(relation_data):
def reconfigure_client_ssl(ssl_enabled=False):
if deferred_events.get_deferred_restarts():
log("Deferred event detected, not updating client", INFO)
return
ssl_config_keys = set(('ssl_key', 'ssl_cert', 'ssl_ca'))
for rid in relation_ids('amqp'):
rdata = relation_get(rid=rid, unit=local_unit())

View File

@ -17,6 +17,7 @@ dev_bundles:
- bionic-ussuri
- focal-victoria
tests:
- zaza.openstack.charm_tests.rabbitmq_server.tests.RabbitMQDeferredRestartTest
- zaza.openstack.charm_tests.rabbitmq_server.tests.RmqTests
tests_options:
force_deploy:

View File

@ -18,14 +18,15 @@ from functools import wraps
from unit_tests.test_utils import CharmTestCase
with mock.patch('charmhelpers.core.hookenv.cached') as cached:
def passthrough(func):
@wraps(func)
def wrapper(*args, **kwargs):
return func(*args, **kwargs)
wrapper._wrapped = func
return wrapper
cached.side_effect = passthrough
import actions
with mock.patch('os.getenv'):
def passthrough(func):
@wraps(func)
def wrapper(*args, **kwargs):
return func(*args, **kwargs)
wrapper._wrapped = func
return wrapper
cached.side_effect = passthrough
import actions
class PauseTestCase(CharmTestCase):

View File

@ -512,6 +512,8 @@ class UtilsTests(CharmTestCase):
callee.assert_called_once_with()
mock_application_version_set.assert_called_with('3.5.7')
@mock.patch.object(rabbit_utils.deferred_events, 'get_deferred_hooks')
@mock.patch.object(rabbit_utils.deferred_events, 'get_deferred_restarts')
@mock.patch.object(rabbit_utils, 'clustered')
@mock.patch.object(rabbit_utils, 'status_set')
@mock.patch.object(rabbit_utils, 'assess_cluster_status')
@ -522,7 +524,11 @@ class UtilsTests(CharmTestCase):
services,
assess_cluster_status,
status_set,
clustered):
clustered,
get_deferred_restarts,
get_deferred_hooks):
get_deferred_hooks.return_value = []
get_deferred_restarts.return_value = []
self.leader_get.return_value = None
services.return_value = 's1'
_determine_os_workload_status.return_value = ('active', '')
@ -535,6 +541,8 @@ class UtilsTests(CharmTestCase):
status_set.assert_called_once_with('active',
'Unit is ready and clustered')
@mock.patch.object(rabbit_utils.deferred_events, 'get_deferred_hooks')
@mock.patch.object(rabbit_utils.deferred_events, 'get_deferred_restarts')
@mock.patch.object(rabbit_utils, 'clustered')
@mock.patch.object(rabbit_utils, 'status_set')
@mock.patch.object(rabbit_utils, 'assess_cluster_status')
@ -542,7 +550,10 @@ class UtilsTests(CharmTestCase):
@mock.patch.object(rabbit_utils, '_determine_os_workload_status')
def test_assess_status_func_cluster_upgrading(
self, _determine_os_workload_status, services,
assess_cluster_status, status_set, clustered):
assess_cluster_status, status_set, clustered,
get_deferred_restarts, get_deferred_hooks):
get_deferred_hooks.return_value = []
get_deferred_restarts.return_value = []
self.leader_get.return_value = True
services.return_value = 's1'
_determine_os_workload_status.return_value = ('active', '')
@ -557,6 +568,8 @@ class UtilsTests(CharmTestCase):
'complete-cluster-series-upgrade when the cluster has completed '
'its upgrade.')
@mock.patch.object(rabbit_utils.deferred_events, 'get_deferred_hooks')
@mock.patch.object(rabbit_utils.deferred_events, 'get_deferred_restarts')
@mock.patch.object(rabbit_utils, 'clustered')
@mock.patch.object(rabbit_utils, 'status_set')
@mock.patch.object(rabbit_utils, 'assess_cluster_status')
@ -564,7 +577,10 @@ class UtilsTests(CharmTestCase):
@mock.patch.object(rabbit_utils, '_determine_os_workload_status')
def test_assess_status_func_cluster_upgrading_first_unit(
self, _determine_os_workload_status, services,
assess_cluster_status, status_set, clustered):
assess_cluster_status, status_set, clustered,
get_deferred_restarts, get_deferred_hooks):
get_deferred_hooks.return_value = []
get_deferred_restarts.return_value = []
self.leader_get.return_value = True
services.return_value = 's1'
_determine_os_workload_status.return_value = ('waiting', 'No peers')

View File

@ -56,6 +56,7 @@ class RelationUtil(CharmTestCase):
shutil.rmtree(self.tmp_dir)
super(RelationUtil, self).tearDown()
@patch('rabbitmq_server_relations.is_hook_allowed')
@patch('rabbitmq_server_relations.rabbit.leader_node_is_ready')
@patch('rabbitmq_server_relations.peer_store_and_set')
@patch('rabbitmq_server_relations.config')
@ -77,7 +78,8 @@ class RelationUtil(CharmTestCase):
relation_set,
mock_config,
mock_peer_store_and_set,
mock_leader_node_is_ready):
mock_leader_node_is_ready,
is_hook_allowed):
"""
Compare version above and below 3.0.1.
Make sure ha_queues is set correctly on each side.
@ -89,6 +91,7 @@ class RelationUtil(CharmTestCase):
return None
is_hook_allowed.return_value = (True, '')
mock_leader_node_is_ready.return_value = True
mock_config.side_effect = config
host_addr = "10.1.2.3"
@ -112,6 +115,7 @@ class RelationUtil(CharmTestCase):
'hostname': host_addr},
relation_id=None)
@patch('rabbitmq_server_relations.is_hook_allowed')
@patch('rabbitmq_server_relations.rabbit.leader_node_is_ready')
@patch('rabbitmq_server_relations.peer_store_and_set')
@patch('rabbitmq_server_relations.config')
@ -133,7 +137,8 @@ class RelationUtil(CharmTestCase):
relation_set,
mock_config,
mock_peer_store_and_set,
mock_leader_node_is_ready):
mock_leader_node_is_ready,
is_hook_allowed):
"""
Compare version above and below 3.0.1.
Make sure ha_queues is set correctly on each side.
@ -147,6 +152,7 @@ class RelationUtil(CharmTestCase):
mock_leader_node_is_ready.return_value = True
mock_config.side_effect = config
is_hook_allowed.return_value = (True, '')
ipv6_addr = "2001:db8:1:0:f816:3eff:fed6:c140"
get_relation_ip.return_value = ipv6_addr
is_elected_leader.return_value = True
@ -187,7 +193,8 @@ class RelationUtil(CharmTestCase):
mock_client_node_is_ready.return_value = False
rabbitmq_server_relations.update_clients()
mock_amqp_changed.assert_called_with(relation_id='amqp:0',
remote_unit='client/0')
remote_unit='client/0',
check_deferred_restarts=True)
# Client Ready
self.relation_ids.return_value = ['amqp:0']
@ -196,7 +203,8 @@ class RelationUtil(CharmTestCase):
mock_client_node_is_ready.return_value = True
rabbitmq_server_relations.update_clients()
mock_amqp_changed.assert_called_with(relation_id='amqp:0',
remote_unit='client/0')
remote_unit='client/0',
check_deferred_restarts=True)
# Both Ready
self.relation_ids.return_value = ['amqp:0']
@ -205,7 +213,8 @@ class RelationUtil(CharmTestCase):
mock_client_node_is_ready.return_value = True
rabbitmq_server_relations.update_clients()
mock_amqp_changed.assert_called_with(relation_id='amqp:0',
remote_unit='client/0')
remote_unit='client/0',
check_deferred_restarts=True)
@patch.object(rabbitmq_server_relations.rabbit,
'configure_notification_ttl')