Defer restart of services on AppArmor change

AppArmor changes cause restarts of all OSD
services in all units at the same time, which
can cause ceph-cluster outage.

Defer actual update and set a status message prompting
to run an action for each unit separately.

Related-bug: #2068020
Change-Id: I32398a0525b7098503de36d72e593c14207102a1
This commit is contained in:
Rodrigo Barbieri 2024-06-10 16:26:14 -03:00
parent 8328f0f6a4
commit b4a870af2c
5 changed files with 207 additions and 71 deletions

View File

@ -149,6 +149,11 @@ stop:
- osds
security-checklist:
description: Validate the running configuration against the OpenStack security guides checklist
update-apparmor-and-restart-osds:
description: |
Invoke pending continuation of update of AppArmor profiles followed by restarting OSD
services. Make sure to run this action separately in each unit at different times
to avoid simultaneous restart of OSDs.
get-availability-zone:
description: |
Obtain information about the availability zone, which will contain information about the CRUSH

View File

@ -28,7 +28,8 @@ from charmhelpers.core.hookenv import (
function_fail,
log,
)
from ceph_hooks import assess_status
from ceph_hooks import assess_status, update_apparmor
from utils import parse_osds_arguments, ALL
START = 'start'
@ -138,6 +139,7 @@ def start():
ACTIONS = {'stop': stop,
'start': start,
'update-apparmor-and-restart-osds': update_apparmor,
}

View File

@ -0,0 +1 @@
service.py

View File

@ -132,6 +132,10 @@ STORAGE_MOUNT_PATH = '/var/lib/ceph'
CRON_CEPH_CHECK_FILE = '/etc/cron.d/check-osd-services'
class AppArmorProfileNeverInstalledException(Exception):
pass
def check_for_upgrade():
if not os.path.exists(_upgrade_keyring):
@ -211,18 +215,45 @@ def tune_network_adapters():
ceph.tune_nic(interface)
def check_aa_profile_needs_update():
"""
Compares the hash of a new AA profile and the previously installed one,
if one exists.
"""
db = kv()
for x in glob.glob('files/apparmor/*'):
db_key = 'hash:{}'.format(x)
previous_hash = db.get(db_key)
if previous_hash is None:
raise AppArmorProfileNeverInstalledException()
new_hash = file_hash(x)
if new_hash != previous_hash:
return True
return False
def _set_pending_apparmor_update_status():
# Setting to active to avoid impact of other workflows
status_set('active',
('Pending update-apparmor-and-restart-osds action required,'
' please refer to the action documentation.'))
def aa_profile_changed(service_name='ceph-osd-all'):
"""
Reload AA profile and restart OSD processes.
"""
log("Loading new AppArmor profile")
service_reload('apparmor')
if config('aa-profile-mode') == 'disable':
# No need to restart services if AppArmor is not enabled
return
log("Restarting ceph-osd services with new AppArmor profile")
if ceph.systemd():
for osd_id in ceph.get_local_osd_ids():
service_restart('ceph-osd@{}'.format(osd_id))
service_restart('ceph-osd.target')
else:
service_restart(service_name)
assess_status()
def copy_profile_into_place():
@ -285,12 +316,8 @@ def use_vaultlocker():
return False
def install_apparmor_profile():
"""
Install ceph apparmor profiles and configure
based on current setting of 'aa-profile-mode'
configuration option.
"""
def update_apparmor():
"""Action: Proceed to updating the profile and restarting OSDs."""
changes = copy_profile_into_place()
# NOTE(jamespage): If any profiles where changed or
# freshly installed then force
@ -302,6 +329,28 @@ def install_apparmor_profile():
aa_profile_changed()
def install_apparmor_profile():
"""
Install ceph apparmor profiles and configure
based on current setting of 'aa-profile-mode'
configuration option.
"""
changes = False
try:
changes = check_aa_profile_needs_update()
except AppArmorProfileNeverInstalledException:
update_apparmor()
return
if not changes:
return
if config('aa-profile-mode') != 'disable':
log("Deferring update of AppArmor profiles to avoid "
"restarting ceph-osd services all at the same time.")
_set_pending_apparmor_update_status()
else:
update_apparmor()
def install_udev_rules():
"""
Install and reload udev rules for ceph-volume LV
@ -959,8 +1008,16 @@ def assess_status():
status_set('blocked',
'No block devices detected using current configuration')
else:
status_set('active',
'Unit is ready ({} OSD)'.format(len(running_osds)))
aa_needs_update = False
try:
aa_needs_update = check_aa_profile_needs_update()
except AppArmorProfileNeverInstalledException:
pass
if aa_needs_update and config('aa-profile-mode') != 'disable':
_set_pending_apparmor_update_status()
else:
status_set('active',
'Unit is ready ({} OSD)'.format(len(running_osds)))
else:
pristine = True
# Check unmounted disks that should be configured but don't check

View File

@ -481,19 +481,104 @@ class CephHooksTestCase(unittest.TestCase):
}
self.assertEqual(ctxt, expected)
@patch.object(ceph_hooks, "kv")
@patch.object(ceph_hooks.glob, "glob")
@patch.object(ceph_hooks, "file_hash")
def test_check_aa_profile_needs_update_True(
self, mock_hash, mock_glob, mock_kv):
mock_glob.return_value = ['file1', 'file2', 'file3']
mock_hash.side_effect = ['hash1', 'hash2']
mock_kv.return_value = {'hash:file1': 'hash1',
'hash:file2': 'hash2_old'}
result = ceph_hooks.check_aa_profile_needs_update()
self.assertTrue(result)
mock_hash.assert_has_calls([call('file1'), call('file2')])
@patch.object(ceph_hooks, "kv")
@patch.object(ceph_hooks.glob, "glob")
@patch.object(ceph_hooks, "file_hash")
def test_check_aa_profile_needs_update_False(
self, mock_hash, mock_glob, mock_kv):
mock_glob.return_value = ['file1', 'file2', 'file3']
mock_hash.side_effect = ['hash1', 'hash2', 'hash3']
mock_kv.return_value = {'hash:file1': 'hash1',
'hash:file2': 'hash2',
'hash:file3': 'hash3'}
result = ceph_hooks.check_aa_profile_needs_update()
self.assertFalse(result)
mock_hash.assert_has_calls(
[call('file1'), call('file2'), call('file3')])
@patch.object(ceph_hooks, "kv")
@patch.object(ceph_hooks.glob, "glob")
@patch.object(ceph_hooks, "file_hash")
def test_check_aa_profile_needs_update_never_installed(
self, mock_hash, mock_glob, mock_kv):
mock_glob.return_value = ['file1', 'file2', 'file3']
mock_kv.return_value = {}
self.assertRaises(ceph_hooks.AppArmorProfileNeverInstalledException,
ceph_hooks.check_aa_profile_needs_update)
mock_hash.assert_not_called()
@patch.object(ceph_hooks, 'check_aa_profile_needs_update')
@patch.object(ceph_hooks, 'update_apparmor')
@patch.object(ceph_hooks, '_set_pending_apparmor_update_status')
def test_install_apparmor_profile_no_change(
self, mock_set, mock_update, mock_check):
mock_check.return_value = False
ceph_hooks.install_apparmor_profile()
mock_set.assert_not_called()
mock_update.assert_not_called()
@patch.object(ceph_hooks, 'check_aa_profile_needs_update')
@patch.object(ceph_hooks, 'update_apparmor')
@patch.object(ceph_hooks, '_set_pending_apparmor_update_status')
@patch.object(ceph_hooks, 'config')
def test_install_apparmor_profile_disable(
self, mock_config, mock_set, mock_update, mock_check):
mock_check.return_value = True
mock_config.return_value = 'disable'
ceph_hooks.install_apparmor_profile()
mock_set.assert_not_called()
mock_update.assert_called_once_with()
@patch.object(ceph_hooks, 'check_aa_profile_needs_update')
@patch.object(ceph_hooks, 'update_apparmor')
@patch.object(ceph_hooks, '_set_pending_apparmor_update_status')
@patch.object(ceph_hooks, 'config')
def test_install_apparmor_profile_never_installed(
self, mock_config, mock_set, mock_update, mock_check):
mock_check.side_effect = (
ceph_hooks.AppArmorProfileNeverInstalledException)
ceph_hooks.install_apparmor_profile()
mock_config.assert_not_called()
mock_set.assert_not_called()
mock_update.assert_called_once_with()
@patch.object(ceph_hooks, 'check_aa_profile_needs_update')
@patch.object(ceph_hooks, 'update_apparmor')
@patch.object(ceph_hooks, '_set_pending_apparmor_update_status')
@patch.object(ceph_hooks, 'config')
def test_install_apparmor_profile_enforce(
self, mock_config, mock_set, mock_update, mock_check):
mock_check.return_value = True
mock_config.return_value = 'enforce'
ceph_hooks.install_apparmor_profile()
mock_set.assert_called_once_with()
mock_update.assert_not_called()
@patch.object(ceph_hooks, 'assess_status')
@patch.object(ceph_hooks, 'ceph')
@patch.object(ceph_hooks, 'service_restart')
@patch.object(ceph_hooks, 'service_reload')
@patch.object(ceph_hooks, 'copy_profile_into_place')
@patch.object(ceph_hooks, 'CephOsdAppArmorContext')
@patch.object(ceph_hooks, 'config')
def test_install_apparmor_profile(self, mock_config,
mock_apparmor_context,
mock_copy_profile_into_place,
mock_service_reload,
mock_service_restart,
mock_ceph):
'''Apparmor profile reloaded when config changes (upstart)'''
def test_update_apparmor_upstart_config_changed(
self, mock_config, mock_apparmor_context,
mock_copy_profile_into_place, mock_service_reload,
mock_service_restart, mock_ceph,
mock_assess_status):
m_config = MagicMock()
m_config.changed.return_value = True
mock_config.return_value = m_config
@ -502,81 +587,67 @@ class CephHooksTestCase(unittest.TestCase):
mock_ceph.systemd.return_value = False
mock_copy_profile_into_place.return_value = False
ceph_hooks.install_apparmor_profile()
ceph_hooks.update_apparmor()
m_aa_context.setup_aa_profile.assert_called()
mock_copy_profile_into_place.assert_called()
mock_service_restart.assert_called_with('ceph-osd-all')
m_config.changed.assert_called_with('aa-profile-mode')
mock_service_reload.assert_called_with('apparmor')
mock_assess_status.assert_called_once_with()
@patch.object(ceph_hooks, 'assess_status')
@patch.object(ceph_hooks, 'ceph')
@patch.object(ceph_hooks, 'service_restart')
@patch.object(ceph_hooks, 'service_reload')
@patch.object(ceph_hooks, 'copy_profile_into_place')
@patch.object(ceph_hooks, 'CephOsdAppArmorContext')
@patch.object(ceph_hooks, 'config')
def test_install_apparmor_profile_systemd(self, mock_config,
mock_apparmor_context,
mock_copy_profile_into_place,
mock_service_reload,
mock_service_restart,
mock_ceph):
'''Apparmor profile reloaded when config changes (systemd)'''
m_config = MagicMock()
m_config.changed.return_value = True
mock_config.return_value = m_config
def test_update_apparmor_systemd_profile_changed(
self, mock_config, mock_apparmor_context,
mock_copy_profile_into_place, mock_service_reload,
mock_service_restart, mock_ceph,
mock_assess_status):
m_aa_context = MagicMock()
mock_apparmor_context.return_value = m_aa_context
mock_ceph.systemd.return_value = True
mock_ceph.get_local_osd_ids.return_value = [0, 1, 2]
mock_copy_profile_into_place.return_value = False
ceph_hooks.install_apparmor_profile()
m_aa_context.setup_aa_profile.assert_called()
mock_copy_profile_into_place.assert_called()
m_config.changed.assert_called_with('aa-profile-mode')
mock_service_reload.assert_called_with('apparmor')
mock_service_restart.assert_has_calls([
call('ceph-osd@0'),
call('ceph-osd@1'),
call('ceph-osd@2'),
])
@patch.object(ceph_hooks, 'ceph')
@patch.object(ceph_hooks, 'service_restart')
@patch.object(ceph_hooks, 'service_reload')
@patch.object(ceph_hooks, 'copy_profile_into_place')
@patch.object(ceph_hooks, 'CephOsdAppArmorContext')
@patch.object(ceph_hooks, 'config')
def test_install_apparmor_profile_new_install(self, mock_config,
mock_apparmor_context,
mock_copy_profile_into_place,
mock_service_reload,
mock_service_restart,
mock_ceph):
'''Apparmor profile always reloaded on fresh install'''
m_config = MagicMock()
m_config.changed.return_value = True
mock_config.return_value = m_config
m_aa_context = MagicMock()
mock_apparmor_context.return_value = m_aa_context
mock_ceph.systemd.return_value = True
mock_ceph.get_local_osd_ids.return_value = [0, 1, 2]
mock_copy_profile_into_place.return_value = True
ceph_hooks.install_apparmor_profile()
ceph_hooks.update_apparmor()
m_aa_context.setup_aa_profile.assert_called()
mock_copy_profile_into_place.assert_called()
m_config.changed.assert_not_called()
mock_config.changed.assert_not_called()
mock_service_reload.assert_called_with('apparmor')
mock_service_restart.assert_has_calls([
call('ceph-osd@0'),
call('ceph-osd@1'),
call('ceph-osd@2'),
])
mock_service_restart.assert_called_once_with('ceph-osd.target')
mock_assess_status.assert_called_once_with()
@patch.object(ceph_hooks, 'assess_status')
@patch.object(ceph_hooks, 'ceph')
@patch.object(ceph_hooks, 'service_restart')
@patch.object(ceph_hooks, 'service_reload')
@patch.object(ceph_hooks, 'copy_profile_into_place')
@patch.object(ceph_hooks, 'CephOsdAppArmorContext')
@patch.object(ceph_hooks, 'config')
def test_update_apparmor_disable(
self, mock_config, mock_apparmor_context,
mock_copy_profile_into_place,
mock_service_reload, mock_service_restart,
mock_ceph, mock_assess_status):
mock_config.return_value = 'disable'
m_aa_context = MagicMock()
mock_apparmor_context.return_value = m_aa_context
mock_ceph.systemd.return_value = True
mock_copy_profile_into_place.return_value = True
ceph_hooks.update_apparmor()
m_aa_context.setup_aa_profile.assert_called()
mock_copy_profile_into_place.assert_called()
mock_config.changed.assert_not_called()
mock_service_reload.assert_called_with('apparmor')
mock_service_restart.assert_not_called()
mock_assess_status.assert_not_called()
@patch.object(ceph_hooks, 'is_block_device')
@patch.object(ceph_hooks, 'storage_list')