Make Fernet key distribution more robust

The related bug indicated that the Fernet keys could get out of sync
between the leader and non-leader units.  This patchset assumes that
hooks fail, or that units are off-line when the rotation occurs.  Thus
it tries hard to ensure that the keys are in sync.  It still uses juju
to 'send' the keys from the leader to the subordinate units, so in that
sense, it is not a fix to the related bug, but it does make it more
robust.

Change-Id: Id40a3ccbe565bd742e3fdbd5190deb6b21204a82
Related-Bug: #1849519
This commit is contained in:
Alex Kavanagh 2020-11-13 11:31:50 +00:00
parent 3fcdbc0513
commit c7e34558c4
5 changed files with 181 additions and 66 deletions

View File

@ -365,7 +365,7 @@ class FernetCronContext(context.OSContextGenerator):
def __call__(self):
token_expiration = int(config('token-expiration'))
ctxt = {
'enabled': (fernet_enabled() and is_leader()),
'enabled': fernet_enabled(),
'unit_name': local_unit(),
'charm_dir': charm_dir(),
'minute': ('*/5' if token_expiration > 300 else '*')

View File

@ -566,6 +566,11 @@ def leader_elected():
@restart_on_change(restart_map(), stopstart=True)
def leader_settings_changed():
# we always want to write the keys on leader-settings-changed regardless of
# whether the unit is paused or not.
if fernet_enabled():
key_write()
# if we are paused, delay doing any config changed hooks.
# It is forced on the resume.
if is_unit_paused_set():
@ -582,9 +587,6 @@ def leader_settings_changed():
os_release('keystone')) >= 'liberty':
CONFIGS.write(POLICY_JSON)
if fernet_enabled():
key_write()
update_all_identity_relation_units()
inform_peers_if_ready(check_api_unit_ready)

View File

@ -50,25 +50,25 @@ from charmhelpers.contrib.openstack.ip import (
)
from charmhelpers.contrib.openstack.utils import (
configure_installation_source,
error_out,
get_os_codename_install_source,
os_release,
save_script_rc as _save_script_rc,
pause_unit,
resume_unit,
make_assess_status_func,
os_application_version_set,
os_application_status_set,
CompareOpenStackReleases,
reset_os_release,
snap_install_requested,
install_os_snaps,
get_snaps_install_info_from_origin,
enable_memcache,
is_unit_paused_set,
check_api_unit_ready,
CompareOpenStackReleases,
configure_installation_source,
enable_memcache,
error_out,
get_api_application_status,
get_os_codename_install_source,
get_snaps_install_info_from_origin,
install_os_snaps,
is_unit_paused_set,
make_assess_status_func,
os_application_status_set,
os_application_version_set,
os_release,
pause_unit,
reset_os_release,
resume_unit,
save_script_rc as _save_script_rc,
snap_install_requested,
)
from charmhelpers.core.decorators import (
@ -2507,8 +2507,8 @@ def key_setup():
keystone-manage credential_migrate
Note that we only want to do this once, so we store success in the leader
settings (which we should be).
Note that we only want to do this once, so we touch an empty file
(KEY_SETUP_FILE) to indicate that it has been done.
:raises: `:class:subprocess.CallProcessError` if either of the commands
fails.
@ -2565,10 +2565,14 @@ def key_leader_set():
with open(os.path.join(key_repository, key_number),
'r') as f:
disk_keys[key_repository][key_number] = f.read()
_leader_set_secret({'key_repository': json.dumps(disk_keys)})
# compare current leader_settings of the keys with the current set of keys
leader_keys_str = leader_get('key_repository') or "{}"
current_disk_keys = json.loads(leader_keys_str)
if current_disk_keys != disk_keys:
_leader_set_secret({'key_repository': json.dumps(disk_keys)})
def key_write():
def key_write(log_func=log):
"""Get keys from leader storage and write out to disk
The keys are written to the `FERNET_KEY_REPOSITORY` and
@ -2576,40 +2580,111 @@ def key_write():
written to a tmp file and then moved to the key to avoid any races. Any
'excess' keys are deleted, which may occur if the "number of keys" has been
reduced on the leader.
Note: Only the non leaders do this; the leader uses
fernet_keys_rotate_and_sync() to perform rotations and 'owns' the keys that
are in use.
"""
# don't do this on the leader
if is_leader():
return
leader_keys = leader_get('key_repository')
if not leader_keys:
log('"key_repository" not in leader settings yet...', level=DEBUG)
log_func('"key_repository" not in leader settings yet...', level=DEBUG)
return
leader_keys = json.loads(leader_keys)
for key_repository in [FERNET_KEY_REPOSITORY, CREDENTIAL_KEY_REPOSITORY]:
mkdir(key_repository,
owner=KEYSTONE_USER,
group=KEYSTONE_USER,
perms=0o700)
for key_number, key in leader_keys[key_repository].items():
tmp_filename = os.path.join(key_repository,
".{}".format(key_number))
key_filename = os.path.join(key_repository, key_number)
# write to tmp file first, move the key into place in an atomic
# operation avoiding any races with consumers of the key files
write_file(tmp_filename,
key,
owner=KEYSTONE_USER,
group=KEYSTONE_USER,
perms=0o600)
os.rename(tmp_filename, key_filename)
# now delete any keys that shouldn't be there
for key_number in os.listdir(key_repository):
if key_number not in leader_keys[key_repository].keys():
# ignore if it is not a file
if os.path.isfile(os.path.join(key_repository, key_number)):
os.remove(os.path.join(key_repository, key_number))
for key_repository in [FERNET_KEY_REPOSITORY,
CREDENTIAL_KEY_REPOSITORY]:
tmp_key_repository = key_repository + ".tmp"
try:
if not os.path.exists(key_repository):
mkdir(key_repository,
owner=KEYSTONE_USER,
group=KEYSTONE_USER,
perms=0o700)
# order of this mkdir's is important as this is inside the above
# one.
mkdir(tmp_key_repository,
owner=KEYSTONE_USER,
group=KEYSTONE_USER,
perms=0o700)
for key_number, key in leader_keys[key_repository].items():
tmp_filename = os.path.join(tmp_key_repository,
"{}".format(key_number))
key_filename = os.path.join(key_repository, key_number)
if not _file_equal_to(key_filename, key):
log_func("Replacing/adding into repository: {}, "
"key number: {}"
.format(key_repository, key_number),
level=DEBUG)
# write to tmp file first, move the key into place in an
# atomic operation avoiding any races with consumers of the
# key files
write_file(tmp_filename,
key,
owner=KEYSTONE_USER,
group=KEYSTONE_USER,
perms=0o600)
os.rename(tmp_filename, key_filename)
# now delete any keys that shouldn't be there
for key_number in os.listdir(key_repository):
if key_number not in leader_keys[key_repository].keys():
# ignore if it is not a file
if os.path.isfile(
os.path.join(key_repository, key_number)):
log_func("fernet keys: repository: {}, "
"removing key number: {}"
.format(key_repository, key),
level=DEBUG)
os.remove(os.path.join(key_repository, key_number))
finally:
if os.path.exists(tmp_key_repository):
shutil.rmtree(tmp_key_repository, ignore_errors=False,
onerror=None)
# also say that keys have been setup for this system.
open(KEY_SETUP_FILE, "w").close()
def _file_equal_to(name, contents):
"""Compare a file and a string
The file is assumed to be small so that they can be compared in memory. Do
not use a large file!
:param name: name of file contents to compare
:type name: str
:param contents: contents to compare to
:type contents: str
:returns: true if the file's contents is the same as contents
:rtype: boolean
"""
try:
with open(name, "rt") as f1:
return f1.read() == contents
except FileNotFoundError:
return False
def handle_fernet_keys_cron_call(log_func=log):
"""Handle the cronjob which runs on all units (including leader)
This calls the fernet_keys_rotate_and_sync() on the leader and
key_write() on the non-leader to ensure that keys are always synced across
all units regardless of hook execution issues or other failures.
Note if a unit is in error then it won't get synced keys due to not being
able to use the leadership settings.
:param log_func: The log function to use.
:type log_func: Callable[str, str]
"""
if is_leader():
fernet_keys_rotate_and_sync(log_func)
else:
key_write(log_func)
def fernet_keys_rotate_and_sync(log_func=log):
"""Rotate and sync the keys if the unit is the leader and the primary key
has expired.
@ -2649,11 +2724,13 @@ def fernet_keys_rotate_and_sync(log_func=log):
rotation_time = expiration // (max_keys - 2)
now = time.time()
if last_rotation + rotation_time > now:
# Nothing to do as not reached rotation time
log_func("No rotation until at least {}"
# No rotation to do as not reached rotation time
log_func("No rotation until at least {}, but checking keys are "
"set in leader settings."
.format(
time.asctime(time.gmtime(last_rotation + rotation_time))),
level=DEBUG)
key_leader_set()
return
# now rotate the keys and sync them
fernet_rotate()

View File

@ -45,7 +45,5 @@ def cli_log(msg, level=charmhelpers.core.hookenv.INFO):
print('{}: {}'.format(time.ctime(), msg), file=output)
# the rotate_and_sync_keys() function checks for leadership AND whether to
# rotate the keys or not.
if __name__ == "__main__":
keystone_utils.fernet_keys_rotate_and_sync(log_func=cli_log)
keystone_utils.handle_fernet_keys_cron_call(log_func=cli_log)

View File

@ -1706,11 +1706,13 @@ class TestKeystoneUtils(CharmTestCase):
self.subprocess.check_output.called_with(cmd)
@patch.object(utils, 'is_leader')
@patch.object(utils, 'leader_get')
@patch.object(utils, 'leader_set')
@patch('os.listdir')
def test_key_leader_set(self, listdir, leader_set, is_leader):
def test_key_leader_set(self, listdir, leader_set, leader_get, is_leader):
listdir.return_value = ['0', '1']
is_leader.return_value = True
leader_get.return_value = None
self.time.time.return_value = "the-time"
with patch.object(builtins, 'open', mock_open(
read_data="some_data")):
@ -1726,11 +1728,40 @@ class TestKeystoneUtils(CharmTestCase):
{'0': 'some_data', '1': 'some_data'}})
})
@patch.object(utils, 'is_leader')
@patch.object(utils, 'leader_get')
@patch.object(utils, 'leader_set')
@patch('os.listdir')
def test_key_leader_set_no_change(
self, listdir, leader_set, leader_get, is_leader):
listdir.return_value = ['0', '1']
is_leader.return_value = True
leader_get.return_value = json.dumps(
{utils.FERNET_KEY_REPOSITORY:
{'0': 'some_data', '1': 'some_data'},
utils.CREDENTIAL_KEY_REPOSITORY:
{'0': 'some_data', '1': 'some_data'}})
self.time.time.return_value = "the-time"
with patch.object(builtins, 'open', mock_open(
read_data="some_data")):
utils.key_leader_set()
listdir.has_calls([
call(utils.FERNET_KEY_REPOSITORY),
call(utils.CREDENTIAL_KEY_REPOSITORY)])
leader_set.assert_not_called()
@patch.object(utils, '_file_equal_to')
@patch.object(utils, "is_leader")
@patch('os.rename')
@patch.object(utils, 'leader_get')
@patch('os.listdir')
@patch('os.remove')
def test_key_write(self, remove, listdir, leader_get, rename):
def test_key_write(
self, remove, listdir, leader_get, rename, is_leader,
mock_file_equal_to
):
mock_file_equal_to.return_value = False
is_leader.return_value = False
leader_get.return_value = json.dumps(
{utils.FERNET_KEY_REPOSITORY:
{'0': 'key0', '1': 'key1'},
@ -1746,27 +1777,29 @@ class TestKeystoneUtils(CharmTestCase):
call(utils.FERNET_KEY_REPOSITORY,
owner='keystone', group='keystone',
perms=0o700)])
tmp_fernet_dir = utils.FERNET_KEY_REPOSITORY + ".tmp"
tmp_creds_dir = utils.CREDENTIAL_KEY_REPOSITORY + ".tmp"
# note 'any_order=True' as we are dealing with dictionaries in Py27
self.write_file.assert_has_calls(
[
call(os.path.join(utils.CREDENTIAL_KEY_REPOSITORY, '.0'),
call(os.path.join(tmp_creds_dir, '0'),
u'key0', owner='keystone', group='keystone', perms=0o600),
call(os.path.join(utils.CREDENTIAL_KEY_REPOSITORY, '.1'),
call(os.path.join(tmp_creds_dir, '1'),
u'key1', owner='keystone', group='keystone', perms=0o600),
call(os.path.join(utils.FERNET_KEY_REPOSITORY, '.0'), u'key0',
call(os.path.join(tmp_fernet_dir, '0'), u'key0',
owner='keystone', group='keystone', perms=0o600),
call(os.path.join(utils.FERNET_KEY_REPOSITORY, '.1'), u'key1',
call(os.path.join(tmp_fernet_dir, '1'), u'key1',
owner='keystone', group='keystone', perms=0o600),
], any_order=True)
rename.assert_has_calls(
[
call(os.path.join(utils.CREDENTIAL_KEY_REPOSITORY, '.0'),
call(os.path.join(tmp_creds_dir, '0'),
os.path.join(utils.CREDENTIAL_KEY_REPOSITORY, '0')),
call(os.path.join(utils.CREDENTIAL_KEY_REPOSITORY, '.1'),
call(os.path.join(tmp_creds_dir, '1'),
os.path.join(utils.CREDENTIAL_KEY_REPOSITORY, '1')),
call(os.path.join(utils.FERNET_KEY_REPOSITORY, '.0'),
call(os.path.join(tmp_fernet_dir, '0'),
os.path.join(utils.FERNET_KEY_REPOSITORY, '0')),
call(os.path.join(utils.FERNET_KEY_REPOSITORY, '.1'),
call(os.path.join(tmp_fernet_dir, '1'),
os.path.join(utils.FERNET_KEY_REPOSITORY, '1')),
], any_order=True)
@ -1806,13 +1839,18 @@ class TestKeystoneUtils(CharmTestCase):
_stat = MagicMock()
_stat.st_mtime = 10
mock_os.stat.return_value = _stat
mock_key_leader_set.reset_mock()
utils.fernet_keys_rotate_and_sync(log_func=self.log)
self.log.assert_called_once_with(
'No rotation until at least Thu Jan 1 00:01:10 1970',
'No rotation until at least Thu Jan 1 00:01:10 1970, '
'but checking keys are set in leader settings.',
level='DEBUG')
mock_key_leader_set.assert_not_called()
# key_leader_set is always called once, as even if keys are not rotated
# it's needs to be certain that they are in the leader settings
mock_key_leader_set.assert_called_once_with()
# finally, set it up so that the rotation and sync occur
self.time.time.return_value = 71
mock_key_leader_set.reset_mock()
utils.fernet_keys_rotate_and_sync()
mock_fernet_rotate.assert_called_once_with()
mock_key_leader_set.assert_called_once_with()