From e381a20f1d5238caf567ac66c13b3a943ecc654d Mon Sep 17 00:00:00 2001 From: Alex Kavanagh Date: Wed, 22 Feb 2023 14:56:27 +0000 Subject: [PATCH] Fix focal to jammy series upgrade This is a fix/workaround to the package upgrade bug that affects the charm. The post-inst package script updates the .erlang.cookie if it is insecure during the upgrade of rabbit from 3.8 to 3.9. This breaks the series-upgrade resulting in a charm erroring on the post-series-upgrade hook. This fix works by checking if the .erlang.cookie has changed during the post-series-upgrade hook and either updating the cookie in peer storage (if it is insecure) or ensuring that the cookie from peer storage is written to the .erlang.cookie if it isn't the leader. This ensures that the cluster continues to work and that the series-upgrade can be completed across the cluster. Change-Id: I540ea8da85b3b4326ccb8194f1d8b1050b04eae9 Closes-Bug: #2006484 (cherry picked from commit 55b985f55ca4eb2b2a7229c8dcc70abc8c8940f4) --- hooks/rabbitmq_server_relations.py | 98 +++++++- unit_tests/test_rabbitmq_server_relations.py | 231 ++++++++++++++++++- 2 files changed, 320 insertions(+), 9 deletions(-) diff --git a/hooks/rabbitmq_server_relations.py b/hooks/rabbitmq_server_relations.py index 67bd4ccb..1348b5c7 100755 --- a/hooks/rabbitmq_server_relations.py +++ b/hooks/rabbitmq_server_relations.py @@ -16,6 +16,7 @@ import glob import os +import re import shutil import sys import subprocess @@ -56,7 +57,6 @@ from charmhelpers.contrib.openstack.utils import ( is_hook_allowed, is_unit_paused_set, set_unit_upgrading, - clear_unit_paused, clear_unit_upgrading, ) @@ -560,7 +560,7 @@ def cluster_joined(relation_id=None): if is_leader(): log('Leader peer_storing cookie', level=INFO) - cookie = open(rabbit.COOKIE_PATH, 'r').read().strip() + cookie = read_erlang_cookie() peer_store('cookie', cookie) peer_store('leader_node_ip', unit_private_ip()) peer_store('leader_node_hostname', rabbit.get_unit_hostname()) @@ -598,6 +598,11 @@ def cluster_changed(relation_id=None, remote_unit=None): 'rabbitmq cluster config.', level=INFO) return + if is_unit_paused_set(): + log('Unit is paused-set, so avoiding any other cluster activities', + level=INFO) + return + if rabbit.is_sufficient_peers(): rabbit.update_peer_cluster_status() if not rabbit.clustered_with_leader(): @@ -658,9 +663,7 @@ def update_cookie(leaders_cookie=None): cookie = leaders_cookie else: cookie = peer_retrieve('cookie') - cookie_local = None - with open(rabbit.COOKIE_PATH, 'r') as f: - cookie_local = f.read().strip() + cookie_local = read_erlang_cookie() if cookie_local == cookie: log('Cookie already synchronized with peer.') @@ -669,13 +672,92 @@ def update_cookie(leaders_cookie=None): raise Exception("rabbitmq-server must be restarted but not permitted") service_stop('rabbitmq-server') - with open(rabbit.COOKIE_PATH, 'wb') as out: - out.write(cookie.encode('ascii')) + write_erlang_cookie(cookie) if not is_unit_paused_set(): service_restart('rabbitmq-server') rabbit.wait_app() +def read_erlang_cookie(): + """Read the erlang cookie and return it stripped of newline. + + :returns: the erlang cookie + :rtype: str + """ + with open(rabbit.COOKIE_PATH, 'r') as f: + return f.read().strip() + + +def write_erlang_cookie(cookie): + """Write the erlang cookie to the cookie file. + + :param cookie: the cookie to write. + :type cookie: str + """ + try: + with open(rabbit.COOKIE_PATH, 'wb') as out: + out.write(cookie.encode('ascii')) + except Exception as e: + log("Could't write new cookie value to {}: reason: {}" + .format(rabbit.COOKIE_PATH, str(e)), + level=ERROR) + + +def check_erlang_cookie_on_series_upgrade(): + """Check the erlang cookie with peer storage. + + During series upgrade from focal to jammy (rabbitmq upgrade from 3.8 to + 3.9,) there is a package bug [1] that overwrites + the rabbit.COOKIEPATH with a more secure version of the cookie if it isn't + secure enough, which is exactly what the 3.8 and prior cookie looks like. + If the unit is the leader (and thus the first) it gets a new cookie and + 'survives' the upgrade, but doesn't copy the cookie to the peer storage. A + non leader then fails during the post-series-upgrade hook. This function + does two things: + + 1. It ensures that the cookie is secure (if it's the leader), and places + it in peer storage. + 2. If it's the non-leader then, it copies the cookie from peer storage and + puts it in the rabbit.COOKIE_PATH file. + """ + COOKIE_KEY = 'cookie' + RABBITMQ_SERVER = 'rabbitmq-server' + + restart_with_cookie = None + if is_leader(): + # grab the leader cookie and check if it's not secure. + cookie_local = read_erlang_cookie() + if re.match(r'^[A-Z]{20}$', cookie_local): + # write a new, more secure, cookie + cmd = "openssl rand -base64 42" + try: + new_cookie = subprocess.check_output(cmd.split()).decode() + cookie_local = new_cookie + restart_with_cookie = new_cookie + except subprocess.CalledProcessError as e: + log("Couldn't generate a new {}: reason: {}" + .format(rabbit.COOKIE_PATH, str(e)), + level=ERROR) + # ensure the local cookie matches the one stored. + cookie = peer_retrieve(COOKIE_KEY) + if cookie != cookie_local: + peer_store(COOKIE_KEY, cookie_local) + else: + # non leader so see if we need to change the cookie + cookie = peer_retrieve(COOKIE_KEY) + cookie_local = read_erlang_cookie() + if cookie != cookie_local: + restart_with_cookie = cookie + + # now if a restart is required, restart rabbitmq + if restart_with_cookie is not None: + service_stop(RABBITMQ_SERVER) + write_erlang_cookie(restart_with_cookie) + if not is_unit_paused_set(): + service_restart(RABBITMQ_SERVER) + rabbit.wait_app() + + @hooks.hook('ha-relation-joined') @rabbit.coordinated_restart_on_change({rabbit.ENV_CONF: rabbit.restart_map()[rabbit.ENV_CONF]}, @@ -1037,7 +1119,7 @@ def series_upgrade_complete(): os.remove(rabbit.RABBITMQ_CONFIG) rabbit.ConfigRenderer( rabbit.CONFIG_FILES()).write_all() - clear_unit_paused() + check_erlang_cookie_on_series_upgrade() clear_unit_upgrading() rabbit.resume_unit_helper(rabbit.ConfigRenderer(rabbit.CONFIG_FILES())) diff --git a/unit_tests/test_rabbitmq_server_relations.py b/unit_tests/test_rabbitmq_server_relations.py index 094adcef..312dfbe9 100644 --- a/unit_tests/test_rabbitmq_server_relations.py +++ b/unit_tests/test_rabbitmq_server_relations.py @@ -18,7 +18,7 @@ import subprocess import sys import tempfile -from unit_tests.test_utils import CharmTestCase +from unit_tests.test_utils import CharmTestCase, patch_open from unittest.mock import patch, MagicMock, call from charmhelpers.core.unitdata import Storage @@ -480,6 +480,235 @@ class RelationUtil(CharmTestCase): wait_app.assert_called_once_with() self.assertFalse(update_clients.called) + def test_read_erlang_cookie(self): + with patch_open() as (mock_open, mock_file): + mock_file.read.return_value = "the-cookie\n" + self.assertEqual(rabbitmq_server_relations.read_erlang_cookie(), + "the-cookie") + mock_open.assert_called_once_with(rabbit_utils.COOKIE_PATH, 'r') + + def test_write_erlang_cookie(self): + with patch_open() as (mock_open, mock_file): + rabbitmq_server_relations.write_erlang_cookie('a-cookie') + mock_open.assert_called_once_with(rabbit_utils.COOKIE_PATH, 'wb') + mock_file.write.assert_called_once_with(b'a-cookie') + + @patch.object(rabbitmq_server_relations.rabbit, 'wait_app') + @patch.object(rabbitmq_server_relations, 'log') + @patch.object(rabbitmq_server_relations, 'is_unit_paused_set') + @patch.object(rabbitmq_server_relations, 'service_stop') + @patch.object(rabbitmq_server_relations, 'service_restart') + @patch.object(rabbitmq_server_relations, 'peer_store') + @patch.object(rabbitmq_server_relations, 'peer_retrieve') + @patch('subprocess.check_output') + @patch.object(rabbitmq_server_relations, 'read_erlang_cookie') + @patch.object(rabbitmq_server_relations, 'write_erlang_cookie') + def test_check_erlang_cookie_on_series_upgrade__is_leader( + self, + write_erlang_cookie, + read_erlang_cookie, + subprocess_check_output, + peer_retrieve, + peer_store, + service_restart, + service_stop, + is_unit_paused_set, + log, + wait_app, + ): + # check that reading the peer cookie writes to the peer storage, but no + # new cookie is generated, no restart needed. + self.is_leader.return_value = True + read_erlang_cookie.return_value = 'the-cookie' + peer_retrieve.return_value = 'worse-cookie' + rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade() + read_erlang_cookie.assert_called_once_with() + peer_store.assert_called_once_with('cookie', 'the-cookie') + service_stop.assert_not_called() + write_erlang_cookie.assert_not_called() + is_unit_paused_set.assert_not_called() + wait_app.assert_not_called() + + @patch.object(rabbitmq_server_relations.rabbit, 'wait_app') + @patch.object(rabbitmq_server_relations, 'log') + @patch.object(rabbitmq_server_relations, 'is_unit_paused_set') + @patch.object(rabbitmq_server_relations, 'service_stop') + @patch.object(rabbitmq_server_relations, 'service_restart') + @patch.object(rabbitmq_server_relations, 'peer_store') + @patch.object(rabbitmq_server_relations, 'peer_retrieve') + @patch('subprocess.check_output') + @patch.object(rabbitmq_server_relations, 'read_erlang_cookie') + @patch.object(rabbitmq_server_relations, 'write_erlang_cookie') + def test_check_erlang_cookie_on_series_upgrade__is_leader__insecure_cookie( + self, + write_erlang_cookie, + read_erlang_cookie, + subprocess_check_output, + peer_retrieve, + peer_store, + service_restart, + service_stop, + is_unit_paused_set, + log, + wait_app, + ): + # check that a new cookie does have to be upgraded (e.g. it matches the + # 20 charmacters of A-Z. Also, the unit is not paused. + self.is_leader.return_value = True + peer_retrieve.return_value = 'worse-cookie' + read_erlang_cookie.return_value = "ABCDEABCDEABCDEABCDE" + is_unit_paused_set.return_value = False + subprocess_check_output.return_value = b"really-good-cookie" + rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade() + log.assert_not_called() + subprocess_check_output.assert_called_once_with( + ['openssl', 'rand', '-base64', '42']) + read_erlang_cookie.assert_called_once_with() + peer_store.assert_called_once_with('cookie', 'really-good-cookie') + service_stop.assert_called_once_with('rabbitmq-server') + write_erlang_cookie.assert_called_once_with('really-good-cookie') + is_unit_paused_set.assert_called_once_with() + service_restart.assert_called_once_with('rabbitmq-server') + wait_app.assert_called_once_with() + + @patch.object(rabbitmq_server_relations.rabbit, 'wait_app') + @patch.object(rabbitmq_server_relations, 'log') + @patch.object(rabbitmq_server_relations, 'is_unit_paused_set') + @patch.object(rabbitmq_server_relations, 'service_stop') + @patch.object(rabbitmq_server_relations, 'service_restart') + @patch.object(rabbitmq_server_relations, 'peer_store') + @patch.object(rabbitmq_server_relations, 'peer_retrieve') + @patch('subprocess.check_output') + @patch.object(rabbitmq_server_relations, 'read_erlang_cookie') + @patch.object(rabbitmq_server_relations, 'write_erlang_cookie') + def test_check_erlang_cookie_on_series_upgrade__is_leader__unit_paused( + self, + write_erlang_cookie, + read_erlang_cookie, + subprocess_check_output, + peer_retrieve, + peer_store, + service_restart, + service_stop, + is_unit_paused_set, + log, + wait_app, + ): + # check that a new cookie does have to be upgraded (e.g. it matches the + # 20 charmacters of A-Z. Also, the unit is paused. + self.is_leader.return_value = True + peer_retrieve.return_value = 'worse-cookie' + read_erlang_cookie.return_value = "ABCDEABCDEABCDEABCDE" + is_unit_paused_set.return_value = True + subprocess_check_output.return_value = b"really-good-cookie" + rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade() + log.assert_not_called() + subprocess_check_output.assert_called_once_with( + ['openssl', 'rand', '-base64', '42']) + read_erlang_cookie.assert_called_once_with() + peer_store.assert_called_once_with('cookie', 'really-good-cookie') + service_stop.assert_called_once_with('rabbitmq-server') + write_erlang_cookie.assert_called_once_with('really-good-cookie') + is_unit_paused_set.assert_called_once_with() + service_restart.assert_not_called() + wait_app.assert_not_called() + + @patch.object(rabbitmq_server_relations.rabbit, 'wait_app') + @patch.object(rabbitmq_server_relations, 'log') + @patch.object(rabbitmq_server_relations, 'is_unit_paused_set') + @patch.object(rabbitmq_server_relations, 'service_stop') + @patch.object(rabbitmq_server_relations, 'service_restart') + @patch.object(rabbitmq_server_relations, 'peer_store') + @patch.object(rabbitmq_server_relations, 'peer_retrieve') + @patch('subprocess.check_output') + @patch.object(rabbitmq_server_relations, 'read_erlang_cookie') + @patch.object(rabbitmq_server_relations, 'write_erlang_cookie') + def test_check_erlang_cookie_on_series_upgrade__is_leader__subp_error( + self, + write_erlang_cookie, + read_erlang_cookie, + subprocess_check_output, + peer_retrieve, + peer_store, + service_restart, + service_stop, + is_unit_paused_set, + log, + wait_app, + ): + # check that a new cookie does have to be upgraded (e.g. it matches the + # 20 charmacters of A-Z. However, the subprocess fails and so no new + # cookie is generated. + self.is_leader.return_value = True + peer_retrieve.return_value = 'worse-cookie' + read_erlang_cookie.return_value = "ABCDEABCDEABCDEABCDE" + is_unit_paused_set.return_value = True + + def _raise_error(*args, **kwargs): + raise subprocess.CalledProcessError( + cmd="some-command", + returncode=1, + stderr="went bang", + output="nothing good") + + subprocess_check_output.side_effect = _raise_error + rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade() + log.assert_called_once_with( + "Couldn't generate a new /var/lib/rabbitmq/.erlang.cookie: reason:" + " Command 'some-command' returned non-zero exit status 1.", + level='ERROR') + subprocess_check_output.assert_called_once_with( + ['openssl', 'rand', '-base64', '42']) + read_erlang_cookie.assert_called_once_with() + peer_store.assert_called_once_with('cookie', 'ABCDEABCDEABCDEABCDE') + service_stop.assert_not_called() + write_erlang_cookie.assert_not_called() + is_unit_paused_set.assert_not_called() + service_restart.assert_not_called() + wait_app.assert_not_called() + + @patch.object(rabbitmq_server_relations.rabbit, 'wait_app') + @patch.object(rabbitmq_server_relations, 'log') + @patch.object(rabbitmq_server_relations, 'is_unit_paused_set') + @patch.object(rabbitmq_server_relations, 'service_stop') + @patch.object(rabbitmq_server_relations, 'service_restart') + @patch.object(rabbitmq_server_relations, 'peer_store') + @patch.object(rabbitmq_server_relations, 'peer_retrieve') + @patch('subprocess.check_output') + @patch.object(rabbitmq_server_relations, 'read_erlang_cookie') + @patch.object(rabbitmq_server_relations, 'write_erlang_cookie') + def test_check_erlang_cookie_on_series_upgrade__is_not_leader( + self, + write_erlang_cookie, + read_erlang_cookie, + subprocess_check_output, + peer_retrieve, + peer_store, + service_restart, + service_stop, + is_unit_paused_set, + log, + wait_app, + ): + # Not the leader, so just verify if the cookie in the peer relation is + # different and if so, store it and restart the app (start by being + # paused). + self.is_leader.return_value = False + peer_retrieve.return_value = 'worse-cookie' + read_erlang_cookie.return_value = "ABCDEABCDEABCDEABCDE" + is_unit_paused_set.return_value = True + subprocess_check_output.return_value = b"really-good-cookie" + rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade() + log.assert_not_called() + subprocess_check_output.assert_not_called() + read_erlang_cookie.assert_called_once_with() + peer_store.assert_not_called() + service_stop.assert_called_once_with('rabbitmq-server') + write_erlang_cookie.assert_called_once_with('worse-cookie') + is_unit_paused_set.assert_called_once_with() + service_restart.assert_not_called() + wait_app.assert_not_called() + @patch.object(rabbitmq_server_relations, 'leader_set') @patch.object(rabbitmq_server_relations, 'leader_get') @patch.object(rabbitmq_server_relations, 'apt_install')