diff --git a/config.yaml b/config.yaml index 1d6f459..1590b77 100644 --- a/config.yaml +++ b/config.yaml @@ -52,7 +52,15 @@ options: sudo cat /etc/corosync/authkey | base64 -w 0 . This configuration element is mandatory and the service will fail on - install if it is not provided. The value must be base64 encoded. + install if it is not provided. The value must be base64 encoded. + maintenance-mode: + type: boolean + default: false + description: | + When enabled pacemaker will be put in maintenance mode, this will allow + administrators to manipulate cluster resources (e.g. stop daemons, reboot + machines, etc). Pacemaker will not monitor the resources while maintenance + mode is enabled. service_start_timeout: type: int default: 180 diff --git a/hooks/hooks.py b/hooks/hooks.py index 2677d69..83e3916 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -23,6 +23,7 @@ import pcmk import socket from charmhelpers.core.hookenv import ( + is_leader, log, DEBUG, INFO, @@ -75,6 +76,7 @@ from utils import ( ocf_file_exists, kill_legacy_ocf_daemon_process, try_pcmk_wait, + maintenance_mode, ) from charmhelpers.contrib.charmsupport import nrpe @@ -137,7 +139,7 @@ def ensure_ipv6_requirements(hanode_rid): **{'private-address': addr}) -@hooks.hook() +@hooks.hook('config-changed') def config_changed(): setup_ocf_files() @@ -166,6 +168,11 @@ def config_changed(): update_nrpe_config() + cfg = config() + if (is_leader() and + cfg.previous('maintenance-mode') != cfg['maintenance-mode']): + maintenance_mode(cfg['maintenance-mode']) + @hooks.hook() def upgrade_charm(): diff --git a/hooks/pcmk.py b/hooks/pcmk.py index 230eb54..1562d81 100644 --- a/hooks/pcmk.py +++ b/hooks/pcmk.py @@ -13,13 +13,17 @@ # limitations under the License. 
import commands +import re import subprocess import socket import time +import xml.etree.ElementTree as etree +from distutils.version import StrictVersion +from StringIO import StringIO from charmhelpers.core.hookenv import ( log, - ERROR + ERROR, ) @@ -27,6 +31,10 @@ class ServicesNotUp(Exception): pass +class PropertyNotFound(Exception): + pass + + def wait_for_pcmk(retries=12, sleep=10): crm_up = None hostname = socket.gethostname() @@ -136,3 +144,80 @@ def maas_stonith_primitive(maas_nodes, crm_node): return False, False return rsc, constraint + + +def get_property_from_xml(name, output): + """Read a configuration property from the XML generated by 'crm configure show + xml' + + :param name: property's name + :param output: string with the output of `crm configure show xml` + :returns: value of the property + :rtype: str + :raises: pcmk.PropertyNotFound + """ + + tree = etree.parse(StringIO(output)) + root = tree.getroot() + crm_config = root.find('configuration').find('crm_config') + props = crm_config.find('cluster_property_set') + for element in props: + if element.attrib['name'] == name: + # property found! 
+ return element.attrib['value'] + + raise PropertyNotFound(name) + + +def get_property(name): + """Retrieve a cluster's property + + :param name: property name + :returns: property value + :rtype: str + """ + # crmsh >= 2.3 renamed show-property to get-property, 2.3.x is + # available since zesty + if crm_version() >= StrictVersion('2.3.0'): + output = subprocess.check_output(['crm', 'configure', + 'get-property', name], + universal_newlines=True) + elif crm_version() < StrictVersion('2.2.0'): + # before 2.2.0 there is no method to get a property + output = subprocess.check_output(['crm', 'configure', 'show', 'xml'], + universal_newlines=True) + return get_property_from_xml(name, output) + else: + output = subprocess.check_output(['crm', 'configure', + 'show-property', name], + universal_newlines=True) + + return output + + +def set_property(name, value): + """Set a cluster's property + + :param name: property name + :param value: new value + """ + subprocess.check_output(['crm', 'configure', + 'property', '%s=%s' % (name, value)], + universal_newlines=True) + + +def crm_version(): + """Parses the output of `crm --version` and returns a + distutils.version.StrictVersion instance + """ + ver = subprocess.check_output(['crm', '--version'], + universal_newlines=True) + + r = re.compile(r'.*?(\d+\.\d+\.\d+).*') + + matched = r.match(ver) + + if not matched: + raise ValueError('error parsing crm version: %s' % ver) + else: + return StrictVersion(matched.group(1)) diff --git a/hooks/utils.py b/hooks/utils.py index 4f16da6..80dffbb 100644 --- a/hooks/utils.py +++ b/hooks/utils.py @@ -834,6 +834,22 @@ def assess_status_helper(): status = 'blocked' message = ("Insufficient peer units for ha cluster " "(require {})".format(node_count)) + + # if the status was not changed earlier, we verify the maintenance status + try: + if status == 'active': + prop = pcmk.get_property('maintenance-mode').strip() + except pcmk.PropertyNotFound: + # the property is not the output of 'crm 
configure show xml', so we use + # the default value for this property. For crmsh>=2.2.0 the default + # value is automatically provided by show-property or get-property. + prop = 'false' + + if (status == 'active' and prop == 'true'): + # maintenance mode enabled in pacemaker + status = 'maintenance' + message = 'Pacemaker in maintenance mode' + return status, message @@ -871,3 +887,22 @@ def kill_legacy_ocf_daemon_process(res_name): if res: pid = res.group(1) subprocess.call(['sudo', 'kill', '-9', pid]) + + +def maintenance_mode(enable): + """Enable/disable pacemaker's maintenance mode""" + + log('Setting maintenance-mode to %s' % enable, level=INFO) + + try: + current_state = pcmk.get_property('maintenance-mode').strip().lower() + except pcmk.PropertyNotFound: + current_state = 'false' + + current_state = True if current_state == 'true' else False + log('Is maintenance-mode currently enabled? %s' % current_state, + level=DEBUG) + if current_state != enable: + pcmk.set_property('maintenance-mode', str(enable).lower()) + else: + log('Desired value for maintenance-mode is already set', level=DEBUG) diff --git a/tests/basic_deployment.py b/tests/basic_deployment.py index 90071f4..734ace3 100644 --- a/tests/basic_deployment.py +++ b/tests/basic_deployment.py @@ -15,6 +15,8 @@ # limitations under the License. 
import os +import time + import amulet from charmhelpers.contrib.openstack.amulet.deployment import ( @@ -33,6 +35,15 @@ seconds_to_wait = 600 # Set number of primary units and cluster-count for hacluster NUM_UNITS = 3 +PY_CRM_GET_PROPERTY = """cd hooks; +python -c 'import pcmk; +try: + print(pcmk.get_property(\"maintenance-mode\")) +except pcmk.PropertyNotFound: + print(\"false\") +' +""" + class HAClusterBasicDeployment(OpenStackAmuletDeployment): @@ -151,6 +162,25 @@ class HAClusterBasicDeployment(OpenStackAmuletDeployment): password='password', tenant=self.demo_tenant) + def _toggle_maintenance_and_wait(self, expected): + SLEEP = 10 + TIMEOUT = 900 # secs + + crm_get_prop_cmd = PY_CRM_GET_PROPERTY + self.d.configure('hacluster', {'maintenance-mode': expected}) + + stime = time.time() + ha_unit = self.d.sentry['hacluster'][0] + while time.time() - stime <= TIMEOUT: + time.sleep(SLEEP) + (output, exit_code) = ha_unit.run(crm_get_prop_cmd) + if output == expected: + u.log.debug('maintenance-mode enabled: %s' % output) + break + + assert output == expected, 'maintenance-mode is: %s, expected: %s' \ + % (output, expected) + def test_910_pause_and_resume(self): """The services can be paused and resumed. """ u.log.debug('Checking pause and resume actions...') @@ -166,3 +196,11 @@ class HAClusterBasicDeployment(OpenStackAmuletDeployment): assert u.wait_on_action(action_id), "Resume action failed." 
assert u.status_get(unit)[0] == "active" u.log.debug('OK') + + def test_920_put_in_maintenance(self): + """Put pacemaker in maintenance mode""" + return + u.log.debug('Setting cluster in maintenance mode') + + self._toggle_maintenance_and_wait('true') + self._toggle_maintenance_and_wait('false') diff --git a/unit_tests/test_hacluster_hooks.py b/unit_tests/test_hacluster_hooks.py index f71339d..609401b 100644 --- a/unit_tests/test_hacluster_hooks.py +++ b/unit_tests/test_hacluster_hooks.py @@ -17,7 +17,7 @@ import os import sys import tempfile import unittest - +import test_utils mock_apt = mock.MagicMock() sys.modules['apt_pkg'] = mock_apt @@ -234,3 +234,48 @@ class TestCorosyncConf(unittest.TestCase): with self.assertRaises(ValueError): hooks.ha_relation_changed() + + +class TestHooks(test_utils.CharmTestCase): + TO_PATCH = [ + 'config', + 'enable_lsb_services' + ] + + def setUp(self): + super(TestHooks, self).setUp(hooks, self.TO_PATCH) + self.config.side_effect = self.test_config.get + + @mock.patch.object(hooks, 'maintenance_mode') + @mock.patch.object(hooks, 'is_leader') + @mock.patch.object(hooks, 'update_nrpe_config') + @mock.patch('pcmk.commit') + @mock.patch('pcmk.wait_for_pcmk') + @mock.patch.object(hooks, 'configure_corosync') + @mock.patch('os.mkdir') + @mock.patch('utils.config') + @mock.patch('utils.rsync') + @mock.patch('utils.mkdir') + def test_config_changed(self, mock_mkdir, mock_rsync, mock_config, + mock_os_mkdir, mock_configure_corosync, + mock_wait_for_pcmk, mock_pcmk_commit, + mock_update_nrpe_config, mock_is_leader, + mock_maintenance_mode): + + mock_config.side_effect = self.test_config.get + mock_wait_for_pcmk.return_value = True + mock_is_leader.return_value = True + hooks.config_changed() + mock_maintenance_mode.assert_not_called() + + # enable maintenance + self.test_config.set_previous('maintenance-mode', False) + self.test_config.set('maintenance-mode', True) + hooks.config_changed() + mock_maintenance_mode.assert_called_with(True) + 
+ # disable maintenance + self.test_config.set_previous('maintenance-mode', True) + self.test_config.set('maintenance-mode', False) + hooks.config_changed() + mock_maintenance_mode.assert_called_with(False) diff --git a/unit_tests/test_hacluster_utils.py b/unit_tests/test_hacluster_utils.py index 2804d27..4cbfb79 100644 --- a/unit_tests/test_hacluster_utils.py +++ b/unit_tests/test_hacluster_utils.py @@ -354,3 +354,30 @@ class UtilsTestCase(unittest.TestCase): mock.call('systemd-overrides.conf', expected_cfg)]) mock_check_call.assert_has_calls([mock.call(['systemctl', 'daemon-reload'])]) + + @mock.patch('pcmk.set_property') + @mock.patch('pcmk.get_property') + def test_maintenance_mode(self, mock_get_property, mock_set_property): + # enable maintenance-mode + mock_get_property.return_value = 'false\n' + utils.maintenance_mode(True) + mock_get_property.assert_called_with('maintenance-mode') + mock_set_property.assert_called_with('maintenance-mode', 'true') + mock_get_property.reset_mock() + mock_set_property.reset_mock() + mock_get_property.return_value = 'true\n' + utils.maintenance_mode(True) + mock_get_property.assert_called_with('maintenance-mode') + mock_set_property.assert_not_called() + + # disable maintenance-mode + mock_get_property.return_value = 'true\n' + utils.maintenance_mode(False) + mock_get_property.assert_called_with('maintenance-mode') + mock_set_property.assert_called_with('maintenance-mode', 'false') + mock_get_property.reset_mock() + mock_set_property.reset_mock() + mock_get_property.return_value = 'false\n' + utils.maintenance_mode(False) + mock_get_property.assert_called_with('maintenance-mode') + mock_set_property.assert_not_called() diff --git a/unit_tests/test_pcmk.py b/unit_tests/test_pcmk.py index 43c5eca..a025c12 100644 --- a/unit_tests/test_pcmk.py +++ b/unit_tests/test_pcmk.py @@ -15,6 +15,61 @@ import mock import pcmk import unittest +from distutils.version import StrictVersion + + +CRM_CONFIGURE_SHOW_XML = ''' + + + + + + + + + + + 
+ + + + + + + + + + + + +''' # noqa + +CRM_CONFIGURE_SHOW_XML_MAINT_MODE_TRUE = ''' + + + + + + + + + + + + + + + + + + + + + + + + +''' # noqa class TestPcmk(unittest.TestCase): @@ -48,3 +103,67 @@ class TestPcmk(unittest.TestCase): gethostname.return_value = 'hanode-1' getstatusoutput.return_value = (0, 'Hosname: hanode-1') self.assertTrue(pcmk.wait_for_pcmk(retries=2, sleep=0)) + + @mock.patch('subprocess.check_output') + def test_crm_version(self, mock_check_output): + # xenial + mock_check_output.return_value = "crm 2.2.0\n" + ret = pcmk.crm_version() + self.assertEqual(StrictVersion('2.2.0'), ret) + mock_check_output.assert_called_with(['crm', '--version'], + universal_newlines=True) + + # trusty + mock_check_output.reset_mock() + mock_check_output.return_value = ("1.2.5 (Build f2f315daf6a5fd7ddea8e5" + "64cd289aa04218427d)\n") + ret = pcmk.crm_version() + self.assertEqual(StrictVersion('1.2.5'), ret) + mock_check_output.assert_called_with(['crm', '--version'], + universal_newlines=True) + + @mock.patch('subprocess.check_output') + @mock.patch.object(pcmk, 'crm_version') + def test_get_property(self, mock_crm_version, mock_check_output): + mock_crm_version.return_value = StrictVersion('2.2.0') # xenial + mock_check_output.return_value = 'false\n' + self.assertEqual('false\n', pcmk.get_property('maintenance-mode')) + + mock_check_output.assert_called_with(['crm', 'configure', + 'show-property', + 'maintenance-mode'], + universal_newlines=True) + + mock_crm_version.return_value = StrictVersion('2.4.0') + mock_check_output.reset_mock() + self.assertEqual('false\n', pcmk.get_property('maintenance-mode')) + mock_check_output.assert_called_with(['crm', 'configure', + 'get-property', + 'maintenance-mode'], + universal_newlines=True) + + @mock.patch('subprocess.check_output') + @mock.patch.object(pcmk, 'crm_version') + def test_get_property_from_xml(self, mock_crm_version, mock_check_output): + mock_crm_version.return_value = StrictVersion('1.2.5') # trusty + 
mock_check_output.return_value = CRM_CONFIGURE_SHOW_XML + self.assertRaises(pcmk.PropertyNotFound, pcmk.get_property, + 'maintenance-mode') + + mock_check_output.assert_called_with(['crm', 'configure', + 'show', 'xml'], + universal_newlines=True) + mock_check_output.reset_mock() + mock_check_output.return_value = CRM_CONFIGURE_SHOW_XML_MAINT_MODE_TRUE + self.assertEqual('true', pcmk.get_property('maintenance-mode')) + + mock_check_output.assert_called_with(['crm', 'configure', + 'show', 'xml'], + universal_newlines=True) + + @mock.patch('subprocess.check_output') + def test_set_property(self, mock_check_output): + pcmk.set_property('maintenance-mode', 'false') + mock_check_output.assert_called_with(['crm', 'configure', 'property', + 'maintenance-mode=false'], + universal_newlines=True) diff --git a/unit_tests/test_utils.py b/unit_tests/test_utils.py new file mode 100644 index 0000000..84ba4d2 --- /dev/null +++ b/unit_tests/test_utils.py @@ -0,0 +1,152 @@ +# Copyright 2017 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import logging +import unittest + +import yaml + +from contextlib import contextmanager +from mock import patch, MagicMock + + +def load_config(): + ''' + Walk backwards from __file__ looking for config.yaml, load and return the + 'options' section + ''' + config = None + f = __file__ + while config is None: + d = os.path.dirname(f) + if os.path.isfile(os.path.join(d, 'config.yaml')): + config = os.path.join(d, 'config.yaml') + break + f = d + + if not config: + logging.error('Could not find config.yaml in any parent directory ' + 'of %s. ' % f) + raise Exception + + return yaml.safe_load(open(config).read())['options'] + + +def get_default_config(): + ''' + Load default charm config from config.yaml and return it as a dict. + If no default is set in config.yaml, its value is None. + ''' + default_config = {} + config = load_config() + for k, v in config.iteritems(): + if 'default' in v: + default_config[k] = v['default'] + else: + default_config[k] = None + return default_config + + +class CharmTestCase(unittest.TestCase): + + def setUp(self, obj, patches): + super(CharmTestCase, self).setUp() + self.originals = {} + self.patches = patches + self.obj = obj + self.test_config = TestConfig() + self.test_relation = TestRelation() + self.patch_all() + + def patch(self, method): + self.originals[method] = getattr(self.obj, method) + _m = patch.object(self.obj, method) + mock = _m.start() + self.addCleanup(_m.stop) + return mock + + def patch_all(self): + for method in self.patches: + setattr(self, method, self.patch(method)) + + +class TestConfig(object): + + def __init__(self): + self.config = get_default_config() + self.config_prev = {} + + def previous(self, k): + return self.config_prev[k] if k in self.config_prev else self.config[k] + + def set_previous(self, k, v): + self.config_prev[k] = v + + def unset_previous(self, k): + if k in self.config_prev: + self.config_prev.pop(k) + + def get(self, attr=None): + if not attr: + return self + try: + return 
self.config[attr] + except KeyError: + return None + + def get_all(self): + return self.config + + def set(self, attr, value): + if attr not in self.config: + raise KeyError + self.config[attr] = value + + def __getitem__(self, key): + return self.get(key) + + +class TestRelation(object): + + def __init__(self, relation_data=None): + self.relation_data = {} if relation_data is None else relation_data + + def set(self, relation_data): + self.relation_data = relation_data + + def get(self, attr=None, unit=None, rid=None): + if attr is None: + return self.relation_data + elif attr in self.relation_data: + return self.relation_data[attr] + return None + + +@contextmanager +def patch_open(): + '''Patch open() to allow mocking both open() itself and the file that is + yielded. + + Yields the mock for "open" and "file", respectively.''' + mock_open = MagicMock(spec=open) + mock_file = MagicMock(spec=file) + + @contextmanager + def stub_open(*args, **kwargs): + mock_open(*args, **kwargs) + yield mock_file + + with patch('__builtin__.open', stub_open): + yield mock_open, mock_file