From 461b4a9f291918eddfca4ac70ad8fee08f239e17 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Thu, 4 Sep 2014 11:09:07 +0100 Subject: [PATCH 01/21] Allow reconfiguration of cluster resources, enforce quorum --- config.yaml | 8 +++++++- hooks/hooks.py | 18 ++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/config.yaml b/config.yaml index 2a12023..026e08e 100644 --- a/config.yaml +++ b/config.yaml @@ -34,7 +34,13 @@ options: cluster_count: type: int default: 2 - description: Number of peer units required to bootstrap cluster services. + description: | + Number of peer units required to bootstrap cluster services. + . + If less that 3 is specified, the cluster will be configured to + ignore any quorum problems; with 3 or more units, quorum will be + enforced and services will be stopped in the event of a loss + of quorum. monitor_host: type: string description: | diff --git a/hooks/hooks.py b/hooks/hooks.py index a554224..74a8a37 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -144,10 +144,6 @@ HAMARKER = '/var/lib/juju/haconfigured' 'hanode-relation-joined', 'hanode-relation-changed') def configure_cluster(): - # Check that we are not already configured - if os.path.exists(HAMARKER): - log('HA already configured, not reconfiguring') - return # Check that we are related to a principle and that # it has already provided the required corosync configuration if not get_corosync_conf(): @@ -233,8 +229,17 @@ def configure_cluster(): log('Doing global cluster configuration') cmd = "crm configure property stonith-enabled=false" pcmk.commit(cmd) - cmd = "crm configure property no-quorum-policy=ignore" + + if int(config('cluster_count')) >= 3: + # NOTE(jamespage) if 3 or more nodes, then quorum can be + # managed effectively, so stop if quorum lost + cmd = "crm configure property no-quorum-policy=stop" + else: + # NOTE(jamespage) if less that 3 nodes, quorum not possible + # so ignore + cmd = "crm configure property 
no-quorum-policy=ignore" pcmk.commit(cmd) + cmd = 'crm configure rsc_defaults $id="rsc-options"' \ ' resource-stickiness="100"' pcmk.commit(cmd) @@ -361,9 +366,6 @@ def configure_cluster(): relation_set(relation_id=rel_id, clustered="yes") - with open(HAMARKER, 'w') as marker: - marker.write('done') - configure_stonith() From 9b952eb708c57a29d84dc06e9757df2a7d901a3a Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Thu, 4 Sep 2014 11:22:43 +0100 Subject: [PATCH 02/21] Ensure monitor host can also be disabled --- hooks/hooks.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hooks/hooks.py b/hooks/hooks.py index 74a8a37..e10d4ef 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -257,6 +257,10 @@ def configure_cluster(): ' meta interleave="true"' pcmk.commit(cmd) pcmk.commit(cmd2) + if not monitor_host: + if pcmk.crm_opt_exists('ping'): + pcmk.commit('crm -w -F resource stop ping') + pcmk.commit('crm -w -F configure delete ping') # Only configure the cluster resources # from the oldest peer unit. 
From bbf8dc13e40b18be08778939fbeee1b66c039d47 Mon Sep 17 00:00:00 2001 From: James Page Date: Thu, 4 Sep 2014 10:52:46 +0000 Subject: [PATCH 03/21] Add support for deleting resources --- hooks/hooks.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index e10d4ef..48b4602 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -126,7 +126,6 @@ def config_changed(): @hooks.hook() def upgrade_charm(): install() - config_changed() def restart_corosync(): @@ -172,6 +171,13 @@ def configure_cluster(): unit, relid) is None \ else ast.literal_eval(relation_get("resources", unit, relid)) + + delete_resources = \ + [] if relation_get("delete_resources", + unit, relid) is None \ + else ast.literal_eval(relation_get("delete_resources", + unit, relid)) + resource_params = \ {} if relation_get("resource_params", unit, relid) is None \ @@ -265,6 +271,14 @@ def configure_cluster(): # Only configure the cluster resources # from the oldest peer unit. 
if oldest_peer(peer_units()): + log('Deleting Resources') + log(str(delete_resources)) + for res_name in delete_resources: + if pcmk.crm_opt_exists(res_name): + if pcmk.crm_res_running(res_name): + pcmk.commit('crm -w -F resource stop %s' % res_name) + pcmk.commit('crm -w -F configure delete %s' % res_name) + log('Configuring Resources') log(str(resources)) for res_name, res_type in resources.iteritems(): From a9270d6cbfb756b5f5ca66bab7d85dfd4f4b336c Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Thu, 4 Sep 2014 12:41:15 +0100 Subject: [PATCH 04/21] Ensure peer relation is up before using it --- hooks/hooks.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 48b4602..f515871 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -149,9 +149,14 @@ def configure_cluster(): log('Unable to configure corosync right now, bailing') return else: - log('Ready to form cluster - informing peers') - relation_set(relation_id=relation_ids('hanode')[0], - ready=True) + if relation_ids('hanode'): + log('Ready to form cluster - informing peers') + relation_set(relation_id=relation_ids('hanode')[0], + ready=True) + else: + log('Ready to form cluster, but not related to peers just yet') + return + # Check that there's enough nodes in order to perform the # configuration of the HA cluster if (len(get_cluster_nodes()) < From 03c981b5c579b9cb9232de3689a80a44409d8c4c Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Thu, 4 Sep 2014 13:46:47 +0100 Subject: [PATCH 05/21] Drop node entry and purge packages on stop --- hooks/hooks.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hooks/hooks.py b/hooks/hooks.py index f515871..56a6f07 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -16,6 +16,7 @@ from base64 import b64decode import maas as MAAS import pcmk import hacluster +import socket from charmhelpers.core.hookenv import ( log, @@ -37,6 +38,7 @@ from charmhelpers.core.host import 
( from charmhelpers.fetch import ( apt_install, + apt_purge ) from charmhelpers.contrib.hahelpers.cluster import ( @@ -474,6 +476,13 @@ def render_template(template_name, context, template_dir=TEMPLATES_DIR): return template.render(context) +@hooks.hook() +def stop(): + cmd = 'crm -w -F node delete %s' % socket.gethostname() + pcmk.commit(cmd) + apt_purge(['corosync', 'pacemaker'], fatal=True) + + if __name__ == '__main__': try: hooks.execute(sys.argv) From 5e7c7bad84f2e016dca351d61f84aa7787d04a11 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Tue, 23 Sep 2014 12:50:00 +0100 Subject: [PATCH 06/21] Pull things apart a bit --- config.yaml | 15 +++++ hooks/hooks.py | 169 +++++++++++++++++++++++++++---------------------- 2 files changed, 108 insertions(+), 76 deletions(-) diff --git a/config.yaml b/config.yaml index 026e08e..1c2a3c5 100644 --- a/config.yaml +++ b/config.yaml @@ -6,6 +6,18 @@ options: Multicast IP address to use for exchanging messages over the network. If multiple clusters are on the same bindnetaddr network, this value can be changed. + corosync_bindiface: + type: string + default: + description: | + Default network interface on which HA cluster will bind to communication + with the other members of the HA Cluster. + corosync_mcastport: + type: int + default: + description: | + Default multicast port number that will be used to communicate between + HA Cluster nodes. corosync_key: type: string default: "64RxJNcCkwo8EJYBsaacitUvbQp5AW4YolJi5/2urYZYp2jfLxY+3IUCOaAUJHPle4Yqfy+WBXO0I/6ASSAjj9jaiHVNaxmVhhjcmyBqy2vtPf+m+0VxVjUXlkTyYsODwobeDdO3SIkbIABGfjLTu29yqPTsfbvSYr6skRb9ne0=" @@ -27,9 +39,11 @@ options: parameters are properly configured in its invenvory. maas_url: type: string + default: description: MAAS API endpoint (required for STONITH). maas_credentials: type: string + default: description: MAAS credentials (required for STONITH). cluster_count: type: int @@ -43,6 +57,7 @@ options: of quorum. 
monitor_host: type: string + default: description: | One or more IPs, separated by space, that will be used as a saftey check for avoiding split brain situations. Nodes in the cluster will ping these diff --git a/hooks/hooks.py b/hooks/hooks.py index 56a6f07..86db18e 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -34,6 +34,8 @@ from charmhelpers.core.host import ( service_start, service_restart, service_running, + write_file, + mkdir ) from charmhelpers.fetch import ( @@ -53,10 +55,10 @@ hooks = Hooks() def install(): apt_install(['corosync', 'pacemaker', 'python-netaddr', 'ipmitool'], fatal=True) - # XXX rbd OCF only included with newer versions of ceph-resource-agents. - # Bundle /w charm until we figure out a better way to install it. - if not os.path.exists('/usr/lib/ocf/resource.d/ceph'): - os.makedirs('/usr/lib/ocf/resource.d/ceph') + # NOTE(adam_g) rbd OCF only included with newer versions of + # ceph-resource-agents. Bundle /w charm until we figure out a + # better way to install it. 
+ mkdir('/usr/lib/ocf/resource.d/ceph') if not os.path.isfile('/usr/lib/ocf/resource.d/ceph/rbd'): shutil.copy('ocf/ceph/rbd', '/usr/lib/ocf/resource.d/ceph/rbd') @@ -68,11 +70,13 @@ def get_corosync_conf(): conf = { 'corosync_bindnetaddr': hacluster.get_network_address( + config('corosync_bindiface') or relation_get('corosync_bindiface', unit, relid) ), - 'corosync_mcastport': relation_get('corosync_mcastport', - unit, relid), + 'corosync_mcastport': (config('corosync_mcastport') or + relation_get('corosync_mcastport', + unit, relid)), 'corosync_mcastaddr': config('corosync_mcastaddr'), } if None not in conf.itervalues(): @@ -83,26 +87,30 @@ def get_corosync_conf(): def emit_corosync_conf(): - # read config variables corosync_conf_context = get_corosync_conf() - # write config file (/etc/corosync/corosync.conf - with open('/etc/corosync/corosync.conf', 'w') as corosync_conf: - corosync_conf.write(render_template('corosync.conf', - corosync_conf_context)) + if corosync_conf_context: + write_file(path='/etc/corosync/corosync.conf', + content=render_template('corosync.conf', + corosync_conf_context)) + return True + else: + return False def emit_base_conf(): corosync_default_context = {'corosync_enabled': 'yes'} - # write /etc/default/corosync file - with open('/etc/default/corosync', 'w') as corosync_default: - corosync_default.write(render_template('corosync', - corosync_default_context)) + write_file(path='/etc/default/corosync', + content=render_template('corosync', + corosync_default_context)) + corosync_key = config('corosync_key') if corosync_key: - # write the authkey - with open('/etc/corosync/authkey', 'w') as corosync_key_file: - corosync_key_file.write(b64decode(corosync_key)) - os.chmod = ('/etc/corosync/authkey', 0o400) + write_file(path='/etc/corosync/authkey', + content=b64decode(corosync_key), + perms=0o400) + return True + else: + return False @hooks.hook() @@ -115,14 +123,11 @@ def config_changed(): hacluster.enable_lsb_services('pacemaker') - # 
Create a new config file - emit_base_conf() - - # Reconfigure the cluster if required - configure_cluster() - - # Setup fencing. - configure_stonith() + if configure_corosync(): + pcmk.wait_for_pcmk() + configure_cluster_global() + configure_monitor_host() + configure_stonith() @hooks.hook() @@ -137,14 +142,63 @@ def restart_corosync(): time.sleep(5) service_start("pacemaker") -HAMARKER = '/var/lib/juju/haconfigured' + +def configure_corosync(): + # TODO: conditional restarts + log('Configuring and restarting corosync') + if emit_base_conf() and emit_corosync_conf(): + restart_corosync() + return True + else: + return False + + +def configure_monitor_host(): + '''Configure extra monitor host for better network failure detection''' + monitor_host = config('monitor_host') + if monitor_host: + if not pcmk.crm_opt_exists('ping'): + log('Implementing monitor host configuration') + monitor_interval = config('monitor_interval') + cmd = 'crm -w -F configure primitive ping' \ + ' ocf:pacemaker:ping params host_list="%s"' \ + ' multiplier="100" op monitor interval="%s"' %\ + (monitor_host, monitor_interval) + cmd2 = 'crm -w -F configure clone cl_ping ping' \ + ' meta interleave="true"' + pcmk.commit(cmd) + pcmk.commit(cmd2) + else: + if pcmk.crm_opt_exists('ping'): + log('Disabling monitor host configuration') + pcmk.commit('crm -w -F resource stop ping') + pcmk.commit('crm -w -F configure delete ping') + + +def configure_cluster_global(): + '''Configure global cluster options''' + log('Doing global cluster configuration') + if int(config('cluster_count')) >= 3: + # NOTE(jamespage) if 3 or more nodes, then quorum can be + # managed effectively, so stop if quorum lost + cmd = "crm configure property no-quorum-policy=stop" + else: + # NOTE(jamespage) if less that 3 nodes, quorum not possible + # so ignore + cmd = "crm configure property no-quorum-policy=ignore" + pcmk.commit(cmd) + + cmd = 'crm configure rsc_defaults $id="rsc-options"' \ + ' resource-stickiness="100"' + 
pcmk.commit(cmd) + @hooks.hook('ha-relation-joined', 'ha-relation-changed', 'hanode-relation-joined', 'hanode-relation-changed') -def configure_cluster(): +def configure_principle_cluster_resources(): # Check that we are related to a principle and that # it has already provided the required corosync configuration if not get_corosync_conf(): @@ -232,48 +286,12 @@ def configure_cluster(): for ra in resources.itervalues()]: apt_install('ceph-resource-agents') - log('Configuring and restarting corosync') - emit_corosync_conf() - restart_corosync() - - log('Waiting for PCMK to start') + # NOTE: this should be removed in 15.04 cycle as corosync + # configuration should be set directly on subordinate + configure_corosync() pcmk.wait_for_pcmk() - - log('Doing global cluster configuration') - cmd = "crm configure property stonith-enabled=false" - pcmk.commit(cmd) - - if int(config('cluster_count')) >= 3: - # NOTE(jamespage) if 3 or more nodes, then quorum can be - # managed effectively, so stop if quorum lost - cmd = "crm configure property no-quorum-policy=stop" - else: - # NOTE(jamespage) if less that 3 nodes, quorum not possible - # so ignore - cmd = "crm configure property no-quorum-policy=ignore" - pcmk.commit(cmd) - - cmd = 'crm configure rsc_defaults $id="rsc-options"' \ - ' resource-stickiness="100"' - pcmk.commit(cmd) - - # Configure Ping service - monitor_host = config('monitor_host') - if monitor_host: - if not pcmk.crm_opt_exists('ping'): - monitor_interval = config('monitor_interval') - cmd = 'crm -w -F configure primitive ping' \ - ' ocf:pacemaker:ping params host_list="%s"' \ - ' multiplier="100" op monitor interval="%s"' %\ - (monitor_host, monitor_interval) - cmd2 = 'crm -w -F configure clone cl_ping ping' \ - ' meta interleave="true"' - pcmk.commit(cmd) - pcmk.commit(cmd2) - if not monitor_host: - if pcmk.crm_opt_exists('ping'): - pcmk.commit('crm -w -F resource stop ping') - pcmk.commit('crm -w -F configure delete ping') + configure_cluster_global() + 
configure_monitor_host() # Only configure the cluster resources # from the oldest peer unit. @@ -313,7 +331,7 @@ def configure_cluster(): resource_params[res_name]) pcmk.commit(cmd) log('%s' % cmd) - if monitor_host: + if config('monitor_host'): cmd = 'crm -F configure location Ping-%s %s rule' \ ' -inf: pingd lte 0' % (res_name, res_name) pcmk.commit(cmd) @@ -395,11 +413,10 @@ def configure_cluster(): def configure_stonith(): - if config('stonith_enabled') not in ['true', 'True']: - return - - if not os.path.exists(HAMARKER): - log('HA not yet configured, skipping STONITH config.') + if config('stonith_enabled') not in ['true', 'True', True]: + log('Disabling stonith') + cmd = "crm configure property stonith-enabled=false" + pcmk.commit(cmd) return log('Configuring STONITH for all nodes in cluster.') From 15432d47d9bebe53ce90f9851cd7e77590bd85bc Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Tue, 23 Sep 2014 13:03:22 +0100 Subject: [PATCH 07/21] Fixup stuff --- hooks/hooks.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 86db18e..33a5f70 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -64,25 +64,31 @@ def install(): def get_corosync_conf(): - conf = {} + # NOTE(jamespage) use local charm configuration over any provided by + # principle charm + conf = { + 'corosync_bindnetaddr': \ + hacluster.get_network_address(config('corosync_bindiface')), + 'corosync_mcastport': config('corosync_mcastport'), + 'corosync_mcastaddr': config('corosync_mcastaddr'), + } + if None not in conf.itervalues(): + return conf for relid in relation_ids('ha'): for unit in related_units(relid): conf = { - 'corosync_bindnetaddr': - hacluster.get_network_address( - config('corosync_bindiface') or - relation_get('corosync_bindiface', - unit, relid) + 'corosync_bindnetaddr': \ + hacluster.get_network_address(relation_get('corosync_bindiface', + unit, relid) ), - 'corosync_mcastport': 
(config('corosync_mcastport') or - relation_get('corosync_mcastport', - unit, relid)), + 'corosync_mcastport': relation_get('corosync_mcastport', + unit, relid), 'corosync_mcastaddr': config('corosync_mcastaddr'), } if None not in conf.itervalues(): return conf missing = [k for k, v in conf.iteritems() if v is None] - log('Missing required principle configuration: %s' % missing) + log('Missing required configuration: %s' % missing) return None From 3ad20c69be2807a5bca4d1c3cdef6488aba67864 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Tue, 23 Sep 2014 13:09:54 +0100 Subject: [PATCH 08/21] Implement conditional restarts --- hooks/hooks.py | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 33a5f70..13a3b0b 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -35,7 +35,8 @@ from charmhelpers.core.host import ( service_restart, service_running, write_file, - mkdir + mkdir, + file_hash ) from charmhelpers.fetch import ( @@ -149,14 +150,33 @@ def restart_corosync(): service_start("pacemaker") +COROSYNC_CONF_FILES = [ + '/etc/default/corosync', + '/etc/corosync/authkey', + '/etc/corosync/corosync.conf' +] + + +def restart_corosync_on_change(): + '''Simple decorator to restart corosync if any of its config changes''' + def wrap(f): + def wrapped_f(*args): + checksums = {} + for path in COROSYNC_CONF_FILES: + checksums[path] = file_hash(path) + f(*args) + for path in COROSYNC_CONF_FILES: + if checksums[path] != file_hash(path): + restart_corosync() + break + return wrapped_f + return wrap + + +@restart_corosync_on_change() def configure_corosync(): - # TODO: conditional restarts - log('Configuring and restarting corosync') - if emit_base_conf() and emit_corosync_conf(): - restart_corosync() - return True - else: - return False + log('Configuring and (maybe) restarting corosync') + return emit_base_conf() and emit_corosync_conf() def configure_monitor_host(): From 
5384059968e44bb8e7319d6e7740162b5298e9f2 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Tue, 23 Sep 2014 13:15:56 +0100 Subject: [PATCH 09/21] Fixup problem with missing principle units --- hooks/hooks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 13a3b0b..ba53236 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -250,7 +250,11 @@ def configure_principle_cluster_resources(): if len(relids) == 1: # Should only ever be one of these # Obtain relation information relid = relids[0] - unit = related_units(relid)[0] + units = related_units(relid) + if len(units) < 1: + log('No principle unit found, bailing') + return + unit = units[0] log('Using rid {} unit {}'.format(relid, unit)) import ast resources = \ From 707a3e1baeb91350aa221058c98572fdade16553 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Tue, 23 Sep 2014 14:13:50 +0100 Subject: [PATCH 10/21] Always reconfigure monitor host if set --- hooks/hooks.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index ba53236..371318e 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -181,19 +181,19 @@ def configure_corosync(): def configure_monitor_host(): '''Configure extra monitor host for better network failure detection''' + log('Checking monitor host configuration') monitor_host = config('monitor_host') if monitor_host: - if not pcmk.crm_opt_exists('ping'): - log('Implementing monitor host configuration') - monitor_interval = config('monitor_interval') - cmd = 'crm -w -F configure primitive ping' \ - ' ocf:pacemaker:ping params host_list="%s"' \ - ' multiplier="100" op monitor interval="%s"' %\ - (monitor_host, monitor_interval) - cmd2 = 'crm -w -F configure clone cl_ping ping' \ - ' meta interleave="true"' - pcmk.commit(cmd) - pcmk.commit(cmd2) + log('Implementing monitor host configuration') + monitor_interval = config('monitor_interval') + cmd 
= 'crm -w -F configure primitive ping' \ + ' ocf:pacemaker:ping params host_list="%s"' \ + ' multiplier="100" op monitor interval="%s"' %\ + (monitor_host, monitor_interval) + pcmk.commit(cmd) + cmd = 'crm -w -F configure clone cl_ping ping' \ + ' meta interleave="true"' + pcmk.commit(cmd) else: if pcmk.crm_opt_exists('ping'): log('Disabling monitor host configuration') From 1ceb74267d76ac064d1317b3e79d525abde6e759 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Tue, 23 Sep 2014 14:15:33 +0100 Subject: [PATCH 11/21] Sort out wrapper --- hooks/hooks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 371318e..8f2e853 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -164,11 +164,12 @@ def restart_corosync_on_change(): checksums = {} for path in COROSYNC_CONF_FILES: checksums[path] = file_hash(path) - f(*args) + return_data = f(*args) for path in COROSYNC_CONF_FILES: if checksums[path] != file_hash(path): restart_corosync() break + return return_data return wrapped_f return wrap From 9d95db67e5a2ac9440be59df6cb66b20f2d0a190 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Tue, 23 Sep 2014 14:24:04 +0100 Subject: [PATCH 12/21] Tidy + add monitor host reconfigure --- hooks/hooks.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 8f2e853..8051b68 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -185,16 +185,21 @@ def configure_monitor_host(): log('Checking monitor host configuration') monitor_host = config('monitor_host') if monitor_host: - log('Implementing monitor host configuration') - monitor_interval = config('monitor_interval') - cmd = 'crm -w -F configure primitive ping' \ - ' ocf:pacemaker:ping params host_list="%s"' \ - ' multiplier="100" op monitor interval="%s"' %\ - (monitor_host, monitor_interval) - pcmk.commit(cmd) - cmd = 'crm -w -F configure clone cl_ping ping' \ - ' meta 
interleave="true"' - pcmk.commit(cmd) + if not pcmk.crm_opt_exists('ping'): + log('Implementing monitor host configuration') + monitor_interval = config('monitor_interval') + cmd = 'crm -w -F configure primitive ping' \ + ' ocf:pacemaker:ping params host_list="%s"' \ + ' multiplier="100" op monitor interval="%s"' %\ + (monitor_host, monitor_interval) + pcmk.commit(cmd) + cmd = 'crm -w -F configure clone cl_ping ping' \ + ' meta interleave="true"' + pcmk.commit(cmd) + else: + log('Reconfiguring monitor host configuration') + cmd = 'crm -w -F resource param ping set host_list="%s"' %\ + monitor_host else: if pcmk.crm_opt_exists('ping'): log('Disabling monitor host configuration') @@ -208,10 +213,12 @@ def configure_cluster_global(): if int(config('cluster_count')) >= 3: # NOTE(jamespage) if 3 or more nodes, then quorum can be # managed effectively, so stop if quorum lost + log('Configuring no-quorum-policy to stop') cmd = "crm configure property no-quorum-policy=stop" else: # NOTE(jamespage) if less that 3 nodes, quorum not possible # so ignore + log('Configuring no-quorum-policy to ignore') cmd = "crm configure property no-quorum-policy=ignore" pcmk.commit(cmd) @@ -220,7 +227,6 @@ def configure_cluster_global(): pcmk.commit(cmd) - @hooks.hook('ha-relation-joined', 'ha-relation-changed', 'hanode-relation-joined', From 8064a744cd5056f0117968cf5688a79335111f70 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Tue, 23 Sep 2014 14:26:06 +0100 Subject: [PATCH 13/21] Tidy + add monitor host reconfigure --- hooks/hooks.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 8051b68..2b2398d 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -186,7 +186,8 @@ def configure_monitor_host(): monitor_host = config('monitor_host') if monitor_host: if not pcmk.crm_opt_exists('ping'): - log('Implementing monitor host configuration') + log('Implementing monitor host' + ' configuration (monitor host: %s)' % 
monitor_host) monitor_interval = config('monitor_interval') cmd = 'crm -w -F configure primitive ping' \ ' ocf:pacemaker:ping params host_list="%s"' \ @@ -197,7 +198,8 @@ def configure_monitor_host(): ' meta interleave="true"' pcmk.commit(cmd) else: - log('Reconfiguring monitor host configuration') + log('Reconfiguring monitor host' + ' configuration (monitor host: %s)' % monitor_host) cmd = 'crm -w -F resource param ping set host_list="%s"' %\ monitor_host else: From f7cd7e23d91c46082e293467de7d7d9594497c23 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Tue, 23 Sep 2014 14:27:37 +0100 Subject: [PATCH 14/21] More tidy --- hooks/hooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 2b2398d..72fb3ee 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -187,7 +187,7 @@ def configure_monitor_host(): if monitor_host: if not pcmk.crm_opt_exists('ping'): log('Implementing monitor host' - ' configuration (monitor host: %s)' % monitor_host) + ' configuration (host: %s)' % monitor_host) monitor_interval = config('monitor_interval') cmd = 'crm -w -F configure primitive ping' \ ' ocf:pacemaker:ping params host_list="%s"' \ @@ -199,7 +199,7 @@ def configure_monitor_host(): pcmk.commit(cmd) else: log('Reconfiguring monitor host' - ' configuration (monitor host: %s)' % monitor_host) + ' configuration (host: %s)' % monitor_host) cmd = 'crm -w -F resource param ping set host_list="%s"' %\ monitor_host else: From e7f5bc6a3381e3c87afc50467caa64f921ea47e1 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Tue, 23 Sep 2014 14:31:32 +0100 Subject: [PATCH 15/21] Do all global stuff in one place --- hooks/hooks.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 72fb3ee..592f497 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -211,7 +211,7 @@ def configure_monitor_host(): def configure_cluster_global(): '''Configure global cluster 
options''' - log('Doing global cluster configuration') + log('Applying global cluster configuration') if int(config('cluster_count')) >= 3: # NOTE(jamespage) if 3 or more nodes, then quorum can be # managed effectively, so stop if quorum lost @@ -331,6 +331,7 @@ def configure_principle_cluster_resources(): pcmk.wait_for_pcmk() configure_cluster_global() configure_monitor_host() + configure_stonith() # Only configure the cluster resources # from the oldest peer unit. @@ -448,8 +449,6 @@ def configure_principle_cluster_resources(): relation_set(relation_id=rel_id, clustered="yes") - configure_stonith() - def configure_stonith(): if config('stonith_enabled') not in ['true', 'True', True]: From c87fab04eb79c802d4505a2d8c6ec7dc1ff0c5a5 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Tue, 23 Sep 2014 14:33:04 +0100 Subject: [PATCH 16/21] Indent-astic --- hooks/hooks.py | 83 +++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 592f497..06d05c8 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -452,53 +452,52 @@ def configure_principle_cluster_resources(): def configure_stonith(): if config('stonith_enabled') not in ['true', 'True', True]: - log('Disabling stonith') + log('Disabling STONITH') cmd = "crm configure property stonith-enabled=false" pcmk.commit(cmd) - return - - log('Configuring STONITH for all nodes in cluster.') - # configure stontih resources for all nodes in cluster. - # note: this is totally provider dependent and requires - # access to the MAAS API endpoint, using endpoint and credentials - # set in config. - url = config('maas_url') - creds = config('maas_credentials') - if None in [url, creds]: - log('maas_url and maas_credentials must be set' - ' in config to enable STONITH.') - sys.exit(1) - - maas = MAAS.MAASHelper(url, creds) - nodes = maas.list_nodes() - if not nodes: - log('Could not obtain node inventory from ' - 'MAAS @ %s.' 
% url) - sys.exit(1) - - cluster_nodes = pcmk.list_nodes() - for node in cluster_nodes: - rsc, constraint = pcmk.maas_stonith_primitive(nodes, node) - if not rsc: - log('Failed to determine STONITH primitive for node' - ' %s' % node) + else: + log('Enabling STONITH for all nodes in cluster.') + # configure stontih resources for all nodes in cluster. + # note: this is totally provider dependent and requires + # access to the MAAS API endpoint, using endpoint and credentials + # set in config. + url = config('maas_url') + creds = config('maas_credentials') + if None in [url, creds]: + log('maas_url and maas_credentials must be set' + ' in config to enable STONITH.') sys.exit(1) - rsc_name = str(rsc).split(' ')[1] - if not pcmk.is_resource_present(rsc_name): - log('Creating new STONITH primitive %s.' % - rsc_name) - cmd = 'crm -F configure %s' % rsc - pcmk.commit(cmd) - if constraint: - cmd = 'crm -F configure %s' % constraint - pcmk.commit(cmd) - else: - log('STONITH primitive already exists ' - 'for node.') + maas = MAAS.MAASHelper(url, creds) + nodes = maas.list_nodes() + if not nodes: + log('Could not obtain node inventory from ' + 'MAAS @ %s.' % url) + sys.exit(1) - cmd = "crm configure property stonith-enabled=true" - pcmk.commit(cmd) + cluster_nodes = pcmk.list_nodes() + for node in cluster_nodes: + rsc, constraint = pcmk.maas_stonith_primitive(nodes, node) + if not rsc: + log('Failed to determine STONITH primitive for node' + ' %s' % node) + sys.exit(1) + + rsc_name = str(rsc).split(' ')[1] + if not pcmk.is_resource_present(rsc_name): + log('Creating new STONITH primitive %s.' 
% + rsc_name) + cmd = 'crm -F configure %s' % rsc + pcmk.commit(cmd) + if constraint: + cmd = 'crm -F configure %s' % constraint + pcmk.commit(cmd) + else: + log('STONITH primitive already exists ' + 'for node.') + + cmd = "crm configure property stonith-enabled=true" + pcmk.commit(cmd) def get_cluster_nodes(): From cf9ccc39ac804f2ad5e52e00ddefbb0601fc75d1 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Tue, 23 Sep 2014 14:41:02 +0100 Subject: [PATCH 17/21] Tidy log data --- hooks/hooks.py | 78 ++++++++++++++++---------------------------------- 1 file changed, 25 insertions(+), 53 deletions(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 06d05c8..888cd7a 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -7,6 +7,7 @@ # Andres Rodriguez # +import ast import shutil import sys import time @@ -229,6 +230,15 @@ def configure_cluster_global(): pcmk.commit(cmd) +def parse_data(relid, unit, key): + '''Simple helper to ast parse relation data''' + data = relation_get(key, unit, relid) + if data: + return ast.literal_eval(data) + else: + return {} + + @hooks.hook('ha-relation-joined', 'ha-relation-changed', 'hanode-relation-joined', @@ -237,7 +247,7 @@ def configure_principle_cluster_resources(): # Check that we are related to a principle and that # it has already provided the required corosync configuration if not get_corosync_conf(): - log('Unable to configure corosync right now, bailing') + log('Unable to configure corosync right now, deferring configuration') return else: if relation_ids('hanode'): @@ -252,7 +262,7 @@ def configure_principle_cluster_resources(): # configuration of the HA cluster if (len(get_cluster_nodes()) < int(config('cluster_count'))): - log('Not enough nodes in cluster, bailing') + log('Not enough nodes in cluster, deferring configuration') return relids = relation_ids('ha') @@ -261,59 +271,20 @@ def configure_principle_cluster_resources(): relid = relids[0] units = related_units(relid) if len(units) < 1: - log('No principle 
unit found, bailing') + log('No principle unit found, deferring configuration') return unit = units[0] - log('Using rid {} unit {}'.format(relid, unit)) - import ast - resources = \ - {} if relation_get("resources", - unit, relid) is None \ - else ast.literal_eval(relation_get("resources", - unit, relid)) - - delete_resources = \ - [] if relation_get("delete_resources", - unit, relid) is None \ - else ast.literal_eval(relation_get("delete_resources", - unit, relid)) - - resource_params = \ - {} if relation_get("resource_params", - unit, relid) is None \ - else ast.literal_eval(relation_get("resource_params", - unit, relid)) - groups = \ - {} if relation_get("groups", - unit, relid) is None \ - else ast.literal_eval(relation_get("groups", - unit, relid)) - ms = \ - {} if relation_get("ms", - unit, relid) is None \ - else ast.literal_eval(relation_get("ms", - unit, relid)) - orders = \ - {} if relation_get("orders", - unit, relid) is None \ - else ast.literal_eval(relation_get("orders", - unit, relid)) - colocations = \ - {} if relation_get("colocations", - unit, relid) is None \ - else ast.literal_eval(relation_get("colocations", - unit, relid)) - clones = \ - {} if relation_get("clones", - unit, relid) is None \ - else ast.literal_eval(relation_get("clones", - unit, relid)) - init_services = \ - {} if relation_get("init_services", - unit, relid) is None \ - else ast.literal_eval(relation_get("init_services", - unit, relid)) - + log('Parsing cluster configuration' + ' using rid: {}, unit: {}'.format(relid, unit)) + resources = parse_data(relid, unit, 'resources') + delete_resources = parse_data(relid, unit, 'delete_resources') + resource_params = parse_data(relid, unit, 'resource_params') + groups = parse_data(relid, unit, 'groups') + ms = parse_data(relid, unit, 'ms') + orders = parse_data(relid, unit, 'orders') + colocations = parse_data(relid, unit, 'colocations') + clones = parse_data(relid, unit, 'clones') + init_services = parse_data(relid, unit, 
'init_services') else: log('Related to {} ha services'.format(len(relids))) return @@ -340,6 +311,7 @@ def configure_principle_cluster_resources(): log(str(delete_resources)) for res_name in delete_resources: if pcmk.crm_opt_exists(res_name): + log('Stopping and deleting resource %s' % res_name) if pcmk.crm_res_running(res_name): pcmk.commit('crm -w -F resource stop %s' % res_name) pcmk.commit('crm -w -F configure delete %s' % res_name) From e550a2fe9149caee6e470ff1ae2f9a8a42083324 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Wed, 24 Sep 2014 09:54:02 +0100 Subject: [PATCH 18/21] Fixup restarts of corosync too early in process --- hooks/hooks.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 888cd7a..55fe0d4 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -166,10 +166,14 @@ def restart_corosync_on_change(): for path in COROSYNC_CONF_FILES: checksums[path] = file_hash(path) return_data = f(*args) - for path in COROSYNC_CONF_FILES: - if checksums[path] != file_hash(path): - restart_corosync() - break + # NOTE: this assumes that this call is always done around + # configure_corosync, which returns true if configuration + # files were actually generated + if return_data: + for path in COROSYNC_CONF_FILES: + if checksums[path] != file_hash(path): + restart_corosync() + break return return_data return wrapped_f return wrap From 5924a2c31250fe322a1dd554e351b6c30673dd40 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Wed, 24 Sep 2014 10:22:00 +0100 Subject: [PATCH 19/21] Tidy lint --- hooks/hooks.py | 41 +++++++++++++++++++++++------------------ hooks/pcmk.py | 1 - 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/hooks/hooks.py b/hooks/hooks.py index 55fe0d4..b28f5c1 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -52,11 +52,22 @@ from charmhelpers.contrib.hahelpers.cluster import ( hooks = Hooks() +COROSYNC_CONF = '/etc/corosync/corosync.conf'
+COROSYNC_DEFAULT = '/etc/default/corosync' +COROSYNC_AUTHKEY = '/etc/corosync/authkey' + +COROSYNC_CONF_FILES = [ + COROSYNC_DEFAULT, + COROSYNC_AUTHKEY, + COROSYNC_CONF +] + +PACKAGES = ['corosync', 'pacemaker', 'python-netaddr', 'ipmitool'] + @hooks.hook() def install(): - apt_install(['corosync', 'pacemaker', 'python-netaddr', 'ipmitool'], - fatal=True) + apt_install(PACKAGES, fatal=True) # NOTE(adam_g) rbd OCF only included with newer versions of # ceph-resource-agents. Bundle /w charm until we figure out a # better way to install it. @@ -69,7 +80,7 @@ def get_corosync_conf(): # NOTE(jamespage) use local charm configuration over any provided by # principle charm conf = { - 'corosync_bindnetaddr': \ + 'corosync_bindnetaddr': hacluster.get_network_address(config('corosync_bindiface')), 'corosync_mcastport': config('corosync_mcastport'), 'corosync_mcastaddr': config('corosync_mcastaddr'), @@ -79,10 +90,11 @@ def get_corosync_conf(): for relid in relation_ids('ha'): for unit in related_units(relid): conf = { - 'corosync_bindnetaddr': \ - hacluster.get_network_address(relation_get('corosync_bindiface', - unit, relid) - ), + 'corosync_bindnetaddr': + hacluster.get_network_address( + relation_get('corosync_bindiface', + unit, relid) + ), 'corosync_mcastport': relation_get('corosync_mcastport', unit, relid), 'corosync_mcastaddr': config('corosync_mcastaddr'), @@ -97,7 +109,7 @@ def get_corosync_conf(): def emit_corosync_conf(): corosync_conf_context = get_corosync_conf() if corosync_conf_context: - write_file(path='/etc/corosync/corosync.conf', + write_file(path=COROSYNC_CONF, content=render_template('corosync.conf', corosync_conf_context)) return True @@ -107,13 +119,13 @@ def emit_corosync_conf(): def emit_base_conf(): corosync_default_context = {'corosync_enabled': 'yes'} - write_file(path='/etc/default/corosync', + write_file(path=COROSYNC_DEFAULT, content=render_template('corosync', corosync_default_context)) corosync_key = config('corosync_key') if corosync_key: 
- write_file(path='/etc/corosync/authkey', + write_file(path=COROSYNC_AUTHKEY, content=b64decode(corosync_key), perms=0o400) return True @@ -151,13 +163,6 @@ def restart_corosync(): service_start("pacemaker") -COROSYNC_CONF_FILES = [ - '/etc/default/corosync', - '/etc/corosync/authkey', - '/etc/corosync/corosync.conf' -] - - def restart_corosync_on_change(): '''Simple decorator to restart corosync if any of its config changes''' def wrap(f): @@ -337,7 +342,7 @@ def configure_principle_cluster_resources(): # Put the services in HA, if not already done so # if not pcmk.is_resource_present(res_name): if not pcmk.crm_opt_exists(res_name): - if not res_name in resource_params: + if res_name not in resource_params: cmd = 'crm -w -F configure primitive %s %s' % (res_name, res_type) else: diff --git a/hooks/pcmk.py b/hooks/pcmk.py index 6b0a271..0e1530b 100644 --- a/hooks/pcmk.py +++ b/hooks/pcmk.py @@ -1,4 +1,3 @@ -#import lib.utils as utils import commands import subprocess import socket From 2c53f5b496f2d45555fcd1a24dc794f9510d5250 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Thu, 2 Oct 2014 20:26:09 +0100 Subject: [PATCH 20/21] Resync --- hooks/charmhelpers/contrib/network/ip.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hooks/charmhelpers/contrib/network/ip.py b/hooks/charmhelpers/contrib/network/ip.py index 9a3c2bf..17df06f 100644 --- a/hooks/charmhelpers/contrib/network/ip.py +++ b/hooks/charmhelpers/contrib/network/ip.py @@ -57,6 +57,8 @@ def get_address_in_network(network, fallback=None, fatal=False): else: if fatal: not_found_error_out() + else: + return None _validate_cidr(network) network = netaddr.IPNetwork(network) From 1e1e3c5b876f7dc5a7a5d9c3e75213adedb8a380 Mon Sep 17 00:00:00 2001 From: "james.page@ubuntu.com" <> Date: Mon, 6 Oct 2014 10:29:34 +0100 Subject: [PATCH 21/21] Fixup README --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bdeb7f4..a8e9425 100644 --- 
a/README.md +++ b/README.md @@ -34,13 +34,17 @@ in order for clustering to occur - otherwise nothing actually get configured. The hacluster interface supports a number of different cluster configuration options. -## Mandatory Relation Data +## Mandatory Relation Data (deprecated) -All principle charms must provide basic corosync configuration: +Principle charms should provide basic corosync configuration: corosync\_bindiface: The network interface to use for cluster messaging. corosync\_mcastport: The multicast port to use for cluster messaging. +However, these can also be provided via configuration on the hacluster charm +itself. If configuration is provided directly to the hacluster charm, it +takes precedence over these relation options from the principle charm. + ## Resource Configuration The hacluster interface provides support for a number of different ways