452 lines
14 KiB
Python
Raw Normal View History

# Copyright 2014-2015 Canonical Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
2014-06-05 11:59:23 +01:00
#
# Copyright 2012 Canonical Ltd.
#
# Authors:
# James Page <james.page@ubuntu.com>
# Adam Gandelman <adamg@ubuntu.com>
#
2014-10-13 14:31:28 +01:00
"""
Helpers for clustering and determining "cluster leadership" and other
clustering-related helpers.
"""
import functools
2014-06-05 11:59:23 +01:00
import subprocess
import os
import time
2015-01-14 09:23:53 +00:00
2014-06-05 11:59:23 +01:00
from socket import gethostname as get_unit_hostname
2014-12-10 20:28:54 +00:00
import six
2014-06-05 11:59:23 +01:00
from charmhelpers.core.hookenv import (
log,
relation_ids,
related_units as relation_list,
relation_get,
config as config_get,
INFO,
DEBUG,
2014-10-13 14:31:28 +01:00
WARNING,
2014-06-05 11:59:23 +01:00
unit_get,
is_leader as juju_is_leader,
status_set,
2014-06-05 11:59:23 +01:00
)
from charmhelpers.core.host import (
modulo_distribution,
)
2015-01-14 09:23:53 +00:00
from charmhelpers.core.decorators import (
retry_on_exception,
)
2015-02-24 11:06:13 +00:00
from charmhelpers.core.strutils import (
bool_from_string,
)
2014-06-05 11:59:23 +01:00
2015-05-20 12:44:27 -07:00
# Special resource name: when is_crm_leader() is called with this value it
# defers to is_crm_dc() (the pacemaker Designated Controller check) instead
# of querying a named CRM resource.
DC_RESOURCE_NAME = 'DC'
2014-06-05 11:59:23 +01:00
class HAIncompleteConfig(Exception):
    """Raised when settings required to configure HA are missing."""
class HAIncorrectConfig(Exception):
    """Raised when the HA settings are conflicting or otherwise invalid."""
2015-01-14 09:23:53 +00:00
class CRMResourceNotFound(Exception):
    """Raised when crm reports that the requested resource is not running."""
2015-06-19 15:09:38 +00:00
class CRMDCNotFound(Exception):
    """Raised when no pacemaker Designated Controller can be determined."""
2014-10-13 14:31:28 +01:00
def is_elected_leader(resource):
    """
    Returns True if the charm executing this is the elected cluster leader.

    Leadership is decided by the first of these mechanisms that applies:

        1. If juju is sufficiently new and leadership election is supported,
           the is_leader command will be used.
        2. If the charm is part of a corosync cluster, corosync is asked
           whether this unit leads the given resource.
        3. If the charm is not part of a corosync cluster, the leader is
           determined as being "the alive unit with the lowest unit number".
           In other words, the oldest surviving unit.

    :param resource: CRM resource name passed to is_crm_leader() when the
                     unit is clustered.
    :returns: boolean
    """
    try:
        return juju_is_leader()
    except NotImplementedError:
        # Older juju: fall through to the corosync / oldest-peer checks.
        log('Juju leadership election feature not enabled'
            ', using fallback support',
            level=WARNING)

    if is_clustered():
        if is_crm_leader(resource):
            return True
        log('Deferring action to CRM leader.', level=INFO)
        return False

    peers = peer_units()
    if not peers or oldest_peer(peers):
        # Either we are alone, or no peer has a lower unit number.
        return True
    log('Deferring action to oldest service unit.', level=INFO)
    return False
2014-06-05 11:59:23 +01:00
def is_clustered():
    """
    Report whether any unit on an 'ha' relation has set 'clustered'.

    :returns: True as soon as one related unit exposes a truthy 'clustered'
              value, otherwise False.
    """
    return any(
        relation_get('clustered', rid=rid, unit=member)
        for rid in (relation_ids('ha') or [])
        for member in (relation_list(rid) or [])
    )
2015-05-20 12:44:27 -07:00
def is_crm_dc():
    """
    Determine leadership by querying the pacemaker Designated Controller

    :returns: True if the 'Current DC' reported by ``crm status`` is this
              host, False otherwise.
    :raises: CRMDCNotFound if ``crm status`` exits non-zero or reports the
             current DC as NONE.
    """
    try:
        output = subprocess.check_output(['crm', 'status'],
                                         stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as ex:
        raise CRMDCNotFound(str(ex))

    if not isinstance(output, six.text_type):
        output = six.text_type(output, "utf-8")

    dc_name = ''
    for row in output.split('\n'):
        if not row.startswith('Current DC'):
            continue
        # Example line:
        # Current DC: juju-lytrusty-machine-2 (168108163) - partition with quorum
        dc_name = row.split(':')[1].split()[0]

    if dc_name == get_unit_hostname():
        return True
    if dc_name == 'NONE':
        raise CRMDCNotFound('Current DC: NONE')
    return False
2015-06-19 15:09:38 +00:00
@retry_on_exception(5, base_delay=2,
                    exc_type=(CRMResourceNotFound, CRMDCNotFound))
def is_crm_leader(resource, retry=False):
    """
    Returns True if the charm calling this is the elected corosync leader,
    as returned by calling the external "crm" command.

    We allow this operation to be retried to avoid the possibility of getting a
    false negative. See LP #1396246 for more info.

    :param resource: CRM resource name to look up; DC_RESOURCE_NAME defers
                     to is_crm_dc() instead.
    :param retry: not read by this function body.
    :returns: boolean
    :raises: CRMResourceNotFound if crm reports the resource is NOT running.
    """
    if resource == DC_RESOURCE_NAME:
        return is_crm_dc()

    try:
        output = subprocess.check_output(
            ['crm', 'resource', 'show', resource],
            stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError:
        # A failed lookup is treated as "not the leader".
        return False

    if not isinstance(output, six.text_type):
        output = six.text_type(output, "utf-8")
    if not output:
        return False

    if get_unit_hostname() in output:
        return True
    if "resource %s is NOT running" % (resource) in output:
        raise CRMResourceNotFound("CRM resource %s not found" % (resource))
    return False
2014-06-05 11:59:23 +01:00
2014-10-13 14:31:28 +01:00
def is_leader(resource):
    """Deprecated alias of is_crm_leader(); logs a warning, then delegates."""
    deprecation_notice = ("is_leader is deprecated. Please consider using "
                          "is_crm_leader instead.")
    log(deprecation_notice, level=WARNING)
    return is_crm_leader(resource)
def peer_units(peer_relation="cluster"):
    """
    Collect the units related over every instance of the peer relation.

    :param peer_relation: name of the peer relation to inspect.
    :returns: list of unit names (empty when there are none).
    """
    return [
        member
        for rid in (relation_ids(peer_relation) or [])
        for member in (relation_list(rid) or [])
    ]
2014-10-13 14:31:28 +01:00
def peer_ips(peer_relation='cluster', addr_key='private-address'):
    '''Return a dict mapping each peer unit to its addr_key relation value'''
    return {
        member: relation_get(addr_key, rid=rid, unit=member)
        for rid in relation_ids(peer_relation)
        for member in relation_list(rid)
    }
2014-06-05 11:59:23 +01:00
def oldest_peer(peers):
    """
    Determines whether the local unit is the oldest by comparing unit numbers.

    The local unit number is read from the JUJU_UNIT_NAME environment
    variable ("<service>/<number>").

    :param peers: iterable of peer unit names in "<service>/<number>" form.
    :returns: False if any peer has a lower unit number than the local unit,
              True otherwise.
    """
    own_number = int(os.getenv('JUJU_UNIT_NAME').split('/')[1])
    return not any(
        int(name.split('/')[1]) < own_number for name in peers
    )
def eligible_leader(resource):
    """Deprecated alias of is_elected_leader(); logs a warning, then delegates."""
    deprecation_notice = ("eligible_leader is deprecated. Please consider "
                          "using is_elected_leader instead.")
    log(deprecation_notice, level=WARNING)
    return is_elected_leader(resource)
2014-06-05 11:59:23 +01:00
def https():
    '''
    Determines whether enough data has been provided in configuration
    or relation data to configure HTTPS.

    Checked in order: the 'use-https' option, the 'ssl_cert'/'ssl_key'
    options, a 'ca' value on any 'certificates' relation, and finally
    'https_keystone' plus 'ca_cert' on any 'identity-service' relation.

    returns: boolean
    '''
    use_https = config_get('use-https')
    if use_https and bool_from_string(use_https):
        return True

    if config_get('ssl_cert') and config_get('ssl_key'):
        return True

    if any(relation_get('ca', rid=rid, unit=member)
           for rid in relation_ids('certificates')
           for member in relation_list(rid)):
        return True

    for rid in relation_ids('identity-service'):
        for member in relation_list(rid):
            # TODO - needs fixing for new helper as ssl_cert/key suffixes with CN
            keystone_state = [
                relation_get('https_keystone', rid=rid, unit=member),
                relation_get('ca_cert', rid=rid, unit=member),
            ]
            # NOTE: works around (LP: #1203241)
            if not (None in keystone_state or '' in keystone_state):
                return True
    return False
2014-12-10 20:28:54 +00:00
def determine_api_port(public_port, singlenode_mode=False):
    '''
    Determine correct API server listening port based on
    existence of HTTPS reverse proxy and/or haproxy.

    The port is lowered by 10 once when singlenode_mode is set or the unit
    has peers / is clustered, and by a further 10 when https() is true.

    public_port: int: standard public port for given service

    singlenode_mode: boolean: Shuffle ports when only a single unit is present

    returns: int: the correct listening port for the API service
    '''
    shifts = 0
    # Short-circuits exactly like the original if/elif: peers and cluster
    # state are only consulted when singlenode_mode is falsy.
    if singlenode_mode or len(peer_units()) > 0 or is_clustered():
        shifts += 1
    if https():
        shifts += 1
    return public_port - (shifts * 10)
2014-12-10 20:28:54 +00:00
def determine_apache_port(public_port, singlenode_mode=False):
    '''
    Description: Determine correct apache listening port based on public IP +
    state of the cluster.

    The port is lowered by 10 when singlenode_mode is set or the unit has
    peers / is clustered; otherwise it is returned unchanged.

    public_port: int: standard public port for given service

    singlenode_mode: boolean: Shuffle ports when only a single unit is present

    returns: int: the correct apache listening port
    '''
    shifted = singlenode_mode or len(peer_units()) > 0 or is_clustered()
    shifts = 1 if shifted else 0
    return public_port - (shifts * 10)
# Convenience variant of determine_apache_port() with singlenode_mode fixed to
# True; used as the default port converter of get_managed_services_and_ports().
determine_apache_port_single = functools.partial(
    determine_apache_port, singlenode_mode=True)
def get_hacluster_config(exclude_keys=None):
    '''
    Obtains all relevant configuration from charm configuration required
    for initiating a relation to hacluster:

        ha-bindiface, ha-mcastport, vip, os-internal-hostname,
        os-admin-hostname, os-public-hostname, os-access-hostname

    param: exclude_keys: list of setting key(s) to be excluded.
    returns: dict: A dict containing settings keyed by setting name.
    raises: HAIncorrectConfig if valid_hacluster_config() reports the
            configuration as invalid; exceptions raised by
            valid_hacluster_config() itself propagate unchanged.
    '''
    settings = ['ha-bindiface', 'ha-mcastport', 'vip', 'os-internal-hostname',
                'os-admin-hostname', 'os-public-hostname', 'os-access-hostname']
    conf = {
        key: config_get(key)
        for key in settings
        if not (exclude_keys and key in exclude_keys)
    }

    if valid_hacluster_config():
        return conf
    raise HAIncorrectConfig('Insufficient or incorrect config data to '
                            'configure hacluster.')
def valid_hacluster_config():
    '''
    Check that either vip or dns-ha is set. If dns-ha then one of os-*-hostname
    must be set.

    Note: ha-bindiface and ha-mcastport both have defaults and will always
    be set. We only care that either vip or dns-ha is set.

    :returns: boolean: valid config returns true.
    raises: HAIncorrectConfig if vip and dns-ha are both set or both unset.
    raises: HAIncompleteConfig if dns-ha is set but no os-*-hostname is.
    '''
    vip = config_get('vip')
    dns = config_get('dns-ha')
    # Exactly one of the two must be truthy.
    if bool(vip) == bool(dns):
        msg = ('HA: Either vip or dns-ha must be set but not both in order to '
               'use high availability')
        status_set('blocked', msg)
        raise HAIncorrectConfig(msg)

    if not dns:
        log('VIP HA: VIP is set {}'.format(vip), level=DEBUG)
        return True

    # If dns-ha then one of os-*-hostname must be set.
    # At this point it is unknown if one or all of the possible
    # network spaces are in HA. Validate at least one is set which is
    # the minimum required.
    hostname_keys = ['os-internal-hostname', 'os-admin-hostname',
                     'os-public-hostname', 'os-access-hostname']
    for key in hostname_keys:
        if config_get(key):
            log('DNS HA: At least one hostname is set {}: {}'
                ''.format(key, config_get(key)),
                level=DEBUG)
            return True

    msg = ('DNS HA: At least one os-*-hostname(s) must be set to use '
           'DNS HA')
    status_set('blocked', msg)
    raise HAIncompleteConfig(msg)
2014-07-24 11:28:51 +01:00
def canonical_url(configs, vip_setting='vip'):
    '''
    Returns the correct HTTP URL to this host given the state of HTTPS
    configuration and hacluster.

    :configs    : OSTemplateRenderer: A config templating object to inspect
                                      for a complete https context.

    :vip_setting:                str: Setting in charm config that specifies
                                      VIP address.
    '''
    scheme = 'https' if 'https' in configs.complete_contexts() else 'http'
    # Clustered units are addressed via the configured VIP, standalone units
    # via their own private address.
    if is_clustered():
        host = config_get(vip_setting)
    else:
        host = unit_get('private-address')
    return '%s://%s' % (scheme, host)
def distributed_wait(modulo=None, wait=None, operation_name='operation'):
    ''' Distribute operations by waiting based on modulo_distribution

    If modulo and/or wait are not set, check config_get for those values.
    If config values are not set, default to modulo=3 and wait=30.

    :param modulo: int The modulo number creates the group distribution
    :param wait: int The constant time wait value
    :param operation_name: string Operation name for status message
                           i.e.  'restart'
    :side effect: Calls config_get()
    :side effect: Calls log()
    :side effect: Calls status_set()
    :side effect: Calls time.sleep()
    '''
    if modulo is None:
        modulo = config_get('modulo-nodes') or 3
    if wait is None:
        wait = config_get('known-wait') or 30

    # The leader should never wait. non_zero_wait=True guarantees the
    # non-leader who gets modulo 0 will still wait.
    calculated_wait = (
        0 if juju_is_leader()
        else modulo_distribution(modulo=modulo, wait=wait,
                                 non_zero_wait=True)
    )

    msg = "Waiting {} seconds for {} ...".format(calculated_wait,
                                                 operation_name)
    log(msg, DEBUG)
    status_set('maintenance', msg)
    time.sleep(calculated_wait)
def get_managed_services_and_ports(services, external_ports,
                                   external_services=None,
                                   port_conv_f=determine_apache_port_single):
    """Get the services and ports managed by this charm.

    Return only the services and corresponding ports that are managed by this
    charm. This excludes haproxy when there is a relation with hacluster. This
    is because this charm passes responsibility for stopping and starting
    haproxy to hacluster.

    Similarly, if a relation with hacluster exists then the ports returned by
    this method correspond to those managed by the apache server rather than
    haproxy.

    Note: when an ha relation exists, the ``services`` list passed in is
    modified in place and that same list is returned.

    :param services: List of services.
    :type services: List[str]
    :param external_ports: List of ports managed by external services.
    :type external_ports: List[int]
    :param external_services: List of services to be removed if ha relation is
                              present. Defaults to ['haproxy'].
    :type external_services: List[str]
    :param port_conv_f: Function to apply to ports to calculate the ports
                        managed by services controlled by this charm.
    :type port_conv_f: f()
    :returns: A tuple containing a list of services first followed by a list of
              ports.
    :rtype: Tuple[List[str], List[int]]
    """
    externally_managed = (
        ['haproxy'] if external_services is None else external_services
    )

    if not relation_ids('ha'):
        # No hacluster relation: everything stays under this charm's control.
        return services, external_ports

    for name in externally_managed:
        try:
            services.remove(name)
        except ValueError:
            # Service was not in the list to begin with; nothing to drop.
            pass
    converted_ports = [port_conv_f(port) for port in external_ports]
    return services, converted_ports