
Redesign cluster buildup process

In order to fix bug #1756928 the whole cluster buildup process needed to
be redesigned. The assumptions about what is_bootstrapped and clustered
meant, and about when to restart on configuration changes, needed to be
re-evaluated.

The timing of restarts needed to be protected to avoid collisions.
Only bootstrapped hosts should go into the
wsrep_cluster_address=gcomm:// setting. Adding or removing units should
be handled gracefully. Starting with a single unit and expanding to a
cluster must work.

This change guarantees mysqld is restarted when the configuration file
changes and meets all the above requirements. As a consequence of the redesign,
the workload status now more accurately reflects the state of the unit.

Charm-helpers sync to bring in the distributed_wait fix.

Closes-Bug: #1756308
Closes-Bug: #1756928
Change-Id: I0742e6889b32201806cec6a0b5835e11a8027567
changes/94/555494/11
David Ames · 2 years ago · commit 801c2e7829
39 changed files with 3931 additions and 305 deletions
  1. .gitignore (+1 -0)
  2. charm-helpers-tests.yaml (+2 -1)
  3. charmhelpers/contrib/hahelpers/cluster.py (+11 -3)
  4. charmhelpers/contrib/openstack/utils.py (+1 -1)
  5. charmhelpers/contrib/storage/linux/lvm.py (+29 -0)
  6. charmhelpers/core/hookenv.py (+67 -19)
  7. charmhelpers/core/host.py (+9 -2)
  8. charmhelpers/core/services/base.py (+15 -6)
  9. config.yaml (+1 -1)
  10. hooks/percona_hooks.py (+52 -68)
  11. hooks/percona_utils.py (+111 -68)
  12. templates/my.cnf (+4 -5)
  13. templates/mysqld.cnf (+1 -5)
  14. tests/basic_deployment.py (+51 -1)
  15. tests/charmhelpers/contrib/openstack/amulet/deployment.py (+5 -5)
  16. tests/charmhelpers/contrib/openstack/amulet/utils.py (+199 -24)
  17. tests/charmhelpers/core/decorators.py (+55 -0)
  18. tests/charmhelpers/core/files.py (+43 -0)
  19. tests/charmhelpers/core/fstab.py (+132 -0)
  20. tests/charmhelpers/core/hookenv.py (+67 -19)
  21. tests/charmhelpers/core/host.py (+1028 -0)
  22. tests/charmhelpers/core/host_factory/__init__.py (+0 -0)
  23. tests/charmhelpers/core/host_factory/centos.py (+72 -0)
  24. tests/charmhelpers/core/host_factory/ubuntu.py (+90 -0)
  25. tests/charmhelpers/core/hugepage.py (+69 -0)
  26. tests/charmhelpers/core/kernel.py (+72 -0)
  27. tests/charmhelpers/core/kernel_factory/__init__.py (+0 -0)
  28. tests/charmhelpers/core/kernel_factory/centos.py (+17 -0)
  29. tests/charmhelpers/core/kernel_factory/ubuntu.py (+13 -0)
  30. tests/charmhelpers/core/services/__init__.py (+16 -0)
  31. tests/charmhelpers/core/services/base.py (+360 -0)
  32. tests/charmhelpers/core/services/helpers.py (+290 -0)
  33. tests/charmhelpers/core/strutils.py (+129 -0)
  34. tests/charmhelpers/core/sysctl.py (+54 -0)
  35. tests/charmhelpers/core/templating.py (+93 -0)
  36. tests/charmhelpers/core/unitdata.py (+520 -0)
  37. tests/charmhelpers/osplatform.py (+25 -0)
  38. unit_tests/test_percona_hooks.py (+71 -38)
  39. unit_tests/test_percona_utils.py (+156 -39)

.gitignore (+1 -0)

@ -9,4 +9,5 @@ __pycache__
.testrepository
.tox
.stestr
.unit-state.db
func-results.json

charm-helpers-tests.yaml (+2 -1)

@ -3,4 +3,5 @@ destination: tests/charmhelpers
include:
- contrib.amulet
- contrib.openstack.amulet
- core.hookenv
- core
- osplatform

charmhelpers/contrib/hahelpers/cluster.py (+11 -3)

@ -371,6 +371,7 @@ def distributed_wait(modulo=None, wait=None, operation_name='operation'):
''' Distribute operations by waiting based on modulo_distribution
If modulo and or wait are not set, check config_get for those values.
If config values are not set, default to modulo=3 and wait=30.
:param modulo: int The modulo number creates the group distribution
:param wait: int The constant time wait value
@ -382,10 +383,17 @@ def distributed_wait(modulo=None, wait=None, operation_name='operation'):
:side effect: Calls time.sleep()
'''
if modulo is None:
modulo = config_get('modulo-nodes')
modulo = config_get('modulo-nodes') or 3
if wait is None:
wait = config_get('known-wait')
calculated_wait = modulo_distribution(modulo=modulo, wait=wait)
wait = config_get('known-wait') or 30
if juju_is_leader():
# The leader should never wait
calculated_wait = 0
else:
# non_zero_wait=True guarantees the non-leader who gets modulo 0
# will still wait
calculated_wait = modulo_distribution(modulo=modulo, wait=wait,
non_zero_wait=True)
msg = "Waiting {} seconds for {} ...".format(calculated_wait,
operation_name)
log(msg, DEBUG)
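
The reworked wait logic is easiest to see with concrete numbers. A minimal
sketch (not part of the diff) of the behaviour with the defaults modulo=3 and
wait=30, assuming juju_is_leader() and modulo_distribution() behave as above:

def example_wait(unit_number, is_leader, modulo=3, wait=30):
    # The leader restarts immediately; every other unit gets a staggered,
    # strictly non-zero delay so it cannot collide with the leader.
    if is_leader:
        return 0
    calculated = (unit_number % modulo) * wait
    return modulo * wait if calculated == 0 else calculated

# percona-cluster/0 (leader) -> 0s, /1 -> 30s, /2 -> 60s, /3 -> 90s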


charmhelpers/contrib/openstack/utils.py (+1 -1)

@ -182,7 +182,7 @@ SWIFT_CODENAMES = OrderedDict([
('pike',
['2.13.0', '2.15.0']),
('queens',
['2.16.0']),
['2.16.0', '2.17.0']),
])
# >= Liberty version->codename mapping


charmhelpers/contrib/storage/linux/lvm.py (+29 -0)

@ -151,3 +151,32 @@ def extend_logical_volume_by_device(lv_name, block_device):
'''
cmd = ['lvextend', lv_name, block_device]
check_call(cmd)
def create_logical_volume(lv_name, volume_group, size=None):
'''
Create a new logical volume in an existing volume group
:param lv_name: str: name of logical volume to be created.
:param volume_group: str: Name of volume group to use for the new volume.
:param size: str: Size of logical volume to create (100% if not supplied)
:raises subprocess.CalledProcessError: in the event that the lvcreate fails.
'''
if size:
check_call([
'lvcreate',
'--yes',
'-L',
'{}'.format(size),
'-n', lv_name, volume_group
])
# create the lv with all the space available, this is needed because the
# system call is different for LVM
else:
check_call([
'lvcreate',
'--yes',
'-l',
'100%FREE',
'-n', lv_name, volume_group
])
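
A hypothetical usage of the new helper; the volume group name and sizes are
illustrative:

from charmhelpers.contrib.storage.linux.lvm import create_logical_volume

create_logical_volume('data-lv', 'vg0', size='10G')  # fixed-size volume
create_logical_volume('scratch-lv', 'vg0')           # use 100% of the free space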

charmhelpers/core/hookenv.py (+67 -19)

@ -27,6 +27,7 @@ import glob
import os
import json
import yaml
import re
import subprocess
import sys
import errno
@ -67,7 +68,7 @@ def cached(func):
@wraps(func)
def wrapper(*args, **kwargs):
global cache
key = str((func, args, kwargs))
key = json.dumps((func, args, kwargs), sort_keys=True, default=str)
try:
return cache[key]
except KeyError:
@ -1043,7 +1044,6 @@ def juju_version():
universal_newlines=True).strip()
@cached
def has_juju_version(minimum_version):
"""Return True if the Juju version is at least the provided version"""
return LooseVersion(juju_version()) >= LooseVersion(minimum_version)
@ -1103,6 +1103,8 @@ def _run_atexit():
@translate_exc(from_exc=OSError, to_exc=NotImplementedError)
def network_get_primary_address(binding):
'''
Deprecated since Juju 2.3; use network_get()
Retrieve the primary network address for a named binding
:param binding: string. The name of a relation of extra-binding
@ -1123,7 +1125,6 @@ def network_get_primary_address(binding):
return response
@translate_exc(from_exc=OSError, to_exc=NotImplementedError)
def network_get(endpoint, relation_id=None):
"""
Retrieve the network details for a relation endpoint
@ -1131,24 +1132,20 @@ def network_get(endpoint, relation_id=None):
:param endpoint: string. The name of a relation endpoint
:param relation_id: int. The ID of the relation for the current context.
:return: dict. The loaded YAML output of the network-get query.
:raise: NotImplementedError if run on Juju < 2.1
:raise: NotImplementedError if request not supported by the Juju version.
"""
if not has_juju_version('2.2'):
raise NotImplementedError(juju_version()) # earlier versions require --primary-address
if relation_id and not has_juju_version('2.3'):
raise NotImplementedError # 2.3 added the -r option
cmd = ['network-get', endpoint, '--format', 'yaml']
if relation_id:
cmd.append('-r')
cmd.append(relation_id)
try:
response = subprocess.check_output(
cmd,
stderr=subprocess.STDOUT).decode('UTF-8').strip()
except CalledProcessError as e:
# Early versions of Juju 2.0.x required the --primary-address argument.
# We catch that condition here and raise NotImplementedError since
# the requested semantics are not available - the caller can then
# use the network_get_primary_address() method instead.
if '--primary-address is currently required' in e.output.decode('UTF-8'):
raise NotImplementedError
raise
response = subprocess.check_output(
cmd,
stderr=subprocess.STDOUT).decode('UTF-8').strip()
return yaml.safe_load(response)
@ -1204,9 +1201,23 @@ def iter_units_for_relation_name(relation_name):
def ingress_address(rid=None, unit=None):
"""
Retrieve the ingress-address from a relation when available. Otherwise,
return the private-address. This function is to be used on the consuming
side of the relation.
Retrieve the ingress-address from a relation when available.
Otherwise, return the private-address.
When used on the consuming side of the relation (unit is a remote
unit), the ingress-address is the IP address that this unit needs
to use to reach the provided service on the remote unit.
When used on the providing side of the relation (unit == local_unit()),
the ingress-address is the IP address that is advertised to remote
units on this relation. Remote units need to use this address to
reach the local provided service on this unit.
Note that charms may document some other method to use in
preference to the ingress_address(), such as an address provided
on a different relation attribute or a service discovery mechanism.
This allows charms to redirect inbound connections to their peers
or different applications such as load balancers.
Usage:
addresses = [ingress_address(rid=u.rid, unit=u.unit)
@ -1220,3 +1231,40 @@ def ingress_address(rid=None, unit=None):
settings = relation_get(rid=rid, unit=unit)
return (settings.get('ingress-address') or
settings.get('private-address'))
def egress_subnets(rid=None, unit=None):
"""
Retrieve the egress-subnets from a relation.
This function is to be used on the providing side of the
relation, and provides the ranges of addresses that client
connections may come from. The result is uninteresting on
the consuming side of a relation (unit == local_unit()).
Returns a stable list of subnets in CIDR format.
eg. ['192.168.1.0/24', '2001::F00F/128']
If egress-subnets is not available, falls back to using the published
ingress-address, or finally private-address.
:param rid: string relation id
:param unit: string unit name
:side effect: calls relation_get
:return: list of subnets in CIDR format. eg. ['192.168.1.0/24', '2001::F00F/128']
"""
def _to_range(addr):
if re.search(r'^(?:\d{1,3}\.){3}\d{1,3}$', addr) is not None:
addr += '/32'
elif ':' in addr and '/' not in addr: # IPv6
addr += '/128'
return addr
settings = relation_get(rid=rid, unit=unit)
if 'egress-subnets' in settings:
return [n.strip() for n in settings['egress-subnets'].split(',') if n.strip()]
if 'ingress-address' in settings:
return [_to_range(settings['ingress-address'])]
if 'private-address' in settings:
return [_to_range(settings['private-address'])]
return [] # Should never happen
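
A minimal usage sketch on the providing side of a relation; the relation id,
unit name and grant_access() helper are hypothetical:

from charmhelpers.core.hookenv import egress_subnets

for subnet in egress_subnets(rid='shared-db:12', unit='keystone/0'):
    # e.g. create a database grant per client CIDR (hypothetical helper)
    grant_access(subnet)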

charmhelpers/core/host.py (+9 -2)

@ -993,7 +993,7 @@ def updatedb(updatedb_text, new_path):
return output
def modulo_distribution(modulo=3, wait=30):
def modulo_distribution(modulo=3, wait=30, non_zero_wait=False):
""" Modulo distribution
This helper uses the unit number, a modulo value and a constant wait time
@ -1015,7 +1015,14 @@ def modulo_distribution(modulo=3, wait=30):
@param modulo: int The modulo number creates the group distribution
@param wait: int The constant time wait value
@param non_zero_wait: boolean Override unit % modulo == 0,
return modulo * wait. Used to avoid collisions with
leader nodes which are often given priority.
@return: int Calculated time to wait for unit operation
"""
unit_number = int(local_unit().split('/')[1])
return (unit_number % modulo) * wait
calculated_wait_time = (unit_number % modulo) * wait
if non_zero_wait and calculated_wait_time == 0:
return modulo * wait
else:
return calculated_wait_time

charmhelpers/core/services/base.py (+15 -6)

@ -313,17 +313,26 @@ class PortManagerCallback(ManagerCallback):
with open(port_file) as fp:
old_ports = fp.read().split(',')
for old_port in old_ports:
if bool(old_port):
old_port = int(old_port)
if old_port not in new_ports:
hookenv.close_port(old_port)
if bool(old_port) and not self.ports_contains(old_port, new_ports):
hookenv.close_port(old_port)
with open(port_file, 'w') as fp:
fp.write(','.join(str(port) for port in new_ports))
for port in new_ports:
# A port is either a number or 'ICMP'
protocol = 'TCP'
if str(port).upper() == 'ICMP':
protocol = 'ICMP'
if event_name == 'start':
hookenv.open_port(port)
hookenv.open_port(port, protocol)
elif event_name == 'stop':
hookenv.close_port(port)
hookenv.close_port(port, protocol)
def ports_contains(self, port, ports):
if not bool(port):
return False
if str(port).upper() != 'ICMP':
port = int(port)
return port in ports
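
A short, self-contained sketch of the comparison the new ports_contains helper
performs; the port values are illustrative:

def ports_contains(port, ports):
    if not bool(port):
        return False
    if str(port).upper() != 'ICMP':
        port = int(port)
    return port in ports

assert ports_contains('8080', [8080, 'ICMP'])   # numeric strings compare as ints
assert ports_contains('ICMP', [8080, 'ICMP'])   # ICMP is matched as a string
assert not ports_contains('', [8080, 'ICMP'])   # empty entries are ignored
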
def service_stop(service_name):


config.yaml (+1 -1)

@ -317,4 +317,4 @@ options:
For very busy clouds or in resource restricted environments this value can be changed.
WARNING Please read all documentation before changing the default value which may have
unintended consequences. It may be necessary to set this value higher during deploy time
(PTS15) and subsequently change it back to the default (PT3S) after deployment.
(PT15S) and subsequently change it back to the default (PT3S) after deployment.

hooks/percona_hooks.py (+52 -68)

@ -4,7 +4,6 @@ import sys
import json
import os
import socket
import time
from charmhelpers.core.hookenv import (
Hooks, UnregisteredHookError,
@ -12,7 +11,6 @@ from charmhelpers.core.hookenv import (
log,
relation_get,
relation_set,
relation_id,
relation_ids,
related_units,
unit_get,
@ -32,8 +30,8 @@ from charmhelpers.core.hookenv import (
)
from charmhelpers.core.host import (
service_restart,
service_start,
service_running,
service_stop,
file_hash,
lsb_release,
CompareHostReleases,
@ -91,6 +89,8 @@ from percona_utils import (
install_mysql_ocf,
notify_bootstrapped,
is_bootstrapped,
clustered_once,
INITIAL_CLUSTERED_KEY,
is_leader_bootstrapped,
get_wsrep_value,
assess_status,
@ -168,7 +168,7 @@ def install():
install_percona_xtradb_cluster()
def render_config(clustered=False, hosts=None):
def render_config(hosts=None):
if hosts is None:
hosts = []
@ -179,7 +179,6 @@ def render_config(clustered=False, hosts=None):
context = {
'cluster_name': 'juju_cluster',
'private_address': get_cluster_host_ip(),
'clustered': clustered,
'cluster_hosts': ",".join(hosts),
'sst_method': config('sst-method'),
'sst_password': sst_password(),
@ -223,7 +222,7 @@ def render_config(clustered=False, hosts=None):
render(os.path.basename(config_file), config_file, context, perms=0o444)
def render_config_restart_on_changed(clustered, hosts, bootstrap=False):
def render_config_restart_on_changed(hosts, bootstrap=False):
"""Render mysql config and restart mysql service if file changes as a
result.
@ -235,13 +234,9 @@ def render_config_restart_on_changed(clustered, hosts, bootstrap=False):
it is started so long as the new node to be added is guaranteed to have
been restarted so as to apply the new config.
"""
if not is_leader() and not is_bootstrapped():
log('Non-leader waiting on leader bootstrap, skipping render',
DEBUG)
return
config_file = resolve_cnf_file()
pre_hash = file_hash(config_file)
render_config(clustered, hosts)
render_config(hosts)
create_binlogs_directory()
update_db_rels = False
if file_hash(config_file) != pre_hash or bootstrap:
@ -251,37 +246,28 @@ def render_config_restart_on_changed(clustered, hosts, bootstrap=False):
# relation id exists yet.
notify_bootstrapped()
update_db_rels = True
elif not service_running('mysql@bootstrap'):
else:
# NOTE(jamespage):
# if mysql@bootstrap is running, then the native
# bootstrap systemd service was used to start this
# instance, and it was the initial seed unit
# so don't try start the mysql.service unit;
# this also deals with seed units after they have been
# rebooted and mysqld was started by mysql.service.
delay = 1
# stop the bootstap version before restarting normal mysqld
if service_running('mysql@bootstrap'):
service_stop('mysql@bootstrap')
attempts = 0
max_retries = 5
# NOTE(dosaboy): avoid unnecessary restarts. Once mysql is started
# it needn't be restarted when new units join the cluster since the
# new units will join and apply their own config.
if not seeded():
action = service_restart
# If we are restarting avoid simultaneous restart collisions
cluster_wait()
else:
action = service_start
while not action('mysql'):
cluster_wait()
while not service_restart('mysql'):
if attempts == max_retries:
raise Exception("Failed to start mysql (max retries "
"reached)")
log("Failed to start mysql - retrying in %ss" % (delay),
log("Failed to start mysql - retrying per distributed wait",
WARNING)
time.sleep(delay)
delay += 2
attempts += 1
cluster_wait()
# If we get here we assume prior actions have succeeded to always
# this unit is marked as seeded so that subsequent calls don't result
@ -330,6 +316,13 @@ def upgrade():
if not leader_get('root-password') and leader_get('mysql.passwd'):
leader_set(**{'root-password': leader_get('mysql.passwd')})
# On upgrade-charm we assume the cluster was complete at some point
kvstore = kv()
initial_clustered = kvstore.get(INITIAL_CLUSTERED_KEY, False)
if not initial_clustered:
kvstore.set(key=INITIAL_CLUSTERED_KEY, value=True)
kvstore.flush()
# broadcast the bootstrap-uuid
wsrep_ready = get_wsrep_value('wsrep_ready') or ""
if wsrep_ready.lower() in ['on', 'ready']:
@ -368,47 +361,44 @@ def config_changed():
assert_charm_supports_ipv6()
hosts = get_cluster_hosts()
clustered = len(hosts) > 1
bootstrapped = is_bootstrapped()
leader_bootstrapped = is_leader_bootstrapped()
leader_ip = leader_get('leader-ip')
# Handle Edge Cases
if not is_leader():
# Fix Bug #1738896
# Speed up cluster process
if not clustered and leader_bootstrapped:
clustered = True
bootstrapped = True
hosts = [leader_ip]
# Fix gcomm timeout to non-bootstrapped node
if hosts and leader_ip not in hosts:
hosts = [leader_ip] + hosts
# NOTE: only configure the cluster if we have sufficient peers. This only
# applies if min-cluster-size is provided and is used to avoid extraneous
# configuration changes and premature bootstrapping as the cluster is
# deployed.
if is_leader():
# If the cluster has not been fully bootstrapped once yet, use an empty
# hosts list to avoid restarting the leader node's mysqld during
# cluster buildup.
# After the cluster has bootstrapped at least once, it is much
# less likely to have restart collisions. It is then safe to use the
# full hosts list and have the leader node's mysqld restart.
if not clustered_once():
hosts = []
log("Leader unit - bootstrap required=%s" % (not leader_bootstrapped),
DEBUG)
render_config_restart_on_changed(clustered, hosts,
render_config_restart_on_changed(hosts,
bootstrap=not leader_bootstrapped)
elif bootstrapped:
log("Cluster is bootstrapped - configuring mysql on this node",
elif leader_bootstrapped:
# Speed up cluster process by bootstrapping when the leader has
# bootstrapped
if leader_ip not in hosts:
# Fix Bug #1738896
hosts = [leader_ip] + hosts
log("Leader is bootstrapped - configuring mysql on this node",
DEBUG)
render_config_restart_on_changed(clustered, hosts)
# Rendering the mysqld.cnf and restarting is bootstrapping for a
# non-leader node.
render_config_restart_on_changed(hosts)
# Assert we are bootstrapped. This will throw an
# InconsistentUUIDError exception if UUIDs do not match.
update_bootstrap_uuid()
else:
log("Not configuring", DEBUG)
if bootstrapped:
try:
update_bootstrap_uuid()
except LeaderNoBootstrapUUIDError:
# until the bootstrap-uuid attribute is not replicated
# cluster_ready() will evaluate to False, so it is necessary to
# feed back this info to the user.
status_set('waiting', "Waiting for bootstrap-uuid set by leader")
# Until the bootstrap-uuid attribute is set by the leader,
# cluster_ready() will evaluate to False. So it is necessary to
# feed this information to the user.
status_set('waiting', "Waiting for bootstrap-uuid set by leader")
log('Non-leader waiting on leader bootstrap, skipping render',
DEBUG)
return
# Notify any changes to the access network
update_client_db_relations()
@ -427,7 +417,7 @@ def config_changed():
# the password needs to be updated only if the node was already
# bootstrapped
if bootstrapped:
if is_bootstrapped():
update_root_password()
@ -446,12 +436,6 @@ def cluster_joined():
level=INFO)
relation_set(relation_settings=relation_settings)
# Ensure all new peers are aware
cluster_state_uuid = leader_get('bootstrap-uuid')
if cluster_state_uuid:
notify_bootstrapped(cluster_rid=relation_id(),
cluster_uuid=cluster_state_uuid)
@hooks.hook('cluster-relation-departed')
@hooks.hook('cluster-relation-changed')


hooks/percona_utils.py (+111 -68)

@ -39,6 +39,7 @@ from charmhelpers.core.hookenv import (
leader_get,
leader_set,
)
from charmhelpers.core.unitdata import kv
from charmhelpers.fetch import (
apt_install,
filter_installed_packages,
@ -76,6 +77,7 @@ deb-src http://repo.percona.com/apt {release} main"""
SEEDED_MARKER = "{data_dir}/seeded"
HOSTS_FILE = '/etc/hosts'
DEFAULT_MYSQL_PORT = 3306
INITIAL_CLUSTERED_KEY = 'initial-cluster-complete'
# NOTE(ajkavanagh) - this is 'required' for the pause/resume code for
# maintenance mode, but is currently not populated as the
@ -217,6 +219,20 @@ def is_sufficient_peers():
def get_cluster_hosts():
"""Get the bootstrapped cluster peers
Determine the cluster peers that have bootstrapped and return the list of
hosts. Secondarily, update the hosts file with IPv6 address name
resolution.
The returned host list is intended to be used in the
wsrep_cluster_address=gcomm:// setting. Therefore, the hosts must have
already been bootstrapped. If an un-bootstrapped host happens to be first
in the list, mysql will fail to start.
@side_effect update_hosts_file called for IPv6 hostname resolution
@returns list of hosts
"""
hosts_map = {}
local_cluster_address = get_cluster_host_ip()
@ -227,7 +243,7 @@ def get_cluster_hosts():
addr = get_ipv6_addr(exc_list=[config('vip')], fatal=True)[0]
hosts_map = {addr: socket.gethostname()}
hosts = [local_cluster_address]
hosts = []
for relid in relation_ids('cluster'):
for unit in related_units(relid):
rdata = relation_get(unit=unit, rid=relid)
@ -247,13 +263,26 @@ def get_cluster_hosts():
level=DEBUG)
hosts_map[cluster_address] = hostname
hosts.append(hostname)
host = hostname
else:
hosts.append(resolve_hostname_to_ip(cluster_address))
host = resolve_hostname_to_ip(cluster_address)
# Add only cluster peers who have set bootstrap-uuid,
# an indication that they themselves are bootstrapped.
# An un-bootstrapped host in gcomm causes mysql to fail to start
# if it happens to be the first address in the list.
# Also fix a strange bug when executed from actions where the local
# unit is returned in related_units. We do not want the local IP
# in the gcomm hosts list.
if (rdata.get('bootstrap-uuid') and
host not in hosts and
host != local_cluster_address):
hosts.append(host)
if hosts_map:
update_hosts_file(hosts_map)
# Return a sorted list to avoid unnecessary restarts
hosts.sort()
return hosts
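
A minimal sketch (not charm code) of how the returned host list ends up in the
rendered wsrep setting; an empty list produces the bootstrap form:

def wsrep_cluster_address(hosts):
    return 'wsrep_cluster_address=gcomm://' + ','.join(hosts)

print(wsrep_cluster_address([]))                        # gcomm:// (bootstrap)
print(wsrep_cluster_address(['10.0.0.2', '10.0.0.3']))  # gcomm://10.0.0.2,10.0.0.3
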
@ -443,28 +472,83 @@ def is_leader_bootstrapped():
return True
def clustered_once():
"""Determine if the cluster has ever bootstrapped completely
Check the unit kv store to see if the cluster has bootstrapped at least once.
@returns boolean
"""
# Run is_bootstrapped once to guarantee kvstore is up to date
is_bootstrapped()
kvstore = kv()
return kvstore.get(INITIAL_CLUSTERED_KEY, False)
def is_bootstrapped():
""" Check that this unit is bootstrapped
"""Determine if each node in the cluster has been bootstrapped and the
cluster is complete with the expected number of peers.
Check that each node in the cluster, including this one, has set
bootstrap-uuid on the cluster relation.
Having min-cluster-size set will guarantee is_bootstrapped will not
return True until the expected number of peers are bootstrapped. If
min-cluster-size is not set, it will check peer relations to estimate the
expected cluster size. If min-cluster-size is not set and there are no
peers it must assume the cluster is bootstrapped in order to allow for
single unit deployments.
@returns boolean
"""
uuids = []
rids = relation_ids('cluster') or []
for rid in rids:
units = related_units(rid)
units.append(local_unit())
for unit in units:
id = relation_get('bootstrap-uuid', unit=unit, rid=rid)
if id:
uuids.append(id)
min_size = config('min-cluster-size')
if not min_size:
units = 1
for relation_id in relation_ids('cluster'):
units += len(related_units(relation_id))
min_size = units
if uuids:
if len(set(uuids)) > 1:
log("Found inconsistent bootstrap uuids - %s" % (uuids), WARNING)
if not is_sufficient_peers():
return False
elif min_size > 1:
uuids = []
for relation_id in relation_ids('cluster'):
units = related_units(relation_id) or []
units.append(local_unit())
for unit in units:
if not relation_get(attribute='bootstrap-uuid',
rid=relation_id,
unit=unit):
log("{} is not yet clustered".format(unit),
DEBUG)
return False
else:
bootstrap_uuid = relation_get(attribute='bootstrap-uuid',
rid=relation_id,
unit=unit)
if bootstrap_uuid:
uuids.append(bootstrap_uuid)
return True
if len(uuids) < min_size:
log("Fewer than minimum cluster size: "
"{} percona units reporting clustered".format(min_size),
DEBUG)
return False
elif len(set(uuids)) > 1:
raise Exception("Found inconsistent bootstrap uuids: "
"{}".format((uuids)))
else:
log("All {} percona units reporting clustered".format(min_size),
DEBUG)
return False
# Set INITIAL_CLUSTERED_KEY as the cluster has fully bootstrapped
kvstore = kv()
if not kvstore.get(INITIAL_CLUSTERED_KEY, False):
kvstore.set(key=INITIAL_CLUSTERED_KEY, value=True)
kvstore.flush()
return True
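
INITIAL_CLUSTERED_KEY acts as a one-way latch in the unit's kv store. A rough
sketch of that latch, assuming charmhelpers.core.unitdata.kv() and the key name
used above:

from charmhelpers.core.unitdata import kv

def mark_clustered_once():
    store = kv()
    if not store.get('initial-cluster-complete', False):
        store.set(key='initial-cluster-complete', value=True)
        store.flush()

# Once set, clustered_once() keeps returning True even if peers later depart,
# so the leader can safely restart mysqld on later config changes.
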
def bootstrap_pxc():
@ -606,12 +690,14 @@ def charm_check_func():
# and has the required peers
if not is_bootstrapped():
return ('waiting', 'Unit waiting for cluster bootstrap')
elif is_bootstrapped():
elif cluster_ready():
try:
_cluster_in_sync()
return ('active', 'Unit is ready and clustered')
except DesyncedException:
return ('blocked', 'Unit is not in sync')
else:
return ('waiting', 'Unit waiting on hacluster relation')
else:
return ('active', 'Unit is ready')
@ -763,15 +849,14 @@ def get_cluster_host_ip():
def cluster_ready():
"""Determine if each node in the cluster is ready and the cluster is
complete with the expected number of peers.
"""Determine if each node in the cluster is ready to respond to client
requests.
Once cluster_ready returns True it is safe to execute client relation
hooks. Having min-cluster-size set will guarantee cluster_ready will not
return True until the expected number of peers are clustered and ready.
hooks.
If min-cluster-size is not set it must assume the cluster is ready in order
to allow for single unit deployments.
If a VIP is set do not return ready until hacluster relationship is
complete.
@returns boolean
"""
@ -780,49 +865,7 @@ def cluster_ready():
DEBUG)
return False
min_size = config('min-cluster-size')
units = 1
for relation_id in relation_ids('cluster'):
units += len(related_units(relation_id))
if not min_size:
min_size = units
if not is_sufficient_peers():
return False
elif min_size > 1:
uuids = []
for relation_id in relation_ids('cluster'):
units = related_units(relation_id) or []
units.append(local_unit())
for unit in units:
if not relation_get(attribute='bootstrap-uuid',
rid=relation_id,
unit=unit):
log("{} is not yet clustered".format(unit),
DEBUG)
return False
else:
bootstrap_uuid = relation_get(attribute='bootstrap-uuid',
rid=relation_id,
unit=unit)
if bootstrap_uuid:
uuids.append(bootstrap_uuid)
if len(uuids) < min_size:
log("Fewer than minimum cluster size: "
"{} percona units reporting clustered".format(min_size),
DEBUG)
return False
elif len(set(uuids)) > 1:
raise Exception("Found inconsistent bootstrap uuids: "
"{}".format((uuids)))
else:
log("All {} percona units reporting clustered".format(min_size),
DEBUG)
return True
log("Must assume this is a single unit returning 'cluster' ready", DEBUG)
return True
return is_bootstrapped()
def client_node_is_ready():


templates/my.cnf (+4 -5)

@ -19,13 +19,12 @@ pid_file = /var/run/mysqld/mysqld.pid
# Path to Galera library
wsrep_provider=/usr/lib/libgalera_smm.so
{% if not clustered %}
# Empty gcomm address is being used when cluster is getting bootstrapped
wsrep_cluster_address=gcomm://
{% else %}
# Add address of other cluster nodes here
# Cluster connection URL contains the IPs of node#1, node#2 and node#3
# Empty when bootstrapping the cluster
wsrep_cluster_address=gcomm://{{ cluster_hosts }}
{% endif %}
# In order for Galera to work correctly binlog format should be ROW
binlog_format=ROW


templates/mysqld.cnf (+1 -5)

@ -131,13 +131,9 @@ innodb_autoinc_lock_mode = {{ innodb_autoinc_lock_mode }}
wsrep_provider={{ wsrep_provider }}
# Add address of other cluster nodes here
{% if not clustered and is_leader -%}
# Empty gcomm address is being used when cluster is getting bootstrapped
wsrep_cluster_address=gcomm://
{% else -%}
# Cluster connection URL contains the IPs of node#1, node#2 and node#3
# Empty when bootstrapping the cluster
wsrep_cluster_address=gcomm://{{ cluster_hosts }}
{% endif %}
#
# Node address


tests/basic_deployment.py (+51 -1)

@ -12,7 +12,6 @@ from charmhelpers.contrib.openstack.amulet.deployment import (
)
from charmhelpers.contrib.amulet.utils import AmuletUtils
PXC_ROOT_PASSWD = 'ubuntu'
@ -107,6 +106,7 @@ class BasicDeployment(OpenStackAmuletDeployment):
self.test_pxc_running()
self.test_bootstrapped_and_clustered()
self.test_bootstrap_uuid_set_in_the_relation()
self.test_restart_on_config_change()
self.test_pause_resume()
if self.ha:
self.test_kill_master()
@ -192,6 +192,7 @@ class BasicDeployment(OpenStackAmuletDeployment):
action_id = self.utils.run_action(unit, "pause")
assert self.utils.wait_on_action(action_id), "Pause action failed."
self.d.sentry.wait()
# Note that is_mysqld_running will print an error message when
# mysqld is not running. This is by design but it looks odd
@ -206,6 +207,7 @@ class BasicDeployment(OpenStackAmuletDeployment):
assert self.utils.status_get(unit)[0] == "active"
assert self.is_mysqld_running(unit=unit), \
"mysqld not running after resume."
self._auto_wait_for_status()
def test_kill_master(self):
'''
@ -349,3 +351,51 @@ class BasicDeployment(OpenStackAmuletDeployment):
" to %s:%s" % (addr,
port))
return False
def resolve_cnf_file(self):
if self._get_openstack_release() < self.xenial_mitaka:
return '/etc/mysql/my.cnf'
else:
return '/etc/mysql/percona-xtradb-cluster.conf.d/mysqld.cnf'
def test_restart_on_config_change(self):
"""Verify that the specified services are restarted when the
config is changed."""
sentry = self.d.sentry['percona-cluster'][0]
juju_service = 'percona-cluster'
# Expected default and alternate values
set_default = {'peer-timeout': 'PT3S'}
set_alternate = {'peer-timeout': 'PT15S'}
# Config file affected by juju set config change
conf_file = self.resolve_cnf_file()
# Services which are expected to restart upon config change
services = {
'mysqld': conf_file,
}
# Make config change, check for service restarts
self.utils.log.debug('Making config change on {}...'
.format(juju_service))
mtime = self.utils.get_sentry_time(sentry)
self.d.configure(juju_service, set_alternate)
self._auto_wait_for_status()
sleep_time = 40
for s, conf_file in services.iteritems():
self.utils.log.debug("Checking that service restarted: {}"
.format(s))
if not self.utils.validate_service_config_changed(
sentry, mtime, s, conf_file, retry_count=5,
retry_sleep_time=sleep_time,
sleep_time=sleep_time):
self.d.configure(juju_service, set_default)
msg = "service {} didn't restart after config change".format(s)
amulet.raise_status(amulet.FAIL, msg=msg)
sleep_time = 0
self.d.configure(juju_service, set_default)
self._auto_wait_for_status()

tests/charmhelpers/contrib/openstack/amulet/deployment.py (+5 -5)

@ -21,6 +21,9 @@ from collections import OrderedDict
from charmhelpers.contrib.amulet.deployment import (
AmuletDeployment
)
from charmhelpers.contrib.openstack.amulet.utils import (
OPENSTACK_RELEASES_PAIRS
)
DEBUG = logging.DEBUG
ERROR = logging.ERROR
@ -271,11 +274,8 @@ class OpenStackAmuletDeployment(AmuletDeployment):
release.
"""
# Must be ordered by OpenStack release (not by Ubuntu release):
(self.trusty_icehouse, self.trusty_kilo, self.trusty_liberty,
self.trusty_mitaka, self.xenial_mitaka, self.xenial_newton,
self.yakkety_newton, self.xenial_ocata, self.zesty_ocata,
self.xenial_pike, self.artful_pike, self.xenial_queens,
self.bionic_queens,) = range(13)
for i, os_pair in enumerate(OPENSTACK_RELEASES_PAIRS):
setattr(self, os_pair, i)
releases = {
('trusty', None): self.trusty_icehouse,


tests/charmhelpers/contrib/openstack/amulet/utils.py (+199 -24)

@ -50,6 +50,13 @@ ERROR = logging.ERROR
NOVA_CLIENT_VERSION = "2"
OPENSTACK_RELEASES_PAIRS = [
'trusty_icehouse', 'trusty_kilo', 'trusty_liberty',
'trusty_mitaka', 'xenial_mitaka', 'xenial_newton',
'yakkety_newton', 'xenial_ocata', 'zesty_ocata',
'xenial_pike', 'artful_pike', 'xenial_queens',
'bionic_queens']
class OpenStackAmuletUtils(AmuletUtils):
"""OpenStack amulet utilities.
@ -63,7 +70,34 @@ class OpenStackAmuletUtils(AmuletUtils):
super(OpenStackAmuletUtils, self).__init__(log_level)
def validate_endpoint_data(self, endpoints, admin_port, internal_port,
public_port, expected):
public_port, expected, openstack_release=None):
"""Validate endpoint data. Pick the correct validator based on
OpenStack release. Expected data should be in the v2 format:
{
'id': id,
'region': region,
'adminurl': adminurl,
'internalurl': internalurl,
'publicurl': publicurl,
'service_id': service_id}
"""
validation_function = self.validate_v2_endpoint_data
xenial_queens = OPENSTACK_RELEASES_PAIRS.index('xenial_queens')
if openstack_release and openstack_release >= xenial_queens:
validation_function = self.validate_v3_endpoint_data
expected = {
'id': expected['id'],
'region': expected['region'],
'region_id': 'RegionOne',
'url': self.valid_url,
'interface': self.not_null,
'service_id': expected['service_id']}
return validation_function(endpoints, admin_port, internal_port,
public_port, expected)
def validate_v2_endpoint_data(self, endpoints, admin_port, internal_port,
public_port, expected):
"""Validate endpoint data.
Validate actual endpoint data vs expected endpoint data. The ports
@ -141,7 +175,86 @@ class OpenStackAmuletUtils(AmuletUtils):
if len(found) != expected_num_eps:
return 'Unexpected number of endpoints found'
def validate_svc_catalog_endpoint_data(self, expected, actual):
def convert_svc_catalog_endpoint_data_to_v3(self, ep_data):
"""Convert v2 endpoint data into v3.
{
'service_name1': [
{
'adminURL': adminURL,
'id': id,
'region': region.
'publicURL': publicURL,
'internalURL': internalURL
}],
'service_name2': [
{
'adminURL': adminURL,
'id': id,
'region': region.
'publicURL': publicURL,
'internalURL': internalURL
}],
}
"""
self.log.warn("Endpoint ID and Region ID validation is limited to not "
"null checks after v2 to v3 conversion")
for svc in ep_data.keys():
assert len(ep_data[svc]) == 1, "Unknown data format"
svc_ep_data = ep_data[svc][0]
ep_data[svc] = [
{
'url': svc_ep_data['adminURL'],
'interface': 'admin',
'region': svc_ep_data['region'],
'region_id': self.not_null,
'id': self.not_null},
{
'url': svc_ep_data['publicURL'],
'interface': 'public',
'region': svc_ep_data['region'],
'region_id': self.not_null,
'id': self.not_null},
{
'url': svc_ep_data['internalURL'],
'interface': 'internal',
'region': svc_ep_data['region'],
'region_id': self.not_null,
'id': self.not_null}]
return ep_data
def validate_svc_catalog_endpoint_data(self, expected, actual,
openstack_release=None):
"""Validate service catalog endpoint data. Pick the correct validator
for the OpenStack version. Expected data should be in the v2 format:
{
'service_name1': [
{
'adminURL': adminURL,
'id': id,
'region': region.
'publicURL': publicURL,
'internalURL': internalURL
}],
'service_name2': [
{
'adminURL': adminURL,
'id': id,
'region': region.
'publicURL': publicURL,
'internalURL': internalURL
}],
}
"""
validation_function = self.validate_v2_svc_catalog_endpoint_data
xenial_queens = OPENSTACK_RELEASES_PAIRS.index('xenial_queens')
if openstack_release and openstack_release >= xenial_queens:
validation_function = self.validate_v3_svc_catalog_endpoint_data
expected = self.convert_svc_catalog_endpoint_data_to_v3(expected)
return validation_function(expected, actual)
def validate_v2_svc_catalog_endpoint_data(self, expected, actual):
"""Validate service catalog endpoint data.
Validate a list of actual service catalog endpoints vs a list of
@ -350,16 +463,13 @@ class OpenStackAmuletUtils(AmuletUtils):
deployment._auto_wait_for_status()
self.keystone_wait_for_propagation(sentry_relation_pairs, api_version)
def authenticate_cinder_admin(self, keystone_sentry, username,
password, tenant, api_version=2):
def authenticate_cinder_admin(self, keystone, api_version=2):
"""Authenticates admin user with cinder."""
# NOTE(beisner): cinder python client doesn't accept tokens.
keystone_ip = keystone_sentry.info['public-address']
ept = "http://{}:5000/v2.0".format(keystone_ip.strip().decode('utf-8'))
self.log.debug('Authenticating cinder admin...')
_clients = {
1: cinder_client.Client,
2: cinder_clientv2.Client}
return _clients[api_version](username, password, tenant, ept)
return _clients[api_version](session=keystone.session)
def authenticate_keystone(self, keystone_ip, username, password,
api_version=False, admin_port=False,
@ -367,13 +477,36 @@ class OpenStackAmuletUtils(AmuletUtils):
project_domain_name=None, project_name=None):
"""Authenticate with Keystone"""
self.log.debug('Authenticating with keystone...')
port = 5000
if admin_port:
port = 35357
base_ep = "http://{}:{}".format(keystone_ip.strip().decode('utf-8'),
port)
if not api_version or api_version == 2:
ep = base_ep + "/v2.0"
if not api_version:
api_version = 2
sess, auth = self.get_keystone_session(
keystone_ip=keystone_ip,
username=username,
password=password,
api_version=api_version,
admin_port=admin_port,
user_domain_name=user_domain_name,
domain_name=domain_name,
project_domain_name=project_domain_name,
project_name=project_name
)
if api_version == 2:
client = keystone_client.Client(session=sess)
else:
client = keystone_client_v3.Client(session=sess)
# This populates the client.service_catalog
client.auth_ref = auth.get_access(sess)
return client
def get_keystone_session(self, keystone_ip, username, password,
api_version=False, admin_port=False,
user_domain_name=None, domain_name=None,
project_domain_name=None, project_name=None):
"""Return a keystone session object"""
ep = self.get_keystone_endpoint(keystone_ip,
api_version=api_version,
admin_port=admin_port)
if api_version == 2:
auth = v2.Password(
username=username,
password=password,
@ -381,12 +514,7 @@ class OpenStackAmuletUtils(AmuletUtils):
auth_url=ep
)
sess = keystone_session.Session(auth=auth)
client = keystone_client.Client(session=sess)
# This populates the client.service_catalog
client.auth_ref = auth.get_access(sess)
return client
else:
ep = base_ep + "/v3"
auth = v3.Password(
user_domain_name=user_domain_name,
username=username,
@ -397,10 +525,57 @@ class OpenStackAmuletUtils(AmuletUtils):
auth_url=ep
)
sess = keystone_session.Session(auth=auth)
client = keystone_client_v3.Client(session=sess)
# This populates the client.service_catalog
client.auth_ref = auth.get_access(sess)
return client
return (sess, auth)
def get_keystone_endpoint(self, keystone_ip, api_version=None,
admin_port=False):
"""Return keystone endpoint"""
port = 5000
if admin_port:
port = 35357
base_ep = "http://{}:{}".format(keystone_ip.strip().decode('utf-8'),
port)
if api_version == 2:
ep = base_ep + "/v2.0"
else:
ep = base_ep + "/v3"
return ep
def get_default_keystone_session(self, keystone_sentry,
openstack_release=None):
"""Return a keystone session object and client object assuming standard
default settings
Example call in amulet tests:
self.keystone_session, self.keystone = u.get_default_keystone_session(
self.keystone_sentry,
openstack_release=self._get_openstack_release())
The session can then be used to auth other clients:
neutronclient.Client(session=session)
aodh_client.Client(session=session)
etc.
"""
self.log.debug('Authenticating keystone admin...')
api_version = 2
client_class = keystone_client.Client
# 11 => xenial_queens
if openstack_release and openstack_release >= 11:
api_version = 3
client_class = keystone_client_v3.Client
keystone_ip = keystone_sentry.info['public-address']
session, auth = self.get_keystone_session(
keystone_ip,
api_version=api_version,
username='admin',
password='openstack',
project_name='admin',
user_domain_name='admin_domain',
project_domain_name='admin_domain')
client = client_class(session=session)
# This populates the client.service_catalog
client.auth_ref = auth.get_access(session)
return session, client
def authenticate_keystone_admin(self, keystone_sentry, user, password,
tenant=None, api_version=None,


tests/charmhelpers/core/decorators.py (+55 -0)

@ -0,0 +1,55 @@
# Copyright 2014-2015 Canonical Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright 2014 Canonical Ltd.
#
# Authors:
# Edward Hope-Morley <opentastic@gmail.com>
#
import time
from charmhelpers.core.hookenv import (
log,
INFO,
)
def retry_on_exception(num_retries, base_delay=0, exc_type=Exception):
"""If the decorated function raises exception exc_type, allow num_retries
retry attempts before raising the exception.
"""
def _retry_on_exception_inner_1(f):
def _retry_on_exception_inner_2(*args, **kwargs):
retries = num_retries
multiplier = 1
while True:
try:
return f(*args, **kwargs)
except exc_type:
if not retries:
raise
delay = base_delay * multiplier
multiplier += 1
log("Retrying '%s' %d more times (delay=%s)" %
(f.__name__, retries, delay), level=INFO)
retries -= 1
if delay:
time.sleep(delay)
return _retry_on_exception_inner_2
return _retry_on_exception_inner_1
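
A hypothetical usage of the decorator; the wrapped command and exception type
are illustrative:

import subprocess

from charmhelpers.core.decorators import retry_on_exception

@retry_on_exception(num_retries=3, base_delay=5,
                    exc_type=subprocess.CalledProcessError)
def get_juju_status():
    # Retried up to three times, waiting 5s, then 10s, then 15s between tries.
    return subprocess.check_output(['juju', 'status', '--format', 'yaml'])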

tests/charmhelpers/core/files.py (+43 -0)

@ -0,0 +1,43 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2014-2015 Canonical Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__author__ = 'Jorge Niedbalski <niedbalski@ubuntu.com>'
import os
import subprocess
def sed(filename, before, after, flags='g'):
"""
Search and replaces the given pattern on filename.
:param filename: relative or absolute file path.
:param before: expression to be replaced (see 'man sed')
:param after: expression to replace with (see 'man sed')
:param flags: sed-compatible regex flags; for example, to make
the search and replace case insensitive, specify ``flags="i"``.
The ``g`` flag is always specified regardless, so you do not
need to remember to include it when overriding this parameter.
:returns: If the sed command exit code was zero then return,
otherwise raise CalledProcessError.
"""
expression = r's/{0}/{1}/{2}'.format(before,
after, flags)
return subprocess.check_call(["sed", "-i", "-r", "-e",
expression,
os.path.expanduser(filename)])
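
A hypothetical usage; the file path and patterns are illustrative:

from charmhelpers.core.files import sed

# Comment out any bind-address line, matching case-insensitively.
sed('/etc/mysql/my.cnf', '^bind-address', '# bind-address', flags='i')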

tests/charmhelpers/core/fstab.py (+132 -0)

@ -0,0 +1,132 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2014-2015 Canonical Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
__author__ = 'Jorge Niedbalski R. <jorge.niedbalski@canonical.com>'
class Fstab(io.FileIO):
"""This class extends file in order to implement a file reader/writer
for file `/etc/fstab`
"""
class Entry(object):
"""Entry class represents a non-comment line on the `/etc/fstab` file
"""
def __init__(self, device, mountpoint, filesystem,
options, d=0, p=0):
self.device = device
self.mountpoint = mountpoint
self.filesystem = filesystem
if not options:
options = "defaults"
self.options = options
self.d = int(d)
self.p = int(p)
def __eq__(self, o):
return str(self) == str(o)
def __str__(self):
return "{} {} {} {} {} {}".format(self.device,
self.mountpoint,
self.filesystem,
self.options,
self.d,
self.p)
DEFAULT_PATH = os.path.join(os.path.sep, 'etc', 'fstab')
def __init__(self, path=None):
if path:
self._path = path
else:
self._path = self.DEFAULT_PATH
super(Fstab, self).__init__(self._path, 'rb+')
def _hydrate_entry(self, line):
# NOTE: use split with no arguments to split on any
# whitespace including tabs
return Fstab.Entry(*filter(
lambda x: x not in ('', None),
line.strip("\n").split()))
@property
def entries(self):
self.seek(0)
for line in self.readlines():
line = line.decode('us-ascii')
try:
if line.strip() and not line.strip().startswith("#"):
yield self._hydrate_entry(line)
except ValueError:
pass
def get_entry_by_attr(self, attr, value):
for entry in self.entries:
e_attr = getattr(entry, attr)
if e_attr == value:
return entry
return None
def add_entry(self, entry):
if self.get_entry_by_attr('device', entry.device):
return False
self.write((str(entry) + '\n').encode('us-ascii'))
self.truncate()
return entry
def remove_entry(self, entry):
self.seek(0)
lines = [l.decode('us-ascii') for l in self.readlines()]
found = False
for index, line in enumerate(lines):
if line.strip() and not line.strip().startswith("#"):
if self._hydrate_entry(line) == entry:
found = True
break
if not found:
return False
lines.remove(line)
self.seek(0)
self.write(''.join(lines).encode('us-ascii'))
self.truncate()
return True
@classmethod
def remove_by_mountpoint(cls, mountpoint, path=None):
fstab = cls(path=path)
entry = fstab.get_entry_by_attr('mountpoint', mountpoint)
if entry:
return fstab.remove_entry(entry)
return False
@classmethod
def add(cls, device, mountpoint, filesystem, options=None, path=None):