Guard yaml load of grastate on cold boot

Occasionally after a cold boot the yaml load of grastate will throw an
exception. Do not error out in this instance.
Update percona to use TEST_ variables
Fix HA overlays

func-test-pr: https://github.com/openstack-charmers/zaza-openstack-tests/pull/116
Change-Id: I6e40970423acb6f70dcc3b91f8b5109de6f46bfc
David Ames 2019-11-06 15:38:14 -08:00
parent 9b68baa799
commit d8b13606e4
8 changed files with 45 additions and 455 deletions


@@ -1506,20 +1506,37 @@ def check_mysql_connection(password=None):
         return False


+def get_grastate():
+    """Get GR State.
+
+    Read the grastate yaml file and return dictionary.
+
+    :returns: dict grastate data
+    """
+    grastate_file = os.path.join(resolve_data_dir(), "grastate.dat")
+    if os.path.exists(grastate_file):
+        try:
+            with open(grastate_file, 'r') as f:
+                return yaml.safe_load(f)
+        except yaml.reader.ReaderError:
+            pass
+    # Something is amiss but we should not error out
+    # return expected dictionary but zeroed out
+    return {"seqno": "0", "safe_to_bootstrap": "0"}
+
+
 def get_grastate_seqno():
     """Get GR State safe sequence number.

     Read the grastate yaml file to determine the sequence number for this
     instance.

-    :returns: int Sequence Number
+    :returns: str Sequence Number
     """
-    grastate_file = os.path.join(resolve_data_dir(), "grastate.dat")
-    if os.path.exists(grastate_file):
-        with open(grastate_file, 'r') as f:
-            grastate = yaml.safe_load(f)
-    return grastate.get("seqno")
+    return get_grastate().get("seqno")


 def get_grastate_safe_to_bootstrap():
@@ -1528,14 +1545,10 @@ def get_grastate_safe_to_bootstrap():

     Read the grastate yaml file to determine if it is safe to bootstrap from
     this instance.

-    :returns: int Safe to bootstrap 0 or 1
+    :returns: str Safe to bootstrap 0 or 1
     """
-    grastate_file = os.path.join(resolve_data_dir(), "grastate.dat")
-    if os.path.exists(grastate_file):
-        with open(grastate_file, 'r') as f:
-            grastate = yaml.safe_load(f)
-    return grastate.get("safe_to_bootstrap")
+    return get_grastate().get("safe_to_bootstrap")


 def set_grastate_safe_to_bootstrap():
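For reference, a minimal, self-contained sketch of the guarded load added above. This is not the charm code itself: resolve_data_dir() is replaced by a hard-coded directory and the cold-boot damage is simulated by writing NUL bytes, but it shows that a mangled grastate.dat now yields the zeroed defaults instead of raising:

    import os

    import yaml

    DATA_DIR = "/tmp/pxc-demo"  # stand-in for resolve_data_dir()


    def get_grastate(data_dir=DATA_DIR):
        """Parse grastate.dat, falling back to zeroed defaults on a bad read."""
        grastate_file = os.path.join(data_dir, "grastate.dat")
        if os.path.exists(grastate_file):
            try:
                with open(grastate_file, 'r') as f:
                    return yaml.safe_load(f)
            except yaml.reader.ReaderError:
                # e.g. NUL bytes left behind by an unclean shutdown
                pass
        return {"seqno": "0", "safe_to_bootstrap": "0"}


    if __name__ == "__main__":
        os.makedirs(DATA_DIR, exist_ok=True)
        # Simulate the corruption: a zero-filled grastate.dat.
        with open(os.path.join(DATA_DIR, "grastate.dat"), "wb") as f:
            f.write(b"\x00" * 128)
        print(get_grastate())  # {'seqno': '0', 'safe_to_bootstrap': '0'}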


@@ -1,407 +0,0 @@
# basic deployment test class for percona-xtradb-cluster

import amulet
import hashlib
import re
import os
import socket
import time
import telnetlib
import yaml

from charmhelpers.contrib.openstack.amulet.deployment import (
    OpenStackAmuletDeployment
)
from charmhelpers.contrib.amulet.utils import AmuletUtils

PXC_ROOT_PASSWD = 'ubuntu'


class BasicDeployment(OpenStackAmuletDeployment):
    utils = AmuletUtils()

    def __init__(self, vip=None, units=1, series="trusty", openstack=None,
                 source=None, stable=False):
        super(BasicDeployment, self).__init__(series, openstack, source,
                                              stable)
        self.units = units
        self.master_unit = None
        self.vip = None
        self.ha = False
        if units > 1:
            self.ha = True
            if vip:
                self.vip = vip
            elif 'AMULET_OS_VIP' in os.environ:
                self.vip = os.environ.get('AMULET_OS_VIP')
            elif os.path.isfile('local.yaml'):
                with open('local.yaml', 'rb') as f:
                    self.cfg = yaml.safe_load(f.read())
                self.vip = self.cfg.get('vip')
            else:
                amulet.raise_status(amulet.SKIP,
                                    ("Please set the vip in local.yaml or "
                                     "env var AMULET_OS_VIP to run this test "
                                     "suite"))
        self.log = self.utils.get_logger()

    def _add_services(self):
        """Add services

        Add the services that we're testing, where percona-cluster is local,
        and the rest of the service are from lp branches that are
        compatible with the local charm (e.g. stable or next).
        """
        this_service = {'name': 'percona-cluster',
                        'units': self.units}
        other_services = []
        if self.units > 1 and self.ha:
            other_services.append({'name': 'hacluster'})

        super(BasicDeployment, self)._add_services(this_service,
                                                   other_services)

    def _add_relations(self):
        """Add all of the relations for the services."""
        if self.units > 1 and self.ha:
            relations = {'percona-cluster:ha': 'hacluster:ha'}
            super(BasicDeployment, self)._add_relations(relations)

    def _get_configs(self):
        """Configure all of the services."""
        cfg_percona = {'min-cluster-size': self.units,
                       'vip': self.vip,
                       'root-password': PXC_ROOT_PASSWD,
                       'wsrep-slave-threads': 2,
                       'gcs-fc-limit': 32}

        cfg_ha = {'debug': True,
                  'corosync_key': ('xZP7GDWV0e8Qs0GxWThXirNNYlScgi3sRTdZk/IXKD'
                                   'qkNFcwdCWfRQnqrHU/6mb6sz6OIoZzX2MtfMQIDcXu'
                                   'PqQyvKuv7YbRyGHmQwAWDUA4ed759VWAO39kHkfWp9'
                                   'y5RRk/wcHakTcWYMwm70upDGJEP00YT3xem3NQy27A'
                                   'C1w=')}

        configs = {}
        if self.units > 1 and self.ha:
            cfg_ha['cluster_count'] = str(self.units)
            configs['hacluster'] = cfg_ha

        configs['percona-cluster'] = cfg_percona
        return configs

    def _configure_services(self):
        super(BasicDeployment, self)._configure_services(self._get_configs())

    def run(self):
        self._add_services()
        self._add_relations()
        self._configure_services()
        self._deploy()

        self.d.sentry.wait()
        self.test_deployment()

    def test_deployment(self):
        '''Top level test function executor'''
        self.test_pacemaker()
        self.test_pxc_running()
        self.test_bootstrapped_and_clustered()
        self.test_bootstrap_uuid_set_in_the_relation()
        self.test_restart_on_config_change()
        self.test_pause_resume()
        if self.ha:
            self.test_kill_master()

    def test_pacemaker(self):
        '''
        Ensure that pacemaker and corosync are correctly configured in
        clustered deployments.

        side effect: self.master_unit should be set after execution
        '''
        if self.units > 1 and self.ha:
            i = 0
            while i < 30 and not self.master_unit:
                self.master_unit = self.find_master(ha=self.ha)
                i += 1
                time.sleep(10)

            msg = 'percona-cluster vip not found'
            assert self.master_unit is not None, msg

            _, code = self.master_unit.run('sudo crm_verify --live-check')
            assert code == 0, "'crm_verify --live-check' failed"

            vip_key = 'res_mysql_{}_vip'.format(
                hashlib.sha1(self.vip.encode('UTF-8')).hexdigest()[:7])
            resources = [vip_key]
            resources += ['res_mysql_monitor:%d' %
                          m for m in range(self.units)]

            assert sorted(self.get_pcmkr_resources()) == sorted(resources)
        else:
            self.master_unit = self.find_master(ha=self.ha)

    def test_pxc_running(self):
        '''
        Ensure PXC is running on all units
        '''
        for unit in self.d.sentry['percona-cluster']:
            assert self.is_mysqld_running(unit), 'mysql not running: %s' % unit

    def test_bootstrapped_and_clustered(self):
        '''
        Ensure PXC is bootstrapped and that peer units are clustered
        '''
        self.log.info('Ensuring PXC is bootstrapped')
        msg = "Percona cluster failed to bootstrap"
        assert self.is_pxc_bootstrapped(), msg

        self.log.info('Checking PXC cluster size == {}'.format(self.units))
        got = int(self.get_cluster_size())
        msg = ("Percona cluster unexpected size"
               " (wanted=%s, got=%s)" % (self.units, got))
        assert got == self.units, msg

    def test_bootstrap_uuid_set_in_the_relation(self):
        """Verify that the bootstrap-uuid attribute was set by the leader and
        all the peers where notified.
        """
        (leader_uuid, code) = self.master_unit.run("leader-get bootstrap-uuid")
        assert leader_uuid

        cmd_rel_get = ("relation-get -r `relation-ids cluster` "
                       "bootstrap-uuid %s")
        units = self.d.sentry['percona-cluster']
        for unit in units:
            for peer in units:
                cmd = cmd_rel_get % peer.info['unit_name']
                self.log.debug(cmd)
                (output, code) = unit.run(cmd)
                assert code == 0
                assert output == leader_uuid, "%s != %s" % (output,
                                                            leader_uuid)

    def test_pause_resume(self):
        '''
        Ensure pasue/resume actions stop/start mysqld on units
        '''
        self.log.info('Testing pause/resume actions')
        self.log.info('Pausing service on first PXC unit')
        unit = self.d.sentry['percona-cluster'][0]
        assert self.is_mysqld_running(unit), 'mysql not running'
        assert self.utils.status_get(unit)[0] == "active"

        action_id = self.utils.run_action(unit, "pause")
        assert self.utils.wait_on_action(action_id), "Pause action failed."

        self.d.sentry.wait()
        # Note that is_mysqld_running will print an error message when
        # mysqld is not running. This is by design but it looks odd
        # in the output.
        assert not self.is_mysqld_running(unit=unit), \
            "mysqld is still running!"

        self.log.info('Resuming service on first PXC unit')
        assert self.utils.status_get(unit)[0] == "maintenance"
        action_id = self.utils.run_action(unit, "resume")
        assert self.utils.wait_on_action(action_id), "Resume action failed"
        assert self.utils.status_get(unit)[0] == "active"
        assert self.is_mysqld_running(unit=unit), \
            "mysqld not running after resume."
        self._auto_wait_for_status()

    def test_kill_master(self):
        '''
        Ensure that killing the mysqld on the master unit results
        in a VIP failover
        '''
        self.log.info('Testing failover of master unit on mysqld failure')
        # we are going to kill the master
        old_master = self.master_unit
        self.log.info(
            'kill -9 mysqld on {}'.format(self.master_unit.info['unit_name'])
        )
        self.master_unit.run('sudo killall -9 mysqld')

        self.log.info('looking for the new master')
        i = 0
        changed = False
        while i < 10 and not changed:
            i += 1
            time.sleep(5)  # give some time to pacemaker to react
            new_master = self.find_master(ha=self.ha)

            if (new_master and new_master.info['unit_name'] !=
                    old_master.info['unit_name']):
                self.log.info(
                    'New master unit detected'
                    ' on {}'.format(new_master.info['unit_name'])
                )
                changed = True

        assert changed, "The master didn't change"

        assert self.is_port_open(address=self.vip), 'cannot connect to vip'

    def test_change_root_password(self):
        """
        Change root password and verify the change was effectively applied.
        """
        new_root_passwd = 'openstack'

        u = self.master_unit
        root_password, _ = PXC_ROOT_PASSWD
        cmd = "mysql -uroot -p{} -e\"select 1;\" ".format(root_password)
        output, code = u.run(cmd)
        assert code == 0, output

        self.d.configure('percona-cluster', {'root-password': new_root_passwd})
        time.sleep(5)  # give some time to the unit to start the hook
        self.d.sentry.wait()  # wait until the hook finishes

        # try to connect using the new root password
        cmd = "mysql -uroot -p{} -e\"select 1;\" ".format(new_root_passwd)
        output, code = u.run(cmd)
        assert code == 0, output

    def find_master(self, ha=True):
        for unit in self.d.sentry['percona-cluster']:
            if not ha:
                return unit
            # is the vip running here?
            output, code = unit.run('sudo ip a | grep "inet %s/"' % self.vip)
            self.log.info("Checking {}".format(unit.info['unit_name']))
            self.log.debug(output)
            if code == 0:
                self.log.info('vip ({}) running in {}'.format(
                    self.vip,
                    unit.info['unit_name'])
                )
                return unit

    def get_pcmkr_resources(self, unit=None):
        if unit:
            u = unit
        else:
            u = self.master_unit

        output, code = u.run('sudo crm_resource -l')

        assert code == 0, 'could not get "crm resource list"'

        return output.split('\n')

    def is_mysqld_running(self, unit=None):
        if unit:
            u = unit
        else:
            u = self.master_unit

        _, code = u.run('pidof mysqld')

        if code != 0:
            self.log.debug("command returned non-zero '%s'" % (code))
            return False

        return True

    def get_wsrep_value(self, attr, unit=None):
        if unit:
            u = unit
        else:
            u = self.master_unit

        root_password, _ = u.run('leader-get root-password')
        cmd = ("mysql -uroot -p{} -e\"show status like '{}';\"| "
               "grep {}".format(root_password, attr, attr))
        output, code = u.run(cmd)
        if code != 0:
            self.log.debug("command returned non-zero '%s'" % (code))
            return ""

        value = re.search(r"^.+?\s+(.+)", output).group(1)
        self.log.info("%s = %s" % (attr, value))
        return value

    def is_pxc_bootstrapped(self, unit=None):
        value = self.get_wsrep_value('wsrep_ready', unit)
        return value.lower() in ['on', 'ready']

    def get_cluster_size(self, unit=None):
        return self.get_wsrep_value('wsrep_cluster_size', unit)

    def is_port_open(self, unit=None, port='3306', address=None):
        if unit:
            addr = unit.info['public-address']
        elif address:
            addr = address
        else:
            raise Exception('Please provide a unit or address')
        try:
            telnetlib.Telnet(addr, port)
            return True
        except socket.error as e:
            if e.errno == 113:
                self.log.error("could not connect to %s:%s" % (addr, port))
            if e.errno == 111:
                self.log.error("connection refused connecting"
                               " to %s:%s" % (addr,
                                              port))
            return False

    def resolve_cnf_file(self):
        if self._get_openstack_release() < self.xenial_mitaka:
            return '/etc/mysql/my.cnf'
        else:
            return '/etc/mysql/percona-xtradb-cluster.conf.d/mysqld.cnf'

    def test_restart_on_config_change(self):
        """Verify that the specified services are restarted when the
        config is changed."""
        sentry = self.d.sentry['percona-cluster'][0]
        juju_service = 'percona-cluster'

        # Expected default and alternate values
        set_default = {'peer-timeout': 'PT3S'}
        set_alternate = {'peer-timeout': 'PT15S'}

        # Config file affected by juju set config change
        conf_file = self.resolve_cnf_file()

        # Services which are expected to restart upon config change
        services = {
            'mysqld': conf_file,
        }

        # Make config change, check for service restarts
        self.utils.log.debug('Making config change on {}...'
                             .format(juju_service))
        mtime = self.utils.get_sentry_time(sentry)
        self.d.configure(juju_service, set_alternate)
        self._auto_wait_for_status()

        sleep_time = 40
        for s, conf_file in services.iteritems():
            self.utils.log.debug("Checking that service restarted: {}"
                                 .format(s))
            if not self.utils.validate_service_config_changed(
                    sentry, mtime, s, conf_file, retry_count=5,
                    retry_sleep_time=sleep_time,
                    sleep_time=sleep_time):
                self.d.configure(juju_service, set_default)
                msg = "service {} didn't restart after config change".format(s)
                amulet.raise_status(amulet.FAIL, msg=msg)
            sleep_time = 0

        self.d.configure(juju_service, set_default)
        self._auto_wait_for_status()


@@ -6,6 +6,7 @@ applications:
   percona-cluster:
     num_units: 3
     options:
+      vip: {{ TEST_VIP00 }}
       min-cluster-size: 3
   hacluster:
     charm: cs:~openstack-charmers-next/hacluster
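The {{ TEST_VIP00 }} placeholder above is left for the functional-test framework to fill in; with zaza the expectation is that it comes from an environment variable of the same name set for the test run. As a rough illustration only (plain jinja2 with a made-up address, not zaza's actual rendering code):

    import os

    from jinja2 import Template

    os.environ.setdefault("TEST_VIP00", "10.5.100.1")  # hypothetical value

    overlay = Template(
        "applications:\n"
        "  percona-cluster:\n"
        "    options:\n"
        "      vip: {{ TEST_VIP00 }}\n"
    )
    print(overlay.render(**os.environ))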


@@ -1,21 +0,0 @@
#applications:
#  percona-cluster:
#    options:
#      vip: {{ OS_VIP00 }}
relations:
- - percona-cluster
  - hacluster
applications:
  percona-cluster:
    num_units: 3
    options:
      vip: {{ OS_VIP00 }}
      min-cluster-size: 3
  hacluster:
    charm: cs:~openstack-charmers-next/hacluster
    num_units: 0
    options:
      cluster_count: 3


@@ -439,26 +439,30 @@ class UtilsTests(CharmTestCase):
     @mock.patch("percona_utils.resolve_data_dir")
     @mock.patch("percona_utils.os")
-    def test_get_grastate_seqno(self, _os, _resolve_dd):
-        _resolve_dd.return_value = "/tmp"
-        _seqno = "25"
-        _os.path.exists.return_value = True
-        self.yaml.safe_load.return_value = {"seqno": _seqno}
-        with patch_open() as (_open, _file):
-            _open.return_value = _file
-            self.assertEqual(_seqno, percona_utils.get_grastate_seqno())
-
-    @mock.patch("percona_utils.resolve_data_dir")
-    @mock.patch("percona_utils.os")
-    def test_get_grastate_safe_to_bootstrap(self, _os, _resolve_dd):
-        _resolve_dd.return_value = "/tmp"
-        _bootstrap = "0"
-        _os.path.exists.return_value = True
-        self.yaml.safe_load.return_value = {"safe_to_bootstrap": _bootstrap}
-        with patch_open() as (_open, _file):
-            _open.return_value = _file
-            self.assertEqual(
-                _bootstrap, percona_utils.get_grastate_safe_to_bootstrap())
+    def test_get_grastate(self, _os, _resolve_dd):
+        _bootstrap = "1"
+        _seqno = "5422"
+        _data = {"seqno": _seqno, "safe_to_bootstrap": _bootstrap}
+        _os.path.exists.return_value = True
+        _resolve_dd.return_value = "/tmp"
+        self.yaml.safe_load.return_value = _data
+        with patch_open() as (_open, _file):
+            _open.return_value = _file
+            self.assertEqual(
+                _data, percona_utils.get_grastate())
+
+    @mock.patch("percona_utils.get_grastate")
+    def test_get_grastate_seqno(self, _get_grastate):
+        _seqno = "25"
+        _get_grastate.return_value = {"seqno": _seqno}
+        self.assertEqual(_seqno, percona_utils.get_grastate_seqno())
+
+    @mock.patch("percona_utils.get_grastate")
+    def test_get_grastate_safe_to_bootstrap(self, _get_grastate):
+        _bootstrap = "0"
+        _get_grastate.return_value = {"safe_to_bootstrap": _bootstrap}
+        self.assertEqual(
+            _bootstrap, percona_utils.get_grastate_safe_to_bootstrap())

     @mock.patch("percona_utils.resolve_data_dir")
     @mock.patch("percona_utils.os")