Spread pacemaker remote resources across cluster.
Use location directives to spread pacemaker remote resources across cluster. This is to prevent multiple resources being taken down in the event of a single node failure. This would usually not be a problem but if the node is being queried by masakari host monitors at the time the node goes down then the query can hang. Change-Id: Ib8a667d0d82ef3dcd4da27e62460b4f0ce32ee43 Partial-Bug: #1889094 Depends-On: Ic45dbdd9d8581f25549580c7e98a8d6e0bf8c3e7
This commit is contained in:
parent
b40a6754b0
commit
527fd2c704
126
hooks/pcmk.py
126
hooks/pcmk.py
@ -283,23 +283,40 @@ def crm_version():
|
||||
return StrictVersion(matched.group(1))
|
||||
|
||||
|
||||
def crm_update_resource(res_name, res_type, res_params=None, force=False):
|
||||
"""Update a resource using `crm configure load update`
|
||||
def _crm_update_object(update_template, update_ctxt, hash_keys, unitdata_key,
|
||||
res_params=None, force=False):
|
||||
"""Update a object using `crm configure load update`
|
||||
|
||||
:param res_name: resource name
|
||||
:param res_type: resource type (e.g. IPaddr2)
|
||||
:param res_params: resource's parameters (e.g. "params ip=10.5.250.250")
|
||||
:param update_template: Format string to create object when update_ctxt is
|
||||
applied.
|
||||
:type update_template: str
|
||||
:param update_ctxt: Context to apply to update_template to generate object
|
||||
creation directive.
|
||||
:type update_ctxt: dict
|
||||
:param hash_keys: List of keys to use from update_ctxt when generating
|
||||
objects hash.
|
||||
:type hash_keys: List[str]
|
||||
:param unitdata_key: Key to use when storing objects hash in in unitdata
|
||||
kv.
|
||||
:type unitdata_key: str
|
||||
:param res_params: Resource's additional parameters
|
||||
(e.g. "params ip=10.5.250.250")
|
||||
:type res_params: str or None
|
||||
:param force: Whether to force the update irrespective of whats currently
|
||||
configured.
|
||||
:type force: bool
|
||||
:returns: Return code (0 => success)
|
||||
:rtype: int
|
||||
"""
|
||||
db = unitdata.kv()
|
||||
res_hash = resource_checksum(res_name, res_type, res_params)
|
||||
|
||||
if not force and db.get('{}-{}'.format(res_name, res_type)) == res_hash:
|
||||
res_hash = generate_checksum(update_ctxt[k] for k in hash_keys)
|
||||
if not force and db.get(unitdata_key) == res_hash:
|
||||
log("Resource {} already defined and parameters haven't changed"
|
||||
.format(res_name))
|
||||
.format(update_ctxt['object_name']))
|
||||
return 0
|
||||
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
f.write('primitive {} {}'.format(res_name, res_type).encode('ascii'))
|
||||
f.write(update_template.format(**update_ctxt).encode('ascii'))
|
||||
|
||||
if res_params:
|
||||
f.write(' \\\n\t{}'.format(res_params).encode('ascii'))
|
||||
@ -308,7 +325,9 @@ def crm_update_resource(res_name, res_type, res_params=None, force=False):
|
||||
|
||||
f.flush()
|
||||
f.seek(0)
|
||||
log('Updating resource {}'.format(res_name), level=INFO)
|
||||
log(
|
||||
'Updating resource {}'.format(update_ctxt['object_name']),
|
||||
level=INFO)
|
||||
log('File content:\n{}'.format(f.read()), level=DEBUG)
|
||||
cmd = "crm configure load update {}".format(f.name)
|
||||
log('Update command: {}'.format(cmd))
|
||||
@ -321,11 +340,86 @@ def crm_update_resource(res_name, res_type, res_params=None, force=False):
|
||||
log('crm command exit code: {}'.format(retcode), level=level)
|
||||
|
||||
if retcode == 0:
|
||||
db.set('{}-{}'.format(res_name, res_type), res_hash)
|
||||
db.set(unitdata_key, res_hash)
|
||||
db.flush()
|
||||
|
||||
return retcode
|
||||
|
||||
|
||||
def crm_update_resource(res_name, res_type, res_params=None, force=False):
|
||||
"""Update a resource using `crm configure load update`
|
||||
|
||||
:param res_name: resource name
|
||||
:type res_name: str
|
||||
:param res_type: resource type (e.g. IPaddr2)
|
||||
:type res_type: str
|
||||
:param res_params: resource's parameters (e.g. "params ip=10.5.250.250")
|
||||
:type res_params: str or None
|
||||
:param force: Whether to force the update irrespective of whats currently
|
||||
configured.
|
||||
:type force: bool
|
||||
:returns: Return code (0 => success)
|
||||
:rtype: int
|
||||
"""
|
||||
hash_keys = ['resource_type']
|
||||
if res_params:
|
||||
hash_keys.append('resource_params')
|
||||
return _crm_update_object(
|
||||
'primitive {object_name} {resource_type}',
|
||||
{
|
||||
'object_name': res_name,
|
||||
'resource_params': res_params,
|
||||
'resource_type': res_type},
|
||||
hash_keys,
|
||||
'{}-{}'.format(res_name, res_type),
|
||||
res_params=res_params,
|
||||
force=force)
|
||||
|
||||
|
||||
def crm_update_location(location_name, resource_name, score, node,
|
||||
force=False):
|
||||
"""Update a location rule.
|
||||
|
||||
:param location_name: Name of location rule.
|
||||
:type location_name: str
|
||||
:param resource_name: Resource name location rule governs.
|
||||
:type resource_name: str
|
||||
:param score: The score for the resource running on node.
|
||||
:type score: int
|
||||
:param node: Name of the node this rule applies to.
|
||||
:type node: str
|
||||
:param force: Whether to force the update irrespective of whats currently
|
||||
configured.
|
||||
:type force: bool
|
||||
:returns: Return code (0 => success)
|
||||
:rtype: int
|
||||
"""
|
||||
return _crm_update_object(
|
||||
'location {object_name} {resource_name} {score}: {node}',
|
||||
{
|
||||
'object_name': location_name,
|
||||
'resource_name': resource_name,
|
||||
'score': str(score),
|
||||
'node': node},
|
||||
['resource_name', 'score', 'node'],
|
||||
'{}-{}'.format(location_name, resource_name),
|
||||
force=force)
|
||||
|
||||
|
||||
def generate_checksum(check_strings):
|
||||
"""Create a md5 checksum using each string in the list.
|
||||
|
||||
:param check_strings: resource name
|
||||
:type check_strings: List[str]
|
||||
:returns: Hash generated from strings.
|
||||
:rtype: str
|
||||
"""
|
||||
m = hashlib.md5()
|
||||
for entry in check_strings:
|
||||
m.update(entry.encode('utf-8'))
|
||||
return m.hexdigest()
|
||||
|
||||
|
||||
def resource_checksum(res_name, res_type, res_params=None):
|
||||
"""Create a md5 checksum of the resource parameters.
|
||||
|
||||
@ -333,9 +427,7 @@ def resource_checksum(res_name, res_type, res_params=None):
|
||||
:param res_type: resource type (e.g. IPaddr2)
|
||||
:param res_params: resource's parameters (e.g. "params ip=10.5.250.250")
|
||||
"""
|
||||
|
||||
m = hashlib.md5()
|
||||
m.update(res_type.encode('utf-8'))
|
||||
data = [res_type]
|
||||
if res_params is not None:
|
||||
m.update(res_params.encode('utf-8'))
|
||||
return m.hexdigest()
|
||||
data.append(res_params)
|
||||
return generate_checksum(data)
|
||||
|
@ -26,6 +26,7 @@ import fcntl
|
||||
import struct
|
||||
import time
|
||||
import xml.etree.ElementTree as ET
|
||||
import itertools
|
||||
|
||||
from base64 import b64decode
|
||||
|
||||
@ -763,6 +764,24 @@ def set_cluster_symmetry():
|
||||
pcmk.commit(cmd, failure_is_fatal=True)
|
||||
|
||||
|
||||
def add_score_location_rule(res_name, node, location_score):
|
||||
"""Add or update a location rule that uses a score.
|
||||
|
||||
:param res_name: Resource that this location rule controls.
|
||||
:type res_name: str
|
||||
:param node: Node that this location rule relates to.
|
||||
:type node: str
|
||||
:param location_score: The score to give this location.
|
||||
:type location_score: int
|
||||
"""
|
||||
loc_constraint_name = 'loc-{}-{}'.format(res_name, node)
|
||||
pcmk.crm_update_location(
|
||||
loc_constraint_name,
|
||||
res_name,
|
||||
location_score,
|
||||
node)
|
||||
|
||||
|
||||
def add_location_rules_for_local_nodes(res_name):
|
||||
"""Add location rules for running resource on local nodes.
|
||||
|
||||
@ -783,6 +802,29 @@ def add_location_rules_for_local_nodes(res_name):
|
||||
log('%s' % cmd, level=DEBUG)
|
||||
|
||||
|
||||
def add_location_rules_for_pacemaker_remotes(res_names):
|
||||
"""Add location rules for pacemaker remote resources on local nodes.
|
||||
|
||||
Add location rules allowing the pacemaker remote resource to run on a local
|
||||
node. Use location score rules to spread resources out.
|
||||
|
||||
:param res_names: Pacemaker remote resource names.
|
||||
:type res_names: List[str]
|
||||
"""
|
||||
res_names = sorted(res_names)
|
||||
nodes = sorted(pcmk.list_nodes())
|
||||
prefered_nodes = list(zip(res_names, itertools.cycle(nodes)))
|
||||
for res_name in res_names:
|
||||
for node in nodes:
|
||||
location_score = 0
|
||||
if (res_name, node) in prefered_nodes:
|
||||
location_score = 200
|
||||
add_score_location_rule(
|
||||
res_name,
|
||||
node,
|
||||
location_score)
|
||||
|
||||
|
||||
def configure_pacemaker_remote(remote_hostname, remote_ip):
|
||||
"""Create a resource corresponding to the pacemaker remote node.
|
||||
|
||||
@ -912,9 +954,14 @@ def configure_resources_on_remotes(resources=None, clones=None, groups=None):
|
||||
'location constraints')
|
||||
log(msg, level=WARNING)
|
||||
return
|
||||
pacemaker_remotes = []
|
||||
for res_name, res_type in resources.items():
|
||||
if res_name not in list(clones.values()) + list(groups.values()):
|
||||
add_location_rules_for_local_nodes(res_name)
|
||||
if res_type == 'ocf:pacemaker:remote':
|
||||
pacemaker_remotes.append(res_name)
|
||||
else:
|
||||
add_location_rules_for_local_nodes(res_name)
|
||||
add_location_rules_for_pacemaker_remotes(pacemaker_remotes)
|
||||
for cl_name in clones:
|
||||
add_location_rules_for_local_nodes(cl_name)
|
||||
# Limit clone resources to only running on X number of nodes where X
|
||||
|
@ -34,6 +34,7 @@ class TestCorosyncConf(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.tmpdir = tempfile.mkdtemp()
|
||||
self.tmpfile = tempfile.NamedTemporaryFile(delete=False)
|
||||
os.environ['UNIT_STATE_DB'] = ':memory:'
|
||||
|
||||
def tearDown(self):
|
||||
shutil.rmtree(self.tmpdir)
|
||||
|
@ -704,6 +704,16 @@ class UtilsTestCase(unittest.TestCase):
|
||||
utils.set_cluster_symmetry()
|
||||
self.assertFalse(commit.called)
|
||||
|
||||
@mock.patch('pcmk.crm_update_location')
|
||||
def test_add_score_location_rule(self, crm_update_location):
|
||||
# Check no update required
|
||||
utils.add_score_location_rule('res1', 'juju-lxd-0', 0)
|
||||
crm_update_location.assert_called_once_with(
|
||||
'loc-res1-juju-lxd-0',
|
||||
'res1',
|
||||
0,
|
||||
'juju-lxd-0')
|
||||
|
||||
@mock.patch('pcmk.commit')
|
||||
@mock.patch('pcmk.crm_opt_exists')
|
||||
@mock.patch('pcmk.list_nodes')
|
||||
@ -717,6 +727,35 @@ class UtilsTestCase(unittest.TestCase):
|
||||
'crm -w -F configure location loc-res1-node2 res1 0: node2',
|
||||
failure_is_fatal=True)
|
||||
|
||||
@mock.patch.object(utils, 'add_score_location_rule')
|
||||
@mock.patch('pcmk.list_nodes')
|
||||
def test_add_location_rules_for_pacemaker_remotes(self, list_nodes,
|
||||
add_score_location_rule):
|
||||
list_nodes.return_value = ['node1', 'node2', 'node3']
|
||||
utils.add_location_rules_for_pacemaker_remotes([
|
||||
'res1',
|
||||
'res2',
|
||||
'res3',
|
||||
'res4',
|
||||
'res5'])
|
||||
expect = [
|
||||
mock.call('res1', 'node1', 200),
|
||||
mock.call('res1', 'node2', 0),
|
||||
mock.call('res1', 'node3', 0),
|
||||
mock.call('res2', 'node1', 0),
|
||||
mock.call('res2', 'node2', 200),
|
||||
mock.call('res2', 'node3', 0),
|
||||
mock.call('res3', 'node1', 0),
|
||||
mock.call('res3', 'node2', 0),
|
||||
mock.call('res3', 'node3', 200),
|
||||
mock.call('res4', 'node1', 200),
|
||||
mock.call('res4', 'node2', 0),
|
||||
mock.call('res4', 'node3', 0),
|
||||
mock.call('res5', 'node1', 0),
|
||||
mock.call('res5', 'node2', 200),
|
||||
mock.call('res5', 'node3', 0)]
|
||||
add_score_location_rule.assert_has_calls(expect)
|
||||
|
||||
@mock.patch('pcmk.is_resource_present')
|
||||
@mock.patch('pcmk.commit')
|
||||
def test_configure_pacemaker_remote(self, commit, is_resource_present):
|
||||
|
@ -231,6 +231,8 @@ class TestPcmk(unittest.TestCase):
|
||||
|
||||
@mock.patch('subprocess.call')
|
||||
def test_crm_update_resource(self, mock_call):
|
||||
db = unitdata.kv()
|
||||
db.set('res_test-IPaddr2', '')
|
||||
mock_call.return_value = 0
|
||||
|
||||
with mock.patch.object(tempfile, "NamedTemporaryFile",
|
||||
|
Loading…
Reference in New Issue
Block a user