From d4517cd9cd7093ff70c11e162b5332cf930d1bc1 Mon Sep 17 00:00:00 2001
From: "Brad P. Crochet"
Date: Mon, 6 Feb 2017 11:00:37 -0500
Subject: [PATCH] Implement check node counts as a custom action

Adds a custom action to check the statistics of the hypervisors and
check the node counts of the stack.

Change-Id: I4a0576801ffbf8d29135966d7181f277ad2f8db0
Partial-Bug: #1638697
---
 .../check-node-counts-bb80a5cdd8d10475.yaml   |   4 +
 setup.cfg                                     |   1 +
 tripleo_common/actions/validations.py         |  86 ++++++++
 .../tests/actions/test_validations.py         | 201 ++++++++++++++++++
 workbooks/validations.yaml                    | 124 ++++++++++++
 5 files changed, 416 insertions(+)
 create mode 100644 releasenotes/notes/check-node-counts-bb80a5cdd8d10475.yaml

diff --git a/releasenotes/notes/check-node-counts-bb80a5cdd8d10475.yaml b/releasenotes/notes/check-node-counts-bb80a5cdd8d10475.yaml
new file mode 100644
index 000000000..70cda7223
--- /dev/null
+++ b/releasenotes/notes/check-node-counts-bb80a5cdd8d10475.yaml
@@ -0,0 +1,4 @@
+---
+features:
+  - Adds an action and workflow used to check the node
+    counts and the hypervisor statistics.
diff --git a/setup.cfg b/setup.cfg
index 1307dd9f7..70817eec8 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -98,6 +98,7 @@ mistral.actions =
     tripleo.validations.check_boot_images = tripleo_common.actions.validations:CheckBootImagesAction
     tripleo.validations.check_flavors = tripleo_common.actions.validations:CheckFlavorsAction
     tripleo.validations.check_node_boot_configuration = tripleo_common.actions.validations:CheckNodeBootConfigurationAction
+    tripleo.validations.check_nodes_count = tripleo_common.actions.validations:CheckNodesCountAction
     tripleo.validations.get_pubkey = tripleo_common.actions.validations:GetPubkeyAction
     tripleo.validations.enabled = tripleo_common.actions.validations:Enabled
     tripleo.validations.list_groups = tripleo_common.actions.validations:ListGroupsAction
diff --git a/tripleo_common/actions/validations.py b/tripleo_common/actions/validations.py
index 861166e19..27f988f68 100644
--- a/tripleo_common/actions/validations.py
+++ b/tripleo_common/actions/validations.py
@@ -384,3 +384,89 @@ class VerifyProfilesAction(base.TripleOAction):
         """Get node capabilities."""
         return nodeutils.capabilities_to_dict(
             node['properties'].get('capabilities'))
+
+
+class CheckNodesCountAction(base.TripleOAction):
+    """Validate hypervisor statistics and requested node counts.
+
+    Sums the node count requested for every role in ``default_role_counts``
+    and compares it with the number of usable baremetal nodes (associated
+    plus available) and with the hypervisor count reported in
+    ``statistics``.
+    """
+
+    # TODO(bcrochet): The validation actions are temporary. This logic should
+    #                 move to the tripleo-validations project eventually.
+    def __init__(self, statistics, stack, associated_nodes, available_nodes,
+                 parameters, default_role_counts):
+        super(CheckNodesCountAction, self).__init__()
+        self.statistics = statistics
+        self.stack = stack
+        self.associated_nodes = associated_nodes
+        self.available_nodes = available_nodes
+        self.parameters = parameters
+        self.default_role_counts = default_role_counts
+
+    def run(self):
+        """Run the check and return a Mistral ``Result``.
+
+        The result carries ``errors``, ``warnings`` and a ``result`` dict;
+        it is returned as ``error`` when any check failed, else as ``data``.
+        """
+        errors = []
+        warnings = []
+
+        requested_count = 0
+
+        for param, default in self.default_role_counts.items():
+            if self.stack:
+                try:
+                    current = int(self.stack['parameters'][param])
+                except KeyError:
+                    # We could be adding a new role on stack-update, so there's
+                    # no assumption the parameter exists in the stack.
+                    current = self.parameters.get(param, default)
+                # Counts may arrive as strings (the stack value above is
+                # converted for the same reason), so normalize before adding.
+                requested_count += int(self.parameters.get(param, current))
+            else:
+                requested_count += int(self.parameters.get(param, default))
+
+        # We get number of nodes usable for the stack by getting already
+        # used (associated) nodes and number of nodes which can be used
+        # (not in maintenance mode).
+        # Assumption is that associated nodes are part of the stack (only
+        # one overcloud is supported).
+        associated = len(self.associated_nodes)
+        available = len(self.available_nodes)
+
+        available_count = associated + available
+
+        if requested_count > available_count:
+            errors.append('Not enough baremetal nodes - available: %d, '
+                          'requested: %d' %
+                          (available_count, requested_count))
+
+        if self.statistics['count'] < available_count:
+            errors.append('Only %d nodes are exposed to Nova of %d requests. '
+                          'Check that enough nodes are in "available" state '
+                          'with maintenance mode off.' %
+                          (self.statistics['count'], available_count))
+
+        return_value = {
+            'errors': errors,
+            'warnings': warnings,
+            'result': {
+                'statistics': self.statistics,
+                'enough_nodes': True,
+                'requested_count': requested_count,
+                'available_count': available_count,
+            }
+        }
+        if errors:
+            return_value['result']['enough_nodes'] = False
+            mistral_result = {'error': return_value}
+        else:
+            mistral_result = {'data': return_value}
+
+        return mistral_workflow_utils.Result(**mistral_result)
diff --git a/tripleo_common/tests/actions/test_validations.py b/tripleo_common/tests/actions/test_validations.py
index 25bd0ad6c..163f980b5 100644
--- a/tripleo_common/tests/actions/test_validations.py
+++ b/tripleo_common/tests/actions/test_validations.py
@@ -656,3 +656,204 @@ class TestVerifyProfilesAction(base.TestCase):
             ]
         })
         self._test(expected)
+
+
+class TestCheckNodesCountAction(base.TestCase):
+    def setUp(self):
+        super(TestCheckNodesCountAction, self).setUp()
+        self.defaults = {
+            'ControllerCount': 1,
+            'ComputeCount': 1,
+            'ObjectStorageCount': 0,
+            'BlockStorageCount': 0,
+            'CephStorageCount': 0,
+        }
+        self.stack = None
+        self.action_args = {
+            'stack': None,
+            'associated_nodes': self._ironic_node_list(True, False),
+            'available_nodes': self._ironic_node_list(False, True),
+            'parameters': {},
+            'default_role_counts': self.defaults,
+            'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1},
+        }
+
+    def _ironic_node_list(self, associated, maintenance):
+        # Stand-ins for ironic node lists; the action only takes their length.
+        if associated:
+            return range(2)
+        if maintenance:
+            return range(1)
+        return []
+
+    def test_run_check_hypervisor_stats(self):
+        action_args = self.action_args.copy()
+
+        action = validations.CheckNodesCountAction(**action_args)
+        result = action.run()
+
+        expected = mistral_workflow_utils.Result(
+            data={
+                'result': {
+                    'requested_count': 2,
+                    'available_count': 3,
+                    'statistics': {'count': 3, 'vcpus': 1, 'memory_mb': 1},
+                    'enough_nodes': True
+                },
+                'errors': [],
+                'warnings': [],
+            })
+        self.assertEqual(expected, result)
+
+    def test_run_check_hypervisor_stats_not_met(self):
+        statistics = {'count': 0, 'memory_mb': 0, 'vcpus': 0}
+
+        action_args = self.action_args.copy()
+        action_args.update({'statistics': statistics})
+
+        action = validations.CheckNodesCountAction(**action_args)
+        result = action.run()
+
+        expected = mistral_workflow_utils.Result(
+            error={
+                'errors': [
+                    'Only 0 nodes are exposed to Nova of 3 requests. Check '
+                    'that enough nodes are in "available" state with '
+                    'maintenance mode off.'],
+                'warnings': [],
+                'result': {
+                    'statistics': statistics,
+                    'enough_nodes': False,
+                    'requested_count': 2,
+                    'available_count': 3,
+                }
+            })
+        self.assertEqual(expected, result)
+
+    def test_check_nodes_count_deploy_enough_nodes(self):
+        action_args = self.action_args.copy()
+        action_args['parameters'] = {'ControllerCount': 2}
+
+        action = validations.CheckNodesCountAction(**action_args)
+        result = action.run()
+
+        expected = mistral_workflow_utils.Result(
+            data={
+                'errors': [],
+                'warnings': [],
+                'result': {
+                    'enough_nodes': True,
+                    'requested_count': 3,
+                    'available_count': 3,
+                    'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1}
+                }
+            })
+        self.assertEqual(expected, result)
+
+    def test_check_nodes_count_deploy_too_much(self):
+        action_args = self.action_args.copy()
+        action_args['parameters'] = {'ControllerCount': 3}
+
+        action = validations.CheckNodesCountAction(**action_args)
+        result = action.run()
+
+        expected = mistral_workflow_utils.Result(
+            error={
+                'errors': [
+                    "Not enough baremetal nodes - available: 3, requested: 4"],
+                'warnings': [],
+                'result': {
+                    'enough_nodes': False,
+                    'requested_count': 4,
+                    'available_count': 3,
+                    'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1}
+                }
+            })
+        self.assertEqual(expected, result)
+
+    def test_check_nodes_count_scale_enough_nodes(self):
+        action_args = self.action_args.copy()
+        action_args['parameters'] = {'ControllerCount': 2}
+        action_args['stack'] = {'parameters': self.defaults.copy()}
+
+        action = validations.CheckNodesCountAction(**action_args)
+        result = action.run()
+
+        expected = mistral_workflow_utils.Result(
+            data={
+                'errors': [],
+                'warnings': [],
+                'result': {
+                    'enough_nodes': True,
+                    'requested_count': 3,
+                    'available_count': 3,
+                    'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1}
+                },
+            })
+        self.assertEqual(expected, result)
+
+    def test_check_nodes_count_scale_too_much(self):
+        action_args = self.action_args.copy()
+        action_args['parameters'] = {'ControllerCount': 3}
+        action_args['stack'] = {'parameters': self.defaults.copy()}
+
+        action = validations.CheckNodesCountAction(**action_args)
+        result = action.run()
+
+        expected = mistral_workflow_utils.Result(
+            error={
+                'errors': [
+                    'Not enough baremetal nodes - available: 3, requested: 4'],
+                'warnings': [],
+                'result': {
+                    'enough_nodes': False,
+                    'requested_count': 4,
+                    'available_count': 3,
+                    'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1}
+                }
+            })
+        self.assertEqual(expected, result)
+
+    def test_check_default_param_not_in_stack(self):
+        missing_param = 'CephStorageCount'
+        action_args = self.action_args.copy()
+        action_args['parameters'] = {'ControllerCount': 3}
+        action_args['stack'] = {'parameters': self.defaults.copy()}
+        del action_args['stack']['parameters'][missing_param]
+
+        action = validations.CheckNodesCountAction(**action_args)
+        result = action.run()
+
+        expected = mistral_workflow_utils.Result(
+            error={
+                'errors': [
+                    'Not enough baremetal nodes - available: 3, requested: 4'],
+                'warnings': [],
+                'result': {
+                    'enough_nodes': False,
+                    'requested_count': 4,
+                    'available_count': 3,
+                    'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1}
+                }
+            })
+        self.assertEqual(expected, result)
+
+    def test_check_nodes_count_string_parameters(self):
+        action_args = self.action_args.copy()
+        action_args['parameters'] = {'ControllerCount': '2'}
+
+        action = validations.CheckNodesCountAction(**action_args)
+        result = action.run()
+
+        expected = mistral_workflow_utils.Result(
+            data={
+                'errors': [],
+                'warnings': [],
+                'result': {
+                    'enough_nodes': True,
+                    'requested_count': 3,
+                    'available_count': 3,
+                    'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1}
+                }
+            })
+        self.assertEqual(expected, result)
diff --git a/workbooks/validations.yaml b/workbooks/validations.yaml
index 36f472091..d4e88ed70 100644
--- a/workbooks/validations.yaml
+++ b/workbooks/validations.yaml
@@ -517,3 +517,127 @@ workflows:
                 warnings: <% $.warnings %>
         on-success:
           - fail: <% $.get('status') = "FAILED" %>
+
+  # Gathers hypervisor statistics, the stack and the ironic node lists, runs
+  # tripleo.validations.check_nodes_count and posts the outcome to the Zaqar
+  # queue. When run_validations is false it goes straight to send_message.
+  check_default_nodes_count:
+    input:
+      - stack_id: overcloud
+      - parameters: {}
+      - default_role_counts: {}
+      - run_validations: true
+      - queue_name: tripleo
+    output:
+      statistics: <% $.statistics %>
+      errors: <% $.errors %>
+      warnings: <% $.warnings %>
+
+    tasks:
+      check_run_validations:
+        on-complete:
+          - get_hypervisor_statistics: <% $.run_validations %>
+          - send_message: <% not $.run_validations %>
+
+      get_hypervisor_statistics:
+        action: nova.hypervisors_statistics
+        on-success: get_stack
+        on-error: fail_get_hypervisor_statistics
+        publish:
+          statistics: <% task(get_hypervisor_statistics).result %>
+
+      fail_get_hypervisor_statistics:
+        on-success: send_message
+        publish:
+          status: FAILED
+          message: <% task(get_hypervisor_statistics).result %>
+          errors: []
+          warnings: []
+          statistics: null
+
+      get_stack:
+        action: heat.stacks_get
+        input:
+          stack_id: <% $.stack_id %>
+        on-complete: get_associated_nodes
+        publish:
+          stack: <% task(get_stack).result %>
+        publish-on-error:
+          stack: null
+
+      get_associated_nodes:
+        action: ironic.node_list
+        on-success: get_available_nodes
+        on-error: fail_get_associated_nodes
+        input:
+          associated: true
+        publish:
+          associated_nodes: <% task(get_associated_nodes).result %>
+
+      fail_get_associated_nodes:
+        on-success: send_message
+        publish:
+          status: FAILED
+          message: <% task(get_associated_nodes).result %>
+          errors: []
+          warnings: []
+
+      get_available_nodes:
+        action: ironic.node_list
+        on-success: check_nodes_count
+        on-error: fail_get_available_nodes
+        input:
+          associated: false
+          maintenance: false
+        publish:
+          available_nodes: <% task(get_available_nodes).result %>
+
+      fail_get_available_nodes:
+        on-success: send_message
+        publish:
+          status: FAILED
+          message: <% task(get_available_nodes).result %>
+          errors: []
+          warnings: []
+
+      check_nodes_count:
+        action: tripleo.validations.check_nodes_count
+        input:
+          statistics: <% $.statistics %>
+          stack: <% $.stack %>
+          associated_nodes: <% $.associated_nodes %>
+          available_nodes: <% $.available_nodes %>
+          parameters: <% $.parameters %>
+          default_role_counts: <% $.default_role_counts %>
+        on-success: send_message
+        on-error: fail_check_nodes_count
+        publish:
+          errors: <% task(check_nodes_count).result.errors %>
+          warnings: <% task(check_nodes_count).result.warnings %>
+
+      fail_check_nodes_count:
+        on-success: send_message
+        publish:
+          status: FAILED
+          message: <% task(check_nodes_count).result %>
+          statistics: null
+          errors: <% task(check_nodes_count).result.errors %>
+          warnings: <% task(check_nodes_count).result.warnings %>
+
+      send_message:
+        action: zaqar.queue_post
+        retry: count=5 delay=1
+        input:
+          queue_name: <% $.queue_name %>
+          messages:
+            body:
+              type: tripleo.validations.v1.check_hypervisor_stats
+              payload:
+                status: <% $.get('status', 'SUCCESS') %>
+                message: <% $.get('message', '') %>
+                execution: <% execution() %>
+                statistics: <% $.statistics %>
+                errors: <% $.errors %>
+                warnings: <% $.warnings %>
+        on-success:
+          - fail: <% $.get('status') = "FAILED" %>