From 8a4b7af33c9270ccc433c8845a86908af996d50b Mon Sep 17 00:00:00 2001 From: Federico Ressi Date: Thu, 11 Apr 2019 10:34:10 +0200 Subject: [PATCH] Make setup_stack aware of Heat concurrency issue This is a workaround for a concurrency issue detected on Heat OSP-14, but that could be affecting any version of Heat (no investigation has been perfomed). It is unknow if this is a bug or just a limitation of the service. The issue: Heat looks like has a concurrency issue when creating new stacks with the same name on ame time from more parallel processes. When Tobiko Neutron test cases are executed using tox -e neutron command test case execution is spawn along some worker processes. It happens that the same HeatStackFixture, as it is shared between more test cases, is being created on the same time from more workers. The stack creation request is therefore submitted to Heat service with the same parameters and despite what understood from the documentation, more than one stack starts being created on the same time with the same name. Workaround implementation After these stack creation is started tobiko asks for stack status using stack name and all worker processes receives information of the same stack (with the same stack ID). Therefore those workers that creates a new stack with an ID different from the one got by the stack name are deleted, leaving on most of the cases the cloud with only one stack instance for the same stack name. Change-Id: Ibcdc04d9436664b788d2ed6d68e2a20c74bd6147 --- roles/run-tox/tasks/main.yaml | 30 +------ tobiko/openstack/heat/_stack.py | 100 +++++++++++++++------- tobiko/tests/openstack/heat/test_stack.py | 65 ++++++++------ 3 files changed, 110 insertions(+), 85 deletions(-) diff --git a/roles/run-tox/tasks/main.yaml b/roles/run-tox/tasks/main.yaml index 97c0988ec..0d22f0403 100644 --- a/roles/run-tox/tasks/main.yaml +++ b/roles/run-tox/tasks/main.yaml @@ -1,33 +1,5 @@ -# The number of vcpus is not available on all systems. -# See https://github.com/ansible/ansible/issues/30688 -# When not available, we fall back to ansible_processor_cores -- name: Get hw.logicalcpu from sysctl - shell: sysctl hw.logicalcpu | cut -d' ' -f2 - register: sysctl_hw_logicalcpu - when: ansible_processor_vcpus is not defined - -- name: Get number of cores - set_fact: - num_cores: "{{ansible_processor_vcpus|default(sysctl_hw_logicalcpu.stdout)}}" - -- name: Set default concurrency - set_fact: - stestr_concurrency: "{{ num_cores|int // 2 }}" - -- name: Limit min concurrency to 1 - set_fact: - stestr_concurrency: 1 - when: - stestr_concurrency|int < 1 - -- name: Limit max concurrency to 2 - set_fact: - stestr_concurrency: 2 - when: - stestr_concurrency|int > 2 - - name: Run Tobiko - command: tox -e {{tox_envlist}} {{tox_extra_args}} -- --concurrency={{stestr_concurrency}} + command: tox -e {{tox_envlist}} {{tox_extra_args}} args: chdir: "{{tox_dir}}" become: true diff --git a/tobiko/openstack/heat/_stack.py b/tobiko/openstack/heat/_stack.py index b0e29a185..eed2e9553 100644 --- a/tobiko/openstack/heat/_stack.py +++ b/tobiko/openstack/heat/_stack.py @@ -26,7 +26,6 @@ from tobiko.openstack.heat import _template LOG = log.getLogger(__name__) - # Status INIT_IN_PROGRESS = 'INIT_IN_PROGRESS' INIT_COMPLETE = 'INIT_COMPLETE' @@ -47,6 +46,7 @@ class HeatStackFixture(tobiko.SharedFixture): client = None client_fixture = None + retry_create_stack = 1 wait_interval = 5 stack_name = None template = None @@ -127,31 +127,58 @@ class HeatStackFixture(tobiko.SharedFixture): def setup_stack(self): self.create_stack() - def create_stack(self): + def create_stack(self, retry=None): """Creates stack based on passed parameters.""" - stack = self.wait_for_stack_status( - expected_status={CREATE_COMPLETE, CREATE_FAILED, - CREATE_IN_PROGRESS, DELETE_COMPLETE, - DELETE_FAILED}) - if stack and stack.stack_status in {CREATE_COMPLETE, - CREATE_IN_PROGRESS}: - LOG.debug('Stack %r already exists.', self.stack_name) - return stack + created_stack_ids = set() + retry = retry or self.retry_create_stack or 1 + while True: + stack = self.wait_for_stack_status( + expected_status={CREATE_COMPLETE, CREATE_FAILED, + CREATE_IN_PROGRESS, DELETE_COMPLETE, + DELETE_FAILED}) + stack_status = getattr(stack, 'stack_status', DELETE_COMPLETE) + expected_status = {CREATE_COMPLETE, CREATE_IN_PROGRESS} + if stack_status in expected_status: + LOG.debug('Stack created: %r (id=%r)', self.stack_name, + stack.id) + for stack_id in created_stack_ids: + if self.stack.id != stack_id: + LOG.warning("Concurrent stack creation: delete " + "duplicated stack is %r (id=%r).", + self.stack_name, stack_id) + self.delete_stack(stack_id) - if stack and stack.stack_status.endswith('_FAILED'): - self.delete_stack() - self.wait_for_delete_complete() + return stack - self.stack = None - try: - self.client.stacks.create(stack_name=self.stack_name, - template=self.template.yaml, - parameters=self.parameters) - except exc.HTTPConflict: - LOG.debug('Stack %r already exists.', self.stack_name) - else: - LOG.debug('Creating stack %r...', self.stack_name) - return self.stack + if not retry: + status_reason = getattr(stack, 'stack_status_reason', None) + raise HeatStackCreationFailed(name=self.stack_name, + observed=stack_status, + expected=expected_status, + status_reason=status_reason) + + retry -= 1 + if stack_status.endswith('_FAILED'): + LOG.debug('Delete existing failed stack: %r (id=%r)', + self.stack_name, stack.id) + self.delete_stack() + stack = self.wait_for_stack_status( + expected_status={DELETE_COMPLETE}) + + self.stack = self.outputs = None + try: + LOG.debug('Creating stack %r (re-tries left %d)...', + self.stack_name, retry) + stack_id = self.client.stacks.create( + stack_name=self.stack_name, + template=self.template.yaml, + parameters=self.parameters)['stack']['id'] + except exc.HTTPConflict: + LOG.debug('Stack %r already exists.', self.stack_name) + else: + created_stack_ids.add(stack_id) + LOG.debug('Creating stack %r (id=%r)...', self.stack_name, + stack_id) def cleanup_fixture(self): self.setup_client() @@ -160,15 +187,27 @@ class HeatStackFixture(tobiko.SharedFixture): def cleanup_stack(self): self.delete_stack() - def delete_stack(self): + def delete_stack(self, stack_id=None): """Deletes stack.""" - self.stack = None + if not stack_id: + stack_id = self.stack_id + self.stack = self.outputs = None try: - self.client.stacks.delete(self.stack_name) + self.client.stacks.delete(stack_id) except exc.NotFound: - LOG.debug('Stack %r already deleted.', self.stack_name) + LOG.debug('Stack already deleted: %r (id=%r)', self.stack_name, + stack_id) else: - LOG.debug('Deleting stack %r...', self.stack_name) + LOG.debug('Deleting stack %r (id=%r)...', self.stack_name, + stack_id) + + @property + def stack_id(self): + stack = self.stack + if stack: + return stack.id + else: + return self.stack_name def get_stack(self, resolve_outputs=False): """Returns stack ID.""" @@ -194,8 +233,9 @@ class HeatStackFixture(tobiko.SharedFixture): stack = self.stack or self.get_stack() while (stack and stack.stack_status.endswith('_IN_PROGRESS') and stack.stack_status not in expected_status): - LOG.debug("Waiting for %r stack status (observed=%r, expected=%r)", - self.stack_name, stack.stack_status, expected_status) + LOG.debug("Waiting for %r (id=%r) stack status " + "(observed=%r, expected=%r)", self.stack_name, + stack.id, stack.stack_status, expected_status) time.sleep(self.wait_interval) stack = self.get_stack() diff --git a/tobiko/tests/openstack/heat/test_stack.py b/tobiko/tests/openstack/heat/test_stack.py index 49f321133..5f07b39b0 100644 --- a/tobiko/tests/openstack/heat/test_stack.py +++ b/tobiko/tests/openstack/heat/test_stack.py @@ -137,21 +137,23 @@ class HeatStackFixtureTest(base.OpenstackTest): def test_setup(self, fixture_class=MyStack, stack_name=None, template=None, parameters=None, wait_interval=None, - stack_states=None, create_conflict=False, + stacks=None, create_conflict=False, call_create=True, call_delete=False, call_sleep=False): client = mock.MagicMock(specs=heatclient.Client) get_heat_client = self.patch( 'tobiko.openstack.heat._client.get_heat_client', return_value=client) - if stack_states: - client.stacks.get.side_effect = [ - mock.MagicMock(stack_status=stack_status) - for stack_status in stack_states] - else: - client.stacks.get.side_effect = exc.HTTPNotFound + + stacks = stacks or [ + exc.HTTPNotFound, + mock_stack('CREATE_IN_PROGRESS')] + client.stacks.get.side_effect = stacks if create_conflict: client.stacks.create.side_effect = exc.HTTPConflict + else: + client.stacks.create.return_value = { + 'stack': {'id': ''}} sleep = self.patch('time.sleep') stack = fixture_class(stack_name=stack_name, parameters=parameters, @@ -173,7 +175,7 @@ class HeatStackFixtureTest(base.OpenstackTest): resolve_outputs=False)]) if call_delete: - client.stacks.delete.assert_called_once_with(stack.stack_name) + client.stacks.delete.assert_called_once_with(stack.stack_id) else: client.stacks.delete.assert_not_called() @@ -247,47 +249,54 @@ class HeatStackFixtureTest(base.OpenstackTest): self.test_setup(fixture_class=MyStackWithWaitInterval) def test_setup_when_delete_completed(self): - self.test_setup(stack_states=[_stack.DELETE_COMPLETE]) + self.test_setup(stacks=[mock_stack('DELETE_COMPLETE'), + mock_stack('CREATE_IN_PROGRESS')]) def test_setup_when_delete_failed(self): - self.test_setup(stack_states=[_stack.DELETE_FAILED, - _stack.DELETE_IN_PROGRESS, - _stack.DELETE_COMPLETE], + self.test_setup(stacks=[mock_stack('DELETE_FAILED'), + mock_stack('DELETE_IN_PROGRESS'), + mock_stack('DELETE_COMPLETE'), + mock_stack('CREATE_IN_PROGRESS')], call_delete=True, call_sleep=True) def test_setup_when_delete_failed_fast_delete(self): - self.test_setup(stack_states=[_stack.DELETE_FAILED, - _stack.DELETE_COMPLETE], + self.test_setup(stacks=[mock_stack('DELETE_FAILED'), + mock_stack('DELETE_COMPLETE'), + mock_stack('CREATE_IN_PROGRESS')], call_delete=True) def test_setup_when_create_complete(self): - self.test_setup(stack_states=[_stack.CREATE_COMPLETE], + self.test_setup(stacks=[mock_stack('CREATE_COMPLETE')], call_create=False) def test_setup_when_create_failed(self): - self.test_setup(stack_states=[_stack.CREATE_FAILED, - _stack.DELETE_IN_PROGRESS, - _stack.DELETE_COMPLETE], + self.test_setup(stacks=[mock_stack('CREATE_FAILED'), + mock_stack('DELETE_IN_PROGRESS'), + mock_stack('DELETE_COMPLETE'), + mock_stack('CREATE_IN_PROGRESS')], call_delete=True, call_sleep=True) def test_setup_when_create_failed_fast_delete(self): - self.test_setup(stack_states=[_stack.CREATE_FAILED, - _stack.DELETE_COMPLETE], + self.test_setup(stacks=[mock_stack('CREATE_FAILED'), + mock_stack('DELETE_COMPLETE'), + mock_stack('CREATE_IN_PROGRESS')], call_delete=True) def test_setup_when_create_in_progress(self): - self.test_setup(stack_states=[_stack.CREATE_IN_PROGRESS], + self.test_setup(stacks=[mock_stack('CREATE_IN_PROGRESS')], call_create=False) def test_setup_when_delete_in_progress_then_complete(self): - self.test_setup(stack_states=[_stack.DELETE_IN_PROGRESS, - _stack.DELETE_COMPLETE], + self.test_setup(stacks=[mock_stack('DELETE_IN_PROGRESS'), + mock_stack('DELETE_COMPLETE'), + mock_stack('CREATE_IN_PROGRESS')], call_sleep=True) def test_setup_when_delete_in_progress_then_failed(self): - self.test_setup(stack_states=[_stack.DELETE_IN_PROGRESS, - _stack.DELETE_FAILED, - _stack.DELETE_COMPLETE], + self.test_setup(stacks=[mock_stack('DELETE_IN_PROGRESS'), + mock_stack('DELETE_FAILED'), + mock_stack('DELETE_COMPLETE'), + mock_stack('CREATE_IN_PROGRESS')], call_sleep=True, call_delete=True) def test_setup_when_create_conflict(self): @@ -315,3 +324,7 @@ class HeatStackFixtureTest(base.OpenstackTest): resolve_outputs=True) self.assertEqual('value1', outputs.key1) self.assertEqual('value2', outputs.key2) + + +def mock_stack(status, stack_id=''): + return mock.MagicMock(stack_status=status, id=stack_id)