From 2f67da1534f79b7ec3ce0783cccab004a116fdfa Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Tue, 29 Aug 2017 22:43:54 +0000 Subject: [PATCH] Revert "Adds execution of post-upgrade validations on undercloud upgrade" There is a race condition that makes overcloud deployments fail randomly. See the bug report. Reverting for now because the gate is failing on it too many times and we don't have a proper solution. Partial-Bug: #1713832 This reverts commit dd3398f21407f2a0b7630d1724d2b10c93059c03. Change-Id: I18a55efc78b6dc5fcb83248961eee078cdd6e89d (cherry picked from commit 5e4b286a55426b9e4bdd00aa7535562171291b98) --- instack_undercloud/tests/test_undercloud.py | 78 +------------ instack_undercloud/undercloud.py | 108 ++++++------------ ...ercloud-upgrade.yaml-244b86a00b260888.yaml | 9 -- 3 files changed, 36 insertions(+), 159 deletions(-) delete mode 100644 releasenotes/notes/wire-up-tripleo-validations-undercloud-upgrade.yaml-244b86a00b260888.yaml diff --git a/instack_undercloud/tests/test_undercloud.py b/instack_undercloud/tests/test_undercloud.py index c068db573..2bf65b628 100644 --- a/instack_undercloud/tests/test_undercloud.py +++ b/instack_undercloud/tests/test_undercloud.py @@ -18,7 +18,6 @@ import json import os import subprocess import tempfile -import time import fixtures from keystoneauth1 import exceptions as ks_exceptions @@ -55,9 +54,7 @@ class TestUndercloud(BaseTestCase): @mock.patch('instack_undercloud.undercloud._generate_environment') @mock.patch('instack_undercloud.undercloud._load_config') @mock.patch('instack_undercloud.undercloud._die_tuskar_die') - @mock.patch('instack_undercloud.undercloud._run_validation_groups') - def test_install(self, mock_run_validation_groups, - mock_die_tuskar_die, mock_load_config, + def test_install(self, mock_die_tuskar_die, mock_load_config, mock_generate_environment, mock_run_instack, mock_run_clean_all, mock_run_yum_update, mock_run_orc, mock_post_config, mock_run_command, @@ -74,7 +71,6 @@ class
TestUndercloud(BaseTestCase): ['sudo', 'rm', '-f', '/tmp/svc-map-services'], None, 'rm') mock_upgrade_fact.assert_called_with(False) mock_die_tuskar_die.assert_not_called() - mock_run_validation_groups.assert_not_called() @mock.patch('instack_undercloud.undercloud._handle_upgrade_fact') @mock.patch('instack_undercloud.undercloud._configure_logging') @@ -88,9 +84,7 @@ class TestUndercloud(BaseTestCase): @mock.patch('instack_undercloud.undercloud._generate_environment') @mock.patch('instack_undercloud.undercloud._load_config') @mock.patch('instack_undercloud.undercloud._die_tuskar_die') - @mock.patch('instack_undercloud.undercloud._run_validation_groups') - def test_install_upgrade(self, mock_run_validation_groups, - mock_die_tuskar_die, mock_load_config, + def test_install_upgrade(self, mock_die_tuskar_die, mock_load_config, mock_generate_environment, mock_run_instack, mock_run_yum_clean_all, mock_run_yum_update, mock_run_orc, mock_post_config, mock_run_command, @@ -107,7 +101,6 @@ class TestUndercloud(BaseTestCase): ['sudo', 'rm', '-f', '/tmp/svc-map-services'], None, 'rm') mock_upgrade_fact.assert_called_with(True) mock_die_tuskar_die.assert_called_once() - mock_run_validation_groups.assert_called_once() @mock.patch('instack_undercloud.undercloud._configure_logging') def test_install_exception(self, mock_configure_logging): @@ -747,67 +740,6 @@ class TestPostConfig(base.BaseTestCase): mock_post_config_mistral.assert_called_once_with( instack_env, mock_instance_mistral, mock_instance_swift) - @mock.patch('instack_undercloud.undercloud._get_auth_values') - @mock.patch('instack_undercloud.undercloud._get_session') - @mock.patch('mistralclient.api.client.client', autospec=True) - def test_run_validation_groups_success(self, mock_mistral_client, - mock_get_session, - mock_auth_values): - mock_mistral = mock.Mock() - mock_mistral_client.return_value = mock_mistral - mock_mistral.environments.list.return_value = [] - mock_mistral.executions.get.return_value = 
mock.Mock(state="SUCCESS") - mock_get_session.return_value = mock.MagicMock() - undercloud._run_validation_groups(["post-upgrade"]) - mock_mistral.executions.create.assert_called_once_with( - 'tripleo.validations.v1.run_groups', - workflow_input={ - 'group_names': ['post-upgrade'], - } - ) - - @mock.patch('instack_undercloud.undercloud._get_auth_values') - @mock.patch('instack_undercloud.undercloud._get_session') - @mock.patch('mistralclient.api.client.client', autospec=True) - @mock.patch('time.strptime') - def test_run_validation_groups_fail(self, mock_strptime, - mock_mistral_client, mock_get_session, - mock_auth_values): - mock_mistral = mock.Mock() - mock_mistral_client.return_value = mock_mistral - mock_mistral.environments.list.return_value = [] - mock_mistral.executions.get.return_value = mock.Mock(state="FAIL") - mock_mistral.executions.get_output.return_value = "ERROR!" - mock_mistral.executions.get.id = "1234" - mock_mistral.action_executions.list.return_value = [] - mock_strptime.return_value = time.mktime(time.localtime()) - mock_get_session.return_value = mock.MagicMock() - self.assertRaises( - RuntimeError, undercloud._run_validation_groups, ["post-upgrade"], - "", 360, True) - - @mock.patch('instack_undercloud.undercloud._get_auth_values') - @mock.patch('instack_undercloud.undercloud._get_session') - @mock.patch('mistralclient.api.client.client', autospec=True) - @mock.patch('time.strptime') - def test_run_validation_groups_timeout(self, mock_strptime, - mock_mistral_client, - mock_get_session, mock_auth_values): - mock_mistral = mock.Mock() - mock_mistral_client.return_value = mock_mistral - mock_mistral.environments.list.return_value = [] - mock_mistral.executions.get.id = "1234" - mock_mistral.action_executions.list.return_value = [] - mock_get_session.return_value = mock.MagicMock() - mock_time = mock.MagicMock() - mock_time.return_value = time.mktime(time.localtime()) - mock_strptime.return_value = time.mktime(time.localtime()) - with 
mock.patch('time.time', mock_time): - self.assertRaisesRegexp(RuntimeError, ("TIMEOUT waiting for " - "execution"), - undercloud._run_validation_groups, - ["post-upgrade"], "", -1, True) - def test_create_default_plan(self): mock_mistral = mock.Mock() mock_mistral.environments.list.return_value = [] @@ -870,12 +802,10 @@ class TestPostConfig(base.BaseTestCase): RuntimeError, undercloud._create_default_plan, mock_mistral, [], timeout=0) - @mock.patch('time.strptime') - def test_create_default_plan_failed(self, mock_strptime): + def test_create_default_plan_failed(self): mock_mistral = mock.Mock() mock_mistral.executions.get.return_value = mock.Mock(state="ERROR") - mock_mistral.action_executions.list.return_value = [] - mock_strptime.return_value = time.mktime(time.localtime()) + self.assertRaises( RuntimeError, undercloud._create_default_plan, mock_mistral, []) diff --git a/instack_undercloud/undercloud.py b/instack_undercloud/undercloud.py index f5b8d21cd..2eac8ce27 100644 --- a/instack_undercloud/undercloud.py +++ b/instack_undercloud/undercloud.py @@ -1488,72 +1488,6 @@ def _migrate_plans(mistral, swift, plans): mistral.environments.delete(plan) -def _wait_for_mistral_execution(timeout_at, mistral, execution, message='', - fail_on_error=False): - while time.time() < timeout_at: - exe = mistral.executions.get(execution.id) - if exe.state == "RUNNING": - time.sleep(5) - continue - if exe.state == "SUCCESS": - return - else: - exe_out = "" - exe_created_at = time.strptime(exe.created_at, - "%Y-%m-%d %H:%M:%S") - ae_list = mistral.action_executions.list() - for ae in ae_list: - if ((ae.task_name == "run_validation") and - (ae.state == "ERROR") and - (time.strptime(ae.created_at, "%Y-%m-%d %H:%M:%S") > - exe_created_at)): - task = mistral.tasks.get(ae.task_execution_id) - task_res = task.to_dict().get('result') - exe_out = "%s %s" % (exe_out, task_res) - error_message = "ERROR %s %s Mistral execution ID: %s" % ( - message, exe_out, execution.id) - 
LOG.error(error_message) - if fail_on_error: - raise RuntimeError(error_message) - return - else: - exe = mistral.executions.get(execution.id) - error_message = ("TIMEOUT waiting for execution %s to finish. " - "State: %s" % (exe.id, exe.state)) - LOG.error(error_message) - if fail_on_error: - raise RuntimeError(error_message) - - -def _get_session(): - user, password, project, auth_url = _get_auth_values() - auth_kwargs = { - 'auth_url': auth_url, - 'username': user, - 'password': password, - 'project_name': project, - 'project_domain_name': 'Default', - 'user_domain_name': 'Default', - } - auth_plugin = ks_auth.Password(**auth_kwargs) - return session.Session(auth=auth_plugin) - - -def _run_validation_groups(groups=[], mistral_url='', timeout=360, - fail_on_error=False): - sess = _get_session() - mistral = mistralclient.client(mistral_url=mistral_url, session=sess) - LOG.info('Starting and waiting for validation groups %s ', groups) - execution = mistral.executions.create( - 'tripleo.validations.v1.run_groups', - workflow_input={'group_names': groups} - ) - fail_message = ("error running the validation groups %s " % groups) - timeout_at = time.time() + timeout - _wait_for_mistral_execution(timeout_at, mistral, execution, fail_message, - fail_on_error) - - def _create_default_plan(mistral, plans, timeout=360): plan_name = 'overcloud' queue_name = str(uuid.uuid4()) @@ -1567,13 +1501,29 @@ def _create_default_plan(mistral, plans, timeout=360): 'tripleo.plan_management.v1.create_default_deployment_plan', workflow_input={'container': plan_name, 'queue_name': queue_name} ) + timeout_at = time.time() + timeout - fail_message = ("error creating the default Deployment Plan %s " - "Check the create_default_deployment_plan execution " - "in Mistral with openstack workflow execution list " % - plan_name) - _wait_for_mistral_execution(timeout_at, mistral, execution, fail_message, - fail_on_error=True) + + while time.time() < timeout_at: + exe = 
mistral.executions.get(execution.id) + if exe.state == "RUNNING": + time.sleep(5) + continue + if exe.state == "SUCCESS": + return + else: + raise RuntimeError( + "Failed to create the default Deployment Plan. Please check " + "the create_default_deployment_plan execution in Mistral with " + "`openstack workflow execution list`.") + else: + exe = mistral.executions.get(execution.id) + LOG.error("Timed out waiting for execution %s to finish. State: %s", + exe.id, exe.state) + raise RuntimeError( + "Timed out creating the default Deployment Plan. Please check " + "the create_default_deployment_plan execution in Mistral with " + "`openstack workflow execution list`.") def _prepare_ssh_environment(mistral): @@ -1606,7 +1556,16 @@ def _post_config_mistral(instack_env, mistral, swift): def _post_config(instack_env): _copy_stackrc() user, password, project, auth_url = _get_auth_values() - sess = _get_session() + auth_kwargs = { + 'auth_url': auth_url, + 'username': user, + 'password': password, + 'project_name': project, + 'project_domain_name': 'Default', + 'user_domain_name': 'Default', + } + auth_plugin = ks_auth.Password(**auth_kwargs) + sess = session.Session(auth=auth_plugin) # TODO(andreykurilin): remove this check with support of novaclient 6.0.0 if nc.__version__[0] == "6": nova = novaclient.Client(2, user, password, project, auth_url=auth_url) @@ -1724,9 +1683,6 @@ def install(instack_root, upgrade=False): _run_orc(instack_env) _post_config(instack_env) _run_command(['sudo', 'rm', '-f', '/tmp/svc-map-services'], None, 'rm') - if upgrade and CONF.enable_validations: # Run post-upgrade validations - mistral_url = instack_env['UNDERCLOUD_ENDPOINT_MISTRAL_PUBLIC'] - _run_validation_groups(["post-upgrade"], mistral_url) except Exception as e: LOG.debug("An exception occurred", exc_info=True) LOG.error(FAILURE_MESSAGE, diff --git a/releasenotes/notes/wire-up-tripleo-validations-undercloud-upgrade.yaml-244b86a00b260888.yaml 
b/releasenotes/notes/wire-up-tripleo-validations-undercloud-upgrade.yaml-244b86a00b260888.yaml deleted file mode 100644 index 7fed25e5b..000000000 --- a/releasenotes/notes/wire-up-tripleo-validations-undercloud-upgrade.yaml-244b86a00b260888.yaml +++ /dev/null @@ -1,9 +0,0 @@ ---- -upgrade: - - Wires up execution of the "post-upgrade" group of tripleo-validations to - sanity check the undercloud. The validations are executed at the - very end of the process, after the undercloud has been fully upgraded - and all services started in the upgraded versions. If there is an error it - is logged but not raised so these validations will not fail the upgrade. - The operator can set the existing 'enable_validations' to false to skip - these validations.