From 374fafd66afa792ba197403b479dadbfa3055bce Mon Sep 17 00:00:00 2001
From: Emilien Macchi
Date: Mon, 25 Mar 2019 15:48:47 -0400
Subject: [PATCH] mistral: configure heartbeat parameters to avoid action timeout

This patch configures and increases the default heartbeat parameters
in Mistral so we don't hit timeouts when an action in a workflow takes
time to reply back to Mistral, when deploying an Overcloud.

Parameters added:

  MistralMaxMissedHeartbeats:
    type: number
    default: 15
    description: >
      The maximum amount of missed heartbeats to be allowed.
      If set to 0 then this feature is disabled. See check_interval for more
      details.
    constraints:
      - range: { min: 0 }
  MistralCheckInterval:
    type: number
    default: 20
    description: >
      How often (in seconds) action executions are checked.
      For example when check_interval is 10, check action
      executions every 10 seconds. When the checker runs it will
      transit all running action executions to error if the last
      heartbeat received is older than 10 * max_missed_heartbeats
      seconds. If set to 0 then this feature is disabled.
    constraints:
      - range: { min: 0 }
  MistralFirstHeartbeatTimeout:
    type: number
    default: 3600
    description: >
      The first heartbeat is handled differently, to provide a
      grace period in case there is no available executor to handle
      the action execution. For example when
      first_heartbeat_timeout = 3600, wait 3600 seconds before
      closing the action executions that never received a heartbeat.
    constraints:
      - range: { min: 0 }

Configuration applied to Undercloud:

  Maximum missed heartbeats: 30
  Time between interval checks: 40 seconds
  First heartbeat timeout: 7200 seconds

Depends-On: I7a2313bed58485e077ae210d222902f4f997f0f0
Change-Id: Id8663e76b61c9e09547c228da226b706383a3e20
Closes-Bug: #1821611
---
Review notes (this section is ignored when the patch is applied):
- environments/undercloud.yaml now sets MistralFirstHeartbeatTimeout, the
  spelling declared in deployment/mistral/mistral-base.yaml; the earlier
  "MistralFirstHeartBeatTimeout" did not match the declared parameter.
- Indentation of context lines was reconstructed from a whitespace-collapsed
  copy of this patch; verify it against the target tree before applying.

 deployment/mistral/mistral-base.yaml          | 35 +++++++++++++++++++
 environments/undercloud.yaml                  |  4 +++
 .../mistral_timeout-c00344d5b3d8c4b0.yaml     |  6 ++++
 3 files changed, 45 insertions(+)
 create mode 100644 releasenotes/notes/mistral_timeout-c00344d5b3d8c4b0.yaml

diff --git a/deployment/mistral/mistral-base.yaml b/deployment/mistral/mistral-base.yaml
index 7ff32a6fd1..56446e04b8 100644
--- a/deployment/mistral/mistral-base.yaml
+++ b/deployment/mistral/mistral-base.yaml
@@ -71,6 +71,38 @@ parameters:
         Messaging client subscriber parameter to specify
         an SSL connection to the messaging host.
     type: string
+  MistralMaxMissedHeartbeats:
+    type: number
+    default: 15
+    description: >
+      The maximum amount of missed heartbeats to be allowed.
+      If set to 0 then this feature is disabled. See check_interval for more
+      details.
+    constraints:
+      - range: { min: 0 }
+  MistralCheckInterval:
+    type: number
+    default: 20
+    description: >
+      How often (in seconds) action executions are checked.
+      For example when check_interval is 10, check action
+      executions every 10 seconds. When the checker runs it will
+      transit all running action executions to error if the last
+      heartbeat received is older than 10 * max_missed_heartbeats
+      seconds. If set to 0 then this feature is disabled.
+    constraints:
+      - range: { min: 0 }
+  MistralFirstHeartbeatTimeout:
+    type: number
+    default: 3600
+    description: >
+      The first heartbeat is handled differently, to provide a
+      grace period in case there is no available executor to handle
+      the action execution. For example when
+      first_heartbeat_timeout = 3600, wait 3600 seconds before
+      closing the action executions that never received a heartbeat.
+    constraints:
+      - range: { min: 0 }
 
 conditions:
   service_debug_unset: {equals : [{get_param: MistralDebug}, '']}
@@ -103,6 +135,9 @@ outputs:
           - {get_param: Debug }
           - {get_param: MistralDebug }
         mistral::rpc_response_timeout: 120
+        mistral::max_missed_heartbeats: {get_param: MistralMaxMissedHeartbeats}
+        mistral::check_interval: {get_param: MistralCheckInterval}
+        mistral::first_heartbeat_timeout: {get_param: MistralFirstHeartbeatTimeout}
         mistral::keystone::authtoken::project_name: 'service'
         mistral::keystone::authtoken::user_domain_name: 'Default'
         mistral::keystone::authtoken::project_domain_name: 'Default'
diff --git a/environments/undercloud.yaml b/environments/undercloud.yaml
index d460c96efb..c91b2d0aa8 100644
--- a/environments/undercloud.yaml
+++ b/environments/undercloud.yaml
@@ -129,6 +129,10 @@ parameter_defaults:
   MistralExecutionFieldSizeLimit: 16384
   MistralExecutorVolumes:
     - /var/lib/config-data/nova/etc/nova:/etc/nova:ro
+  # https://bugs.launchpad.net/tripleo/+bug/1821611
+  MistralMaxMissedHeartbeats: 30
+  MistralCheckInterval: 40
+  MistralFirstHeartbeatTimeout: 7200
   NeutronServicePlugins: router,segments
   NeutronMechanismDrivers: ['openvswitch', 'baremetal']
   NeutronNetworkVLANRanges: 'physnet1:1000:2999'
diff --git a/releasenotes/notes/mistral_timeout-c00344d5b3d8c4b0.yaml b/releasenotes/notes/mistral_timeout-c00344d5b3d8c4b0.yaml
new file mode 100644
index 0000000000..6703ff1974
--- /dev/null
+++ b/releasenotes/notes/mistral_timeout-c00344d5b3d8c4b0.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    Allow to configure Mistral parameters for Heartbeat and set sane defaults
+    for the Undercloud so we can deploy an Overcloud in Stein.
+    Fixes `bug 1821611 <https://bugs.launchpad.net/tripleo/+bug/1821611>`__.