From 3be270396415e8b8282eb825f97d5be6bf7059fe Mon Sep 17 00:00:00 2001 From: Kamil Sambor Date: Thu, 17 Oct 2019 15:30:58 +0200 Subject: [PATCH] Add configurable monitor timeouts for ovn dbs Under pressure, the default monitor timeout value of 20 seconds is not enough to prevent unnecessary failovers of the ovn-dbs pacemaker resource. While spawning a few VMs at the same time this could lead to unnecessary movements of master DB, then re-connections of ovn-controllers (slaves are read-only), further peaks of load on DBs, and at the end it could lead to a snowball effect. Now this value can be configured via dbs_timeout in tripleo::profile::pacemaker::ovn_dbs_bundle and by default is set to 60s. Change-Id: Ib95c6b7614631eed264d42e6cf61672b705e7893 Signed-off-by: Kamil Sambor (cherry picked from commit 15e21010a8a8594678afe385821ee804ec9e16c7) (cherry picked from commit 223e786c5716015c7ac1bdda94feabcd9c79716a) (cherry picked from commit 98042540864790372980ede921f4d3960140c20e) (cherry picked from commit dad647277e932cc8c52b4e08772a77379647c97e) --- manifests/profile/pacemaker/ovn_dbs_bundle.pp | 8 +++++++- .../notes/setup_timeouts_ovn_dbs-630a7ccfda5976a5.yaml | 10 ++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/setup_timeouts_ovn_dbs-630a7ccfda5976a5.yaml diff --git a/manifests/profile/pacemaker/ovn_dbs_bundle.pp b/manifests/profile/pacemaker/ovn_dbs_bundle.pp index 0da490b7c..e0b0f91e6 100644 --- a/manifests/profile/pacemaker/ovn_dbs_bundle.pp +++ b/manifests/profile/pacemaker/ovn_dbs_bundle.pp @@ -56,6 +56,10 @@ # (optional) Sets PCMK_tls_priorities in /etc/sysconfig/pacemaker when set # Defaults to hiera('tripleo::pacemaker::tls_priorities', undef) # +# [*dbs_timeout*] +# (Optional) timeout for monitor of ovn dbs resource +# Defaults to 60 +# class tripleo::profile::pacemaker::ovn_dbs_bundle ( $ovn_dbs_docker_image = hiera('tripleo::profile::pacemaker::ovn_dbs_bundle::ovn_dbs_docker_image', undef), @@ -67,6 +71,7
@@ class tripleo::profile::pacemaker::ovn_dbs_bundle ( $nb_db_port = 6641, $sb_db_port = 6642, $tls_priorities = hiera('tripleo::pacemaker::tls_priorities', undef), + $dbs_timeout = hiera('tripleo::profile::pacemaker::ovn_dbs_bundle::dbs_timeout', 60), ) { if $::hostname == downcase($bootstrap_node) { @@ -152,7 +157,8 @@ class tripleo::profile::pacemaker::ovn_dbs_bundle ( pacemaker::resource::ocf { "${ovndb_servers_resource_name}": ocf_agent_name => "${ovndb_servers_ocf_name}", master_params => '', - op_params => 'start timeout=200s stop timeout=200s', + op_params => "start timeout=200s stop timeout=200s monitor interval=10s role=Master timeout=${dbs_timeout}s \ +monitor interval=30s role=Slave timeout=${dbs_timeout}s", resource_params => "master_ip=${ovn_dbs_vip_norm} nb_master_port=${nb_db_port} \ sb_master_port=${sb_db_port} manage_northd=yes inactive_probe_interval=180000", tries => $pcs_tries, diff --git a/releasenotes/notes/setup_timeouts_ovn_dbs-630a7ccfda5976a5.yaml b/releasenotes/notes/setup_timeouts_ovn_dbs-630a7ccfda5976a5.yaml new file mode 100644 index 000000000..6a5d466a3 --- /dev/null +++ b/releasenotes/notes/setup_timeouts_ovn_dbs-630a7ccfda5976a5.yaml @@ -0,0 +1,10 @@ +--- +features: + - | + Under pressure, the default monitor timeout value of 20 seconds is not + enough to prevent unnecessary failovers of the ovn-dbs pacemaker resource. + While spawning a few VMs at the same time this could lead to unnecessary + movements of master DB, then re-connections of ovn-controllers (slaves are + read-only), further peaks of load on DBs, and at the end it could lead to + a snowball effect. Now this value can be configured via dbs_timeout in + tripleo::profile::pacemaker::ovn_dbs_bundle and by default is set to 60s. \ No newline at end of file