From 4d9650ab2d79540123525486ef17f5185bed120d Mon Sep 17 00:00:00 2001 From: Doug Szumski Date: Thu, 23 Jul 2020 10:26:27 +0000 Subject: [PATCH] Improve Grafana DB bootstrap This fixes an issue where multiple Grafana instances would race to bootstrap the Grafana DB. The following changes are made: - Only start additional Grafana instances after the DB has been configured. - During upgrade, don't allow old instances to run with an upgraded DB schema. Change-Id: I3e0e077ba6a6f43667df042eb593107418a06c39 Closes-Bug: #1888681 (cherry picked from commit 2c730590d72e7b6d3056c53804f924d43dab8363) --- ansible/roles/grafana/handlers/main.yml | 39 ++++++++++++++++++- ansible/roles/grafana/tasks/upgrade.yml | 26 +++++++++++++ ansible/roles/monasca/handlers/main.yml | 39 ++++++++++++++++++- ansible/roles/monasca/tasks/upgrade.yml | 29 ++++++++++++++ ...grafana-db-bootstrap-298feba3e1750aca.yaml | 5 +++ 5 files changed, 136 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/fix-multi-instance-grafana-db-bootstrap-298feba3e1750aca.yaml diff --git a/ansible/roles/grafana/handlers/main.yml b/ansible/roles/grafana/handlers/main.yml index 871c9a8c03..1cdabf943e 100644 --- a/ansible/roles/grafana/handlers/main.yml +++ b/ansible/roles/grafana/handlers/main.yml @@ -1,5 +1,6 @@ --- -- name: Restart grafana container +- name: Restart first grafana container + listen: Restart grafana container vars: service_name: "grafana" service: "{{ grafana_services[service_name] }}" @@ -13,3 +14,39 @@ dimensions: "{{ service.dimensions }}" when: - kolla_action != "config" + - inventory_hostname == groups[service.group]|first + +- name: Waiting for grafana to start on first node + listen: Restart grafana container + vars: + service_name: "grafana" + service: "{{ grafana_services[service_name] }}" + become: true + kolla_toolbox: + module_name: uri + module_args: + url: "http://{{ api_interface_address | put_address_in_context('url') }}:{{ grafana_server_port }}/login" + status_code: 200 
+ register: result + until: result.get('status') == 200 + retries: 10 + delay: 2 + when: + - inventory_hostname == groups[service.group]|first + +- name: Restart remaining grafana containers + listen: Restart grafana container + vars: + service_name: "grafana" + service: "{{ grafana_services[service_name] }}" + become: true + kolla_docker: + action: "recreate_or_restart_container" + common_options: "{{ docker_common_options }}" + name: "{{ service.container_name }}" + image: "{{ service.image }}" + volumes: "{{ service.volumes }}" + dimensions: "{{ service.dimensions }}" + when: + - kolla_action != "config" + - inventory_hostname != groups[service.group]|first diff --git a/ansible/roles/grafana/tasks/upgrade.yml b/ansible/roles/grafana/tasks/upgrade.yml index 375dcad19b..8c8caee607 100644 --- a/ansible/roles/grafana/tasks/upgrade.yml +++ b/ansible/roles/grafana/tasks/upgrade.yml @@ -1,5 +1,31 @@ --- +- name: Checking if Grafana container needs upgrading + vars: + service_name: "grafana" + service: "{{ grafana_services[service_name] }}" + become: true + kolla_docker: + action: "compare_image" + common_options: "{{ docker_common_options }}" + name: "{{ project_name }}" + image: "{{ grafana_image_full }}" + when: inventory_hostname in groups['grafana'] + register: grafana_differs + - include_tasks: config.yml +# NOTE(dszumski): We don't want old Grafana instances running after +# a new instance has updated the DB schema. Since the first instance +# is upgraded first, we stop all the other ones. 
+- name: Stopping all Grafana instances but the first node + become: true + kolla_docker: + action: "stop_container" + common_options: "{{ docker_common_options }}" + name: "{{ project_name }}" + when: + - inventory_hostname != groups['grafana']|first + - grafana_differs['result'] + - name: Flush handlers meta: flush_handlers diff --git a/ansible/roles/monasca/handlers/main.yml b/ansible/roles/monasca/handlers/main.yml index a8fe79761e..dd3f1016c1 100644 --- a/ansible/roles/monasca/handlers/main.yml +++ b/ansible/roles/monasca/handlers/main.yml @@ -150,7 +150,8 @@ when: - kolla_action != "config" -- name: Restart monasca-grafana container +- name: Restart first monasca-grafana container + listen: Restart monasca-grafana container vars: service_name: "monasca-grafana" service: "{{ monasca_services[service_name] }}" @@ -164,3 +165,39 @@ dimensions: "{{ service.dimensions }}" when: - kolla_action != "config" + - inventory_hostname == groups[service.group]|first + +- name: Waiting for monasca-grafana to start on first node + listen: Restart monasca-grafana container + vars: + service_name: "monasca-grafana" + service: "{{ monasca_services[service_name] }}" + become: true + kolla_toolbox: + module_name: uri + module_args: + url: "http://{{ api_interface_address | put_address_in_context('url') }}:{{ monasca_grafana_server_port }}/login" + status_code: 200 + register: result + until: result.get('status') == 200 + retries: 10 + delay: 2 + when: + - inventory_hostname == groups[service.group]|first + +- name: Restart remaining monasca-grafana containers + listen: Restart monasca-grafana container + vars: + service_name: "monasca-grafana" + service: "{{ monasca_services[service_name] }}" + become: true + kolla_docker: + action: "recreate_or_restart_container" + common_options: "{{ docker_common_options }}" + name: "{{ service.container_name }}" + image: "{{ service.image }}" + volumes: "{{ service.volumes }}" + dimensions: "{{ service.dimensions }}" + when: + - kolla_action 
!= "config" + - inventory_hostname != groups[service.group]|first diff --git a/ansible/roles/monasca/tasks/upgrade.yml b/ansible/roles/monasca/tasks/upgrade.yml index 0a96ea6f21..1db21f8116 100644 --- a/ansible/roles/monasca/tasks/upgrade.yml +++ b/ansible/roles/monasca/tasks/upgrade.yml @@ -8,8 +8,37 @@ common_options: "{{ docker_common_options }}" name: "monasca_log_api" +- name: Checking if Monasca Grafana container needs upgrading + vars: + service_name: "monasca-grafana" + service: "{{ monasca_services[service_name] }}" + become: true + kolla_docker: + action: "compare_image" + common_options: "{{ docker_common_options }}" + name: "{{ project_name }}" + image: "{{ monasca_grafana_image_full }}" + when: inventory_hostname in groups['monasca-grafana'] + register: monasca_grafana_differs + - include_tasks: config.yml +# NOTE(dszumski): We don't want old Grafana instances running after +# a new instance has updated the DB schema. Since the first instance +# is upgraded first, we stop all the other ones. +- name: Stopping all Monasca Grafana instances but the first node + vars: + service_name: "monasca-grafana" + service: "{{ monasca_services[service_name] }}" + become: true + kolla_docker: + action: "stop_container" + common_options: "{{ docker_common_options }}" + name: "{{ service.container_name }}" + when: + - inventory_hostname != groups['monasca-grafana']|first + - monasca_grafana_differs['result'] + - include_tasks: register.yml when: inventory_hostname in groups['monasca-api'] diff --git a/releasenotes/notes/fix-multi-instance-grafana-db-bootstrap-298feba3e1750aca.yaml b/releasenotes/notes/fix-multi-instance-grafana-db-bootstrap-298feba3e1750aca.yaml new file mode 100644 index 0000000000..8b93ca8613 --- /dev/null +++ b/releasenotes/notes/fix-multi-instance-grafana-db-bootstrap-298feba3e1750aca.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes an issue where Grafana instances would race to bootstrap the Grafana + DB. See `LP#1888681 <https://launchpad.net/bugs/1888681>`__.