diff --git a/ansible/roles/mariadb/handlers/main.yml b/ansible/roles/mariadb/handlers/main.yml index 7ac1f0f6ba..4aaee273b1 100644 --- a/ansible/roles/mariadb/handlers/main.yml +++ b/ansible/roles/mariadb/handlers/main.yml @@ -17,16 +17,10 @@ restart_policy: no volumes: "{{ service.volumes }}" dimensions: "{{ service.dimensions }}" - when: - - bootstrap_host is defined - - bootstrap_host == inventory_hostname listen: Bootstrap MariaDB cluster - notify: - - restart mariadb -# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed -# https://github.com/ansible/ansible-modules-core/issues/2788 -- name: wait first mariadb container +# NOTE(yoctozepto): We have to loop this to avoid breaking on connection resets +- name: Wait for first MariaDB service port liveness wait_for: host: "{{ api_interface_address }}" port: "{{ mariadb_port }}" @@ -37,157 +31,60 @@ until: check_mariadb_port is success retries: 10 delay: 6 - when: - - bootstrap_host is defined - - bootstrap_host == inventory_hostname listen: Bootstrap MariaDB cluster -- name: Wait for MariaDB to become operational +- name: Wait for first MariaDB service to sync WSREP become: true command: >- docker exec {{ mariadb_service.container_name }} mysql -uroot -p{{ database_password }} --silent --skip-column-names - -e 'SHOW STATUS LIKE "wsrep_evs_state"' + -e 'SHOW STATUS LIKE "wsrep_local_state_comment"' changed_when: false register: result - until: '"OPERATIONAL" in result.stdout' + until: result.stdout == "wsrep_local_state_comment\tSynced" retries: 10 delay: 6 no_log: true - when: - - bootstrap_host is defined - - bootstrap_host == inventory_hostname listen: Bootstrap MariaDB cluster -- name: restart slave mariadb - vars: - service_name: "mariadb" - service: "{{ mariadb_services[service_name] }}" +- name: Creating haproxy mysql user become: true - kolla_docker: - action: "recreate_or_restart_container" - common_options: "{{ docker_common_options }}" - name: "{{ service.container_name }}" - image: "{{ service.image }}" - volumes: "{{ service.volumes }}" - dimensions: "{{ service.dimensions }}" + kolla_toolbox: + module_name: mysql_user + module_args: + login_host: "{{ api_interface_address }}" + login_port: "{{ mariadb_port }}" + login_user: "{{ database_user }}" + login_password: "{{ database_password }}" + name: "haproxy" + password: "" + host: "%" + priv: "*.*:USAGE" + listen: Bootstrap MariaDB cluster + +- name: Restart MariaDB on existing cluster members + include_tasks: 'restart_services.yml' when: + - groups.mariadb_port_alive_True is defined + - inventory_hostname in groups.mariadb_port_alive_True + - groups.mariadb_port_alive_True.index(inventory_hostname) % 3 == item - kolla_action != "config" - - inventory_hostname != master_host - - not mariadb_recover | default(false) listen: restart mariadb + loop: + - 0 + - 1 + - 2 -# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed -# https://github.com/ansible/ansible-modules-core/issues/2788 -- name: wait for slave mariadb - wait_for: - host: "{{ api_interface_address }}" - port: "{{ mariadb_port }}" - connect_timeout: 1 - timeout: 60 - search_regex: "MariaDB" - register: check_mariadb_port - until: check_mariadb_port is success - retries: 10 - delay: 6 +- name: Start MariaDB on new nodes + include_tasks: 'restart_services.yml' when: + - bootstrap_host is not defined or bootstrap_host != inventory_hostname + - groups.mariadb_port_alive_False is defined + - inventory_hostname in groups.mariadb_port_alive_False - kolla_action != "config" - - inventory_hostname != master_host - - not mariadb_recover | default(false) listen: restart mariadb -- name: run upgrade on slave - vars: - service_name: "mariadb" - service: "{{ mariadb_services[service_name] }}" - become: true - kolla_docker: - action: "start_container" - common_options: "{{ docker_common_options }}" - detach: False - dimensions: "{{ service.dimensions }}" - environment: - KOLLA_UPGRADE: - KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}" - DB_HOST: "{{ api_interface_address }}" - DB_PORT: "{{ mariadb_port }}" - DB_ROOT_PASSWORD: "{{ database_password }}" - image: "{{ service.image }}" - labels: - UPGRADE: - name: "upgrade_mariadb" - restart_policy: no - volumes: "{{ service.volumes }}" - no_log: true - when: - - kolla_action == "upgrade" - - inventory_hostname != master_host - - not mariadb_recover | default(false) - listen: restart mariadb - -- name: restart master mariadb - vars: - service_name: "mariadb" - service: "{{ mariadb_services[service_name] }}" - become: true - kolla_docker: - action: "recreate_or_restart_container" - common_options: "{{ docker_common_options }}" - name: "{{ service.container_name }}" - image: "{{ service.image }}" - volumes: "{{ service.volumes }}" - dimensions: "{{ service.dimensions }}" - when: - - kolla_action != "config" - - inventory_hostname == master_host - - not mariadb_recover | default(false) - listen: restart mariadb - -# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed -# https://github.com/ansible/ansible-modules-core/issues/2788 -- name: Waiting for master mariadb - wait_for: - host: "{{ api_interface_address }}" - port: "{{ mariadb_port }}" - connect_timeout: 1 - timeout: 60 - search_regex: "MariaDB" - register: check_mariadb_port - until: check_mariadb_port is success - retries: 10 - delay: 6 - when: - - kolla_action != "config" - - inventory_hostname == master_host - - not mariadb_recover | default(false) - listen: restart mariadb - -- name: run upgrade on master - vars: - service_name: "mariadb" - service: "{{ mariadb_services[service_name] }}" - become: true - kolla_docker: - action: "start_container" - common_options: "{{ docker_common_options }}" - detach: False - dimensions: "{{ service.dimensions }}" - environment: - KOLLA_UPGRADE: - KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}" - DB_HOST: "{{ api_interface_address }}" - DB_PORT: "{{ mariadb_port }}" - DB_ROOT_PASSWORD: "{{ database_password }}" - image: "{{ service.image }}" - labels: - UPGRADE: - name: "upgrade_mariadb" - restart_policy: no - volumes: "{{ service.volumes }}" - no_log: true - when: - - kolla_action == "upgrade" - - inventory_hostname == master_host - - not mariadb_recover | default(false) - listen: restart mariadb +- name: Ensure MariaDB is running normally on bootstrap host + include_tasks: 'restart_services.yml' + listen: Bootstrap MariaDB cluster diff --git a/ansible/roles/mariadb/tasks/bootstrap.yml b/ansible/roles/mariadb/tasks/bootstrap.yml index 249af27e03..279f3a43d6 100644 --- a/ansible/roles/mariadb/tasks/bootstrap.yml +++ b/ansible/roles/mariadb/tasks/bootstrap.yml @@ -1,13 +1,9 @@ --- -- name: Set a fact about the master host - set_fact: - master_host: "{{ groups['mariadb'][0] }}" - - include_tasks: lookup_cluster.yml - include_tasks: bootstrap_cluster.yml when: - - not has_cluster | bool + - not mariadb_cluster_exists - inventory_hostname == groups['mariadb'][0] - include_tasks: recover_cluster.yml diff --git a/ansible/roles/mariadb/tasks/deploy-containers.yml b/ansible/roles/mariadb/tasks/deploy-containers.yml index 9be12d61a3..8a1a708698 100644 --- a/ansible/roles/mariadb/tasks/deploy-containers.yml +++ b/ansible/roles/mariadb/tasks/deploy-containers.yml @@ -1,6 +1,5 @@ --- -- name: Set a fact about the master host - set_fact: - master_host: "{{ groups['mariadb'][0] }}" - - import_tasks: check-containers.yml + +# NOTE(yoctozepto): handlers prerequisite +- import_tasks: lookup_cluster.yml diff --git a/ansible/roles/mariadb/tasks/lookup_cluster.yml b/ansible/roles/mariadb/tasks/lookup_cluster.yml index 4f7bedf722..35f0b5b3f8 100644 --- a/ansible/roles/mariadb/tasks/lookup_cluster.yml +++ b/ansible/roles/mariadb/tasks/lookup_cluster.yml @@ -1,25 +1,5 @@ --- -- name: Cleaning up temp file on localhost - file: - path: /tmp/kolla_mariadb_cluster - state: absent - delegate_to: localhost - changed_when: False - check_mode: no - run_once: True - -# NOTE(mnasiadka): Due to the way that we are setting fact has_cluster - content needs to be '' -- name: Creating temp file on localhost - copy: - content: '' - dest: /tmp/kolla_mariadb_cluster - mode: 0644 - delegate_to: localhost - changed_when: False - check_mode: no - run_once: True - -- name: Creating mariadb volume +- name: Create MariaDB volume become: true kolla_docker: action: "create_volume" @@ -27,25 +7,59 @@ name: "mariadb" register: mariadb_volume -- name: Writing hostname of host with existing cluster files to temp file - copy: - content: "{{ ansible_hostname }}" - dest: /tmp/kolla_mariadb_cluster - mode: 0644 - delegate_to: localhost - changed_when: False - check_mode: no - when: mariadb_volume is not changed +- name: Divide hosts by their MariaDB volume availability + group_by: + key: mariadb_had_volume_{{ mariadb_volume is not changed }} -- name: Registering host from temp file +- name: Establish whether the cluster has already existed set_fact: - has_cluster: "{{ lookup('file', '/tmp/kolla_mariadb_cluster') | length > 0 }}" + mariadb_cluster_exists: "{{ groups.mariadb_had_volume_True is defined }}" -- name: Cleaning up temp file on localhost - file: - path: /tmp/kolla_mariadb_cluster - state: absent - delegate_to: localhost - changed_when: False - check_mode: no - run_once: True +- block: + - name: Check MariaDB service port liveness + wait_for: + host: "{{ api_interface_address }}" + port: "{{ mariadb_port }}" + connect_timeout: 1 + timeout: 10 + search_regex: "MariaDB" + register: check_mariadb_port_liveness + ignore_errors: yes + + - name: Divide hosts by their MariaDB service port liveness + group_by: + key: mariadb_port_alive_{{ check_mariadb_port_liveness is success }} + + - block: + - name: Check MariaDB service WSREP sync status + become: true + command: >- + docker exec {{ mariadb_service.container_name }} + mysql -uroot -p{{ database_password }} + --silent --skip-column-names + -e 'SHOW STATUS LIKE "wsrep_local_state_comment"' + changed_when: false + register: check_mariadb_sync_status + no_log: true + + # NOTE(yoctozepto): this is extracted separately to properly escape + # the TAB character which likes to go wrong due to interaction between + # Python/Ansible/Jinja2/YAML, the way below works + - name: Extract MariaDB service WSREP sync status + set_fact: + mariadb_sync_status: "{{ check_mariadb_sync_status.stdout.split('\t')[1] }}" + + - name: Divide hosts by their MariaDB service WSREP sync status + group_by: + key: mariadb_sync_status_{{ mariadb_sync_status }} + + - name: Fail when MariaDB service is not synced + fail: + msg: MariaDB service is not synced. Please wait for WSREP sync before proceeding. + when: + - groups.mariadb_sync_status_Synced is not defined or + inventory_hostname not in groups.mariadb_sync_status_Synced + when: + - groups.mariadb_port_alive_True is defined + - inventory_hostname in groups.mariadb_port_alive_True + when: not mariadb_recover | default(False) diff --git a/ansible/roles/mariadb/tasks/recover_cluster.yml b/ansible/roles/mariadb/tasks/recover_cluster.yml index 4c097d80f8..b662e4a9c0 100644 --- a/ansible/roles/mariadb/tasks/recover_cluster.yml +++ b/ansible/roles/mariadb/tasks/recover_cluster.yml @@ -1,7 +1,7 @@ --- - fail: msg: "MariaDB cluster was not found. Is your inventory correct?" - when: not has_cluster | bool + when: not mariadb_cluster_exists - name: Cleaning up temp file on mariadb hosts file: @@ -97,8 +97,6 @@ - set_fact: bootstrap_host: "{{ mariadb_recover_inventory_name }}" - master_host: "{{ mariadb_recover_inventory_name }}" - changed_when: true - name: Copying grastate.dat file from MariaDB container in bootstrap host become: true diff --git a/ansible/roles/mariadb/tasks/register.yml b/ansible/roles/mariadb/tasks/register.yml index b4b87d97f3..ab60c16ccb 100644 --- a/ansible/roles/mariadb/tasks/register.yml +++ b/ansible/roles/mariadb/tasks/register.yml @@ -1,19 +1,4 @@ --- -- name: Creating haproxy mysql user - become: true - kolla_toolbox: - module_name: mysql_user - module_args: - login_host: "{{ api_interface_address }}" - login_port: "{{ mariadb_port }}" - login_user: "{{ database_user }}" - login_password: "{{ database_password }}" - name: "haproxy" - password: "" - host: "%" - priv: "*.*:USAGE" - run_once: True - - import_tasks: wait_for_loadbalancer.yml - name: Creating the Mariabackup database @@ -65,7 +50,3 @@ run_once: True when: - enable_mariabackup | bool - -- name: Cleaning up facts - set_fact: - delegate_host: "bootstraped" diff --git a/ansible/roles/mariadb/tasks/restart_services.yml b/ansible/roles/mariadb/tasks/restart_services.yml new file mode 100644 index 0000000000..86768d309c --- /dev/null +++ b/ansible/roles/mariadb/tasks/restart_services.yml @@ -0,0 +1,46 @@ +--- +- name: Restart MariaDB container + vars: + service_name: "mariadb" + service: "{{ mariadb_services[service_name] }}" + become: true + kolla_docker: + action: "recreate_or_restart_container" + common_options: "{{ docker_common_options }}" + name: "{{ service.container_name }}" + image: "{{ service.image }}" + volumes: "{{ service.volumes }}" + dimensions: "{{ service.dimensions }}" + +# NOTE(yoctozepto): We have to loop this to avoid breaking on connection resets +- name: Wait for MariaDB service port liveness + wait_for: + host: "{{ api_interface_address }}" + port: "{{ mariadb_port }}" + connect_timeout: 1 + timeout: 60 + search_regex: "MariaDB" + register: check_mariadb_port + until: check_mariadb_port is success + retries: 10 + delay: 6 + +- name: Wait for MariaDB service to sync WSREP + become: true + command: >- + docker exec {{ mariadb_service.container_name }} + mysql -uroot -p{{ database_password }} + --silent --skip-column-names + -e 'SHOW STATUS LIKE "wsrep_local_state_comment"' + changed_when: false + register: result + until: result.stdout == "wsrep_local_state_comment\tSynced" + retries: 10 + delay: 6 + no_log: true + when: + # NOTE(yoctozepto): we don't want to wait for new nodes to fully sync + # with an existing cluster as this could take time + - not mariadb_cluster_exists or + (groups.mariadb_port_alive_True is defined and + inventory_hostname in groups.mariadb_port_alive_True) diff --git a/ansible/roles/mariadb/tasks/upgrade.yml b/ansible/roles/mariadb/tasks/upgrade.yml index f670a5b78d..3d7686eabb 100644 --- a/ansible/roles/mariadb/tasks/upgrade.yml +++ b/ansible/roles/mariadb/tasks/upgrade.yml @@ -1,2 +1,26 @@ --- - include_tasks: deploy.yml + +- name: Run upgrade in MariaDB container + vars: + service_name: "mariadb" + service: "{{ mariadb_services[service_name] }}" + become: true + kolla_docker: + action: "start_container" + common_options: "{{ docker_common_options }}" + detach: False + dimensions: "{{ service.dimensions }}" + environment: + KOLLA_UPGRADE: + KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}" + DB_HOST: "{{ api_interface_address }}" + DB_PORT: "{{ mariadb_port }}" + DB_ROOT_PASSWORD: "{{ database_password }}" + image: "{{ service.image }}" + labels: + UPGRADE: + name: "upgrade_mariadb" + restart_policy: no + volumes: "{{ service.volumes }}" + no_log: true diff --git a/releasenotes/notes/fix-multiple-mariadb-issues-d023478df6d76622.yaml b/releasenotes/notes/fix-multiple-mariadb-issues-d023478df6d76622.yaml new file mode 100644 index 0000000000..564a81c842 --- /dev/null +++ b/releasenotes/notes/fix-multiple-mariadb-issues-d023478df6d76622.yaml @@ -0,0 +1,17 @@ +--- +fixes: + - | + Fixes MariaDB issues in multinode scenarios which affected + deployment, reconfiguration, upgrade and Galera cluster resizing. + They were usually manifested by WSREP issues in various places + and could lead to need to recover the Galera cluster. + Note these issues were due to how MariaDB was handled during + Kolla Ansible runs and did not affect Galera cluster during normal + operations unless MariaDB was later touched by Kolla Ansible. + Users wishing to run actions on their Galera clusters using + Kolla Ansible are strongly advised to update. + For details please see the following Launchpad bug records: + `bug 1857908 + `__ and + `bug 1859145 + `__.