Fix MariaDB 10.3 upgrade

Upgrading MariaDB from Rocky to Stein currently fails, leaving the new
container continually restarting. The problem is that the Rocky
container does not shut down cleanly, leaving behind state that the new
container cannot recover. The container does not shut down cleanly
because we run dumb-init with a --single-child argument, causing it to
forward signals only to the process executed by dumb-init. In our case
this is mysqld_safe, which ignores various signals, including SIGTERM.
After a timeout (10 seconds by default), Docker kills the container.
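
To illustrate, roughly (the entrypoint shown is a sketch of the
Rocky-era behaviour, not copied verbatim from the image):

    # Rocky-era image (roughly): dumb-init forwards signals only to its
    # direct child, which ends up being mysqld_safe.
    dumb-init --single-child -- kolla_start

    # 'docker stop' sends SIGTERM, which mysqld_safe ignores; after the
    # default 10 second timeout Docker sends SIGKILL, so mysqld dies
    # without a clean shutdown.
    docker stop mariadb

    # Without --single-child, dumb-init signals the whole process group,
    # so mysqld itself receives SIGTERM and stops cleanly.
    dumb-init -- kolla_start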

A Kolla change [1] removes the --single-child argument from dumb-init
for the MariaDB container; however, we still need to support upgrading
from Rocky images that don't have this change. To do that, we add new
handlers that execute 'mysqladmin shutdown' to cleanly shut down the
service.
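
The shutdown handlers run a one-shot container that executes roughly
the following (variables are expanded by Ansible at runtime):

    # Cleanly stop the running mysqld via the MariaDB admin protocol.
    sudo -E kolla_set_configs
    mysqladmin shutdown --host={{ api_interface_address }} --user=root \
        --password={{ database_password }}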

A second issue with the current upgrade approach is that we don't
execute mysql_upgrade after starting the new service. This can leave
the database state in the format of the previous release. This patch
also adds handlers to execute mysql_upgrade.
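
The new 'run upgrade' handlers start a one-shot container with
KOLLA_UPGRADE set; inside the image, the MariaDB extend_start script
then runs mysql_upgrade along these lines (a sketch, not the exact
script shipped in the image):

    mysql_upgrade --host="${DB_HOST}" --port="${DB_PORT}" \
        --user=root --password="${DB_ROOT_PASSWORD}"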

[1] https://review.openstack.org/644244

Depends-On: https://review.openstack.org/644244
Depends-On: https://review.openstack.org/645990
Change-Id: I08a655a359ff9cfa79043f2166dca59199c7d67f
Closes-Bug: #1820325
Mark Goddard 2019-03-17 17:48:54 +00:00
parent e956cd87c8
commit b25c0ee477
4 changed files with 138 additions and 37 deletions


@@ -20,11 +20,9 @@
   when:
     - bootstrap_host is defined
     - bootstrap_host == inventory_hostname
+  listen: Bootstrap MariaDB cluster
   notify:
-    - wait first mariadb container
-    - restart slave mariadb
-    - restart master mariadb
+    - restart mariadb

 # TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
 # https://github.com/ansible/ansible-modules-core/issues/2788
@@ -42,12 +40,45 @@
   when:
     - bootstrap_host is defined
     - bootstrap_host == inventory_hostname
+  listen: Bootstrap MariaDB cluster
+
+# NOTE(mgoddard): In Rocky the MariaDB image had an issue where it would not
+# stop on demand, and would result in Docker forcibly killing the container.
+# This could lead to a failed upgrade if the new image is unable to recover
+# from the crash. See https://bugs.launchpad.net/kolla-ansible/+bug/1820325.
+# TODO(mgoddard): Remove this task in Train.
+- name: shutdown slave mariadb
+  vars:
+    service_name: "mariadb"
+    service: "{{ mariadb_services[service_name] }}"
+  become: true
+  kolla_docker:
+    action: "start_container"
+    command: >-
+      bash -c '
+      sudo -E kolla_set_configs &&
+      mysqladmin shutdown --host={{ api_interface_address }} --user=root --password={{ database_password }}
+      '
+    common_options: "{{ docker_common_options }}"
+    detach: False
+    name: "mariadb_shutdown"
+    image: "{{ service.image }}"
+    volumes: "{{ service.volumes }}"
+    dimensions: "{{ service.dimensions }}"
+    labels:
+      UPGRADE:
+    restart_policy: "never"
+  no_log: true
+  when:
+    - kolla_action != "config"
+    - has_cluster | bool
+    - inventory_hostname != master_host
+  listen: restart mariadb

 - name: restart slave mariadb
   vars:
     service_name: "mariadb"
     service: "{{ mariadb_services[service_name] }}"
-    mariadb_container: "{{ check_mariadb_containers.results|selectattr('item.key', 'equalto', service_name)|first }}"
   become: true
   kolla_docker:
     action: "recreate_or_restart_container"
@@ -59,15 +90,7 @@
   when:
     - kolla_action != "config"
     - inventory_hostname != master_host
-    - inventory_hostname in groups[service.group]
-    - service.enabled | bool
-    - mariadb_config_json.changed | bool
-      or mariadb_galera_conf.changed | bool
-      or mariadb_wsrep_notify.changed | bool
-      or mariadb_container.changed | bool
-      or bootstrap_host is defined
-  notify:
-    - wait for slave mariadb
+  listen: restart mariadb

 # TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
 # https://github.com/ansible/ansible-modules-core/issues/2788
@@ -85,12 +108,72 @@
   when:
     - kolla_action != "config"
     - inventory_hostname != master_host
+  listen: restart mariadb
+
+- name: run upgrade on slave
+  vars:
+    service_name: "mariadb"
+    service: "{{ mariadb_services[service_name] }}"
+  become: true
+  kolla_docker:
+    action: "start_container"
+    common_options: "{{ docker_common_options }}"
+    detach: False
+    dimensions: "{{ service.dimensions }}"
+    environment:
+      KOLLA_UPGRADE:
+      KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
+      DB_HOST: "{{ api_interface_address }}"
+      DB_PORT: "{{ mariadb_port }}"
+      DB_ROOT_PASSWORD: "{{ database_password }}"
+    image: "{{ service.image }}"
+    labels:
+      UPGRADE:
+    name: "upgrade_mariadb"
+    restart_policy: "never"
+    volumes: "{{ service.volumes }}"
+  no_log: true
+  when:
+    - kolla_action == "upgrade"
+    - inventory_hostname != master_host
+  listen: restart mariadb
+
+# NOTE(mgoddard): In Rocky the MariaDB image had an issue where it would not
+# stop on demand, and would result in Docker forcibly killing the container.
+# This could lead to a failed upgrade if the new image is unable to recover
+# from the crash. See https://bugs.launchpad.net/kolla-ansible/+bug/1820325.
+# TODO(mgoddard): Remove this task in Train.
+- name: shutdown master mariadb
+  vars:
+    service_name: "mariadb"
+    service: "{{ mariadb_services[service_name] }}"
+  become: true
+  kolla_docker:
+    action: "start_container"
+    command: >-
+      bash -c '
+      sudo -E kolla_set_configs &&
+      mysqladmin shutdown --host={{ api_interface_address }} --user=root --password={{ database_password }}
+      '
+    common_options: "{{ docker_common_options }}"
+    detach: False
+    name: "mariadb_shutdown"
+    image: "{{ service.image }}"
+    volumes: "{{ service.volumes }}"
+    dimensions: "{{ service.dimensions }}"
+    labels:
+      UPGRADE:
+    restart_policy: "never"
+  no_log: true
+  when:
+    - kolla_action != "config"
+    - inventory_hostname == master_host
+  listen: restart mariadb

 - name: restart master mariadb
   vars:
     service_name: "mariadb"
     service: "{{ mariadb_services[service_name] }}"
-    mariadb_container: "{{ check_mariadb_containers.results|selectattr('item.key', 'equalto', service_name)|first }}"
   become: true
   kolla_docker:
     action: "recreate_or_restart_container"
@@ -102,15 +185,7 @@
   when:
     - kolla_action != "config"
     - inventory_hostname == master_host
-    - inventory_hostname in groups[service.group]
-    - service.enabled | bool
-    - mariadb_config_json.changed | bool
-      or mariadb_galera_conf.changed | bool
-      or mariadb_wsrep_notify.changed | bool
-      or mariadb_container.changed | bool
-      or bootstrap_host is defined
-  notify:
-    - Waiting for master mariadb
+  listen: restart mariadb

 # TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
 # https://github.com/ansible/ansible-modules-core/issues/2788
@@ -128,3 +203,32 @@
   when:
     - kolla_action != "config"
     - inventory_hostname == master_host
+  listen: restart mariadb
+
+- name: run upgrade on master
+  vars:
+    service_name: "mariadb"
+    service: "{{ mariadb_services[service_name] }}"
+  become: true
+  kolla_docker:
+    action: "start_container"
+    common_options: "{{ docker_common_options }}"
+    detach: False
+    dimensions: "{{ service.dimensions }}"
+    environment:
+      KOLLA_UPGRADE:
+      KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
+      DB_HOST: "{{ api_interface_address }}"
+      DB_PORT: "{{ mariadb_port }}"
+      DB_ROOT_PASSWORD: "{{ database_password }}"
+    image: "{{ service.image }}"
+    labels:
+      UPGRADE:
+    name: "upgrade_mariadb"
+    restart_policy: "never"
+    volumes: "{{ service.volumes }}"
+  no_log: true
+  when:
+    - kolla_action == "upgrade"
+    - inventory_hostname == master_host
+  listen: restart mariadb


@@ -20,7 +20,7 @@
     restart_policy: "never"
     volumes: "{{ service.volumes }}"
   notify:
-    - Starting first MariaDB container
+    - Bootstrap MariaDB cluster

 - set_fact:
     bootstrap_host: "{{ inventory_hostname }}"


@@ -48,13 +48,11 @@
     dest: "{{ node_config_directory }}/{{ service_name }}/config.json"
     mode: "0660"
   become: true
-  register: mariadb_config_json
   when:
     - inventory_hostname in groups[service.group]
     - service.enabled | bool
   notify:
-    - restart slave mariadb
-    - restart master mariadb
+    - restart mariadb

 - name: Copying over galera.cnf
   vars:
@@ -68,13 +66,11 @@
     dest: "{{ node_config_directory }}/{{ service_name }}/galera.cnf"
     mode: "0660"
   become: true
-  register: mariadb_galera_conf
   when:
     - inventory_hostname in groups[service.group]
     - service.enabled | bool
   notify:
-    - restart slave mariadb
-    - restart master mariadb
+    - restart mariadb

 - name: Copying over wsrep-notify.sh
   template:
@@ -82,14 +78,12 @@
     dest: "{{ node_config_directory }}/{{ item.key }}/wsrep-notify.sh"
     mode: "0770"
   become: true
-  register: mariadb_wsrep_notify
   when:
     - inventory_hostname in groups[item.value.group]
     - item.value.enabled | bool
   with_dict: "{{ mariadb_services }}"
   notify:
-    - restart slave mariadb
-    - restart master mariadb
+    - restart mariadb

 - name: Check mariadb containers
   become: true
@@ -100,12 +94,10 @@
     image: "{{ item.value.image }}"
     volumes: "{{ item.value.volumes }}"
     dimensions: "{{ item.value.dimensions }}"
-  register: check_mariadb_containers
   when:
     - kolla_action != "config"
     - inventory_hostname in groups[item.value.group]
     - item.value.enabled | bool
   with_dict: "{{ mariadb_services }}"
   notify:
-    - restart slave mariadb
-    - restart master mariadb
+    - restart mariadb


@@ -54,5 +54,10 @@ innodb_buffer_pool_size = '{{ dynamic_pool_size_mb }}M'
 innodb_buffer_pool_size = '8192M'
 {% endif %}

+# The default value for innodb_lock_schedule_algorithm is VATS, but this does
+# not work with galera. Set FCFS explicitly to avoid a warning.
+# https://mariadb.com/kb/en/library/innodb-system-variables/#innodb_lock_schedule_algorithm.
+innodb_lock_schedule_algorithm = FCFS
+
 [server]
 pid-file=/var/lib/mysql/mariadb.pid