Fix MariaDB 10.3 upgrade

Upgrading MariaDB from Rocky to Stein currently fails, with the new
container left continually restarting. The problem is that the Rocky
container does not shut down cleanly, leaving behind state that the new
container cannot recover from. The container does not shut down cleanly
because we run dumb-init with a --single-child argument, causing it to
forward signals only to the process executed by dumb-init. In our case
this is mysqld_safe, which ignores various signals, including SIGTERM.
After a timeout (10 seconds by default), Docker then kills the container.

A Kolla change [1] removes the --single-child argument from dumb-init
for the MariaDB container; however, we still need to support upgrading
from Rocky images that don't have this change. To do that, we add new
handlers that execute 'mysqladmin shutdown' to cleanly shut down the
service.

A second issue with the current upgrade approach is that we don't
execute mysql_upgrade after starting the new service. This can leave the
database state using the format of the previous release. This patch also
adds handlers to execute mysql_upgrade.

[1] https://review.openstack.org/644244

Depends-On: https://review.openstack.org/644244
Depends-On: https://review.openstack.org/645990
Change-Id: I08a655a359ff9cfa79043f2166dca59199c7d67f
Closes-Bug: #1820325
This commit is contained in:
Mark Goddard 2019-03-17 17:48:54 +00:00
parent e956cd87c8
commit b25c0ee477
4 changed files with 138 additions and 37 deletions

View File

@ -20,11 +20,9 @@
when:
- bootstrap_host is defined
- bootstrap_host == inventory_hostname
listen: Bootstrap MariaDB cluster
notify:
- wait first mariadb container
- restart slave mariadb
- restart master mariadb
- restart mariadb
# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
# https://github.com/ansible/ansible-modules-core/issues/2788
@ -42,12 +40,45 @@
when:
- bootstrap_host is defined
- bootstrap_host == inventory_hostname
listen: Bootstrap MariaDB cluster
# NOTE(mgoddard): In Rocky the MariaDB image had an issue where it would not
# stop on demand, and would result in Docker forcibly killing the container.
# This could lead to a failed upgrade if the new image is unable to recover
# from the crash. See https://bugs.launchpad.net/kolla-ansible/+bug/1820325.
# TODO(mgoddard): Remove this task in Train.
# Handler: on non-master hosts, start a one-off "mariadb_shutdown" container
# that asks the running server to stop via 'mysqladmin shutdown'. It listens
# on the same "restart mariadb" topic as the "restart slave mariadb" handler
# defined after it.
- name: shutdown slave mariadb
vars:
service_name: "mariadb"
service: "{{ mariadb_services[service_name] }}"
become: true
kolla_docker:
action: "start_container"
# Runs kolla_set_configs first, then 'mysqladmin shutdown' as root against
# api_interface_address; the shutdown only runs if kolla_set_configs succeeds.
command: >-
bash -c '
sudo -E kolla_set_configs &&
mysqladmin shutdown --host={{ api_interface_address }} --user=root --password={{ database_password }}
'
common_options: "{{ docker_common_options }}"
# NOTE(review): detach is disabled, presumably so the task blocks until the
# one-off container exits - confirm against the kolla_docker module.
detach: False
name: "mariadb_shutdown"
# Reuses the image, volumes and dimensions of the mariadb service entry.
image: "{{ service.image }}"
volumes: "{{ service.volumes }}"
dimensions: "{{ service.dimensions }}"
labels:
UPGRADE:
restart_policy: "never"
# The rendered command embeds database_password, so keep it out of the logs.
no_log: true
# Skipped for config-only runs, when has_cluster is false, and on master_host.
when:
- kolla_action != "config"
- has_cluster | bool
- inventory_hostname != master_host
listen: restart mariadb
- name: restart slave mariadb
vars:
service_name: "mariadb"
service: "{{ mariadb_services[service_name] }}"
mariadb_container: "{{ check_mariadb_containers.results|selectattr('item.key', 'equalto', service_name)|first }}"
become: true
kolla_docker:
action: "recreate_or_restart_container"
@ -59,15 +90,7 @@
when:
- kolla_action != "config"
- inventory_hostname != master_host
- inventory_hostname in groups[service.group]
- service.enabled | bool
- mariadb_config_json.changed | bool
or mariadb_galera_conf.changed | bool
or mariadb_wsrep_notify.changed | bool
or mariadb_container.changed | bool
or bootstrap_host is defined
notify:
- wait for slave mariadb
listen: restart mariadb
# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
# https://github.com/ansible/ansible-modules-core/issues/2788
@ -85,12 +108,72 @@
when:
- kolla_action != "config"
- inventory_hostname != master_host
listen: restart mariadb
# Handler: during an upgrade, start a one-off "upgrade_mariadb" container on
# non-master hosts with KOLLA_UPGRADE present in its environment.
# NOTE(review): presumably the image's start script runs mysql_upgrade when it
# sees KOLLA_UPGRADE - confirm against the Kolla MariaDB image.
- name: run upgrade on slave
vars:
service_name: "mariadb"
service: "{{ mariadb_services[service_name] }}"
become: true
kolla_docker:
action: "start_container"
common_options: "{{ docker_common_options }}"
# NOTE(review): detach is disabled, presumably so the task blocks until the
# one-off container exits - confirm against the kolla_docker module.
detach: False
dimensions: "{{ service.dimensions }}"
environment:
# KOLLA_UPGRADE is passed with an empty value.
KOLLA_UPGRADE:
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
# Connection details handed to the container: address, port, root password.
DB_HOST: "{{ api_interface_address }}"
DB_PORT: "{{ mariadb_port }}"
DB_ROOT_PASSWORD: "{{ database_password }}"
image: "{{ service.image }}"
labels:
UPGRADE:
name: "upgrade_mariadb"
restart_policy: "never"
volumes: "{{ service.volumes }}"
# The environment carries DB_ROOT_PASSWORD, so keep task output out of the logs.
no_log: true
# Only runs when kolla_action is "upgrade", and never on master_host.
when:
- kolla_action == "upgrade"
- inventory_hostname != master_host
listen: restart mariadb
# NOTE(mgoddard): In Rocky the MariaDB image had an issue where it would not
# stop on demand, and would result in Docker forcibly killing the container.
# This could lead to a failed upgrade if the new image is unable to recover
# from the crash. See https://bugs.launchpad.net/kolla-ansible/+bug/1820325.
# TODO(mgoddard): Remove this task in Train.
# Handler: on the master host, start a one-off "mariadb_shutdown" container
# that asks the running server to stop via 'mysqladmin shutdown'. It listens
# on the same "restart mariadb" topic as the "restart master mariadb" handler
# defined after it.
# NOTE(review): unlike "shutdown slave mariadb", this handler has no
# 'has_cluster | bool' condition - confirm that the difference is intentional.
- name: shutdown master mariadb
vars:
service_name: "mariadb"
service: "{{ mariadb_services[service_name] }}"
become: true
kolla_docker:
action: "start_container"
# Runs kolla_set_configs first, then 'mysqladmin shutdown' as root against
# api_interface_address; the shutdown only runs if kolla_set_configs succeeds.
command: >-
bash -c '
sudo -E kolla_set_configs &&
mysqladmin shutdown --host={{ api_interface_address }} --user=root --password={{ database_password }}
'
common_options: "{{ docker_common_options }}"
# NOTE(review): detach is disabled, presumably so the task blocks until the
# one-off container exits - confirm against the kolla_docker module.
detach: False
name: "mariadb_shutdown"
# Reuses the image, volumes and dimensions of the mariadb service entry.
image: "{{ service.image }}"
volumes: "{{ service.volumes }}"
dimensions: "{{ service.dimensions }}"
labels:
UPGRADE:
restart_policy: "never"
# The rendered command embeds database_password, so keep it out of the logs.
no_log: true
# Skipped for config-only runs; runs only on master_host.
when:
- kolla_action != "config"
- inventory_hostname == master_host
listen: restart mariadb
- name: restart master mariadb
vars:
service_name: "mariadb"
service: "{{ mariadb_services[service_name] }}"
mariadb_container: "{{ check_mariadb_containers.results|selectattr('item.key', 'equalto', service_name)|first }}"
become: true
kolla_docker:
action: "recreate_or_restart_container"
@ -102,15 +185,7 @@
when:
- kolla_action != "config"
- inventory_hostname == master_host
- inventory_hostname in groups[service.group]
- service.enabled | bool
- mariadb_config_json.changed | bool
or mariadb_galera_conf.changed | bool
or mariadb_wsrep_notify.changed | bool
or mariadb_container.changed | bool
or bootstrap_host is defined
notify:
- Waiting for master mariadb
listen: restart mariadb
# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
# https://github.com/ansible/ansible-modules-core/issues/2788
@ -128,3 +203,32 @@
when:
- kolla_action != "config"
- inventory_hostname == master_host
listen: restart mariadb
# Handler: during an upgrade, start a one-off "upgrade_mariadb" container on
# the master host with KOLLA_UPGRADE present in its environment.
# NOTE(review): presumably the image's start script runs mysql_upgrade when it
# sees KOLLA_UPGRADE - confirm against the Kolla MariaDB image.
- name: run upgrade on master
vars:
service_name: "mariadb"
service: "{{ mariadb_services[service_name] }}"
become: true
kolla_docker:
action: "start_container"
common_options: "{{ docker_common_options }}"
# NOTE(review): detach is disabled, presumably so the task blocks until the
# one-off container exits - confirm against the kolla_docker module.
detach: False
dimensions: "{{ service.dimensions }}"
environment:
# KOLLA_UPGRADE is passed with an empty value.
KOLLA_UPGRADE:
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
# Connection details handed to the container: address, port, root password.
DB_HOST: "{{ api_interface_address }}"
DB_PORT: "{{ mariadb_port }}"
DB_ROOT_PASSWORD: "{{ database_password }}"
image: "{{ service.image }}"
labels:
UPGRADE:
name: "upgrade_mariadb"
restart_policy: "never"
volumes: "{{ service.volumes }}"
# The environment carries DB_ROOT_PASSWORD, so keep task output out of the logs.
no_log: true
# Only runs when kolla_action is "upgrade", and only on master_host.
when:
- kolla_action == "upgrade"
- inventory_hostname == master_host
listen: restart mariadb

View File

@ -20,7 +20,7 @@
restart_policy: "never"
volumes: "{{ service.volumes }}"
notify:
- Starting first MariaDB container
- Bootstrap MariaDB cluster
- set_fact:
bootstrap_host: "{{ inventory_hostname }}"

View File

@ -48,13 +48,11 @@
dest: "{{ node_config_directory }}/{{ service_name }}/config.json"
mode: "0660"
become: true
register: mariadb_config_json
when:
- inventory_hostname in groups[service.group]
- service.enabled | bool
notify:
- restart slave mariadb
- restart master mariadb
- restart mariadb
- name: Copying over galera.cnf
vars:
@ -68,13 +66,11 @@
dest: "{{ node_config_directory }}/{{ service_name }}/galera.cnf"
mode: "0660"
become: true
register: mariadb_galera_conf
when:
- inventory_hostname in groups[service.group]
- service.enabled | bool
notify:
- restart slave mariadb
- restart master mariadb
- restart mariadb
- name: Copying over wsrep-notify.sh
template:
@ -82,14 +78,12 @@
dest: "{{ node_config_directory }}/{{ item.key }}/wsrep-notify.sh"
mode: "0770"
become: true
register: mariadb_wsrep_notify
when:
- inventory_hostname in groups[item.value.group]
- item.value.enabled | bool
with_dict: "{{ mariadb_services }}"
notify:
- restart slave mariadb
- restart master mariadb
- restart mariadb
- name: Check mariadb containers
become: true
@ -100,12 +94,10 @@
image: "{{ item.value.image }}"
volumes: "{{ item.value.volumes }}"
dimensions: "{{ item.value.dimensions }}"
register: check_mariadb_containers
when:
- kolla_action != "config"
- inventory_hostname in groups[item.value.group]
- item.value.enabled | bool
with_dict: "{{ mariadb_services }}"
notify:
- restart slave mariadb
- restart master mariadb
- restart mariadb

View File

@ -54,5 +54,10 @@ innodb_buffer_pool_size = '{{ dynamic_pool_size_mb }}M'
innodb_buffer_pool_size = '8192M'
{% endif %}
# The default value for innodb_lock_schedule_algorithm is VATS, but this does
# not work with galera. Set FCFS explicitly to avoid a warning.
# https://mariadb.com/kb/en/library/innodb-system-variables/#innodb_lock_schedule_algorithm
innodb_lock_schedule_algorithm = FCFS
[server]
pid-file=/var/lib/mysql/mariadb.pid