Browse Source

Fix MariaDB 10.3 upgrade

Upgrading MariaDB from Rocky to Stein currently fails, with the new
container left continually restarting. The problem is that the Rocky
container does not shut down cleanly, leaving behind state that the new
container cannot recover from. The container does not shut down cleanly
because we run dumb-init with a --single-child argument, causing it to
forward signals to only the process executed by dumb-init. In our case
this is mysqld_safe, which ignores various signals, including SIGTERM.
After a (default 10 second) timeout, Docker then kills the container.

A Kolla change [1] removes the --single-child argument from dumb-init
for the MariaDB container; however, we still need to support upgrading
from Rocky images that don't have this change. To do that, we add new
handlers to execute 'mysqladmin shutdown' to cleanly shut down the
service.

A second issue with the current upgrade approach is that we don't
execute mysql_upgrade after starting the new service. This can leave the
database state using the format of the previous release. This patch also
adds handlers to execute mysql_upgrade.

[1] https://review.openstack.org/644244

Depends-On: https://review.openstack.org/644244
Depends-On: https://review.openstack.org/645990
Change-Id: I08a655a359ff9cfa79043f2166dca59199c7d67f
Closes-Bug: #1820325
tags/8.0.0.0rc1
Mark Goddard 3 months ago
parent
commit
b25c0ee477

+ 128
- 24
ansible/roles/mariadb/handlers/main.yml View File

@@ -20,11 +20,9 @@
20 20
   when:
21 21
     - bootstrap_host is defined
22 22
     - bootstrap_host == inventory_hostname
23
+  listen: Bootstrap MariaDB cluster
23 24
   notify:
24
-    - wait first mariadb container
25
-    - restart slave mariadb
26
-    - restart master mariadb
27
-
25
+    - restart mariadb
28 26
 
29 27
 # TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
30 28
 # https://github.com/ansible/ansible-modules-core/issues/2788
@@ -42,12 +40,45 @@
42 40
   when:
43 41
     - bootstrap_host is defined
44 42
     - bootstrap_host == inventory_hostname
43
+  listen: Bootstrap MariaDB cluster
44
+
45
+# NOTE(mgoddard): In Rocky the MariaDB image had an issue where it would not
46
+# stop on demand, and would result in Docker forcibly killing the container.
47
+# This could lead to a failed upgrade if the new image is unable to recover
48
+# from the crash. See https://bugs.launchpad.net/kolla-ansible/+bug/1820325.
49
+# TODO(mgoddard): Remove this task in Train.
50
+- name: shutdown slave mariadb
51
+  vars:
52
+    service_name: "mariadb"
53
+    service: "{{ mariadb_services[service_name] }}"
54
+  become: true
55
+  kolla_docker:
56
+    action: "start_container"
57
+    command: >-
58
+      bash -c '
59
+      sudo -E kolla_set_configs &&
60
+      mysqladmin shutdown --host={{ api_interface_address }} --user=root --password={{ database_password }}
61
+      '
62
+    common_options: "{{ docker_common_options }}"
63
+    detach: False
64
+    name: "mariadb_shutdown"
65
+    image: "{{ service.image }}"
66
+    volumes: "{{ service.volumes }}"
67
+    dimensions: "{{ service.dimensions }}"
68
+    labels:
69
+      UPGRADE:
70
+    restart_policy: "never"
71
+  no_log: true
72
+  when:
73
+    - kolla_action != "config"
74
+    - has_cluster | bool
75
+    - inventory_hostname != master_host
76
+  listen: restart mariadb
45 77
 
46 78
 - name: restart slave mariadb
47 79
   vars:
48 80
     service_name: "mariadb"
49 81
     service: "{{ mariadb_services[service_name] }}"
50
-    mariadb_container: "{{ check_mariadb_containers.results|selectattr('item.key', 'equalto', service_name)|first }}"
51 82
   become: true
52 83
   kolla_docker:
53 84
     action: "recreate_or_restart_container"
@@ -59,15 +90,7 @@
59 90
   when:
60 91
     - kolla_action != "config"
61 92
     - inventory_hostname != master_host
62
-    - inventory_hostname in groups[service.group]
63
-    - service.enabled | bool
64
-    - mariadb_config_json.changed | bool
65
-      or mariadb_galera_conf.changed | bool
66
-      or mariadb_wsrep_notify.changed | bool
67
-      or mariadb_container.changed | bool
68
-      or bootstrap_host is defined
69
-  notify:
70
-    - wait for slave mariadb
93
+  listen: restart mariadb
71 94
 
72 95
 # TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
73 96
 # https://github.com/ansible/ansible-modules-core/issues/2788
@@ -85,12 +108,72 @@
85 108
   when:
86 109
     - kolla_action != "config"
87 110
     - inventory_hostname != master_host
111
+  listen: restart mariadb
112
+
113
+- name: run upgrade on slave
114
+  vars:
115
+    service_name: "mariadb"
116
+    service: "{{ mariadb_services[service_name] }}"
117
+  become: true
118
+  kolla_docker:
119
+    action: "start_container"
120
+    common_options: "{{ docker_common_options }}"
121
+    detach: False
122
+    dimensions: "{{ service.dimensions }}"
123
+    environment:
124
+      KOLLA_UPGRADE:
125
+      KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
126
+      DB_HOST: "{{ api_interface_address }}"
127
+      DB_PORT: "{{ mariadb_port }}"
128
+      DB_ROOT_PASSWORD: "{{ database_password }}"
129
+    image: "{{ service.image }}"
130
+    labels:
131
+      UPGRADE:
132
+    name: "upgrade_mariadb"
133
+    restart_policy: "never"
134
+    volumes: "{{ service.volumes }}"
135
+  no_log: true
136
+  when:
137
+    - kolla_action == "upgrade"
138
+    - inventory_hostname != master_host
139
+  listen: restart mariadb
140
+
141
+# NOTE(mgoddard): In Rocky the MariaDB image had an issue where it would not
142
+# stop on demand, and would result in Docker forcibly killing the container.
143
+# This could lead to a failed upgrade if the new image is unable to recover
144
+# from the crash. See https://bugs.launchpad.net/kolla-ansible/+bug/1820325.
145
+# TODO(mgoddard): Remove this task in Train.
146
+- name: shutdown master mariadb
147
+  vars:
148
+    service_name: "mariadb"
149
+    service: "{{ mariadb_services[service_name] }}"
150
+  become: true
151
+  kolla_docker:
152
+    action: "start_container"
153
+    command: >-
154
+      bash -c '
155
+      sudo -E kolla_set_configs &&
156
+      mysqladmin shutdown --host={{ api_interface_address }} --user=root --password={{ database_password }}
157
+      '
158
+    common_options: "{{ docker_common_options }}"
159
+    detach: False
160
+    name: "mariadb_shutdown"
161
+    image: "{{ service.image }}"
162
+    volumes: "{{ service.volumes }}"
163
+    dimensions: "{{ service.dimensions }}"
164
+    labels:
165
+      UPGRADE:
166
+    restart_policy: "never"
167
+  no_log: true
168
+  when:
169
+    - kolla_action != "config"
170
+    - inventory_hostname == master_host
171
+  listen: restart mariadb
88 172
 
89 173
 - name: restart master mariadb
90 174
   vars:
91 175
     service_name: "mariadb"
92 176
     service: "{{ mariadb_services[service_name] }}"
93
-    mariadb_container: "{{ check_mariadb_containers.results|selectattr('item.key', 'equalto', service_name)|first }}"
94 177
   become: true
95 178
   kolla_docker:
96 179
     action: "recreate_or_restart_container"
@@ -102,15 +185,7 @@
102 185
   when:
103 186
     - kolla_action != "config"
104 187
     - inventory_hostname == master_host
105
-    - inventory_hostname in groups[service.group]
106
-    - service.enabled | bool
107
-    - mariadb_config_json.changed | bool
108
-      or mariadb_galera_conf.changed | bool
109
-      or mariadb_wsrep_notify.changed | bool
110
-      or mariadb_container.changed | bool
111
-      or bootstrap_host is defined
112
-  notify:
113
-    - Waiting for master mariadb
188
+  listen: restart mariadb
114 189
 
115 190
 # TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
116 191
 # https://github.com/ansible/ansible-modules-core/issues/2788
@@ -128,3 +203,32 @@
128 203
   when:
129 204
     - kolla_action != "config"
130 205
     - inventory_hostname == master_host
206
+  listen: restart mariadb
207
+
208
+- name: run upgrade on master
209
+  vars:
210
+    service_name: "mariadb"
211
+    service: "{{ mariadb_services[service_name] }}"
212
+  become: true
213
+  kolla_docker:
214
+    action: "start_container"
215
+    common_options: "{{ docker_common_options }}"
216
+    detach: False
217
+    dimensions: "{{ service.dimensions }}"
218
+    environment:
219
+      KOLLA_UPGRADE:
220
+      KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
221
+      DB_HOST: "{{ api_interface_address }}"
222
+      DB_PORT: "{{ mariadb_port }}"
223
+      DB_ROOT_PASSWORD: "{{ database_password }}"
224
+    image: "{{ service.image }}"
225
+    labels:
226
+      UPGRADE:
227
+    name: "upgrade_mariadb"
228
+    restart_policy: "never"
229
+    volumes: "{{ service.volumes }}"
230
+  no_log: true
231
+  when:
232
+    - kolla_action == "upgrade"
233
+    - inventory_hostname == master_host
234
+  listen: restart mariadb

+ 1
- 1
ansible/roles/mariadb/tasks/bootstrap_cluster.yml View File

@@ -20,7 +20,7 @@
20 20
     restart_policy: "never"
21 21
     volumes: "{{ service.volumes }}"
22 22
   notify:
23
-    - Starting first MariaDB container
23
+    - Bootstrap MariaDB cluster
24 24
 
25 25
 - set_fact:
26 26
     bootstrap_host: "{{ inventory_hostname }}"

+ 4
- 12
ansible/roles/mariadb/tasks/config.yml View File

@@ -48,13 +48,11 @@
48 48
     dest: "{{ node_config_directory }}/{{ service_name }}/config.json"
49 49
     mode: "0660"
50 50
   become: true
51
-  register: mariadb_config_json
52 51
   when:
53 52
     - inventory_hostname in groups[service.group]
54 53
     - service.enabled | bool
55 54
   notify:
56
-    - restart slave mariadb
57
-    - restart master mariadb
55
+    - restart mariadb
58 56
 
59 57
 - name: Copying over galera.cnf
60 58
   vars:
@@ -68,13 +66,11 @@
68 66
     dest: "{{ node_config_directory }}/{{ service_name }}/galera.cnf"
69 67
     mode: "0660"
70 68
   become: true
71
-  register: mariadb_galera_conf
72 69
   when:
73 70
     - inventory_hostname in groups[service.group]
74 71
     - service.enabled | bool
75 72
   notify:
76
-    - restart slave mariadb
77
-    - restart master mariadb
73
+    - restart mariadb
78 74
 
79 75
 - name: Copying over wsrep-notify.sh
80 76
   template:
@@ -82,14 +78,12 @@
82 78
     dest: "{{ node_config_directory }}/{{ item.key }}/wsrep-notify.sh"
83 79
     mode: "0770"
84 80
   become: true
85
-  register: mariadb_wsrep_notify
86 81
   when:
87 82
     - inventory_hostname in groups[item.value.group]
88 83
     - item.value.enabled | bool
89 84
   with_dict: "{{ mariadb_services }}"
90 85
   notify:
91
-    - restart slave mariadb
92
-    - restart master mariadb
86
+    - restart mariadb
93 87
 
94 88
 - name: Check mariadb containers
95 89
   become: true
@@ -100,12 +94,10 @@
100 94
     image: "{{ item.value.image }}"
101 95
     volumes: "{{ item.value.volumes }}"
102 96
     dimensions: "{{ item.value.dimensions }}"
103
-  register: check_mariadb_containers
104 97
   when:
105 98
     - kolla_action != "config"
106 99
     - inventory_hostname in groups[item.value.group]
107 100
     - item.value.enabled | bool
108 101
   with_dict: "{{ mariadb_services }}"
109 102
   notify:
110
-    - restart slave mariadb
111
-    - restart master mariadb
103
+    - restart mariadb

+ 5
- 0
ansible/roles/mariadb/templates/galera.cnf.j2 View File

@@ -54,5 +54,10 @@ innodb_buffer_pool_size = '{{ dynamic_pool_size_mb }}M'
54 54
 innodb_buffer_pool_size = '8192M'
55 55
 {% endif %}
56 56
 
57
+# The default value for innodb_lock_schedule_algorithm is VATS, but this does
58
+# not work with galera. Set FCFS explicitly to avoid a warning.
59
+# https://mariadb.com/kb/en/library/innodb-system-variables/#innodb_lock_schedule_algorithm.
60
+innodb_lock_schedule_algorithm = FCFS
61
+
57 62
 [server]
58 63
 pid-file=/var/lib/mysql/mariadb.pid

Loading…
Cancel
Save