Better control of mariadb restarts in upgrades

This commit adds additional control around how the galera cluster
nodes are restarted during an upgrade (both the OpenStack upgrade
and the MariaDB upgrade). The LXC config that gets added during the
lxc-containers-create play would normally force a container restart.
This commit essentially does the following:

- run lxc-containers-create on galera nodes but prevent container restarts
  from being triggered by the new LXC config that gets laid down
- run a mariadb upgrade
- run a controlled rolling restart of all mariadb cluster containers

Change-Id: I5d979eb15c471274cc14ce6f41c8ae479c5131d6
This commit is contained in:
Darren Birkett 2016-07-01 14:42:04 +01:00
parent 9a3a7ae495
commit fd690e1fc3
3 changed files with 83 additions and 3 deletions

View File

@ -0,0 +1,8 @@
---
upgrade:
  - During upgrades, container and service restarts for the MariaDB/Galera
    cluster were being triggered multiple times, causing the cluster to
    become unstable and often unrecoverable. This situation has been
    greatly improved, and restarts are now tightly controlled so that each
    Galera container only needs to be restarted once, in a controlled,
    predictable and repeatable way.

View File

@ -145,11 +145,24 @@ function main {
RUN_TASKS+=("${UPGRADE_PLAYBOOKS}/deploy-config-changes.yml")
RUN_TASKS+=("${UPGRADE_PLAYBOOKS}/user-secrets-adjustment.yml")
RUN_TASKS+=("${UPGRADE_PLAYBOOKS}/db-collation-alter.yml")
RUN_TASKS+=("setup-hosts.yml --limit '!galera_all[0]'")
RUN_TASKS+=("lxc-containers-create.yml --limit galera_all[0]")
# we don't want to trigger galera container restarts yet
RUN_TASKS+=("setup-hosts.yml --limit '!galera_all'")
# add new container config to galera containers but don't restart
RUN_TASKS+=("lxc-containers-create.yml -e 'lxc_container_allow_restarts=false' --limit galera_all")
# rebuild the repo servers
RUN_TASKS+=("repo-install.yml")
RUN_TASKS+=("${UPGRADE_PLAYBOOKS}/repo-server-pip-conf-removal.yml")
RUN_TASKS+=("${UPGRADE_PLAYBOOKS}/old-hostname-compatibility.yml")
RUN_TASKS+=("setup-infrastructure.yml -e 'galera_upgrade=true' -e 'rabbitmq_upgrade=true'")
# explicitly perform mariadb upgrade
RUN_TASKS+=("galera-install.yml -e 'galera_upgrade=true'")
# explicitly perform controlled galera cluster restart
RUN_TASKS+=("${UPGRADE_PLAYBOOKS}/galera-cluster-rolling-restart.yml")
# individually run each of the remaining plays from setup-infrastructure
RUN_TASKS+=("haproxy-install.yml")
RUN_TASKS+=("memcached-install.yml")
RUN_TASKS+=("rabbitmq-install.yml -e 'rabbitmq_upgrade=true'")
RUN_TASKS+=("utility-install.yml")
RUN_TASKS+=("rsyslog-install.yml")
RUN_TASKS+=("${UPGRADE_PLAYBOOKS}/memcached-flush.yml")
RUN_TASKS+=("setup-openstack.yml")
# Run the tasks in order

View File

@ -0,0 +1,59 @@
---
# Copyright 2016, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Rolling restart of the galera cluster, one node at a time.
#
# ``serial: 1`` processes a single host per batch and
# ``max_fail_percentage: 0`` stops the play as soon as a host in a batch
# fails, so a broken restart is not repeated on the remaining nodes.
# For each host the play stops the mysql service, stops and starts the
# container from its physical host, then waits until MariaDB answers on
# port 3306 and reports a "Synced" WSREP state before moving on.
- name: Gracefully restart mariadb/galera cluster
  hosts: galera_all
  serial: 1
  max_fail_percentage: 0
  gather_facts: false
  user: root
  tasks:
    # ``retries``/``delay`` are only honoured by older Ansible releases when
    # an ``until`` condition is present, so the result is registered and
    # checked explicitly to make the retry effective.
    - name: Stop mariadb
      service:
        name: mysql
        state: stopped
      register: mysql_stop
      until: not (mysql_stop.failed | default(false))
      retries: 5
      delay: 10

    # Container state is managed from the host that owns the container,
    # hence the delegation to ``physical_host``.
    - name: Stop container
      lxc_container:
        name: "{{ inventory_hostname }}"
        state: "stopped"
      delegate_to: "{{ physical_host }}"

    - name: Start container
      lxc_container:
        name: "{{ inventory_hostname }}"
        state: "started"
      delegate_to: "{{ physical_host }}"

  post_tasks:
    # Runs from the deployment host (local_action) and only succeeds once
    # the data read from port 3306 matches "MariaDB". As above, an explicit
    # ``until`` is required for ``retries``/``delay`` to take effect.
    - name: Wait for mariadb port 3306 to be available
      local_action:
        module: wait_for
        port: "3306"
        host: "{{ ansible_ssh_host | default(inventory_hostname) }}"
        search_regex: MariaDB
      register: mysql_port
      until: not (mysql_port.failed | default(false))
      retries: 10
      delay: 10

    # Polls once per second, up to 60 times, until the node reports
    # wsrep_local_state_comment as "Synced". The command only reads status,
    # so it must never be reported as a change.
    - name: Check that WSREP is ready and Synced
      shell: "/usr/bin/mysqladmin --defaults-file=/etc/mysql/debian.cnf extended-status | egrep '(wsrep_local_state_comment)'"
      register: mysql_ready
      until:
        - mysql_ready.rc == 0
        - (mysql_ready.stdout).find("Synced") != -1
      retries: 60
      delay: 1
      changed_when: false