heat_template_version: wallaby

description: >
  MySQL service deployment with pacemaker bundle
parameters:
  ContainerMysqlImage:
    description: image
    type: string
  ContainerMysqlConfigImage:
    description: The container image to use for the mysql config_volume
    type: string
  ClusterCommonTag:
    default: false
    description: When set to false, a pacemaker service is configured
                 to use a floating tag for its container image name,
                 e.g. 'REGISTRY/NAMESPACE/IMAGENAME:pcmklatest'. When
                 set to true, the service uses a floating prefix as
                 well, e.g. 'cluster.common.tag/IMAGENAME:pcmklatest'.
    type: boolean
  ClusterFullTag:
    default: false
    description: When set to true, the pacemaker service uses a fully
                 constant tag for its container image name, e.g.
                 'cluster.common.tag/SERVICENAME:pcmklatest'.
    type: boolean
  EndpointMap:
    default: {}
    description: Mapping of service endpoint -> protocol. Typically set
                 via parameter_defaults in the resource registry.
    type: json
  ServiceData:
    default: {}
    description: Dictionary packing service data
    type: json
  ServiceNetMap:
    default: {}
    description: Mapping of service_name -> network name. Typically set
                 via parameter_defaults in the resource registry. Use
                 parameter_merge_strategies to merge it with the defaults.
    type: json
  MysqlRootPassword:
    type: string
    hidden: true
    default: ''
  MysqlClustercheckPassword:
    type: string
    hidden: true
  MysqlUpgradePersist:
    type: boolean
    default: false
  MysqlUpgradeTransfer:
    type: boolean
    default: true
  RoleName:
    default: ''
    description: Role name on which the service is applied
    type: string
  RoleParameters:
    default: {}
    description: Parameters specific to the role
    type: json
  EnableInternalTLS:
    type: boolean
    default: false
  InternalTLSCAFile:
    default: '/etc/ipa/ca.crt'
    type: string
    description: Specifies the default CA cert to use if TLS is used for
                 services in the internal network.
  ConfigDebug:
    default: false
    description: Whether to run config management (e.g. Puppet) in debug mode.
    type: boolean
  DeployIdentifier:
    default: ''
    type: string
    description: >
      Setting this to a unique value will re-run any deployment tasks which
      perform configuration on a Heat stack-update.
  ContainerCli:
    type: string
    default: 'podman'
    description: CLI tool used to manage containers.
    constraints:
      - allowed_values: ['docker', 'podman']
resources:

  ContainersCommon:
    type: ../containers-common.yaml

  MysqlBase:
    type: ./mysql-base.yaml
    properties:
      EndpointMap: {get_param: EndpointMap}
      ServiceData: {get_param: ServiceData}
      ServiceNetMap: {get_param: ServiceNetMap}
      RoleName: {get_param: RoleName}
      RoleParameters: {get_param: RoleParameters}
conditions:
  docker_enabled: {equals: [{get_param: ContainerCli}, 'docker']}
outputs:
  role_data:
    description: Containerized service MySQL using composable services.
    value:
      service_name: {get_attr: [MysqlBase, role_data, service_name]}
      firewall_rules:
        '104 mysql galera-bundle':
          dport:
            - 873
            - 3123
            - 3306
            - 4444
            - 4567
            - 4568
            - 9200
config_settings :
map_merge :
2019-01-30 15:38:15 +00:00
- get_attr : [ MysqlBase, role_data, config_settings]
- tripleo::profile::pacemaker::database::mysql::bind_address :
str_replace :
template :
"%{hiera('fqdn_$NETWORK')}"
params :
$NETWORK : {get_param : [ ServiceNetMap, MysqlNetwork]}
# NOTE: bind IP is found in hiera replacing the network name with the
# local node IP for the given network; replacement examples
# (eg. for internal_api):
# internal_api -> IP
# internal_api_uri -> [IP]
# internal_api_subnet - > IP/CIDR
tripleo::profile::pacemaker::database::mysql::gmcast_listen_addr :
str_replace :
template :
"%{hiera('$NETWORK')}"
params :
$NETWORK : {get_param : [ ServiceNetMap, MysqlNetwork]}
tripleo::profile::pacemaker::database::mysql::ca_file :
get_param : InternalTLSCAFile
tripleo::profile::pacemaker::database::mysql_bundle::mysql_docker_image : &mysql_image_pcmklatest
2020-09-09 07:08:26 +00:00
if :
2021-04-12 10:45:23 +00:00
- {get_param : ClusterFullTag}
2020-09-09 07:08:26 +00:00
- "cluster.common.tag/mariadb:pcmklatest"
- yaql :
data :
if :
2021-04-12 10:45:23 +00:00
- {get_param : ClusterCommonTag}
2020-09-09 07:08:26 +00:00
- yaql :
data : {get_param : ContainerMysqlImage}
expression : concat("cluster.common.tag/", $.data.rightSplit(separator => "/", maxSplits => 1)[1])
- {get_param : ContainerMysqlImage}
expression : concat($.data.rightSplit(separator => ":", maxSplits => 1)[0], ":pcmklatest")
2017-07-21 08:41:41 +00:00
tripleo::profile::pacemaker::database::mysql_bundle::control_port : 3123
2018-11-13 20:03:47 +00:00
tripleo::profile::pacemaker::database::mysql_bundle::container_backend : {get_param : ContainerCli}
2017-09-26 13:23:11 +00:00
tripleo::profile::pacemaker::database::mysql_bundle::bind_address :
str_replace :
template :
"%{hiera('fqdn_$NETWORK')}"
params :
$NETWORK : {get_param : [ ServiceNetMap, MysqlNetwork]}
2021-04-12 10:45:23 +00:00
tripleo::profile::pacemaker::database::mysql_bundle::ca_file :
if :
- {get_param : EnableInternalTLS}
- {get_param : InternalTLSCAFile}
2017-04-10 14:43:09 +00:00
# BEGIN DOCKER SETTINGS #
puppet_config :
config_volume : mysql
puppet_tags : file # set this even though file is the default
step_config :
list_join :
- "\n"
- - "['Mysql_datadir', 'Mysql_user', 'Mysql_database', 'Mysql_grant', 'Mysql_plugin'].each |String $val| { noop_resource($val) }"
- "exec {'wait-for-settle': command => '/bin/true' }"
2020-04-09 12:07:34 +00:00
- "include tripleo::profile::pacemaker::database::mysql_bundle"
2019-05-13 14:13:04 +00:00
config_image : {get_param : ContainerMysqlConfigImage}
2017-04-10 14:43:09 +00:00
kolla_config :
/var/lib/kolla/config_files/mysql.json :
command : /usr/sbin/pacemaker_remoted
config_files :
2017-06-21 14:02:55 +00:00
- dest : /etc/libqb/force-filesystem-sockets
source : /dev/null
owner : root
perm : '0644'
- source : "/var/lib/kolla/config_files/src/*"
dest : "/"
merge : true
preserve_properties : true
2017-08-02 10:13:48 +00:00
- source : "/var/lib/kolla/config_files/src-tls/*"
dest : "/"
merge : true
optional : true
preserve_properties : true
permissions :
2018-03-15 14:15:37 +00:00
- path : /var/log/mysql
owner : mysql:mysql
recurse : true
2017-08-02 10:13:48 +00:00
- path : /etc/pki/tls/certs/mysql.crt
owner : mysql:mysql
perm : '0600'
optional : true
- path : /etc/pki/tls/private/mysql.key
owner : mysql:mysql
perm : '0600'
optional : true
2019-03-06 00:22:34 +00:00
container_config_scripts : {get_attr : [ ContainersCommon, container_config_scripts]}
2017-04-10 14:43:09 +00:00
docker_config :
step_1 :
mysql_data_ownership :
start_order : 0
detach : false
2019-05-13 14:13:04 +00:00
image : {get_param : ContainerMysqlImage}
2017-04-10 14:43:09 +00:00
net : host
user : root
# Kolla does only non-recursive chown
command : [ 'chown' , '-R' , 'mysql:' , '/var/lib/mysql' ]
volumes :
2018-09-05 15:28:06 +00:00
- /var/lib/mysql:/var/lib/mysql:z
2017-04-10 14:43:09 +00:00
mysql_bootstrap :
start_order : 1
detach : false
2019-05-13 14:13:04 +00:00
image : {get_param : ContainerMysqlImage}
2017-04-10 14:43:09 +00:00
net : host
2017-09-14 12:49:04 +00:00
user : root
2017-04-10 14:43:09 +00:00
# Kolla bootstraps aren't idempotent, explicitly checking if bootstrap was done
2017-07-28 16:13:53 +00:00
command :
- 'bash'
2018-04-19 19:00:32 +00:00
- '-ec'
2017-07-28 16:13:53 +00:00
-
list_join :
- "\n"
- - 'if [ -e /var/lib/mysql/mysql ]; then exit 0; fi'
2017-09-14 12:49:04 +00:00
- 'echo -e "\n[mysqld]\nwsrep_provider=none" >> /etc/my.cnf'
2018-04-19 19:00:32 +00:00
- 'kolla_set_configs'
- 'sudo -u mysql -E kolla_extend_start'
2020-09-17 14:09:50 +00:00
- 'timeout ${DB_MAX_TIMEOUT} /bin/bash -c ' 'while pgrep -af /usr/bin/mysqld_safe | grep -q -v grep; do sleep 1; done' ''
2017-09-14 12:49:04 +00:00
- 'mysqld_safe --skip-networking --wsrep-on=OFF &'
2017-07-28 16:13:53 +00:00
- 'timeout ${DB_MAX_TIMEOUT} /bin/bash -c ' 'until mysqladmin -uroot -p"${DB_ROOT_PASSWORD}" ping 2>/dev/null; do sleep 1; done' ''
- 'mysql -uroot -p"${DB_ROOT_PASSWORD}" -e "CREATE USER ' 'clustercheck' '@' 'localhost' ' IDENTIFIED BY ' '${DB_CLUSTERCHECK_PASSWORD}' ';"'
- 'mysql -uroot -p"${DB_ROOT_PASSWORD}" -e "GRANT PROCESS ON *.* TO ' 'clustercheck' '@' 'localhost' ' WITH GRANT OPTION;"'
2020-03-12 16:06:14 +00:00
- 'mysql -uroot -p"${DB_ROOT_PASSWORD}" -e "DELETE FROM mysql.user WHERE user = ' 'root' ' AND host NOT IN (' '%' ',' 'localhost' ');"'
2017-07-28 16:13:53 +00:00
- 'timeout ${DB_MAX_TIMEOUT} mysqladmin -uroot -p"${DB_ROOT_PASSWORD}" shutdown'
2020-12-17 12:23:58 +00:00
volumes :
2017-04-10 14:43:09 +00:00
list_concat :
- {get_attr : [ ContainersCommon, volumes]}
2020-12-17 12:23:58 +00:00
- &mysql_volumes
2019-11-07 08:42:08 +00:00
- /var/lib/kolla/config_files/mysql.json:/var/lib/kolla/config_files/config.json:rw,z
2019-12-04 06:47:19 +00:00
- /var/lib/config-data/puppet-generated/mysql:/var/lib/kolla/config_files/src:ro,z
2019-11-07 08:42:08 +00:00
- /var/lib/mysql:/var/lib/mysql:rw,z
2017-04-10 14:43:09 +00:00
environment :
2019-10-08 03:36:43 +00:00
KOLLA_CONFIG_STRATEGY : COPY_ALWAYS
KOLLA_BOOTSTRAP : true
DB_MAX_TIMEOUT : 60
DB_CLUSTERCHECK_PASSWORD : {get_param : MysqlClustercheckPassword}
2021-02-12 04:23:59 +00:00
DB_ROOT_PASSWORD : {get_param : MysqlRootPassword}
2017-04-10 14:43:09 +00:00
step_2 :
2020-05-01 11:12:46 +00:00
mysql_wait_bundle :
HA: reorder init_bundle and restart_bundle for improved updates
A pacemaker bundle can be restarted either because:
. a tripleo config has been updated (from /var/lib/config-data)
. the bundle config has been updated (container image, bundle
parameter,...)
In HA services, special container "*_restart_bundle" is in charge
of restarting the HA service on tripleo config change. Special
container "*_init_bundle" handles restart on bundle config change.
When both types of change occur at the same time, the bundle must
be restarted first, so that the container has a chance to be
recreated with all bind-mounts updated before it tries to reload
the updated config.
Implement the improvement with two changes:
1. Make the "*_restart_bundle" start after the "*_init_bundle", and
make sure "*_restart_bundle" is only enabled after the initial
deployment.
2. During minor update, make sure that the "*_restart_bundle" not
only restarts the container, but also waits until the service
is operational (e.g. galera fully promoted to Master). This forces
the rolling restart to happen sequentially, and avoid service
disruption in quorum-based clustered services like galera and
rabbitmq.
Tested the following update use cases:
* minor update: ensure that *_restart_bundle restarts all types of
resources (OCF, bundles, A/P, A/P Master/Slave).
* minor update: ensure *_restart_bundle is not executed when no
config or image update happened for a service.
* restart_bundle: when resource (OCF or container) fails to
restart, bail out early instead of waiting for nothing until
timeout is reached.
* restart_bundle: make sure a resource is restarted even when it
is in failed stated when *_restart_bundle is called.
* restart_bundle: A/P can be restarted on any node, so watch
restart globally. When the resource restarts as Slave, continue
watching for a Master elsewhere in the cluster.
* restart_bundle: if an A/P is not running locally, make sure it
doesn't get restarted anywhere else in the cluster.
* restart_bundle: do not try to restart stopped (disabled) or
unmanaged resource. Bail out early instead, to not wait until
timeout is reached.
* stack update: make sure that running a stack update with no
change does not trigger any *_restart_bundle, and does not
restart any HA container either.
* stack update: when bundle and config will change, ensure bundle
is updated before HA containers are restarted (e.g. HAProxy
migration to TLS everywhere)
Change-Id: Ic41d4597e9033f9d7847bb6c10c25f443fbd5b0e
Closes-Bug: #1839858
2019-11-15 16:41:42 +00:00
start_order : 0
2017-04-10 14:43:09 +00:00
detach : false
net : host
2018-11-13 08:58:36 +00:00
ipc : host
2017-04-10 14:43:09 +00:00
user : root
2019-03-08 15:11:35 +00:00
command : # '/container_puppet_apply.sh "STEP" "TAGS" "CONFIG" "DEBUG"'
2018-01-04 23:23:37 +00:00
list_concat :
2019-03-08 15:11:35 +00:00
- - '/container_puppet_apply.sh'
2018-01-04 23:23:37 +00:00
- '2'
2020-05-01 11:12:46 +00:00
- 'file,file_line,concat,augeas,galera_ready,mysql_database,mysql_grant,mysql_user'
- 'include tripleo::profile::pacemaker::database::mysql_bundle'
2018-01-04 23:23:37 +00:00
- if :
2021-04-12 10:45:23 +00:00
- {get_param : ConfigDebug}
2018-01-04 23:23:37 +00:00
- - '--debug'
2019-05-13 14:13:04 +00:00
image : {get_param : ContainerMysqlImage}
2017-04-10 14:43:09 +00:00
volumes :
2018-01-04 23:23:37 +00:00
list_concat :
2019-03-08 15:11:35 +00:00
- {get_attr : [ ContainersCommon, container_puppet_apply_volumes]}
2019-07-29 15:02:03 +00:00
- - /var/lib/mysql:/var/lib/mysql:rw,z
2018-09-16 11:38:35 +00:00
- /var/lib/config-data/puppet-generated/mysql/root:/root:rw
2019-07-29 15:02:03 +00:00
- if :
- docker_enabled
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
2018-04-11 10:36:55 +00:00
environment :
# NOTE: this should force this container to re-run on each
# update (scale-out, etc.)
2019-10-08 03:36:43 +00:00
TRIPLEO_DEPLOY_IDENTIFIER : {get_param : DeployIdentifier}
2017-04-10 14:43:09 +00:00
host_prep_tasks :
2018-03-15 14:15:37 +00:00
- name : create persistent directories
2017-04-10 14:43:09 +00:00
file :
2018-09-05 15:28:06 +00:00
path : "{{ item.path }}"
2017-04-10 14:43:09 +00:00
state : directory
2018-09-05 15:28:06 +00:00
setype : "{{ item.setype }}"
2020-04-06 21:15:44 +00:00
mode : "{{ item.mode|default(omit) }}"
2018-03-15 14:15:37 +00:00
with_items :
2020-02-07 12:33:20 +00:00
- {'path': /var/log/containers/mysql, 'setype': 'container_file_t', 'mode' : '0750' }
- {'path': /var/lib/mysql, 'setype' : 'container_file_t' }
- {'path': /var/log/mariadb, 'setype': 'container_file_t', 'mode' : '0750' }
2017-08-02 10:13:48 +00:00
metadata_settings :
2019-01-30 15:38:15 +00:00
get_attr : [ MysqlBase, role_data, metadata_settings]
2018-11-01 18:27:39 +00:00
deploy_steps_tasks :
2021-01-21 15:37:32 +00:00
list_concat :
- get_attr : [ MysqlBase, role_data, deploy_steps_tasks]
- - name : MySQL tag container image for pacemaker
when : step|int == 1
2020-05-01 11:12:46 +00:00
import_role :
2021-01-21 15:37:32 +00:00
name : tripleo_container_tag
2020-05-01 11:12:46 +00:00
vars :
2021-01-21 15:37:32 +00:00
container_image : {get_param : ContainerMysqlImage}
container_image_latest : *mysql_image_pcmklatest
- name : MySQL HA Wrappers Step
when : step|int == 2
block : &mysql_puppet_bundle
- name : Mysql puppet bundle
import_role :
name : tripleo_ha_wrapper
vars :
tripleo_ha_wrapper_service_name : mysql
tripleo_ha_wrapper_resource_name : galera
tripleo_ha_wrapper_bundle_name : galera-bundle
tripleo_ha_wrapper_resource_state : Master
tripleo_ha_wrapper_puppet_config_volume : mysql
tripleo_ha_wrapper_puppet_execute : '["Mysql_datadir", "Mysql_user", "Mysql_database", "Mysql_grant", "Mysql_plugin"].each |String $val| { noop_resource($val) }; include ::tripleo::profile::base::pacemaker; include ::tripleo::profile::pacemaker::database::mysql_bundle'
tripleo_ha_wrapper_puppet_tags : 'pacemaker::resource::bundle,pacemaker::property,pacemaker::resource::ocf,pacemaker::constraint::order,pacemaker::constraint::colocation'
tripleo_ha_wrapper_puppet_debug : {get_param : ConfigDebug}
Pacemaker resource upgrade tasks compatible with staged upgrade
Add better idempotency checks on editing the pacemaker resources and
fetching and re-tagging new images, which prevents the upgrade from
failing. The latest status after staged upgrade looks like this:
Online: [ controller-0 controller-1 controller-2 ]
GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 redis-bundle-0@controller-0 redis-bundle-1@controller-1 ]
Full list of resources:
podman container set: galera-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-mariadb:pcmklatest]
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
podman container set: rabbitmq-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-rabbitmq:pcmklatest]
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1
podman container set: redis-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-redis:pcmklatest]
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
ip-192.168.24.8 (ocf::heartbeat:IPaddr2): Started controller-0
ip-10.0.0.106 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.16 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.23 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.11 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.4.25 (ocf::heartbeat:IPaddr2): Started controller-0
podman container set: haproxy-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-haproxy:pcmklatest]
haproxy-bundle-podman-0 (ocf::heartbeat:podman): Started controller-0
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-1
haproxy-bundle-podman-2 (ocf::heartbeat:podman): Stopped
podman container: openstack-cinder-volume [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-cinder-volume:pcmklatest]
openstack-cinder-volume-podman-0 (ocf::heartbeat:podman): Started controller-1
Failed Resource Actions:
* rabbitmq_monitor_10000 on rabbitmq-bundle-0 'unknown error' (1): call=4861, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:37:51 2019', queued=0ms, exec=0ms
* rabbitmq_monitor_10000 on rabbitmq-bundle-1 'unknown error' (1): call=42, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:15:55 2019', queued=0ms, exec=0ms
This indicates that there are still issues we'll need to solve, but at
least the upgrade passes now and we can keep solving the follow-up
issues while the critical upgrade path is unblocked.
Closes-Bug: #1838971
Change-Id: I2e88dc34fa59624523de4c52a1873438c78e972f
2019-07-30 08:57:54 +00:00
2018-01-30 16:23:47 +00:00
update_tasks :
2016-10-16 20:57:09 +00:00
- name : Tear-down non-HA mysql container
when :
- step|int == 1
block : &mysql_teardown_nonha
- name : "stat mysql container"
command : "podman container exists mysql"
failed_when : false
changed_when : false
register : stat_mysql_container
- name : Create clustercheck user and permissions
command :
argv : "{{ mysql_exec_data | container_exec_cmd }}"
changed_when : true
when :
- stat_mysql_container.rc == 0
vars :
mysql_exec_data :
environment :
CLUSTERCHECK_PASSWORD : {get_param : MysqlClustercheckPassword}
command :
- 'mysql'
- '/bin/sh'
- '-c'
- mysql -e "CREATE USER IF NOT EXISTS 'clustercheck'@'localhost' IDENTIFIED BY '${CLUSTERCHECK_PASSWORD}'; GRANT PROCESS ON *.* TO 'clustercheck'@'localhost' WITH GRANT OPTION;"
- name : Remove non-HA mysql container
include_role :
name : tripleo_container_rm
vars :
tripleo_container_cli : "{{ container_cli }}"
tripleo_containers_to_rm :
- mysql
2018-01-30 16:23:47 +00:00
- name : Mariadb fetch and retag container image for pacemaker
2017-10-11 06:51:08 +00:00
when : step|int == 2
2018-01-30 16:23:47 +00:00
block : &mysql_fetch_retag_container_tasks
Pacemaker resource upgrade tasks compatible with staged upgrade
Add better idempotency checks on editing the pacemaker resources and
fetching and re-tagging new images, which prevents the upgrade from
failing. The latest status after staged upgrade looks like this:
Online: [ controller-0 controller-1 controller-2 ]
GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 redis-bundle-0@controller-0 redis-bundle-1@controller-1 ]
Full list of resources:
podman container set: galera-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-mariadb:pcmklatest]
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
podman container set: rabbitmq-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-rabbitmq:pcmklatest]
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1
podman container set: redis-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-redis:pcmklatest]
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
ip-192.168.24.8 (ocf::heartbeat:IPaddr2): Started controller-0
ip-10.0.0.106 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.16 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.23 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.11 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.4.25 (ocf::heartbeat:IPaddr2): Started controller-0
podman container set: haproxy-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-haproxy:pcmklatest]
haproxy-bundle-podman-0 (ocf::heartbeat:podman): Started controller-0
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-1
haproxy-bundle-podman-2 (ocf::heartbeat:podman): Stopped
podman container: openstack-cinder-volume [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-cinder-volume:pcmklatest]
openstack-cinder-volume-podman-0 (ocf::heartbeat:podman): Started controller-1
Failed Resource Actions:
* rabbitmq_monitor_10000 on rabbitmq-bundle-0 'unknown error' (1): call=4861, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:37:51 2019', queued=0ms, exec=0ms
* rabbitmq_monitor_10000 on rabbitmq-bundle-1 'unknown error' (1): call=42, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:15:55 2019', queued=0ms, exec=0ms
This indicates that there are still issues we'll need to solve, but at
least the upgrade passes now and we can keep solving the follow-up
issues while the critical upgrade path is unblocked.
Closes-Bug: #1838971
Change-Id: I2e88dc34fa59624523de4c52a1873438c78e972f
2019-07-30 08:57:54 +00:00
- name : Get container galera image
2018-01-30 16:23:47 +00:00
set_fact :
Pacemaker resource upgrade tasks compatible with staged upgrade
Add better idempotency checks on editing the pacemaker resources and
fetching and re-tagging new images, which prevents the upgrade from
failing. The latest status after staged upgrade looks like this:
Online: [ controller-0 controller-1 controller-2 ]
GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 redis-bundle-0@controller-0 redis-bundle-1@controller-1 ]
Full list of resources:
podman container set: galera-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-mariadb:pcmklatest]
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
podman container set: rabbitmq-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-rabbitmq:pcmklatest]
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1
podman container set: redis-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-redis:pcmklatest]
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
ip-192.168.24.8 (ocf::heartbeat:IPaddr2): Started controller-0
ip-10.0.0.106 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.16 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.23 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.11 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.4.25 (ocf::heartbeat:IPaddr2): Started controller-0
podman container set: haproxy-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-haproxy:pcmklatest]
haproxy-bundle-podman-0 (ocf::heartbeat:podman): Started controller-0
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-1
haproxy-bundle-podman-2 (ocf::heartbeat:podman): Stopped
podman container: openstack-cinder-volume [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-cinder-volume:pcmklatest]
openstack-cinder-volume-podman-0 (ocf::heartbeat:podman): Started controller-1
Failed Resource Actions:
* rabbitmq_monitor_10000 on rabbitmq-bundle-0 'unknown error' (1): call=4861, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:37:51 2019', queued=0ms, exec=0ms
* rabbitmq_monitor_10000 on rabbitmq-bundle-1 'unknown error' (1): call=42, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:15:55 2019', queued=0ms, exec=0ms
This indicates that there are still issues we'll need to solve, but at
least the upgrade passes now and we can keep solving the follow-up
issues while the critical upgrade path is unblocked.
Closes-Bug: #1838971
Change-Id: I2e88dc34fa59624523de4c52a1873438c78e972f
2019-07-30 08:57:54 +00:00
galera_image : {get_param : ContainerMysqlImage}
galera_image_latest : *mysql_image_pcmklatest
- name : Pull latest galera images
command : "{{container_cli}} pull {{galera_image}}"
2020-10-08 16:38:16 +00:00
register : result
retries : 3
delay : 3
until : result.rc == 0
Pacemaker resource upgrade tasks compatible with staged upgrade
Add better idempotency checks on editing the pacemaker resources and
fetching and re-tagging new images, which prevents the upgrade from
failing. The latest status after staged upgrade looks like this:
Online: [ controller-0 controller-1 controller-2 ]
GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 redis-bundle-0@controller-0 redis-bundle-1@controller-1 ]
Full list of resources:
podman container set: galera-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-mariadb:pcmklatest]
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
podman container set: rabbitmq-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-rabbitmq:pcmklatest]
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1
podman container set: redis-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-redis:pcmklatest]
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
ip-192.168.24.8 (ocf::heartbeat:IPaddr2): Started controller-0
ip-10.0.0.106 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.16 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.23 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.11 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.4.25 (ocf::heartbeat:IPaddr2): Started controller-0
podman container set: haproxy-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-haproxy:pcmklatest]
haproxy-bundle-podman-0 (ocf::heartbeat:podman): Started controller-0
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-1
haproxy-bundle-podman-2 (ocf::heartbeat:podman): Stopped
podman container: openstack-cinder-volume [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-cinder-volume:pcmklatest]
openstack-cinder-volume-podman-0 (ocf::heartbeat:podman): Started controller-1
Failed Resource Actions:
* rabbitmq_monitor_10000 on rabbitmq-bundle-0 'unknown error' (1): call=4861, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:37:51 2019', queued=0ms, exec=0ms
* rabbitmq_monitor_10000 on rabbitmq-bundle-1 'unknown error' (1): call=42, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:15:55 2019', queued=0ms, exec=0ms
This indicates that there are still issues we'll need to solve, but at
least the upgrade passes now and we can keep solving the follow-up
issues while the critical upgrade path is unblocked.
Closes-Bug: #1838971
Change-Id: I2e88dc34fa59624523de4c52a1873438c78e972f
2019-07-30 08:57:54 +00:00
- name : Get previous galera image id
shell : "{{container_cli}} inspect --format '{{'{{'}}.Id{{'}}'}}' {{galera_image_latest}}"
register : old_galera_image_id
failed_when : false
- name : Get new galera image id
shell : "{{container_cli}} inspect --format '{{'{{'}}.Id{{'}}'}}' {{galera_image}}"
register : new_galera_image_id
- name : Retag pcmklatest to latest galera image
include_role :
2020-01-20 16:31:22 +00:00
name : tripleo_container_tag
Pacemaker resource upgrade tasks compatible with staged upgrade
Add better idempotency checks on editing the pacemaker resources and
fetching and re-tagging new images, which prevents the upgrade from
failing. The latest status after staged upgrade looks like this:
Online: [ controller-0 controller-1 controller-2 ]
GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 redis-bundle-0@controller-0 redis-bundle-1@controller-1 ]
Full list of resources:
podman container set: galera-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-mariadb:pcmklatest]
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
podman container set: rabbitmq-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-rabbitmq:pcmklatest]
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1
podman container set: redis-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-redis:pcmklatest]
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
ip-192.168.24.8 (ocf::heartbeat:IPaddr2): Started controller-0
ip-10.0.0.106 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.16 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.23 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.11 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.4.25 (ocf::heartbeat:IPaddr2): Started controller-0
podman container set: haproxy-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-haproxy:pcmklatest]
haproxy-bundle-podman-0 (ocf::heartbeat:podman): Started controller-0
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-1
haproxy-bundle-podman-2 (ocf::heartbeat:podman): Stopped
podman container: openstack-cinder-volume [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-cinder-volume:pcmklatest]
openstack-cinder-volume-podman-0 (ocf::heartbeat:podman): Started controller-1
Failed Resource Actions:
* rabbitmq_monitor_10000 on rabbitmq-bundle-0 'unknown error' (1): call=4861, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:37:51 2019', queued=0ms, exec=0ms
* rabbitmq_monitor_10000 on rabbitmq-bundle-1 'unknown error' (1): call=42, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:15:55 2019', queued=0ms, exec=0ms
This indicates that there are still issues we'll need to solve, but at
least the upgrade passes now and we can keep solving the follow-up
issues while the critical upgrade path is unblocked.
Closes-Bug: #1838971
Change-Id: I2e88dc34fa59624523de4c52a1873438c78e972f
2019-07-30 08:57:54 +00:00
vars :
container_image : "{{galera_image}}"
container_image_latest : "{{galera_image_latest}}"
when :
- old_galera_image_id.stdout != new_galera_image_id.stdout
2020-05-01 11:12:46 +00:00
post_update_tasks :
- name : Mysql bundle post update
when : step|int == 1
block : *mysql_puppet_bundle
vars :
tripleo_ha_wrapper_minor_update : true
2018-01-30 16:23:47 +00:00
upgrade_tasks :
2016-10-16 20:57:09 +00:00
- name : Tear-down non-HA mysql container
when :
- step|int == 0
block : *mysql_teardown_nonha
2019-01-16 10:48:06 +00:00
- vars :
mysql_upgrade_persist : {get_param : MysqlUpgradePersist}
when :
2019-08-13 13:01:44 +00:00
- step|int == 3
2019-01-16 10:48:06 +00:00
- mysql_upgrade_persist
tags :
- never
2019-08-13 13:01:44 +00:00
- system_upgrade
2019-01-16 10:48:06 +00:00
- system_upgrade_prepare
block :
- name : Persist mysql data
include_role :
2020-01-20 16:31:22 +00:00
name : tripleo_persist
2019-01-16 10:48:06 +00:00
tasks_from : persist.yml
vars :
tripleo_persist_dir : /var/lib/mysql
- vars :
mysql_upgrade_persist : {get_param : MysqlUpgradePersist}
when :
2019-08-13 13:01:44 +00:00
- step|int == 5
2019-01-16 10:48:06 +00:00
- mysql_upgrade_persist
tags :
- never
2019-08-13 13:01:44 +00:00
- system_upgrade
2019-01-16 10:48:06 +00:00
- system_upgrade_run
block :
- name : Restore mysql data
include_role :
2020-01-20 16:31:22 +00:00
name : tripleo_persist
2019-01-16 10:48:06 +00:00
tasks_from : restore.yml
vars :
tripleo_persist_dir : /var/lib/mysql
Pacemaker resource upgrade tasks compatible with staged upgrade
Add better idempotency checks on editing the pacemaker resources and
fetching and re-tagging new images, which prevents the upgrade from
failing. The latest status after staged upgrade looks like this:
Online: [ controller-0 controller-1 controller-2 ]
GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 redis-bundle-0@controller-0 redis-bundle-1@controller-1 ]
Full list of resources:
podman container set: galera-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-mariadb:pcmklatest]
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
podman container set: rabbitmq-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-rabbitmq:pcmklatest]
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1
podman container set: redis-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-redis:pcmklatest]
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
ip-192.168.24.8 (ocf::heartbeat:IPaddr2): Started controller-0
ip-10.0.0.106 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.16 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.23 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.11 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.4.25 (ocf::heartbeat:IPaddr2): Started controller-0
podman container set: haproxy-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-haproxy:pcmklatest]
haproxy-bundle-podman-0 (ocf::heartbeat:podman): Started controller-0
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-1
haproxy-bundle-podman-2 (ocf::heartbeat:podman): Stopped
podman container: openstack-cinder-volume [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-cinder-volume:pcmklatest]
openstack-cinder-volume-podman-0 (ocf::heartbeat:podman): Started controller-1
Failed Resource Actions:
* rabbitmq_monitor_10000 on rabbitmq-bundle-0 'unknown error' (1): call=4861, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:37:51 2019', queued=0ms, exec=0ms
* rabbitmq_monitor_10000 on rabbitmq-bundle-1 'unknown error' (1): call=42, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:15:55 2019', queued=0ms, exec=0ms
This indicates that there are still issues we'll need to solve, but at
least the upgrade passes now and we can keep solving the follow-up
issues while the critical upgrade path is unblocked.
Closes-Bug: #1838971
Change-Id: I2e88dc34fa59624523de4c52a1873438c78e972f
2019-07-30 08:57:54 +00:00
- name : Prepare switch of galera image name
when :
- step|int == 0
block :
- name : Get galera image id currently used by pacemaker
shell : "pcs resource config galera-bundle | grep -Eo 'image=[^ ]+' | awk -F= '{print $2;}'"
register : galera_image_current_res
failed_when : false
- name : Image facts for galera
set_fact :
galera_image_latest : *mysql_image_pcmklatest
galera_image_current : "{{galera_image_current_res.stdout}}"
2019-10-04 16:27:54 +00:00
- name : Temporarily tag the current galera image id with the upgraded image name
import_role :
2020-01-20 16:31:22 +00:00
name : tripleo_container_tag
2019-10-04 16:27:54 +00:00
vars :
container_image : "{{galera_image_current}}"
container_image_latest : "{{galera_image_latest}}"
pull_image : false
when :
- galera_image_current != ''
- galera_image_current != galera_image_latest
# During an OS Upgrade, the cluster may not exist so we use
# the shell module instead.
# TODO(odyssey4me):
# Fix the pacemaker_resource module to handle the exception
# for a non-existant cluster more gracefully.
Pacemaker resource upgrade tasks compatible with staged upgrade
Add better idempotency checks on editing the pacemaker resources and
fetching and re-tagging new images, which prevents the upgrade from
failing. The latest status after staged upgrade looks like this:
Online: [ controller-0 controller-1 controller-2 ]
GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 redis-bundle-0@controller-0 redis-bundle-1@controller-1 ]
Full list of resources:
podman container set: galera-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-mariadb:pcmklatest]
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
podman container set: rabbitmq-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-rabbitmq:pcmklatest]
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1
podman container set: redis-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-redis:pcmklatest]
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
ip-192.168.24.8 (ocf::heartbeat:IPaddr2): Started controller-0
ip-10.0.0.106 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.16 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.23 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.11 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.4.25 (ocf::heartbeat:IPaddr2): Started controller-0
podman container set: haproxy-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-haproxy:pcmklatest]
haproxy-bundle-podman-0 (ocf::heartbeat:podman): Started controller-0
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-1
haproxy-bundle-podman-2 (ocf::heartbeat:podman): Stopped
podman container: openstack-cinder-volume [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-cinder-volume:pcmklatest]
openstack-cinder-volume-podman-0 (ocf::heartbeat:podman): Started controller-1
Failed Resource Actions:
* rabbitmq_monitor_10000 on rabbitmq-bundle-0 'unknown error' (1): call=4861, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:37:51 2019', queued=0ms, exec=0ms
* rabbitmq_monitor_10000 on rabbitmq-bundle-1 'unknown error' (1): call=42, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:15:55 2019', queued=0ms, exec=0ms
This indicates that there are still issues we'll need to solve, but at
least the upgrade passes now and we can keep solving the follow-up
issues while the critical upgrade path is unblocked.
Closes-Bug: #1838971
Change-Id: I2e88dc34fa59624523de4c52a1873438c78e972f
2019-07-30 08:57:54 +00:00
- name : Check galera cluster resource status
shell : pcs resource config galera-bundle
failed_when : false
2019-10-04 16:27:54 +00:00
changed_when : false
Pacemaker resource upgrade tasks compatible with staged upgrade
Add better idempotency checks on editing the pacemaker resources and
fetching and re-tagging new images, which prevents the upgrade from
failing. The latest status after staged upgrade looks like this:
Online: [ controller-0 controller-1 controller-2 ]
GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 redis-bundle-0@controller-0 redis-bundle-1@controller-1 ]
Full list of resources:
podman container set: galera-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-mariadb:pcmklatest]
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
podman container set: rabbitmq-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-rabbitmq:pcmklatest]
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1
podman container set: redis-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-redis:pcmklatest]
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
ip-192.168.24.8 (ocf::heartbeat:IPaddr2): Started controller-0
ip-10.0.0.106 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.16 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.23 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.11 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.4.25 (ocf::heartbeat:IPaddr2): Started controller-0
podman container set: haproxy-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-haproxy:pcmklatest]
haproxy-bundle-podman-0 (ocf::heartbeat:podman): Started controller-0
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-1
haproxy-bundle-podman-2 (ocf::heartbeat:podman): Stopped
podman container: openstack-cinder-volume [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-cinder-volume:pcmklatest]
openstack-cinder-volume-podman-0 (ocf::heartbeat:podman): Started controller-1
Failed Resource Actions:
* rabbitmq_monitor_10000 on rabbitmq-bundle-0 'unknown error' (1): call=4861, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:37:51 2019', queued=0ms, exec=0ms
* rabbitmq_monitor_10000 on rabbitmq-bundle-1 'unknown error' (1): call=42, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:15:55 2019', queued=0ms, exec=0ms
This indicates that there are still issues we'll need to solve, but at
least the upgrade passes now and we can keep solving the follow-up
issues while the critical upgrade path is unblocked.
Closes-Bug: #1838971
Change-Id: I2e88dc34fa59624523de4c52a1873438c78e972f
2019-07-30 08:57:54 +00:00
register : galera_pcs_res_result
- name : Set fact galera_pcs_res
set_fact :
2019-08-09 13:52:29 +00:00
galera_pcs_res : "{{galera_pcs_res_result.rc == 0}}"
Pacemaker resource upgrade tasks compatible with staged upgrade
Add better idempotency checks on editing the pacemaker resources and
fetching and re-tagging new images, which prevents the upgrade from
failing. The latest status after staged upgrade looks like this:
Online: [ controller-0 controller-1 controller-2 ]
GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 redis-bundle-0@controller-0 redis-bundle-1@controller-1 ]
Full list of resources:
podman container set: galera-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-mariadb:pcmklatest]
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
podman container set: rabbitmq-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-rabbitmq:pcmklatest]
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1
podman container set: redis-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-redis:pcmklatest]
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
ip-192.168.24.8 (ocf::heartbeat:IPaddr2): Started controller-0
ip-10.0.0.106 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.16 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.23 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.11 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.4.25 (ocf::heartbeat:IPaddr2): Started controller-0
podman container set: haproxy-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-haproxy:pcmklatest]
haproxy-bundle-podman-0 (ocf::heartbeat:podman): Started controller-0
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-1
haproxy-bundle-podman-2 (ocf::heartbeat:podman): Stopped
podman container: openstack-cinder-volume [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-cinder-volume:pcmklatest]
openstack-cinder-volume-podman-0 (ocf::heartbeat:podman): Started controller-1
Failed Resource Actions:
* rabbitmq_monitor_10000 on rabbitmq-bundle-0 'unknown error' (1): call=4861, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:37:51 2019', queued=0ms, exec=0ms
* rabbitmq_monitor_10000 on rabbitmq-bundle-1 'unknown error' (1): call=42, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:15:55 2019', queued=0ms, exec=0ms
This indicates that there are still issues we'll need to solve, but at
least the upgrade passes now and we can keep solving the follow-up
issues while the critical upgrade path is unblocked.
Closes-Bug: #1838971
Change-Id: I2e88dc34fa59624523de4c52a1873438c78e972f
2019-07-30 08:57:54 +00:00
- name : set is_mysql_bootstrap_node fact
tags : common
2021-02-19 15:29:01 +00:00
set_fact : is_mysql_bootstrap_node={{mysql_short_bootstrap_node_name|lower == ansible_facts['hostname']|lower}}
Pacemaker resource upgrade tasks compatible with staged upgrade
Add better idempotency checks on editing the pacemaker resources and
fetching and re-tagging new images, which prevents the upgrade from
failing. The latest status after staged upgrade looks like this:
Online: [ controller-0 controller-1 controller-2 ]
GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 redis-bundle-0@controller-0 redis-bundle-1@controller-1 ]
Full list of resources:
podman container set: galera-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-mariadb:pcmklatest]
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
podman container set: rabbitmq-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-rabbitmq:pcmklatest]
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1
podman container set: redis-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-redis:pcmklatest]
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
ip-192.168.24.8 (ocf::heartbeat:IPaddr2): Started controller-0
ip-10.0.0.106 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.16 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.23 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.11 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.4.25 (ocf::heartbeat:IPaddr2): Started controller-0
podman container set: haproxy-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-haproxy:pcmklatest]
haproxy-bundle-podman-0 (ocf::heartbeat:podman): Started controller-0
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-1
haproxy-bundle-podman-2 (ocf::heartbeat:podman): Stopped
podman container: openstack-cinder-volume [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-cinder-volume:pcmklatest]
openstack-cinder-volume-podman-0 (ocf::heartbeat:podman): Started controller-1
Failed Resource Actions:
* rabbitmq_monitor_10000 on rabbitmq-bundle-0 'unknown error' (1): call=4861, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:37:51 2019', queued=0ms, exec=0ms
* rabbitmq_monitor_10000 on rabbitmq-bundle-1 'unknown error' (1): call=42, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:15:55 2019', queued=0ms, exec=0ms
This indicates that there are still issues we'll need to solve, but at
least the upgrade passes now and we can keep solving the follow-up
issues while the critical upgrade path is unblocked.
Closes-Bug: #1838971
Change-Id: I2e88dc34fa59624523de4c52a1873438c78e972f
2019-07-30 08:57:54 +00:00
- name : Update galera pcs resource bundle for new container image
when :
- step|int == 1
- is_mysql_bootstrap_node|bool
- galera_pcs_res|bool
- galera_image_current != galera_image_latest
block :
- name : Disable the galera cluster resource before container upgrade
pacemaker_resource :
resource : galera
state : disable
wait_for_resource : true
register : output
retries : 5
until : output.rc == 0
- name : Move Mysql logging to /var/log/containers
block :
- name : Check Mysql logging configuration in pacemaker
command : cibadmin --query --xpath "//storage-mapping[@id='mysql-log']"
2020-03-03 16:59:14 +00:00
failed_when : false
Pacemaker resource upgrade tasks compatible with staged upgrade
Add better idempotency checks on editing the pacemaker resources and
fetching and re-tagging new images, which prevents the upgrade from
failing. The latest status after staged upgrade looks like this:
Online: [ controller-0 controller-1 controller-2 ]
GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 redis-bundle-0@controller-0 redis-bundle-1@controller-1 ]
Full list of resources:
podman container set: galera-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-mariadb:pcmklatest]
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
podman container set: rabbitmq-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-rabbitmq:pcmklatest]
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1
podman container set: redis-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-redis:pcmklatest]
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
ip-192.168.24.8 (ocf::heartbeat:IPaddr2): Started controller-0
ip-10.0.0.106 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.16 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.23 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.11 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.4.25 (ocf::heartbeat:IPaddr2): Started controller-0
podman container set: haproxy-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-haproxy:pcmklatest]
haproxy-bundle-podman-0 (ocf::heartbeat:podman): Started controller-0
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-1
haproxy-bundle-podman-2 (ocf::heartbeat:podman): Stopped
podman container: openstack-cinder-volume [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-cinder-volume:pcmklatest]
openstack-cinder-volume-podman-0 (ocf::heartbeat:podman): Started controller-1
Failed Resource Actions:
* rabbitmq_monitor_10000 on rabbitmq-bundle-0 'unknown error' (1): call=4861, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:37:51 2019', queued=0ms, exec=0ms
* rabbitmq_monitor_10000 on rabbitmq-bundle-1 'unknown error' (1): call=42, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:15:55 2019', queued=0ms, exec=0ms
This indicates that there are still issues we'll need to solve, but at
least the upgrade passes now and we can keep solving the follow-up
issues while the critical upgrade path is unblocked.
Closes-Bug: #1838971
Change-Id: I2e88dc34fa59624523de4c52a1873438c78e972f
2019-07-30 08:57:54 +00:00
register : mysql_logs_moved
- name : Change Mysql logging configuration in pacemaker
# rc == 6 means the configuration doesn't exist in the CIB
when : mysql_logs_moved.rc == 6
block :
- name : Add a bind mount for logging in the galera bundle
command : pcs resource bundle update galera-bundle storage-map add id=mysql-log source-dir=/var/log/containers/mysql target-dir=/var/log/mysql options=rw
- name : Reconfigure Mysql log file in the galera resource agent
command : pcs resource update galera log=/var/log/mysql/mysqld.log
- name : Update the galera bundle to use the new container image name
command : "pcs resource bundle update galera-bundle container image={{galera_image_latest}}"
- name : Enable the galera cluster resource
pacemaker_resource :
resource : galera
state : enable
wait_for_resource : true
register : output
retries : 5
until : output.rc == 0
2019-03-13 12:15:56 +00:00
- name : Create hiera data to upgrade mysql in a stepwise manner.
when :
- step|int == 1
2020-06-02 15:39:13 +00:00
- cluster_recreate|bool
2019-03-13 12:15:56 +00:00
block :
- name : set mysql upgrade node facts in a single-node environment
set_fact :
mysql_short_node_names_upgraded : "{{ mysql_short_node_names }}"
mysql_node_names_upgraded : "{{ mysql_node_names }}"
cacheable : no
2019-07-10 13:24:13 +00:00
when : groups['mysql'] | length <= 1
2019-03-13 12:15:56 +00:00
- name : set mysql upgrade node facts from the limit option
set_fact :
mysql_short_node_names_upgraded : "{{ mysql_short_node_names_upgraded|default([]) + [item.split('.')[0]] }}"
mysql_node_names_upgraded : "{{ mysql_node_names_upgraded|default([]) + [item] }}"
cacheable : no
2019-07-10 13:24:13 +00:00
when :
- groups['mysql'] | length > 1
2020-05-22 14:21:41 +00:00
- item.split('.')[0] in ansible_limit.split(':')
2016-10-16 20:57:09 +00:00
loop : "{{ mysql_node_names | default([]) }}"
2019-03-13 12:15:56 +00:00
- fail :
msg : >
You can't upgrade galera without staged upgrade.
You need to use the limit option in order to do so.
when : >-
mysql_short_node_names_upgraded is not defined or
mysql_short_node_names_upgraded | length == 0 or
mysql_node_names_upgraded is not defined or
mysql_node_names_upgraded | length == 0
2020-02-03 09:32:49 +00:00
- debug :
msg : "Prepare galera upgrade for {{ mysql_short_node_names_upgraded }}"
2019-08-13 11:59:44 +00:00
- name : remove mysql init container on upgrade-scaleup to force re-init
2020-01-14 13:41:37 +00:00
include_role :
2020-03-19 07:20:24 +00:00
name : tripleo_container_rm
2020-01-14 13:41:37 +00:00
vars :
tripleo_containers_to_rm :
2020-05-01 11:12:46 +00:00
- mysql_wait_bundle
2020-01-14 13:41:37 +00:00
when :
- mysql_short_node_names_upgraded | length > 1
2019-03-13 12:15:56 +00:00
- name : add the mysql short name to hiera data for the upgrade.
include_role :
2020-01-20 16:31:22 +00:00
name : tripleo_upgrade_hiera
2019-03-13 12:15:56 +00:00
tasks_from : set.yml
vars :
tripleo_upgrade_key : mysql_short_node_names_override
tripleo_upgrade_value : "{{mysql_short_node_names_upgraded}}"
- name : add the mysql long name to hiera data for the upgrade
include_role :
2020-01-20 16:31:22 +00:00
name : tripleo_upgrade_hiera
2019-03-13 12:15:56 +00:00
tasks_from : set.yml
vars :
tripleo_upgrade_key : mysql_node_names_override
tripleo_upgrade_value : "{{mysql_node_names_upgraded}}"
- name : remove the extra hiera data needed for the upgrade.
include_role :
2020-01-20 16:31:22 +00:00
name : tripleo_upgrade_hiera
2019-03-13 12:15:56 +00:00
tasks_from : remove.yml
vars :
tripleo_upgrade_key : "{{item}}"
loop :
- mysql_short_node_names_override
- mysql_node_names_override
when : mysql_short_node_names_upgraded | length == mysql_node_names | length
2018-02-23 14:53:08 +00:00
- name : Retag the pacemaker image if containerized
when :
- step|int == 3
block : *mysql_fetch_retag_container_tasks
2018-02-21 15:23:22 +00:00
- name : Check and upgrade Mysql database after major version upgrade
2019-08-26 08:20:04 +00:00
# Note: during upgrade to Stein, a new pacemaker cluster is recreated,
# controller nodes added sequentially to this new cluster, and the upgrade
# workflow (upgrade tasks, deploy/convertge) is ran once per controller.
# This mysql upgrade block must run only once per controller, before
# the controller is added into the cluster (by mysql_init_bundle) and
# before pacemaker has a chance to start galera on that controller.
# So do not exercise the upgrade if mysql is already running.
Upgrade: make bundles use new container image name after upgrade
The major_upgrade tasks for HA services only allows to change the container
image tag used by bundles. It doesn't work when the image name changes.
Fix this unwanted behaviour by updating the bundle's attribute in pacemaker
to use container image <NEW>:pcmklatest instead of <CURRENT>:pcmklatest
We are constrained by the steps at when we can modify the bundle:
. Image update must stay at step 3 when pacemaker is stopped.
. image name used by the bundle must be available in docker when the
bundle is updated
So we re-use the double tagging idiom to perform the image update:
. At step 0, we tag the image pointed to by <CURRENT>:pcmklatest
with an additional temporary tag <NEW>:pcmklatest.
=> this ensures that at step1, the new tag is available on all
controller nodes.
. At step 1, we update the resource bundle to use the new image
name <NEW>:pcmklatest.
=> at the end of step1, the bundle will be configured with the
new name, and be able to start even if the real container
image hasn't be pulled yet.
. At step 3, the existing code will download the real image
<NEW>:<NEWTAG> and make tag <NEW>:pcmklatest point to it.
Since the bundle is always modified, we now stop and restart the
bundle resources unconditionally.
Also, move the mariadb upgrade task to step 3, when pacemaker is
guaranteed to be stopped, because the task assumes that no mysql
is running while it runs. Fix the mysql permission after rpm
upgrade on the host.
Change-Id: Ic87a66753b104b9f15db70fdccbd66d88cef94df
Closes-Bug: #1763001
2018-04-11 11:57:59 +00:00
when : step|int == 3
2018-02-21 15:23:22 +00:00
block :
Upgrade: make bundles use new container image name after upgrade
The major_upgrade tasks for HA services only allows to change the container
image tag used by bundles. It doesn't work when the image name changes.
Fix this unwanted behaviour by updating the bundle's attribute in pacemaker
to use container image <NEW>:pcmklatest instead of <CURRENT>:pcmklatest
We are constrained by the steps at when we can modify the bundle:
. Image update must stay at step 3 when pacemaker is stopped.
. image name used by the bundle must be available in docker when the
bundle is updated
So we re-use the double tagging idiom to perform the image update:
. At step 0, we tag the image pointed to by <CURRENT>:pcmklatest
with an additional temporary tag <NEW>:pcmklatest.
=> this ensures that at step1, the new tag is available on all
controller nodes.
. At step 1, we update the resource bundle to use the new image
name <NEW>:pcmklatest.
=> at the end of step1, the bundle will be configured with the
new name, and be able to start even if the real container
image hasn't be pulled yet.
. At step 3, the existing code will download the real image
<NEW>:<NEWTAG> and make tag <NEW>:pcmklatest point to it.
Since the bundle is always modified, we now stop and restart the
bundle resources unconditionally.
Also, move the mariadb upgrade task to step 3, when pacemaker is
guaranteed to be stopped, because the task assumes that no mysql
is running while it runs. Fix the mysql permission after rpm
upgrade on the host.
Change-Id: Ic87a66753b104b9f15db70fdccbd66d88cef94df
Closes-Bug: #1763001
2018-04-11 11:57:59 +00:00
# mariadb package changes ownership of /var/lib/mysql on package
# update, so update here rather than in tripleo-package, to
# guarantee that ownership is fixed at the end of step 3
- name : Update host mariadb packages
when : step|int == 3
2018-07-20 18:03:57 +00:00
package : name=mariadb-server-galera state=latest
2018-02-21 15:23:22 +00:00
- name : Mysql upgrade script
set_fact :
mysql_upgrade_script :
list_join :
2020-12-17 12:23:58 +00:00
- "\n"
- # start a temporary mariadb server for running the upgrade
- |
kolla_set_configs
if mysqladmin ping --silent; then exit 0; fi
chown -R mysql:mysql /var/lib/mysql
chown -R mysql:mysql /var/log/mysql
mysqld_safe --user=mysql --wsrep-provider=none --skip-networking --wsrep-on=off --log-error=/var/log/mysql/mysqld-upgrade.log &
# an idempotent script takes care of all upgrade steps
# we inline the content here because at the time this is executed,
# the script is not yet copied in /var/lib/container-config-scripts
- { get_file : ../../container_config_scripts/mysql_upgrade_db.sh }
# stop the temporary server
- mysqladmin shutdown
2018-02-21 15:23:22 +00:00
- name : Bind mounts for temporary container
set_fact :
2020-12-17 12:23:58 +00:00
mysql_upgrade_db_bind_mounts :
list_concat :
- {get_attr : [ ContainersCommon, volumes]}
- *mysql_volumes
- - /var/log/containers/mysql:/var/log/mysql:rw,z
2018-02-21 15:23:22 +00:00
# Run the upgrade script built above in a one-shot (--rm) container
# using the pcmklatest-tagged mysql image (*mysql_image_pcmklatest
# anchor). The script body is passed to bash via the UPGRADE_SCRIPT
# environment variable rather than being substituted into the command
# line, which avoids shell-quoting issues with the multi-line script.
# Container stdout/stderr is captured via the k8s-file log driver into
# LOG_DIR/db-upgrade.log.
- name : Upgrade Mysql database from a temporary container
shell :
str_replace :
template :
2019-11-07 08:42:08 +00:00
'CONTAINER_CLI run --rm --log-driver=k8s-file --log-opt path=LOG_DIR/db-upgrade.log \
2020-12-17 12:23:58 +00:00
-u root --net=host UPGRADE_ENV UPGRADE_VOLUMES "UPGRADE_IMAGE" /bin/bash -ecx "$UPGRADE_SCRIPT"'
2018-02-21 15:23:22 +00:00
params :
UPGRADE_ENV : '-e "KOLLA_CONFIG_STRATEGY=COPY_ALWAYS"'
UPGRADE_IMAGE : *mysql_image_pcmklatest
2020-12-17 12:23:58 +00:00
UPGRADE_VOLUMES : "-v {{ mysql_upgrade_db_bind_mounts | join(' -v ')}}"
2019-03-13 10:17:10 +00:00
CONTAINER_CLI : "{{ container_cli }}"
2019-11-19 15:39:30 +00:00
LOG_DIR : '/var/log/containers/mysql'
2020-12-17 12:23:58 +00:00
environment :
UPGRADE_SCRIPT : "{{ mysql_upgrade_script }}"
Pacemaker resource upgrade tasks compatible with staged upgrade
Add better idempotency checks on editing the pacemaker resources and
fetching and re-tagging new images, which prevents the upgrade from
failing. The latest status after staged upgrade looks like this:
Online: [ controller-0 controller-1 controller-2 ]
GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 redis-bundle-0@controller-0 redis-bundle-1@controller-1 ]
Full list of resources:
podman container set: galera-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-mariadb:pcmklatest]
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
podman container set: rabbitmq-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-rabbitmq:pcmklatest]
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1
podman container set: redis-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-redis:pcmklatest]
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
ip-192.168.24.8 (ocf::heartbeat:IPaddr2): Started controller-0
ip-10.0.0.106 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.16 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.1.23 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.11 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.4.25 (ocf::heartbeat:IPaddr2): Started controller-0
podman container set: haproxy-bundle [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-haproxy:pcmklatest]
haproxy-bundle-podman-0 (ocf::heartbeat:podman): Started controller-0
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-1
haproxy-bundle-podman-2 (ocf::heartbeat:podman): Stopped
podman container: openstack-cinder-volume [brew-pulp-docker01.web.prod.ext.phx2.redhat.com:8888/rhosp15/openstack-cinder-volume:pcmklatest]
openstack-cinder-volume-podman-0 (ocf::heartbeat:podman): Started controller-1
Failed Resource Actions:
* rabbitmq_monitor_10000 on rabbitmq-bundle-0 'unknown error' (1): call=4861, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:37:51 2019', queued=0ms, exec=0ms
* rabbitmq_monitor_10000 on rabbitmq-bundle-1 'unknown error' (1): call=42, status=Timed Out, exitreason='',
last-rc-change='Mon Aug 5 10:15:55 2019', queued=0ms, exec=0ms
This indicates that there are still issues we'll need to solve, but at
least the upgrade passes now and we can keep solving the follow-up
issues while the critical upgrade path is unblocked.
Closes-Bug: #1838971
Change-Id: I2e88dc34fa59624523de4c52a1873438c78e972f
2019-07-30 08:57:54 +00:00
2019-01-17 12:21:18 +00:00
# Transfer the mysql data directory to the bootstrap node during the
# system upgrade. Runs at external upgrade step 2, only when the
# MysqlUpgradeTransfer parameter is true, and only when explicitly
# requested (tagged 'never' + 'system_upgrade_transfer_data'). The
# source is the second entry of mysql_short_node_names and the
# destination is the mysql bootstrap node — both resolved from the
# first overcloud host's hostvars.
external_upgrade_tasks :
- vars :
mysql_upgrade_transfer : {get_param : MysqlUpgradeTransfer}
when :
2019-07-04 08:17:24 +00:00
- step|int == 2
2019-01-17 12:21:18 +00:00
- mysql_upgrade_transfer
tags :
- never
- system_upgrade_transfer_data
block :
- name : Transfer mysql data
include_role :
2020-01-20 16:31:22 +00:00
name : tripleo_transfer
2019-01-17 12:21:18 +00:00
vars :
tripleo_transfer_src_dir : /var/lib/mysql
tripleo_transfer_src_host : "{{hostvars[groups['overcloud'][0]]['mysql_short_node_names'][1]}}"
tripleo_transfer_dest_dir : /var/lib/mysql
tripleo_transfer_dest_host : "{{hostvars[groups['overcloud'][0]]['mysql_short_bootstrap_node_name']}}"
2019-09-27 08:18:22 +00:00
# The flag file makes the transfer idempotent: tripleo_transfer skips
# the copy when this file already exists — TODO confirm against the
# tripleo_transfer role.
tripleo_transfer_flag_file : /var/lib/tripleo/transfer-flags/var-lib-mysql