Merge "Add resources for major upgrade in Pacemaker scenario"

This commit is contained in:
Jenkins 2016-02-25 11:00:41 +00:00 committed by Gerrit Code Review
commit cbe1474ec9
4 changed files with 177 additions and 0 deletions

View File

@ -0,0 +1,3 @@
# Heat environment that wires the controller pre/post-Puppet task hooks
# for a major upgrade of a Pacemaker-managed deployment.
resource_registry:
  # Pre-Puppet hook: run the Pacemaker major-upgrade task template.
  OS::TripleO::Tasks::ControllerPrePuppet: ../extraconfig/tasks/major_upgrade_pacemaker.yaml
  # Post-Puppet hook: mapped to noop.yaml (presumably a template that does
  # nothing -- confirm against that file).
  OS::TripleO::Tasks::ControllerPostPuppet: ../extraconfig/tasks/noop.yaml

View File

@ -0,0 +1,45 @@
heat_template_version: 2014-10-16

description: 'Upgrade for Pacemaker deployments'

parameters:
  servers:
    type: json
    description: servers the upgrade software deployments are applied to
  input_values:
    type: json
    description: input values for the software deployments

resources:
  # Step 1 script. list_join with an empty delimiter concatenates the two
  # files into a single script, so major_upgrade_pacemaker_1.sh can use
  # whatever pacemaker_common_functions.sh provides (presumably the
  # echo_error and check_resource helpers the step scripts call).
  ControllerPacemakerUpgradeConfig_Step1:
    type: OS::Heat::SoftwareConfig
    properties:
      group: script
      config:
        list_join:
          - ''
          - - get_file: pacemaker_common_functions.sh
            - get_file: major_upgrade_pacemaker_1.sh

  # Applies the step 1 script to every server passed in via `servers`.
  ControllerPacemakerUpgradeDeployment_Step1:
    type: OS::Heat::SoftwareDeploymentGroup
    properties:
      servers:
        get_param: servers
      config:
        get_resource: ControllerPacemakerUpgradeConfig_Step1
      input_values:
        get_param: input_values

  # Step 2 script, built the same way as step 1: common functions first,
  # then the step-specific script.
  ControllerPacemakerUpgradeConfig_Step2:
    type: OS::Heat::SoftwareConfig
    properties:
      group: script
      config:
        list_join:
          - ''
          - - get_file: pacemaker_common_functions.sh
            - get_file: major_upgrade_pacemaker_2.sh

  # Applies the step 2 script. depends_on makes Heat create this deployment
  # only after the step 1 deployment group has completed.
  ControllerPacemakerUpgradeDeployment_Step2:
    type: OS::Heat::SoftwareDeploymentGroup
    depends_on: ControllerPacemakerUpgradeDeployment_Step1
    properties:
      servers:
        get_param: servers
      config:
        get_resource: ControllerPacemakerUpgradeConfig_Step2
      input_values:
        get_param: input_values

View File

@ -0,0 +1,58 @@
#!/bin/bash
#
# Major upgrade, step 1: stop the cluster-managed services and the Swift
# units, wait for pacemaker to go down, update packages and patch the
# service configuration.
#
# echo_error and check_resource are not defined in this file; they must be
# provided by whatever is sourced/concatenated ahead of this script.
# check_resource is called as: check_resource <resource> <state> <number>
# (the number is presumably a timeout in seconds -- confirm in its definition).

set -eu

# Seconds to wait for the local pacemaker service to become inactive.
# NOTE(review): nodes that are not the bootstrap node reach the wait loop
# right away, while the bootstrap node may spend a long time in the
# check_resource calls below before stopping the cluster -- confirm that
# this value is large enough for that case.
cluster_sync_timeout=600

# Swift isn't controlled by pacemaker, so its units are stopped directly
# with systemctl, in this order.
swift_services=(
    openstack-swift-account-auditor
    openstack-swift-account-reaper
    openstack-swift-account-replicator
    openstack-swift-account
    openstack-swift-container-auditor
    openstack-swift-container-replicator
    openstack-swift-container-updater
    openstack-swift-container
    openstack-swift-object-auditor
    openstack-swift-object-replicator
    openstack-swift-object-updater
    openstack-swift-object
    openstack-swift-proxy
)

# Refuse to start unless the cluster is running with no node reported OFFLINE.
if pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; then
    echo_error "ERROR: upgrade cannot start with some cluster nodes being offline"
    exit 1
fi

# Only the node whose hostname matches hiera's bootstrap_nodeid disables the
# cluster resources and stops the cluster; the other nodes skip this section.
if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
    pcs resource disable httpd
    check_resource httpd stopped 1800

    # The openstack-keystone resource is only handled when pcs reports it.
    if pcs status | grep openstack-keystone; then
        pcs resource disable openstack-keystone
        check_resource openstack-keystone stopped 1800
    fi

    pcs resource disable redis
    check_resource redis stopped 600

    pcs resource disable mongod
    check_resource mongod stopped 600

    pcs resource disable rabbitmq
    check_resource rabbitmq stopped 600

    pcs resource disable memcached
    check_resource memcached stopped 600

    pcs resource disable galera
    check_resource galera stopped 600

    pcs cluster stop --all
fi

for S in "${swift_services[@]}"; do
    systemctl stop "$S"
done

# Poll every 5 seconds until pacemaker is no longer active on this node,
# giving up once cluster_sync_timeout seconds have elapsed.
tstart=$(date +%s)
while systemctl is-active pacemaker; do
    sleep 5
    tnow=$(date +%s)
    if (( tnow-tstart > cluster_sync_timeout )) ; then
        echo_error "ERROR: cluster shutdown timed out"
        exit 1
    fi
done

yum update -y

# Pin messages sent to compute nodes to liberty, these will be upgraded later
crudini --set /etc/nova/nova.conf upgrade_levels compute liberty

# https://bugzilla.redhat.com/show_bug.cgi?id=1284047
# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435
crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit

# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists
crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"

View File

@ -0,0 +1,71 @@
#!/bin/bash
#
# Major upgrade, step 2: start the cluster again, bring the database
# resources up, run the database schema syncs, re-enable the remaining
# cluster resources and start the Swift units.
#
# echo_error and check_resource are not defined in this file; they must be
# provided by whatever is sourced/concatenated ahead of this script.
# check_resource is called as: check_resource <resource> <state> <number>
# (the number is presumably a timeout in seconds -- confirm in its definition).

set -eu

# Seconds to wait for all nodes to rejoin the cluster.
cluster_form_timeout=600
# Seconds to wait for `crm_resource --wait` to return.
cluster_settle_timeout=600
# Seconds to wait for clustercheck to succeed.
galera_sync_timeout=600

# Swift isn't controlled by pacemaker, so its units are started directly
# with systemctl, in this order.
swift_services=(
    openstack-swift-account-auditor
    openstack-swift-account-reaper
    openstack-swift-account-replicator
    openstack-swift-account
    openstack-swift-container-auditor
    openstack-swift-container-replicator
    openstack-swift-container-updater
    openstack-swift-container
    openstack-swift-object-auditor
    openstack-swift-object-replicator
    openstack-swift-object-updater
    openstack-swift-object
    openstack-swift-proxy
)

# Only the node whose hostname matches hiera's bootstrap_nodeid drives the
# cluster start-up and the db syncs; the other nodes skip this section.
if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
    pcs cluster start --all

    # Poll every 5 seconds until pcs no longer reports the cluster as not
    # running or any node as OFFLINE.
    tstart=$(date +%s)
    while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
        sleep 5
        tnow=$(date +%s)
        if (( tnow-tstart > cluster_form_timeout )) ; then
            echo_error "ERROR: timed out forming the cluster"
            exit 1
        fi
    done

    # Bound crm_resource --wait with timeout(1); -k 10 sends KILL if it is
    # still running 10 seconds after the initial signal.
    if ! timeout -k 10 "$cluster_settle_timeout" crm_resource --wait; then
        echo_error "ERROR: timed out waiting for cluster to finish transition"
        exit 1
    fi

    # The databases are enabled first because the db syncs below need them.
    pcs resource enable galera
    check_resource galera started 600

    pcs resource enable mongod
    check_resource mongod started 600

    # Poll every 5 seconds until clustercheck exits successfully.
    tstart=$(date +%s)
    while ! clustercheck; do
        sleep 5
        tnow=$(date +%s)
        if (( tnow-tstart > galera_sync_timeout )) ; then
            echo_error "ERROR galera sync timed out"
            exit 1
        fi
    done

    # Run all the db syncs
    # TODO: check if this can be triggered in puppet and removed from here
    ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf
    cinder-manage db sync
    glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
    heat-manage --config-file /etc/heat/heat.conf db_sync
    keystone-manage db_sync
    neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
    nova-manage db sync

    pcs resource enable memcached
    check_resource memcached started 600

    pcs resource enable rabbitmq
    check_resource rabbitmq started 600

    pcs resource enable redis
    check_resource redis started 600

    # The openstack-keystone resource is only handled when pcs reports it.
    if pcs status | grep openstack-keystone; then
        pcs resource enable openstack-keystone
        check_resource openstack-keystone started 1800
    fi

    pcs resource enable httpd
    check_resource httpd started 1800
fi

for S in "${swift_services[@]}"; do
    systemctl start "$S"
done