Merge "Remove obsolete code for handling Pacemakerized resource restarts"

Zuul 2018-10-15 11:31:45 +00:00 committed by Gerrit Code Review
commit ad06ebce20
23 changed files with 15 additions and 807 deletions

View File

@@ -10,9 +10,6 @@ resource_registry:
OS::TripleO::Services::Clustercheck: ../../docker/services/pacemaker/clustercheck.yaml
OS::TripleO::Services::MySQL: ../../docker/services/pacemaker/database/mysql.yaml
OS::TripleO::Services::Keepalived: OS::Heat::None
OS::TripleO::Tasks::ControllerPreConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostPuppetRestart: ../../extraconfig/tasks/post_puppet_pacemaker_restart.yaml
# Some infra instances don't pass the ping test but are otherwise working.
# Since the OVB jobs also test this functionality we can shut it off here.
OS::TripleO::AllNodes::Validation: ../common/all-nodes-validation-disabled.yaml

View File

@@ -9,9 +9,6 @@ resource_registry:
OS::TripleO::Services::Clustercheck: ../../docker/services/pacemaker/clustercheck.yaml
OS::TripleO::Services::MySQL: ../../docker/services/pacemaker/database/mysql.yaml
OS::TripleO::Services::Keepalived: OS::Heat::None
OS::TripleO::Tasks::ControllerPreConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostPuppetRestart: ../../extraconfig/tasks/post_puppet_pacemaker_restart.yaml
# Some infra instances don't pass the ping test but are otherwise working.
# Since the OVB jobs also test this functionality we can shut it off here.
OS::TripleO::AllNodes::Validation: ../common/all-nodes-validation-disabled.yaml

View File

@@ -1,77 +0,0 @@
resource_registry:
OS::TripleO::Controller::Net::SoftwareConfig: ../common/net-config-multinode-os-net-config.yaml
OS::TripleO::Compute::Net::SoftwareConfig: ../common/net-config-multinode-os-net-config.yaml
OS::TripleO::Services::RabbitMQ: ../../puppet/services/pacemaker/rabbitmq.yaml
OS::TripleO::Services::HAproxy: ../../puppet/services/pacemaker/haproxy.yaml
OS::TripleO::Services::Pacemaker: ../../puppet/services/pacemaker.yaml
OS::TripleO::Services::MySQL: ../../puppet/services/pacemaker/database/mysql.yaml
OS::TripleO::Services::CinderVolume: ../../puppet/services/pacemaker/cinder-volume.yaml
OS::TripleO::Services::Keepalived: OS::Heat::None
OS::TripleO::Tasks::ControllerPreConfig: ../../extraconfig/tasks/pre_puppet_pacemaker.yaml
OS::TripleO::Tasks::ControllerPostConfig: ../../extraconfig/tasks/post_puppet_pacemaker.yaml
OS::TripleO::Tasks::ControllerPostPuppetRestart: ../../extraconfig/tasks/post_puppet_pacemaker_restart.yaml
# Some infra instances don't pass the ping test but are otherwise working.
# Since the OVB jobs also test this functionality we can shut it off here.
OS::TripleO::AllNodes::Validation: ../common/all-nodes-validation-disabled.yaml
parameter_defaults:
ControllerServices:
- OS::TripleO::Services::CACerts
- OS::TripleO::Services::CephMon
- OS::TripleO::Services::CephOSD
- OS::TripleO::Services::CinderApi
- OS::TripleO::Services::CinderScheduler
- OS::TripleO::Services::CinderVolume
- OS::TripleO::Services::Clustercheck
- OS::TripleO::Services::ContainerImagePrepare
- OS::TripleO::Services::Docker
- OS::TripleO::Services::Kernel
- OS::TripleO::Services::Keystone
- OS::TripleO::Services::GlanceApi
- OS::TripleO::Services::HeatApi
- OS::TripleO::Services::HeatApiCfn
- OS::TripleO::Services::HeatEngine
- OS::TripleO::Services::MySQL
- OS::TripleO::Services::MySQLClient
- OS::TripleO::Services::NeutronDhcpAgent
- OS::TripleO::Services::NeutronL3Agent
- OS::TripleO::Services::NeutronMetadataAgent
- OS::TripleO::Services::NeutronServer
- OS::TripleO::Services::NeutronCorePlugin
- OS::TripleO::Services::NeutronOvsAgent
- OS::TripleO::Services::RabbitMQ
- OS::TripleO::Services::HAproxy
- OS::TripleO::Services::Keepalived
- OS::TripleO::Services::Memcached
- OS::TripleO::Services::Pacemaker
- OS::TripleO::Services::NovaConductor
- OS::TripleO::Services::NovaApi
- OS::TripleO::Services::NovaPlacement
- OS::TripleO::Services::NovaMetadata
- OS::TripleO::Services::NovaScheduler
- OS::TripleO::Services::Ntp
- OS::TripleO::Services::Snmp
- OS::TripleO::Services::Timezone
- OS::TripleO::Services::NovaCompute
- OS::TripleO::Services::NovaLibvirt
- OS::TripleO::Services::Horizon
- OS::TripleO::Services::TripleoFirewall
- OS::TripleO::Services::NovaMigrationTarget
- OS::TripleO::Services::Sshd
- OS::TripleO::Services::Iscsid
ControllerExtraConfig:
nova::compute::libvirt::services::libvirt_virt_type: qemu
nova::compute::libvirt::libvirt_virt_type: qemu
# Required for Centos 7.3 and Qemu 2.6.0
nova::compute::libvirt::libvirt_cpu_mode: 'none'
#NOTE(gfidente): not great but we need this to deploy on ext4
#http://docs.ceph.com/docs/jewel/rados/configuration/filesystem-recommendations/
ceph::profile::params::osd_max_object_name_len: 256
ceph::profile::params::osd_max_object_namespace_len: 64
#NOTE(gfidente): necessary when deploying a single OSD
ceph::profile::params::osd_pool_default_pg_num: 32
ceph::profile::params::osd_pool_default_pgp_num: 32
SwiftCeilometerPipelineEnabled: False
Debug: True
NotificationDriver: 'noop'
GlanceBackend: 'file'

View File

@@ -1,70 +0,0 @@
resource_registry:
OS::TripleO::Controller::Net::SoftwareConfig: ../common/net-config-multinode.yaml
OS::TripleO::Compute::Net::SoftwareConfig: ../common/net-config-multinode.yaml
OS::TripleO::Services::RabbitMQ: ../../puppet/services/pacemaker/rabbitmq.yaml
OS::TripleO::Services::HAproxy: ../../puppet/services/pacemaker/haproxy.yaml
OS::TripleO::Services::Pacemaker: ../../puppet/services/pacemaker.yaml
OS::TripleO::Services::MySQL: ../../puppet/services/pacemaker/database/mysql.yaml
OS::TripleO::Services::CinderVolume: ../../puppet/services/pacemaker/cinder-volume.yaml
OS::TripleO::Services::Keepalived: OS::Heat::None
OS::TripleO::Tasks::ControllerPreConfig: ../../extraconfig/tasks/pre_puppet_pacemaker.yaml
OS::TripleO::Tasks::ControllerPostConfig: ../../extraconfig/tasks/post_puppet_pacemaker.yaml
OS::TripleO::Tasks::ControllerPostPuppetRestart: ../../extraconfig/tasks/post_puppet_pacemaker_restart.yaml
parameter_defaults:
ControllerServices:
- OS::TripleO::Services::CACerts
- OS::TripleO::Services::Clustercheck
- OS::TripleO::Services::ContainerImagePrepare
- OS::TripleO::Services::Docker
- OS::TripleO::Services::Kernel
- OS::TripleO::Services::Keystone
- OS::TripleO::Services::GlanceApi
- OS::TripleO::Services::GlanceRegistry
- OS::TripleO::Services::NeutronDhcpAgent
- OS::TripleO::Services::NeutronL3Agent
- OS::TripleO::Services::NeutronMetadataAgent
- OS::TripleO::Services::NeutronServer
- OS::TripleO::Services::NeutronCorePlugin
- OS::TripleO::Services::NeutronOvsAgent
- OS::TripleO::Services::CinderApi
- OS::TripleO::Services::CinderScheduler
- OS::TripleO::Services::CinderVolume
- OS::TripleO::Services::HeatApi
- OS::TripleO::Services::HeatApiCfn
- OS::TripleO::Services::HeatEngine
- OS::TripleO::Services::SaharaApi
- OS::TripleO::Services::SaharaEngine
- OS::TripleO::Services::MySQL
- OS::TripleO::Services::MySQLClient
- OS::TripleO::Services::RabbitMQ
- OS::TripleO::Services::HAproxy
- OS::TripleO::Services::Keepalived
- OS::TripleO::Services::Memcached
- OS::TripleO::Services::Ntp
- OS::TripleO::Services::Timezone
- OS::TripleO::Services::TripleoPackages
- OS::TripleO::Services::TripleoFirewall
- OS::TripleO::Services::NovaConductor
- OS::TripleO::Services::NovaApi
- OS::TripleO::Services::NovaPlacement
- OS::TripleO::Services::NovaMetadata
- OS::TripleO::Services::NovaScheduler
- OS::TripleO::Services::NovaCompute
- OS::TripleO::Services::NovaLibvirt
- OS::TripleO::Services::NovaMigrationTarget
- OS::TripleO::Services::Pacemaker
- OS::TripleO::Services::Horizon
- OS::TripleO::Services::Sshd
- OS::TripleO::Services::Iscsid
ControllerExtraConfig:
nova::compute::libvirt::services::libvirt_virt_type: qemu
nova::compute::libvirt::libvirt_virt_type: qemu
# Required for Centos 7.3 and Qemu 2.6.0
nova::compute::libvirt::libvirt_cpu_mode: 'none'
heat::rpc_response_timeout: 600
SwiftCeilometerPipelineEnabled: False
Debug: True
DockerPuppetDebug: True
NotificationDriver: 'noop'
GlanceBackend: 'file'

View File

@@ -13,9 +13,6 @@ resource_registry:
OS::TripleO::Services::Clustercheck: ../../docker/services/pacemaker/clustercheck.yaml
OS::TripleO::Services::MySQL: ../../docker/services/pacemaker/database/mysql.yaml
OS::TripleO::Services::Keepalived: OS::Heat::None
OS::TripleO::Tasks::ControllerPreConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostPuppetRestart: ../../extraconfig/tasks/post_puppet_pacemaker_restart.yaml
parameter_defaults:
ControllerServices:

View File

@@ -19,9 +19,6 @@ resource_registry:
OS::TripleO::Services::CinderBackup: ../../docker/services/pacemaker/cinder-backup.yaml
OS::TripleO::Services::CinderVolume: ../../docker/services/pacemaker/cinder-volume.yaml
OS::TripleO::Services::Keepalived: OS::Heat::None
OS::TripleO::Tasks::ControllerPreConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostPuppetRestart: ../../extraconfig/tasks/post_puppet_pacemaker_restart.yaml
#Needs to run scenario001
OS::TripleO::Services::Fluentd: ../../docker/services/fluentd.yaml
# NOTE(mmagr): We need to disable Sensu client deployment for now as the container health check is based

View File

@@ -17,9 +17,6 @@ resource_registry:
OS::TripleO::Services::CinderVolume: ../../docker/services/pacemaker/cinder-volume.yaml
OS::TripleO::Services::BarbicanBackendSimpleCrypto: ../../puppet/services/barbican-backend-simple-crypto.yaml
OS::TripleO::Services::Keepalived: OS::Heat::None
OS::TripleO::Tasks::ControllerPreConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostPuppetRestart: ../../extraconfig/tasks/post_puppet_pacemaker_restart.yaml
# Some infra instances don't pass the ping test but are otherwise working.
# Since the OVB jobs also test this functionality we can shut it off here.
OS::TripleO::AllNodes::Validation: ../common/all-nodes-validation-disabled.yaml

View File

@@ -20,9 +20,6 @@ resource_registry:
OS::TripleO::Services::DesignateWorker: ../../docker/services/designate-worker.yaml
OS::TripleO::Services::DesignateMDNS: ../../docker/services/designate-mdns.yaml
OS::TripleO::Services::Keepalived: OS::Heat::None
OS::TripleO::Tasks::ControllerPreConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostPuppetRestart: ../../extraconfig/tasks/post_puppet_pacemaker_restart.yaml
# Some infra instances don't pass the ping test but are otherwise working.
# Since the OVB jobs also test this functionality we can shut it off here.
OS::TripleO::AllNodes::Validation: ../common/all-nodes-validation-disabled.yaml

View File

@@ -21,9 +21,6 @@ resource_registry:
# OS::TripleO::Services::NeutronL2gwApi: ../../puppet/services/neutron-l2gw-api.yaml
# OS::TripleO::Services::NeutronL2gwAgent: ../../puppet/services/neutron-l2gw-agent.yaml
# These enable Pacemaker
OS::TripleO::Tasks::ControllerPreConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostPuppetRestart: ../../extraconfig/tasks/post_puppet_pacemaker_restart.yaml
OS::TripleO::Services::OsloMessagingRpc: ../../docker/services/pacemaker/rpc-rabbitmq.yaml
OS::TripleO::Services::OsloMessagingNotify: ../../docker/services/messaging/notify-rabbitmq-shared.yaml
OS::TripleO::Services::HAproxy: ../../docker/services/pacemaker/haproxy.yaml

View File

@@ -1,9 +1,6 @@
resource_registry:
OS::TripleO::Controller::Net::SoftwareConfig: ../common/net-config-multinode.yaml
OS::TripleO::Compute::Net::SoftwareConfig: ../common/net-config-multinode.yaml
OS::TripleO::Tasks::ControllerPreConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostPuppetRestart: OS::Heat::None
OS::TripleO::Services::OpenDaylightApi: ../../docker/services/opendaylight-api.yaml
OS::TripleO::Services::OpenDaylightOvs: ../../puppet/services/opendaylight-ovs.yaml
OS::TripleO::Docker::NeutronMl2PluginBase: ../../puppet/services/neutron-plugin-ml2-odl.yaml

View File

@@ -11,9 +11,6 @@ resource_registry:
OS::TripleO::Services::Clustercheck: ../../docker/services/pacemaker/clustercheck.yaml
OS::TripleO::Services::MySQL: ../../docker/services/pacemaker/database/mysql.yaml
OS::TripleO::Services::Keepalived: OS::Heat::None
OS::TripleO::Tasks::ControllerPreConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostPuppetRestart: ../../extraconfig/tasks/post_puppet_pacemaker_restart.yaml
OS::TripleO::AllNodes::Validation: ../common/all-nodes-validation-disabled.yaml
OS::TripleO::Services::OctaviaApi: ../../docker/services/octavia-api.yaml
OS::TripleO::Services::OctaviaHousekeeping: ../../docker/services/octavia-housekeeping.yaml

View File

@@ -6,9 +6,6 @@ resource_registry:
OS::TripleO::Services::IronicConductor: ../../docker/services/ironic-conductor.yaml
OS::TripleO::Services::IronicPxe: ../../docker/services/ironic-pxe.yaml
# These enable Pacemaker
OS::TripleO::Tasks::ControllerPreConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostPuppetRestart: ../../extraconfig/tasks/post_puppet_pacemaker_restart.yaml
OS::TripleO::Services::OsloMessagingRpc: ../../docker/services/pacemaker/rpc-rabbitmq.yaml
OS::TripleO::Services::OsloMessagingNotify: ../../docker/services/messaging/notify-rabbitmq-shared.yaml
OS::TripleO::Services::HAproxy: ../../docker/services/pacemaker/haproxy.yaml

View File

@@ -1,4 +0,0 @@
resource_registry:
OS::TripleO::Tasks::ControllerDeployedServerPreConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerDeployedServerPostConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerDeployedServerPostPuppetRestart: ../extraconfig/tasks/post_puppet_pacemaker_restart.yaml

View File

@@ -1,10 +1,6 @@
# An environment which creates an Overcloud without the use of pacemaker
# (i.e. only with keepalived and systemd for all resources)
resource_registry:
OS::TripleO::Tasks::ControllerPreConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostConfig: OS::Heat::None
OS::TripleO::Tasks::ControllerPostPuppetRestart: OS::Heat::None
OS::TripleO::Services::CinderVolume: ../docker/services/cinder-volume.yaml
OS::TripleO::Services::RabbitMQ: ../docker/services/rabbitmq.yaml
OS::TripleO::Services::HAproxy: ../docker/services/haproxy.yaml
@@ -13,4 +9,3 @@ resource_registry:
OS::TripleO::Services::Keepalived: OS::Heat::None
OS::TripleO::Services::Pacemaker: OS::Heat::None
OS::TripleO::Services::PacemakerRemote: OS::Heat::None

View File

@@ -1,3 +0,0 @@
# use this file *in addition* to puppet-pacemaker.yaml
resource_registry:
OS::TripleO::Tasks::ControllerPostPuppetRestart: OS::Heat::None
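# Illustrative usage (an assumption, not part of this file): since later
# environment files override earlier ones in the resource registry, this
# override was passed after the base pacemaker environment, e.g.:
#   openstack overcloud deploy --templates \
#     -e environments/puppet-pacemaker.yaml \
#     -e environments/puppet-pacemaker-no-restart.yaml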

View File

@@ -1,21 +0,0 @@
# An environment which enables configuration of an
# Overcloud controller with Pacemaker.
resource_registry:
OS::TripleO::Tasks::ControllerPreConfig: ../extraconfig/tasks/pre_puppet_pacemaker.yaml
OS::TripleO::Tasks::ControllerPostConfig: ../extraconfig/tasks/post_puppet_pacemaker.yaml
OS::TripleO::Tasks::ControllerPostPuppetRestart: ../extraconfig/tasks/post_puppet_pacemaker_restart.yaml
# custom pacemaker services
# NOTE: Please get in touch with bandini, Ng or beekhof before adding
# any pacemaker-managed services
OS::TripleO::Services::CinderVolume: ../puppet/services/pacemaker/cinder-volume.yaml
OS::TripleO::Services::RabbitMQ: ../puppet/services/pacemaker/rabbitmq.yaml
OS::TripleO::Services::HAproxy: ../puppet/services/pacemaker/haproxy.yaml
OS::TripleO::Services::Pacemaker: ../puppet/services/pacemaker.yaml
OS::TripleO::Services::PacemakerRemote: ../puppet/services/pacemaker_remote.yaml
OS::TripleO::Services::Redis: ../puppet/services/pacemaker/database/redis.yaml
OS::TripleO::Services::MySQL: ../puppet/services/pacemaker/database/mysql.yaml
# Services that are disabled by default (use relevant environment files):
# Services that are disabled for HA deployments with pacemaker
OS::TripleO::Services::Keepalived: OS::Heat::None

View File

@@ -1,447 +0,0 @@
#!/bin/bash
set -eu
DEBUG="true" # set false if the verbosity is a problem
SCRIPT_NAME=$(basename $0)
function log_debug {
if [[ $DEBUG = "true" ]]; then
echo "`date` $SCRIPT_NAME tripleo-upgrade $(facter hostname) $1"
fi
}
function is_bootstrap_node {
if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid | tr '[:upper:]' '[:lower:]')" = "$(facter hostname | tr '[:upper:]' '[:lower:]')" ]; then
log_debug "Node is bootstrap"
echo "true"
fi
}
function check_resource_pacemaker {
if [ "$#" -ne 3 ]; then
echo_error "ERROR: check_resource function expects 3 parameters, $# given"
exit 1
fi
local service=$1
local state=$2
local timeout=$3
if [[ -z $(is_bootstrap_node) ]] ; then
log_debug "Node isn't bootstrap, skipping check for $service to be $state here "
return
else
log_debug "Node is bootstrap checking $service to be $state here"
fi
if [ "$state" = "stopped" ]; then
match_for_incomplete='Started'
else # started
match_for_incomplete='Stopped'
fi
nodes_local=$(pcs status | grep ^Online | sed 's/.*\[ \(.*\) \]/\1/g' | sed 's/ /\|/g')
if timeout -k 10 $timeout crm_resource --wait; then
node_states=$(pcs status --full | grep "$service" | grep -v Clone | { egrep "$nodes_local" || true; } )
if echo "$node_states" | grep -q "$match_for_incomplete"; then
echo_error "ERROR: cluster finished transition but $service was not in $state state, exiting."
exit 1
else
echo "$service has $state"
fi
else
echo_error "ERROR: cluster remained unstable for more than $timeout seconds, exiting."
exit 1
fi
}
function pcmk_running {
if [[ $(systemctl is-active pacemaker) = "active" ]] ; then
echo "true"
fi
}
function is_systemd_unknown {
local service=$1
if [[ $(systemctl is-active "$service") = "unknown" ]]; then
log_debug "$service found to be unkown to systemd"
echo "true"
fi
}
function grep_is_cluster_controlled {
local service=$1
if [[ -n $(systemctl status $service -l | grep Drop-In -A 5 | grep pacemaker) ||
-n $(systemctl status $service -l | grep "Cluster Controlled $service") ]] ; then
log_debug "$service is pcmk managed from systemctl grep"
echo "true"
fi
}
function is_systemd_managed {
local service=$1
# if pcmk is running, check to see if the service is managed there
if [[ -n $(pcmk_running) ]]; then
if [[ -z $(pcs status --full | grep $service) && -z $(is_systemd_unknown $service) ]] ; then
log_debug "$service found to be systemd managed from pcs status"
echo "true"
fi
else
# if it is "unknown" to systemd, then it is pacemaker managed
if [[ -n $(is_systemd_unknown $service) ]] ; then
return
elif [[ -z $(grep_is_cluster_controlled $service) ]] ; then
echo "true"
fi
fi
}
function is_pacemaker_managed {
local service=$1
# if pcmk is running, check to see if the service is managed there
if [[ -n $(pcmk_running) ]]; then
if [[ -n $(pcs status --full | grep $service) ]]; then
log_debug "$service found to be pcmk managed from pcs status"
echo "true"
fi
else
# if it is unknown to systemd, then it is pcmk managed
if [[ -n $(is_systemd_unknown $service) ]]; then
echo "true"
elif [[ -n $(grep_is_cluster_controlled $service) ]] ; then
echo "true"
fi
fi
}
function is_managed {
local service=$1
if [[ -n $(is_pacemaker_managed $service) || -n $(is_systemd_managed $service) ]]; then
echo "true"
fi
}
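# Illustrative guard (assumed usage, not from the original script): only act on
# services that one of the managers on this node actually owns, e.g.:
#   if [[ -n $(is_managed openstack-cinder-volume) ]]; then
#     restart_service openstack-cinder-volume
#   fi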
function check_resource_systemd {
if [ "$#" -ne 3 ]; then
echo_error "ERROR: check_resource function expects 3 parameters, $# given"
exit 1
fi
local service=$1
local state=$2
local timeout=$3
local check_interval=3
if [ "$state" = "stopped" ]; then
match_for_incomplete='active'
else # started
match_for_incomplete='inactive'
fi
log_debug "Going to check_resource_systemd for $service to be $state"
#sanity check is systemd managed:
if [[ -z $(is_systemd_managed $service) ]]; then
echo "ERROR - $service not found to be systemd managed."
exit 1
fi
tstart=$(date +%s)
tend=$(( $tstart + $timeout ))
while (( $(date +%s) < $tend )); do
if [[ "$(systemctl is-active $service)" = $match_for_incomplete ]]; then
echo "$service not yet $state, sleeping $check_interval seconds."
sleep $check_interval
else
echo "$service is $state"
return
fi
done
echo "Timed out waiting for $service to go to $state after $timeout seconds"
exit 1
}
function check_resource {
local service=$1
local pcmk_managed=$(is_pacemaker_managed $service)
local systemd_managed=$(is_systemd_managed $service)
if [[ -n $pcmk_managed && -n $systemd_managed ]] ; then
log_debug "ERROR $service managed by both systemd and pcmk - SKIPPING"
return
fi
if [[ -n $pcmk_managed ]]; then
check_resource_pacemaker $@
return
elif [[ -n $systemd_managed ]]; then
check_resource_systemd $@
return
fi
log_debug "ERROR cannot check_resource for $service, not managed here?"
}
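# Illustrative calls (assumed usage): the dispatcher keeps call sites the same
# whichever manager owns the resource, e.g. to wait up to 600 seconds:
#   check_resource rabbitmq started 600
#   check_resource openstack-cinder-volume stopped 600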
function manage_systemd_service {
local action=$1
local service=$2
log_debug "Going to systemctl $action $service"
systemctl $action $service
}
function manage_pacemaker_service {
local action=$1
local service=$2
# not if pacemaker isn't running!
if [[ -z $(pcmk_running) ]]; then
echo "$(facter hostname) pacemaker not active, skipping $action $service here"
elif [[ -n $(is_bootstrap_node) ]]; then
log_debug "Going to pcs resource $action $service"
pcs resource $action $service
fi
}
function stop_or_disable_service {
local service=$1
local pcmk_managed=$(is_pacemaker_managed $service)
local systemd_managed=$(is_systemd_managed $service)
if [[ -n $pcmk_managed && -n $systemd_managed ]] ; then
log_debug "Skipping stop_or_disable $service due to management conflict"
return
fi
log_debug "Stopping or disabling $service"
if [[ -n $pcmk_managed ]]; then
manage_pacemaker_service disable $service
return
elif [[ -n $systemd_managed ]]; then
manage_systemd_service stop $service
return
fi
log_debug "ERROR: $service not managed here?"
}
function start_or_enable_service {
local service=$1
local pcmk_managed=$(is_pacemaker_managed $service)
local systemd_managed=$(is_systemd_managed $service)
if [[ -n $pcmk_managed && -n $systemd_managed ]] ; then
log_debug "Skipping start_or_enable $service due to management conflict"
return
fi
log_debug "Starting or enabling $service"
if [[ -n $pcmk_managed ]]; then
manage_pacemaker_service enable $service
return
elif [[ -n $systemd_managed ]]; then
manage_systemd_service start $service
return
fi
log_debug "ERROR $service not managed here?"
}
function restart_service {
local service=$1
local pcmk_managed=$(is_pacemaker_managed $service)
local systemd_managed=$(is_systemd_managed $service)
if [[ -n $pcmk_managed && -n $systemd_managed ]] ; then
log_debug "ERROR $service managed by both systemd and pcmk - SKIPPING"
return
fi
log_debug "Restarting $service"
if [[ -n $pcmk_managed ]]; then
manage_pacemaker_service restart $service
return
elif [[ -n $systemd_managed ]]; then
manage_systemd_service restart $service
return
fi
log_debug "ERROR $service not managed here?"
}
function echo_error {
echo "$@" | tee /dev/fd2
}
# swift is a special case because it is/was never handled by pacemaker
# when stand-alone swift is used, only swift-proxy is running on controllers
function systemctl_swift {
services=( openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy )
local action=$1
case $action in
stop)
services=$(systemctl | grep openstack-swift- | grep running | awk '{print $1}')
;;
start)
enable_swift_storage=$(hiera -c /etc/puppet/hiera.yaml tripleo::profile::base::swift::storage::enable_swift_storage)
if [[ $enable_swift_storage != "true" ]]; then
services=( openstack-swift-proxy )
fi
;;
*) echo "Unknown action $action passed to systemctl_swift"
exit 1
;; # shouldn't ever happen...
esac
for service in ${services[@]}; do
manage_systemd_service $action $service
done
}
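# Illustrative usage (assumed): bracket maintenance work with a swift
# stop/start cycle, e.g.:
#   systemctl_swift stop
#   # ... update packages ...
#   systemctl_swift start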
# Special-case OVS for https://bugs.launchpad.net/tripleo/+bug/1635205
# Update condition and add --notriggerun for +bug/1669714
function special_case_ovs_upgrade_if_needed {
if rpm -qa | grep "^openvswitch-2.5.0-14" || rpm -q --scripts openvswitch | awk '/postuninstall/,/*/' | grep "systemctl.*try-restart" ; then
echo "Manual upgrade of openvswitch - ovs-2.5.0-14 or restart in postun detected"
rm -rf OVS_UPGRADE
mkdir OVS_UPGRADE && pushd OVS_UPGRADE
echo "Attempting to downloading latest openvswitch with yumdownloader"
yumdownloader --resolve openvswitch
for pkg in $(ls -1 *.rpm); do
if rpm -U --test $pkg 2>&1 | grep "already installed" ; then
echo "Looks like newer version of $pkg is already installed, skipping"
else
echo "Updating $pkg with --nopostun --notriggerun"
rpm -U --replacepkgs --nopostun --notriggerun $pkg
fi
done
popd
else
echo "Skipping manual upgrade of openvswitch - no restart in postun detected"
fi
}
# This code is meant to fix https://bugs.launchpad.net/tripleo/+bug/1686357 on
# existing setups via a minor update workflow and be idempotent. We need to
# run this before the yum update because we fix this up even when there are no
# packages to update on the system (in which case the script exits).
# This code must be called with set +eu (due to the ocf scripts being sourced)
function fixup_wrong_ipv6_vip {
# This XPath query identifies all the VIPs in pacemaker with netmask /64. Those are IPv6-only resources that have the wrong netmask
# This gives the address of the resource in the CIB, one address per line. For example:
# /cib/configuration/resources/primitive[@id='ip-2001.db8.ca2.4..10']/instance_attributes[@id='ip-2001.db8.ca2.4..10-instance_attributes']\
# /nvpair[@id='ip-2001.db8.ca2.4..10-instance_attributes-cidr_netmask']
vip_xpath_query="//resources/primitive[@type='IPaddr2']/instance_attributes/nvpair[@name='cidr_netmask' and @value='64']"
vip_xpath_xml_addresses=$(cibadmin --query --xpath "$vip_xpath_query" -e 2>/dev/null)
# The following extracts the @id value of the resource
vip_resources_to_fix=$(echo -e "$vip_xpath_xml_addresses" | sed -n "s/.*primitive\[@id='\([^']*\)'.*/\1/p")
# Running this in a subshell so that sourcing files cannot possibly affect the running script
(
OCF_PATH="/usr/lib/ocf/lib/heartbeat"
if [ -n "$vip_resources_to_fix" -a -f $OCF_PATH/ocf-shellfuncs -a -f $OCF_PATH/findif.sh ]; then
source $OCF_PATH/ocf-shellfuncs
source $OCF_PATH/findif.sh
for resource in $vip_resources_to_fix; do
echo "Updating IPv6 VIP $resource with a /128 and a correct addrlabel"
# The following will give us something like:
# <nvpair id="ip-2001.db8.ca2.4..10-instance_attributes-ip" name="ip" value="2001:db8:ca2:4::10"/>
ip_cib_nvpair=$(cibadmin --query --xpath "//resources/primitive[@type='IPaddr2' and @id='$resource']/instance_attributes/nvpair[@name='ip']")
# Let's filter out the value of the nvpair to get the ip address
ip_address=$(echo $ip_cib_nvpair | xmllint --xpath 'string(//nvpair/@value)' -)
OCF_RESKEY_cidr_netmask="64"
OCF_RESKEY_ip="$ip_address"
# Unfortunately due to https://bugzilla.redhat.com/show_bug.cgi?id=1445628
# we need to find out the appropriate nic given the ip address.
nic=$(findif $ip_address | awk '{ print $1 }')
ret=$?
if [ -z "$nic" -o $ret -ne 0 ]; then
echo "NIC autodetection failed for VIP $ip_address, not updating VIPs"
# Only exits the subshell
exit 1
fi
ocf_run -info pcs resource update --wait "$resource" ip="$ip_address" cidr_netmask=128 nic="$nic" lvs_ipv6_addrlabel=true lvs_ipv6_addrlabel_value=99
ret=$?
if [ $ret -ne 0 ]; then
echo "pcs resource update for VIP $resource failed, not updating VIPs"
# Only exits the subshell
exit 1
fi
done
fi
)
}
# https://bugs.launchpad.net/tripleo/+bug/1704131 guard against yum update
# waiting for an existing process until the heat stack time out
function check_for_yum_lock {
if [[ -f /var/run/yum.pid ]] ; then
ERR="ERROR existing yum.pid detected - can't continue! Please ensure
there is no other package update process for the duration of the minor update
worfklow. Exiting."
echo $ERR
exit 1
fi
}
# This function tries to resolve an RPM dependency issue that can arise when
# updating ceph packages on nodes that do not run the ceph-osd service. These
# nodes do not require the ceph-osd package, and updates will fail if the
# ceph-osd package cannot be updated because it's not available in any enabled
# repo. The dependency issue is resolved by removing the ceph-osd package from
# nodes that don't require it.
#
# No change is made to nodes that use the ceph-osd service (e.g. ceph storage
# nodes, and hyperconverged nodes running ceph-osd and compute services). The
# ceph-osd package is left in place, and the currently enabled repos will be
# used to update all ceph packages.
function yum_pre_update {
echo "Checking for ceph-osd dependency issues"
# No need to proceed if the ceph-osd package isn't installed
if ! rpm -q ceph-osd >/dev/null 2>&1; then
echo "ceph-osd package is not installed"
return
fi
# Do not proceed if there's any sign that the ceph-osd package is in use:
# - Are there OSD entries in /var/lib/ceph/osd?
# - Are any ceph-osd processes running?
# - Are there any ceph data disks (as identified by 'ceph-disk')
if [ -n "$(ls -A /var/lib/ceph/osd 2>/dev/null)" ]; then
echo "ceph-osd package is required (there are OSD entries in /var/lib/ceph/osd)"
return
fi
if [ "$(pgrep -xc ceph-osd)" != "0" ]; then
echo "ceph-osd package is required (there are ceph-osd processes running)"
return
fi
if ceph-disk list |& grep -q "ceph data"; then
echo "ceph-osd package is required (ceph data disks detected)"
return
fi
# Get a list of all ceph packages available from the currently enabled
# repos. Use "--showduplicates" to ensure the list includes installed
# packages that happen to be up to date.
local ceph_pkgs="$(yum list available --showduplicates 'ceph-*' |& awk '/^ceph/ {print $1}' | sort -u)"
# No need to proceed if no ceph packages are available from the currently
# enabled repos.
if [ -z "$ceph_pkgs" ]; then
echo "ceph packages are not available from any enabled repo"
return
fi
# No need to proceed if the ceph-osd package *is* available
if [[ $ceph_pkgs =~ ceph-osd ]]; then
echo "ceph-osd package is available from an enabled repo"
return
fi
echo "ceph-osd package is not required, but is preventing updates to other ceph packages"
echo "Removing ceph-osd package to allow updates to other ceph packages"
yum -y remove ceph-osd
}
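# Illustrative call order (an assumption about the minor-update flow, not part
# of this file): the guards above run before the actual package update, e.g.:
#   check_for_yum_lock
#   yum_pre_update
#   yum -y update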

View File

@@ -1,19 +0,0 @@
#!/bin/bash
set -x
# On initial deployment, the pacemaker service is disabled and is-active exits
# 3 in that case, so allow this to fail gracefully.
pacemaker_status=$(systemctl is-active pacemaker || :)
if [ "$pacemaker_status" = "active" ]; then
pcs property set maintenance-mode=true
fi
# We need to reload haproxy in case the certificate changed because
# puppet doesn't know the contents of the cert file. We shouldn't
# reload it if it wasn't already active (such as if using external
# loadbalancer or on initial deployment).
haproxy_status=$(systemctl is-active haproxy || :)
if [ "$haproxy_status" = "active" ]; then
systemctl reload haproxy
fi

View File

@@ -1,33 +0,0 @@
#!/bin/bash
set -eux
# Run if pacemaker is running, we're the bootstrap node,
# and we're updating the deployment (not creating).
RESTART_FOLDER="/var/lib/tripleo/pacemaker-restarts"
if [[ -d "$RESTART_FOLDER" && -n $(pcmk_running) && -n $(is_bootstrap_node) ]]; then
TIMEOUT=600
PCS_STATUS_OUTPUT="$(pcs status)"
SERVICES_TO_RESTART="$(ls $RESTART_FOLDER)"
for service in $SERVICES_TO_RESTART; do
if ! echo "$PCS_STATUS_OUTPUT" | grep $service; then
echo "Service $service not found as a pacemaker resource, cannot restart it."
exit 1
fi
done
for service in $SERVICES_TO_RESTART; do
echo "Restarting $service..."
pcs resource restart --wait=$TIMEOUT $service
rm -f "$RESTART_FOLDER"/$service
done
fi
if [ $(systemctl is-active haproxy) = "active" ]; then
systemctl reload haproxy
fi
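# Illustrative trigger (assumed convention, not part of this script): a config
# change elsewhere requests a restart by touching a flag file named after the
# pacemaker resource, e.g.:
#   touch /var/lib/tripleo/pacemaker-restarts/haproxy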

View File

@@ -1,43 +0,0 @@
heat_template_version: rocky
description: 'Post-Puppet Config for Pacemaker deployments'
parameters:
servers:
type: json
input_values:
type: json
description: input values for the software deployments
resources:
{%- for role in roles %}
{%- if 'controller' in role.tags %}
{{role.name}}PostPuppetMaintenanceModeConfig:
type: OS::Heat::SoftwareConfig
properties:
group: script
config: |
#!/bin/bash
pacemaker_status=$(systemctl is-active pacemaker)
if [ "$pacemaker_status" = "active" ]; then
pcs property set maintenance-mode=false
fi
{{role.name}}PostPuppetMaintenanceModeDeployment:
type: OS::Heat::SoftwareDeployments
properties:
name: {{role.name}}PostPuppetMaintenanceModeDeployment
servers: {get_param: [servers, {{role.name}}]}
config: {get_resource: {{role.name}}PostPuppetMaintenanceModeConfig}
input_values: {get_param: input_values}
{{role.name}}PostPuppetRestart:
type: OS::TripleO::Tasks::{{role.name}}PostPuppetRestart
depends_on: {{role.name}}PostPuppetMaintenanceModeDeployment
properties:
servers: {get_param: [servers, {{role.name}}]}
input_values: {get_param: input_values}
{%- endif %}
{%- endfor %}

View File

@@ -1,29 +0,0 @@
heat_template_version: rocky
description: 'Post-Puppet restart config for Pacemaker deployments'
parameters:
servers:
type: json
input_values:
type: json
description: input values for the software deployments
resources:
ControllerPostPuppetRestartConfig:
type: OS::Heat::SoftwareConfig
properties:
group: script
config:
list_join:
- ''
- - get_file: pacemaker_common_functions.sh
- get_file: pacemaker_resource_restart.sh
ControllerPostPuppetRestartDeployment:
type: OS::Heat::SoftwareDeployments
properties:
name: ControllerPostPuppetRestartDeployment
servers: {get_param: servers}
config: {get_resource: ControllerPostPuppetRestartConfig}
input_values: {get_param: input_values}

View File

@@ -1,26 +0,0 @@
heat_template_version: rocky
description: 'Pre-Puppet Config for Pacemaker deployments'
parameters:
servers:
type: json
input_values:
type: json
description: input values for the software deployments
resources:
ControllerPrePuppetMaintenanceModeConfig:
type: OS::Heat::SoftwareConfig
properties:
group: script
config:
get_file: pacemaker_maintenance_mode.sh
ControllerPrePuppetMaintenanceModeDeployment:
type: OS::Heat::SoftwareDeployments
properties:
name: ControllerPrePuppetMaintenanceModeDeployment
servers: {get_param: servers}
config: {get_resource: ControllerPrePuppetMaintenanceModeConfig}
input_values: {get_param: input_values}

View File

@@ -0,0 +1,15 @@
---
upgrade:
- |
The environment file puppet-pacemaker.yaml has been removed; make
sure that you no longer reference it. The docker-ha.yaml file
should already have been used in place of puppet-pacemaker.yaml
during the upgrade from Ocata to Pike. The environment file
puppet-pacemaker-no-restart.yaml has been removed too; it was only
used in conjunction with puppet-pacemaker.yaml.
- |
The environment file deployed-server-pacemaker-environment.yaml
has been removed; make sure that you no longer reference it. Its
current contents result in no tangible difference from the default
resource registry state, so removing the file should not change
the overcloud.
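
As a sketch of the switch this note describes (the paths below assume the packaged template location and are illustrative; adjust them to your deployment), a deploy command that still referenced the removed file, such as

    openstack overcloud deploy --templates \
      -e /usr/share/openstack-tripleo-heat-templates/environments/puppet-pacemaker.yaml

should already have been changed during the Ocata-to-Pike upgrade to use the containerized HA environment instead:

    openstack overcloud deploy --templates \
      -e /usr/share/openstack-tripleo-heat-templates/environments/docker-ha.yaml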