diff --git a/environments/puppet-pacemaker.yaml b/environments/puppet-pacemaker.yaml index f235cf8f95..8986e35fb1 100644 --- a/environments/puppet-pacemaker.yaml +++ b/environments/puppet-pacemaker.yaml @@ -2,3 +2,5 @@ # Overcloud controller with Pacemaker. resource_registry: OS::TripleO::ControllerConfig: ../puppet/controller-config-pacemaker.yaml + OS::TripleO::Tasks::ControllerPrePuppet: ../extraconfig/tasks/pre_puppet_pacemaker.yaml + OS::TripleO::Tasks::ControllerPostPuppet: ../extraconfig/tasks/post_puppet_pacemaker.yaml diff --git a/extraconfig/tasks/noop.yaml b/extraconfig/tasks/noop.yaml new file mode 100644 index 0000000000..0cff74694b --- /dev/null +++ b/extraconfig/tasks/noop.yaml @@ -0,0 +1,10 @@ +heat_template_version: 2014-10-16 +description: 'No-op task' + +parameters: + servers: + type: json + input_values: + type: json + default: {} + description: input values for the software deployments diff --git a/extraconfig/tasks/pacemaker_resource_restart.sh b/extraconfig/tasks/pacemaker_resource_restart.sh new file mode 100755 index 0000000000..ad3c3701d5 --- /dev/null +++ b/extraconfig/tasks/pacemaker_resource_restart.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +set -eux + +pacemaker_status=$(systemctl is-active pacemaker) +check_interval=3 + +function check_resource { + + service=$1 + state=$2 + timeout=$3 + tstart=$(date +%s) + tend=$(( $tstart + $timeout )) + + if [ "$state" = "stopped" ]; then + match_for_incomplete='Started' + else # started + match_for_incomplete='Stopped' + fi + + while (( $(date +%s) < $tend )); do + node_states=$(pcs status --full | grep "$service" | grep -v Clone) + if echo "$node_states" | grep -q "$match_for_incomplete"; then + echo "$service not yet $state, sleeping $check_interval seconds." + sleep $check_interval + else + echo "$service has $state" + return + fi + done + + echo "$service never $state after $timeout seconds" | tee /dev/fd/2 + exit 1 + +} + +# Run if pacemaker is running, we're the bootstrap node, +# and we're updating the deployment (not creating). +if [ "$pacemaker_status" = "active" -a \ + "$(hiera bootstrap_nodeid)" = "$(facter hostname)" -a \ + "$(hiera update_identifier)" != "nil" ]; then + + pcs resource disable httpd + check_resource httpd stopped 300 + pcs resource disable openstack-keystone + check_resource openstack-keystone stopped 1200 + + if pcs status | grep haproxy-clone; then + pcs resource restart haproxy-clone + fi + pcs resource restart redis-master + pcs resource restart mongod-clone + pcs resource restart rabbitmq-clone + pcs resource restart memcached-clone + pcs resource restart galera-master + + pcs resource enable openstack-keystone + check_resource openstack-keystone started 300 + pcs resource enable httpd + check_resource httpd started 800 + +fi diff --git a/extraconfig/tasks/post_puppet_pacemaker.yaml b/extraconfig/tasks/post_puppet_pacemaker.yaml new file mode 100644 index 0000000000..7de41d9473 --- /dev/null +++ b/extraconfig/tasks/post_puppet_pacemaker.yaml @@ -0,0 +1,44 @@ +heat_template_version: 2014-10-16 +description: 'Post-Puppet Config for Pacemaker deployments' + +parameters: + servers: + type: json + input_values: + type: json + description: input values for the software deployments + +resources: + + ControllerPostPuppetMaintenanceModeConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: | + #!/bin/bash + pacemaker_status=$(systemctl is-active pacemaker) + + if [ "$pacemaker_status" = "active" ]; then + pcs property set maintenance-mode=false + fi + + ControllerPostPuppetMaintenanceModeDeployment: + type: OS::Heat::SoftwareDeployments + properties: + servers: {get_param: servers} + config: {get_resource: ControllerPostPuppetMaintenanceModeConfig} + input_values: {get_param: input_values} + + ControllerPostPuppetRestartConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: {get_file: pacemaker_resource_restart.sh} + + ControllerPostPuppetRestartDeployment: + type: OS::Heat::SoftwareDeployments + depends_on: ControllerPostPuppetMaintenanceModeDeployment + properties: + servers: {get_param: servers} + config: {get_resource: ControllerPostPuppetRestartConfig} + input_values: {get_param: input_values} diff --git a/extraconfig/tasks/pre_puppet_pacemaker.yaml b/extraconfig/tasks/pre_puppet_pacemaker.yaml new file mode 100644 index 0000000000..2cfe92a78e --- /dev/null +++ b/extraconfig/tasks/pre_puppet_pacemaker.yaml @@ -0,0 +1,30 @@ +heat_template_version: 2014-10-16 +description: 'Pre-Puppet Config for Pacemaker deployments' + +parameters: + servers: + type: json + input_values: + type: json + description: input values for the software deployments + +resources: + + ControllerPrePuppetMaintenanceModeConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: | + #!/bin/bash + pacemaker_status=$(systemctl is-active pacemaker) + + if [ "$pacemaker_status" = "active" ]; then + pcs property set maintenance-mode=true + fi + + ControllerPrePuppetMaintenanceModeDeployment: + type: OS::Heat::SoftwareDeployments + properties: + servers: {get_param: servers} + config: {get_resource: ControllerPrePuppetMaintenanceModeConfig} + input_values: {get_param: input_values} diff --git a/overcloud-resource-registry-puppet.yaml b/overcloud-resource-registry-puppet.yaml index c072c292d5..77368d0a62 100644 --- a/overcloud-resource-registry-puppet.yaml +++ b/overcloud-resource-registry-puppet.yaml @@ -21,7 +21,11 @@ resource_registry: OS::TripleO::CephClusterConfig::SoftwareConfig: puppet/ceph-cluster-config.yaml OS::TripleO::AllNodes::SoftwareConfig: puppet/all-nodes-config.yaml OS::TripleO::BootstrapNode::SoftwareConfig: puppet/bootstrap-config.yaml + + # Tasks (for internal TripleO usage) OS::TripleO::Tasks::PackageUpdate: extraconfig/tasks/yum_update.yaml + OS::TripleO::Tasks::ControllerPrePuppet: extraconfig/tasks/noop.yaml + OS::TripleO::Tasks::ControllerPostPuppet: extraconfig/tasks/noop.yaml # This creates the "heat-admin" user for all OS images by default # To disable, replace with firstboot/userdata_default.yaml diff --git a/overcloud.yaml b/overcloud.yaml index 676361820f..82b5f408db 100644 --- a/overcloud.yaml +++ b/overcloud.yaml @@ -1127,6 +1127,8 @@ resources: neutron_api_node_ips: {get_attr: [ControllerIpListMap, net_ip_map, {get_param: [ServiceNetMap, NeutronApiNetwork]}]} keystone_public_api_node_ips: {get_attr: [ControllerIpListMap, net_ip_map, {get_param: [ServiceNetMap, KeystonePublicApiNetwork]}]} keystone_admin_api_node_ips: {get_attr: [ControllerIpListMap, net_ip_map, {get_param: [ServiceNetMap, KeystoneAdminApiNetwork]}]} + DeployIdentifier: {get_param: DeployIdentifier} + UpdateIdentifier: {get_param: UpdateIdentifier} MysqlRootPassword: type: OS::Heat::RandomString diff --git a/puppet/all-nodes-config.yaml b/puppet/all-nodes-config.yaml index 2bc519bb4d..1147b85656 100644 --- a/puppet/all-nodes-config.yaml +++ b/puppet/all-nodes-config.yaml @@ -51,6 +51,17 @@ parameters: keystone_admin_api_node_ips: type: comma_delimited_list + DeployIdentifier: + type: string + description: > + Setting this to a unique value will re-run any deployment tasks which + perform configuration on a Heat stack-update. + UpdateIdentifier: + type: string + description: > + Setting to a previously unused value during stack-update will trigger + package update on all nodes + resources: allNodesConfigImpl: @@ -240,6 +251,9 @@ resources: nova::rabbit_hosts: *rabbit_nodes_array keystone::rabbit_hosts: *rabbit_nodes_array + deploy_identifier: {get_param: DeployIdentifier} + update_identifier: {get_param: UpdateIdentifier} + outputs: config_id: description: The ID of the allNodesConfigImpl resource. diff --git a/puppet/controller-post.yaml b/puppet/controller-post.yaml index 941e1ac519..ed8129e787 100644 --- a/puppet/controller-post.yaml +++ b/puppet/controller-post.yaml @@ -17,6 +17,13 @@ parameters: resources: + ControllerPrePuppet: + type: OS::TripleO::Tasks::ControllerPrePuppet + properties: + servers: {get_param: servers} + input_values: + update_identifier: {get_param: NodeConfigIdentifiers} + ControllerPuppetConfig: type: OS::TripleO::ControllerConfig @@ -26,6 +33,7 @@ resources: # e.g all Deployment resources should have a *Deployment_StepN suffix ControllerLoadBalancerDeployment_Step1: type: OS::Heat::StructuredDeployments + depends_on: ControllerPrePuppet properties: servers: {get_param: servers} config: {get_resource: ControllerPuppetConfig} @@ -98,10 +106,18 @@ resources: step: 5 update_identifier: {get_param: NodeConfigIdentifiers} + ControllerPostPuppet: + type: OS::TripleO::Tasks::ControllerPostPuppet + depends_on: ControllerOvercloudServicesDeployment_Step6 + properties: + servers: {get_param: servers} + input_values: + update_identifier: {get_param: NodeConfigIdentifiers} + # Note, this should come last, so use depends_on to ensure # this is created after any other resources. ExtraConfig: - depends_on: ControllerOvercloudServicesDeployment_Step5 + depends_on: ControllerPostPuppet type: OS::TripleO::NodeExtraConfigPost properties: servers: {get_param: servers}