From 911edab71e435736dc094c9633520315d310f60c Mon Sep 17 00:00:00 2001
From: marios
Date: Mon, 24 Jul 2017 14:01:06 +0300
Subject: [PATCH] Adds pacemaker update_tasks for Pike minor update workflow

Adds update_tasks for the minor update workflow. These will be
collected into playbooks during an initial 'update init' heat
stack update and then invoked later by the operator as ansible
playbooks.

Current understanding/workflow:
 Step=1: stop the cluster on the updated node
 Step=2: Pull the latest image and retag it as pcmklatest
 Step=3: yum upgrade happens on the host
 Step=4: Restart the cluster on the node
 Step=5: Verification: test pacemaker services are running.

https://etherpad.openstack.org/p/tripleo-pike-updates-upgrades

Related-Bug: 1715557

Co-Authored-By: Damien Ciabrini
Co-Authored-By: Sofer Athlan-Guyot
Change-Id: I101e0f5d221045fbf94fb9dc11a2f30706843806
(cherry picked from commit a953bda0ae615dc44d3e8a70aa7ab0160e26f3af)
---
 docker/services/README.rst                    | 21 ++++++++++++
 docker/services/pacemaker/cinder-backup.yaml  | 30 +++++++++++++++++
 docker/services/pacemaker/cinder-volume.yaml  | 33 +++++++++++++++++--
 docker/services/pacemaker/clustercheck.yaml   |  2 ++
 docker/services/pacemaker/database/mysql.yaml | 30 +++++++++++++++++
 docker/services/pacemaker/database/redis.yaml | 30 +++++++++++++++++
 docker/services/pacemaker/haproxy.yaml        | 30 +++++++++++++++++
 docker/services/pacemaker/manila-share.yaml   | 30 +++++++++++++++++
 docker/services/pacemaker/rabbitmq.yaml       | 30 +++++++++++++++++
 docker/services/rabbitmq.yaml                 |  6 ++++
 puppet/services/pacemaker.yaml                | 12 +++++++
 puppet/services/tripleo-packages.yaml         |  9 ++++-
 tools/yaml-validate.py                        |  3 +-
 13 files changed, 261 insertions(+), 5 deletions(-)

diff --git a/docker/services/README.rst b/docker/services/README.rst
index ce255ba86c..a843efc32f 100644
--- a/docker/services/README.rst
+++ b/docker/services/README.rst
@@ -124,3 +124,24 @@ Steps correlate to the following:
    5) Service activation (Pacemaker)
       a) step 5 baremetal
       b) step 5 containers
+
+Update steps:
+-------------
+
+All services have an associated update_tasks output that is an ansible
+snippet that will be run during a minor update. The update is expected
+to run in a rolling fashion (one node at a time).
+
+For Controller (where pacemaker is running) we have the following states:
+ 1. Step=1: stop the cluster on the updated node;
+ 2. Step=2: Pull the latest image and retag it as pcmklatest
+ 3. Step=3: yum upgrade happens on the host.
+ 4. Step=4: Restart the cluster on the node
+ 5. Step=5: Verification:
+    Currently we test that the pacemaker services are running.
+
+Then the usual deploy steps are run which pull in the latest image for
+all containerized services and the updated configuration if any.
+
+Note: as pacemaker is not containerized, the points 1 and 4 happen in
+puppet/services/pacemaker.yaml.
diff --git a/docker/services/pacemaker/cinder-backup.yaml b/docker/services/pacemaker/cinder-backup.yaml
index 46b9932a01..4a99184fdd 100644
--- a/docker/services/pacemaker/cinder-backup.yaml
+++ b/docker/services/pacemaker/cinder-backup.yaml
@@ -207,3 +207,33 @@ outputs:
         - name: Disable cinder_backup service
           tags: step2
           service: name=openstack-cinder-backup enabled=no
+      update_tasks:  # minor update: refresh the image behind the pcmklatest tag
+        - name: Get docker Cinder-Backup image
+          set_fact:
+            docker_image: {get_param: DockerCinderBackupImage}
+            docker_image_latest: *cinder_backup_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Cinder-Backup image id
+          shell: "docker images | awk '/cinder-backup.* pcmklatest/{print $3}'"
+          register: cinder_backup_image_id
+          when: step == '2'  # only the step 2 cleanup below consumes this
+        - block:
+            - name: Get a list of container using Cinder-Backup image
+              shell: "docker ps -q -f 'ancestor={{cinder_backup_image_id.stdout}}'"
+              register: cinder_backup_containers_to_destroy
+            # The container will be recreated by the deploy steps.
+            - name: Remove any container using the same Cinder-Backup image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ cinder_backup_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Cinder-Backup images
+              shell: "docker rmi -f {{cinder_backup_image_id.stdout}}"
+          when:
+            - step == '2'
+            - cinder_backup_image_id.stdout != ''
+        - name: Pull latest Cinder-Backup images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"  # after the rmi above, which would delete an unchanged image
+        - name: Retag pcmklatest to latest Cinder-Backup image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # TODO: verify pacemaker_is_active with bundles; it does not support them yet.
diff --git a/docker/services/pacemaker/cinder-volume.yaml b/docker/services/pacemaker/cinder-volume.yaml
index 2f68b126d1..ce93e5f136 100644
--- a/docker/services/pacemaker/cinder-volume.yaml
+++ b/docker/services/pacemaker/cinder-volume.yaml
@@ -225,6 +225,33 @@ outputs:
         - name: Disable cinder_volume service from boot
           tags: step2
           service: name=openstack-cinder-volume enabled=no
-
-
-
+      update_tasks:  # minor update: refresh the image behind the pcmklatest tag
+        - name: Get docker Cinder-Volume image
+          set_fact:
+            docker_image: {get_param: DockerCinderVolumeImage}
+            docker_image_latest: *cinder_volume_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Cinder-Volume image id
+          shell: "docker images | awk '/cinder-volume.* pcmklatest/{print $3}'"
+          register: cinder_volume_image_id
+          when: step == '2'  # only the step 2 cleanup below consumes this
+        - block:
+            - name: Get a list of container using Cinder-Volume image
+              shell: "docker ps -q -f 'ancestor={{cinder_volume_image_id.stdout}}'"
+              register: cinder_volume_containers_to_destroy
+            # The container will be recreated by the deploy steps.
+            - name: Remove any container using the same Cinder-Volume image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ cinder_volume_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Cinder-Volume images
+              shell: "docker rmi -f {{cinder_volume_image_id.stdout}}"
+          when:
+            - step == '2'
+            - cinder_volume_image_id.stdout != ''
+        - name: Pull latest Cinder-Volume images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Cinder-Volume image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # TODO: verify pacemaker_is_active with bundles; it does not support them yet.
diff --git a/docker/services/pacemaker/clustercheck.yaml b/docker/services/pacemaker/clustercheck.yaml
index 69f306b1df..b5d128d437 100644
--- a/docker/services/pacemaker/clustercheck.yaml
+++ b/docker/services/pacemaker/clustercheck.yaml
@@ -95,3 +95,5 @@ outputs:
               - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
       host_prep_tasks:
       upgrade_tasks:
+      update_tasks:
+        # Nothing: It's not managed by pacemaker, so let paunch do it.
diff --git a/docker/services/pacemaker/database/mysql.yaml b/docker/services/pacemaker/database/mysql.yaml
index 936272fa05..39bbf42a88 100644
--- a/docker/services/pacemaker/database/mysql.yaml
+++ b/docker/services/pacemaker/database/mysql.yaml
@@ -307,3 +307,33 @@ outputs:
         - name: Restart xinetd service after clustercheck removal
           tags: step2
           service: name=xinetd state=restarted
+      update_tasks:  # minor update: refresh the image behind the pcmklatest tag
+        - name: Get docker Mariadb image
+          set_fact:
+            docker_image: {get_param: DockerMysqlImage}
+            docker_image_latest: *mysql_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Mariadb image id
+          shell: "docker images | awk '/mariadb.* pcmklatest/{print $3}'"
+          register: mariadb_image_id
+          when: step == '2'  # only the step 2 cleanup below consumes this
+        - block:
+            - name: Get a list of container using Mariadb image
+              shell: "docker ps -q -f 'ancestor={{mariadb_image_id.stdout}}'"
+              register: mariadb_containers_to_destroy
+            # The container will be recreated by the deploy steps.
+            - name: Remove any container using the same Mariadb image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ mariadb_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Mariadb images
+              shell: "docker rmi -f {{mariadb_image_id.stdout}}"
+          when:
+            - step == '2'
+            - mariadb_image_id.stdout != ''
+        - name: Pull latest Mariadb images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Mariadb image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # TODO: verify pacemaker_is_active with bundles; it does not support them yet.
diff --git a/docker/services/pacemaker/database/redis.yaml b/docker/services/pacemaker/database/redis.yaml
index bcda4efc65..fe69bb008c 100644
--- a/docker/services/pacemaker/database/redis.yaml
+++ b/docker/services/pacemaker/database/redis.yaml
@@ -255,3 +255,33 @@ outputs:
         - name: Disable redis service
           tags: step2
           service: name=redis enabled=no
+      update_tasks:  # minor update: refresh the image behind the pcmklatest tag
+        - name: Get docker Redis image
+          set_fact:
+            docker_image: {get_param: DockerRedisImage}
+            docker_image_latest: *redis_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Redis image id
+          shell: "docker images | awk '/redis.* pcmklatest/{print $3}'"
+          register: redis_image_id
+          when: step == '2'  # only the step 2 cleanup below consumes this
+        - block:
+            - name: Get a list of container using Redis image
+              shell: "docker ps -q -f 'ancestor={{redis_image_id.stdout}}'"
+              register: redis_containers_to_destroy
+            # The container will be recreated by the deploy steps.
+            - name: Remove any container using the same Redis image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ redis_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Redis images
+              shell: "docker rmi -f {{redis_image_id.stdout}}"
+          when:
+            - step == '2'
+            - redis_image_id.stdout != ''
+        - name: Pull latest Redis images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Redis image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # TODO: verify pacemaker_is_active with bundles; it does not support them yet.
diff --git a/docker/services/pacemaker/haproxy.yaml b/docker/services/pacemaker/haproxy.yaml
index ba0f0efcda..f1c8f905ea 100644
--- a/docker/services/pacemaker/haproxy.yaml
+++ b/docker/services/pacemaker/haproxy.yaml
@@ -253,3 +253,33 @@ outputs:
           retries: 5
           until: output.rc == 0
           when: is_bootstrap_node and haproxy_res|succeeded
+      update_tasks:  # minor update: refresh the image behind the pcmklatest tag
+        - name: Get docker Haproxy image
+          set_fact:
+            docker_image: {get_param: DockerHAProxyImage}
+            docker_image_latest: *haproxy_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Haproxy image id
+          shell: "docker images | awk '/haproxy.* pcmklatest/{print $3}'"
+          register: haproxy_image_id
+          when: step == '2'  # only the step 2 cleanup below consumes this
+        - block:
+            - name: Get a list of container using Haproxy image
+              shell: "docker ps -q -f 'ancestor={{haproxy_image_id.stdout}}'"
+              register: haproxy_containers_to_destroy
+            # The container will be recreated by the deploy steps.
+            - name: Remove any container using the same Haproxy image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ haproxy_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Haproxy images
+              shell: "docker rmi -f {{haproxy_image_id.stdout}}"
+          when:
+            - step == '2'
+            - haproxy_image_id.stdout != ''
+        - name: Pull latest Haproxy images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Haproxy image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # TODO: verify pacemaker_is_active with bundles; it does not support them yet.
diff --git a/docker/services/pacemaker/manila-share.yaml b/docker/services/pacemaker/manila-share.yaml
index a31c1c74df..55f66b9b56 100644
--- a/docker/services/pacemaker/manila-share.yaml
+++ b/docker/services/pacemaker/manila-share.yaml
@@ -166,3 +166,33 @@ outputs:
         - name: Stop and disable manila_share service
           tags: step2
           service: name=openstack-manila-share state=stopped enabled=no
+      update_tasks:  # minor update: refresh the image behind the pcmklatest tag
+        - name: Get docker Manila-Share image
+          set_fact:
+            docker_image: {get_param: DockerManilaShareImage}
+            docker_image_latest: *manila_share_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Manila-Share image id
+          shell: "docker images | awk '/manila-share.* pcmklatest/{print $3}'"
+          register: manila_share_image_id
+          when: step == '2'  # only the step 2 cleanup below consumes this
+        - block:
+            - name: Get a list of container using Manila-Share image
+              shell: "docker ps -q -f 'ancestor={{manila_share_image_id.stdout}}'"
+              register: manila_share_containers_to_destroy  # must match with_items below
+            # The container will be recreated by the deploy steps.
+            - name: Remove any container using the same Manila-Share image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ manila_share_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Manila-Share images
+              shell: "docker rmi -f {{manila_share_image_id.stdout}}"
+          when:
+            - step == '2'
+            - manila_share_image_id.stdout != ''
+        - name: Pull latest Manila-Share images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Manila-Share image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # TODO: verify pacemaker_is_active with bundles; it does not support them yet.
diff --git a/docker/services/pacemaker/rabbitmq.yaml b/docker/services/pacemaker/rabbitmq.yaml
index 5660856aa1..f17747c0bd 100644
--- a/docker/services/pacemaker/rabbitmq.yaml
+++ b/docker/services/pacemaker/rabbitmq.yaml
@@ -248,3 +248,33 @@ outputs:
         - name: Disable rabbitmq service
           tags: step2
           service: name=rabbitmq-server enabled=no
+      update_tasks:  # minor update: refresh the image behind the pcmklatest tag
+        - name: Get docker Rabbitmq image
+          set_fact:
+            docker_image: {get_param: DockerRabbitmqImage}
+            docker_image_latest: *rabbitmq_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Rabbitmq image id
+          shell: "docker images | awk '/rabbitmq.* pcmklatest/{print $3}'"
+          register: rabbitmq_image_id
+          when: step == '2'  # only the step 2 cleanup below consumes this
+        - block:
+            - name: Get a list of container using Rabbitmq image
+              shell: "docker ps -q -f 'ancestor={{rabbitmq_image_id.stdout}}'"
+              register: rabbitmq_containers_to_destroy
+            # The container will be recreated by the deploy steps.
+            - name: Remove any container using the same Rabbitmq image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ rabbitmq_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Rabbitmq images
+              shell: "docker rmi -f {{rabbitmq_image_id.stdout}}"
+          when:
+            - step == '2'
+            - rabbitmq_image_id.stdout != ''
+        - name: Pull latest Rabbitmq images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Rabbitmq image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # TODO: verify pacemaker_is_active with bundles; it does not support them yet.
diff --git a/docker/services/rabbitmq.yaml b/docker/services/rabbitmq.yaml
index 632bdc29cd..237995b996 100644
--- a/docker/services/rabbitmq.yaml
+++ b/docker/services/rabbitmq.yaml
@@ -220,3 +220,9 @@ outputs:
         - name: Stop and disable rabbitmq service
           tags: step2
           service: name=rabbitmq-server state=stopped enabled=no
+      update_tasks:
+        # TODO: Are we sure we want to support this? A rolling update
+        # without pacemaker may fail. Do we test this? In any case,
+        # this is under paunch control so the latest image should be
+        # pulled in by the deploy steps. The same question applies to the
+        # other containers that are usually managed by pacemaker.
diff --git a/puppet/services/pacemaker.yaml b/puppet/services/pacemaker.yaml
index 158d04bdae..badb1a4e72 100644
--- a/puppet/services/pacemaker.yaml
+++ b/puppet/services/pacemaker.yaml
@@ -156,3 +156,15 @@ outputs:
         - name: Start pacemaker cluster
           tags: step4
           pacemaker_cluster: state=online
+      update_tasks:
+        - name: Check pacemaker cluster running before the minor update
+          when: step == "0" # TODO(marios) disabling validations?
+          pacemaker_cluster: state=online check_and_fail=true
+          async: 30
+          poll: 4
+        - name: Stop pacemaker cluster
+          when: step == "1"
+          pacemaker_cluster: state=offline
+        - name: Start pacemaker cluster
+          when: step == "4"
+          pacemaker_cluster: state=online
diff --git a/puppet/services/tripleo-packages.yaml b/puppet/services/tripleo-packages.yaml
index 2a8620c8fe..766c75253a 100644
--- a/puppet/services/tripleo-packages.yaml
+++ b/puppet/services/tripleo-packages.yaml
@@ -57,6 +57,13 @@ outputs:
           tags: step3
           yum: name=* state=latest
       update_tasks:
+        - name: Check for existing yum.pid
+          stat: path=/var/run/yum.pid
+          register: yum_pid_file
+          when: step == "0" or step == "3"
+        - name: Exit if existing yum process
+          fail: msg="ERROR existing yum.pid detected - can't continue! Please ensure there is no other package update process for the duration of the minor update workflow. Exiting."
+          when: (step == "0" or step == "3") and yum_pid_file.stat.exists
         - name: Update all packages
-          yum: name=* state=latest
+          yum: name=* state=latest update_cache=yes # cache for tripleo/+bug/1703830
           when: step == "3"
diff --git a/tools/yaml-validate.py b/tools/yaml-validate.py
index de8ba806a9..c322962192 100755
--- a/tools/yaml-validate.py
+++ b/tools/yaml-validate.py
@@ -35,7 +35,8 @@ OPTIONAL_SECTIONS = ['workflow_tasks']
 REQUIRED_DOCKER_SECTIONS = ['service_name', 'docker_config', 'puppet_config',
                             'config_settings', 'step_config']
 OPTIONAL_DOCKER_SECTIONS = ['docker_puppet_tasks', 'upgrade_tasks',
-                            'post_upgrade_tasks', 'service_config_settings',
+                            'post_upgrade_tasks', 'update_tasks',
+                            'service_config_settings',
                             'host_prep_tasks', 'metadata_settings',
                             'kolla_config', 'logging_source', 'logging_groups']
 REQUIRED_DOCKER_PUPPET_CONFIG_SECTIONS = ['config_volume', 'step_config',