Merge "Restart all nova services after upgrade"

2019-06-27 13:39:12 +00:00 · 2019-06-27 13:39:12 +00:00 · 651b983bdb
commit 651b983bdb
parent e8f210a2d4 e6d2b92200
5 changed files with 61 additions and 42 deletions
--- a/ansible/roles/nova/defaults/main.yml
+++ b/ansible/roles/nova/defaults/main.yml
@ -387,6 +387,15 @@ nova_services_require_nova_conf:
  - nova-scheduler
  - nova-spicehtml5proxy

+# After upgrading nova-compute, services will have an RPC version cap in place.
+# We need to restart all services that communicate with nova-compute in order
+# to allow them to use the latest RPC version. Ideally, there would be a way to
+# check whether all nova services are using the latest version, but currently
+# there is not. Instead, wait a short time for all nova compute services to
+# update the version of their service in the database. This seems to take
+# around 10 seconds, but the default is 30 to allow room for slowness.
+# The value is a number of seconds; the wait only happens when kolla_action
+# is 'upgrade'.
+nova_compute_startup_delay: 30
+
 ####################
 # Notification
 ####################
--- a/ansible/roles/nova/handlers/main.yml
+++ b/ansible/roles/nova/handlers/main.yml
@ -293,3 +293,55 @@
    - kolla_action != "config"
    - inventory_hostname in groups['compute']
    - enable_nova_fake | bool
+
+# NOTE(mgoddard): After upgrading nova-compute, services will have an RPC
+# version cap in place.  We need to restart all services that communicate with
+# nova-compute in order to allow them to use the latest RPC version. Ideally,
+# there would be a way to check whether all nova services are using the latest
+# version, but currently there is not. Instead, wait a short time for all nova
+# compute services to update the version of their service in the database.
+# This seems to take around 10 seconds, but the default is 30 to allow room
+# for slowness.
+
+# Fires on any of the compute restart notifications listed below. During an
+# upgrade it pauses once (run_once) for nova_compute_startup_delay seconds;
+# for every other kolla_action the handler is skipped.
+- name: Wait for nova-compute services to update service versions
+  listen:
+    - Restart nova-compute container
+    - Restart nova-compute-ironic container
+    - Restart nova-compute-fake containers
+  when: kolla_action == 'upgrade'
+  run_once: true
+  pause:
+    seconds: "{{ nova_compute_startup_delay }}"
+
+# NOTE(mgoddard): Currently (just prior to Stein release), sending SIGHUP to
+# nova compute services leaves them in a broken state in which they cannot
+# start new instances. The following error is seen in the logs:
+# "In shutdown, no new events can be scheduled"
+# To work around this we restart the nova-compute services.
+# Speaking to the nova team, this seems to be an issue in oslo.service,
+# with a fix proposed here: https://review.opendev.org/#/c/641907.
+# This issue also seems to affect the proxy services, which exit non-zero in
+# response to a SIGHUP, so restart those too.
+# The issue actually affects all nova services, since they remain with RPC
+# version pinned to the previous release:
+# https://bugs.launchpad.net/kolla-ansible/+bug/1833069.
+# TODO(mgoddard): Use SIGHUP when this bug has been fixed.
+
+# Fires on any of the compute restart notifications listed below. During an
+# upgrade it loops over nova_services and restarts the container of each
+# service that is enabled, whose group contains this host, and that appears
+# in nova_services_require_nova_conf; placement-api is left alone.
+- name: Restart nova services to remove RPC version cap
+  listen:
+    - Restart nova-compute container
+    - Restart nova-compute-ironic container
+    - Restart nova-compute-fake containers
+  with_dict: "{{ nova_services }}"
+  when:
+    - kolla_action == 'upgrade'
+    - inventory_hostname in groups[item.value.group]
+    - item.value.enabled | bool
+    - item.key in nova_services_require_nova_conf
+    - item.key != 'placement-api'
+  become: true
+  kolla_docker:
+    name: "{{ item.value.container_name }}"
+    action: restart_container
+    common_options: "{{ docker_common_options }}"
--- a/ansible/roles/nova/tasks/legacy_upgrade.yml
+++ b/ansible/roles/nova/tasks/legacy_upgrade.yml
@ -26,5 +26,3 @@

 - name: Flush handlers
  meta: flush_handlers
-
- include_tasks: reload.yml
--- a/ansible/roles/nova/tasks/reload.yml
+++ b/ansible/roles/nova/tasks/reload.yml
@ -1,38 +0,0 @@
---
-# This play calls sighup on every service to refresh upgrade levels
-
-# NOTE(mgoddard): Currently (just prior to Stein release), sending SIGHUP to
-# nova compute services leaves them in a broken state in which they cannot
-# start new instances. The following error is seen in the logs:
-# "In shutdown, no new events can be scheduled"
-# To work around this we restart the nova-compute services.
-# Speaking to the nova team, this seems to be an issue in oslo.service,
-# with a fix proposed here: https://review.opendev.org/#/c/641907.
-# This issue also seems to affect the proxy services, which exit non-zero in
-# reponse to a SIGHUP, so restart those too.
-# TODO(mgoddard): Remove this workaround when this bug has been fixed.
-
- name: Send SIGHUP to nova services
-  become: true
-  command: docker exec -t {{ item.value.container_name }} kill -1 1
-  when:
-    - inventory_hostname in groups[item.value.group]
-    - item.value.enabled | bool
-    - item.key in nova_services_require_nova_conf
-    - not item.key.startswith('nova-compute')
-    - not item.key.endswith('proxy')
-  with_dict: "{{ nova_services }}"
-
- name: Restart nova compute and proxy services
-  become: true
-  kolla_docker:
-    action: restart_container
-    common_options: "{{ docker_common_options }}"
-    name: "{{ item.value.container_name }}"
-  when:
-    - inventory_hostname in groups[item.value.group]
-    - item.value.enabled | bool
-    - item.key in nova_services_require_nova_conf
-    - item.key.startswith('nova-compute')
-      or item.key.endswith('proxy')
-  with_dict: "{{ nova_services }}"
--- a/ansible/roles/nova/tasks/rolling_upgrade.yml
+++ b/ansible/roles/nova/tasks/rolling_upgrade.yml
@ -25,8 +25,6 @@
 - name: Flush handlers
  meta: flush_handlers

- include_tasks: reload.yml
-
 - name: Migrate Nova database
  vars:
    nova_api: "{{ nova_services['nova-api'] }}"