From 20b485fe8e58110116cee38cb45e58360caabec0 Mon Sep 17 00:00:00 2001 From: Oliver Walsh Date: Mon, 9 Sep 2019 15:48:23 +0100 Subject: [PATCH] Ensure nova-api is running before starting nova-compute containers If nova-api is delayed starting then the nova_wait_for_compute_service can timeout. A deployment using a slow/busy remote container repository is particularly susceptible to this issue. To resolve this nova_compute and nova_wait_for_compute_service have been postponed to step_5 and a task has been added to step_4 to ensure nova_api is active before proceeding. Conflicts: deployment/nova/nova-compute-container-puppet.yaml deployment/placement/placement-api-container-puppet.yaml Note: Since this is not a direct cherry-pick due to the placement extraction in train release, this backport also includes needed changes from https://review.opendev.org/688399. Change-Id: I6fcbc5cb5d4f3cbb618d9661d2a36c868e18b3d6 Closes-bug: #1842948 (cherry picked from commit 8a87cbcc349feb9cbd710e91d9805b0db2b8aba9) --- .../nova_wait_for_api_service.py | 109 +++++++++++++++ ...rvice.py => placement_wait_for_service.py} | 0 .../nova/nova-placement-container-puppet.yaml | 31 ++++- .../nova/nova-api-container-puppet.yaml | 124 +++++++++++------- .../nova-compute-common-container-puppet.yaml | 25 ++-- .../nova/nova-compute-container-puppet.yaml | 23 +--- .../nova/nova-ironic-container-puppet.yaml | 4 +- .../wait_for_nova_api-7af0c6db1b607216.yaml | 9 ++ 8 files changed, 242 insertions(+), 83 deletions(-) create mode 100755 container_config_scripts/nova_wait_for_api_service.py rename container_config_scripts/{nova_wait_for_placement_service.py => placement_wait_for_service.py} (100%) create mode 100644 releasenotes/notes/wait_for_nova_api-7af0c6db1b607216.yaml diff --git a/container_config_scripts/nova_wait_for_api_service.py b/container_config_scripts/nova_wait_for_api_service.py new file mode 100755 index 0000000000..8159aa0506 --- /dev/null +++ 
b/container_config_scripts/nova_wait_for_api_service.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# +# Copyright 2018 Red Hat Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import logging +from optparse import OptionParser +import os +import six +import sys +import time + + +from keystoneauth1 import loading +from keystoneauth1 import session + +from novaclient import client +from novaclient.exceptions import ClientException + +# In python3 SafeConfigParser was renamed to ConfigParser and the default +# for duplicate options default to true. In case of nova it is valid to +# have duplicate option lines, e.g. 
passthrough_whitelist which leads to +# issues reading the nova.conf +# https://bugs.launchpad.net/tripleo/+bug/1827775 +if six.PY3: + from six.moves.configparser import ConfigParser + config = ConfigParser(strict=False) +else: + from six.moves.configparser import SafeConfigParser + config = SafeConfigParser() + +debug = os.getenv('__OS_DEBUG', 'false') + +if debug.lower() == 'true': + loglevel = logging.DEBUG +else: + loglevel = logging.INFO + +logging.basicConfig(stream=sys.stdout, level=loglevel) +LOG = logging.getLogger('nova_wait_for_api_service') + +iterations = 60 +timeout = 10 +nova_cfg = '/etc/nova/nova.conf' + +if __name__ == '__main__': + parser = OptionParser(usage="usage: %prog [options]") + parser.add_option('-k', '--insecure', + action="store_false", + dest='insecure', + default=True, + help='Allow insecure connection when using SSL') + + (options, args) = parser.parse_args() + LOG.debug('Running with parameter insecure = %s', + options.insecure) + + if os.path.isfile(nova_cfg): + try: + config.read(nova_cfg) + except Exception: + LOG.exception('Error while reading nova.conf:') + else: + LOG.error('Nova configuration file %s does not exist', nova_cfg) + sys.exit(1) + + loader = loading.get_plugin_loader('password') + auth = loader.load_from_options( + auth_url=config.get('neutron', + 'auth_url'), + username=config.get('neutron', + 'username'), + password=config.get('neutron', + 'password'), + project_name=config.get('neutron', + 'project_name'), + project_domain_name=config.get('neutron', + 'project_domain_name'), + user_domain_name=config.get('neutron', + 'user_domain_name')) + sess = session.Session(auth=auth, verify=options.insecure) + + # Wait until this host is listed in the service list + for i in range(iterations): + try: + nova = client.Client('2.11', session=sess, + endpoint_type='internal') + nova.versions.list() + LOG.info('Nova-api service active') + sys.exit(0) + except ClientException: + LOG.info('Waiting for nova-api service') + except 
Exception: + LOG.exception( + 'Error while waiting for nova-api service') + time.sleep(timeout) +sys.exit(1) + +# vim: set et ts=4 sw=4 : diff --git a/container_config_scripts/nova_wait_for_placement_service.py b/container_config_scripts/placement_wait_for_service.py similarity index 100% rename from container_config_scripts/nova_wait_for_placement_service.py rename to container_config_scripts/placement_wait_for_service.py diff --git a/deployment/deprecated/nova/nova-placement-container-puppet.yaml b/deployment/deprecated/nova/nova-placement-container-puppet.yaml index 3fec8763b4..a64051bd1d 100644 --- a/deployment/deprecated/nova/nova-placement-container-puppet.yaml +++ b/deployment/deprecated/nova/nova-placement-container-puppet.yaml @@ -194,6 +194,12 @@ outputs: - path: /var/log/nova owner: nova:nova recurse: true + container_config_scripts: + map_merge: + - {get_attr: [ContainersCommon, container_config_scripts]} + - placement_wait_for_service.py: + mode: "0755" + content: { get_file: ../../../container_config_scripts/placement_wait_for_service.py } docker_config: step_2: get_attr: [NovaPlacementLogging, docker_config, step_2] @@ -201,7 +207,7 @@ outputs: step_4: nova_placement: start_order: 1 - image: {get_param: DockerNovaPlacementImage} + image: &nova_placement_api_image {get_param: DockerNovaPlacementImage} net: host user: root restart: always @@ -226,6 +232,29 @@ outputs: - '' environment: - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS + nova_placement_wait_for_service: + start_order: 2 + image: *nova_placement_api_image + user: root + net: host + privileged: false + detach: false + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - get_attr: [NovaPlacementLogging, volumes] + - + - /var/lib/container-config-scripts/:/container-config-scripts/:z + - /var/lib/config-data/nova_placement/etc/placement/:/etc/placement/:ro + command: "/usr/bin/bootstrap_host_exec nova_placement su nova -s /bin/bash -c '/container-config-scripts/pyshim.sh 
/container-config-scripts/placement_wait_for_service.py'" + environment: + - list_join: + - '' + - - '__OS_DEBUG=' + - yaql: + expression: str($.data.debug) + data: + debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']} host_prep_tasks: - {get_attr: [NovaPlacementLogging, host_prep_tasks]} - name: create persistent directory diff --git a/deployment/nova/nova-api-container-puppet.yaml b/deployment/nova/nova-api-container-puppet.yaml index b2cd2167c1..9c8b8cf1e5 100644 --- a/deployment/nova/nova-api-container-puppet.yaml +++ b/deployment/nova/nova-api-container-puppet.yaml @@ -310,54 +310,59 @@ outputs: owner: nova:nova recurse: true container_config_scripts: - nova_api_ensure_default_cell.sh: - mode: "0700" - content: - str_replace: - template: | - #!/bin/bash - DEFID=$(su nova -s /bin/bash -c "nova-manage cell_v2 list_cells" | sed -e '1,3d' -e '$d' | awk -F ' *| *' '$2 == "default" {print $4}') - if [ "$DEFID" ]; then - echo "(cellv2) Updating default cell_v2 cell $DEFID" - su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 update_cell --cell_uuid $DEFID --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'" - else - echo "(cellv2) Creating default cell_v2 cell" - su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 create_cell --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'" - fi - params: - CELLDB: - list_join: - - '' - - - '{scheme}' - - '://' - - '{username}' - - ':' - - '{password}' - - '@' - - - if: - - mysql_ipv6_use_ip_address - - '[{hostname}]' + map_merge: + - {get_attr: [ContainersCommon, container_config_scripts]} + - nova_wait_for_api_service.py: + mode: "0755" + content: { get_file: ../../container_config_scripts/nova_wait_for_api_service.py } + nova_api_ensure_default_cell.sh: + mode: "0700" + content: + str_replace: + template: | + #!/bin/bash + DEFID=$(su nova -s /bin/bash -c "nova-manage cell_v2 list_cells" | sed -e '1,3d' -e '$d' | awk -F ' *| *' '$2 == "default" 
{print $4}') + if [ "$DEFID" ]; then + echo "(cellv2) Updating default cell_v2 cell $DEFID" + su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 update_cell --cell_uuid $DEFID --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'" + else + echo "(cellv2) Creating default cell_v2 cell" + su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 create_cell --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'" + fi + params: + CELLDB: + list_join: + - '' + - - '{scheme}' + - '://' + - '{username}' + - ':' + - '{password}' + - '@' + - + if: + - mysql_ipv6_use_ip_address + - '[{hostname}]' + - '{hostname}' + - '/' + - 'nova' + - '?' + - '{query}' + TRANSPORTURL: + list_join: + - '' + - - '{scheme}' + - '://' + - '{username}' + - ':' + - '{password}' + - '@' - '{hostname}' - - '/' - - 'nova' - - '?' - - '{query}' - TRANSPORTURL: - list_join: - - '' - - - '{scheme}' - - '://' - - '{username}' - - ':' - - '{password}' - - '@' - - '{hostname}' - - ':' - - '{port}' - - '/' - - '?' - - '{query}' + - ':' + - '{port}' + - '/' + - '?' 
+ - '{query}' docker_config: step_2: get_attr: [NovaApiLogging, docker_config, step_2] @@ -447,7 +452,32 @@ outputs: - '' environment: - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS + nova_wait_for_api_service: + start_order: 3 + image: *nova_api_image + user: root + net: host + privileged: false + detach: false + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /var/lib/config-data/nova/etc/my.cnf.d/:/etc/my.cnf.d/:ro + - /var/lib/config-data/nova/etc/nova/:/etc/nova/:ro + - /var/log/containers/nova:/var/log/nova + - /var/lib/container-config-scripts/:/container-config-scripts/:z + command: "/usr/bin/bootstrap_host_exec nova_api su nova -s /bin/bash -c '/container-config-scripts/pyshim.sh /container-config-scripts/nova_wait_for_api_service.py'" + environment: + - list_join: + - '' + - - '__OS_DEBUG=' + - yaql: + expression: str($.data.debug) + data: + debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']} nova_api_cron: + start_order: 4 image: *nova_api_image net: host user: root diff --git a/deployment/nova/nova-compute-common-container-puppet.yaml b/deployment/nova/nova-compute-common-container-puppet.yaml index 317ce4ac8e..0e522afd7b 100644 --- a/deployment/nova/nova-compute-common-container-puppet.yaml +++ b/deployment/nova/nova-compute-common-container-puppet.yaml @@ -44,40 +44,41 @@ outputs: nova_statedir_ownership.py: mode: "0700" content: { get_file: ../../container_config_scripts/nova_statedir_ownership.py } - nova_wait_for_placement_service.py: - mode: "0755" - content: { get_file: ../../container_config_scripts/nova_wait_for_placement_service.py } nova_wait_for_compute_service.py: mode: "0755" content: { get_file: ../../container_config_scripts/nova_wait_for_compute_service.py } nova_compute_common_deploy_steps_tasks: description: Common host prep tasks for nova-compute services (compute + ironic) + # Runs as external_post_deploy_tasks value: &nova_compute_common_deploy_steps_tasks - - when: step|int == 5 - 
block: + - block: - name: is additonal Cell? set_fact: nova_additional_cell: {get_param: NovaAdditionalCell} - - name: discover nodes if it is not an additional cell + - name: check if discover hosts is required when: - not nova_additional_cell|bool + - nova_cellv2_discovery_done is not defined block: - name: discover via nova_compute? set_fact: - delegate_host: "{{ groups['nova_compute'][0] }}" + nova_cellv2_discovery_delegate_host: "{{ groups['nova_compute'][0] }}" when: - groups['nova_compute'] is defined and (groups['nova_compute']|length>0) - name: discover via nova_ironic? set_fact: - delegate_host: "{{ groups['nova_ironic'][0] }}" + nova_cellv2_discovery_delegate_host: "{{ groups['nova_ironic'][0] }}" when: - - delegate_host is not defined + - nova_cellv2_discovery_delegate_host is not defined - groups['nova_ironic'] is defined and (groups['nova_ironic']|length>0) - name: Discovering nova hosts command: "{{ container_cli }} exec nova_compute nova-manage cell_v2 discover_hosts --by-service" become: true - changed_when: False + changed_when: false + delegate_to: '{{ nova_cellv2_discovery_delegate_host }}' when: - - delegate_host is defined - - inventory_hostname == delegate_host + - nova_cellv2_discovery_delegate_host is defined + # Could be included multiple times so ensure it only runs once + - set_fact: + nova_cellv2_discovery_done: true diff --git a/deployment/nova/nova-compute-container-puppet.yaml b/deployment/nova/nova-compute-container-puppet.yaml index 418f1597ae..914e7e4f55 100644 --- a/deployment/nova/nova-compute-container-puppet.yaml +++ b/deployment/nova/nova-compute-container-puppet.yaml @@ -612,27 +612,8 @@ outputs: expression: str($.data.debug) data: debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']} - step_4: + step_5: map_merge: - - nova_wait_for_placement_service: - start_order: 2 - image: *nova_compute_image - user: nova - net: host - privileged: false - detach: false - volumes: - - 
/var/lib/container-config-scripts/:/container-config-scripts/:z - - /var/lib/config-data/puppet-generated/nova_libvirt/etc/nova:/etc/nova:ro - command: "/container-config-scripts/pyshim.sh /container-config-scripts/nova_wait_for_placement_service.py" - environment: - - list_join: - - '' - - - '__OS_DEBUG=' - - yaql: - expression: str($.data.debug) - data: - debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']} - nova_compute: start_order: 3 image: *nova_compute_image @@ -890,7 +871,7 @@ outputs: with_items: - ksm.service - ksmtuned.service - deploy_steps_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]} + external_post_deploy_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]} upgrade_tasks: - name: Remove openstack-nova-compute and python-nova package during upgrade package: diff --git a/deployment/nova/nova-ironic-container-puppet.yaml b/deployment/nova/nova-ironic-container-puppet.yaml index d475fb9c89..dc3d4fb034 100644 --- a/deployment/nova/nova-ironic-container-puppet.yaml +++ b/deployment/nova/nova-ironic-container-puppet.yaml @@ -150,7 +150,7 @@ outputs: - - /var/lib/container-config-scripts/:/container-config-scripts/ command: "/container-config-scripts/pyshim.sh /container-config-scripts/nova_statedir_ownership.py" - step_4: + step_5: nova_compute: start_order: 100 # After the ironic services image: *nova_ironic_image @@ -216,7 +216,7 @@ outputs: name: virt_sandbox_use_netlink persistent: yes state: yes - deploy_steps_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]} + external_post_deploy_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]} post_upgrade_tasks: - when: step|int == 1 import_role: diff --git a/releasenotes/notes/wait_for_nova_api-7af0c6db1b607216.yaml b/releasenotes/notes/wait_for_nova_api-7af0c6db1b607216.yaml new file mode 100644 index 0000000000..957e716bfd --- /dev/null +++ 
b/releasenotes/notes/wait_for_nova_api-7af0c6db1b607216.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + If nova-api is delayed starting then the nova_wait_for_compute_service + can timeout. A deployment using a slow/busy remote container repository is + particularly susceptible to this issue. To resolve this nova_compute and + nova_wait_for_compute_service have been postponed to step_5 and a task + has been added to step_4 to ensure nova_api is active before proceeding. + Resolves Bug `1842948 <https://bugs.launchpad.net/tripleo/+bug/1842948>`_.