Browse Source

Merge "Ensure nova-api is running before starting nova-compute containers"

tags/11.3.0
Zuul 1 month ago
parent
commit
cb5a99b905

+ 108
- 0
container_config_scripts/nova_wait_for_api_service.py View File

@@ -0,0 +1,108 @@
#!/usr/bin/env python
#
# Copyright 2018 Red Hat Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
from optparse import OptionParser
import os
import six
import sys
import time


from keystoneauth1 import loading
from keystoneauth1 import session

from novaclient import client
from novaclient.exceptions import ClientException

# Pick the config parser class for the running interpreter.
#
# On python3 SafeConfigParser was renamed to ConfigParser, and its `strict`
# option defaults to True, which rejects duplicate options. nova.conf may
# legitimately repeat an option line (e.g. passthrough_whitelist), so strict
# mode has to be switched off or reading nova.conf fails.
# https://bugs.launchpad.net/tripleo/+bug/1827775
if not six.PY3:
    from six.moves.configparser import SafeConfigParser
    config = SafeConfigParser()
else:
    from six.moves.configparser import ConfigParser
    config = ConfigParser(strict=False)

# Debug logging is opt-in through the __OS_DEBUG environment variable; only
# the (case-insensitive) value 'true' switches the level to DEBUG.
debug = os.getenv('__OS_DEBUG', 'false')
loglevel = logging.DEBUG if debug.lower() == 'true' else logging.INFO

# Log to stdout under a logger named after this script.
logging.basicConfig(stream=sys.stdout, level=loglevel)
LOG = logging.getLogger('nova_wait_for_api_service')

# Polling budget used by the wait loop: number of attempts and the pause
# (in seconds) between them, plus the nova configuration file to read.
iterations = 60
timeout = 10
nova_cfg = '/etc/nova/nova.conf'

if __name__ == '__main__':
    # Script entry point: poll the nova API until it answers.
    #
    # Exit status is 0 as soon as the API responds, and 1 when nova.conf is
    # missing or unreadable or when the API never answers within
    # `iterations` attempts spaced `timeout` seconds apart.
    parser = OptionParser(usage="usage: %prog [options]")
    # -k/--insecure disables TLS certificate verification. It is stored as a
    # plain "insecure" boolean (default False) so the value logged below
    # matches its name; verification is derived from it when the session is
    # created.
    parser.add_option('-k', '--insecure',
                      action="store_true",
                      dest='insecure',
                      default=False,
                      help='Allow insecure connection when using SSL')

    (options, args) = parser.parse_args()
    LOG.debug('Running with parameter insecure = %s',
              options.insecure)

    if not os.path.isfile(nova_cfg):
        LOG.error('Nova configuration file %s does not exist', nova_cfg)
        sys.exit(1)

    try:
        config.read(nova_cfg)
    except Exception:
        # Without a parsed nova.conf the credential lookups below cannot
        # succeed, so stop here instead of failing later with an unrelated
        # traceback.
        LOG.exception('Error while reading nova.conf:')
        sys.exit(1)

    # Credentials are taken from the [neutron] section of nova.conf.
    loader = loading.get_plugin_loader('password')
    auth = loader.load_from_options(
        auth_url=config.get('neutron',
                            'auth_url'),
        username=config.get('neutron',
                            'username'),
        password=config.get('neutron',
                            'password'),
        project_name=config.get('neutron',
                                'project_name'),
        project_domain_name=config.get('neutron',
                                       'project_domain_name'),
        user_domain_name=config.get('neutron',
                                    'user_domain_name'))
    sess = session.Session(auth=auth, verify=not options.insecure)

    # Wait until the nova API answers a version listing request
    for _ in range(iterations):
        try:
            nova = client.Client('2.11', session=sess, endpoint_type='internal')
            nova.versions.list()
            LOG.info('Nova-api service active')
            # SystemExit is not an Exception subclass, so the handlers below
            # do not intercept this exit.
            sys.exit(0)
        except ClientException:
            LOG.info('Waiting for nova-api service')
        except Exception:
            LOG.exception(
                'Error while waiting for nova-api service')
        time.sleep(timeout)

    LOG.error('Nova-api service not active after %d attempts', iterations)
    sys.exit(1)

# vim: set et ts=4 sw=4 :

container_config_scripts/nova_wait_for_placement_service.py → container_config_scripts/placement_wait_for_service.py View File


+ 77
- 47
deployment/nova/nova-api-container-puppet.yaml View File

@@ -272,54 +272,59 @@ outputs:
owner: nova:nova
recurse: true
container_config_scripts:
nova_api_ensure_default_cell.sh:
mode: "0700"
content:
str_replace:
template: |
#!/bin/bash
DEFID=$(su nova -s /bin/bash -c "nova-manage cell_v2 list_cells" | sed -e '1,3d' -e '$d' | awk -F ' *| *' '$2 == "default" {print $4}')
if [ "$DEFID" ]; then
echo "(cellv2) Updating default cell_v2 cell $DEFID"
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 update_cell --cell_uuid $DEFID --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'"
else
echo "(cellv2) Creating default cell_v2 cell"
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 create_cell --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'"
fi
params:
CELLDB:
list_join:
- ''
- - '{scheme}'
- '://'
- '{username}'
- ':'
- '{password}'
- '@'
-
if:
- mysql_ipv6_use_ip_address
- '[{hostname}]'
map_merge:
- {get_attr: [ContainersCommon, container_config_scripts]}
- nova_wait_for_api_service.py:
mode: "0755"
content: { get_file: ../../container_config_scripts/nova_wait_for_api_service.py }
nova_api_ensure_default_cell.sh:
mode: "0700"
content:
str_replace:
template: |
#!/bin/bash
DEFID=$(su nova -s /bin/bash -c "nova-manage cell_v2 list_cells" | sed -e '1,3d' -e '$d' | awk -F ' *| *' '$2 == "default" {print $4}')
if [ "$DEFID" ]; then
echo "(cellv2) Updating default cell_v2 cell $DEFID"
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 update_cell --cell_uuid $DEFID --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'"
else
echo "(cellv2) Creating default cell_v2 cell"
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 create_cell --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'"
fi
params:
CELLDB:
list_join:
- ''
- - '{scheme}'
- '://'
- '{username}'
- ':'
- '{password}'
- '@'
-
if:
- mysql_ipv6_use_ip_address
- '[{hostname}]'
- '{hostname}'
- '/'
- 'nova'
- '?'
- '{query}'
TRANSPORTURL:
list_join:
- ''
- - '{scheme}'
- '://'
- '{username}'
- ':'
- '{password}'
- '@'
- '{hostname}'
- '/'
- 'nova'
- '?'
- '{query}'
TRANSPORTURL:
list_join:
- ''
- - '{scheme}'
- '://'
- '{username}'
- ':'
- '{password}'
- '@'
- '{hostname}'
- ':'
- '{port}'
- '/'
- '?'
- '{query}'
- ':'
- '{port}'
- '/'
- '?'
- '{query}'
docker_config:
step_2:
get_attr: [NovaApiLogging, docker_config, step_2]
@@ -409,7 +414,32 @@ outputs:
- ''
environment:
- KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
# Container definition that runs nova_wait_for_api_service.py with the
# nova-api image.
# NOTE(review): detach: false presumably makes the deployment step block until
# the script exits -- confirm against the container runner documentation.
nova_wait_for_api_service:
start_order: 3
image: *nova_api_image
user: root
net: host
privileged: false
detach: false
volumes:
list_concat:
- {get_attr: [ContainersCommon, volumes]}
-
- /var/lib/config-data/nova/etc/my.cnf.d/:/etc/my.cnf.d/:ro
# nova.conf is mounted read-only at /etc/nova, where the script reads it.
- /var/lib/config-data/nova/etc/nova/:/etc/nova/:ro
- /var/log/containers/nova:/var/log/nova
- /var/lib/container-config-scripts/:/container-config-scripts/:z
# Starts as root, then runs the script as the nova user through pyshim.sh,
# wrapped in bootstrap_host_exec for the nova_api service.
command: "/usr/bin/bootstrap_host_exec nova_api su nova -s /bin/bash -c '/container-config-scripts/pyshim.sh /container-config-scripts/nova_wait_for_api_service.py'"
environment:
# Builds "__OS_DEBUG=<debug>" from nova's logging debug setting; the script
# switches to DEBUG logging when the value is 'true' (case-insensitive).
- list_join:
- ''
- - '__OS_DEBUG='
- yaql:
expression: str($.data.debug)
data:
debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']}
nova_api_cron:
start_order: 4
image: *nova_api_image
net: host
user: root

+ 13
- 12
deployment/nova/nova-compute-common-container-puppet.yaml View File

@@ -44,40 +44,41 @@ outputs:
nova_statedir_ownership.py:
mode: "0700"
content: { get_file: ../../container_config_scripts/nova_statedir_ownership.py }
nova_wait_for_placement_service.py:
mode: "0755"
content: { get_file: ../../container_config_scripts/nova_wait_for_placement_service.py }
nova_wait_for_compute_service.py:
mode: "0755"
content: { get_file: ../../container_config_scripts/nova_wait_for_compute_service.py }

nova_compute_common_deploy_steps_tasks:
description: Common host prep tasks for nova-compute services (compute + ironic)
# Runs as external_post_deploy_tasks
value: &nova_compute_common_deploy_steps_tasks
- when: step|int == 5
block:
- block:
- name: is additonal Cell?
set_fact:
nova_additional_cell: {get_param: NovaAdditionalCell}
- name: discover nodes if it is not an additional cell
- name: check if discover hosts is required
when:
- not nova_additional_cell|bool
- nova_cellv2_discovery_done is not defined
block:
- name: discover via nova_compute?
set_fact:
delegate_host: "{{ groups['nova_compute'][0] }}"
nova_cellv2_discovery_delegate_host: "{{ groups['nova_compute'][0] }}"
when:
- groups['nova_compute'] is defined and (groups['nova_compute']|length>0)
- name: discover via nova_ironic?
set_fact:
delegate_host: "{{ groups['nova_ironic'][0] }}"
nova_cellv2_discovery_delegate_host: "{{ groups['nova_ironic'][0] }}"
when:
- delegate_host is not defined
- nova_cellv2_discovery_delegate_host is not defined
- groups['nova_ironic'] is defined and (groups['nova_ironic']|length>0)
- name: Discovering nova hosts
command: "{{ container_cli }} exec nova_compute nova-manage cell_v2 discover_hosts --by-service"
become: true
changed_when: False
changed_when: false
delegate_to: '{{ nova_cellv2_discovery_delegate_host }}'
when:
- delegate_host is defined
- inventory_hostname == delegate_host
- nova_cellv2_discovery_delegate_host is defined
# Could be included multiple times so ensure it only runs once
- set_fact:
nova_cellv2_discovery_done: true

+ 25
- 45
deployment/nova/nova-compute-container-puppet.yaml View File

@@ -654,27 +654,8 @@ outputs:
expression: str($.data.debug)
data:
debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']}
step_4:
step_5:
map_merge:
- nova_wait_for_placement_service:
start_order: 2
image: *nova_compute_image
user: nova
net: host
privileged: false
detach: false
volumes:
- /var/lib/container-config-scripts/:/container-config-scripts/:z
- /var/lib/config-data/puppet-generated/nova_libvirt/etc/nova:/etc/nova:ro
command: "/container-config-scripts/pyshim.sh /container-config-scripts/nova_wait_for_placement_service.py"
environment:
- list_join:
- ''
- - '__OS_DEBUG='
- yaql:
expression: str($.data.debug)
data:
debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']}
- nova_compute:
start_order: 3
image: *nova_compute_image
@@ -939,31 +920,30 @@ outputs:
- ksm.service
- ksmtuned.service
deploy_steps_tasks:
list_concat:
- {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]}
- - name: validate nova compute container state
when:
- container_cli == 'podman'
- not container_healthcheck_disabled
- step|int == 5
- false
tags:
- opendev-validation
- opendev-validation-nova
block:
- name: Get nova-compute healthcheck status
register: nova_compute_healthcheck_state
systemd:
name: tripleo_nova_compute_healthcheck
retries: 10
delay: 30
until: nova_compute_healthcheck_state.status.ExecMainPID != '0' and
nova_compute_healthcheck_state.status.ActiveState in ['inactive', 'failed']
ignore_errors: yes
- name: Fail if nova-compute healthcheck report failed status
fail:
msg: nova-compute isn't working (healthcheck failed)
when: nova_compute_healthcheck_state.status.ExecMainStatus != '0'
- name: validate nova compute container state
when:
- container_cli == 'podman'
- not container_healthcheck_disabled
- step|int == 6 #FIXME: there is no step6
- false
tags:
- opendev-validation
- opendev-validation-nova
block:
- name: Get nova-compute healthcheck status
register: nova_compute_healthcheck_state
systemd:
name: tripleo_nova_compute_healthcheck
retries: 10
delay: 30
until: nova_compute_healthcheck_state.status.ExecMainPID != '0' and
nova_compute_healthcheck_state.status.ActiveState in ['inactive', 'failed']
ignore_errors: yes
- name: Fail if nova-compute healthcheck report failed status
fail:
msg: nova-compute isn't working (healthcheck failed)
when: nova_compute_healthcheck_state.status.ExecMainStatus != '0'
external_post_deploy_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]}
upgrade_tasks:
- name: Remove openstack-nova-compute and python-nova package during upgrade
package:

+ 2
- 2
deployment/nova/nova-ironic-container-puppet.yaml View File

@@ -153,7 +153,7 @@ outputs:
-
- /var/lib/container-config-scripts/:/container-config-scripts/
command: "/container-config-scripts/pyshim.sh /container-config-scripts/nova_statedir_ownership.py"
step_4:
step_5:
nova_compute:
start_order: 100 # After the ironic services
image: *nova_ironic_image
@@ -225,7 +225,7 @@ outputs:
name: virt_sandbox_use_netlink
persistent: yes
state: yes
deploy_steps_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]}
external_post_deploy_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]}
post_upgrade_tasks:
- when: step|int == 1
import_role:

+ 29
- 0
deployment/placement/placement-api-container-puppet.yaml View File

@@ -68,6 +68,10 @@ parameters:
description: The password for the nova service and db account
type: string
hidden: true
Debug:
type: boolean
default: false
description: Set to True to enable debugging on all services.

conditions:
placement_workers_zero: {equals : [{get_param: PlacementWorkers}, 0]}
@@ -190,6 +194,12 @@ outputs:
- path: /var/log/placement
owner: placement:placement
recurse: true
container_config_scripts:
map_merge:
- {get_attr: [ContainersCommon, container_config_scripts]}
- placement_wait_for_service.py:
mode: "0755"
content: { get_file: ../../container_config_scripts/placement_wait_for_service.py }
docker_config:
step_2:
get_attr: [PlacementLogging, docker_config, step_2]
@@ -280,6 +290,25 @@ outputs:
- ''
environment:
- KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
# Container definition that runs placement_wait_for_service.py with the
# placement-api image.
# NOTE(review): detach: false presumably makes the deployment step block until
# the script exits -- confirm against the container runner documentation.
placement_wait_for_service:
start_order: 2
image: *placement_api_image
user: root
net: host
privileged: false
detach: false
volumes:
- /var/lib/container-config-scripts/:/container-config-scripts/:z
# NOTE(review): the generated placement config is mounted at the kolla
# config_files source path, not under /etc -- confirm the script reads its
# configuration from there.
- /var/lib/config-data/puppet-generated/placement/:/var/lib/kolla/config_files/src:ro
# Starts as root, then runs the script as the placement user through
# pyshim.sh, wrapped in bootstrap_host_exec for the placement service.
command: "/usr/bin/bootstrap_host_exec placement su placement -s /bin/bash -c '/container-config-scripts/pyshim.sh /container-config-scripts/placement_wait_for_service.py'"
environment:
# Builds "__OS_DEBUG=<Debug>" from this template's Debug parameter.
- list_join:
- ''
- - '__OS_DEBUG='
- yaql:
expression: str($.data.debug)
data:
debug: {get_param: Debug}
host_prep_tasks: {get_attr: [PlacementLogging, host_prep_tasks]}
upgrade_tasks: []
post_upgrade_tasks:

+ 9
- 0
releasenotes/notes/wait_for_nova_api-7af0c6db1b607216.yaml View File

@@ -0,0 +1,9 @@
---
fixes:
- |
If nova-api is slow to start, nova_wait_for_compute_service can time out.
A deployment using a slow or busy remote container repository is
particularly susceptible to this issue. To resolve this, nova_compute and
nova_wait_for_compute_service have been postponed to step_5, and a task
has been added to step_4 to ensure nova_api is active before proceeding.
Resolves Bug `1842948 <https://bugs.launchpad.net/tripleo/+bug/1842948>`_.

Loading…
Cancel
Save