Avoid concurrent nova cell_v2 discovery instances

The nova_cell_v2_discover_hosts.py was moved to run on compute
nodes instead of controllers to allow adding computes without
touching controllers and in case multiple stacks are used to
manage compute nodes. If the nova-manage command, run by
nova_cell_v2_discover_hosts.py, gets triggered at the same time
on multiple compute nodes, the concurrent discovery runs race.

With this change if this is _not_ an additional cell:
* in docker_config step4, on every compute, we start the nova-compute
  container and then start a (detach=false) container to wait for
  its service to appear in the service list.
* in docker_config step5, on the bootstrap node only, we run the
  discovery.

Change-Id: I1a159a7c2ac286373df2b7c566426b37b7734961
Closes-bug: 1824445
Co-authored-by: mschuppert@redhat.com
(cherry picked from commit 908e6b9810)
(cherry picked from commit 8fd58b4b87)
(cherry picked from commit cf6ae21fc9)
This commit is contained in:
Oliver Walsh 2019-04-12 11:42:07 +01:00 committed by Martin Schuppert
parent 3f6ef5c74e
commit d8e4ee655e
5 changed files with 164 additions and 86 deletions

View File

@ -41,8 +41,11 @@ outputs:
mode: "0700"
content: { get_file: ../../docker_config_scripts/nova_statedir_ownership.py }
nova_wait_for_placement_service.py:
mode: "0700"
mode: "0755"
content: { get_file: ../../docker_config_scripts/nova_wait_for_placement_service.py }
nova_cell_v2_discover_host.py:
mode: "0700"
content: { get_file: ../../docker_config_scripts/nova_cell_v2_discover_host.py }
nova_cell_v2_discover_hosts.py:
mode: "0755"
content: { get_file: ../../docker_config_scripts/nova_cell_v2_discover_hosts.py }
nova_wait_for_compute_service.py:
mode: "0755"
content: { get_file: ../../docker_config_scripts/nova_wait_for_compute_service.py }

View File

@ -195,60 +195,76 @@ outputs:
- - 'TRIPLEO_DEPLOY_IDENTIFIER='
- {get_param: DeployIdentifier}
step_4:
nova_wait_for_placement_service:
start_order: 2
image: *nova_compute_image
user: root
net: host
privileged: false
detach: false
volumes:
- /var/lib/docker-config-scripts/:/docker-config-scripts/
- /var/lib/config-data/puppet-generated/nova_libvirt/etc/nova:/etc/nova:ro
command: "/docker-config-scripts/nova_wait_for_placement_service.py"
nova_compute:
start_order: 3
image: *nova_compute_image
ulimit: {get_param: DockerNovaComputeUlimit}
ipc: host
net: host
privileged: true
user: nova
restart: always
healthcheck:
test:
list_join:
- ' '
- - '/openstack/healthcheck'
- yaql:
expression: str($.data.port)
data:
port: {get_attr: [NovaComputeBase, role_data, config_settings, 'nova::rabbit_port']}
volumes:
list_concat:
- {get_attr: [ContainersCommon, volumes]}
- {get_attr: [NovaLogging, volumes]}
- {get_param: NovaComputeOptVolumes}
-
- /var/lib/kolla/config_files/nova_compute.json:/var/lib/kolla/config_files/config.json:ro
- /var/lib/config-data/puppet-generated/nova_libvirt/:/var/lib/kolla/config_files/src:ro
- /etc/iscsi:/var/lib/kolla/config_files/src-iscsid:ro
- /etc/ceph:/var/lib/kolla/config_files/src-ceph:ro
- /dev:/dev
- /lib/modules:/lib/modules:ro
- /run:/run
- /var/lib/iscsi:/var/lib/iscsi
- /var/lib/nova:/var/lib/nova:shared
- /var/lib/libvirt:/var/lib/libvirt
- /sys/class/net:/sys/class/net
- /sys/bus/pci:/sys/bus/pci
environment:
list_concat:
- {get_param: NovaComputeOptEnvVars}
-
- KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
map_merge:
- nova_wait_for_placement_service:
start_order: 2
image: *nova_compute_image
user: nova
net: host
privileged: false
detach: false
volumes:
- /var/lib/docker-config-scripts/:/docker-config-scripts/
- /var/lib/config-data/puppet-generated/nova_libvirt/etc/nova:/etc/nova:ro
command: "/docker-config-scripts/nova_wait_for_placement_service.py"
- nova_compute:
start_order: 3
image: *nova_compute_image
ulimit: {get_param: DockerNovaComputeUlimit}
ipc: host
net: host
privileged: true
user: nova
restart: always
healthcheck:
test:
list_join:
- ' '
- - '/openstack/healthcheck'
- yaql:
expression: str($.data.port)
data:
port: {get_attr: [NovaComputeBase, role_data, config_settings, 'nova::rabbit_port']}
volumes:
list_concat:
- {get_attr: [ContainersCommon, volumes]}
- {get_attr: [NovaLogging, volumes]}
- {get_param: NovaComputeOptVolumes}
-
- /var/lib/kolla/config_files/nova_compute.json:/var/lib/kolla/config_files/config.json:ro
- /var/lib/config-data/puppet-generated/nova_libvirt/:/var/lib/kolla/config_files/src:ro
- /etc/iscsi:/var/lib/kolla/config_files/src-iscsid:ro
- /etc/ceph:/var/lib/kolla/config_files/src-ceph:ro
- /dev:/dev
- /lib/modules:/lib/modules:ro
- /run:/run
- /var/lib/iscsi:/var/lib/iscsi
- /var/lib/nova:/var/lib/nova:shared
- /var/lib/libvirt:/var/lib/libvirt
- /sys/class/net:/sys/class/net
- /sys/bus/pci:/sys/bus/pci
environment:
list_concat:
- {get_param: NovaComputeOptEnvVars}
-
- KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
- nova_wait_for_compute_service:
start_order: 4
image: *nova_compute_image
net: host
detach: false
volumes:
list_concat:
- {get_attr: [ContainersCommon, volumes]}
-
- /var/lib/config-data/nova_libvirt/etc/my.cnf.d/:/etc/my.cnf.d/:ro
- /var/lib/config-data/nova_libvirt/etc/nova/:/etc/nova/:ro
- /var/log/containers/nova:/var/log/nova
- /var/lib/docker-config-scripts/:/docker-config-scripts/
user: nova
command: "/docker-config-scripts/nova_wait_for_compute_service.py"
step_5:
nova_cellv2_discover_hosts:
nova_cell_v2_discover_hosts:
start_order: 0
image: *nova_compute_image
net: host
@ -262,7 +278,7 @@ outputs:
- /var/log/containers/nova:/var/log/nova
- /var/lib/docker-config-scripts/:/docker-config-scripts/
user: root
command: "/docker-config-scripts/nova_cell_v2_discover_host.py"
command: "su nova -s /bin/bash -c '/docker-config-scripts/nova_cell_v2_discover_hosts.py'"
host_prep_tasks:
list_concat:
- {get_attr: [NovaLogging, host_prep_tasks]}

View File

@ -114,8 +114,9 @@ outputs:
- /var/lib/nova:/var/lib/nova:shared
- /var/lib/docker-config-scripts/:/docker-config-scripts/
command: "/docker-config-scripts/nova_statedir_ownership.py"
step_5:
step_4:
nova_compute:
start_order: 100 # After the ironic services
image: *nova_ironic_image
net: host
privileged: true
@ -144,8 +145,8 @@ outputs:
- /var/log/containers/nova:/var/log/nova
environment:
- KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
nova_cellv2_discover_hosts:
start_order: 1
nova_wait_for_compute_service:
start_order: 101
image: *nova_ironic_image
net: host
detach: false
@ -158,7 +159,23 @@ outputs:
- /var/log/containers/nova:/var/log/nova
- /var/lib/docker-config-scripts/:/docker-config-scripts/
user: root
command: "/docker-config-scripts/nova_cell_v2_discover_host.py"
command: "/docker-config-scripts/nova_wait_for_compute_service.py"
step_5:
nova_cell_v2_discover_hosts:
start_order: 0
image: *nova_ironic_image
net: host
detach: false
volumes:
list_concat:
- {get_attr: [ContainersCommon, volumes]}
-
- /var/lib/config-data/nova/etc/my.cnf.d/:/etc/my.cnf.d/:ro
- /var/lib/config-data/nova/etc/nova/:/etc/nova/:ro
- /var/log/containers/nova:/var/log/nova
- /var/lib/docker-config-scripts/:/docker-config-scripts/
user: root
command: "su nova -s /bin/bash -c '/docker-config-scripts/nova_cell_v2_discover_hosts.py'"
host_prep_tasks:
- name: create persistent directories
file:

View File

@ -0,0 +1,55 @@
#!/usr/bin/env python
#
# Copyright 2018 Red Hat Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import os
import random
import subprocess
import sys
import time
# Script: run ``nova-manage cell_v2 discover_hosts --by-service --verbose``,
# retrying on failure with a random delay between attempts.
# Exits 0 as soon as one attempt succeeds, 1 if the nova configuration file
# is missing or every attempt fails.
random.seed()

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
LOG = logging.getLogger('nova_cell_v2_discover_hosts')

# Maximum number of discovery attempts before giving up.
iterations = 10
# Upper bound, in seconds, of the random delay between two attempts.
timeout_max = 30
# nova-manage needs this configuration file; checked before the first attempt.
nova_cfg = '/etc/nova/nova.conf'

if __name__ == '__main__':
    if not os.path.isfile(nova_cfg):
        LOG.error('Nova configuration file %s does not exist', nova_cfg)
        sys.exit(1)

    for i in range(iterations):
        try:
            subprocess.check_call([
                '/usr/bin/nova-manage',
                'cell_v2',
                'discover_hosts',
                '--by-service',
                '--verbose'
            ])
            # sys.exit raises SystemExit, which is not an Exception subclass,
            # so the handlers below do not intercept the success path.
            sys.exit(0)
        except subprocess.CalledProcessError as e:
            LOG.error('Cell v2 discovery failed with exit code %d, retrying',
                      e.returncode)
        except Exception:
            # e.g. the nova-manage binary cannot be executed at all.
            LOG.exception('Error during host discovery')

        # Back off for a random interval before the next attempt; skip the
        # delay after the final attempt so a failure is reported immediately.
        if i < iterations - 1:
            time.sleep(random.randint(1, timeout_max))

    sys.exit(1)
# vim: set et ts=4 sw=4 :

View File

@ -13,13 +13,10 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import logging
from optparse import OptionParser
import os
import socket
import subprocess
import sys
import time
@ -32,8 +29,10 @@ from novaclient import client
from six.moves.configparser import SafeConfigParser
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
LOG = logging.getLogger('nova_cell_v2_discover_host')
LOG = logging.getLogger('nova_wait_for_compute_service')
iterations = 60
timeout = 10
nova_cfg = '/etc/nova/nova.conf'
if __name__ == '__main__':
@ -77,33 +76,21 @@ if __name__ == '__main__':
sess = session.Session(auth=auth, verify=options.insecure)
nova = client.Client('2.11', session=sess, endpoint_type='internal')
# Wait until this host is listed in the service list then
# run cellv2 host discovery
retries = 10
for i in range(retries):
# Wait until this host is listed in the service list
for i in range(iterations):
try:
service_list = nova.services.list(binary='nova-compute')
for entry in service_list:
host = getattr(entry, 'host', '')
zone = getattr(entry, 'zone', '')
if host == my_host and zone != 'internal':
LOG.info('(cellv2) Service registered, running discovery')
sys.exit(subprocess.call([
'/usr/bin/nova-manage',
'cell_v2',
'discover_hosts',
'--by-service',
'--verbose'
]))
if len(service_list) == 0:
LOG.warning('(cellv2) no nova-compute service registered' +
' after %i checks', i)
LOG.info('(cellv2) Waiting for service to register')
except subprocess.CalledProcessError:
LOG.info('(cellv2) Retrying')
LOG.info('Nova-compute service registered')
sys.exit(0)
LOG.info('Waiting for nova-compute service to register')
except Exception as e:
LOG.exception('Error during host discovery:')
time.sleep(30)
LOG.exception(
'Error while waiting for nova-compute service to register')
time.sleep(timeout)
sys.exit(1)
# vim: set et ts=4 sw=4 :