Adding an optional startup delay to nova-compute
We need an optional delay on nova-compute when it's waiting for ceph to be healthy. This commit is adding a wrapper that will be deployed when necessary. Related: https://bugzilla.redhat.com/show_bug.cgi?id=1498621 Change-Id: Ie7ad2d835c1762dc4b9341e305e6a428cb087935
This commit is contained in:
parent
91de0b33b9
commit
6eb72aa769
@ -69,6 +69,14 @@ parameters:
|
||||
type: json
|
||||
tags:
|
||||
- role_specific
|
||||
NovaComputeStartupDelay:
|
||||
default: 0
|
||||
description: Delays the startup of nova-compute service after compute node is
|
||||
booted. This is to give a chance to ceph to get back healthy
|
||||
before booting instances after an overcloud reboot.
|
||||
type: number
|
||||
constraints:
|
||||
- range: { min: 0, max: 600 }
|
||||
EnableInstanceHA:
|
||||
default: false
|
||||
description: Whether to enable an Instance HA configuration or not.
|
||||
@ -701,6 +709,11 @@ resources:
|
||||
|
||||
conditions:
|
||||
enable_instance_ha: {equals: [{get_param: EnableInstanceHA}, true]}
|
||||
compute_startup_delay:
|
||||
and:
|
||||
- not: {equals: [{get_param: NovaComputeStartupDelay}, 0]}
|
||||
- not: enable_instance_ha
|
||||
|
||||
|
||||
enable_live_migration_tunnelled:
|
||||
or:
|
||||
@ -860,6 +873,12 @@ outputs:
|
||||
list_join:
|
||||
- ' '
|
||||
- - if:
|
||||
- compute_startup_delay
|
||||
- str_replace:
|
||||
template: '/var/lib/nova/delay-nova-compute --delay DELAY --nova-binary'
|
||||
params: { DELAY: {get_param: NovaComputeStartupDelay} }
|
||||
- ''
|
||||
- if:
|
||||
- enable_instance_ha
|
||||
- /var/lib/nova/instanceha/check-run-nova-compute
|
||||
- /usr/bin/nova-compute
|
||||
@ -1122,6 +1141,15 @@ outputs:
|
||||
- name: If instance HA is enabled on the node activate the evacuation completed check
|
||||
file: path=/var/lib/nova/instanceha/enabled state=touch
|
||||
when: iha_nodes.stdout|lower is search('"'+ansible_hostname|lower+'"')
|
||||
- name: Do we prepend nova startup with a delay
|
||||
set_fact:
|
||||
nova_compute_delay: {get_param: NovaComputeStartupDelay}
|
||||
- name: install nova-compute delay wrapper script
|
||||
copy:
|
||||
content: {get_file: ../../scripts/delay-nova-compute}
|
||||
dest: /var/lib/nova/delay-nova-compute
|
||||
mode: 0755
|
||||
when: nova_compute_delay|int > 0
|
||||
- name: Is irqbalance enabled
|
||||
set_fact:
|
||||
compute_irqbalance_disabled: {get_attr: [RoleParametersValue, value, compute_disable_irqbalance]}
|
||||
|
@ -0,0 +1,8 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
The parameter ``NovaComputeStartupDelay`` allows the operator to delay the
|
||||
startup of ``nova-compute`` after a compute node reboot.
|
||||
When all the overcloud nodes are rebooted at the same time, it can take a
|
||||
few minutes for the Ceph cluster to get in a healthy state. This delay will
|
||||
prevent the instances from booting before the Ceph cluster is healthy.
|
@ -17,6 +17,7 @@
|
||||
subnet: storage_mgmt_subnet
|
||||
RoleParametersDefault:
|
||||
TunedProfileName: "throughput-performance"
|
||||
NovaComputeStartupDelay: 180
|
||||
# CephOSD present so serial has to be 1
|
||||
update_serial: 1
|
||||
ServicesDefault:
|
||||
|
@ -23,6 +23,7 @@
|
||||
VhostuserSocketGroup: "hugetlbfs"
|
||||
NovaLibvirtRxQueueSize: 1024
|
||||
NovaLibvirtTxQueueSize: 1024
|
||||
NovaComputeStartupDelay: 180
|
||||
ServicesDefault:
|
||||
- OS::TripleO::Services::Aide
|
||||
- OS::TripleO::Services::AuditD
|
||||
|
@ -17,6 +17,7 @@
|
||||
subnet: storage_mgmt_subnet
|
||||
RoleParametersDefault:
|
||||
TunedProfileName: "cpu-partitioning"
|
||||
NovaComputeStartupDelay: 180
|
||||
# CephOSD present so serial has to be 1
|
||||
update_serial: 1
|
||||
ServicesDefault:
|
||||
|
45
scripts/delay-nova-compute
Normal file
45
scripts/delay-nova-compute
Normal file
@ -0,0 +1,45 @@
|
||||
#!/usr/libexec/platform-python
|
||||
"""
|
||||
This wrapper was created to add an optional delay to the startup of nova-compute.
|
||||
We know that instances will fail to boot, after a compute reboot, if ceph is not
|
||||
healthy.
|
||||
|
||||
Ideally, we would poll ceph to get its health, but it's not guaranteed that the
|
||||
compute node will have access to the keys.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import logging
|
||||
import argparse
|
||||
|
||||
def build_parser():
    """Return the argument parser for the wrapper's own options.

    Only the options declared here are interpreted by the wrapper; anything
    else on the command line is meant to be forwarded to nova-compute.
    """
    parser = argparse.ArgumentParser(
        description='Delay the first startup of nova-compute, then exec it.')
    parser.add_argument('--config-file', dest='nova_config', action='store',
                        default="/etc/nova/nova.conf",
                        help='path to nova configuration '
                             '(default: /etc/nova/nova.conf)')
    parser.add_argument('--nova-binary', dest='nova_binary', action='store',
                        default="/usr/bin/nova-compute",
                        help='path to nova compute binary '
                             '(default: /usr/bin/nova-compute)')
    parser.add_argument('--delay', dest='delay', action='store',
                        default=120, type=int,
                        help='Number of seconds to wait until nova-compute '
                             'is started')
    parser.add_argument('--state-file', dest='state_file', action='store',
                        default="/run/nova-compute-delayed",
                        help='file exists if we already delayed nova-compute '
                             'startup (default: /run/nova-compute-delayed)')
    return parser


def build_command(argv):
    """Split *argv* into wrapper options and the nova-compute command line.

    *argv* is a full argument vector whose first element is the program
    name (as in ``sys.argv``); that element is never parsed or forwarded.

    Returns a ``(options, command)`` tuple where ``options`` is the argparse
    namespace and ``command`` is the argv list to exec: the nova binary,
    ``--config-file <path>``, then every argument the wrapper did not
    recognise, in their original order.
    """
    options, passthrough = build_parser().parse_known_args(argv[1:])
    command = [options.nova_binary, '--config-file', options.nova_config]
    command.extend(passthrough)
    return options, command


def main(argv=None):
    """Sleep once if the state file is missing, then exec nova-compute.

    On success this function does not return: the process image is replaced
    by the nova-compute binary via ``os.execv``.
    """
    if argv is None:
        argv = sys.argv

    # Without an explicit configuration the root logger only emits WARNING
    # and above, which would silently drop the messages below.
    logging.basicConfig(level=logging.INFO)

    options, command = build_command(argv)

    # The state file marks that the delay was already applied, so later
    # restarts of the service skip the sleep.
    # NOTE(review): the default lives under /run, presumably so that it
    # disappears on reboot and the delay applies again -- confirm.
    if not os.path.isfile(options.state_file):
        logging.info("Delaying nova-compute startup by %s seconds",
                     options.delay)
        time.sleep(options.delay)
        try:
            with open(options.state_file, 'a'):
                pass
        except OSError as err:
            # Failing to record the state must not prevent nova-compute
            # from starting; the only cost is another delay next time.
            logging.warning("Could not create state file %s: %s",
                            options.state_file, err)

    logging.info("Executing %s", command)
    os.execv(options.nova_binary, command)


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user