Merge "Adding an optional startup delay to nova-compute"
This commit is contained in:
commit
1bfbc7169b
@ -74,6 +74,14 @@ parameters:
|
|||||||
type: json
|
type: json
|
||||||
tags:
|
tags:
|
||||||
- role_specific
|
- role_specific
|
||||||
|
NovaComputeStartupDelay:
|
||||||
|
default: 0
|
||||||
|
description: Delays the startup of the nova-compute service, in seconds, after the
|
||||||
|
compute node is booted. This gives Ceph a chance to become healthy again
|
||||||
|
before instances are booted after an overcloud reboot.
|
||||||
|
type: number
|
||||||
|
constraints:
|
||||||
|
- range: { min: 0, max: 600 }
|
||||||
EnableInstanceHA:
|
EnableInstanceHA:
|
||||||
default: false
|
default: false
|
||||||
description: Whether to enable an Instance HA configuration or not.
|
description: Whether to enable an Instance HA configuration or not.
|
||||||
@ -706,6 +714,11 @@ resources:
|
|||||||
|
|
||||||
conditions:
|
conditions:
|
||||||
enable_instance_ha: {equals: [{get_param: EnableInstanceHA}, true]}
|
enable_instance_ha: {equals: [{get_param: EnableInstanceHA}, true]}
|
||||||
|
compute_startup_delay:
|
||||||
|
and:
|
||||||
|
- not: {equals: [{get_param: NovaComputeStartupDelay}, 0]}
|
||||||
|
- not: enable_instance_ha
|
||||||
|
|
||||||
|
|
||||||
enable_live_migration_tunnelled:
|
enable_live_migration_tunnelled:
|
||||||
or:
|
or:
|
||||||
@ -865,6 +878,12 @@ outputs:
|
|||||||
list_join:
|
list_join:
|
||||||
- ' '
|
- ' '
|
||||||
- - if:
|
- - if:
|
||||||
|
- compute_startup_delay
|
||||||
|
- str_replace:
|
||||||
|
template: '/var/lib/nova/delay-nova-compute --delay DELAY --nova-binary'
|
||||||
|
params: { DELAY: {get_param: NovaComputeStartupDelay} }
|
||||||
|
- ''
|
||||||
|
- if:
|
||||||
- enable_instance_ha
|
- enable_instance_ha
|
||||||
- /var/lib/nova/instanceha/check-run-nova-compute
|
- /var/lib/nova/instanceha/check-run-nova-compute
|
||||||
- /usr/bin/nova-compute
|
- /usr/bin/nova-compute
|
||||||
@ -1131,6 +1150,15 @@ outputs:
|
|||||||
- name: If instance HA is enabled on the node activate the evacuation completed check
|
- name: If instance HA is enabled on the node activate the evacuation completed check
|
||||||
file: path=/var/lib/nova/instanceha/enabled state=touch
|
file: path=/var/lib/nova/instanceha/enabled state=touch
|
||||||
when: iha_nodes.stdout|lower is search('"'+ansible_hostname|lower+'"')
|
when: iha_nodes.stdout|lower is search('"'+ansible_hostname|lower+'"')
|
||||||
|
- name: Do we prepend nova startup with a delay
|
||||||
|
set_fact:
|
||||||
|
nova_compute_delay: {get_param: NovaComputeStartupDelay}
|
||||||
|
- name: install nova-compute delay wrapper script
|
||||||
|
copy:
|
||||||
|
content: {get_file: ../../scripts/delay-nova-compute}
|
||||||
|
dest: /var/lib/nova/delay-nova-compute
|
||||||
|
mode: 0755
|
||||||
|
when: nova_compute_delay|int > 0
|
||||||
- name: Is irqbalance enabled
|
- name: Is irqbalance enabled
|
||||||
set_fact:
|
set_fact:
|
||||||
compute_irqbalance_disabled: {get_attr: [RoleParametersValue, value, compute_disable_irqbalance]}
|
compute_irqbalance_disabled: {get_attr: [RoleParametersValue, value, compute_disable_irqbalance]}
|
||||||
|
@ -0,0 +1,8 @@
|
|||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
The parameter ``NovaComputeStartupDelay`` allows the operator to delay the
|
||||||
|
startup of ``nova-compute`` after a compute node reboot.
|
||||||
|
When all the overcloud nodes are rebooted at the same time, it can take a
|
||||||
|
few minutes for the Ceph cluster to reach a healthy state. This delay will
|
||||||
|
prevent the instances from booting before the Ceph cluster is healthy.
|
@ -17,6 +17,7 @@
|
|||||||
subnet: storage_mgmt_subnet
|
subnet: storage_mgmt_subnet
|
||||||
RoleParametersDefault:
|
RoleParametersDefault:
|
||||||
TunedProfileName: "throughput-performance"
|
TunedProfileName: "throughput-performance"
|
||||||
|
NovaComputeStartupDelay: 180
|
||||||
# CephOSD present so serial has to be 1
|
# CephOSD present so serial has to be 1
|
||||||
update_serial: 1
|
update_serial: 1
|
||||||
ServicesDefault:
|
ServicesDefault:
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
VhostuserSocketGroup: "hugetlbfs"
|
VhostuserSocketGroup: "hugetlbfs"
|
||||||
NovaLibvirtRxQueueSize: 1024
|
NovaLibvirtRxQueueSize: 1024
|
||||||
NovaLibvirtTxQueueSize: 1024
|
NovaLibvirtTxQueueSize: 1024
|
||||||
|
NovaComputeStartupDelay: 180
|
||||||
ServicesDefault:
|
ServicesDefault:
|
||||||
- OS::TripleO::Services::Aide
|
- OS::TripleO::Services::Aide
|
||||||
- OS::TripleO::Services::AuditD
|
- OS::TripleO::Services::AuditD
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
subnet: storage_mgmt_subnet
|
subnet: storage_mgmt_subnet
|
||||||
RoleParametersDefault:
|
RoleParametersDefault:
|
||||||
TunedProfileName: "cpu-partitioning"
|
TunedProfileName: "cpu-partitioning"
|
||||||
|
NovaComputeStartupDelay: 180
|
||||||
# CephOSD present so serial has to be 1
|
# CephOSD present so serial has to be 1
|
||||||
update_serial: 1
|
update_serial: 1
|
||||||
ServicesDefault:
|
ServicesDefault:
|
||||||
|
45
scripts/delay-nova-compute
Normal file
45
scripts/delay-nova-compute
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
#!/usr/libexec/platform-python
"""
Wrapper that adds an optional delay to the startup of nova-compute.

We know that instances will fail to boot, after a compute reboot, if ceph is
not healthy.

Ideally, we would poll ceph to get its health, but it's not guaranteed that
the compute node will have access to the keys.

The delay is applied only while the state file does not exist; the file is
created once the delay has elapsed, so later invocations start nova-compute
immediately. Options this wrapper does not recognise are forwarded unchanged
to the nova-compute binary.
"""

import os
import sys
import time
import logging
import argparse


def parse_args(argv):
    """Parse the wrapper's command line.

    :param argv: list of command-line arguments, without the program name.
    :returns: ``(args, remaining)`` where ``args`` is the namespace holding
        the wrapper's own options and ``remaining`` is the list of arguments
        the wrapper did not recognise (to be handed over to nova-compute).
    """
    parser = argparse.ArgumentParser(
        description='Optionally delay the startup of nova-compute.')
    parser.add_argument('--config-file', dest='nova_config', action='store',
                        default="/etc/nova/nova.conf",
                        help='path to nova configuration '
                             '(default: /etc/nova/nova.conf)')
    parser.add_argument('--nova-binary', dest='nova_binary', action='store',
                        default="/usr/bin/nova-compute",
                        help='path to nova compute binary '
                             '(default: /usr/bin/nova-compute)')
    parser.add_argument('--delay', dest='delay', action='store',
                        default=120, type=int,
                        help='Number of seconds to wait until nova-compute '
                             'is started')
    parser.add_argument('--state-file', dest='state_file', action='store',
                        default="/run/nova-compute-delayed",
                        help='file exists if we already delayed nova-compute '
                             'startup (default: /run/nova-compute-delayed)')
    # parse_known_args keeps unknown options aside instead of failing on them,
    # which is what lets this script act as a transparent wrapper.
    return parser.parse_known_args(argv)


def build_command(args, remaining):
    """Return the argv list used to exec nova-compute.

    :param args: namespace returned by :func:`parse_args`.
    :param remaining: unrecognised arguments returned by :func:`parse_args`.
    :returns: list starting with the nova-compute binary path, followed by its
        ``--config-file`` option and every forwarded argument.
    """
    command = [args.nova_binary, '--config-file', args.nova_config]
    command.extend(remaining)
    return command


def main(argv=None):
    """Sleep once if needed, then replace this process with nova-compute.

    :param argv: arguments without the program name; defaults to
        ``sys.argv[1:]``.
    :raises OSError: if the state file cannot be created or the nova-compute
        binary cannot be executed.
    """
    # Without an explicit configuration the root logger only emits WARNING and
    # above, so the informational messages below would never be shown.
    logging.basicConfig(level=logging.INFO)

    args, remaining = parse_args(sys.argv[1:] if argv is None else argv)
    real_args = build_command(args, remaining)

    if not os.path.isfile(args.state_file):
        logging.info("Delaying nova-compute startup by %s seconds", args.delay)
        time.sleep(args.delay)
        # Leave a marker behind so that later starts are not delayed again.
        with open(args.state_file, 'a'):
            pass

    logging.info("Executing %s", real_args)
    # execv never returns on success: nova-compute takes over this process.
    os.execv(args.nova_binary, real_args)


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user