Files
ironic/ironic/conf/conductor.py
Dmitry Tantsur adec0f6f01 Add a reserved workers pool (5% by default)
I've seen a situation where heartbeats managed to completely saturate
the conductor workers, so that no API requests could come through that
required interaction with the conductor (i.e. everything other than
reads). Add periodic tasks for a large (thousands) number of nodes, and
you get a completely locked up Ironic.

This change reserves 5% (configurable) of the threads for API requests.
This is done by splitting one executor into two, of which the latter is
only used by normal _spawn_worker calls and only when the former is
exhausted. This allows an operator to apply a remediation, e.g. abort
some deployments or outright power off some nodes.

Partial-Bug: #2038438
Change-Id: Iacc62d33ffccfc11694167ee2a7bc6aad82c1f2f
2023-12-07 13:47:39 +01:00

425 lines
23 KiB
Python

# Copyright 2016 Intel Corporation
# Copyright 2013 Hewlett-Packard Development Company, L.P.
# Copyright 2013 International Business Machines Corporation
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_config import cfg
from oslo_config import types
from ironic.common.i18n import _
opts = [
cfg.IntOpt('workers_pool_size',
default=300, min=3,
help=_('The size of the workers greenthread pool. '
'Note that 2 threads will be reserved by the conductor '
'itself for handling heart beats and periodic tasks. '
'On top of that, `sync_power_state_workers` will take '
'up to 7 green threads with the default value of 8.')),
cfg.IntOpt('reserved_workers_pool_percentage',
default=5, min=0, max=50,
help=_('The percentage of the whole workers pool that will be '
'kept for API requests and other important tasks. '
'This part of the pool will not be used for periodic '
'tasks or agent heartbeats. Set to 0 to disable.')),
cfg.IntOpt('heartbeat_interval',
default=10,
help=_('Seconds between conductor heart beats.')),
cfg.IntOpt('heartbeat_timeout',
default=60,
# We're using timedelta which can overflow if somebody sets this
# too high, so limit to a sane value of 10 years.
max=315576000,
mutable=True,
help=_('Maximum time (in seconds) since the last check-in '
'of a conductor. A conductor is considered inactive '
'when this time has been exceeded.')),
cfg.IntOpt('sync_power_state_interval',
default=60,
help=_('Interval between syncing the node power state to the '
'database, in seconds. Set to 0 to disable syncing.')),
cfg.IntOpt('check_provision_state_interval',
default=60,
min=0,
help=_('Interval between checks of provision timeouts, '
'in seconds. Set to 0 to disable checks.')),
cfg.IntOpt('check_rescue_state_interval',
default=60,
min=1,
help=_('Interval (seconds) between checks of rescue '
'timeouts.')),
cfg.IntOpt('check_allocations_interval',
default=60,
min=0,
help=_('Interval between checks of orphaned allocations, '
'in seconds. Set to 0 to disable checks.')),
cfg.IntOpt('cache_clean_up_interval',
default=3600, min=0,
help=_('Interval between cleaning up image caches, in seconds. '
'Set to 0 to disable periodic clean-up.')),
cfg.IntOpt('deploy_callback_timeout',
default=1800,
min=0,
help=_('Timeout (seconds) to wait for a callback from '
'a deploy ramdisk. Set to 0 to disable timeout.')),
cfg.BoolOpt('force_power_state_during_sync',
default=True,
mutable=True,
help=_('During sync_power_state, should the hardware power '
'state be set to the state recorded in the database '
'(True) or should the database be updated based on '
'the hardware state (False).')),
cfg.IntOpt('power_state_sync_max_retries',
default=3,
help=_('During sync_power_state failures, limit the '
'number of times Ironic should try syncing the '
'hardware node power state with the node power state '
'in DB')),
cfg.IntOpt('sync_power_state_workers',
default=8, min=1,
help=_('The maximum number of worker threads that can be '
'started simultaneously to sync nodes power states from '
'the periodic task.')),
cfg.IntOpt('periodic_max_workers',
default=8,
help=_('Maximum number of worker threads that can be started '
'simultaneously by a periodic task. Should be less '
'than RPC thread pool size.')),
cfg.IntOpt('node_locked_retry_attempts',
default=3,
help=_('Number of attempts to grab a node lock.')),
cfg.IntOpt('node_locked_retry_interval',
default=1,
help=_('Seconds to sleep between node lock attempts.')),
cfg.IntOpt('sync_local_state_interval',
default=180,
help=_('When conductors join or leave the cluster, existing '
'conductors may need to update any persistent '
'local state as nodes are moved around the cluster. '
'This option controls how often, in seconds, each '
'conductor will check for nodes that it should '
'"take over". Set it to 0 (or a negative value) to '
'disable the check entirely.')),
cfg.StrOpt('configdrive_swift_container',
default='ironic_configdrive_container',
help=_('Name of the Swift container to store config drive '
'data. Used when configdrive_use_object_store is '
'True.')),
cfg.IntOpt('configdrive_swift_temp_url_duration',
min=60,
help=_('The timeout (in seconds) after which a configdrive '
'temporary URL becomes invalid. Defaults to '
'deploy_callback_timeout if it is set, otherwise to '
'1800 seconds. Used when '
'configdrive_use_object_store is True.')),
cfg.IntOpt('inspect_wait_timeout',
default=1800,
min=0,
help=_('Timeout (seconds) for waiting for node inspection. '
'0 - unlimited.')),
cfg.BoolOpt('automated_clean',
default=True,
mutable=True,
help=_('Enables or disables automated cleaning. Automated '
'cleaning is a configurable set of steps, '
'such as erasing disk drives, that are performed on '
'the node to ensure it is in a baseline state and '
'ready to be deployed to. This is '
'done after instance deletion as well as during '
'the transition from a "manageable" to "available" '
'state. When enabled, the particular steps '
'performed to clean a node depend on which driver '
'that node is managed by; see the individual '
'driver\'s documentation for details. '
'NOTE: The introduction of the cleaning operation '
'causes instance deletion to take significantly '
'longer. In an environment where all tenants are '
'trusted (eg, because there is only one tenant), '
'this option could be safely disabled.')),
cfg.BoolOpt('allow_provisioning_in_maintenance',
default=True,
mutable=True,
help=_('Whether to allow nodes to enter or undergo deploy or '
'cleaning when in maintenance mode. If this option is '
'set to False, and a node enters maintenance during '
'deploy or cleaning, the process will be aborted '
'after the next heartbeat. Automated cleaning or '
'making a node available will also fail. If True '
'(the default), the process will begin and will pause '
'after the node starts heartbeating. Moving it from '
'maintenance will make the process continue.')),
cfg.IntOpt('clean_callback_timeout',
default=1800,
min=0,
help=_('Timeout (seconds) to wait for a callback from the '
'ramdisk doing the cleaning. If the timeout is reached '
'the node will be put in the "clean failed" provision '
'state. Set to 0 to disable timeout.')),
cfg.IntOpt('rescue_callback_timeout',
default=1800,
min=0,
help=_('Timeout (seconds) to wait for a callback from the '
'rescue ramdisk. If the timeout is reached the node '
'will be put in the "rescue failed" provision state. '
'Set to 0 to disable timeout.')),
cfg.IntOpt('soft_power_off_timeout',
default=600,
min=1,
mutable=True,
help=_('Timeout (in seconds) of soft reboot and soft power '
'off operation. This value always has to be positive.')),
cfg.IntOpt('power_state_change_timeout',
min=2, default=60,
mutable=True,
help=_('Number of seconds to wait for power operations to '
'complete, i.e., so that a baremetal node is in the '
'desired power state. If timed out, the power operation '
'is considered a failure.')),
cfg.IntOpt('power_failure_recovery_interval',
min=0, default=300,
help=_('Interval (in seconds) between checking the power '
'state for nodes previously put into maintenance mode '
'due to power synchronization failure. A node is '
'automatically moved out of maintenance mode once its '
'power state is retrieved successfully. Set to 0 to '
'disable this check.')),
cfg.StrOpt('conductor_group',
default='',
help=_('Name of the conductor group to join. Can be up to '
'255 characters and is case insensitive. This '
'conductor will only manage nodes with a matching '
'"conductor_group" field set on the node.')),
cfg.BoolOpt('allow_deleting_available_nodes',
default=True,
mutable=True,
help=_('Allow deleting nodes which are in state '
'\'available\'. Defaults to True.')),
cfg.BoolOpt('enable_mdns', default=False,
help=_('Whether to enable publishing the baremetal API '
'endpoint via multicast DNS.')),
cfg.StrOpt('deploy_kernel',
mutable=True,
help=_('Glance ID, http:// or file:// URL of the '
'kernel of the default deploy image.')),
cfg.StrOpt('deploy_ramdisk',
mutable=True,
help=_('Glance ID, http:// or file:// URL of the '
'initramfs of the default deploy image.')),
cfg.DictOpt('deploy_kernel_by_arch',
default={},
mutable=True,
help=_('A dictionary of key-value pairs of each architecture '
'with the Glance ID, http:// or file:// URL of the '
'kernel of the default deploy image.')),
cfg.DictOpt('deploy_ramdisk_by_arch',
default={},
mutable=True,
help=_('A dictionary of key-value pairs of each architecture '
'with the Glance ID, http:// or file:// URL of the '
'initramfs of the default deploy image.')),
cfg.StrOpt('rescue_kernel',
mutable=True,
help=_('Glance ID, http:// or file:// URL of the '
'kernel of the default rescue image.')),
cfg.StrOpt('rescue_ramdisk',
mutable=True,
help=_('Glance ID, http:// or file:// URL of the '
'initramfs of the default rescue image.')),
cfg.DictOpt('rescue_kernel_by_arch',
default={},
mutable=True,
help=_('A dictionary of key-value pairs of each architecture '
'with the Glance ID, http:// or file:// URL of the '
'kernel of the default rescue image.')),
cfg.DictOpt('rescue_ramdisk_by_arch',
default={},
mutable=True,
help=_('A dictionary of key-value pairs of each architecture '
'with the Glance ID, http:// or file:// URL of the '
'initramfs of the default rescue image.')),
cfg.StrOpt('rescue_password_hash_algorithm',
default='sha256',
choices=['sha256', 'sha512'],
mutable=True,
help=_('Password hash algorithm to be used for the rescue '
'password.')),
cfg.BoolOpt('require_rescue_password_hashed',
# TODO(TheJulia): Change this to True in Victoria.
default=False,
mutable=True,
help=_('Option to cause the conductor to not fallback to '
'an un-hashed version of the rescue password, '
'permitting rescue with older ironic-python-agent '
'ramdisks.')),
cfg.StrOpt('bootloader',
mutable=True,
help=_('Glance ID, http:// or file:// URL of the EFI system '
'partition image containing EFI boot loader. This image '
'will be used by ironic when building UEFI-bootable ISO '
'out of kernel and ramdisk. Required for UEFI boot from '
'partition images.')),
cfg.MultiOpt('clean_step_priority_override',
item_type=types.Dict(),
default={},
help=_('Priority to run automated clean steps for both '
'in-band and out of band clean steps, provided in '
'interface.step_name:priority format, e.g. '
'deploy.erase_devices_metadata:123. The option can '
'be specified multiple times to define priorities '
'for multiple steps. If set to 0, this specific step '
'will not run during cleaning. If unset for an '
'inband clean step, will use the priority set in the '
'ramdisk.')),
cfg.BoolOpt('node_history',
default=True,
mutable=True,
help=_('Boolean value, default True, if node event history '
'is to be recorded. Errors and other noteworthy '
'events in relation to a node are journaled to a '
'database table which incurs some additional load. '
'A periodic task does periodically remove entries '
'from the database. Please note, if this is disabled, '
'the conductor will continue to purge entries as '
'long as [conductor]node_history_cleanup_batch_count '
'is not 0.')),
cfg.IntOpt('node_history_max_entries',
default=300,
min=0,
mutable=True,
help=_('Maximum number of history entries which will be stored '
'in the database per node. Default is 300. This setting '
'excludes the minimum number of days retained using the '
'[conductor]node_history_minimum_days setting.')),
cfg.IntOpt('node_history_cleanup_interval',
min=0,
default=86400,
mutable=False,
help=_('Interval in seconds at which node history entries '
'can be cleaned up in the database. Setting to 0 '
'disables the periodic task. Defaults to once a day, '
'or 86400 seconds.')),
cfg.IntOpt('node_history_cleanup_batch_count',
min=0,
default=1000,
mutable=False,
help=_('The target number of node history records to purge '
'from the database when performing clean-up. '
'Deletes are performed by node, and a node with excess '
'records for a node will still be deleted. '
'Defaults to 1000. Operators who find node history '
'building up may wish to '
'lower this threshold and decrease the time between '
'cleanup operations using the '
'``node_history_cleanup_interval`` setting.')),
cfg.IntOpt('node_history_minimum_days',
min=0,
default=0,
mutable=True,
help=_('The minimum number of days to explicitly keep on '
'hand in the database history entries for nodes. '
'This is exclusive from the [conductor]'
'node_history_max_entries setting as users of '
'this setting are anticipated to need to retain '
'history by policy.')),
cfg.MultiOpt('verify_step_priority_override',
item_type=types.Dict(),
default={},
mutable=True,
help=_('Priority to run automated verify steps '
'provided in interface.step_name:priority format,'
'e.g. management.clear_job_queue:123. The option can '
'be specified multiple times to define priorities '
'for multiple steps. If set to 0, this specific step '
'will not run during verification. ')),
cfg.BoolOpt('automatic_lessee',
default=False,
mutable=True,
help=_('If the conductor should record the Project ID '
'indicated by Keystone for a requested deployment. '
'Allows rights to be granted to directly access the '
'deployed node as a lessee within the RBAC security '
'model. The conductor does *not* record this value '
'otherwise, and this information is not backfilled '
'for prior instances which have been deployed.')),
cfg.IntOpt('max_concurrent_deploy',
default=250,
min=1,
mutable=True,
help=_('The maximum number of concurrent nodes in deployment '
'which are permitted in this Ironic system. '
'If this limit is reached, new requests will be '
'rejected until the number of deployments in progress '
'is lower than this maximum. As this is a security '
'mechanism requests are not queued, and this setting '
'is a global setting applying to all requests this '
'conductor receives, regardless of access rights. '
'The concurrent deployment limit cannot be disabled.')),
cfg.IntOpt('max_concurrent_clean',
default=50,
min=1,
mutable=True,
help=_('The maximum number of concurrent nodes in cleaning '
'which are permitted in this Ironic system. '
'If this limit is reached, new requests will be '
'rejected until the number of nodes in cleaning '
'is lower than this maximum. As this is a security '
'mechanism requests are not queued, and this setting '
'is a global setting applying to all requests this '
'conductor receives, regardless of access rights. '
'The concurrent clean limit cannot be disabled.')),
cfg.BoolOpt('poweroff_in_cleanfail',
default=False,
help=_('If True power off nodes in the ``clean failed`` '
'state. Default False. Option may be unsafe '
'when using Cleaning to perform '
'hardware-transformative actions such as '
'firmware upgrade.')),
cfg.BoolOpt('poweroff_in_servicefail',
default=False,
help=_('If True power off nodes in the ``service failed`` '
'state. Default False. Option may be unsafe '
'when using service to perform '
'hardware-transformative actions such as '
'firmware upgrade.')),
cfg.BoolOpt('permit_child_node_step_async_result',
default=False,
mutable=True,
help=_('This option allows child node steps to not error if '
'the resulting step execution returned a "wait" '
'state. Under normal conditions, child nodes are not '
'expected to request a wait state. This option exists '
'for operators to use if needed to perform specific '
'tasks where this is known acceptable. Use at your'
'own risk!')),
cfg.IntOpt('max_conductor_wait_step_seconds',
default=30,
min=0,
max=1800,
mutable=True,
help=_('The maximum number of seconds which a step can '
'be requested to explicitly sleep or wait. This '
'value should be changed sparingly as it holds a '
'conductor thread and if used across many nodes at '
'once can exhaust a conductor\'s resources. This'
'capability has a hard coded maximum wait of 1800 '
'seconds, or 30 minutes. If you need to wait longer '
'than the maximum value, we recommend exploring '
'hold steps.')),
]
def register_opts(conf):
conf.register_opts(opts, group='conductor')