359 lines
14 KiB
Python
359 lines
14 KiB
Python
# Copyright 2016 OpenStack Foundation
|
|
# All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
"""The 'workarounds' group is for very specific reasons.
|
|
|
|
If you're:
|
|
|
|
- Working around an issue in a system tool (e.g. libvirt or qemu) where the
|
|
fix is in flight/discussed in that community.
|
|
- The tool can be/is fixed in some distributions and rather than patch the
|
|
code those distributions can trivially set a config option to get the
|
|
"correct" behavior.
|
|
|
|
Then this is a good place for your workaround.
|
|
|
|
.. warning::
|
|
|
|
Please use with care! Document the BugID that your workaround is paired with.
|
|
"""
|
|
|
|
from oslo_config import cfg
|
|
|
|
workarounds_group = cfg.OptGroup(
|
|
'workarounds',
|
|
title='Workaround Options',
|
|
help="""
|
|
A collection of workarounds used to mitigate bugs or issues found in system
|
|
tools (e.g. Libvirt or QEMU) or Nova itself under certain conditions. These
|
|
should only be enabled in exceptional circumstances. All options are linked
|
|
against bug IDs, where more information on the issue can be found.
|
|
""")
|
|
ALL_OPTS = [
|
|
cfg.BoolOpt(
|
|
'disable_rootwrap',
|
|
default=False,
|
|
help="""
|
|
Use sudo instead of rootwrap.
|
|
|
|
Allow fallback to sudo for performance reasons.
|
|
|
|
For more information, refer to the bug report:
|
|
|
|
https://bugs.launchpad.net/nova/+bug/1415106
|
|
|
|
Possible values:
|
|
|
|
* True: Use sudo instead of rootwrap
|
|
* False: Use rootwrap as usual
|
|
|
|
Interdependencies to other options:
|
|
|
|
* Any options that affect 'rootwrap' will be ignored.
|
|
"""),
|
|
|
|
cfg.BoolOpt(
|
|
'disable_libvirt_livesnapshot',
|
|
default=False,
|
|
deprecated_for_removal=True,
|
|
deprecated_since='19.0.0',
|
|
deprecated_reason="""
|
|
This option was added to work around issues with libvirt 1.2.2. We no longer
|
|
support this version of libvirt, which means this workaround is no longer
|
|
necessary. It will be removed in a future release.
|
|
""",
|
|
help="""
|
|
Disable live snapshots when using the libvirt driver.
|
|
|
|
Live snapshots allow the snapshot of the disk to happen without an
|
|
interruption to the guest, using coordination with a guest agent to
|
|
quiesce the filesystem.
|
|
|
|
When using libvirt 1.2.2 live snapshots fail intermittently under load
|
|
(likely related to concurrent libvirt/qemu operations). This config
|
|
option provides a mechanism to disable live snapshot, in favor of cold
|
|
snapshot, while this is resolved. Cold snapshot causes an instance
|
|
outage while the guest is going through the snapshotting process.
|
|
|
|
For more information, refer to the bug report:
|
|
|
|
https://bugs.launchpad.net/nova/+bug/1334398
|
|
|
|
Possible values:
|
|
|
|
* True: Live snapshot is disabled when using libvirt
|
|
* False: Live snapshots are always used when snapshotting (as long as
|
|
there is a new enough libvirt and the backend storage supports it)
|
|
"""),
|
|
|
|
cfg.BoolOpt(
|
|
'handle_virt_lifecycle_events',
|
|
default=True,
|
|
help="""
|
|
Enable handling of events emitted from compute drivers.
|
|
|
|
Many compute drivers emit lifecycle events, which are events that occur when,
|
|
for example, an instance is starting or stopping. If the instance is going
|
|
through task state changes due to an API operation, like resize, the events
|
|
are ignored.
|
|
|
|
This is an advanced feature which allows the hypervisor to signal to the
|
|
compute service that an unexpected state change has occurred in an instance
|
|
and that the instance can be shutdown automatically. Unfortunately, this can
|
|
race in some conditions, for example in reboot operations or when the compute
|
|
service or when host is rebooted (planned or due to an outage). If such races
|
|
are common, then it is advisable to disable this feature.
|
|
|
|
Care should be taken when this feature is disabled and
|
|
'sync_power_state_interval' is set to a negative value. In this case, any
|
|
instances that get out of sync between the hypervisor and the Nova database
|
|
will have to be synchronized manually.
|
|
|
|
For more information, refer to the bug report:
|
|
https://bugs.launchpad.net/bugs/1444630
|
|
|
|
Interdependencies to other options:
|
|
|
|
* If ``sync_power_state_interval`` is negative and this feature is disabled,
|
|
then instances that get out of sync between the hypervisor and the Nova
|
|
database will have to be synchronized manually.
|
|
"""),
|
|
|
|
cfg.BoolOpt(
|
|
'disable_group_policy_check_upcall',
|
|
default=False,
|
|
help="""
|
|
Disable the server group policy check upcall in compute.
|
|
|
|
In order to detect races with server group affinity policy, the compute
|
|
service attempts to validate that the policy was not violated by the
|
|
scheduler. It does this by making an upcall to the API database to list
|
|
the instances in the server group for one that it is booting, which violates
|
|
our api/cell isolation goals. Eventually this will be solved by proper affinity
|
|
guarantees in the scheduler and placement service, but until then, this late
|
|
check is needed to ensure proper affinity policy.
|
|
|
|
Operators that desire api/cell isolation over this check should
|
|
enable this flag, which will avoid making that upcall from compute.
|
|
|
|
Related options:
|
|
|
|
* [filter_scheduler]/track_instance_changes also relies on upcalls from the
|
|
compute service to the scheduler service.
|
|
"""),
|
|
|
|
cfg.BoolOpt(
|
|
'enable_numa_live_migration',
|
|
default=False,
|
|
deprecated_for_removal=True,
|
|
deprecated_since='20.0.0',
|
|
deprecated_reason="""This option was added to mitigate known issues
|
|
when live migrating instances with a NUMA topology with the libvirt driver.
|
|
Those issues are resolved in Train. Clouds using the libvirt driver and fully
|
|
upgraded to Train support NUMA-aware live migration. This option will be
|
|
removed in a future release.
|
|
""",
|
|
help="""
|
|
Enable live migration of instances with NUMA topologies.
|
|
|
|
Live migration of instances with NUMA topologies when using the libvirt driver
|
|
is only supported in deployments that have been fully upgraded to Train. In
|
|
previous versions, or in mixed Stein/Train deployments with a rolling upgrade
|
|
in progress, live migration of instances with NUMA topologies is disabled by
|
|
default when using the libvirt driver. This includes live migration of
|
|
instances with CPU pinning or hugepages. CPU pinning and huge page information
|
|
for such instances is not currently re-calculated, as noted in `bug #1289064`_.
|
|
This means that if instances were already present on the destination host, the
|
|
migrated instance could be placed on the same dedicated cores as these
|
|
instances or use hugepages allocated for another instance. Alternately, if the
|
|
host platforms were not homogeneous, the instance could be assigned to
|
|
non-existent cores or be inadvertently split across host NUMA nodes.
|
|
|
|
Despite these known issues, there may be cases where live migration is
|
|
necessary. By enabling this option, operators that are aware of the issues and
|
|
are willing to manually work around them can enable live migration support for
|
|
these instances.
|
|
|
|
Related options:
|
|
|
|
* ``compute_driver``: Only the libvirt driver is affected.
|
|
|
|
.. _bug #1289064: https://bugs.launchpad.net/nova/+bug/1289064
|
|
"""),
|
|
|
|
cfg.BoolOpt(
|
|
'ensure_libvirt_rbd_instance_dir_cleanup',
|
|
default=False,
|
|
help="""
|
|
Ensure the instance directory is removed during clean up when using rbd.
|
|
|
|
When enabled this workaround will ensure that the instance directory is always
|
|
removed during cleanup on hosts using ``[libvirt]/images_type=rbd``. This
|
|
avoids the following bugs with evacuation and revert resize clean up that lead
|
|
to the instance directory remaining on the host:
|
|
|
|
https://bugs.launchpad.net/nova/+bug/1414895
|
|
|
|
https://bugs.launchpad.net/nova/+bug/1761062
|
|
|
|
Both of these bugs can then result in ``DestinationDiskExists`` errors being
|
|
raised if the instances ever attempt to return to the host.
|
|
|
|
.. warning:: Operators will need to ensure that the instance directory itself,
|
|
specified by ``[DEFAULT]/instances_path``, is not shared between computes
|
|
before enabling this workaround otherwise the console.log, kernels, ramdisks
|
|
and any additional files being used by the running instance will be lost.
|
|
|
|
Related options:
|
|
|
|
* ``compute_driver`` (libvirt)
|
|
* ``[libvirt]/images_type`` (rbd)
|
|
* ``instances_path``
|
|
"""),
|
|
|
|
cfg.BoolOpt(
|
|
'disable_fallback_pcpu_query',
|
|
default=False,
|
|
deprecated_for_removal=True,
|
|
deprecated_since='20.0.0',
|
|
help="""
|
|
Disable fallback request for VCPU allocations when using pinned instances.
|
|
|
|
Starting in Train, compute nodes using the libvirt virt driver can report
|
|
``PCPU`` inventory and will use this for pinned instances. The scheduler will
|
|
automatically translate requests using the legacy CPU pinning-related flavor
|
|
extra specs, ``hw:cpu_policy`` and ``hw:cpu_thread_policy``, their image
|
|
metadata property equivalents, and the emulator threads pinning flavor extra
|
|
spec, ``hw:emulator_threads_policy``, to new placement requests. However,
|
|
compute nodes require additional configuration in order to report ``PCPU``
|
|
inventory and this configuration may not be present immediately after an
|
|
upgrade. To ensure pinned instances can be created without this additional
|
|
configuration, the scheduler will make a second request to placement for
|
|
old-style ``VCPU``-based allocations and fallback to these allocation
|
|
candidates if necessary. This has a slight performance impact and is not
|
|
necessary on new or upgraded deployments where the new configuration has been
|
|
set on all hosts. By setting this option, the second lookup is disabled and the
|
|
scheduler will only request ``PCPU``-based allocations.
|
|
"""),
|
|
cfg.BoolOpt(
|
|
'never_download_image_if_on_rbd',
|
|
default=False,
|
|
help="""
|
|
When booting from an image on a ceph-backed compute node, if the image does not
|
|
already reside on the ceph cluster (as would be the case if glance is
|
|
also using the same cluster), nova will download the image from glance and
|
|
upload it to ceph itself. If using multiple ceph clusters, this may cause nova
|
|
to unintentionally duplicate the image in a non-COW-able way in the local
|
|
ceph deployment, wasting space.
|
|
|
|
For more information, refer to the bug report:
|
|
|
|
https://bugs.launchpad.net/nova/+bug/1858877
|
|
|
|
Enabling this option will cause nova to *refuse* to boot an instance if it
|
|
would require downloading the image from glance and uploading it to ceph
|
|
itself.
|
|
|
|
Related options:
|
|
|
|
* ``compute_driver`` (libvirt)
|
|
* ``[libvirt]/images_type`` (rbd)
|
|
"""),
|
|
# TODO(lyarwood): Remove this workaround in the W release once all
|
|
# supported distros have rebased to a version of libgcrypt that does not
|
|
# have the performance issues listed below.
|
|
cfg.BoolOpt(
|
|
'disable_native_luksv1',
|
|
default=False,
|
|
help="""
|
|
When attaching encrypted LUKSv1 Cinder volumes to instances the Libvirt driver
|
|
configures the encrypted disks to be natively decrypted by QEMU.
|
|
|
|
A performance issue has been discovered in the libgcrypt library used by QEMU
|
|
that serverly limits the I/O performance in this scenario.
|
|
|
|
For more information please refer to the following bug report:
|
|
|
|
RFE: hardware accelerated AES-XTS mode
|
|
https://bugzilla.redhat.com/show_bug.cgi?id=1762765
|
|
|
|
Enabling this workaround option will cause Nova to use the legacy dm-crypt
|
|
based os-brick encryptor to decrypt the LUKSv1 volume.
|
|
|
|
Note that enabling this option while using volumes that do not provide a host
|
|
block device such as Ceph will result in a failure to boot from or attach the
|
|
volume to an instance. See the ``[workarounds]/rbd_block_device`` option for a
|
|
way to avoid this for RBD.
|
|
|
|
Related options:
|
|
|
|
* ``compute_driver`` (libvirt)
|
|
* ``rbd_block_device`` (workarounds)
|
|
"""),
|
|
# TODO(lyarwood): Remove this workaround in the W release when the
|
|
# above disable_native_luksv1 configurable is removed.
|
|
cfg.BoolOpt('rbd_volume_local_attach',
|
|
default=False,
|
|
help="""
|
|
Attach RBD Cinder volumes to the compute as host block devices.
|
|
|
|
When enabled this option instructs os-brick to connect RBD volumes locally on
|
|
the compute host as block devices instead of natively through QEMU.
|
|
|
|
This workaround does not currently support extending attached volumes.
|
|
|
|
This can be used with the disable_native_luksv1 workaround configuration
|
|
option to avoid the recently discovered performance issues found within the
|
|
libgcrypt library.
|
|
|
|
This workaround is temporary and will be removed during the W release once
|
|
all impacted distributions have been able to update their versions of the
|
|
libgcrypt library.
|
|
|
|
Related options:
|
|
|
|
* ``compute_driver`` (libvirt)
|
|
* ``disable_qemu_native_luksv1`` (workarounds)
|
|
"""),
|
|
cfg.BoolOpt('reserve_disk_resource_for_image_cache',
|
|
default=False,
|
|
help="""
|
|
If it is set to True then the libvirt driver will reserve DISK_GB resource for
|
|
the images stored in the image cache. If the
|
|
:oslo.config:option:`DEFAULT.instances_path` is on different disk partition
|
|
than the image cache directory then the driver will not reserve resource for
|
|
the cache.
|
|
|
|
Such disk reservation is done by a periodic task in the resource tracker that
|
|
runs every :oslo.config:option:`update_resources_interval` seconds. So the
|
|
reservation is not updated immediately when an image is cached.
|
|
|
|
Related options:
|
|
|
|
* :oslo.config:option:`DEFAULT.instances_path`
|
|
* :oslo.config:option:`image_cache.subdirectory_name`
|
|
* :oslo.config:option:`update_resources_interval`
|
|
"""),
|
|
]
|
|
|
|
|
|
def register_opts(conf):
|
|
conf.register_group(workarounds_group)
|
|
conf.register_opts(ALL_OPTS, group=workarounds_group)
|
|
|
|
|
|
def list_opts():
|
|
return {workarounds_group: ALL_OPTS}
|