Merge "Revert "Limit nodes by ironic shard key""

Zuul 2023-09-18 19:07:55 +00:00 committed by Gerrit Code Review
commit 022b1bf2a6
6 changed files with 19 additions and 152 deletions

View File

@@ -82,18 +82,6 @@ Related options:
'service. Note that setting this to the empty string (``""``) '
'will match the default conductor group, and is different than '
'leaving the option unset.'),
cfg.StrOpt(
'shard',
default=None,
mutable=True,
max_length=255,
regex=r'^[a-zA-Z0-9_.-]*$',
help='Specify which ironic shard this nova-compute will manage. '
'This allows you to shard Ironic nodes between compute '
'services across conductors and conductor groups. '
'When a shard is set, the peer_list configuration is ignored. '
'We require that there is at most one nova-compute service '
'for each shard.'),
cfg.ListOpt(
'peer_list',
deprecated_for_removal=True,
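
The hunk above removes the ``[ironic]shard`` option definition. As a reference for how such an option behaves, here is a minimal oslo.config sketch: the option attributes mirror the removed code, while the standalone ConfigOpts object, the ``host`` default, and the sample shard value are illustrative only.

from oslo_config import cfg

# Standalone ConfigOpts for illustration; nova registers these options on
# its own global CONF object.
CONF = cfg.ConfigOpts()
CONF.register_opts([cfg.StrOpt('host', default='compute-1')])
CONF.register_opts(
    [cfg.StrOpt('shard',
                default=None,
                mutable=True,
                max_length=255,
                regex=r'^[a-zA-Z0-9_.-]*$')],
    group='ironic')

CONF.set_override('shard', 'shard1', group='ironic')
if CONF.ironic.shard is not None:
    # With a shard configured, the driver pins the hash-ring peer list to
    # just this host (see the driver.py hunk later in this commit).
    peer_list = {CONF.host}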

View File

@@ -86,7 +86,7 @@ class IronicClientWrapperTestCase(test.NoDBTestCase):
expected = {'session': 'session',
'max_retries': CONF.ironic.api_max_retries,
'retry_interval': CONF.ironic.api_retry_interval,
'os_ironic_api_version': ['1.82', '1.38'],
'os_ironic_api_version': ['1.46', '1.38'],
'endpoint':
self.get_ksa_adapter.return_value.get_endpoint.return_value,
'interface': ['internal', 'public']}
@@ -111,7 +111,7 @@ class IronicClientWrapperTestCase(test.NoDBTestCase):
expected = {'session': 'session',
'max_retries': CONF.ironic.api_max_retries,
'retry_interval': CONF.ironic.api_retry_interval,
'os_ironic_api_version': ['1.82', '1.38'],
'os_ironic_api_version': ['1.46', '1.38'],
'endpoint': None,
'region_name': CONF.ironic.region_name,
'interface': ['internal', 'public']}
@@ -132,7 +132,7 @@ class IronicClientWrapperTestCase(test.NoDBTestCase):
expected = {'session': 'session',
'max_retries': CONF.ironic.api_max_retries,
'retry_interval': CONF.ironic.api_retry_interval,
'os_ironic_api_version': ['1.82', '1.38'],
'os_ironic_api_version': ['1.46', '1.38'],
'endpoint': endpoint,
'interface': ['admin']}
mock_ir_cli.assert_called_once_with(1, **expected)

View File

@@ -3020,31 +3020,6 @@ class HashRingTestCase(test.NoDBTestCase):
uncalled=['host3'])
mock_can_send.assert_called_once_with(min_version='1.46')
@mock.patch.object(ironic_driver.IronicDriver, '_can_send_version')
def test__refresh_hash_ring_peer_list_shard(self, mock_can_send):
services = ['host1', 'host2', 'host3']
expected_hosts = {'host1'}
self.mock_is_up.return_value = True
self.flags(host='host1')
self.flags(shard='shard1', group='ironic')
self._test__refresh_hash_ring(services, expected_hosts,
uncalled=['host2', 'host3'])
mock_can_send.assert_not_called()
@mock.patch.object(ironic_driver.IronicDriver, '_can_send_version')
def test__refresh_hash_ring_peer_list_shard_and_cg(self, mock_can_send):
services = ['host1', 'host2', 'host3']
expected_hosts = {'host1'}
self.mock_is_up.return_value = True
self.flags(host='host1')
self.flags(shard='shard1', group='ironic')
self.flags(conductor_group='not-none', group='ironic')
# Note that this is getting ignored, because the shard is set
self.flags(peer_list=['host1', 'host2'], group='ironic')
self._test__refresh_hash_ring(services, expected_hosts,
uncalled=['host2', 'host3'])
mock_can_send.assert_not_called()
@mock.patch.object(ironic_driver.IronicDriver, '_can_send_version')
def test__refresh_hash_ring_peer_list_old_api(self, mock_can_send):
mock_can_send.side_effect = (
@@ -3119,8 +3094,7 @@ class NodeCacheTestCase(test.NoDBTestCase):
def _test__refresh_cache(self, instances, nodes, hosts, mock_instances,
mock_nodes, mock_hosts, mock_hash_ring,
mock_can_send, partition_key=None,
can_send_146=True, shard=None,
can_send_182=True):
can_send_146=True):
mock_instances.return_value = instances
mock_nodes.return_value = nodes
mock_hosts.side_effect = hosts
@@ -3130,18 +3104,12 @@ class NodeCacheTestCase(test.NoDBTestCase):
if not can_send_146:
mock_can_send.side_effect = (
exception.IronicAPIVersionNotAvailable(version='1.46'))
if not can_send_182:
mock_can_send.side_effect = None, (
exception.IronicAPIVersionNotAvailable(version='1.82'))
self.driver.node_cache = {}
self.driver.node_cache_time = None
kwargs = {}
if partition_key is not None and can_send_146:
kwargs['conductor_group'] = partition_key
if shard and can_send_182:
kwargs["shard"] = shard
self.driver._refresh_cache()
@@ -3219,74 +3187,6 @@ class NodeCacheTestCase(test.NoDBTestCase):
expected_cache = {n.uuid: n for n in nodes}
self.assertEqual(expected_cache, self.driver.node_cache)
def test__refresh_cache_shard(self):
# normal operation, one compute service
instances = []
nodes = [
_get_cached_node(
uuid=uuidutils.generate_uuid(), instance_uuid=None),
_get_cached_node(
uuid=uuidutils.generate_uuid(), instance_uuid=None),
_get_cached_node(
uuid=uuidutils.generate_uuid(), instance_uuid=None),
]
hosts = [self.host, self.host, self.host]
shard = "shard1"
self.flags(shard=shard, group='ironic')
self._test__refresh_cache(instances, nodes, hosts,
shard=shard)
expected_cache = {n.uuid: n for n in nodes}
self.assertEqual(expected_cache, self.driver.node_cache)
def test__refresh_cache_shard_and_conductor_group(self):
# normal operation, one compute service
instances = []
nodes = [
_get_cached_node(
uuid=uuidutils.generate_uuid(), instance_uuid=None),
_get_cached_node(
uuid=uuidutils.generate_uuid(), instance_uuid=None),
_get_cached_node(
uuid=uuidutils.generate_uuid(), instance_uuid=None),
]
hosts = [self.host, self.host, self.host]
shard = "shard1"
self.flags(shard=shard, group='ironic')
partition_key = 'some-group'
self.flags(conductor_group=partition_key, group='ironic')
self._test__refresh_cache(instances, nodes, hosts,
shard=shard, partition_key=partition_key)
expected_cache = {n.uuid: n for n in nodes}
self.assertEqual(expected_cache, self.driver.node_cache)
def test__refresh_cache_shard_and_conductor_group_skip_shard(self):
# normal operation, one compute service
instances = []
nodes = [
_get_cached_node(
uuid=uuidutils.generate_uuid(), instance_uuid=None),
_get_cached_node(
uuid=uuidutils.generate_uuid(), instance_uuid=None),
_get_cached_node(
uuid=uuidutils.generate_uuid(), instance_uuid=None),
]
hosts = [self.host, self.host, self.host]
shard = "shard1"
self.flags(shard=shard, group='ironic')
partition_key = 'some-group'
self.flags(conductor_group=partition_key, group='ironic')
self._test__refresh_cache(instances, nodes, hosts,
shard=shard, partition_key=partition_key,
can_send_182=False)
expected_cache = {n.uuid: n for n in nodes}
self.assertEqual(expected_cache, self.driver.node_cache)
def test__refresh_cache_partition_key_old_api(self):
# normal operation, one compute service
instances = []
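
The removed cache tests above assert which filters end up in the node-listing call: ``conductor_group`` only when Ironic API 1.46 is available, and ``shard`` only when 1.82 is. A minimal standalone sketch of that selection logic, with hypothetical names (``build_node_filters``, ``can_send_version``) standing in for the driver internals the tests mock out:

def build_node_filters(conductor_group, shard, can_send_version):
    """Mirror the kwargs the removed tests expected _refresh_cache to pass.

    can_send_version is a hypothetical callable returning True when the
    Ironic API supports the given microversion.
    """
    kwargs = {}
    if conductor_group is not None and can_send_version('1.46'):
        kwargs['conductor_group'] = conductor_group
    if shard and can_send_version('1.82'):
        kwargs['shard'] = shard
    return kwargs

# Both filters available: nodes are limited by group and shard.
assert build_node_filters('some-group', 'shard1', lambda v: True) == \
    {'conductor_group': 'some-group', 'shard': 'shard1'}
# API 1.82 not available: only the conductor_group filter is applied.
assert build_node_filters('some-group', 'shard1', lambda v: v == '1.46') == \
    {'conductor_group': 'some-group'}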

View File

@@ -32,7 +32,7 @@ ironic = None
IRONIC_GROUP = nova.conf.ironic.ironic_group
# The API version required by the Ironic driver
IRONIC_API_VERSION = (1, 82)
IRONIC_API_VERSION = (1, 46)
# NOTE(TheJulia): This version should ALWAYS be the _last_ release
# supported version of the API version used by nova. If a feature
# needs a higher version to be negotiated to operate properly, then the version
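
For context on the constant being reverted here: the client-wrapper tests earlier in this commit expect the preferred version first and an older fallback second (``['1.82', '1.38']`` before the revert, ``['1.46', '1.38']`` after). A small sketch of how a ``(major, minor)`` tuple would be rendered into that list; the fallback constant's name is a placeholder, not necessarily nova's.

IRONIC_API_VERSION = (1, 46)
# Placeholder name for the older fallback that shows up as '1.38' in the
# test expectations above; nova's real constant may be named differently.
FALLBACK_IRONIC_API_VERSION = (1, 38)

def negotiation_list():
    # Preferred microversion first, fallback second, as 'major.minor' strings.
    return ['%d.%d' % IRONIC_API_VERSION,
            '%d.%d' % FALLBACK_IRONIC_API_VERSION]

assert negotiation_list() == ['1.46', '1.38']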

View File

@@ -702,17 +702,11 @@ class IronicDriver(virt_driver.ComputeDriver):
def _refresh_hash_ring(self, ctxt):
peer_list = None
if CONF.ironic.shard is not None:
# When requesting a shard, we assume each compute service is
# targeting a separate shard, so hard code peer_list to
# just this service
peer_list = set([CONF.host])
# NOTE(jroll) if this is set, we need to limit the set of other
# compute services in the hash ring to hosts that are currently up
# and specified in the peer_list config option, as there's no way
# to check which conductor_group other compute services are using.
if peer_list is None and CONF.ironic.conductor_group is not None:
if CONF.ironic.conductor_group is not None:
try:
# NOTE(jroll) first we need to make sure the Ironic API can
# filter by conductor_group. If it cannot, limiting to
@@ -775,24 +769,21 @@
# attribute. If the API isn't new enough to support conductor groups,
# we fall back to managing all nodes. If it is new enough, we can
# filter it in the API.
# NOTE(johngarbutt) similarly, if shard is set, we also limit the
# nodes that are returned by the shard key
conductor_group = CONF.ironic.conductor_group
shard = CONF.ironic.shard
kwargs = {}
try:
if conductor_group is not None:
if conductor_group is not None:
try:
self._can_send_version(min_version='1.46')
kwargs['conductor_group'] = conductor_group
if shard is not None:
self._can_send_version(min_version='1.82')
kwargs['shard'] = shard
nodes = _get_node_list(**kwargs)
except exception.IronicAPIVersionNotAvailable:
LOG.error('Required Ironic API version is not '
'available to filter nodes by conductor group '
'and shard.')
nodes = _get_node_list(**kwargs)
nodes = _get_node_list(conductor_group=conductor_group)
LOG.debug('Limiting manageable ironic nodes to conductor '
'group %s', conductor_group)
except exception.IronicAPIVersionNotAvailable:
LOG.error('Required Ironic API version 1.46 is not '
'available to filter nodes by conductor group. '
'All nodes will be eligible to be managed by '
'this compute service.')
nodes = _get_node_list()
else:
nodes = _get_node_list()
# NOTE(saga): As _get_node_list() will take a long
# time to return in large clusters we need to call it before
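
The branch removed at the top of this file's hunks pinned the hash-ring membership to a single host whenever a shard was configured; otherwise membership comes from ``peer_list`` (when a conductor group is set) or from every running compute service. A simplified standalone sketch of that decision, with the service lookup and the actual ring construction left out and the helper name purely illustrative:

def select_hash_ring_members(host, shard, conductor_group, peer_list,
                             services_up):
    """Pick which compute services join the Ironic hash ring (simplified)."""
    if shard is not None:
        # Each shard is assumed to have exactly one nova-compute service,
        # so the ring collapses to just this host.
        return {host}
    if conductor_group is not None and peer_list:
        # Limit the ring to configured peers that are currently up.
        return {svc for svc in services_up if svc in peer_list}
    # Otherwise every running compute service participates.
    return set(services_up)

# Mirrors the removed test: with a shard set, peer_list is ignored and the
# ring contains only this host.
assert select_hash_ring_members('host1', 'shard1', 'not-none',
                                ['host1', 'host2'],
                                ['host1', 'host2', 'host3']) == {'host1'}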

View File

@@ -1,12 +0,0 @@
---
features:
- |
Ironic nova-compute services can now target a specific shard of ironic
nodes by setting the config ``[ironic]shard``.
This is particularly useful when using active-passive methods to choose
on which physical host your ironic nova-compute process is running,
while ensuring ``[DEFAULT]host`` stays the same for each shard.
You can use this alongside ``[ironic]conductor_group`` to further limit
which ironic nodes are managed by each nova-compute service.
Note that when you use ``[ironic]shard`` the ``[ironic]peer_list``
is hard coded to a single nova-compute service.
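
The deleted release note above describes the deployer-facing side of the reverted feature. For context, a nova.conf sketch of how it was intended to be used before this revert; the host, shard, and group names are placeholders.

[DEFAULT]
# Keep the service host name stable for this shard across active-passive
# failovers.
host = ironic-compute-1

[ironic]
# This compute service manages only the ironic nodes in shard1.
shard = shard1
# Optionally combine with a conductor group to narrow the set further.
conductor_group = rack-a
# [ironic]peer_list is ignored when shard is set; the peer list is
# effectively just this service.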