Merge "Reject AZ changes during aggregate add / remove host"
This commit is contained in:
commit
7096423b34
@ -214,6 +214,9 @@ Adds a host to an aggregate.
|
||||
|
||||
Specify the ``add_host`` action and host name in the request body.
|
||||
|
||||
It is not allowed to move a host with servers between Availability Zones. Such
|
||||
request is rejected with 409 error.
|
||||
|
||||
Normal response codes: 200
|
||||
|
||||
Error response codes: badRequest(400), unauthorized(401), forbidden(403),
|
||||
@ -264,6 +267,9 @@ Removes a host from an aggregate.
|
||||
|
||||
Specify the ``remove_host`` action and host name in the request body.
|
||||
|
||||
It is not allowed to move a host with servers between Availability Zones. Such
|
||||
request is rejected with 409 error.
|
||||
|
||||
Normal response codes: 200
|
||||
|
||||
Error response codes: badRequest(400), unauthorized(401), forbidden(403),
|
||||
|
@ -32,6 +32,14 @@ availability zone where instances will be scheduled when the user fails to
|
||||
specify one. For more information on how to do this, refer to
|
||||
:doc:`/admin/availability-zones`.
|
||||
|
||||
.. note::
|
||||
|
||||
It is not allowed to move instances between Availability Zones.
|
||||
If adding a host to an aggregate or removing a host from an aggregate would
|
||||
cause an instance to move between Availability Zones, including moving
|
||||
from or moving to the default AZ, then the operation will be rejected. The
|
||||
administrator should drain the instances from the host first then the host
|
||||
can be moved.
|
||||
|
||||
.. _config-sch-for-aggs:
|
||||
|
||||
|
@ -55,6 +55,15 @@ when comparing availability zones and host aggregates:
|
||||
users to specify hosts where instances are launched in server creation.
|
||||
See `Using availability zones to select hosts`_ for more information.
|
||||
|
||||
.. note::
|
||||
|
||||
It is not allowed to move instances between Availability Zones.
|
||||
If adding a host to an aggregate or removing a host from an aggregate
|
||||
would cause an instance to move between Availability Zones, including
|
||||
moving from or moving to the default AZ, then the operation will be
|
||||
rejected. The administrator should drain the instances from the host
|
||||
first then the host can be moved.
|
||||
|
||||
In addition, other services, such as the :neutron-doc:`networking service <>`
|
||||
and the :cinder-doc:`block storage service <>`, also provide an availability
|
||||
zone feature. However, the implementation of these features differs vastly
|
||||
|
@ -219,9 +219,7 @@ class AggregateController(wsgi.Controller):
|
||||
exception.ResourceProviderUpdateConflict) as e:
|
||||
LOG.error('Failed to remove host %s from aggregate %s. Error: %s',
|
||||
host, id, str(e))
|
||||
msg = _('Cannot remove host %(host)s in aggregate %(id)s') % {
|
||||
'host': host, 'id': id}
|
||||
raise exc.HTTPConflict(explanation=msg)
|
||||
raise exc.HTTPConflict(explanation=e.format_message())
|
||||
return self._marshall_aggregate(req, aggregate)
|
||||
|
||||
@wsgi.expected_errors((400, 404))
|
||||
|
@ -6683,6 +6683,51 @@ class AggregateAPI:
|
||||
availability_zones.update_host_availability_zone_cache(context,
|
||||
host_name)
|
||||
|
||||
def ensure_no_instances_need_to_move_az_when_host_added(
|
||||
self, context, aggregate, host_name
|
||||
):
|
||||
instances = objects.InstanceList.get_by_host(context, host_name)
|
||||
if not instances:
|
||||
# if no instance then nothing moves
|
||||
return
|
||||
|
||||
new_az = aggregate.metadata.get('availability_zone')
|
||||
if not new_az:
|
||||
# if we add a host to an aggregate without AZ that cannot change
|
||||
# existing, effective AZ of the host. The host was either not
|
||||
# in any AZ and will not be in an AZ. Or the host was already in
|
||||
# an AZ but this aggregate does not challenge that as it has no AZ.
|
||||
return
|
||||
|
||||
# let's gather what is the AZ of the instances on the host before the
|
||||
# host is added to the aggregate
|
||||
aggregates = objects.AggregateList.get_by_host(context, host_name)
|
||||
az = {
|
||||
agg.metadata['availability_zone']
|
||||
for agg in aggregates
|
||||
if 'availability_zone' in agg.metadata}
|
||||
|
||||
# There can only be one or zero AZ names. Two different AZ names case
|
||||
# is already rejected by is_safe_to_update_az()
|
||||
old_az = list(az)[0] if az else None
|
||||
|
||||
# So here we know that the host is being added to a new AZ if it is
|
||||
# different from the existing, effective AZ of the host then the
|
||||
# instances on this host would need to move between AZs, that is not
|
||||
# supported. So reject it.
|
||||
if old_az != new_az:
|
||||
msg = _(
|
||||
"The host cannot be added to the aggregate as the "
|
||||
"availability zone of the host would change from '%s' to '%s' "
|
||||
"but the host already has %d instance(s). Changing the AZ of "
|
||||
"an existing instance is not supported by this action. Move "
|
||||
"the instances away from this host then try again. If you "
|
||||
"need to move the instances between AZs then you can use "
|
||||
"shelve_offload and unshelve to achieve this."
|
||||
) % (old_az, new_az, len(instances))
|
||||
self._raise_invalid_aggregate_exc(
|
||||
AGGREGATE_ACTION_ADD, aggregate.id, msg)
|
||||
|
||||
@wrap_exception()
|
||||
def add_host_to_aggregate(self, context, aggregate_id, host_name):
|
||||
"""Adds the host to an aggregate."""
|
||||
@ -6711,6 +6756,8 @@ class AggregateAPI:
|
||||
|
||||
self.is_safe_to_update_az(context, aggregate.metadata,
|
||||
hosts=[host_name], aggregate=aggregate)
|
||||
self.ensure_no_instances_need_to_move_az_when_host_added(
|
||||
context, aggregate, host_name)
|
||||
|
||||
aggregate.add_host(host_name)
|
||||
self.query_client.update_aggregates(context, [aggregate])
|
||||
@ -6746,6 +6793,54 @@ class AggregateAPI:
|
||||
|
||||
return aggregate
|
||||
|
||||
def ensure_no_instances_need_to_move_az_when_host_removed(
|
||||
self, context, aggregate, host_name
|
||||
):
|
||||
instances = objects.InstanceList.get_by_host(context, host_name)
|
||||
if not instances:
|
||||
# if no instance then nothing moves
|
||||
return
|
||||
|
||||
current_az = aggregate.metadata.get('availability_zone')
|
||||
if not current_az:
|
||||
# if we remove a host from an aggregate without AZ that cannot
|
||||
# change existing, effective AZ of the host. If the host has an AZ
|
||||
# before the removal then that is due to a different aggregate
|
||||
# membership so that does not change here. If the host has no AZ
|
||||
# before the removal then it won't have either after the removal
|
||||
# from an aggregate without az
|
||||
return
|
||||
|
||||
# let's gather what would be the AZ of the instances on the host
|
||||
# if we exclude the current aggregate.
|
||||
aggregates = objects.AggregateList.get_by_host(context, host_name)
|
||||
azs = {
|
||||
agg.metadata['availability_zone']
|
||||
for agg in aggregates
|
||||
if agg.id != aggregate.id and 'availability_zone' in agg.metadata
|
||||
}
|
||||
|
||||
# There can only be one or zero AZ names. Two different AZ names case
|
||||
# is already rejected by is_safe_to_update_az()
|
||||
new_az = list(azs)[0] if azs else None
|
||||
|
||||
# So here we know that the host is being removed from an aggregate
|
||||
# that has an AZ. So if the new AZ without this aggregate is different
|
||||
# then, that would mean the instances on this host need to change AZ.
|
||||
# That is not supported.
|
||||
if current_az != new_az:
|
||||
msg = _(
|
||||
"The host cannot be removed from the aggregate as the "
|
||||
"availability zone of the host would change from '%s' to '%s' "
|
||||
"but the host already has %d instance(s). Changing the AZ of "
|
||||
"an existing instance is not supported by this action. Move "
|
||||
"the instances away from this host then try again. If you "
|
||||
"need to move the instances between AZs then you can use "
|
||||
"shelve_offload and unshelve to achieve this."
|
||||
) % (current_az, new_az, len(instances))
|
||||
self._raise_invalid_aggregate_exc(
|
||||
AGGREGATE_ACTION_DELETE, aggregate.id, msg)
|
||||
|
||||
@wrap_exception()
|
||||
def remove_host_from_aggregate(self, context, aggregate_id, host_name):
|
||||
"""Removes host from the aggregate."""
|
||||
@ -6763,6 +6858,9 @@ class AggregateAPI:
|
||||
action=fields_obj.NotificationAction.REMOVE_HOST,
|
||||
phase=fields_obj.NotificationPhase.START)
|
||||
|
||||
self.ensure_no_instances_need_to_move_az_when_host_removed(
|
||||
context, aggregate, host_name)
|
||||
|
||||
# Remove the resource provider from the provider aggregate first before
|
||||
# we change anything on the nova side because if we did the nova stuff
|
||||
# first we can't re-attempt this from the compute API if cleaning up
|
||||
|
@ -389,13 +389,23 @@ class AggregateHostMoveTestCase(AggregateRequestFiltersTest):
|
||||
# server will be in default AZ
|
||||
self._boot_server(host=self.host)
|
||||
|
||||
# FIXME(stephenfin): This is bug #1907775, where we should reject the
|
||||
# request to add a host to an aggregate when and instance would need
|
||||
# to move between AZs
|
||||
|
||||
# The server would need to move from default AZ to custom AZ, that
|
||||
# is not OK
|
||||
self._add_host_to_aggregate('ag3-custom-az', self.host)
|
||||
ex = self.assertRaises(
|
||||
client.OpenStackApiException,
|
||||
self._add_host_to_aggregate, 'ag3-custom-az', self.host
|
||||
)
|
||||
self.assertEqual(409, ex.response.status_code)
|
||||
self.assertIn(
|
||||
"Reason: The host cannot be added to the aggregate as the "
|
||||
"availability zone of the host would change from 'None' to "
|
||||
"'custom-az' but the host already has 1 instance(s). "
|
||||
"Changing the AZ of an existing instance is not supported by this "
|
||||
"action. Move the instances away from this host then try again. "
|
||||
"If you need to move the instances between AZs then you can use "
|
||||
"shelve_offload and unshelve to achieve this.",
|
||||
str(ex)
|
||||
)
|
||||
|
||||
def test_add_host_with_instances_custom_az_then_default(self):
|
||||
self._add_host_to_aggregate('ag3-custom-az', self.host)
|
||||
@ -432,12 +442,22 @@ class AggregateHostMoveTestCase(AggregateRequestFiltersTest):
|
||||
# server will be in custom AZ
|
||||
self._boot_server(host=self.host)
|
||||
|
||||
# FIXME(stephenfin): This is bug #1907775, where we should reject the
|
||||
# request to remove a host from the aggregate when there are instances
|
||||
# on said host
|
||||
|
||||
# The server would need to move to the default AZ, that is not OK.
|
||||
self._remove_host_from_aggregate('ag3-custom-az', self.host)
|
||||
ex = self.assertRaises(
|
||||
client.OpenStackApiException,
|
||||
self._remove_host_from_aggregate, 'ag3-custom-az', self.host
|
||||
)
|
||||
self.assertEqual(409, ex.response.status_code)
|
||||
self.assertIn(
|
||||
"Reason: The host cannot be removed from the aggregate as the "
|
||||
"availability zone of the host would change from 'custom-az' to "
|
||||
"'None' but the host already has 1 instance(s). Changing the AZ "
|
||||
"of an existing instance is not supported by this action. Move "
|
||||
"the instances away from this host then try again. If you "
|
||||
"need to move the instances between AZs then you can use "
|
||||
"shelve_offload and unshelve to achieve this.",
|
||||
str(ex)
|
||||
)
|
||||
|
||||
def test_moving_host_around_az_without_instances(self):
|
||||
# host moving from default to custom AZ
|
||||
|
@ -13246,6 +13246,8 @@ class ComputeAPIAggrCallsSchedulerTestCase(test.NoDBTestCase):
|
||||
mock_get_all_by_host,
|
||||
):
|
||||
self.api.is_safe_to_update_az = mock.Mock()
|
||||
self.api.ensure_no_instances_need_to_move_az_when_host_added = (
|
||||
mock.Mock())
|
||||
self.api._update_az_cache_for_host = mock.Mock()
|
||||
agg = objects.Aggregate(name='fake', metadata={}, uuid=uuids.agg)
|
||||
agg.add_host = mock.Mock()
|
||||
@ -13274,6 +13276,8 @@ class ComputeAPIAggrCallsSchedulerTestCase(test.NoDBTestCase):
|
||||
self, update_aggregates, mock_notify, mock_remove_host,
|
||||
mock_get_all_by_host,
|
||||
):
|
||||
self.api.ensure_no_instances_need_to_move_az_when_host_removed = (
|
||||
mock.Mock())
|
||||
self.api._update_az_cache_for_host = mock.Mock()
|
||||
agg = objects.Aggregate(name='fake', metadata={}, uuid=uuids.agg)
|
||||
agg.delete_host = mock.Mock()
|
||||
|
@ -0,0 +1,12 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Nova now ensures that an instance cannot move between availability zones
|
||||
when the host of the instance is added or removed to an aggregate that is
|
||||
part of another availability zone. Moving from or to the default
|
||||
availability zone is also rejected.
|
||||
|
||||
This resolves `bug 1907775`_ where after such move the instance become
|
||||
stuck in between availability zones.
|
||||
|
||||
.. _bug 1907775: https://bugs.launchpad.net/nova/+bug/1907775
|
Loading…
Reference in New Issue
Block a user