Block unsupported instance operations with accelerators.

The block is applied to primary operations, such as pause
or shelve, but not to their reverse operations, like
unpause or unshelve, because that is not necessary.

Added functional tests for various instance operations,
including those that work and those that fail.
Rebuild functional test passes.

Change-Id: I016bc1812404ce1019c71b7a3363f34acc3f8aed
Blueprint: nova-cyborg-interaction
This commit is contained in:
Sundar Nadathur 2019-08-05 21:55:01 -07:00
parent c433b1df42
commit 89dbd08976
9 changed files with 228 additions and 9 deletions

View File

@ -74,7 +74,7 @@ class EvacuateController(wsgi.Controller):
# TODO(eliqiao): Should be responding here with 202 Accept
# because evacuate is an async call, but keep to 200 for
# backwards compatibility reasons.
@wsgi.expected_errors((400, 404, 409))
@wsgi.expected_errors((400, 403, 404, 409))
@wsgi.action('evacuate')
@validation.schema(evacuate.evacuate, "2.0", "2.13")
@validation.schema(evacuate.evacuate_v214, "2.14", "2.28")
@ -144,6 +144,8 @@ class EvacuateController(wsgi.Controller):
'evacuate', id)
except exception.ComputeServiceInUse as e:
raise exc.HTTPBadRequest(explanation=e.format_message())
except exception.ForbiddenWithAccelerators as e:
raise exc.HTTPForbidden(explanation=e.format_message())
if (not api_version_request.is_supported(req, min_version='2.14') and
CONF.api.enable_instance_password):

View File

@ -74,7 +74,8 @@ class MigrateServerController(wsgi.Controller):
try:
self.compute_api.resize(req.environ['nova.context'], instance,
host_name=host_name)
except (exception.TooManyInstances, exception.QuotaError) as e:
except (exception.TooManyInstances, exception.QuotaError,
exception.ForbiddenWithAccelerators) as e:
raise exc.HTTPForbidden(explanation=e.format_message())
except (exception.InstanceIsLocked,
exception.InstanceNotReady,
@ -90,7 +91,7 @@ class MigrateServerController(wsgi.Controller):
raise exc.HTTPBadRequest(explanation=e.format_message())
@wsgi.response(202)
@wsgi.expected_errors((400, 404, 409))
@wsgi.expected_errors((400, 403, 404, 409))
@wsgi.action('os-migrateLive')
@validation.schema(migrate_server.migrate_live, "2.0", "2.24")
@validation.schema(migrate_server.migrate_live_v2_25, "2.25", "2.29")
@ -174,6 +175,8 @@ class MigrateServerController(wsgi.Controller):
except exception.InstanceInvalidState as state_error:
common.raise_http_conflict_for_instance_invalid_state(state_error,
'os-migrateLive', id)
except exception.ForbiddenWithAccelerators as e:
raise exc.HTTPForbidden(explanation=e.format_message())
def _get_force_param_for_live_migration(self, body, host):
force = body["os-migrateLive"].get("force", False)

View File

@ -946,7 +946,8 @@ class ServersController(wsgi.Controller):
try:
self.compute_api.resize(context, instance, flavor_id,
auto_disk_config=auto_disk_config)
except exception.QuotaError as error:
except (exception.QuotaError,
exception.ForbiddenWithAccelerators) as error:
raise exc.HTTPForbidden(
explanation=error.format_message())
except (exception.InstanceIsLocked,
@ -1113,7 +1114,8 @@ class ServersController(wsgi.Controller):
except exception.KeypairNotFound:
msg = _("Invalid key_name provided.")
raise exc.HTTPBadRequest(explanation=msg)
except exception.QuotaError as error:
except (exception.QuotaError,
exception.ForbiddenWithAccelerators) as error:
raise exc.HTTPForbidden(explanation=error.format_message())
except (exception.AutoDiskConfigDisabledByImage,
exception.CertificateValidationFailed,

View File

@ -27,7 +27,7 @@ class SuspendServerController(wsgi.Controller):
self.compute_api = compute.API()
@wsgi.response(202)
@wsgi.expected_errors((404, 409))
@wsgi.expected_errors((403, 404, 409))
@wsgi.action('suspend')
def _suspend(self, req, id, body):
"""Permit admins to suspend the server."""
@ -44,6 +44,8 @@ class SuspendServerController(wsgi.Controller):
except exception.InstanceInvalidState as state_error:
common.raise_http_conflict_for_instance_invalid_state(state_error,
'suspend', id)
except exception.ForbiddenWithAccelerators as e:
raise exc.HTTPForbidden(explanation=e.format_message())
@wsgi.response(202)
@wsgi.expected_errors((404, 409))

View File

@ -284,6 +284,16 @@ def _get_image_meta_obj(image_meta_dict):
return image_meta
def block_accelerators(func):
@functools.wraps(func)
def wrapper(self, context, instance, *args, **kwargs):
dp_name = instance.flavor.extra_specs.get('accel:device_profile')
if dp_name:
raise exception.ForbiddenWithAccelerators()
return func(self, context, instance, *args, **kwargs)
return wrapper
@profiler.trace_cls("compute_api")
class API(base.Base):
"""API for interacting with the compute manager."""
@ -3372,6 +3382,7 @@ class API(base.Base):
if img_arch:
fields_obj.Architecture.canonicalize(img_arch)
@block_accelerators
# TODO(stephenfin): We should expand kwargs out to named args
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
@ -3850,6 +3861,7 @@ class API(base.Base):
return node
@block_accelerators
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
@check_instance_host(check_is_up=True)
@ -4046,6 +4058,7 @@ class API(base.Base):
allow_same_host = CONF.allow_resize_to_same_host
return allow_same_host
@block_accelerators
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
vm_states.PAUSED, vm_states.SUSPENDED])
@ -4210,6 +4223,7 @@ class API(base.Base):
return self.compute_rpcapi.get_instance_diagnostics(context,
instance=instance)
@block_accelerators
@reject_sev_instances(instance_actions.SUSPEND)
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE])
@ -4890,6 +4904,7 @@ class API(base.Base):
diff=diff)
return _metadata
@block_accelerators
@reject_sev_instances(instance_actions.LIVE_MIGRATION)
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED])
@ -5019,6 +5034,7 @@ class API(base.Base):
self.compute_rpcapi.live_migration_abort(context,
instance, migration.id)
@block_accelerators
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
vm_states.ERROR])
def evacuate(self, context, instance, host, on_shared_storage,

View File

@ -156,6 +156,11 @@ class Forbidden(NovaException):
code = 403
class ForbiddenWithAccelerators(NovaException):
msg_fmt = _("Forbidden with instances that have accelerators.")
code = 403
class AdminRequired(Forbidden):
msg_fmt = _("User does not have admin privileges")

View File

@ -7900,7 +7900,7 @@ class AcceleratorServerBase(integrated_helpers.ProviderUsageBaseTestCase):
extra_spec=extra_specs)
return flavor_id
def _check_allocations_usage(self, server):
def _check_allocations_usage(self, server, check_other_host_alloc=True):
# Check allocations on host where instance is running
server_uuid = server['id']
@ -7921,7 +7921,8 @@ class AcceleratorServerBase(integrated_helpers.ProviderUsageBaseTestCase):
self.assertEqual(expected_device_alloc,
device_alloc[server_uuid])
else:
self.assertEqual({}, host_alloc)
if check_other_host_alloc:
self.assertEqual({}, host_alloc)
self.assertEqual({}, device_alloc)
# NOTE(Sundar): ARQs for an instance could come from different
@ -8088,3 +8089,154 @@ class AcceleratorServerReschedTest(AcceleratorServerBase):
[mock.call(server_uuid),
mock.call(server_uuid),
mock.call(server_uuid)])
class AcceleratorServerOpsTest(AcceleratorServerBase):
def setUp(self):
self.NUM_HOSTS = 2 # 2nd host needed for evacuate
super(AcceleratorServerOpsTest, self).setUp()
flavor_id = self._create_acc_flavor()
server_name = 'accel_server1'
self.server = self._create_server(
server_name, flavor_id=flavor_id,
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
networks='none', expected_state='ACTIVE')
def test_soft_reboot_ok(self):
params = {'reboot': {'type': 'SOFT'}}
self.api.post_server_action(self.server['id'], params)
self._wait_for_state_change(self.server, 'ACTIVE')
self._check_allocations_usage(self.server)
def test_hard_reboot_ok(self):
params = {'reboot': {'type': 'HARD'}}
self.api.post_server_action(self.server['id'], params)
self._wait_for_state_change(self.server, 'HARD_REBOOT')
self._wait_for_state_change(self.server, 'ACTIVE')
self._check_allocations_usage(self.server)
def test_pause_unpause_ok(self):
# Pause and unpause should work with accelerators.
# This is not a general test of un/pause functionality.
self.api.post_server_action(self.server['id'], {'pause': {}})
self._wait_for_state_change(self.server, 'PAUSED')
self._check_allocations_usage(self.server)
# ARQs didn't get deleted (and so didn't have to be re-created).
self.cyborg.mock_del_arqs.assert_not_called()
self.api.post_server_action(self.server['id'], {'unpause': {}})
self._wait_for_state_change(self.server, 'ACTIVE')
self._check_allocations_usage(self.server)
def test_stop_start_ok(self):
# Stop and start should work with accelerators.
# This is not a general test of start/stop functionality.
self.api.post_server_action(self.server['id'], {'os-stop': {}})
self._wait_for_state_change(self.server, 'SHUTOFF')
self._check_allocations_usage(self.server)
# ARQs didn't get deleted (and so didn't have to be re-created).
self.cyborg.mock_del_arqs.assert_not_called()
self.api.post_server_action(self.server['id'], {'os-start': {}})
self._wait_for_state_change(self.server, 'ACTIVE')
self._check_allocations_usage(self.server)
def test_lock_unlock_ok(self):
# Lock/unlock are no-ops for accelerators.
self.api.post_server_action(self.server['id'], {'lock': {}})
server = self.api.get_server(self.server['id'])
self.assertTrue(server['locked'])
self._check_allocations_usage(self.server)
self.api.post_server_action(self.server['id'], {'unlock': {}})
server = self.api.get_server(self.server['id'])
self.assertTrue(not server['locked'])
self._check_allocations_usage(self.server)
def test_backup_ok(self):
self.api.post_server_action(self.server['id'],
{'createBackup': {
'name': 'Backup 1',
'backup_type': 'daily',
'rotation': 1}})
self._check_allocations_usage(self.server)
def test_create_image_ok(self): # snapshot
self.api.post_server_action(self.server['id'],
{'createImage': {
'name': 'foo-image',
'metadata': {'meta_var': 'meta_val'}}})
self._check_allocations_usage(self.server)
def test_rescue_unrescue_ok(self):
self.api.post_server_action(self.server['id'],
{'rescue': {
'adminPass': 'MySecretPass',
'rescue_image_ref': '70a599e0-31e7-49b7-b260-868f441e862b'}})
self._check_allocations_usage(self.server)
# ARQs didn't get deleted (and so didn't have to be re-created).
self.cyborg.mock_del_arqs.assert_not_called()
self._wait_for_state_change(self.server, 'RESCUE')
self.api.post_server_action(self.server['id'], {'unrescue': {}})
self._check_allocations_usage(self.server)
def test_resize_fails(self):
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'],
{'resize': {'flavorRef': '2', 'OS-DCF:diskConfig': 'AUTO'}})
self.assertEqual(403, ex.response.status_code)
self._check_allocations_usage(self.server)
def test_suspend_fails(self):
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'], {'suspend': {}})
self.assertEqual(403, ex.response.status_code)
self._check_allocations_usage(self.server)
def test_migrate_fails(self):
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'], {'migrate': {}})
self.assertEqual(403, ex.response.status_code)
self._check_allocations_usage(self.server)
def test_live_migrate_fails(self):
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'],
{'migrate': {'host': 'accel_host1'}})
self.assertEqual(403, ex.response.status_code)
self._check_allocations_usage(self.server)
def test_evacuate_fails(self):
server_hostname = self.server['OS-EXT-SRV-ATTR:host']
for i in range(self.NUM_HOSTS):
hostname = 'accel_host' + str(i)
if hostname != server_hostname:
other_hostname = hostname
if self.compute_services[i].host == server_hostname:
compute_to_stop = self.compute_services[i]
# Stop and force down the compute service.
compute_id = self.admin_api.get_services(
host=server_hostname, binary='nova-compute')[0]['id']
compute_to_stop.stop()
self.admin_api.put_service(compute_id, {'forced_down': 'true'})
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'],
{'evacuate': {
'host': other_hostname,
'adminPass': 'MySecretPass'}})
self.assertEqual(403, ex.response.status_code)
self._check_allocations_usage(self.server)
def test_rebuild_fails(self):
rebuild_image_ref = fake_image.AUTO_DISK_CONFIG_ENABLED_IMAGE_UUID
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'],
{'rebuild': {
'imageRef': rebuild_image_ref,
'OS-DCF:diskConfig': 'AUTO'}})
self.assertEqual(403, ex.response.status_code)
self._check_allocations_usage(self.server)

View File

@ -9088,6 +9088,7 @@ class ComputeAPITestCase(BaseTestCase):
def test_rebuild_in_error_not_launched(self):
instance = self._create_fake_instance_obj(params={'image_ref': ''})
flavor = instance.flavor
self.stub_out('nova.tests.unit.image.fake._FakeImageService.show',
self.fake_show)
self.compute.build_and_run_instance(self.context, instance, {}, {}, {},
@ -9098,6 +9099,7 @@ class ComputeAPITestCase(BaseTestCase):
"launched_at": None})
instance = db.instance_get_by_uuid(self.context, instance['uuid'])
instance['flavor'] = flavor
self.assertRaises(exception.InstanceInvalidState,
self.compute_api.rebuild,

View File

@ -2057,7 +2057,7 @@ class _ComputeAPIUnitTestMixIn(object):
'user': user_count}
cur_flavor = objects.Flavor(id=1, name='foo', vcpus=1, memory_mb=512,
root_gb=10, disabled=False)
root_gb=10, disabled=False, extra_specs={})
fake_inst = self._create_instance_obj()
fake_inst.flavor = cur_flavor
new_flavor = objects.Flavor(id=2, name='bar', vcpus=1, memory_mb=2048,
@ -7214,6 +7214,41 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
mock_get_min_ver.assert_called_once_with(
self.context, ['nova-compute'])
def _test_block_accelerators(self, instance, args_info):
@compute_api.block_accelerators
def myfunc(self, context, instance, *args, **kwargs):
args_info['args'] = (context, instance, *args)
args_info['kwargs'] = dict(**kwargs)
args = ('arg1', 'arg2')
kwargs = {'arg3': 'dummy3', 'arg4': 'dummy4'}
myfunc(mock.ANY, self.context, instance, *args, **kwargs)
expected_args = (self.context, instance, *args)
return expected_args, kwargs
def test_block_accelerators_no_device_profile(self):
instance = self._create_instance_obj()
args_info = {}
expected_args, kwargs = self._test_block_accelerators(
instance, args_info)
self.assertEqual(expected_args, args_info['args'])
self.assertEqual(kwargs, args_info['kwargs'])
def test_block_accelerators_with_device_profile(self):
extra_specs = {'accel:device_profile': 'mydp'}
flavor = self._create_flavor(extra_specs=extra_specs)
instance = self._create_instance_obj(flavor=flavor)
args_info = {}
self.assertRaisesRegex(exception.ForbiddenWithAccelerators,
'Forbidden with instances that have accelerators.',
self._test_block_accelerators, instance, args_info)
# myfunc was not called
self.assertEqual({}, args_info)
class DiffDictTestCase(test.NoDBTestCase):
"""Unit tests for _diff_dict()."""