# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import os_resource_classes as orc
import os_traits
import six

from nova import context as nova_context
from nova import exception
from nova import objects
from nova.tests.functional.api import client as api_client
from nova.tests.functional import integrated_helpers
from nova.tests.unit.image import fake as fake_image
from nova import utils


class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase):
    compute_driver = 'fake.SmallFakeDriver'

    def test_compute_service_delete_ensure_related_cleanup(self):
        """Tests deleting a compute service and the related cleanup associated
        with that like the compute_nodes table entry, removing the host
        from any aggregates, the host mapping in the API DB and the associated
        resource provider in Placement.
        """
        compute = self._start_compute('host1')
        # Make sure our compute host is represented as expected.
        services = self.admin_api.get_services(binary='nova-compute')
        self.assertEqual(1, len(services))
        service = services[0]

        # Now create a host aggregate and add our host to it.
        aggregate = self.admin_api.post_aggregate(
            {'aggregate': {'name': 'agg1'}})
        self.admin_api.add_host_to_aggregate(aggregate['id'], service['host'])
        # Make sure the host is in the aggregate.
        aggregate = self.admin_api.api_get(
            '/os-aggregates/%s' % aggregate['id']).body['aggregate']
        self.assertEqual([service['host']], aggregate['hosts'])

        rp_uuid = self._get_provider_uuid_by_host(service['host'])

        # We'll know there is a host mapping implicitly if os-hypervisors
        # returned something in _get_provider_uuid_by_host, but let's also
        # make sure the host mapping is there like we expect.
        ctxt = nova_context.get_admin_context()
        objects.HostMapping.get_by_host(ctxt, service['host'])

        # Make sure there is a resource provider for that compute node based
        # on the uuid.
        resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
        self.assertEqual(200, resp.status)

        # Make sure the resource provider has inventory.
        inventories = self._get_provider_inventory(rp_uuid)
        # Expect a minimal set of inventory for the fake virt driver.
        for resource_class in [orc.VCPU, orc.MEMORY_MB, orc.DISK_GB]:
            self.assertIn(resource_class, inventories)

        # Now create a server so that the resource provider has some
        # allocation records.
        flavor = self.api.get_flavors()[0]
        server = self._boot_and_check_allocations(flavor, service['host'])

        # Now the fun part, delete the compute service and make sure related
        # resources are cleaned up, like the compute node, host mapping, and
        # resource provider. We have to first stop the compute service so
        # it doesn't recreate the compute node during the
        # update_available_resource periodic task.
        self.admin_api.put_service(service['id'], {'forced_down': True})
        compute.stop()
        # The first attempt should fail since there is an instance on the
        # compute host.
        ex = self.assertRaises(api_client.OpenStackApiException,
                               self.admin_api.api_delete,
                               '/os-services/%s' % service['id'])
        self.assertIn('Unable to delete compute service that is hosting '
                      'instances.', six.text_type(ex))
        self.assertEqual(409, ex.response.status_code)

        # Now delete the instance and wait for it to be gone.
        self._delete_and_check_allocations(server)

        # Now we can delete the service.
        self.admin_api.api_delete('/os-services/%s' % service['id'])
        # Make sure the service is deleted.
        services = self.admin_api.get_services(binary='nova-compute')
        self.assertEqual(0, len(services))
        # Make sure the host was removed from the aggregate.
        aggregate = self.admin_api.api_get(
            '/os-aggregates/%s' % aggregate['id']).body['aggregate']
        self.assertEqual([], aggregate['hosts'])
        # Trying to get the hypervisor should result in a 404.
        self.admin_api.api_get(
            'os-hypervisors?hypervisor_hostname_pattern=%s' % service['host'],
            check_response_status=[404])
        # The host mapping should also be gone.
        self.assertRaises(exception.HostMappingNotFound,
                          objects.HostMapping.get_by_host,
                          ctxt, service['host'])
        # And finally, the resource provider should also be gone. The API
        # will perform a cascading delete of the resource provider inventory
        # and allocation information.
        resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
        self.assertEqual(404, resp.status)

    def test_evacuate_then_delete_compute_service(self):
        """Tests a scenario where a server is created on a host, the host
        goes down, the server is evacuated to another host, and then the
        source host compute service is deleted. After that the deleted
        compute service is restarted. Related placement resources are checked
        throughout.
        """
        # Create our source host that we will evacuate *from* later.
        host1 = self._start_compute('host1')
        # Create a server which will go on host1 since it is the only host.
        flavor = self.api.get_flavors()[0]
        server = self._boot_and_check_allocations(flavor, 'host1')
        # Get the compute service record for host1 so we can manage it.
        service = self.admin_api.get_services(
            binary='nova-compute', host='host1')[0]
        # Get the corresponding resource provider uuid for host1.
        rp_uuid = self._get_provider_uuid_by_host(service['host'])
        # Make sure there is a resource provider for that compute node based
        # on the uuid.
        resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
        self.assertEqual(200, resp.status)
        # Down the compute service for host1 so we can evacuate from it.
        self.admin_api.put_service(service['id'], {'forced_down': True})
        host1.stop()
        # Start another host and trigger the server evacuate to that host.
        self._start_compute('host2')
        self.admin_api.post_server_action(server['id'], {'evacuate': {}})
        # The host does not change until after the status is changed to ACTIVE
        # so wait for both parameters.
        self._wait_for_server_parameter(
            self.admin_api, server, {'status': 'ACTIVE',
                                     'OS-EXT-SRV-ATTR:host': 'host2'})
        # Delete the compute service for host1 and check the related
        # placement resources for that host.
        self.admin_api.api_delete('/os-services/%s' % service['id'])
        # Make sure the service is gone.
        services = self.admin_api.get_services(
            binary='nova-compute', host='host1')
        self.assertEqual(0, len(services), services)
        # FIXME(mriedem): This is bug 1829479 where the compute service is
        # deleted but the resource provider is not because there are still
        # allocations against the provider from the evacuated server.
        resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
        self.assertEqual(200, resp.status)
        self.assertFlavorMatchesUsage(rp_uuid, flavor)
        # Try to restart the host1 compute service to create a new resource
        # provider.
        self.restart_compute_service(host1)
        # FIXME(mriedem): This is bug 1817833 where restarting the now-deleted
        # compute service attempts to create a new resource provider with a
        # new uuid but the same name which results in a conflict. The service
        # does not die, however, because _update_available_resource_for_node
        # catches and logs but does not re-raise the error.
        log_output = self.stdlog.logger.output
        self.assertIn('Error updating resources for node host1.', log_output)
        self.assertIn('Failed to create resource provider host1', log_output)

    def test_migrate_confirm_after_deleted_source_compute(self):
        """Tests a scenario where a server is cold migrated and while in
        VERIFY_RESIZE status the admin attempts to delete the source compute
        and then the user tries to confirm the resize.
        """
        # Start a compute service and create a server there.
        self._start_compute('host1')
        host1_rp_uuid = self._get_provider_uuid_by_host('host1')
        flavor = self.api.get_flavors()[0]
        server = self._boot_and_check_allocations(flavor, 'host1')
        # Start a second compute service so we can cold migrate there.
        self._start_compute('host2')
        host2_rp_uuid = self._get_provider_uuid_by_host('host2')
        # Cold migrate the server to host2.
        self._migrate_and_check_allocations(
            server, flavor, host1_rp_uuid, host2_rp_uuid)
        # Delete the source compute service.
        service = self.admin_api.get_services(
            binary='nova-compute', host='host1')[0]
        self.admin_api.api_delete('/os-services/%s' % service['id'])
        # FIXME(mriedem): This is bug 1852610 where the compute service is
        # deleted but the resource provider is not because there are still
        # migration-based allocations against the source node provider.
        resp = self.placement_api.get(
            '/resource_providers/%s' % host1_rp_uuid)
        self.assertEqual(200, resp.status)
        self.assertFlavorMatchesUsage(host1_rp_uuid, flavor)
        # Now try to confirm the migration.
        # FIXME(mriedem): This will fail until bug 1852610 is fixed and the
        # source compute service delete is blocked while there is an
        # in-progress migration involving the node.
        self.assertNotIn('ComputeHostNotFound', self.stdlog.logger.output)
        self.api.post_server_action(server['id'], {'confirmResize': None})
        self._wait_for_state_change(self.api, server, 'ERROR')
        self.assertIn('ComputeHostNotFound', self.stdlog.logger.output)


class ComputeStatusFilterTest(integrated_helpers.ProviderUsageBaseTestCase):
    """Tests the API, compute service and Placement interaction with the
    COMPUTE_STATUS_DISABLED trait when a compute service is enabled/disabled.

    This version of the test uses the 2.latest microversion for testing the
    2.53+ behavior of the PUT /os-services/{service_id} API.
    """
    compute_driver = 'fake.SmallFakeDriver'

    def _update_service(self, service, disabled, forced_down=None):
        """Update the service using the 2.53 request schema.

        :param service: dict representing the service resource in the API
        :param disabled: True if the service should be disabled, False if the
            service should be enabled
        :param forced_down: Optionally change the forced_down value.
        """
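        # A sketch of the wire-level request this helper ends up making
        # (values are illustrative, not asserted by the test):
        #   PUT /os-services/{service_id}
        #   {"status": "disabled", "forced_down": true}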
        status = 'disabled' if disabled else 'enabled'
        req = {'status': status}
        if forced_down is not None:
            req['forced_down'] = forced_down
        self.admin_api.put_service(service['id'], req)

    def test_compute_status_filter(self):
        """Tests the compute_status_filter placement request filter"""
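        # Background (a hedged summary of the filter, not something this
        # test asserts directly): the request filter is expected to add a
        # forbidden trait to the scheduler's placement query, roughly
        #   GET /allocation_candidates?...&required=!COMPUTE_STATUS_DISABLED
        # so disabled hosts are weeded out before the filter scheduler runs.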
        # Start a compute service so a compute node and resource provider
        # are created.
        compute = self._start_compute('host1')
        # Get the UUID of the resource provider that was created.
        rp_uuid = self._get_provider_uuid_by_host('host1')
        # Get the service from the compute API.
        services = self.admin_api.get_services(binary='nova-compute',
                                               host='host1')
        self.assertEqual(1, len(services))
        service = services[0]

        # At this point, the service should be enabled and the
        # COMPUTE_STATUS_DISABLED trait should not be set on the
        # resource provider in placement.
        self.assertEqual('enabled', service['status'])
        rp_traits = self._get_provider_traits(rp_uuid)
        trait = os_traits.COMPUTE_STATUS_DISABLED
        self.assertNotIn(trait, rp_traits)

        # Now disable the compute service via the API.
        self._update_service(service, disabled=True)
        # The update to placement should be synchronous so check the provider
        # traits and COMPUTE_STATUS_DISABLED should be set.
        rp_traits = self._get_provider_traits(rp_uuid)
        self.assertIn(trait, rp_traits)

        # Try creating a server which should fail because nothing is
        # available.
        networks = [{'port': self.neutron.port_1['id']}]
        server_req = self._build_minimal_create_server_request(
            self.api, 'test_compute_status_filter',
            image_uuid=fake_image.get_valid_image_id(), networks=networks)
        server = self.api.post_server({'server': server_req})
        server = self._wait_for_state_change(self.api, server, 'ERROR')
        # There should be a NoValidHost fault recorded.
        self.assertIn('fault', server)
        self.assertIn('No valid host', server['fault']['message'])

        # Now enable the service and the trait should be gone.
        self._update_service(service, disabled=False)
        rp_traits = self._get_provider_traits(rp_uuid)
        self.assertNotIn(trait, rp_traits)

        # Try creating another server and it should be OK.
        server = self.api.post_server({'server': server_req})
        self._wait_for_state_change(self.api, server, 'ACTIVE')

        # Stop, force-down and disable the service so the API cannot call
        # the compute service to sync the trait.
        compute.stop()
        self._update_service(service, disabled=True, forced_down=True)
        # The API should have logged a message about the service being down.
        self.assertIn('Compute service on host host1 is down. The '
                      'COMPUTE_STATUS_DISABLED trait will be synchronized '
                      'when the service is restarted.',
                      self.stdlog.logger.output)
        # The trait should not be on the provider even though the node is
        # disabled.
        rp_traits = self._get_provider_traits(rp_uuid)
        self.assertNotIn(trait, rp_traits)

        # Restart the compute service which should sync and set the trait on
        # the provider in placement.
        self.restart_compute_service(compute)
        rp_traits = self._get_provider_traits(rp_uuid)
        self.assertIn(trait, rp_traits)


class ComputeStatusFilterTest211(ComputeStatusFilterTest):
    """Extends ComputeStatusFilterTest and uses the 2.11 API for the
    legacy os-services disable/enable/force-down API behavior
    """
    microversion = '2.11'

    def _update_service(self, service, disabled, forced_down=None):
        """Update the service using the 2.11 request schema.

        :param service: dict representing the service resource in the API
        :param disabled: True if the service should be disabled, False if the
            service should be enabled
        :param forced_down: Optionally change the forced_down value.
        """
        # Before 2.53 the service is uniquely identified by host and binary.
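        # For illustration, the legacy wire-level calls made below look
        # roughly like this (values are examples):
        #   PUT /os-services/force-down
        #   {"host": "host1", "binary": "nova-compute", "forced_down": true}
        # followed by PUT /os-services/disable (or enable) with the same
        # host/binary body.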
        body = {
            'host': service['host'],
            'binary': service['binary']
        }
        # Handle forced_down first if provided since the enable/disable
        # behavior in the API depends on it.
        if forced_down is not None:
            body['forced_down'] = forced_down
            self.admin_api.api_put('/os-services/force-down', body)
        if disabled:
            self.admin_api.api_put('/os-services/disable', body)
        else:
            self.admin_api.api_put('/os-services/enable', body)

    def _get_provider_uuid_by_host(self, host):
        # We have to temporarily mutate to 2.53 to get the hypervisor UUID.
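        # (2.53 is the microversion where the os-hypervisors API started
        # reporting the hypervisor id as a UUID, which is what the provider
        # lookup in the parent helper relies on.)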
        with utils.temporary_mutation(self.admin_api, microversion='2.53'):
            return super(ComputeStatusFilterTest211,
                         self)._get_provider_uuid_by_host(host)