From 7085a3eaf10fdb15b850204e721f2f58a443699d Mon Sep 17 00:00:00 2001 From: Chuan Miao Date: Mon, 1 Jul 2024 13:02:56 +0200 Subject: [PATCH] svm migration across physical networks with multiple port bindings This patch provides the support for share server migrations between different physical networks. It is achieved by creating an inactive port binding on the target host during share server migration, and then cut it over to target host when migration completes. The implementation is driver agnostic, so this feature should be not limited to specific driver, although it is only tested against NetApp driver. The feature is enabled by a new share service option 'server_migration_extend_neutron_network'. The support for multiple binding Manila ports is added in Neutron's 2023.1 release. Closes-Bug: #2002019 Change-Id: I18d0dd1847a09f28989b1a2b62fc6ff8f8155def --- manila/db/api.py | 6 + manila/db/sqlalchemy/api.py | 13 +- manila/network/neutron/api.py | 39 +++++ .../network/neutron/neutron_network_plugin.py | 65 ++++++++ .../netapp/dataontap/client/client_cmode.py | 1 + manila/share/manager.py | 86 ++++++++++- .../tests/network/neutron/test_neutron_api.py | 33 +++++ .../network/neutron/test_neutron_plugin.py | 139 ++++++++++++++++++ ...th-network-extension-7433a5c38c8278e4.yaml | 7 + 9 files changed, 387 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/server-migration-with-network-extension-7433a5c38c8278e4.yaml diff --git a/manila/db/api.py b/manila/db/api.py index e62cb60499..406d49d636 100644 --- a/manila/db/api.py +++ b/manila/db/api.py @@ -1317,6 +1317,12 @@ def share_server_backend_details_set(context, share_server_id, server_details): server_details) +def share_server_backend_details_get_item(context, share_server_id, meta_key): + """Get backend details.""" + return IMPL.share_server_backend_details_get_item(context, share_server_id, + meta_key) + + def share_server_backend_details_delete(context, share_server_id): """Delete backend details DB records for a share server.""" return IMPL.share_server_backend_details_delete(context, share_server_id) diff --git a/manila/db/sqlalchemy/api.py b/manila/db/sqlalchemy/api.py index 480d96ebed..8fb274ea18 100644 --- a/manila/db/sqlalchemy/api.py +++ b/manila/db/sqlalchemy/api.py @@ -5571,6 +5571,17 @@ def share_server_backend_details_set(context, share_server_id, server_details): return server_details +@require_context +@context_manager.reader +def share_server_backend_details_get_item(context, share_server_id, meta_key): + try: + meta_ref = _share_server_backend_details_get_item( + context, share_server_id, meta_key) + except exception.ShareServerBackendDetailsNotFound: + return None + return meta_ref.get('value') + + @require_context @context_manager.writer def share_server_backend_details_delete(context, share_server_id): @@ -5738,7 +5749,7 @@ def network_allocations_get_for_share_server( share_server_id=share_server_id, ) if label: - if label != 'admin': + if label == 'user': query = query.filter(or_( # NOTE(vponomaryov): we treat None as alias for 'user'. models.NetworkAllocation.label == None, # noqa diff --git a/manila/network/neutron/api.py b/manila/network/neutron/api.py index e0d40478d6..c675925566 100644 --- a/manila/network/neutron/api.py +++ b/manila/network/neutron/api.py @@ -66,6 +66,19 @@ CONF = cfg.CONF LOG = log.getLogger(__name__) +class PortBindingAlreadyExistsClient(neutron_client_exc.Conflict): + pass + + +# We need to monkey-patch neutronclient.common.exceptions module, to make +# neutron client to raise error specific exceptions. E.g. exception +# PortBindingAlreadyExistsClient is raised for Neutron API error +# PortBindingAlreadyExists. If not defined, a general exception of type +# Conflict will be raised. +neutron_client_exc.PortBindingAlreadyExistsClient = \ + PortBindingAlreadyExistsClient + + def list_opts(): return client_auth.AuthClientLoader.list_opts(NEUTRON_GROUP) @@ -336,6 +349,32 @@ class API(object): raise exception.NetworkException(code=e.status_code, message=e.message) + def bind_port_to_host(self, port_id, host, vnic_type): + """Add an inactive binding to existing port.""" + + try: + data = {"binding": {"host": host, "vnic_type": vnic_type}} + return self.client.create_port_binding(port_id, data)['binding'] + except neutron_client_exc.PortBindingAlreadyExistsClient as e: + LOG.warning('Port binding already exists: %s', e) + except neutron_client_exc.NeutronClientException as e: + raise exception.NetworkException( + code=e.status_code, message=e.message) + + def delete_port_binding(self, port_id, host): + try: + return self.client.delete_port_binding(port_id, host) + except neutron_client_exc.NeutronClientException as e: + raise exception.NetworkException( + code=e.status_code, message=e.message) + + def activate_port_binding(self, port_id, host): + try: + return self.client.activate_port_binding(port_id, host) + except neutron_client_exc.NeutronClientException as e: + raise exception.NetworkException( + code=e.status_code, message=e.message) + def show_router(self, router_id): try: return self.client.show_router(router_id).get('router', {}) diff --git a/manila/network/neutron/neutron_network_plugin.py b/manila/network/neutron/neutron_network_plugin.py index edd735ea08..dea494c334 100644 --- a/manila/network/neutron/neutron_network_plugin.py +++ b/manila/network/neutron/neutron_network_plugin.py @@ -26,6 +26,7 @@ from manila.i18n import _ from manila import network from manila.network.neutron import api as neutron_api from manila.network.neutron import constants as neutron_constants +from manila.share import utils as share_utils from manila import utils LOG = log.getLogger(__name__) @@ -689,6 +690,70 @@ class NeutronBindNetworkPlugin(NeutronNetworkPlugin): context, port['id'], port_info) return ports + @utils.retry(retry_param=exception.NetworkException, retries=20) + def _wait_for_network_segment(self, share_server, host): + network_id = share_server['share_network_subnet']['neutron_net_id'] + network = self.neutron_api.get_network(network_id) + for segment in network['segments']: + if segment['provider:physical_network'] == ( + self.config.neutron_physical_net_name): + return segment['provider:segmentation_id'] + msg = _('Network segment not found on host %s') % host + raise exception.NetworkException(msg) + + def extend_network_allocations(self, context, share_server): + """Extend network to target host. + + This will create port bindings on target host without activating them. + If network segment does not exist on target host, it will be created. + + :return: list of port bindings with new segmentation id on target host + """ + vnic_type = self.config.neutron_vnic_type + host_id = self.config.neutron_host_id + + active_port_bindings = ( + self.db.network_allocations_get_for_share_server( + context, share_server['id'], label='user')) + if len(active_port_bindings) == 0: + raise exception.NetworkException( + 'Can not extend network with no active bindings') + + # Create port binding on destination backend. It's safe to call neutron + # api bind_port_to_host if the port is already bound to destination + # host. + for port in active_port_bindings: + self.neutron_api.bind_port_to_host(port['id'], host_id, + vnic_type) + + # Wait for network segment to be created on destination host. + vlan = self._wait_for_network_segment(share_server, host_id) + for port in active_port_bindings: + port['segmentation_id'] = vlan + + return active_port_bindings + + def delete_extended_allocations(self, context, share_server): + host_id = self.config.neutron_host_id + ports = self.db.network_allocations_get_for_share_server( + context, share_server['id'], label='user') + for port in ports: + try: + self.neutron_api.delete_port_binding(port['id'], host_id) + except exception.NetworkException as e: + msg = 'Failed to delete port binding on port %{port}s: %{err}s' + LOG.warning(msg, {'port': port['id'], 'err': e}) + + def cutover_network_allocations(self, context, src_share_server): + src_host = share_utils.extract_host(src_share_server['host'], 'host') + dest_host = self.config.neutron_host_id + ports = self.db.network_allocations_get_for_share_server( + context, src_share_server['id'], label='user') + for port in ports: + self.neutron_api.activate_port_binding(port['id'], dest_host) + self.neutron_api.delete_port_binding(port['id'], src_host) + return ports + class NeutronBindSingleNetworkPlugin(NeutronSingleNetworkPlugin, NeutronBindNetworkPlugin): diff --git a/manila/share/drivers/netapp/dataontap/client/client_cmode.py b/manila/share/drivers/netapp/dataontap/client/client_cmode.py index 905d7e05c5..2ebc1c1b3b 100644 --- a/manila/share/drivers/netapp/dataontap/client/client_cmode.py +++ b/manila/share/drivers/netapp/dataontap/client/client_cmode.py @@ -6162,6 +6162,7 @@ class NetAppCmodeClient(client_base.NetAppBaseClient): raise exception.NetAppException(message=e.message) msg = (_('Failed to check migration support. Reason: ' '%s' % e.message)) + LOG.error(msg) raise exception.NetAppException(msg) @na_utils.trace diff --git a/manila/share/manager.py b/manila/share/manager.py index 6bef6a4bed..7183c479da 100644 --- a/manila/share/manager.py +++ b/manila/share/manager.py @@ -113,6 +113,14 @@ share_manager_opts = [ 'the share manager will poll the driver to perform the ' 'next step of migration in the storage backend, for a ' 'migrating share server.'), + cfg.BoolOpt('server_migration_extend_neutron_network', + default=False, + help='If set to True, neutron network are extended to ' + 'destination host during share server migration. This ' + 'option should only be enabled if using ' + 'NeutronNetworkPlugin or its derivatives and when ' + 'multiple bindings of Manila ports are supported by ' + 'Neutron ML2 plugin.'), cfg.IntOpt('share_usage_size_update_interval', default=300, help='This value, specified in seconds, determines how often ' @@ -5670,8 +5678,20 @@ class ShareManager(manager.SchedulerDependentManager): availability_zone_id=service['availability_zone_id'], share_network_id=new_share_network_id)) + extended_allocs = None dest_share_server = None try: + # NOTE: Extend network allocations to destination host, i.e. create + # inactive port bindings on the destination host. Refresh + # network_allocations field in source_share_server with the new + # bindings, so that correct segmentation id is used during + # compatibility check and migration. + if CONF.server_migration_extend_neutron_network: + extended_allocs = ( + self.driver.network_api.extend_network_allocations( + context, source_share_server)) + source_share_server['network_allocations'] = extended_allocs + compatibility = ( self.driver.share_server_migration_check_compatibility( context, source_share_server, dest_host, old_share_network, @@ -5751,6 +5771,10 @@ class ShareManager(manager.SchedulerDependentManager): backend_details = ( server_info.get('backend_details') if server_info else None) + if extended_allocs: + backend_details = backend_details or {} + backend_details['segmentation_id'] = ( + extended_allocs[0]['segmentation_id']) if backend_details: self.db.share_server_backend_details_set( context, dest_share_server['id'], backend_details) @@ -5770,6 +5794,10 @@ class ShareManager(manager.SchedulerDependentManager): context, constants.STATUS_AVAILABLE, share_instance_ids=share_instance_ids, snapshot_instance_ids=snapshot_instance_ids) + # Rollback port bindings on destination host + if extended_allocs: + self.driver.network_api.delete_extended_allocations( + context, source_share_server, dest_share_server) # Rollback read only access rules self._reset_read_only_access_rules_for_server( context, share_instances, source_share_server, @@ -5833,6 +5861,22 @@ class ShareManager(manager.SchedulerDependentManager): service = self.db.service_get_by_args( context, service_host, 'manila-share') + # NOTE: Extend network allocations to destination host, i.e. create + # inactive port bindings on destination host with the same ports in the + # share network subnet. Refresh share_server with new network + # allocations, so that correct segmentation id is used in the + # compatibility check. + if CONF.server_migration_extend_neutron_network: + try: + allocs = self.driver.network_api.extend_network_allocations( + context, share_server) + share_server['network_allocations'] = allocs + except Exception: + LOG.warning( + 'Failed to extend network allocations for ' + 'share server %s.', share_server['id']) + return result + # NOTE(dviroel): We'll build a list of request specs and send it to # the driver so vendors have a chance to validate if the destination # host meets the requirements before starting the migration. @@ -5860,6 +5904,12 @@ class ShareManager(manager.SchedulerDependentManager): # the validations. driver_result['compatible'] = False + # NOTE: Delete port bindings on destination host after compatibility + # check + if CONF.server_migration_extend_neutron_network: + self.driver.network_api.delete_extended_allocations( + context, share_server) + result.update(driver_result) return result @@ -6102,6 +6152,29 @@ class ShareManager(manager.SchedulerDependentManager): dest_snss = self.db.share_network_subnet_get_all_by_share_server_id( context, dest_share_server['id']) + migration_extended_network_allocations = ( + CONF.server_migration_extend_neutron_network) + + # NOTE: Network allocations are extended to the destination host on + # previous (migration_start) step, i.e. port bindings are created on + # destination host with existing ports. The network allocations will be + # cut over on this (migration_complete) step, i.e. port bindings on + # destination host will be activated and bindings on source host will + # be deleted. + if migration_extended_network_allocations: + updated_allocations = ( + self.driver.network_api.cutover_network_allocations( + context, source_share_server)) + segmentation_id = self.db.share_server_backend_details_get_item( + context, dest_share_server['id'], 'segmentation_id') + alloc_update = { + 'segmentation_id': segmentation_id, + 'share_server_id': dest_share_server['id'] + } + subnet_update = { + 'segmentation_id': segmentation_id, + } + migration_reused_network_allocations = (len( self.db.network_allocations_get_for_share_server( context, dest_share_server['id'])) == 0) @@ -6118,7 +6191,14 @@ class ShareManager(manager.SchedulerDependentManager): context, source_share_server, dest_share_server, share_instances, snapshot_instances, new_network_allocations) - if not migration_reused_network_allocations: + if migration_extended_network_allocations: + for alloc in updated_allocations: + self.db.network_allocation_update(context, alloc['id'], + alloc_update) + for subnet in dest_snss: + self.db.share_network_subnet_update(context, subnet['id'], + subnet_update) + elif not migration_reused_network_allocations: network_allocations = [] for net_allocation in new_network_allocations: network_allocations += net_allocation['network_allocations'] @@ -6264,6 +6344,10 @@ class ShareManager(manager.SchedulerDependentManager): context, share_server, dest_share_server, share_instances, snapshot_instances) + if CONF.server_migration_extend_neutron_network: + self.driver.network_api.delete_extended_allocations( + context, share_server) + # NOTE(dviroel): After cancelling the migration we should set the new # share server to INVALID since it may contain an invalid configuration # to be reused. We also cleanup the source_share_server_id to unblock diff --git a/manila/tests/network/neutron/test_neutron_api.py b/manila/tests/network/neutron/test_neutron_api.py index 572529c341..11affea9ef 100644 --- a/manila/tests/network/neutron/test_neutron_api.py +++ b/manila/tests/network/neutron/test_neutron_api.py @@ -45,6 +45,15 @@ class FakeNeutronClient(object): def list_ports(self, **search_opts): pass + def create_port_binding(self, port_id, body): + return body + + def delete_port_binding(self, port_id, host_id): + pass + + def activate_port_binding(self, port_id, host_id): + pass + def list_networks(self): pass @@ -536,6 +545,30 @@ class NeutronApiTest(test.TestCase): port_id, {'port': fixed_ips}) self.assertTrue(clientv20.Client.called) + def test_bind_port_to_host(self): + port_id = 'test_port' + host = 'test_host' + vnic_type = 'test_vnic_type' + + port = self.neutron_api.bind_port_to_host(port_id, host, vnic_type) + + self.assertEqual(host, port['host']) + self.assertTrue(clientv20.Client.called) + + def test_delete_port_binding(self): + port_id = 'test_port' + host = 'test_host' + self.neutron_api.delete_port_binding(port_id, host) + self.assertTrue(clientv20.Client.called) + + def test_activate_port_binding(self): + port_id = 'test_port' + host = 'test_host' + + self.neutron_api.activate_port_binding(port_id, host) + + self.assertTrue(clientv20.Client.called) + def test_router_update_routes(self): # Set up test data router_id = 'test_router' diff --git a/manila/tests/network/neutron/test_neutron_plugin.py b/manila/tests/network/neutron/test_neutron_plugin.py index f9b9c87003..f91063ab8e 100644 --- a/manila/tests/network/neutron/test_neutron_plugin.py +++ b/manila/tests/network/neutron/test_neutron_plugin.py @@ -28,6 +28,7 @@ from manila import exception from manila.network.neutron import api as neutron_api from manila.network.neutron import constants as neutron_constants from manila.network.neutron import neutron_network_plugin as plugin +from manila.share import utils as share_utils from manila import test from manila.tests import utils as test_utils @@ -132,6 +133,11 @@ fake_nw_info = { 'provider:physical_network': 'net1', 'provider:segmentation_id': 3926, }, + { + 'provider:network_type': 'vlan', + 'provider:physical_network': 'net2', + 'provider:segmentation_id': 1249, + }, { 'provider:network_type': 'vxlan', 'provider:physical_network': None, @@ -197,6 +203,23 @@ fake_binding_profile = { 'neutron_switch_info': 'fake switch info' } +fake_network_allocation_ext = { + 'id': 'fake port binding id', + 'share_server_id': fake_share_server['id'], + 'ip_address': fake_neutron_port['fixed_ips'][0]['ip_address'], + 'mac_address': fake_neutron_port['mac_address'], + 'status': constants.STATUS_ACTIVE, + 'label': fake_nw_info['segments'][1]['provider:physical_network'], + 'network_type': fake_share_network_subnet['network_type'], + 'segmentation_id': ( + fake_nw_info['segments'][1]['provider:segmentation_id'] + ), + 'ip_version': fake_share_network_subnet['ip_version'], + 'cidr': fake_share_network_subnet['cidr'], + 'gateway': fake_share_network_subnet['gateway'], + 'mtu': 1509, +} + @ddt.ddt class NeutronNetworkPluginTest(test.TestCase): @@ -1084,6 +1107,122 @@ class NeutronBindNetworkPluginTest(test.TestCase): fake_neutron_port['id'], network_allocation_update_data) + def test_extend_network_allocations(self): + old_network_allocation = copy.deepcopy(fake_network_allocation) + fake_network = copy.deepcopy(fake_neutron_network_multi) + fake_ss = copy.deepcopy(fake_share_server) + fake_ss["share_network_subnet"] = fake_share_network_subnet + + fake_host_id = "fake_host_id" + fake_physical_net = "net2" + fake_port_id = old_network_allocation["id"] + fake_vnic_type = "baremetal" + config_data = { + 'DEFAULT': { + "neutron_host_id": fake_host_id, + "neutron_vnic_type": fake_vnic_type, + 'neutron_physical_net_name': fake_physical_net, + } + } + self.bind_plugin = self._get_neutron_network_plugin_instance( + config_data + ) + self.mock_object( + self.bind_plugin.neutron_api, + "get_network", + mock.Mock(return_value=fake_network), + ) + self.mock_object(self.bind_plugin.neutron_api, "bind_port_to_host") + self.mock_object( + self.bind_plugin.db, + "network_allocations_get_for_share_server", + mock.Mock(return_value=[old_network_allocation])) + + # calling the extend_network_allocations method + self.bind_plugin.extend_network_allocations(self.fake_context, fake_ss) + + # testing the calls, we expect the port to be bound to the current host + # and the new network allocation to be created + self.bind_plugin.neutron_api.bind_port_to_host.assert_called_once_with( + fake_port_id, fake_host_id, fake_vnic_type + ) + + def test_delete_extended_allocations(self): + old_network_allocation = copy.deepcopy(fake_network_allocation) + fake_ss = copy.deepcopy(fake_share_server) + fake_host_id = "fake_host_id" + fake_physical_net = "net2" + fake_port_id = old_network_allocation["id"] + fake_vnic_type = "baremetal" + config_data = { + "DEFAULT": { + "neutron_host_id": fake_host_id, + "neutron_vnic_type": fake_vnic_type, + "neutron_physical_net_name": fake_physical_net, + } + } + + self.bind_plugin = self._get_neutron_network_plugin_instance( + config_data) + self.mock_object(self.bind_plugin.neutron_api, "delete_port_binding") + self.mock_object(self.bind_plugin.db, "network_allocation_delete") + self.mock_object(self.bind_plugin.db, + "network_allocations_get_for_share_server", + mock.Mock(return_value=[old_network_allocation])) + + self.bind_plugin.delete_extended_allocations(self.fake_context, + fake_ss) + neutron_api = self.bind_plugin.neutron_api + neutron_api.delete_port_binding.assert_called_once_with( + fake_port_id, fake_host_id) + + @ddt.unpack + def test_cutover_network_allocation(self): + fake_alloc = copy.deepcopy(fake_network_allocation) + fake_network = copy.deepcopy(fake_neutron_network_multi) + fake_old_ss = copy.deepcopy(fake_share_server) + fake_old_ss["share_network_subnet"] = fake_share_network_subnet + fake_dest_ss = copy.deepcopy(fake_share_server) + fake_dest_ss["host"] = "fake_host2@backend2#pool2" + fake_old_host = share_utils.extract_host(fake_old_ss["host"], "host") + + fake_host_id = "fake_host_id" + fake_physical_net = "net2" + fake_port_id = fake_alloc["id"] + fake_vnic_type = "baremetal" + config_data = { + "DEFAULT": { + "neutron_host_id": fake_host_id, + "neutron_vnic_type": fake_vnic_type, + "neutron_physical_net_name": fake_physical_net, + } + } + self.bind_plugin = self._get_neutron_network_plugin_instance( + config_data) + self.mock_object(self.bind_plugin.neutron_api, "get_network", + mock.Mock(return_value=fake_network)) + self.mock_object(self.bind_plugin.neutron_api, "bind_port_to_host") + self.mock_object(self.bind_plugin.db, "network_allocation_create") + + self.mock_object(self.bind_plugin.db, + "network_allocations_get_for_share_server", + mock.Mock(return_value=[fake_alloc])) + + neutron_api = self.bind_plugin.neutron_api + db_api = self.bind_plugin.db + self.mock_object(neutron_api, "activate_port_binding") + self.mock_object(neutron_api, "delete_port_binding") + self.mock_object(db_api, "network_allocation_update") + self.mock_object(db_api, "network_allocation_delete") + self.mock_object(db_api, "share_network_subnet_update") + + self.bind_plugin.cutover_network_allocations( + self.fake_context, fake_old_ss) + neutron_api.activate_port_binding.assert_called_once_with( + fake_port_id, fake_host_id) + neutron_api.delete_port_binding.assert_called_once_with( + fake_port_id, fake_old_host) + @ddt.data({ 'neutron_binding_profiles': None, 'binding_profiles': {} diff --git a/releasenotes/notes/server-migration-with-network-extension-7433a5c38c8278e4.yaml b/releasenotes/notes/server-migration-with-network-extension-7433a5c38c8278e4.yaml new file mode 100644 index 0000000000..3e64519a4b --- /dev/null +++ b/releasenotes/notes/server-migration-with-network-extension-7433a5c38c8278e4.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + Added support for share server migrations between different physical + networks. It is achieved by creating an inactive port binding on the target + host during share server migration, then cut it over to target host during + migration-complete step.