From f73cd449a99b59487ea471dd14fa4d764665057e Mon Sep 17 00:00:00 2001 From: asarfaty Date: Tue, 14 Jul 2020 13:16:23 +0200 Subject: [PATCH] MP2P migration: misc fixes - Add elapsed time logging - Skip irrelevant retries of nsx requests - Retrieve only tier0/tier1 logical routers whenever relevant - Update max number of tier1 logical ports to migrate Change-Id: If6878e20701312daf66be1a035db4614013c7e47 --- vmware_nsx/plugins/common_v3/utils.py | 17 ++++- .../admin/plugins/nsxp/resources/utils.py | 13 ++-- .../plugins/nsxv3/resources/migration.py | 72 +++++++++++++------ .../admin/plugins/nsxv3/resources/utils.py | 6 +- 4 files changed, 78 insertions(+), 30 deletions(-) diff --git a/vmware_nsx/plugins/common_v3/utils.py b/vmware_nsx/plugins/common_v3/utils.py index 052a414e4a..98d0dc51bf 100644 --- a/vmware_nsx/plugins/common_v3/utils.py +++ b/vmware_nsx/plugins/common_v3/utils.py @@ -133,7 +133,8 @@ def get_client_cert_provider(conf_path=cfg.CONF.nsx_v3): def get_nsxlib_wrapper(nsx_username=None, nsx_password=None, basic_auth=False, - plugin_conf=None, allow_overwrite_header=False): + plugin_conf=None, allow_overwrite_header=False, + retriable_exceptions=None): if not plugin_conf: plugin_conf = cfg.CONF.nsx_v3 @@ -142,6 +143,9 @@ def get_nsxlib_wrapper(nsx_username=None, nsx_password=None, basic_auth=False, # if basic auth requested, dont use cert file even if provided client_cert_provider = get_client_cert_provider(conf_path=plugin_conf) + exception_config = config.ExceptionConfig() + if retriable_exceptions: + exception_config.retriables = retriable_exceptions nsxlib_config = config.NsxLibConfig( username=nsx_username or plugin_conf.nsx_api_user, password=nsx_password or plugin_conf.nsx_api_password, @@ -161,12 +165,14 @@ def get_nsxlib_wrapper(nsx_username=None, nsx_password=None, basic_auth=False, plugin_ver=n_version.version_info.release_string(), dns_nameservers=cfg.CONF.nsx_v3.nameservers, dns_domain=cfg.CONF.nsx_v3.dns_domain, - 
allow_overwrite_header=allow_overwrite_header) + allow_overwrite_header=allow_overwrite_header, + exception_config=exception_config) return v3.NsxLib(nsxlib_config) def get_nsxpolicy_wrapper(nsx_username=None, nsx_password=None, - basic_auth=False, conf_path=None): + basic_auth=False, conf_path=None, + retriable_exceptions=None): if not conf_path: conf_path = cfg.CONF.nsx_p client_cert_provider = None @@ -175,6 +181,10 @@ def get_nsxpolicy_wrapper(nsx_username=None, nsx_password=None, client_cert_provider = get_client_cert_provider( conf_path=conf_path) + exception_config = config.ExceptionConfig() + if retriable_exceptions: + exception_config.retriables = retriable_exceptions + nsxlib_config = config.NsxLibConfig( username=nsx_username or conf_path.nsx_api_user, password=nsx_password or conf_path.nsx_api_password, @@ -194,6 +204,7 @@ def get_nsxpolicy_wrapper(nsx_username=None, nsx_password=None, plugin_ver=n_version.version_info.release_string(), dns_nameservers=conf_path.nameservers, dns_domain=conf_path.dns_domain, + exception_config=exception_config, allow_passthrough=(conf_path.allow_passthrough if hasattr(conf_path, 'allow_passthrough') else False), diff --git a/vmware_nsx/shell/admin/plugins/nsxp/resources/utils.py b/vmware_nsx/shell/admin/plugins/nsxp/resources/utils.py index de968c4fa7..71ccfba31d 100644 --- a/vmware_nsx/shell/admin/plugins/nsxp/resources/utils.py +++ b/vmware_nsx/shell/admin/plugins/nsxp/resources/utils.py @@ -42,6 +42,7 @@ def get_nsxp_client(nsx_username=None, nsx_password=None, def get_connected_nsxpolicy(nsx_username=None, nsx_password=None, use_basic_auth=False, conf_path=None, + retriable_exceptions=None, verbose=False): global _NSXPOLICY @@ -54,12 +55,14 @@ def get_connected_nsxpolicy(nsx_username=None, nsx_password=None, if not verbose: # Return logs to normal logging.disable(logging.NOTSET) - return v3_utils.get_nsxpolicy_wrapper(nsx_username, - nsx_password, - use_basic_auth, - conf_path=conf_path) + return 
v3_utils.get_nsxpolicy_wrapper( + nsx_username, nsx_password, use_basic_auth, + conf_path=conf_path, + retriable_exceptions=retriable_exceptions) if _NSXPOLICY is None: - _NSXPOLICY = v3_utils.get_nsxpolicy_wrapper(conf_path=conf_path) + _NSXPOLICY = v3_utils.get_nsxpolicy_wrapper( + conf_path=conf_path, + retriable_exceptions=retriable_exceptions) if not verbose: # Return logs to normal logging.disable(logging.NOTSET) diff --git a/vmware_nsx/shell/admin/plugins/nsxv3/resources/migration.py b/vmware_nsx/shell/admin/plugins/nsxv3/resources/migration.py index caff4f37fd..0cca731fd4 100644 --- a/vmware_nsx/shell/admin/plugins/nsxv3/resources/migration.py +++ b/vmware_nsx/shell/admin/plugins/nsxv3/resources/migration.py @@ -42,6 +42,7 @@ from vmware_nsx.shell import resources as shell from vmware_nsxlib.v3 import core_resources as nsx_resources from vmware_nsxlib.v3 import exceptions as nsxlib_exc from vmware_nsxlib.v3 import load_balancer as nsxlib_lb +from vmware_nsxlib.v3 import nsx_constants from vmware_nsxlib.v3.policy import constants as policy_constants from vmware_nsxlib.v3.policy import core_resources as policy_resources from vmware_nsxlib.v3.policy import utils as policy_utils @@ -62,8 +63,8 @@ STATUS_ALLOW_MIGRATION_REQ = set([ MIGRATE_LIMIT_NO_LIMIT = 0 MIGRATE_LIMIT_TIER0 = 1 MIGRATE_LIMIT_TIER0_PORTS = 1000 -MIGRATE_LIMIT_TIER1 = 500 -MIGRATE_LIMIT_TIER1_PORTS = 5 +MIGRATE_LIMIT_TIER1 = 1000 +MIGRATE_LIMIT_TIER1_PORTS = 1000 MIGRATE_LIMIT_NAT = 1500 MIGRATE_LIMIT_DHCP_SERVER = 1500 MIGRATE_LIMIT_MD_PROXY = 1500 @@ -87,6 +88,8 @@ ROLLBACK_DATA = [] EDGE_FW_SEQ = 1 DFW_SEQ = 1 +SERVICE_UP_RETRIES = 30 + def start_migration_process(nsxlib): """Notify the manager that the migration process is starting""" @@ -140,27 +143,33 @@ def change_migration_service_status(start=True, nsxlib=None): if start and nsxlib: LOG.info("Waiting for the service to be up...") + start_time = time.time() @tenacity.retry(reraise=True, 
retry=tenacity.retry_if_exception_type(Exception), wait=tenacity.wait_exponential(multiplier=0.5, max=2), - stop=tenacity.stop_after_attempt( - cfg.CONF.nsx_v3.retries)) + stop=tenacity.stop_after_attempt(SERVICE_UP_RETRIES)) def get_migration_status_with_retry(nsxlib): get_migration_status(nsxlib, silent=True) - get_migration_status_with_retry(nsxlib) - LOG.info("The service is up") + try: + get_migration_status_with_retry(nsxlib) + except Exception: + raise Exception("The migration service did not get up after %s " + "retries" % SERVICE_UP_RETRIES) + + elapsed_time = time.time() - start_time + LOG.info("The service is up (waited %s seconds)", elapsed_time) def ensure_migration_state_ready(nsxlib, with_abort=False): try: status = get_migration_status(nsxlib, silent=True) - except nsxlib_exc.CannotConnectToServer as e: - LOG.debug("Failed to get migration status: %s", e) + except Exception as e: if with_abort: change_migration_service_status(start=True, nsxlib=nsxlib) return ensure_migration_state_ready(nsxlib) + LOG.debug("Failed to get migration status: %s", e) return False if status["overall_migration_status"] not in STATUS_ALLOW_MIGRATION_REQ: @@ -220,12 +229,16 @@ def get_resource_migration_data(nsxlib_resource, neutron_id_tags, printable_name=None, policy_resource_get=None, policy_id_callback=None, metadata_callback=None, - skip_policy_path_check=False): + skip_policy_path_check=False, + nsxlib_list_args=None): if not printable_name: printable_name = resource_type LOG.debug("Getting data for MP %s", printable_name) - resources = nsxlib_resource.list() + if nsxlib_list_args: + resources = nsxlib_resource.list(**nsxlib_list_args) + else: + resources = nsxlib_resource.list() if not isinstance(resources, list): # the nsxlib resources list return inconsistent type of result resources = resources.get('results', []) @@ -422,7 +435,8 @@ def migrate_tier0s(nsxlib, nsxpolicy, plugin): entries = get_resource_migration_data( nsxlib.logical_router, None, 'TIER0', 
resource_condition=cond, - policy_resource_get=nsxpolicy.tier0.get) + policy_resource_get=nsxpolicy.tier0.get, + nsxlib_list_args={'router_type': nsx_constants.ROUTER_TYPE_TIER0}) migrate_resource(nsxlib, 'TIER0', entries, MIGRATE_LIMIT_TIER0, use_admin=True) migrated_tier0s = [entry['manager_id'] for entry in entries] @@ -664,7 +678,8 @@ def migrate_routers(nsxlib, nsxpolicy): nsxlib.logical_router, ['os-neutron-router-id'], 'TIER1', - policy_resource_get=nsxpolicy.tier1.get) + policy_resource_get=nsxpolicy.tier1.get, + nsxlib_list_args={'router_type': nsx_constants.ROUTER_TYPE_TIER1}) migrate_resource(nsxlib, 'TIER1', entries, MIGRATE_LIMIT_TIER1) migrated_routers = [entry['manager_id'] for entry in entries] return migrated_routers @@ -736,13 +751,15 @@ def migrate_routers_config(nsxlib, nsxpolicy, plugin, migrated_routers): policy_id_callback=get_policy_id, resource_condition=cond, metadata_callback=add_metadata, - skip_policy_path_check=True) + skip_policy_path_check=True, + nsxlib_list_args={'router_type': nsx_constants.ROUTER_TYPE_TIER1}) migrate_resource(nsxlib, 'TIER1_LOGICAL_ROUTER_PORT', entries, MIGRATE_LIMIT_TIER1_PORTS) # Migrate NAT rules per neutron tier1 entries = [] - tier1s = nsxlib.logical_router.list()['results'] + tier1s = nsxlib.logical_router.list( + router_type=nsx_constants.ROUTER_TYPE_TIER1)['results'] ctx = context.get_admin_context() for tier1 in tier1s: # skip routers that were not migrated in this script call @@ -854,7 +871,8 @@ def migrate_tier0_config(nsxlib, nsxpolicy, tier0s): 'TIER0_LOGICAL_ROUTER_CONFIG', policy_id_callback=get_policy_id, resource_condition=cond, - skip_policy_path_check=True) + skip_policy_path_check=True, + nsxlib_list_args={'router_type': nsx_constants.ROUTER_TYPE_TIER0}) migrate_resource(nsxlib, 'TIER0_LOGICAL_ROUTER_CONFIG', entries, MIGRATE_LIMIT_TIER0, use_admin=True) @@ -1089,8 +1107,7 @@ def migrate_t_resources_2_p(nsxlib, nsxpolicy, plugin): return False # Initialize the migration process - if not 
ensure_migration_state_ready( - nsxlib, with_abort=True): + if not ensure_migration_state_ready(nsxlib, with_abort=True): return False try: @@ -1450,34 +1467,47 @@ def t_2_p_migration(resource, event, trigger, **kwargs): "in the configuration") return - nsxlib = utils.get_connected_nsxlib( - verbose=verbose, allow_overwrite_header=True) + retriables = [nsxlib_exc.APITransactionAborted, + nsxlib_exc.ServerBusy] + nsxlib = utils.get_connected_nsxlib(verbose=verbose, + allow_overwrite_header=True, + retriable_exceptions=retriables) nsxpolicy = p_utils.get_connected_nsxpolicy( - conf_path=cfg.CONF.nsx_v3) + conf_path=cfg.CONF.nsx_v3, retriable_exceptions=retriables) # Also create a policy manager with admin user to manipulate admin-defined # resources which should not have neutron principal identity nsxpolicy_admin = p_utils.get_connected_nsxpolicy( conf_path=cfg.CONF.nsx_v3, use_basic_auth=True, nsx_username=cfg.CONF.nsx_v3.nsx_api_user, - nsx_password=cfg.CONF.nsx_v3.nsx_api_password) + nsx_password=cfg.CONF.nsx_v3.nsx_api_password, + retriable_exceptions=retriables) with utils.NsxV3PluginWrapper(verbose=verbose) as plugin: # Make sure FWaaS was initialized plugin.init_fwaas_for_admin_utils() + start_time = time.time() if not pre_migration_checks(nsxlib, plugin): # Failed LOG.error("T2P migration cannot run. Please fix the configuration " "and try again\n\n") return + elapsed_time = time.time() - start_time + LOG.debug("Pre-migration took %s seconds", elapsed_time) + start_time = time.time() if not migrate_t_resources_2_p(nsxlib, nsxpolicy, plugin): # Failed LOG.error("T2P migration failed. 
Aborting\n\n") return + elapsed_time = time.time() - start_time + LOG.debug("Migration took %s seconds", elapsed_time) + start_time = time.time() post_migration_actions(nsxlib, nsxpolicy, nsxpolicy_admin, plugin) + elapsed_time = time.time() - start_time + LOG.debug("Post-migration took %s seconds", elapsed_time) LOG.info("T2P migration completed successfully\n\n") diff --git a/vmware_nsx/shell/admin/plugins/nsxv3/resources/utils.py b/vmware_nsx/shell/admin/plugins/nsxv3/resources/utils.py index 14a037fd99..9235f77a6e 100644 --- a/vmware_nsx/shell/admin/plugins/nsxv3/resources/utils.py +++ b/vmware_nsx/shell/admin/plugins/nsxv3/resources/utils.py @@ -49,6 +49,7 @@ def get_connected_nsxlib(nsx_username=None, nsx_password=None, use_basic_auth=False, plugin_conf=None, allow_overwrite_header=False, + retriable_exceptions=None, verbose=False): global _NSXLIB @@ -64,11 +65,14 @@ def get_connected_nsxlib(nsx_username=None, nsx_password=None, return v3_utils.get_nsxlib_wrapper( nsx_username, nsx_password, use_basic_auth, plugin_conf=plugin_conf, + retriable_exceptions=retriable_exceptions, allow_overwrite_header=allow_overwrite_header) + if _NSXLIB is None: _NSXLIB = v3_utils.get_nsxlib_wrapper( plugin_conf=plugin_conf, - allow_overwrite_header=allow_overwrite_header) + allow_overwrite_header=allow_overwrite_header, + retriable_exceptions=retriable_exceptions) if not verbose: # Return logs to normal