diff --git a/devstack/lib/ironic b/devstack/lib/ironic index 13a6591d24..83bcfc4171 100644 --- a/devstack/lib/ironic +++ b/devstack/lib/ironic @@ -1590,6 +1590,7 @@ function configure_ironic_conductor { # specific driver interfaces in DevStack iniset $IRONIC_CONF_FILE DEFAULT enabled_power_interfaces "snmp" iniset $IRONIC_CONF_FILE DEFAULT enabled_management_interfaces "noop" + iniset $IRONIC_CONF_FILE pxe enable_netboot_fallback True fi if is_ansible_deploy_enabled; then diff --git a/ironic/common/pxe_utils.py b/ironic/common/pxe_utils.py index 87c9cfab28..3f6ebb0f72 100644 --- a/ironic/common/pxe_utils.py +++ b/ironic/common/pxe_utils.py @@ -782,7 +782,8 @@ def build_pxe_config_options(task, pxe_info, service=False, def build_service_pxe_config(task, instance_image_info, root_uuid_or_disk_id, ramdisk_boot=False, - ipxe_enabled=False): + ipxe_enabled=False, + is_whole_disk_image=None): node = task.node pxe_config_path = get_pxe_config_file_path(node.uuid, ipxe_enabled=ipxe_enabled) @@ -802,12 +803,16 @@ def build_service_pxe_config(task, instance_image_info, pxe_config_template = deploy_utils.get_pxe_config_template(node) create_pxe_config(task, pxe_options, pxe_config_template, ipxe_enabled=ipxe_enabled) - iwdi = node.driver_internal_info.get('is_whole_disk_image') + + if is_whole_disk_image is None: + is_whole_disk_image = node.driver_internal_info.get( + 'is_whole_disk_image') deploy_utils.switch_pxe_config( pxe_config_path, root_uuid_or_disk_id, boot_mode_utils.get_boot_mode(node), - iwdi, deploy_utils.is_trusted_boot_requested(node), + is_whole_disk_image, + deploy_utils.is_trusted_boot_requested(node), deploy_utils.is_iscsi_boot(task), ramdisk_boot, ipxe_enabled=ipxe_enabled) diff --git a/ironic/conf/pxe.py b/ironic/conf/pxe.py index 1a86237a7e..0e8ff5e376 100644 --- a/ironic/conf/pxe.py +++ b/ironic/conf/pxe.py @@ -147,6 +147,15 @@ opts = [ "local HTTP server. 
" "Applicable only when 'ipxe' compatible boot interface " "is used.")), + cfg.BoolOpt('enable_netboot_fallback', + default=False, + mutable=True, + help=_('If True, generate a PXE environment even for nodes ' + 'that use local boot. This is useful when the driver ' + 'cannot switch nodes to local boot, e.g. with SNMP ' + 'or with Redfish on machines that cannot do persistent ' + 'boot. Mostly useful for standalone ironic since ' + 'Neutron will prevent incorrect PXE boot.')), ] diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py index dd610baa1b..a255700d9f 100644 --- a/ironic/drivers/modules/deploy_utils.py +++ b/ironic/drivers/modules/deploy_utils.py @@ -178,7 +178,7 @@ def switch_pxe_config(path, root_uuid_or_disk_id, boot_mode, :param ipxe_enabled: A default False boolean value to tell the method if the caller is using iPXE. """ - if not ramdisk_boot: + if not ramdisk_boot and root_uuid_or_disk_id is not None: if not is_whole_disk_image: _replace_root_uuid(path, root_uuid_or_disk_id) else: diff --git a/ironic/drivers/modules/pxe_base.py b/ironic/drivers/modules/pxe_base.py index 290b005a14..8632a43cca 100644 --- a/ironic/drivers/modules/pxe_base.py +++ b/ironic/drivers/modules/pxe_base.py @@ -301,11 +301,22 @@ class PXEBaseMixin(object): ipxe_enabled=self.ipxe_enabled) boot_device = boot_devices.PXE else: - # If it's going to boot from the local disk, we don't need - # PXE config files. They still need to be generated as part - # of the prepare() because the deployment does PXE boot the - # deploy ramdisk - pxe_utils.clean_up_pxe_config(task, ipxe_enabled=self.ipxe_enabled) + # NOTE(dtantsur): create a PXE configuration as a safety net for + # hardware incapable of persistent boot. If on a reboot it will try + # to boot from PXE, this configuration will return it back. 
+ if CONF.pxe.enable_netboot_fallback: + pxe_utils.build_service_pxe_config( + task, instance_image_info, + task.node.driver_internal_info.get('root_uuid_or_disk_id'), + ipxe_enabled=self.ipxe_enabled, + # PXE config for whole disk images is identical to what + # we need to boot from local disk, so use True even + # for partition images. + is_whole_disk_image=True) + else: + # Clean up the deployment configuration + pxe_utils.clean_up_pxe_config( + task, ipxe_enabled=self.ipxe_enabled) boot_device = boot_devices.DISK # NOTE(pas-ha) do not re-set boot device on ACTIVE nodes diff --git a/ironic/tests/unit/drivers/modules/test_ipxe.py b/ironic/tests/unit/drivers/modules/test_ipxe.py index a9ecdab34c..4385be74b5 100644 --- a/ironic/tests/unit/drivers/modules/test_ipxe.py +++ b/ironic/tests/unit/drivers/modules/test_ipxe.py @@ -857,6 +857,41 @@ class iPXEBootTestCase(db_base.DbTestCase): task, ipxe_enabled=True) self.assertFalse(set_boot_device_mock.called) + @mock.patch.object(manager_utils, 'node_set_boot_device', autospec=True) + @mock.patch.object(pxe_utils, 'clean_up_pxe_config', autospec=True) + @mock.patch.object(deploy_utils, 'switch_pxe_config', autospec=True) + @mock.patch.object(dhcp_factory, 'DHCPFactory', autospec=True) + @mock.patch.object(pxe_utils, 'cache_ramdisk_kernel', autospec=True) + @mock.patch.object(pxe_utils, 'get_instance_image_info', autospec=True) + def test_prepare_instance_localboot_with_fallback( + self, get_image_info_mock, cache_mock, + dhcp_factory_mock, switch_pxe_config_mock, + clean_up_pxe_config_mock, set_boot_device_mock): + self.config(enable_netboot_fallback=True, group='pxe') + with task_manager.acquire(self.context, self.node.uuid) as task: + task.node.instance_info = task.node.instance_info + task.node.instance_info['capabilities'] = {'boot_option': 'local'} + task.node.driver_internal_info['root_uuid_or_disk_id'] = ( + "30212642-09d3-467f-8e09-21685826ab50") + task.node.driver_internal_info['is_whole_disk_image'] = False + 
pxe_config_path = pxe_utils.get_pxe_config_file_path( + task.node.uuid, ipxe_enabled=True) + + task.driver.boot.prepare_instance(task) + + set_boot_device_mock.assert_called_once_with(task, + boot_devices.DISK, + persistent=True) + switch_pxe_config_mock.assert_called_once_with( + pxe_config_path, "30212642-09d3-467f-8e09-21685826ab50", + 'bios', True, False, False, False, ipxe_enabled=True) + # No clean up + self.assertFalse(clean_up_pxe_config_mock.called) + # No netboot configuration beyond the PXE files + self.assertFalse(get_image_info_mock.called) + self.assertFalse(cache_mock.called) + self.assertFalse(dhcp_factory_mock.return_value.update_dhcp.called) + @mock.patch.object(pxe_utils, 'clean_up_pxe_env', autospec=True) @mock.patch.object(pxe_utils, 'get_instance_image_info', autospec=True) def test_clean_up_instance(self, get_image_info_mock, diff --git a/releasenotes/notes/netboot-fallback-b208b2c3b40a0d01.yaml b/releasenotes/notes/netboot-fallback-b208b2c3b40a0d01.yaml new file mode 100644 index 0000000000..637464b88d --- /dev/null +++ b/releasenotes/notes/netboot-fallback-b208b2c3b40a0d01.yaml @@ -0,0 +1,12 @@ +--- +issues: + - | + The SNMP hardware type cannot change boot devices and thus may fail + to deploy nodes with local boot. To work around this problem, set + ``[pxe]enable_netboot_fallback`` to ``True``. +features: + - | + Adds an ability to generate network boot templates even for nodes that + use local boot via the new ``[pxe]enable_netboot_fallback`` option. + This is required to work around the situation when switching boot devices + does not work reliably. 
diff --git a/zuul.d/ironic-jobs.yaml b/zuul.d/ironic-jobs.yaml index 60a3e83758..de5a5df607 100644 --- a/zuul.d/ironic-jobs.yaml +++ b/zuul.d/ironic-jobs.yaml @@ -309,9 +309,6 @@ IRONIC_TEMPEST_WHOLE_DISK_IMAGE: True IRONIC_VM_EPHEMERAL_DISK: 0 IRONIC_AUTOMATED_CLEAN_ENABLED: False - # NOTE(dtantsur): the snmp hardware type does not have a management - # interface, thus we cannot switch to local boot on deploy. - IRONIC_DEFAULT_BOOT_OPTION: netboot - job: name: ironic-tempest-ipa-partition-uefi-pxe_ipmitool