# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Base PXE Interface Methods
"""

from futurist import periodics
from ironic_lib import metrics_utils
from oslo_config import cfg
from oslo_log import log as logging

from ironic.common import boot_devices
from ironic.common import dhcp_factory
from ironic.common import exception
from ironic.common.glance_service import service_utils
from ironic.common.i18n import _
from ironic.common import pxe_utils
from ironic.common import states
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.drivers.modules import boot_mode_utils
from ironic.drivers.modules import deploy_utils
from ironic.drivers import utils as driver_utils

CONF = cfg.CONF

LOG = logging.getLogger(__name__)

METRICS = metrics_utils.get_metrics_logger(__name__)

# driver_info properties required to boot the deploy ramdisk.
REQUIRED_PROPERTIES = {
    'deploy_kernel': _("UUID (from Glance) of the deployment kernel. "
                       "Required."),
    'deploy_ramdisk': _("UUID (from Glance) of the ramdisk that is "
                        "mounted at boot time. Required."),
}
# driver_info properties required only for rescue mode.
RESCUE_PROPERTIES = {
    'rescue_kernel': _('UUID (from Glance) of the rescue kernel. This value '
                       'is required for rescue mode.'),
    'rescue_ramdisk': _('UUID (from Glance) of the rescue ramdisk with agent '
                        'that is used at node rescue time. This value is '
                        'required for rescue mode.'),
}
# Union of required, optional and rescue properties; this is what
# PXEBaseMixin.get_properties() returns.
COMMON_PROPERTIES = REQUIRED_PROPERTIES.copy()
COMMON_PROPERTIES.update(driver_utils.OPTIONAL_PROPERTIES)
COMMON_PROPERTIES.update(RESCUE_PROPERTIES)


class PXEBaseMixin(object):
    """Mixin with the boot methods shared by PXE-style boot interfaces.

    Every helper call is parameterized with ``ipxe_enabled`` so that a
    subclass can switch between plain PXE and iPXE behaviour by overriding
    that single class attribute.
    """

    # Whether the iPXE flavour of every pxe_utils helper should be used.
    ipxe_enabled = False

    def get_properties(self):
        """Return the properties of the interface.

        :returns: dictionary of <property name>:<property description>
            entries.
        """
        return COMMON_PROPERTIES

    @METRICS.timer('PXEBaseMixin.clean_up_ramdisk')
    def clean_up_ramdisk(self, task):
        """Cleans up the boot of ironic ramdisk.

        This method cleans up the PXE environment that was setup for booting
        the deploy or rescue ramdisk. It unlinks the deploy/rescue
        kernel/ramdisk in the node's directory in tftproot and removes its
        PXE config.

        Whether the deploy or the rescue images are cleaned up is not a
        parameter: it is derived from the node through
        ``deploy_utils.rescue_or_deploy_mode``.

        :param task: a task from TaskManager.
        :returns: None
        """
        node = task.node
        mode = deploy_utils.rescue_or_deploy_mode(node)
        try:
            images_info = pxe_utils.get_image_info(
                node, mode=mode, ipxe_enabled=self.ipxe_enabled)
        except exception.MissingParameterValue as e:
            # Best effort: without the image info there is nothing we can
            # locate to clean up, so only warn.
            LOG.warning('Could not get %(mode)s image info '
                        'to clean up images for node %(node)s: %(err)s',
                        {'mode': mode, 'node': node.uuid, 'err': e})
        else:
            pxe_utils.clean_up_pxe_env(
                task, images_info, ipxe_enabled=self.ipxe_enabled)

    @METRICS.timer('PXEBaseMixin.clean_up_instance')
    def clean_up_instance(self, task):
        """Cleans up the boot of instance.

        This method cleans up the environment that was setup for booting
        the instance. It unlinks the instance kernel/ramdisk in node's
        directory in tftproot and removes the PXE config. Secure boot
        deconfiguration is attempted regardless of whether the image
        information could be obtained.

        :param task: a task from TaskManager.
        :returns: None
        """
        node = task.node

        try:
            images_info = pxe_utils.get_instance_image_info(
                task, ipxe_enabled=self.ipxe_enabled)
        except exception.MissingParameterValue as e:
            # Best effort: only warn when the image info is unavailable.
            LOG.warning('Could not get instance image info '
                        'to clean up images for node %(node)s: %(err)s',
                        {'node': node.uuid, 'err': e})
        else:
            pxe_utils.clean_up_pxe_env(task, images_info,
                                       ipxe_enabled=self.ipxe_enabled)

        boot_mode_utils.deconfigure_secure_boot_if_needed(task)

    @METRICS.timer('PXEBaseMixin.prepare_ramdisk')
    def prepare_ramdisk(self, task, ramdisk_params):
        """Prepares the boot of Ironic ramdisk using PXE.

        This method prepares the boot of the deploy or rescue kernel/ramdisk
        after reading relevant information from the node's driver_info and
        instance_info.

        :param task: a task from TaskManager.
        :param ramdisk_params: the parameters to be passed to the ramdisk.
            pxe driver passes these parameters as kernel command-line
            arguments.
        :returns: None
        :raises: MissingParameterValue, if some information is missing in
            node's driver_info or instance_info.
        :raises: InvalidParameterValue, if some information provided is
            invalid.
        :raises: IronicException, if some power or set boot device
            operation failed on the node.
        """
        node = task.node

        # Label indicating a deploy or rescue operation being carried out on
        # the node, 'deploy' or 'rescue'. Unless the node is in a rescue like
        # state, the mode is set to 'deploy', indicating deploy operation is
        # being carried out.
        mode = deploy_utils.rescue_or_deploy_mode(node)

        if self.ipxe_enabled:
            # NOTE(mjturek): At this point, the ipxe boot script should
            # already exist as it is created at startup time. However, we
            # call the boot script create method here to assert its
            # existence and handle the unlikely case that it wasn't created
            # or was deleted.
            pxe_utils.create_ipxe_boot_script()

        # Generate options for both IPv4 and IPv6, and they can be
        # filtered down later based upon the port options.
        # TODO(TheJulia): This should be re-tooled during the Victoria
        # development cycle so that we call a single method and return
        # combined options. The method we currently call is relied upon
        # by two external projects, so changing the behavior is not ideal.
        dhcp_opts = pxe_utils.dhcp_options_for_instance(
            task, ipxe_enabled=self.ipxe_enabled, ip_version=4)
        dhcp_opts += pxe_utils.dhcp_options_for_instance(
            task, ipxe_enabled=self.ipxe_enabled, ip_version=6)
        provider = dhcp_factory.DHCPFactory()
        provider.update_dhcp(task, dhcp_opts)

        pxe_info = pxe_utils.get_image_info(node, mode=mode,
                                            ipxe_enabled=self.ipxe_enabled)

        # NOTE: Try to validate and fetch instance images only
        # if we are in DEPLOYING state.
        if node.provision_state == states.DEPLOYING:
            pxe_info.update(
                pxe_utils.get_instance_image_info(
                    task, ipxe_enabled=self.ipxe_enabled))

        boot_mode_utils.sync_boot_mode(task)

        pxe_options = pxe_utils.build_pxe_config_options(
            task, pxe_info, ipxe_enabled=self.ipxe_enabled,
            ramdisk_params=ramdisk_params)
        # TODO(dtantsur): backwards compatibility hack, remove in the V
        # release
        if ramdisk_params.get("ipa-api-url"):
            pxe_options["ipa-api-url"] = ramdisk_params["ipa-api-url"]

        if self.ipxe_enabled:
            pxe_config_template = deploy_utils.get_ipxe_config_template(node)
        else:
            pxe_config_template = deploy_utils.get_pxe_config_template(node)

        pxe_utils.create_pxe_config(task, pxe_options,
                                    pxe_config_template,
                                    ipxe_enabled=self.ipxe_enabled)
        manager_utils.node_set_boot_device(task, boot_devices.PXE,
                                           persistent=False)

        if self.ipxe_enabled and CONF.pxe.ipxe_use_swift:
            # With ipxe_use_swift the <mode> kernel/ramdisk are dropped from
            # pxe_info so that they are not cached locally below.
            kernel_label = '%s_kernel' % mode
            ramdisk_label = '%s_ramdisk' % mode
            pxe_info.pop(kernel_label, None)
            pxe_info.pop(ramdisk_label, None)

        if pxe_info:
            pxe_utils.cache_ramdisk_kernel(task, pxe_info,
                                           ipxe_enabled=self.ipxe_enabled)

        LOG.debug('Ramdisk (i)PXE boot for node %(node)s has been prepared '
                  'with kernel params %(params)s',
                  {'node': node.uuid, 'params': pxe_options})

    @METRICS.timer('PXEBaseMixin.prepare_instance')
    def prepare_instance(self, task):
        """Prepares the boot of instance.

        This method prepares the boot of the instance after reading
        relevant information from the node's instance_info. In case of
        netboot, it updates the dhcp entries and switches the PXE config.
        In case of localboot, it cleans up the PXE config.

        :param task: a task from TaskManager.
        :returns: None
        """
        boot_mode_utils.sync_boot_mode(task)
        boot_mode_utils.configure_secure_boot_if_needed(task)

        node = task.node
        boot_option = deploy_utils.get_boot_option(node)
        ramdisk_boot = boot_option == "ramdisk"
        anaconda_boot = boot_option == "kickstart"
        boot_device = None
        instance_image_info = {}

        if ramdisk_boot or anaconda_boot:
            instance_image_info = pxe_utils.get_instance_image_info(
                task, ipxe_enabled=self.ipxe_enabled)
            pxe_utils.cache_ramdisk_kernel(task, instance_image_info,
                                           ipxe_enabled=self.ipxe_enabled)
            if 'ks_template' in instance_image_info:
                ks_cfg = pxe_utils.validate_kickstart_template(
                    instance_image_info['ks_template'][1]
                )
                pxe_utils.validate_kickstart_file(ks_cfg)

        # Evaluated once and reused for both the branch decision and the
        # argument passed to prepare_instance_pxe_config().
        iscsi_boot = deploy_utils.is_iscsi_boot(task)

        if iscsi_boot or ramdisk_boot or anaconda_boot:
            pxe_utils.prepare_instance_pxe_config(
                task, instance_image_info,
                iscsi_boot=iscsi_boot,
                ramdisk_boot=ramdisk_boot,
                anaconda_boot=anaconda_boot,
                ipxe_enabled=self.ipxe_enabled)
            pxe_utils.prepare_instance_kickstart_config(
                task, instance_image_info,
                anaconda_boot=anaconda_boot)
            boot_device = boot_devices.PXE

        elif boot_option != "local":
            if task.driver.storage.should_write_image(task):
                # Make sure that the instance kernel/ramdisk is cached.
                # This is for the takeover scenario for active nodes.
                instance_image_info = pxe_utils.get_instance_image_info(
                    task, ipxe_enabled=self.ipxe_enabled)
                pxe_utils.cache_ramdisk_kernel(task, instance_image_info,
                                               ipxe_enabled=self.ipxe_enabled)

            # If it's going to PXE boot we need to update the DHCP server
            dhcp_opts = pxe_utils.dhcp_options_for_instance(
                task, ipxe_enabled=self.ipxe_enabled, ip_version=4)
            dhcp_opts += pxe_utils.dhcp_options_for_instance(
                task, ipxe_enabled=self.ipxe_enabled, ip_version=6)
            provider = dhcp_factory.DHCPFactory()
            provider.update_dhcp(task, dhcp_opts)

            iwdi = task.node.driver_internal_info.get('is_whole_disk_image')
            try:
                root_uuid_or_disk_id = task.node.driver_internal_info[
                    'root_uuid_or_disk_id'
                ]
            except KeyError:
                if not task.driver.storage.should_write_image(task):
                    # No image was written, so no root UUID is expected.
                    pass
                elif not iwdi:
                    LOG.warning("The UUID for the root partition can't be "
                                "found, unable to switch the pxe config from "
                                "deployment mode to service (boot) mode for "
                                "node %(node)s", {"node": task.node.uuid})
                else:
                    LOG.warning("The disk id for the whole disk image can't "
                                "be found, unable to switch the pxe config "
                                "from deployment mode to service (boot) mode "
                                "for node %(node)s. Booting the instance "
                                "from disk.", {"node": task.node.uuid})
                    pxe_utils.clean_up_pxe_config(
                        task, ipxe_enabled=self.ipxe_enabled)
                    boot_device = boot_devices.DISK
            else:
                pxe_utils.build_service_pxe_config(
                    task, instance_image_info, root_uuid_or_disk_id,
                    ipxe_enabled=self.ipxe_enabled)
                boot_device = boot_devices.PXE

        else:
            # NOTE(dtantsur): create a PXE configuration as a safety net for
            # hardware incapable of persistent boot. If on a reboot it will
            # try to boot from PXE, this configuration will return it back.
            if CONF.pxe.enable_netboot_fallback:
                pxe_utils.build_service_pxe_config(
                    task, instance_image_info,
                    task.node.driver_internal_info.get('root_uuid_or_disk_id'),
                    ipxe_enabled=self.ipxe_enabled,
                    # PXE config for whole disk images is identical to what
                    # we need to boot from local disk, so use True even
                    # for partition images.
                    is_whole_disk_image=True)
            else:
                # Clean up the deployment configuration
                pxe_utils.clean_up_pxe_config(
                    task, ipxe_enabled=self.ipxe_enabled)
            boot_device = boot_devices.DISK

        # NOTE(pas-ha) do not re-set boot device on ACTIVE nodes
        # during takeover
        if boot_device and task.node.provision_state != states.ACTIVE:
            manager_utils.node_set_boot_device(task, boot_device,
                                               persistent=True)

    def _validate_common(self, task):
        """Run the checks shared by ``validate`` and ``validate_inspection``.

        Verifies that the node has at least one MAC address, that the HTTP
        URL and root are configured when iPXE or the 'kickstart' boot option
        is in use, that a default kickstart template is configured for
        'kickstart', that capabilities (including trusted boot) are valid,
        and that 'image_source' and 'boot_iso' are not both set. Finally the
        node's driver_info is parsed through ``pxe_utils.parse_driver_info``.

        :param task: a task from TaskManager.
        :raises: MissingParameterValue, if a required value is missing.
        :raises: InvalidParameterValue, if both 'image_source' and
            'boot_iso' are set in instance_info.
        """
        node = task.node

        if not driver_utils.get_node_mac_addresses(task):
            raise exception.MissingParameterValue(
                _("Node %s does not have any port associated with it.")
                % node.uuid)

        if self.ipxe_enabled:
            if not CONF.deploy.http_url or not CONF.deploy.http_root:
                raise exception.MissingParameterValue(_(
                    "iPXE boot is enabled but no HTTP URL or HTTP "
                    "root was specified."))

        # NOTE(zer0c00l): When 'kickstart' boot option is used we need to
        # store kickstart and squashfs files in http_root directory. These
        # files will be eventually requested by anaconda installer during
        # deployment over http(s).
        if deploy_utils.get_boot_option(node) == 'kickstart':
            if not CONF.deploy.http_url or not CONF.deploy.http_root:
                raise exception.MissingParameterValue(_(
                    "'kickstart' boot option is set on the node but no HTTP "
                    "URL or HTTP root was specified."))

            if not CONF.anaconda.default_ks_template:
                raise exception.MissingParameterValue(_(
                    "'kickstart' boot option is set on the node but no "
                    "default kickstart template is specified."))

        # Check the trusted_boot capabilities value.
        deploy_utils.validate_capabilities(node)
        if deploy_utils.is_trusted_boot_requested(node):
            # Check if 'boot_option' and boot mode is compatible with
            # trusted boot.
            if self.ipxe_enabled:
                # NOTE(TheJulia): So in theory (huge theory here, not put to
                # practice or tested), that one can define the kernel as tboot
                # and define the actual kernel and ramdisk as appended data.
                # Similar to how one can iPXE load the XEN hypervisor.
                # tboot mailing list seem to indicate pxe/ipxe support, or
                # more specifically avoiding breaking the scenarios of use,
                # but there is also no definitive documentation on the
                # subject.
                LOG.warning('Trusted boot has been requested for %(node)s in '
                            'concert with iPXE. This is not a supported '
                            'configuration for an ironic deployment.',
                            {'node': node.uuid})
            pxe_utils.validate_boot_parameters_for_trusted_boot(node)

        # Check if we have invalid parameters being passed which will not work
        # for ramdisk configurations.
        if (node.instance_info.get('image_source')
                and node.instance_info.get('boot_iso')):
            raise exception.InvalidParameterValue(_(
                "An 'image_source' and 'boot_iso' parameter may not be "
                "specified at the same time."))

        pxe_utils.parse_driver_info(node)

    @METRICS.timer('PXEBaseMixin.validate')
    def validate(self, task):
        """Validate the PXE-specific info for booting deploy/instance images.

        This method validates the PXE-specific info for booting the
        ramdisk and instance on the node.  If invalid, raises an
        exception; otherwise returns None.

        :param task: a task from TaskManager.
        :returns: None
        :raises: InvalidParameterValue, if some parameters are invalid.
        :raises: MissingParameterValue, if some required parameters are
            missing.
        """
        self._validate_common(task)
        node = task.node

        # NOTE(TheJulia): If we're not writing an image, we can skip
        # the remainder of this method.
        # NOTE(dtantsur): if we are writing an image with local boot
        # the boot interface does not care about image parameters and
        # must not validate them.
        boot_option = deploy_utils.get_boot_option(node)
        if (not task.driver.storage.should_write_image(task)
                or boot_option == 'local'):
            return

        d_info = deploy_utils.get_image_instance_info(node)
        # Pick the image properties that must be present for this kind of
        # image.
        if node.driver_internal_info.get('is_whole_disk_image'):
            props = []
        elif d_info.get('boot_iso'):
            props = ['boot_iso']
        elif service_utils.is_glance_image(d_info['image_source']):
            props = ['kernel_id', 'ramdisk_id']
            if boot_option == 'kickstart':
                props.append('squashfs_id')
        else:
            props = ['kernel', 'ramdisk']
        deploy_utils.validate_image_properties(task.context, d_info, props)

    @METRICS.timer('PXEBaseMixin.validate_rescue')
    def validate_rescue(self, task):
        """Validate that the node has required properties for rescue.

        :param task: a TaskManager instance with the node being checked
        :raises: MissingParameterValue if node is missing one or more
            required parameters
        """
        pxe_utils.parse_driver_info(task.node, mode='rescue')

    @METRICS.timer('PXEBaseMixin.validate_inspection')
    def validate_inspection(self, task):
        """Validate that the node has required properties for inspection.

        :param task: A TaskManager instance with the node being checked
        :raises: UnsupportedDriverExtension
        """
        try:
            self._validate_common(task)
        except exception.MissingParameterValue:
            # Fall back to non-managed in-band inspection
            raise exception.UnsupportedDriverExtension(
                driver=task.node.driver, extension='inspection')

    # Provision states in which a stalled ramdisk boot may be retried.
    _RETRY_ALLOWED_STATES = {states.DEPLOYWAIT, states.CLEANWAIT,
                             states.RESCUEWAIT}

    @METRICS.timer('PXEBaseMixin._check_boot_timeouts')
    @periodics.periodic(spacing=CONF.pxe.boot_retry_check_interval,
                        enabled=bool(CONF.pxe.boot_retry_timeout))
    def _check_boot_timeouts(self, manager, context):
        """Periodically checks whether boot has timed out and retry it.

        :param manager: conductor manager.
        :param context: request context.
        """
        filters = {'provision_state_in': self._RETRY_ALLOWED_STATES,
                   'reserved': False,
                   'maintenance': False,
                   'provisioned_before': CONF.pxe.boot_retry_timeout}
        node_iter = manager.iter_nodes(filters=filters)

        # Only the node UUID is needed; the other tuple members are unused.
        for node_uuid, _driver, _conductor_group in node_iter:
            try:
                lock_purpose = 'checking PXE boot status'
                with task_manager.acquire(context, node_uuid,
                                          shared=True,
                                          purpose=lock_purpose) as task:
                    self._check_boot_status(task)
            except (exception.NodeLocked, exception.NodeNotFound):
                # The node is busy or gone; skip it on this pass.
                continue

    def _check_boot_status(self, task):
        """Power-cycle the node into PXE if its ramdisk boot has stalled.

        Does nothing when the node's boot interface is not a PXEBaseMixin
        or when ``_should_retry_boot`` says no retry is needed. Otherwise
        the lock is upgraded, the conditions are re-checked, and the node
        is powered off, set to boot from PXE (non-persistent) and powered
        on again.

        :param task: a task from TaskManager; its lock is upgraded here.
        """
        if not isinstance(task.driver.boot, PXEBaseMixin):
            return

        if not _should_retry_boot(task.node):
            return

        task.upgrade_lock(purpose='retrying PXE boot')

        # Retry critical checks after acquiring the exclusive lock.
        if (task.node.maintenance or task.node.provision_state
                not in self._RETRY_ALLOWED_STATES
                or not _should_retry_boot(task.node)):
            return

        LOG.info('Booting the ramdisk on node %(node)s is taking more than '
                 '%(timeout)d seconds, retrying boot',
                 {'node': task.node.uuid,
                  'timeout': CONF.pxe.boot_retry_timeout})

        manager_utils.node_power_action(task, states.POWER_OFF)
        manager_utils.node_set_boot_device(task, boot_devices.PXE,
                                           persistent=False)
        manager_utils.node_power_action(task, states.POWER_ON)


def _should_retry_boot(node):
    """Tell whether a PXE boot retry is warranted for the node.

    :param node: the node whose driver_internal_info is inspected.
    :returns: False if either 'agent_last_heartbeat' or
        'last_power_state_change' is within CONF.pxe.boot_retry_timeout
        according to ``manager_utils.value_within_timeout``; True otherwise.
    """
    # NOTE(dtantsur): this assumes IPA, do we need to make it generic?
    for field in ('agent_last_heartbeat', 'last_power_state_change'):
        if manager_utils.value_within_timeout(
                node.driver_internal_info.get(field),
                CONF.pxe.boot_retry_timeout):
            # Alive and heartbeating, probably busy with something long
            LOG.debug('Not retrying PXE boot for node %(node)s; its '
                      '%(event)s happened less than %(timeout)d seconds ago',
                      {'node': node.uuid, 'event': field,
                       'timeout': CONF.pxe.boot_retry_timeout})
            return False
    return True