From 5e5766a079552f2122c3e22ebf261405dd883500 Mon Sep 17 00:00:00 2001 From: Ruby Loo Date: Fri, 4 Mar 2022 15:27:56 +0000 Subject: [PATCH] Fix rebuilds using anaconda deploy interface The anaconda deploy interface was saving internal information in the node's instance_info, in the user-facing 'stage2' and 'ks_template' fields. This broke rebuilds using a different image with different stage2 or template specified in the image properties. This has been fixed by saving the information in the node's driver_internal_info instead. Addresses comments/nits from https://review.opendev.org/c/openstack/ironic/+/827924. Change-Id: Id9428518d21290fb38a0f7471a2cb69a6cb3ffb2 (cherry picked from commit ab68c9d88b76e4ff83f9dffddcc204676fdb50d5) --- ironic/common/pxe_utils.py | 73 +++++++++++-------- ironic/drivers/modules/pxe.py | 15 +++- ...da-instance-info-fix-a51837d8ac7b41de.yaml | 9 +++ 3 files changed, 64 insertions(+), 33 deletions(-) create mode 100644 releasenotes/notes/anaconda-instance-info-fix-a51837d8ac7b41de.yaml diff --git a/ironic/common/pxe_utils.py b/ironic/common/pxe_utils.py index 533c250d2a..44b88154cd 100644 --- a/ironic/common/pxe_utils.py +++ b/ironic/common/pxe_utils.py @@ -665,16 +665,22 @@ def get_instance_image_info(task, ipxe_enabled=False): return image_info - labels = ('kernel', 'ramdisk') image_properties = None d_info = deploy_utils.get_image_instance_info(node) + + def _get_image_properties(): + nonlocal image_properties + if not image_properties: + glance_service = service.GlanceImageService(context=ctx) + image_properties = glance_service.show( + d_info['image_source'])['properties'] + + labels = ('kernel', 'ramdisk') if not (i_info.get('kernel') and i_info.get('ramdisk')): # NOTE(rloo): If both are not specified in instance_info # we won't use any of them. We'll use the values specified # with the image, which we assume have been set. - glance_service = service.GlanceImageService(context=ctx) - image_properties = glance_service.show( - d_info['image_source'])['properties'] + _get_image_properties() for label in labels: i_info[label] = str(image_properties[label + '_id']) node.instance_info = i_info @@ -686,36 +692,41 @@ def get_instance_image_info(task, ipxe_enabled=False): # ks_template: anaconda kickstart template # ks_cfg - rendered ks_template anaconda_labels = ('stage2', 'ks_template', 'ks_cfg') - if not i_info.get('stage2') or not i_info.get('ks_template'): - if not image_properties: - glance_service = service.GlanceImageService(context=ctx) - image_properties = glance_service.show( - d_info['image_source'])['properties'] - if not i_info.get('ks_template'): - # ks_template is an optional property on the image - if 'ks_template' not in image_properties: - i_info['ks_template'] = CONF.anaconda.default_ks_template - else: - i_info['ks_template'] = str( - image_properties['ks_template']) - if not i_info.get('stage2'): + # NOTE(rloo): We save stage2 & ks_template values in case they + # are changed by the user after we start using them and to + # prevent re-computing them again. + if not node.driver_internal_info.get('stage2'): + if i_info.get('stage2'): + node.set_driver_internal_info('stage2', i_info['stage2']) + else: + _get_image_properties() if 'stage2_id' not in image_properties: - msg = ("'stage2_id' property is missing from the OS image " - "%s. The anaconda deploy interface requires this " - "to be set with the OS image or in instance_info. " - % d_info['image_source']) + msg = (_("'stage2_id' is missing from the properties of " + "the OS image %s. The anaconda deploy interface " + "requires this to be set with the OS image or " + "in instance_info['stage2']. ") % + d_info['image_source']) raise exception.ImageUnacceptable(msg) else: - i_info['stage2'] = str(image_properties['stage2_id']) - # NOTE(rloo): This is internally generated; cannot be specified. - i_info['ks_cfg'] = '' - - node.instance_info = i_info - node.save() + node.set_driver_internal_info( + 'stage2', str(image_properties['stage2_id'])) + if i_info.get('ks_template'): + node.set_driver_internal_info('ks_template', + i_info['ks_template']) + else: + _get_image_properties() + # ks_template is an optional property on the image + if 'ks_template' not in image_properties: + node.set_driver_internal_info( + 'ks_template', CONF.anaconda.default_ks_template) + else: + node.set_driver_internal_info( + 'ks_template', str(image_properties['ks_template'])) + node.save() for label in labels + anaconda_labels: image_info[label] = ( - i_info[label], + i_info.get(label) or node.driver_internal_info.get(label, ''), get_file_path_from_label(node.uuid, root_dir, label) ) @@ -1111,9 +1122,9 @@ def validate_kickstart_file(ks_cfg): 'ksvalidator', ks_file.name, check_on_exit=[0], attempts=1 ) except processutils.ProcessExecutionError as e: - msg = _(("The kickstart file generated does not pass validation. " + msg = (_("The kickstart file generated does not pass validation. " "The ksvalidator tool returned the following error: %s") % - (e)) + e) raise exception.InvalidKickstartFile(msg) @@ -1211,7 +1222,7 @@ def cache_ramdisk_kernel(task, pxe_info, ipxe_enabled=False): path = os.path.join(CONF.pxe.tftp_root, node.uuid) ensure_tree(path) # anaconda deploy will have 'stage2' as one of the labels in pxe_info dict - if 'stage2' in pxe_info.keys(): + if 'stage2' in pxe_info: # stage2 will be stored in ipxe http directory so make sure the # directory exists. file_path = get_file_path_from_label(node.uuid, diff --git a/ironic/drivers/modules/pxe.py b/ironic/drivers/modules/pxe.py index 9df7354a5b..9e403d3e2c 100644 --- a/ironic/drivers/modules/pxe.py +++ b/ironic/drivers/modules/pxe.py @@ -127,8 +127,8 @@ class PXEAnacondaDeploy(agent_base.AgentBaseMixin, agent_base.HeartbeatMixin, manager_utils.node_power_action(task, states.POWER_ON) task.process_event('done') except Exception as e: - msg = (_('Error rebooting node %(node)s after deploy. ' - 'Error: %(error)s') % + msg = (_('An error occurred after deployment, while preparing to ' + 'reboot the node %(node)s: %(error)s') % {'node': node.uuid, 'error': e}) agent_base.log_and_raise_deployment_error(task, msg) @@ -157,3 +157,14 @@ class PXEAnacondaDeploy(agent_base.AgentBaseMixin, agent_base.HeartbeatMixin, '%(agent_status_message)s', msg) deploy_utils.set_failed_state(task, agent_status_message, collect_logs=False) + + @METRICS.timer('AnacondaDeploy.clean_up') + @task_manager.require_exclusive_lock + def clean_up(self, task): + super(PXEAnacondaDeploy, self).clean_up(task) + node = task.node + # NOTE(rloo): These were added during deployment, as a side-effect of + # pxe_utils.get_instance_image_info(). + node.del_driver_internal_info('stage2') + node.del_driver_internal_info('ks_template') + node.save() diff --git a/releasenotes/notes/anaconda-instance-info-fix-a51837d8ac7b41de.yaml b/releasenotes/notes/anaconda-instance-info-fix-a51837d8ac7b41de.yaml new file mode 100644 index 0000000000..314d3ae9ce --- /dev/null +++ b/releasenotes/notes/anaconda-instance-info-fix-a51837d8ac7b41de.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + The anaconda deploy interface was saving internal information in + the node's instance_info, in the user-facing 'stage2' and + 'ks_template' fields. This broke rebuilds using a different image + with different stage2 or template specified in the image properties. + This has been fixed by saving the information in the node's + driver_internal_info instead.