Rescan after making partition changes

Long story short, we need to rescan after making partition changes. Without forcing a filesystem sync and rescan operation, we rely upon lazy write-out from buffers to the block device when operating with a ramdisk. This is because ramfs does NOT actually operate with fsync where as GNU parted utilizes fsync on the device in an attempt to force the write to disk to occur. This is fine if your booted with devfs, or say even a physical block device that backs /dev, just not ramfs. Change-Id: I66481d5f3c17a489f7bdf0af82e31b9107605968 Story: 2006423 Task: 36316 (cherry picked from commit 2a83c0add0)
2019-08-20 16:52:29 -04:00 · 2019-08-20 16:52:29 -04:00 · a8b84e8b7f
parent 3ca0cf67c1
commit a8b84e8b7f
3 changed files with 60 additions and 20 deletions
--- a/ironic_lib/disk_utils.py
+++ b/ironic_lib/disk_utils.py
@ -294,6 +294,7 @@ def make_partitions(dev, root_mb, swap_mb, ephemeral_mb,
    if commit:
        # write to the disk
        dp.commit()
+        _trigger_device_rescan(dev)
    return part_dict


@ -781,6 +782,45 @@ def fix_gpt_partition(device, node_uuid):
        raise exception.InstanceDeployFailure(msg)


+def _trigger_device_rescan(device):
+    """Sync and trigger device rescan.
+
+    Disk partition performed via parted, when performed on a ramdisk
+    do not have to honor the fsync mechanism. In essence, fsync is used
+    on the file representing the block device, which falls to the kernel
+    filesystem layer to trigger a sync event. On a ramdisk using ramfs,
+    this is an explicit non-operation.
+
+    As a result of this, we need to trigger a system wide sync operation
+    which will trigger cache to flush to disk, after which partition changes
+    should be visible upon re-scan.
+
+    When ramdisks are not in use, this also helps ensure that data has
+    been safely flushed across the wire, such as on iscsi connections.
+
+    :param device: The block device containing paritions that is attempting
+                   to be updated.
+    """
+    # TODO(TheJulia): This helper method was broken out for re-use, and
+    # does not have an explicit test case for itself, and it should, but
+    # it's steps are fairly explicitly checked with mocks on execute in
+    # the partition code.
+    LOG.debug('Explicitly calling sync to force buffer/cache flush.')
+    utils.execute('sync')
+    # Make sure any additions to the partitioning are reflected in the
+    # kernel.
+    LOG.debug('Waiting until udev event queue is empty')
+    utils.execute('udevadm', 'settle')
+    try:
+        utils.execute('partprobe', device, run_as_root=True,
+                      attempts=CONF.disk_utils.partprobe_attempts)
+        # Also verify that the partitioning is correct now.
+        utils.execute('sgdisk', '-v', device, run_as_root=True)
+    except processutils.ProcessExecutionError as exc:
+        LOG.warning('Failed to verify partitioning after creating '
+                    'partition(s): %s', exc)
+
+
 def create_config_drive_partition(node_uuid, device, configdrive):
    """Create a partition for config drive

@ -863,25 +903,8 @@ def create_config_drive_partition(node_uuid, device, configdrive):
                utils.execute('parted', '-a', 'optimal', '-s', '--', device,
                              'mkpart', 'primary', 'fat32', startlimit,
                              endlimit, run_as_root=True)
-            # Parted uses fsync to tell the kernel to sync file io
-            # however on ramdisks in ramfs, this is an explicit no-op.
-            # Explicitly call sync so when the the kernel attempts to read
-            # the partition table from disk, it is less likely that the write
-            # is still in buffer cache pending write to disk.
-            LOG.debug('Explicitly calling sync to force buffer/cache flush.')
-            utils.execute('sync')
-            # Make sure any additions to the partitioning are reflected in the
-            # kernel.
-            LOG.debug('Waiting until udev event queue is empty')
-            utils.execute('udevadm', 'settle')
-            try:
-                utils.execute('partprobe', device, run_as_root=True,
-                              attempts=CONF.disk_utils.partprobe_attempts)
-                # Also verify that the partitioning is correct now.
-                utils.execute('sgdisk', '-v', device, run_as_root=True)
-            except processutils.ProcessExecutionError as exc:
-                LOG.warning('Failed to verify GPT partitioning after creating '
-                            'the configdrive partition: %s', exc)
+            # Trigger device rescan
+            _trigger_device_rescan(device)

            upd_parts = set(part['number'] for part in list_partitions(device))
            new_part = set(upd_parts) - set(cur_parts)
--- a/ironic_lib/tests/test_disk_utils.py
+++ b/ironic_lib/tests/test_disk_utils.py
@ -449,7 +449,14 @@ class MakePartitionsTestCase(base.IronicLibTestCase):
        fuser_cmd = ['fuser', 'fake-dev']
        fuser_call = mock.call(*fuser_cmd, run_as_root=True,
                               check_exit_code=[0, 1])
-        mock_exc.assert_has_calls([parted_call, fuser_call])
+
+        sync_calls = [mock.call('sync'),
+                      mock.call('udevadm', 'settle'),
+                      mock.call('partprobe', self.dev, attempts=10,
+                                run_as_root=True),
+                      mock.call('sgdisk', '-v', self.dev, run_as_root=True)]
+
+        mock_exc.assert_has_calls([parted_call, fuser_call] + sync_calls)

    def test_make_partitions(self, mock_exc):
        self._test_make_partitions(mock_exc, boot_option="netboot")
@ -663,6 +670,9 @@ class PopulateImageTestCase(base.IronicLibTestCase):
        self.assertFalse(mock_dd.called)


+# NOTE(TheJulia): _trigger_device_rescan is systemwide thus pointless
+# to execute in the file test case. Also, CI unit test jobs lack sgdisk.
+@mock.patch.object(disk_utils, '_trigger_device_rescan', lambda *_: None)
@mock.patch.object(utils, 'wait_for_disk_to_become_available', lambda *_: None)
@mock.patch.object(disk_utils, 'is_block_device', lambda d: True)
@mock.patch.object(disk_utils, 'block_uuid', lambda p: 'uuid')
--- a/releasenotes/notes/rescan-for-partition-write-out-3fbb92ae5c2a33c6.yaml
+++ b/releasenotes/notes/rescan-for-partition-write-out-3fbb92ae5c2a33c6.yaml
@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    Fixes an issue with the ``disk_utils`` method ``make_partitions``,
+    which is utilized to facilitate the write-out of partition images to disk.
+    Previously when this method was invoked on a ramdisk, the partition may
+    not have been found.