From 8bcf1be9200b52536f76cb61c62071eaaac1e004 Mon Sep 17 00:00:00 2001 From: Jacob Anders Date: Fri, 22 Jan 2021 10:20:15 +1000 Subject: [PATCH] Add support for using NVMe specific cleaning This change adds support for utilising NVMe specific cleaning tools on supported devices. This will remove the necessity of using shred to securely delete the contents of an NVMe drive and enable using nvme-cli tools instead, improving cleaning performance and reducing wear on the device. Story: 2008290 Task: 41168 Change-Id: I2f63db9b739e53699bd5f164b79640927bf757d7 --- ironic_python_agent/hardware.py | 119 ++++- .../tests/unit/samples/hardware_samples.py | 444 ++++++++++++++++++ .../tests/unit/test_hardware.py | 83 ++++ ...ds-nvme-secure-erase-0ecfd624e5f50581.yaml | 8 + 4 files changed, 646 insertions(+), 8 deletions(-) create mode 100644 releasenotes/notes/adds-nvme-secure-erase-0ecfd624e5f50581.yaml diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index 8d50e354e..dae732b50 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -59,6 +59,8 @@ API_CLIENT = None API_LOOKUP_TIMEOUT = None API_LOOKUP_INTERVAL = None SUPPORTED_SOFTWARE_RAID_LEVELS = frozenset(['0', '1', '1+0', '5', '6']) +NVME_CLI_FORMAT_SUPPORTED_FLAG = 0b10 +NVME_CLI_CRYPTO_FORMAT_SUPPORTED_FLAG = 0b100 RAID_APPLY_CONFIGURATION_ARGSINFO = { "raid_config": { @@ -1285,18 +1287,42 @@ class GenericHardwareManager(HardwareManager): # Note(TheJulia) Use try/except to capture and log the failure # and then revert to attempting to shred the volume if enabled. 
try: - execute_secure_erase = info.get( - 'agent_enable_ata_secure_erase', True) - if execute_secure_erase and self._ata_erase(block_device): - return + if self._is_nvme(block_device): + + execute_nvme_erase = info.get( + 'agent_enable_nvme_secure_erase', True) + if execute_nvme_erase and self._nvme_erase(block_device): + return + else: + execute_secure_erase = info.get( + 'agent_enable_ata_secure_erase', True) + if execute_secure_erase and self._ata_erase(block_device): + return except errors.BlockDeviceEraseError as e: - execute_shred = info.get( - 'agent_continue_if_ata_erase_failed', False) + execute_shred = info.get('agent_continue_if_secure_erase_failed') + + # NOTE(janders) While we are deprecating + # ``driver_internal_info['agent_continue_if_ata_erase_failed']`` + # names check for both ``agent_continue_if_secure_erase_failed`` + # and ``agent_continue_if_ata_erase_failed``. + # This is to ensure interoperability between newer Ironic Python + # Agent images and older Ironic API services. + # In future releases, 'False' default value needs to be added to + # the info.get call above and the code below can be removed. + # If we're dealing with new-IPA and old-API scenario, NVMe secure + # erase should not be attempted due to absence of + # ``[deploy]/enable_nvme_secure_erase`` config option so + # ``agent_continue_if_ata_erase_failed`` is not misleading here + # as it will only apply to ATA Secure Erase. 
+ if execute_shred is None: + execute_shred = info.get('agent_continue_if_ata_erase_failed', + False) + if execute_shred: - LOG.warning('Failed to invoke ata_erase, ' + LOG.warning('Failed to invoke secure erase, ' 'falling back to shred: %s', e) else: - msg = ('Failed to invoke ata_erase, ' + msg = ('Failed to invoke secure erase, ' 'fallback to shred is not enabled: %s' % e) LOG.error(msg) raise errors.IncompatibleHardwareMethodError(msg) @@ -1617,6 +1643,83 @@ class GenericHardwareManager(HardwareManager): # In SEC1 security state return True + def _is_nvme(self, block_device): + """Check if a block device is a NVMe. + + Checks if the device name indicates that it is an NVMe drive. + + :param block_device: a BlockDevice object + :returns: True if the device is an NVMe, False if it is not. + """ + + return block_device.name.startswith("/dev/nvme") + + def _nvme_erase(self, block_device): + """Attempt to clean the NVMe using the most secure supported method + + :param block_device: a BlockDevice object + :return: True if cleaning operation succeeded, False if it failed + :raises: BlockDeviceEraseError + """ + + # check if crypto format is supported + try: + LOG.debug("Attempting to fetch NVMe capabilities for device %s", + block_device.name) + nvme_info, _e = utils.execute('nvme', 'id-ctrl', + block_device.name, '-o', 'json') + nvme_info = json.loads(nvme_info) + + except processutils.ProcessExecutionError as e: + msg = (("Failed to fetch NVMe capabilities for device {}: {}") + .format(block_device, e)) + LOG.error(msg) + raise errors.BlockDeviceEraseError(msg) + + # execute format with crypto option (ses=2) if supported + # if crypto is unsupported use user-data erase (ses=1) + if nvme_info: + # Check if the device supports NVMe format at all. This info + # is in "oacs" section of nvme-cli id-ctrl output. 
If it does, + # set format mode to 1 (this is passed as -s parameter + # to nvme-cli later) + fmt_caps = nvme_info['oacs'] + if fmt_caps & NVME_CLI_FORMAT_SUPPORTED_FLAG: + # Given the device supports format, check if crypto + # erase format mode is supported and pass it to nvme-cli + # instead + crypto_caps = nvme_info['fna'] + if crypto_caps & NVME_CLI_CRYPTO_FORMAT_SUPPORTED_FLAG: + format_mode = 2 # crypto erase + else: + format_mode = 1 # user-data erase + else: + msg = ('nvme-cli did not return any supported format modes ' + 'for device: {device}').format( + device=block_device.name) + LOG.error(msg) + raise errors.BlockDeviceEraseError(msg) + else: + # If nvme-cli output is empty, raise an exception + msg = ('nvme-cli did not return any information ' + 'for device: {device}').format(device=block_device.name) + LOG.error(msg) + raise errors.BlockDeviceEraseError(msg) + + try: + LOG.debug("Attempting to nvme-format %s using secure format mode " + "(ses) %s", block_device.name, format_mode) + utils.execute('nvme', 'format', block_device.name, '-s', + format_mode) + LOG.info("nvme-cli format for device %s (ses= %s ) completed " + "successfully.", block_device.name, format_mode) + return True + + except processutils.ProcessExecutionError as e: + msg = (("Failed to nvme format device {}: {}" + ).format(block_device, e)) + raise errors.BlockDeviceEraseError(msg) + def get_bmc_address(self): """Attempt to detect BMC IP address diff --git a/ironic_python_agent/tests/unit/samples/hardware_samples.py b/ironic_python_agent/tests/unit/samples/hardware_samples.py index 3b8b4b40d..9523323d2 100644 --- a/ironic_python_agent/tests/unit/samples/hardware_samples.py +++ b/ironic_python_agent/tests/unit/samples/hardware_samples.py @@ -837,3 +837,447 @@ tmpfs /run/user/1000 tmpfs rw,nosuid,nodev,relatime 0 0 pstore /sys/fs/pstore qstore rw,nosuid,nodev,noexec,relatime 0 0 /dev/loop19 /snap/core/10126 squashfs ro,nodev,relatime 0 0 """) + +NVME_CLI_INFO_TEMPLATE_CRYPTO_SUPPORTED = 
(""" +{ + "vid" : 5559, + "ssvid" : 5559, + "sn" : "1951B3444502 ", + "mn" : "WDC PC SN730 SDBQNTY-256G-1001 ", + "fr" : "11170101", + "rab" : 4, + "ieee" : 6980, + "cmic" : 0, + "mdts" : 7, + "cntlid" : 8215, + "ver" : 66304, + "rtd3r" : 500000, + "rtd3e" : 1000000, + "oaes" : 512, + "ctratt" : 2, + "rrls" : 0, + "crdt1" : 0, + "crdt2" : 0, + "crdt3" : 0, + "oacs" : 23, + "acl" : 4, + "aerl" : 7, + "frmw" : 20, + "lpa" : 30, + "elpe" : 255, + "npss" : 4, + "avscc" : 1, + "apsta" : 1, + "wctemp" : 357, + "cctemp" : 361, + "mtfa" : 50, + "hmpre" : 0, + "hmmin" : 0, + "tnvmcap" : 256060514304, + "unvmcap" : 0, + "rpmbs" : 0, + "edstt" : 26, + "dsto" : 1, + "fwug" : 1, + "kas" : 0, + "hctma" : 1, + "mntmt" : 273, + "mxtmt" : 357, + "sanicap" : 1610612739, + "hmminds" : 0, + "hmmaxd" : 0, + "nsetidmax" : 0, + "anatt" : 0, + "anacap" : 0, + "anagrpmax" : 0, + "nanagrpid" : 0, + "sqes" : 102, + "cqes" : 68, + "maxcmd" : 0, + "nn" : 1, + "oncs" : 95, + "fuses" : 0, + "fna" : 4, + "vwc" : 7, + "awun" : 0, + "awupf" : 0, + "nvscc" : 1, + "nwpc" : 0, + "acwu" : 0, + "sgls" : 0, + "subnqn" : "nqn.2018-01.com.wdc:guid:E8238FA6BF53-0001-001B444A44C72385", + "ioccsz" : 0, + "iorcsz" : 0, + "icdoff" : 0, + "ctrattr" : 0, + "msdbd" : 0, + "psds" : [ + { + "max_power" : 500, + "flags" : 0, + "entry_lat" : 0, + "exit_lat" : 0, + "read_tput" : 0, + "read_lat" : 0, + "write_tput" : 0, + "write_lat" : 0, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + }, + { + "max_power" : 350, + "flags" : 0, + "entry_lat" : 0, + "exit_lat" : 0, + "read_tput" : 1, + "read_lat" : 1, + "write_tput" : 1, + "write_lat" : 1, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + }, + { + "max_power" : 300, + "flags" : 0, + "entry_lat" : 0, + "exit_lat" : 0, + "read_tput" : 2, + "read_lat" : 2, + "write_tput" : 2, + "write_lat" : 2, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + }, + { + 
"max_power" : 700, + "flags" : 3, + "entry_lat" : 4000, + "exit_lat" : 10000, + "read_tput" : 3, + "read_lat" : 3, + "write_tput" : 3, + "write_lat" : 3, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + }, + { + "max_power" : 35, + "flags" : 3, + "entry_lat" : 4000, + "exit_lat" : 40000, + "read_tput" : 4, + "read_lat" : 4, + "write_tput" : 4, + "write_lat" : 4, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + } + ] +} +""") + +NVME_CLI_INFO_TEMPLATE_USERDATA_SUPPORTED = (""" +{ + "vid" : 5559, + "ssvid" : 5559, + "sn" : "1951B3444502 ", + "mn" : "WDC PC SN730 SDBQNTY-256G-1001 ", + "fr" : "11170101", + "rab" : 4, + "ieee" : 6980, + "cmic" : 0, + "mdts" : 7, + "cntlid" : 8215, + "ver" : 66304, + "rtd3r" : 500000, + "rtd3e" : 1000000, + "oaes" : 512, + "ctratt" : 2, + "rrls" : 0, + "crdt1" : 0, + "crdt2" : 0, + "crdt3" : 0, + "oacs" : 23, + "acl" : 4, + "aerl" : 7, + "frmw" : 20, + "lpa" : 30, + "elpe" : 255, + "npss" : 4, + "avscc" : 1, + "apsta" : 1, + "wctemp" : 357, + "cctemp" : 361, + "mtfa" : 50, + "hmpre" : 0, + "hmmin" : 0, + "tnvmcap" : 256060514304, + "unvmcap" : 0, + "rpmbs" : 0, + "edstt" : 26, + "dsto" : 1, + "fwug" : 1, + "kas" : 0, + "hctma" : 1, + "mntmt" : 273, + "mxtmt" : 357, + "sanicap" : 1610612739, + "hmminds" : 0, + "hmmaxd" : 0, + "nsetidmax" : 0, + "anatt" : 0, + "anacap" : 0, + "anagrpmax" : 0, + "nanagrpid" : 0, + "sqes" : 102, + "cqes" : 68, + "maxcmd" : 0, + "nn" : 1, + "oncs" : 95, + "fuses" : 0, + "fna" : 0, + "vwc" : 7, + "awun" : 0, + "awupf" : 0, + "nvscc" : 1, + "nwpc" : 0, + "acwu" : 0, + "sgls" : 0, + "subnqn" : "nqn.2018-01.com.wdc:guid:E8238FA6BF53-0001-001B444A44C72385", + "ioccsz" : 0, + "iorcsz" : 0, + "icdoff" : 0, + "ctrattr" : 0, + "msdbd" : 0, + "psds" : [ + { + "max_power" : 500, + "flags" : 0, + "entry_lat" : 0, + "exit_lat" : 0, + "read_tput" : 0, + "read_lat" : 0, + "write_tput" : 0, + "write_lat" : 0, + "idle_power" : 0, + "idle_scale" 
: 0, + "active_power" : 0, + "active_work_scale" : 0 + }, + { + "max_power" : 350, + "flags" : 0, + "entry_lat" : 0, + "exit_lat" : 0, + "read_tput" : 1, + "read_lat" : 1, + "write_tput" : 1, + "write_lat" : 1, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + }, + { + "max_power" : 300, + "flags" : 0, + "entry_lat" : 0, + "exit_lat" : 0, + "read_tput" : 2, + "read_lat" : 2, + "write_tput" : 2, + "write_lat" : 2, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + }, + { + "max_power" : 700, + "flags" : 3, + "entry_lat" : 4000, + "exit_lat" : 10000, + "read_tput" : 3, + "read_lat" : 3, + "write_tput" : 3, + "write_lat" : 3, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + }, + { + "max_power" : 35, + "flags" : 3, + "entry_lat" : 4000, + "exit_lat" : 40000, + "read_tput" : 4, + "read_lat" : 4, + "write_tput" : 4, + "write_lat" : 4, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + } + ] +} +""") + +NVME_CLI_INFO_TEMPLATE_FORMAT_UNSUPPORTED = (""" +{ + "vid" : 5559, + "ssvid" : 5559, + "sn" : "1951B3444502 ", + "mn" : "WDC PC SN730 SDBQNTY-256G-1001 ", + "fr" : "11170101", + "rab" : 4, + "ieee" : 6980, + "cmic" : 0, + "mdts" : 7, + "cntlid" : 8215, + "ver" : 66304, + "rtd3r" : 500000, + "rtd3e" : 1000000, + "oaes" : 512, + "ctratt" : 2, + "rrls" : 0, + "crdt1" : 0, + "crdt2" : 0, + "crdt3" : 0, + "oacs" : 0, + "acl" : 4, + "aerl" : 7, + "frmw" : 20, + "lpa" : 30, + "elpe" : 255, + "npss" : 4, + "avscc" : 1, + "apsta" : 1, + "wctemp" : 357, + "cctemp" : 361, + "mtfa" : 50, + "hmpre" : 0, + "hmmin" : 0, + "tnvmcap" : 256060514304, + "unvmcap" : 0, + "rpmbs" : 0, + "edstt" : 26, + "dsto" : 1, + "fwug" : 1, + "kas" : 0, + "hctma" : 1, + "mntmt" : 273, + "mxtmt" : 357, + "sanicap" : 1610612739, + "hmminds" : 0, + "hmmaxd" : 0, + "nsetidmax" : 0, + "anatt" : 0, + "anacap" : 0, + "anagrpmax" : 0, + "nanagrpid" : 0, + 
"sqes" : 102, + "cqes" : 68, + "maxcmd" : 0, + "nn" : 1, + "oncs" : 95, + "fuses" : 0, + "fna" : 0, + "vwc" : 7, + "awun" : 0, + "awupf" : 0, + "nvscc" : 1, + "nwpc" : 0, + "acwu" : 0, + "sgls" : 0, + "subnqn" : "nqn.2018-01.com.wdc:guid:E8238FA6BF53-0001-001B444A44C72385", + "ioccsz" : 0, + "iorcsz" : 0, + "icdoff" : 0, + "ctrattr" : 0, + "msdbd" : 0, + "psds" : [ + { + "max_power" : 500, + "flags" : 0, + "entry_lat" : 0, + "exit_lat" : 0, + "read_tput" : 0, + "read_lat" : 0, + "write_tput" : 0, + "write_lat" : 0, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + }, + { + "max_power" : 350, + "flags" : 0, + "entry_lat" : 0, + "exit_lat" : 0, + "read_tput" : 1, + "read_lat" : 1, + "write_tput" : 1, + "write_lat" : 1, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + }, + { + "max_power" : 300, + "flags" : 0, + "entry_lat" : 0, + "exit_lat" : 0, + "read_tput" : 2, + "read_lat" : 2, + "write_tput" : 2, + "write_lat" : 2, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + }, + { + "max_power" : 700, + "flags" : 3, + "entry_lat" : 4000, + "exit_lat" : 10000, + "read_tput" : 3, + "read_lat" : 3, + "write_tput" : 3, + "write_lat" : 3, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + }, + { + "max_power" : 35, + "flags" : 3, + "entry_lat" : 4000, + "exit_lat" : 40000, + "read_tput" : 4, + "read_lat" : 4, + "write_tput" : 4, + "write_lat" : 4, + "idle_power" : 0, + "idle_scale" : 0, + "active_power" : 0, + "active_work_scale" : 0 + } + ] +} +""") diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index 772815586..bbdfe3af2 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -3862,6 +3862,89 @@ class TestGenericHardwareManager(base.IronicAgentTest): 
self.assertEqual(hardware.BootInfo(current_boot_mode='uefi'), result) mocked_isdir.assert_called_once_with('/sys/firmware/efi') + @mock.patch.object(hardware.GenericHardwareManager, + '_is_linux_raid_member', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test_erase_block_device_nvme_crypto_success(self, mocked_execute, + mocked_raid_member): + info = self.node['driver_internal_info'] + info['agent_enable_nvme_erase'] = True + info['agent_continue_if_secure_erase_failed'] = True + mocked_raid_member.return_value = False + mocked_execute.side_effect = [ + (hws.NVME_CLI_INFO_TEMPLATE_CRYPTO_SUPPORTED, ''), + ('', ''), + ] + + block_device = hardware.BlockDevice('/dev/nvme0n1', "testdisk", + 1073741824, False) + retval = self.hardware._nvme_erase(block_device) + mocked_execute.assert_has_calls([ + mock.call('nvme', 'id-ctrl', '/dev/nvme0n1', '-o', 'json'), + mock.call('nvme', 'format', '/dev/nvme0n1', '-s', 2), + ]) + + self.assertTrue(retval) + + @mock.patch.object(hardware.GenericHardwareManager, + '_is_linux_raid_member', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test_erase_block_device_nvme_userdata_success(self, mocked_execute, + mocked_raid_member): + info = self.node['driver_internal_info'] + info['agent_enable_nvme_erase'] = True + info['agent_continue_if_secure_erase_failed'] = True + mocked_raid_member.return_value = False + mocked_execute.side_effect = [ + (hws.NVME_CLI_INFO_TEMPLATE_USERDATA_SUPPORTED, ''), + ('', ''), + ] + + block_device = hardware.BlockDevice('/dev/nvme0n1', "testdisk", + 1073741824, False) + retval = self.hardware._nvme_erase(block_device) + mocked_execute.assert_has_calls([ + mock.call('nvme', 'id-ctrl', '/dev/nvme0n1', '-o', 'json'), + mock.call('nvme', 'format', '/dev/nvme0n1', '-s', 1), + ]) + + self.assertTrue(retval) + + @mock.patch.object(hardware.GenericHardwareManager, + '_is_linux_raid_member', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + 
def test_erase_block_device_nvme_failed(self, mocked_execute, + mocked_raid_member): + info = self.node['driver_internal_info'] + info['agent_enable_nvme_erase'] = True + mocked_raid_member.return_value = False + mocked_execute.side_effect = [ + (hws.NVME_CLI_INFO_TEMPLATE_CRYPTO_SUPPORTED, ''), + (processutils.ProcessExecutionError()), + ] + + block_device = hardware.BlockDevice('/dev/nvme0n1', "testdisk", + 1073741824, False) + self.assertRaises(errors.BlockDeviceEraseError, + self.hardware._nvme_erase, block_device) + + @mock.patch.object(hardware.GenericHardwareManager, + '_is_linux_raid_member', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test_erase_block_device_nvme_format_unsupported(self, mocked_execute, + mocked_raid_member): + info = self.node['driver_internal_info'] + info['agent_enable_nvme_erase'] = True + mocked_raid_member.return_value = False + mocked_execute.side_effect = [ + (hws.NVME_CLI_INFO_TEMPLATE_FORMAT_UNSUPPORTED, ''), + ] + + block_device = hardware.BlockDevice('/dev/nvme0n1', "testdisk", + 1073741824, False) + self.assertRaises(errors.BlockDeviceEraseError, + self.hardware._nvme_erase, block_device) + @mock.patch.object(hardware.GenericHardwareManager, 'get_os_install_device', autospec=True) diff --git a/releasenotes/notes/adds-nvme-secure-erase-0ecfd624e5f50581.yaml b/releasenotes/notes/adds-nvme-secure-erase-0ecfd624e5f50581.yaml new file mode 100644 index 000000000..bd1441ee0 --- /dev/null +++ b/releasenotes/notes/adds-nvme-secure-erase-0ecfd624e5f50581.yaml @@ -0,0 +1,8 @@ +--- +features: + - Adds support for NVMe-specific storage cleaning to IPA. Currently this is + implemented by using nvme-cli format functionality. Crypto Erase is used + if supported by the device, otherwise the code falls back to User Data + Erase. Operators can control NVMe cleaning by using the + ``[deploy]/enable_nvme_secure_erase`` config option, which controls the + ``agent_enable_nvme_secure_erase`` internal setting in driver_internal_info.