Add support for using NVMe specific cleaning

This change adds support for utilising NVMe specific cleaning tools
on supported devices. This will remove the necessity of using shred to
securely delete the contents of an NVMe drive and enable using nvme-cli
tools instead, improving cleaning performance and reducing wear on the device.

Story: 2008290
Task: 41168
Change-Id: I2f63db9b739e53699bd5f164b79640927bf757d7
This commit is contained in:
Jacob Anders 2021-01-22 10:20:15 +10:00
parent a35761c21f
commit 8bcf1be920
4 changed files with 646 additions and 8 deletions

View File

@ -59,6 +59,8 @@ API_CLIENT = None
API_LOOKUP_TIMEOUT = None
API_LOOKUP_INTERVAL = None
SUPPORTED_SOFTWARE_RAID_LEVELS = frozenset(['0', '1', '1+0', '5', '6'])
# Bit mask applied to the "oacs" value reported by ``nvme id-ctrl``; when
# the bit is set the device is treated as supporting the NVMe format command.
NVME_CLI_FORMAT_SUPPORTED_FLAG = 0b10
# Bit mask applied to the "fna" value reported by ``nvme id-ctrl``; when
# the bit is set the crypto erase format mode is used instead of user-data
# erase.
NVME_CLI_CRYPTO_FORMAT_SUPPORTED_FLAG = 0b100
RAID_APPLY_CONFIGURATION_ARGSINFO = {
"raid_config": {
@ -1285,18 +1287,42 @@ class GenericHardwareManager(HardwareManager):
# Note(TheJulia) Use try/except to capture and log the failure
# and then revert to attempting to shred the volume if enabled.
try:
execute_secure_erase = info.get(
'agent_enable_ata_secure_erase', True)
if execute_secure_erase and self._ata_erase(block_device):
return
if self._is_nvme(block_device):
execute_nvme_erase = info.get(
'agent_enable_nvme_secure_erase', True)
if execute_nvme_erase and self._nvme_erase(block_device):
return
else:
execute_secure_erase = info.get(
'agent_enable_ata_secure_erase', True)
if execute_secure_erase and self._ata_erase(block_device):
return
except errors.BlockDeviceEraseError as e:
execute_shred = info.get(
'agent_continue_if_ata_erase_failed', False)
execute_shred = info.get('agent_continue_if_secure_erase_failed')
# NOTE(janders) While we are deprecating
# ``driver_internal_info['agent_continue_if_ata_erase_failed']``
# names check for both ``agent_continue_if_secure_erase_failed``
# and ``agent_continue_if_ata_erase_failed``.
# This is to ensure interoperability between newer Ironic Python
# Agent images and older Ironic API services.
# In future releases, 'False' default value needs to be added to
# the info.get call above and the code below can be removed.
# If we're dealing with new-IPA and old-API scenario, NVMe secure
# erase should not be attempted due to absence of
# ``[deploy]/enable_nvme_secure_erase`` config option so
# ``agent_continue_if_ata_erase_failed`` is not misleading here
# as it will only apply to ATA Secure Erase.
if execute_shred is None:
execute_shred = info.get('agent_continue_if_ata_erase_failed',
False)
if execute_shred:
LOG.warning('Failed to invoke ata_erase, '
LOG.warning('Failed to invoke secure erase, '
'falling back to shred: %s', e)
else:
msg = ('Failed to invoke ata_erase, '
msg = ('Failed to invoke secure erase, '
'fallback to shred is not enabled: %s' % e)
LOG.error(msg)
raise errors.IncompatibleHardwareMethodError(msg)
@ -1617,6 +1643,83 @@ class GenericHardwareManager(HardwareManager):
# In SEC1 security state
return True
def _is_nvme(self, block_device):
"""Check if a block device is a NVMe.
Checks if the device name indicates that it is an NVMe drive.
:param block_device: a BlockDevice object
:returns: True if the device is an NVMe, False if it is not.
"""
return block_device.name.startswith("/dev/nvme")
def _nvme_erase(self, block_device):
    """Attempt to clean the NVMe using the most secure supported method

    The controller capabilities are read with ``nvme id-ctrl``. When the
    device supports the format command it is formatted using crypto erase
    (ses=2) if available, and user-data erase (ses=1) otherwise.

    :param block_device: a BlockDevice object
    :return: True if cleaning operation succeeded
    :raises: BlockDeviceEraseError if the capabilities cannot be fetched
             or parsed, if the device does not support format, or if the
             format command itself fails
    """
    device = block_device.name

    try:
        LOG.debug("Attempting to fetch NVMe capabilities for device %s",
                  device)
        raw_info, _e = utils.execute('nvme', 'id-ctrl', device,
                                     '-o', 'json')
    except processutils.ProcessExecutionError as e:
        msg = (("Failed to fetch NVMe capabilities for device {}: {}")
               .format(device, e))
        LOG.error(msg)
        raise errors.BlockDeviceEraseError(msg)

    # Unparseable output must surface as BlockDeviceEraseError as well,
    # since that is the only error type this method documents.
    try:
        nvme_info = json.loads(raw_info)
    except (TypeError, ValueError) as e:
        msg = (("Failed to parse NVMe capabilities for device {}: {}")
               .format(device, e))
        LOG.error(msg)
        raise errors.BlockDeviceEraseError(msg)

    if not nvme_info:
        # If nvme-cli output is empty, raise an exception
        msg = ('nvme-cli did not return any information '
               'for device: {device}').format(device=device)
        LOG.error(msg)
        raise errors.BlockDeviceEraseError(msg)

    # Check if the device supports NVMe format at all. This info is in
    # the "oacs" section of nvme-cli id-ctrl output. A missing or null
    # value is treated the same as "no capabilities reported".
    fmt_caps = nvme_info.get('oacs') or 0
    if not (fmt_caps & NVME_CLI_FORMAT_SUPPORTED_FLAG):
        msg = ('nvme-cli did not return any supported format modes '
               'for device: {device}').format(device=device)
        LOG.error(msg)
        raise errors.BlockDeviceEraseError(msg)

    # The format mode is passed as the -s <mode> parameter to nvme-cli:
    # crypto erase (ses=2) if the "fna" section reports support for it,
    # user-data erase (ses=1) otherwise.
    crypto_caps = nvme_info.get('fna') or 0
    if crypto_caps & NVME_CLI_CRYPTO_FORMAT_SUPPORTED_FLAG:
        format_mode = 2  # crypto erase
    else:
        format_mode = 1  # user-data erase

    try:
        LOG.debug("Attempting to nvme-format %s using secure format mode "
                  "(ses) %s", device, format_mode)
        utils.execute('nvme', 'format', device, '-s', format_mode)
        LOG.info("nvme-cli format for device %s (ses= %s ) completed "
                 "successfully.", device, format_mode)
        return True
    except processutils.ProcessExecutionError as e:
        msg = (("Failed to nvme format device {}: {}"
                ).format(device, e))
        raise errors.BlockDeviceEraseError(msg)
def get_bmc_address(self):
"""Attempt to detect BMC IP address

View File

@ -837,3 +837,447 @@ tmpfs /run/user/1000 tmpfs rw,nosuid,nodev,relatime 0 0
pstore /sys/fs/pstore qstore rw,nosuid,nodev,noexec,relatime 0 0
/dev/loop19 /snap/core/10126 squashfs ro,nodev,relatime 0 0
""")
# Sample ``nvme id-ctrl -o json`` output used as a test fixture. Here "oacs"
# is 23 (bit 0b10 set) and "fna" is 4 (bit 0b100 set), i.e. the case where
# both format and crypto erase are reported as supported.
NVME_CLI_INFO_TEMPLATE_CRYPTO_SUPPORTED = ("""
{
"vid" : 5559,
"ssvid" : 5559,
"sn" : "1951B3444502 ",
"mn" : "WDC PC SN730 SDBQNTY-256G-1001 ",
"fr" : "11170101",
"rab" : 4,
"ieee" : 6980,
"cmic" : 0,
"mdts" : 7,
"cntlid" : 8215,
"ver" : 66304,
"rtd3r" : 500000,
"rtd3e" : 1000000,
"oaes" : 512,
"ctratt" : 2,
"rrls" : 0,
"crdt1" : 0,
"crdt2" : 0,
"crdt3" : 0,
"oacs" : 23,
"acl" : 4,
"aerl" : 7,
"frmw" : 20,
"lpa" : 30,
"elpe" : 255,
"npss" : 4,
"avscc" : 1,
"apsta" : 1,
"wctemp" : 357,
"cctemp" : 361,
"mtfa" : 50,
"hmpre" : 0,
"hmmin" : 0,
"tnvmcap" : 256060514304,
"unvmcap" : 0,
"rpmbs" : 0,
"edstt" : 26,
"dsto" : 1,
"fwug" : 1,
"kas" : 0,
"hctma" : 1,
"mntmt" : 273,
"mxtmt" : 357,
"sanicap" : 1610612739,
"hmminds" : 0,
"hmmaxd" : 0,
"nsetidmax" : 0,
"anatt" : 0,
"anacap" : 0,
"anagrpmax" : 0,
"nanagrpid" : 0,
"sqes" : 102,
"cqes" : 68,
"maxcmd" : 0,
"nn" : 1,
"oncs" : 95,
"fuses" : 0,
"fna" : 4,
"vwc" : 7,
"awun" : 0,
"awupf" : 0,
"nvscc" : 1,
"nwpc" : 0,
"acwu" : 0,
"sgls" : 0,
"subnqn" : "nqn.2018-01.com.wdc:guid:E8238FA6BF53-0001-001B444A44C72385",
"ioccsz" : 0,
"iorcsz" : 0,
"icdoff" : 0,
"ctrattr" : 0,
"msdbd" : 0,
"psds" : [
{
"max_power" : 500,
"flags" : 0,
"entry_lat" : 0,
"exit_lat" : 0,
"read_tput" : 0,
"read_lat" : 0,
"write_tput" : 0,
"write_lat" : 0,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
},
{
"max_power" : 350,
"flags" : 0,
"entry_lat" : 0,
"exit_lat" : 0,
"read_tput" : 1,
"read_lat" : 1,
"write_tput" : 1,
"write_lat" : 1,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
},
{
"max_power" : 300,
"flags" : 0,
"entry_lat" : 0,
"exit_lat" : 0,
"read_tput" : 2,
"read_lat" : 2,
"write_tput" : 2,
"write_lat" : 2,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
},
{
"max_power" : 700,
"flags" : 3,
"entry_lat" : 4000,
"exit_lat" : 10000,
"read_tput" : 3,
"read_lat" : 3,
"write_tput" : 3,
"write_lat" : 3,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
},
{
"max_power" : 35,
"flags" : 3,
"entry_lat" : 4000,
"exit_lat" : 40000,
"read_tput" : 4,
"read_lat" : 4,
"write_tput" : 4,
"write_lat" : 4,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
}
]
}
""")
# Sample ``nvme id-ctrl -o json`` output used as a test fixture. Here "oacs"
# is 23 (bit 0b10 set) and "fna" is 0, i.e. the case where format is
# reported as supported but crypto erase is not.
NVME_CLI_INFO_TEMPLATE_USERDATA_SUPPORTED = ("""
{
"vid" : 5559,
"ssvid" : 5559,
"sn" : "1951B3444502 ",
"mn" : "WDC PC SN730 SDBQNTY-256G-1001 ",
"fr" : "11170101",
"rab" : 4,
"ieee" : 6980,
"cmic" : 0,
"mdts" : 7,
"cntlid" : 8215,
"ver" : 66304,
"rtd3r" : 500000,
"rtd3e" : 1000000,
"oaes" : 512,
"ctratt" : 2,
"rrls" : 0,
"crdt1" : 0,
"crdt2" : 0,
"crdt3" : 0,
"oacs" : 23,
"acl" : 4,
"aerl" : 7,
"frmw" : 20,
"lpa" : 30,
"elpe" : 255,
"npss" : 4,
"avscc" : 1,
"apsta" : 1,
"wctemp" : 357,
"cctemp" : 361,
"mtfa" : 50,
"hmpre" : 0,
"hmmin" : 0,
"tnvmcap" : 256060514304,
"unvmcap" : 0,
"rpmbs" : 0,
"edstt" : 26,
"dsto" : 1,
"fwug" : 1,
"kas" : 0,
"hctma" : 1,
"mntmt" : 273,
"mxtmt" : 357,
"sanicap" : 1610612739,
"hmminds" : 0,
"hmmaxd" : 0,
"nsetidmax" : 0,
"anatt" : 0,
"anacap" : 0,
"anagrpmax" : 0,
"nanagrpid" : 0,
"sqes" : 102,
"cqes" : 68,
"maxcmd" : 0,
"nn" : 1,
"oncs" : 95,
"fuses" : 0,
"fna" : 0,
"vwc" : 7,
"awun" : 0,
"awupf" : 0,
"nvscc" : 1,
"nwpc" : 0,
"acwu" : 0,
"sgls" : 0,
"subnqn" : "nqn.2018-01.com.wdc:guid:E8238FA6BF53-0001-001B444A44C72385",
"ioccsz" : 0,
"iorcsz" : 0,
"icdoff" : 0,
"ctrattr" : 0,
"msdbd" : 0,
"psds" : [
{
"max_power" : 500,
"flags" : 0,
"entry_lat" : 0,
"exit_lat" : 0,
"read_tput" : 0,
"read_lat" : 0,
"write_tput" : 0,
"write_lat" : 0,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
},
{
"max_power" : 350,
"flags" : 0,
"entry_lat" : 0,
"exit_lat" : 0,
"read_tput" : 1,
"read_lat" : 1,
"write_tput" : 1,
"write_lat" : 1,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
},
{
"max_power" : 300,
"flags" : 0,
"entry_lat" : 0,
"exit_lat" : 0,
"read_tput" : 2,
"read_lat" : 2,
"write_tput" : 2,
"write_lat" : 2,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
},
{
"max_power" : 700,
"flags" : 3,
"entry_lat" : 4000,
"exit_lat" : 10000,
"read_tput" : 3,
"read_lat" : 3,
"write_tput" : 3,
"write_lat" : 3,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
},
{
"max_power" : 35,
"flags" : 3,
"entry_lat" : 4000,
"exit_lat" : 40000,
"read_tput" : 4,
"read_lat" : 4,
"write_tput" : 4,
"write_lat" : 4,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
}
]
}
""")
# Sample ``nvme id-ctrl -o json`` output used as a test fixture. Here both
# "oacs" and "fna" are 0, i.e. the case where the format command is not
# reported as supported at all.
NVME_CLI_INFO_TEMPLATE_FORMAT_UNSUPPORTED = ("""
{
"vid" : 5559,
"ssvid" : 5559,
"sn" : "1951B3444502 ",
"mn" : "WDC PC SN730 SDBQNTY-256G-1001 ",
"fr" : "11170101",
"rab" : 4,
"ieee" : 6980,
"cmic" : 0,
"mdts" : 7,
"cntlid" : 8215,
"ver" : 66304,
"rtd3r" : 500000,
"rtd3e" : 1000000,
"oaes" : 512,
"ctratt" : 2,
"rrls" : 0,
"crdt1" : 0,
"crdt2" : 0,
"crdt3" : 0,
"oacs" : 0,
"acl" : 4,
"aerl" : 7,
"frmw" : 20,
"lpa" : 30,
"elpe" : 255,
"npss" : 4,
"avscc" : 1,
"apsta" : 1,
"wctemp" : 357,
"cctemp" : 361,
"mtfa" : 50,
"hmpre" : 0,
"hmmin" : 0,
"tnvmcap" : 256060514304,
"unvmcap" : 0,
"rpmbs" : 0,
"edstt" : 26,
"dsto" : 1,
"fwug" : 1,
"kas" : 0,
"hctma" : 1,
"mntmt" : 273,
"mxtmt" : 357,
"sanicap" : 1610612739,
"hmminds" : 0,
"hmmaxd" : 0,
"nsetidmax" : 0,
"anatt" : 0,
"anacap" : 0,
"anagrpmax" : 0,
"nanagrpid" : 0,
"sqes" : 102,
"cqes" : 68,
"maxcmd" : 0,
"nn" : 1,
"oncs" : 95,
"fuses" : 0,
"fna" : 0,
"vwc" : 7,
"awun" : 0,
"awupf" : 0,
"nvscc" : 1,
"nwpc" : 0,
"acwu" : 0,
"sgls" : 0,
"subnqn" : "nqn.2018-01.com.wdc:guid:E8238FA6BF53-0001-001B444A44C72385",
"ioccsz" : 0,
"iorcsz" : 0,
"icdoff" : 0,
"ctrattr" : 0,
"msdbd" : 0,
"psds" : [
{
"max_power" : 500,
"flags" : 0,
"entry_lat" : 0,
"exit_lat" : 0,
"read_tput" : 0,
"read_lat" : 0,
"write_tput" : 0,
"write_lat" : 0,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
},
{
"max_power" : 350,
"flags" : 0,
"entry_lat" : 0,
"exit_lat" : 0,
"read_tput" : 1,
"read_lat" : 1,
"write_tput" : 1,
"write_lat" : 1,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
},
{
"max_power" : 300,
"flags" : 0,
"entry_lat" : 0,
"exit_lat" : 0,
"read_tput" : 2,
"read_lat" : 2,
"write_tput" : 2,
"write_lat" : 2,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
},
{
"max_power" : 700,
"flags" : 3,
"entry_lat" : 4000,
"exit_lat" : 10000,
"read_tput" : 3,
"read_lat" : 3,
"write_tput" : 3,
"write_lat" : 3,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
},
{
"max_power" : 35,
"flags" : 3,
"entry_lat" : 4000,
"exit_lat" : 40000,
"read_tput" : 4,
"read_lat" : 4,
"write_tput" : 4,
"write_lat" : 4,
"idle_power" : 0,
"idle_scale" : 0,
"active_power" : 0,
"active_work_scale" : 0
}
]
}
""")

View File

@ -3862,6 +3862,89 @@ class TestGenericHardwareManager(base.IronicAgentTest):
self.assertEqual(hardware.BootInfo(current_boot_mode='uefi'), result)
mocked_isdir.assert_called_once_with('/sys/firmware/efi')
@mock.patch.object(hardware.GenericHardwareManager,
                   '_is_linux_raid_member', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
def test_erase_block_device_nvme_crypto_success(self, mocked_execute,
                                                mocked_raid_member):
    """_nvme_erase formats with crypto erase (ses=2) when supported."""
    info = self.node['driver_internal_info']
    # Key name matches the one looked up when erasing NVMe block devices.
    info['agent_enable_nvme_secure_erase'] = True
    info['agent_continue_if_secure_erase_failed'] = True
    mocked_raid_member.return_value = False
    # First call returns the id-ctrl capabilities, second is the format.
    mocked_execute.side_effect = [
        (hws.NVME_CLI_INFO_TEMPLATE_CRYPTO_SUPPORTED, ''),
        ('', ''),
    ]

    block_device = hardware.BlockDevice('/dev/nvme0n1', "testdisk",
                                        1073741824, False)
    retval = self.hardware._nvme_erase(block_device)

    mocked_execute.assert_has_calls([
        mock.call('nvme', 'id-ctrl', '/dev/nvme0n1', '-o', 'json'),
        mock.call('nvme', 'format', '/dev/nvme0n1', '-s', 2),
    ])
    self.assertTrue(retval)
@mock.patch.object(hardware.GenericHardwareManager,
                   '_is_linux_raid_member', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
def test_erase_block_device_nvme_userdata_success(self, mocked_execute,
                                                  mocked_raid_member):
    """_nvme_erase falls back to user-data erase (ses=1) without crypto."""
    info = self.node['driver_internal_info']
    # Key name matches the one looked up when erasing NVMe block devices.
    info['agent_enable_nvme_secure_erase'] = True
    info['agent_continue_if_secure_erase_failed'] = True
    mocked_raid_member.return_value = False
    # First call returns the id-ctrl capabilities, second is the format.
    mocked_execute.side_effect = [
        (hws.NVME_CLI_INFO_TEMPLATE_USERDATA_SUPPORTED, ''),
        ('', ''),
    ]

    block_device = hardware.BlockDevice('/dev/nvme0n1', "testdisk",
                                        1073741824, False)
    retval = self.hardware._nvme_erase(block_device)

    mocked_execute.assert_has_calls([
        mock.call('nvme', 'id-ctrl', '/dev/nvme0n1', '-o', 'json'),
        mock.call('nvme', 'format', '/dev/nvme0n1', '-s', 1),
    ])
    self.assertTrue(retval)
@mock.patch.object(hardware.GenericHardwareManager,
                   '_is_linux_raid_member', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
def test_erase_block_device_nvme_failed(self, mocked_execute,
                                        mocked_raid_member):
    """A failing ``nvme format`` is reported as BlockDeviceEraseError."""
    info = self.node['driver_internal_info']
    # Key name matches the one looked up when erasing NVMe block devices.
    info['agent_enable_nvme_secure_erase'] = True
    mocked_raid_member.return_value = False
    # The capability query succeeds; the format command then fails.
    mocked_execute.side_effect = [
        (hws.NVME_CLI_INFO_TEMPLATE_CRYPTO_SUPPORTED, ''),
        (processutils.ProcessExecutionError()),
    ]

    block_device = hardware.BlockDevice('/dev/nvme0n1', "testdisk",
                                        1073741824, False)
    self.assertRaises(errors.BlockDeviceEraseError,
                      self.hardware._nvme_erase, block_device)
    # Make sure the error came from the format step, not the query.
    mocked_execute.assert_has_calls([
        mock.call('nvme', 'id-ctrl', '/dev/nvme0n1', '-o', 'json'),
        mock.call('nvme', 'format', '/dev/nvme0n1', '-s', 2),
    ])
@mock.patch.object(hardware.GenericHardwareManager,
                   '_is_linux_raid_member', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
def test_erase_block_device_nvme_format_unsupported(self, mocked_execute,
                                                    mocked_raid_member):
    """Devices without format support raise BlockDeviceEraseError."""
    info = self.node['driver_internal_info']
    # Key name matches the one looked up when erasing NVMe block devices.
    info['agent_enable_nvme_secure_erase'] = True
    mocked_raid_member.return_value = False
    mocked_execute.side_effect = [
        (hws.NVME_CLI_INFO_TEMPLATE_FORMAT_UNSUPPORTED, ''),
    ]

    block_device = hardware.BlockDevice('/dev/nvme0n1', "testdisk",
                                        1073741824, False)
    self.assertRaises(errors.BlockDeviceEraseError,
                      self.hardware._nvme_erase, block_device)
    # Only the capability query may run; no format must be attempted.
    mocked_execute.assert_called_once_with(
        'nvme', 'id-ctrl', '/dev/nvme0n1', '-o', 'json')
@mock.patch.object(hardware.GenericHardwareManager,
'get_os_install_device', autospec=True)

View File

@ -0,0 +1,8 @@
---
features:
- Adds support for NVMe-specific storage cleaning to IPA. Currently this is
implemented by using nvme-cli format functionality. Crypto Erase is used
if supported by the device, otherwise the code falls back to User Data
Erase. The operators can control NVMe cleaning by using the
``[deploy]/enable_nvme_secure_erase`` config option which controls the
``agent_enable_nvme_secure_erase`` internal setting in driver_internal_info.