Out-of-band `erase_devices` clean step for ProLiant servers

This commit adds a new management interface, `ilo5`, to support an
out-of-band sanitize disk erase clean step on iLO 5 based
ProLiant servers.

Change-Id: I7b9fc6add6f6a616bdee4d94cb577e7a27a794e5
Story: #2006394
Task: #36269
paresh-sao 2019-03-07 09:09:46 +00:00 committed by kesper
parent 9bbf5115c1
commit c0799c95ed
9 changed files with 415 additions and 3 deletions
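Since the step is registered with priority 0, it does not run during automated cleaning; an operator requests it through manual cleaning. A sketch of the clean-steps payload such a request would carry (the shape is Ironic's standard clean-step format; the erase_pattern values shown are this step's documented defaults):

# Clean-steps payload for manual cleaning of an ilo5 node.
# 'erase_pattern' is optional; these values are the defaults.
clean_steps = [{
    'interface': 'management',
    'step': 'erase_devices',
    'args': {'erase_pattern': {'hdd': 'overwrite', 'ssd': 'block'}},
}]
# This list is passed as the 'clean_steps' field of a provision-state
# request with target 'clean' against the bare metal API.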

driver-requirements.txt

@@ -4,7 +4,7 @@
# python projects they should package as optional dependencies for Ironic.
# These are available on pypi
-proliantutils>=2.7.0
+proliantutils>=2.9.0
pysnmp>=4.3.0,<5.0.0
python-scciclient>=0.8.0
python-dracclient>=3.0.0,<4.0.0
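A quick way to confirm a deployment meets the new floor is pkg_resources' requirement check (a minimal sketch; it raises if the installed version is older):

import pkg_resources

# Raises VersionConflict (or DistributionNotFound) unless
# proliantutils >= 2.9.0 is installed.
pkg_resources.require('proliantutils>=2.9.0')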

ironic/conf/ilo.py

@@ -66,6 +66,13 @@ opts = [
               default=2,
               help=_('Amount of time in seconds to wait in between power '
                      'operations')),
    cfg.IntOpt('oob_erase_devices_job_status_interval',
               min=10,
               default=300,
               help=_('Interval (in seconds) between periodic erase-devices '
                      'status checks to determine whether the asynchronous '
                      'out-of-band erase-devices was successfully finished or '
                      'not.')),
    cfg.StrOpt('ca_file',
               help=_('CA certificate file to validate iLO.')),
    cfg.StrOpt('default_boot_mode',
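The new option lands in Ironic's [ilo] configuration group; a minimal ironic.conf sketch (300 is simply the default shown above):

[ilo]
# Seconds between has_disk_erase_completed() polls while the clean step
# waits for the sanitize erase to finish (minimum 10).
oob_erase_devices_job_status_interval = 300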

ironic/drivers/ilo.py

@@ -82,3 +82,8 @@ class Ilo5Hardware(IloHardware):
    def supported_raid_interfaces(self):
        """List of supported raid interfaces."""
        return [raid.Ilo5RAID, noop.NoRAID]

    @property
    def supported_management_interfaces(self):
        """List of supported management interfaces."""
        return [management.Ilo5Management]
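For the conductor to expose the new interface it must be enabled together with the ``ilo5`` hardware type; a sketch of the relevant ironic.conf lines, mirroring the test configuration used later in this commit:

[DEFAULT]
enabled_hardware_types = ilo5
enabled_management_interfaces = ilo5
enabled_raid_interfaces = ilo5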

ironic/drivers/modules/ilo/management.py

@@ -17,6 +17,7 @@ iLO Management Interface
from ironic_lib import metrics_utils
from oslo_log import log as logging
from oslo_service import loopingcall
from oslo_utils import excutils
from oslo_utils import importutils
import six
@@ -25,7 +26,9 @@ import six.moves.urllib.parse as urlparse
from ironic.common import boot_devices
from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import states
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.conf import CONF
from ironic.drivers import base
from ironic.drivers.modules import agent_base_vendor
@@ -614,3 +617,163 @@ class IloManagement(base.ManagementInterface):
        except ilo_error.IloError as ilo_exception:
            raise exception.IloOperationError(operation=operation,
                                              error=ilo_exception)


class Ilo5Management(IloManagement):

    def _set_driver_internal_value(self, task, value, *keys):
        driver_internal_info = task.node.driver_internal_info
        for key in keys:
            driver_internal_info[key] = value
        task.node.driver_internal_info = driver_internal_info
        task.node.save()

    def _pop_driver_internal_values(self, task, *keys):
        driver_internal_info = task.node.driver_internal_info
        for key in keys:
            driver_internal_info.pop(key, None)
        task.node.driver_internal_info = driver_internal_info
        task.node.save()

    def _set_clean_failed(self, task, msg):
        LOG.error("Out-of-band sanitize disk erase job failed for node "
                  "%(node)s. Message: '%(message)s'.",
                  {'node': task.node.uuid, 'message': msg})
        task.node.last_error = msg
        task.process_event('fail')

    def _wait_for_disk_erase_status(self, node):
        """Wait for out-of-band sanitize disk erase to complete."""
        interval = CONF.ilo.oob_erase_devices_job_status_interval
        ilo_object = ilo_common.get_ilo_object(node)
        time_elps = [0]

        # This loops indefinitely until the disk erase is complete.
        def _wait():
            if ilo_object.has_disk_erase_completed():
                raise loopingcall.LoopingCallDone()

            time_elps[0] += interval
            LOG.debug("%(tim)s secs elapsed while waiting for out-of-band "
                      "sanitize disk erase to complete for node %(node)s.",
                      {'tim': time_elps[0], 'node': node.uuid})

        # Start a timer and wait for the operation to complete.
        timer = loopingcall.FixedIntervalLoopingCall(_wait)
        timer.start(interval=interval).wait()
        return True

    def _validate_erase_pattern(self, erase_pattern, node):
        invalid = False
        if isinstance(erase_pattern, dict):
            for device_type, pattern in erase_pattern.items():
                if device_type == 'hdd' and pattern in (
                        'overwrite', 'crypto', 'zero'):
                    continue
                elif device_type == 'ssd' and pattern in (
                        'block', 'crypto', 'zero'):
                    continue
                else:
                    invalid = True
                    break
        else:
            invalid = True

        if invalid:
            msg = (_("Erase pattern '%(value)s' is invalid. Clean step "
                     "'erase_devices' is not executed for %(node)s. Supported "
                     "patterns are, for "
                     "'hdd': ('overwrite', 'crypto', 'zero') and for "
                     "'ssd': ('block', 'crypto', 'zero'). "
                     "Ex. {'hdd': 'overwrite', 'ssd': 'block'}")
                   % {'value': erase_pattern, 'node': node.uuid})
            LOG.error(msg)
            raise exception.InvalidParameterValue(msg)

    @METRICS.timer('Ilo5Management.erase_devices')
    @base.clean_step(priority=0, abortable=False, argsinfo={
        'erase_pattern': {
            'description': (
                'Dictionary of disk type and corresponding erase pattern '
                'to be used to perform specific out-of-band sanitize disk '
                'erase. Supported values are, '
                'for "hdd": ("overwrite", "crypto", "zero"), '
                'for "ssd": ("block", "crypto", "zero"). Default pattern is: '
                '{"hdd": "overwrite", "ssd": "block"}.'
            ),
            'required': False
        }
    })
    def erase_devices(self, task, **kwargs):
        """Erase all the drives on the node.

        This method performs out-of-band sanitize disk erase on all the
        supported physical drives in the node. This erase cannot be performed
        on logical drives.

        :param task: a TaskManager instance.
        :raises: InvalidParameterValue, if any of the arguments are invalid.
        :raises: IloError on an error from iLO.
        """
        erase_pattern = kwargs.get('erase_pattern',
                                   {'hdd': 'overwrite', 'ssd': 'block'})
        node = task.node
        self._validate_erase_pattern(erase_pattern, node)
        driver_internal_info = node.driver_internal_info
        LOG.debug("Calling out-of-band sanitize disk erase for node %(node)s",
                  {'node': node.uuid})
        try:
            ilo_object = ilo_common.get_ilo_object(node)
            disk_types = ilo_object.get_available_disk_types()
            LOG.info("Disk types detected are: %(disk_types)s. Sanitize disk "
                     "erase is now exercised for one disk type after another "
                     "for node %(node)s.",
                     {'disk_types': disk_types, 'node': node.uuid})

            if disk_types:
                # Disk erase is first executed for HDDs; SSDs are attempted
                # only after a reboot, since both share the same Redfish API
                # and the earlier request would otherwise be overwritten.
                if not driver_internal_info.get(
                        'ilo_disk_erase_hdd_check') and ('HDD' in disk_types):
                    ilo_object.do_disk_erase('HDD', erase_pattern.get('hdd'))
                    self._set_driver_internal_value(
                        task, True, 'cleaning_reboot',
                        'ilo_disk_erase_hdd_check')
                    self._set_driver_internal_value(
                        task, False, 'skip_current_clean_step')
                    deploy_opts = deploy_utils.build_agent_options(task.node)
                    task.driver.boot.prepare_ramdisk(task, deploy_opts)
                    manager_utils.node_power_action(task, states.REBOOT)
                    return states.CLEANWAIT

                if not driver_internal_info.get(
                        'ilo_disk_erase_ssd_check') and ('SSD' in disk_types):
                    ilo_object.do_disk_erase('SSD', erase_pattern.get('ssd'))
                    self._set_driver_internal_value(
                        task, True, 'ilo_disk_erase_hdd_check',
                        'ilo_disk_erase_ssd_check', 'cleaning_reboot')
                    self._set_driver_internal_value(
                        task, False, 'skip_current_clean_step')
                    deploy_opts = deploy_utils.build_agent_options(task.node)
                    task.driver.boot.prepare_ramdisk(task, deploy_opts)
                    manager_utils.node_power_action(task, states.REBOOT)
                    return states.CLEANWAIT

                # Wait until the disk erase completes.
                if self._wait_for_disk_erase_status(task.node):
                    LOG.info("For node %(uuid)s erase_devices clean "
                             "step is done.", {'uuid': task.node.uuid})
                    self._pop_driver_internal_values(
                        task, 'ilo_disk_erase_hdd_check',
                        'ilo_disk_erase_ssd_check')
            else:
                LOG.info("No drives found to perform out-of-band sanitize "
                         "disk erase for node %(node)s", {'node': node.uuid})
        except ilo_error.IloError as ilo_exception:
            self._pop_driver_internal_values(task,
                                             'ilo_disk_erase_hdd_check',
                                             'ilo_disk_erase_ssd_check',
                                             'cleaning_reboot',
                                             'skip_current_clean_step')
            self._set_clean_failed(task, ilo_exception)
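The step above is re-entrant: the conductor calls it once per boot of the node. On the first pass it starts the HDD erase, sets ilo_disk_erase_hdd_check, reboots, and returns CLEANWAIT; the second pass does the same for SSDs; on the third, with both flags set, it blocks in _wait_for_disk_erase_status. That wait is built on oslo.service's fixed-interval looping call; a self-contained sketch of the same polling pattern (wait_until and the fake poll function are illustrative stand-ins, not part of the commit):

from oslo_service import loopingcall


def wait_until(poll, interval):
    """Block until poll() returns True, checking every `interval` seconds."""
    def _check():
        # Raising LoopingCallDone stops the loop and unblocks .wait().
        if poll():
            raise loopingcall.LoopingCallDone()

    loopingcall.FixedIntervalLoopingCall(_check).start(
        interval=interval).wait()


# Illustrative usage: the poll reports completion on its third run.
state = {'polls': 0}


def fake_has_disk_erase_completed():
    state['polls'] += 1
    return state['polls'] >= 3


wait_until(fake_has_disk_erase_completed, interval=1)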

ironic/tests/unit/drivers/modules/ilo/test_management.py

@@ -22,16 +22,21 @@ from ironic.common import boot_devices
from ironic.common import exception
from ironic.common import states
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.drivers.modules import deploy_utils
from ironic.drivers.modules.ilo import common as ilo_common
from ironic.drivers.modules.ilo import management as ilo_management
from ironic.drivers.modules import ipmitool
from ironic.drivers import utils as driver_utils
from ironic.tests.unit.db import base as db_base
from ironic.tests.unit.db import utils as db_utils
from ironic.tests.unit.drivers.modules.ilo import test_common
from ironic.tests.unit.objects import utils as obj_utils

ilo_error = importutils.try_import('proliantutils.exception')

INFO_DICT = db_utils.get_test_ilo_info()


class IloManagementTestCase(test_common.BaseIloTest):
@@ -897,3 +902,219 @@ class IloManagementTestCase(test_common.BaseIloTest):
        self.assertRaises(exception.IloOperationNotSupported,
                          task.driver.management.inject_nmi,
                          task)


class Ilo5ManagementTestCase(db_base.DbTestCase):

    def setUp(self):
        super(Ilo5ManagementTestCase, self).setUp()
        self.driver = mock.Mock(management=ilo_management.Ilo5Management())
        self.clean_step = {'step': 'erase_devices',
                           'interface': 'management'}
        n = {
            'driver': 'ilo5',
            'driver_info': INFO_DICT,
            'clean_step': self.clean_step,
        }
        self.config(enabled_hardware_types=['ilo5'],
                    enabled_boot_interfaces=['ilo-virtual-media'],
                    enabled_console_interfaces=['ilo'],
                    enabled_deploy_interfaces=['iscsi'],
                    enabled_inspect_interfaces=['ilo'],
                    enabled_management_interfaces=['ilo5'],
                    enabled_power_interfaces=['ilo'],
                    enabled_raid_interfaces=['ilo5'])
        self.node = obj_utils.create_test_node(self.context, **n)

    @mock.patch.object(deploy_utils, 'build_agent_options',
                       autospec=True)
    @mock.patch.object(ilo_common, 'get_ilo_object', autospec=True)
    @mock.patch.object(manager_utils, 'node_power_action', autospec=True)
    def test_erase_devices_hdd(self, mock_power, ilo_mock, build_agent_mock):
        ilo_mock_object = ilo_mock.return_value
        ilo_mock_object.get_available_disk_types.return_value = ['HDD']
        build_agent_mock.return_value = []
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=False) as task:
            result = task.driver.management.erase_devices(task)
            self.assertTrue(
                task.node.driver_internal_info.get(
                    'ilo_disk_erase_hdd_check'))
            self.assertTrue(
                task.node.driver_internal_info.get(
                    'cleaning_reboot'))
            self.assertFalse(
                task.node.driver_internal_info.get(
                    'skip_current_clean_step'))
            ilo_mock_object.do_disk_erase.assert_called_once_with(
                'HDD', 'overwrite')
            self.assertEqual(states.CLEANWAIT, result)
            mock_power.assert_called_once_with(task, states.REBOOT)

    @mock.patch.object(deploy_utils, 'build_agent_options',
                       autospec=True)
    @mock.patch.object(ilo_common, 'get_ilo_object', autospec=True)
    @mock.patch.object(manager_utils, 'node_power_action', autospec=True)
    def test_erase_devices_ssd(self, mock_power, ilo_mock, build_agent_mock):
        ilo_mock_object = ilo_mock.return_value
        ilo_mock_object.get_available_disk_types.return_value = ['SSD']
        build_agent_mock.return_value = []
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=False) as task:
            result = task.driver.management.erase_devices(task)
            self.assertTrue(
                task.node.driver_internal_info.get(
                    'ilo_disk_erase_ssd_check'))
            self.assertTrue(
                task.node.driver_internal_info.get(
                    'ilo_disk_erase_hdd_check'))
            self.assertTrue(
                task.node.driver_internal_info.get(
                    'cleaning_reboot'))
            self.assertFalse(
                task.node.driver_internal_info.get(
                    'skip_current_clean_step'))
            ilo_mock_object.do_disk_erase.assert_called_once_with(
                'SSD', 'block')
            self.assertEqual(states.CLEANWAIT, result)
            mock_power.assert_called_once_with(task, states.REBOOT)

    @mock.patch.object(deploy_utils, 'build_agent_options',
                       autospec=True)
    @mock.patch.object(ilo_common, 'get_ilo_object', autospec=True)
    @mock.patch.object(manager_utils, 'node_power_action', autospec=True)
    def test_erase_devices_ssd_when_hdd_done(self, mock_power, ilo_mock,
                                             build_agent_mock):
        build_agent_mock.return_value = []
        ilo_mock_object = ilo_mock.return_value
        ilo_mock_object.get_available_disk_types.return_value = ['HDD', 'SSD']
        self.node.driver_internal_info = {'ilo_disk_erase_hdd_check': True}
        self.node.save()
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=False) as task:
            result = task.driver.management.erase_devices(task)
            self.assertTrue(
                task.node.driver_internal_info.get(
                    'ilo_disk_erase_hdd_check'))
            self.assertTrue(
                task.node.driver_internal_info.get(
                    'ilo_disk_erase_ssd_check'))
            self.assertTrue(
                task.node.driver_internal_info.get(
                    'cleaning_reboot'))
            self.assertFalse(
                task.node.driver_internal_info.get(
                    'skip_current_clean_step'))
            ilo_mock_object.do_disk_erase.assert_called_once_with(
                'SSD', 'block')
            self.assertEqual(states.CLEANWAIT, result)
            mock_power.assert_called_once_with(task, states.REBOOT)

    @mock.patch.object(ilo_management.LOG, 'info')
    @mock.patch.object(ilo_management.Ilo5Management,
                       '_wait_for_disk_erase_status', autospec=True)
    @mock.patch.object(ilo_common, 'get_ilo_object', autospec=True)
    def test_erase_devices_completed(self, ilo_mock, disk_status_mock,
                                     log_mock):
        ilo_mock_object = ilo_mock.return_value
        ilo_mock_object.get_available_disk_types.return_value = ['HDD', 'SSD']
        disk_status_mock.return_value = True
        self.node.driver_internal_info = {'ilo_disk_erase_hdd_check': True,
                                          'ilo_disk_erase_ssd_check': True}
        self.node.save()
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=False) as task:
            task.driver.management.erase_devices(task)
            self.assertFalse(
                task.node.driver_internal_info.get(
                    'ilo_disk_erase_hdd_check'))
            self.assertFalse(
                task.node.driver_internal_info.get(
                    'ilo_disk_erase_ssd_check'))
            self.assertTrue(log_mock.called)

    @mock.patch.object(deploy_utils, 'build_agent_options',
                       autospec=True)
    @mock.patch.object(ilo_common, 'get_ilo_object', autospec=True)
    @mock.patch.object(manager_utils, 'node_power_action', autospec=True)
    def test_erase_devices_hdd_with_erase_pattern_zero(
            self, mock_power, ilo_mock, build_agent_mock):
        ilo_mock_object = ilo_mock.return_value
        ilo_mock_object.get_available_disk_types.return_value = ['HDD']
        build_agent_mock.return_value = []
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=False) as task:
            result = task.driver.management.erase_devices(
                task, erase_pattern={'hdd': 'zero', 'ssd': 'zero'})
            self.assertTrue(
                task.node.driver_internal_info.get(
                    'ilo_disk_erase_hdd_check'))
            self.assertTrue(
                task.node.driver_internal_info.get(
                    'cleaning_reboot'))
            self.assertFalse(
                task.node.driver_internal_info.get(
                    'skip_current_clean_step'))
            ilo_mock_object.do_disk_erase.assert_called_once_with(
                'HDD', 'zero')
            self.assertEqual(states.CLEANWAIT, result)
            mock_power.assert_called_once_with(task, states.REBOOT)

    @mock.patch.object(ilo_management.LOG, 'info')
    @mock.patch.object(ilo_common, 'get_ilo_object', autospec=True)
    def test_erase_devices_when_no_drive_available(
            self, ilo_mock, log_mock):
        ilo_mock_object = ilo_mock.return_value
        ilo_mock_object.get_available_disk_types.return_value = []
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=False) as task:
            task.driver.management.erase_devices(task)
            self.assertTrue(log_mock.called)

    def test_erase_devices_hdd_with_invalid_format_erase_pattern(
            self):
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=False) as task:
            self.assertRaises(exception.InvalidParameterValue,
                              task.driver.management.erase_devices,
                              task, erase_pattern=123)

    def test_erase_devices_hdd_with_invalid_device_type_erase_pattern(
            self):
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=False) as task:
            self.assertRaises(exception.InvalidParameterValue,
                              task.driver.management.erase_devices,
                              task, erase_pattern={'xyz': 'block'})

    def test_erase_devices_hdd_with_invalid_erase_pattern(
            self):
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=False) as task:
            self.assertRaises(exception.InvalidParameterValue,
                              task.driver.management.erase_devices,
                              task, erase_pattern={'ssd': 'xyz'})

    @mock.patch.object(ilo_common, 'get_ilo_object', autospec=True)
    @mock.patch.object(ilo_management.Ilo5Management, '_set_clean_failed')
    def test_erase_devices_hdd_ilo_error(self, set_clean_failed_mock,
                                         ilo_mock):
        ilo_mock_object = ilo_mock.return_value
        ilo_mock_object.get_available_disk_types.return_value = ['HDD']
        exc = ilo_error.IloError('error')
        ilo_mock_object.do_disk_erase.side_effect = exc
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=False) as task:
            task.driver.management.erase_devices(task)
            ilo_mock_object.do_disk_erase.assert_called_once_with(
                'HDD', 'overwrite')
            self.assertNotIn('ilo_disk_erase_hdd_check',
                             task.node.driver_internal_info)
            self.assertNotIn('ilo_disk_erase_ssd_check',
                             task.node.driver_internal_info)
            self.assertNotIn('cleaning_reboot',
                             task.node.driver_internal_info)
            self.assertNotIn('skip_current_clean_step',
                             task.node.driver_internal_info)
            set_clean_failed_mock.assert_called_once_with(
                task, exc)

ironic/tests/unit/drivers/modules/ilo/test_raid.py

@@ -57,7 +57,7 @@ class Ilo5RAIDTestCase(db_base.DbTestCase):
                    enabled_console_interfaces=['ilo'],
                    enabled_deploy_interfaces=['iscsi'],
                    enabled_inspect_interfaces=['ilo'],
-                   enabled_management_interfaces=['ilo'],
+                   enabled_management_interfaces=['ilo5'],
                    enabled_power_interfaces=['ilo'],
                    enabled_raid_interfaces=['ilo5'])
        self.node = obj_utils.create_test_node(self.context, **n)

ironic/tests/unit/drivers/test_ilo.py

@@ -19,6 +19,7 @@ Test class for iLO Drivers
from ironic.conductor import task_manager
from ironic.drivers import ilo
from ironic.drivers.modules import agent
from ironic.drivers.modules.ilo import management
from ironic.drivers.modules.ilo import raid
from ironic.drivers.modules import inspector
from ironic.drivers.modules import iscsi_deploy
@@ -177,7 +178,7 @@ class Ilo5HardwareTestCase(db_base.DbTestCase):
                    enabled_console_interfaces=['ilo'],
                    enabled_deploy_interfaces=['iscsi', 'direct'],
                    enabled_inspect_interfaces=['ilo'],
-                   enabled_management_interfaces=['ilo'],
+                   enabled_management_interfaces=['ilo5'],
                    enabled_power_interfaces=['ilo'],
                    enabled_raid_interfaces=['ilo5'],
                    enabled_rescue_interfaces=['no-rescue', 'agent'],
@@ -187,6 +188,8 @@ class Ilo5HardwareTestCase(db_base.DbTestCase):
        node = obj_utils.create_test_node(self.context, driver='ilo5')
        with task_manager.acquire(self.context, node.id) as task:
            self.assertIsInstance(task.driver.raid, raid.Ilo5RAID)
            self.assertIsInstance(task.driver.management,
                                  management.Ilo5Management)

    def test_override_with_no_raid(self):
        self.config(enabled_raid_interfaces=['no-raid', 'ilo5'])


@@ -0,0 +1,12 @@
---
features:
  - Adds functionality to perform an out-of-band sanitize disk-erase
    operation on iLO 5 based HPE ProLiant servers. A new management
    interface ``ilo5`` has been added to the ``ilo5`` hardware type, and a
    clean step ``erase_devices`` has been added to that interface to
    support this operation.
upgrade:
  - The ``do_disk_erase``, ``has_disk_erase_completed`` and
    ``get_available_disk_types`` interfaces of the 'proliantutils' library
    have been enhanced to support the out-of-band sanitize disk-erase
    operation for the ``ilo5`` hardware type. To leverage this feature,
    the 'proliantutils' library needs to be upgraded to version '2.9.0'.

setup.cfg

@@ -100,6 +100,7 @@ ironic.hardware.interfaces.management =
    ibmc = ironic.drivers.modules.ibmc.management:IBMCManagement
    idrac = ironic.drivers.modules.drac.management:DracManagement
    ilo = ironic.drivers.modules.ilo.management:IloManagement
    ilo5 = ironic.drivers.modules.ilo.management:Ilo5Management
    intel-ipmitool = ironic.drivers.modules.intel_ipmi.management:IntelIPMIManagement
    ipmitool = ironic.drivers.modules.ipmitool:IPMIManagement
    irmc = ironic.drivers.modules.irmc.management:IRMCManagement
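Ironic resolves these entry points with stevedore; a minimal sketch of loading the new interface by name outside the conductor (namespace and name as registered above; instantiating the interface directly is only for illustration):

from stevedore import driver

# Look up the 'ilo5' plugin registered under the management-interface
# namespace and instantiate it (the interface takes no constructor args).
mgr = driver.DriverManager(
    namespace='ironic.hardware.interfaces.management',
    name='ilo5',
    invoke_on_load=True)
ilo5_management = mgr.driver  # an Ilo5Management instance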