Add cleaning/zapping support to IPA

This will add support for in band cleaning operations to IPA and
replace the decom API that was unused.

Adds API support for get_clean_steps, which returns a list of
supported clean steps for the node, execute_clean_step, to execute
one of the steps returned by get_clean_steps.

Adds versioning and naming for hardware managers, so if a new hardware
manager version is deployed in the middle of cleaning/zapping, the
cleaning/zapping will be restarted to avoid incompatabilities.

blueprint implement-cleaning-states
blueprint inband-raid-configuration
blueprint implement-zaping-states
Depends-On: Ia2500ed5afb72058b4c5e8f41307169381cbce48
Change-Id: I750b80b9bf98b3ddc5643bb4c14a67d2052239af
This commit is contained in:
Josh Gachnang 2015-03-03 17:46:04 -08:00
parent be44ac8d4d
commit 5f4fa7f27e
8 changed files with 351 additions and 60 deletions

View File

@ -284,6 +284,38 @@ class IncompatibleHardwareMethodError(RESTError):
super(IncompatibleHardwareMethodError, self).__init__(details)
class CleanVersionMismatch(RESTError):
"""Error raised when Ironic and the Agent have different versions.
If the agent version has changed since get_clean_steps was called by
the Ironic conductor, it indicates the agent has been updated (either
on purpose, or a new agent was deployed and the node was rebooted).
Since we cannot know if the upgraded IPA will work with cleaning as it
stands (steps could have different priorities, either in IPA or in
other Ironic interfaces), we should restart cleaning from the start.
"""
message = 'Clean version mismatch, reload agent with correct version'
def __init__(self, agent_version, node_version):
self.status_code = 409
details = ('Agent clean version: {0}, node clean version: {1}'
.format(agent_version, node_version))
super(CleanVersionMismatch, self).__init__(details)
class CleaningError(RESTError):
"""Error raised when a cleaning step fails."""
message = 'Clean step failed.'
def __init__(self, details=None):
if details is not None:
details = details
else:
details = self.message
super(CleaningError, self).__init__(details)
class ISCSIError(RESTError):
"""Error raised when an image cannot be written to a device."""

View File

@ -30,6 +30,7 @@ class AgentCommandStatus(object):
RUNNING = u'RUNNING'
SUCCEEDED = u'SUCCEEDED'
FAILED = u'FAILED'
CLEAN_VERSION_MISMATCH = u'CLEAN_VERSION_MISMATCH'
class BaseCommandResult(encoding.Serializable):
@ -153,6 +154,11 @@ class AsyncCommandResult(BaseCommandResult):
with self.command_state_lock:
self.command_result = result
self.command_status = AgentCommandStatus.SUCCEEDED
except errors.CleanVersionMismatch as e:
with self.command_state_lock:
self.command_error = e
self.command_status = AgentCommandStatus.CLEAN_VERSION_MISMATCH
self.command_result = None
except Exception as e:
if not isinstance(e, errors.RESTError):
e = errors.CommandExecutionError(str(e))

View File

@ -0,0 +1,86 @@
# Copyright 2015 Rackspace, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ironic_python_agent import errors
from ironic_python_agent.extensions import base
from ironic_python_agent import hardware
class CleanExtension(base.BaseAgentExtension):
@base.sync_command('get_clean_steps')
def get_clean_steps(self, node, ports):
"""Get the list of clean steps supported for the node and ports
:param node: A dict representation of a node
:param ports: A dict representation of ports attached to node
:returns: A list of clean steps with keys step, priority, and
reboot_requested
"""
# Results should be a dict, not a list
steps = hardware.dispatch_to_all_managers('get_clean_steps',
node, ports)
return {
'clean_steps': steps,
'hardware_manager_version': _get_current_clean_version()
}
@base.async_command('execute_clean_step')
def execute_clean_step(self, step, node, ports, clean_version=None,
**kwargs):
"""Execute a clean step
:param step: A clean step with 'step', 'priority' and 'interface' keys
:param node: A dict representation of a node
:param ports: A dict representation of ports attached to node
:param clean_version: The clean version as returned by
_get_current_clean_version() at the beginning of cleaning/zapping
:returns: a CommandResult object with command_result set to whatever
the step returns.
"""
# Ensure the agent is still the same version, or raise an exception
_check_clean_version(clean_version)
if 'step' not in step:
raise ValueError('Malformed clean_step, no "step" key: %s'.format(
step))
try:
result = hardware.dispatch_to_managers(step['step'], node, ports)
except Exception as e:
raise errors.CleaningError(
'Error performing clean_step %(step)s: %(err)s' %
{'step': step['step'], 'err': e})
# Return the step that was executed so we can dispatch
# to the appropriate Ironic interface
return {
'clean_result': result,
'clean_step': step
}
def _check_clean_version(clean_version=None):
"""Ensure the clean version hasn't changed."""
# If the version is None, assume this is the first run
if clean_version is None:
return
agent_version = _get_current_clean_version()
if clean_version != agent_version:
raise errors.CleanVersionMismatch(agent_version=agent_version,
node_version=clean_version)
def _get_current_clean_version():
return {version.get('name'): version.get('version')
for version in hardware.dispatch_to_all_managers(
'get_version').values()}

View File

@ -1,24 +0,0 @@
# Copyright 2013 Rackspace, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ironic_python_agent.extensions import base
from ironic_python_agent import hardware
class DecomExtension(base.BaseAgentExtension):
@base.async_command('erase_hardware')
def erase_hardware(self):
hardware.dispatch_to_managers('erase_devices')
return 'finished'

View File

@ -137,13 +137,16 @@ class HardwareManager(object):
"""
raise errors.IncompatibleHardwareMethodError
def erase_devices(self):
def erase_devices(self, node, ports):
"""Erase any device that holds user data.
By default this will attempt to erase block devices. This method can be
overridden in an implementation-specific hardware manager in order to
erase additional hardware, although backwards-compatible upstream
submissions are encouraged.
:param node: Ironic node object
:param ports: list of Ironic port objects
"""
block_devices = self.list_block_devices()
for block_device in block_devices:
@ -157,8 +160,73 @@ class HardwareManager(object):
hardware_info['memory'] = self.get_memory()
return hardware_info
def get_clean_steps(self, node, ports):
"""Get a list of clean steps with priority.
Returns a list of dicts of the following form:
{'step': the HardwareManager function to call.
'priority': the order steps will be run in. Ironic will sort all the
clean steps from all the drivers, with the largest priority
step being run first. If priority is set to 0, the step will
not be run during cleaning, but may be run during zapping.
'reboot_requested': Whether the agent should request Ironic reboots
the node via the power driver after the operation completes.
}
Note: multiple hardware managers may return the same step name. The
priority of the step will be the largest priority of steps with
the same name. The steps will be called using
`hardware.dispatch_to_managers` and handled by the best suited
hardware manager. If you need a step to be executed by only your
hardware manager, ensure it has a unique step name.
`node` and `ports` can be used by other hardware managers to further
determine if a clean step is supported for the node.
:param node: Ironic node object
:param ports: list of Ironic port objects
:return: a default list of decommission steps, as a list of
dictionaries
"""
return [
{
'step': 'erase_devices',
'priority': 10,
'interface': 'deploy',
'reboot_requested': False
}
]
def get_version(self):
"""Get a name and version for this hardware manager.
In order to avoid errors and make agent upgrades painless, cleaning
will check the version of all hardware managers during get_clean_steps
at the beginning of cleaning and before executing each step in the
agent.
The agent isn't aware of the steps being taken before or after via
out of band steps, so it can never know if a new step is safe to run.
Therefore, we default to restarting the whole process.
:returns: a dictionary with two keys: `name` and
`version`, where `name` is a string identifying the hardware
manager and `version` is an arbitrary version string. `name` will
be a class variable called HARDWARE_MANAGER_NAME, or default to
the class name and `version` will be a class variable called
HARDWARE_MANAGER_VERSION or default to '1.0'.
"""
return {
'name': getattr(self, 'HARDWARE_MANAGER_NAME',
type(self).__name__),
'version': getattr(self, 'HARDWARE_MANAGER_VERSION', '1.0')
}
class GenericHardwareManager(HardwareManager):
HARDWARE_MANAGER_NAME = 'generic_hardware_manager'
HARDWARE_MANAGER_VERSION = '1.0'
def __init__(self):
self.sys_path = '/sys'

View File

@ -0,0 +1,157 @@
# Copyright 2015 Rackspace, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import mock
from oslotest import base as test_base
from ironic_python_agent import errors
from ironic_python_agent.extensions import clean
class TestCleanExtension(test_base.BaseTestCase):
def setUp(self):
super(TestCleanExtension, self).setUp()
self.agent_extension = clean.CleanExtension()
self.node = {'uuid': 'dda135fb-732d-4742-8e72-df8f3199d244'}
self.ports = []
self.step = {
'GenericHardwareManager':
[{'step': 'erase_devices',
'priority': 10,
'interface': 'deploy'}]
}
self.version = {'generic': '1', 'specific': '1'}
@mock.patch('ironic_python_agent.extensions.clean.'
'_get_current_clean_version')
@mock.patch('ironic_python_agent.hardware.dispatch_to_all_managers')
def test_get_clean_steps(self, mock_dispatch, mock_version):
mock_version.return_value = self.version
manager_steps = {
'SpecificHardwareManager': [
{
'step': 'erase_devices',
'priority': 10,
'interface': 'deploy',
'reboot_requested': False
},
{
'step': 'upgrade_bios',
'priority': 20,
'interface': 'deploy',
'reboot_requested': True
}
],
'FirmwareHardwareManager': [
{
'step': 'upgrade_firmware',
'priority': 30,
'interface': 'deploy',
'reboot_requested': False
}
]
}
mock_dispatch.return_value = manager_steps
expected_return = {
'hardware_manager_version': self.version,
'clean_steps': manager_steps
}
async_results = self.agent_extension.get_clean_steps(node=self.node,
ports=self.ports)
self.assertEqual(expected_return, async_results.join().command_result)
@mock.patch('ironic_python_agent.hardware.dispatch_to_managers')
@mock.patch('ironic_python_agent.extensions.clean._check_clean_version')
def test_execute_clean_step(self, mock_version, mock_dispatch):
result = 'cleaned'
mock_dispatch.return_value = result
expected_result = {
'clean_step': self.step['GenericHardwareManager'][0],
'clean_result': result
}
async_result = self.agent_extension.execute_clean_step(
step=self.step['GenericHardwareManager'][0],
node=self.node, ports=self.ports,
clean_version=self.version)
async_result.join()
mock_version.assert_called_once_with(self.version)
mock_dispatch.assert_called_once_with(
self.step['GenericHardwareManager'][0]['step'],
self.node, self.ports)
self.assertEqual(expected_result, async_result.command_result)
@mock.patch('ironic_python_agent.extensions.clean._check_clean_version')
def test_execute_clean_step_no_step(self, mock_version):
async_result = self.agent_extension.execute_clean_step(
step={}, node=self.node, ports=self.ports,
clean_version=self.version)
async_result.join()
self.assertEqual('FAILED', async_result.command_status)
mock_version.assert_called_once_with(self.version)
@mock.patch('ironic_python_agent.hardware.dispatch_to_managers')
@mock.patch('ironic_python_agent.extensions.clean._check_clean_version')
def test_execute_clean_step_fail(self, mock_version, mock_dispatch):
mock_dispatch.side_effect = RuntimeError
async_result = self.agent_extension.execute_clean_step(
step=self.step['GenericHardwareManager'][0], node=self.node,
ports=self.ports, clean_version=self.version)
async_result.join()
self.assertEqual('FAILED', async_result.command_status)
mock_version.assert_called_once_with(self.version)
mock_dispatch.assert_called_once_with(
self.step['GenericHardwareManager'][0]['step'],
self.node, self.ports)
@mock.patch('ironic_python_agent.hardware.dispatch_to_managers')
@mock.patch('ironic_python_agent.extensions.clean._check_clean_version')
def test_execute_clean_step_version_mismatch(self, mock_version,
mock_dispatch):
mock_version.side_effect = errors.CleanVersionMismatch(
{'GenericHardwareManager': 1}, {'GenericHardwareManager': 2})
async_result = self.agent_extension.execute_clean_step(
step=self.step['GenericHardwareManager'][0], node=self.node,
ports=self.ports, clean_version=self.version)
async_result.join()
self.assertEqual('CLEAN_VERSION_MISMATCH', async_result.command_status)
mock_version.assert_called_once_with(self.version)
@mock.patch('ironic_python_agent.hardware.dispatch_to_all_managers')
def _get_current_clean_version(self, mock_dispatch):
mock_dispatch.return_value = {'SpecificHardwareManager':
{'name': 'specific', 'version': '1'},
'GenericHardwareManager':
{'name': 'generic', 'version': '1'}}
self.assertEqual(self.version, clean._get_current_clean_version())
@mock.patch('ironic_python_agent.hardware.dispatch_to_all_managers')
def test__check_clean_version_fail(self, mock_dispatch):
mock_dispatch.return_value = {'SpecificHardwareManager':
{'name': 'specific', 'version': '1'}}
self.assertRaises(errors.CleanVersionMismatch,
clean._check_clean_version,
{'not_specific': '1'})

View File

@ -1,34 +0,0 @@
# Copyright 2013 Rackspace, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import mock
from oslotest import base as test_base
from ironic_python_agent.extensions import decom
class TestDecomExtension(test_base.BaseTestCase):
def setUp(self):
super(TestDecomExtension, self).setUp()
self.agent_extension = decom.DecomExtension()
@mock.patch('ironic_python_agent.hardware.dispatch_to_managers',
autospec=True)
def test_erase_devices(self, mocked_dispatch):
result = self.agent_extension.erase_hardware()
result.join()
mocked_dispatch.assert_called_once_with('erase_devices')
self.assertTrue('result' in result.command_result.keys())
cmd_result_text = 'erase_hardware: finished'
self.assertEqual(cmd_result_text, result.command_result['result'])

View File

@ -20,7 +20,7 @@ console_scripts =
ironic_python_agent.extensions =
standby = ironic_python_agent.extensions.standby:StandbyExtension
decom = ironic_python_agent.extensions.decom:DecomExtension
clean = ironic_python_agent.extensions.clean:CleanExtension
flow = ironic_python_agent.extensions.flow:FlowExtension
iscsi = ironic_python_agent.extensions.iscsi:ISCSIExtension
image = ironic_python_agent.extensions.image:ImageExtension