Implement cleaning/zapping for the agent driver

Adds get_clean_steps and execute_clean_step for the agent. Also
implements prepare and tear_down for cleaning to ensure the agent
is booted during cleaning. These bits should be factored out to
the boot interface when that merges in the L (Liberty) release.

Adds checks for cleaning steps in the heartbeat handler.

Adds parameters to the dhcp functions to pass in VIFs instead
of relying on the cached ones in the Ironic port objects. As
cleaning can start before Nova can unplug those ports, we
should ignore the cached VIFs and use the ones we create.

Changes _do_next_clean_step to acquire a lock on its own so
cleaning from do_node_tear_down and continue_cleaning both
work.

Implements blueprint implement-cleaning-states
Change-Id: Ia2500ed5afb72058b4c5e8f41307169381cbce48
Depends-on: Ic38de10668c97648d073fdf9a3afc59712057849
This commit is contained in:
Josh Gachnang 2015-02-17 17:42:18 -08:00 committed by Jim Rollenhagen
parent c59be0bb00
commit 534d9ee96a
12 changed files with 764 additions and 32 deletions

View File

@ -351,6 +351,11 @@
# Neutron bootfile DHCP parameter. (string value)
#agent_pxe_bootfile_name=pxelinux.0
# Priority to run in-band erase devices via the Ironic Python
# Agent ramdisk. If unset, will use the priority set in the
# ramdisk (defaults to 10 for the GenericHardwareManager). If
# set to 0, will not run during cleaning. (integer value)
#agent_erase_devices_priority=<None>
#
# Options defined in ironic.drivers.modules.agent_base_vendor
@ -1122,6 +1127,11 @@
# (string value)
#auth_strategy=keystone
# UUID of the network to create Neutron ports on when booting
# to a ramdisk for cleaning/zapping using Neutron DHCP (string
# value)
#cleaning_network_uuid=<None>
[oslo_messaging_amqp]

View File

@ -74,7 +74,7 @@ class DHCPFactory(object):
cls._dhcp_provider = _extension_manager.driver
def update_dhcp(self, task, dhcp_opts):
def update_dhcp(self, task, dhcp_opts, ports=None):
"""Send or update the DHCP BOOT options for this node.
:param task: A TaskManager instance.
@ -88,8 +88,10 @@ class DHCPFactory(object):
'opt_value': '123.123.123.456'},
{'opt_name': 'tftp-server',
'opt_value': '123.123.123.123'}]
:param ports: a list of Neutron port dicts to update DHCP options on.
If None, will get the list of ports from the Ironic port objects.
"""
self.provider.update_dhcp_opts(task, dhcp_opts)
self.provider.update_dhcp_opts(task, dhcp_opts, ports)
@property
def provider(self):

View File

@ -27,6 +27,7 @@ from ironic.common.i18n import _LE
from ironic.common.i18n import _LW
from ironic.common import keystone
from ironic.common import network
from ironic.conductor import manager
from ironic.dhcp import base
from ironic.drivers.modules import ssh
from ironic.openstack.common import log as logging
@ -48,7 +49,11 @@ neutron_opts = [
'to neutron. Can be either "keystone" or "noauth". '
'Running neutron in noauth mode (related to but not '
'affected by this setting) is insecure and should only be '
'used for testing.')
'used for testing.'),
cfg.StrOpt('cleaning_network_uuid',
help='UUID of the network to create Neutron ports on when '
'booting to a ramdisk for cleaning/zapping using Neutron '
'DHCP')
]
CONF = cfg.CONF
@ -140,11 +145,11 @@ class NeutronDHCPApi(base.BaseDHCP):
"port %s."), port_id)
raise exception.FailedToUpdateMacOnPort(port_id=port_id)
def update_dhcp_opts(self, task, options):
def update_dhcp_opts(self, task, options, vifs=None):
"""Send or update the DHCP BOOT options for this node.
:param task: A TaskManager instance.
:param dhcp_opts: this will be a list of dicts, e.g.
:param options: this will be a list of dicts, e.g.
::
@ -154,8 +159,14 @@ class NeutronDHCPApi(base.BaseDHCP):
'opt_value': '123.123.123.456'},
{'opt_name': 'tftp-server',
'opt_value': '123.123.123.123'}]
:param vifs: a dict of Neutron port dicts to update DHCP options on.
The keys should be Ironic port UUIDs, and the values should be
Neutron port UUIDs
If the value is None, will get the list of ports from the Ironic
port objects.
"""
vifs = network.get_node_vif_ids(task)
if vifs is None:
vifs = network.get_node_vif_ids(task)
if not vifs:
raise exception.FailedToUpdateDHCPOptOnPort(
_("No VIFs found for node %(node)s when attempting "
@ -275,3 +286,85 @@ class NeutronDHCPApi(base.BaseDHCP):
{'node': task.node.uuid, 'ports': failures})
return ip_addresses
def create_cleaning_ports(self, task):
    """Create Neutron ports for each Ironic port to boot the ramdisk.

    One Neutron port is created on the configured cleaning network per
    Ironic port on the node, reusing the Ironic port's MAC address.

    :param task: a TaskManager instance.
    :raises: InvalidParameterValue if the cleaning network UUID is not set.
    :returns: a dictionary in the form {port.uuid: neutron_port['id']}
    """
    if not CONF.neutron.cleaning_network_uuid:
        raise exception.InvalidParameterValue(_('Valid cleaning network '
                                                'UUID not provided'))
    neutron_client = _build_client(task.context.auth_token)
    body = {
        'port': {
            'network_id': CONF.neutron.cleaning_network_uuid,
            'admin_state_up': True,
        }
    }
    ports = {}
    for ironic_port in task.ports:
        body['port']['mac_address'] = ironic_port.address
        try:
            port = neutron_client.create_port(body)
        except neutron_client_exc.ConnectionFailed as e:
            msg = (_('Could not create cleaning port on network %(net)s '
                     'from %(node)s. %(exc)s') %
                   {'net': CONF.neutron.cleaning_network_uuid,
                    'node': task.node.uuid,
                    'exc': e})
            LOG.exception(msg)
            return manager.cleaning_error_handler(task, msg)
        if not port.get('port') or not port['port'].get('id'):
            # Rollback changes
            try:
                self.delete_cleaning_ports(task)
            except Exception:
                # Log the error, but continue to cleaning error handler
                LOG.exception(_LE('Failed to rollback cleaning port '
                                  'changes for node %s') % task.node.uuid)
            # BUG FIX: '%(node)s' was interpolated against a bare string
            # (task.node.uuid), which raises TypeError; a mapping is
            # required for named %-conversions.
            msg = (_('Failed to create cleaning ports for node '
                     '%(node)s') % {'node': task.node.uuid})
            LOG.error(msg)
            return manager.cleaning_error_handler(task, msg)
        # Match return value of get_node_vif_ids()
        ports[ironic_port.uuid] = port['port']['id']
    return ports
def delete_cleaning_ports(self, task):
    """Remove the Neutron ports created for booting the cleaning ramdisk.

    Only ports on the cleaning network whose MAC address matches one of
    this node's Ironic ports are deleted; other tenants' ports on the
    same network are left alone.

    :param task: a TaskManager instance.
    """
    neutron_client = _build_client(task.context.auth_token)
    node_macs = {p.address for p in task.ports}
    try:
        listing = neutron_client.list_ports(
            network_id=CONF.neutron.cleaning_network_uuid)
    except neutron_client_exc.ConnectionFailed as e:
        msg = (_('Could not get cleaning network vif for %(node)s '
                 'from Neutron, possible network issue. %(exc)s') %
               {'node': task.node.uuid,
                'exc': e})
        LOG.exception(msg)
        return manager.cleaning_error_handler(task, msg)
    # Iterate the list of Neutron port dicts, remove the ones we added
    for neutron_port in listing.get('ports', []):
        # Only delete ports using the node's mac addresses
        if neutron_port.get('mac_address') not in node_macs:
            continue
        try:
            neutron_client.delete_port(neutron_port.get('id'))
        except neutron_client_exc.ConnectionFailed as e:
            msg = (_('Could not remove cleaning ports on network '
                     '%(net)s from %(node)s, possible network issue. '
                     '%(exc)s') %
                   {'net': CONF.neutron.cleaning_network_uuid,
                    'node': task.node.uuid,
                    'exc': e})
            LOG.exception(msg)
            return manager.cleaning_error_handler(task, msg)

View File

@ -18,6 +18,7 @@ import time
from oslo_config import cfg
from oslo_utils import excutils
from ironic.common import boot_devices
from ironic.common import dhcp_factory
from ironic.common import exception
from ironic.common.glance_service import service_utils
@ -51,6 +52,12 @@ agent_opts = [
cfg.StrOpt('agent_pxe_bootfile_name',
default='pxelinux.0',
help='Neutron bootfile DHCP parameter.'),
cfg.IntOpt('agent_erase_devices_priority',
help='Priority to run in-band erase devices via the Ironic '
'Python Agent ramdisk. If unset, will use the priority '
'set in the ramdisk (defaults to 10 for the '
'GenericHardwareManager). If set to 0, will not run '
'during cleaning.')
]
CONF = cfg.CONF
@ -185,6 +192,40 @@ def build_instance_info_for_deploy(task):
return instance_info
def _prepare_pxe_boot(task):
    """Prepare the files required for PXE booting the agent."""
    node = task.node
    image_info = _get_tftp_image_info(node)
    config_options = _build_pxe_config_options(node, image_info)
    pxe_utils.create_pxe_config(task, config_options,
                                CONF.agent.agent_pxe_config_template)
    _cache_tftp_images(task.context, node, image_info)
def _do_pxe_boot(task, ports=None):
    """Reboot the node so it PXE boots into the agent ramdisk.

    :param task: a TaskManager instance.
    :param ports: a list of Neutron port dicts to update DHCP options on.
        If None, the DHCP provider falls back to the VIFs cached on the
        Ironic port objects.
    """
    dhcp_opts = pxe_utils.dhcp_options_for_instance(task)
    dhcp_factory.DHCPFactory().update_dhcp(task, dhcp_opts, ports)
    manager_utils.node_set_boot_device(task, boot_devices.PXE,
                                       persistent=True)
    manager_utils.node_power_action(task, states.REBOOT)
def _clean_up_pxe(task):
    """Clean up left over PXE and DHCP files."""
    image_info = _get_tftp_image_info(task.node)
    # Each image_info entry's second element is the cached file path.
    for file_info in image_info.values():
        utils.unlink_without_raise(file_info[1])
    AgentTFTPImageCache().clean_up()
    pxe_utils.clean_up_pxe_config(task)
class AgentDeploy(base.DeployInterface):
"""Interface for deploy-related actions."""
@ -238,12 +279,7 @@ class AgentDeploy(base.DeployInterface):
:param task: a TaskManager instance.
:returns: status of the deploy. One of ironic.common.states.
"""
dhcp_opts = pxe_utils.dhcp_options_for_instance(task)
provider = dhcp_factory.DHCPFactory()
provider.update_dhcp(task, dhcp_opts)
manager_utils.node_set_boot_device(task, 'pxe', persistent=True)
manager_utils.node_power_action(task, states.REBOOT)
_do_pxe_boot(task)
return states.DEPLOYWAIT
@task_manager.require_exclusive_lock
@ -262,12 +298,7 @@ class AgentDeploy(base.DeployInterface):
:param task: a TaskManager instance.
"""
node = task.node
pxe_info = _get_tftp_image_info(task.node)
pxe_options = _build_pxe_config_options(task.node, pxe_info)
pxe_utils.create_pxe_config(task,
pxe_options,
CONF.agent.agent_pxe_config_template)
_cache_tftp_images(task.context, node, pxe_info)
_prepare_pxe_boot(task)
node.instance_info = build_instance_info_for_deploy(task)
node.save()
@ -288,13 +319,7 @@ class AgentDeploy(base.DeployInterface):
:param task: a TaskManager instance.
"""
pxe_info = _get_tftp_image_info(task.node)
for label in pxe_info:
path = pxe_info[label][1]
utils.unlink_without_raise(path)
AgentTFTPImageCache().clean_up()
pxe_utils.clean_up_pxe_config(task)
_clean_up_pxe(task)
def take_over(self, task):
"""Take over management of this node from a dead conductor.
@ -315,6 +340,59 @@ class AgentDeploy(base.DeployInterface):
"""
pass
def get_clean_steps(self, task):
    """Get the list of clean steps from the agent.

    :param task: a TaskManager object containing the node
    :returns: A list of clean step dictionaries
    """
    steps = deploy_utils.agent_get_clean_steps(task)
    # BUG FIX: use `is not None` -- the option docs promise that an
    # explicit 0 disables erase_devices, but a truthiness test skips the
    # override when the operator sets 0, leaving the ramdisk's priority.
    if CONF.agent.agent_erase_devices_priority is not None:
        for step in steps:
            if (step.get('step') == 'erase_devices' and
                    step.get('interface') == 'deploy'):
                # Override with operator set priority
                step['priority'] = CONF.agent.agent_erase_devices_priority
    return steps
def execute_clean_step(self, task, step):
    """Execute a clean step asynchronously on the agent.

    :param task: a TaskManager object containing the node
    :param step: a clean step dictionary to execute
    :raises: NodeCleaningFailure if the agent does not return a command
        status
    :returns: states.CLEANING to signify the step will be completed async
    """
    # The actual agent call lives in deploy_utils so other agent-based
    # drivers can share it.
    result = deploy_utils.agent_execute_clean_step(task, step)
    return result
def prepare_cleaning(self, task):
    """Boot into the agent to prepare for cleaning.

    If the DHCP provider supports it, any stale cleaning ports from a
    previous run are removed first and fresh ones are created before the
    node is PXE booted.

    :param task: a TaskManager instance.
    :returns: states.CLEANING to tell the conductor we are waiting for
        the agent to boot.
    """
    dhcp_provider = dhcp_factory.DHCPFactory().provider
    # If we have left over ports from a previous cleaning, remove them
    if getattr(dhcp_provider, 'delete_cleaning_ports', None):
        dhcp_provider.delete_cleaning_ports(task)
    # Create cleaning ports if necessary
    ports = None
    if getattr(dhcp_provider, 'create_cleaning_ports', None):
        ports = dhcp_provider.create_cleaning_ports(task)

    _prepare_pxe_boot(task)
    _do_pxe_boot(task, ports)
    # Tell the conductor we are waiting for the agent to boot.
    return states.CLEANING
def tear_down_cleaning(self, task):
    """Power the node off and clean up PXE/DHCP state after cleaning."""
    manager_utils.node_power_action(task, states.POWER_OFF)
    _clean_up_pxe(task)
    # If we created cleaning ports, delete them
    dhcp_provider = dhcp_factory.DHCPFactory().provider
    if getattr(dhcp_provider, 'delete_cleaning_ports', None):
        dhcp_provider.delete_cleaning_ports(task)
class AgentVendorInterface(agent_base_vendor.BaseAgentVendor):

View File

@ -30,6 +30,8 @@ from ironic.common.i18n import _LI
from ironic.common.i18n import _LW
from ironic.common import states
from ironic.common import utils
from ironic.conductor import manager
from ironic.conductor import rpcapi
from ironic.conductor import utils as manager_utils
from ironic.drivers import base
from ironic.drivers.modules import agent_client
@ -126,6 +128,67 @@ class BaseAgentVendor(base.VendorInterface):
'payload version: %s')
% version)
def _notify_conductor_resume_clean(self, task):
    """Tell the conductor (via RPC) to continue cleaning this node."""
    node_uuid = task.node.uuid
    rpc = rpcapi.ConductorAPI()
    topic = rpc.get_topic_for(task.node)
    # The conductor needs an exclusive lock on the node, so release
    # ours before issuing the RPC.
    task.release_resources()
    rpc.continue_node_clean(task.context, node_uuid, topic=topic)
def continue_cleaning(self, task, **kwargs):
    """Start the next cleaning step if the previous one is complete.

    In order to avoid errors and make agent upgrades painless, cleaning
    will check the version of all hardware managers during get_clean_steps
    at the beginning of cleaning and before executing each step in the
    agent. If the version has changed between steps, the agent is unable
    to tell if an ordering change will cause a cleaning issue. Therefore,
    we restart cleaning.
    """
    command = self._get_completed_cleaning_command(task)
    # BUG FIX: the format string ended with '(command)%', an incomplete
    # %-conversion that raises when the message is actually rendered.
    LOG.debug('Cleaning command status for node %(node)s on step '
              '%(step)s: %(command)s',
              {'node': task.node.uuid,
               'step': task.node.clean_step,
               'command': command})

    if not command:
        # Command is not done yet
        return

    if command.get('command_status') == 'FAILED':
        msg = (_('Agent returned error for clean step %(step)s on node '
                 '%(node)s : %(err)s.') %
               {'node': task.node.uuid,
                'err': command.get('command_error'),
                'step': task.node.clean_step})
        LOG.error(msg)
        manager.cleaning_error_handler(task, msg)
    elif command.get('command_status') == 'CLEAN_VERSION_MISMATCH':
        # Restart cleaning, agent must have rebooted to new version
        try:
            manager.set_node_cleaning_steps(task)
        except exception.NodeCleaningFailure:
            msg = (_('Could not restart cleaning on node %(node)s: '
                     '%(err)s.') %
                   {'node': task.node.uuid,
                    'err': command.get('command_error'),
                    'step': task.node.clean_step})
            LOG.exception(msg)
            manager.cleaning_error_handler(task, msg)
            # BUG FIX: do not resume cleaning after the node was just
            # moved to a failure state by the error handler.
            return
        self._notify_conductor_resume_clean(task)
    elif command.get('command_status') == 'SUCCEEDED':
        self._notify_conductor_resume_clean(task)
    else:
        msg = (_('Agent returned unknown status for clean step %(step)s '
                 'on node %(node)s : %(err)s.') %
               {'node': task.node.uuid,
                'err': command.get('command_status'),
                'step': task.node.clean_step})
        LOG.error(msg)
        manager.cleaning_error_handler(task, msg)
@base.passthru(['POST'])
def heartbeat(self, task, **kwargs):
"""Method for agent to periodically check in.
@ -167,6 +230,15 @@ class BaseAgentVendor(base.VendorInterface):
self.deploy_is_done(task)):
msg = _('Node failed to move to active state.')
self.reboot_to_instance(task, **kwargs)
elif (node.provision_state == states.CLEANING and
not node.clean_step):
# Agent booted from prepare_cleaning
manager.set_node_cleaning_steps(task)
self._notify_conductor_resume_clean(task)
elif (node.provision_state == states.CLEANING and
node.clean_step):
self.continue_cleaning(task, **kwargs)
except Exception as e:
err_info = {'node': node.uuid, 'msg': msg, 'e': e}
last_error = _('Asynchronous exception for node %(node)s: '
@ -228,6 +300,19 @@ class BaseAgentVendor(base.VendorInterface):
'node': node
}
def _get_completed_cleaning_command(self, task):
    """Returns None or a completed cleaning command from the agent."""
    commands = self._client.get_commands_status(task.node)
    if not commands:
        return None
    last_command = commands[-1]
    # A command still marked RUNNING has not completed yet.
    return None if last_command['command_status'] == 'RUNNING' \
        else last_command
def _get_interfaces(self, inventory):
interfaces = []
try:

View File

@ -114,3 +114,26 @@ class AgentClient(object):
method='image.install_bootloader',
params=params,
wait=True)
def get_clean_steps(self, node, ports):
    """Ask the agent ramdisk for its available clean steps.

    :param node: an Ironic node object.
    :param ports: a list of Ironic port objects belonging to the node.
    :returns: the agent's response, waited on synchronously.
    """
    command_params = {
        'node': node.as_dict(),
        'ports': [p.as_dict() for p in ports],
    }
    return self._command(node=node,
                         method='clean.get_clean_steps',
                         params=command_params,
                         wait=True)
def execute_clean_step(self, step, node, ports):
    """Kick off a single clean step on the agent ramdisk.

    The agent compares clean_version against its current hardware
    manager version to detect mid-clean agent upgrades.

    :param step: a clean step dictionary to execute.
    :param node: an Ironic node object.
    :param ports: a list of Ironic port objects belonging to the node.
    :returns: the agent's response; not waited on (asynchronous).
    """
    hw_version = node.driver_internal_info.get('hardware_manager_version')
    command_params = {
        'step': step,
        'node': node.as_dict(),
        'ports': [p.as_dict() for p in ports],
        'clean_version': hw_version,
    }
    return self._command(node=node,
                         method='clean.execute_clean_step',
                         params=command_params,
                         wait=False)

View File

@ -44,8 +44,10 @@ from ironic.common import images
from ironic.common import states
from ironic.common import utils
from ironic.conductor import utils as manager_utils
from ironic.drivers.modules import agent_client
from ironic.drivers.modules import image_cache
from ironic.drivers import utils as driver_utils
from ironic import objects
from ironic.openstack.common import log as logging
@ -71,6 +73,10 @@ LOG = logging.getLogger(__name__)
VALID_ROOT_DEVICE_HINTS = set(('size', 'model', 'wwn', 'serial', 'vendor'))
def _get_agent_client():
    # Kept as a separate function so tests can stub out client creation.
    return agent_client.AgentClient()
# All functions are called from deploy() directly or indirectly.
# They are split for stub-out.
@ -873,6 +879,65 @@ def parse_instance_info_capabilities(node):
return capabilities
def agent_get_clean_steps(task):
    """Get the list of clean steps from the agent.

    #TODO(JoshNang) move to BootInterface

    :param task: a TaskManager object containing the node
    :raises: NodeCleaningFailure if the agent returns invalid results
    :returns: A list of clean step dictionaries
    """
    client = _get_agent_client()
    ports = objects.Port.list_by_node_id(
        task.context, task.node.id)
    result = client.get_clean_steps(task.node, ports).get('command_result')
    # ROBUSTNESS FIX: 'command_result' may be absent, making result None;
    # the original then raised TypeError on the `in` test instead of the
    # intended NodeCleaningFailure.
    if (not result or 'clean_steps' not in result or
            'hardware_manager_version' not in result):
        raise exception.NodeCleaningFailure(_(
            'get_clean_steps for node %(node)s returned invalid result:'
            ' %(result)s') % ({'node': task.node.uuid, 'result': result}))

    # Persist the agent's hardware manager version so later steps can
    # detect a mid-clean agent upgrade.
    driver_info = task.node.driver_internal_info
    driver_info['hardware_manager_version'] = result[
        'hardware_manager_version']
    task.node.driver_internal_info = driver_info
    task.node.save()

    # Clean steps looks like {'HardwareManager': [{step1},{steps2}..]..}
    # Flatten clean steps into one list
    steps_list = [step for step_list in
                  result['clean_steps'].values()
                  for step in step_list]
    # Filter steps to only return deploy steps
    steps = [step for step in steps_list
             if step.get('interface') == 'deploy']
    return steps
def agent_execute_clean_step(task, step):
    """Execute a clean step asynchronously on the agent.

    #TODO(JoshNang) move to BootInterface

    :param task: a TaskManager object containing the node
    :param step: a clean step dictionary to execute
    :raises: NodeCleaningFailure if the agent does not return a command status
    :returns: states.CLEANING to signify the step will be completed async
    """
    ports = objects.Port.list_by_node_id(
        task.context, task.node.id)
    response = _get_agent_client().execute_clean_step(step, task.node, ports)
    # A response without a command_status means the agent never accepted
    # the step.
    if not response.get('command_status'):
        raise exception.NodeCleaningFailure(_(
            'Agent on node %(node)s returned bad command result: '
            '%(result)s') % {'node': task.node.uuid,
                             'result': response.get('command_error')})
    return states.CLEANING
def try_set_boot_device(task, device, persistent=True):
"""Tries to set the boot device on the node.

View File

@ -34,6 +34,9 @@ class TestNeutron(db_base.DbTestCase):
def setUp(self):
super(TestNeutron, self).setUp()
mgr_utils.mock_the_extension_manager(driver='fake')
self.config(
cleaning_network_uuid='00000000-0000-0000-0000-000000000000',
group='neutron')
self.config(enabled_drivers=['fake'])
self.config(dhcp_provider='neutron',
group='dhcp')
@ -49,6 +52,15 @@ class TestNeutron(db_base.DbTestCase):
auth_uri='test-auth-uri',
group='keystone_authtoken')
self.node = object_utils.create_test_node(self.context)
self.ports = [
object_utils.create_test_port(
self.context, node_id=self.node.id, id=2,
uuid='1be26c0b-03f2-4d2e-ae87-c02d7f33c782',
address='52:54:00:cf:2d:32')]
# Very simple neutron port representation
self.neutron_port = {'id': '132f871f-eaec-4fed-9475-0d54465e0f00',
'mac_address': '52:54:00:cf:2d:32'}
dhcp_factory.DHCPFactory._dhcp_provider = None
def test__build_client_invalid_auth_strategy(self):
@ -340,16 +352,97 @@ class TestNeutron(db_base.DbTestCase):
@mock.patch('ironic.dhcp.neutron.NeutronDHCPApi._get_port_ip_address')
def test_get_ip_addresses(self, get_ip_mock):
ip_address = '10.10.0.1'
address = "aa:aa:aa:aa:aa:aa"
expected = [ip_address]
port = object_utils.create_test_port(self.context,
node_id=self.node.id,
address=address)
get_ip_mock.return_value = ip_address
with task_manager.acquire(self.context, self.node.uuid) as task:
api = dhcp_factory.DHCPFactory().provider
result = api.get_ip_addresses(task)
get_ip_mock.assert_called_once_with(task, port.uuid, mock.ANY)
get_ip_mock.assert_called_once_with(task, self.ports[0].uuid,
mock.ANY)
self.assertEqual(expected, result)
@mock.patch.object(client.Client, 'create_port')
def test_create_cleaning_ports(self, create_mock):
    # Ensure we can create cleaning ports for in band cleaning;
    # one Neutron port is created per Ironic port, keyed by port UUID.
    create_mock.return_value = {'port': self.neutron_port}
    expected = {self.ports[0].uuid: self.neutron_port['id']}
    api = dhcp_factory.DHCPFactory().provider

    with task_manager.acquire(self.context, self.node.uuid) as task:
        ports = api.create_cleaning_ports(task)

        self.assertEqual(expected, ports)
        create_mock.assert_called_once_with({'port': {
            'network_id': '00000000-0000-0000-0000-000000000000',
            'admin_state_up': True, 'mac_address': self.ports[0].address}})
@mock.patch('ironic.conductor.manager.cleaning_error_handler')
@mock.patch.object(client.Client, 'create_port')
def test_create_cleaning_ports_fail(self, create_mock, error_mock):
    # Check that if creating a port fails, the node goes to cleanfail
    # (via the conductor's cleaning_error_handler).
    create_mock.side_effect = neutron_client_exc.ConnectionFailed
    api = dhcp_factory.DHCPFactory().provider

    with task_manager.acquire(self.context, self.node.uuid) as task:
        api.create_cleaning_ports(task)
        error_mock.assert_called_once_with(task, mock.ANY)
        create_mock.assert_called_once_with({'port': {
            'network_id': '00000000-0000-0000-0000-000000000000',
            'admin_state_up': True, 'mac_address': self.ports[0].address}})
@mock.patch('ironic.conductor.manager.cleaning_error_handler')
@mock.patch.object(client.Client, 'create_port')
def test_create_cleaning_ports_bad_config(self, create_mock, error_mock):
    # A missing cleaning_network_uuid is a configuration error, not a
    # cleaning failure.
    self.config(cleaning_network_uuid=None, group='neutron')
    api = dhcp_factory.DHCPFactory().provider

    with task_manager.acquire(self.context, self.node.uuid) as task:
        self.assertRaises(exception.InvalidParameterValue,
                          api.create_cleaning_ports, task)
@mock.patch.object(client.Client, 'delete_port')
@mock.patch.object(client.Client, 'list_ports')
def test_delete_cleaning_ports(self, list_mock, delete_mock):
    # Ensure that we can delete cleaning ports, and that ports with
    # different macs don't get deleted
    other_port = {'id': '132f871f-eaec-4fed-9475-0d54465e0f01',
                  'mac_address': 'aa:bb:cc:dd:ee:ff'}
    list_mock.return_value = {'ports': [self.neutron_port, other_port]}
    api = dhcp_factory.DHCPFactory().provider

    with task_manager.acquire(self.context, self.node.uuid) as task:
        api.delete_cleaning_ports(task)

        list_mock.assert_called_once_with(
            network_id='00000000-0000-0000-0000-000000000000')
        # Only the port whose MAC matches the node's port is removed.
        delete_mock.assert_called_once_with(self.neutron_port['id'])
@mock.patch('ironic.conductor.manager.cleaning_error_handler')
@mock.patch.object(client.Client, 'list_ports')
def test_delete_cleaning_ports_list_fail(self, list_mock, error_mock):
    # Check that if listing ports fails, the node goes to cleanfail
    list_mock.side_effect = neutron_client_exc.ConnectionFailed
    api = dhcp_factory.DHCPFactory().provider

    with task_manager.acquire(self.context, self.node.uuid) as task:
        api.delete_cleaning_ports(task)

        list_mock.assert_called_once_with(
            network_id='00000000-0000-0000-0000-000000000000')
        error_mock.assert_called_once_with(task, mock.ANY)
@mock.patch('ironic.conductor.manager.cleaning_error_handler')
@mock.patch.object(client.Client, 'delete_port')
@mock.patch.object(client.Client, 'list_ports')
def test_delete_cleaning_ports_delete_fail(self, list_mock, delete_mock,
                                           error_mock):
    # Check that if deleting ports fails, the node goes to cleanfail
    list_mock.return_value = {'ports': [self.neutron_port]}
    delete_mock.side_effect = neutron_client_exc.ConnectionFailed
    api = dhcp_factory.DHCPFactory().provider

    with task_manager.acquire(self.context, self.node.uuid) as task:
        api.delete_cleaning_ports(task)

        list_mock.assert_called_once_with(
            network_id='00000000-0000-0000-0000-000000000000')
        delete_mock.assert_called_once_with(self.neutron_port['id'])
        error_mock.assert_called_once_with(task, mock.ANY)

View File

@ -140,6 +140,8 @@ class TestAgentDeploy(db_base.DbTestCase):
'driver_internal_info': DRIVER_INTERNAL_INFO,
}
self.node = object_utils.create_test_node(self.context, **n)
self.ports = [object_utils.create_test_port(self.context,
node_id=self.node.id)]
def test_get_properties(self):
expected = agent.COMMON_PROPERTIES
@ -197,7 +199,7 @@ class TestAgentDeploy(db_base.DbTestCase):
dhcp_opts = pxe_utils.dhcp_options_for_instance(task)
driver_return = self.driver.deploy(task)
self.assertEqual(driver_return, states.DEPLOYWAIT)
dhcp_mock.assert_called_once_with(task, dhcp_opts)
dhcp_mock.assert_called_once_with(task, dhcp_opts, None)
bootdev_mock.assert_called_once_with(task, 'pxe', persistent=True)
power_mock.assert_called_once_with(task,
states.REBOOT)
@ -210,6 +212,59 @@ class TestAgentDeploy(db_base.DbTestCase):
power_mock.assert_called_once_with(task, states.POWER_OFF)
self.assertEqual(driver_return, states.DELETED)
@mock.patch('ironic.dhcp.neutron.NeutronDHCPApi.delete_cleaning_ports')
@mock.patch('ironic.dhcp.neutron.NeutronDHCPApi.create_cleaning_ports')
@mock.patch('ironic.drivers.modules.agent._do_pxe_boot')
@mock.patch('ironic.drivers.modules.agent._prepare_pxe_boot')
def test_prepare_cleaning(self, prepare_mock, boot_mock, create_mock,
                          delete_mock):
    # prepare_cleaning should remove stale cleaning ports, create new
    # ones, PXE boot the node, and report CLEANING.
    ports = [{'ports': self.ports}]
    create_mock.return_value = ports
    with task_manager.acquire(
            self.context, self.node['uuid'], shared=False) as task:
        self.assertEqual(states.CLEANING,
                         self.driver.prepare_cleaning(task))
        prepare_mock.assert_called_once_with(task)
        boot_mock.assert_called_once_with(task, ports)
        # BUG FIX: Mock.assert_called_once() is not a real assertion on
        # this era's mock library -- it silently passes. Assert the
        # expected calls explicitly.
        create_mock.assert_called_once_with(task)
        delete_mock.assert_called_once_with(task)
@mock.patch('ironic.dhcp.neutron.NeutronDHCPApi.delete_cleaning_ports')
@mock.patch('ironic.drivers.modules.agent._clean_up_pxe')
@mock.patch('ironic.conductor.utils.node_power_action')
def test_tear_down_cleaning(self, power_mock, cleanup_mock, neutron_mock):
    # tear_down_cleaning powers off, cleans up PXE files, and removes
    # the cleaning ports it created.
    with task_manager.acquire(
            self.context, self.node['uuid'], shared=False) as task:
        self.assertIsNone(self.driver.tear_down_cleaning(task))
        power_mock.assert_called_once_with(task, states.POWER_OFF)
        cleanup_mock.assert_called_once_with(task)
        # BUG FIX: Mock.assert_called_once() silently passes on this
        # era's mock library; assert the call explicitly.
        neutron_mock.assert_called_once_with(task)
@mock.patch('ironic.drivers.modules.deploy_utils.agent_get_clean_steps')
def test_get_clean_steps(self, mock_get_clean_steps):
    # Test getting clean steps
    mock_steps = [{'priority': 10, 'interface': 'deploy',
                   'step': 'erase_devices'}]
    mock_get_clean_steps.return_value = mock_steps
    with task_manager.acquire(self.context, self.node.uuid) as task:
        steps = self.driver.get_clean_steps(task)
        mock_get_clean_steps.assert_called_once_with(task)
        # With no config override, the agent's steps pass through as-is.
        self.assertEqual(mock_steps, steps)
@mock.patch('ironic.drivers.modules.deploy_utils.agent_get_clean_steps')
def test_get_clean_steps_config_priority(self, mock_get_clean_steps):
    # Test that we can override the priority of get clean steps
    # (agent_erase_devices_priority replaces the ramdisk's priority).
    self.config(agent_erase_devices_priority=20, group='agent')
    mock_steps = [{'priority': 10, 'interface': 'deploy',
                   'step': 'erase_devices'}]
    expected_steps = [{'priority': 20, 'interface': 'deploy',
                       'step': 'erase_devices'}]
    mock_get_clean_steps.return_value = mock_steps
    with task_manager.acquire(self.context, self.node.uuid) as task:
        steps = self.driver.get_clean_steps(task)
        mock_get_clean_steps.assert_called_once_with(task)
        self.assertEqual(expected_steps, steps)
class TestAgentVendor(db_base.DbTestCase):

View File

@ -401,3 +401,64 @@ class TestBaseAgentVendor(db_base.DbTestCase):
task, boot_devices.DISK)
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
                   '_notify_conductor_resume_clean')
@mock.patch.object(agent_client.AgentClient, 'get_commands_status')
def test_continue_cleaning(self, status_mock, notify_mock):
    # A SUCCEEDED step makes the vendor interface resume cleaning on
    # the conductor.
    status_mock.return_value = [{
        'command_status': 'SUCCEEDED',
    }]
    with task_manager.acquire(self.context, self.node['uuid'],
                              shared=False) as task:
        self.passthru.continue_cleaning(task)
        notify_mock.assert_called_once_with(task)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
                   '_notify_conductor_resume_clean')
@mock.patch.object(agent_client.AgentClient, 'get_commands_status')
def test_continue_cleaning_running(self, status_mock, notify_mock):
    # A still-RUNNING command means no action is taken yet.
    status_mock.return_value = [{
        'command_status': 'RUNNING',
    }]
    with task_manager.acquire(self.context, self.node['uuid'],
                              shared=False) as task:
        self.passthru.continue_cleaning(task)
        # NOTE(review): assert_not_called requires a sufficiently recent
        # mock library; on very old mock it silently passes -- TODO
        # confirm the minimum mock version in requirements.
        notify_mock.assert_not_called()
@mock.patch('ironic.conductor.manager.cleaning_error_handler')
@mock.patch.object(agent_client.AgentClient, 'get_commands_status')
def test_continue_cleaning_fail(self, status_mock, error_mock):
    # A FAILED step routes through the conductor's error handler.
    status_mock.return_value = [{
        'command_status': 'FAILED',
    }]
    with task_manager.acquire(self.context, self.node['uuid'],
                              shared=False) as task:
        self.passthru.continue_cleaning(task)
        error_mock.assert_called_once_with(task, mock.ANY)
@mock.patch('ironic.conductor.manager.set_node_cleaning_steps')
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
                   '_notify_conductor_resume_clean')
@mock.patch.object(agent_client.AgentClient, 'get_commands_status')
def test_continue_cleaning_clean_version_mismatch(
        self, status_mock, notify_mock, steps_mock):
    # A hardware-manager version mismatch restarts cleaning from the
    # top (recompute steps, then resume).
    status_mock.return_value = [{
        'command_status': 'CLEAN_VERSION_MISMATCH',
    }]
    with task_manager.acquire(self.context, self.node['uuid'],
                              shared=False) as task:
        self.passthru.continue_cleaning(task)
        steps_mock.assert_called_once_with(task)
        notify_mock.assert_called_once_with(task)
@mock.patch('ironic.conductor.manager.cleaning_error_handler')
@mock.patch.object(agent_client.AgentClient, 'get_commands_status')
def test_continue_cleaning_unknown(self, status_mock, error_mock):
    # Any unrecognized command status is treated as a cleaning error.
    status_mock.return_value = [{
        'command_status': 'UNKNOWN',
    }]
    with task_manager.acquire(self.context, self.node['uuid'],
                              shared=False) as task:
        self.passthru.continue_cleaning(task)
        error_mock.assert_called_once_with(task, mock.ANY)

View File

@ -36,10 +36,19 @@ class MockNode(object):
self.uuid = 'uuid'
self.driver_info = {}
self.driver_internal_info = {
'agent_url': "http://127.0.0.1:9999"
'agent_url': "http://127.0.0.1:9999",
'clean_version': {'generic': '1'}
}
self.instance_info = {}
def as_dict(self):
    # Mirror the node fields that the agent client serializes into
    # command parameters.
    return {
        'uuid': self.uuid,
        'driver_info': self.driver_info,
        'driver_internal_info': self.driver_internal_info,
        'instance_info': self.instance_info
    }
class TestAgentClient(base.TestCase):
def setUp(self):
@ -148,3 +157,37 @@ class TestAgentClient(base.TestCase):
self.client._command.assert_called_once_with(
node=self.node, method='image.install_bootloader', params=params,
wait=True)
def test_get_clean_steps(self):
    # get_clean_steps is a synchronous agent command (wait=True).
    self.client._command = mock.Mock()
    ports = []
    expected_params = {
        'node': self.node.as_dict(),
        'ports': []
    }

    self.client.get_clean_steps(self.node,
                                ports)
    self.client._command.assert_called_once_with(
        node=self.node,
        method='clean.get_clean_steps',
        params=expected_params,
        wait=True)
def test_execute_clean_step(self):
    """execute_clean_step issues a non-blocking 'clean.execute_clean_step'
    command with the step, node, ports and cached hardware manager version.
    """
    self.client._command = mock.Mock()
    ports = []
    step = {'priority': 10, 'step': 'erase_devices', 'interface': 'deploy'}
    self.client.execute_clean_step(step, self.node, ports)
    # NOTE(review): MockNode's driver_internal_info carries 'clean_version',
    # not 'hardware_manager_version', so this value is None here — confirm
    # that is intentional.
    expected_params = {
        'step': step,
        'node': self.node.as_dict(),
        'ports': [],
        'clean_version': self.node.driver_internal_info.get(
            'hardware_manager_version'),
    }
    self.client._command.assert_called_once_with(
        node=self.node,
        method='clean.execute_clean_step',
        params=expected_params,
        wait=False)

View File

@ -33,6 +33,7 @@ from ironic.common import boot_devices
from ironic.common import disk_partitioner
from ironic.common import exception
from ironic.common import images
from ironic.common import states
from ironic.common import utils as common_utils
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
@ -1509,6 +1510,129 @@ class TrySetBootDeviceTestCase(db_base.DbTestCase):
task, boot_devices.DISK, persistent=True)
class AgentCleaningTestCase(db_base.DbTestCase):
    """Tests for the agent-driven cleaning helpers in deploy_utils."""

    def setUp(self):
        super(AgentCleaningTestCase, self).setUp()
        mgr_utils.mock_the_extension_manager(driver='fake_agent')
        self.node = obj_utils.create_test_node(self.context,
                                               **{'driver': 'fake_agent'})
        self.ports = [obj_utils.create_test_port(self.context,
                                                 node_id=self.node.id)]
        # Clean steps as the agent ramdisk would report them: keyed by
        # hardware manager, spanning multiple interfaces and priorities.
        self.clean_steps = {
            'hardware_manager_version': '1',
            'clean_steps': {
                'GenericHardwareManager': [
                    {'interface': 'deploy',
                     'step': 'erase_devices',
                     'priority': 20},
                ],
                'SpecificHardwareManager': [
                    {'interface': 'deploy',
                     'step': 'update_firmware',
                     'priority': 30},
                    {'interface': 'raid',
                     'step': 'create_raid',
                     'priority': 10},
                ]
            }
        }
@mock.patch('ironic.objects.Port.list_by_node_id')
@mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client')
def test_get_clean_steps(self, get_client_mock, list_ports_mock):
    """agent_get_clean_steps flattens the per-hardware-manager step dict
    and caches the reported hardware manager version on the node.
    """
    client_mock = mock.Mock()
    client_mock.get_clean_steps.return_value = {
        'command_result': self.clean_steps}
    get_client_mock.return_value = client_mock
    list_ports_mock.return_value = self.ports

    with task_manager.acquire(
            self.context, self.node['uuid'], shared=False) as task:
        response = utils.agent_get_clean_steps(task)
        client_mock.get_clean_steps.assert_called_once_with(task.node,
                                                            self.ports)
        self.assertEqual('1', task.node.driver_internal_info[
            'hardware_manager_version'])
        # Only the two 'deploy'-interface steps are expected back; the
        # 'raid' step is not. Steps come from dicts, so ordering is
        # non-deterministic: check membership, not position.
        # (assertIn gives a clearer failure message than
        # assertTrue(x in y).)
        self.assertEqual(2, len(response))
        self.assertIn(
            self.clean_steps['clean_steps']['GenericHardwareManager'][0],
            response)
        self.assertIn(
            self.clean_steps['clean_steps']['SpecificHardwareManager'][0],
            response)
@mock.patch('ironic.objects.Port.list_by_node_id')
@mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client')
def test_get_clean_steps_missing_steps(self, get_client_mock,
                                       list_ports_mock):
    """A command result lacking the 'clean_steps' key raises
    NodeCleaningFailure.
    """
    # Strip the steps from the canned result before wiring up the mocks.
    del self.clean_steps['clean_steps']
    client_mock = mock.Mock()
    client_mock.get_clean_steps.return_value = {
        'command_result': self.clean_steps}
    get_client_mock.return_value = client_mock
    list_ports_mock.return_value = self.ports

    with task_manager.acquire(
            self.context, self.node['uuid'], shared=False) as task:
        self.assertRaises(exception.NodeCleaningFailure,
                          utils.agent_get_clean_steps,
                          task)
        # The agent was still queried; the failure comes from its reply.
        client_mock.get_clean_steps.assert_called_once_with(task.node,
                                                            self.ports)
@mock.patch('ironic.objects.Port.list_by_node_id')
@mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client')
def test_execute_clean_step(self, get_client_mock, list_ports_mock):
    """A SUCCEEDED command status maps to the CLEANING state."""
    client_mock = mock.Mock()
    client_mock.execute_clean_step.return_value = {
        'command_status': 'SUCCEEDED'}
    get_client_mock.return_value = client_mock
    list_ports_mock.return_value = self.ports
    step = self.clean_steps['clean_steps']['GenericHardwareManager'][0]

    with task_manager.acquire(
            self.context, self.node['uuid'], shared=False) as task:
        self.assertEqual(states.CLEANING,
                         utils.agent_execute_clean_step(task, step))
@mock.patch('ironic.objects.Port.list_by_node_id')
@mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client')
def test_execute_clean_step_running(self, get_client_mock,
                                    list_ports_mock):
    """A RUNNING command status also maps to the CLEANING state."""
    client_mock = mock.Mock()
    client_mock.execute_clean_step.return_value = {
        'command_status': 'RUNNING'}
    get_client_mock.return_value = client_mock
    list_ports_mock.return_value = self.ports
    step = self.clean_steps['clean_steps']['GenericHardwareManager'][0]

    with task_manager.acquire(
            self.context, self.node['uuid'], shared=False) as task:
        self.assertEqual(states.CLEANING,
                         utils.agent_execute_clean_step(task, step))
@mock.patch('ironic.objects.Port.list_by_node_id')
@mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client')
def test_execute_clean_step_version_mismatch(self, get_client_mock,
                                             list_ports_mock):
    """A CLEAN_VERSION_MISMATCH command status still reports CLEANING.

    The original test mocked 'RUNNING', which made it an exact duplicate
    of test_execute_clean_step_running and left the mismatch path its
    name advertises untested.
    """
    client_mock = mock.Mock()
    # Exercise the version-mismatch branch, not the RUNNING branch.
    client_mock.execute_clean_step.return_value = {
        'command_status': 'CLEAN_VERSION_MISMATCH'}
    get_client_mock.return_value = client_mock
    list_ports_mock.return_value = self.ports

    with task_manager.acquire(
            self.context, self.node['uuid'], shared=False) as task:
        response = utils.agent_execute_clean_step(
            task,
            self.clean_steps['clean_steps']['GenericHardwareManager'][0])
        self.assertEqual(states.CLEANING, response)
@mock.patch.object(utils, 'is_block_device')
@mock.patch.object(utils, 'login_iscsi', lambda *_: None)
@mock.patch.object(utils, 'discovery', lambda *_: None)