From 534d9ee96ac738f24a23087a26c98804934ea72d Mon Sep 17 00:00:00 2001 From: Josh Gachnang Date: Tue, 17 Feb 2015 17:42:18 -0800 Subject: [PATCH] Implement cleaning/zapping for the agent driver Adds get_clean_steps and execute_clean_steps for the agent. Also implements prepare and tear_down for cleaning to ensure the agent is booted during cleaning. These bits should be factored out to the boot interface when that merges in L. Adds checks for cleaning steps in the heartbeat handler. Adds parameters to the dhcp functions to pass in VIFs instead of relying on the cached ones in the Ironic port objects. As cleaning can start before Nova can unplug those ports, we should ignore the cached VIFs and use the ones we create. Changes _do_next_clean_step to acquire a lock on its own so cleaning from do_node_tear_down and continue_cleaning both work. Implements blueprint implement-cleaning-states Change-Id: Ia2500ed5afb72058b4c5e8f41307169381cbce48 Depends-on: Ic38de10668c97648d073fdf9a3afc59712057849 --- etc/ironic/ironic.conf.sample | 10 ++ ironic/common/dhcp_factory.py | 6 +- ironic/dhcp/neutron.py | 101 +++++++++++++- ironic/drivers/modules/agent.py | 116 +++++++++++++--- ironic/drivers/modules/agent_base_vendor.py | 85 ++++++++++++ ironic/drivers/modules/agent_client.py | 23 ++++ ironic/drivers/modules/deploy_utils.py | 65 +++++++++ ironic/tests/dhcp/test_neutron.py | 103 ++++++++++++++- ironic/tests/drivers/test_agent.py | 57 +++++++- .../tests/drivers/test_agent_base_vendor.py | 61 +++++++++ ironic/tests/drivers/test_agent_client.py | 45 ++++++- ironic/tests/drivers/test_deploy_utils.py | 124 ++++++++++++++++++ 12 files changed, 764 insertions(+), 32 deletions(-) diff --git a/etc/ironic/ironic.conf.sample b/etc/ironic/ironic.conf.sample index c06188b287..27440d3839 100644 --- a/etc/ironic/ironic.conf.sample +++ b/etc/ironic/ironic.conf.sample @@ -351,6 +351,11 @@ # Neutron bootfile DHCP parameter. 
(string value) #agent_pxe_bootfile_name=pxelinux.0 +# Priority to run in-band erase devices via the Ironic Python +# Agent ramdisk. If unset, will use the priority set in the +# ramdisk (defaults to 10 for the GenericHardwareManager). If +# set to 0, will not run during cleaning. (integer value) +#agent_erase_devices_priority= # # Options defined in ironic.drivers.modules.agent_base_vendor @@ -1122,6 +1127,11 @@ # (string value) #auth_strategy=keystone +# UUID of the network to create Neutron ports on when booting +# to a ramdisk for cleaning/zapping using Neutron DHCP (string +# value) +#cleaning_network_uuid= + [oslo_messaging_amqp] diff --git a/ironic/common/dhcp_factory.py b/ironic/common/dhcp_factory.py index 24b10c88d5..c9fadf18eb 100644 --- a/ironic/common/dhcp_factory.py +++ b/ironic/common/dhcp_factory.py @@ -74,7 +74,7 @@ class DHCPFactory(object): cls._dhcp_provider = _extension_manager.driver - def update_dhcp(self, task, dhcp_opts): + def update_dhcp(self, task, dhcp_opts, ports=None): """Send or update the DHCP BOOT options for this node. :param task: A TaskManager instance. @@ -88,8 +88,10 @@ class DHCPFactory(object): 'opt_value': '123.123.123.456'}, {'opt_name': 'tftp-server', 'opt_value': '123.123.123.123'}] + :param ports: a list of Neutron port dicts to update DHCP options on. + If None, will get the list of ports from the Ironic port objects. 
""" - self.provider.update_dhcp_opts(task, dhcp_opts) + self.provider.update_dhcp_opts(task, dhcp_opts, ports) @property def provider(self): diff --git a/ironic/dhcp/neutron.py b/ironic/dhcp/neutron.py index 1d4388bf18..ec5f15be80 100644 --- a/ironic/dhcp/neutron.py +++ b/ironic/dhcp/neutron.py @@ -27,6 +27,7 @@ from ironic.common.i18n import _LE from ironic.common.i18n import _LW from ironic.common import keystone from ironic.common import network +from ironic.conductor import manager from ironic.dhcp import base from ironic.drivers.modules import ssh from ironic.openstack.common import log as logging @@ -48,7 +49,11 @@ neutron_opts = [ 'to neutron. Can be either "keystone" or "noauth". ' 'Running neutron in noauth mode (related to but not ' 'affected by this setting) is insecure and should only be ' - 'used for testing.') + 'used for testing.'), + cfg.StrOpt('cleaning_network_uuid', + help='UUID of the network to create Neutron ports on when ' + 'booting to a ramdisk for cleaning/zapping using Neutron ' + 'DHCP') ] CONF = cfg.CONF @@ -140,11 +145,11 @@ class NeutronDHCPApi(base.BaseDHCP): "port %s."), port_id) raise exception.FailedToUpdateMacOnPort(port_id=port_id) - def update_dhcp_opts(self, task, options): + def update_dhcp_opts(self, task, options, vifs=None): """Send or update the DHCP BOOT options for this node. :param task: A TaskManager instance. - :param dhcp_opts: this will be a list of dicts, e.g. + :param options: this will be a list of dicts, e.g. :: @@ -154,8 +159,14 @@ class NeutronDHCPApi(base.BaseDHCP): 'opt_value': '123.123.123.456'}, {'opt_name': 'tftp-server', 'opt_value': '123.123.123.123'}] + :param vifs: a dict of Neutron port dicts to update DHCP options on. + The keys should be Ironic port UUIDs, and the values should be + Neutron port UUIDs + If the value is None, will get the list of ports from the Ironic + port objects. 
""" - vifs = network.get_node_vif_ids(task) + if vifs is None: + vifs = network.get_node_vif_ids(task) if not vifs: raise exception.FailedToUpdateDHCPOptOnPort( _("No VIFs found for node %(node)s when attempting " @@ -275,3 +286,85 @@ class NeutronDHCPApi(base.BaseDHCP): {'node': task.node.uuid, 'ports': failures}) return ip_addresses + + def create_cleaning_ports(self, task): + """Create neutron ports for each port on task.node to boot the ramdisk. + + :param task: a TaskManager instance. + :raises: InvalidParameterValue if the cleaning network is None + :returns: a dictionary in the form {port.uuid: neutron_port['id']} + """ + if not CONF.neutron.cleaning_network_uuid: + raise exception.InvalidParameterValue(_('Valid cleaning network ' + 'UUID not provided')) + neutron_client = _build_client(task.context.auth_token) + body = { + 'port': { + 'network_id': CONF.neutron.cleaning_network_uuid, + 'admin_state_up': True, + } + } + ports = {} + for ironic_port in task.ports: + body['port']['mac_address'] = ironic_port.address + try: + port = neutron_client.create_port(body) + except neutron_client_exc.ConnectionFailed as e: + msg = (_('Could not create cleaning port on network %(net)s ' + 'from %(node)s. 
%(exc)s') % + {'net': CONF.neutron.cleaning_network_uuid, + 'node': task.node.uuid, + 'exc': e}) + LOG.exception(msg) + return manager.cleaning_error_handler(task, msg) + if not port.get('port') or not port['port'].get('id'): + # Rollback changes + try: + self.delete_cleaning_ports(task) + except Exception: + # Log the error, but continue to cleaning error handler + LOG.exception(_LE('Failed to rollback cleaning port ' + 'changes for node %s') % task.node.uuid) + msg = (_('Failed to create cleaning ports for node ' + '%(node)s') % {'node': task.node.uuid}) + LOG.error(msg) + return manager.cleaning_error_handler(task, msg) + # Match return value of get_node_vif_ids() + ports[ironic_port.uuid] = port['port']['id'] + return ports + + def delete_cleaning_ports(self, task): + """Deletes the neutron port created for booting the ramdisk. + + :param task: a TaskManager instance. + """ + neutron_client = _build_client(task.context.auth_token) + macs = [p.address for p in task.ports] + params = { + 'network_id': CONF.neutron.cleaning_network_uuid + } + try: + ports = neutron_client.list_ports(**params) + except neutron_client_exc.ConnectionFailed as e: + msg = (_('Could not get cleaning network vif for %(node)s ' + 'from Neutron, possible network issue. %(exc)s') % + {'node': task.node.uuid, + 'exc': e}) + LOG.exception(msg) + return manager.cleaning_error_handler(task, msg) + + # Iterate the list of Neutron port dicts, remove the ones we added + for neutron_port in ports.get('ports', []): + # Only delete ports using the node's mac addresses + if neutron_port.get('mac_address') in macs: + try: + neutron_client.delete_port(neutron_port.get('id')) + except neutron_client_exc.ConnectionFailed as e: + msg = (_('Could not remove cleaning ports on network ' + '%(net)s from %(node)s, possible network issue. 
' + '%(exc)s') % + {'net': CONF.neutron.cleaning_network_uuid, + 'node': task.node.uuid, + 'exc': e}) + LOG.exception(msg) + return manager.cleaning_error_handler(task, msg) diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 505e812056..efd4be6fa6 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -18,6 +18,7 @@ import time from oslo_config import cfg from oslo_utils import excutils +from ironic.common import boot_devices from ironic.common import dhcp_factory from ironic.common import exception from ironic.common.glance_service import service_utils @@ -51,6 +52,12 @@ agent_opts = [ cfg.StrOpt('agent_pxe_bootfile_name', default='pxelinux.0', help='Neutron bootfile DHCP parameter.'), + cfg.IntOpt('agent_erase_devices_priority', + help='Priority to run in-band erase devices via the Ironic ' + 'Python Agent ramdisk. If unset, will use the priority ' + 'set in the ramdisk (defaults to 10 for the ' + 'GenericHardwareManager). If set to 0, will not run ' + 'during cleaning.') ] CONF = cfg.CONF @@ -185,6 +192,40 @@ def build_instance_info_for_deploy(task): return instance_info +def _prepare_pxe_boot(task): + """Prepare the files required for PXE booting the agent.""" + pxe_info = _get_tftp_image_info(task.node) + pxe_options = _build_pxe_config_options(task.node, pxe_info) + pxe_utils.create_pxe_config(task, + pxe_options, + CONF.agent.agent_pxe_config_template) + _cache_tftp_images(task.context, task.node, pxe_info) + + +def _do_pxe_boot(task, ports=None): + """Reboot the node into the PXE ramdisk. + + :param ports: a list of Neutron port dicts to update DHCP options on. If + None, will get the list of ports from the Ironic port objects. 
+ """ + dhcp_opts = pxe_utils.dhcp_options_for_instance(task) + provider = dhcp_factory.DHCPFactory() + provider.update_dhcp(task, dhcp_opts, ports) + manager_utils.node_set_boot_device(task, boot_devices.PXE, persistent=True) + manager_utils.node_power_action(task, states.REBOOT) + + +def _clean_up_pxe(task): + """Clean up left over PXE and DHCP files.""" + pxe_info = _get_tftp_image_info(task.node) + for label in pxe_info: + path = pxe_info[label][1] + utils.unlink_without_raise(path) + AgentTFTPImageCache().clean_up() + + pxe_utils.clean_up_pxe_config(task) + + class AgentDeploy(base.DeployInterface): """Interface for deploy-related actions.""" @@ -238,12 +279,7 @@ class AgentDeploy(base.DeployInterface): :param task: a TaskManager instance. :returns: status of the deploy. One of ironic.common.states. """ - dhcp_opts = pxe_utils.dhcp_options_for_instance(task) - provider = dhcp_factory.DHCPFactory() - provider.update_dhcp(task, dhcp_opts) - manager_utils.node_set_boot_device(task, 'pxe', persistent=True) - manager_utils.node_power_action(task, states.REBOOT) - + _do_pxe_boot(task) return states.DEPLOYWAIT @task_manager.require_exclusive_lock @@ -262,12 +298,7 @@ class AgentDeploy(base.DeployInterface): :param task: a TaskManager instance. """ node = task.node - pxe_info = _get_tftp_image_info(task.node) - pxe_options = _build_pxe_config_options(task.node, pxe_info) - pxe_utils.create_pxe_config(task, - pxe_options, - CONF.agent.agent_pxe_config_template) - _cache_tftp_images(task.context, node, pxe_info) + _prepare_pxe_boot(task) node.instance_info = build_instance_info_for_deploy(task) node.save() @@ -288,13 +319,7 @@ class AgentDeploy(base.DeployInterface): :param task: a TaskManager instance. 
""" - pxe_info = _get_tftp_image_info(task.node) - for label in pxe_info: - path = pxe_info[label][1] - utils.unlink_without_raise(path) - AgentTFTPImageCache().clean_up() - - pxe_utils.clean_up_pxe_config(task) + _clean_up_pxe(task) def take_over(self, task): """Take over management of this node from a dead conductor. @@ -315,6 +340,59 @@ class AgentDeploy(base.DeployInterface): """ pass + def get_clean_steps(self, task): + """Get the list of clean steps from the agent. + + :param task: a TaskManager object containing the node + + :returns: A list of clean step dictionaries + """ + steps = deploy_utils.agent_get_clean_steps(task) + if CONF.agent.agent_erase_devices_priority: + for step in steps: + if (step.get('step') == 'erase_devices' and + step.get('interface') == 'deploy'): + # Override with operator set priority + step['priority'] = CONF.agent.agent_erase_devices_priority + return steps + + def execute_clean_step(self, task, step): + """Execute a clean step asynchronously on the agent. + + :param task: a TaskManager object containing the node + :param step: a clean step dictionary to execute + :raises: NodeCleaningFailure if the agent does not return a command + status + :returns: states.CLEANING to signify the step will be completed async + """ + return deploy_utils.agent_execute_clean_step(task, step) + + def prepare_cleaning(self, task): + """Boot into the agent to prepare for cleaning.""" + provider = dhcp_factory.DHCPFactory() + # If we have left over ports from a previous cleaning, remove them + if getattr(provider.provider, 'delete_cleaning_ports', None): + provider.provider.delete_cleaning_ports(task) + + # Create cleaning ports if necessary + ports = None + if getattr(provider.provider, 'create_cleaning_ports', None): + ports = provider.provider.create_cleaning_ports(task) + _prepare_pxe_boot(task) + _do_pxe_boot(task, ports) + # Tell the conductor we are waiting for the agent to boot. 
+ return states.CLEANING + + def tear_down_cleaning(self, task): + """Clean up the PXE and DHCP files after cleaning.""" + manager_utils.node_power_action(task, states.POWER_OFF) + _clean_up_pxe(task) + + # If we created cleaning ports, delete them + provider = dhcp_factory.DHCPFactory() + if getattr(provider.provider, 'delete_cleaning_ports', None): + provider.provider.delete_cleaning_ports(task) + class AgentVendorInterface(agent_base_vendor.BaseAgentVendor): diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index 542a1a459e..79ae3149c7 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -30,6 +30,8 @@ from ironic.common.i18n import _LI from ironic.common.i18n import _LW from ironic.common import states from ironic.common import utils +from ironic.conductor import manager +from ironic.conductor import rpcapi from ironic.conductor import utils as manager_utils from ironic.drivers import base from ironic.drivers.modules import agent_client @@ -126,6 +128,67 @@ class BaseAgentVendor(base.VendorInterface): 'payload version: %s') % version) + def _notify_conductor_resume_clean(self, task): + uuid = task.node.uuid + rpc = rpcapi.ConductorAPI() + topic = rpc.get_topic_for(task.node) + # Need to release the lock to let the conductor take it + task.release_resources() + rpc.continue_node_clean(task.context, uuid, topic=topic) + + def continue_cleaning(self, task, **kwargs): + """Start the next cleaning step if the previous one is complete. + + In order to avoid errors and make agent upgrades painless, cleaning + will check the version of all hardware managers during get_clean_steps + at the beginning of cleaning and before executing each step in the + agent. If the version has changed between steps, the agent is unable + to tell if an ordering change will cause a cleaning issue. Therefore, + we restart cleaning. 
+ """ + command = self._get_completed_cleaning_command(task) + LOG.debug('Cleaning command status for node %(node)s on step %(step)s ' + '(command)%', {'node': task.node.uuid, + 'step': task.node.clean_step, + 'command': command}) + + if not command: + # Command is not done yet + return + + if command.get('command_status') == 'FAILED': + msg = (_('Agent returned error for clean step %(step)s on node ' + '%(node)s : %(err)s.') % + {'node': task.node.uuid, + 'err': command.get('command_error'), + 'step': task.node.clean_step}) + LOG.error(msg) + manager.cleaning_error_handler(task, msg) + elif command.get('command_status') == 'CLEAN_VERSION_MISMATCH': + # Restart cleaning, agent must have rebooted to new version + try: + manager.set_node_cleaning_steps(task) + except exception.NodeCleaningFailure: + msg = (_('Could not restart cleaning on node %(node)s: ' + '%(err)s.') % + {'node': task.node.uuid, + 'err': command.get('command_error'), + 'step': task.node.clean_step}) + LOG.exception(msg) + manager.cleaning_error_handler(task, msg) + self._notify_conductor_resume_clean(task) + + elif command.get('command_status') == 'SUCCEEDED': + self._notify_conductor_resume_clean(task) + else: + msg = (_('Agent returned unknown status for clean step %(step)s ' + 'on node %(node)s : %(err)s.') % + {'node': task.node.uuid, + 'err': command.get('command_status'), + 'step': task.node.clean_step}) + LOG.error(msg) + manager.cleaning_error_handler(task, msg) + @base.passthru(['POST']) def heartbeat(self, task, **kwargs): """Method for agent to periodically check in. 
@@ -167,6 +230,15 @@ class BaseAgentVendor(base.VendorInterface): self.deploy_is_done(task)): msg = _('Node failed to move to active state.') self.reboot_to_instance(task, **kwargs) + elif (node.provision_state == states.CLEANING and + not node.clean_step): + # Agent booted from prepare_cleaning + manager.set_node_cleaning_steps(task) + self._notify_conductor_resume_clean(task) + elif (node.provision_state == states.CLEANING and + node.clean_step): + self.continue_cleaning(task, **kwargs) + except Exception as e: err_info = {'node': node.uuid, 'msg': msg, 'e': e} last_error = _('Asynchronous exception for node %(node)s: ' @@ -228,6 +300,19 @@ class BaseAgentVendor(base.VendorInterface): 'node': node } + def _get_completed_cleaning_command(self, task): + """Returns None or a completed cleaning command from the agent.""" + commands = self._client.get_commands_status(task.node) + if not commands: + return + + last_command = commands[-1] + + if last_command['command_status'] == 'RUNNING': + return + else: + return last_command + def _get_interfaces(self, inventory): interfaces = [] try: diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index 4bb2b49ada..280b3741a5 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -114,3 +114,26 @@ class AgentClient(object): method='image.install_bootloader', params=params, wait=True) + + def get_clean_steps(self, node, ports): + params = { + 'node': node.as_dict(), + 'ports': [port.as_dict() for port in ports] + } + return self._command(node=node, + method='clean.get_clean_steps', + params=params, + wait=True) + + def execute_clean_step(self, step, node, ports): + params = { + 'step': step, + 'node': node.as_dict(), + 'ports': [port.as_dict() for port in ports], + 'clean_version': node.driver_internal_info.get( + 'hardware_manager_version') + } + return self._command(node=node, + method='clean.execute_clean_step', + params=params, + wait=False) 
diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py index 1c43aeffdb..f9f2361d86 100644 --- a/ironic/drivers/modules/deploy_utils.py +++ b/ironic/drivers/modules/deploy_utils.py @@ -44,8 +44,10 @@ from ironic.common import images from ironic.common import states from ironic.common import utils from ironic.conductor import utils as manager_utils +from ironic.drivers.modules import agent_client from ironic.drivers.modules import image_cache from ironic.drivers import utils as driver_utils +from ironic import objects from ironic.openstack.common import log as logging @@ -71,6 +73,10 @@ LOG = logging.getLogger(__name__) VALID_ROOT_DEVICE_HINTS = set(('size', 'model', 'wwn', 'serial', 'vendor')) +def _get_agent_client(): + return agent_client.AgentClient() + + # All functions are called from deploy() directly or indirectly. # They are split for stub-out. @@ -873,6 +879,65 @@ def parse_instance_info_capabilities(node): return capabilities +def agent_get_clean_steps(task): + """Get the list of clean steps from the agent. 
+ + #TODO(JoshNang) move to BootInterface + + :param task: a TaskManager object containing the node + :raises: NodeCleaningFailure if the agent returns invalid results + :returns: A list of clean step dictionaries + """ + client = _get_agent_client() + ports = objects.Port.list_by_node_id( + task.context, task.node.id) + result = client.get_clean_steps(task.node, ports).get('command_result') + + if ('clean_steps' not in result or + 'hardware_manager_version' not in result): + raise exception.NodeCleaningFailure(_( + 'get_clean_steps for node %(node)s returned invalid result:' + ' %(result)s') % ({'node': task.node.uuid, 'result': result})) + + driver_info = task.node.driver_internal_info + driver_info['hardware_manager_version'] = result[ + 'hardware_manager_version'] + task.node.driver_internal_info = driver_info + task.node.save() + + # Clean steps looks like {'HardwareManager': [{step1},{steps2}..]..} + # Flatten clean steps into one list + steps_list = [step for step_list in + result['clean_steps'].values() + for step in step_list] + # Filter steps to only return deploy steps + steps = [step for step in steps_list + if step.get('interface') == 'deploy'] + return steps + + +def agent_execute_clean_step(task, step): + """Execute a clean step asynchronously on the agent. 
+ + #TODO(JoshNang) move to BootInterface + + :param task: a TaskManager object containing the node + :param step: a clean step dictionary to execute + :raises: NodeCleaningFailure if the agent does not return a command status + :returns: states.CLEANING to signify the step will be completed async + """ + client = _get_agent_client() + ports = objects.Port.list_by_node_id( + task.context, task.node.id) + result = client.execute_clean_step(step, task.node, ports) + if not result.get('command_status'): + raise exception.NodeCleaningFailure(_( + 'Agent on node %(node)s returned bad command result: ' + '%(result)s') % {'node': task.node.uuid, + 'result': result.get('command_error')}) + return states.CLEANING + + def try_set_boot_device(task, device, persistent=True): """Tries to set the boot device on the node. diff --git a/ironic/tests/dhcp/test_neutron.py b/ironic/tests/dhcp/test_neutron.py index 1ff0390e70..4cb4402f66 100644 --- a/ironic/tests/dhcp/test_neutron.py +++ b/ironic/tests/dhcp/test_neutron.py @@ -34,6 +34,9 @@ class TestNeutron(db_base.DbTestCase): def setUp(self): super(TestNeutron, self).setUp() mgr_utils.mock_the_extension_manager(driver='fake') + self.config( + cleaning_network_uuid='00000000-0000-0000-0000-000000000000', + group='neutron') self.config(enabled_drivers=['fake']) self.config(dhcp_provider='neutron', group='dhcp') @@ -49,6 +52,15 @@ class TestNeutron(db_base.DbTestCase): auth_uri='test-auth-uri', group='keystone_authtoken') self.node = object_utils.create_test_node(self.context) + self.ports = [ + object_utils.create_test_port( + self.context, node_id=self.node.id, id=2, + uuid='1be26c0b-03f2-4d2e-ae87-c02d7f33c782', + address='52:54:00:cf:2d:32')] + # Very simple neutron port representation + self.neutron_port = {'id': '132f871f-eaec-4fed-9475-0d54465e0f00', + 'mac_address': '52:54:00:cf:2d:32'} + dhcp_factory.DHCPFactory._dhcp_provider = None def test__build_client_invalid_auth_strategy(self): @@ -340,16 +352,97 @@ class 
TestNeutron(db_base.DbTestCase): @mock.patch('ironic.dhcp.neutron.NeutronDHCPApi._get_port_ip_address') def test_get_ip_addresses(self, get_ip_mock): ip_address = '10.10.0.1' - address = "aa:aa:aa:aa:aa:aa" expected = [ip_address] - port = object_utils.create_test_port(self.context, - node_id=self.node.id, - address=address) get_ip_mock.return_value = ip_address with task_manager.acquire(self.context, self.node.uuid) as task: api = dhcp_factory.DHCPFactory().provider result = api.get_ip_addresses(task) - get_ip_mock.assert_called_once_with(task, port.uuid, mock.ANY) + get_ip_mock.assert_called_once_with(task, self.ports[0].uuid, + mock.ANY) self.assertEqual(expected, result) + + @mock.patch.object(client.Client, 'create_port') + def test_create_cleaning_ports(self, create_mock): + # Ensure we can create cleaning ports for in band cleaning + create_mock.return_value = {'port': self.neutron_port} + expected = {self.ports[0].uuid: self.neutron_port['id']} + api = dhcp_factory.DHCPFactory().provider + + with task_manager.acquire(self.context, self.node.uuid) as task: + ports = api.create_cleaning_ports(task) + self.assertEqual(expected, ports) + create_mock.assert_called_once_with({'port': { + 'network_id': '00000000-0000-0000-0000-000000000000', + 'admin_state_up': True, 'mac_address': self.ports[0].address}}) + + @mock.patch('ironic.conductor.manager.cleaning_error_handler') + @mock.patch.object(client.Client, 'create_port') + def test_create_cleaning_ports_fail(self, create_mock, error_mock): + # Check that if creating a port fails, the node goes to cleanfail + create_mock.side_effect = neutron_client_exc.ConnectionFailed + api = dhcp_factory.DHCPFactory().provider + + with task_manager.acquire(self.context, self.node.uuid) as task: + api.create_cleaning_ports(task) + error_mock.assert_called_once_with(task, mock.ANY) + create_mock.assert_called_once_with({'port': { + 'network_id': '00000000-0000-0000-0000-000000000000', + 'admin_state_up': True, 'mac_address': 
self.ports[0].address}}) + + @mock.patch('ironic.conductor.manager.cleaning_error_handler') + @mock.patch.object(client.Client, 'create_port') + def test_create_cleaning_ports_bad_config(self, create_mock, error_mock): + self.config(cleaning_network_uuid=None, group='neutron') + api = dhcp_factory.DHCPFactory().provider + + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertRaises(exception.InvalidParameterValue, + api.create_cleaning_ports, task) + + @mock.patch.object(client.Client, 'delete_port') + @mock.patch.object(client.Client, 'list_ports') + def test_delete_cleaning_ports(self, list_mock, delete_mock): + # Ensure that we can delete cleaning ports, and that ports with + # different macs don't get deleted + other_port = {'id': '132f871f-eaec-4fed-9475-0d54465e0f01', + 'mac_address': 'aa:bb:cc:dd:ee:ff'} + list_mock.return_value = {'ports': [self.neutron_port, other_port]} + api = dhcp_factory.DHCPFactory().provider + + with task_manager.acquire(self.context, self.node.uuid) as task: + api.delete_cleaning_ports(task) + list_mock.assert_called_once_with( + network_id='00000000-0000-0000-0000-000000000000') + delete_mock.assert_called_once_with(self.neutron_port['id']) + + @mock.patch('ironic.conductor.manager.cleaning_error_handler') + @mock.patch.object(client.Client, 'list_ports') + def test_delete_cleaning_ports_list_fail(self, list_mock, error_mock): + # Check that if listing ports fails, the node goes to cleanfail + list_mock.side_effect = neutron_client_exc.ConnectionFailed + api = dhcp_factory.DHCPFactory().provider + + with task_manager.acquire(self.context, self.node.uuid) as task: + api.delete_cleaning_ports(task) + list_mock.assert_called_once_with( + network_id='00000000-0000-0000-0000-000000000000') + error_mock.assert_called_once_with(task, mock.ANY) + + @mock.patch('ironic.conductor.manager.cleaning_error_handler') + @mock.patch.object(client.Client, 'delete_port') + @mock.patch.object(client.Client, 'list_ports') + 
def test_delete_cleaning_ports_delete_fail(self, list_mock, delete_mock, + error_mock): + # Check that if deleting ports fails, the node goes to cleanfail + list_mock.return_value = {'ports': [self.neutron_port]} + delete_mock.side_effect = neutron_client_exc.ConnectionFailed + api = dhcp_factory.DHCPFactory().provider + + with task_manager.acquire(self.context, self.node.uuid) as task: + api.delete_cleaning_ports(task) + list_mock.assert_called_once_with( + network_id='00000000-0000-0000-0000-000000000000') + delete_mock.assert_called_once_with(self.neutron_port['id']) + error_mock.assert_called_once_with(task, mock.ANY) diff --git a/ironic/tests/drivers/test_agent.py b/ironic/tests/drivers/test_agent.py index aa3213c23e..bad86162d7 100644 --- a/ironic/tests/drivers/test_agent.py +++ b/ironic/tests/drivers/test_agent.py @@ -140,6 +140,8 @@ class TestAgentDeploy(db_base.DbTestCase): 'driver_internal_info': DRIVER_INTERNAL_INFO, } self.node = object_utils.create_test_node(self.context, **n) + self.ports = [object_utils.create_test_port(self.context, + node_id=self.node.id)] def test_get_properties(self): expected = agent.COMMON_PROPERTIES @@ -197,7 +199,7 @@ class TestAgentDeploy(db_base.DbTestCase): dhcp_opts = pxe_utils.dhcp_options_for_instance(task) driver_return = self.driver.deploy(task) self.assertEqual(driver_return, states.DEPLOYWAIT) - dhcp_mock.assert_called_once_with(task, dhcp_opts) + dhcp_mock.assert_called_once_with(task, dhcp_opts, None) bootdev_mock.assert_called_once_with(task, 'pxe', persistent=True) power_mock.assert_called_once_with(task, states.REBOOT) @@ -210,6 +212,59 @@ class TestAgentDeploy(db_base.DbTestCase): power_mock.assert_called_once_with(task, states.POWER_OFF) self.assertEqual(driver_return, states.DELETED) + @mock.patch('ironic.dhcp.neutron.NeutronDHCPApi.delete_cleaning_ports') + @mock.patch('ironic.dhcp.neutron.NeutronDHCPApi.create_cleaning_ports') + @mock.patch('ironic.drivers.modules.agent._do_pxe_boot') + 
@mock.patch('ironic.drivers.modules.agent._prepare_pxe_boot') + def test_prepare_cleaning(self, prepare_mock, boot_mock, create_mock, + delete_mock): + ports = [{'ports': self.ports}] + create_mock.return_value = ports + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + self.assertEqual(states.CLEANING, + self.driver.prepare_cleaning(task)) + prepare_mock.assert_called_once_with(task) + boot_mock.assert_called_once_with(task, ports) + create_mock.assert_called_once() + delete_mock.assert_called_once() + + @mock.patch('ironic.dhcp.neutron.NeutronDHCPApi.delete_cleaning_ports') + @mock.patch('ironic.drivers.modules.agent._clean_up_pxe') + @mock.patch('ironic.conductor.utils.node_power_action') + def test_tear_down_cleaning(self, power_mock, cleanup_mock, neutron_mock): + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + self.assertIsNone(self.driver.tear_down_cleaning(task)) + power_mock.assert_called_once_with(task, states.POWER_OFF) + cleanup_mock.assert_called_once_with(task) + neutron_mock.assert_called_once() + + @mock.patch('ironic.drivers.modules.deploy_utils.agent_get_clean_steps') + def test_get_clean_steps(self, mock_get_clean_steps): + # Test getting clean steps + mock_steps = [{'priority': 10, 'interface': 'deploy', + 'step': 'erase_devices'}] + mock_get_clean_steps.return_value = mock_steps + with task_manager.acquire(self.context, self.node.uuid) as task: + steps = self.driver.get_clean_steps(task) + mock_get_clean_steps.assert_called_once_with(task) + self.assertEqual(mock_steps, steps) + + @mock.patch('ironic.drivers.modules.deploy_utils.agent_get_clean_steps') + def test_get_clean_steps_config_priority(self, mock_get_clean_steps): + # Test that we can override the priority of get clean steps + self.config(agent_erase_devices_priority=20, group='agent') + mock_steps = [{'priority': 10, 'interface': 'deploy', + 'step': 'erase_devices'}] + expected_steps = [{'priority': 20, 
'interface': 'deploy', + 'step': 'erase_devices'}] + mock_get_clean_steps.return_value = mock_steps + with task_manager.acquire(self.context, self.node.uuid) as task: + steps = self.driver.get_clean_steps(task) + mock_get_clean_steps.assert_called_once_with(task) + self.assertEqual(expected_steps, steps) + class TestAgentVendor(db_base.DbTestCase): diff --git a/ironic/tests/drivers/test_agent_base_vendor.py b/ironic/tests/drivers/test_agent_base_vendor.py index ba70a9f3fc..31596abc13 100644 --- a/ironic/tests/drivers/test_agent_base_vendor.py +++ b/ironic/tests/drivers/test_agent_base_vendor.py @@ -401,3 +401,64 @@ class TestBaseAgentVendor(db_base.DbTestCase): task, boot_devices.DISK) self.assertEqual(states.DEPLOYFAIL, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) + + @mock.patch.object(agent_base_vendor.BaseAgentVendor, + '_notify_conductor_resume_clean') + @mock.patch.object(agent_client.AgentClient, 'get_commands_status') + def test_continue_cleaning(self, status_mock, notify_mock): + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.passthru.continue_cleaning(task) + notify_mock.assert_called_once_with(task) + + @mock.patch.object(agent_base_vendor.BaseAgentVendor, + '_notify_conductor_resume_clean') + @mock.patch.object(agent_client.AgentClient, 'get_commands_status') + def test_continue_cleaning_running(self, status_mock, notify_mock): + status_mock.return_value = [{ + 'command_status': 'RUNNING', + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.passthru.continue_cleaning(task) + notify_mock.assert_not_called() + + @mock.patch('ironic.conductor.manager.cleaning_error_handler') + @mock.patch.object(agent_client.AgentClient, 'get_commands_status') + def test_continue_cleaning_fail(self, status_mock, error_mock): + status_mock.return_value = [{ + 
'command_status': 'FAILED', + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.passthru.continue_cleaning(task) + error_mock.assert_called_once_with(task, mock.ANY) + + @mock.patch('ironic.conductor.manager.set_node_cleaning_steps') + @mock.patch.object(agent_base_vendor.BaseAgentVendor, + '_notify_conductor_resume_clean') + @mock.patch.object(agent_client.AgentClient, 'get_commands_status') + def test_continue_cleaning_clean_version_mismatch( + self, status_mock, notify_mock, steps_mock): + status_mock.return_value = [{ + 'command_status': 'CLEAN_VERSION_MISMATCH', + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.passthru.continue_cleaning(task) + steps_mock.assert_called_once_with(task) + notify_mock.assert_called_once_with(task) + + @mock.patch('ironic.conductor.manager.cleaning_error_handler') + @mock.patch.object(agent_client.AgentClient, 'get_commands_status') + def test_continue_cleaning_unknown(self, status_mock, error_mock): + status_mock.return_value = [{ + 'command_status': 'UNKNOWN', + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.passthru.continue_cleaning(task) + error_mock.assert_called_once_with(task, mock.ANY) diff --git a/ironic/tests/drivers/test_agent_client.py b/ironic/tests/drivers/test_agent_client.py index b8e1fbf85a..80a0f69af8 100644 --- a/ironic/tests/drivers/test_agent_client.py +++ b/ironic/tests/drivers/test_agent_client.py @@ -36,10 +36,19 @@ class MockNode(object): self.uuid = 'uuid' self.driver_info = {} self.driver_internal_info = { - 'agent_url': "http://127.0.0.1:9999" + 'agent_url': "http://127.0.0.1:9999", + 'clean_version': {'generic': '1'} } self.instance_info = {} + def as_dict(self): + return { + 'uuid': self.uuid, + 'driver_info': self.driver_info, + 'driver_internal_info': self.driver_internal_info, + 'instance_info': self.instance_info + } + class 
TestAgentClient(base.TestCase): def setUp(self): @@ -148,3 +157,37 @@ class TestAgentClient(base.TestCase): self.client._command.assert_called_once_with( node=self.node, method='image.install_bootloader', params=params, wait=True) + + def test_get_clean_steps(self): + self.client._command = mock.Mock() + ports = [] + expected_params = { + 'node': self.node.as_dict(), + 'ports': [] + } + + self.client.get_clean_steps(self.node, + ports) + self.client._command.assert_called_once_with(node=self.node, + method='clean.get_clean_steps', + params=expected_params, + wait=True) + + def test_execute_clean_step(self): + self.client._command = mock.Mock() + ports = [] + step = {'priority': 10, 'step': 'erase_devices', 'interface': 'deploy'} + expected_params = { + 'step': step, + 'node': self.node.as_dict(), + 'ports': [], + 'clean_version': self.node.driver_internal_info.get( + 'hardware_manager_version') + } + self.client.execute_clean_step(step, + self.node, + ports) + self.client._command.assert_called_once_with(node=self.node, + method='clean.execute_clean_step', + params=expected_params, + wait=False) diff --git a/ironic/tests/drivers/test_deploy_utils.py b/ironic/tests/drivers/test_deploy_utils.py index 29731c176f..bdbec93196 100644 --- a/ironic/tests/drivers/test_deploy_utils.py +++ b/ironic/tests/drivers/test_deploy_utils.py @@ -33,6 +33,7 @@ from ironic.common import boot_devices from ironic.common import disk_partitioner from ironic.common import exception from ironic.common import images +from ironic.common import states from ironic.common import utils as common_utils from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils @@ -1509,6 +1510,129 @@ class TrySetBootDeviceTestCase(db_base.DbTestCase): task, boot_devices.DISK, persistent=True) +class AgentCleaningTestCase(db_base.DbTestCase): + def setUp(self): + super(AgentCleaningTestCase, self).setUp() + mgr_utils.mock_the_extension_manager(driver='fake_agent') + n = {'driver': 
'fake_agent'} + self.node = obj_utils.create_test_node(self.context, **n) + self.ports = [obj_utils.create_test_port(self.context, + node_id=self.node.id)] + + self.clean_steps = { + 'hardware_manager_version': '1', + 'clean_steps': { + 'GenericHardwareManager': [ + {'interface': 'deploy', + 'step': 'erase_devices', + 'priority': 20}, + ], + 'SpecificHardwareManager': [ + {'interface': 'deploy', + 'step': 'update_firmware', + 'priority': 30}, + {'interface': 'raid', + 'step': 'create_raid', + 'priority': 10}, + ] + } + } + + @mock.patch('ironic.objects.Port.list_by_node_id') + @mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client') + def test_get_clean_steps(self, get_client_mock, list_ports_mock): + client_mock = mock.Mock() + client_mock.get_clean_steps.return_value = { + 'command_result': self.clean_steps} + get_client_mock.return_value = client_mock + list_ports_mock.return_value = self.ports + + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + response = utils.agent_get_clean_steps(task) + client_mock.get_clean_steps.assert_called_once_with(task.node, + self.ports) + self.assertEqual('1', task.node.driver_internal_info[ + 'hardware_manager_version']) + + # Since steps are returned in dicts, they have non-deterministic + # ordering + self.assertEqual(2, len(response)) + self.assertTrue(self.clean_steps['clean_steps'][ + 'GenericHardwareManager'][0] in response) + self.assertTrue(self.clean_steps['clean_steps'][ + 'SpecificHardwareManager'][0] in response) + + @mock.patch('ironic.objects.Port.list_by_node_id') + @mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client') + def test_get_clean_steps_missing_steps(self, get_client_mock, + list_ports_mock): + client_mock = mock.Mock() + del self.clean_steps['clean_steps'] + client_mock.get_clean_steps.return_value = { + 'command_result': self.clean_steps} + get_client_mock.return_value = client_mock + list_ports_mock.return_value = self.ports + + with 
task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + self.assertRaises(exception.NodeCleaningFailure, + utils.agent_get_clean_steps, + task) + client_mock.get_clean_steps.assert_called_once_with(task.node, + self.ports) + + @mock.patch('ironic.objects.Port.list_by_node_id') + @mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client') + def test_execute_clean_step(self, get_client_mock, list_ports_mock): + client_mock = mock.Mock() + client_mock.execute_clean_step.return_value = { + 'command_status': 'SUCCEEDED'} + get_client_mock.return_value = client_mock + list_ports_mock.return_value = self.ports + + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + response = utils.agent_execute_clean_step( + task, + self.clean_steps['clean_steps']['GenericHardwareManager'][0]) + self.assertEqual(states.CLEANING, response) + + @mock.patch('ironic.objects.Port.list_by_node_id') + @mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client') + def test_execute_clean_step_running(self, get_client_mock, + list_ports_mock): + client_mock = mock.Mock() + client_mock.execute_clean_step.return_value = { + 'command_status': 'RUNNING'} + get_client_mock.return_value = client_mock + list_ports_mock.return_value = self.ports + + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + response = utils.agent_execute_clean_step( + task, + self.clean_steps['clean_steps']['GenericHardwareManager'][0]) + self.assertEqual(states.CLEANING, response) + + @mock.patch('ironic.objects.Port.list_by_node_id') + @mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client') + def test_execute_clean_step_version_mismatch(self, get_client_mock, + list_ports_mock): + client_mock = mock.Mock() + client_mock.execute_clean_step.return_value = { + 'command_status': 'RUNNING'} + get_client_mock.return_value = client_mock + list_ports_mock.return_value = self.ports + + with task_manager.acquire( 
+ self.context, self.node['uuid'], shared=False) as task: + response = utils.agent_execute_clean_step( + task, + self.clean_steps['clean_steps']['GenericHardwareManager'][0]) + self.assertEqual(states.CLEANING, response) + + @mock.patch.object(utils, 'is_block_device') @mock.patch.object(utils, 'login_iscsi', lambda *_: None) @mock.patch.object(utils, 'discovery', lambda *_: None)