diff --git a/etc/ironic/ironic.conf.sample b/etc/ironic/ironic.conf.sample index c06188b287..27440d3839 100644 --- a/etc/ironic/ironic.conf.sample +++ b/etc/ironic/ironic.conf.sample @@ -351,6 +351,11 @@ # Neutron bootfile DHCP parameter. (string value) #agent_pxe_bootfile_name=pxelinux.0 +# Priority to run in-band erase devices via the Ironic Python +# Agent ramdisk. If unset, will use the priority set in the +# ramdisk (defaults to 10 for the GenericHardwareManager). If +# set to 0, will not run during cleaning. (integer value) +#agent_erase_devices_priority= # # Options defined in ironic.drivers.modules.agent_base_vendor @@ -1122,6 +1127,11 @@ # (string value) #auth_strategy=keystone +# UUID of the network to create Neutron ports on when booting +# to a ramdisk for cleaning/zapping using Neutron DHCP (string +# value) +#cleaning_network_uuid= + [oslo_messaging_amqp] diff --git a/ironic/common/dhcp_factory.py b/ironic/common/dhcp_factory.py index 24b10c88d5..c9fadf18eb 100644 --- a/ironic/common/dhcp_factory.py +++ b/ironic/common/dhcp_factory.py @@ -74,7 +74,7 @@ class DHCPFactory(object): cls._dhcp_provider = _extension_manager.driver - def update_dhcp(self, task, dhcp_opts): + def update_dhcp(self, task, dhcp_opts, ports=None): """Send or update the DHCP BOOT options for this node. :param task: A TaskManager instance. @@ -88,8 +88,10 @@ class DHCPFactory(object): 'opt_value': '123.123.123.456'}, {'opt_name': 'tftp-server', 'opt_value': '123.123.123.123'}] + :param ports: a list of Neutron port dicts to update DHCP options on. + If None, will get the list of ports from the Ironic port objects. 
""" - self.provider.update_dhcp_opts(task, dhcp_opts) + self.provider.update_dhcp_opts(task, dhcp_opts, ports) @property def provider(self): diff --git a/ironic/dhcp/neutron.py b/ironic/dhcp/neutron.py index 1d4388bf18..ec5f15be80 100644 --- a/ironic/dhcp/neutron.py +++ b/ironic/dhcp/neutron.py @@ -27,6 +27,7 @@ from ironic.common.i18n import _LE from ironic.common.i18n import _LW from ironic.common import keystone from ironic.common import network +from ironic.conductor import manager from ironic.dhcp import base from ironic.drivers.modules import ssh from ironic.openstack.common import log as logging @@ -48,7 +49,11 @@ neutron_opts = [ 'to neutron. Can be either "keystone" or "noauth". ' 'Running neutron in noauth mode (related to but not ' 'affected by this setting) is insecure and should only be ' - 'used for testing.') + 'used for testing.'), + cfg.StrOpt('cleaning_network_uuid', + help='UUID of the network to create Neutron ports on when ' + 'booting to a ramdisk for cleaning/zapping using Neutron ' + 'DHCP') ] CONF = cfg.CONF @@ -140,11 +145,11 @@ class NeutronDHCPApi(base.BaseDHCP): "port %s."), port_id) raise exception.FailedToUpdateMacOnPort(port_id=port_id) - def update_dhcp_opts(self, task, options): + def update_dhcp_opts(self, task, options, vifs=None): """Send or update the DHCP BOOT options for this node. :param task: A TaskManager instance. - :param dhcp_opts: this will be a list of dicts, e.g. + :param options: this will be a list of dicts, e.g. :: @@ -154,8 +159,14 @@ class NeutronDHCPApi(base.BaseDHCP): 'opt_value': '123.123.123.456'}, {'opt_name': 'tftp-server', 'opt_value': '123.123.123.123'}] + :param vifs: a dict of Neutron port dicts to update DHCP options on. + The keys should be Ironic port UUIDs, and the values should be + Neutron port UUIDs + If the value is None, will get the list of ports from the Ironic + port objects. 
""" - vifs = network.get_node_vif_ids(task) + if vifs is None: + vifs = network.get_node_vif_ids(task) if not vifs: raise exception.FailedToUpdateDHCPOptOnPort( _("No VIFs found for node %(node)s when attempting " @@ -275,3 +286,85 @@ class NeutronDHCPApi(base.BaseDHCP): {'node': task.node.uuid, 'ports': failures}) return ip_addresses + + def create_cleaning_ports(self, task): + """Create neutron ports for each port on task.node to boot the ramdisk. + + :param task: a TaskManager instance. + :raises: InvalidParameterValue if the cleaning network is None + :returns: a dictionary in the form {port.uuid: neutron_port['id']} + """ + if not CONF.neutron.cleaning_network_uuid: + raise exception.InvalidParameterValue(_('Valid cleaning network ' + 'UUID not provided')) + neutron_client = _build_client(task.context.auth_token) + body = { + 'port': { + 'network_id': CONF.neutron.cleaning_network_uuid, + 'admin_state_up': True, + } + } + ports = {} + for ironic_port in task.ports: + body['port']['mac_address'] = ironic_port.address + try: + port = neutron_client.create_port(body) + except neutron_client_exc.ConnectionFailed as e: + msg = (_('Could not create cleaning port on network %(net)s ' + 'from %(node)s. 
%(exc)s') % + {'net': CONF.neutron.cleaning_network_uuid, + 'node': task.node.uuid, + 'exc': e}) + LOG.exception(msg) + return manager.cleaning_error_handler(task, msg) + if not port.get('port') or not port['port'].get('id'): + # Rollback changes + try: + self.delete_cleaning_ports(task) + except Exception: + # Log the error, but continue to cleaning error handler + LOG.exception(_LE('Failed to rollback cleaning port ' + 'changes for node %s') % task.node.uuid) + msg = (_('Failed to create cleaning ports for node ' + '%(node)s') % {'node': task.node.uuid}) + LOG.error(msg) + return manager.cleaning_error_handler(task, msg) + # Match return value of get_node_vif_ids() + ports[ironic_port.uuid] = port['port']['id'] + return ports + + def delete_cleaning_ports(self, task): + """Deletes the neutron port created for booting the ramdisk. + + :param task: a TaskManager instance. + """ + neutron_client = _build_client(task.context.auth_token) + macs = [p.address for p in task.ports] + params = { + 'network_id': CONF.neutron.cleaning_network_uuid + } + try: + ports = neutron_client.list_ports(**params) + except neutron_client_exc.ConnectionFailed as e: + msg = (_('Could not get cleaning network vif for %(node)s ' + 'from Neutron, possible network issue. %(exc)s') % + {'node': task.node.uuid, + 'exc': e}) + LOG.exception(msg) + return manager.cleaning_error_handler(task, msg) + + # Iterate the list of Neutron port dicts, remove the ones we added + for neutron_port in ports.get('ports', []): + # Only delete ports using the node's mac addresses + if neutron_port.get('mac_address') in macs: + try: + neutron_client.delete_port(neutron_port.get('id')) + except neutron_client_exc.ConnectionFailed as e: + msg = (_('Could not remove cleaning ports on network ' + '%(net)s from %(node)s, possible network issue. 
' + '%(exc)s') % + {'net': CONF.neutron.cleaning_network_uuid, + 'node': task.node.uuid, + 'exc': e}) + LOG.exception(msg) + return manager.cleaning_error_handler(task, msg) diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 505e812056..efd4be6fa6 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -18,6 +18,7 @@ import time from oslo_config import cfg from oslo_utils import excutils +from ironic.common import boot_devices from ironic.common import dhcp_factory from ironic.common import exception from ironic.common.glance_service import service_utils @@ -51,6 +52,12 @@ agent_opts = [ cfg.StrOpt('agent_pxe_bootfile_name', default='pxelinux.0', help='Neutron bootfile DHCP parameter.'), + cfg.IntOpt('agent_erase_devices_priority', + help='Priority to run in-band erase devices via the Ironic ' + 'Python Agent ramdisk. If unset, will use the priority ' + 'set in the ramdisk (defaults to 10 for the ' + 'GenericHardwareManager). If set to 0, will not run ' + 'during cleaning.') ] CONF = cfg.CONF @@ -185,6 +192,40 @@ def build_instance_info_for_deploy(task): return instance_info +def _prepare_pxe_boot(task): + """Prepare the files required for PXE booting the agent.""" + pxe_info = _get_tftp_image_info(task.node) + pxe_options = _build_pxe_config_options(task.node, pxe_info) + pxe_utils.create_pxe_config(task, + pxe_options, + CONF.agent.agent_pxe_config_template) + _cache_tftp_images(task.context, task.node, pxe_info) + + +def _do_pxe_boot(task, ports=None): + """Reboot the node into the PXE ramdisk. + + :param ports: a list of Neutron port dicts to update DHCP options on. If + None, will get the list of ports from the Ironic port objects. 
+ """ + dhcp_opts = pxe_utils.dhcp_options_for_instance(task) + provider = dhcp_factory.DHCPFactory() + provider.update_dhcp(task, dhcp_opts, ports) + manager_utils.node_set_boot_device(task, boot_devices.PXE, persistent=True) + manager_utils.node_power_action(task, states.REBOOT) + + +def _clean_up_pxe(task): + """Clean up left over PXE and DHCP files.""" + pxe_info = _get_tftp_image_info(task.node) + for label in pxe_info: + path = pxe_info[label][1] + utils.unlink_without_raise(path) + AgentTFTPImageCache().clean_up() + + pxe_utils.clean_up_pxe_config(task) + + class AgentDeploy(base.DeployInterface): """Interface for deploy-related actions.""" @@ -238,12 +279,7 @@ class AgentDeploy(base.DeployInterface): :param task: a TaskManager instance. :returns: status of the deploy. One of ironic.common.states. """ - dhcp_opts = pxe_utils.dhcp_options_for_instance(task) - provider = dhcp_factory.DHCPFactory() - provider.update_dhcp(task, dhcp_opts) - manager_utils.node_set_boot_device(task, 'pxe', persistent=True) - manager_utils.node_power_action(task, states.REBOOT) - + _do_pxe_boot(task) return states.DEPLOYWAIT @task_manager.require_exclusive_lock @@ -262,12 +298,7 @@ class AgentDeploy(base.DeployInterface): :param task: a TaskManager instance. """ node = task.node - pxe_info = _get_tftp_image_info(task.node) - pxe_options = _build_pxe_config_options(task.node, pxe_info) - pxe_utils.create_pxe_config(task, - pxe_options, - CONF.agent.agent_pxe_config_template) - _cache_tftp_images(task.context, node, pxe_info) + _prepare_pxe_boot(task) node.instance_info = build_instance_info_for_deploy(task) node.save() @@ -288,13 +319,7 @@ class AgentDeploy(base.DeployInterface): :param task: a TaskManager instance. 
""" - pxe_info = _get_tftp_image_info(task.node) - for label in pxe_info: - path = pxe_info[label][1] - utils.unlink_without_raise(path) - AgentTFTPImageCache().clean_up() - - pxe_utils.clean_up_pxe_config(task) + _clean_up_pxe(task) def take_over(self, task): """Take over management of this node from a dead conductor. @@ -315,6 +340,59 @@ class AgentDeploy(base.DeployInterface): """ pass + def get_clean_steps(self, task): + """Get the list of clean steps from the agent. + + :param task: a TaskManager object containing the node + + :returns: A list of clean step dictionaries + """ + steps = deploy_utils.agent_get_clean_steps(task) + if CONF.agent.agent_erase_devices_priority: + for step in steps: + if (step.get('step') == 'erase_devices' and + step.get('interface') == 'deploy'): + # Override with operator set priority + step['priority'] = CONF.agent.agent_erase_devices_priority + return steps + + def execute_clean_step(self, task, step): + """Execute a clean step asynchronously on the agent. + + :param task: a TaskManager object containing the node + :param step: a clean step dictionary to execute + :raises: NodeCleaningFailure if the agent does not return a command + status + :returns: states.CLEANING to signify the step will be completed async + """ + return deploy_utils.agent_execute_clean_step(task, step) + + def prepare_cleaning(self, task): + """Boot into the agent to prepare for cleaning.""" + provider = dhcp_factory.DHCPFactory() + # If we have left over ports from a previous cleaning, remove them + if getattr(provider.provider, 'delete_cleaning_ports', None): + provider.provider.delete_cleaning_ports(task) + + # Create cleaning ports if necessary + ports = None + if getattr(provider.provider, 'create_cleaning_ports', None): + ports = provider.provider.create_cleaning_ports(task) + _prepare_pxe_boot(task) + _do_pxe_boot(task, ports) + # Tell the conductor we are waiting for the agent to boot. 
+ return states.CLEANING + + def tear_down_cleaning(self, task): + """Clean up the PXE and DHCP files after cleaning.""" + manager_utils.node_power_action(task, states.POWER_OFF) + _clean_up_pxe(task) + + # If we created cleaning ports, delete them + provider = dhcp_factory.DHCPFactory() + if getattr(provider.provider, 'delete_cleaning_ports', None): + provider.provider.delete_cleaning_ports(task) + class AgentVendorInterface(agent_base_vendor.BaseAgentVendor): diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index 542a1a459e..79ae3149c7 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -30,6 +30,8 @@ from ironic.common.i18n import _LI from ironic.common.i18n import _LW from ironic.common import states from ironic.common import utils +from ironic.conductor import manager +from ironic.conductor import rpcapi from ironic.conductor import utils as manager_utils from ironic.drivers import base from ironic.drivers.modules import agent_client @@ -126,6 +128,67 @@ class BaseAgentVendor(base.VendorInterface): 'payload version: %s') % version) + def _notify_conductor_resume_clean(self, task): + uuid = task.node.uuid + rpc = rpcapi.ConductorAPI() + topic = rpc.get_topic_for(task.node) + # Need to release the lock to let the conductor take it + task.release_resources() + rpc.continue_node_clean(task.context, uuid, topic=topic) + + def continue_cleaning(self, task, **kwargs): + """Start the next cleaning step if the previous one is complete. + + In order to avoid errors and make agent upgrades painless, cleaning + will check the version of all hardware managers during get_clean_steps + at the beginning of cleaning and before executing each step in the + agent. If the version has changed between steps, the agent is unable + to tell if an ordering change will cause a cleaning issue. Therefore, + we restart cleaning. 
+ """ + command = self._get_completed_cleaning_command(task) + LOG.debug('Cleaning command status for node %(node)s on step %(step)s ' + '(command)%', {'node': task.node.uuid, + 'step': task.node.clean_step, + 'command': command}) + + if not command: + # Command is not done yet + return + + if command.get('command_status') == 'FAILED': + msg = (_('Agent returned error for clean step %(step)s on node ' + '%(node)s : %(err)s.') % + {'node': task.node.uuid, + 'err': command.get('command_error'), + 'step': task.node.clean_step}) + LOG.error(msg) + manager.cleaning_error_handler(task, msg) + elif command.get('command_status') == 'CLEAN_VERSION_MISMATCH': + # Restart cleaning, agent must have rebooted to new version + try: + manager.set_node_cleaning_steps(task) + except exception.NodeCleaningFailure: + msg = (_('Could not restart cleaning on node %(node)s: ' + '%(err)s.') % + {'node': task.node.uuid, + 'err': command.get('command_error'), + 'step': task.node.clean_step}) + LOG.exception(msg) + manager.cleaning_error_handler(task, msg) + self._notify_conductor_resume_clean(task) + + elif command.get('command_status') == 'SUCCEEDED': + self._notify_conductor_resume_clean(task) + else: + msg = (_('Agent returned unknown status for clean step %(step)s ' + 'on node %(node)s : %(err)s.') % + {'node': task.node.uuid, + 'err': command.get('command_status'), + 'step': task.node.clean_step}) + LOG.error(msg) + manager.cleaning_error_handler(task, msg) + @base.passthru(['POST']) def heartbeat(self, task, **kwargs): """Method for agent to periodically check in. 
@@ -167,6 +230,15 @@ class BaseAgentVendor(base.VendorInterface): self.deploy_is_done(task)): msg = _('Node failed to move to active state.') self.reboot_to_instance(task, **kwargs) + elif (node.provision_state == states.CLEANING and + not node.clean_step): + # Agent booted from prepare_cleaning + manager.set_node_cleaning_steps(task) + self._notify_conductor_resume_clean(task) + elif (node.provision_state == states.CLEANING and + node.clean_step): + self.continue_cleaning(task, **kwargs) + except Exception as e: err_info = {'node': node.uuid, 'msg': msg, 'e': e} last_error = _('Asynchronous exception for node %(node)s: ' @@ -228,6 +300,19 @@ class BaseAgentVendor(base.VendorInterface): 'node': node } + def _get_completed_cleaning_command(self, task): + """Returns None or a completed cleaning command from the agent.""" + commands = self._client.get_commands_status(task.node) + if not commands: + return + + last_command = commands[-1] + + if last_command['command_status'] == 'RUNNING': + return + else: + return last_command + def _get_interfaces(self, inventory): interfaces = [] try: diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index 4bb2b49ada..280b3741a5 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -114,3 +114,26 @@ class AgentClient(object): method='image.install_bootloader', params=params, wait=True) + + def get_clean_steps(self, node, ports): + params = { + 'node': node.as_dict(), + 'ports': [port.as_dict() for port in ports] + } + return self._command(node=node, + method='clean.get_clean_steps', + params=params, + wait=True) + + def execute_clean_step(self, step, node, ports): + params = { + 'step': step, + 'node': node.as_dict(), + 'ports': [port.as_dict() for port in ports], + 'clean_version': node.driver_internal_info.get( + 'hardware_manager_version') + } + return self._command(node=node, + method='clean.execute_clean_step', + params=params, + wait=False) 
diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py index 1c43aeffdb..f9f2361d86 100644 --- a/ironic/drivers/modules/deploy_utils.py +++ b/ironic/drivers/modules/deploy_utils.py @@ -44,8 +44,10 @@ from ironic.common import images from ironic.common import states from ironic.common import utils from ironic.conductor import utils as manager_utils +from ironic.drivers.modules import agent_client from ironic.drivers.modules import image_cache from ironic.drivers import utils as driver_utils +from ironic import objects from ironic.openstack.common import log as logging @@ -71,6 +73,10 @@ LOG = logging.getLogger(__name__) VALID_ROOT_DEVICE_HINTS = set(('size', 'model', 'wwn', 'serial', 'vendor')) +def _get_agent_client(): + return agent_client.AgentClient() + + # All functions are called from deploy() directly or indirectly. # They are split for stub-out. @@ -873,6 +879,65 @@ def parse_instance_info_capabilities(node): return capabilities +def agent_get_clean_steps(task): + """Get the list of clean steps from the agent. 
+ + #TODO(JoshNang) move to BootInterface + + :param task: a TaskManager object containing the node + :raises: NodeCleaningFailure if the agent returns invalid results + :returns: A list of clean step dictionaries + """ + client = _get_agent_client() + ports = objects.Port.list_by_node_id( + task.context, task.node.id) + result = client.get_clean_steps(task.node, ports).get('command_result') + + if ('clean_steps' not in result or + 'hardware_manager_version' not in result): + raise exception.NodeCleaningFailure(_( + 'get_clean_steps for node %(node)s returned invalid result:' + ' %(result)s') % ({'node': task.node.uuid, 'result': result})) + + driver_info = task.node.driver_internal_info + driver_info['hardware_manager_version'] = result[ + 'hardware_manager_version'] + task.node.driver_internal_info = driver_info + task.node.save() + + # Clean steps looks like {'HardwareManager': [{step1},{steps2}..]..} + # Flatten clean steps into one list + steps_list = [step for step_list in + result['clean_steps'].values() + for step in step_list] + # Filter steps to only return deploy steps + steps = [step for step in steps_list + if step.get('interface') == 'deploy'] + return steps + + +def agent_execute_clean_step(task, step): + """Execute a clean step asynchronously on the agent. 
+ + #TODO(JoshNang) move to BootInterface + + :param task: a TaskManager object containing the node + :param step: a clean step dictionary to execute + :raises: NodeCleaningFailure if the agent does not return a command status + :returns: states.CLEANING to signify the step will be completed async + """ + client = _get_agent_client() + ports = objects.Port.list_by_node_id( + task.context, task.node.id) + result = client.execute_clean_step(step, task.node, ports) + if not result.get('command_status'): + raise exception.NodeCleaningFailure(_( + 'Agent on node %(node)s returned bad command result: ' + '%(result)s') % {'node': task.node.uuid, + 'result': result.get('command_error')}) + return states.CLEANING + + def try_set_boot_device(task, device, persistent=True): """Tries to set the boot device on the node. diff --git a/ironic/tests/dhcp/test_neutron.py b/ironic/tests/dhcp/test_neutron.py index 1ff0390e70..4cb4402f66 100644 --- a/ironic/tests/dhcp/test_neutron.py +++ b/ironic/tests/dhcp/test_neutron.py @@ -34,6 +34,9 @@ class TestNeutron(db_base.DbTestCase): def setUp(self): super(TestNeutron, self).setUp() mgr_utils.mock_the_extension_manager(driver='fake') + self.config( + cleaning_network_uuid='00000000-0000-0000-0000-000000000000', + group='neutron') self.config(enabled_drivers=['fake']) self.config(dhcp_provider='neutron', group='dhcp') @@ -49,6 +52,15 @@ class TestNeutron(db_base.DbTestCase): auth_uri='test-auth-uri', group='keystone_authtoken') self.node = object_utils.create_test_node(self.context) + self.ports = [ + object_utils.create_test_port( + self.context, node_id=self.node.id, id=2, + uuid='1be26c0b-03f2-4d2e-ae87-c02d7f33c782', + address='52:54:00:cf:2d:32')] + # Very simple neutron port representation + self.neutron_port = {'id': '132f871f-eaec-4fed-9475-0d54465e0f00', + 'mac_address': '52:54:00:cf:2d:32'} + dhcp_factory.DHCPFactory._dhcp_provider = None def test__build_client_invalid_auth_strategy(self): @@ -340,16 +352,97 @@ class 
TestNeutron(db_base.DbTestCase): @mock.patch('ironic.dhcp.neutron.NeutronDHCPApi._get_port_ip_address') def test_get_ip_addresses(self, get_ip_mock): ip_address = '10.10.0.1' - address = "aa:aa:aa:aa:aa:aa" expected = [ip_address] - port = object_utils.create_test_port(self.context, - node_id=self.node.id, - address=address) get_ip_mock.return_value = ip_address with task_manager.acquire(self.context, self.node.uuid) as task: api = dhcp_factory.DHCPFactory().provider result = api.get_ip_addresses(task) - get_ip_mock.assert_called_once_with(task, port.uuid, mock.ANY) + get_ip_mock.assert_called_once_with(task, self.ports[0].uuid, + mock.ANY) self.assertEqual(expected, result) + + @mock.patch.object(client.Client, 'create_port') + def test_create_cleaning_ports(self, create_mock): + # Ensure we can create cleaning ports for in band cleaning + create_mock.return_value = {'port': self.neutron_port} + expected = {self.ports[0].uuid: self.neutron_port['id']} + api = dhcp_factory.DHCPFactory().provider + + with task_manager.acquire(self.context, self.node.uuid) as task: + ports = api.create_cleaning_ports(task) + self.assertEqual(expected, ports) + create_mock.assert_called_once_with({'port': { + 'network_id': '00000000-0000-0000-0000-000000000000', + 'admin_state_up': True, 'mac_address': self.ports[0].address}}) + + @mock.patch('ironic.conductor.manager.cleaning_error_handler') + @mock.patch.object(client.Client, 'create_port') + def test_create_cleaning_ports_fail(self, create_mock, error_mock): + # Check that if creating a port fails, the node goes to cleanfail + create_mock.side_effect = neutron_client_exc.ConnectionFailed + api = dhcp_factory.DHCPFactory().provider + + with task_manager.acquire(self.context, self.node.uuid) as task: + api.create_cleaning_ports(task) + error_mock.assert_called_once_with(task, mock.ANY) + create_mock.assert_called_once_with({'port': { + 'network_id': '00000000-0000-0000-0000-000000000000', + 'admin_state_up': True, 'mac_address': 
self.ports[0].address}}) + + @mock.patch('ironic.conductor.manager.cleaning_error_handler') + @mock.patch.object(client.Client, 'create_port') + def test_create_cleaning_ports_bad_config(self, create_mock, error_mock): + self.config(cleaning_network_uuid=None, group='neutron') + api = dhcp_factory.DHCPFactory().provider + + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertRaises(exception.InvalidParameterValue, + api.create_cleaning_ports, task) + + @mock.patch.object(client.Client, 'delete_port') + @mock.patch.object(client.Client, 'list_ports') + def test_delete_cleaning_ports(self, list_mock, delete_mock): + # Ensure that we can delete cleaning ports, and that ports with + # different macs don't get deleted + other_port = {'id': '132f871f-eaec-4fed-9475-0d54465e0f01', + 'mac_address': 'aa:bb:cc:dd:ee:ff'} + list_mock.return_value = {'ports': [self.neutron_port, other_port]} + api = dhcp_factory.DHCPFactory().provider + + with task_manager.acquire(self.context, self.node.uuid) as task: + api.delete_cleaning_ports(task) + list_mock.assert_called_once_with( + network_id='00000000-0000-0000-0000-000000000000') + delete_mock.assert_called_once_with(self.neutron_port['id']) + + @mock.patch('ironic.conductor.manager.cleaning_error_handler') + @mock.patch.object(client.Client, 'list_ports') + def test_delete_cleaning_ports_list_fail(self, list_mock, error_mock): + # Check that if listing ports fails, the node goes to cleanfail + list_mock.side_effect = neutron_client_exc.ConnectionFailed + api = dhcp_factory.DHCPFactory().provider + + with task_manager.acquire(self.context, self.node.uuid) as task: + api.delete_cleaning_ports(task) + list_mock.assert_called_once_with( + network_id='00000000-0000-0000-0000-000000000000') + error_mock.assert_called_once_with(task, mock.ANY) + + @mock.patch('ironic.conductor.manager.cleaning_error_handler') + @mock.patch.object(client.Client, 'delete_port') + @mock.patch.object(client.Client, 'list_ports') + 
def test_delete_cleaning_ports_delete_fail(self, list_mock, delete_mock, + error_mock): + # Check that if deleting ports fails, the node goes to cleanfail + list_mock.return_value = {'ports': [self.neutron_port]} + delete_mock.side_effect = neutron_client_exc.ConnectionFailed + api = dhcp_factory.DHCPFactory().provider + + with task_manager.acquire(self.context, self.node.uuid) as task: + api.delete_cleaning_ports(task) + list_mock.assert_called_once_with( + network_id='00000000-0000-0000-0000-000000000000') + delete_mock.assert_called_once_with(self.neutron_port['id']) + error_mock.assert_called_once_with(task, mock.ANY) diff --git a/ironic/tests/drivers/test_agent.py b/ironic/tests/drivers/test_agent.py index aa3213c23e..bad86162d7 100644 --- a/ironic/tests/drivers/test_agent.py +++ b/ironic/tests/drivers/test_agent.py @@ -140,6 +140,8 @@ class TestAgentDeploy(db_base.DbTestCase): 'driver_internal_info': DRIVER_INTERNAL_INFO, } self.node = object_utils.create_test_node(self.context, **n) + self.ports = [object_utils.create_test_port(self.context, + node_id=self.node.id)] def test_get_properties(self): expected = agent.COMMON_PROPERTIES @@ -197,7 +199,7 @@ class TestAgentDeploy(db_base.DbTestCase): dhcp_opts = pxe_utils.dhcp_options_for_instance(task) driver_return = self.driver.deploy(task) self.assertEqual(driver_return, states.DEPLOYWAIT) - dhcp_mock.assert_called_once_with(task, dhcp_opts) + dhcp_mock.assert_called_once_with(task, dhcp_opts, None) bootdev_mock.assert_called_once_with(task, 'pxe', persistent=True) power_mock.assert_called_once_with(task, states.REBOOT) @@ -210,6 +212,59 @@ class TestAgentDeploy(db_base.DbTestCase): power_mock.assert_called_once_with(task, states.POWER_OFF) self.assertEqual(driver_return, states.DELETED) + @mock.patch('ironic.dhcp.neutron.NeutronDHCPApi.delete_cleaning_ports') + @mock.patch('ironic.dhcp.neutron.NeutronDHCPApi.create_cleaning_ports') + @mock.patch('ironic.drivers.modules.agent._do_pxe_boot') + 
@mock.patch('ironic.drivers.modules.agent._prepare_pxe_boot') + def test_prepare_cleaning(self, prepare_mock, boot_mock, create_mock, + delete_mock): + ports = [{'ports': self.ports}] + create_mock.return_value = ports + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + self.assertEqual(states.CLEANING, + self.driver.prepare_cleaning(task)) + prepare_mock.assert_called_once_with(task) + boot_mock.assert_called_once_with(task, ports) + create_mock.assert_called_once() + delete_mock.assert_called_once() + + @mock.patch('ironic.dhcp.neutron.NeutronDHCPApi.delete_cleaning_ports') + @mock.patch('ironic.drivers.modules.agent._clean_up_pxe') + @mock.patch('ironic.conductor.utils.node_power_action') + def test_tear_down_cleaning(self, power_mock, cleanup_mock, neutron_mock): + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + self.assertIsNone(self.driver.tear_down_cleaning(task)) + power_mock.assert_called_once_with(task, states.POWER_OFF) + cleanup_mock.assert_called_once_with(task) + neutron_mock.assert_called_once() + + @mock.patch('ironic.drivers.modules.deploy_utils.agent_get_clean_steps') + def test_get_clean_steps(self, mock_get_clean_steps): + # Test getting clean steps + mock_steps = [{'priority': 10, 'interface': 'deploy', + 'step': 'erase_devices'}] + mock_get_clean_steps.return_value = mock_steps + with task_manager.acquire(self.context, self.node.uuid) as task: + steps = self.driver.get_clean_steps(task) + mock_get_clean_steps.assert_called_once_with(task) + self.assertEqual(mock_steps, steps) + + @mock.patch('ironic.drivers.modules.deploy_utils.agent_get_clean_steps') + def test_get_clean_steps_config_priority(self, mock_get_clean_steps): + # Test that we can override the priority of get clean steps + self.config(agent_erase_devices_priority=20, group='agent') + mock_steps = [{'priority': 10, 'interface': 'deploy', + 'step': 'erase_devices'}] + expected_steps = [{'priority': 20, 
'interface': 'deploy', + 'step': 'erase_devices'}] + mock_get_clean_steps.return_value = mock_steps + with task_manager.acquire(self.context, self.node.uuid) as task: + steps = self.driver.get_clean_steps(task) + mock_get_clean_steps.assert_called_once_with(task) + self.assertEqual(expected_steps, steps) + class TestAgentVendor(db_base.DbTestCase): diff --git a/ironic/tests/drivers/test_agent_base_vendor.py b/ironic/tests/drivers/test_agent_base_vendor.py index ba70a9f3fc..31596abc13 100644 --- a/ironic/tests/drivers/test_agent_base_vendor.py +++ b/ironic/tests/drivers/test_agent_base_vendor.py @@ -401,3 +401,64 @@ class TestBaseAgentVendor(db_base.DbTestCase): task, boot_devices.DISK) self.assertEqual(states.DEPLOYFAIL, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) + + @mock.patch.object(agent_base_vendor.BaseAgentVendor, + '_notify_conductor_resume_clean') + @mock.patch.object(agent_client.AgentClient, 'get_commands_status') + def test_continue_cleaning(self, status_mock, notify_mock): + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.passthru.continue_cleaning(task) + notify_mock.assert_called_once_with(task) + + @mock.patch.object(agent_base_vendor.BaseAgentVendor, + '_notify_conductor_resume_clean') + @mock.patch.object(agent_client.AgentClient, 'get_commands_status') + def test_continue_cleaning_running(self, status_mock, notify_mock): + status_mock.return_value = [{ + 'command_status': 'RUNNING', + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.passthru.continue_cleaning(task) + notify_mock.assert_not_called() + + @mock.patch('ironic.conductor.manager.cleaning_error_handler') + @mock.patch.object(agent_client.AgentClient, 'get_commands_status') + def test_continue_cleaning_fail(self, status_mock, error_mock): + status_mock.return_value = [{ + 
'command_status': 'FAILED', + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.passthru.continue_cleaning(task) + error_mock.assert_called_once_with(task, mock.ANY) + + @mock.patch('ironic.conductor.manager.set_node_cleaning_steps') + @mock.patch.object(agent_base_vendor.BaseAgentVendor, + '_notify_conductor_resume_clean') + @mock.patch.object(agent_client.AgentClient, 'get_commands_status') + def test_continue_cleaning_clean_version_mismatch( + self, status_mock, notify_mock, steps_mock): + status_mock.return_value = [{ + 'command_status': 'CLEAN_VERSION_MISMATCH', + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.passthru.continue_cleaning(task) + steps_mock.assert_called_once_with(task) + notify_mock.assert_called_once_with(task) + + @mock.patch('ironic.conductor.manager.cleaning_error_handler') + @mock.patch.object(agent_client.AgentClient, 'get_commands_status') + def test_continue_cleaning_unknown(self, status_mock, error_mock): + status_mock.return_value = [{ + 'command_status': 'UNKNOWN', + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.passthru.continue_cleaning(task) + error_mock.assert_called_once_with(task, mock.ANY) diff --git a/ironic/tests/drivers/test_agent_client.py b/ironic/tests/drivers/test_agent_client.py index b8e1fbf85a..80a0f69af8 100644 --- a/ironic/tests/drivers/test_agent_client.py +++ b/ironic/tests/drivers/test_agent_client.py @@ -36,10 +36,19 @@ class MockNode(object): self.uuid = 'uuid' self.driver_info = {} self.driver_internal_info = { - 'agent_url': "http://127.0.0.1:9999" + 'agent_url': "http://127.0.0.1:9999", + 'clean_version': {'generic': '1'} } self.instance_info = {} + def as_dict(self): + return { + 'uuid': self.uuid, + 'driver_info': self.driver_info, + 'driver_internal_info': self.driver_internal_info, + 'instance_info': self.instance_info + } + class 
TestAgentClient(base.TestCase): def setUp(self): @@ -148,3 +157,37 @@ class TestAgentClient(base.TestCase): self.client._command.assert_called_once_with( node=self.node, method='image.install_bootloader', params=params, wait=True) + + def test_get_clean_steps(self): + self.client._command = mock.Mock() + ports = [] + expected_params = { + 'node': self.node.as_dict(), + 'ports': [] + } + + self.client.get_clean_steps(self.node, + ports) + self.client._command.assert_called_once_with(node=self.node, + method='clean.get_clean_steps', + params=expected_params, + wait=True) + + def test_execute_clean_step(self): + self.client._command = mock.Mock() + ports = [] + step = {'priority': 10, 'step': 'erase_devices', 'interface': 'deploy'} + expected_params = { + 'step': step, + 'node': self.node.as_dict(), + 'ports': [], + 'clean_version': self.node.driver_internal_info.get( + 'hardware_manager_version') + } + self.client.execute_clean_step(step, + self.node, + ports) + self.client._command.assert_called_once_with(node=self.node, + method='clean.execute_clean_step', + params=expected_params, + wait=False) diff --git a/ironic/tests/drivers/test_deploy_utils.py b/ironic/tests/drivers/test_deploy_utils.py index 29731c176f..bdbec93196 100644 --- a/ironic/tests/drivers/test_deploy_utils.py +++ b/ironic/tests/drivers/test_deploy_utils.py @@ -33,6 +33,7 @@ from ironic.common import boot_devices from ironic.common import disk_partitioner from ironic.common import exception from ironic.common import images +from ironic.common import states from ironic.common import utils as common_utils from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils @@ -1509,6 +1510,129 @@ class TrySetBootDeviceTestCase(db_base.DbTestCase): task, boot_devices.DISK, persistent=True) +class AgentCleaningTestCase(db_base.DbTestCase): + def setUp(self): + super(AgentCleaningTestCase, self).setUp() + mgr_utils.mock_the_extension_manager(driver='fake_agent') + n = {'driver': 
'fake_agent'} + self.node = obj_utils.create_test_node(self.context, **n) + self.ports = [obj_utils.create_test_port(self.context, + node_id=self.node.id)] + + self.clean_steps = { + 'hardware_manager_version': '1', + 'clean_steps': { + 'GenericHardwareManager': [ + {'interface': 'deploy', + 'step': 'erase_devices', + 'priority': 20}, + ], + 'SpecificHardwareManager': [ + {'interface': 'deploy', + 'step': 'update_firmware', + 'priority': 30}, + {'interface': 'raid', + 'step': 'create_raid', + 'priority': 10}, + ] + } + } + + @mock.patch('ironic.objects.Port.list_by_node_id') + @mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client') + def test_get_clean_steps(self, get_client_mock, list_ports_mock): + client_mock = mock.Mock() + client_mock.get_clean_steps.return_value = { + 'command_result': self.clean_steps} + get_client_mock.return_value = client_mock + list_ports_mock.return_value = self.ports + + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + response = utils.agent_get_clean_steps(task) + client_mock.get_clean_steps.assert_called_once_with(task.node, + self.ports) + self.assertEqual('1', task.node.driver_internal_info[ + 'hardware_manager_version']) + + # Since steps are returned in dicts, they have non-deterministic + # ordering + self.assertEqual(2, len(response)) + self.assertTrue(self.clean_steps['clean_steps'][ + 'GenericHardwareManager'][0] in response) + self.assertTrue(self.clean_steps['clean_steps'][ + 'SpecificHardwareManager'][0] in response) + + @mock.patch('ironic.objects.Port.list_by_node_id') + @mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client') + def test_get_clean_steps_missing_steps(self, get_client_mock, + list_ports_mock): + client_mock = mock.Mock() + del self.clean_steps['clean_steps'] + client_mock.get_clean_steps.return_value = { + 'command_result': self.clean_steps} + get_client_mock.return_value = client_mock + list_ports_mock.return_value = self.ports + + with 
task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + self.assertRaises(exception.NodeCleaningFailure, + utils.agent_get_clean_steps, + task) + client_mock.get_clean_steps.assert_called_once_with(task.node, + self.ports) + + @mock.patch('ironic.objects.Port.list_by_node_id') + @mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client') + def test_execute_clean_step(self, get_client_mock, list_ports_mock): + client_mock = mock.Mock() + client_mock.execute_clean_step.return_value = { + 'command_status': 'SUCCEEDED'} + get_client_mock.return_value = client_mock + list_ports_mock.return_value = self.ports + + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + response = utils.agent_execute_clean_step( + task, + self.clean_steps['clean_steps']['GenericHardwareManager'][0]) + self.assertEqual(states.CLEANING, response) + + @mock.patch('ironic.objects.Port.list_by_node_id') + @mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client') + def test_execute_clean_step_running(self, get_client_mock, + list_ports_mock): + client_mock = mock.Mock() + client_mock.execute_clean_step.return_value = { + 'command_status': 'RUNNING'} + get_client_mock.return_value = client_mock + list_ports_mock.return_value = self.ports + + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + response = utils.agent_execute_clean_step( + task, + self.clean_steps['clean_steps']['GenericHardwareManager'][0]) + self.assertEqual(states.CLEANING, response) + + @mock.patch('ironic.objects.Port.list_by_node_id') + @mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client') + def test_execute_clean_step_version_mismatch(self, get_client_mock, + list_ports_mock): + client_mock = mock.Mock() + client_mock.execute_clean_step.return_value = { + 'command_status': 'RUNNING'} + get_client_mock.return_value = client_mock + list_ports_mock.return_value = self.ports + + with task_manager.acquire( 
+ self.context, self.node['uuid'], shared=False) as task: + response = utils.agent_execute_clean_step( + task, + self.clean_steps['clean_steps']['GenericHardwareManager'][0]) + self.assertEqual(states.CLEANING, response) + + @mock.patch.object(utils, 'is_block_device') @mock.patch.object(utils, 'login_iscsi', lambda *_: None) @mock.patch.object(utils, 'discovery', lambda *_: None)