Fix cleaning nits

In the rush to meet the K-3 feature freeze, some large nits
were merged.

Nits fixed for commits:
534d9ee96a
47c58ea9bf

Change-Id: I0f83750a8a1688878d7e7654bea37baa5d584019
This commit is contained in:
Josh Gachnang 2015-03-26 11:34:09 -07:00
parent fb0cc8f86f
commit 967ea27048
9 changed files with 160 additions and 85 deletions

View File

@@ -1018,7 +1018,7 @@ class NodesController(rest.RestController):
# Allow node.instance_uuid removal during cleaning, but not other
# operations.
# TODO(JoshNang) remove node.instance_uuid when removing
# instance_info stop removing node.instance_uuid in the Nova
# instance_info and stop removing node.instance_uuid in the Nova
# Ironic driver. Bug: 1436568
LOG.debug('Removing instance uuid %(instance)s from node %(node)s',
{'instance': rpc_node.instance_uuid,

View File

@@ -27,7 +27,6 @@ from ironic.common.i18n import _LE
from ironic.common.i18n import _LW
from ironic.common import keystone
from ironic.common import network
from ironic.conductor import manager
from ironic.dhcp import base
from ironic.drivers.modules import ssh
from ironic.openstack.common import log as logging
@@ -310,25 +309,20 @@ class NeutronDHCPApi(base.BaseDHCP):
try:
port = neutron_client.create_port(body)
except neutron_client_exc.ConnectionFailed as e:
self._rollback_cleaning_ports(task)
msg = (_('Could not create cleaning port on network %(net)s '
'from %(node)s. %(exc)s') %
{'net': CONF.neutron.cleaning_network_uuid,
'node': task.node.uuid,
'exc': e})
LOG.exception(msg)
return manager.cleaning_error_handler(task, msg)
raise exception.NodeCleaningFailure(msg)
if not port.get('port') or not port['port'].get('id'):
# Rollback changes
try:
self.delete_cleaning_ports(task)
except Exception:
# Log the error, but continue to cleaning error handler
LOG.exception(_LE('Failed to rollback cleaning port '
'changes for node %s') % task.node.uuid)
self._rollback_cleaning_ports(task)
msg = (_('Failed to create cleaning ports for node '
'%(node)s') % task.node.uuid)
LOG.error(msg)
return manager.cleaning_error_handler(task, msg)
raise exception.NodeCleaningFailure(msg)
# Match return value of get_node_vif_ids()
ports[ironic_port.uuid] = port['port']['id']
return ports
@@ -351,7 +345,7 @@ class NeutronDHCPApi(base.BaseDHCP):
{'node': task.node.uuid,
'exc': e})
LOG.exception(msg)
return manager.cleaning_error_handler(task, msg)
raise exception.NodeCleaningFailure(msg)
# Iterate the list of Neutron port dicts, remove the ones we added
for neutron_port in ports.get('ports', []):
@@ -367,4 +361,20 @@ class NeutronDHCPApi(base.BaseDHCP):
'node': task.node.uuid,
'exc': e})
LOG.exception(msg)
return manager.cleaning_error_handler(task, msg)
raise exception.NodeCleaningFailure(msg)
def _rollback_cleaning_ports(self, task):
"""Attempts to delete any ports created by cleaning
Purposefully will not raise any exceptions so error handling can
continue.
:param task: a TaskManager instance.
"""
try:
self.delete_cleaning_ports(task)
except Exception:
# Log the error, but let the caller invoke the
# manager.cleaning_error_handler().
LOG.exception(_LE('Failed to rollback cleaning port '
'changes for node %s') % task.node.uuid)

View File

@@ -205,6 +205,7 @@ def _prepare_pxe_boot(task):
def _do_pxe_boot(task, ports=None):
"""Reboot the node into the PXE ramdisk.
:param task: a TaskManager instance
:param ports: a list of Neutron port dicts to update DHCP options on. If
None, will get the list of ports from the Ironic port objects.
"""
@@ -357,7 +358,7 @@ class AgentDeploy(base.DeployInterface):
:returns: A list of clean step dictionaries
"""
steps = deploy_utils.agent_get_clean_steps(task)
if CONF.agent.agent_erase_devices_priority:
if CONF.agent.agent_erase_devices_priority is not None:
for step in steps:
if (step.get('step') == 'erase_devices' and
step.get('interface') == 'deploy'):
@@ -377,29 +378,44 @@ class AgentDeploy(base.DeployInterface):
return deploy_utils.agent_execute_clean_step(task, step)
def prepare_cleaning(self, task):
"""Boot into the agent to prepare for cleaning."""
"""Boot into the agent to prepare for cleaning.
:param task: a TaskManager object containing the node
:raises NodeCleaningFailure: if the previous cleaning ports cannot
be removed or if new cleaning ports cannot be created
:returns: states.CLEANING to signify an asynchronous prepare
"""
provider = dhcp_factory.DHCPFactory()
# If we have left over ports from a previous cleaning, remove them
if getattr(provider.provider, 'delete_cleaning_ports', None):
# Allow to raise if it fails, is caught and handled in conductor
provider.provider.delete_cleaning_ports(task)
# Create cleaning ports if necessary
ports = None
if getattr(provider.provider, 'create_cleaning_ports', None):
# Allow to raise if it fails, is caught and handled in conductor
ports = provider.provider.create_cleaning_ports(task)
_prepare_pxe_boot(task)
_do_pxe_boot(task, ports)
# Tell the conductor we are waiting for the agent to boot.
return states.CLEANING
def tear_down_cleaning(self, task):
"""Clean up the PXE and DHCP files after cleaning."""
"""Clean up the PXE and DHCP files after cleaning.
:param task: a TaskManager object containing the node
:raises NodeCleaningFailure: if the cleaning ports cannot be
removed
"""
manager_utils.node_power_action(task, states.POWER_OFF)
_clean_up_pxe(task)
# If we created cleaning ports, delete them
provider = dhcp_factory.DHCPFactory()
if getattr(provider.provider, 'delete_cleaning_ports', None):
# Allow to raise if it fails, is caught and handled in conductor
provider.provider.delete_cleaning_ports(task)

View File

@@ -147,10 +147,10 @@ class BaseAgentVendor(base.VendorInterface):
we restart cleaning.
"""
command = self._get_completed_cleaning_command(task)
LOG.debug('Cleaning command status for node %(node)s on step %(step)s '
'(command)%', {'node': task.node.uuid,
'step': task.node.clean_step,
'command': command})
LOG.debug('Cleaning command status for node %(node)s on step %(step)s:'
' %(command)s', {'node': task.node.uuid,
'step': task.node.clean_step,
'command': command})
if not command:
# Command is not done yet
@@ -163,7 +163,7 @@ class BaseAgentVendor(base.VendorInterface):
'err': command.get('command_error'),
'step': task.node.clean_step})
LOG.error(msg)
manager.cleaning_error_handler(task, msg)
return manager.cleaning_error_handler(task, msg)
elif command.get('command_status') == 'CLEAN_VERSION_MISMATCH':
# Restart cleaning, agent must have rebooted to new version
try:
@@ -175,7 +175,7 @@ class BaseAgentVendor(base.VendorInterface):
'err': command.get('command_error'),
'step': task.node.clean_step})
LOG.exception(msg)
manager.cleaning_error_handler(task, msg)
return manager.cleaning_error_handler(task, msg)
self._notify_conductor_resume_clean(task)
elif command.get('command_status') == 'SUCCEEDED':
@@ -187,7 +187,7 @@ class BaseAgentVendor(base.VendorInterface):
'err': command.get('command_status'),
'step': task.node.clean_step})
LOG.error(msg)
manager.cleaning_error_handler(task, msg)
return manager.cleaning_error_handler(task, msg)
@base.passthru(['POST'])
def heartbeat(self, task, **kwargs):
@@ -313,8 +313,19 @@ class BaseAgentVendor(base.VendorInterface):
last_command = commands[-1]
if last_command['command_name'] != 'execute_clean_step':
# catches race condition where execute_clean_step is still
# processing so the command hasn't started yet
return
last_step = last_command['command_result'].get('clean_step')
if last_command['command_status'] == 'RUNNING':
return
elif (last_command['command_status'] == 'SUCCEEDED' and
last_step != task.node.clean_step):
# A previous clean_step was running, the new command has not yet
# started.
return
else:
return last_command

View File

@@ -73,10 +73,6 @@ LOG = logging.getLogger(__name__)
VALID_ROOT_DEVICE_HINTS = set(('size', 'model', 'wwn', 'serial', 'vendor'))
def _get_agent_client():
return agent_client.AgentClient()
# All functions are called from deploy() directly or indirectly.
# They are split for stub-out.
@@ -897,7 +893,7 @@ def agent_get_clean_steps(task):
:raises: NodeCleaningFailure if the agent returns invalid results
:returns: A list of clean step dictionaries
"""
client = _get_agent_client()
client = agent_client.AgentClient()
ports = objects.Port.list_by_node_id(
task.context, task.node.id)
result = client.get_clean_steps(task.node, ports).get('command_result')
@@ -908,10 +904,10 @@ def agent_get_clean_steps(task):
'get_clean_steps for node %(node)s returned invalid result:'
' %(result)s') % ({'node': task.node.uuid, 'result': result}))
driver_info = task.node.driver_internal_info
driver_info['hardware_manager_version'] = result[
driver_internal_info = task.node.driver_internal_info
driver_internal_info['hardware_manager_version'] = result[
'hardware_manager_version']
task.node.driver_internal_info = driver_info
task.node.driver_internal_info = driver_internal_info
task.node.save()
# Clean steps looks like {'HardwareManager': [{step1},{steps2}..]..}
@@ -935,7 +931,7 @@ def agent_execute_clean_step(task, step):
:raises: NodeCleaningFailure if the agent does not return a command status
:returns: states.CLEANING to signify the step will be completed async
"""
client = _get_agent_client()
client = agent_client.AgentClient()
ports = objects.Port.list_by_node_id(
task.context, task.node.id)
result = client.execute_clean_step(step, task.node, ports)

View File

@@ -377,23 +377,23 @@ class TestNeutron(db_base.DbTestCase):
'network_id': '00000000-0000-0000-0000-000000000000',
'admin_state_up': True, 'mac_address': self.ports[0].address}})
@mock.patch('ironic.conductor.manager.cleaning_error_handler')
@mock.patch.object(client.Client, 'create_port')
def test_create_cleaning_ports_fail(self, create_mock, error_mock):
# Check that if creating a port fails, the node goes to cleanfail
def test_create_cleaning_ports_fail(self, create_mock):
# Check that if creating a port fails, the ports are cleaned up
create_mock.side_effect = neutron_client_exc.ConnectionFailed
api = dhcp_factory.DHCPFactory().provider
with task_manager.acquire(self.context, self.node.uuid) as task:
api.create_cleaning_ports(task)
error_mock.assert_called_once_with(task, mock.ANY)
self.assertRaises(exception.NodeCleaningFailure,
api.create_cleaning_ports,
task)
create_mock.assert_called_once_with({'port': {
'network_id': '00000000-0000-0000-0000-000000000000',
'admin_state_up': True, 'mac_address': self.ports[0].address}})
@mock.patch('ironic.conductor.manager.cleaning_error_handler')
@mock.patch.object(client.Client, 'create_port')
def test_create_cleaning_ports_bad_config(self, create_mock, error_mock):
def test_create_cleaning_ports_bad_config(self, create_mock):
# Check an error is raised if the cleaning network is not set
self.config(cleaning_network_uuid=None, group='neutron')
api = dhcp_factory.DHCPFactory().provider
@@ -417,32 +417,31 @@ class TestNeutron(db_base.DbTestCase):
network_id='00000000-0000-0000-0000-000000000000')
delete_mock.assert_called_once_with(self.neutron_port['id'])
@mock.patch('ironic.conductor.manager.cleaning_error_handler')
@mock.patch.object(client.Client, 'list_ports')
def test_delete_cleaning_ports_list_fail(self, list_mock, error_mock):
def test_delete_cleaning_ports_list_fail(self, list_mock):
# Check that if listing ports fails, the node goes to cleanfail
list_mock.side_effect = neutron_client_exc.ConnectionFailed
api = dhcp_factory.DHCPFactory().provider
with task_manager.acquire(self.context, self.node.uuid) as task:
api.delete_cleaning_ports(task)
self.assertRaises(exception.NodeCleaningFailure,
api.delete_cleaning_ports,
task)
list_mock.assert_called_once_with(
network_id='00000000-0000-0000-0000-000000000000')
error_mock.assert_called_once_with(task, mock.ANY)
@mock.patch('ironic.conductor.manager.cleaning_error_handler')
@mock.patch.object(client.Client, 'delete_port')
@mock.patch.object(client.Client, 'list_ports')
def test_delete_cleaning_ports_delete_fail(self, list_mock, delete_mock,
error_mock):
def test_delete_cleaning_ports_delete_fail(self, list_mock, delete_mock):
# Check that if deleting ports fails, the node goes to cleanfail
list_mock.return_value = {'ports': [self.neutron_port]}
delete_mock.side_effect = neutron_client_exc.ConnectionFailed
api = dhcp_factory.DHCPFactory().provider
with task_manager.acquire(self.context, self.node.uuid) as task:
api.delete_cleaning_ports(task)
self.assertRaises(exception.NodeCleaningFailure,
api.delete_cleaning_ports,
task)
list_mock.assert_called_once_with(
network_id='00000000-0000-0000-0000-000000000000')
delete_mock.assert_called_once_with(self.neutron_port['id'])
error_mock.assert_called_once_with(task, mock.ANY)

View File

@@ -261,10 +261,11 @@ class TestAgentDeploy(db_base.DbTestCase):
@mock.patch('ironic.drivers.modules.deploy_utils.agent_get_clean_steps')
def test_get_clean_steps_config_priority(self, mock_get_clean_steps):
# Test that we can override the priority of get clean steps
self.config(agent_erase_devices_priority=20, group='agent')
# Use 0 because it is an edge case (false-y) and used in devstack
self.config(agent_erase_devices_priority=0, group='agent')
mock_steps = [{'priority': 10, 'interface': 'deploy',
'step': 'erase_devices'}]
expected_steps = [{'priority': 20, 'interface': 'deploy',
expected_steps = [{'priority': 0, 'interface': 'deploy',
'step': 'erase_devices'}]
mock_get_clean_steps.return_value = mock_steps
with task_manager.acquire(self.context, self.node.uuid) as task:

View File

@@ -457,20 +457,63 @@ class TestBaseAgentVendor(db_base.DbTestCase):
'_notify_conductor_resume_clean')
@mock.patch.object(agent_client.AgentClient, 'get_commands_status')
def test_continue_cleaning(self, status_mock, notify_mock):
# Test a successful execute clean step on the agent
self.node.clean_step = {
'priority': 10,
'interface': 'deploy',
'step': 'erase_devices',
'reboot_requested': False
}
self.node.save()
status_mock.return_value = [{
'command_status': 'SUCCEEDED',
'command_name': 'execute_clean_step',
'command_result': {
'clean_step': self.node.clean_step
}
}]
with task_manager.acquire(self.context, self.node['uuid'],
shared=False) as task:
self.passthru.continue_cleaning(task)
notify_mock.assert_called_once_with(task)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'_notify_conductor_resume_clean')
@mock.patch.object(agent_client.AgentClient, 'get_commands_status')
def test_continue_cleaning_old_command(self, status_mock, notify_mock):
# Test when a second execute_clean_step happens to the agent, but
# the new step hasn't started yet.
self.node.clean_step = {
'priority': 10,
'interface': 'deploy',
'step': 'erase_devices',
'reboot_requested': False
}
self.node.save()
status_mock.return_value = [{
'command_status': 'SUCCEEDED',
'command_name': 'execute_clean_step',
'command_result': {
'priority': 20,
'interface': 'deploy',
'step': 'update_firmware',
'reboot_requested': False
}
}]
with task_manager.acquire(self.context, self.node['uuid'],
shared=False) as task:
self.passthru.continue_cleaning(task)
self.assertFalse(notify_mock.called)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'_notify_conductor_resume_clean')
@mock.patch.object(agent_client.AgentClient, 'get_commands_status')
def test_continue_cleaning_running(self, status_mock, notify_mock):
# Test that no action is taken while a clean step is executing
status_mock.return_value = [{
'command_status': 'RUNNING',
'command_name': 'execute_clean_step',
'command_result': {}
}]
with task_manager.acquire(self.context, self.node['uuid'],
shared=False) as task:
@@ -480,8 +523,11 @@ class TestBaseAgentVendor(db_base.DbTestCase):
@mock.patch('ironic.conductor.manager.cleaning_error_handler')
@mock.patch.object(agent_client.AgentClient, 'get_commands_status')
def test_continue_cleaning_fail(self, status_mock, error_mock):
# Test the a failure puts the node in CLEANFAIL
status_mock.return_value = [{
'command_status': 'FAILED',
'command_name': 'execute_clean_step',
'command_result': {}
}]
with task_manager.acquire(self.context, self.node['uuid'],
shared=False) as task:
@@ -494,8 +540,11 @@ class TestBaseAgentVendor(db_base.DbTestCase):
@mock.patch.object(agent_client.AgentClient, 'get_commands_status')
def test_continue_cleaning_clean_version_mismatch(
self, status_mock, notify_mock, steps_mock):
# Test that cleaning is restarted if there is a version mismatch
status_mock.return_value = [{
'command_status': 'CLEAN_VERSION_MISMATCH',
'command_name': 'execute_clean_step',
'command_result': {}
}]
with task_manager.acquire(self.context, self.node['uuid'],
shared=False) as task:
@@ -506,8 +555,11 @@ class TestBaseAgentVendor(db_base.DbTestCase):
@mock.patch('ironic.conductor.manager.cleaning_error_handler')
@mock.patch.object(agent_client.AgentClient, 'get_commands_status')
def test_continue_cleaning_unknown(self, status_mock, error_mock):
# Test that unknown commands are treated as failures
status_mock.return_value = [{
'command_status': 'UNKNOWN',
'command_name': 'execute_clean_step',
'command_result': {}
}]
with task_manager.acquire(self.context, self.node['uuid'],
shared=False) as task:

View File

@@ -37,6 +37,7 @@ from ironic.common import states
from ironic.common import utils as common_utils
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.drivers.modules import agent_client
from ironic.drivers.modules import deploy_utils as utils
from ironic.drivers.modules import image_cache
from ironic.tests import base as tests_base
@@ -1526,7 +1527,9 @@ class AgentCleaningTestCase(db_base.DbTestCase):
def setUp(self):
super(AgentCleaningTestCase, self).setUp()
mgr_utils.mock_the_extension_manager(driver='fake_agent')
n = {'driver': 'fake_agent'}
n = {'driver': 'fake_agent',
'driver_internal_info': {'agent_url': 'http://127.0.0.1:9999'}}
self.node = obj_utils.create_test_node(self.context, **n)
self.ports = [obj_utils.create_test_port(self.context,
node_id=self.node.id)]
@@ -1551,39 +1554,34 @@ class AgentCleaningTestCase(db_base.DbTestCase):
}
@mock.patch('ironic.objects.Port.list_by_node_id')
@mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client')
def test_get_clean_steps(self, get_client_mock, list_ports_mock):
client_mock = mock.Mock()
client_mock.get_clean_steps.return_value = {
@mock.patch.object(agent_client.AgentClient, 'get_clean_steps')
def test_get_clean_steps(self, client_mock, list_ports_mock):
client_mock.return_value = {
'command_result': self.clean_steps}
get_client_mock.return_value = client_mock
list_ports_mock.return_value = self.ports
with task_manager.acquire(
self.context, self.node['uuid'], shared=False) as task:
response = utils.agent_get_clean_steps(task)
client_mock.get_clean_steps.assert_called_once_with(task.node,
self.ports)
client_mock.assert_called_once_with(task.node, self.ports)
self.assertEqual('1', task.node.driver_internal_info[
'hardware_manager_version'])
# Since steps are returned in dicts, they have non-deterministic
# ordering
self.assertEqual(2, len(response))
self.assertTrue(self.clean_steps['clean_steps'][
'GenericHardwareManager'][0] in response)
self.assertTrue(self.clean_steps['clean_steps'][
'SpecificHardwareManager'][0] in response)
self.assertIn(self.clean_steps['clean_steps'][
'GenericHardwareManager'][0], response)
self.assertIn(self.clean_steps['clean_steps'][
'SpecificHardwareManager'][0], response)
@mock.patch('ironic.objects.Port.list_by_node_id')
@mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client')
def test_get_clean_steps_missing_steps(self, get_client_mock,
@mock.patch.object(agent_client.AgentClient, 'get_clean_steps')
def test_get_clean_steps_missing_steps(self, client_mock,
list_ports_mock):
client_mock = mock.Mock()
del self.clean_steps['clean_steps']
client_mock.get_clean_steps.return_value = {
client_mock.return_value = {
'command_result': self.clean_steps}
get_client_mock.return_value = client_mock
list_ports_mock.return_value = self.ports
with task_manager.acquire(
@@ -1591,16 +1589,13 @@ class AgentCleaningTestCase(db_base.DbTestCase):
self.assertRaises(exception.NodeCleaningFailure,
utils.agent_get_clean_steps,
task)
client_mock.get_clean_steps.assert_called_once_with(task.node,
self.ports)
client_mock.assert_called_once_with(task.node, self.ports)
@mock.patch('ironic.objects.Port.list_by_node_id')
@mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client')
def test_execute_clean_step(self, get_client_mock, list_ports_mock):
client_mock = mock.Mock()
client_mock.execute_clean_step.return_value = {
@mock.patch.object(agent_client.AgentClient, 'execute_clean_step')
def test_execute_clean_step(self, client_mock, list_ports_mock):
client_mock.return_value = {
'command_status': 'SUCCEEDED'}
get_client_mock.return_value = client_mock
list_ports_mock.return_value = self.ports
with task_manager.acquire(
@@ -1611,13 +1606,10 @@ class AgentCleaningTestCase(db_base.DbTestCase):
self.assertEqual(states.CLEANING, response)
@mock.patch('ironic.objects.Port.list_by_node_id')
@mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client')
def test_execute_clean_step_running(self, get_client_mock,
list_ports_mock):
client_mock = mock.Mock()
client_mock.execute_clean_step.return_value = {
@mock.patch.object(agent_client.AgentClient, 'execute_clean_step')
def test_execute_clean_step_running(self, client_mock, list_ports_mock):
client_mock.return_value = {
'command_status': 'RUNNING'}
get_client_mock.return_value = client_mock
list_ports_mock.return_value = self.ports
with task_manager.acquire(
@@ -1628,13 +1620,11 @@ class AgentCleaningTestCase(db_base.DbTestCase):
self.assertEqual(states.CLEANING, response)
@mock.patch('ironic.objects.Port.list_by_node_id')
@mock.patch('ironic.drivers.modules.deploy_utils._get_agent_client')
def test_execute_clean_step_version_mismatch(self, get_client_mock,
@mock.patch.object(agent_client.AgentClient, 'execute_clean_step')
def test_execute_clean_step_version_mismatch(self, client_mock,
list_ports_mock):
client_mock = mock.Mock()
client_mock.execute_clean_step.return_value = {
client_mock.return_value = {
'command_status': 'RUNNING'}
get_client_mock.return_value = client_mock
list_ports_mock.return_value = self.ports
with task_manager.acquire(