diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index fd28696917..fc7d05bd23 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -22,6 +22,7 @@ import time from oslo_config import cfg from oslo_log import log from oslo_utils import excutils +import retrying from ironic.common import boot_devices from ironic.common import exception @@ -43,6 +44,15 @@ agent_opts = [ cfg.IntOpt('heartbeat_timeout', default=300, help='Maximum interval (in seconds) for agent heartbeats.'), + cfg.IntOpt('post_deploy_get_power_state_retries', + default=6, + help='Number of times to retry getting power state to check ' + 'if bare metal node has been powered off after a soft ' + 'power off.'), + cfg.IntOpt('post_deploy_get_power_state_retry_interval', + default=5, + help='Amount of time (in seconds) to wait between polling ' + 'power state after trigger soft poweroff.'), ] CONF = cfg.CONF @@ -438,11 +448,37 @@ class BaseAgentVendor(base.VendorInterface): :param task: a TaskManager object containing the node :raises: InstanceDeployFailure, if node reboot failed. """ + wait = CONF.agent.post_deploy_get_power_state_retry_interval * 1000 + attempts = CONF.agent.post_deploy_get_power_state_retries + 1 + + @retrying.retry( + stop_max_attempt_number=attempts, + retry_on_result=lambda state: state != states.POWER_OFF, + wait_fixed=wait + ) + def _wait_until_powered_off(task): + return task.driver.power.get_power_state(task) + + node = task.node + try: - manager_utils.node_power_action(task, states.REBOOT) + try: + self._client.power_off(node) + _wait_until_powered_off(task) + except Exception as e: + LOG.warning( + _LW('Failed to soft power off node %(node_uuid)s ' + 'in at least %(timeout)d seconds. Error: %(error)s'), + {'node_uuid': node.uuid, + 'timeout': wait * (attempts - 1), + 'error': e}) + manager_utils.node_power_action(task, states.REBOOT) + else: + manager_utils.node_power_action(task, states.POWER_ON) except Exception as e: - msg = (_('Error rebooting node %(node)s. Error: %(error)s') % - {'node': task.node.uuid, 'error': e}) + msg = (_('Error rebooting node %(node)s after deploy. ' + 'Error: %(error)s') % + {'node': node.uuid, 'error': e}) self._log_and_raise_deployment_error(task, msg) task.process_event('done') diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index ba19a27be9..9f550e3643 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -142,3 +142,9 @@ class AgentClient(object): method='clean.execute_clean_step', params=params, wait=False) + + def power_off(self, node): + """Soft powers off the bare metal node by shutting down ramdisk OS.""" + return self._command(node=node, + method='standby.power_off', + params={}) diff --git a/ironic/tests/drivers/test_agent.py b/ironic/tests/drivers/test_agent.py index 694aa9004b..8d9c9b1316 100644 --- a/ironic/tests/drivers/test_agent.py +++ b/ironic/tests/drivers/test_agent.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import types + import mock from oslo_config import cfg @@ -22,8 +24,10 @@ from ironic.common import keystone from ironic.common import pxe_utils from ironic.common import states from ironic.conductor import task_manager +from ironic.conductor import utils as manager_utils from ironic.drivers.modules import agent from ironic.drivers.modules import agent_client +from ironic.drivers.modules import fake from ironic.tests.conductor import utils as mgr_utils from ironic.tests.db import base as db_base from ironic.tests.db import utils as db_utils @@ -421,12 +425,17 @@ class TestAgentVendor(db_base.DbTestCase): self.assertEqual(states.ACTIVE, task.node.target_provision_state) - @mock.patch('ironic.conductor.utils.node_power_action', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(fake.FakePower, 'get_power_state', + spec=types.FunctionType) + @mock.patch.object(agent_client.AgentClient, 'power_off', + spec=types.FunctionType) @mock.patch('ironic.conductor.utils.node_set_boot_device', autospec=True) @mock.patch('ironic.drivers.modules.agent.AgentVendorInterface' '.check_deploy_success', autospec=True) def test_reboot_to_instance(self, check_deploy_mock, bootdev_mock, - power_mock): + power_off_mock, get_power_state_mock, + node_power_action_mock): check_deploy_mock.return_value = None self.node.provision_state = states.DEPLOYING @@ -435,11 +444,15 @@ class TestAgentVendor(db_base.DbTestCase): with task_manager.acquire(self.context, self.node.uuid, shared=False) as task: + get_power_state_mock.return_value = states.POWER_OFF self.passthru.reboot_to_instance(task) check_deploy_mock.assert_called_once_with(mock.ANY, task.node) bootdev_mock.assert_called_once_with(task, 'disk', persistent=True) - power_mock.assert_called_once_with(task, states.REBOOT) + power_off_mock.assert_called_once_with(task.node) + get_power_state_mock.assert_called_once_with(task) + node_power_action_mock.assert_called_once_with( + task, states.POWER_ON) self.assertEqual(states.ACTIVE, task.node.provision_state) self.assertEqual(states.NOSTATE, task.node.target_provision_state) diff --git a/ironic/tests/drivers/test_agent_base_vendor.py b/ironic/tests/drivers/test_agent_base_vendor.py index 410d55fa48..b686f41b85 100644 --- a/ironic/tests/drivers/test_agent_base_vendor.py +++ b/ironic/tests/drivers/test_agent_base_vendor.py @@ -15,6 +15,7 @@ # License for the specific language governing permissions and limitations # under the License. +import time import types import mock @@ -27,6 +28,7 @@ from ironic.conductor import utils as manager_utils from ironic.drivers.modules import agent_base_vendor from ironic.drivers.modules import agent_client from ironic.drivers.modules import deploy_utils +from ironic.drivers.modules import fake from ironic import objects from ironic.tests.conductor import utils as mgr_utils from ironic.tests.db import base as db_base @@ -332,31 +334,118 @@ class TestBaseAgentVendor(db_base.DbTestCase): self.assertIsInstance(driver_routes, dict) self.assertEqual(expected, list(driver_routes)) + @mock.patch.object(time, 'sleep', lambda seconds: None) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - def test_reboot_and_finish_deploy_success(self, node_power_action_mock): + @mock.patch.object(fake.FakePower, 'get_power_state', + spec=types.FunctionType) + @mock.patch.object(agent_client.AgentClient, 'power_off', + spec=types.FunctionType) + def test_reboot_and_finish_deploy(self, power_off_mock, + get_power_state_mock, + node_power_action_mock): self.node.provision_state = states.DEPLOYING self.node.target_provision_state = states.ACTIVE self.node.save() - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + get_power_state_mock.side_effect = [states.POWER_ON, + states.POWER_OFF] self.passthru.reboot_and_finish_deploy(task) - node_power_action_mock.assert_called_once_with(task, states.REBOOT) + power_off_mock.assert_called_once_with(task.node) + self.assertEqual(2, get_power_state_mock.call_count) + node_power_action_mock.assert_called_once_with( + task, states.POWER_ON) + self.assertEqual(states.ACTIVE, task.node.provision_state) + self.assertEqual(states.NOSTATE, task.node.target_provision_state) + + @mock.patch.object(time, 'sleep', lambda seconds: None) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(fake.FakePower, 'get_power_state', + spec=types.FunctionType) + @mock.patch.object(agent_client.AgentClient, 'power_off', + spec=types.FunctionType) + def test_reboot_and_finish_deploy_soft_poweroff_doesnt_complete( + self, power_off_mock, get_power_state_mock, + node_power_action_mock): + self.node.provision_state = states.DEPLOYING + self.node.target_provision_state = states.ACTIVE + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + get_power_state_mock.return_value = states.POWER_ON + self.passthru.reboot_and_finish_deploy(task) + power_off_mock.assert_called_once_with(task.node) + self.assertEqual(7, get_power_state_mock.call_count) + node_power_action_mock.assert_called_once_with( + task, states.REBOOT) self.assertEqual(states.ACTIVE, task.node.provision_state) self.assertEqual(states.NOSTATE, task.node.target_provision_state) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - def test_reboot_and_finish_deploy_reboot_failure(self, - node_power_action_mock): - exc = exception.PowerStateFailure(pstate=states.REBOOT) + @mock.patch.object(agent_client.AgentClient, 'power_off', + spec=types.FunctionType) + def test_reboot_and_finish_deploy_soft_poweroff_fails( + self, power_off_mock, node_power_action_mock): + power_off_mock.side_effect = RuntimeError("boom") self.node.provision_state = states.DEPLOYING self.node.target_provision_state = states.ACTIVE self.node.save() - node_power_action_mock.side_effect = exc - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + self.passthru.reboot_and_finish_deploy(task) + power_off_mock.assert_called_once_with(task.node) + node_power_action_mock.assert_called_once_with( + task, states.REBOOT) + self.assertEqual(states.ACTIVE, task.node.provision_state) + self.assertEqual(states.NOSTATE, task.node.target_provision_state) + + @mock.patch.object(time, 'sleep', lambda seconds: None) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(fake.FakePower, 'get_power_state', + spec=types.FunctionType) + @mock.patch.object(agent_client.AgentClient, 'power_off', + spec=types.FunctionType) + def test_reboot_and_finish_deploy_get_power_state_fails( + self, power_off_mock, get_power_state_mock, + node_power_action_mock): + self.node.provision_state = states.DEPLOYING + self.node.target_provision_state = states.ACTIVE + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + get_power_state_mock.side_effect = RuntimeError("boom") + self.passthru.reboot_and_finish_deploy(task) + power_off_mock.assert_called_once_with(task.node) + self.assertEqual(7, get_power_state_mock.call_count) + node_power_action_mock.assert_called_once_with( + task, states.REBOOT) + self.assertEqual(states.ACTIVE, task.node.provision_state) + self.assertEqual(states.NOSTATE, task.node.target_provision_state) + + @mock.patch.object(time, 'sleep', lambda seconds: None) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(fake.FakePower, 'get_power_state', + spec=types.FunctionType) + @mock.patch.object(agent_client.AgentClient, 'power_off', + spec=types.FunctionType) + def test_reboot_and_finish_deploy_power_action_fails( + self, power_off_mock, get_power_state_mock, + node_power_action_mock): + self.node.provision_state = states.DEPLOYING + self.node.target_provision_state = states.ACTIVE + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + get_power_state_mock.return_value = states.POWER_ON + node_power_action_mock.side_effect = RuntimeError("boom") self.assertRaises(exception.InstanceDeployFailure, - self.passthru.reboot_and_finish_deploy, task) - node_power_action_mock.assert_any_call(task, states.REBOOT) + self.passthru.reboot_and_finish_deploy, + task) + power_off_mock.assert_called_once_with(task.node) + self.assertEqual(7, get_power_state_mock.call_count) + node_power_action_mock.assert_has_calls([ + mock.call(task, states.REBOOT), + mock.call(task, states.POWER_OFF)]) self.assertEqual(states.DEPLOYFAIL, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) diff --git a/ironic/tests/drivers/test_agent_client.py b/ironic/tests/drivers/test_agent_client.py index 92662511f0..9fb1e43a1d 100644 --- a/ironic/tests/drivers/test_agent_client.py +++ b/ironic/tests/drivers/test_agent_client.py @@ -210,3 +210,9 @@ class TestAgentClient(base.TestCase): self.client._command.assert_called_once_with( node=self.node, method='clean.execute_clean_step', params=expected_params, wait=False) + + def test_power_off(self): + self.client._command = mock.MagicMock(spec_set=[]) + self.client.power_off(self.node) + self.client._command.assert_called_once_with( + node=self.node, method='standby.power_off', params={})