IPA: Do a soft power off at the end of deployment

For IPA we currently do a hard reboot at the end of the deployment,
but that doesn't always play nice with some systems specially when they
have to deal with UEFI. So this patch is changing the code in Ironic to
actually tell the ramdisk to power off itself instead of hard powering
off the machine from BMC.  This will give the kernel the necessary
time to finish sync'ing up whatever it has to sync before rebooting
the node.

Depends-On: I2a5f984af26bbbe03002bb8c367c8c6af8d91434
Co-Authored-By: Lucas Alvares Gomes <lucasagomes@gmail.com>
Closes-Bug: #1451310
Change-Id: I831d8dc1d15c82a0caab94173c2b9f147017500f
This commit is contained in:
Ramakrishnan G 2015-05-26 14:32:02 +01:00
parent 309707bab0
commit 7adbf0622c
5 changed files with 168 additions and 18 deletions

View File

@ -22,6 +22,7 @@ import time
from oslo_config import cfg
from oslo_log import log
from oslo_utils import excutils
import retrying
from ironic.common import boot_devices
from ironic.common import exception
@ -43,6 +44,15 @@ agent_opts = [
cfg.IntOpt('heartbeat_timeout',
default=300,
help='Maximum interval (in seconds) for agent heartbeats.'),
cfg.IntOpt('post_deploy_get_power_state_retries',
default=6,
help='Number of times to retry getting power state to check '
'if bare metal node has been powered off after a soft '
'power off.'),
cfg.IntOpt('post_deploy_get_power_state_retry_interval',
default=5,
help='Amount of time (in seconds) to wait between polling '
'power state after trigger soft poweroff.'),
]
CONF = cfg.CONF
@ -438,11 +448,37 @@ class BaseAgentVendor(base.VendorInterface):
:param task: a TaskManager object containing the node
:raises: InstanceDeployFailure, if node reboot failed.
"""
wait = CONF.agent.post_deploy_get_power_state_retry_interval * 1000
attempts = CONF.agent.post_deploy_get_power_state_retries + 1
@retrying.retry(
stop_max_attempt_number=attempts,
retry_on_result=lambda state: state != states.POWER_OFF,
wait_fixed=wait
)
def _wait_until_powered_off(task):
return task.driver.power.get_power_state(task)
node = task.node
try:
manager_utils.node_power_action(task, states.REBOOT)
try:
self._client.power_off(node)
_wait_until_powered_off(task)
except Exception as e:
LOG.warning(
_LW('Failed to soft power off node %(node_uuid)s '
'in at least %(timeout)d seconds. Error: %(error)s'),
{'node_uuid': node.uuid,
'timeout': wait * (attempts - 1),
'error': e})
manager_utils.node_power_action(task, states.REBOOT)
else:
manager_utils.node_power_action(task, states.POWER_ON)
except Exception as e:
msg = (_('Error rebooting node %(node)s. Error: %(error)s') %
{'node': task.node.uuid, 'error': e})
msg = (_('Error rebooting node %(node)s after deploy. '
'Error: %(error)s') %
{'node': node.uuid, 'error': e})
self._log_and_raise_deployment_error(task, msg)
task.process_event('done')

View File

@ -142,3 +142,9 @@ class AgentClient(object):
method='clean.execute_clean_step',
params=params,
wait=False)
def power_off(self, node):
"""Soft powers off the bare metal node by shutting down ramdisk OS."""
return self._command(node=node,
method='standby.power_off',
params={})

View File

@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import types
import mock
from oslo_config import cfg
@ -22,8 +24,10 @@ from ironic.common import keystone
from ironic.common import pxe_utils
from ironic.common import states
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.drivers.modules import agent
from ironic.drivers.modules import agent_client
from ironic.drivers.modules import fake
from ironic.tests.conductor import utils as mgr_utils
from ironic.tests.db import base as db_base
from ironic.tests.db import utils as db_utils
@ -421,12 +425,17 @@ class TestAgentVendor(db_base.DbTestCase):
self.assertEqual(states.ACTIVE,
task.node.target_provision_state)
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(fake.FakePower, 'get_power_state',
spec=types.FunctionType)
@mock.patch.object(agent_client.AgentClient, 'power_off',
spec=types.FunctionType)
@mock.patch('ironic.conductor.utils.node_set_boot_device', autospec=True)
@mock.patch('ironic.drivers.modules.agent.AgentVendorInterface'
'.check_deploy_success', autospec=True)
def test_reboot_to_instance(self, check_deploy_mock, bootdev_mock,
power_mock):
power_off_mock, get_power_state_mock,
node_power_action_mock):
check_deploy_mock.return_value = None
self.node.provision_state = states.DEPLOYING
@ -435,11 +444,15 @@ class TestAgentVendor(db_base.DbTestCase):
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
get_power_state_mock.return_value = states.POWER_OFF
self.passthru.reboot_to_instance(task)
check_deploy_mock.assert_called_once_with(mock.ANY, task.node)
bootdev_mock.assert_called_once_with(task, 'disk', persistent=True)
power_mock.assert_called_once_with(task, states.REBOOT)
power_off_mock.assert_called_once_with(task.node)
get_power_state_mock.assert_called_once_with(task)
node_power_action_mock.assert_called_once_with(
task, states.POWER_ON)
self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state)

View File

@ -15,6 +15,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import time
import types
import mock
@ -27,6 +28,7 @@ from ironic.conductor import utils as manager_utils
from ironic.drivers.modules import agent_base_vendor
from ironic.drivers.modules import agent_client
from ironic.drivers.modules import deploy_utils
from ironic.drivers.modules import fake
from ironic import objects
from ironic.tests.conductor import utils as mgr_utils
from ironic.tests.db import base as db_base
@ -332,31 +334,118 @@ class TestBaseAgentVendor(db_base.DbTestCase):
self.assertIsInstance(driver_routes, dict)
self.assertEqual(expected, list(driver_routes))
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
def test_reboot_and_finish_deploy_success(self, node_power_action_mock):
@mock.patch.object(fake.FakePower, 'get_power_state',
spec=types.FunctionType)
@mock.patch.object(agent_client.AgentClient, 'power_off',
spec=types.FunctionType)
def test_reboot_and_finish_deploy(self, power_off_mock,
get_power_state_mock,
node_power_action_mock):
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node['uuid'],
shared=False) as task:
with task_manager.acquire(self.context, self.node.uuid,
shared=True) as task:
get_power_state_mock.side_effect = [states.POWER_ON,
states.POWER_OFF]
self.passthru.reboot_and_finish_deploy(task)
node_power_action_mock.assert_called_once_with(task, states.REBOOT)
power_off_mock.assert_called_once_with(task.node)
self.assertEqual(2, get_power_state_mock.call_count)
node_power_action_mock.assert_called_once_with(
task, states.POWER_ON)
self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(fake.FakePower, 'get_power_state',
spec=types.FunctionType)
@mock.patch.object(agent_client.AgentClient, 'power_off',
spec=types.FunctionType)
def test_reboot_and_finish_deploy_soft_poweroff_doesnt_complete(
self, power_off_mock, get_power_state_mock,
node_power_action_mock):
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid,
shared=True) as task:
get_power_state_mock.return_value = states.POWER_ON
self.passthru.reboot_and_finish_deploy(task)
power_off_mock.assert_called_once_with(task.node)
self.assertEqual(7, get_power_state_mock.call_count)
node_power_action_mock.assert_called_once_with(
task, states.REBOOT)
self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
def test_reboot_and_finish_deploy_reboot_failure(self,
node_power_action_mock):
exc = exception.PowerStateFailure(pstate=states.REBOOT)
@mock.patch.object(agent_client.AgentClient, 'power_off',
spec=types.FunctionType)
def test_reboot_and_finish_deploy_soft_poweroff_fails(
self, power_off_mock, node_power_action_mock):
power_off_mock.side_effect = RuntimeError("boom")
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
node_power_action_mock.side_effect = exc
with task_manager.acquire(self.context, self.node['uuid'],
shared=False) as task:
with task_manager.acquire(self.context, self.node.uuid,
shared=True) as task:
self.passthru.reboot_and_finish_deploy(task)
power_off_mock.assert_called_once_with(task.node)
node_power_action_mock.assert_called_once_with(
task, states.REBOOT)
self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(fake.FakePower, 'get_power_state',
spec=types.FunctionType)
@mock.patch.object(agent_client.AgentClient, 'power_off',
spec=types.FunctionType)
def test_reboot_and_finish_deploy_get_power_state_fails(
self, power_off_mock, get_power_state_mock,
node_power_action_mock):
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid,
shared=True) as task:
get_power_state_mock.side_effect = RuntimeError("boom")
self.passthru.reboot_and_finish_deploy(task)
power_off_mock.assert_called_once_with(task.node)
self.assertEqual(7, get_power_state_mock.call_count)
node_power_action_mock.assert_called_once_with(
task, states.REBOOT)
self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(fake.FakePower, 'get_power_state',
spec=types.FunctionType)
@mock.patch.object(agent_client.AgentClient, 'power_off',
spec=types.FunctionType)
def test_reboot_and_finish_deploy_power_action_fails(
self, power_off_mock, get_power_state_mock,
node_power_action_mock):
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid,
shared=True) as task:
get_power_state_mock.return_value = states.POWER_ON
node_power_action_mock.side_effect = RuntimeError("boom")
self.assertRaises(exception.InstanceDeployFailure,
self.passthru.reboot_and_finish_deploy, task)
node_power_action_mock.assert_any_call(task, states.REBOOT)
self.passthru.reboot_and_finish_deploy,
task)
power_off_mock.assert_called_once_with(task.node)
self.assertEqual(7, get_power_state_mock.call_count)
node_power_action_mock.assert_has_calls([
mock.call(task, states.REBOOT),
mock.call(task, states.POWER_OFF)])
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
self.assertEqual(states.ACTIVE, task.node.target_provision_state)

View File

@ -210,3 +210,9 @@ class TestAgentClient(base.TestCase):
self.client._command.assert_called_once_with(
node=self.node, method='clean.execute_clean_step',
params=expected_params, wait=False)
def test_power_off(self):
self.client._command = mock.MagicMock(spec_set=[])
self.client.power_off(self.node)
self.client._command.assert_called_once_with(
node=self.node, method='standby.power_off', params={})