Correct power state handling for managed in-band inspection
Do not try to configure networks while the node is powered on, unless the node has a Smart NIC port, in which case power the node on before configuring networks. A new context-manager helper, power_state_for_network_configuration, is created based on existing code in agent.py. Change-Id: I3a8fab7a39b604ed17a690fa9c31b3cd1dbdc6a7 Story: #1528920 Task: #37753
This commit is contained in:
parent
7a7e9689a3
commit
fd064a4f6b
|
@ -12,6 +12,7 @@
|
|||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import contextlib
|
||||
import datetime
|
||||
import time
|
||||
|
||||
|
@ -831,6 +832,20 @@ def restore_power_state_if_needed(task, power_state_to_restore):
|
|||
node_power_action(task, power_state_to_restore)
|
||||
|
||||
|
||||
@contextlib.contextmanager
def power_state_for_network_configuration(task):
    """Handle the power state for a node reconfiguration.

    Powers the node on if and only if it has a Smart NIC port. Yields for
    the actual reconfiguration, then restores the power state.

    The power state is restored even when the reconfiguration performed
    inside the ``with`` block raises, so that a node which was powered on
    only for the reconfiguration is not left in that state after a failure.
    The original exception still propagates to the caller; if restoring the
    power state fails as well, that later exception propagates instead and
    carries the original one as its context.

    :param task: A TaskManager object.
    :returns: A context manager yielding the same ``task`` object.
    """
    # Whatever power_on_node_if_needed() returns is handed back untouched to
    # restore_power_state_if_needed(); this helper does not interpret it.
    previous = power_on_node_if_needed(task)
    try:
        yield task
    finally:
        # contextlib re-raises an exception from the ``with`` body at the
        # ``yield`` above, so the restore must live in ``finally`` to run on
        # both the success and the failure path.
        restore_power_state_if_needed(task, previous)
|
||||
|
||||
|
||||
def build_configdrive(node, configdrive):
|
||||
"""Build a configdrive from provided meta_data, network_data and user_data.
|
||||
|
||||
|
|
|
@ -478,12 +478,9 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
|
|||
# This is not being done now as it is expected to be
|
||||
# refactored in the near future.
|
||||
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||
power_state_to_restore = (
|
||||
manager_utils.power_on_node_if_needed(task))
|
||||
task.driver.network.remove_provisioning_network(task)
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
with manager_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.remove_provisioning_network(task)
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
task.driver.boot.prepare_instance(task)
|
||||
manager_utils.node_power_action(task, states.POWER_ON)
|
||||
LOG.info('Deployment to node %s done', task.node.uuid)
|
||||
|
@ -507,13 +504,11 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
|
|||
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||
task.driver.storage.detach_volumes(task)
|
||||
deploy_utils.tear_down_storage_configuration(task)
|
||||
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
|
||||
task.driver.network.unconfigure_tenant_networks(task)
|
||||
# NOTE(mgoddard): If the deployment was unsuccessful the node may have
|
||||
# ports on the provisioning network which were not deleted.
|
||||
task.driver.network.remove_provisioning_network(task)
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
with manager_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.unconfigure_tenant_networks(task)
|
||||
# NOTE(mgoddard): If the deployment was unsuccessful the node may
|
||||
# have ports on the provisioning network which were not deleted.
|
||||
task.driver.network.remove_provisioning_network(task)
|
||||
return states.DELETED
|
||||
|
||||
@METRICS.timer('AgentDeploy.prepare')
|
||||
|
@ -853,11 +848,9 @@ class AgentRescue(base.RescueInterface):
|
|||
task.node.save()
|
||||
|
||||
task.driver.boot.clean_up_instance(task)
|
||||
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
|
||||
task.driver.network.unconfigure_tenant_networks(task)
|
||||
task.driver.network.add_rescuing_network(task)
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
with manager_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.unconfigure_tenant_networks(task)
|
||||
task.driver.network.add_rescuing_network(task)
|
||||
if CONF.agent.manage_agent_boot:
|
||||
ramdisk_opts = deploy_utils.build_agent_options(task.node)
|
||||
# prepare_ramdisk will set the boot device
|
||||
|
@ -892,10 +885,8 @@ class AgentRescue(base.RescueInterface):
|
|||
task.node.save()
|
||||
|
||||
self.clean_up(task)
|
||||
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
with manager_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
task.driver.boot.prepare_instance(task)
|
||||
manager_utils.node_power_action(task, states.POWER_ON)
|
||||
|
||||
|
@ -947,7 +938,5 @@ class AgentRescue(base.RescueInterface):
|
|||
manager_utils.remove_node_rescue_password(task.node, save=True)
|
||||
if CONF.agent.manage_agent_boot:
|
||||
task.driver.boot.clean_up_ramdisk(task)
|
||||
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
|
||||
task.driver.network.remove_rescuing_network(task)
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
with manager_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.remove_rescuing_network(task)
|
||||
|
|
|
@ -464,10 +464,8 @@ class HeartbeatMixin(object):
|
|||
reason=fail_reason)
|
||||
task.process_event('resume')
|
||||
task.driver.rescue.clean_up(task)
|
||||
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
with manager_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
task.process_event('done')
|
||||
|
||||
|
||||
|
@ -736,12 +734,9 @@ class AgentDeployMixin(HeartbeatMixin):
|
|||
log_and_raise_deployment_error(task, msg, exc=e)
|
||||
|
||||
try:
|
||||
power_state_to_restore = (
|
||||
manager_utils.power_on_node_if_needed(task))
|
||||
task.driver.network.remove_provisioning_network(task)
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
with manager_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.remove_provisioning_network(task)
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
manager_utils.node_power_action(task, states.POWER_ON)
|
||||
except Exception as e:
|
||||
msg = (_('Error rebooting node %(node)s after deploy. '
|
||||
|
|
|
@ -114,7 +114,8 @@ def _tear_down_managed_boot(task):
|
|||
LOG.exception('Unable to clean up ramdisk boot for node %s',
|
||||
task.node.uuid)
|
||||
try:
|
||||
task.driver.network.remove_inspection_network(task)
|
||||
with cond_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.remove_inspection_network(task)
|
||||
except Exception as exc:
|
||||
errors.append(_('unable to remove inspection ports: %s') % exc)
|
||||
LOG.exception('Unable to remove inspection network for node %s',
|
||||
|
@ -194,10 +195,12 @@ def _start_managed_inspection(task):
|
|||
params = dict(_parse_kernel_params(),
|
||||
**{'ipa-inspection-callback-url': endpoint})
|
||||
|
||||
task.driver.network.add_inspection_network(task)
|
||||
cond_utils.node_power_action(task, states.POWER_OFF)
|
||||
with cond_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.add_inspection_network(task)
|
||||
task.driver.boot.prepare_ramdisk(task, ramdisk_params=params)
|
||||
client.start_introspection(task.node.uuid, manage_boot=False)
|
||||
cond_utils.node_power_action(task, states.REBOOT)
|
||||
cond_utils.node_power_action(task, states.POWER_ON)
|
||||
except Exception as exc:
|
||||
LOG.exception('Unable to start managed inspection for node %(uuid)s: '
|
||||
'%(err)s', {'uuid': task.node.uuid, 'err': exc})
|
||||
|
|
|
@ -440,12 +440,9 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
|
|||
# This is not being done now as it is expected to be
|
||||
# refactored in the near future.
|
||||
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||
power_state_to_restore = (
|
||||
manager_utils.power_on_node_if_needed(task))
|
||||
task.driver.network.remove_provisioning_network(task)
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
with manager_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.remove_provisioning_network(task)
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
task.driver.boot.prepare_instance(task)
|
||||
manager_utils.node_power_action(task, states.POWER_ON)
|
||||
|
||||
|
@ -471,13 +468,11 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
|
|||
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||
task.driver.storage.detach_volumes(task)
|
||||
deploy_utils.tear_down_storage_configuration(task)
|
||||
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
|
||||
task.driver.network.unconfigure_tenant_networks(task)
|
||||
# NOTE(mgoddard): If the deployment was unsuccessful the node may have
|
||||
# ports on the provisioning network which were not deleted.
|
||||
task.driver.network.remove_provisioning_network(task)
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
with manager_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.unconfigure_tenant_networks(task)
|
||||
# NOTE(mgoddard): If the deployment was unsuccessful the node may
|
||||
# have ports on the provisioning network which were not deleted.
|
||||
task.driver.network.remove_provisioning_network(task)
|
||||
return states.DELETED
|
||||
|
||||
@METRICS.timer('ISCSIDeploy.prepare')
|
||||
|
|
|
@ -357,10 +357,8 @@ class PXERamdiskDeploy(agent.AgentDeploy):
|
|||
# IDEA(TheJulia): Maybe a "trusted environment" mode flag
|
||||
# that we otherwise fail validation on for drivers that
|
||||
# require explicit security postures.
|
||||
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
with manager_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
|
||||
# calling boot.prepare_instance will also set the node
|
||||
# to PXE boot, and update PXE templates accordingly
|
||||
|
|
|
@ -192,11 +192,12 @@ class InspectHardwareTestCase(BaseTestCase):
|
|||
})
|
||||
self.driver.network.add_inspection_network.assert_called_once_with(
|
||||
self.task)
|
||||
self.driver.power.reboot.assert_called_once_with(
|
||||
self.task, timeout=None)
|
||||
self.driver.power.set_power_state.assert_has_calls([
|
||||
mock.call(self.task, states.POWER_OFF, timeout=None),
|
||||
mock.call(self.task, states.POWER_ON, timeout=None),
|
||||
])
|
||||
self.assertFalse(self.driver.network.remove_inspection_network.called)
|
||||
self.assertFalse(self.driver.boot.clean_up_ramdisk.called)
|
||||
self.assertFalse(self.driver.power.set_power_state.called)
|
||||
|
||||
def test_managed_custom_params(self, mock_client):
|
||||
CONF.set_override('extra_kernel_params',
|
||||
|
@ -219,11 +220,12 @@ class InspectHardwareTestCase(BaseTestCase):
|
|||
})
|
||||
self.driver.network.add_inspection_network.assert_called_once_with(
|
||||
self.task)
|
||||
self.driver.power.reboot.assert_called_once_with(
|
||||
self.task, timeout=None)
|
||||
self.driver.power.set_power_state.assert_has_calls([
|
||||
mock.call(self.task, states.POWER_OFF, timeout=None),
|
||||
mock.call(self.task, states.POWER_ON, timeout=None),
|
||||
])
|
||||
self.assertFalse(self.driver.network.remove_inspection_network.called)
|
||||
self.assertFalse(self.driver.boot.clean_up_ramdisk.called)
|
||||
self.assertFalse(self.driver.power.set_power_state.called)
|
||||
|
||||
@mock.patch.object(task_manager, 'acquire', autospec=True)
|
||||
def test_managed_error(self, mock_acquire, mock_client):
|
||||
|
@ -246,7 +248,7 @@ class InspectHardwareTestCase(BaseTestCase):
|
|||
self.driver.network.remove_inspection_network.assert_called_once_with(
|
||||
self.task)
|
||||
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
|
||||
self.driver.power.set_power_state.assert_called_once_with(
|
||||
self.driver.power.set_power_state.assert_called_with(
|
||||
self.task, 'power off', timeout=None)
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue