diff --git a/ironic/api/controllers/v1/node.py b/ironic/api/controllers/v1/node.py index 0de32da549..469646a066 100644 --- a/ironic/api/controllers/v1/node.py +++ b/ironic/api/controllers/v1/node.py @@ -88,12 +88,12 @@ class NodePowerStateController(rest.RestController): #TODO(lucasagomes): Test if target is a valid state and if it's able # to transition to the target state from the current one - node['target_power_state'] = target - updated_node = pecan.request.rpcapi.update_node(pecan.request.context, - node) + # Note that there is a race condition. The node state(s) could change + # by the time the RPC call is made and the TaskManager manager gets a + # lock. pecan.request.rpcapi.change_node_power_state(pecan.request.context, - updated_node, target) - return NodePowerState.convert_with_links(updated_node, expand=False) + node, target) + return NodePowerState.convert_with_links(node, expand=False) class NodeProvisionState(state.State): @@ -190,6 +190,10 @@ class Node(base.APIBase): target_power_state = wtypes.text "The user modified desired power state of the node." + last_error = wtypes.text + "Any error from the most recent (last) asynchronous transaction that" + "started but failed to finish." + provision_state = wtypes.text "Represent the current (not transition) provision state of the node" @@ -230,6 +234,7 @@ class Node(base.APIBase): def convert_with_links(cls, rpc_node, expand=True): minimum_fields = ['uuid', 'power_state', 'target_power_state', 'provision_state', 'target_provision_state', + 'last_error', 'instance_uuid'] fields = minimum_fields if not expand else None node = Node.from_rpc_object(rpc_node, fields) diff --git a/ironic/common/states.py b/ironic/common/states.py index 92cddb20d5..a028264948 100644 --- a/ironic/common/states.py +++ b/ironic/common/states.py @@ -28,7 +28,7 @@ validated by the driver. Any node with non-empty `properties` is said to be When the driver has received both `properties` and `driver_info`, it will check the power status of the node and update the `power_state` accordingly. If the -driver fails to read the the power state from the node, it will reject the +driver fails to read the power state from the node, it will reject the `driver_info` change, and the state will remain as INIT. If the power status check succeeds, `power_state` will change to one of POWER_ON or POWER_OFF, accordingly. @@ -36,7 +36,9 @@ accordingly. At this point, the power state may be changed via the API, a console may be started, and a tenant may be associated. -The `power_state` for a node which fails to transition will be set to ERROR. +The `power_state` for a node always represents the current power state. Any +power operation sets this to the actual state when done (whether successful or +not). It is set to ERROR only when unable to get the power state from a node. When `instance_uuid` is set to a non-empty / non-None value, the node is said to be "associated" with a tenant. diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 6260e93bba..8375ae6752 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -154,7 +154,7 @@ class ConductorManager(service.PeriodicService): """RPC method to encapsulate changes to a node's state. Perform actions such as power on, power off. It waits for the power - action to finish, then if succesful, it updates the power_state for + action to finish, then if successful, it updates the power_state for the node with the new power state. :param context: an admin context. @@ -162,10 +162,8 @@ class ConductorManager(service.PeriodicService): :param new_state: the desired power state of the node. :raises: InvalidParameterValue when the wrong state is specified or the wrong driver info is specified. - :raises: NodeInWrongPowerState when the node is in the state. - that cannot perform and requested power action. - :raises: other exceptins by the node's power driver if something - wrong during the power action. + :raises: other exceptions by the node's power driver if something + wrong occurred during the power action. """ node_id = node_obj.get('uuid') @@ -174,31 +172,57 @@ class ConductorManager(service.PeriodicService): % {'node': node_id, 'state': new_state}) with task_manager.acquire(context, node_id, shared=False) as task: - # an exception will be raised if validate fails. - task.driver.power.validate(node_obj) - curr_state = task.driver.power.get_power_state(task, node_obj) - if curr_state == new_state: - raise exception.NodeInWrongPowerState(node=node_id, - pstate=curr_state) - - # set the target_power_state. - # This will expose to other processes and clients that the work - # is in progress - node_obj['target_power_state'] = new_state - node_obj.save(context) - - #take power action, set the power_state to error if fails + node = task.node try: - task.driver.power.set_power_state(task, node_obj, new_state) - except exception.IronicException: - node_obj['power_state'] = states.ERROR - node_obj.save(context) - raise + task.driver.power.validate(node) + curr_state = task.driver.power.get_power_state(task, node) + except Exception as e: + with excutils.save_and_reraise_exception(): + node['last_error'] = \ + _("Failed to change power state to '%(target)s'. " + "Error: %(error)s") % { + 'target': new_state, 'error': e} + node.save(context) - # update the node power states - node_obj['power_state'] = new_state - node_obj['target_power_state'] = states.NOSTATE - node_obj.save(context) + if curr_state == new_state: + # Neither the ironic service nor the hardware has erred. The + # node is, for some reason, already in the requested state, + # though we don't know why. eg, perhaps the user previously + # requested the node POWER_ON, the network delayed those IPMI + # packets, and they are trying again -- but the node finally + # responds to the first request, and so the second request + # gets to this check and stops. + # This isn't an error, so we'll clear last_error field + # (from previous operation), log a warning, and return. + node['last_error'] = None + node.save(context) + LOG.warn(_("Not going to change_node_power_state because " + "current state = requested state = '%(state)s'.") + % {'state': curr_state}) + return + + # Set the target_power_state and clear any last_error, since we're + # starting a new operation. This will expose to other processes + # and clients that work is in progress. + node['target_power_state'] = new_state + node['last_error'] = None + node.save(context) + + # take power action + try: + task.driver.power.set_power_state(task, node, new_state) + except Exception as e: + with excutils.save_and_reraise_exception(): + node['last_error'] = \ + _("Failed to change power state to '%(target)s'. " + "Error: %(error)s") % { + 'target': new_state, 'error': e} + else: + # success! + node['power_state'] = new_state + finally: + node['target_power_state'] = states.NOSTATE + node.save(context) # NOTE(deva): There is a race condition in the RPC API for vendor_passthru. # Between the validate_vendor_action and do_vendor_action calls, it's @@ -256,7 +280,7 @@ class ConductorManager(service.PeriodicService): try: new_state = task.driver.deploy.deploy(task, node_obj) - except exception.IronicException: + except Exception: with excutils.save_and_reraise_exception(): node_obj['provision_state'] = states.ERROR node_obj.save(context) @@ -299,7 +323,7 @@ class ConductorManager(service.PeriodicService): try: new_state = task.driver.deploy.tear_down(task, node_obj) - except exception.IronicException: + except Exception: with excutils.save_and_reraise_exception(): node_obj['provision_state'] = states.ERROR node_obj.save(context) diff --git a/ironic/db/sqlalchemy/migrate_repo/versions/013_nodes_add_last_error.py b/ironic/db/sqlalchemy/migrate_repo/versions/013_nodes_add_last_error.py new file mode 100644 index 0000000000..9a3197f060 --- /dev/null +++ b/ironic/db/sqlalchemy/migrate_repo/versions/013_nodes_add_last_error.py @@ -0,0 +1,30 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 +# -*- encoding: utf-8 -*- +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from sqlalchemy import Table, Column, MetaData, Text + + +def upgrade(migrate_engine): + meta = MetaData() + meta.bind = migrate_engine + + nodes = Table('nodes', meta, autoload=True) + + # Create new last_error column + nodes.create_column(Column('last_error', Text, nullable=True)) + + +def downgrade(migrate_engine): + raise NotImplementedError('Downgrade from version 013 is unsupported.') diff --git a/ironic/db/sqlalchemy/models.py b/ironic/db/sqlalchemy/models.py index b56e9c88e0..b102c125c3 100644 --- a/ironic/db/sqlalchemy/models.py +++ b/ironic/db/sqlalchemy/models.py @@ -26,7 +26,7 @@ from oslo.config import cfg from sqlalchemy import Column, ForeignKey from sqlalchemy import Integer, Index -from sqlalchemy import schema, String +from sqlalchemy import schema, String, Text from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.types import TypeDecorator, VARCHAR @@ -120,6 +120,7 @@ class Node(Base): target_power_state = Column(String(15), nullable=True) provision_state = Column(String(15), nullable=True) target_provision_state = Column(String(15), nullable=True) + last_error = Column(Text, nullable=True) properties = Column(JSONEncodedDict) driver = Column(String(15)) driver_info = Column(JSONEncodedDict) diff --git a/ironic/drivers/modules/fake.py b/ironic/drivers/modules/fake.py index a288185763..082807b760 100644 --- a/ironic/drivers/modules/fake.py +++ b/ironic/drivers/modules/fake.py @@ -33,7 +33,9 @@ class FakePower(base.PowerInterface): return states.NOSTATE def set_power_state(self, task, node, power_state): - pass + if power_state not in [states.POWER_ON, states.POWER_OFF]: + raise exception.InvalidParameterValue(_("set_power_state called " + "with an invalid power state: %s.") % power_state) def reboot(self, task, node): pass diff --git a/ironic/objects/node.py b/ironic/objects/node.py index 457bb823d1..e74eec7c78 100644 --- a/ironic/objects/node.py +++ b/ironic/objects/node.py @@ -35,10 +35,22 @@ class Node(base.IronicObject): 'properties': utils.dict_or_none, 'reservation': utils.str_or_none, + + # One of states.POWER_ON|POWER_OFF|NOSTATE|ERROR 'power_state': utils.str_or_none, + + # Set to one of states.POWER_ON|POWER_OFF when a power operation + # starts, and set to NOSTATE when the operation finishes + # (successfully or unsuccessfully). 'target_power_state': utils.str_or_none, + 'provision_state': utils.str_or_none, 'target_provision_state': utils.str_or_none, + + # Any error from the most recent (last) asynchronous transaction + # that started but failed to finish. + 'last_error': utils.str_or_none, + 'extra': utils.dict_or_none, } diff --git a/ironic/tests/api/test_nodes.py b/ironic/tests/api/test_nodes.py index eb90cffaf9..3914cb272a 100644 --- a/ironic/tests/api/test_nodes.py +++ b/ironic/tests/api/test_nodes.py @@ -539,49 +539,37 @@ class TestPut(base.FunctionalTest): self.chassis = self.dbapi.create_chassis(cdict) ndict = dbutils.get_test_node() self.node = self.dbapi.create_node(ndict) - p = mock.patch.object(rpcapi.ConductorAPI, 'update_node') - self.mock_update_node = p.start() - self.addCleanup(p.stop) p = mock.patch.object(rpcapi.ConductorAPI, 'change_node_power_state') self.mock_cnps = p.start() self.addCleanup(p.stop) def test_power_state(self): - self.mock_update_node.return_value = self.node - response = self.put_json('/nodes/%s/state/power' % self.node['uuid'], {'target': states.POWER_ON}) self.assertEqual(response.content_type, 'application/json') self.assertEqual(response.status_code, 202) - self.mock_update_node.assert_called_once_with(mock.ANY, mock.ANY) self.mock_cnps.assert_called_once_with(mock.ANY, mock.ANY, mock.ANY) def test_power_state_in_progress(self): - self.mock_update_node.return_value = self.node manager = mock.MagicMock() with mock.patch.object(objects.Node, 'get_by_uuid') as mock_get_node: mock_get_node.return_value = self.node manager.attach_mock(mock_get_node, 'get_by_uuid') - manager.attach_mock(self.mock_update_node, 'update_node') manager.attach_mock(self.mock_cnps, 'change_node_power_state') expected = [mock.call.get_by_uuid(mock.ANY, self.node['uuid']), - mock.call.update_node(mock.ANY, mock.ANY), mock.call.change_node_power_state(mock.ANY, mock.ANY, - mock.ANY), - mock.call.get_by_uuid(mock.ANY, self.node['uuid'])] + mock.ANY)] self.put_json('/nodes/%s/state/power' % self.node['uuid'], {'target': states.POWER_ON}) - self.assertRaises(webtest.app.AppError, self.put_json, - '/nodes/%s/state/power' % self.node['uuid'], - {'target': states.POWER_ON}) - self.assertEqual(manager.mock_calls, expected) - # check status code self.dbapi.update_node(self.node['uuid'], {'target_power_state': 'fake'}) + self.assertRaises(webtest.app.AppError, self.put_json, + '/nodes/%s/state/power' % self.node['uuid'], + {'target': states.POWER_ON}) response = self.put_json('/nodes/%s/state/power' % self.node['uuid'], {'target': states.POWER_ON}, expect_errors=True) diff --git a/ironic/tests/conductor/test_manager.py b/ironic/tests/conductor/test_manager.py index da3a0d9e9b..490040e6de 100644 --- a/ironic/tests/conductor/test_manager.py +++ b/ironic/tests/conductor/test_manager.py @@ -181,12 +181,13 @@ class ManagerTestCase(base.DbTestCase): self.assertEqual(res['driver'], existing_driver) def test_change_node_power_state_power_on(self): - """Test if start_power_state to turn node power on + """Test if change_node_power_state to turn node power on is successful or not. """ ndict = utils.get_test_node(driver='fake', power_state=states.POWER_OFF) node = self.dbapi.create_node(ndict) + node = objects.Node.get_by_uuid(self.context, node['uuid']) with mock.patch.object(self.driver.power, 'get_power_state') \ as get_power_mock: @@ -194,18 +195,20 @@ class ManagerTestCase(base.DbTestCase): self.service.change_node_power_state(self.context, node, states.POWER_ON) - - get_power_mock.assert_called_once_with(mock.ANY, node) + node.refresh() + get_power_mock.assert_called_once_with(mock.ANY, mock.ANY) self.assertEqual(node['power_state'], states.POWER_ON) self.assertEqual(node['target_power_state'], None) + self.assertEqual(node['last_error'], None) def test_change_node_power_state_power_off(self): - """Test if start_power_state to turn node power off + """Test if change_node_power_state to turn node power off is successful or not. """ ndict = utils.get_test_node(driver='fake', power_state=states.POWER_ON) node = self.dbapi.create_node(ndict) + node = objects.Node.get_by_uuid(self.context, node['uuid']) with mock.patch.object(self.driver.power, 'get_power_state') \ as get_power_mock: @@ -213,10 +216,43 @@ class ManagerTestCase(base.DbTestCase): self.service.change_node_power_state(self.context, node, states.POWER_OFF) - - get_power_mock.assert_called_once_with(mock.ANY, node) + node.refresh() + get_power_mock.assert_called_once_with(mock.ANY, mock.ANY) self.assertEqual(node['power_state'], states.POWER_OFF) self.assertEqual(node['target_power_state'], None) + self.assertEqual(node['last_error'], None) + + def test_change_node_power_state_to_invalid_state(self): + """Test if an exception is thrown when changing to an invalid + power state. + """ + ndict = utils.get_test_node(driver='fake', + power_state=states.POWER_ON) + node = self.dbapi.create_node(ndict) + node = objects.Node.get_by_uuid(self.context, node['uuid']) + + with mock.patch.object(self.driver.power, 'get_power_state') \ + as get_power_mock: + get_power_mock.return_value = states.POWER_ON + + self.assertRaises(exception.InvalidParameterValue, + self.service.change_node_power_state, + self.context, + node, + "POWER") + node.refresh() + get_power_mock.assert_called_once_with(mock.ANY, mock.ANY) + self.assertEqual(node['power_state'], states.POWER_ON) + self.assertEqual(node['target_power_state'], None) + self.assertNotEqual(node['last_error'], None) + + # last_error is cleared when a new transaction happens + self.service.change_node_power_state(self.context, node, + states.POWER_OFF) + node.refresh() + self.assertEqual(node['power_state'], states.POWER_OFF) + self.assertEqual(node['target_power_state'], None) + self.assertEqual(node['last_error'], None) def test_change_node_power_state_already_locked(self): """Test if an exception is thrown when applying an exclusive @@ -225,6 +261,7 @@ class ManagerTestCase(base.DbTestCase): ndict = utils.get_test_node(driver='fake', power_state=states.POWER_ON) node = self.dbapi.create_node(ndict) + node = objects.Node.get_by_uuid(self.context, node['uuid']) # check if the node is locked with task_manager.acquire(self.context, node['id'], shared=False): @@ -233,33 +270,65 @@ class ManagerTestCase(base.DbTestCase): self.context, node, states.POWER_ON) + node.refresh() + self.assertEqual(node['power_state'], states.POWER_ON) + self.assertEqual(node['target_power_state'], None) + self.assertEqual(node['last_error'], None) - def test_change_node_power_state_invalid_state(self): - """Test if an NodeInWrongPowerState exception is thrown - when the node is in an invalid state to perform current operation. + def test_change_node_power_state_already_being_processed(self): + """The target_power_state is expected to be None so it isn't + checked in the code. This is what happens if it is not None. + (Eg, if a conductor had died during a previous power-off + attempt and left the target_power_state set to states.POWER_OFF, + and the user is attempting to power-off again.) """ ndict = utils.get_test_node(driver='fake', + power_state=states.POWER_ON, + target_power_state=states.POWER_OFF) + node = self.dbapi.create_node(ndict) + node = objects.Node.get_by_uuid(self.context, node['uuid']) + + self.service.change_node_power_state(self.context, node, + states.POWER_OFF) + node.refresh() + self.assertEqual(node['power_state'], states.POWER_OFF) + self.assertEqual(node['target_power_state'], states.NOSTATE) + self.assertEqual(node['last_error'], None) + + def test_change_node_power_state_in_same_state(self): + """Test that we don't try to set the power state if the requested + state is the same as the current state. + """ + ndict = utils.get_test_node(driver='fake', + last_error='anything but None', power_state=states.POWER_ON) node = self.dbapi.create_node(ndict) + node = objects.Node.get_by_uuid(self.context, node['uuid']) with mock.patch.object(self.driver.power, 'get_power_state') \ as get_power_mock: get_power_mock.return_value = states.POWER_ON + with mock.patch.object(self.driver.power, 'set_power_state') \ + as set_power_mock: + set_power_mock.side_effect = exception.IronicException() - self.assertRaises(exception.NodeInWrongPowerState, - self.service.change_node_power_state, - self.context, - node, - states.POWER_ON) - get_power_mock.assert_called_once_with(mock.ANY, node) + self.service.change_node_power_state(self.context, node, + states.POWER_ON) + node.refresh() + get_power_mock.assert_called_once_with(mock.ANY, mock.ANY) + self.assertFalse(set_power_mock.called) + self.assertEqual(node['power_state'], states.POWER_ON) + self.assertEqual(node['target_power_state'], None) + self.assertEqual(node['last_error'], None) def test_change_node_power_state_invalid_driver_info(self): - """Test if an exception is thrown when the driver validation is - failed. + """Test if an exception is thrown when the driver validation + fails. """ ndict = utils.get_test_node(driver='fake', power_state=states.POWER_ON) node = self.dbapi.create_node(ndict) + node = objects.Node.get_by_uuid(self.context, node['uuid']) with mock.patch.object(self.driver.power, 'validate') \ as validate_mock: @@ -271,34 +340,40 @@ class ManagerTestCase(base.DbTestCase): self.context, node, states.POWER_ON) - validate_mock.assert_called_once_with(node) + node.refresh() + validate_mock.assert_called_once_with(mock.ANY) + self.assertEqual(node['power_state'], states.POWER_ON) + self.assertEqual(node['target_power_state'], None) + self.assertNotEqual(node['last_error'], None) def test_change_node_power_state_set_power_failure(self): - """Test if an exception is thrown when the set_power call is - failed. + """Test if an exception is thrown when the set_power call + fails. """ - class TestException(Exception): - pass - ndict = utils.get_test_node(driver='fake', power_state=states.POWER_OFF) node = self.dbapi.create_node(ndict) + node = objects.Node.get_by_uuid(self.context, node['uuid']) with mock.patch.object(self.driver.power, 'get_power_state') \ as get_power_mock: with mock.patch.object(self.driver.power, 'set_power_state') \ as set_power_mock: get_power_mock.return_value = states.POWER_OFF - set_power_mock.side_effect = TestException() + set_power_mock.side_effect = exception.IronicException() - self.assertRaises(TestException, + self.assertRaises(exception.IronicException, self.service.change_node_power_state, self.context, node, states.POWER_ON) - get_power_mock.assert_called_once_with(mock.ANY, node) - set_power_mock.assert_called_once_with(mock.ANY, node, + node.refresh() + get_power_mock.assert_called_once_with(mock.ANY, mock.ANY) + set_power_mock.assert_called_once_with(mock.ANY, mock.ANY, states.POWER_ON) + self.assertEqual(node['power_state'], states.POWER_OFF) + self.assertEqual(node['target_power_state'], None) + self.assertNotEqual(node['last_error'], None) def test_vendor_action(self): n = utils.get_test_node(driver='fake') diff --git a/ironic/tests/db/sqlalchemy/test_migrations.py b/ironic/tests/db/sqlalchemy/test_migrations.py index 66b396c77b..54fb3b14f9 100644 --- a/ironic/tests/db/sqlalchemy/test_migrations.py +++ b/ironic/tests/db/sqlalchemy/test_migrations.py @@ -709,3 +709,22 @@ class TestMigrations(BaseMigrationTestCase, WalkVersionsMixin): conductor.insert().execute, {'hostname': None}) # FIXME: add check for postgres + + def _pre_upgrade_013(self, engine): + nodes = db_utils.get_table(engine, 'nodes') + col_names = set(column.name for column in nodes.c) + + self.assertFalse('last_error' in col_names) + return col_names + + def _check_013(self, engine, col_names_pre): + nodes = db_utils.get_table(engine, 'nodes') + col_names = set(column.name for column in nodes.c) + + # didn't lose any columns in the migration + self.assertEqual(col_names_pre, col_names.intersection(col_names_pre)) + + # only added one 'last_error' column + self.assertEqual(len(col_names_pre), len(col_names) - 1) + self.assertTrue(isinstance(nodes.c['last_error'].type, + getattr(sqlalchemy.types, 'Text'))) diff --git a/ironic/tests/db/utils.py b/ironic/tests/db/utils.py index 4ef89b07eb..53cf949b56 100644 --- a/ironic/tests/db/utils.py +++ b/ironic/tests/db/utils.py @@ -72,6 +72,7 @@ def get_test_node(**kw): 'provision_state': kw.get('provision_state', states.NOSTATE), 'target_provision_state': kw.get('target_provision_state', states.NOSTATE), + 'last_error': kw.get('last_error', None), 'instance_uuid': kw.get('instance_uuid', None), 'driver': kw.get('driver', 'fake'), 'driver_info': kw.get('driver_info', fake_info), diff --git a/ironic/tests/drivers/test_fake.py b/ironic/tests/drivers/test_fake.py index 272cd792b4..447b81d366 100644 --- a/ironic/tests/drivers/test_fake.py +++ b/ironic/tests/drivers/test_fake.py @@ -44,7 +44,10 @@ class FakeDriverTestCase(base.TestCase): def test_power_interface(self): self.driver.power.validate(self.node) self.driver.power.get_power_state(None, self.node) - self.driver.power.set_power_state(None, None, states.NOSTATE) + self.assertRaises(exception.InvalidParameterValue, + self.driver.power.set_power_state, + None, None, states.NOSTATE) + self.driver.power.set_power_state(None, None, states.POWER_ON) self.driver.power.reboot(None, None) def test_deploy_interface(self):