Fix leaking resources during cluster recover

If a node recover was in progress when a cluster delete was
called it would cause resources to be created, but unable
to update the Node object as it no longer exists.

With this patch if Node.update fails because the object
no longer exists. Delete all resources created during
the failed operation.

Change-Id: I95a8b5791eee5d334f8841bebe52f37f6f5eb2e5
This commit is contained in:
Erik Olof Gunnar Andersson 2019-12-01 01:10:41 -08:00
parent 7e04d4a353
commit a6a18b8df5
2 changed files with 269 additions and 11 deletions

View File

@ -768,9 +768,13 @@ class ServerProfile(base.Profile):
port_attrs['floating'].update({'remove': True})
internal_ports.append(port_attrs)
if internal_ports:
node_data = obj.data
node_data.update(internal_ports=internal_ports)
node_obj.Node.update(self.context, obj.id, {'data': node_data})
try:
node_data = obj.data
node_data.update(internal_ports=internal_ports)
node_obj.Node.update(self.context, obj.id, {'data': node_data})
except exc.ResourceNotFound:
self._rollback_ports(obj, internal_ports)
raise
return internal_ports
def _build_metadata(self, obj, usermeta):
@ -809,14 +813,7 @@ class ServerProfile(base.Profile):
:param obj: The node object for which a server will be created.
"""
kwargs = {}
for key in self.KEYS:
# context is treated as connection parameters
if key == self.CONTEXT:
continue
if self.properties[key] is not None:
kwargs[key] = self.properties[key]
kwargs = self._generate_kwargs()
admin_pass = self.properties[self.ADMIN_PASS]
if admin_pass:
@ -890,6 +887,10 @@ class ServerProfile(base.Profile):
# Update zone placement info if available
self._update_zone_info(obj, server)
return server.id
except exc.ResourceNotFound:
self._rollback_ports(obj, ports)
self._rollback_instance(obj, server)
raise
except exc.InternalError as ex:
if server and server.id:
resource_id = server.id
@ -908,6 +909,75 @@ class ServerProfile(base.Profile):
message=six.text_type(ex),
resource_id=resource_id)
def _generate_kwargs(self):
"""Generate the base kwargs for a server.
:return:
"""
kwargs = {}
for key in self.KEYS:
# context is treated as connection parameters
if key == self.CONTEXT:
continue
if self.properties[key] is not None:
kwargs[key] = self.properties[key]
return kwargs
def _rollback_ports(self, obj, ports):
"""Rollback any ports created after a ResourceNotFound exception.
:param obj: The node object.
:param ports: A list of ports which attached to this server.
:return:
"""
if not ports:
return
LOG.warning(
'Rolling back ports for Node %s.',
obj.id
)
for port in ports:
if not port.get('remove', False):
continue
try:
if (port.get('floating') and
port['floating'].get('remove', False)):
self.network(obj).floatingip_delete(
port['floating']['id']
)
self.network(obj).port_delete(port['id'])
except exc.InternalError as ex:
LOG.debug(
'Failed to delete port %s during rollback for Node %s: %s',
port['id'], obj.id, ex
)
def _rollback_instance(self, obj, server):
"""Rollback an instance created after a ResourceNotFound exception.
:param obj: The node object.
:param server: A server.
:return:
"""
if not server or not server.id:
return
LOG.warning(
'Rolling back instance %s for Node %s.',
server.id, obj.id
)
try:
self.compute(obj).server_force_delete(server.id, True)
except exc.InternalError as ex:
LOG.debug(
'Failed to delete instance %s during rollback for Node %s: %s',
server.id, obj.id, ex
)
def do_delete(self, obj, **params):
"""Delete the physical resource associated with the specified node.

View File

@ -192,6 +192,76 @@ class TestNovaServerBasic(base.SenlinTestCase):
mock_zone_info.assert_called_once_with(node_obj, fake_server)
self.assertEqual('FAKE_ID', server_id)
@mock.patch.object(node_ob.Node, 'update')
def test_do_create_fail_create_instance_when_node_longer_exists(
self, mock_node_update):
mock_node_update.side_effect = [
None, exc.ResourceNotFound(type='Node', id='FAKE_NODE_ID')
]
cc = mock.Mock()
nc = mock.Mock()
profile = server.ServerProfile('t', self.spec)
profile._computeclient = cc
profile._networkclient = nc
profile._rollback_ports = mock.Mock()
profile._rollback_instance = mock.Mock()
self._stubout_profile(profile, mock_image=True, mock_flavor=True,
mock_keypair=True, mock_net=False)
node_obj = mock.Mock(id='FAKE_NODE_ID', index=123,
availability_zone="AZ01",
cluster_id='FAKE_CLUSTER_ID',
data={
'placement': {
'zone': 'AZ1',
'servergroup': 'SERVER_GROUP_1'
}
})
node_obj.name = 'TEST_SERVER'
fake_server = mock.Mock(id='FAKE_ID')
cc.server_create.return_value = fake_server
cc.server_get.return_value = fake_server
self.assertRaises(
exc.ResourceNotFound, profile.do_create, node_obj
)
profile._rollback_ports.assert_called_once()
profile._rollback_instance.assert_called_once()
@mock.patch.object(node_ob.Node, 'update')
def test_do_create_fail_create_port_when_node_longer_exists(
self, mock_node_update):
mock_node_update.side_effect = exc.ResourceNotFound(
type='Node', id='FAKE_NODE_ID'
)
cc = mock.Mock()
nc = mock.Mock()
profile = server.ServerProfile('t', self.spec)
profile._computeclient = cc
profile._networkclient = nc
profile._rollback_ports = mock.Mock()
profile._rollback_instance = mock.Mock()
self._stubout_profile(profile, mock_image=True, mock_flavor=True,
mock_keypair=True, mock_net=False)
node_obj = mock.Mock(id='FAKE_NODE_ID', index=123,
cluster_id='FAKE_CLUSTER_ID',
data={
'placement': {
'zone': 'AZ1',
'servergroup': 'SERVER_GROUP_1'
}
})
node_obj.name = 'TEST_SERVER'
self.assertRaises(
exc.ResourceNotFound, profile.do_create, node_obj
)
profile._rollback_ports.assert_called_once()
profile._rollback_instance.assert_not_called()
def test_do_create_invalid_image(self):
profile = server.ServerProfile('s2', self.spec)
err = exc.EResourceCreation(type='server', message='boom')
@ -675,6 +745,124 @@ class TestNovaServerBasic(base.SenlinTestCase):
self.assertEqual(1, cc.wait_for_server.call_count)
self.assertEqual(0, mock_zone_info.call_count)
def test_rollback_ports(self):
nc = mock.Mock()
nc.port_delete.return_value = None
nc.floatingip_delete.return_value = None
profile = server.ServerProfile('t', self.spec)
profile._networkclient = nc
ports = [
{
'id': 'FAKE_PORT_ID',
'remove': True
},
{
'floating': {
'remove': True,
'id': 'FAKE_FLOATING_ID',
},
'id': 'FAKE_PORT_ID',
'remove': True
},
{
'floating': {
'remove': False,
'id': 'FAKE_FLOATING_ID',
},
'id': 'FAKE_PORT_ID',
'remove': False
}
]
node_obj = mock.Mock(id='NODE_ID', cluster_id='', index=-1, data={})
profile._rollback_ports(node_obj, ports)
nc.port_delete.assert_called()
nc.floatingip_delete.assert_called_once_with('FAKE_FLOATING_ID')
def test_rollback_with_no_ports(self):
nc = mock.Mock()
nc.port_delete.return_value = None
nc.floatingip_delete.return_value = None
profile = server.ServerProfile('t', self.spec)
profile._networkclient = nc
ports = []
node_obj = mock.Mock(id='NODE_ID', cluster_id='', index=-1, data={})
profile._rollback_ports(node_obj, ports)
nc.port_delete.assert_not_called()
nc.floatingip_delete.assert_not_called()
def test_rollback_ports_with_internal_error(self):
nc = mock.Mock()
nc.port_delete.return_value = None
nc.floatingip_delete.side_effect = exc.InternalError()
profile = server.ServerProfile('t', self.spec)
profile._networkclient = nc
ports = [{
'floating': {
'remove': True,
'id': 'FAKE_FLOATING_ID',
},
'id': 'FAKE_PORT_ID',
'remove': True
}]
node_obj = mock.Mock(id='NODE_ID', cluster_id='', index=-1, data={})
profile._rollback_ports(node_obj, ports)
nc.port_delete.assert_not_called()
nc.floatingip_delete.assert_called_once_with('FAKE_FLOATING_ID')
def test_rollback_instance(self):
cc = mock.Mock()
cc.port_delete.return_value = None
profile = server.ServerProfile('t', self.spec)
profile._computeclient = cc
server_obj = mock.Mock(id='SERVER_ID')
node_obj = mock.Mock(id='NODE_ID', cluster_id='', index=-1, data={})
profile._rollback_instance(node_obj, server_obj)
cc.server_force_delete.assert_called_once_with('SERVER_ID', True)
def test_rollback_with_no_instance(self):
cc = mock.Mock()
cc.port_delete.return_value = None
profile = server.ServerProfile('t', self.spec)
profile._computeclient = cc
server_obj = None
node_obj = mock.Mock(id='NODE_ID', cluster_id='', index=-1, data={})
profile._rollback_instance(node_obj, server_obj)
cc.server_force_delete.assert_not_called()
def test_rollback_instance_with_internal_error(self):
cc = mock.Mock()
cc.server_force_delete.side_effect = exc.InternalError()
profile = server.ServerProfile('t', self.spec)
profile._computeclient = cc
server_obj = mock.Mock(id='SERVER_ID')
node_obj = mock.Mock(id='NODE_ID', cluster_id='', index=-1, data={})
profile._rollback_instance(node_obj, server_obj)
cc.server_force_delete.assert_called_once_with('SERVER_ID', True)
def test_do_delete_ok(self):
profile = server.ServerProfile('t', self.spec)