Fix leaking resources during cluster recover
If a node recover was in progress when a cluster delete was called it would cause resources to be created, but unable to update the Node object as it no longer exists. With this patch if Node.update fails because the object no longer exists. Delete all resources created during the failed operation. Change-Id: I95a8b5791eee5d334f8841bebe52f37f6f5eb2e5
This commit is contained in:
parent
7e04d4a353
commit
a6a18b8df5
|
@ -768,9 +768,13 @@ class ServerProfile(base.Profile):
|
|||
port_attrs['floating'].update({'remove': True})
|
||||
internal_ports.append(port_attrs)
|
||||
if internal_ports:
|
||||
node_data = obj.data
|
||||
node_data.update(internal_ports=internal_ports)
|
||||
node_obj.Node.update(self.context, obj.id, {'data': node_data})
|
||||
try:
|
||||
node_data = obj.data
|
||||
node_data.update(internal_ports=internal_ports)
|
||||
node_obj.Node.update(self.context, obj.id, {'data': node_data})
|
||||
except exc.ResourceNotFound:
|
||||
self._rollback_ports(obj, internal_ports)
|
||||
raise
|
||||
return internal_ports
|
||||
|
||||
def _build_metadata(self, obj, usermeta):
|
||||
|
@ -809,14 +813,7 @@ class ServerProfile(base.Profile):
|
|||
|
||||
:param obj: The node object for which a server will be created.
|
||||
"""
|
||||
kwargs = {}
|
||||
for key in self.KEYS:
|
||||
# context is treated as connection parameters
|
||||
if key == self.CONTEXT:
|
||||
continue
|
||||
|
||||
if self.properties[key] is not None:
|
||||
kwargs[key] = self.properties[key]
|
||||
kwargs = self._generate_kwargs()
|
||||
|
||||
admin_pass = self.properties[self.ADMIN_PASS]
|
||||
if admin_pass:
|
||||
|
@ -890,6 +887,10 @@ class ServerProfile(base.Profile):
|
|||
# Update zone placement info if available
|
||||
self._update_zone_info(obj, server)
|
||||
return server.id
|
||||
except exc.ResourceNotFound:
|
||||
self._rollback_ports(obj, ports)
|
||||
self._rollback_instance(obj, server)
|
||||
raise
|
||||
except exc.InternalError as ex:
|
||||
if server and server.id:
|
||||
resource_id = server.id
|
||||
|
@ -908,6 +909,75 @@ class ServerProfile(base.Profile):
|
|||
message=six.text_type(ex),
|
||||
resource_id=resource_id)
|
||||
|
||||
def _generate_kwargs(self):
|
||||
"""Generate the base kwargs for a server.
|
||||
|
||||
:return:
|
||||
"""
|
||||
kwargs = {}
|
||||
for key in self.KEYS:
|
||||
# context is treated as connection parameters
|
||||
if key == self.CONTEXT:
|
||||
continue
|
||||
|
||||
if self.properties[key] is not None:
|
||||
kwargs[key] = self.properties[key]
|
||||
return kwargs
|
||||
|
||||
def _rollback_ports(self, obj, ports):
|
||||
"""Rollback any ports created after a ResourceNotFound exception.
|
||||
|
||||
:param obj: The node object.
|
||||
:param ports: A list of ports which attached to this server.
|
||||
:return:
|
||||
"""
|
||||
if not ports:
|
||||
return
|
||||
|
||||
LOG.warning(
|
||||
'Rolling back ports for Node %s.',
|
||||
obj.id
|
||||
)
|
||||
|
||||
for port in ports:
|
||||
if not port.get('remove', False):
|
||||
continue
|
||||
try:
|
||||
if (port.get('floating') and
|
||||
port['floating'].get('remove', False)):
|
||||
self.network(obj).floatingip_delete(
|
||||
port['floating']['id']
|
||||
)
|
||||
self.network(obj).port_delete(port['id'])
|
||||
except exc.InternalError as ex:
|
||||
LOG.debug(
|
||||
'Failed to delete port %s during rollback for Node %s: %s',
|
||||
port['id'], obj.id, ex
|
||||
)
|
||||
|
||||
def _rollback_instance(self, obj, server):
|
||||
"""Rollback an instance created after a ResourceNotFound exception.
|
||||
|
||||
:param obj: The node object.
|
||||
:param server: A server.
|
||||
:return:
|
||||
"""
|
||||
if not server or not server.id:
|
||||
return
|
||||
|
||||
LOG.warning(
|
||||
'Rolling back instance %s for Node %s.',
|
||||
server.id, obj.id
|
||||
)
|
||||
|
||||
try:
|
||||
self.compute(obj).server_force_delete(server.id, True)
|
||||
except exc.InternalError as ex:
|
||||
LOG.debug(
|
||||
'Failed to delete instance %s during rollback for Node %s: %s',
|
||||
server.id, obj.id, ex
|
||||
)
|
||||
|
||||
def do_delete(self, obj, **params):
|
||||
"""Delete the physical resource associated with the specified node.
|
||||
|
||||
|
|
|
@ -192,6 +192,76 @@ class TestNovaServerBasic(base.SenlinTestCase):
|
|||
mock_zone_info.assert_called_once_with(node_obj, fake_server)
|
||||
self.assertEqual('FAKE_ID', server_id)
|
||||
|
||||
@mock.patch.object(node_ob.Node, 'update')
|
||||
def test_do_create_fail_create_instance_when_node_longer_exists(
|
||||
self, mock_node_update):
|
||||
mock_node_update.side_effect = [
|
||||
None, exc.ResourceNotFound(type='Node', id='FAKE_NODE_ID')
|
||||
]
|
||||
|
||||
cc = mock.Mock()
|
||||
nc = mock.Mock()
|
||||
profile = server.ServerProfile('t', self.spec)
|
||||
profile._computeclient = cc
|
||||
profile._networkclient = nc
|
||||
profile._rollback_ports = mock.Mock()
|
||||
profile._rollback_instance = mock.Mock()
|
||||
self._stubout_profile(profile, mock_image=True, mock_flavor=True,
|
||||
mock_keypair=True, mock_net=False)
|
||||
node_obj = mock.Mock(id='FAKE_NODE_ID', index=123,
|
||||
availability_zone="AZ01",
|
||||
cluster_id='FAKE_CLUSTER_ID',
|
||||
data={
|
||||
'placement': {
|
||||
'zone': 'AZ1',
|
||||
'servergroup': 'SERVER_GROUP_1'
|
||||
}
|
||||
})
|
||||
node_obj.name = 'TEST_SERVER'
|
||||
fake_server = mock.Mock(id='FAKE_ID')
|
||||
cc.server_create.return_value = fake_server
|
||||
cc.server_get.return_value = fake_server
|
||||
|
||||
self.assertRaises(
|
||||
exc.ResourceNotFound, profile.do_create, node_obj
|
||||
)
|
||||
|
||||
profile._rollback_ports.assert_called_once()
|
||||
profile._rollback_instance.assert_called_once()
|
||||
|
||||
@mock.patch.object(node_ob.Node, 'update')
|
||||
def test_do_create_fail_create_port_when_node_longer_exists(
|
||||
self, mock_node_update):
|
||||
mock_node_update.side_effect = exc.ResourceNotFound(
|
||||
type='Node', id='FAKE_NODE_ID'
|
||||
)
|
||||
|
||||
cc = mock.Mock()
|
||||
nc = mock.Mock()
|
||||
profile = server.ServerProfile('t', self.spec)
|
||||
profile._computeclient = cc
|
||||
profile._networkclient = nc
|
||||
profile._rollback_ports = mock.Mock()
|
||||
profile._rollback_instance = mock.Mock()
|
||||
self._stubout_profile(profile, mock_image=True, mock_flavor=True,
|
||||
mock_keypair=True, mock_net=False)
|
||||
node_obj = mock.Mock(id='FAKE_NODE_ID', index=123,
|
||||
cluster_id='FAKE_CLUSTER_ID',
|
||||
data={
|
||||
'placement': {
|
||||
'zone': 'AZ1',
|
||||
'servergroup': 'SERVER_GROUP_1'
|
||||
}
|
||||
})
|
||||
node_obj.name = 'TEST_SERVER'
|
||||
|
||||
self.assertRaises(
|
||||
exc.ResourceNotFound, profile.do_create, node_obj
|
||||
)
|
||||
|
||||
profile._rollback_ports.assert_called_once()
|
||||
profile._rollback_instance.assert_not_called()
|
||||
|
||||
def test_do_create_invalid_image(self):
|
||||
profile = server.ServerProfile('s2', self.spec)
|
||||
err = exc.EResourceCreation(type='server', message='boom')
|
||||
|
@ -675,6 +745,124 @@ class TestNovaServerBasic(base.SenlinTestCase):
|
|||
self.assertEqual(1, cc.wait_for_server.call_count)
|
||||
self.assertEqual(0, mock_zone_info.call_count)
|
||||
|
||||
def test_rollback_ports(self):
|
||||
nc = mock.Mock()
|
||||
nc.port_delete.return_value = None
|
||||
nc.floatingip_delete.return_value = None
|
||||
profile = server.ServerProfile('t', self.spec)
|
||||
profile._networkclient = nc
|
||||
|
||||
ports = [
|
||||
{
|
||||
'id': 'FAKE_PORT_ID',
|
||||
'remove': True
|
||||
},
|
||||
{
|
||||
'floating': {
|
||||
'remove': True,
|
||||
'id': 'FAKE_FLOATING_ID',
|
||||
},
|
||||
'id': 'FAKE_PORT_ID',
|
||||
'remove': True
|
||||
},
|
||||
{
|
||||
'floating': {
|
||||
'remove': False,
|
||||
'id': 'FAKE_FLOATING_ID',
|
||||
},
|
||||
'id': 'FAKE_PORT_ID',
|
||||
'remove': False
|
||||
}
|
||||
]
|
||||
|
||||
node_obj = mock.Mock(id='NODE_ID', cluster_id='', index=-1, data={})
|
||||
|
||||
profile._rollback_ports(node_obj, ports)
|
||||
|
||||
nc.port_delete.assert_called()
|
||||
nc.floatingip_delete.assert_called_once_with('FAKE_FLOATING_ID')
|
||||
|
||||
def test_rollback_with_no_ports(self):
|
||||
nc = mock.Mock()
|
||||
nc.port_delete.return_value = None
|
||||
nc.floatingip_delete.return_value = None
|
||||
profile = server.ServerProfile('t', self.spec)
|
||||
profile._networkclient = nc
|
||||
|
||||
ports = []
|
||||
|
||||
node_obj = mock.Mock(id='NODE_ID', cluster_id='', index=-1, data={})
|
||||
|
||||
profile._rollback_ports(node_obj, ports)
|
||||
|
||||
nc.port_delete.assert_not_called()
|
||||
nc.floatingip_delete.assert_not_called()
|
||||
|
||||
def test_rollback_ports_with_internal_error(self):
|
||||
nc = mock.Mock()
|
||||
nc.port_delete.return_value = None
|
||||
nc.floatingip_delete.side_effect = exc.InternalError()
|
||||
profile = server.ServerProfile('t', self.spec)
|
||||
profile._networkclient = nc
|
||||
|
||||
ports = [{
|
||||
'floating': {
|
||||
'remove': True,
|
||||
'id': 'FAKE_FLOATING_ID',
|
||||
},
|
||||
'id': 'FAKE_PORT_ID',
|
||||
'remove': True
|
||||
}]
|
||||
|
||||
node_obj = mock.Mock(id='NODE_ID', cluster_id='', index=-1, data={})
|
||||
|
||||
profile._rollback_ports(node_obj, ports)
|
||||
|
||||
nc.port_delete.assert_not_called()
|
||||
nc.floatingip_delete.assert_called_once_with('FAKE_FLOATING_ID')
|
||||
|
||||
def test_rollback_instance(self):
|
||||
cc = mock.Mock()
|
||||
cc.port_delete.return_value = None
|
||||
profile = server.ServerProfile('t', self.spec)
|
||||
profile._computeclient = cc
|
||||
|
||||
server_obj = mock.Mock(id='SERVER_ID')
|
||||
|
||||
node_obj = mock.Mock(id='NODE_ID', cluster_id='', index=-1, data={})
|
||||
|
||||
profile._rollback_instance(node_obj, server_obj)
|
||||
|
||||
cc.server_force_delete.assert_called_once_with('SERVER_ID', True)
|
||||
|
||||
def test_rollback_with_no_instance(self):
|
||||
cc = mock.Mock()
|
||||
cc.port_delete.return_value = None
|
||||
profile = server.ServerProfile('t', self.spec)
|
||||
profile._computeclient = cc
|
||||
|
||||
server_obj = None
|
||||
|
||||
node_obj = mock.Mock(id='NODE_ID', cluster_id='', index=-1, data={})
|
||||
|
||||
profile._rollback_instance(node_obj, server_obj)
|
||||
|
||||
cc.server_force_delete.assert_not_called()
|
||||
|
||||
def test_rollback_instance_with_internal_error(self):
|
||||
cc = mock.Mock()
|
||||
cc.server_force_delete.side_effect = exc.InternalError()
|
||||
profile = server.ServerProfile('t', self.spec)
|
||||
profile._computeclient = cc
|
||||
|
||||
server_obj = mock.Mock(id='SERVER_ID')
|
||||
|
||||
node_obj = mock.Mock(id='NODE_ID', cluster_id='', index=-1, data={})
|
||||
|
||||
profile._rollback_instance(node_obj, server_obj)
|
||||
|
||||
cc.server_force_delete.assert_called_once_with('SERVER_ID', True)
|
||||
|
||||
def test_do_delete_ok(self):
|
||||
profile = server.ServerProfile('t', self.spec)
|
||||
|
||||
|
|
Loading…
Reference in New Issue