Delete VMs in error state after creation

If server creation leaves the server in an error state, delete the
server.

Change-Id: I43afa5f393cd8475d2686f1e61d96ec79dac00cd
Closes-Bug: #1847676
This commit is contained in:
Duc Truong 2019-10-10 22:02:51 +00:00
parent c19f2de68e
commit 60fd3ff874
6 changed files with 72 additions and 13 deletions

View File

@ -0,0 +1,4 @@
---
features:
- |
Added a new config option ``default_nova_timeout`` to specify the timeout, in seconds, for Nova API calls.

View File

@ -70,6 +70,9 @@ engine_opts = [
cfg.IntOpt('default_action_timeout',
default=3600,
help=_('Timeout in seconds for actions.')),
cfg.IntOpt('default_nova_timeout',
default=600,
help=_('Timeout in seconds for nova API calls.')),
cfg.IntOpt('max_actions_per_batch',
default=0,
help=_('Maximum number of node actions that each engine worker '

View File

@ -157,7 +157,7 @@ class NovaClient(base.DriverBase):
if failures is None:
failures = [consts.VS_ERROR]
if timeout is None:
timeout = cfg.CONF.default_action_timeout
timeout = cfg.CONF.default_nova_timeout
server_obj = self.conn.compute.find_server(server, False)
self.conn.compute.wait_for_server(server_obj, status=status,
@ -170,7 +170,7 @@ class NovaClient(base.DriverBase):
def wait_for_server_delete(self, server, timeout=None):
"""Wait for server deleting complete"""
if timeout is None:
timeout = cfg.CONF.default_action_timeout
timeout = cfg.CONF.default_nova_timeout
server_obj = self.conn.compute.find_server(server, True)
if server_obj:

View File

@ -13,6 +13,7 @@
import base64
import copy
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import encodeutils
import six
@ -883,7 +884,8 @@ class ServerProfile(base.Profile):
resource_id = None
try:
server = self.compute(obj).server_create(**kwargs)
self.compute(obj).wait_for_server(server.id)
self.compute(obj).wait_for_server(server.id,
cfg.CONF.default_nova_timeout)
server = self.compute(obj).server_get(server.id)
# Update zone placement info if available
self._update_zone_info(obj, server)
@ -891,7 +893,16 @@ class ServerProfile(base.Profile):
except exc.InternalError as ex:
if server and server.id:
resource_id = server.id
if ports:
LOG.debug('Deleting server %s that is ERROR state after'
' create.', server.id)
try:
obj.physical_id = server.id
self.do_delete(obj, internal_ports=ports)
except Exception:
LOG.error('Failed to delete server %s', server.id)
pass
elif ports:
self._delete_ports(obj, ports)
raise exc.EResourceCreation(type='server',
message=six.text_type(ex),
@ -911,7 +922,7 @@ class ServerProfile(base.Profile):
ignore_missing = params.get('ignore_missing', True)
internal_ports = obj.data.get('internal_ports', [])
force = params.get('force', False)
timeout = params.get('timeout', None)
timeout = params.get('timeout', cfg.CONF.default_nova_timeout)
try:
if server_id:

View File

@ -336,7 +336,7 @@ class TestNovaV2(base.SenlinTestCase):
def test_wait_for_server_with_default_timeout(self):
self.compute.find_server.return_value = 'foo'
timeout = cfg.CONF.default_action_timeout
timeout = cfg.CONF.default_nova_timeout
d = nova_v2.NovaClient(self.conn_params)
d.wait_for_server('foo')
@ -353,7 +353,7 @@ class TestNovaV2(base.SenlinTestCase):
self.compute.wait_for_delete.assert_called_once_with('FOO', wait=120)
def test_wait_for_server_delete_with_default_timeout(self):
cfg.CONF.set_override('default_action_timeout', 360)
cfg.CONF.set_override('default_nova_timeout', 360)
self.compute.find_server.return_value = 'FOO'
d = nova_v2.NovaClient(self.conn_params)

View File

@ -13,6 +13,7 @@
import base64
import mock
from oslo_config import cfg
from oslo_utils import encodeutils
import six
@ -600,9 +601,9 @@ class TestNovaServerBasic(base.SenlinTestCase):
self.assertEqual('FAKE_ID', ex.resource_id)
self.assertEqual('Failed in creating server: TIMEOUT.',
six.text_type(ex))
mock_node_obj.assert_called_once_with(mock.ANY, node_obj.id,
{'data': node_obj.data})
cc.wait_for_server.assert_called_once_with('FAKE_ID')
mock_node_obj.assert_not_called()
cc.wait_for_server.assert_called_once_with(
'FAKE_ID', cfg.CONF.default_nova_timeout)
@mock.patch.object(node_ob.Node, 'update')
def test_do_create_failed(self, mock_node_obj):
@ -637,6 +638,43 @@ class TestNovaServerBasic(base.SenlinTestCase):
self.assertEqual(0, cc.wait_for_server.call_count)
self.assertEqual(0, mock_zone_info.call_count)
# Checks the do_create failure path where server_create has already returned
# a server with an id but wait_for_server then raises InternalError: the
# profile is expected to call do_delete for that server and still raise
# EResourceCreation to the caller.
@mock.patch.object(node_ob.Node, 'update')
@mock.patch.object(server.ServerProfile, 'do_delete')
def test_do_create_failed_with_server_id(self, mock_profile_delete,
mock_node_obj):
# Replace the compute client with a mock so no real Nova call is made.
cc = mock.Mock()
profile = server.ServerProfile('t', self.spec)
profile._computeclient = cc
self._stubout_profile(profile, mock_image=True, mock_flavor=True,
mock_keypair=True, mock_net=True)
mock_zone_info = self.patchobject(profile, '_update_zone_info')
node_obj = mock.Mock(id='FAKE_NODE_ID', index=123,
cluster_id='FAKE_CLUSTER_ID',
data={
'placement': {
'zone': 'AZ1',
'servergroup': 'SERVER_GROUP_1'
}
})
node_obj.name = 'TEST_SERVER'
# server_create succeeds and yields an id; the subsequent wait fails.
fake_server = mock.Mock(id='FAKE_ID')
cc.server_create.return_value = fake_server
cc.wait_for_server.side_effect = exc.InternalError(
code=500, message="creation failed.")
# do it: the InternalError must surface as EResourceCreation
ex = self.assertRaises(exc.EResourceCreation, profile.do_create,
node_obj)
# assertions
# Node.update (patched as mock_node_obj) must not be called on this path.
mock_node_obj.assert_not_called()
# do_delete is invoked exactly once with the node and the created ports.
# NOTE(review): the expected port list presumably comes from
# _stubout_profile(mock_net=True) -- confirm against that helper.
mock_profile_delete.assert_called_once_with(
node_obj, internal_ports=[{'id': 'FAKE_PORT'}])
self.assertEqual('Failed in creating server: creation failed.',
six.text_type(ex))
# The wait was attempted once and zone info was never updated.
self.assertEqual(1, cc.wait_for_server.call_count)
self.assertEqual(0, mock_zone_info.call_count)
def test_do_delete_ok(self):
profile = server.ServerProfile('t', self.spec)
@ -651,7 +689,8 @@ class TestNovaServerBasic(base.SenlinTestCase):
self.assertTrue(res)
cc.server_delete.assert_called_once_with('FAKE_ID', True)
cc.wait_for_server_delete.assert_called_once_with('FAKE_ID', None)
cc.wait_for_server_delete.assert_called_once_with(
'FAKE_ID', cfg.CONF.default_nova_timeout)
def test_do_delete_no_physical_id(self):
profile = server.ServerProfile('t', self.spec)
@ -732,7 +771,8 @@ class TestNovaServerBasic(base.SenlinTestCase):
nc.floatingip_delete.assert_called_once_with('FAKE_FLOATING_ID')
nc.port_delete.assert_called_once_with('FAKE_PORT_ID')
cc.server_delete.assert_called_once_with('FAKE_ID', True)
cc.wait_for_server_delete.assert_called_once_with('FAKE_ID', None)
cc.wait_for_server_delete.assert_called_once_with(
'FAKE_ID', cfg.CONF.default_nova_timeout)
def test_do_delete_ignore_missing_force(self):
profile = server.ServerProfile('t', self.spec)
@ -747,7 +787,8 @@ class TestNovaServerBasic(base.SenlinTestCase):
self.assertTrue(res)
cc.server_force_delete.assert_called_once_with('FAKE_ID', False)
cc.wait_for_server_delete.assert_called_once_with('FAKE_ID', None)
cc.wait_for_server_delete.assert_called_once_with(
'FAKE_ID', cfg.CONF.default_nova_timeout)
@mock.patch.object(node_ob.Node, 'update')
def test_do_delete_with_delete_failure(self, mock_node_obj):