Add command to delete BUILD instances and clusters
Sometimes an instance/cluster can be stuck in BUILD state forever. Attempting to delete the instance in this state is currently not allowed. Add force-delete and reset-status commands. Reset-status will reset the status of an instance to ERROR and of a cluster to NONE. The reset-status command can only be used if the instance/cluster is in BUILD or ERROR state. Resetting the status of an instance in ERROR state can be useful as an instance might go ACTIVE after the specified timeout. Once the status has been reset it is possible for an instance to go ACTIVE if it receives a heartbeat from the guestagent. Force-delete will combine the functionality of reset-status and delete. Change-Id: I83f6cdcdd884e51d002295b0d1f07341990e512c Depends-On: I957b4be5030e493e0eb8c6b6855d41b942b2823c Partial-Bug: #1579141
This commit is contained in:
parent
1ef945d6fa
commit
cee1f8e6c7
6
releasenotes/notes/force_delete-c2b06dbead554726.yaml
Normal file
6
releasenotes/notes/force_delete-c2b06dbead554726.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
features:
|
||||
- The reset-status command will set the task and status
|
||||
of an instance to ERROR after which it can be deleted.
|
||||
- The force-delete command will allow the deletion of
|
||||
an instance even if the instance is stuck in BUILD
|
||||
state.
|
@ -21,7 +21,8 @@ from trove.cluster.tasks import ClusterTasks
|
||||
from trove.common import cfg
|
||||
from trove.common import exception
|
||||
from trove.common.i18n import _
|
||||
from trove.common.notification import DBaaSClusterGrow, DBaaSClusterShrink
|
||||
from trove.common.notification import (DBaaSClusterGrow, DBaaSClusterShrink,
|
||||
DBaaSClusterResetStatus)
|
||||
from trove.common.notification import StartNotification
|
||||
from trove.common import remote
|
||||
from trove.common import server_group as srv_grp
|
||||
@ -136,6 +137,16 @@ class Cluster(object):
|
||||
LOG.info(_("Setting task to NONE on cluster %s") % self.id)
|
||||
self.update_db(task_status=ClusterTasks.NONE)
|
||||
|
||||
def reset_status(self):
    """Reset the task of this cluster and the status of its instances.

    The cluster is first checked with ``validate_cluster_available``,
    passing ``ClusterTasks.BUILDING_INITIAL`` as the accepted task
    (presumably this raises when the cluster is busy with any other
    task -- confirm against that helper).  The cluster task is then reset
    via ``reset_task`` and ``reset_status`` is called on every
    non-deleted instance that belongs to the cluster.
    """
    self.validate_cluster_available([ClusterTasks.BUILDING_INITIAL])
    # Pass the id as a logger argument so the message is only interpolated
    # when the record is actually emitted.
    LOG.info(_("Resetting status to NONE on cluster %s"), self.id)
    self.reset_task()
    instances = inst_models.DBInstance.find_all(cluster_id=self.id,
                                                deleted=False).all()
    for inst in instances:
        instance = inst_models.load_any_instance(self.context, inst.id)
        instance.reset_status()
|
||||
|
||||
@property
|
||||
def id(self):
|
||||
return self.db_info.id
|
||||
@ -291,6 +302,12 @@ class Cluster(object):
|
||||
with StartNotification(context, cluster_id=self.id):
|
||||
instance_ids = [instance['id'] for instance in param]
|
||||
return self.shrink(instance_ids)
|
||||
elif action == "reset-status":
|
||||
context.notification = DBaaSClusterResetStatus(context,
|
||||
request=req)
|
||||
with StartNotification(context, cluster_id=self.id):
|
||||
return self.reset_status()
|
||||
|
||||
else:
|
||||
raise exception.BadRequest(_("Action %s not supported") % action)
|
||||
|
||||
|
@ -506,6 +506,16 @@ class DBaaSInstanceDelete(DBaaSAPINotification):
|
||||
return ['instance_id']
|
||||
|
||||
|
||||
class DBaaSInstanceResetStatus(DBaaSAPINotification):
    """API notification describing an instance reset-status request."""

    # Both methods are concrete overrides; they are not marked abstract
    # because this class is instantiated directly by the API layer.

    def event_type(self):
        """Return the event type name used for this notification."""
        return 'instance_reset_status'

    def required_start_traits(self):
        """Return the names of the traits required when starting."""
        return ['instance_id']
|
||||
|
||||
|
||||
class DBaaSInstanceDetach(DBaaSAPINotification):
|
||||
|
||||
@abc.abstractmethod
|
||||
@ -565,6 +575,17 @@ class DBaaSClusterDelete(DBaaSAPINotification):
|
||||
return ['cluster_id']
|
||||
|
||||
|
||||
class DBaaSClusterResetStatus(DBaaSAPINotification):
    """API notification describing a cluster reset-status request."""

    # Both methods are concrete overrides; they are not marked abstract
    # because this class is instantiated directly when the action runs.

    def event_type(self):
        """Return the event type name used for this notification."""
        return 'cluster_reset_status'

    def required_start_traits(self):
        """Return the names of the traits required when starting."""
        return ['cluster_id']
|
||||
|
||||
|
||||
class DBaaSClusterAddShard(DBaaSAPINotification):
|
||||
|
||||
@abc.abstractmethod
|
||||
|
@ -246,6 +246,10 @@ class SimpleInstance(object):
|
||||
def is_building(self):
|
||||
return self.status in [InstanceStatus.BUILD]
|
||||
|
||||
@property
def is_error(self):
    """True when the instance status is ``InstanceStatus.ERROR``."""
    error_states = [InstanceStatus.ERROR]
    return self.status in error_states
|
||||
|
||||
@property
|
||||
def is_datastore_running(self):
|
||||
"""True if the service status indicates datastore is up and running."""
|
||||
@ -292,6 +296,10 @@ class SimpleInstance(object):
|
||||
if self.db_info.task_status.is_error:
|
||||
return InstanceStatus.ERROR
|
||||
|
||||
# If we've reset the status, show it as an error
|
||||
if tr_instance.ServiceStatuses.UNKNOWN == self.datastore_status.status:
|
||||
return InstanceStatus.ERROR
|
||||
|
||||
# Check for taskmanager status.
|
||||
action = self.db_info.task_status.action
|
||||
if 'BUILDING' == action:
|
||||
@ -597,8 +605,9 @@ class BaseInstance(SimpleInstance):
|
||||
def delete(self):
|
||||
def _delete_resources():
|
||||
if self.is_building:
|
||||
raise exception.UnprocessableEntity("Instance %s is not ready."
|
||||
% self.id)
|
||||
raise exception.UnprocessableEntity(
|
||||
"Instance %s is not ready. (Status is %s)." %
|
||||
(self.id, self.status))
|
||||
LOG.debug("Deleting instance with compute id = %s.",
|
||||
self.db_info.compute_instance_id)
|
||||
|
||||
@ -718,6 +727,20 @@ class BaseInstance(SimpleInstance):
|
||||
|
||||
return files
|
||||
|
||||
def reset_status(self):
    """Reset the task status and mark the service status as UNKNOWN.

    Only allowed while ``is_building`` or ``is_error`` is true.

    :raises exception.UnprocessableEntity: if the instance is in neither
        of those states.
    """
    resettable = self.is_building or self.is_error
    if not resettable:
        raise exception.UnprocessableEntity(
            "Instance %s status can only be reset in BUILD or ERROR "
            "state." % self.id)

    LOG.info(_LI("Resetting the status to ERROR on instance %s."),
             self.id)
    self.reset_task_status()

    # Store UNKNOWN as the service status for this instance id.
    service_status = InstanceServiceStatus.find_by(instance_id=self.id)
    service_status.set_status(tr_instance.ServiceStatuses.UNKNOWN)
    service_status.save()
|
||||
|
||||
|
||||
class FreshInstance(BaseInstance):
|
||||
@classmethod
|
||||
@ -727,8 +750,8 @@ class FreshInstance(BaseInstance):
|
||||
|
||||
class BuiltInstance(BaseInstance):
|
||||
@classmethod
|
||||
def load(cls, context, id):
|
||||
return load_instance(cls, context, id, needs_server=True)
|
||||
def load(cls, context, id, needs_server=True):
|
||||
return load_instance(cls, context, id, needs_server=needs_server)
|
||||
|
||||
|
||||
class Instance(BuiltInstance):
|
||||
|
@ -78,7 +78,6 @@ class InstanceController(wsgi.Controller):
|
||||
if not body:
|
||||
raise exception.BadRequest(_("Invalid request body."))
|
||||
context = req.environ[wsgi.CONTEXT_KEY]
|
||||
instance = models.Instance.load(context, id)
|
||||
_actions = {
|
||||
'restart': self._action_restart,
|
||||
'resize': self._action_resize,
|
||||
@ -86,6 +85,7 @@ class InstanceController(wsgi.Controller):
|
||||
'promote_to_replica_source':
|
||||
self._action_promote_to_replica_source,
|
||||
'eject_replica_source': self._action_eject_replica_source,
|
||||
'reset_status': self._action_reset_status,
|
||||
}
|
||||
selected_action = None
|
||||
action_name = None
|
||||
@ -97,6 +97,10 @@ class InstanceController(wsgi.Controller):
|
||||
"instance %(instance_id)s for tenant '%(tenant_id)s'"),
|
||||
{'action_name': action_name, 'instance_id': id,
|
||||
'tenant_id': tenant_id})
|
||||
needs_server = True
|
||||
if action_name in ['reset_status']:
|
||||
needs_server = False
|
||||
instance = models.Instance.load(context, id, needs_server=needs_server)
|
||||
return selected_action(context, req, instance, body)
|
||||
|
||||
def _action_restart(self, context, req, instance, body):
|
||||
@ -163,6 +167,17 @@ class InstanceController(wsgi.Controller):
|
||||
instance.eject_replica_source()
|
||||
return wsgi.Result(None, 202)
|
||||
|
||||
def _action_reset_status(self, context, req, instance, body):
    """Handle the ``reset_status`` instance action.

    Attaches a ``DBaaSInstanceResetStatus`` notification to the context,
    resets the status of the instance and fails its backups through
    ``backup_model.fail_for_instance``.

    :returns: an empty ``wsgi.Result`` with HTTP status 202.
    """
    context.notification = notification.DBaaSInstanceResetStatus(
        context, request=req)
    with StartNotification(context, instance_id=instance.id):
        instance.reset_status()

        # Let the logger interpolate the id lazily instead of building
        # the string even when debug logging is disabled.
        LOG.debug("Failing backups for instance %s.", instance.id)
        backup_model.fail_for_instance(instance.id)

    return wsgi.Result(None, 202)
|
||||
|
||||
def index(self, req, tenant_id):
|
||||
"""Return all instances."""
|
||||
LOG.info(_LI("Listing database instances for tenant '%s'"), tenant_id)
|
||||
|
@ -619,7 +619,9 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin):
|
||||
status == rd_instance.ServiceStatuses.INSTANCE_READY):
|
||||
return True
|
||||
elif status not in [rd_instance.ServiceStatuses.NEW,
|
||||
rd_instance.ServiceStatuses.BUILDING]:
|
||||
rd_instance.ServiceStatuses.BUILDING,
|
||||
rd_instance.ServiceStatuses.UNKNOWN,
|
||||
rd_instance.ServiceStatuses.DELETED]:
|
||||
raise TroveError(_("Service not active, status: %s") % status)
|
||||
|
||||
c_id = self.db_info.compute_instance_id
|
||||
|
@ -42,6 +42,7 @@ from trove.tests.scenario.groups import instance_actions_group
|
||||
from trove.tests.scenario.groups import instance_create_group
|
||||
from trove.tests.scenario.groups import instance_delete_group
|
||||
from trove.tests.scenario.groups import instance_error_create_group
|
||||
from trove.tests.scenario.groups import instance_force_delete_group
|
||||
from trove.tests.scenario.groups import instance_upgrade_group
|
||||
from trove.tests.scenario.groups import module_group
|
||||
from trove.tests.scenario.groups import negative_cluster_actions_group
|
||||
@ -150,6 +151,9 @@ instance_error_create_groups.extend([instance_error_create_group.GROUP])
|
||||
instance_upgrade_groups = list(instance_create_groups)
|
||||
instance_upgrade_groups.extend([instance_upgrade_group.GROUP])
|
||||
|
||||
instance_force_delete_groups = list(base_groups)
|
||||
instance_force_delete_groups.extend([instance_force_delete_group.GROUP])
|
||||
|
||||
backup_groups = list(instance_create_groups)
|
||||
backup_groups.extend([groups.BACKUP,
|
||||
groups.BACKUP_INST])
|
||||
@ -195,12 +199,13 @@ user_actions_groups.extend([user_actions_group.GROUP])
|
||||
# groups common to all datastores
|
||||
common_groups = list(instance_actions_groups)
|
||||
common_groups.extend([guest_log_groups, instance_error_create_groups,
|
||||
module_groups])
|
||||
instance_force_delete_groups, module_groups])
|
||||
|
||||
# Register: Component based groups
|
||||
register(["backup"], backup_groups)
|
||||
register(["backup_incremental"], backup_incremental_groups)
|
||||
register(["cluster"], cluster_actions_groups)
|
||||
register(["common"], common_groups)
|
||||
register(["configuration"], configuration_groups)
|
||||
register(["configuration_create"], configuration_create_groups)
|
||||
register(["database"], database_actions_groups)
|
||||
@ -209,6 +214,7 @@ register(["instance", "instance_actions"], instance_actions_groups)
|
||||
register(["instance_create"], instance_create_groups)
|
||||
register(["instance_error_create"], instance_error_create_groups)
|
||||
register(["instance_upgrade"], instance_upgrade_groups)
|
||||
register(["instance_force_delete"], instance_force_delete_groups)
|
||||
register(["module"], module_groups)
|
||||
register(["module_create"], module_create_groups)
|
||||
register(["replication"], replication_groups)
|
||||
|
@ -87,6 +87,11 @@ INST_ERROR_DELETE = "scenario.inst_error_delete_grp"
|
||||
INST_ERROR_DELETE_WAIT = "scenario.inst_error_delete_wait_grp"
|
||||
|
||||
|
||||
# Instance Force Delete Group
|
||||
INST_FORCE_DELETE = "scenario.inst_force_delete_grp"
|
||||
INST_FORCE_DELETE_WAIT = "scenario.inst_force_delete_wait_grp"
|
||||
|
||||
|
||||
# Module Group
|
||||
MODULE_CREATE = "scenario.module_create_grp"
|
||||
MODULE_DELETE = "scenario.module_delete_grp"
|
||||
|
@ -52,8 +52,8 @@ class InstanceErrorCreateGroup(TestGroup):
|
||||
|
||||
|
||||
@test(depends_on_groups=[groups.INST_ERROR_CREATE],
|
||||
groups=[GROUP, groups.INST_ERROR_CREATE_WAIT],
|
||||
runs_after_groups=[groups.MODULE_CREATE, groups.CFGGRP_CREATE])
|
||||
runs_after_groups=[groups.MODULE_CREATE, groups.CFGGRP_CREATE],
|
||||
groups=[GROUP, groups.INST_ERROR_CREATE_WAIT])
|
||||
class InstanceErrorCreateWaitGroup(TestGroup):
|
||||
"""Test that Instance Error Create Completes."""
|
||||
|
||||
@ -94,6 +94,7 @@ class InstanceErrorDeleteGroup(TestGroup):
|
||||
|
||||
|
||||
@test(depends_on_groups=[groups.INST_ERROR_DELETE],
|
||||
runs_after_groups=[groups.MODULE_INST_CREATE],
|
||||
groups=[GROUP, groups.INST_ERROR_DELETE_WAIT])
|
||||
class InstanceErrorDeleteWaitGroup(TestGroup):
|
||||
"""Test that Instance Error Delete Completes."""
|
||||
|
67
trove/tests/scenario/groups/instance_force_delete_group.py
Normal file
67
trove/tests/scenario/groups/instance_force_delete_group.py
Normal file
@ -0,0 +1,67 @@
|
||||
# Copyright 2016 Tesora Inc.
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from proboscis import test
|
||||
|
||||
from trove.tests import PRE_INSTANCES
|
||||
from trove.tests.scenario import groups
|
||||
from trove.tests.scenario.groups.test_group import TestGroup
|
||||
from trove.tests.scenario.runners import test_runners
|
||||
|
||||
|
||||
GROUP = "scenario.instance_force_delete_group"
|
||||
|
||||
|
||||
class InstanceForceDeleteRunnerFactory(test_runners.RunnerFactory):
    """Runner factory for the instance force-delete scenario tests."""

    # Name of the runner class and of the namespace that holds it.
    _runner_cls = 'InstanceForceDeleteRunner'
    _runner_ns = 'instance_force_delete_runners'
|
||||
|
||||
|
||||
@test(
    depends_on_groups=["services.initialize"],
    runs_after_groups=[PRE_INSTANCES, groups.INST_ERROR_CREATE],
    groups=[GROUP, groups.INST_FORCE_DELETE],
)
class InstanceForceDeleteGroup(TestGroup):
    """Test Instance Force Delete functionality."""

    def __init__(self):
        # Obtain the runner from the factory, then hand it to the base.
        runner = InstanceForceDeleteRunnerFactory.instance()
        super(InstanceForceDeleteGroup, self).__init__(runner)

    @test
    def create_build_instance(self):
        """Create an instance in BUILD state."""
        runner = self.test_runner
        runner.run_create_build_instance()

    @test(depends_on=['create_build_instance'])
    def delete_build_instance(self):
        """Make sure the instance in BUILD state deletes."""
        runner = self.test_runner
        runner.run_delete_build_instance()
|
||||
|
||||
|
||||
@test(
    depends_on_groups=[groups.INST_FORCE_DELETE],
    runs_after_groups=[groups.MODULE_INST_CREATE],
    groups=[GROUP, groups.INST_FORCE_DELETE_WAIT],
)
class InstanceForceDeleteWaitGroup(TestGroup):
    """Make sure the Force Delete instance goes away."""

    def __init__(self):
        # Obtain the runner from the factory, then hand it to the base.
        runner = InstanceForceDeleteRunnerFactory.instance()
        super(InstanceForceDeleteWaitGroup, self).__init__(runner)

    @test
    def wait_for_force_delete(self):
        """Wait for the Force Delete instance to be gone."""
        runner = self.test_runner
        runner.run_wait_for_force_delete()
|
@ -286,7 +286,7 @@ class ModuleCreateGroup(TestGroup):
|
||||
|
||||
|
||||
@test(depends_on_groups=[groups.INST_CREATE_WAIT, groups.MODULE_CREATE],
|
||||
runs_after_groups=[groups.INST_ERROR_DELETE],
|
||||
runs_after_groups=[groups.INST_ERROR_DELETE, groups.INST_FORCE_DELETE],
|
||||
groups=[GROUP, groups.MODULE_INST, groups.MODULE_INST_CREATE])
|
||||
class ModuleInstCreateGroup(TestGroup):
|
||||
"""Test Module Instance Create functionality."""
|
||||
|
@ -0,0 +1,54 @@
|
||||
# Copyright 2016 Tesora Inc.
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from proboscis import SkipTest
|
||||
|
||||
from trove.tests.scenario.runners.test_runners import TestRunner
|
||||
|
||||
|
||||
class InstanceForceDeleteRunner(TestRunner):
    """Scenario runner that force-deletes an instance it has just created."""

    def __init__(self):
        super(InstanceForceDeleteRunner, self).__init__(sleep_time=1)

        # Id of the instance created by run_create_build_instance.  It stays
        # None when that step is skipped, which turns the later steps into
        # no-ops.
        self.build_inst_id = None

    def run_create_build_instance(self, expected_states=None,
                                  expected_http_code=200):
        """Create an extra instance and record its id for later steps.

        :param expected_states: states passed to ``assert_instance_action``;
            defaults to ``['NEW', 'BUILD']``.
        :param expected_http_code: HTTP code passed to
            ``assert_instance_action``.
        :raises SkipTest: when the run is using an existing instance.
        """
        # A fresh list per call avoids sharing one mutable default object
        # between invocations.
        if expected_states is None:
            expected_states = ['NEW', 'BUILD']

        if self.is_using_existing_instance:
            raise SkipTest("Using an existing instance.")

        name = self.instance_info.name + '_build'
        flavor = self.get_instance_flavor()

        inst = self.auth_client.instances.create(
            name,
            self.get_flavor_href(flavor),
            self.instance_info.volume,
            nics=self.instance_info.nics,
            datastore=self.instance_info.dbaas_datastore,
            datastore_version=self.instance_info.dbaas_datastore_version)
        self.assert_instance_action([inst.id], expected_states,
                                    expected_http_code)
        self.build_inst_id = inst.id

    def run_delete_build_instance(self, expected_http_code=202):
        """Force-delete the recorded instance and check the HTTP code."""
        if self.build_inst_id:
            self.auth_client.instances.force_delete(self.build_inst_id)
            self.assert_client_code(expected_http_code)

    def run_wait_for_force_delete(self):
        """Wait until the recorded instance is gone, if one was created."""
        if self.build_inst_id:
            self.assert_all_gone([self.build_inst_id], ['SHUTDOWN'])
|
@ -139,7 +139,7 @@ class RunnerFactory(object):
|
||||
# such as a missing override class. Anything else
|
||||
# shouldn't be suppressed.
|
||||
l_msg = ie.message.lower()
|
||||
if load_type not in l_msg or (
|
||||
if (load_type and load_type not in l_msg) or (
|
||||
'no module named' not in l_msg and
|
||||
'cannot be found' not in l_msg):
|
||||
raise
|
||||
|
Loading…
Reference in New Issue
Block a user