Gracefully stop for task based deployment
Introduce a new node status, 'stopped', used for nodes on which the orchestrator successfully performed a graceful stop of a task-based deployment. Nodes in this status are eligible for redeployment. Also introduce a new node error_type, 'stop_deployment', which is used when a graceful stop fails on a node (for example, because of a timeout). It is applied automatically by the receiver to nodes in the 'deploying' status if the orchestrator returns an error for the stop-deployment task. Change-Id: I30d606d7d11d670d1a68ee90b01b932c1543fccc Implements: blueprint graceful-stop-restart-deployment
This commit is contained in:
parent
b0cba9a677
commit
a7cab8077d
@ -126,6 +126,7 @@ NODE_STATUSES = Enum(
|
||||
'deploying',
|
||||
'error',
|
||||
'removing',
|
||||
'stopped',
|
||||
)
|
||||
|
||||
NODE_ERRORS = Enum(
|
||||
@ -133,6 +134,7 @@ NODE_ERRORS = Enum(
|
||||
'provision',
|
||||
'deletion',
|
||||
'discover',
|
||||
'stop_deployment'
|
||||
)
|
||||
|
||||
NODE_GROUPS = Enum(
|
||||
|
@ -42,7 +42,7 @@ cluster_statuses_old = (
|
||||
'error',
|
||||
'remove',
|
||||
'update',
|
||||
'update_error'
|
||||
'update_error',
|
||||
)
|
||||
cluster_statuses_new = (
|
||||
'new',
|
||||
@ -53,6 +53,38 @@ cluster_statuses_new = (
|
||||
'remove',
|
||||
'partially_deployed'
|
||||
)
|
||||
# Node status ENUM values before this migration.
node_statuses_old = (
    'ready',
    'discover',
    'provisioning',
    'provisioned',
    'deploying',
    'error',
    'removing',
)

# After the migration the 'stopped' status is added: it marks nodes whose
# task-based deployment was gracefully stopped, making them eligible for
# redeployment.
node_statuses_new = node_statuses_old + ('stopped',)

# Node error_type ENUM values before this migration.
node_errors_old = (
    'deploy',
    'provision',
    'deletion',
    'discover',
)

# 'stop_deployment' is set by the receiver when a graceful deployment stop
# fails on a node (e.g. because of a timeout).
node_errors_new = node_errors_old + ('stop_deployment',)
|
||||
|
||||
|
||||
def upgrade():
|
||||
@ -64,9 +96,13 @@ def upgrade():
|
||||
upgrade_node_attributes()
|
||||
upgrade_remove_wizard_metadata_from_releases()
|
||||
drop_legacy_patching()
|
||||
upgrade_node_status_attributes()
|
||||
upgrade_node_stop_deployment_error_type()
|
||||
|
||||
|
||||
def downgrade():
|
||||
downgrade_node_stop_deployment_error_type()
|
||||
downgrade_node_status_attributes()
|
||||
restore_legacy_patching()
|
||||
downgrade_remove_wizard_metadata_from_releases()
|
||||
downgrade_node_attributes()
|
||||
@ -752,3 +788,43 @@ def restore_legacy_patching():
|
||||
cluster_statuses_new, # new options
|
||||
cluster_statuses_old, # old options
|
||||
)
|
||||
|
||||
|
||||
def upgrade_node_status_attributes():
    """Extend the ``node_status`` ENUM with the new 'stopped' value."""
    upgrade_enum(
        "nodes",             # table
        "status",            # column
        "node_status",       # ENUM name
        node_statuses_old,   # current (pre-migration) options
        node_statuses_new,   # target (post-migration) options
    )
|
||||
|
||||
|
||||
def downgrade_node_status_attributes():
    """Revert the ``node_status`` ENUM to its pre-migration values."""
    # upgrade_enum takes (current options, target options); for a downgrade
    # the "new" set is current and the "old" set is the target.
    upgrade_enum(
        "nodes",             # table
        "status",            # column
        "node_status",       # ENUM name
        node_statuses_new,   # current options
        node_statuses_old,   # target options
    )
|
||||
|
||||
|
||||
def upgrade_node_stop_deployment_error_type():
    """Extend the ``node_error_type`` ENUM with 'stop_deployment'."""
    upgrade_enum(
        "nodes",            # table
        "error_type",       # column
        "node_error_type",  # ENUM name
        node_errors_old,    # current (pre-migration) options
        node_errors_new,    # target (post-migration) options
    )
|
||||
|
||||
|
||||
def downgrade_node_stop_deployment_error_type():
    """Revert the ``node_error_type`` ENUM to its pre-migration values."""
    # Same helper as the upgrade, with current/target option sets swapped.
    upgrade_enum(
        "nodes",            # table
        "error_type",       # column
        "node_error_type",  # ENUM name
        node_errors_new,    # current options
        node_errors_old,    # target options
    )
|
||||
|
@ -163,8 +163,11 @@ class Node(Base):
|
||||
@property
def needs_redeploy(self):
    """Whether this node should be (re)deployed.

    True for nodes in the 'error', 'provisioned' or 'stopped' status, or
    nodes that have pending roles, unless the node is scheduled for
    deletion.
    """
    redeployable_statuses = [
        consts.NODE_STATUSES.error,
        consts.NODE_STATUSES.provisioned,
        consts.NODE_STATUSES.stopped,
    ]
    return (self.status in redeployable_statuses or
            len(self.pending_roles)) and not self.pending_deletion
|
||||
|
||||
@property
|
||||
def needs_redeletion(self):
|
||||
|
@ -649,7 +649,7 @@ class NailgunReceiver(object):
|
||||
task.cluster.status = consts.CLUSTER_STATUSES.stopped
|
||||
|
||||
if stop_tasks:
|
||||
map(db().delete, stop_tasks)
|
||||
objects.Task.bulk_delete(x.id for x in stop_tasks)
|
||||
|
||||
node_uids = [n['uid'] for n in itertools.chain(nodes, ia_nodes)]
|
||||
q_nodes = objects.NodeCollection.filter_by_id_list(None, node_uids)
|
||||
@ -658,7 +658,6 @@ class NailgunReceiver(object):
|
||||
cluster_id=task.cluster_id
|
||||
)
|
||||
q_nodes = objects.NodeCollection.order_by(q_nodes, 'id')
|
||||
q_nodes = objects.NodeCollection.lock_for_update(q_nodes)
|
||||
|
||||
# locking Nodes for update
|
||||
update_nodes = objects.NodeCollection.lock_for_update(
|
||||
@ -677,8 +676,6 @@ class NailgunReceiver(object):
|
||||
|
||||
message = (
|
||||
u"Deployment of environment '{0}' was successfully stopped. "
|
||||
u"Please make changes and reset the environment "
|
||||
u"if you want to redeploy it."
|
||||
.format(task.cluster.name or task.cluster_id)
|
||||
)
|
||||
|
||||
@ -687,6 +684,43 @@ class NailgunReceiver(object):
|
||||
message,
|
||||
task.cluster_id
|
||||
)
|
||||
elif status == consts.TASK_STATUSES.error:
|
||||
task.cluster.status = consts.CLUSTER_STATUSES.error
|
||||
|
||||
if stop_tasks:
|
||||
objects.Task.bulk_delete(x.id for x in stop_tasks)
|
||||
|
||||
q_nodes = objects.NodeCollection.filter_by(
|
||||
None,
|
||||
cluster_id=task.cluster_id
|
||||
)
|
||||
q_nodes = objects.NodeCollection.filter_by(
|
||||
q_nodes,
|
||||
status=consts.NODE_STATUSES.deploying
|
||||
)
|
||||
q_nodes = objects.NodeCollection.order_by(q_nodes, 'id')
|
||||
|
||||
update_nodes = objects.NodeCollection.lock_for_update(
|
||||
q_nodes
|
||||
).all()
|
||||
|
||||
for node_db in update_nodes:
|
||||
node_db.status = consts.NODE_STATUSES.error
|
||||
node_db.progress = 100
|
||||
node_db.error_type = consts.NODE_ERRORS.stop_deployment
|
||||
|
||||
db().flush()
|
||||
message = (
|
||||
u"Deployment of environment '{0}' was failed to stop: {1}. "
|
||||
u"Please check logs for details."
|
||||
.format(task.cluster.name or task.cluster_id, message)
|
||||
)
|
||||
|
||||
notifier.notify(
|
||||
"error",
|
||||
message,
|
||||
task.cluster_id
|
||||
)
|
||||
|
||||
data = {'status': status, 'progress': progress, 'message': message}
|
||||
objects.Task.update(task, data)
|
||||
|
@ -75,8 +75,7 @@ class TestStopDeployment(BaseIntegrationTest):
|
||||
|
||||
self.assertRegexpMatches(
|
||||
notification.message,
|
||||
'Please make changes and reset the environment '
|
||||
'if you want to redeploy it.')
|
||||
'was successfully stopped')
|
||||
|
||||
# FIXME(aroma): remove when stop action will be reworked for ha
|
||||
# cluster. To get more details, please, refer to [1]
|
||||
|
@ -119,6 +119,29 @@ class TestTaskHelpers(BaseTestCase):
|
||||
computes = self.filter_by_role(nodes, 'compute')
|
||||
self.assertEqual(len(computes), 2)
|
||||
|
||||
def test_redeploy_with_stopped_nodes(self):
    """Nodes in 'stopped' status and nodes with a 'stop_deployment'
    error are selected for redeployment alongside ordinary errors."""
    cluster = self.create_env([
        {'roles': ['controller'], 'status': 'error'},
        {'roles': ['controller'], 'status': 'stopped'},
        {'roles': ['controller'], 'status': 'stopped'},
        {'roles': ['compute', 'cinder'], 'status': 'stopped'},
        {'roles': ['compute'], 'status': 'error',
         'error_type': 'stop_deployment'},
        {'roles': ['cinder'], 'status': 'error',
         'error_type': 'deploy'}])

    nodes = TaskHelper.nodes_to_deploy(cluster)
    self.assertEqual(len(nodes), 6)

    # Every role should account for the expected number of nodes.
    for role, expected_count in (('controller', 3),
                                 ('cinder', 2),
                                 ('compute', 2)):
        self.assertEqual(
            len(self.filter_by_role(nodes, role)), expected_count)
|
||||
|
||||
# TODO(aroma): move it to utils testing code
|
||||
def test_recalculate_deployment_task_progress(self):
|
||||
cluster = self.create_env([
|
||||
|
Loading…
Reference in New Issue
Block a user