fuel-web/nailgun/nailgun/task/fake.py

800 lines
25 KiB
Python

# -*- coding: utf-8 -*-
# Copyright 2013 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import copy
from itertools import repeat
import math
from random import randrange
import threading
import time
from fysom import Fysom
from kombu import Connection
from kombu import Exchange
from kombu import Queue
from nailgun import consts
from nailgun.db import db
from nailgun.db.sqlalchemy.models import Node
from nailgun import objects
from nailgun.rpc.receiver import NailgunReceiver
from nailgun.settings import settings
from nailgun.utils import get_in
def _is_slave(node):
    """Return True when ``node`` has a uid that is not the master's.

    ``node`` is a mapping with a ``'uid'`` key; the uid must be set
    (not ``None``) and differ from ``consts.MASTER_NODE_UID``.
    """
    uid = node['uid']
    return uid is not None and uid != consts.MASTER_NODE_UID
def task_executor(tasks):
    """Yield status contexts for every task in ``tasks``.

    Each context is a dict with ``deployment_graph_task_name`` (the task
    id), ``progress`` (percentage reached once this task is done) and
    ``task_status``. A task whose type is the skipped one produces a single
    context; any other task produces two -- running, then ready. The same
    dict object is re-used for both yields of one task.
    """
    for position, task in enumerate(tasks, 1):
        ctx = {
            'deployment_graph_task_name': task['id'],
            'progress': math.ceil(100.0 / len(tasks) * position),
        }
        if task.get('type') == consts.ORCHESTRATOR_TASK_TYPES.skipped:
            ctx['task_status'] = consts.HISTORY_TASK_STATUSES.skipped
            yield ctx
            continue

        for task_status in (consts.HISTORY_TASK_STATUSES.running,
                            consts.HISTORY_TASK_STATUSES.ready):
            ctx['task_status'] = task_status
            yield ctx
class FSMNodeFlow(Fysom):
    """Finite state machine driving one fake node through its statuses.

    The machine mirrors its state into ``data`` (the node dict): the
    callbacks keep ``data['status']``, ``data['progress']`` and, on
    failure, ``data['error_type']`` up to date.
    """

    def __init__(self, data, initial=None, tasks=None):
        """Build the machine and immediately fire its first transition.

        :param data: node dict that is updated in place
        :param initial: starting status, ``discover`` when omitted
        :param tasks: tasks fed to ``task_executor`` and consumed by
                      :meth:`step`
        """
        statuses = consts.NODE_STATUSES
        # The regular life cycle; ``next`` moves one position forward.
        chain = [
            statuses.discover,
            statuses.provisioning,
            statuses.provisioned,
            statuses.deploying,
            statuses.ready,
        ]
        any_status = chain + [statuses.error]

        events = [
            {'name': 'next', 'src': src, 'dst': dst}
            for src, dst in zip(chain, chain[1:])
        ]
        # ``next`` on a failed node leaves it in the error status.
        events.append({
            'name': 'next',
            'src': statuses.error,
            'dst': statuses.error,
        })
        # ``error`` and ``ready`` can be fired from every status.
        events.append({
            'name': 'error',
            'src': list(any_status),
            'dst': statuses.error,
        })
        events.append({
            'name': statuses.ready,
            'src': list(any_status),
            'dst': statuses.ready,
        })

        super(FSMNodeFlow, self).__init__({
            'initial': initial or statuses.discover,
            'events': events,
            'callbacks': {
                'onnext': self.on_next,
                'onerror': self.on_error,
                'onready': self.on_ready,
            },
        })

        self.data = data
        self.data.setdefault('progress', 0)
        self._executor = task_executor(tasks or [])

        failed = data.get('status') == statuses.error
        if failed:
            self.error()
        else:
            self.next()

    def on_ready(self, e):
        """Callback of the ``ready`` event: mark the node fully ready."""
        self.data['status'] = consts.NODE_STATUSES.ready
        self.data['progress'] = 100

    def on_error(self, e):
        """Callback of the ``error`` event.

        Marks the node as failed with full progress and, unless an
        ``error_type`` is already present, derives one from the status
        the node failed in.
        """
        statuses = consts.NODE_STATUSES
        self.data['status'] = statuses.error

        if e.src in (statuses.discover, statuses.provisioning):
            fallback_type = 'provision'
        elif e.src in (statuses.provisioned,
                       statuses.deploying,
                       statuses.ready):
            fallback_type = 'deploy'
        else:
            fallback_type = None

        if fallback_type and not self.data.get('error_type'):
            self.data['error_type'] = fallback_type

        self.data['progress'] = 100

    def on_next(self, e):
        """Callback of the ``next`` event: publish the new status.

        Progress restarts from zero when a provisioning or deploying
        phase begins.
        """
        phase_starts = (
            consts.NODE_STATUSES.provisioning,
            consts.NODE_STATUSES.deploying,
        )
        if e.dst in phase_starts:
            self.data['progress'] = 0
        self.data['status'] = e.dst

    def step(self):
        """Apply the next task context, or move on when tasks ran out."""
        ctx = next(self._executor, None)
        if ctx is None:
            self.next()
        else:
            self.data.update(ctx)

    def update_progress(self, value):
        """Add ``value`` to the progress; fire ``next`` on reaching 100."""
        progress = self.data['progress'] + value
        if progress < 100:
            self.data['progress'] = progress
            return
        self.data['progress'] = 100
        self.next()
class FakeThread(threading.Thread):
    """Base thread for fake tasks.

    Keeps the task message (``data``), the test parameters (``params``),
    the tick settings read from ``settings`` and offers helpers to sleep
    interruptibly, to call the receiver and to drive node state machines.
    """

    # Node attributes copied from the database by ``refresh_nodes``.
    NODE_FIELDS = ('status', 'error_msg', 'error_type')
    # Object whose ``respond_to`` method is called by ``notify``.
    Receiver = NailgunReceiver

    def __init__(self, data=None, params=None, group=None, target=None,
                 name=None, verbose=None, join_to=None):
        """Initialize the thread.

        :param data: task message; ``data['args']`` and
                     ``data['respond_to']`` are read here
        :param params: dict of test parameters used by subclasses
        :param group: passed to ``threading.Thread``
        :param target: passed to ``threading.Thread``
        :param name: passed to ``threading.Thread``
        :param verbose: passed to ``threading.Thread`` only when set
        :param join_to: optional thread that must finish before this one
        """
        thread_kwargs = {'group': group, 'target': target, 'name': name}
        # ``verbose`` is accepted by the Python 2 ``Thread`` constructor
        # only. Forwarding it just when explicitly given keeps the old
        # behaviour there and avoids a TypeError on Python 3.
        if verbose is not None:
            thread_kwargs['verbose'] = verbose
        threading.Thread.__init__(self, **thread_kwargs)

        self.data = data
        self.params = params
        self.join_to = join_to
        self.tick_count = int(settings.FAKE_TASKS_TICK_COUNT)
        # Lower bound of the random progress increment, never negative.
        self.low_tick_count = max(self.tick_count - 10, 0)
        self.tick_interval = int(settings.FAKE_TASKS_TICK_INTERVAL)

        self.task_uuid = data['args'].get('task_uuid')
        self.respond_to = data['respond_to']
        self.stoprequest = threading.Event()
        self.error = None

    def run(self):
        """Wait for ``join_to`` (if any) and inherit its failure.

        When the awaited thread ended with an error, this thread is
        marked as aborted and ``message_gen`` is replaced by
        ``error_message_gen``.
        """
        if self.join_to:
            self.join_to.join()
            if self.join_to.error:
                self.error = "Task aborted"
                self.message_gen = self.error_message_gen

    def error_message_gen(self):
        """Return a single error message carrying ``self.error``."""
        return [{
            'task_uuid': self.task_uuid,
            'status': 'error',
            'progress': 100,
            'error': self.error
        }]

    def rude_join(self, timeout=None):
        """Request the thread to stop, then join it."""
        self.stoprequest.set()
        super(FakeThread, self).join(timeout)

    def sleep(self, timeout):
        """Sleep for ``timeout`` seconds unless a stop is requested.

        The sleep is split into millisecond steps so that a stop request
        is noticed quickly; once it is seen the method returns at once
        instead of spinning through the remaining steps.
        """
        if timeout == 0:
            return

        step = 0.001
        for pause in repeat(step, int(float(timeout) / step)):
            if self.stoprequest.is_set():
                break
            time.sleep(pause)

    def refresh_nodes(self, nodes):
        """Copy ``NODE_FIELDS`` from the database into ``nodes``.

        Only nodes accepted by ``_is_slave`` are looked up; fields whose
        database value is ``None`` are left untouched.

        :param nodes: list of node dicts, each with a ``'uid'`` key
        """
        nodes_map = {
            str(node['uid']): node
            for node in nodes if _is_slave(node)
        }
        # ``in_`` expects a sequence of values, so pass the keys as a list
        # rather than the dict itself.
        nodes_db = db().query(Node).filter(Node.id.in_(list(nodes_map)))
        for node_db in nodes_db:
            node = nodes_map[node_db.uid]
            for field in self.NODE_FIELDS:
                value = getattr(node_db, field)
                if value is not None:
                    node[field] = value

    def notify(self, kwargs):
        """Call the receiver method named by ``respond_to`` and commit.

        The database session is rolled back and the exception re-raised
        when the call or the commit fails.
        """
        resp_method = getattr(self.Receiver, self.respond_to)
        try:
            resp_method(**kwargs)
            db().commit()
        except Exception:
            # TODO(ikalnitsky): research why some tests hit this
            # code but do not fail.
            db().rollback()
            raise

    def run_until_status(self, smart_nodes, status, role=None, error=False,
                         instant=False):
        """Advance ``smart_nodes`` until they reach ``status``.

        This is a generator: after every pass over the nodes it yields the
        list of their data dicts. It finishes once every relevant node is
        in ``status`` or in the error status, or when a stop is requested.

        :param smart_nodes: list of ``FSMNodeFlow`` instances
        :param status: node status to reach
        :param role: when given, only nodes with this role are advanced
                     and waited for
        :param error: when true, the first slave node (if there is one)
                      is switched to the error status before starting
        :param instant: when true, fire ``ready`` on a node instead of
                        advancing it step by step
        """
        ready = False
        final_statuses = (status, consts.NODE_STATUSES.error)

        if error:
            node = next((n for n in smart_nodes if _is_slave(n.data)), None)
            # Nothing to fail when no slave node is present.
            if node is not None:
                node.error()

        while not ready and not self.stoprequest.is_set():
            for sn in smart_nodes:
                continue_cases = (
                    sn.current in final_statuses,
                    role and
                    (('role' in sn.data and sn.data['role'] != role) or
                     ('roles' in sn.data and role not in sn.data['roles']))
                )
                if any(continue_cases):
                    continue

                if instant:
                    sn.ready()
                elif sn.data['status'] == consts.NODE_STATUSES.deploying:
                    sn.step()
                else:
                    sn.update_progress(
                        randrange(self.low_tick_count, self.tick_count)
                    )

            if role:
                # Wait only for nodes that actually have the role; nodes
                # with another role are never advanced above, so waiting
                # for them would keep this loop running forever.
                test_nodes = [
                    sn for sn in smart_nodes
                    if (('role' in sn.data and sn.data['role'] == role) or
                        ('roles' in sn.data and role in sn.data['roles']))
                ]
            else:
                test_nodes = smart_nodes

            if all(tn.current in final_statuses for tn in test_nodes):
                ready = True

            yield [sn.data for sn in smart_nodes]
class FakeAmpqThread(FakeThread):
    """Fake thread delivering the messages produced by ``message_gen``.

    Depending on ``settings.FAKE_TASKS_AMQP`` the messages are either
    published to the ``nailgun`` exchange or handed directly to
    ``notify``.
    """

    def run(self):
        """Produce all messages and deliver them one by one."""
        super(FakeAmpqThread, self).run()

        if not settings.FAKE_TASKS_AMQP:
            # No broker involved: call the receiver in-process.
            for message in self.message_gen():
                self.notify(message)
            return

        exchange = Exchange('nailgun', 'topic', durable=True)
        queue = Queue('nailgun', exchange=exchange, routing_key='nailgun')

        with Connection('amqp://guest:guest@localhost//') as connection:
            with connection.Producer(serializer='json') as producer:
                for message in self.message_gen():
                    payload = {
                        "method": self.respond_to,
                        "args": message
                    }
                    producer.publish(
                        payload,
                        exchange=exchange,
                        routing_key='nailgun',
                        declare=[queue]
                    )
class FakeDeploymentThread(FakeAmpqThread):
    """Fake thread producing the messages of a deployment task."""

    def inject_node_status_transition(self, kwargs):
        """Apply the configured node status transition to ``kwargs``.

        For a ready, error or stopped task status the matching entry
        (``successful``, ``failed`` or ``stopped``) is looked up under
        ``tasks_metadata.node_statuses_transitions`` of the task
        arguments. When such an entry exists, ``kwargs['nodes']`` is
        replaced by one dict per uid of ``tasks_graph`` carrying the
        transition values. Any other task status leaves ``kwargs`` alone.
        """
        selectors = {
            consts.TASK_STATUSES.ready: 'successful',
            consts.TASK_STATUSES.error: 'failed',
            consts.TASK_STATUSES.stopped: 'stopped',
        }
        selector = selectors.get(kwargs['status'])
        if selector is None:
            return

        node_statuses_transition = get_in(
            self.data['args'],
            'tasks_metadata', 'node_statuses_transitions', selector
        )
        if node_statuses_transition:
            kwargs['nodes'] = [
                dict(uid=uid, **node_statuses_transition)
                for uid in self.data['args']['tasks_graph']
            ]

    def message_gen(self):
        """Yield the progress messages of the fake deployment.

        Recognized ``params`` keys:

        * ``error`` -- only the value ``'deployment'`` triggers a failure
        * ``error_msg`` -- error text put into the final message
        * ``task_ready`` -- force the final task status to ready
        * ``override_state`` -- dict with ``status`` and ``progress``;
          when given, a single message with that state is produced
        * ``godmode`` -- no longer supported, raises ``ValueError``

        The same ``kwargs`` dict is updated and yielded on every step.
        """
        # TEST: we can fail only in deployment stage here:
        error = self.params.get("error")
        if error != 'deployment':
            error = None

        # TEST: error message from "orchestrator"
        error_msg = self.params.get("error_msg", "")
        # TEST: we can set task to ready no matter what
        # True or False
        task_ready = self.params.get("task_ready")

        if 'godmode' in self.params:
            raise ValueError('godmode is not supported anymore, '
                             'please use override_state instead')

        override_state = self.params.get("override_state", False)

        tasks_graph = self.data['args']['tasks_graph']
        nodes = [{'uid': uid} for uid in tasks_graph]
        self.refresh_nodes(nodes)

        kwargs = {
            'task_uuid': self.task_uuid,
            'nodes': nodes,
            'status': 'running'
        }

        if override_state:
            progress = override_state.get("progress", 0)
            status = override_state.get("status", "running")
            for n in kwargs["nodes"]:
                n["status"] = status
                n["progress"] = progress
            kwargs["status"] = status

            yield kwargs
            # A plain ``return`` ends the generator. Raising StopIteration
            # here would be turned into RuntimeError by PEP 479.
            return

        smart_nodes = [
            FSMNodeFlow(
                node,
                consts.NODE_STATUSES.provisioned,
                tasks_graph[node['uid']]
            )
            for node in kwargs['nodes']
        ]

        # ``error`` is either None (default deployment) or 'deployment'
        # (fail on the deployment stage) at this point.
        mode = self.run_until_status(
            smart_nodes, consts.NODE_STATUSES.ready,
            error=error is not None
        )

        for nodes_status in mode:
            kwargs['nodes'] = nodes_status
            yield kwargs
            self.sleep(self.tick_interval)

        if not error or task_ready:
            kwargs['status'] = consts.TASK_STATUSES.ready
        else:
            kwargs['status'] = consts.TASK_STATUSES.error

        if error_msg:
            kwargs['error'] = error_msg

        self.inject_node_status_transition(kwargs)

        yield kwargs
class FakeProvisionThread(FakeThread):
    """Fake thread reporting the progress of a provisioning task."""

    def run(self):
        """Drive the nodes to ``provisioned`` and notify on every pass.

        While at least one node is neither provisioned nor failed, the
        task is reported as running with that node's progress. Once all
        nodes are done the task is reported at 100% as ready, or as an
        error when ``params['error']`` equals ``'provisioning'``.
        """
        super(FakeProvisionThread, self).run()

        # TEST: the only failure that can be requested here is one on
        # the provisioning stage
        error = self.params.get("error")
        error_msg = self.params.get('error_msg')
        if error != "provisioning":
            error = None
        must_fail = error is not None

        finished_statuses = (
            consts.NODE_STATUSES.provisioned,
            consts.NODE_STATUSES.error,
        )

        kwargs = {
            'task_uuid': self.task_uuid,
            'status': consts.TASK_STATUSES.running,
            'progress': 0
        }

        provisioning_nodes = self.data['args']['provisioning_info']['nodes']
        smart_nodes = [
            FSMNodeFlow(node, consts.NODE_STATUSES.discover)
            for node in provisioning_nodes
        ]

        passes = self.run_until_status(
            smart_nodes, consts.NODE_STATUSES.provisioned, error=must_fail
        )
        for nodes in passes:
            kwargs['nodes'] = nodes

            # First node that still has work to do, if any.
            pending = next(
                (n for n in nodes if n['status'] not in finished_statuses),
                None
            )
            if pending is not None:
                kwargs['progress'] = pending['progress']
                kwargs['status'] = consts.TASK_STATUSES.running
            else:
                kwargs['progress'] = 100
                if must_fail:
                    kwargs['status'] = consts.TASK_STATUSES.error
                else:
                    kwargs['status'] = consts.TASK_STATUSES.ready
                if error_msg:
                    kwargs['error_msg'] = error_msg

            self.notify(kwargs)
            self.sleep(self.tick_interval)
class FakeDeletionThread(FakeThread):
    """Fake thread answering a node removal task."""

    def run(self):
        """Report the removal as ready, then optionally re-create nodes.

        ``params['recover_nodes']`` (default true) controls whether the
        nodes listed in ``nodes_to_restore`` are created again in the
        ``discover`` status; ``params['recover_offline_nodes']`` (default
        true) controls whether offline ones are included.
        """
        super(FakeDeletionThread, self).run()

        args = self.data['args']
        response = {
            'task_uuid': self.task_uuid,
            'nodes': args['nodes'],
            'status': 'ready'
        }
        # Take a deep copy up front: the original data is about to be
        # deleted.
        restorable = copy.deepcopy(args.get('nodes_to_restore', []))

        respond = getattr(NailgunReceiver, self.respond_to)
        try:
            respond(**response)
            db().commit()
        except Exception:
            db().rollback()
            raise

        recover_nodes = self.params.get("recover_nodes", True)
        recover_offline_nodes = self.params.get("recover_offline_nodes", True)

        if not recover_nodes:
            db().commit()
            return

        for node_data in restorable:
            # We want to preserve offline nodes since in fake mode
            # it's easier to do that than add new one
            offline = not node_data.get("online", True)
            if offline and not recover_offline_nodes:
                continue

            node_data["status"] = "discover"
            objects.Node.create(node_data)
        db().commit()
class FakeStopDeploymentThread(FakeThread):
    """Fake thread answering a stop deployment task."""

    def run(self):
        """Report the stop as ready, then bring the nodes back online.

        The first ``params['ia_nodes_count']`` nodes (default 0) are
        reported as inaccessible. Unless ``params['recover_nodes']`` is
        false, every node of the task is afterwards set online with the
        ``discover`` status, one tick interval apart.
        """
        super(FakeStopDeploymentThread, self).run()

        recover_nodes = self.params.get("recover_nodes", True)
        ia_nodes_count = self.params.get("ia_nodes_count", 0)

        all_nodes = self.data['args']['nodes']
        reachable, unreachable = all_nodes, []
        if ia_nodes_count:
            unreachable = all_nodes[0:ia_nodes_count]
            reachable = all_nodes[ia_nodes_count:]

        self.sleep(self.tick_interval)

        response = {
            'task_uuid': self.task_uuid,
            'stop_task_uuid': self.data['args']['stop_task_uuid'],
            'nodes': reachable,
            'inaccessible_nodes': unreachable,
            'status': 'ready',
            'progress': 100
        }
        respond = getattr(NailgunReceiver, self.respond_to)
        try:
            respond(**response)
            db().commit()
        except Exception:
            db().rollback()
            raise

        if not recover_nodes:
            db().commit()
            return

        node_ids = [n['uid'] for n in self.data['args']['nodes']]
        recovered = db().query(Node).filter(Node.id.in_(node_ids)).all()

        for node_db in recovered:
            self.sleep(self.tick_interval)
            node_db.online = True
            node_db.status = "discover"
            db().add(node_db)
            db().commit()
class FakeResetEnvironmentThread(FakeThread):
    """Fake thread answering an environment reset task."""

    def run(self):
        """Report the reset as ready, then bring the nodes back online.

        The first ``params['ia_nodes_count']`` nodes (default 0) are
        reported as inaccessible. Unless ``params['recover_nodes']`` is
        false, every node of the task is afterwards set online with the
        ``discover`` status, one tick interval apart.
        """
        super(FakeResetEnvironmentThread, self).run()

        recover_nodes = self.params.get("recover_nodes", True)
        ia_nodes_count = self.params.get("ia_nodes_count", 0)

        all_nodes = self.data['args']['nodes']
        reachable, unreachable = all_nodes, []
        if ia_nodes_count:
            unreachable = all_nodes[0:ia_nodes_count]
            reachable = all_nodes[ia_nodes_count:]

        self.sleep(self.tick_interval)

        response = {
            'task_uuid': self.task_uuid,
            'nodes': reachable,
            'inaccessible_nodes': unreachable,
            'status': 'ready',
            'progress': 100
        }
        respond = getattr(NailgunReceiver, self.respond_to)
        try:
            respond(**response)
            db().commit()
        except Exception:
            db().rollback()
            raise

        if not recover_nodes:
            db().commit()
            return

        node_ids = [n['uid'] for n in self.data['args']['nodes']]
        recovered = db().query(Node).filter(Node.id.in_(node_ids)).all()

        for node_db in recovered:
            self.sleep(self.tick_interval)
            node_db.online = True
            node_db.status = "discover"
            db().add(node_db)
            db().commit()
class FakeVerificationThread(FakeThread):
    """Fake thread answering a network verification task."""

    def run(self):
        """Run the subtasks synchronously and report the task as ready."""
        super(FakeVerificationThread, self).run()

        task_nodes = self.data['args']['nodes']

        # some kinda hack for debugging in fake tasks:
        # verification will fail if you specified 404 as VLAN id in any net
        for node in task_nodes:
            for iface in node['networks']:
                vlans = iface['vlans']
                if 404 in vlans:
                    iface['vlans'] = list(set(vlans) ^ {404})

        # we have to execute subtasks too, just like astute does. otherwise
        # we will have "running" subtasks in the database.
        for subtask in self.data.get('subtasks', []):
            thread_cls = FAKE_THREADS[subtask['method']]
            # (mihgen): Don't try to create more threads here, it will
            # break integration testing which relies on synchronous run
            thread_cls(subtask, self.params).run()

        respond = getattr(NailgunReceiver, self.respond_to)
        try:
            respond(
                task_uuid=self.task_uuid,
                progress=100,
                status='ready',
                nodes=task_nodes,
            )
            db().commit()
        except Exception:
            db().rollback()
            raise
class FakeMulticastVerifications(FakeAmpqThread):
    """Network verifications will be as single dispatcher method in naily"""

    def _responses(self, drop_last_sender=False, mute_last_receiver=False):
        """Yield the two messages shared by all multicast scenarios.

        The first message reports 30% progress. The second one is the
        same dict, completed with the ready status, 100% progress and a
        ``nodes`` mapping of each uid to the list of uids.

        :param drop_last_sender: leave the last node out of the result
        :param mute_last_receiver: map the last node to an empty list
        """
        response = {
            'task_uuid': self.task_uuid,
            'progress': 30
        }
        yield response

        nodes = self.data['args']['nodes']
        uids = [node['uid'] for node in nodes]
        if drop_last_sender:
            uids = uids[:-1]

        response['status'] = 'ready'
        response['progress'] = 100
        # Every uid maps to the very same list object.
        response['nodes'] = {uid: uids for uid in uids}
        if mute_last_receiver:
            response['nodes'][uids[-1]] = []
        yield response

    def ready_multicast(self):
        """Scenario in which every node heard every node."""
        return self._responses()

    def error1_multicast(self):
        """Scenario with no messages from last node."""
        return self._responses(drop_last_sender=True)

    def error2_multicast(self):
        """Scenario in which the last node did not receive any messages."""
        return self._responses(mute_last_receiver=True)

    def message_gen(self):
        """Pick the scenario named by ``params['prefix']`` (``ready``)."""
        prefix = self.params.get('prefix', 'ready')
        scenario = getattr(self, '{0}_multicast'.format(prefix))
        return scenario()
class FakeCheckingDhcpThread(FakeAmpqThread):
    """Thread to be used with test_task_managers.py"""

    def _get_message(self, mac):
        """Example of message with discovered dhcp server"""
        # (node uid, offered address) pairs of the example answer.
        offers = (('90', '10.20.0.133'), ('91', '10.20.0.131'))
        nodes = [
            {'uid': uid,
             'status': 'ready',
             'data': [{'mac': mac,
                       'server_id': '10.20.0.20',
                       'yiaddr': yiaddr,
                       'iface': 'eth0'}]}
            for uid, yiaddr in offers
        ]
        message = {
            'task_uuid': self.task_uuid,
            'error': '',
            'status': 'ready',
            'progress': 100,
            'nodes': nodes,
        }
        return message

    def message_gen(self):
        """Return the messages of the check after one tick interval.

        ``params['dhcp_error']`` yields the error message; otherwise the
        example message is built for ``params['rogue_dhcp_mac']`` or, if
        that is absent, for the admin network mac from ``settings``.
        """
        self.sleep(self.tick_interval)

        if self.params.get("dhcp_error"):
            return self.error_message_gen()

        if 'rogue_dhcp_mac' in self.params:
            mac = self.params['rogue_dhcp_mac']
        else:
            mac = settings.ADMIN_NETWORK['mac']
        return (self._get_message(mac),)
class FakeDumpEnvironment(FakeAmpqThread):
    """Fake thread answering an environment dump task."""

    def message_gen(self):
        """Return one ready message pointing at ``/tmp/fake_dump``."""
        self.sleep(self.tick_interval)
        message = {
            'task_uuid': self.task_uuid,
            'status': 'ready',
            'progress': 100,
            'msg': '/tmp/fake_dump',
        }
        return [message]
class FakeCapacityLog(FakeAmpqThread):
    """Fake thread answering a capacity log generation task."""

    def message_gen(self):
        """Return one ready message with an empty ``msg``."""
        self.sleep(self.tick_interval)
        message = {
            'task_uuid': self.task_uuid,
            'status': 'ready',
            'progress': 100,
            'msg': '',
        }
        return [message]
class FakeExecuteTasksThread(FakeAmpqThread):
    """Fake thread answering a tasks execution request."""

    def message_gen(self):
        """Return one message reporting the task ready at 100%."""
        self.sleep(self.tick_interval)
        message = {
            'task_uuid': self.task_uuid,
            'status': 'ready',
            'progress': 100,
        }
        return [message]
class FakeCheckRepositories(FakeAmpqThread):
    """Fake thread answering a repositories check task."""

    def message_gen(self):
        """Return one ready message with a zero-status result for node 1."""
        self.sleep(self.tick_interval)
        node_result = {"uid": "1", "status": 0, "out": "", "err": ""}
        message = {
            "task_uuid": self.task_uuid,
            "status": "ready",
            "progress": 100,
            "nodes": [node_result],
        }
        return [message]
# Maps a task method name to the fake thread class that handles it.
FAKE_THREADS = dict(
    # provisioning
    native_provision=FakeProvisionThread,
    image_provision=FakeProvisionThread,
    # deployment
    granular_deploy=FakeDeploymentThread,
    deploy=FakeDeploymentThread,
    task_deploy=FakeDeploymentThread,
    # node and environment life cycle
    remove_nodes=FakeDeletionThread,
    stop_deploy_task=FakeStopDeploymentThread,
    reset_environment=FakeResetEnvironmentThread,
    # checks and utilities
    verify_networks=FakeVerificationThread,
    check_dhcp=FakeCheckingDhcpThread,
    dump_environment=FakeDumpEnvironment,
    generate_capacity_log=FakeCapacityLog,
    multicast_verification=FakeMulticastVerifications,
    execute_tasks=FakeExecuteTasksThread,
    check_repositories=FakeCheckRepositories,
    check_repositories_with_setup=FakeCheckRepositories,
)