# -*- coding: utf-8 -*- # Copyright 2013 Mirantis, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. import copy from itertools import repeat import math from random import randrange import threading import time from fysom import Fysom from kombu import Connection from kombu import Exchange from kombu import Queue from nailgun import consts from nailgun.db import db from nailgun.db.sqlalchemy.models import Node from nailgun import objects from nailgun.rpc.receiver import NailgunReceiver from nailgun.settings import settings from nailgun.utils import get_in def _is_slave(node): return all([ node['uid'] is not None, node['uid'] != consts.MASTER_NODE_UID, ]) def task_executor(tasks): for i, task in enumerate(tasks): ctx = {'deployment_graph_task_name': task['id'], 'progress': math.ceil(100.0 / len(tasks) * (i + 1))} if task.get('type') == consts.ORCHESTRATOR_TASK_TYPES.skipped: ctx.update({'task_status': consts.HISTORY_TASK_STATUSES.skipped}) yield ctx else: ctx.update({'task_status': consts.HISTORY_TASK_STATUSES.running}) yield ctx ctx.update({'task_status': consts.HISTORY_TASK_STATUSES.ready}) yield ctx class FSMNodeFlow(Fysom): def __init__(self, data, initial=None, tasks=None): super(FSMNodeFlow, self).__init__({ 'initial': initial or consts.NODE_STATUSES.discover, 'events': [ {'name': 'next', 'src': consts.NODE_STATUSES.discover, 'dst': consts.NODE_STATUSES.provisioning}, {'name': 'next', 'src': consts.NODE_STATUSES.provisioning, 'dst': consts.NODE_STATUSES.provisioned}, {'name': 'next', 'src': consts.NODE_STATUSES.provisioned, 'dst': consts.NODE_STATUSES.deploying}, {'name': 'next', 'src': consts.NODE_STATUSES.deploying, 'dst': consts.NODE_STATUSES.ready}, {'name': 'next', 'src': consts.NODE_STATUSES.error, 'dst': consts.NODE_STATUSES.error}, { 'name': 'error', 'src': [ consts.NODE_STATUSES.discover, consts.NODE_STATUSES.provisioning, consts.NODE_STATUSES.provisioned, consts.NODE_STATUSES.deploying, consts.NODE_STATUSES.ready, consts.NODE_STATUSES.error ], 'dst': consts.NODE_STATUSES.error }, { 'name': consts.NODE_STATUSES.ready, 'src': [ consts.NODE_STATUSES.discover, consts.NODE_STATUSES.provisioning, consts.NODE_STATUSES.provisioned, consts.NODE_STATUSES.deploying, consts.NODE_STATUSES.ready, consts.NODE_STATUSES.error ], 'dst': consts.NODE_STATUSES.ready }, ], 'callbacks': { 'onnext': self.on_next, 'onerror': self.on_error, 'onready': self.on_ready } }) self.data = data self.data.setdefault('progress', 0) self._executor = task_executor(tasks or []) if data.get('status') == consts.NODE_STATUSES.error: self.error() else: self.next() def on_ready(self, e): self.data['status'] = consts.NODE_STATUSES.ready self.data['progress'] = 100 def on_error(self, e): self.data['status'] = consts.NODE_STATUSES.error if e.src in [ consts.NODE_STATUSES.discover, consts.NODE_STATUSES.provisioning ]: if not self.data.get('error_type'): self.data['error_type'] = 'provision' elif e.src in [consts.NODE_STATUSES.provisioned, consts.NODE_STATUSES.deploying, consts.NODE_STATUSES.ready]: if not self.data.get('error_type'): self.data['error_type'] = 'deploy' self.data['progress'] = 100 def on_next(self, e): if e.dst in [ consts.NODE_STATUSES.provisioning, consts.NODE_STATUSES.deploying ]: self.data['progress'] = 0 self.data['status'] = e.dst def step(self): try: self.data.update(next(self._executor)) except StopIteration: self.next() def update_progress(self, value): self.data['progress'] += value if self.data['progress'] >= 100: self.data['progress'] = 100 self.next() class FakeThread(threading.Thread): NODE_FIELDS = ('status', 'error_msg', 'error_type') Receiver = NailgunReceiver def __init__(self, data=None, params=None, group=None, target=None, name=None, verbose=None, join_to=None): threading.Thread.__init__(self, group=group, target=target, name=name, verbose=verbose) self.data = data self.params = params self.join_to = join_to self.tick_count = int(settings.FAKE_TASKS_TICK_COUNT) self.low_tick_count = self.tick_count - 10 if self.low_tick_count < 0: self.low_tick_count = 0 self.tick_interval = int(settings.FAKE_TASKS_TICK_INTERVAL) self.task_uuid = data['args'].get( 'task_uuid' ) self.respond_to = data['respond_to'] self.stoprequest = threading.Event() self.error = None def run(self): if self.join_to: self.join_to.join() if self.join_to.error: self.error = "Task aborted" self.message_gen = self.error_message_gen def error_message_gen(self): return [{ 'task_uuid': self.task_uuid, 'status': 'error', 'progress': 100, 'error': self.error }] def rude_join(self, timeout=None): self.stoprequest.set() super(FakeThread, self).join(timeout) def sleep(self, timeout): if timeout == 0: return step = 0.001 for r in repeat(step, int(float(timeout) / step)): if not self.stoprequest.isSet(): time.sleep(r) def refresh_nodes(self, nodes): nodes_map = { str(node['uid']): node for node in nodes if _is_slave(node) } nodes_db = db().query(Node).filter(Node.id.in_(nodes_map)) for node_db in nodes_db: node = nodes_map[node_db.uid] for field in self.NODE_FIELDS: v = getattr(node_db, field) if v is not None: node[field] = v def notify(self, kwargs): resp_method = getattr(self.Receiver, self.respond_to) try: resp_method(**kwargs) db().commit() except Exception: # TODO(ikalnitsky): research why some tests hit this # code but do not fail. db().rollback() raise def run_until_status(self, smart_nodes, status, role=None, error=False, instant=False): ready = False if error: node = next((n for n in smart_nodes if _is_slave(n.data)), None) node.error() while not ready and not self.stoprequest.isSet(): for sn in smart_nodes: continue_cases = ( sn.current in (status, consts.NODE_STATUSES.error), role and (('role' in sn.data and sn.data['role'] != role) or ('roles' in sn.data and role not in sn.data['roles'])) ) if any(continue_cases): continue if instant: sn.ready() else: if sn.data['status'] == consts.NODE_STATUSES.deploying: sn.step() else: sn.update_progress( randrange( self.low_tick_count, self.tick_count ) ) if role: test_nodes = [ sn for sn in smart_nodes if (('role' in sn.data and sn.data['role']) or ('roles' in sn.data and role in sn.data['roles'])) ] else: test_nodes = smart_nodes node_ready_status = ( (tn.current in (status, 'error')) for tn in test_nodes ) if all(node_ready_status): ready = True yield [sn.data for sn in smart_nodes] class FakeAmpqThread(FakeThread): def run(self): super(FakeAmpqThread, self).run() if settings.FAKE_TASKS_AMQP: nailgun_exchange = Exchange( 'nailgun', 'topic', durable=True ) nailgun_queue = Queue( 'nailgun', exchange=nailgun_exchange, routing_key='nailgun' ) with Connection('amqp://guest:guest@localhost//') as conn: with conn.Producer(serializer='json') as producer: for msg in self.message_gen(): producer.publish( { "method": self.respond_to, "args": msg }, exchange=nailgun_exchange, routing_key='nailgun', declare=[nailgun_queue] ) else: for msg in self.message_gen(): self.notify(msg) class FakeDeploymentThread(FakeAmpqThread): def inject_node_status_transition(self, kwargs): if kwargs['status'] == consts.TASK_STATUSES.ready: selector = 'successful' elif kwargs['status'] == consts.TASK_STATUSES.error: selector = 'failed' elif kwargs['status'] == consts.TASK_STATUSES.stopped: selector = 'stopped' else: return node_statuses_transition = get_in( self.data['args'], 'tasks_metadata', 'node_statuses_transitions', selector ) if node_statuses_transition: kwargs['nodes'] = [ dict(uid=uid, **node_statuses_transition) for uid in self.data['args']['tasks_graph'] ] def message_gen(self): # TEST: we can fail only in deployment stage here: error = self.params.get("error") if error != 'deployment': error = None # TEST: error message from "orchestrator" error_msg = self.params.get("error_msg", "") # TEST: we can set task to ready no matter what # True or False task_ready = self.params.get("task_ready") if 'godmode' in self.params: raise ValueError('godmode is not supported anymore, ' 'please use override_state instead') override_state = self.params.get("override_state", False) nodes = [ {'uid': uid} for uid in self.data['args']['tasks_graph'] ] self.refresh_nodes(nodes) kwargs = { 'task_uuid': self.task_uuid, 'nodes': nodes, 'status': 'running' } if override_state: progress = override_state.get("progress", 0) status = override_state.get("status", "running") for n in kwargs["nodes"]: n["status"] = status n["progress"] = progress kwargs["status"] = status yield kwargs raise StopIteration smart_nodes = [ FSMNodeFlow( node, consts.NODE_STATUSES.provisioned, self.data['args']['tasks_graph'][node['uid']] ) for node in kwargs['nodes'] ] stages_errors = { # no errors - default deployment None: self.run_until_status( smart_nodes, consts.NODE_STATUSES.ready ), # error on deployment stage 'deployment': self.run_until_status( smart_nodes, consts.NODE_STATUSES.ready, error=True ), } mode = stages_errors[error] for nodes_status in mode: kwargs['nodes'] = nodes_status yield kwargs self.sleep(self.tick_interval) if not error or task_ready: kwargs['status'] = consts.TASK_STATUSES.ready else: kwargs['status'] = consts.TASK_STATUSES.error if error_msg: kwargs['error'] = error_msg self.inject_node_status_transition(kwargs) yield kwargs class FakeProvisionThread(FakeThread): def run(self): super(FakeProvisionThread, self).run() # TEST: we can fail only in deployment stage here: error = self.params.get("error") error_msg = self.params.get('error_msg') if error != "provisioning": error = None kwargs = { 'task_uuid': self.task_uuid, 'status': consts.TASK_STATUSES.running, 'progress': 0 } smart_nodes = [ FSMNodeFlow(n, consts.NODE_STATUSES.discover) for n in self.data['args']['provisioning_info']['nodes'] ] for nodes in self.run_until_status( smart_nodes, consts.NODE_STATUSES.provisioned, error=error is not None ): kwargs['nodes'] = nodes for n in nodes: if n['status'] in ( consts.NODE_STATUSES.provisioned, consts.NODE_STATUSES.error ): continue kwargs['progress'] = n['progress'] kwargs['status'] = consts.TASK_STATUSES.running break else: kwargs['progress'] = 100 if error is None: kwargs['status'] = consts.TASK_STATUSES.ready else: kwargs['status'] = consts.TASK_STATUSES.error if error_msg: kwargs['error_msg'] = error_msg self.notify(kwargs) self.sleep(self.tick_interval) class FakeDeletionThread(FakeThread): def run(self): super(FakeDeletionThread, self).run() receiver = NailgunReceiver kwargs = { 'task_uuid': self.task_uuid, 'nodes': self.data['args']['nodes'], 'status': 'ready' } # copy the data deeply, because we're going delete the original one nodes_to_restore = copy.deepcopy( self.data['args'].get('nodes_to_restore', [])) resp_method = getattr(receiver, self.respond_to) try: resp_method(**kwargs) db().commit() except Exception: db().rollback() raise recover_nodes = self.params.get("recover_nodes", True) recover_offline_nodes = self.params.get("recover_offline_nodes", True) if not recover_nodes: db().commit() return for node_data in nodes_to_restore: # We want to preserve offline nodes since in fake mode # it's easier to do that than add new one is_offline = "online" in node_data and not node_data["online"] if is_offline and not recover_offline_nodes: continue node_data["status"] = "discover" objects.Node.create(node_data) db().commit() class FakeStopDeploymentThread(FakeThread): def run(self): super(FakeStopDeploymentThread, self).run() receiver = NailgunReceiver recover_nodes = self.params.get("recover_nodes", True) ia_nodes_count = self.params.get("ia_nodes_count", 0) nodes = self.data['args']['nodes'] ia_nodes = [] if ia_nodes_count: ia_nodes = nodes[0:ia_nodes_count] nodes = nodes[ia_nodes_count:] self.sleep(self.tick_interval) kwargs = { 'task_uuid': self.task_uuid, 'stop_task_uuid': self.data['args']['stop_task_uuid'], 'nodes': nodes, 'inaccessible_nodes': ia_nodes, 'status': 'ready', 'progress': 100 } resp_method = getattr(receiver, self.respond_to) try: resp_method(**kwargs) db().commit() except Exception: db().rollback() raise if not recover_nodes: db().commit() return nodes_db = db().query(Node).filter( Node.id.in_([ n['uid'] for n in self.data['args']['nodes'] ]) ).all() for n in nodes_db: self.sleep(self.tick_interval) n.online = True n.status = "discover" db().add(n) db().commit() class FakeResetEnvironmentThread(FakeThread): def run(self): super(FakeResetEnvironmentThread, self).run() receiver = NailgunReceiver recover_nodes = self.params.get("recover_nodes", True) ia_nodes_count = self.params.get("ia_nodes_count", 0) nodes = self.data['args']['nodes'] ia_nodes = [] if ia_nodes_count: ia_nodes = nodes[0:ia_nodes_count] nodes = nodes[ia_nodes_count:] self.sleep(self.tick_interval) kwargs = { 'task_uuid': self.task_uuid, 'nodes': nodes, 'inaccessible_nodes': ia_nodes, 'status': 'ready', 'progress': 100 } resp_method = getattr(receiver, self.respond_to) try: resp_method(**kwargs) db().commit() except Exception: db().rollback() raise if not recover_nodes: db().commit() return nodes_db = db().query(Node).filter( Node.id.in_([ n['uid'] for n in self.data['args']['nodes'] ]) ).all() for n in nodes_db: self.sleep(self.tick_interval) n.online = True n.status = "discover" db().add(n) db().commit() class FakeVerificationThread(FakeThread): def run(self): super(FakeVerificationThread, self).run() # some kinda hack for debugging in fake tasks: # verification will fail if you specified 404 as VLAN id in any net for n in self.data['args']['nodes']: for iface in n['networks']: if 404 in iface['vlans']: iface['vlans'] = list(set(iface['vlans']) ^ set([404])) # we have to execute subtasks too, just like astute does. otherwise # we will have "running" subtasks in the database. for subtask in self.data.get('subtasks', []): func = FAKE_THREADS[subtask['method']](subtask, self.params) # (mihgen): Don't try to create more threads here, it will # break integration testing which relies on synchronous run func.run() resp_method = getattr(NailgunReceiver, self.respond_to) try: resp_method( task_uuid=self.task_uuid, progress=100, status='ready', nodes=self.data['args']['nodes'], ) db().commit() except Exception: db().rollback() raise class FakeMulticastVerifications(FakeAmpqThread): """Network verifications will be as single dispatcher method in naily""" def ready_multicast(self): response = { 'task_uuid': self.task_uuid, 'progress': 0 } response['progress'] = 30 yield response nodes = self.data['args']['nodes'] nodes_uid = [node['uid'] for node in nodes] response['status'] = 'ready' response['progress'] = 100 response['nodes'] = dict((node_uid, nodes_uid) for node_uid in nodes_uid) yield response def error1_multicast(self): response = { 'task_uuid': self.task_uuid, 'progress': 0 } response['progress'] = 30 yield response nodes = self.data['args']['nodes'] # no messages from last node nodes_uid = [node['uid'] for node in nodes][:len(nodes) - 1] response['status'] = 'ready' response['progress'] = 100 response['nodes'] = dict((node_uid, nodes_uid) for node_uid in nodes_uid) yield response def error2_multicast(self): response = { 'task_uuid': self.task_uuid, 'progress': 0 } response['progress'] = 30 yield response nodes = self.data['args']['nodes'] nodes_uid = [node['uid'] for node in nodes] response['status'] = 'ready' response['progress'] = 100 response['nodes'] = dict((node_uid, nodes_uid) for node_uid in nodes_uid) # last node did not received any messages response['nodes'][nodes_uid[-1]] = [] yield response def message_gen(self): task_name = '{0}_multicast'.format(self.params.get('prefix', 'ready')) return getattr(self, task_name)() class FakeCheckingDhcpThread(FakeAmpqThread): """Thread to be used with test_task_managers.py""" def _get_message(self, mac): """Example of message with discovered dhcp server""" nodes = [{'uid': '90', 'status': 'ready', 'data': [{'mac': mac, 'server_id': '10.20.0.20', 'yiaddr': '10.20.0.133', 'iface': 'eth0'}]}, {'uid': '91', 'status': 'ready', 'data': [{'mac': mac, 'server_id': '10.20.0.20', 'yiaddr': '10.20.0.131', 'iface': 'eth0'}]}] return {'task_uuid': self.task_uuid, 'error': '', 'status': 'ready', 'progress': 100, 'nodes': nodes} def message_gen(self): self.sleep(self.tick_interval) if self.params.get("dhcp_error"): return self.error_message_gen() elif 'rogue_dhcp_mac' in self.params: return (self._get_message(self.params['rogue_dhcp_mac']),) else: return (self._get_message(settings.ADMIN_NETWORK['mac']),) class FakeDumpEnvironment(FakeAmpqThread): def message_gen(self): self.sleep(self.tick_interval) return [{ 'task_uuid': self.task_uuid, 'status': 'ready', 'progress': 100, 'msg': '/tmp/fake_dump' }] class FakeCapacityLog(FakeAmpqThread): def message_gen(self): self.sleep(self.tick_interval) return [{ 'task_uuid': self.task_uuid, 'status': 'ready', 'progress': 100, 'msg': '' }] class FakeExecuteTasksThread(FakeAmpqThread): def message_gen(self): self.sleep(self.tick_interval) return [{ 'task_uuid': self.task_uuid, 'status': 'ready', 'progress': 100 }] class FakeCheckRepositories(FakeAmpqThread): def message_gen(self): self.sleep(self.tick_interval) return [{ "task_uuid": self.task_uuid, "status": "ready", "progress": 100, "nodes": [{"uid": "1", "status": 0, "out": "", "err": ""}] }] FAKE_THREADS = { 'native_provision': FakeProvisionThread, 'image_provision': FakeProvisionThread, 'granular_deploy': FakeDeploymentThread, 'deploy': FakeDeploymentThread, 'task_deploy': FakeDeploymentThread, 'remove_nodes': FakeDeletionThread, 'stop_deploy_task': FakeStopDeploymentThread, 'reset_environment': FakeResetEnvironmentThread, 'verify_networks': FakeVerificationThread, 'check_dhcp': FakeCheckingDhcpThread, 'dump_environment': FakeDumpEnvironment, 'generate_capacity_log': FakeCapacityLog, 'multicast_verification': FakeMulticastVerifications, 'execute_tasks': FakeExecuteTasksThread, 'check_repositories': FakeCheckRepositories, 'check_repositories_with_setup': FakeCheckRepositories, }