Fuel UI

task.py 78KB

# -*- coding: utf-8 -*-

# Copyright 2013 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import collections
from copy import deepcopy
from itertools import groupby
import os

import netaddr
import six
from sqlalchemy import func
from sqlalchemy import not_
from sqlalchemy.orm import ColumnProperty
from sqlalchemy.orm import object_mapper

from nailgun.api.v1.validators import assignment
from nailgun import consts
from nailgun.db import db
from nailgun.db.sqlalchemy.models import CapacityLog
from nailgun.db.sqlalchemy.models import Cluster
from nailgun.db.sqlalchemy.models import Node
from nailgun.db.sqlalchemy.models import Task
from nailgun import errors
from nailgun.extensions import fire_callback_on_before_deployment_check
from nailgun.extensions.network_manager.checker import NetworkCheck
from nailgun.extensions.network_manager.manager import NetworkManager
from nailgun import lcm
from nailgun.logger import logger
from nailgun import objects
from nailgun.orchestrator import deployment_serializers
from nailgun.orchestrator.deployment_serializers import \
    deployment_info_to_legacy
from nailgun.orchestrator import orchestrator_graph
from nailgun.orchestrator import provisioning_serializers
from nailgun.orchestrator import stages
from nailgun.orchestrator import task_based_deployment
from nailgun.orchestrator import tasks_serializer
from nailgun.orchestrator import tasks_templates
import nailgun.rpc as rpc
from nailgun.settings import settings
from nailgun.task.fake import FAKE_THREADS
from nailgun.task.helpers import TaskHelper
from nailgun.task.legacy_tasks_adapter import adapt_legacy_tasks
from nailgun.utils import logs as logs_utils
from nailgun.utils.resolvers import TagResolver


def make_astute_message(task, method, respond_to, args):
    message = {
        'api_version': settings.VERSION['api'],
        'method': method,
        'respond_to': respond_to,
        'args': args
    }
    message['args']['task_uuid'] = task.uuid
    task.cache = message
    return message
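
# Illustrative sketch (comment only) of the message make_astute_message
# builds for a hypothetical task and method; the field values below are
# examples, not real settings:
#
#   make_astute_message(task, 'remove_nodes', 'remove_nodes_resp',
#                       {'nodes': []})
#   # -> {'api_version': settings.VERSION['api'],
#   #     'method': 'remove_nodes',
#   #     'respond_to': 'remove_nodes_resp',
#   #     'args': {'nodes': [], 'task_uuid': task.uuid}}
#   # The built message is also stored in task.cache.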


def fake_cast(queue, messages, **kwargs):
    def make_thread(message, join_to=None):
        thread = FAKE_THREADS[message['method']](
            data=message,
            params=kwargs,
            join_to=join_to
        )
        tests = settings.TESTS_WITH_NO_THREADS
        if not tests:
            # Run a real thread if tests is not set or is False
            logger.debug("Calling fake thread: data: %s, params: %s",
                         message, kwargs)
            thread.start()
            # thread.name can only be set after thread.start() here,
            # otherwise the exception 'Thread.__init__() not called' is raised
            thread.name = message['method'].upper()
            return thread
        else:
            # For testing purposes, we run the same code synchronously;
            # threading is mocked in test/base.py
            thread.run()

    if isinstance(messages, (list,)):
        thread = None
        for m in messages:
            thread = make_thread(m, join_to=thread)
    else:
        make_thread(messages)


class BaseDeploymentTask(object):

    @classmethod
    def get_deployment_methods(cls, cluster):
        """Get deployment method names based on cluster version

        :param cluster: Cluster db object
        :returns: list of available methods
        """
        methods = []
        if objects.Cluster.is_task_deploy_enabled(cluster):
            methods.append('task_deploy')
        if objects.Release.is_granular_enabled(cluster.release):
            methods.append('granular_deploy')
        else:
            methods.append('deploy')
        return methods

    @classmethod
    def call_deployment_method(cls, transaction, **kwargs):
        """Calls the deployment method with fallback.

        :param transaction: the transaction object
        :param kwargs: the keyword arguments
        """
        error_messages = []
        available_methods = cls.get_deployment_methods(transaction.cluster)
        for method in available_methods:
            try:
                args = getattr(cls, method)(transaction, **kwargs)
                # save tasks history
                if 'tasks_graph' in args:
                    logger.info("tasks history saving is started.")
                    objects.DeploymentHistoryCollection.create(
                        transaction, args['tasks_graph']
                    )
                    logger.info("tasks history saving is finished.")
                return method, args
            except errors.TaskBaseDeploymentNotAllowed as e:
                error_messages.append(six.text_type(e))
                logger.warning(
                    "%s is not allowed, fallback to next method.", method
                )

        raise errors.TaskBaseDeploymentNotAllowed(
            "The task deploy is not allowed because of {0}"
            .format(", ".join(error_messages))
        )

    @classmethod
    def _save_deployment_info(cls, transaction, deployment_info):
        normalized = {
            'common': deployment_info['common'],
            'nodes': {n['uid']: n for n in deployment_info['nodes']}
        }
        objects.Transaction.attach_deployment_info(
            transaction, normalized)
        return normalized


class DeploymentTask(BaseDeploymentTask):
    """Task for applying changes to cluster

    LOGIC
    Use cases:
    1. Cluster exists, node(s) added
       If we add one node to an existing OpenStack cluster, other nodes may
       require updates (redeployment), but they don't require full system
       reinstallation.
       How to: run deployment for all nodes whose system type is 'target'.
       Run provisioning first and then deployment for nodes which are in the
       'discover' system type.
       Q: Should we care about node status (provisioning, error, deploying)?
       A: offline - when node doesn't respond (agent doesn't run, not
              implemented); let's say user should remove this node from
              cluster before deployment.
          ready - target OS is loaded and node is Ok, we redeploy
              ready nodes only if cluster has pending changes i.e.
              network or cluster attrs were changed
          discover - in discovery mode, provisioning is required
          provisioning - at the time of task execution there should not be
              such a case. If there is - previous provisioning has failed.
              Possible solution would be to try again to provision
          deploying - the same as provisioning, but stuck in a previous
              deploy; solution - try to deploy. May lose some data if
              reprovisioned.
          error - recognized error in deployment or provisioning... We have to
              know where the error was. If in deployment - reprovisioning
              may not be a solution (can lose data).
              If in provisioning - can do provisioning & deployment again
    2. New cluster, just added nodes
       Provision first, and run deploy as second
    3. Remove some nodes and add some other nodes
       Deletion task will run first and will actually remove nodes, including
       removal from the DB; however, removal from the DB happens when
       remove_nodes_resp is run. It means we have to filter nodes and not run
       deployment on those which are prepared for removal.
    """

    @classmethod
    def message(cls, task, nodes, affected_nodes=None, deployment_tasks=None,
                reexecutable_filter=None, graph_type=None,
                force=False, dry_run=False, **kwargs):
        """Builds RPC message for deployment task.

        :param task: the database task object instance
        :param nodes: the nodes for deployment
        :param affected_nodes: the list of nodes affected by deployment
        :param deployment_tasks: the list of tasks_ids to execute,
            if None, all tasks will be executed
        :param reexecutable_filter: the list of events to find subscribed tasks
        :param force: force
        :param dry_run: dry run
        :param graph_type: deployment graph type
        """
        logger.debug("DeploymentTask.message(task=%s)" % task.uuid)
        task_ids = deployment_tasks or []
        objects.NodeCollection.lock_nodes(nodes)
        for n in nodes:
            if n.pending_roles:
                n.roles = n.roles + n.pending_roles
                n.pending_roles = []
            # If the receiver for some reason didn't update
            # the node's status to provisioned when deployment
            # started, we should do it in nailgun
            if n.status in (consts.NODE_STATUSES.deploying,):
                n.status = consts.NODE_STATUSES.provisioned
            n.progress = 0

        # A database commit is required to release the nodes lock before
        # serialization starts, otherwise concurrent nailgun API queries will
        # be locked at the database level for the whole time it is running.
        db().commit()

        deployment_tasks = objects.Cluster.get_deployment_tasks(
            task.cluster, graph_type
        )
        task.graph_type = graph_type or consts.DEFAULT_DEPLOYMENT_GRAPH_TYPE
        objects.Transaction.attach_tasks_snapshot(
            task,
            deployment_tasks
        )

        deployment_mode, message = cls.call_deployment_method(
            task, tasks=deployment_tasks, nodes=nodes,
            affected_nodes=affected_nodes, selected_task_ids=task_ids,
            events=reexecutable_filter, force=force,
            dry_run=dry_run, **kwargs
        )

        # After serialization set pending_addition to False
        for node in nodes:
            node.pending_addition = False

        objects.Transaction.attach_cluster_settings(
            task,
            {
                'editable': objects.Cluster.get_editable_attributes(
                    task.cluster, all_plugins_versions=True)
            }
        )
        objects.Transaction.attach_network_settings(
            task,
            objects.Cluster.get_network_attributes(task.cluster)
        )

        # get puppet_debug attribute
        cluster_settings = objects.Cluster.get_editable_attributes(
            task.cluster)
        if cluster_settings['common'].get('puppet_debug', {}).get('value'):
            message['debug'] = True

        rpc_message = make_astute_message(
            task,
            deployment_mode,
            'deploy_resp',
            message
        )
        db().flush()
        return rpc_message

    @staticmethod
    def _extend_tasks_list(dst, src):
        """Append tasks from src to dst, joining identical ones.

        Append tasks from the list 'src' to the list 'dst' and
        join tasks with the same id (concatenate their lists of
        node uids).

        :param dst: list of serialized tasks
        :param src: list of serialized tasks
        :return: None
        """
        src_dict = {t['id']: t for t in src if 'id' in t}
        for t in dst:
            if 'id' not in t or t['id'] not in src_dict:
                continue
            t['uids'].extend(src_dict[t['id']]['uids'])
            src_dict.pop(t['id'])
        dst.extend(src_dict.values())
        dst.extend(t for t in src if 'id' not in t)
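
    # Illustrative usage sketch (comment only) for _extend_tasks_list, with
    # hypothetical minimal task dicts that carry only the 'id' and 'uids'
    # keys it touches:
    #
    #   dst = [{'id': 'upload_nodes', 'uids': ['1']}]
    #   src = [{'id': 'upload_nodes', 'uids': ['2']},
    #          {'id': 'update_hosts', 'uids': ['2']}]
    #   DeploymentTask._extend_tasks_list(dst, src)
    #   # dst == [{'id': 'upload_nodes', 'uids': ['1', '2']},
    #   #         {'id': 'update_hosts', 'uids': ['2']}]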

    @classmethod
    def granular_deploy(cls, transaction, tasks, nodes,
                        affected_nodes, selected_task_ids, events,
                        dry_run=False, **kwargs):
        """Builds parameters for granular deployment.

        :param transaction: the transaction object
        :param tasks: the list of deployment tasks to execute
        :param nodes: the nodes for deployment
        :param affected_nodes: the list of nodes affected by deployment
        :param selected_task_ids: the list of tasks_ids to execute,
            if None, all tasks will be executed
        :param events: the list of events to find subscribed tasks
        :param dry_run: dry run
        :return: the arguments for RPC message
        """
        if dry_run:
            raise errors.DryRunSupportedOnlyByLCM()

        graph = orchestrator_graph.AstuteGraph(transaction.cluster, tasks)
        graph.check()
        graph.only_tasks(selected_task_ids)

        # NOTE(dshulyak) At this point parts of the orchestration can be
        # empty, it should not cause any issues with deployment/progress
        # and was done by design
        resolver = TagResolver(nodes)
        serialized_cluster = deployment_serializers.serialize(
            graph, transaction.cluster, nodes)
        cls._save_deployment_info(transaction, serialized_cluster)
        serialized_cluster = deployment_info_to_legacy(serialized_cluster)
        pre_deployment = stages.pre_deployment_serialize(
            graph, transaction.cluster, nodes,
            resolver=resolver)
        post_deployment = stages.post_deployment_serialize(
            graph, transaction.cluster, nodes,
            resolver=resolver)

        if affected_nodes:
            graph.reexecutable_tasks(events)
            serialized_affected_nodes = deployment_serializers.serialize(
                graph, transaction.cluster, affected_nodes
            )
            serialized_affected_nodes = deployment_info_to_legacy(
                serialized_affected_nodes)
            serialized_cluster.extend(serialized_affected_nodes)

            pre_deployment_affected = stages.pre_deployment_serialize(
                graph, transaction.cluster, affected_nodes,
                resolver=resolver)
            post_deployment_affected = stages.post_deployment_serialize(
                graph, transaction.cluster, affected_nodes,
                resolver=resolver)

            cls._extend_tasks_list(pre_deployment, pre_deployment_affected)
            cls._extend_tasks_list(post_deployment, post_deployment_affected)

        return {
            'deployment_info': serialized_cluster,
            'pre_deployment': pre_deployment,
            'post_deployment': post_deployment
        }

    deploy = granular_deploy

    @classmethod
    def task_deploy(cls, transaction, tasks, nodes, affected_nodes,
                    selected_task_ids, events, dry_run=False,
                    **kwargs):
        """Builds parameters for task based deployment.

        :param transaction: the transaction object
        :param tasks: the list of deployment tasks to execute
        :param nodes: the nodes for deployment
        :param affected_nodes: the list of nodes affected by deployment
        :param selected_task_ids: the list of tasks_ids to execute,
            if None, all tasks will be executed
        :param events: the list of events to find subscribed tasks
        :param dry_run: dry run
        :return: RPC method name, the arguments for RPC message
        """
        if dry_run:
            raise errors.DryRunSupportedOnlyByLCM()

        task_processor = task_based_deployment.TaskProcessor
        for task in tasks:
            task_processor.ensure_task_based_deploy_allowed(task)

        logger.info("cluster serialization is started.")
        serialized_cluster = deployment_serializers.serialize(
            None, transaction.cluster, nodes
        )
        cls._save_deployment_info(transaction, serialized_cluster)
        serialized_cluster = deployment_info_to_legacy(serialized_cluster)
        logger.info("cluster serialization is finished.")

        tasks_events = events and \
            task_based_deployment.TaskEvents('reexecute_on', events)

        logger.debug("tasks serialization is started.")
        directory, graph = task_based_deployment.TasksSerializer.serialize(
            transaction.cluster, nodes, tasks, affected_nodes,
            selected_task_ids, tasks_events
        )
        logger.info("tasks serialization is finished.")
        return {
            "deployment_info": serialized_cluster,
            "tasks_directory": directory,
            "tasks_graph": graph
        }


class ClusterTransaction(DeploymentTask):

    ignored_types = {
        consts.ORCHESTRATOR_TASK_TYPES.skipped,
        consts.ORCHESTRATOR_TASK_TYPES.group,
        consts.ORCHESTRATOR_TASK_TYPES.stage,
    }

    node_statuses_for_redeploy = {
        consts.NODE_STATUSES.discover,
        consts.NODE_STATUSES.error,
        consts.NODE_STATUSES.provisioned,
        consts.NODE_STATUSES.stopped,
    }

    @classmethod
    def get_deployment_methods(cls, cluster):
        return ['task_deploy']

    @classmethod
    def mark_skipped(cls, tasks, ids_not_to_skip):
        """Change the type of tasks whose ids are not in ids_not_to_skip to skipped

        :param tasks: the list of deployment tasks to execute
        :param ids_not_to_skip: the list of task ids that will not be skipped
        """
        task_ids = set(ids_not_to_skip)
        for task in tasks:
            if (task['id'] not in task_ids and
                    task['type'] not in cls.ignored_types):
                task = task.copy()
                task['type'] = consts.ORCHESTRATOR_TASK_TYPES.skipped
            yield task
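
    # Illustrative usage sketch (comment only) for mark_skipped, with
    # hypothetical minimal task dicts (only the 'id' and 'type' keys it
    # inspects are shown):
    #
    #   tasks = [{'id': 'netconfig', 'type': 'puppet'},
    #            {'id': 'globals', 'type': 'puppet'}]
    #   list(ClusterTransaction.mark_skipped(tasks, ['globals']))
    #   # -> 'netconfig' is yielded as a copy with the skipped type,
    #   #    'globals' is yielded unchanged.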

    @classmethod
    def is_node_for_redeploy(cls, node):
        """Should the node's previous state be cleared.

        :param node: db Node object or None
        :returns: Bool
        """
        if node is None:
            return False
        node_state = objects.Node.get_status(node)
        return node_state in cls.node_statuses_for_redeploy

    @classmethod
    def get_current_state(cls, cluster, nodes, tasks):
        """Current state for deployment.

        :param cluster: Cluster db object
        :param nodes: iterable of Node db objects
        :param tasks: list of tasks whose state is needed
        :returns: current state {task_name: {node_uid: <astute.yaml>, ...},}
        """
        nodes = {n.uid: n for n in nodes}
        nodes[consts.MASTER_NODE_UID] = None
        tasks_names = [t['id'] for t in tasks
                       if t['type'] not in cls.ignored_types]
        transactions = list(
            objects.TransactionCollection.get_successful_transactions_per_task(
                cluster.id, tasks_names, nodes)
        )
        # sort by transaction.id
        transactions.sort(key=lambda x: x[0].id)

        state = {}
        for transaction, data in groupby(transactions, lambda x: x[0]):
            data = list(data)
            transaction_nodes = [d[1] for d in data]
            deployment_info = objects.Transaction.get_deployment_info(
                transaction, node_uids=transaction_nodes)
            for _, node_uid, task_name in data:
                task_state = state.setdefault(task_name, {})
                task_state.setdefault('nodes', {})
                if cls.is_node_for_redeploy(nodes.get(node_uid)):
                    task_state['nodes'][node_uid] = {}
                else:
                    node_info = deployment_info['nodes'].get(node_uid, {})
                    task_state['nodes'][node_uid] = node_info
                task_state['common'] = deployment_info['common']
        return state
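
    # Illustrative sketch (comment only) of the structure built by
    # get_current_state, with hypothetical task names and node uids:
    #
    #   {
    #       'netconfig': {
    #           'nodes': {'1': {...}, '2': {}},  # {} for nodes to redeploy
    #           'common': {...},
    #       },
    #       'globals': {...},
    #   }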

    @classmethod
    def task_deploy(cls, transaction, tasks, nodes, force=False,
                    selected_task_ids=None, dry_run=False, noop_run=False,
                    **kwargs):
        logger.info("The cluster transaction is initiated.")
        logger.info("cluster serialization is started.")

        # we should update information for all nodes except deleted ones
        # TODO(bgaifullin) pass role resolver to serializers
        deployment_info = deployment_serializers.serialize_for_lcm(
            transaction.cluster, nodes
        )
        logger.info("cluster serialization is finished.")

        if selected_task_ids:
            tasks = list(cls.mark_skipped(tasks, selected_task_ids))

        if force:
            current_state = {'common': {}, 'nodes': {}}
        else:
            current_state = cls.get_current_state(
                transaction.cluster, nodes, tasks)

        expected_state = cls._save_deployment_info(
            transaction, deployment_info
        )
        # Added cluster state
        expected_state['nodes'][None] = {}

        context = lcm.TransactionContext(expected_state, current_state)

        logger.debug("tasks serialization is started.")
        # TODO(bgaifullin) Primary roles applied in deployment_serializers
        # need to move this code from deployment serializer
        # also role resolver should be created after serialization completed
        resolver = TagResolver(nodes)

        cluster = transaction.cluster
        if objects.Cluster.is_propagate_task_deploy_enabled(cluster):
            logger.info("The legacy tasks adaptation is used.")
            if transaction.graph_type == consts.DEFAULT_DEPLOYMENT_GRAPH_TYPE:
                plugin_tasks = objects.Cluster.get_legacy_plugin_tasks(
                    cluster
                )
            else:
                plugin_tasks = None
            tasks = adapt_legacy_tasks(tasks, plugin_tasks, resolver)

        directory, graph, metadata = lcm.TransactionSerializer.serialize(
            context,
            tasks,
            resolver,
        )
        logger.info("tasks serialization is finished.")

        return {
            "tasks_directory": directory,
            "tasks_graph": graph,
            "tasks_metadata": metadata,
            "dry_run": dry_run,
            "noop_run": noop_run,
        }


class UpdateNodesInfoTask(object):
    """Task for updating nodes.yaml and /etc/hosts on all slaves

    The task is intended to be used in order to update both nodes.yaml and
    /etc/hosts on all slaves. This task isn't going to manage node or cluster
    statuses, and should be used only in one case - when we remove some node
    and don't add anything new (if some new node is added, these tasks will
    be executed without any additional help).
    """

    # the following post deployment tasks are used to update nodes
    # information on all slaves
    _tasks = [
        tasks_serializer.UploadNodesInfo.identity,
        tasks_serializer.UpdateHosts.identity,
    ]

    @classmethod
    def message(cls, task):
        graph = orchestrator_graph.AstuteGraph(task.cluster)
        graph.only_tasks(cls._tasks)

        rpc_message = make_astute_message(
            task,
            'execute_tasks',
            'deploy_resp',
            {
                'tasks': graph.post_tasks_serialize([])
            }
        )
        db().flush()
        return rpc_message


class ProvisionTask(object):

    @classmethod
    def _get_provision_method(cls, cluster):
        """Get provision method name based on cluster attributes

        :param cluster: Cluster db object
        :returns: string - an Astute callable
        """
        cluster_attrs = objects.Attributes.merged_attrs_values(
            cluster.attributes)
        provision_method = cluster_attrs.get('provision', {}).get(
            'method', consts.PROVISION_METHODS.cobbler)

        # NOTE(kozhukalov):
        #
        # Map provisioning method to Astute callable.
        if provision_method == consts.PROVISION_METHODS.cobbler:
            return 'native_provision'
        return 'image_provision'

    @classmethod
    def message(cls, task, nodes_to_provisioning):
        logger.debug("ProvisionTask.message(task=%s)" % task.uuid)
        task = objects.Task.get_by_uid(
            task.id,
            fail_if_not_found=True,
            lock_for_update=True
        )
        objects.NodeCollection.lock_nodes(nodes_to_provisioning)
        serialized_cluster = provisioning_serializers.serialize(
            task.cluster, nodes_to_provisioning)

        for node in nodes_to_provisioning:
            if settings.FAKE_TASKS or settings.FAKE_TASKS_AMQP:
                continue
            logs_utils.prepare_syslog_dir(node)

        rpc_message = make_astute_message(
            task,
            cls._get_provision_method(task.cluster),
            'provision_resp',
            {
                'provisioning_info': serialized_cluster
            }
        )
        db().commit()
        return rpc_message


class DeletionTask(object):

    @classmethod
    def format_node_to_delete(cls, node, mclient_remove=True):
        """Convert node to dict for deletion.

        :param node: Node object
        :param mclient_remove: Boolean flag telling Astute whether to also
            remove node from mclient (True by default). For offline nodes this
            can be set to False to avoid long retrying unsuccessful deletes.
        :return: Dictionary in format accepted by Astute.
        """
        return {
            'id': node.id,
            'uid': node.id,
            'roles': node.roles,
            'slave_name': objects.Node.get_slave_name(node),
            'mclient_remove': mclient_remove,
        }

    # TODO(ikalnitsky): Get rid of this, maybe move to fake handlers?
    @classmethod
    def format_node_to_restore(cls, node):
        """Convert node to dict for restoring, works only in fake mode.

        Fake mode can optionally restore the removed node (this simulates
        the node being rediscovered). This method creates the appropriate
        input for that procedure.

        :param node:
        :return: dict
        """
        # only fake tasks
        if cls.use_fake():
            new_node = {}
            reset_attrs = (
                'id',
                'cluster_id',
                'roles',
                'pending_deletion',
                'pending_addition',
                'group_id',
                'hostname',
            )
            for prop in object_mapper(node).iterate_properties:
                if isinstance(
                    prop, ColumnProperty
                ) and prop.key not in reset_attrs:
                    new_node[prop.key] = getattr(node, prop.key)
            return new_node
        # /only fake tasks

    @classmethod
    def prepare_nodes_for_task(cls, nodes, mclient_remove=True):
        """Format all specified nodes for the deletion task.

        :param nodes:
        :param mclient_remove:
        :return: dict
        """
        nodes_to_delete = []
        nodes_to_restore = []

        for node in nodes:
            nodes_to_delete.append(
                cls.format_node_to_delete(node, mclient_remove=mclient_remove)
            )
            if not node.pending_deletion:
                objects.Node.update(node, {'pending_deletion': True})
                db().flush()
            node_to_restore = cls.format_node_to_restore(node)
            if node_to_restore:
                nodes_to_restore.append(node_to_restore)

        return {
            'nodes_to_delete': nodes_to_delete,
            'nodes_to_restore': nodes_to_restore,
        }

    @classmethod
    def get_task_nodes_for_cluster(cls, cluster):
        return cls.prepare_nodes_for_task(TaskHelper.nodes_to_delete(cluster))

    @classmethod
    def remove_undeployed_nodes_from_db(cls, nodes_to_delete):
        """Removes undeployed nodes from the given list from the DB.

        :param List nodes_to_delete: List of nodes as returned by
            :meth:`DeletionTask.format_node_to_delete`
        :returns: Remaining (deployed) nodes to delete.
        """
        node_names_dict = dict(
            (node['id'], node['slave_name']) for node in nodes_to_delete)
        node_ids = [n['id'] for n in nodes_to_delete]
        discovery_ids = objects.NodeCollection.discovery_node_ids()

        objects.NodeCollection.delete_by_ids(
            set(discovery_ids) & set(node_ids))
        db.commit()

        remaining_nodes_db = db().query(
            Node.id).filter(Node.id.in_(node_names_dict.keys()))
        remaining_nodes_ids = set([
            row[0] for row
            in remaining_nodes_db
        ])
        remaining_nodes = filter(
            lambda node: node['id'] in remaining_nodes_ids,
            nodes_to_delete
        )

        deleted_nodes_ids = set(node_names_dict).difference(
            remaining_nodes_ids)
        slave_names_joined = ', '.join([slave_name
                                        for id, slave_name
                                        in six.iteritems(node_names_dict)
                                        if id in deleted_nodes_ids])
        if len(slave_names_joined):
            logger.info("Nodes are not deployed yet, can't clean MBR: %s",
                        slave_names_joined)

        return remaining_nodes

    @classmethod
    def execute(cls, task, nodes=None, respond_to='remove_nodes_resp',
                check_ceph=False):
        """Call remote Astute method to remove nodes from a cluster

        :param task: Task object
        :param nodes: List of nodes to delete
        :param respond_to: RPC method which receives data from remote method
        :param check_ceph: Boolean flag to tell Astute to run (or not run)
            checks to prevent deletion of OSD nodes. If True this task will
            fail if a node to be deleted has Ceph data on it. This flag must
            be False if deleting all nodes.
        """
        logger.debug("DeletionTask.execute(task=%s, nodes=%s)",
                     task.uuid, nodes)
        task_uuid = task.uuid
        logger.debug("Nodes deletion task is running")

        # TODO(ikalnitsky): remove this, let the flow always go through Astute
        # No need to call Astute if no nodes are specified
        if task.name == consts.TASK_NAMES.cluster_deletion and \
                not (nodes and nodes['nodes_to_delete']):
            logger.debug("No nodes specified, exiting")
            rcvr = rpc.receiver.NailgunReceiver()
            rcvr.remove_cluster_resp(
                task_uuid=task_uuid,
                status=consts.TASK_STATUSES.ready,
                progress=100
            )
            return

        nodes_to_delete = nodes['nodes_to_delete']
        nodes_to_restore = nodes['nodes_to_restore']

        nodes_to_delete = cls.remove_undeployed_nodes_from_db(nodes_to_delete)

        logger.debug(
            "Removing nodes from database and pending them to clean their "
            "MBR: %s",
            ', '.join(node['slave_name'] for node in nodes_to_delete)
        )

        msg_delete = make_astute_message(
            task,
            'remove_nodes',
            respond_to,
            {
                'nodes': nodes_to_delete,
                'check_ceph': check_ceph,
                'engine': {
                    'url': settings.COBBLER_URL,
                    'username': settings.COBBLER_USER,
                    'password': settings.COBBLER_PASSWORD,
                    'master_ip': settings.MASTER_IP,
                }
            }
        )
        db().flush()

        # only fake tasks
        if cls.use_fake() and nodes_to_restore:
            msg_delete['args']['nodes_to_restore'] = nodes_to_restore
        # /only fake tasks

        logger.debug("Calling rpc remove_nodes method with nodes %s",
                     nodes_to_delete)
        db().commit()
        rpc.cast('naily', msg_delete)

    @classmethod
    def use_fake(cls):
        return settings.FAKE_TASKS or settings.FAKE_TASKS_AMQP


class DeleteIBPImagesTask(object):

    @classmethod
    def message(cls, task, images_data):
        files = []
        for image_path, image_data in six.iteritems(images_data):
            file_name = os.path.basename(
                six.moves.urllib.parse.urlsplit(image_data['uri']).path)
            files.append(os.path.join(
                settings.PROVISIONING_IMAGES_PATH, file_name)
            )
            if image_path == '/':
                yaml_name = '{0}.{1}'.format(file_name.split('.')[0], 'yaml')
                files.append(os.path.join(
                    settings.PROVISIONING_IMAGES_PATH, yaml_name))

        task_params = {
            'parameters': {
                'cmd': 'rm -f {0}'.format(' '.join(files)),
                'timeout': settings.REMOVE_IMAGES_TIMEOUT,
            }
        }
        rpc_message = make_astute_message(
            task,
            'execute_tasks',
            'remove_images_resp',
            {
                'tasks': [tasks_templates.make_shell_task(
                    [consts.MASTER_NODE_UID], task_params
                )]
            }
        )
        return rpc_message

    @classmethod
    def execute(cls, cluster, image_data):
        task = Task(name=consts.TASK_NAMES.remove_images, cluster=cluster)
        db().add(task)
        message = cls.message(task, image_data)
        db().commit()
        rpc.cast('naily', message)


class StopDeploymentTask(object):

    @classmethod
    def message(cls, task, stop_task):
        nodes_to_stop = db().query(Node).filter(
            Node.cluster_id == task.cluster.id
        ).filter(
            not_(Node.status == 'ready')
        ).yield_per(100)
        rpc_message = make_astute_message(
            task,
            "stop_deploy_task",
            "stop_deployment_resp",
            {
                "stop_task_uuid": stop_task.uuid,
                "nodes": [
                    {
                        'uid': n.uid,
                        'roles': n.roles,
                        'slave_name': objects.Node.get_slave_name(n),
                        'admin_ip': objects.Cluster.get_network_manager(
                            n.cluster
                        ).get_admin_ip_for_node(n)
                    } for n in nodes_to_stop
                ],
                "engine": {
                    "url": settings.COBBLER_URL,
                    "username": settings.COBBLER_USER,
                    "password": settings.COBBLER_PASSWORD,
                    "master_ip": settings.MASTER_IP,
                }
            }
        )
        db().commit()
        return rpc_message

    @classmethod
    def execute(cls, task, deploy_task=None, provision_task=None):
        db().commit()
        if provision_task:
            rpc.cast(
                'naily',
                cls.message(task, provision_task),
                service=True
            )
        if deploy_task:
            rpc.cast(
                'naily',
                cls.message(task, deploy_task),
                service=True
            )


class ResetEnvironmentTask(object):

    @classmethod
    def message(cls, task):
        nodes_to_reset = db().query(Node).filter(
            Node.cluster_id == task.cluster.id
        ).yield_per(100)
        rpc_message = make_astute_message(
            task,
            "reset_environment",
            "reset_environment_resp",
            {
                "nodes": [
                    {
                        'uid': n.uid,
                        'roles': n.roles,
                        'slave_name': objects.Node.get_slave_name(n)
                    } for n in nodes_to_reset
                ],
                "engine": {
                    "url": settings.COBBLER_URL,
                    "username": settings.COBBLER_USER,
                    "password": settings.COBBLER_PASSWORD,
                    "master_ip": settings.MASTER_IP,
                }
            }
        )
        db().commit()
        return rpc_message


class RemoveClusterKeys(object):
    """Task that deletes all ssh and ssl data for a deployed environment

    Meant to be run after environment reset to make sure that new keys will be
    generated.
    """

    @classmethod
    def message(cls, task):
        rpc_message = make_astute_message(
            task,
            "execute_tasks",
            "remove_keys_resp",
            {
                "tasks": [
                    tasks_templates.make_shell_task(
                        [consts.MASTER_NODE_UID],
                        {
                            "parameters": {
                                "cmd": "rm -rf /var/lib/fuel/keys/{0}".format(
                                    task.cluster.id),
                                "timeout": 30
                            }
                        }
                    )
                ]
            }
        )
        return rpc_message


class RemoveIronicBootstrap(object):
    """Task that deletes Ironic's bootstrap images

    Meant to be run after environment reset to make sure that new images will
    be generated.
    """

    @classmethod
    def message(cls, task):
        rpc_message = make_astute_message(
            task,
            "execute_tasks",
            "remove_ironic_bootstrap_resp",
            {
                "tasks": [
                    tasks_templates.make_shell_task(
                        [consts.MASTER_NODE_UID],
                        {
                            "parameters": {
                                "cmd": "rm -rf /var/www/nailgun/bootstrap/"
                                       "ironic/{0}".format(task.cluster.id),
                                "timeout": 30
                            }
                        }
                    )
                ]
            }
        )
        return rpc_message


class ClusterDeletionTask(object):

    @classmethod
    def execute(cls, task):
        logger.debug("Cluster deletion task is running")
        attrs = objects.Attributes.merged_attrs_values(task.cluster.attributes)
        if attrs.get('provision'):
            if (task.cluster.release.operating_system ==
                    consts.RELEASE_OS.ubuntu and
                    attrs['provision']['method'] ==
                    consts.PROVISION_METHODS.image):
                logger.debug("Delete IBP images task is running")
                DeleteIBPImagesTask.execute(
                    task.cluster, attrs['provision']['image_data'])
        else:
            logger.debug("Skipping IBP images deletion task")
        DeletionTask.execute(
            task,
            nodes=DeletionTask.get_task_nodes_for_cluster(task.cluster),
            respond_to='remove_cluster_resp'
        )


class BaseNetworkVerification(object):

    def __init__(self, task, config):
        self.task = task
        self.config = config

    def get_ifaces_on_undeployed_node(self, node, node_json, networks_to_skip):
        """Get list of interfaces and their VLANs to be checked for the node

        :param node: Node object
        :param node_json: dictionary for saving result
        :param networks_to_skip: list of networks which should be skipped
        :return:
        """
        # Save bonds info to be able to check net-probe results without the
        # need to access nodes in the DB (a node can be deleted before the
        # test is completed). This info is needed for non-deployed nodes only.
        bonds = {}
        for bond in node.bond_interfaces:
            bonds[bond.name] = sorted(s.name for s in bond.slaves)
        if bonds:
            node_json['bonds'] = bonds

        for iface in node.nic_interfaces:
            assigned_networks = iface.assigned_networks_list
            # In case of present bond interfaces - collect assigned networks
            # against the bonds' slave NICs. We should skip bonds with LACP
            # and Round-robin (balance-rr) modes as Fuel does not set them up
            # for network_checker now.
            if iface.bond:
                assigned_networks = iface.bond.assigned_networks_list
            vlans = []
            for ng in assigned_networks:
                # Handle FuelWeb admin network first.
                if ng.group_id is None:
                    vlans.append(0)
                    continue
                if ng.name in networks_to_skip:
                    continue
                data_ng = filter(lambda i: i['name'] == ng.name,
                                 self.config)[0]
                if data_ng['vlans']:
                    vlans.extend(data_ng['vlans'])
                else:
                    # in the absence of vlans net_probe will
                    # send packets on the untagged iface
                    vlans.append(0)
            if not vlans:
                continue
            modes_to_skip = (consts.BOND_MODES.l_802_3ad,
                             consts.BOND_MODES.balance_rr)
            if iface.bond and iface.bond.mode in modes_to_skip:
                node_json['excluded_networks'].append(
                    {'iface': iface.name})
            else:
                node_json['networks'].append(
                    {'iface': iface.name, 'vlans': vlans})

    def get_ifaces_on_deployed_node(self, node, node_json, networks_to_skip):
        """Get list of interfaces and their VLANs to be checked for the node

        :param node: Node object
        :param node_json: dictionary for saving result
        :param networks_to_skip: list of networks which should be skipped
        :return:
        """
        for iface in node.interfaces:
            # In case of present bond interfaces - collect assigned networks
            # against the bonds themselves. We can check bonds as they are up
            # on deployed nodes.
            vlans = []
            for ng in iface.assigned_networks_list:
                # Handle FuelWeb admin network first.
                if ng.group_id is None:
                    vlans.append(0)
                    continue
                if ng.name in networks_to_skip:
                    continue
                data_ng = filter(lambda i: i['name'] == ng.name,
                                 self.config)[0]
                if data_ng['vlans']:
                    vlans.extend(data_ng['vlans'])
                else:
                    # in the absence of vlans net_probe will
                    # send packets on the untagged iface
                    vlans.append(0)
            if vlans:
                node_json['networks'].append(
                    {'iface': iface.name, 'vlans': vlans})

    def get_message_body(self):
        nodes = []
        offline_nodes = 0
        nodes_w_public = set()
        nodes_wo_dpdk = set()
        for node in self.task.cluster.nodes:
            if node.offline:
                continue
            if objects.Node.should_have_public_with_ip(node):
                nodes_w_public.add(node.id)
            # it's ok to check private networks on non deployed dpdk nodes
            if not (objects.Node.dpdk_enabled(node) and
                    node.status == consts.NODE_STATUSES.ready):
                nodes_wo_dpdk.add(node.id)
        if len(nodes_w_public) == 1:
            # don't check public VLANs if there is only one node with public
            nodes_w_public = set()
        if len(nodes_wo_dpdk) == 1:
            # We cannot check private VLANs if there is only one
            # node without DPDK
            nodes_wo_dpdk = set()
        for node in self.task.cluster.nodes:
            if node.offline:
                offline_nodes += 1
                continue

            node_json = {
                'uid': node.id,
                'name': node.name,
                'status': node.status,
                'networks': [],
                'excluded_networks': [],
            }
            networks_to_skip = []
            if node.id not in nodes_w_public:
                networks_to_skip.append(consts.NETWORKS.public)
            if node.id not in nodes_wo_dpdk:
                # After deployment we can't check traffic on a DPDK enabled
                # interface since it's no longer visible in the system. So we
                # should skip the "Private" network from network verification
                # after deployment.
                networks_to_skip.append(consts.NETWORKS.private)

            # Check bonds on deployed nodes and check bonds' slave NICs on
            # undeployed ones.
            if node.status == consts.NODE_STATUSES.ready:
                self.get_ifaces_on_deployed_node(node, node_json,
                                                 networks_to_skip)
            else:
                self.get_ifaces_on_undeployed_node(node, node_json,
                                                   networks_to_skip)

            nodes.append(node_json)

        return {
            'nodes': nodes,
            'offline': offline_nodes
        }

    def get_message(self):
        msg_body = self.get_message_body()
        message = make_astute_message(
            self.task,
            self.task.name,
            '{0}_resp'.format(self.task.name),
            msg_body
        )
        return message

    def execute(self, task=None):
        # task is there for backward compatibility
        message = self.get_message()

        logger.debug("%s method is called with: %s",
                     self.task.name, message)

        db().commit()
        rpc.cast('naily', message)

    @classmethod
    def enabled(cls, cluster):
        """Verify that subtask is enabled based on cluster configuration."""
        return True


class VerifyNetworksForTemplateMixin(object):

    @staticmethod
    def _get_private_vlan_range(cluster, template):
        if cluster.network_config.segmentation_type == \
                consts.NEUTRON_SEGMENT_TYPES.vlan and \
                'neutron/private' in template['roles']:
            vlan_range = cluster.network_config.vlan_range
            return range(vlan_range[0], vlan_range[1] + 1)
        return None

    @classmethod
    def _add_interface(cls, ifaces, ifname, vlan_ids, bond_name=None):
        ifname, vlan = cls._parse_template_iface(ifname)
        bond_name = bond_name or ifname
        ifaces[bond_name].add(int(vlan))
        if vlan_ids:
            ifaces[bond_name].update(vlan_ids)
        return ifname

    @classmethod
    def _get_transformations(cls, node):
        templates_for_node_mapping = \
            node.network_template['templates_for_node_role']
        cluster = node.cluster
        counter_by_network_template = collections.defaultdict(int)
        for n in cluster.nodes:
            seen_templates = set()
            for r in n.all_roles:
                for net_template in templates_for_node_mapping[r]:
                    # the same template can be used for multiple node roles,
                    # therefore ensure that it is counted only once
                    if net_template not in seen_templates:
                        counter_by_network_template[net_template] += 1
                        seen_templates.add(net_template)

        node_templates = set()
        for role_name in node.all_roles:
            node_templates.update(templates_for_node_mapping[role_name])

        templates = node.network_template['templates']
        for template_name in node_templates:
            if counter_by_network_template[template_name] < 2:
                logger.warning(
                    'We have only one node in cluster with '
                    'network template %s.'
                    ' Verification for this network template will be skipped.',
                    template_name)
                continue
            template = templates[template_name]
            transformations = template['transformations']
            vlan_ids = cls._get_private_vlan_range(cluster, template)
            private_endpoint = template['roles'].get('neutron/private')
            for transformation in transformations:
                if transformation['action'] in ['add-port', 'add-bond']:
                    # After deployment we can't check traffic on a DPDK
                    # enabled interface since it's not visible in the system.
                    # So we should skip the "Private" network from network
                    # verification.
                    if (transformation.get('provider', '') == 'dpdkovs' and
                            node.status == consts.NODE_STATUSES.ready and
                            transformation.get('bridge', '') ==
                            private_endpoint):
                        continue
                    yield transformation, vlan_ids

    @staticmethod
    def _parse_template_iface(ifname):
        vlan = 0
        chunks = ifname.rsplit('.', 1)
        if len(chunks) == 2:
            ifname, vlan = chunks
        return ifname, vlan
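
    # Illustrative sketch (comment only) of _parse_template_iface on
    # hypothetical template interface names:
    #
    #   VerifyNetworksForTemplateMixin._parse_template_iface('eth1.101')
    #   # -> ('eth1', '101')
    #   VerifyNetworksForTemplateMixin._parse_template_iface('eth1')
    #   # -> ('eth1', 0)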
  1143. @classmethod
  1144. def get_ifaces_from_template_on_undeployed_node(cls, node, node_json):
  1145. """Retrieves list of network interfaces on the undeployed node
  1146. List is retrieved from the network template.
  1147. """
  1148. bonds = collections.defaultdict(list)
  1149. ifaces = collections.defaultdict(set)
  1150. for transformation, vlan_ids in cls._get_transformations(node):
  1151. if transformation['action'] == 'add-port':
  1152. cls._add_interface(ifaces, transformation['name'], vlan_ids)
  1153. elif transformation['action'] == 'add-bond':
  1154. if transformation.get('mode') == consts.BOND_MODES.l_802_3ad:
  1155. node_json['excluded_networks'].append(
  1156. transformation['name'])
  1157. else:
  1158. for ifname in sorted(transformation['interfaces']):
  1159. ifname = cls._add_interface(ifaces, ifname, vlan_ids)
  1160. bond_name = transformation['name']
  1161. bonds[bond_name].append(ifname)
  1162. for if_name, vlans in six.iteritems(ifaces):
  1163. node_json['networks'].append({
  1164. 'iface': if_name,
  1165. 'vlans': sorted(vlans)
  1166. })
  1167. if bonds:
  1168. node_json['bonds'] = bonds
  1169. @classmethod
  1170. def get_ifaces_from_template_on_deployed_node(cls, node, node_json,
  1171. skip_private):
  1172. """Retrieves list of network interfaces on the deployed node
  1173. List is retrieved from the network template.
  1174. """
  1175. ifaces = collections.defaultdict(set)
  1176. net_manager = objects.Cluster.get_network_manager(node.cluster)
  1177. private_endpoint = \
  1178. net_manager.get_node_endpoint_by_network_role(node,
  1179. 'neutron/private')
  1180. for transformation, vlan_ids in cls._get_transformations(node):
  1181. if (skip_private and transformation.get('bridge', '') ==
  1182. private_endpoint):
  1183. continue
  1184. if transformation['action'] == 'add-port':
  1185. cls._add_interface(ifaces, transformation['name'], vlan_ids)
  1186. elif transformation['action'] == 'add-bond':
  1187. bond_name = transformation['name']
  1188. for ifname in transformation['interfaces']:
  1189. cls._add_interface(ifaces, ifname, vlan_ids, bond_name)
  1190. for if_name, vlans in six.iteritems(ifaces):
  1191. node_json['networks'].append({
  1192. 'iface': if_name,
  1193. 'vlans': sorted(vlans)
  1194. })
  1195. def get_ifaces_on_undeployed_node(self, node, node_json, networks_to_skip):
  1196. """Retrieves list of network interfaces on the undeployed node.
  1197. By default list of network interfaces is based on the information
  1198. recieved from the fuel agent unless cluster has network template
  1199. attached. In this case, list of interfaces retrieved from the
  1200. network template.
  1201. """
  1202. if node.network_template:
  1203. self.get_ifaces_from_template_on_undeployed_node(node, node_json)
  1204. return
  1205. super(VerifyNetworksForTemplateMixin, self
  1206. ).get_ifaces_on_undeployed_node(node, node_json,
  1207. networks_to_skip)
  1208. def get_ifaces_on_deployed_node(self, node, node_json, networks_to_skip):
  1209. """Retrieves list of network interfaces on the deployed node."""
  1210. if node.network_template:
  1211. self.get_ifaces_from_template_on_deployed_node(
  1212. node, node_json,
  1213. skip_private=consts.NETWORKS.private in networks_to_skip)
  1214. return
  1215. super(VerifyNetworksForTemplateMixin, self
  1216. ).get_ifaces_on_deployed_node(node, node_json, networks_to_skip)
  1217. class VerifyNetworksTask(VerifyNetworksForTemplateMixin,
  1218. BaseNetworkVerification):
  1219. def __init__(self, *args):
  1220. super(VerifyNetworksTask, self).__init__(*args)
  1221. self.subtasks = []
  1222. def add_subtask(self, subtask):
  1223. self.subtasks.append(subtask.get_message())
  1224. def get_message(self):
  1225. message = super(VerifyNetworksTask, self).get_message()
  1226. message['subtasks'] = self.subtasks
  1227. return message
  1228. class CheckDhcpTask(VerifyNetworksForTemplateMixin,
  1229. BaseNetworkVerification):
  1230. """Task for dhcp verification."""
class MulticastVerificationTask(BaseNetworkVerification):

    def __init__(self, task):
        corosync = task.cluster.attributes['editable']['corosync']
        group = corosync['group']['value']
        port = corosync['port']['value']
        conf = {'group': group, 'port': port}
        super(MulticastVerificationTask, self).__init__(task, conf)

    def get_message_body(self):
        # multicast verification should be done only for the network which
        # corosync uses for communication - management in our case
        all_nics = objects.cluster.Cluster.get_ifaces_for_network_in_cluster(
            self.task.cluster, 'management')
        return {
            'nodes': [dict(self.config, iface=nic[1], uid=str(nic[0]))
                      for nic in all_nics]
        }
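    # Illustrative note (values assumed): each element of the returned
    # 'nodes' list is the corosync config extended with the node's uid and
    # its management interface, e.g.
    #     {'group': '226.94.1.1', 'port': '12000', 'uid': '7', 'iface': 'eth1'}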
    @classmethod
    def enabled(cls, cluster):
        """Checks whether the task is enabled.

        Multicast should be enabled only if the 'corosync' section is
        present in the editable attributes, which is not the case if the
        cluster was upgraded from 5.0.
        """
        # TODO(dshulyak) enable it, when it will be possible to upgrade
        # mcagent and network checker for old envs
        return False
class CheckNetworksTask(object):

    @classmethod
    def execute(cls, task, data, check_all_parameters=False):
        """Execute NetworkCheck task.

        :param task: Task instance
        :param data: task data
        :param check_all_parameters: boolean flag specifying that all
            network checks should be run. Without this flag only the check
            of network configuration parameters is run. For now,
            check_all_parameters is set to True only if the task is executed
            from the VerifyNetworks or CheckBeforeDeployment tasks.
        """
        checker = NetworkCheck(task, data)
        checker.check_configuration()
        if check_all_parameters:
            checker.check_network_template()
            warn_msgs = checker.check_interface_mapping()
            if warn_msgs:
                task.result = {"warning": warn_msgs}
        db().commit()
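# Illustrative usage sketch (assumption, not taken from this module): the
# lightweight configuration-only check and the full verification differ only
# in the flag, e.g.
#     CheckNetworksTask.execute(task, data)
#     CheckNetworksTask.execute(task, data, check_all_parameters=True)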
class CheckBeforeDeploymentTask(object):

    @classmethod
    def execute(cls, task):
        fire_callback_on_before_deployment_check(
            task.cluster, TaskHelper.nodes_to_deploy(task.cluster)
        )
        cls._check_nodes_are_online(task)
        cls._check_nodes_roles(task)
        cls._check_ceph(task)
        cls._check_public_network(task)
        cls._validate_network_template(task)

        # TODO(asvechnikov): Make an appropriate versioning of tasks
        if objects.Release.is_nfv_supported(task.cluster.release):
            cls._check_sriov_properties(task)

            if objects.Cluster.dpdk_enabled(task.cluster):
                cls._check_dpdk_properties(task)

        if objects.Release.is_external_mongo_enabled(task.cluster.release):
            cls._check_mongo_nodes(task)
    @classmethod
    def _check_nodes_are_online(cls, task):
        offline_nodes = db().query(Node).\
            filter(Node.cluster == task.cluster).\
            filter_by(online=False).\
            filter_by(pending_deletion=False)

        offline_nodes_not_ready = [n for n in offline_nodes
                                   if n.status != consts.NODE_STATUSES.ready]
        nodes_to_deploy = TaskHelper.nodes_to_deploy(task.cluster)
        offline_nodes_to_redeploy = [
            n for n in offline_nodes
            if n.status == consts.NODE_STATUSES.ready and
            n in nodes_to_deploy]

        if offline_nodes_not_ready or offline_nodes_to_redeploy:
            node_names = ','.join(
                map(lambda n: n.full_name,
                    offline_nodes_not_ready + offline_nodes_to_redeploy))
            raise errors.NodeOffline(
                u'Nodes "{0}" are offline.'
                ' Remove them from environment '
                'and try again.'.format(node_names))
    @classmethod
    def _check_nodes_roles(cls, task):
        cluster = task.cluster
        models = objects.Cluster.get_restrictions_models(cluster)
        nodes = objects.Cluster.get_nodes_not_for_deletion(cluster)
        roles_metadata = objects.Cluster.get_roles(cluster)

        for node in nodes:
            roles = node.all_roles
            # TODO(asvechnikov): move these methods out from validator
            assignment.NodeAssignmentValidator.check_roles_for_conflicts(
                roles, roles_metadata)
            assignment.NodeAssignmentValidator.check_roles_requirement(
                roles, roles_metadata, models)
    @classmethod
    def _check_ceph(cls, task):
        storage = objects.Attributes.merged_attrs(
            task.cluster.attributes
        )['storage']
        for option in storage:
            if '_ceph' in option and\
               storage[option] and\
               storage[option]['value'] is True:
                cls._check_ceph_osds(task)
                return
    @classmethod
    def _check_ceph_osds(cls, task):
        osd_count = len(filter(
            lambda node: 'ceph-osd' in node.all_roles,
            task.cluster.nodes))
        osd_pool_size = int(objects.Attributes.merged_attrs(
            task.cluster.attributes
        )['storage']['osd_pool_size']['value'])
        if osd_count < osd_pool_size:
            raise errors.NotEnoughOsdNodes(
                'Number of OSD nodes (%s) cannot be less than '
                'the Ceph object replication factor (%s). '
                'Please either assign ceph-osd role to more nodes, '
                'or reduce Ceph replication factor in the Settings tab.' %
                (osd_count, osd_pool_size))
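    # Worked example (illustrative): with a typical replication factor of 3
    # (osd_pool_size) and only 2 nodes carrying the 'ceph-osd' role,
    # 2 < 3 and NotEnoughOsdNodes is raised; adding a third ceph-osd node
    # (or lowering the factor to 2) satisfies the check.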
    @classmethod
    def _check_public_network(cls, task):
        all_public = \
            objects.Cluster.should_assign_public_to_all_nodes(task.cluster)
        public_networks = filter(
            lambda ng: ng.name == 'public',
            task.cluster.network_groups)

        for public in public_networks:
            nodes = objects.NodeCollection.get_by_group_id(public.group_id)
            if all_public:
                nodes_count = nodes.count()
            else:
                nodes_count = sum(
                    int(objects.Node.should_have_public_with_ip(node)) for
                    node in nodes)
            vip_count = 0
            if task.cluster.is_ha_mode and (
                any('controller' in node.all_roles
                    for node in nodes)
            ):
                # 2 IPs are required for VIPs (1 for haproxy + 1 for vrouter)
                vip_count = 2
            if cls.__network_size(public) < nodes_count + vip_count:
                error_message = cls.__format_network_error(public,
                                                           nodes_count)
                raise errors.NetworkCheckError(error_message)
    @classmethod
    def __network_size(cls, network):
        return sum(len(netaddr.IPRange(ip_range.first, ip_range.last))
                   for ip_range in network.ip_ranges)
    @classmethod
    def __format_network_error(cls, public, nodes_count):
        return 'Not enough IP addresses. Public network {0} must have ' \
            'at least {1} IP addresses '.format(public.cidr, nodes_count) + \
            'for the current environment.'
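    # Worked example (illustrative): a public IP range 172.16.0.2-172.16.0.6
    # contains 5 addresses. In HA mode with controllers in the node group,
    # vip_count is 2, so at most 3 nodes can get public IPs; a fourth node
    # makes nodes_count + vip_count = 6 > 5 and NetworkCheckError is raised.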
    @classmethod
    def _check_mongo_nodes(cls, task):
        """Check for mongo nodes presence in env with external mongo."""
        components = objects.Attributes.merged_attrs(
            task.cluster.attributes).get("additional_components", None)
        if (components and components["ceilometer"]["value"] and
                components["mongo"]["value"] and
                len(objects.Cluster.get_nodes_by_role(
                    task.cluster, 'mongo')) > 0):
            raise errors.ExtMongoCheckerError
        if (components and components["ceilometer"]["value"] and not
                components["mongo"]["value"] and
                len(objects.Cluster.get_nodes_by_role(
                    task.cluster, 'mongo')) == 0):
            raise errors.MongoNodesCheckError
    @classmethod
    def _validate_network_template(cls, task):
        cluster = task.cluster
        if not cluster.network_config.configuration_template:
            return
        template = (cluster.network_config.configuration_template
                    ['adv_net_template'])

        # The following loop does two things: it checks that the networks of
        # each network group from the template belong to those of the
        # corresponding node group of the cluster, and it accumulates node
        # roles from the template for a further check.
        template_node_roles = set()
        for node_group in cluster.node_groups:
            template_for_node_group = (
                template[node_group.name] if node_group.name in template
                else template['default']
            )
            required_nets = set(
                template_for_node_group['network_assignments'])

            ng_nets = set(ng.name for ng in node_group.networks)
            # Admin net doesn't have a nodegroup so must be added to
            # the default group
            if node_group.is_default:
                ng_nets.add(consts.NETWORKS.fuelweb_admin)

            missing_nets = required_nets - ng_nets
            if missing_nets:
                error_msg = ('The following network groups are missing: {0} '
                             'from node group {1} and are required by the '
                             'current network '
                             'template.'.format(
                                 ','.join(missing_nets),
                                 node_group.name)
                             )
                raise errors.NetworkTemplateMissingNetworkGroup(error_msg)

            template_node_roles.update(
                template_for_node_group['templates_for_node_role'])

            cls._check_dpdk_network_scheme(
                template_for_node_group['network_scheme'], node_group)

        cluster_roles = objects.Cluster.get_assigned_roles(cluster)
        missing_roles = cluster_roles - template_node_roles
        if missing_roles:
            error_roles = ', '.join(missing_roles)
            error_msg = ('Node roles {0} are missing from the '
                         'network configuration template').format(error_roles)
            raise errors.NetworkTemplateMissingRoles(error_msg)
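    # Illustrative sketch (assumed structure, not copied from a real
    # template): the template walked above looks roughly like
    #     {'adv_net_template': {
    #         'default': {
    #             'network_assignments': {'public': {...}, 'storage': {...}},
    #             'templates_for_node_role': {'controller': [...],
    #                                         'compute': [...]},
    #             'network_scheme': {...},
    #         }}}
    # i.e. one entry per node group (falling back to 'default'), whose
    # network_assignments keys must be a subset of the group's networks and
    # whose templates_for_node_role keys must cover all assigned roles.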
    # TODO(akostrikov) https://bugs.launchpad.net/fuel/+bug/1561485
    @classmethod
    def _check_sriov_properties(cls, task):
        # check hypervisor type
        h_type = objects.Cluster.get_editable_attributes(
            task.cluster)['common']['libvirt_type']['value']

        for node in task.cluster.nodes:
            if not objects.Node.sriov_enabled(node):
                continue
            if h_type != consts.HYPERVISORS.kvm:
                raise errors.InvalidData(
                    'Only KVM hypervisor works with SRIOV.')
    @classmethod
    def _check_dpdk_network_scheme(cls, network_scheme, node_group):
        """DPDK endpoint provider check.

        Checks that an endpoint with the dpdk provider is mapped only to
        the neutron/private network role.
        """
        for net_template in network_scheme.values():
            roles = net_template['roles']

            endpoints = set()
            for transformation in net_template['transformations']:
                if (
                    transformation.get('provider') ==
                    consts.NEUTRON_L23_PROVIDERS.dpdkovs
                ):
                    endpoints.add(transformation['bridge'])

            if not endpoints:
                continue

            if len(endpoints) > 1:
                raise errors.NetworkCheckError(
                    'dpdkovs provider can be assigned to only one endpoint,'
                    ' but it is assigned to {}: {}'.format(
                        len(endpoints), ', '.join(endpoints))
                )

            endpoint_roles = collections.defaultdict(set)
            for role_name, endpoint in roles.items():
                endpoint_roles[endpoint].add(role_name)

            endpoint = endpoints.pop()
            if endpoint_roles[endpoint] != {'neutron/private'}:
                raise errors.NetworkCheckError(
                    "Only the neutron/private network role can be assigned"
                    " to node group '{}' with DPDK".format(
                        node_group.name)
                )
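    # Illustrative sketch (assumed values, not from a real template): a
    # network_scheme entry that passes this check has exactly one
    # transformation with provider 'dpdkovs', and its bridge is mapped to
    # the single role 'neutron/private', e.g.
    #     {'transformations': [{'action': 'add-port', 'name': 'enp1s0f0',
    #                           'bridge': 'br-prv', 'provider': 'dpdkovs'}],
    #      'roles': {'neutron/private': 'br-prv'}}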
    @classmethod
    def _check_dpdk_properties(cls, task):
        dpdk_enabled = False
        for node in objects.Cluster.get_nodes_not_for_deletion(task.cluster):
            if (not objects.NodeAttributes.is_cpu_pinning_enabled(node) and
                    not objects.Node.dpdk_enabled(node)):
                continue

            try:
                objects.NodeAttributes.distribute_node_cpus(node)
            except ValueError as e:
                raise errors.InvalidData(
                    "Node '{}': {}".format(node.id, e.message))

            if not objects.Node.dpdk_enabled(node):
                continue

            dpdk_enabled = True
            if not objects.NodeAttributes.is_dpdk_hugepages_enabled(node):
                raise errors.InvalidData(
                    "Hugepages for DPDK are not configured"
                    " for node '{}'".format(node.id))

            if not objects.NodeAttributes.is_nova_hugepages_enabled(node):
                raise errors.InvalidData(
                    "Hugepages for Nova are not configured"
                    " for node '{}'".format(node.id))

        if dpdk_enabled:
            # check hypervisor type
            h_type = objects.Cluster.get_editable_attributes(
                task.cluster)['common']['libvirt_type']['value']
            if h_type != consts.HYPERVISORS.kvm:
                raise errors.InvalidData(
                    'Only KVM hypervisor works with DPDK.')
class DumpTask(object):

    @classmethod
    def conf(cls, auth_token=None):
        logger.debug("Preparing config for snapshot")
        nodes = db().query(Node).filter(
            Node.status.in_(['ready', 'provisioned', 'deploying',
                             'error', 'stopped'])
        ).all()

        dump_conf = deepcopy(settings.DUMP)
        if auth_token:
            dump_conf['auth-token'] = auth_token

        for node in nodes:
            if node.cluster is None:
                logger.info("Node {id} is not assigned to an environment, "
                            "falling back to root".format(id=node.id))
                ssh_user = "root"
            else:
                editable_attrs = objects.Cluster.get_editable_attributes(
                    node.cluster
                )
                try:
                    ssh_user = editable_attrs['service_user']['name']['value']
                except KeyError:
                    logger.info("Environment {env} doesn't support non-root "
                                "accounts on the slave nodes, falling back "
                                "to root for node-{node}".format(
                                    env=node.cluster_id,
                                    node=node.id))
                    ssh_user = "root"

            host = {
                'hostname': objects.Node.get_slave_name(node),
                'address': node.ip,
                'ssh-user': ssh_user,
                'ssh-key': settings.SHOTGUN_SSH_KEY,
            }

            # save controllers
            if 'controller' in node.roles:
                dump_conf['dump']['controller']['hosts'].append(host)
            # save slaves
            dump_conf['dump']['slave']['hosts'].append(host)

        if 'controller' in dump_conf['dump'] and \
                not dump_conf['dump']['controller']['hosts']:
            del dump_conf['dump']['controller']
        if 'slave' in dump_conf['dump'] and \
                not dump_conf['dump']['slave']['hosts']:
            del dump_conf['dump']['slave']
        # render postgres connection data in dump settings
        dump_conf['dump']['local']['objects'].append({
            'type': 'postgres',
            'dbhost': settings.DATABASE['host'],
            'dbname': settings.DATABASE['name'],
            'username': settings.DATABASE['user'],
            'password': settings.DATABASE['passwd'],
        })

        # render cobbler connection data in dump settings
        # NOTE: user/password are not needed for cobbler
        dump_conf['dump']['local']['objects'].append({
            'type': 'xmlrpc',
            'server': settings.COBBLER_URL,
            'methods': [
                'get_distros',
                'get_profiles',
                'get_systems',
            ],
            'to_file': 'cobbler.txt',
        })

        logger.debug("Dump conf: %s", str(dump_conf))
        return dump_conf
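    # Illustrative note (shape only, values assumed): the returned dump_conf
    # extends settings.DUMP roughly like
    #     {'dump': {'controller': {'hosts': [<host>, ...]},
    #               'slave': {'hosts': [<host>, ...]},
    #               'local': {'objects': [<postgres>, <xmlrpc>, ...]}},
    #      'auth-token': '<token>'}
    # where each <host> is the dict built above with hostname, address,
    # ssh-user and ssh-key.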
    @classmethod
    def execute(cls, task, conf=None, auth_token=None):
        logger.debug("DumpTask: task={0}".format(task.uuid))
        message = make_astute_message(
            task,
            'dump_environment',
            'dump_environment_resp',
            {
                'settings': conf or cls.conf(auth_token)
            }
        )
        db().commit()
        rpc.cast('naily', message)
class GenerateCapacityLogTask(object):

    @classmethod
    def execute(cls, task):
        logger.debug("GenerateCapacityLogTask: task=%s" % task.uuid)
        unallocated_nodes = db().query(Node).filter_by(
            cluster_id=None).count()
        # Use Node.cluster_id != (None) for PEP-8 compliance.
        allocated_nodes = db().query(Node).\
            filter(Node.cluster_id != (None)).count()
        node_allocation = db().query(Cluster, func.count(Node.id)).\
            outerjoin(Node).group_by(Cluster)
        env_stats = []
        for allocation in node_allocation:
            env_stats.append({'cluster': allocation[0].name,
                              'nodes': allocation[1]})
        allocation_stats = {'allocated': allocated_nodes,
                            'unallocated': unallocated_nodes}
        fuel_data = {
            "release": settings.VERSION['release'],
            "uuid": settings.FUEL_KEY
        }
        roles_stat = {}
        for node in db().query(Node):
            if node.roles:
                roles_list = '+'.join(sorted(node.roles))
                if roles_list in roles_stat:
                    roles_stat[roles_list] += 1
                else:
                    roles_stat[roles_list] = 1
        capacity_data = {'environment_stats': env_stats,
                         'allocation_stats': allocation_stats,
                         'fuel_data': fuel_data,
                         'roles_stat': roles_stat}

        capacity_log = CapacityLog()
        capacity_log.report = capacity_data
        db().add(capacity_log)
        db().flush()

        task.result = {'log_id': capacity_log.id}
        task.status = 'ready'
        task.progress = '100'
        db().commit()
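# Illustrative note (shape only): the capacity report persisted above looks
# roughly like
#     {'environment_stats': [{'cluster': 'env-1', 'nodes': 5}, ...],
#      'allocation_stats': {'allocated': 5, 'unallocated': 2},
#      'fuel_data': {'release': '<version>', 'uuid': '<fuel key>'},
#      'roles_stat': {'controller': 3, 'ceph-osd+compute': 2}}
# where the cluster names and counts shown here are hypothetical.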
class CheckRepoAvailability(BaseNetworkVerification):

    def get_message(self):
        rpc_message = make_astute_message(
            self.task,
            "check_repositories",
            "check_repositories_resp",
            {
                "nodes": self._get_nodes_to_check(),
                "urls": objects.Cluster.get_repo_urls(self.task.cluster),
            }
        )
        return rpc_message

    def execute(self):
        db().commit()
        rpc.cast('naily', self.get_message())

    def _get_nodes_to_check(self):
        nodes = [{'uid': consts.MASTER_NODE_UID}]
        for n in objects.Cluster.get_nodes_not_for_deletion(
                self.task.cluster):
            if n.online:
                nodes.append({'uid': n.id})
        return nodes
class CheckRepoAvailabilityWithSetup(object):

    def __init__(self, task, config):
        self.task = task
        self.config = config

    @classmethod
    def get_config(cls, cluster):
        urls = objects.Cluster.get_repo_urls(cluster)
        nodes = []
        errors = []
        # if there is nothing to verify - just skip this task
        if not urls:
            return
        all_public = \
            objects.Cluster.should_assign_public_to_all_nodes(cluster)
        public_networks = filter(
            lambda ng: ng.name == 'public', cluster.network_groups)
        for public in public_networks:
            # we are not running this verification for nodes not in discover
            # state
            nodes_with_public_ip = []
            required_ips = 0
            group_nodes = objects.NodeCollection.filter_by(
                None, group_id=public.group_id,
                status=consts.NODE_STATUSES.discover).all()

            for node in group_nodes:
                if not (all_public or
                        objects.Node.should_have_public_with_ip(node)):
                    continue

                ip = NetworkManager.get_ip_by_network_name(node, public.name)
                nodes_with_public_ip.append((node, ip))
                if ip is None:
                    required_ips += 1
            if not nodes_with_public_ip:
                continue

            # we are not doing any allocations during verification -
            # just ask for free ips and use them
            free_ips = iter(NetworkManager.get_free_ips(public,
                                                        required_ips))
            mask = public.cidr.split('/')[1]
            lacp_modes = (
                consts.BOND_MODES.lacp_balance_tcp,
                consts.BOND_MODES.l_802_3ad)
            for node, ip in nodes_with_public_ip:
                if not node.online:
                    continue
                iface = NetworkManager.find_nic_assoc_with_ng(
                    node, public)
                if iface.bond and iface.bond.mode in lacp_modes:
                    errors.append(
                        'Iface {0} on node {1} is configured to use '
                        'lacp-balance-tcp mode as part of {2}. Repo '
                        'availability verification for this node '
                        'will be skipped.'.format(
                            iface.name, node.name, iface.bond.name))
                    continue
                ip = ip or next(free_ips)
                node_config = {
                    'addr': '{0}/{1}'.format(ip, mask),
                    'gateway': public.gateway,
                    'vlan': public.vlan_start or 0,
                    'iface': iface.name,
                    'urls': urls,
                    'uid': node.uid}
                nodes.append(node_config)
        # if no nodes are present - this task will be skipped
        return nodes, errors
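    # Illustrative note (values assumed): each node_config produced above
    # has the shape
    #     {'addr': '172.16.0.5/24', 'gateway': '172.16.0.1', 'vlan': 0,
    #      'iface': 'eth1', 'urls': ['http://...'], 'uid': '3'}
    # i.e. enough information for the temporary public-interface setup used
    # by the repo availability check.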
    def get_message(self):
        return make_astute_message(
            self.task,
            "check_repositories_with_setup",
            "check_repositories_with_setup_resp",
            {
                "nodes": self.config
            }
        )
class CreateStatsUserTask(object):

    @classmethod
    def message(cls, task, primary_controller):
        rpc_message = make_astute_message(
            task,
            'execute_tasks',
            'stats_user_resp',
            {
                'tasks': [{
                    'type': consts.ORCHESTRATOR_TASK_TYPES.puppet,
                    'uids': [primary_controller.id],
                    'parameters': {
                        'puppet_modules': '/etc/puppet/modules',
                        'puppet_manifest': '/etc/puppet/modules/osnailyfacter'
                                           '/modular/keystone'
                                           '/workloads_collector_add.pp',
                        'cwd': '/',
                    }
                }]
            }
        )
        return rpc_message

    @classmethod
    def execute(cls, task, primary_controller):
        db().commit()
        rpc.cast(
            'naily',
            cls.message(task, primary_controller)
        )
class RemoveStatsUserTask(object):

    @classmethod
    def message(cls, task, primary_controller):
        rpc_message = make_astute_message(
            task,
            'execute_tasks',
            'stats_user_resp',
            {
                'tasks': [{
                    'type': consts.ORCHESTRATOR_TASK_TYPES.puppet,
                    'uids': [primary_controller.id],
                    'parameters': {
                        'puppet_modules': '/etc/puppet/modules',
                        'puppet_manifest': '/etc/puppet/modules/osnailyfacter'
                                           '/modular/keystone'
                                           '/workloads_collector_remove.pp',
                        'cwd': '/',
                    }
                }]
            }
        )
        return rpc_message

    @classmethod
    def execute(cls, task, primary_controller):
        db().commit()
        rpc.cast(
            'naily',
            cls.message(task, primary_controller)
        )
class UpdateOpenstackConfigTask(BaseDeploymentTask):

    @staticmethod
    def task_deploy(transaction, nodes, tasks, task_ids):
        # TODO(akostrikov) https://bugs.launchpad.net/fuel/+bug/1561485
        directory, graph = task_based_deployment.TasksSerializer.serialize(
            transaction.cluster, nodes, tasks, task_ids=task_ids
        )
        return make_astute_message(
            transaction, "task_deploy", "update_config_resp", {
                "tasks_directory": directory,
                "tasks_graph": graph
            }
        )

    @staticmethod
    def granular_deploy(transaction, nodes, tasks, task_ids):
        graph = orchestrator_graph.AstuteGraph(transaction.cluster, tasks)
        graph.only_tasks(task_ids)
        deployment_tasks = graph.stage_tasks_serialize(
            graph.graph.topology, nodes
        )
        return make_astute_message(
            transaction, 'execute_tasks', 'update_config_resp', {
                'tasks': deployment_tasks,
            })

    @classmethod
    def message(cls, task, nodes, graph_type, **kwargs):
        configs = objects.OpenstackConfigCollection.find_configs_for_nodes(
            task.cluster, nodes)
        updated_configs = set()

        for config in configs:
            updated_configs.update(config.configuration)

        if updated_configs:
            updated_configs.add('*')  # '*' means any config
        else:
            raise errors.NoChanges()

        refreshable_tasks = objects.Cluster.get_refreshable_tasks(
            task.cluster, updated_configs, graph_type
        )
        task_ids = {t['id'] for t in refreshable_tasks}
        deployment_tasks = objects.Cluster.get_deployment_tasks(
            task.cluster, graph_type)

        return cls.call_deployment_method(
            task, tasks=deployment_tasks, nodes=nodes, task_ids=task_ids
        )[1]
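# Illustrative note (assumption, not confirmed by this module):
# get_refreshable_tasks is expected to match deployment tasks whose
# refresh-on declaration names one of the updated config sections (or the
# '*' wildcard added above), e.g. a hypothetical task bound to
# 'nova_config' would be rerun when an uploaded OpenStack config touches
# that section.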
if settings.FAKE_TASKS or settings.FAKE_TASKS_AMQP:
    rpc.cast = fake_cast