Fuel plugin for Mellanox support
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

mellanox_settings.py 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. #!/usr/bin/python
  2. # Copyright 2016 Mellanox Technologies, Ltd
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  13. # implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. import os
  17. import sys
  18. import subprocess
  19. import yaml
  20. import glob
  21. import logging
  22. import traceback
  23. MAX_NUM_VFS = 16
  24. MLNX_SECTION = 'mellanox-plugin'
  25. SETTINGS_FILE = '/etc/astute.yaml'
  26. PLUGIN_OVERRIDE_FILE = '/etc/hiera/override/plugins.yaml'
  27. MLNX_DRIVERS_LIST = { 'ConnectX-3': {'eth_driver' : 'mlx4_en', 'ib_driver' : 'eth_ipoib'},
  28. 'ConnectX-4': {'eth_driver' : 'mlx5_core', 'ib_driver' : 'eth_ipoib'}}
  29. MLNX_DRIVERS = set([MLNX_DRIVERS_LIST[card][net]
  30. for card in MLNX_DRIVERS_LIST
  31. for net in MLNX_DRIVERS_LIST[card]])
  32. ETH_DRIVERS = set([MLNX_DRIVERS_LIST[card][net]
  33. for card in MLNX_DRIVERS_LIST
  34. for net in MLNX_DRIVERS_LIST[card]
  35. if net == 'eth_driver'])
  36. IB_DRIVERS = MLNX_DRIVERS - ETH_DRIVERS
  37. ISER_IFC_NAME = 'mlnx_iser0'
  38. LOG_FILE = '/var/log/mellanox-plugin.log'
  39. class MellanoxSettingsException(Exception):
  40. pass
  41. class MellanoxSettings(object):
  42. data = None
  43. mlnx_interfaces_section = None
  44. @classmethod
  45. def get_mlnx_section(cls):
  46. if cls.data is None:
  47. raise MellanoxSettingsException("No YAML file loaded")
  48. if MLNX_SECTION not in cls.data:
  49. raise MellanoxSettingsException(
  50. "Couldn't find section '{0}'".format(MLNX_SECTION)
  51. )
  52. return cls.data[MLNX_SECTION]
  53. @classmethod
  54. def get_bridge_for_network(cls, network):
  55. network_to_bridge = {
  56. 'private': 'prv',
  57. 'management': 'mgmt',
  58. 'storage': 'storage',
  59. }
  60. return 'br-{0}'.format(network_to_bridge[network])
  61. @classmethod
  62. def get_interface_by_network(cls, network):
  63. if network not in ('management', 'storage', 'private'):
  64. raise MellanoxSettingsException("Unknown network: {0}".format(network))
  65. mlnx_interfaces_section = cls.mlnx_interfaces_section
  66. ifc = mlnx_interfaces_section[network]['interface']
  67. return ifc
  68. @classmethod
  69. def get_card_type(cls, driver):
  70. for card in MLNX_DRIVERS_LIST.keys():
  71. if driver in MLNX_DRIVERS_LIST[card].values():
  72. network_driver_type = MLNX_DRIVERS_LIST[card].keys()[MLNX_DRIVERS_LIST[card].values()\
  73. .index(driver)]
  74. return card
  75. @classmethod
  76. def add_cx_card(cls):
  77. mlnx_interfaces = cls.mlnx_interfaces_section
  78. drivers = list()
  79. interfaces = list()
  80. mlnx = cls.get_mlnx_section()
  81. for network_type, ifc_dict in mlnx_interfaces.iteritems():
  82. if 'driver' in ifc_dict and network_type in ['private','management','storage']:
  83. # The bond interfaces extend the original list,
  84. # otherwise, the interface is appended to the list.
  85. if(type(ifc_dict['driver']) is list):
  86. drivers.extend(ifc_dict['driver'])
  87. else:
  88. drivers.append(ifc_dict['driver'])
  89. if(type(ifc_dict['interface']) is list):
  90. interfaces.extend(ifc_dict['interface'])
  91. else:
  92. interfaces.append(ifc_dict['interface'])
  93. drivers_set = list(set(drivers))
  94. interfaces_set = list(set(interfaces))
  95. if (len(drivers_set) > 1):
  96. logging.error("Multiple ConnectX adapters was found in this environment.")
  97. raise MellanoxSettingsException(
  98. "Multiple ConnectX adapters was found in this environment."
  99. )
  100. else:
  101. current_driver = drivers_set[0]
  102. mellanox_interface = interfaces_set[0]
  103. if current_driver in ETH_DRIVERS:
  104. mlnx['network_type'] = 'ethernet'
  105. mlnx['cx_card'] = cls.get_card_type(current_driver)
  106. elif current_driver in IB_DRIVERS:
  107. mlnx['network_type'] = 'infiniband'
  108. ibdev = os.popen('ibdev2netdev').readlines()
  109. if not ibdev:
  110. mlnx['cx_card'] = 'none'
  111. logging.error('Failed executing ibdev2netdev')
  112. return 0
  113. if ('bonds' in cls.data and mellanox_interface.startswith('bond')):
  114. mellanox_interface = cls.data['bonds'][mellanox_interface]['interfaces'][0]
  115. interface_line = [l for l in ibdev if mellanox_interface in l]
  116. if interface_line and 'mlx5' in interface_line.pop():
  117. mlnx['cx_card'] = 'ConnectX-4'
  118. else:
  119. mlnx['cx_card'] = 'ConnectX-3'
  120. network_info_msg = 'Detected Network Type is: {0} '.format(mlnx['network_type'])
  121. card_info_msg = 'Detected Card Type is: {0} '.format(mlnx['cx_card'])
  122. logging.info(network_info_msg)
  123. logging.info(card_info_msg)
  124. @classmethod
  125. def add_driver(cls):
  126. interfaces = cls.get_interfaces_section()
  127. mlnx = cls.get_mlnx_section()
  128. drivers = cls.get_physical_interfaces()
  129. if len(drivers) > 1:
  130. raise MellanoxSettingsException(
  131. "Found mismatching Mellanox drivers on different interfaces: "
  132. "{0}".format(mlnx_drivers)
  133. )
  134. if len(drivers) == 0:
  135. raise MellanoxSettingsException(
  136. "\nNo Network role was assigned to Mellanox interfaces. "
  137. "\nPlease go to nodes tab in Fuel UI and reset your network "
  138. "roles in interfaces screen. aborting. "
  139. )
  140. mlnx['driver'] = drivers[0]
  141. @classmethod
  142. def add_physical_port(cls):
  143. interfaces = cls.get_interfaces_section()
  144. mlnx = cls.get_mlnx_section()
  145. private_ifc = cls.get_interface_by_network('private')
  146. if mlnx['driver'] == MLNX_DRIVERS_LIST[mlnx['cx_card']]['ib_driver']:
  147. if 'bus_info' not in interfaces[private_ifc]['vendor_specific']:
  148. raise MellanoxSettingsException(
  149. "Couldn't find 'bus_info' for interface "
  150. "{0}".format(private_ifc)
  151. )
  152. mlnx['physical_port'] = interfaces[private_ifc]['vendor_specific']['bus_info']
  153. elif mlnx['driver'] == MLNX_DRIVERS_LIST[mlnx['cx_card']]['eth_driver']:
  154. # If only iSER
  155. if not cls.is_sriov_enabled() and cls.is_iser_enabled():
  156. mlnx = cls.get_mlnx_section()
  157. storage_ifc = cls.get_interface_by_network('storage')
  158. mlnx['physical_port'] = storage_ifc
  159. # If SR-IOV
  160. else:
  161. mlnx['physical_port'] = private_ifc
  162. @classmethod
  163. def add_storage_vlan(cls):
  164. mlnx = cls.get_mlnx_section()
  165. mlnx_interfaces_section = cls.mlnx_interfaces_section
  166. vlan = mlnx_interfaces_section['storage']['vlan']
  167. # Set storage vlan in mlnx section if vlan is used with iser
  168. if vlan:
  169. try:
  170. mlnx['storage_vlan'] = int(vlan)
  171. except ValueError:
  172. raise MellanoxSettingsException(
  173. "Failed reading vlan for br-storage"
  174. )
  175. if mlnx['driver'] == MLNX_DRIVERS_LIST[mlnx['cx_card']]['ib_driver']:
  176. pkey = format((int(vlan) ^ 0x8000),'04x')
  177. mlnx['storage_pkey'] = pkey
  178. @classmethod
  179. def add_storage_parent(cls):
  180. mlnx = cls.get_mlnx_section()
  181. storage_ifc = cls.get_interface_by_network('storage')
  182. mlnx['storage_parent'] = storage_ifc
  183. @classmethod
  184. def add_iser_interface_name(cls):
  185. mlnx = cls.get_mlnx_section()
  186. storage_ifc = cls.get_interface_by_network('storage')
  187. if mlnx['driver'] == MLNX_DRIVERS_LIST[mlnx['cx_card']]['eth_driver']:
  188. mlnx['iser_ifc_name'] = ISER_IFC_NAME
  189. elif mlnx['driver'] == MLNX_DRIVERS_LIST[mlnx['cx_card']]['ib_driver']:
  190. interfaces = cls.get_interfaces_section()
  191. mlnx['iser_ifc_name'] = interfaces[storage_ifc]['vendor_specific']['bus_info']
  192. else:
  193. raise MellanoxSettingsException("Could not find 'driver' in "
  194. "{0} section".format(MLNX_SECTION))
  195. @classmethod
  196. def set_storage_networking_scheme(cls):
  197. endpoints = cls.get_endpoints_section()
  198. interfaces = cls.get_interfaces_section()
  199. transformations = cls.data['network_scheme']['transformations']
  200. mlnx = cls.get_mlnx_section()
  201. for transformation in transformations:
  202. if ('bridges' in transformation) and ('br-storage' in transformation['bridges']):
  203. transformations.remove(transformation)
  204. elif ('name' in transformation) and ('br-storage' == transformation['name']) \
  205. and ('action' in transformation) and ('add-br' == transformation['action']):
  206. transformations.remove(transformation)
  207. # Handle iSER interface with and w/o vlan tagging
  208. storage_vlan = mlnx.get('storage_vlan')
  209. storage_parent = cls.get_interface_by_network('storage')
  210. if storage_vlan and mlnx['driver'] == MLNX_DRIVERS_LIST[mlnx['cx_card']]['eth_driver']: # Use VLAN dev
  211. vlan_name = "{0}.{1}".format(ISER_IFC_NAME, storage_vlan)
  212. # Set storage rule to iSER interface vlan interface
  213. cls.data['network_scheme']['roles']['storage'] = vlan_name
  214. # Set iSER interface vlan interface
  215. transformations.append({
  216. 'action': 'add-port',
  217. 'name': vlan_name,
  218. 'vlan_id': int(storage_vlan),
  219. 'vlan_dev': ISER_IFC_NAME,
  220. 'mtu': '1500'
  221. })
  222. endpoints[vlan_name] = (
  223. endpoints.pop('br-storage', {})
  224. )
  225. else:
  226. vlan_name = mlnx['iser_ifc_name']
  227. # Commented until fixing bug LP #1450420
  228. # Meanwhile using a workaround of configuring ib0
  229. # and changing to its child in post deployment
  230. #if storage_vlan: # IB child
  231. # vlan_name = mlnx['iser_ifc_name'] = \
  232. # "{0}.{1}".format(mlnx['iser_ifc_name'],
  233. # mlnx['storage_pkey'])
  234. # Set storage rule to iSER port
  235. cls.data['network_scheme']['roles']['storage'] = \
  236. mlnx['iser_ifc_name']
  237. # Set iSER endpoint with br-storage parameters
  238. endpoints[mlnx['iser_ifc_name']] = (
  239. endpoints.pop('br-storage', {})
  240. )
  241. interfaces[mlnx['iser_ifc_name']] = {}
  242. # Set role
  243. for role,bridge in cls.data['network_scheme']['roles'].iteritems():
  244. if bridge == 'br-storage':
  245. cls.data['network_scheme']['roles'][role] = vlan_name
  246. # Clean
  247. if storage_vlan: \
  248. storage_parent = "{0}.{1}".format(storage_parent, storage_vlan)
  249. for transformation in transformations:
  250. if ('name' in transformation) and (transformation['name'] == storage_parent) \
  251. and ('bridge' in transformation) and (transformation['bridge'] == 'br-storage') \
  252. and ('action' in transformation) and (transformation['action'] == 'add-port'):
  253. transformations.remove(transformation)
  254. endpoints['br-storage'] = {'IP' : 'None'}
  255. @classmethod
  256. def get_endpoints_section(cls):
  257. return cls.data['network_scheme']['endpoints']
  258. @classmethod
  259. def get_physical_interfaces(cls):
  260. # the main change will be here because it reads phy_interfaces
  261. mlnx_interfaces = cls.mlnx_interfaces_section
  262. drivers = list()
  263. mlnx = cls.get_mlnx_section()
  264. for network_type, ifc_dict in mlnx_interfaces.iteritems():
  265. if 'driver' in ifc_dict and \
  266. ifc_dict['driver'] in MLNX_DRIVERS_LIST[mlnx['cx_card']].values():
  267. drivers.append(ifc_dict['driver'])
  268. return list(set(drivers))
  269. @classmethod
  270. def get_interfaces_section(cls):
  271. return cls.data['network_scheme']['interfaces']
  272. @classmethod
  273. def is_iser_enabled(cls):
  274. return cls.get_mlnx_section()['iser']
  275. @classmethod
  276. def is_sriov_enabled(cls):
  277. return cls.get_mlnx_section()['sriov']
  278. @classmethod
  279. def is_vxlan_offloading_enabled(cls):
  280. return cls.get_mlnx_section()['vxlan_offloading']
  281. @classmethod
  282. def add_reboot_condition(cls):
  283. # if MAX_NUM_VF > default which is 16, reboot
  284. mlnx = cls.get_mlnx_section()
  285. mst_start = os.popen('mst start;').readlines()
  286. burned_num_vfs_list = list()
  287. devices = os.popen('mst status -v| grep pciconf | grep {0} | awk \'{{print $2}}\' '.format(
  288. mlnx['cx_card'].replace("-",""))).readlines()
  289. if len(devices) > 0:
  290. for dev in devices:
  291. num = os.popen('mlxconfig -d {0} q | grep NUM_OF_VFS | awk \'{{print $2}}\' \
  292. '.format(dev.rsplit()[0])).readlines()
  293. if len(num) > 0:
  294. burned_num_vfs_list.append(num[0].rsplit()[0])
  295. else:
  296. logging.error("Failed to grep NUM_OF_VFS from Mellanox card")
  297. sys.exit(1)
  298. burned_num_vfs_set_list = list(set(burned_num_vfs_list))
  299. for burned_num_vfs in burned_num_vfs_set_list :
  300. if int(burned_num_vfs) < int(mlnx['num_of_vfs']) :
  301. mlnx['reboot_required'] = True
  302. logging.info('reboot_required is true as {0} is < {1}'.format(burned_num_vfs,
  303. mlnx['num_of_vfs']))
  304. break;
  305. else:
  306. logging.error("There are no Mellanox devices with {0} card".format(mlnx['cx_card']))
  307. sys.exit(1)
  308. @classmethod
  309. def update_role_settings(cls):
  310. # detect ConnectX card
  311. cls.add_cx_card()
  312. # realize the driver in use (eth/ib)
  313. cls.add_driver()
  314. # decide the physical function for SR-IOV
  315. cls.add_physical_port()
  316. # set iSER parameters
  317. if cls.is_iser_enabled():
  318. cls.add_storage_parent()
  319. cls.add_storage_vlan()
  320. cls.add_iser_interface_name()
  321. cls.set_storage_networking_scheme()
  322. # fill reboot condition
  323. cls.add_reboot_condition()
  324. @classmethod
  325. def read_from_yaml(cls, settings_file):
  326. try:
  327. fd = open(settings_file, 'r')
  328. except IOError:
  329. raise MellanoxSettingsException("Given YAML file {0} doesn't "
  330. "exist".format(settings_file))
  331. try:
  332. data = yaml.load(fd)
  333. except yaml.YAMLError, exc:
  334. if hasattr(exc, 'problem_mark'):
  335. mark = exc.problem_mark
  336. raise MellanoxSettingsException(
  337. "Faild parsing YAML file {0}: error position "
  338. "({2},{3})".format(mark.line+1, mark.column+1)
  339. )
  340. finally:
  341. fd.close()
  342. cls.data = data
  343. cls.mlnx_interfaces_section = cls.get_mlnx_interfaces_section()
  344. @classmethod
  345. def write_to_yaml(cls, settings_file):
  346. # choose only the edited sections
  347. data = {}
  348. data['network_scheme'] = cls.data['network_scheme']
  349. data[MLNX_SECTION] = cls.data[MLNX_SECTION]
  350. # create containing adir
  351. try:
  352. settings_dir = os.path.dirname(settings_file)
  353. if not os.path.isdir(settings_dir):
  354. os.makedirs(settings_dir)
  355. except OSError:
  356. raise MellanoxSettingsException(
  357. "Failed creating directory: {0}".format(settings_dir)
  358. )
  359. try:
  360. fd = open(settings_file, 'w')
  361. yaml.dump(data, fd, default_flow_style=False)
  362. except IOError:
  363. raise MellanoxSettingsException("Failed writing changes to "
  364. "{0}".format(settings_file))
  365. finally:
  366. if fd:
  367. fd.close()
  368. @classmethod
  369. def update_settings(cls):
  370. # define input yaml file
  371. try:
  372. cls.read_from_yaml(SETTINGS_FILE)
  373. cls.update_role_settings()
  374. cls.write_to_yaml(PLUGIN_OVERRIDE_FILE)
  375. except MellanoxSettingsException, exc:
  376. error_msg = "Couldn't add Mellanox settings to " \
  377. "{0}: {1}\n".format(SETTINGS_FILE, exc)
  378. sys.stderr.write(error_msg)
  379. logging.error(error_msg)
  380. raise MellanoxSettingsException("Failed updating one or more "
  381. "setting files")
  382. @classmethod
  383. def get_mlnx_interfaces_section(cls):
  384. transformations = cls.data['network_scheme']['transformations']
  385. interfaces = cls.data['network_scheme']['interfaces']
  386. dict_of_interfaces = {}
  387. # Map bonds to interfaces
  388. for transformation in transformations:
  389. if transformation['action'] == 'add-bond':
  390. # Init bonds on the first bond
  391. if 'bonds' not in cls.data:
  392. cls.data['bonds'] = {}
  393. # Init bond assumptions
  394. all_drivers_equal = True
  395. first = transformation['interfaces'][0]
  396. driver = interfaces[first]['vendor_specific']['driver']
  397. # Check if all bond drivers are the same
  398. for interface in transformation['interfaces']:
  399. new_driver = \
  400. interfaces[interface]['vendor_specific']['driver']
  401. if new_driver != driver:
  402. all_drivers_equal = False
  403. if all_drivers_equal:
  404. bond_driver = driver
  405. else:
  406. bond_driver = None
  407. cls.data['bonds'][transformation['name']] = \
  408. {'interfaces' : transformation['interfaces'],
  409. 'driver' : bond_driver}
  410. # Map networks to interfaces
  411. for transformation in transformations:
  412. if 'bridge' in transformation.keys() and \
  413. (transformation['action'] == 'add-port' or \
  414. transformation['action'] == 'add-bond'):
  415. if transformation['bridge'] == 'br-fw-admin':
  416. network_type = 'admin'
  417. elif transformation['bridge'] == 'br-ex':
  418. network_type = 'public'
  419. elif transformation['bridge'] == 'br-aux' or \
  420. transformation['bridge'] == 'br-mesh':
  421. network_type = 'private'
  422. elif transformation['bridge'] == 'br-mgmt':
  423. network_type = 'management'
  424. elif transformation['bridge'] == 'br-storage':
  425. network_type = 'storage'
  426. elif transformation['bridge'] == 'br-baremetal':
  427. network_type = 'baremetal'
  428. network_interface = {}
  429. network_interface['bridge'] = transformation['bridge']
  430. # Split to iface name and VLAN
  431. iface_split = transformation['name'].split('.')
  432. if len(iface_split)==1:
  433. iface_split.append(str(1))
  434. interface, vlan = iface_split
  435. network_interface['interface'] = interface
  436. network_interface['vlan'] = vlan
  437. # If bond
  438. if 'bonds' in cls.data and interface in cls.data['bonds']:
  439. network_interface['driver'] = \
  440. cls.data['bonds'][interface]['driver']
  441. if ( network_type == 'private' and cls.is_sriov_enabled() ) or \
  442. ( network_type == 'storage' and cls.is_iser_enabled() ):
  443. # Assign SR-IOV/ISER to the first port only.
  444. # This is a temporary workaround until supporing bond over VFs.
  445. # We sort the array of interfaces in order to get the first
  446. # interface on all nodes.
  447. if_list = cls.data['bonds'][interface]['interfaces']
  448. if_list.sort()
  449. network_interface['interface'] = if_list[0]
  450. else: # Not a bond
  451. network_interface['driver'] = \
  452. interfaces[interface]['vendor_specific']['driver']
  453. dict_of_interfaces[network_type] = network_interface
  454. # Set private network in case private and storage on the same port
  455. if 'private' not in dict_of_interfaces.keys() and \
  456. 'storage' in dict_of_interfaces.keys():
  457. dict_of_interfaces['private'] = dict_of_interfaces['storage']
  458. dict_of_interfaces['private']['bridge'] = 'br-prv'
  459. return dict_of_interfaces
  460. def main():
  461. logging.basicConfig(format='%(asctime)s %(message)s',
  462. level=logging.DEBUG, filename=LOG_FILE)
  463. try:
  464. settings = MellanoxSettings()
  465. settings.update_settings()
  466. except MellanoxSettingsException, exc:
  467. error_msg = "Failed adding Mellanox settings: {0}\n".format(exc)
  468. sys.stderr.write(error_msg)
  469. logging.error(exc)
  470. sys.exit(1)
  471. success_msg = "Done adding Mellanox settings\n"
  472. sys.stdout.write(success_msg)
  473. logging.info(success_msg)
  474. sys.exit(0)
  475. if __name__ == '__main__':
  476. main()