Host network configuration tool
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

586 lines
21KB

  1. # -*- coding: utf-8 -*-
  2. # Copyright 2014 Red Hat, Inc.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License"); you may
  5. # not use this file except in compliance with the License. You may obtain
  6. # a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. # License for the specific language governing permissions and limitations
  14. # under the License.
  15. #
  16. # The sriov_config.py module does the SR-IOV PF configuration.
  17. # It is invoked by the sriov_config systemd service to persist the
  18. # SR-IOV configuration across reboots. os-net-config:utils also invokes
  19. # it for the first-time configuration.
  20. # An entry point os-net-config-sriov is added for invocation of this module.
  21. import argparse
  22. import logging
  23. import os
  24. import pyudev
  25. import re
  26. from six.moves import queue as Queue
  27. import sys
  28. import time
  29. import yaml
  30. from oslo_concurrency import processutils
  logger = logging.getLogger(__name__)

  # sysfs directory holding one entry per network interface.
  _SYS_CLASS_NET = '/sys/class/net'
  # udev rules files this module creates or appends to.
  _UDEV_RULE_FILE = '/etc/udev/rules.d/80-persistent-os-net-config.rules'
  _UDEV_LEGACY_RULE_FILE = '/etc/udev/rules.d/70-os-net-config-sriov.rules'
  # Hook script from which cleanup_puppet_config() strips allocate_vfs calls.
  _IFUP_LOCAL_FILE = '/sbin/ifup-local'
  # Number of one-second polls made while waiting for the uplink representor.
  MAX_RETRIES = 10
  # Captures the digits after the final "." of a string; it is applied to the
  # PCI slot name of a PF to extract its function number.
  PF_FUNC_RE = re.compile(r"\.(\d+)$")

  # In order to keep VF representor names consistent, especially after the
  # upgrade process, we should have a udev rule to handle that.
  # The udev rule will rename the VF representor as "<sriov_pf_name>_<vf_num>"
  _REP_LINK_NAME_FILE = "/etc/udev/rep-link-name.sh"
  _REP_LINK_NAME_DATA = (
      '#!/bin/bash\n'
      '# This file is autogenerated by os-net-config\n'
      'set -x\n'
      'PORT="$1"\n'
      'echo "NUMBER=${PORT##pf*vf}"\n'
  )

  # Create a queue for passing the udev network events
  vf_queue = Queue.Queue()

  # File to contain the list of SR-IOV PF, VF and their configurations
  # Format of the file shall be
  # - device_type: pf
  #   name: <pf name>
  #   numvfs: <number of VFs>
  #   promisc: "on"/"off"
  # - device_type: vf
  #   device:
  #     name: <pf name>
  #     vfid: <VF id>
  #   name: <vf name>
  #   vlan_id: <vlan>
  #   qos: <qos>
  #   spoofcheck: "on"/"off"
  #   trust: "on"/"off"
  #   state: "auto"/"enable"/"disable"
  #   macaddr: <mac address>
  #   promisc: "on"/"off"
  _SRIOV_CONFIG_FILE = '/var/lib/os-net-config/sriov_config.yaml'
  class SRIOVNumvfsException(ValueError):
      """Raised when the numvfs of a PF cannot be read or written."""
  def udev_event_handler(action, device):
      """Queue a udev network event for later processing.

      Registered as the pyudev observer callback; each event is logged and
      pushed on ``vf_queue`` as ``{"action": ..., "device": <sys path>}``.

      :param action: the udev action reported for the device
      :param device: the pyudev device object; only ``sys_path`` is read
      """
      sys_path = device.sys_path
      logger.info("Received udev event %s for %s" % (action, sys_path))
      vf_queue.put({"action": action, "device": sys_path})
  def get_file_data(filename):
      """Return the whole content of ``filename`` as a string.

      An empty string is returned when the file does not exist, and also
      when reading it fails with an IOError (which is logged).

      :param filename: path of the file to read
      :returns: the file content, or '' when it is unavailable
      """
      data = ''
      if os.path.exists(filename):
          try:
              with open(filename, 'r') as handle:
                  data = handle.read()
          except IOError:
              logger.error("Error reading file: %s" % filename)
      return data
  def _get_sriov_map():
      """Load the SR-IOV device list from ``_SRIOV_CONFIG_FILE``.

      :returns: the parsed YAML document, or an empty list when the file is
          missing, unreadable, empty, or parses to nothing (for example when
          it holds only whitespace or comments), so that callers can always
          iterate over the result
      """
      contents = get_file_data(_SRIOV_CONFIG_FILE)
      if not contents:
          return []
      sriov_map = yaml.safe_load(contents)
      # safe_load() gives None for a document without any node; callers
      # iterate over the map, so normalise that case to an empty list.
      return [] if sriov_map is None else sriov_map
  def get_numvfs(ifname):
      """Return the number of VFs currently configured on ``ifname``.

      The value is read from ``device/sriov_numvfs`` under the sysfs entry
      of the interface.

      :param ifname: name of the PF interface
      :returns: the current numvfs as an int
      :raises SRIOVNumvfsException: when the sysfs file cannot be read
      """
      numvfs_file = os.path.join(_SYS_CLASS_NET, ifname,
                                 "device/sriov_numvfs")
      try:
          with open(numvfs_file, 'r') as handle:
              raw_value = handle.read()
      except IOError:
          raise SRIOVNumvfsException("Unable to read numvfs for %s" % ifname)
      return int(raw_value)
  def restart_ovs_and_pfs_netdevs():
      """Restart openvswitch, then cycle every PF listed in the SR-IOV map.

      The map is loaded first, the ``openvswitch`` unit is restarted through
      systemctl, and each entry of type ``pf`` is taken down and brought up
      again with ifdown/ifup.
      """
      sriov_map = _get_sriov_map()
      processutils.execute('/usr/bin/systemctl', 'restart', 'openvswitch')
      for entry in sriov_map:
          if entry['device_type'] != 'pf':
              continue
          if_down_interface(entry['name'])
          if_up_interface(entry['name'])
  def cleanup_puppet_config():
      """Remove the SR-IOV artefacts previously generated by puppet.

      Deletes the tripleo reset udev rule and the allocate_vfs script when
      present, then drops every line mentioning allocate_vfs from the
      ifup-local hook.
      """
      for stale_path in ('/etc/udev/rules.d/70-tripleo-reset-sriov.rules',
                         '/etc/sysconfig/allocate_vfs'):
          if os.path.exists(stale_path):
              os.remove(stale_path)

      if not os.path.exists(_IFUP_LOCAL_FILE):
          return

      # Remove the invocation of allocate_vfs script generated by puppet
      with open(_IFUP_LOCAL_FILE) as oldfile:
          kept_lines = [line for line in oldfile
                        if "/etc/sysconfig/allocate_vfs" not in line]
      file_contents = "".join(kept_lines)

      # After the removal of allocate_vfs, if the ifup-local file has just
      # "#!/bin/bash" left, then remove the file as well.
      if file_contents.strip() == "#!/bin/bash":
          os.remove(_IFUP_LOCAL_FILE)
      else:
          with open(_IFUP_LOCAL_FILE, 'w') as newfile:
              newfile.write(file_contents)
  def configure_sriov_pf(execution_from_cli=False, restart_openvswitch=False):
      """Create the VFs of every PF in the SR-IOV map and set up switchdev.

      For each ``pf`` entry of the map the interface is brought up, the
      requested numvfs is written to sysfs (unless it is already in place,
      in which case the rest of the processing of that PF is skipped) and
      the creation of the VFs is awaited through udev events. PFs whose
      ``link_mode`` is ``switchdev`` and whose vendor id is Mellanox
      additionally get their VFs unbound, the udev rules written and the
      eswitch moved to switchdev mode.

      :param execution_from_cli: when true, bring a PF up again with ifup
          after it has been moved to switchdev mode
      :param restart_openvswitch: when true, restart openvswitch and cycle
          the PFs once all of them are configured
      :raises SRIOVNumvfsException: when numvfs cannot be read or written
      """
      MLNX_UNBIND_FILE_PATH = "/sys/bus/pci/drivers/mlx5_core/unbind"
      MLNX_VENDOR_ID = "0x15b3"

      # Create a context for pyudev and observe udev events for network
      context = pyudev.Context()
      monitor = pyudev.Monitor.from_netlink(context)
      monitor.filter_by('net')
      observer = pyudev.MonitorObserver(monitor, udev_event_handler)
      observer.start()

      # Whatever happens below, the observer thread must be stopped again.
      try:
          sriov_map = _get_sriov_map()
          trigger_udev_rule = False

          # Cleanup the previous config by puppet-tripleo
          cleanup_puppet_config()

          for item in sriov_map:
              if item['device_type'] != 'pf':
                  continue

              _pf_interface_up(item)
              pf_name = item['name']
              numvfs = item['numvfs']

              if item.get('link_mode') == "legacy":
                  # Add a udev rule to configure the VF's when PF's are
                  # released by a guest
                  add_udev_rule_for_legacy_sriov_pf(pf_name, numvfs)

              try:
                  sriov_numvfs_path = os.path.join(_SYS_CLASS_NET, pf_name,
                                                   "device/sriov_numvfs")
                  curr_numvfs = get_numvfs(pf_name)
                  if curr_numvfs == numvfs:
                      logger.info("Numvfs already configured for %s"
                                  % pf_name)
                      continue
                  with open(sriov_numvfs_path, 'w') as f:
                      f.write("%d" % numvfs)
              except IOError as exc:
                  msg = ("Unable to configure pf: %s with numvfs: %d\n%s"
                         % (pf_name, numvfs, exc))
                  raise SRIOVNumvfsException(msg)

              # Wait for the creation of VFs for each PF
              _wait_for_vf_creation(pf_name, numvfs)

              # Configure switchdev mode
              vendor_id = get_vendor_id(pf_name)
              if (item.get('link_mode') == "switchdev" and
                      vendor_id == MLNX_VENDOR_ID):
                  # Unbind every VF that is currently bound to a driver
                  for vf_pci in get_vf_pcis_list(pf_name):
                      vf_pci_path = "/sys/bus/pci/devices/%s/driver" % vf_pci
                      if os.path.exists(vf_pci_path):
                          with open(MLNX_UNBIND_FILE_PATH, 'w') as f:
                              f.write("%s" % vf_pci)

                  # Adding a udev rule to make vf-representors unmanaged by
                  # NetworkManager
                  add_udev_rule_to_unmanage_vf_representors_by_nm()

                  # Adding a udev rule to save the sriov_pf name
                  trigger_udev_rule = (add_udev_rule_for_sriov_pf(pf_name)
                                       or trigger_udev_rule)

                  configure_switchdev(pf_name)

                  # Adding a udev rule to rename vf-representors
                  trigger_udev_rule = (
                      add_udev_rule_for_vf_representors(pf_name)
                      or trigger_udev_rule)

                  # Moving the sriov-PFs to switchdev mode will put the netdev
                  # interfaces in down state.
                  # In case we are running during initial deployment,
                  # bring the interfaces up.
                  # In case we are running as part of the sriov_config service
                  # after reboot, net config scripts, which run after
                  # sriov_config service will bring the interfaces up.
                  if execution_from_cli:
                      if_up_interface(pf_name)

          # Trigger udev rules if there is new rules written
          if trigger_udev_rule:
              trigger_udev_rules()
      finally:
          observer.stop()

      if restart_openvswitch:
          restart_ovs_and_pfs_netdevs()
  def _write_numvfs(device_name, numvfs):
      """Write ``numvfs`` to sysfs for a PF that has no VF configured yet.

      Nothing is written when the current numvfs of the device is not zero.

      :param device_name: name of the PF interface
      :param numvfs: number of VFs to create
      :raises SRIOVNumvfsException: when numvfs cannot be read or written
      """
      sriov_numvfs_path = os.path.join(_SYS_CLASS_NET, device_name,
                                       "device/sriov_numvfs")
      if get_numvfs(device_name) != 0:
          logger.info("Numvfs already configured for %s" % device_name)
          return

      try:
          with open(sriov_numvfs_path, 'w') as numvfs_file:
              numvfs_file.write("%d" % numvfs)
      except IOError as exc:
          raise SRIOVNumvfsException(
              "Unable to configure pf: %s with numvfs: %d\n%s"
              % (device_name, numvfs, exc))
  def _wait_for_vf_creation(pf_name, numvfs):
      """Block until ``numvfs`` distinct VFs of ``pf_name`` were reported.

      Events are consumed from ``vf_queue``; an event counts when the
      ``physfn/net`` directory two levels above its device path contains
      exactly one entry and that entry is ``pf_name``. The wait is given up
      (with an info log) when no event arrives within 5 seconds.

      :param pf_name: name of the PF whose VFs are awaited
      :param numvfs: number of distinct VFs to wait for
      """
      seen_vfs = []
      while len(seen_vfs) < numvfs:
          try:
              # wait for 5 seconds after every udev event
              event = vf_queue.get(True, 5)
          except Queue.Empty:
              logger.info("Timeout in the creation of VFs for PF %s"
                          % pf_name)
              return

          device_path = event["device"]
          vf_name = os.path.basename(device_path)
          pf_path = os.path.normpath(os.path.join(device_path,
                                                  "../../physfn/net"))
          if not os.path.isdir(pf_path):
              logger.warning("%s is not a directory" % pf_path)
              continue

          pf_nic = os.listdir(pf_path)
          if len(pf_nic) != 1 or pf_name != pf_nic[0]:
              logger.warning("Unable to parse event %s" % device_path)
              continue

          # Each VF is counted once, however many events mention it
          if vf_name not in seen_vfs:
              seen_vfs.append(vf_name)
              logger.info("VF: %s created for PF: %s" % (vf_name, pf_name))
      logger.info("Required VFs are created for PF %s" % pf_name)
  def _wait_for_uplink_rep_creation(pf_name):
      """Wait until the uplink representor of ``pf_name`` is ready.

      The representor is considered ready once the ``phys_switch_id`` sysfs
      file of the interface has some content. The file is polled up to
      ``MAX_RETRIES`` times, sleeping one second after each failed attempt.

      :param pf_name: name of the PF interface
      :raises RuntimeError: when the file is still empty after all retries
      """
      uplink_rep_phys_switch_id_path = ("/sys/class/net/%s/phys_switch_id"
                                        % pf_name)
      for _ in range(MAX_RETRIES):
          if get_file_data(uplink_rep_phys_switch_id_path):
              logger.info("Uplink representor %s ready", pf_name)
              break
          time.sleep(1)
      else:
          # Format the name into the message; passing it as a second
          # argument would leave the "%s" placeholder unexpanded.
          raise RuntimeError("Timeout while waiting for uplink representor %s."
                             % pf_name)
  def create_rep_link_name_script():
      """Write the representor link-name helper script to disk.

      The script content is ``_REP_LINK_NAME_DATA``; the file is then given
      mode 0755 so that it can be executed.
      """
      with open(_REP_LINK_NAME_FILE, "w") as script:
          script.write(_REP_LINK_NAME_DATA)
      # Make the _REP_LINK_NAME_FILE executable
      os.chmod(_REP_LINK_NAME_FILE, 0o755)
  def add_udev_rule_for_sriov_pf(pf_name):
      """Add the udev rule that names the device at the PF's PCI slot.

      :param pf_name: name of the PF interface
      :returns: the result of add_udev_rule() (true when a rule was written)
      """
      rule_template = ('SUBSYSTEM=="net", ACTION=="add", DRIVERS=="?*", '
                       'KERNELS=="%s", NAME="%s"')
      rule = rule_template % (get_pf_pci(pf_name), pf_name)
      return add_udev_rule(rule, _UDEV_RULE_FILE)
  def add_udev_rule_for_legacy_sriov_pf(pf_name, numvfs):
      """Add the udev rule that re-runs os-net-config-sriov for a legacy PF.

      The rule matches the kernel name ``pf_name`` and runs
      ``/bin/os-net-config-sriov -n %k:<numvfs>``.

      :param pf_name: name of the PF interface
      :param numvfs: number of VFs passed to the command
      :returns: the result of add_udev_rule() (true when a rule was written)
      """
      logger.info("adding udev rules for %s" % (pf_name))
      rule_template = ('KERNEL=="%s", '
                       'RUN+="/bin/os-net-config-sriov -n %%k:%d"')
      rule = rule_template % (pf_name, numvfs)
      return add_udev_rule(rule, _UDEV_LEGACY_RULE_FILE)
  def add_udev_rule_for_vf_representors(pf_name):
      """Add the udev rule renaming the VF representors of ``pf_name``.

      The rule matches on the PF's ``phys_switch_id`` and on a
      ``phys_port_name`` of the form ``pf<function number>vf*``; it names
      the representor ``<pf_name>_<NUMBER>``, NUMBER being imported from the
      link-name helper script, which is (re)written here as well.

      :param pf_name: name of the PF interface
      :returns: the result of add_udev_rule(), or None when the function
          number cannot be extracted from the PCI slot name of the PF
      """
      switch_id_file = os.path.join(_SYS_CLASS_NET, pf_name,
                                    "phys_switch_id")
      phys_switch_id = get_file_data(switch_id_file).strip()
      func_match = PF_FUNC_RE.search(get_pf_pci(pf_name))
      if not func_match:
          logger.error("Failed to get function number for %s \n"
                       "and so failed to create a udev rule for renaming "
                       "its' vf-represent" % pf_name)
          return None

      rule_fields = (phys_switch_id, func_match.group(1),
                     _REP_LINK_NAME_FILE, pf_name)
      rule = ('SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}'
              '=="%s", ATTR{phys_port_name}=="pf%svf*", '
              'IMPORT{program}="%s $attr{phys_port_name}", '
              'NAME="%s_$env{NUMBER}"') % rule_fields
      create_rep_link_name_script()
      return add_udev_rule(rule, _UDEV_RULE_FILE)
  def add_udev_rule_to_unmanage_vf_representors_by_nm():
      """Add the udev rule flagging VF representors as NM_UNMANAGED.

      :returns: the result of add_udev_rule() (true when a rule was written)
      """
      rule = ('SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}'
              '!="", ATTR{phys_port_name}=="pf*vf*", '
              'ENV{NM_UNMANAGED}="1"')
      return add_udev_rule(rule, _UDEV_RULE_FILE)
  def add_udev_rule(udev_data, udev_file):
      """Make sure the rule ``udev_data`` is present in ``udev_file``.

      A missing file is created with an "autogenerated" header followed by
      the rule; an existing file gets the rule appended unless one of its
      lines already equals it. The udev rules are reloaded after any write.

      :param udev_data: the rule text; surrounding whitespace is stripped
      :param udev_file: path of the udev rules file
      :returns: True when the rule was written, False when it was already
          present
      """
      rule = udev_data.strip()
      if os.path.exists(udev_file):
          if rule in get_file_data(udev_file).split("\n"):
              return False
          with open(udev_file, "a") as rules_file:
              rules_file.write(rule + "\n")
      else:
          with open(udev_file, "w") as rules_file:
              rules_file.write(
                  "# This file is autogenerated by os-net-config\n%s\n"
                  % rule)
      reload_udev_rules()
      return True
  def reload_udev_rules():
      """Run ``udevadm control --reload-rules``.

      :raises processutils.ProcessExecutionError: when the command fails;
          the failure is logged before being re-raised
      """
      reload_cmd = ('/usr/sbin/udevadm', 'control', '--reload-rules')
      try:
          processutils.execute(*reload_cmd)
      except processutils.ProcessExecutionError:
          logger.error("Failed to reload udev rules")
          raise
      logger.info("udev rules reloaded successfully")
  def trigger_udev_rules():
      """Run ``udevadm trigger`` with action "add" for the net subsystem.

      :raises processutils.ProcessExecutionError: when the command fails;
          the failure is logged before being re-raised
      """
      trigger_cmd = ('/usr/sbin/udevadm', 'trigger', '--action=add',
                     '--attr-match=subsystem=net')
      try:
          processutils.execute(*trigger_cmd)
      except processutils.ProcessExecutionError:
          logger.error("Failed to trigger udev rules")
          raise
      logger.info("udev rules triggered successfully")
  def configure_switchdev(pf_name):
      """Move the eswitch of ``pf_name`` to switchdev mode.

      For the device ids 0x1013 and 0x1015 the inline-mode is first set to
      transport. The eswitch mode is then set to switchdev, the uplink
      representor is awaited and hw-tc-offload is enabled on the interface.

      :param pf_name: name of the PF interface
      :raises processutils.ProcessExecutionError: when devlink or ethtool
          fails; the failure is logged before being re-raised
      """
      devlink_set = ('/usr/sbin/devlink', 'dev', 'eswitch', 'set')
      pf_pci = get_pf_pci(pf_name)
      pci_dev = 'pci/%s' % pf_pci

      if get_pf_device_id(pf_name) in ("0x1013", "0x1015"):
          inline_cmd = devlink_set + (pci_dev, 'inline-mode', 'transport')
          try:
              processutils.execute(*inline_cmd)
          except processutils.ProcessExecutionError:
              logger.error("Failed to set inline-mode to transport")
              raise

      mode_cmd = devlink_set + (pci_dev, 'mode', 'switchdev')
      try:
          processutils.execute(*mode_cmd)
      except processutils.ProcessExecutionError:
          logger.error("Failed to set mode to switchdev")
          raise
      logger.info("Device pci/%s set to switchdev mode." % pf_pci)

      # WA to make sure that the uplink_rep is ready after moving to
      # switchdev, as moving to switchdev will remove the sriov_pf and create
      # uplink representor, so we need to make sure that uplink representor
      # is ready before proceeding
      _wait_for_uplink_rep_creation(pf_name)

      try:
          processutils.execute('/usr/sbin/ethtool', '-K', pf_name,
                               'hw-tc-offload', 'on')
          logger.info("Enabled \"hw-tc-offload\" for PF %s." % pf_name)
      except processutils.ProcessExecutionError:
          logger.error("Failed to enable hw-tc-offload")
          raise
  def run_ip_config_cmd(*cmd, **kwargs):
      """Log and execute a command through processutils.execute().

      :param cmd: the command and its arguments, as strings
      :param kwargs: keyword arguments passed through to
          processutils.execute()
      :raises processutils.ProcessExecutionError: when the command fails;
          the failure is logged before being re-raised
      """
      command_line = ' '.join(cmd)
      logger.info("Running %s" % command_line)
      try:
          processutils.execute(*cmd, **kwargs)
      except processutils.ProcessExecutionError:
          logger.error("Failed to execute %s" % command_line)
          raise
  def _pf_interface_up(pf_device):
      """Apply the optional promisc setting of a PF and bring it up.

      :param pf_device: PF entry of the SR-IOV map; ``name`` is required and
          ``promisc`` is applied when present
      """
      pf_name = pf_device['name']
      link_set = ('ip', 'link', 'set', 'dev', pf_name)
      if 'promisc' in pf_device:
          run_ip_config_cmd(*(link_set + ('promisc', pf_device['promisc'])))
      logger.info("Bringing up PF: %s" % pf_name)
      run_ip_config_cmd(*(link_set + ('up',)))
  def get_vendor_id(ifname):
      """Return the PCI vendor id of ``ifname`` as found in sysfs.

      :param ifname: name of the interface
      :returns: the stripped content of ``device/vendor``, or None when the
          file cannot be read
      """
      vendor_file = os.path.join(_SYS_CLASS_NET, ifname, "device/vendor")
      try:
          with open(vendor_file, 'r') as handle:
              return handle.read().strip()
      except IOError:
          return None
  def get_pf_pci(pf_name):
      """Return the PCI slot name of ``pf_name``.

      The value is taken from the ``PCI_SLOT_NAME=`` line of the
      ``device/uevent`` sysfs file of the interface.

      :param pf_name: name of the PF interface
      :returns: the text following ``PCI_SLOT_NAME=``
      :raises AttributeError: when the uevent data has no PCI_SLOT_NAME
          entry (the regex search then yields None)
      """
      uevent_text = get_file_data(
          os.path.join(_SYS_CLASS_NET, pf_name, "device/uevent"))
      slot_match = re.search(r'PCI_SLOT_NAME=(.*)', uevent_text, re.MULTILINE)
      return slot_match.group(1)
  def get_pf_device_id(pf_name):
      """Return the PCI device id of ``pf_name`` as found in sysfs.

      :param pf_name: name of the PF interface
      :returns: the stripped content of ``device/device``; an empty string
          when the file is missing or unreadable
      """
      device_id_file = os.path.join(_SYS_CLASS_NET, pf_name, "device/device")
      return get_file_data(device_id_file).strip()
  def get_vf_pcis_list(pf_name):
      """Return the PCI slot names of the VFs belonging to ``pf_name``.

      Every ``virtfn*`` entry of the PF's sysfs device directory is
      inspected and the ``PCI_SLOT_NAME=`` value of its uevent file is
      collected.

      :param pf_name: name of the PF interface
      :returns: list of PCI slot names, in directory listing order
      :raises AttributeError: when a uevent file has no PCI_SLOT_NAME entry
      """
      device_dir = os.path.join(_SYS_CLASS_NET, pf_name, "device")
      vf_pcis_list = []
      for entry in os.listdir(device_dir):
          if not entry.startswith("virtfn"):
              continue
          uevent_text = get_file_data(os.path.join(device_dir, entry,
                                                   "uevent"))
          slot_match = re.search(r'PCI_SLOT_NAME=(.*)', uevent_text,
                                 re.MULTILINE)
          vf_pcis_list.append(slot_match.group(1))
      return vf_pcis_list
  def if_down_interface(device):
      """Take ``device`` down with /sbin/ifdown.

      :param device: name of the interface
      :raises processutils.ProcessExecutionError: when ifdown fails; the
          failure is logged before being re-raised
      """
      ifdown_cmd = ('/sbin/ifdown', device)
      logger.info("Running /sbin/ifdown %s" % device)
      try:
          processutils.execute(*ifdown_cmd)
      except processutils.ProcessExecutionError:
          logger.error("Failed to ifdown %s" % device)
          raise
  def if_up_interface(device):
      """Bring ``device`` up with /sbin/ifup.

      :param device: name of the interface
      :raises processutils.ProcessExecutionError: when ifup fails; the
          failure is logged before being re-raised
      """
      ifup_cmd = ('/sbin/ifup', device)
      logger.info("Running /sbin/ifup %s" % device)
      try:
          processutils.execute(*ifup_cmd)
      except processutils.ProcessExecutionError:
          logger.error("Failed to ifup %s" % device)
          raise
  def configure_sriov_vf():
      """Apply the per-VF settings of every ``vf`` entry of the SR-IOV map.

      For each VF the optional mac, vlan (with optional qos), spoofchk,
      state and trust settings are applied, in that order, through
      ``ip link set dev <pf> vf <vfid> ...``; the optional promisc setting
      is applied to the VF interface itself.
      """
      # (key in the map entry, keyword of the `ip link` command)
      keyword_settings = (('spoofcheck', 'spoofchk'),
                          ('state', 'state'),
                          ('trust', 'trust'))

      for item in _get_sriov_map():
          if item['device_type'] != 'vf':
              continue

          pf_name = item['device']['name']
          vfid = item['device']['vfid']
          base_cmd = ('ip', 'link', 'set', 'dev', pf_name, 'vf', str(vfid))
          logger.info("Configuring settings for PF: %s VF :%d VF name : %s"
                      % (pf_name, vfid, item['name']))

          if 'macaddr' in item:
              run_ip_config_cmd(*(base_cmd + ('mac', item['macaddr'])))

          if 'vlan_id' in item:
              vlan_args = ('vlan', str(item['vlan_id']))
              # qos is only meaningful together with a vlan
              if 'qos' in item:
                  vlan_args = vlan_args + ('qos', str(item['qos']))
              run_ip_config_cmd(*(base_cmd + vlan_args))

          for key, keyword in keyword_settings:
              if key in item:
                  run_ip_config_cmd(*(base_cmd + (keyword, item[key])))

          if 'promisc' in item:
              run_ip_config_cmd('ip', 'link', 'set', 'dev', item['name'],
                                'promisc', item['promisc'])
  def parse_opts(argv):
      """Parse the command line of the SR-IOV configuration tool.

      :param argv: full argument vector; ``argv[0]`` (the program name) is
          skipped
      :returns: the argparse namespace with ``debug``, ``verbose`` and
          ``numvfs`` attributes
      """
      parser = argparse.ArgumentParser(
          description='Configure SR-IOV PF and VF interfaces using a YAML'
                      ' config file format.')

      boolean_flags = (
          ('-d', '--debug', "debug", "Print debugging output."),
          ('-v', '--verbose', "verbose", "Print verbose output."),
      )
      for short_opt, long_opt, dest, help_text in boolean_flags:
          parser.add_argument(short_opt, long_opt,
                              dest=dest,
                              action='store_true',
                              help=help_text,
                              required=False)

      parser.add_argument(
          '-n', '--numvfs',
          dest="numvfs",
          action='store',
          help="Provide the numvfs for device in the format <device>:<numvfs>",
          required=False)

      return parser.parse_args(argv[1:])
  def configure_logger(verbose=False, debug=False):
      """Configure the root logger format and level.

      :param verbose: select the INFO level (unless ``debug`` is set)
      :param debug: select the DEBUG level; takes precedence over
          ``verbose``. With neither flag the level is WARN.
      """
      if debug:
          level = logging.DEBUG
      elif verbose:
          level = logging.INFO
      else:
          level = logging.WARN
      logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s',
                          datefmt='%Y/%m/%d %I:%M:%S %p',
                          level=level)
  def main(argv=sys.argv):
      """Entry point of the SR-IOV configuration tool.

      With ``--numvfs <device>:<numvfs>`` only the numvfs of that device is
      written; an argument that does not have that shape is logged as an
      error. Without the option, the PFs and then the VFs described in the
      SR-IOV config file are configured.

      :param argv: full argument vector, program name included
      :returns: None, so the process exit status is 0 whenever no exception
          is raised
      """
      opts = parse_opts(argv)
      configure_logger(opts.verbose, opts.debug)

      if opts.numvfs:
          # Raw string: "\w" and "\d" are regex classes, not string escapes
          if re.match(r"^\w+:\d+$", opts.numvfs):
              device_name, numvfs = opts.numvfs.split(':')
              _write_numvfs(device_name, int(numvfs))
          else:
              logger.error("Invalid arguments for --numvfs %s" % opts.numvfs)
      else:
          # Configure the PF's
          configure_sriov_pf()
          # Configure the VFs
          configure_sriov_vf()


  if __name__ == '__main__':
      sys.exit(main(sys.argv))