Python utility to manage a TripleO-based cloud
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

undercloud_preflight.py 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. # Copyright 2017 Red Hat Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"); you may
  4. # not use this file except in compliance with the License. You may obtain
  5. # a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  11. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  12. # License for the specific language governing permissions and limitations
  13. # under the License.
  14. import json
  15. import logging
  16. import netaddr
  17. import netifaces
  18. import os
  19. import subprocess
  20. import sys
  21. from osc_lib.i18n import _
  22. from oslo_utils import netutils
  23. import psutil
  24. from oslo_config import cfg
  25. from tripleoclient import constants
  26. from tripleoclient import utils
  class FailedValidation(Exception):
      """Signal that a preflight configuration validation did not pass."""
  # Global oslo.config object read by the validations in this module.
  CONF = cfg.CONF

  # 8 GB is needed; the threshold sits a little lower to leave room for
  # variation in what 8 GB means on different platforms.
  REQUIRED_MB = 7680

  # Location of the undercloud passwords file inside the output directory.
  PASSWORD_PATH = '{0}/{1}'.format(constants.UNDERCLOUD_OUTPUT_DIR,
                                   'undercloud-passwords.conf')

  LOG = logging.getLogger('{0}.UndercloudSetup'.format(__name__))
  def _run_live_command(args, env=None, name=None, cwd=None, wait=True):
      """Run the command defined by args, env and cwd.

      With ``wait=False`` the started process is returned immediately and
      the caller is responsible for consuming its output.  With
      ``wait=True`` (the default) the combined stdout/stderr of the command
      is logged line by line while it runs, and the call returns only after
      the process has exited.

      :param args: command and arguments, passed to subprocess.Popen
      :param env: optional environment mapping for the child process
      :param name: label used in the failure message; defaults to args[0]
      :param cwd: optional working directory for the child process
      :param wait: when False, return the Popen object without waiting
      :returns: the subprocess.Popen object if wait is False, else None
      :raises RuntimeError: if wait is True and the command exits non-zero
      """
      if name is None:
          name = args[0]
      process = subprocess.Popen(args, env=env, cwd=cwd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 universal_newlines=True)
      if not wait:
          return process

      # Closing the pipe once EOF is reached avoids leaking the descriptor.
      with process.stdout:
          for line in iter(process.stdout.readline, ''):
              # universal_newlines=True already yields text on Python 3,
              # where str has no decode(); only decode real byte strings.
              if isinstance(line, bytes):
                  line = line.decode('utf-8')
              LOG.info(line.rstrip())
      process.wait()

      if process.returncode != 0:
          message = '%s failed. See log for details.' % name
          LOG.error(message)
          raise RuntimeError(message)
  def _check_diskspace(upgrade=False):
      """Run the disk space validation playbook for the undercloud.

      Two playbooks are available:

      - undercloud-disk-space.yaml: minimal disk space for a new deploy.
      - undercloud-disk-space-pre-upgrade.yaml: minimal disk space for an
        upgrade.

      The ``upgrade`` flag selects which of the two is executed.
      """
      playbook = ('undercloud-disk-space-pre-upgrade.yaml' if upgrade
                  else 'undercloud-disk-space.yaml')
      interpreter = "/usr/bin/python{}".format(sys.version_info[0])
      playbook_kwargs = {
          'logger': LOG,
          'workdir': constants.ANSIBLE_VALIDATION_DIR,
          'playbook': playbook,
          'inventory': 'undercloud,',
          'retries': False,
          'connection': 'local',
          'output_callback': 'validation_output',
          'python_interpreter': interpreter,
      }
      utils.run_ansible_playbook(**playbook_kwargs)
  def _check_memory():
      """Verify that enough memory is present for the undercloud.

      Physical memory and swap are added together and compared against
      REQUIRED_MB.  A RuntimeError is raised when the total falls short.
      """
      physical = psutil.virtual_memory()
      swap = psutil.swap_memory()
      total_mb = (physical.total + swap.total) / 1024 / 1024
      if total_mb >= REQUIRED_MB:
          return
      LOG.error(_('At least {0} MB of memory is required for undercloud '
                  'installation. A minimum of 8 GB is recommended. '
                  'Only detected {1} MB').format(REQUIRED_MB, total_mb))
      raise RuntimeError(_('Insufficient memory available'))
  def _check_ipv6_enabled():
      """Report whether IPv6 is enabled.

      The presence of the /proc/net/if_inet6 file is used as the indicator
      that the ipv6 sysctl settings are available.
      """
      inet6_proc_entry = '/proc/net/if_inet6'
      return os.path.isfile(inet6_proc_entry)
  def _wrap_ipv6(ip):
      """Return ip enclosed in square brackets when it is a valid IPv6
      address; any other value is returned unchanged.
      """
      return "[%s]" % ip if netutils.is_valid_ipv6(ip) else ip
  def _check_sysctl():
      """Verify that the sysctl options the undercloud sets are available.

      Each option is looked up as a file under /proc/sys.  The IPv6 option
      is only required when IPv6 is enabled.  A RuntimeError is raised if
      any of the options is missing.
      """
      required = ['net.ipv4.ip_forward', 'net.ipv4.ip_nonlocal_bind']
      if _check_ipv6_enabled():
          required.append('net.ipv6.ip_nonlocal_bind')

      def proc_path(option):
          return '/proc/sys/{opt}'.format(opt=option.replace('.', '/'))

      missing = [option for option in required
                 if not os.path.isfile(proc_path(option))]
      if not missing:
          return
      LOG.error(_('Required sysctl options are not available. Check '
                  'that your kernel is up to date. Missing: {options}')
                .format(options=", ".join(missing)))
      raise RuntimeError(_('Missing sysctl options'))
  def _validate_ips():
      """Check that every entry of undercloud_nameservers is an IP address.

      Raises FailedValidation for the first entry netaddr cannot parse.
      """
      param_name = 'undercloud_nameservers'
      for candidate in CONF.undercloud_nameservers:
          try:
              netaddr.IPAddress(candidate)
          except netaddr.core.AddrFormatError:
              msg = (_('{0} "{1}" must be a valid IP address')
                     .format(param_name, candidate))
              LOG.error(msg)
              raise FailedValidation(msg)
  def _validate_value_formats():
      """Validate the format of local_ip and undercloud_hostname.

      local_ip must parse as a network in CIDR form, must not have a /32
      prefix, and must use a /64 prefix when it is IPv6.  The hostname,
      when set, must contain a dot (i.e. be fully qualified).

      Raises FailedValidation when either value is malformed.
      """
      try:
          network = netaddr.IPNetwork(CONF.local_ip)
          reason = None
          if network.prefixlen == 32:
              reason = _('Invalid netmask')
          elif network.version == 6 and network.prefixlen != 64:
              # For IPv6 the ctlplane network uses the EUI-64 address
              # format, which requires the prefix to be /64.
              reason = _('Prefix must be 64 for IPv6')
          if reason is not None:
              LOG.error(reason)
              raise netaddr.AddrFormatError(reason)
      except netaddr.core.AddrFormatError as exc:
          message = (_('local_ip "{0}" not valid: "{1}" '
                       'Value must be in CIDR format.')
                     .format(CONF.local_ip, str(exc)))
          LOG.error(message)
          raise FailedValidation(message)

      hostname = CONF['undercloud_hostname']
      if hostname is None or '.' in hostname:
          return
      message = (_('Hostname "%s" is not fully qualified.') % hostname)
      LOG.error(message)
      raise FailedValidation(message)
  def _validate_in_cidr(subnet_props, subnet_name):
      """Check that the addresses configured for a subnet lie in its CIDR.

      The gateway is always checked.  The DHCP range boundaries are checked
      unless the subnet uses a non-default CIDR together with the default
      dhcp_start and dhcp_end.  For the local subnet, local_ip is checked
      too, and the public/admin hosts are checked when a service
      certificate is configured or generated.

      Raises FailedValidation when a checked address is outside the CIDR.
      """
      cidr = netaddr.IPNetwork(subnet_props.cidr)

      def ensure_in_cidr(addr, pretty_name=None, require_ip=True,
                         log_only=False):
          try:
              inside = netaddr.IPAddress(addr) in cidr
          except netaddr.core.AddrFormatError:
              # A non-IP value (e.g. a host name) is only fatal when an IP
              # address is required for this option.
              if require_ip:
                  message = (_('Invalid IP address: %s') % addr)
                  LOG.error(message)
                  raise FailedValidation(message)
              return
          if inside:
              return
          message = (_('Config option {0} "{1}" not in defined '
                       'CIDR "{2}"').format(pretty_name, addr, cidr))
          if log_only:
              LOG.warning(message)
              return
          LOG.error(message)
          raise FailedValidation(message)

      ensure_in_cidr(subnet_props.gateway, 'gateway')

      # NOTE(hjensas): Ignore the default dhcp_start and dhcp_end if cidr is
      # not the default as well. I.e allow not specifying dhcp_start and
      # dhcp_end.
      default_range_on_custom_cidr = (
          subnet_props.cidr != constants.CTLPLANE_CIDR_DEFAULT and
          subnet_props.dhcp_start == constants.CTLPLANE_DHCP_START_DEFAULT
          and subnet_props.dhcp_end == constants.CTLPLANE_DHCP_END_DEFAULT)
      if not default_range_on_custom_cidr:
          for range_start in subnet_props.dhcp_start:
              ensure_in_cidr(range_start, 'dhcp_start')
          for range_end in subnet_props.dhcp_end:
              ensure_in_cidr(range_end, 'dhcp_end')

      if subnet_name != CONF.local_subnet:
          return
      ensure_in_cidr(str(netaddr.IPNetwork(CONF.local_ip).ip), 'local_ip')
      if (CONF.undercloud_service_certificate or
              CONF.generate_service_certificate):
          ensure_in_cidr(CONF['undercloud_public_host'],
                         'undercloud_public_host',
                         require_ip=False, log_only=True)
          ensure_in_cidr(CONF['undercloud_admin_host'],
                         'undercloud_admin_host',
                         require_ip=False)
  def _validate_dhcp_range(subnet_props, subnet_name):
      """Validate the DHCP allocation ranges of a subnet.

      dhcp_start and dhcp_end are treated as parallel lists; each start/end
      pair defines one range.  When either list holds more than one element
      both lists must have the same length, and within every pair the start
      address must sort strictly before the end address.

      :param subnet_props: subnet settings exposing dhcp_start and dhcp_end
      :param subnet_name: subnet name, used in the error message
      :raises FailedValidation: on mismatched list lengths or when a start
          address does not come before its end address
      """
      len_dhcp_start = len(subnet_props.dhcp_start)
      len_dhcp_end = len(subnet_props.dhcp_end)
      # "and" binds tighter than "or": without the inner parentheses any
      # subnet with more than one dhcp_start was rejected even when the two
      # lists had the same number of elements.
      if ((len_dhcp_start > 1 or len_dhcp_end > 1) and
              len_dhcp_start != len_dhcp_end):
          message = (_('Number of elements in dhcp_start and dhcp_end must be '
                       'identical. Subnet "{0}" have "{1}" dhcp_start elements '
                       'and "{2}" dhcp_end elements.').format(subnet_name,
                                                              len_dhcp_start,
                                                              len_dhcp_end))
          LOG.error(message)
          raise FailedValidation(message)

      for a, b in zip(subnet_props.dhcp_start, subnet_props.dhcp_end):
          start = netaddr.IPAddress(a)
          end = netaddr.IPAddress(b)
          if start >= end:
              message = (_('Invalid dhcp range specified, dhcp_start "{0}" does '
                           'not come before dhcp_end "{1}"').format(start, end))
              LOG.error(message)
              raise FailedValidation(message)
  def _validate_inspection_range(subnet_props):
      """Check that the inspection range start precedes its end.

      inspection_iprange is split on ',' and the first two pieces are used
      as start and end address.  Raises FailedValidation when the start
      does not come before the end.
      """
      bounds = subnet_props.inspection_iprange.split(',')
      start = netaddr.IPAddress(bounds[0])
      end = netaddr.IPAddress(bounds[1])
      if start >= end:
          message = (_('Invalid inspection range specified, inspection_iprange '
                       '"{0}" does not come before "{1}"').format(start, end))
          LOG.error(message)
          raise FailedValidation(message)
  def _validate_interface_exists(config_var='local_interface'):
      """Check that the interface named by config_var exists on this host.

      The check is skipped when net_config_override is set.  Raises
      FailedValidation when the interface is not reported by netifaces.
      """
      if CONF.net_config_override:
          return
      interface = CONF.get(config_var)
      if interface in netifaces.interfaces():
          return
      message = (_('Invalid {0} specified. '
                   '{1} is not available.').format(config_var, interface))
      LOG.error(message)
      raise FailedValidation(message)
  def _validate_no_ip_change():
      """Disallow provisioning interface IP changes.

      Changing the provisioning network IP causes a number of issues, so it
      is rejected early in the install, before configurations start to be
      changed.  The existing IP is read from the first address of the
      br-ctlplane entry in the os-net-config file (net_config_override if
      set, otherwise /etc/os-net-config/config.json) and compared with
      local_ip.

      Nothing is checked when the file does not exist, is not valid JSON,
      or has no br-ctlplane entry.

      :raises FailedValidation: if the configured local_ip differs from the
          IP already present in the os-net-config file
      """
      # Expand "~" once and use the same path for the existence check and
      # for open(); previously only the check used the expanded path, so a
      # "~/..." override passed the check and then failed to open.
      os_net_config_file = os.path.expanduser(
          CONF.net_config_override or '/etc/os-net-config/config.json')

      # Nothing to do if we haven't already installed
      if not os.path.isfile(os_net_config_file):
          return

      try:
          with open(os_net_config_file) as f:
              network_config = json.load(f)
          ctlplane = [i for i in network_config.get('network_config', [])
                      if i['name'] == 'br-ctlplane'][0]
      except ValueError:
          # File was empty
          return
      except IndexError:
          # Nothing to check if br-ctlplane wasn't configured
          return

      existing_ip = ctlplane['addresses'][0]['ip_netmask']
      if existing_ip != CONF.local_ip:
          message = _('Changing the local_ip is not allowed. Existing IP: '
                      '{0}, Configured IP: {1}').format(
                          existing_ip, CONF.local_ip)
          LOG.error(message)
          raise FailedValidation(message)
  def _validate_passwords_file():
      """Fail fast when an update would run without the passwords file.

      If ~/stackrc exists but the passwords file at PASSWORD_PATH does not,
      FailedValidation is raised: proceeding would change all service
      passwords and break the existing undercloud.
      """
      already_deployed = os.path.isfile(os.path.expanduser('~/stackrc'))
      if not already_deployed:
          return
      if os.path.isfile(PASSWORD_PATH):
          return
      message = (_('The %s file is missing. This will cause all service '
                   'passwords to change and break the existing '
                   'undercloud. ') % PASSWORD_PATH)
      LOG.error(message)
      raise FailedValidation(message)
  def _validate_env_files_paths():
      """Reject custom env files that clash with j2-rendered templates.

      tools/process-templates.py is run in dry-run mode to collect the base
      names of the files rendered from jinja2 templates.  A custom
      environment file located outside the templates directory whose base
      name matches one of them raises FailedValidation.
      """
      templates_dir = CONF.get('templates') or constants.TRIPLEO_HEAT_TEMPLATES
      roles_file = utils.rel_or_abs_path(
          CONF.get('roles_file') or constants.UNDERCLOUD_ROLES_FILE,
          templates_dir)

      # get the list of jinja templates normally rendered for UC installations
      LOG.debug(_("Using roles file {0} from {1}").format(roles_file,
                                                          templates_dir))
      script = os.path.join(templates_dir, 'tools/process-templates.py')
      interpreter = "/usr/bin/python{}".format(sys.version_info[0])
      command = [interpreter, script, '--roles-data', roles_file, '--dry-run']
      dry_run = _run_live_command(command, name='process-templates-dry-run',
                                  cwd=templates_dir, wait=False)

      # parse the list for the rendered from j2 file names
      output = dry_run.communicate()[0]
      rendered_names = []
      for line in output.split("\n"):
          if not line.endswith('.yaml'):
              continue
          from_dry_run = line.startswith('dry run')
          from_jinja2 = line.startswith('jinja2')
          if not (from_dry_run or from_jinja2):
              continue
          bname = os.path.basename(line.split(' ')[-1])
          if from_dry_run:
              rendered_names.append(bname)
          if from_jinja2:
              rendered_names.append(bname.replace('.j2', ''))

      for env_file in CONF['custom_env_files']:
          env_file_abs = os.path.abspath(env_file)
          outside_templates = (os.path.dirname(env_file_abs) !=
                               os.path.abspath(templates_dir))
          if outside_templates and os.path.basename(env_file) in rendered_names:
              msg = _(
                  'Heat environment external to the templates dir '
                  'can not reference j2 processed file %s') % env_file_abs
              LOG.error(msg)
              raise FailedValidation(msg)
  def _run_yum_clean_all(instack_env):
      """Run ``sudo yum clean all`` with instack_env as its environment."""
      LOG.info('Running yum clean all')
      _run_live_command(['sudo', 'yum', 'clean', 'all'],
                        env=instack_env, name='yum-clean-all')
      LOG.info(_('yum-clean-all completed successfully'))
  def _run_yum_update(instack_env):
      """Run ``sudo yum update -y`` with instack_env as its environment."""
      LOG.info('Running yum update')
      _run_live_command(['sudo', 'yum', 'update', '-y'],
                        env=instack_env, name='yum-update')
      LOG.info(_('yum-update completed successfully'))
  def _validate_architecure_options():
      """Validate the additional_architectures related options.

      Every configured architecture must be listed in
      constants.ADDITIONAL_ARCHITECTURES, and iPXE must not be enabled when
      ppc64le is among them.  Raises FailedValidation on the first problem.
      """
      def fail(message):
          LOG.error(_('Undercloud configuration validation failed: %s'), message)
          raise FailedValidation(message)

      for arch in CONF['additional_architectures']:
          if arch in constants.ADDITIONAL_ARCHITECTURES:
              continue
          params = {
              'architecture': arch,
              'all_architectures':
                  ' '.join(constants.ADDITIONAL_ARCHITECTURES),
          }
          fail(_('additional_architectures "%(architecture)s" '
                 'must be in the supported architecture list: '
                 '%(all_architectures)s') % params)

      if 'ppc64le' not in CONF['additional_architectures']:
          return
      if 'ipxe_enabled' in CONF and CONF['ipxe_enabled']:
          fail(_('Currently iPXE boot isn\'t supported with '
                 'ppc64le systems but is enabled'))
  def _checking_status(item):
      """Log an info line announcing that item is being checked."""
      status_line = _('Checking %s...') % item
      LOG.info(status_line)
  def _check_routed_networks_enabled_if_multiple_subnets_defined():
      """Require enable_routed_networks when more than one subnet is set.

      Raises FailedValidation otherwise.
      """
      if len(CONF.subnets) <= 1:
          return
      if CONF.enable_routed_networks:
          return
      msg = _('Multiple subnets specified: %s but routed networks are not '
              'enabled.') % CONF.subnets
      LOG.error(msg)
      raise FailedValidation(msg)
  def _validate_deprecetad_now_invalid_parameters():
      """Reject configuration that still sets removed options.

      Each removed option is registered as a string option on the global
      config so that its value can be read; any option found with a truthy
      value is reported and FailedValidation is raised.
      """
      removed_opts = ['masquerade_network']
      conf = cfg.CONF
      still_used = []
      for opt_name in removed_opts:
          conf.register_opts([cfg.StrOpt(opt_name)])
          if conf.get(opt_name):
              still_used.append(opt_name)
      if not still_used:
          return
      msg = _('Options that has been deprecated and removed/replaced '
              'detected. Invalid options: %s') % still_used
      LOG.error(msg)
      raise FailedValidation(msg)
  def _validate_dnsnameservers(s):
      """Check that the nameservers match the IP version of the subnet.

      The subnet's own dns_nameservers are used when set, otherwise
      undercloud_nameservers.  Raises FailedValidation on the first
      nameserver whose IP version differs from that of the subnet CIDR.
      """
      subnet_version = netaddr.IPNetwork(s['cidr']).version
      nameservers = s['dns_nameservers'] or CONF.undercloud_nameservers
      for nameserver in nameservers:
          if netaddr.IPAddress(nameserver).version == subnet_version:
              continue
          message = (_('IP version missmatch. Nameserver {0} is not valid '
                       'for subnet {1}').format(nameserver, s['cidr']))
          LOG.error(message)
          raise FailedValidation(message)
  def _check_all_or_no_subnets_use_dns_nameservers():
      """Require dns_nameservers on either all subnets or none of them.

      Raises FailedValidation when some subnets define dns_nameservers and
      others leave it empty.
      """
      per_subnet = [CONF.get(name).get('dns_nameservers')
                    for name in CONF.subnets]
      counts = [len(servers) for servers in per_subnet]
      some_empty = any(count == 0 for count in counts)
      some_defined = any(count > 0 for count in counts)
      if not (some_empty and some_defined):
          return

      with_dns = [name for name in CONF.subnets
                  if CONF.get(name).get('dns_nameservers')]
      without_dns = [name for name in CONF.subnets
                     if not CONF.get(name).get('dns_nameservers')]
      message = (_('Option dns_nameservers is defined for subnets: {0}. '
                   'Option dns_nameservers is also required for subnets: '
                   '{1}.').format(', '.join(with_dns),
                                  ', '.join(without_dns)))
      LOG.error(message)
      raise FailedValidation(message)
  def check(verbose_level, upgrade=False):
      """Run the undercloud preflight validations.

      The undercloud configuration is loaded and logging is set up using
      its log file option, then the host, template and networking
      validations run in sequence.  A KeyError, FailedValidation or
      RuntimeError raised by any of them is logged and the process exits
      with status 1.

      :param verbose_level: verbosity passed to utils.configure_logging
      :param upgrade: passed to the disk space check to select the
          upgrade variant
      """
      # Fetch configuration and use its log file param to add logging to a file
      utils.load_config(CONF, constants.UNDERCLOUD_CONF_PATH)
      utils.configure_logging(LOG, verbose_level, CONF['undercloud_log_file'])

      def run_step(label, validator, *args):
          # Announce the check, then execute it.
          _checking_status(label)
          validator(*args)

      try:
          # Other validations
          run_step('Hostname', utils.check_hostname)
          run_step('Memory', _check_memory)
          run_step('Disk space', _check_diskspace, upgrade)
          run_step('Sysctl', _check_sysctl)
          run_step('Password file', _validate_passwords_file)
          run_step('Deprecated now invalid options',
                   _validate_deprecetad_now_invalid_parameters)

          # Heat templates validations
          if CONF.get('custom_env_files'):
              run_step('Custom env file', _validate_env_files_paths)

          # Networking validations
          run_step('Networking values', _validate_value_formats)
          _check_routed_networks_enabled_if_multiple_subnets_defined()
          _check_all_or_no_subnets_use_dns_nameservers()
          for subnet in CONF.subnets:
              s = CONF.get(subnet)
              run_step('Subnet "%s" is in CIDR' % subnet,
                       _validate_in_cidr, s, subnet)
              run_step('DHCP range is in subnet "%s"' % subnet,
                       _validate_dhcp_range, s, subnet)
              run_step('Inspection range for subnet "%s"' % subnet,
                       _validate_inspection_range, s)
              _validate_dnsnameservers(s)
          run_step('IP addresses', _validate_ips)
          run_step('Network interfaces', _validate_interface_exists)
          run_step('Provisionning IP change', _validate_no_ip_change)
          run_step('Architecture', _validate_architecure_options)
      except KeyError as exc:
          LOG.error(_('Key error in configuration: {error}\n'
                      'Value is missing in configuration.').format(error=exc))
          sys.exit(1)
      except FailedValidation as exc:
          LOG.error(_('An error occurred during configuration '
                      'validation, please check your host '
                      'configuration and try again.\nError '
                      'message: {error}').format(error=exc))
          sys.exit(1)
      except RuntimeError as exc:
          LOG.error(_('An error occurred during configuration '
                      'validation, please check your host '
                      'configuration and try again. Error '
                      'message: {error}').format(error=exc))
          sys.exit(1)
  def minion_check(verbose_level, upgrade=False):
      """Run the minion preflight validations.

      The minion configuration is loaded and logging is set up using its
      log file option, then the hostname, sysctl, interface, passwords file
      and (when custom env files are set) template validations run in
      sequence.  A KeyError, FailedValidation or RuntimeError raised by any
      of them is logged and the process exits with status 1.

      :param verbose_level: verbosity passed to utils.configure_logging
      :param upgrade: accepted but not used by this function
      """
      utils.load_config(CONF, constants.MINION_CONF_PATH)
      utils.configure_logging(LOG, verbose_level, CONF['minion_log_file'])

      def run_step(label, validator, *args):
          # Announce the check, then execute it.
          _checking_status(label)
          validator(*args)

      try:
          run_step('Hostname', utils.check_hostname)
          run_step('Sysctl', _check_sysctl)
          run_step('Network interfaces', _validate_interface_exists,
                   'minion_local_interface')
          run_step('Password file', _validate_passwords_file)

          # Heat templates validations
          if CONF.get('custom_env_files'):
              run_step('Custom env file', _validate_env_files_paths)
      except KeyError as exc:
          LOG.error(_('Key error in configuration: {error}\n'
                      'Value is missing in configuration.').format(error=exc))
          sys.exit(1)
      except FailedValidation as exc:
          LOG.error(_('An error occurred during configuration '
                      'validation, please check your host '
                      'configuration and try again.\nError '
                      'message: {error}').format(error=exc))
          sys.exit(1)
      except RuntimeError as exc:
          LOG.error(_('An error occurred during configuration '
                      'validation, please check your host '
                      'configuration and try again. Error '
                      'message: {error}').format(error=exc))
          sys.exit(1)