886697755b
Fix an issue observed during the testing of a large-scale subcloud prestage operation. In one of many rounds of testing, ansible hung in the middle of prestage of a subcloud causing the whole strategy to hang for many hours. The process had to be manually killed as strategy abort did not work in this case. The issue is addressed by invoking the 'ansible-playbook' call via '/usr/bin/timeout'. The timeout command will kill the ansible-playbook tree if the given timeout value is hit. For now, only the prestaging operations are using the new timeout. The original 'run_playbook' method is preserved in order to reduce any risk in this new method of invoking a subprocess. When a timeout occurs, the ansible log is updated before the process is killed. Example: 2022-04-28-17:28:44 TIMEOUT (1800s) - playbook is terminated Default timeout: - We use a global timeout (default: 3600s / 1hr) - The default can be modified from the [DEFAULT] section in /etc/dcmanager/dcmanager.conf. To change it, add the 'playbook_timeout' option as shown below, then restart the dcmanager-manager service. playbook_timeout=3600 Future considerations (not part of this commit): - In python3, this code can be simplified to use the new subprocess.run(timeout=val) method or Popen with p.wait(timeout=val) - Beginning with ansible 2.10, we can introduce the ANSIBLE_TASK_TIMEOUT value to set a task-level timeout. 
This is not available in our current version of ansible (2.7.5) Test Plan: PASS: Add unit tests covering: - no timeout given (maintain current functionality) - timeout given but not hit - timeout given; process is killed - timeout given; hung process (ignoring SIGTERM) is killed Run prestage operations as normal - no regression Modify default timeout to 5s, run prestage operations - verify that timeout occurs - verify that ansible-playbook is terminated - verify that ansible log file shows TIMEOUT log Modify default timeout to 5s for a single subcloud, then run prestage operations - verify that only the single subcloud operation is killed Modify prestage prestage-sw-packages/tasks/main.yml to use '--bwlimit=128' in the rsync from registry.central. This slows down the package prestaging, and the playbook timeout is reached. Add a 'pause' task in the prestage-sw-packages ansible for a single subcloud. Ensure just the one task times out. Exercise non-prestaging ansible playbook (to ensure subprocess Popen change does not impact other playbooks) - provisioned a new subcloud Closes-Bug: 1971994 Change-Id: Iaf1bee786afc505594c6671c959cc2650202ee6c Signed-off-by: Kyle MacLeod <kyle.macleod@windriver.com>
201 lines
7.0 KiB
Python
201 lines
7.0 KiB
Python
# Copyright 2016 Ericsson AB
|
|
# Copyright (c) 2017-2022 Wind River Systems, Inc.
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
|
|
"""
|
|
File to store all the configurations
|
|
"""
|
|
from oslo_config import cfg
|
|
from oslo_utils import importutils
|
|
|
|
# Ensure keystonemiddleware options are imported
|
|
importutils.import_module('keystonemiddleware.auth_token')
|
|
|
|
# General service options; list_opts() pairs this list with a group of None.
global_opts = [
    cfg.BoolOpt(
        'use_default_quota_class',
        default=True,
        help='Enables or disables use of default quota class '
             'with default quota.',
    ),
    cfg.IntOpt(
        'report_interval',
        default=60,
        help='Seconds between running periodic reporting tasks.',
    ),
    cfg.IntOpt(
        'worker_rlimit_nofile',
        default=4096,
        help='Maximum number of open files per worker process.',
    ),
]
|
|
|
|
# OpenStack credentials used for the Endpoint Cache.
# These are non-standard config options, so they have to be registered
# explicitly for the dcmanager engine.
keystone_opts = [
    cfg.StrOpt(
        'username',
        help='Username of account',
    ),
    cfg.StrOpt(
        'password',
        secret=True,
        help='Password of account',
    ),
    cfg.StrOpt(
        'project_name',
        help='Tenant name of account',
    ),
    cfg.StrOpt(
        'user_domain_name',
        default='Default',
        help='User domain name of account',
    ),
    cfg.StrOpt(
        'project_domain_name',
        default='Default',
        help='Project domain name of account',
    ),
]
|
|
|
|
|
|
# Options for the pecan API application (root controller, module search
# path, debug mode and authentication toggle).
pecan_opts = [
    cfg.StrOpt(
        'root',
        default='dcmanager.api.controllers.root.RootController',
        help='Pecan root controller'
    ),
    cfg.ListOpt(
        'modules',
        default=["dcmanager.api"],
        help='A list of modules where pecan will search for applications.'
    ),
    cfg.BoolOpt(
        'debug',
        default=False,
        # Adjacent string literals are concatenated with no separator, so
        # the second fragment must carry its own leading space; without it
        # the help text read "...browser andinteractively debug...".
        help='Enables the ability to display tracebacks in the browser and'
             ' interactively debug during development.'
    ),
    cfg.BoolOpt(
        'auth_enable',
        default=True,
        help='Enables user authentication in pecan.'
    )
]
|
|
|
|
|
|
# OpenStack admin user credentials used for the Endpoint Cache.
cache_opts = [
    cfg.StrOpt(
        'auth_uri',
        help='Keystone authorization url',
    ),
    cfg.StrOpt(
        'identity_uri',
        help='Keystone service url',
    ),
    cfg.StrOpt(
        'admin_username',
        help='Username of admin account, needed when'
             ' auto_refresh_endpoint set to True',
    ),
    cfg.StrOpt(
        'admin_password',
        secret=True,
        help='Password of admin account, needed when'
             ' auto_refresh_endpoint set to True',
    ),
    cfg.StrOpt(
        'admin_tenant',
        help='Tenant name of admin account, needed when'
             ' auto_refresh_endpoint set to True',
    ),
    cfg.StrOpt(
        'admin_user_domain_name',
        default='Default',
        help='User domain name of admin account, needed when'
             ' auto_refresh_endpoint set to True',
    ),
    cfg.StrOpt(
        'admin_project_domain_name',
        default='Default',
        help='Project domain name of admin account, needed when'
             ' auto_refresh_endpoint set to True',
    ),
]
|
|
|
|
# OpenStack credentials used for the Endpoint Cache.
endpoint_cache_opts = [
    cfg.StrOpt(
        'auth_uri',
        help='Keystone authorization url',
    ),
    cfg.StrOpt(
        'auth_plugin',
        help='Name of the plugin to load',
    ),
    cfg.StrOpt(
        'username',
        help='Username of account',
    ),
    cfg.StrOpt(
        'password',
        secret=True,
        help='Password of account',
    ),
    cfg.StrOpt(
        'project_name',
        help='Project name of account',
    ),
    cfg.StrOpt(
        'user_domain_name',
        default='Default',
        help='User domain name of account',
    ),
    cfg.StrOpt(
        'project_domain_name',
        default='Default',
        help='Project domain name of account',
    ),
    # No default is declared for this option.
    cfg.IntOpt(
        'http_connect_timeout',
        help='Request timeout value for communicating with Identity'
             ' API server.',
    ),
]
|
|
|
|
# Periodic-task and audit settings; list_opts() pairs this list with the
# 'scheduler' option group.
scheduler_opts = [
    cfg.BoolOpt(
        'periodic_enable',
        default=True,
        help='boolean value for enable/disable periodic tasks',
    ),
    cfg.IntOpt(
        'subcloud_audit_interval',
        default=30,
        help='periodic time interval for subcloud audit',
    ),
    cfg.IntOpt(
        'kube_rootca_update_audit_expiry_days',
        default=90,
        help='Num days remaining for a kube rootca to be out-of-sync',
    ),
    cfg.IntOpt(
        'patch_audit_interval',
        default=900,
        help='default time interval for patch audit',
    ),
]
|
|
|
|
# Worker counts, host name and the global playbook timeout; list_opts()
# pairs this list with a group of None.
common_opts = [
    cfg.IntOpt(
        'workers',
        default=1,
        help='number of workers',
    ),
    cfg.IntOpt(
        'orch_workers',
        default=1,
        help='number of orchestrator workers',
    ),
    cfg.IntOpt(
        'state_workers',
        default=4,
        help='number of state workers',
    ),
    cfg.IntOpt(
        'audit_workers',
        default=1,
        help='number of audit workers',
    ),
    cfg.IntOpt(
        'audit_worker_workers',
        default=4,
        help='number of audit-worker workers',
    ),
    cfg.StrOpt(
        'host',
        default='localhost',
        help='hostname of the machine',
    ),
    cfg.IntOpt(
        'playbook_timeout',
        default=3600,
        help='global ansible playbook timeout (seconds)',
    ),
]
|
|
|
|
# Option groups (config-file sections) that the option lists in this module
# are registered under.
scheduler_opt_group = cfg.OptGroup(
    name='scheduler',
    title='Scheduler options for periodic job',
)
keystone_opt_group = cfg.OptGroup(
    name='keystone_authtoken',
    title='Keystone options',
)
# The group stores the pecan configurations.
pecan_group = cfg.OptGroup(
    name='pecan',
    title='Pecan options',
)

cache_opt_group = cfg.OptGroup(
    name='cache',
    title='OpenStack Admin Credentials',
)

endpoint_cache_opt_group = cfg.OptGroup(
    name='endpoint_cache',
    title='OpenStack Credentials',
)
|
|
|
|
|
|
def list_opts():
    """Yield (group name, option list) pairs for this module's options.

    The group name is None for global_opts and common_opts; every other
    list is paired with the name of its option group. The keystone options
    are not part of this listing.
    """
    grouped_opts = (
        (cache_opt_group.name, cache_opts),
        (endpoint_cache_opt_group.name, endpoint_cache_opts),
        (scheduler_opt_group.name, scheduler_opts),
        (pecan_group.name, pecan_opts),
        (None, global_opts),
        (None, common_opts),
    )
    for group_name, group_opts in grouped_opts:
        yield group_name, group_opts
|
|
|
|
|
|
def register_options():
    """Register every option list produced by list_opts() on cfg.CONF."""
    for group_name, group_opts in list_opts():
        cfg.CONF.register_opts(group_opts, group=group_name)
|
|
|
|
|
|
# Only the dcmanager engine needs this explicit call; for dcmanager-api the
# keystone_authtoken options get picked up and registered automatically from
# the config file.
def register_keystone_options():
    """Register keystone_opts on cfg.CONF under the keystone option group."""
    target_group = keystone_opt_group.name
    cfg.CONF.register_opts(keystone_opts, group=target_group)
|