From 30d9bf9a6d8af9590b04caa3757956522f2004d4 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Tue, 19 Jan 2021 12:10:52 -0800 Subject: [PATCH] Async task support We have a *ton* of stuff in devstack that is very linear, specifically the ten-ish minutes we spend loading osc to run a single API command against something. We also generate configs, sync databases, and other things that use one core of our worker and make our runtime longer than it really needs to be. The idea in this patch is to make it super simple to run some things in the background and then wait for them to finish before proceeding to something that will require them to be done. This avoids the interleaving you would expect by redirecting the async tasks to a log file, and then cat'ing that log file synchronously during the wait operation. The per-task log file remains so it's easier to examine it in isolation. Multiple people have reported between 22-30% improvement in the time it takes to stack with this. More can be done, but what is here already makes a significant difference. Change-Id: I270a910b531641b023c13f75dfedca057a1f1031 --- .zuul.yaml | 12 +++ clean.sh | 2 +- extras.d/80-tempest.sh | 3 +- functions | 1 + inc/async | 225 +++++++++++++++++++++++++++++++++++++++++ lib/keystone | 40 ++++---- lib/nova | 29 +++--- stack.sh | 46 ++++++--- unstack.sh | 1 + 9 files changed, 315 insertions(+), 44 deletions(-) create mode 100644 inc/async diff --git a/.zuul.yaml b/.zuul.yaml index c1406716fe..7b0696bcd7 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -598,6 +598,17 @@ nodeset: openstack-single-node-bionic voting: false +- job: + name: devstack-async + parent: tempest-full-py3 + description: Async mode enabled + voting: false + vars: + devstack_localrc: + DEVSTACK_PARALLEL: True + zuul_copy_output: + /opt/stack/async: logs + - job: name: devstack-platform-fedora-latest parent: tempest-full-py3 @@ -690,6 +701,7 @@ - devstack-platform-fedora-latest - devstack-platform-centos-8 - devstack-platform-bionic + - devstack-async - devstack-multinode - devstack-unit-tests - openstack-tox-bashate diff --git a/clean.sh b/clean.sh index 4cebf1d9ea..870dfd4313 100755 --- a/clean.sh +++ b/clean.sh @@ -113,7 +113,7 @@ cleanup_rpc_backend cleanup_database # Clean out data and status -sudo rm -rf $DATA_DIR $DEST/status +sudo rm -rf $DATA_DIR $DEST/status $DEST/async # Clean out the log file and log directories if [[ -n "$LOGFILE" ]] && [[ -f "$LOGFILE" ]]; then diff --git a/extras.d/80-tempest.sh b/extras.d/80-tempest.sh index 15ecfe39eb..06c73ec763 100644 --- a/extras.d/80-tempest.sh +++ b/extras.d/80-tempest.sh @@ -6,7 +6,7 @@ if is_service_enabled tempest; then source $TOP_DIR/lib/tempest elif [[ "$1" == "stack" && "$2" == "install" ]]; then echo_summary "Installing Tempest" - install_tempest + async_runfunc install_tempest elif [[ "$1" == "stack" && "$2" == "post-config" ]]; then # Tempest config must come after layer 2 services are running : @@ -17,6 +17,7 @@ if is_service_enabled tempest; then # local.conf Tempest option overrides : elif [[ "$1" == "stack" && "$2" == "test-config" ]]; then + async_wait install_tempest echo_summary "Initializing Tempest" configure_tempest echo_summary "Installing Tempest Plugins" diff --git a/functions b/functions index fc87a5512d..89bbab2085 100644 --- a/functions +++ b/functions @@ -21,6 +21,7 @@ source ${FUNC_DIR}/inc/ini-config source ${FUNC_DIR}/inc/meta-config source ${FUNC_DIR}/inc/python source ${FUNC_DIR}/inc/rootwrap +source ${FUNC_DIR}/inc/async # Save trace setting _XTRACE_FUNCTIONS=$(set +o | grep xtrace) diff --git a/inc/async b/inc/async new file mode 100644 index 0000000000..d29168f2f5 --- /dev/null +++ b/inc/async @@ -0,0 +1,225 @@ +#!/bin/bash +# +# Symbolic asynchronous tasks for devstack +# +# Usage: +# +# async_runfunc my_shell_func foo bar baz +# +# ... do other stuff ... +# +# async_wait my_shell_func +# + +DEVSTACK_PARALLEL=$(trueorfalse False DEVSTACK_PARALLEL) +_ASYNC_BG_TIME=0 + +# Keep track of how much total time was spent in background tasks +# Takes a job runtime in ms. +function _async_incr_bg_time { + local elapsed_ms="$1" + _ASYNC_BG_TIME=$(($_ASYNC_BG_TIME + $elapsed_ms)) +} + +# Get the PID of a named future to wait on +function async_pidof { + local name="$1" + local inifile="${DEST}/async/${name}.ini" + + if [ -f "$inifile" ]; then + iniget $inifile job pid + else + echo 'UNKNOWN' + return 1 + fi +} + +# Log a message about a job. If the message contains "%command" then the +# full command line of the job will be substituted in the output +function async_log { + local name="$1" + shift + local message="$*" + local inifile=${DEST}/async/${name}.ini + local pid + local command + + pid=$(iniget $inifile job pid) + command=$(iniget $inifile job command | tr '#' '-') + message=$(echo "$message" | sed "s#%command#$command#g") + + echo "[Async ${name}:${pid}]: $message" +} + +# Inner function that actually runs the requested task. We wrap it like this +# just so we can emit a finish message as soon as the work is done, to make +# it easier to find the tracking just before an error. +function async_inner { + local name="$1" + local rc + shift + set -o xtrace + if $* >${DEST}/async/${name}.log 2>&1; then + rc=0 + set +o xtrace + async_log "$name" "finished successfully" + else + rc=$? + set +o xtrace + async_log "$name" "FAILED with rc $rc" + fi + iniset ${DEST}/async/${name}.ini job end_time $(date "+%s%3N") + return $rc +} + +# Run something async. Takes a symbolic name and a list of arguments of +# what to run. Ideally this would be rarely used and async_runfunc() would +# be used everywhere for readability. +# +# This spawns the work in a background worker, records a "future" to be +# collected by a later call to async_wait() +function async_run { + local xtrace + xtrace=$(set +o | grep xtrace) + set +o xtrace + + local name="$1" + shift + local inifile=${DEST}/async/${name}.ini + + touch $inifile + iniset $inifile job command "$*" + iniset $inifile job start_time $(date +%s%3N) + + if [[ "$DEVSTACK_PARALLEL" = "True" ]]; then + async_inner $name $* & + iniset $inifile job pid $! + async_log "$name" "running: %command" + $xtrace + else + iniset $inifile job pid "self" + async_log "$name" "Running synchronously: %command" + $xtrace + $* + return $? + fi +} + +# Shortcut for running a shell function async. Uses the function name as the +# async name. +function async_runfunc { + async_run $1 $* +} + +# Wait for an async future to complete. May return immediately if already +# complete, or of the future has already been waited on (avoid this). May +# block until the future completes. +function async_wait { + local xtrace + xtrace=$(set +o | grep xtrace) + set +o xtrace + + local pid rc running inifile runtime + rc=0 + for name in $*; do + running=$(ls ${DEST}/async/*.ini 2>/dev/null | wc -l) + inifile="${DEST}/async/${name}.ini" + + if pid=$(async_pidof "$name"); then + async_log "$name" "Waiting for completion of %command" \ + "($running other jobs running)" + time_start async_wait + if [[ "$pid" != "self" ]]; then + # Do not actually call wait if we ran synchronously + if wait $pid; then + rc=0 + else + rc=$? + fi + cat ${DEST}/async/${name}.log + fi + time_stop async_wait + local start_time + local end_time + start_time=$(iniget $inifile job start_time) + end_time=$(iniget $inifile job end_time) + _async_incr_bg_time $(($end_time - $start_time)) + runtime=$((($end_time - $start_time) / 1000)) + async_log "$name" "finished %command with result" \ + "$rc in $runtime seconds" + rm -f $inifile + if [ $rc -ne 0 ]; then + echo Stopping async wait due to error: $* + break + fi + else + # This could probably be removed - it is really just here + # to help notice if you wait for something by the wrong + # name, but it also shows up for things we didn't start + # because they were not enabled. + echo Not waiting for async task $name that we never started or \ + has already been waited for + fi + done + + $xtrace + return $rc +} + +# Check for uncollected futures and wait on them +function async_cleanup { + local name + + if [[ "$DEVSTACK_PARALLEL" != "True" ]]; then + return 0 + fi + + for inifile in $(find ${DEST}/async -name '*.ini'); do + name=$(basename $pidfile .ini) + echo "WARNING: uncollected async future $name" + async_wait $name || true + done +} + +# Make sure our async dir is created and clean +function async_init { + local async_dir=${DEST}/async + + # Clean any residue if present from previous runs + rm -Rf $async_dir + + # Make sure we have a state directory + mkdir -p $async_dir +} + +function async_print_timing { + local bg_time_minus_wait + local elapsed_time + local serial_time + local speedup + + if [[ "$DEVSTACK_PARALLEL" != "True" ]]; then + return 0 + fi + + # The logic here is: All the background task time would be + # serialized if we did not do them in the background. So we can + # add that to the elapsed time for the whole run. However, time we + # spend waiting for async things to finish adds to the elapsed + # time, but is time where we're not doing anything useful. Thus, + # we substract that from the would-be-serialized time. + + bg_time_minus_wait=$((\ + ($_ASYNC_BG_TIME - ${_TIME_TOTAL[async_wait]}) / 1000)) + elapsed_time=$(($(date "+%s") - $_TIME_BEGIN)) + serial_time=$(($elapsed_time + $bg_time_minus_wait)) + + echo + echo "=================" + echo " Async summary" + echo "=================" + echo " Time spent in the background minus waits: $bg_time_minus_wait sec" + echo " Elapsed time: $elapsed_time sec" + echo " Time if we did everything serially: $serial_time sec" + echo " Speedup: " $(echo | awk "{print $serial_time / $elapsed_time}") +} diff --git a/lib/keystone b/lib/keystone index d4c7b063bb..66e867ca68 100644 --- a/lib/keystone +++ b/lib/keystone @@ -318,25 +318,25 @@ function create_keystone_accounts { local admin_role="admin" local member_role="member" - get_or_add_user_domain_role $admin_role $admin_user default + async_run ks-domain-role get_or_add_user_domain_role $admin_role $admin_user default # Create service project/role get_or_create_domain "$SERVICE_DOMAIN_NAME" - get_or_create_project "$SERVICE_PROJECT_NAME" "$SERVICE_DOMAIN_NAME" + async_run ks-project get_or_create_project "$SERVICE_PROJECT_NAME" "$SERVICE_DOMAIN_NAME" # Service role, so service users do not have to be admins - get_or_create_role service + async_run ks-service get_or_create_role service # The ResellerAdmin role is used by Nova and Ceilometer so we need to keep it. # The admin role in swift allows a user to act as an admin for their project, # but ResellerAdmin is needed for a user to act as any project. The name of this # role is also configurable in swift-proxy.conf - get_or_create_role ResellerAdmin + async_run ks-reseller get_or_create_role ResellerAdmin # another_role demonstrates that an arbitrary role may be created and used # TODO(sleepsonthefloor): show how this can be used for rbac in the future! local another_role="anotherrole" - get_or_create_role $another_role + async_run ks-anotherrole get_or_create_role $another_role # invisible project - admin can't see this one local invis_project @@ -349,10 +349,12 @@ function create_keystone_accounts { demo_user=$(get_or_create_user "demo" \ "$ADMIN_PASSWORD" "default" "demo@example.com") - get_or_add_user_project_role $member_role $demo_user $demo_project - get_or_add_user_project_role $admin_role $admin_user $demo_project - get_or_add_user_project_role $another_role $demo_user $demo_project - get_or_add_user_project_role $member_role $demo_user $invis_project + async_wait ks-{domain-role,domain,project,service,reseller,anotherrole} + + async_run ks-demo-member get_or_add_user_project_role $member_role $demo_user $demo_project + async_run ks-demo-admin get_or_add_user_project_role $admin_role $admin_user $demo_project + async_run ks-demo-another get_or_add_user_project_role $another_role $demo_user $demo_project + async_run ks-demo-invis get_or_add_user_project_role $member_role $demo_user $invis_project # alt_demo local alt_demo_project @@ -361,9 +363,9 @@ function create_keystone_accounts { alt_demo_user=$(get_or_create_user "alt_demo" \ "$ADMIN_PASSWORD" "default" "alt_demo@example.com") - get_or_add_user_project_role $member_role $alt_demo_user $alt_demo_project - get_or_add_user_project_role $admin_role $admin_user $alt_demo_project - get_or_add_user_project_role $another_role $alt_demo_user $alt_demo_project + async_run ks-alt-member get_or_add_user_project_role $member_role $alt_demo_user $alt_demo_project + async_run ks-alt-admin get_or_add_user_project_role $admin_role $admin_user $alt_demo_project + async_run ks-alt-another get_or_add_user_project_role $another_role $alt_demo_user $alt_demo_project # groups local admin_group @@ -373,11 +375,15 @@ function create_keystone_accounts { non_admin_group=$(get_or_create_group "nonadmins" \ "default" "non-admin group") - get_or_add_group_project_role $member_role $non_admin_group $demo_project - get_or_add_group_project_role $another_role $non_admin_group $demo_project - get_or_add_group_project_role $member_role $non_admin_group $alt_demo_project - get_or_add_group_project_role $another_role $non_admin_group $alt_demo_project - get_or_add_group_project_role $admin_role $admin_group $admin_project + async_run ks-group-memberdemo get_or_add_group_project_role $member_role $non_admin_group $demo_project + async_run ks-group-anotherdemo get_or_add_group_project_role $another_role $non_admin_group $demo_project + async_run ks-group-memberalt get_or_add_group_project_role $member_role $non_admin_group $alt_demo_project + async_run ks-group-anotheralt get_or_add_group_project_role $another_role $non_admin_group $alt_demo_project + async_run ks-group-admin get_or_add_group_project_role $admin_role $admin_group $admin_project + + async_wait ks-demo-{member,admin,another,invis} + async_wait ks-alt-{member,admin,another} + async_wait ks-group-{memberdemo,anotherdemo,memberalt,anotheralt,admin} if is_service_enabled ldap; then create_ldap_domain diff --git a/lib/nova b/lib/nova index d7426039c4..0a28cd97aa 100644 --- a/lib/nova +++ b/lib/nova @@ -741,31 +741,36 @@ function create_nova_keys_dir { sudo install -d -o $STACK_USER ${NOVA_STATE_PATH} ${NOVA_STATE_PATH}/keys } +function init_nova_db { + local dbname="$1" + local conffile="$2" + recreate_database $dbname + $NOVA_BIN_DIR/nova-manage --config-file $conffile db sync --local_cell +} + # init_nova() - Initialize databases, etc. function init_nova { # All nova components talk to a central database. # Only do this step once on the API node for an entire cluster. if is_service_enabled $DATABASE_BACKENDS && is_service_enabled n-api; then + # (Re)create nova databases + async_run nova-cell-0 init_nova_db nova_cell0 $NOVA_CONF + for i in $(seq 1 $NOVA_NUM_CELLS); do + async_run nova-cell-$i init_nova_db nova_cell${i} $(conductor_conf $i) + done + recreate_database $NOVA_API_DB $NOVA_BIN_DIR/nova-manage --config-file $NOVA_CONF api_db sync - recreate_database nova_cell0 - # map_cell0 will create the cell mapping record in the nova_api DB so - # this needs to come after the api_db sync happens. We also want to run - # this before the db sync below since that will migrate both the nova - # and nova_cell0 databases. + # this needs to come after the api_db sync happens. $NOVA_BIN_DIR/nova-manage cell_v2 map_cell0 --database_connection `database_connection_url nova_cell0` - # (Re)create nova databases - for i in $(seq 1 $NOVA_NUM_CELLS); do - recreate_database nova_cell${i} - $NOVA_BIN_DIR/nova-manage --config-file $(conductor_conf $i) db sync --local_cell + # Wait for DBs to finish from above + for i in $(seq 0 $NOVA_NUM_CELLS); do + async_wait nova-cell-$i done - # Migrate nova and nova_cell0 databases. - $NOVA_BIN_DIR/nova-manage --config-file $NOVA_CONF db sync - # Run online migrations on the new databases # Needed for flavor conversion $NOVA_BIN_DIR/nova-manage --config-file $NOVA_CONF db online_data_migrations diff --git a/stack.sh b/stack.sh index 036afd7b00..dcfef6db48 100755 --- a/stack.sh +++ b/stack.sh @@ -330,6 +330,9 @@ if [[ ! -d $DATA_DIR ]]; then safe_chmod 0755 $DATA_DIR fi +# Create and/or clean the async state directory +async_init + # Configure proper hostname # Certain services such as rabbitmq require that the local hostname resolves # correctly. Make sure it exists in /etc/hosts so that is always true. @@ -1082,19 +1085,19 @@ if is_service_enabled keystone; then create_keystone_accounts if is_service_enabled nova; then - create_nova_accounts + async_runfunc create_nova_accounts fi if is_service_enabled glance; then - create_glance_accounts + async_runfunc create_glance_accounts fi if is_service_enabled cinder; then - create_cinder_accounts + async_runfunc create_cinder_accounts fi if is_service_enabled neutron; then - create_neutron_accounts + async_runfunc create_neutron_accounts fi if is_service_enabled swift; then - create_swift_accounts + async_runfunc create_swift_accounts fi fi @@ -1107,9 +1110,11 @@ write_clouds_yaml if is_service_enabled horizon; then echo_summary "Configuring Horizon" - configure_horizon + async_runfunc configure_horizon fi +async_wait create_nova_accounts create_glance_accounts create_cinder_accounts +async_wait create_neutron_accounts create_swift_accounts configure_horizon # Glance # ------ @@ -1117,7 +1122,7 @@ fi # NOTE(yoctozepto): limited to node hosting the database which is the controller if is_service_enabled $DATABASE_BACKENDS && is_service_enabled glance; then echo_summary "Configuring Glance" - init_glance + async_runfunc init_glance fi @@ -1131,7 +1136,7 @@ if is_service_enabled neutron; then # Run init_neutron only on the node hosting the Neutron API server if is_service_enabled $DATABASE_BACKENDS && is_service_enabled neutron; then - init_neutron + async_runfunc init_neutron fi fi @@ -1161,7 +1166,7 @@ fi if is_service_enabled swift; then echo_summary "Configuring Swift" - init_swift + async_runfunc init_swift fi @@ -1170,7 +1175,7 @@ fi if is_service_enabled cinder; then echo_summary "Configuring Cinder" - init_cinder + async_runfunc init_cinder fi # Placement Service @@ -1178,9 +1183,16 @@ fi if is_service_enabled placement; then echo_summary "Configuring placement" - init_placement + async_runfunc init_placement fi +# Wait for neutron and placement before starting nova +async_wait init_neutron +async_wait init_placement +async_wait init_glance +async_wait init_swift +async_wait init_cinder + # Compute Service # --------------- @@ -1192,7 +1204,7 @@ if is_service_enabled nova; then # TODO(stephenfin): Is it possible for neutron to *not* be enabled now? If # not, remove the if here if is_service_enabled neutron; then - configure_neutron_nova + async_runfunc configure_neutron_nova fi fi @@ -1236,6 +1248,8 @@ if is_service_enabled cinder; then iniset $CINDER_CONF key_manager fixed_key "$FIXED_KEY" fi +async_wait configure_neutron_nova + # Launch the nova-api and wait for it to answer before continuing if is_service_enabled n-api; then echo_summary "Starting Nova API" @@ -1282,7 +1296,7 @@ fi if is_service_enabled nova; then echo_summary "Starting Nova" start_nova - create_flavors + async_runfunc create_flavors fi if is_service_enabled cinder; then echo_summary "Starting Cinder" @@ -1331,6 +1345,8 @@ if is_service_enabled horizon; then start_horizon fi +async_wait create_flavors + # Create account rc files # ======================= @@ -1467,8 +1483,12 @@ else exec 1>&3 fi +# Make sure we didn't leak any background tasks +async_cleanup + # Dump out the time totals time_totals +async_print_timing # Using the cloud # =============== diff --git a/unstack.sh b/unstack.sh index 3197cf136f..d9dca7c107 100755 --- a/unstack.sh +++ b/unstack.sh @@ -184,3 +184,4 @@ if is_service_enabled cinder && is_package_installed lvm2; then fi clean_pyc_files +rm -Rf $DEST/async