From fe5793b71d8914d1ce088c9b48ca7f8496bd67b1 Mon Sep 17 00:00:00 2001
From: Davlet Panech
Date: Mon, 29 May 2023 19:34:15 -0400
Subject: [PATCH] archive-dir: binary search + parallelism

Performance enhancements for archive-dir:

* While searching for old checksums, use BSD look [1] (binary search),
  rather than grep (linear). This requires a docker image with that
  utility installed. A Dockerfile is included and is meant to be built
  and pushed to Docker Hub manually as needed.
  Image name: starlingx/jenkins-pipelines-coreutils:TIMESTAMP.

* Process all files in parallel. Previously we only calculated
  checksums in parallel.

Timings before & after the patch, using a build with ~100K files and
~300K old checksums (docker + aptly + mirrors):

* before patch with JOBS=4: 2 hrs 7 min
* this patch with JOBS=4: 26 min
* this patch with JOBS=1: 1 hr 10 min

[1] https://man.openbsd.org/look.1

TESTS
=======================
Run "archive-misc" and make sure it copies/links the same files as
before the patch.

Story: 2010226
Task: 48184

Signed-off-by: Davlet Panech
Change-Id: I2ad271be673e8499c17a87e9d52864b40e217fc7
---
 dockerfiles/coreutils/.dockerignore |   1 +
 dockerfiles/coreutils/Dockerfile    |   8 +
 dockerfiles/coreutils/build.sh      |   8 +
 dockerfiles/coreutils/push.sh       |   8 +
 scripts/archive-misc.sh             |  46 +--
 scripts/clean-build.sh              |   3 +-
 scripts/helpers/archive-dir.sh      | 467 ++++++++++++++++++----------
 scripts/lib/job_utils.sh            |   2 +-
 8 files changed, 363 insertions(+), 180 deletions(-)
 create mode 100644 dockerfiles/coreutils/.dockerignore
 create mode 100644 dockerfiles/coreutils/Dockerfile
 create mode 100755 dockerfiles/coreutils/build.sh
 create mode 100755 dockerfiles/coreutils/push.sh

diff --git a/dockerfiles/coreutils/.dockerignore b/dockerfiles/coreutils/.dockerignore
new file mode 100644
index 0000000..72e8ffc
--- /dev/null
+++ b/dockerfiles/coreutils/.dockerignore
@@ -0,0 +1 @@
+*
diff --git a/dockerfiles/coreutils/Dockerfile b/dockerfiles/coreutils/Dockerfile
new file mode 100644
index 0000000..375e1e6
--- /dev/null
+++ b/dockerfiles/coreutils/Dockerfile
@@ -0,0 +1,8 @@
+FROM debian:11
+
+RUN apt-get update -y && \
+    apt-get upgrade -y && \
+    apt-get install -y bsdextrautils parallel && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
diff --git a/dockerfiles/coreutils/build.sh b/dockerfiles/coreutils/build.sh
new file mode 100755
index 0000000..46b0dac
--- /dev/null
+++ b/dockerfiles/coreutils/build.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+set -e
+
+CONTEXT_DIR="$(dirname "$0")"
+IMAGE="$(source "$CONTEXT_DIR/../../scripts/lib/job_utils.sh" && echo "$COREUTILS_DOCKER_IMG")"
+
+docker build -t "$IMAGE" "$CONTEXT_DIR"
diff --git a/dockerfiles/coreutils/push.sh b/dockerfiles/coreutils/push.sh
new file mode 100755
index 0000000..55eb997
--- /dev/null
+++ b/dockerfiles/coreutils/push.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+set -e
+
+CONTEXT_DIR="$(dirname "$0")"
+IMAGE="$(source "$CONTEXT_DIR/../../scripts/lib/job_utils.sh" && echo "$COREUTILS_DOCKER_IMG")"
+
+docker push "$IMAGE"
diff --git a/scripts/archive-misc.sh b/scripts/archive-misc.sh
index 10cb41c..abfa6f5 100755
--- a/scripts/archive-misc.sh
+++ b/scripts/archive-misc.sh
@@ -19,27 +19,8 @@ source "$THIS_DIR"/lib/publish_utils.sh
 
 load_build_env
 
-notice "archiving misc files"
-
 #VERBOSE_ARG="--verbose"
-exclude_args=()
-exclude_args+=(--exclude "/localdisk/designer/**")     # symlink inside
-exclude_args+=(--exclude "/aptly")                     # see below
-exclude_args+=(--exclude "/mirrors")                   # see below
-exclude_args+=(--exclude "/docker")                    # see below
-exclude_args+=(--exclude 
"/workspace") # symlink -exclude_args+=(--exclude "/repo") # symlink -exclude_args+=(--exclude "/localdisk/workdir/**") # ostree temp files -exclude_args+=(--exclude "/localdisk/sub_workdir/workdir/**") # ostree temp files -exclude_args+=(--exclude "/localdisk/deploy/**") # archived by archive-iso.sh - -mkdir -p "$BUILD_OUTPUT_HOME" -safe_copy_dir $DRY_RUN_ARG $VERBOSE_ARG \ - "${exclude_args[@]}" \ - "$BUILD_HOME/" "$BUILD_OUTPUT_HOME/" - - print_regfile_name_if_exists() { if [[ -f "$1" ]] ; then echo "$1" @@ -126,6 +107,7 @@ do_archive_dir() { safe_rm "$BUILD_OUTPUT_HOME/$dir" fi tmp_dir="$BUILD_HOME/tmp/archive-misc" + rm -rf "$tmp_dir/$id" mkdir -p "$tmp_dir/$id" cp -a "$THIS_DIR/helpers/archive-dir.sh" "$tmp_dir/" local archive_args=() @@ -139,6 +121,9 @@ do_archive_dir() { print_regfile_name_if_exists "$extra_checksums_file" done >>"$old_checksums_file_list" fi + if $SHELL_XTRACE ; then + archive_args+=("--xtrace") + fi #local egid #egid=$(id -g) @@ -149,7 +134,7 @@ do_archive_dir() { maybe_run mkdir -p "$dst_dir" safe_docker_run $DRY_RUN_ARG --writeable-archive-root --rm "$COREUTILS_DOCKER_IMG" "$tmp_dir/archive-dir.sh" \ "${archive_args[@]}" \ - -j ${BUILD_PACKAGES_PARALLEL_JOBS:-1} \ + -j ${PARALLEL_CMD_JOBS:-1} \ --output-checksums "$BUILD_OUTPUT_HOME/$dir/$CHECKSUMS_FILENAME" \ "$src_dir" \ "$dst_dir" \ @@ -163,6 +148,27 @@ do_archive_dir() { esac } +mkdir -p "$BUILD_OUTPUT_HOME" + +# Straight copy the other files +notice "archiving misc files" +exclude_args=() +exclude_args+=(--exclude "/localdisk/designer/**") # symlink inside +exclude_args+=(--exclude "/aptly") # see below +exclude_args+=(--exclude "/mirrors") # see below +exclude_args+=(--exclude "/docker") # see below +exclude_args+=(--exclude "/workspace") # symlink +exclude_args+=(--exclude "/repo") # symlink +exclude_args+=(--exclude "/localdisk/workdir/**") # ostree temp files +exclude_args+=(--exclude "/localdisk/sub_workdir/workdir/**") # ostree temp files +exclude_args+=(--exclude "/localdisk/deploy/**") # archived by archive-iso.sh +exclude_args+=(--exclude "/tmp/*") # some of the files here are quite large, exclude + +safe_copy_dir $DRY_RUN_ARG $VERBOSE_ARG \ + "${exclude_args[@]}" \ + "$BUILD_HOME/" "$BUILD_OUTPUT_HOME/" + +# Link or copy big directories do_archive_dir "mirrors" do_archive_dir "aptly" "$BUILD_OUTPUT_HOME/mirrors/$CHECKSUMS_FILENAME" do_archive_dir "docker" diff --git a/scripts/clean-build.sh b/scripts/clean-build.sh index 3883715..016afaf 100755 --- a/scripts/clean-build.sh +++ b/scripts/clean-build.sh @@ -138,7 +138,8 @@ misc_rm=( "$BUILD_HOME"/workspace/std/build-wheels* \ "$BUILD_HOME"/workspace/std/build-helm \ "$BUILD_HOME"/workspace/"export" \ - "$BUILD_HOME"/workspace/helm-charts + "$BUILD_HOME"/workspace/helm-charts \ + "$BUILD_HOME"/tmp \ ) rm_args=() for path in "${misc_rm[@]}" ; do diff --git a/scripts/helpers/archive-dir.sh b/scripts/helpers/archive-dir.sh index 577f57b..2b84263 100755 --- a/scripts/helpers/archive-dir.sh +++ b/scripts/helpers/archive-dir.sh @@ -8,6 +8,7 @@ DST_CHECKSUMS_FILE= CHANGE_OWNER= CHANGE_GROUP= JOBS=1 +XTRACE=0 usage() { echo -n "\ @@ -30,6 +31,8 @@ Archive SRC_DIR in DST_DIR, using TMP_DIR for temporary files. We will use the files with matching properties & checksums to create hard links in DST_DIR. + --xtrace Enable debug output + If executed by root, we will preserve owners/groups of the copied files, unless they are overridden on the command line. 
@@ -58,7 +61,7 @@ check_pipe_status() { } # Process command line -temp=$(getopt -o h,j: --long help,jobs:,owner:,group:,output-checksums:,checksum-hardlink: -n "$PROGNAME" -- "$@") || cmdline_error +temp=$(getopt -o h,j: --long help,jobs:,owner:,group:,output-checksums:,checksum-hardlink:,xtrace -n "$PROGNAME" -- "$@") || cmdline_error eval set -- "$temp" while [[ "$#" -gt 0 ]] ; do case "$1" in @@ -89,6 +92,10 @@ while [[ "$#" -gt 0 ]] ; do DST_CHECKSUMS_FILE="$2" shift 2 ;; + --xtrace) + XTRACE=1 + shift + ;; --) shift break @@ -108,6 +115,23 @@ if [[ ! "$EGID" ]] ; then EGID="$(id -g)" || exit 1 fi +if [[ $XTRACE -eq 1 ]] ; then + set -x +fi + +# Make sure BSD look is installed +if ! look --help >/dev/null ; then + echo "This script requires \"look\" to be installed" >&2 + exit 1 +fi + +# Check for GNU parallel +if parallel --help >/dev/null 2>&1 ; then + GNU_PARALLEL_EXISTS=1 +else + GNU_PARALLEL_EXISTS=0 +fi + set -e # @@ -138,46 +162,82 @@ fi # Cretate a list file with each source file or dir + their stat properties echo $'\n## Compiling file list: '"$SRC_DIR" >&2 full_list_file="$TMP_DIR/full.list" -( cd "$SRC_DIR" && find -printf 'type=%y owner=%U group=%G mode=%#m size=%s mtime=%T@ checksum= name=%p\n' ) \ +( cd "$SRC_DIR" && find -printf 'type=%y owner=%U group=%G mode=%#m size=%s mtime=%T@ name=%p\n' ) \ | sed 's#name=[.]/#name=#' \ | sed 's#\(mtime=[0-9]\+\)[.][0-9]\+#\1#g' \ >"${full_list_file}" check_pipe_status -# Create another list file that contains only regular files, and fill in the -# "checksum=" field. -# Use "flock" when printing in xarg's sub-jobs, to avoid interleaved output. -echo $'\n## Calculating checksums: '"$SRC_DIR" >&2 +# Create another list file that contains only regular files regfile_list_file="$TMP_DIR/regfile.list" -if [[ "$JOBS" -eq 1 ]] ; then - let xargs_max_args=256 -else - let xargs_max_args="8" # calculate checksums in chunks of 8 files in parallel -fi -export SRC_DIR -\grep '^type=f' "$full_list_file" | xargs -r -d '\n' -n $xargs_max_args --process-slot-var=OUT_SUFFIX -P $JOBS bash -c ' - for line in "$@" ; do - name="${line##*name=}" - flock -s "$SRC_DIR" echo " SHA256 $name" >&2 - checksum="$(sha256sum "$SRC_DIR/$name" | awk "{print \$1}")" - [[ -n "$checksum" ]] || exit 1 - output_line="${line/ checksum= / checksum=$checksum }" - flock -s "$SRC_DIR" echo "$output_line" - done -' unused_arg | sort -k 8 >"$regfile_list_file" || exit 1 # sort by the last field "name=..." -[[ "${PIPESTATUS[1]}" -eq 0 ]] || exit 1 +\grep '^type=f' "$full_list_file" | sort -k 7 >"$regfile_list_file" || exit 1 # Create a list file that contains only directories # Sort by the last field "name=..." dir_list_file="$TMP_DIR/dir.list" -\grep '^type=d' "$full_list_file" | sort -k 8 >"$dir_list_file" +\grep '^type=d' "$full_list_file" | sort -k 7 >"$dir_list_file" || exit 1 # Create a list file that contains all other entries (non-dirs & non-files) other_list_file="$TMP_DIR/other.list" -\grep '^type=[^df]' "$full_list_file" | sort -k 8 >"$other_list_file" +\grep '^type=[^df]' "$full_list_file" | sort -k 7 >"$other_list_file" || exit 1 + # -# create directories +# Usage: process_lines MESSAGE INPUT_FILE FUNC ARGS... +# +# Call shell function FUNC in parallel, similar to xargs. +# We will read lines from INPUT_FILE, then pass some subset of lines +# to FUNC many times in parallel, until all lines have been processed. +# Input lines will be appended as additional arguments to FUNC calls. 
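+#
+# Illustrative call (hypothetical function and file names):
+#     process_lines "copying files" "$file_list" copy_one extra_arg
+# would run, in parallel batches, invocations of roughly the form
+#     copy_one extra_arg <line 1> <line 2> ... <line N>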
+#
+# FUNC and any global vars it references must be exported before
+# calling process_lines().
+#
+# MESSAGE will be printed to STDERR before starting
+#
+process_lines() {
+
+    local message="$1" ; shift
+    local input_file="$1" ; shift
+
+    # how many input lines? bail out if 0
+    local line_count
+    line_count="$(cat "$input_file" | wc -l)" || exit 1
+    [[ "$line_count" -gt 0 ]] || return 0
+
+    # How many lines to process at a time. The more the better, but with too
+    # many some child jobs may starve -- cap it at 256
+    local lines_per_job
+    if [[ "$JOBS" -gt 1 ]] ; then
+        let lines_per_job="line_count / JOBS / 2"
+        if [[ "$lines_per_job" -eq 0 ]] ; then
+            lines_per_job=1
+        elif [[ "$lines_per_job" -gt 256 ]] ; then
+            lines_per_job=256
+        fi
+    else
+        lines_per_job=256
+    fi
+
+    echo "** $message [JOBS=$JOBS lines_per_job=$lines_per_job]" >&2
+
+    # Prefer GNU parallel because it can exit early
+    local -a cmd
+    if [[ $GNU_PARALLEL_EXISTS -eq 1 ]] ; then
+        cmd=(parallel --halt now,fail=1 -q -r -d '\n' -n $lines_per_job -P $JOBS "$@")
+    else
+        cmd=(xargs -r -d '\n' -n $lines_per_job -P $JOBS $SHELL -c '"$@"' unused_arg "$@")
+    fi
+    if ! "${cmd[@]}" <"$input_file" ; then
+        echo "ERROR: command failed (\"$message\")" >&2
+        return 1
+    fi
+}
+
+
+#
+# create directories in sort order, i.e. create parents before
+# children
 #
 echo $'\n## Creating directories: '"$DST_DIR" >&2
 while read -r line ; do
@@ -202,158 +262,249 @@ while read -r line ; do
             \rm "$DST_DIR/$name" || exit 1
         fi
         install -d "${install_args[@]}" "$DST_DIR/$name"
-done <"$dir_list_file"
+done <"$dir_list_file" || exit 1
 
 #
 # Copy or hardlink regular files
 #
 echo $'\n## Copying regular files: '"$SRC_DIR" >&2
-if [[ "$DST_CHECKSUMS_FILE" ]] ; then
-    DST_CHECKSUMS_FD=5
-    exec 5<>"$DST_CHECKSUMS_FILE" || exit 1
-else
-    DST_CHECKSUMS_FD=1
-fi
-# read the list of regular files
-while read -r line ; do
-    [[ -n "$line" ]] || continue
-    # source file name relative to SRC_DIR
-    name="${line##*name=}"
-
-    # source checksum
-    checksum="$(echo "$line" | sed -n -r 's#.* checksum=([^[:space:]]+).*#\1#p')"
-    [[ -n "$name" && -n "$checksum" ]] || continue
-
-    # source owner; or a user-provided override
-    install_args=()
-    if [[ "$CHANGE_OWNER" ]] ; then
-        owner="$CHANGE_OWNER"
-        install_args+=("--owner" "$owner")
-    elif [[ $EUID -eq 0 ]] ; then
-        owner="$(echo "$line" | sed -n -r 's#.* owner=([0-9]+).*#\1#p')"
-        install_args+=("--owner" "$owner")
-    else
-        owner=$EUID
+# helper function to process regular files
+# global vars used:
+#   SRC_DIR
+#   DST_DIR
+#   CHANGE_OWNER
+#   CHANGE_GROUP
+#   EUID (always defined by bash)
+#   EGID
+#   TMP_DIR
+#   XTRACE
+#   combined_checksums_file
+process_regfiles() {
+    if [[ $XTRACE -eq 1 ]] ; then
+        set -x
     fi
-    # source group; or a user-provided override
-    if [[ "$CHANGE_GROUP" ]] ; then
-        group="$CHANGE_GROUP"
-        install_args+=("--group" "$group")
-    elif [[ $EGID -eq 0 ]] ; then
-        group="$(echo "$line" | sed -n -r 's#.* group=([0-9]+).*#\1#p')"
-        install_args+=("--group" "$group")
-    else
-        group=$EGID
-    fi
+    # Temp file generated by this function. Its name must be unique to
+    # prevent interference from other jobs with -j N.
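+    # ($$ is the PID of the worker shell that runs this call, so each
+    # parallel job writes to its own scratch file.)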
+ local matching_checksums_file + matching_checksums_file="$TMP_DIR/matching_checksums-$$.list" - # source file's mode/permissions - mode="$(echo "$line" | sed -n -r 's#.* mode=([^[:space:]]+).*#\1#p')" + local line + for line in "$@" ; do - # Search for the checksum in an older StxChecksums file - if [[ "$CHECKSUM_FILES_LIST_FILE" ]] ; then - matching_checksums_file="$TMP_DIR/matching_checksums.list" - if \grep "^$checksum " "$combined_checksums_file" >"$matching_checksums_file" ; then - ( - # As we read previosuly-archived files properties from StxChecksums, - # make sure they have not changed compared to the actual files on disk. - while read -r ref_checksum ref_name ref_size ref_mtime ref_dev ref_inode ref_path x_rest ; do - [[ -f "$ref_path" ]] || continue - # read on-disk file properties - ref_stat=($(stat -c '%s %Y %u %g %#04a' "$ref_path" || true)) - [[ "${#ref_stat[@]}" -eq 5 ]] || continue + # source file name relative to SRC_DIR + local name + name="${line##*name=}" + [[ "$name" ]] || continue - # on-disk size does not match StxChecksums - ref_ondisk_size="${ref_stat[0]}" - [[ "$ref_size" == "$ref_ondisk_size" ]] || continue - - # on-disk mtime does not match StxChecksums - ref_ondisk_mtime="${ref_stat[1]}" - [[ "${ref_mtime}" == "$ref_ondisk_mtime" ]] || continue - - # on-disk owner does not match requested owner - ref_ondisk_owner="${ref_stat[2]}" - [[ "${owner}" == "$ref_ondisk_owner" ]] || continue - - # on-disk group does not match requested group - ref_ondisk_group="${ref_stat[3]}" - [[ "${group}" == "$ref_ondisk_group" ]] || continue - - # on-disk mode does not match the mode of the source file - ref_ondisk_mode="${ref_stat[4]}" - [[ "${mode}" == "$ref_ondisk_mode" ]] || continue - - # At this point checksum, size, mtime, mode, owner, group and checksums of the - # exsiting file match with the file we are trying to copy. - # Use that file to create a hardlink. - echo " LINK $name (from $ref_name)" >&2 - if ln -f "$ref_name" "${DST_DIR}/$name" ; then - echo "$checksum $name $ref_size $ref_mtime $ref_dev $ref_inode $DST_DIR/$name" - exit 0 - fi - done <"$matching_checksums_file" - # checksum not found in older archives - exit 1 - ) && continue || true + # source checksum + local checksum + #flock -s "$DST_DIR" echo " SHA256 $name" >&2 + checksum="$(sha256sum "$SRC_DIR/$name" | awk '{print $1}')" + if [[ ! 
"$checksum" ]] ; then + flock -s "$DST_DIR" echo "$SRC_DIR/$name: failed to calculate checksum" >&2 + return 1 fi + + # source owner; or a user-provided override + local -a install_args=() + local owner + if [[ "$CHANGE_OWNER" ]] ; then + owner="$CHANGE_OWNER" + install_args+=("--owner" "$owner") + elif [[ $EUID -eq 0 ]] ; then + owner="$(echo "$line" | sed -n -r 's#.* owner=([0-9]+).*#\1#p')" + install_args+=("--owner" "$owner") + else + owner=$EUID + fi + + # source group; or a user-provided override + local group + if [[ "$CHANGE_GROUP" ]] ; then + group="$CHANGE_GROUP" + install_args+=("--group" "$group") + elif [[ $EGID -eq 0 ]] ; then + group="$(echo "$line" | sed -n -r 's#.* group=([0-9]+).*#\1#p')" + install_args+=("--group" "$group") + else + group=$EGID + fi + + # source file's mode/permissions + local mode + mode="$(echo "$line" | sed -n -r 's#.* mode=([^[:space:]]+).*#\1#p')" + + # Search for the checksum in an older StxChecksums file + if [[ "$combined_checksums_file" ]] ; then + if look "$checksum " "$combined_checksums_file" >"$matching_checksums_file" ; then + ( + # As we read previosuly-archived files properties from StxChecksums, + # make sure they have not changed compared to the actual files on disk. + while read -r ref_checksum ref_name ref_size ref_mtime ref_dev ref_inode ref_path x_rest ; do + [[ -f "$ref_path" ]] || continue + # read on-disk file properties + local ref_stat + ref_stat=($(stat -c '%s %Y %u %g %#04a' "$ref_path" || true)) + [[ "${#ref_stat[@]}" -eq 5 ]] || continue + + # on-disk size does not match StxChecksums + local ref_ondisk_size + ref_ondisk_size="${ref_stat[0]}" + [[ "$ref_size" == "$ref_ondisk_size" ]] || continue + + # on-disk mtime does not match StxChecksums + local ref_ondisk_mtime + ref_ondisk_mtime="${ref_stat[1]}" + [[ "${ref_mtime}" == "$ref_ondisk_mtime" ]] || continue + + # on-disk owner does not match requested owner + local ref_ondisk_owner + ref_ondisk_owner="${ref_stat[2]}" + [[ "${owner}" == "$ref_ondisk_owner" ]] || continue + + # on-disk group does not match requested group + local ref_ondisk_group + ref_ondisk_group="${ref_stat[3]}" + [[ "${group}" == "$ref_ondisk_group" ]] || continue + + # on-disk mode does not match the mode of the source file + ref_ondisk_mode="${ref_stat[4]}" + [[ "${mode}" == "$ref_ondisk_mode" ]] || continue + + # At this point checksum, size, mtime, mode, owner, group and checksums of the + # exsiting file match with the file we are trying to copy. + # Use that file to create a hardlink. 
+ flock -s "$DST_DIR" echo " LINK $name (from $ref_name)" >&2 + if ln -f "$ref_name" "${DST_DIR}/$name" ; then + flock -s "$DST_DIR" echo "$checksum $name $ref_size $ref_mtime $ref_dev $ref_inode $DST_DIR/$name" + exit 0 + fi + done <"$matching_checksums_file" + # checksum not found in older archives + exit 1 + ) && continue || true + fi + fi + + # No matching files found: really copy it + + if [[ -e "$DST_DIR/$name" ]] ; then + \rm "$DST_DIR/$name" || exit 1 + fi + + # source file's size & mtime + local size mtime + size="$(echo "$line" | sed -n -r 's#.* size=([^[:space:]]+).*#\1#p')" + mtime="$(echo "$line" | sed -n -r 's#.* mtime=([^[:space:]]+).*#\1#p')" + + # copy it to $DST_DIR + flock -s "$DST_DIR" echo " COPY $name" >&2 + rm -f "$DST_DIR/$name" || exit 1 + install --preserve-timestamps "${install_args[@]}" --mode="$mode" -T "$SRC_DIR/$name" "$DST_DIR/$name" || exit 1 + + # check destination file properties + local dst_stat dst_size dst_dev dst_ino + dst_stat=($(stat -c '%s %d %i' "$DST_DIR/$name")) || exit 1 + dst_size="${dst_stat[0]}" + dst_dev="${dst_stat[1]}" + dst_ino="${dst_stat[2]}" + + # file changed while copying + if [[ "$dst_size" != "$size" ]] ; then + flock -s "$DST_DIR" echo "ERROR: $SRC_DIR/$name changed while copying!" >&2 + exit 1 + fi + + # print out a line for StxChecksums using source file properties (preserved + # during copying), but with destination file's dev & ino. + flock -s "$DST_DIR" echo "$checksum $name $size $mtime $dst_dev $dst_ino $DST_DIR/$name" + done + + rm -f "$matching_checksums_file" +} + +# process files in parallel +( + if [[ "$DST_CHECKSUMS_FILE" ]] ; then + dst_checksums_fd=5 + exec 5<>"$DST_CHECKSUMS_FILE" || exit 1 + else + dst_checksums_fd=1 fi - # No matching files found: really copy it + export SRC_DIR \ + DST_DIR \ + CHANGE_OWNER \ + CHANGE_GROUP \ + EGID \ + TMP_DIR \ + XTRACE \ + combined_checksums_file - if [[ -e "$DST_DIR/$name" ]] ; then - \rm "$DST_DIR/$name" || exit 1 - fi + export -f process_regfiles - # source file's size & mtime - size="$(echo "$line" | sed -n -r 's#.* size=([^[:space:]]+).*#\1#p')" - mtime="$(echo "$line" | sed -n -r 's#.* mtime=([^[:space:]]+).*#\1#p')" + message="processing regular files" + process_lines "$message" "$regfile_list_file" process_regfiles | sort >&$dst_checksums_fd + [[ "${PIPESTATUS[0]}" -eq 0 && "${PIPESTATUS[1]}" -eq 0 ]] || exit 1 +) || exit 1 - # copy it to $DST_DIR - echo " COPY $name" >&2 - rm -f "$DST_DIR/$name" - install --preserve-timestamps "${install_args[@]}" --mode="$mode" -T "$SRC_DIR/$name" "$DST_DIR/$name" || exit 1 - - # check destination file properties - dst_stat=($(stat -c '%s %d %i' "$DST_DIR/$name")) || exit 1 - dst_size="${dst_stat[0]}" - dst_dev="${dst_stat[1]}" - dst_ino="${dst_stat[2]}" - - # file changed while copying - if [[ "$dst_size" != "$size" ]] ; then - echo "ERROR: $SRC_DIR/$name changed while copying!" >&2 - exit 1 - fi - - # print out a line for StxChecksums using source file properties (preserved - # during copying), but with destination file's dev & ino. 
- echo "$checksum $name $size $mtime $dst_dev $dst_ino $DST_DIR/$name" -done <"$regfile_list_file" >&$DST_CHECKSUMS_FD # # copy special files # echo $'\n## Copying special files: '"$DST_DIR" >&2 -while read -r line ; do - [[ -n "$line" ]] || continue - name="${line##*name=}" - type="$(echo "$line" | sed 's#^type=\(.\) .*#\1#g')" - [[ -n "$name" && -n "$type" ]] || continue - echo " CREATE type=$type $name" >&2 - if [[ -e "$DST_DIR/$name" ]] ; then - rm "$DST_DIR/$name" || exit 1 - fi - cp -a --no-dereference "$SRC_DIR/$name" "$DST_DIR/$name" || exit 1 - if [[ "$CHANGE_OWNER" || "$CHANGE_GROUP" ]] ; then - chown_arg= - if [[ "$CHANGE_OWNER" ]] ; then - chown_arg="$CHANGE_OWNER" - fi - if [[ "$CHANGE_GROUP" ]] ; then - chown_arg+=":$CHANGE_GROUP" - fi - chown --no-dereference "$chown_arg" "$DST_DIR/$name" || exit 1 - fi -done <"$other_list_file" +# helper function for processing special files +# global vars used: +# SRC_DIR +# DST_DIR +# CHANGE_OWNER +# CHANGE_GROUP +# XTRACE +process_other() { + if [[ $XTRACE -eq 1 ]] ; then + set -x + fi + local line + for line in "$@" ; do + local name + name="${line##*name=}" + [[ -n "$name" ]] || continue + + local type + type="$(echo "$line" | sed 's#^type=\(.\) .*#\1#g')" + [[ -n "$type" ]] || continue + + flock -s "$DST_DIR" echo " CREATE type=$type $name" >&2 + if [[ -e "$DST_DIR/$name" ]] ; then + rm "$DST_DIR/$name" || exit 1 + fi + cp -a --no-dereference "$SRC_DIR/$name" "$DST_DIR/$name" || exit 1 + if [[ "$CHANGE_OWNER" || "$CHANGE_GROUP" ]] ; then + local chown_arg= + if [[ "$CHANGE_OWNER" ]] ; then + chown_arg="$CHANGE_OWNER" + fi + if [[ "$CHANGE_GROUP" ]] ; then + chown_arg+=":$CHANGE_GROUP" + fi + chown --no-dereference "$chown_arg" "$DST_DIR/$name" || exit 1 + fi + done +} + +# process them in parallel +( + export SRC_DIR \ + DST_DIR \ + CHANGE_OWNER \ + CHANGE_GROUP \ + XTRACE + + export -f process_other + + message="processing other files" + process_lines "$message" "$other_list_file" process_other || exit 1 +) || exit 1 diff --git a/scripts/lib/job_utils.sh b/scripts/lib/job_utils.sh index b16c792..0f3530b 100644 --- a/scripts/lib/job_utils.sh +++ b/scripts/lib/job_utils.sh @@ -35,7 +35,7 @@ export REPO_TRACE=0 # docker images SAFE_RSYNC_DOCKER_IMG="servercontainers/rsync:3.1.3" -COREUTILS_DOCKER_IMG="debian:bullseye-20220509" +COREUTILS_DOCKER_IMG="starlingx/jenkins-pipelines-coreutils:20230529" notice() { ( set +x ; print_log -i --notice "$@" ; )