Merge "Add an exponentially rolling garbage collection script"
 contrib/git-exproll.sh | 566 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 566 insertions(+)
							@@ -0,0 +1,566 @@
#!/bin/bash
# Copyright (c) 2012, Code Aurora Forum. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#    # Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#    # Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#    # Neither the name of Code Aurora Forum, Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
# IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

usage() { # error_message

    cat <<-EOF
		usage: $(basename $0) [-unvt] [--norefs] [--noloose] [-r|--ratio number]
		                      [git gc option...] git.repo

		-u|-h                usage/help
		-v                   verbose
		-n dry-run           don't actually repack anything
		-t touch             treat repo as if it had been touched
		--norefs             avoid extra ref packing timestamp checking
		--noloose            do not run just because there are loose object dirs
		                     (repacking may still run if they are referenced)
		-r ratio <number>    packfile ratio to aim for (default 10)

		git gc option        will be passed as args to git gc

		git.repo             to run gc against

		Garbage collect using a pseudo logarithmic packfile maintenance
		approach.  This approach attempts to minimize packfile churn
		by keeping several generations of varying sized packfiles around
		and only consolidating packfiles (or loose objects) which are
		either new packfiles, or packfiles close to the same size as
		another packfile.

		An estimate is used to predict when rollups (one consolidation
		would cause another consolidation) would occur so that this
		rollup can be done all at once via a single repack.  This reduces
		both the runtime and the pack file churn in rollup cases.

		Approach: plan each consolidation by creating a table like this:

		Id Keep Size           Sha1(or consolidation list)      Actions(repack down up note)
		1     - 11356          9052edfb7392646cd4e5f362b953675985f01f96 y - - New
		2     - 429088         010904d5c11cd26a79fda91b01ab454d1001b402 y - - New
		c1    - 440444         [1,2]                                    - - -

		Id:    numbers preceded by a c are estimated "c pack" files
		Keep:  - none, k private keep, o our keep
		Size:  in disk blocks (default du output)
		Sha1:  of packfile, or consolidation list of packfile ids
		Actions
		repack: - n no, y yes
		down:   - noop, ^ consolidate with a file above
		up:     - noop, v consolidate with a file below
		note:   Human description of script decisions:
		         New (file is a new packfile)
		         Consolidate with:<list of packfile ids>
		         (too far from:<list of packfile ids>)

		On the first pass, always consolidate any new packfiles along
		with loose objects and along with any packfiles which are within
		the ratio size of their predecessors (note, the list is ordered
		by increasing size).  After each consolidation, insert a fake
		consolidation, or "c pack", to naively represent the size and
		ordered positioning of the anticipated new consolidated pack.
		Every time a new pack is planned, rescan the list in case the
		new "c pack" would cause more consolidation...

		Once the packfiles which need consolidation are determined, the
		packfiles which will not be consolidated are marked with a .keep
		file, and those which will be consolidated will have their .keep
		removed if they have one.  Thus, the packfiles with a .keep will
		not get repacked.

		Packfile consolidation is determined by the --ratio parameter
		(default is 10).  This ratio is somewhat of a tradeoff.  The
		smaller the number, the more packfiles will be kept on average;
		this increases disk utilization somewhat.  However, a larger
		ratio causes greater churn and may increase disk utilization due
		to deleted packfiles not being reclaimed since they may still be
		kept open by long running applications such as Gerrit.  Sane
		ratio values are probably between 2 and 10.  Since most
		consolidations actually end up smaller than the estimated
		consolidated packfile size (due to compression), the true ratio
		achieved will likely be 1 to 2 greater than the target ratio.
		The smaller the target ratio, the greater this discrepancy.

		Finally, attempt to skip garbage collection entirely on untouched
		repos.  In order to determine if a repo has been touched, use the
		timestamp on the script's keep files: if any relevant file/dir
		is newer than a keep marker file, assume that the repo has been
		touched and gc needs to run.  Also assume gc needs to run whenever
		there are loose object dirs since they may contain untouched
		unreferenced loose objects which need to be pruned (once they
		expire).

		In order to allow the keep files to be an effective timestamp
		marker to detect relevant changes in a repo since the last run,
		all relevant files and directories which may be modified during a
		gc run (even during a noop gc run) must have their timestamps
		reset to the same time as the keep files, or gc will always run
		even on untouched repos.  The relevant files/dirs are all those
		files and directories which garbage collection, object packing,
		ref packing and pruning might change during noop actions.
EOF

    [ -n "$1" ] && info "ERROR $1"

    exit
}
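
# Example invocation (illustrative paths and options): consolidate packs in a
# bare repo with a smaller ratio, passing any unrecognized options (here
# --aggressive) through to git gc.  The repository path must come last:
#
#   ./git-exproll.sh -v -r 5 --aggressive /srv/git/project.git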

debug() { [ -n "$SW_V" ] && info "$1" ; }
info() { echo "$1" >&2 ; }

array_copy() { #v2 # array_src array_dst
    local src=$1 dst=$2
    local s i=0
    eval s=\${#$src[@]}
    while [ $i -lt $s ] ; do
        eval $dst[$i]=\"\${$src[$i]}\"
        i=$(($i + 1))
    done
}

array_equals() { #v2 # array_name [vals...]
    local a=$1 ; shift
    local s=0 t=() val
    array_copy "$a" t
    for s in "${!t[@]}" ; do s=$((s+1)) ; done
    [ "$s" -ne "$#" ] && return 1
    for val in "${t[@]}" ; do
        [ "$val" = "$1" ] || return 2
        shift
    done
    return 0
}

packs_sizes() { # git.repo > "size pack"...
    du -s "$1"/objects/pack/pack-$SHA1.pack 2> /dev/null | sort -n
}

is_ourkeep() { grep -q "$KEEP" "$1" 2> /dev/null ; } # keep
has_ourkeep() { is_ourkeep "$(keep_for "$1")" ; } # pack
has_keep() { [ -f "$(keep_for "$1")" ] ; } # pack
is_repo() { [ -d "$1/objects" ] && [ -d "$1/refs/heads" ] ; } # git.repo

keep() { # pack   # returns true if we added our keep
    keep=$(keep_for "$1")
    [ -f "$keep" ] && return 1
    echo "$KEEP" > "$keep"
    return 0
}

keep_for() { # packfile > keepfile
    local keep=$(echo "$1" | sed -es'/\.pack$/.keep/')
    [ "${keep/.keep}" = "$keep" ] && return 1
    echo "$keep"
}

idx_for() { # packfile > idxfile
    local idx=$(echo "$1" | sed -es'/\.pack$/.idx/')
    [ "${idx/.idx}" = "$idx" ] && return 1
    echo "$idx"
}

# pack_or_keep_file > sha
sha_for() { echo "$1" | sed -es'|\(.*/\)*pack-\([^.]*\)\..*$|\2|' ; }
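
# For example (illustrative path), given
#   pack=objects/pack/pack-9052edfb7392646cd4e5f362b953675985f01f96.pack
# keep_for "$pack" prints the matching .keep path, idx_for "$pack" prints the
# matching .idx path, and sha_for "$pack" prints just the sha1
# (9052edfb7392646cd4e5f362b953675985f01f96).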

private_keeps() { # git.repo -> sets pkeeps
    local repo=$1 ary=$2
    local keep keeps=("$repo"/objects/pack/pack-$SHA1.keep)
    pkeeps=()
    for keep in "${keeps[@]}" ; do
        is_ourkeep "$keep" || pkeeps=("${pkeeps[@]}" "$keep")
    done
}

is_tooclose() { [ "$(($1 * $RATIO))" -gt "$2" ] ; } # smaller larger
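
# Illustration with the default RATIO=10 and the sizes from the usage example:
# is_tooclose 11356 429088 tests whether 11356 * 10 > 429088; it does not
# (113560 <= 429088), so pack 1 is not "too close" to pack 2 by ratio alone --
# in the example both packs are consolidated only because they are new.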

unique() { # [args...] > unique_words
    local lines=$(while [ $# -gt 0 ] ; do echo "$1" ; shift ; done)
    lines=$(echo "$lines" | sort -u)
    echo $lines  # as words
}

outfs() { # fs [args...] > argfs...
    local fs=$1 ; shift
    [ $# -gt 0 ] && echo -n "$1" ; shift
    while [ $# -gt 0 ] ; do echo -n "$fs$1" ; shift ; done
}
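
# e.g. unique 2 1 2 c1 prints "1 2 c1" (sorted, duplicates dropped, one line),
# and outfs , 1 2 c1 prints "1,2,c1" (no trailing newline).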

sort_list() { # < list > formatted_list
    # n has_keep size sha repack down up note
    awk '{ note=$8; for(i=8;i<NF;i++) note=note " "$(i+1)
           printf("%-5s %s %-14s %-40s %s %s %s %s\n", \
                     $1,$2,   $3,  $4, $5,$6,$7,note)}' |\
        sort -k 3,3n -k 1,1n
}

is_touched() { # git.repo
    local repo=$1
    local loose keep ours newer
    [ -n "$SW_T" ] && { debug "$SW_T -> treat as touched" ; return 0 ; }

    if [ -z "$SW_LOOSE" ] ; then
        # If there are loose objects, they may need to be pruned,
        # run even if nothing has really been touched.
        loose=$(find "$repo/objects" -type d \
                      -wholename "$repo/objects/[0-9][0-9]" \
                      -print -quit 2>/dev/null)
        [ -n "$loose" ] && { info "There are loose object directories" ; return 0 ; }
    fi

    # If we don't have a keep, the current packfiles may not have been
    # compressed with the current gc policy (gc may never have been run),
    # so run at least once to repack everything.  Also, we need a marker
    # file for timestamp tracking (a dir needs to detect changes within
    # it, so it cannot be a marker) and our keeps are something we control,
    # use them.
    for keep in "$repo"/objects/pack/pack-$SHA1.keep ; do
        is_ourkeep "$keep" && { ours=$keep ; break ; }
    done
    [ -z "$ours" ] && { info 'We have no keep (we have never run?): run' ; return 0 ; }

    debug "Our timestamp keep: $ours"
    # The wholename stuff seems to get touched by a noop git gc
    newer=$(find "$repo/objects" "$repo/refs" "$repo/packed-refs" \
                  '!' -wholename "$repo/objects/info" \
                  '!' -wholename "$repo/objects/info/*" \
                  -newer "$ours" \
                  -print -quit 2>/dev/null)
    [ -z "$newer" ] && return 1

    info "Touched since last run: $newer"
    return 0
}
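
# Net effect (illustrative restatement): if a pack-*.keep written by this
# script is the newest thing under objects/, refs/ and packed-refs, the repo
# is considered untouched and gc is skipped entirely.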

touch_refs() { # git.repo start_date refs
    local repo=$1 start_date=$2 refs=$3
    (
        debug "Setting start date($start_date) on unpacked refs:"
        debug "$refs"
        cd "$repo/refs" || return
        # safe to assume no newlines in a ref name
        echo "$refs" | xargs -d '\n' -n 1 touch -c -d "$start_date"
    )
}

set_start_date() { # git.repo start_date refs refdirs packedrefs [packs]
    local repo=$1 start_date=$2 refs=$3 refdirs=$4 packedrefs=$5 ; shift 5
    local pack keep idx repacked

    # This stuff is touched during object packs
    while [ $# -gt 0 ] ; do
        pack=$1 ; shift
        keep="$(keep_for "$pack")"
        idx="$(idx_for "$pack")"
        touch -c -d "$start_date" "$pack" "$keep" "$idx"
        debug "Setting start date on: $pack $keep $idx"
    done
    # This will prevent us from detecting any deletes in the pack dir
    # since gc ran, except for private keeps which we are checking
    # manually.  But there really shouldn't be any other relevant deletes
    # in this dir which should cause us to rerun next time; deleting a
    # pack or index file by anything but gc would be bad!
    debug "Setting start date on pack dir: $start_date"
    touch -c -d "$start_date" "$repo/objects/pack"


    if [ -z "$SW_REFS" ] ; then
        repacked=$(find "$repo/packed-refs" -newer "$repo/objects/pack" \
                      -print -quit 2>/dev/null)
        if [ -n "$repacked" ] ; then
            # The ref dirs and packed-ref files seem to get touched even on
            # a noop refpacking
            debug "Setting start date on packed-refs"
            touch -c -d "$start_date" "$repo/packed-refs"
            touch_refs "$repo" "$start_date" "$refdirs"

            # A ref repack does not imply a ref change, but since it is
            # hard to tell, simply assume so
            if [ "$refs" != "$(cd "$repo/refs" ; find -depth)" ] || \
               [ "$packedrefs" != "$(<"$repo/packed-refs")" ] ; then
                # We retouch if needed (instead of simply checking then
                # touching) to avoid a race between the check and the set.
                debug "  but refs actually got packed, so retouch packed-refs"
                touch -c "$repo/packed-refs"
            fi
        fi
    fi
}

note_consolidate() { # note entry > note (no duplicated consolidated entries)
    local note=$1 entry=$2
    local entries=() ifs=$IFS
    if  echo "$note" | grep -q 'Consolidate with:[0-9,c]' ; then
        IFS=,
        entries=( $(echo "$note" | sed -es'/^.*Consolidate with:\([0-9,c]*\).*$/\1/') )
        note=( $(echo "$note" | sed -es'/Consolidate with:[0-9,c]*//') )
        IFS=$ifs
    fi
    entries=( $(unique "${entries[@]}" "$entry") )
    echo "$note Consolidate with:$(outfs , "${entries[@]}")"
}

note_toofar() { # note entry > note (no duplicated "too far" entries)
    local note=$1 entry=$2
    local entries=() ifs=$IFS
    if  echo "$note" | grep -q '(too far from:[0-9,c]*)' ; then
        IFS=,
        entries=( $(echo "$note" | sed -es'/^.*(too far from:\([0-9,c]*\)).*$/\1/') )
        note=( $(echo "$note" | sed -es'/(too far from:[0-9,c]*)//') )
        IFS=$ifs
    fi
    entries=( $(unique "${entries[@]}" "$entry") )
    echo "$note (too far from:$(outfs , "${entries[@]}"))"
}

last_entry() { # isRepack pline repackline > last_rows_entry
    local size_hit=$1 pline=$2 repackline=$3
    if [ -n "$pline" ] ; then
        if [ -n "$size_hit" ] ; then
            echo "$repackline"
        else
            echo "$pline"
        fi
    fi
}

init_list() { # git.repo > shortlist
    local repo=$1
    local file
    local n has_keep size sha repack

    packs_sizes "$1" | {
        while read size file ; do
            n=$((n+1))
            repack=n
            has_keep=-
            if has_keep "$file" ; then
                has_keep=k
                has_ourkeep "$file" && has_keep=o
            fi
            sha=$(sha_for "$file")
            echo "$n $has_keep $size $sha $repack"
        done
    } | sort_list
}
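
# Example (illustrative): for a single unkept pack of 11356 blocks, init_list
# emits a line like
#   1     - 11356          9052edfb7392646cd4e5f362b953675985f01f96 n
# i.e. id, no keep, size in disk blocks, sha1, and repack still "n" until
# consolidate_list decides otherwise.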

consolidate_list() { # run < list > list
    local run=$1
    local sum=0 psize=0 sum_size=0 size_hit pn clist pline repack_line
    local n has_keep size sha repack down up note

    {
        while read n has_keep size sha repack down up note; do
            [ -z "$up" ] && up='-'
            [ -z "$down" ] && down="-"

            if [ "$has_keep" = "k" ] ; then
                echo "$n $has_keep $size $sha $repack - - Private"
                continue
            fi

            if [ "$repack" = "n" ] ; then
                if is_tooclose $psize $size ; then
                    size_hit=y
                    repack=y
                    sum=$(($sum + $sum_size + $size))
                    sum_size=0 # Prevents double summing this entry
                    clist=($(unique "${clist[@]}" $pn $n))
                    down="^"
                    [ "$has_keep" = "-" ] && note="$note New +"
                    note=$(note_consolidate "$note" "$pn")
                elif [ "$has_keep" = "-" ] ; then
                    repack=y
                    sum=$(($sum + $size))
                    sum_size=0 # Prevents double summing this entry
                    clist=($(unique "${clist[@]}" $n))
                    note="$note New"
                elif [ $psize -ne 0 ] ; then
                    sum_size=$size
                    down="!"
                    note=$(note_toofar "$note" "$pn")
                else
                    sum_size=$size
                fi
            else
                sum_size=$size
            fi

            # By preventing "c files" (consolidated) from being marked
            # "repack" they won't get keeps
            repack2=y
            [ "${n/c}" != "$n" ] && { repack=- ; repack2=- ; }

            last_entry "$size_hit" "$pline" "$repack_line"
            # Delay the printout until we know whether we are
            # being consolidated with the entry following us
            # (we won't know until the next iteration).
            # size_hit is used to determine which of the lines
            # below will actually get printed above on the next
            # iteration.
            pline="$n $has_keep $size $sha $repack $down $up $note"
            repack_line="$n $has_keep $size $sha $repack2 $down v $note"

            pn=$n ; psize=$size # previous entry data
            size_hit='' # will not be consolidated up

        done
        last_entry "$size_hit" "$pline" "$repack_line"

        [ $sum -gt 0 ] && echo "c$run - $sum [$(outfs , "${clist[@]}")] - - -"

    } | sort_list
}

process_list() { # git.repo > list
    local list=$(init_list "$1")  plist run=0

    while true ; do
        plist=$list
        run=$((run +1))
        list=$(echo "$list" | consolidate_list "$run")
        if [ "$plist" != "$list" ] ; then
            debug "------------------------------------------------------------------------------------"
            debug "$HEADER"
            debug "$list"
        else
            break
        fi
    done
    debug "------------------------------------------------------------------------------------"
    echo "$list"
}
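
# Worked example (same numbers as in the usage text): two new packs of
# 11356 and 429088 blocks are both marked repack=y on the first pass, so a
# single "c pack" entry c1 of 11356 + 429088 = 440444 blocks is appended.
# The second pass produces no further change, so the loop above stops there.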

repack_list() { # git.repo < list
    local repo=$1
    local start_date newpacks=0 pkeeps keeps=1 refs refdirs rtn
    local packedrefs=$(<"$repo/packed-refs")

    # so they don't appear touched after a noop refpacking
    if [ -z "$SW_REFS" ] ; then
        refs=$(cd "$repo/refs" ; find -depth)
        refdirs=$(cd "$repo/refs" ; find -type d -depth)
        debug "Before refs:"
        debug "$refs"
    fi

    # Find a private keep snapshot which has not changed from
    # before our start_date so private keep deletions during gc
    # can be detected
    while ! array_equals pkeeps "${keeps[@]}" ; do
       debug "Getting a private keep snapshot"
       private_keeps "$repo"
       keeps=("${pkeeps[@]}")
       debug "before keeps: ${keeps[*]}"
       start_date=$(date)
       private_keeps "$repo"
       debug "after keeps: ${pkeeps[*]}"
    done

    while read n has_keep size sha repack down up note; do
        if [ "$repack" = "y" ] ; then
            keep="$repo/objects/pack/pack-$sha.keep"
            info "Repacking $repo/objects/pack/pack-$sha.pack"
            [ -f "$keep" ] && rm -f "$keep"
        fi
    done

    ( cd "$repo" && git gc "${GC_OPTS[@]}" ) ; rtn=$?

    # Mark any files without a .keep with our .keep
    packs=("$repo"/objects/pack/pack-$SHA1.pack)
    for pack in "${packs[@]}" ; do
        if keep "$pack" ; then
            info "New pack: $pack"
            newpacks=$((newpacks+1))
        fi
    done

    # Record the start date.  If there is more than 1 new packfile, we
    # don't want to risk touching it with an older date since that
    # would prevent consolidation on the next run.  If the private
    # keeps have changed, then we should run next time no matter what.
    if [ $newpacks -le 1 ] || ! array_equals pkeeps "${keeps[@]}" ; then
        set_start_date "$repo" "$start_date" "$refs" "$refdirs" "$packedrefs" "${packs[@]}"
    fi

    return $rtn # we really only care about the gc error code
}

git_gc() { # git.repo
    local list=$(process_list "$1")
    if [ -z "$SW_V" ] ; then
        info "Running $PROG on $1.  git gc options: ${GC_OPTS[@]}"
        echo "$HEADER" >&2
        echo "$list" >&2 ;
    fi
    echo "$list" | repack_list "$1"
}


PROG=$(basename "$0")
HEADER="Id Keep Size           Sha1(or consolidation list)      Actions(repack down up note)"
KEEP=git-exproll
HEX='[0-9a-f]'
HEX10=$HEX$HEX$HEX$HEX$HEX$HEX$HEX$HEX$HEX$HEX
SHA1=$HEX10$HEX10$HEX10$HEX10

RATIO=10
SW_N='' ; SW_V='' ; SW_T='' ; SW_REFS='' ; SW_LOOSE='' ; GC_OPTS=()
while [ $# -gt 0 ] ; do
    case "$1" in
        -u|-h)  usage ;;
        -n)  SW_N="$1" ;;
        -v)  SW_V="$1" ;;

        -t)  SW_T="$1" ;;
        --norefs)  SW_REFS="$1" ;;
        --noloose) SW_LOOSE="$1" ;;

        -r|--ratio)  shift ; RATIO="$1" ;;

        *)  [ $# -le 1 ] && break
            GC_OPTS=( "${GC_OPTS[@]}" "$1" )
            ;;
    esac
    shift
done


REPO="$1"
if ! is_repo "$REPO" ; then
    REPO=$REPO/.git
    is_repo "$REPO" || usage "($1) is not likely a git repo"
fi


if [ -z "$SW_N" ] ; then
    is_touched "$REPO" || { info "Repo untouched since last run" ; exit ; }
    git_gc "$REPO"
else
    is_touched "$REPO" || info "Repo untouched since last run, analyze anyway."
    process_list "$REPO" >&2
fi