
This change adds a brief one line header to all default output files to indicate when the analysis was generated. It also allows the gitdm configuration file to use a date of the format YYYY-MM-DD to specify the start or end date of a range of commits, rather than only allowing a git changeset hash. This is valuable for analyzing projects which do not routinely or consistently use tags, and when it is difficult to precisely pinpoint the git commit that corresponds to a time-based release. Change-Id: I899f6c6b046462addd3235e7797a3e1a63c8776c
232 lines
9.6 KiB
Bash
Executable File
232 lines
9.6 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
# provide debugging output when desired
|
|
function DEBUG() {
|
|
[ "$GITDM_DEBUG" == "on" ] && echo "DEBUG: $1"
|
|
}
|
|
|
|
# enable/disable debugging output
|
|
GITDM_DEBUG=${GITDM_DEBUG:-"off"}
|
|
|
|
# determine if a given parameter is a date matching the format YYYY-MM-DD,
|
|
# i.e. 2013-09-13 This is used to decide if git should specify a start
|
|
# date with '--since YYYY-MM-DD' rather than use an absolute changeset id
|
|
function IS_DATE() {
|
|
[[ $1 =~ ^[0-9][0-9][0-9][0-9]\-[0-9][0-9]\-[0-9][0-9]$ ]]
|
|
}
|
|
|
|
GITBASE=${GITBASE:-~/git/openstack}
|
|
RELEASE=${RELEASE:-havana}
|
|
BASEDIR=$(pwd)
|
|
CONFIGDIR=$(pwd)/openstack-config
|
|
TEMPDIR=${TEMPDIR:-$(mktemp -d $(pwd)/dmtmp-XXXXXX)}
|
|
GITLOGARGS="--no-merges --numstat -M --find-copies-harder"
|
|
REPOBASE=${REPOBASE:-http://review.openstack.org/p/openstack}
|
|
|
|
UPDATE_GIT=${UPDATE_GIT:-y}
|
|
GIT_STATS=${GIT_STATS:-y}
|
|
# LP_STATS disabled by default, they take forever
|
|
LP_STATS=${LP_STATS:-n}
|
|
QUERY_LP=${QUERY_LP:-y}
|
|
GERRIT_STATS=${GERRIT_STATS:-y}
|
|
REMOVE_TEMPDIR=${REMOVE_TEMPDIR:-y}
|
|
TIMESTAMP=`date`
|
|
# brief header to prepend to all of the analysis results
|
|
OUTPUT_HEADER="Statistics generated at ${TIMESTAMP}"
|
|
|
|
if [ ! -d .venv ]; then
|
|
echo "Creating a virtualenv"
|
|
./tools/install_venv.sh
|
|
fi
|
|
|
|
if [ "$UPDATE_GIT" = "y" ]; then
|
|
echo "Updating projects from git"
|
|
if [ ! -d ${GITBASE} ] ; then
|
|
DEBUG "Creating missing ${GITBASE}"
|
|
mkdir -p ${GITBASE}
|
|
fi
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project x; do
|
|
if [ ! -d ${GITBASE}/${project} ] ; then
|
|
DEBUG "Cloning missing ${project} from ${REPOBASE}/${project}"
|
|
git clone ${REPOBASE}/${project} ${GITBASE}/${project}
|
|
fi
|
|
cd ${GITBASE}/${project}
|
|
DEBUG "Fetching updates to ${project}"
|
|
git fetch origin 2>/dev/null
|
|
done
|
|
fi
|
|
|
|
if [ "$GIT_STATS" = "y" ] ; then
|
|
echo "Generating git commit logs"
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project revisions excludes x; do
|
|
DEBUG "Generating git commit log for ${project}"
|
|
cd ${GITBASE}/${project}
|
|
# match possible dates of the format YYYY-MM-DD to use in
|
|
# supplying git with a '--since DATE' paramter instead of a
|
|
# range of changeset ids
|
|
if IS_DATE $revisions; then
|
|
DEBUG "Matched a git --since date of '${revisions}'"
|
|
revisions="--since ${revisions}"
|
|
fi
|
|
git log ${GITLOGARGS} ${revisions} > "${TEMPDIR}/${project}-commits.log"
|
|
if [ -n "$excludes" ]; then
|
|
awk "/^commit /{ok=1} /^commit ${excludes}/{ok=0} {if(ok) {print}}" \
|
|
< "${TEMPDIR}/${project}-commits.log" > "${TEMPDIR}/${project}-commits.log.new"
|
|
mv "${TEMPDIR}/${project}-commits.log.new" "${TEMPDIR}/${project}-commits.log"
|
|
fi
|
|
done
|
|
|
|
echo "Generating git statistics"
|
|
cd ${BASEDIR}
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project x; do
|
|
DEBUG "Generating git stats for ${project}"
|
|
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-git-stats.txt"
|
|
python gitdm -l 20 -n < "${TEMPDIR}/${project}-commits.log" >> "${TEMPDIR}/${project}-git-stats.txt"
|
|
# also create a full dump with csv for further downstream processing
|
|
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-git-stats.csv"
|
|
python gitdm -n -y -z -x "${TEMPDIR}/${project}-git-stats.csv" < "${TEMPDIR}/${project}-commits.log" >> "${TEMPDIR}/${project}-git-stats-all.txt"
|
|
done
|
|
|
|
DEBUG "Generating aggregate git stats for all projects"
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project x; do
|
|
cat "${TEMPDIR}/${project}-commits.log" >> "${TEMPDIR}/git-commits.log"
|
|
done
|
|
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/git-stats.txt"
|
|
python gitdm -n -y -z -x "${TEMPDIR}/git-stats.csv" < "${TEMPDIR}/git-commits.log" >> "${TEMPDIR}/git-stats.txt"
|
|
fi
|
|
|
|
if [ "$LP_STATS" = "y" ] ; then
|
|
echo "Generating a list of bugs"
|
|
cd ${BASEDIR}
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project x; do
|
|
DEBUG "Generating a list of defects for ${project}"
|
|
if [ ! -f "${TEMPDIR}/${project}-bugs.log" -a "$QUERY_LP" = "y" ]; then
|
|
./tools/with_venv.sh python launchpad/buglist.py ${project} ${RELEASE} > "${TEMPDIR}/${project}-bugs.log"
|
|
fi
|
|
while read id person date x; do
|
|
emails=$(awk "/^$person / {print \$2}" ${CONFIGDIR}/launchpad-ids.txt)
|
|
echo $id $person $date $emails
|
|
done < "${TEMPDIR}/${project}-bugs.log" > "${TEMPDIR}/${project}-bugs.log.new"
|
|
mv "${TEMPDIR}/${project}-bugs.log.new" "${TEMPDIR}/${project}-bugs.log"
|
|
done
|
|
|
|
echo "Generating launchpad statistics"
|
|
cd ${BASEDIR}
|
|
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-lp-stats.txt"
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project x; do
|
|
DEBUG "Generating launchpad stats for ${project}"
|
|
grep -v '<unknown>' "${TEMPDIR}/${project}-bugs.log" |
|
|
python lpdm -l 20 >> "${TEMPDIR}/${project}-lp-stats.txt"
|
|
done
|
|
|
|
DEBUG "Generating aggregate launchpad stats for all projects"
|
|
> "${TEMPDIR}/lp-bugs.log"
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project x; do
|
|
grep -v '<unknown>' "${TEMPDIR}/${project}-bugs.log" >> "${TEMPDIR}/lp-bugs.log"
|
|
done
|
|
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/lp-stats.txt"
|
|
grep -v '<unknown>' "${TEMPDIR}/lp-bugs.log" |
|
|
python lpdm -l 20 >> "${TEMPDIR}/lp-stats.txt"
|
|
fi
|
|
|
|
if [ "$GERRIT_STATS" = "y" ] ; then
|
|
echo "Generating a list of Change-Ids"
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project revisions x; do
|
|
cd "${GITBASE}/${project}"
|
|
# match possible dates of the format YYYY-MM-DD to use in
|
|
# supplying git with a '--since DATE' paramter instead of a
|
|
# range of changeset ids
|
|
if IS_DATE $revisions; then
|
|
DEBUG "Matched a git --since date of '${revisions}'"
|
|
revisions="--since ${revisions}"
|
|
fi
|
|
git log ${revisions} |
|
|
awk '/^ Change-Id: / { print $2 }' |
|
|
split -l 100 -d - "${TEMPDIR}/${project}-${RELEASE}-change-ids-"
|
|
done
|
|
|
|
cd ${TEMPDIR}
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project x; do
|
|
> ${project}-${RELEASE}-reviews.json
|
|
for f in ${project}-${RELEASE}-change-ids-??; do
|
|
echo "Querying gerrit: ${f}"
|
|
ssh -p 29418 review.openstack.org \
|
|
gerrit query --all-approvals --format=json \
|
|
$(awk -v ORS=' OR ' '{print}' $f | sed 's/ OR $//') \
|
|
< /dev/null >> "${project}-${RELEASE}-reviews.json"
|
|
done
|
|
done
|
|
|
|
echo "Generating a list of commit IDs"
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project revisions x; do
|
|
DEBUG "Generating a list of commit IDs for ${project}"
|
|
cd "${GITBASE}/${project}"
|
|
# match possible dates of the format YYYY-MM-DD to use in
|
|
# supplying git with a '--since DATE' paramter instead of a
|
|
# range of changeset ids
|
|
if IS_DATE $revisions; then
|
|
DEBUG "Matched a git --since date of '${revisions}'"
|
|
revisions="--since ${revisions}"
|
|
fi
|
|
git log --pretty=format:%H $revisions > \
|
|
"${TEMPDIR}/${project}-${RELEASE}-commit-ids.txt"
|
|
done
|
|
|
|
echo "Parsing the gerrit queries"
|
|
cd ${BASEDIR}
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project x; do
|
|
DEBUG "Parsing the gerrit queries for ${project}"
|
|
python gerrit/parse-reviews.py \
|
|
"${TEMPDIR}/${project}-${RELEASE}-commit-ids.txt" \
|
|
"${CONFIGDIR}/launchpad-ids.txt" \
|
|
< "${TEMPDIR}/${project}-${RELEASE}-reviews.json" \
|
|
> "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt"
|
|
done
|
|
|
|
echo "Generating gerrit statistics"
|
|
cd ${BASEDIR}
|
|
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-gerrit-stats.txt"
|
|
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-gerrit-stats-all.txt"
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project x; do
|
|
DEBUG "Generating gerrit statistics for ${project}"
|
|
python gerritdm -l 20 \
|
|
< "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" \
|
|
>> "${TEMPDIR}/${project}-gerrit-stats.txt"
|
|
python gerritdm -z \
|
|
< "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" \
|
|
>> "${TEMPDIR}/${project}-gerrit-stats-all.txt"
|
|
done
|
|
|
|
DEBUG "Generating aggregate gerrit statistics for all projects"
|
|
> "${TEMPDIR}/gerrit-reviewers.txt"
|
|
grep -v '^#' ${CONFIGDIR}/${RELEASE} |
|
|
while read project x; do
|
|
cat "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" >> "${TEMPDIR}/gerrit-reviewers.txt"
|
|
done
|
|
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/gerrit-stats.txt"
|
|
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/gerrit-stats-all.txt"
|
|
python gerritdm -l 20 < "${TEMPDIR}/gerrit-reviewers.txt" >> "${TEMPDIR}/gerrit-stats.txt"
|
|
python gerritdm -z < "${TEMPDIR}/gerrit-reviewers.txt" >> "${TEMPDIR}/gerrit-stats-all.txt"
|
|
fi
|
|
|
|
DEBUG "Cleaning up"
|
|
cd ${BASEDIR}
|
|
rm -rf ${RELEASE} && mkdir ${RELEASE}
|
|
mv ${TEMPDIR}/*stats.txt ${RELEASE}
|
|
mv ${TEMPDIR}/*stats-all.txt ${RELEASE}
|
|
mv ${TEMPDIR}/*.csv ${RELEASE}
|
|
|
|
[ "$REMOVE_TEMPDIR" = "y" ] && rm -rf ${TEMPDIR} || echo "Not removing ${TEMPDIR}"
|