Add header to output files and allow date ranges

This change adds a brief one line header to all default output files
to indicate when the analysis was generated.

It also allows the gitdm configuration file to use a date of the
format YYYY-MM-DD to specify the start or end date of a range of
commits, rather than only allowing a git changeset hash.  This is
valuable for analyzing projects which do not routinely or
consistently use tags, and when it is difficult to precisely
pinpoint the git commit that corresponds to a time-based release.

Change-Id: I899f6c6b046462addd3235e7797a3e1a63c8776c
This commit is contained in:
Dan Stangel
2014-01-23 15:43:24 -07:00
parent 87c9a9e333
commit e3252c037f

View File

@@ -8,6 +8,13 @@ function DEBUG() {
# enable/disable debugging output # enable/disable debugging output
GITDM_DEBUG=${GITDM_DEBUG:-"off"} GITDM_DEBUG=${GITDM_DEBUG:-"off"}
# determine if a given parameter is a date matching the format YYYY-MM-DD,
# i.e. 2013-09-13 This is used to decide if git should specify a start
# date with '--since YYYY-MM-DD' rather than use an absolute changeset id
function IS_DATE() {
[[ $1 =~ ^[0-9][0-9][0-9][0-9]\-[0-9][0-9]\-[0-9][0-9]$ ]]
}
GITBASE=${GITBASE:-~/git/openstack} GITBASE=${GITBASE:-~/git/openstack}
RELEASE=${RELEASE:-havana} RELEASE=${RELEASE:-havana}
BASEDIR=$(pwd) BASEDIR=$(pwd)
@@ -23,6 +30,9 @@ LP_STATS=${LP_STATS:-n}
QUERY_LP=${QUERY_LP:-y} QUERY_LP=${QUERY_LP:-y}
GERRIT_STATS=${GERRIT_STATS:-y} GERRIT_STATS=${GERRIT_STATS:-y}
REMOVE_TEMPDIR=${REMOVE_TEMPDIR:-y} REMOVE_TEMPDIR=${REMOVE_TEMPDIR:-y}
TIMESTAMP=`date`
# brief header to prepend to all of the analysis results
OUTPUT_HEADER="Statistics generated at ${TIMESTAMP}"
if [ ! -d .venv ]; then if [ ! -d .venv ]; then
echo "Creating a virtualenv" echo "Creating a virtualenv"
@@ -53,6 +63,13 @@ if [ "$GIT_STATS" = "y" ] ; then
while read project revisions excludes x; do while read project revisions excludes x; do
DEBUG "Generating git commit log for ${project}" DEBUG "Generating git commit log for ${project}"
cd ${GITBASE}/${project} cd ${GITBASE}/${project}
# match possible dates of the format YYYY-MM-DD to use in
# supplying git with a '--since DATE' paramter instead of a
# range of changeset ids
if IS_DATE $revisions; then
DEBUG "Matched a git --since date of '${revisions}'"
revisions="--since ${revisions}"
fi
git log ${GITLOGARGS} ${revisions} > "${TEMPDIR}/${project}-commits.log" git log ${GITLOGARGS} ${revisions} > "${TEMPDIR}/${project}-commits.log"
if [ -n "$excludes" ]; then if [ -n "$excludes" ]; then
awk "/^commit /{ok=1} /^commit ${excludes}/{ok=0} {if(ok) {print}}" \ awk "/^commit /{ok=1} /^commit ${excludes}/{ok=0} {if(ok) {print}}" \
@@ -66,9 +83,11 @@ if [ "$GIT_STATS" = "y" ] ; then
grep -v '^#' ${CONFIGDIR}/${RELEASE} | grep -v '^#' ${CONFIGDIR}/${RELEASE} |
while read project x; do while read project x; do
DEBUG "Generating git stats for ${project}" DEBUG "Generating git stats for ${project}"
python gitdm -l 20 -n < "${TEMPDIR}/${project}-commits.log" > "${TEMPDIR}/${project}-git-stats.txt" echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-git-stats.txt"
python gitdm -l 20 -n < "${TEMPDIR}/${project}-commits.log" >> "${TEMPDIR}/${project}-git-stats.txt"
# also create a full dump with csv for further downstream processing # also create a full dump with csv for further downstream processing
python gitdm -n -y -z -x "${TEMPDIR}/${project}-git-stats.csv" < "${TEMPDIR}/${project}-commits.log" > "${TEMPDIR}/${project}-git-stats-all.txt" echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-git-stats.csv"
python gitdm -n -y -z -x "${TEMPDIR}/${project}-git-stats.csv" < "${TEMPDIR}/${project}-commits.log" >> "${TEMPDIR}/${project}-git-stats-all.txt"
done done
DEBUG "Generating aggregate git stats for all projects" DEBUG "Generating aggregate git stats for all projects"
@@ -76,7 +95,8 @@ if [ "$GIT_STATS" = "y" ] ; then
while read project x; do while read project x; do
cat "${TEMPDIR}/${project}-commits.log" >> "${TEMPDIR}/git-commits.log" cat "${TEMPDIR}/${project}-commits.log" >> "${TEMPDIR}/git-commits.log"
done done
python gitdm -n -y -z -x "${TEMPDIR}/git-stats.csv" < "${TEMPDIR}/git-commits.log" > "${TEMPDIR}/git-stats.txt" echo "${OUTPUT_HEADER}" > "${TEMPDIR}/git-stats.txt"
python gitdm -n -y -z -x "${TEMPDIR}/git-stats.csv" < "${TEMPDIR}/git-commits.log" >> "${TEMPDIR}/git-stats.txt"
fi fi
if [ "$LP_STATS" = "y" ] ; then if [ "$LP_STATS" = "y" ] ; then
@@ -97,11 +117,12 @@ if [ "$LP_STATS" = "y" ] ; then
echo "Generating launchpad statistics" echo "Generating launchpad statistics"
cd ${BASEDIR} cd ${BASEDIR}
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-lp-stats.txt"
grep -v '^#' ${CONFIGDIR}/${RELEASE} | grep -v '^#' ${CONFIGDIR}/${RELEASE} |
while read project x; do while read project x; do
DEBUG "Generating launchpad stats for ${project}" DEBUG "Generating launchpad stats for ${project}"
grep -v '<unknown>' "${TEMPDIR}/${project}-bugs.log" | grep -v '<unknown>' "${TEMPDIR}/${project}-bugs.log" |
python lpdm -l 20 > "${TEMPDIR}/${project}-lp-stats.txt" python lpdm -l 20 >> "${TEMPDIR}/${project}-lp-stats.txt"
done done
DEBUG "Generating aggregate launchpad stats for all projects" DEBUG "Generating aggregate launchpad stats for all projects"
@@ -110,8 +131,9 @@ if [ "$LP_STATS" = "y" ] ; then
while read project x; do while read project x; do
grep -v '<unknown>' "${TEMPDIR}/${project}-bugs.log" >> "${TEMPDIR}/lp-bugs.log" grep -v '<unknown>' "${TEMPDIR}/${project}-bugs.log" >> "${TEMPDIR}/lp-bugs.log"
done done
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/lp-stats.txt"
grep -v '<unknown>' "${TEMPDIR}/lp-bugs.log" | grep -v '<unknown>' "${TEMPDIR}/lp-bugs.log" |
python lpdm -l 20 > "${TEMPDIR}/lp-stats.txt" python lpdm -l 20 >> "${TEMPDIR}/lp-stats.txt"
fi fi
if [ "$GERRIT_STATS" = "y" ] ; then if [ "$GERRIT_STATS" = "y" ] ; then
@@ -119,6 +141,13 @@ if [ "$GERRIT_STATS" = "y" ] ; then
grep -v '^#' ${CONFIGDIR}/${RELEASE} | grep -v '^#' ${CONFIGDIR}/${RELEASE} |
while read project revisions x; do while read project revisions x; do
cd "${GITBASE}/${project}" cd "${GITBASE}/${project}"
# match possible dates of the format YYYY-MM-DD to use in
# supplying git with a '--since DATE' paramter instead of a
# range of changeset ids
if IS_DATE $revisions; then
DEBUG "Matched a git --since date of '${revisions}'"
revisions="--since ${revisions}"
fi
git log ${revisions} | git log ${revisions} |
awk '/^ Change-Id: / { print $2 }' | awk '/^ Change-Id: / { print $2 }' |
split -l 100 -d - "${TEMPDIR}/${project}-${RELEASE}-change-ids-" split -l 100 -d - "${TEMPDIR}/${project}-${RELEASE}-change-ids-"
@@ -142,6 +171,13 @@ if [ "$GERRIT_STATS" = "y" ] ; then
while read project revisions x; do while read project revisions x; do
DEBUG "Generating a list of commit IDs for ${project}" DEBUG "Generating a list of commit IDs for ${project}"
cd "${GITBASE}/${project}" cd "${GITBASE}/${project}"
# match possible dates of the format YYYY-MM-DD to use in
# supplying git with a '--since DATE' paramter instead of a
# range of changeset ids
if IS_DATE $revisions; then
DEBUG "Matched a git --since date of '${revisions}'"
revisions="--since ${revisions}"
fi
git log --pretty=format:%H $revisions > \ git log --pretty=format:%H $revisions > \
"${TEMPDIR}/${project}-${RELEASE}-commit-ids.txt" "${TEMPDIR}/${project}-${RELEASE}-commit-ids.txt"
done done
@@ -160,15 +196,17 @@ if [ "$GERRIT_STATS" = "y" ] ; then
echo "Generating gerrit statistics" echo "Generating gerrit statistics"
cd ${BASEDIR} cd ${BASEDIR}
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-gerrit-stats.txt"
echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-gerrit-stats-all.txt"
grep -v '^#' ${CONFIGDIR}/${RELEASE} | grep -v '^#' ${CONFIGDIR}/${RELEASE} |
while read project x; do while read project x; do
DEBUG "Generating gerrit statistics for ${project}" DEBUG "Generating gerrit statistics for ${project}"
python gerritdm -l 20 \ python gerritdm -l 20 \
< "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" \ < "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" \
> "${TEMPDIR}/${project}-gerrit-stats.txt" >> "${TEMPDIR}/${project}-gerrit-stats.txt"
python gerritdm -z \ python gerritdm -z \
< "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" \ < "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" \
> "${TEMPDIR}/${project}-gerrit-stats-all.txt" >> "${TEMPDIR}/${project}-gerrit-stats-all.txt"
done done
DEBUG "Generating aggregate gerrit statistics for all projects" DEBUG "Generating aggregate gerrit statistics for all projects"
@@ -177,8 +215,10 @@ if [ "$GERRIT_STATS" = "y" ] ; then
while read project x; do while read project x; do
cat "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" >> "${TEMPDIR}/gerrit-reviewers.txt" cat "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" >> "${TEMPDIR}/gerrit-reviewers.txt"
done done
python gerritdm -l 20 < "${TEMPDIR}/gerrit-reviewers.txt" > "${TEMPDIR}/gerrit-stats.txt" echo "${OUTPUT_HEADER}" > "${TEMPDIR}/gerrit-stats.txt"
python gerritdm -z < "${TEMPDIR}/gerrit-reviewers.txt" > "${TEMPDIR}/gerrit-stats-all.txt" echo "${OUTPUT_HEADER}" > "${TEMPDIR}/gerrit-stats-all.txt"
python gerritdm -l 20 < "${TEMPDIR}/gerrit-reviewers.txt" >> "${TEMPDIR}/gerrit-stats.txt"
python gerritdm -z < "${TEMPDIR}/gerrit-reviewers.txt" >> "${TEMPDIR}/gerrit-stats-all.txt"
fi fi
DEBUG "Cleaning up" DEBUG "Cleaning up"