From e3252c037f71a971c6d3faa8fa83f5a798f95fc5 Mon Sep 17 00:00:00 2001 From: Dan Stangel Date: Thu, 23 Jan 2014 15:43:24 -0700 Subject: [PATCH] Add header to output files and allow date ranges This change adds a brief one line header to all default output files to indicate when the analysis was generated. It also allows the gitdm configuration file to use a date of the format YYYY-MM-DD to specify the start or end date of a range of commits, rather than only allowing a git changeset hash. This is valuable for analyzing projects which do not routinely or consistently use tags, and when it is difficult to precisely pinpoint the git commit that corresponds to a time-based release. Change-Id: I899f6c6b046462addd3235e7797a3e1a63c8776c --- do-it.sh | 58 +++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/do-it.sh b/do-it.sh index 84b384c..0d0b9fe 100755 --- a/do-it.sh +++ b/do-it.sh @@ -8,6 +8,13 @@ function DEBUG() { # enable/disable debugging output GITDM_DEBUG=${GITDM_DEBUG:-"off"} +# determine if a given parameter is a date matching the format YYYY-MM-DD, +# i.e. 2013-09-13 This is used to decide if git should specify a start +# date with '--since YYYY-MM-DD' rather than use an absolute changeset id +function IS_DATE() { + [[ $1 =~ ^[0-9][0-9][0-9][0-9]\-[0-9][0-9]\-[0-9][0-9]$ ]] +} + GITBASE=${GITBASE:-~/git/openstack} RELEASE=${RELEASE:-havana} BASEDIR=$(pwd) @@ -23,6 +30,9 @@ LP_STATS=${LP_STATS:-n} QUERY_LP=${QUERY_LP:-y} GERRIT_STATS=${GERRIT_STATS:-y} REMOVE_TEMPDIR=${REMOVE_TEMPDIR:-y} +TIMESTAMP=`date` +# brief header to prepend to all of the analysis results +OUTPUT_HEADER="Statistics generated at ${TIMESTAMP}" if [ ! -d .venv ]; then echo "Creating a virtualenv" @@ -53,6 +63,13 @@ if [ "$GIT_STATS" = "y" ] ; then while read project revisions excludes x; do DEBUG "Generating git commit log for ${project}" cd ${GITBASE}/${project} + # match possible dates of the format YYYY-MM-DD to use in + # supplying git with a '--since DATE' paramter instead of a + # range of changeset ids + if IS_DATE $revisions; then + DEBUG "Matched a git --since date of '${revisions}'" + revisions="--since ${revisions}" + fi git log ${GITLOGARGS} ${revisions} > "${TEMPDIR}/${project}-commits.log" if [ -n "$excludes" ]; then awk "/^commit /{ok=1} /^commit ${excludes}/{ok=0} {if(ok) {print}}" \ @@ -66,9 +83,11 @@ if [ "$GIT_STATS" = "y" ] ; then grep -v '^#' ${CONFIGDIR}/${RELEASE} | while read project x; do DEBUG "Generating git stats for ${project}" - python gitdm -l 20 -n < "${TEMPDIR}/${project}-commits.log" > "${TEMPDIR}/${project}-git-stats.txt" + echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-git-stats.txt" + python gitdm -l 20 -n < "${TEMPDIR}/${project}-commits.log" >> "${TEMPDIR}/${project}-git-stats.txt" # also create a full dump with csv for further downstream processing - python gitdm -n -y -z -x "${TEMPDIR}/${project}-git-stats.csv" < "${TEMPDIR}/${project}-commits.log" > "${TEMPDIR}/${project}-git-stats-all.txt" + echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-git-stats.csv" + python gitdm -n -y -z -x "${TEMPDIR}/${project}-git-stats.csv" < "${TEMPDIR}/${project}-commits.log" >> "${TEMPDIR}/${project}-git-stats-all.txt" done DEBUG "Generating aggregate git stats for all projects" @@ -76,7 +95,8 @@ if [ "$GIT_STATS" = "y" ] ; then while read project x; do cat "${TEMPDIR}/${project}-commits.log" >> "${TEMPDIR}/git-commits.log" done - python gitdm -n -y -z -x "${TEMPDIR}/git-stats.csv" < "${TEMPDIR}/git-commits.log" > "${TEMPDIR}/git-stats.txt" + echo "${OUTPUT_HEADER}" > "${TEMPDIR}/git-stats.txt" + python gitdm -n -y -z -x "${TEMPDIR}/git-stats.csv" < "${TEMPDIR}/git-commits.log" >> "${TEMPDIR}/git-stats.txt" fi if [ "$LP_STATS" = "y" ] ; then @@ -97,11 +117,12 @@ if [ "$LP_STATS" = "y" ] ; then echo "Generating launchpad statistics" cd ${BASEDIR} + echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-lp-stats.txt" grep -v '^#' ${CONFIGDIR}/${RELEASE} | while read project x; do DEBUG "Generating launchpad stats for ${project}" grep -v '' "${TEMPDIR}/${project}-bugs.log" | - python lpdm -l 20 > "${TEMPDIR}/${project}-lp-stats.txt" + python lpdm -l 20 >> "${TEMPDIR}/${project}-lp-stats.txt" done DEBUG "Generating aggregate launchpad stats for all projects" @@ -110,8 +131,9 @@ if [ "$LP_STATS" = "y" ] ; then while read project x; do grep -v '' "${TEMPDIR}/${project}-bugs.log" >> "${TEMPDIR}/lp-bugs.log" done + echo "${OUTPUT_HEADER}" > "${TEMPDIR}/lp-stats.txt" grep -v '' "${TEMPDIR}/lp-bugs.log" | - python lpdm -l 20 > "${TEMPDIR}/lp-stats.txt" + python lpdm -l 20 >> "${TEMPDIR}/lp-stats.txt" fi if [ "$GERRIT_STATS" = "y" ] ; then @@ -119,6 +141,13 @@ if [ "$GERRIT_STATS" = "y" ] ; then grep -v '^#' ${CONFIGDIR}/${RELEASE} | while read project revisions x; do cd "${GITBASE}/${project}" + # match possible dates of the format YYYY-MM-DD to use in + # supplying git with a '--since DATE' paramter instead of a + # range of changeset ids + if IS_DATE $revisions; then + DEBUG "Matched a git --since date of '${revisions}'" + revisions="--since ${revisions}" + fi git log ${revisions} | awk '/^ Change-Id: / { print $2 }' | split -l 100 -d - "${TEMPDIR}/${project}-${RELEASE}-change-ids-" @@ -142,6 +171,13 @@ if [ "$GERRIT_STATS" = "y" ] ; then while read project revisions x; do DEBUG "Generating a list of commit IDs for ${project}" cd "${GITBASE}/${project}" + # match possible dates of the format YYYY-MM-DD to use in + # supplying git with a '--since DATE' paramter instead of a + # range of changeset ids + if IS_DATE $revisions; then + DEBUG "Matched a git --since date of '${revisions}'" + revisions="--since ${revisions}" + fi git log --pretty=format:%H $revisions > \ "${TEMPDIR}/${project}-${RELEASE}-commit-ids.txt" done @@ -160,15 +196,17 @@ if [ "$GERRIT_STATS" = "y" ] ; then echo "Generating gerrit statistics" cd ${BASEDIR} + echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-gerrit-stats.txt" + echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-gerrit-stats-all.txt" grep -v '^#' ${CONFIGDIR}/${RELEASE} | while read project x; do DEBUG "Generating gerrit statistics for ${project}" python gerritdm -l 20 \ < "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" \ - > "${TEMPDIR}/${project}-gerrit-stats.txt" + >> "${TEMPDIR}/${project}-gerrit-stats.txt" python gerritdm -z \ < "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" \ - > "${TEMPDIR}/${project}-gerrit-stats-all.txt" + >> "${TEMPDIR}/${project}-gerrit-stats-all.txt" done DEBUG "Generating aggregate gerrit statistics for all projects" @@ -177,8 +215,10 @@ if [ "$GERRIT_STATS" = "y" ] ; then while read project x; do cat "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" >> "${TEMPDIR}/gerrit-reviewers.txt" done - python gerritdm -l 20 < "${TEMPDIR}/gerrit-reviewers.txt" > "${TEMPDIR}/gerrit-stats.txt" - python gerritdm -z < "${TEMPDIR}/gerrit-reviewers.txt" > "${TEMPDIR}/gerrit-stats-all.txt" + echo "${OUTPUT_HEADER}" > "${TEMPDIR}/gerrit-stats.txt" + echo "${OUTPUT_HEADER}" > "${TEMPDIR}/gerrit-stats-all.txt" + python gerritdm -l 20 < "${TEMPDIR}/gerrit-reviewers.txt" >> "${TEMPDIR}/gerrit-stats.txt" + python gerritdm -z < "${TEMPDIR}/gerrit-reviewers.txt" >> "${TEMPDIR}/gerrit-stats-all.txt" fi DEBUG "Cleaning up"