From d1a8929872216089864d4f28f9c27b2a648d7302 Mon Sep 17 00:00:00 2001 From: Michael Meeks Date: Mon, 29 Sep 2008 17:46:37 +0100 Subject: [PATCH] gitdm patch ... Hi guys, I knocked up a patch to generate some per-month, by-affiliation statistics from the gitdm output; attached for interest or merging. A sample of the output, complete with OO.o data-pilot, and pretty chart is here: http://www.gnome.org/~michael/data/2008-09-29-linux-stats.ods with chart here: http://www.gnome.org/~michael/images/2008-09-29-kernel-active.png caption being: "Graph showing number and affiliation of active kernel developers (contributing more than 100 lines per month). Quick affiliation key, from bottom up: Unknown, No-Affiliation, IBM, RedHat, Novell, Intel ..." These are as yet not published, I plan to use them as a comparison to OO.o's somewhat mediocre equivalents; hope to go live with them soon (and fix the horrible bugs in stacked area charts to make them actually pretty ). HTH, Michael. -- michael.meeks@novell.com <><, Pseudo Engineer, itinerant idiot Signed-off-by: Jonathan Corbet --- csv.py | 38 ++++++++++++++++++++++++++++++++++++++ gitdm | 17 +++++++++++++---- 2 files changed, 51 insertions(+), 4 deletions(-) create mode 100644 csv.py diff --git a/csv.py b/csv.py new file mode 100644 index 0000000..34ea10a --- /dev/null +++ b/csv.py @@ -0,0 +1,38 @@ +# +# aggregate per-month statistics for people +# +import sys, datetime + +class CSVStat: + def __init__ (self, name, employer, date): + self.name = name + self.employer = employer + self.added = self.removed = 0 + self.date = date + def accumulate (self, p): + self.added = self.added + p.added + self.removed = self.removed + p.removed + +PeriodCommitHash = { } + +def AccumulatePatch (p): + date = "%.2d-%.2d-01"%(p.date.year, p.date.month) + authdatekey = "%s-%s"%(p.author.name, date) + if authdatekey not in PeriodCommitHash: + empl = p.author.emailemployer (p.email, p.date) + stat = CSVStat (p.author.name, empl, date) + PeriodCommitHash[authdatekey] = stat + else: + stat = PeriodCommitHash[authdatekey] + stat.accumulate (p) + +def OutputCSV (file): + if file is None: + return + file.write ("Name\tAffliation\tDate\tAdded\tRemoved\n") + for date, stat in PeriodCommitHash.items(): + # sanitise names " is common and \" sometimes too + empl_name = stat.employer.name.replace ("\"", ".").replace ("\\", ".") + author_name = stat.name.replace ("\"", ".").replace ("\\", ".") + file.write ("\"%s\"\t\"%s\"\t%s\t%d\t%d\n"%(author_name, empl_name, stat.date, \ + stat.added, stat.removed)) diff --git a/gitdm b/gitdm index 20ec257..1c3adc7 100755 --- a/gitdm +++ b/gitdm @@ -11,7 +11,7 @@ # Public License, version 2. -import database, ConfigFile +import database, csv, ConfigFile import getopt, datetime import os, re, sys, rfc822, string from patterns import * @@ -30,6 +30,7 @@ DevReports = 1 DateStats = 0 AuthorSOBs = 1 FileFilter = None +CSVFile = None AkpmOverLt = 0 DumpDB = 0 CFName = 'gitdm.config' @@ -46,14 +47,15 @@ CFName = 'gitdm.config' # -r pattern Restrict to files matching pattern # -s Ignore author SOB lines # -u Map unknown employers to '(Unknown)' +# -x file.csv Export raw statistics as CSV # -z Dump out the hacker database at completion def ParseOpts (): global Outfile, ListCount, MapUnknown, HTMLfile, DevReports global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB - global CFName + global CFName, CSVFile - opts, rest = getopt.getopt (sys.argv[1:], 'adc:Dh:l:o:r:suz') + opts, rest = getopt.getopt (sys.argv[1:], 'adc:Dh:l:o:r:sux:z') for opt in opts: if opt[0] == '-a': AkpmOverLt = 1 @@ -76,6 +78,9 @@ def ParseOpts (): AuthorSOBs = 0 elif opt[0] == '-u': MapUnknown = 1 + elif opt[0] == '-x': + CSVFile = open (opt[1], 'w') + print "open output file " + opt[1] + "\n" elif opt[0] == '-z': DumpDB = 1 @@ -260,7 +265,6 @@ THead = '''

%s ''' - def BeginReport (title): global HTMLclass @@ -459,6 +463,7 @@ while (1): for sobemail, sob in p.sobs: sob.addsob (p) CSCount += 1 + csv.AccumulatePatch (p) print >> sys.stderr, 'Grabbing changesets...done' if DumpDB: @@ -479,6 +484,10 @@ if DateStats: PrintDateStats () sys.exit(0) +csv.OutputCSV (CSVFile) +if CSVFile is not None: + CSVFile.close () + if DevReports: ReportByPCount (hlist) ReportByLChanged (hlist)