diff --git a/csv.py b/csv.py new file mode 100644 index 0000000..34ea10a --- /dev/null +++ b/csv.py @@ -0,0 +1,38 @@ +# +# aggregate per-month statistics for people +# +import sys, datetime + +class CSVStat: + def __init__ (self, name, employer, date): + self.name = name + self.employer = employer + self.added = self.removed = 0 + self.date = date + def accumulate (self, p): + self.added = self.added + p.added + self.removed = self.removed + p.removed + +PeriodCommitHash = { } + +def AccumulatePatch (p): + date = "%.2d-%.2d-01"%(p.date.year, p.date.month) + authdatekey = "%s-%s"%(p.author.name, date) + if authdatekey not in PeriodCommitHash: + empl = p.author.emailemployer (p.email, p.date) + stat = CSVStat (p.author.name, empl, date) + PeriodCommitHash[authdatekey] = stat + else: + stat = PeriodCommitHash[authdatekey] + stat.accumulate (p) + +def OutputCSV (file): + if file is None: + return + file.write ("Name\tAffliation\tDate\tAdded\tRemoved\n") + for date, stat in PeriodCommitHash.items(): + # sanitise names " is common and \" sometimes too + empl_name = stat.employer.name.replace ("\"", ".").replace ("\\", ".") + author_name = stat.name.replace ("\"", ".").replace ("\\", ".") + file.write ("\"%s\"\t\"%s\"\t%s\t%d\t%d\n"%(author_name, empl_name, stat.date, \ + stat.added, stat.removed)) diff --git a/gitdm b/gitdm index 20ec257..1c3adc7 100755 --- a/gitdm +++ b/gitdm @@ -11,7 +11,7 @@ # Public License, version 2. -import database, ConfigFile +import database, csv, ConfigFile import getopt, datetime import os, re, sys, rfc822, string from patterns import * @@ -30,6 +30,7 @@ DevReports = 1 DateStats = 0 AuthorSOBs = 1 FileFilter = None +CSVFile = None AkpmOverLt = 0 DumpDB = 0 CFName = 'gitdm.config' @@ -46,14 +47,15 @@ CFName = 'gitdm.config' # -r pattern Restrict to files matching pattern # -s Ignore author SOB lines # -u Map unknown employers to '(Unknown)' +# -x file.csv Export raw statistics as CSV # -z Dump out the hacker database at completion def ParseOpts (): global Outfile, ListCount, MapUnknown, HTMLfile, DevReports global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB - global CFName + global CFName, CSVFile - opts, rest = getopt.getopt (sys.argv[1:], 'adc:Dh:l:o:r:suz') + opts, rest = getopt.getopt (sys.argv[1:], 'adc:Dh:l:o:r:sux:z') for opt in opts: if opt[0] == '-a': AkpmOverLt = 1 @@ -76,6 +78,9 @@ def ParseOpts (): AuthorSOBs = 0 elif opt[0] == '-u': MapUnknown = 1 + elif opt[0] == '-x': + CSVFile = open (opt[1], 'w') + print "open output file " + opt[1] + "\n" elif opt[0] == '-z': DumpDB = 1 @@ -260,7 +265,6 @@ THead = '''

%s ''' - def BeginReport (title): global HTMLclass @@ -459,6 +463,7 @@ while (1): for sobemail, sob in p.sobs: sob.addsob (p) CSCount += 1 + csv.AccumulatePatch (p) print >> sys.stderr, 'Grabbing changesets...done' if DumpDB: @@ -479,6 +484,10 @@ if DateStats: PrintDateStats () sys.exit(0) +csv.OutputCSV (CSVFile) +if CSVFile is not None: + CSVFile.close () + if DevReports: ReportByPCount (hlist) ReportByLChanged (hlist)