Add version tracking support and an "unknown hackers" report

Version tracking was used to see who had contributed to the most kernel
releases; not sure it's a long-term-useful feature.  The unknown hackers
report helps when trying to improve the database.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
This commit is contained in:
Jonathan Corbet 2012-04-06 16:00:04 -06:00
parent 2797636b98
commit 1e293bc90a
3 changed files with 82 additions and 7 deletions

14
gitdm
View File

@ -5,8 +5,8 @@
#
# This code is part of the LWN git data miner.
#
# Copyright 2007-11 Eklektix, Inc.
# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
# Copyright 2007-12 Eklektix, Inc.
# Copyright 2007-12 Jonathan Corbet <corbet@lwn.net>
# Copyright 2011 Germán Póo-Caamaño <gpoo@gnome.org>
#
# This file may be distributed under the terms of the GNU General
@ -43,6 +43,7 @@ DirName = ''
Aggregate = 'month'
Numstat = 0
ReportByFileType = 0
ReportUnknowns = False
#
# Options:
@ -60,6 +61,7 @@ ReportByFileType = 0
# -r pattern Restrict to files matching pattern
# -s Ignore author SOB lines
# -u Map unknown employers to '(Unknown)'
# -U Dump unknown hackers in report
# -x file.csv Export raw statistics as CSV
# -w Aggregate the raw statistics by weeks instead of months
# -y Aggregate the raw statistics by years instead of months
@ -69,9 +71,9 @@ def ParseOpts ():
global MapUnknown, DevReports
global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat
global ReportByFileType
global ReportByFileType, ReportUnknowns
opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stuwx:yz')
opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stUuwx:yz')
for opt in opts:
if opt[0] == '-a':
AkpmOverLt = 1
@ -102,6 +104,8 @@ def ParseOpts ():
ReportByFileType = 1
elif opt[0] == '-u':
MapUnknown = 1
elif opt[0] == '-U':
ReportUnknowns = True
elif opt[0] == '-x':
CSVFile = open (opt[1], 'w')
print "open output file " + opt[1] + "\n"
@ -492,6 +496,8 @@ if CSVFile:
if DevReports:
reports.DevReports (hlist, TotalChanged, CSCount, TotalRemoved)
if ReportUnknowns:
reports.ReportUnknowns(hlist, CSCount)
reports.EmplReports (elist, TotalChanged, CSCount)
if ReportByFileType and Numstat:

View File

@ -21,7 +21,8 @@ import re
_pemail = r'\s+"?([^<"]+)"?\s<([^>]+)>' # just email addr + name
patterns = {
'commit': re.compile (r'^commit ([0-9a-f ]+)$'),
'tagcommit': re.compile (r'^commit ([\da-f]+) .*tag: (v[23]\.\d(\.\d\d?)?)'),
'commit': re.compile (r'^commit ([0-9a-f ]+)'),
'author': re.compile (r'^Author:' + _pemail + '$'),
'signed-off-by': re.compile (r'^\s+Signed-off-by:' + _pemail + '.*$'),
'merge': re.compile (r'^Merge:.*$'),

View File

@ -3,8 +3,8 @@
#
# This code is part of the LWN git data miner.
#
# Copyright 2007-11 Eklektix, Inc.
# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
# Copyright 2007-12 Eklektix, Inc.
# Copyright 2007-12 Jonathan Corbet <corbet@lwn.net>
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
@ -58,6 +58,10 @@ TRow = ''' <tr class="%s">
<td>%s</td><td align="right">%d</td><td align="right">%.1f%%</td></tr>
'''
#
# HTML table row used by ReportLineStr: the slots are, in order, the row's
# CSS class, the text label, an integer count (right-aligned), and a
# free-form string column.
#
TRowStr = ''' <tr class="%s">
<td>%s</td><td align="right">%d</td><td>%s</td></tr>
'''
def ReportLine (text, count, pct):
global HTMLclass
if count == 0:
@ -67,6 +71,15 @@ def ReportLine (text, count, pct):
HTMLfile.write (TRow % (HClasses[HTMLclass], text, count, pct))
HTMLclass ^= 1
def ReportLineStr (text, count, extra):
    """Emit one report row whose last column is a free-form string.

    text  -- label for the row
    count -- integer shown in the second column; a zero count
             suppresses the row entirely
    extra -- string shown in the last column

    The row is always written to Outfile; when HTMLfile is set it is
    also written there as a table row.
    """
    global HTMLclass
    if count != 0:
        plain_row = '%-25s %4d %s\n' % (text, count, extra)
        Outfile.write (plain_row)
        if HTMLfile:
            html_row = TRowStr % (HClasses[HTMLclass], text, count, extra)
            HTMLfile.write (html_row)
            # Toggle the class index so the next HTML row uses the other class.
            HTMLclass ^= 1
def EndReport ():
if HTMLfile:
HTMLfile.write ('</table>\n\n')
@ -284,6 +297,34 @@ def ReportByRepCreds (hlist):
break
EndReport ()
#
# Versions.
#
def CompareVersionCounts (h1, h2):
    """cmp-style comparison that orders by version count, largest first.

    Returns a negative number when h1 should sort before h2, a positive
    number when it should sort after, and 0 when they tie.  An entry
    whose versions attribute is empty or None sorts after any entry
    that has versions; two such entries compare equal.
    """
    mine, theirs = h1.versions, h2.versions
    if not mine:
        # h1 has nothing: it loses to anybody with versions, ties otherwise.
        return 1 if theirs else 0
    if not theirs:
        return -1
    return len (theirs) - len (mine)
def MissedVersions (hv, allv):
    """Return the entries of allv that are absent from hv.

    The result is a single space-separated string, listed in the
    reverse of the order in which the entries appear in allv.
    """
    absent = []
    for version in allv:
        if version not in hv:
            absent.append (version)
    return ' '.join (absent[::-1])
def ReportVersions (hlist):
    """Report the developers who appear in the most kernel versions.

    hlist is sorted in place with CompareVersionCounts (most versions
    first).  Each row shows the developer's name, how many versions
    they appear in, and the versions they missed.  At most ListCount
    rows are considered.

    An empty hlist produces an empty report instead of raising
    IndexError, and a developer with no version list (None or empty)
    is treated as having zero versions, matching how
    CompareVersionCounts ranks such entries.
    """
    hlist.sort (CompareVersionCounts)
    BeginReport ('Developers represented in the most kernel versions')
    if hlist:
        # The top-ranked developer's versions serve as the reference set
        # against which everybody else's "missed" list is computed.
        allversions = hlist[0].versions or []
        count = 0
        for h in hlist:
            versions = h.versions or []
            ReportLineStr (h.name, len (versions),
                           MissedVersions (versions, allversions))
            count += 1
            if count >= ListCount:
                break
    EndReport ()
def CompareESOBs (e1, e2):
@ -341,6 +382,33 @@ def EmplReports (elist, totalchanged, cscount):
ReportByESOBs (elist)
ReportByEHackers (elist)
#
# Who are the unknown hackers?
#
def IsUnknown(h):
    """Tell whether a hacker's affiliation counts as unknown.

    Only the name of the first employer entry is examined.  The hacker
    is unknown when that name equals their first email address or is
    the literal '(Unknown)'.
    """
    employer_name = h.employer[0][0][1].name
    if h.email[0] == employer_name:
        return True
    return employer_name == '(Unknown)'
def ReportUnknowns(hlist, cscount):
    """Report the developers whose affiliation is unknown.

    hlist   -- hackers to examine; those for which IsUnknown() is true
               are kept and ordered with ComparePCount
    cscount -- changeset total used to compute each row's percentage

    Only hackers with at least one patch get a row, and the loop stops
    once ListCount rows have been produced.
    """
    #
    # Narrow the list down to the unknowns; this is meant to work
    # whether or not mapping to (Unknown) is in effect.
    #
    unknowns = [hacker for hacker in hlist if IsUnknown(hacker)]
    unknowns.sort(ComparePCount)
    BeginReport('Developers with unknown affiliation')
    reported = 0
    for hacker in unknowns:
        npatches = len(hacker.patches)
        if npatches > 0:
            share = (npatches*100.0)/cscount
            ReportLine(hacker.name, npatches, share)
            reported += 1
        if reported >= ListCount:
            break
    EndReport()
def ReportByFileType (hacker_list):
total = {}
total_by_hacker = {}