Add version tracking support and an "unknown hackers" report
Version tracking was used to see who had contributed to the most kernel releases; not sure it's a long-term-useful feature. The unknown hackers report helps when trying to improve the database. Signed-off-by: Jonathan Corbet <corbet@lwn.net>
This commit is contained in:
parent
2797636b98
commit
1e293bc90a
14
gitdm
14
gitdm
|
@ -5,8 +5,8 @@
|
|||
#
|
||||
# This code is part of the LWN git data miner.
|
||||
#
|
||||
# Copyright 2007-11 Eklektix, Inc.
|
||||
# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
|
||||
# Copyright 2007-12 Eklektix, Inc.
|
||||
# Copyright 2007-12 Jonathan Corbet <corbet@lwn.net>
|
||||
# Copyright 2011 Germán Póo-Caamaño <gpoo@gnome.org>
|
||||
#
|
||||
# This file may be distributed under the terms of the GNU General
|
||||
|
@ -43,6 +43,7 @@ DirName = ''
|
|||
Aggregate = 'month'
|
||||
Numstat = 0
|
||||
ReportByFileType = 0
|
||||
ReportUnknowns = False
|
||||
|
||||
#
|
||||
# Options:
|
||||
|
@ -60,6 +61,7 @@ ReportByFileType = 0
|
|||
# -r pattern Restrict to files matching pattern
|
||||
# -s Ignore author SOB lines
|
||||
# -u Map unknown employers to '(Unknown)'
|
||||
# -U Dump unknown hackers in report
|
||||
# -x file.csv Export raw statistics as CSV
|
||||
# -w Aggregate the raw statistics by weeks instead of months
|
||||
# -y Aggregate the raw statistics by years instead of months
|
||||
|
@ -69,9 +71,9 @@ def ParseOpts ():
|
|||
global MapUnknown, DevReports
|
||||
global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
|
||||
global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat
|
||||
global ReportByFileType
|
||||
global ReportByFileType, ReportUnknowns
|
||||
|
||||
opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stuwx:yz')
|
||||
opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stUuwx:yz')
|
||||
for opt in opts:
|
||||
if opt[0] == '-a':
|
||||
AkpmOverLt = 1
|
||||
|
@ -102,6 +104,8 @@ def ParseOpts ():
|
|||
ReportByFileType = 1
|
||||
elif opt[0] == '-u':
|
||||
MapUnknown = 1
|
||||
elif opt[0] == '-U':
|
||||
ReportUnknowns = True
|
||||
elif opt[0] == '-x':
|
||||
CSVFile = open (opt[1], 'w')
|
||||
print "open output file " + opt[1] + "\n"
|
||||
|
@ -492,6 +496,8 @@ if CSVFile:
|
|||
|
||||
if DevReports:
|
||||
reports.DevReports (hlist, TotalChanged, CSCount, TotalRemoved)
|
||||
if ReportUnknowns:
|
||||
reports.ReportUnknowns(hlist, CSCount)
|
||||
reports.EmplReports (elist, TotalChanged, CSCount)
|
||||
|
||||
if ReportByFileType and Numstat:
|
||||
|
|
|
@ -21,7 +21,8 @@ import re
|
|||
_pemail = r'\s+"?([^<"]+)"?\s<([^>]+)>' # just email addr + name
|
||||
|
||||
patterns = {
|
||||
'commit': re.compile (r'^commit ([0-9a-f ]+)$'),
|
||||
'tagcommit': re.compile (r'^commit ([\da-f]+) .*tag: (v[23]\.\d(\.\d\d?)?)'),
|
||||
'commit': re.compile (r'^commit ([0-9a-f ]+)'),
|
||||
'author': re.compile (r'^Author:' + _pemail + '$'),
|
||||
'signed-off-by': re.compile (r'^\s+Signed-off-by:' + _pemail + '.*$'),
|
||||
'merge': re.compile (r'^Merge:.*$'),
|
||||
|
|
72
reports.py
72
reports.py
|
@ -3,8 +3,8 @@
|
|||
#
|
||||
# This code is part of the LWN git data miner.
|
||||
#
|
||||
# Copyright 2007-11 Eklektix, Inc.
|
||||
# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
|
||||
# Copyright 2007-12 Eklektix, Inc.
|
||||
# Copyright 2007-12 Jonathan Corbet <corbet@lwn.net>
|
||||
#
|
||||
# This file may be distributed under the terms of the GNU General
|
||||
# Public License, version 2.
|
||||
|
@ -58,6 +58,10 @@ TRow = ''' <tr class="%s">
|
|||
<td>%s</td><td align="right">%d</td><td align="right">%.1f%%</td></tr>
|
||||
'''
|
||||
|
||||
# HTML table-row template filled in by ReportLineStr with, in order: the row
# class, the row text, an integer count (right-aligned cell), and a free-form
# string for the last cell.
TRowStr = ''' <tr class="%s">
|
||||
<td>%s</td><td align="right">%d</td><td>%s</td></tr>
|
||||
'''
|
||||
|
||||
def ReportLine (text, count, pct):
|
||||
global HTMLclass
|
||||
if count == 0:
|
||||
|
@ -67,6 +71,15 @@ def ReportLine (text, count, pct):
|
|||
HTMLfile.write (TRow % (HClasses[HTMLclass], text, count, pct))
|
||||
HTMLclass ^= 1
|
||||
|
||||
# Write one report row whose last column is a free-form string ("extra").
# Rows with a count of zero are skipped and produce no output at all.
def ReportLineStr (text, count, extra):
|
||||
global HTMLclass
|
||||
if count == 0:
|
||||
return
|
||||
# Plain-text row: text left-justified in 25 columns, count in 4, then extra.
Outfile.write ('%-25s %4d %s\n' % (text, count, extra))
|
||||
# The HTML row is only written when an HTML output file is set.
if HTMLfile:
|
||||
HTMLfile.write (TRowStr % (HClasses[HTMLclass], text, count, extra))
|
||||
# Flip the low bit of the index used to pick the row class from HClasses.
# NOTE(review): indentation is lost in this view, so it cannot be told here
# whether this toggle sits inside the HTMLfile check -- confirm in the file.
HTMLclass ^= 1
|
||||
|
||||
def EndReport ():
|
||||
if HTMLfile:
|
||||
HTMLfile.write ('</table>\n\n')
|
||||
|
@ -284,6 +297,34 @@ def ReportByRepCreds (hlist):
|
|||
break
|
||||
EndReport ()
|
||||
|
||||
#
|
||||
# Versions.
|
||||
#
|
||||
def CompareVersionCounts (h1, h2):
    #
    # cmp-style comparator: the hacker with more entries in .versions
    # sorts first.  An empty or missing .versions sorts after any
    # non-empty one, and two empty ones compare equal.
    #
    if not h1.versions:
        # h1 has nothing; h2 wins if it has anything, otherwise a tie.
        return 1 if h2.versions else 0
    if not h2.versions:
        return -1
    return len (h2.versions) - len (h1.versions)
|
||||
|
||||
def MissedVersions (hv, allv):
    #
    # Return a space-separated string of the entries of allv that do
    # not appear in hv, listed in the reverse of their order in allv.
    #
    skipped = [version for version in allv if version not in hv]
    return ' '.join (skipped[::-1])
|
||||
|
||||
def ReportVersions (hlist):
    #
    # Report the hackers who appear in the most versions, one row per
    # hacker, with the versions each one missed in the last column.
    # hlist is sorted in place (most versions first) and at most
    # ListCount hackers are visited.
    #
    hlist.sort (CompareVersionCounts)
    BeginReport ('Developers represented in the most kernel versions')
    #
    # The top-ranked hacker's version list serves as the reference set
    # of "all versions".  An empty hacker list yields an empty report
    # rather than an IndexError.
    #
    allversions = []
    if hlist:
        allversions = hlist[0].versions or []
    count = 0
    for h in hlist:
        # CompareVersionCounts tolerates a false-valued .versions, so
        # do the same here instead of failing in len().
        versions = h.versions or []
        ReportLineStr (h.name, len (versions), MissedVersions (versions, allversions))
        count += 1
        if count >= ListCount:
            break
    EndReport ()
|
||||
|
||||
|
||||
def CompareESOBs (e1, e2):
|
||||
|
@ -341,6 +382,33 @@ def EmplReports (elist, totalchanged, cscount):
|
|||
ReportByESOBs (elist)
|
||||
ReportByEHackers (elist)
|
||||
|
||||
#
|
||||
# Who are the unknown hackers?
|
||||
#
|
||||
def IsUnknown(h):
    #
    # True when the hacker's first employer entry is named either
    # '(Unknown)' or exactly the hacker's first email address.
    # Presumably an unmapped hacker carries their own address as the
    # employer name -- confirm against the code that builds .employer.
    #
    employer_name = h.employer[0][0][1].name
    if h.email[0] == employer_name:
        return True
    return employer_name == '(Unknown)'
|
||||
|
||||
# Report the hackers for whom IsUnknown() is true, one row per hacker, showing
# len(h.patches) and that number as a percentage of cscount.
# NOTE(review): indentation is lost in this view, so the nesting of the
# "count += 1" and "if count >= ListCount" lines below cannot be confirmed here.
def ReportUnknowns(hlist, cscount):
|
||||
#
|
||||
# Trim the list to just the unknowns; try to work properly whether
|
||||
# mapping to (Unknown) is happening or not.
|
||||
#
|
||||
ulist = [ h for h in hlist if IsUnknown(h) ]
|
||||
# Only the filtered copy is sorted; the caller's hlist is left in its order.
ulist.sort(ComparePCount)
|
||||
count = 0
|
||||
BeginReport('Developers with unknown affiliation')
|
||||
for h in ulist:
|
||||
pcount = len(h.patches)
|
||||
# Hackers without any patches get no row.
if pcount > 0:
|
||||
# 100.0 keeps the percentage a float under Python 2 integer division.
ReportLine(h.name, pcount, (pcount*100.0)/cscount)
|
||||
count += 1
|
||||
# Stop once ListCount entries have been counted.
if count >= ListCount:
|
||||
break
|
||||
EndReport()
|
||||
|
||||
|
||||
|
||||
def ReportByFileType (hacker_list):
|
||||
total = {}
|
||||
total_by_hacker = {}
|
||||
|
|
Loading…
Reference in New Issue