Initial commit
First commit of gitdm to the new repo. Call it version 0.10 or something silly like that.
This commit is contained in:
commit
e1a6d06d65
|
@ -0,0 +1,2 @@
|
|||
*.pyc
|
||||
*~
|
|
@ -0,0 +1,2 @@
|
|||
The code in this directory can be distributed under the terms of the GNU
|
||||
General Public License, version 2.
|
|
@ -0,0 +1,110 @@
|
|||
#
|
||||
# Stuff for dealing with configuration files.
|
||||
#
|
||||
import sys, re, datetime
|
||||
import database
|
||||
|
||||
#
|
||||
# Read a line and strip out junk.
|
||||
#
|
||||
def ReadConfigLine (file):
    """Return the next meaningful line of an open configuration file.

    Everything from the first '#' onward is discarded and surrounding
    white space is stripped.  Lines that end up empty are skipped.

    file: an object with a readline() method returning '' at end of file.
    Returns the cleaned-up line, or None once the file is exhausted.
    """
    # Loop rather than recurse: a long run of blank or comment-only
    # lines must not be limited by the interpreter's recursion depth.
    while True:
        line = file.readline ()
        if not line:
            return None
        line = line.split ('#')[0].strip ()
        if line:
            return line
|
||||
|
||||
#
|
||||
# Give up and die.
|
||||
#
|
||||
def croak (message):
    """Write message and a newline to stderr, then exit with status 1."""
    complaint = message + '\n'
    sys.stderr.write (complaint)
    raise SystemExit (1)
|
||||
|
||||
#
|
||||
# Read a list of email aliases.
|
||||
#
|
||||
def ReadEmailAliases (name):
    """Load an email alias file into the database.

    Each meaningful line must hold exactly two white-space separated
    addresses, "alias canonical"; every pair is handed to
    database.AddEmailAlias.  An unreadable file or a malformed line ends
    the program through croak().

    name: path of the alias file.
    """
    try:
        file = open (name, 'r')
    except IOError:
        croak ('Unable to open email alias file %s' % (name))
    try:
        line = ReadConfigLine (file)
        while line:
            sline = line.split ()
            if len (sline) != 2:
                croak ('Funky email alias line "%s"' % (line))
            # find() gives -1 when there is no '@' at all (index() would
            # raise ValueError and bypass the error message) and 0 when
            # the word starts with '@'; both are rejected.
            if sline[0].find ('@') <= 0 or sline[1].find ('@') <= 0:
                croak ('Non-addresses in email alias "%s"' % (line))
            database.AddEmailAlias (sline[0], sline[1])
            line = ReadConfigLine (file)
    finally:
        # Release the file even when croak() exits in mid-parse.
        file.close ()
|
||||
|
||||
#
|
||||
# The Email/Employer map
|
||||
#
|
||||
# One EmailMap line.  Group 1 is the leading run of non-blank characters
# (the address or domain), group 2 is everything after it up to an
# optional '<' (the employer name, possibly with trailing blanks), and
# group 4 is the digits-digits-digits date following the '<', or None
# when no date is given.
EMMpat = re.compile (r'^([^\s]+)\s+([^<]+)\s*(<\s*(\d+-\d+-\d+)\s*)?$')
|
||||
|
||||
def ReadEmailEmployers (name):
    """Load an email/employer map file into the database.

    Every meaningful line is matched against EMMpat; the address, the
    stripped employer name and the parsed end date (None when absent) are
    passed to database.AddEmailEmployerMapping.  An unreadable file or a
    line that does not match ends the program through croak().

    name: path of the map file.
    """
    try:
        mapfile = open (name, 'r')
    except IOError:
        croak ('Unable to open email/employer file %s' % (name))
    while True:
        entry = ReadConfigLine (mapfile)
        if not entry:
            break
        parsed = EMMpat.match (entry)
        if parsed is None:
            croak ('Funky email/employer line "%s"' % (entry))
        address, employer, until = parsed.group (1, 2, 4)
        employer = employer.strip ()
        database.AddEmailEmployerMapping (address, employer, ParseDate (until))
    mapfile.close ()
|
||||
|
||||
def ParseDate (cdate):
    """Turn a "year-month-day" string into a datetime.date.

    cdate: the date text, or a false value (None, '') when no date was given.
    Returns the date, or None when cdate is false.
    """
    if not cdate:
        return None
    fields = cdate.split ('-')
    # Only the first three fields are looked at, in order.
    year, month, day = (int (fields[i]) for i in range (3))
    return datetime.date (year, month, day)
|
||||
|
||||
|
||||
def ReadGroupMap (fname, employer):
    """Map every address listed in a group file onto a single employer.

    Each meaningful line of the file is passed, whole, to
    database.AddEmailEmployerMapping together with employer.  An
    unreadable file ends the program through croak().

    fname: path of the group file.
    employer: employer name applied to every line.
    """
    try:
        groupfile = open (fname, 'r')
    except IOError:
        croak ('Unable to open group map file %s' % (fname))
    while True:
        address = ReadConfigLine (groupfile)
        if not address:
            break
        database.AddEmailEmployerMapping (address, employer)
    groupfile.close ()
|
||||
|
||||
#
|
||||
# Read an overall config file.
|
||||
#
|
||||
def ConfigFile (name):
    """Read the top-level configuration file and load the files it names.

    Recognized directives:
        EmailAliases file
        EmailMap file
        GroupMap file employer

    The employer of a GroupMap line is everything after the file name, so
    it may consist of several words (joined with single spaces).  An
    unreadable file, a line with fewer than two words or an unknown
    directive ends the program through croak().

    name: path of the configuration file.
    """
    try:
        file = open (name, 'r')
    except IOError:
        croak ('Unable to open config file %s' % (name))
    try:
        line = ReadConfigLine (file)
        while line:
            sline = line.split ()
            if len (sline) < 2:
                croak ('Funky config line: "%s"' % (line))
            if sline[0] == 'EmailAliases':
                ReadEmailAliases (sline[1])
            elif sline[0] == 'EmailMap':
                ReadEmailEmployers (sline[1])
            elif sline[0] == 'GroupMap':
                if len (sline) < 3:
                    croak ('Funky group map line "%s"' % (line))
                # Accept employer names containing blanks.
                ReadGroupMap (sline[1], ' '.join (sline[2:]))
            else:
                croak ('Unrecognized config line: "%s"' % (line))
            line = ReadConfigLine (file)
    finally:
        # The original never closed the configuration file.
        file.close ()
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
The code in this directory makes up the "git data miner," a simple hack
|
||||
which attempts to figure things out from the revision history in a git
|
||||
repository.
|
||||
|
||||
RUNNING GITDM
|
||||
|
||||
Run it like this:
|
||||
|
||||
git log -p -M [details] | gitdm [options]
|
||||
|
||||
The [details] tell git which changesets are of interest; the [options] can
|
||||
be:
|
||||
|
||||
-a If a patch contains signoff lines from both Andrew Morton
|
||||
and Linus Torvalds, omit Linus's.
|
||||
|
||||
-c file Specify the name of the gitdm configuration file.
|
||||
By default, "./gitdm.config" is used.
|
||||
|
||||
-d Omit the developer reports, giving employer information
|
||||
only.
|
||||
|
||||
-D Rather than create the usual statistics, create a
|
||||
file providing lines changed per day, suitable for
|
||||
feeding to a tool like gnuplot.
|
||||
|
||||
-h file Generate HTML output to the given file
|
||||
|
||||
-l num Only list the top <num> entries in each report.
|
||||
|
||||
-o file Write text output to the given file (default is stdout).
|
||||
|
||||
-r pat Only generate statistics for changes to files whose
|
||||
name matches the given regular expression.
|
||||
|
||||
-s Ignore Signed-off-by lines which match the author of
|
||||
each patch.
|
||||
|
||||
-u Group all unknown developers under the "(Unknown)"
|
||||
employer.
|
||||
|
||||
-z Dump out the hacker database to "database.dump".
|
||||
|
||||
A typical command line used to generate the "who wrote 2.6.x" LWN articles
|
||||
looks like:
|
||||
|
||||
git log -p -M v2.6.19..v2.6.20 | \
|
||||
gitdm -u -s -a -o results -h results.html
|
||||
|
||||
|
||||
CONFIGURATION FILE
|
||||
|
||||
The main purpose of the configuration file is to direct the mapping of
|
||||
email addresses onto employers. Please note that the config file parser is
|
||||
exceptionally stupid and unrobust at this point, but it gets the job done.
|
||||
|
||||
Blank lines and lines beginning with "#" are ignored. Everything else
|
||||
specifies a file with some sort of mapping:
|
||||
|
||||
EmailAliases file
|
||||
|
||||
Developers often post code under a number of different email
|
||||
addresses, but it can be desirable to group them all together in
|
||||
the statistics. An EmailAliases file just contains a bunch of
|
||||
lines of the form:
|
||||
|
||||
alias@address canonical@address
|
||||
|
||||
Any patches originating from alias@address will be treated as if
|
||||
they had come from canonical@address.
|
||||
|
||||
|
||||
EmailMap file
|
||||
|
||||
Map email addresses onto employers. These files contain lines
|
||||
like:
|
||||
|
||||
[user@]domain employer [< yyyy-mm-dd]
|
||||
|
||||
If the "user@" portion is missing, all email from the given domain
|
||||
will be treated as being associated with the given employer. If a
|
||||
date is provided, the entry is only valid up to that date;
|
||||
otherwise it is considered valid into the indefinite future. This
|
||||
feature can be useful for properly tracking developers' work when
|
||||
they change employers but do not change email addresses.
|
||||
|
||||
|
||||
GroupMap file employer
|
||||
|
||||
This is a variant of EmailMap provided for convenience; it contains
|
||||
email addresses only, all of which are associated with the given
|
||||
employer.
|
||||
|
||||
|
||||
NOTES AND CREDITS
|
||||
|
||||
Gitdm was written by Jonathan Corbet; many useful contributions have come
|
||||
from Greg Kroah-Hartman.
|
||||
|
||||
Please note that this tool is provided in the hope that it will be useful,
|
||||
but it is not put forward as an example of excellence in design or
|
||||
implementation. Hacking on gitdm tends to stop the moment it performs
|
||||
whatever task is required of it at the moment. Patches to make it less
|
||||
hacky, less ugly, and more robust are welcome.
|
||||
|
||||
Jonathan Corbet
|
||||
corbet@lwn.net
|
|
@ -0,0 +1,202 @@
|
|||
#
|
||||
# The "database".
|
||||
#
|
||||
|
||||
#
|
||||
# This code is part of the LWN git data miner.
|
||||
#
|
||||
# Copyright 2007 LWN.net
|
||||
# Copyright 2007 Jonathan Corbet <corbet@lwn.net>
|
||||
#
|
||||
# This file may be distributed under the terms of the GNU General
|
||||
# Public License, version 2.
|
||||
import sys, datetime
|
||||
|
||||
|
||||
class Hacker:
    """One developer, with the addresses, patches and signoffs seen for them.

    email and employer are parallel lists: employer[i] is the sequence of
    (end date, employer) pairs that goes with email[i].
    """

    def __init__ (self, name, id, elist, email):
        """Create a hacker known by one address and its employer list."""
        self.name = name
        self.id = id
        self.email = [ email ]
        self.employer = [ elist ]
        self.patches = [ ]
        self.signoffs = [ ]
        self.added = 0
        self.removed = 0

    def addemail (self, email, elist):
        """Record another address (with its employer list) for this hacker
        and index the hacker under it in HackersByEmail."""
        self.email.append (email)
        self.employer.append (elist)
        HackersByEmail[email] = self

    def emailemployer (self, email, date):
        """Return the employer for email at date.

        The first (end date, employer) pair whose end date lies after
        date wins.  When nothing matches, a diagnostic line is printed
        and None is returned.
        """
        for addr, history in zip (self.email, self.employer):
            if addr != email:
                continue
            for until, empl in history:
                if until > date:
                    return empl
        # Should not happen
        details = [str (item) for item in
                   (self.name, self.employer, self.email, email, date)]
        print (' '.join (['OOPS. '] + details))
        return None

    def addpatch (self, patch):
        """Credit a patch, and its added/removed line counts, to this hacker."""
        self.patches.append (patch)
        self.added += patch.added
        self.removed += patch.removed

    def addsob (self, patch):
        """Remember a patch this hacker signed off on."""
        self.signoffs.append (patch)
|
||||
|
||||
HackersByName = { }
|
||||
HackersByEmail = { }
|
||||
HackersByID = { }
|
||||
MaxID = 0
|
||||
|
||||
def StoreHacker (name, elist, email):
    """Create a Hacker with the next free ID and index it by name, email and ID.

    Returns the new Hacker.
    """
    global MaxID

    newid, MaxID = MaxID, MaxID + 1
    hacker = Hacker (name, newid, elist, email)
    HackersByName[name] = hacker
    HackersByEmail[email] = hacker
    HackersByID[newid] = hacker
    return hacker
|
||||
|
||||
def LookupEmail (addr):
    """Return the Hacker indexed under addr, or None if there is none."""
    return HackersByEmail.get (addr)
|
||||
|
||||
def LookupName (name):
    """Return the Hacker indexed under name, or None if there is none."""
    return HackersByName.get (name)
|
||||
|
||||
def LookupID (id):
    """Return the Hacker with the given ID, or None if there is none."""
    return HackersByID.get (id)
|
||||
|
||||
def AllHackers ():
    """Return a list of every Hacker stored in HackersByID."""
    everyone = list (HackersByID.values ())
    return everyone
#    return [h for h in HackersByID.values ()] #  if (h.added + h.removed) > 0]
|
||||
|
||||
def DumpDB ():
    """Write the hacker database to the file "database.dump".

    Hackers are listed in name order.  Each gets a summary line (ID,
    name, patch count, lines added/removed, signoff count) followed by
    every known address and, under it, the end date and employer name of
    each entry in that address's employer list.
    """
    out = open ('database.dump', 'w')
    try:
        for name in sorted (HackersByName.keys ()):
            h = HackersByName[name]
            out.write ('%4d %s %d p (+%d -%d) sob: %d\n' % (h.id, h.name,
                                                            len (h.patches),
                                                            h.added, h.removed,
                                                            len (h.signoffs)))
            # h.email and h.employer are parallel lists.
            for addr, history in zip (h.email, h.employer):
                out.write ('\t%s -> \n' % (addr))
                for date, empl in history:
                    out.write ('\t\t %d-%d-%d %s\n' % (date.year, date.month,
                                                       date.day, empl.name))
    finally:
        # The original left the dump file open.
        out.close ()
|
||||
|
||||
#
|
||||
# Employer info.
|
||||
#
|
||||
class Employer:
    """Running totals for one employer."""

    def __init__ (self, name):
        """Start an employer with all counters at zero and no hackers."""
        self.name = name
        self.added = 0
        self.removed = 0
        self.count = 0
        self.changed = 0
        self.sobs = 0
        self.hackers = [ ]

    def AddCSet (self, patch):
        """Account for one changeset: line counts, changeset count and,
        the first time they appear, the patch's author."""
        plus, minus = patch.added, patch.removed
        self.added += plus
        self.removed += minus
        self.changed += max (plus, minus)
        self.count += 1
        if patch.author in self.hackers:
            return
        self.hackers.append (patch.author)

    def AddSOB (self):
        """Count one more signoff for this employer."""
        self.sobs += 1
|
||||
|
||||
Employers = { }
|
||||
|
||||
def GetEmployer (name):
    """Return the Employer called name, creating and storing it on first use."""
    if name not in Employers:
        Employers[name] = Employer (name)
    return Employers[name]
|
||||
|
||||
def AllEmployers ():
    """Return a list of every Employer created so far."""
    everyone = list (Employers.values ())
    return everyone
|
||||
|
||||
#
|
||||
# The email map.
|
||||
#
|
||||
EmailAliases = { }
|
||||
|
||||
def AddEmailAlias (variant, canonical):
    """Register variant as an alias for the address canonical.

    Both addresses are stored in lower case, because RemapEmail lowers
    the address it is given before looking it up; an alias stored with
    capital letters could never match.  A repeated variant produces a
    warning on stderr and the newer mapping replaces the older one.
    """
    variant = variant.lower ()
    canonical = canonical.lower ()
    if variant in EmailAliases:
        sys.stderr.write ('Duplicate email alias for %s\n' % (variant))
    EmailAliases[variant] = canonical
|
||||
|
||||
def RemapEmail (email):
    """Return the alias target of the lower-cased email, or the
    lower-cased email itself when no alias is registered for it."""
    lowered = email.lower ()
    return EmailAliases.get (lowered, lowered)
|
||||
|
||||
#
|
||||
# Email-to-employer mapping.
|
||||
#
|
||||
EmailToEmployer = { }
|
||||
nextyear = datetime.date.today () + datetime.timedelta (days = 365)
|
||||
|
||||
def AddEmailEmployerMapping (email, employer, end = nextyear):
    """Record that email (an address or a domain) belongs to employer.

    email: looked up in lower case.
    employer: employer name; the Employer object comes from GetEmployer.
    end: date up to which the mapping holds; None or omitted means
         nextyear.

    The entries kept for one address are (end date, Employer) pairs.  A
    new entry is inserted in front of the first existing entry with a
    later end date, or appended when there is none.  An entry with the
    same end date as an existing one draws a warning on stderr.
    """
    if end is None:
        end = nextyear
    email = email.lower ()
    empl = GetEmployer (employer)
    try:
        l = EmailToEmployer[email]
    except KeyError:
        EmailToEmployer[email] = [(end, empl)]
        return
    for i in range (0, len (l)):
        date, xempl = l[i]
        if date == end: # probably both nextyear
            sys.stderr.write ('WARNING: duplicate email/empl for %s\n' % (email))
        if date > end:
            l.insert (i, (end, empl))
            return
    l.append ((end, empl))
|
||||
|
||||
def MapToEmployer (email, unknown = 0):
    """Return the list of (end date, Employer) pairs for an address.

    The lower-cased address is tried first.  Failing that, suffixes of
    its domain are tried, starting with the last two labels and growing
    towards the full domain.  An address without '@' maps to the
    employer 'Funky'.  When nothing is found the result is a one-entry
    list ending at nextyear, for '(Unknown)' if unknown is true and
    otherwise for an employer named after the address itself.
    """
    email = email.lower ()
    if email in EmailToEmployer:
        return EmailToEmployer[email]
    pieces = email.split ('@')
    if len (pieces) < 2:
        print ('Oops...funky email %s' % email)
        return [(nextyear, GetEmployer ('Funky'))]
    labels = pieces[1].split ('.')
    for start in reversed (range (len (labels) - 1)):
        suffix = '.'.join (labels[start:])
        if suffix in EmailToEmployer:
            return EmailToEmployer[suffix]
    if unknown:
        fallback = '(Unknown)'
    else:
        fallback = email
    return [(nextyear, GetEmployer (fallback))]
|
||||
|
||||
|
||||
def LookupEmployer (email, mapunknown = 0):
    """Return MapToEmployer's employer list for email; mapunknown is
    passed through as its "unknown" flag."""
    return MapToEmployer (email, mapunknown) # GetEmployer (ename)
|
|
@ -0,0 +1,499 @@
|
|||
#!/usr/bin/python
|
||||
#
|
||||
|
||||
#
|
||||
# This code is part of the LWN git data miner.
|
||||
#
|
||||
# Copyright 2007 LWN.net
|
||||
# Copyright 2007 Jonathan Corbet <corbet@lwn.net>
|
||||
#
|
||||
# This file may be distributed under the terms of the GNU General
|
||||
# Public License, version 2.
|
||||
|
||||
|
||||
import database, ConfigFile
|
||||
import getopt, datetime
|
||||
import os, re, sys, rfc822, string
|
||||
|
||||
#
|
||||
# Some people, when confronted with a problem, think "I know, I'll use regular
|
||||
# expressions." Now they have two problems.
|
||||
# -- Jamie Zawinski
|
||||
#
|
||||
Pcommit = re.compile (r'^commit ([0-9a-f]+)$')
|
||||
Pauthor = re.compile (r'^Author: ([^<]+)\s<([^>]+)>$')
|
||||
Psob = re.compile (r'Signed-off-by:\s+([^<]+)\s+<([^>]+)>')
|
||||
Pmerge = re.compile (r'^Merge:.*$')
|
||||
Padd = re.compile (r'^\+[^\+].*$')
|
||||
Prem = re.compile (r'^-[^-].*$')
|
||||
Pdate = re.compile (r'^(Commit)?Date:\s+(.*)$')
|
||||
Pfilea = re.compile (r'^---\s+(.*)$')
|
||||
Pfileb = re.compile (r'^\+\+\+\s+(.*)$')
|
||||
|
||||
class patch:
    # An empty record type.  grabpatch() creates an instance per changeset
    # and sets commit, merge, added, removed, author, email and sobs on
    # it, plus date when a date line is seen.
    pass
|
||||
|
||||
|
||||
#
|
||||
# Control options.
|
||||
#
|
||||
Outfile = sys.stdout
|
||||
ListCount = 999999
|
||||
MapUnknown = 0
|
||||
DevReports = 1
|
||||
DateStats = 0
|
||||
AuthorSOBs = 1
|
||||
FileFilter = None
|
||||
AkpmOverLt = 0
|
||||
DumpDB = 0
|
||||
CFName = 'gitdm.config'
|
||||
#
|
||||
# Options:
|
||||
#
|
||||
# -a Andrew Morton's signoffs shadow Linus's
|
||||
# -c cfile Specify a configuration file
|
||||
# -d Omit individual developer stats
|
||||
# -D Output date statistics
|
||||
# -h hfile HTML output to hfile
|
||||
# -l count Maximum length for output lists
|
||||
# -o file File for text output
|
||||
# -r pattern Restrict to files matching pattern
|
||||
# -s Ignore author SOB lines
|
||||
# -u Map unknown employers to '(Unknown)'
|
||||
# -z Dump out the hacker database at completion
|
||||
|
||||
def ParseOpts ():
    """Set the global control options from the command line (sys.argv).

    See the option list above for the meaning of each flag.  The -h and
    -o files are opened for writing here, and -r compiles its argument
    as a regular expression.  Anything getopt rejects raises
    getopt.GetoptError.
    """
    global Outfile, ListCount, MapUnknown, HTMLfile, DevReports
    global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
    global CFName

    # 'c:' was missing from the option string, so the documented
    # "-c file" option was rejected by getopt before it could be handled.
    opts, rest = getopt.getopt (sys.argv[1:], 'ac:dDh:l:o:r:suz')
    for opt in opts:
        if opt[0] == '-a':
            AkpmOverLt = 1
        elif opt[0] == '-c':
            CFName = opt[1]
        elif opt[0] == '-d':
            DevReports = 0
        elif opt[0] == '-D':
            DateStats = 1
        elif opt[0] == '-h':
            HTMLfile = open (opt[1], 'w')
        elif opt[0] == '-l':
            ListCount = int (opt[1])
        elif opt[0] == '-o':
            Outfile = open (opt[1], 'w')
        elif opt[0] == '-r':
            print ('Filter on "%s"' % (opt[1]))
            FileFilter = re.compile (opt[1])
        elif opt[0] == '-s':
            AuthorSOBs = 0
        elif opt[0] == '-u':
            MapUnknown = 1
        elif opt[0] == '-z':
            DumpDB = 1
|
||||
|
||||
|
||||
|
||||
def LookupStoreHacker (name, email):
    """Return the hacker for name/email, creating one if necessary.

    The address is remapped through the alias table first.  A hacker
    already known under that address is returned as is; a hacker known
    only by name gains the address; otherwise a new hacker is stored.
    """
    email = database.RemapEmail (email)
    known = database.LookupEmail (email)
    if known: # already there
        return known
    elist = database.LookupEmployer (email, MapUnknown)
    namesake = database.LookupName (name)
    if not namesake:
        return database.StoreHacker(name, elist, email)
    # new email
    namesake.addemail (email, elist)
    return namesake
|
||||
|
||||
#
|
||||
# Date tracking.
|
||||
#
|
||||
|
||||
DateMap = { }
|
||||
|
||||
def AddDateLines(date, lines):
    """Add lines to the running total kept in DateMap for date's day.

    date: an object with year, month and day attributes.
    lines: number of changed lines; a count above 1000000 is reported
           and left out.

    DateMap is keyed by (year, month, day) tuples, which is what
    PrintDateStats indexes when it writes the totals.
    """
    if lines > 1000000:
        print ('Skip big patch (%d)' % lines)
        return
    dt = (date.year, date.month, date.day)
    # The original computed dt but then used the date object itself as
    # the key, which PrintDateStats cannot subscript.
    try:
        DateMap[dt] += lines
    except KeyError:
        DateMap[dt] = lines
|
||||
|
||||
def PrintDateStats():
    """Write the per-day line counts in DateMap to the file "datelc".

    One line per key, in sorted key order: the key's three components as
    year/month/day, the lines recorded for that key, and the cumulative
    total so far.
    """
    total = 0
    datef = open ('datelc', 'w')
    try:
        for date in sorted (DateMap.keys ()):
            total += DateMap[date]
            datef.write ('%d/%02d/%02d %6d %7d\n' % (date[0], date[1], date[2],
                                                    DateMap[date], total))
    finally:
        # The original never closed the output file.
        datef.close ()
|
||||
|
||||
#
|
||||
# The core hack for grabbing the information about a changeset.
|
||||
#
|
||||
def grabpatch():
    """Read one changeset from standard input and return it as a patch.

    The global NextLine always holds the line read ahead.  Lines are
    skipped until one matches Pcommit; the changeset then runs up to the
    next such line or the end of input.  Returns None when the input ends
    before a commit line is found.

    Besides building the patch, this updates the global line totals, the
    per-date totals and the employers' changeset and signoff counters.
    """
    global NextLine, TotalAdded, TotalRemoved, TotalChanged

    # Find the start of the next changeset.
    while (1):
        m = Pcommit.match (NextLine)
        if m:
            break;
        NextLine = sys.stdin.readline ()
        if not NextLine:
            return

    # Start with placeholder author information; an Author: line, if
    # one shows up below, replaces it.
    p = patch()
    p.commit = m.group (1)
    p.merge = p.added = p.removed = 0
    p.author = LookupStoreHacker('Unknown hacker', 'unknown@hacker.net')
    p.email = 'unknown@hacker.net'
    p.sobs = [ ]
    NextLine = sys.stdin.readline ()
    # With a file filter in force, diff lines are ignored until
    # ApplyFileFilter sees a matching file name.
    ignore = (FileFilter is not None)
    while NextLine:
        Line = NextLine
        #
        # If this line starts a new commit, drop out.
        #
        m = Pcommit.match (Line)
        if m:
            break
        NextLine = sys.stdin.readline ()
        #
        # Maybe it's an author line?
        #
        m = Pauthor.match (Line)
        if m:
            p.email = database.RemapEmail (m.group (2))
            p.author = LookupStoreHacker(m.group (1), p.email)
            continue
        #
        # Could be a signed-off-by:
        #
        m = Psob.search (Line)
        if m:
            email = database.RemapEmail (m.group (2))
            sobber = LookupStoreHacker(m.group (1), email)
            # The author's own signoff is kept only when AuthorSOBs is set.
            if sobber != p.author or AuthorSOBs:
                p.sobs.append ((email, LookupStoreHacker(m.group (1), m.group (2))))
            continue
        #
        # If this one is a merge, make note of the fact.
        #
        m = Pmerge.match (Line)
        if m:
            p.merge = 1
            continue
        #
        # See if it's the date.
        #
        m = Pdate.match (Line)
        if m:
            dt = rfc822.parsedate(m.group (2))
            p.date = datetime.date (dt[0], dt[1], dt[2])
            continue
        #
        # If we have a file filter, check for file lines.
        #
        if FileFilter:
            ignore = ApplyFileFilter (Line, ignore)
        #
        # OK, maybe it's part of the diff itself.
        #
        if not ignore:
            if Padd.match (Line):
                p.added += 1
                continue
            if Prem.match (Line):
                p.removed += 1
    #
    # Record some global information - but only if this patch had
    # stuff which wasn't ignored.  This work should be done
    # elsewhere,
    #
    # NOTE(review): p.date is only assigned when a date line matched
    # above; a changeset without one would fail here - confirm the input
    # always carries a date.
    if ((p.added + p.removed) > 0 or not FileFilter) and not p.merge:
        TotalAdded += p.added
        TotalRemoved += p.removed
        TotalChanged += max (p.added, p.removed)
        AddDateLines (p.date, max (p.added, p.removed))
        # NOTE(review): emailemployer() returns None when it finds no
        # entry, which the calls on empl below do not allow for.
        empl = p.author.emailemployer (p.email, p.date)
        empl.AddCSet (p)
        if AkpmOverLt:
            TrimLTSOBs (p)
        for sobemail, sobber in p.sobs:
            empl = sobber.emailemployer (sobemail, p.date)
            empl.AddSOB()
    return p
|
||||
|
||||
|
||||
def ApplyFileFilter (line, ignore):
    """Return the new "ignore" state after looking at one diff line.

    A first file line (Pfilea, "--- name") decides outright: 0 when
    FileFilter finds a match in the name, 1 when it does not.  A second
    file line (Pfileb, "+++ name") can only switch ignoring off.  Any
    other line leaves ignore as it was.
    """
    #
    # If this is the first file line (--- a/), set ignore one way
    # or the other.
    #
    old = Pfilea.match (line)
    if old:
        if FileFilter.search (old.group (1)):
            return 0
        return 1
    #
    # For the second line, we can turn ignore off, but not on
    #
    new = Pfileb.match (line)
    if new and FileFilter.search (new.group (1)):
        return 0
    return ignore
|
||||
|
||||
#
|
||||
# If this patch is signed off by both Andrew Morton and Linus Torvalds,
|
||||
# remove the (redundant) Linus signoff.
|
||||
#
|
||||
def TrimLTSOBs (p):
    """Remove the Linus entry from p.sobs when the Akpm entry is there too."""
    signoffs = p.sobs
    if Linus not in signoffs or Akpm not in signoffs:
        return
    signoffs.remove (Linus)
|
||||
|
||||
#
|
||||
# HTML output support stuff.
|
||||
#
|
||||
HTMLfile = None
|
||||
HTMLclass = 0
|
||||
HClasses = ['Even', 'Odd']
|
||||
|
||||
THead = '''<p>
|
||||
<table cellspacing=3>
|
||||
<tr><th colspan=3>%s</th></tr>
|
||||
'''
|
||||
|
||||
|
||||
def BeginReport (title):
    """Start a report: write the title to the text output and, when HTML
    output is enabled, open a table headed by the title and reset the
    row-class toggle."""
    global HTMLclass

    heading = '\n%s\n' % title
    Outfile.write (heading)
    if not HTMLfile:
        return
    HTMLfile.write (THead % title)
    HTMLclass = 0
|
||||
|
||||
TRow = ''' <tr class="%s">
|
||||
<td>%s</td><td align="right">%d</td><td align="right">%.1f%%</td></tr>
|
||||
'''
|
||||
|
||||
def ReportLine (text, count, pct):
    """Write one report row (label, count, percentage).

    Rows with a zero count are skipped.  With HTML output enabled a table
    row is written as well, and the row class alternates between the two
    entries of HClasses.
    """
    global HTMLclass
    if count != 0:
        Outfile.write ('%-25s %4d (%.1f%%)\n' % (text, count, pct))
        if HTMLfile:
            rowclass = HClasses[HTMLclass]
            HTMLfile.write (TRow % (rowclass, text, count, pct))
            HTMLclass ^= 1
|
||||
|
||||
def EndReport ():
    """Close the HTML table of the current report, if HTML output is on."""
    if not HTMLfile:
        return
    HTMLfile.write ('</table>\n\n')
|
||||
|
||||
#
|
||||
# Comparison and report generation functions.
|
||||
#
|
||||
def ComparePCount (h1, h2):
    """Comparison function ordering hackers by descending patch count."""
    first = len (h1.patches)
    second = len (h2.patches)
    return second - first
|
||||
|
||||
def ReportByPCount (hlist):
    """Report the developers with the most changesets.

    hlist is sorted in place.  Each share is a percentage of CSCount.
    The loop stops once ListCount entries have been examined.
    """
    hlist.sort (ComparePCount)
    BeginReport ('Developers with the most changesets')
    for position, h in enumerate (hlist):
        pcount = len (h.patches)
        if pcount > 0:
            ReportLine (h.name, pcount, (pcount*100.0)/CSCount)
        if position + 1 >= ListCount:
            break
    EndReport ()
|
||||
|
||||
def CompareLChanged (h1, h2):
    """Comparison function ordering hackers by descending changed lines,
    where "changed" is the larger of lines added and lines removed."""
    first = max (h1.added, h1.removed)
    second = max (h2.added, h2.removed)
    return second - first
|
||||
|
||||
def ReportByLChanged (hlist):
    """Report the developers with the most changed lines.

    hlist is sorted in place.  Each share is a percentage of
    TotalChanged.  The loop stops once ListCount entries have been
    examined.
    """
    hlist.sort (CompareLChanged)
    BeginReport ('Developers with the most changed lines')
    for position, h in enumerate (hlist):
        if (h.added + h.removed) > 0:
            changed = max (h.added, h.removed)
            ReportLine (h.name, changed, (changed*100.0)/TotalChanged)
        if position + 1 >= ListCount:
            break
    EndReport ()
|
||||
|
||||
def CompareLRemoved (h1, h2):
    """Comparison function ordering hackers by descending net lines
    removed (removed minus added)."""
    first = h1.removed - h1.added
    second = h2.removed - h2.added
    return second - first
|
||||
|
||||
def ReportByLRemoved (hlist):
    """Report the developers who removed more lines than they added.

    hlist is sorted in place.  Each share is a percentage of
    TotalRemoved.  The loop stops once ListCount entries have been
    examined.
    """
    hlist.sort (CompareLRemoved)
    BeginReport ('Developers with the most lines removed')
    for position, h in enumerate (hlist):
        delta = h.added - h.removed
        if delta < 0:
            ReportLine (h.name, -delta, (-delta*100.0)/TotalRemoved)
        if position + 1 >= ListCount:
            break
    EndReport ()
|
||||
|
||||
def CompareEPCount (e1, e2):
    """Comparison function ordering employers by descending changeset count."""
    first, second = e1.count, e2.count
    return second - first
|
||||
|
||||
def ReportByPCEmpl (elist):
    """Report the employers with the most changesets.

    elist is sorted in place.  Each share is a percentage of CSCount.
    The loop stops once ListCount entries have been examined.
    """
    elist.sort (CompareEPCount)
    BeginReport ('Top changeset contributors by employer')
    for position, e in enumerate (elist):
        if e.count != 0:
            ReportLine (e.name, e.count, (e.count*100.0)/CSCount)
        if position + 1 >= ListCount:
            break
    EndReport ()
|
||||
|
||||
|
||||
|
||||
def CompareELChanged (e1, e2):
    """Comparison function ordering employers by descending changed lines."""
    first, second = e1.changed, e2.changed
    return second - first
|
||||
|
||||
def ReportByELChanged (elist):
    """Report the employers with the most changed lines.

    elist is sorted in place.  Each share is a percentage of
    TotalChanged.  The loop stops once ListCount entries have been
    examined.
    """
    elist.sort (CompareELChanged)
    BeginReport ('Top lines changed by employer')
    for position, e in enumerate (elist):
        if e.changed != 0:
            ReportLine (e.name, e.changed, (e.changed*100.0)/TotalChanged)
        if position + 1 >= ListCount:
            break
    EndReport ()
|
||||
|
||||
|
||||
|
||||
def CompareSOBs (h1, h2):
    """Comparison function ordering hackers by descending signoff count."""
    first = len (h1.signoffs)
    second = len (h2.signoffs)
    return second - first
|
||||
|
||||
def ReportBySOBs (hlist):
    """Report the developers with the most signoffs.

    hlist is sorted in place.  Each share is a percentage of the signoffs
    of all hackers in hlist.  The loop stops once ListCount entries have
    been examined.
    """
    hlist.sort (CompareSOBs)
    totalsobs = sum ([len (h.signoffs) for h in hlist])
    BeginReport ('Developers with the most signoffs (total %d)' % totalsobs)
    for position, h in enumerate (hlist):
        scount = len (h.signoffs)
        if scount > 0:
            ReportLine (h.name, scount, (scount*100.0)/totalsobs)
        if position + 1 >= ListCount:
            break
    EndReport ()
|
||||
|
||||
def CompareESOBs (e1, e2):
    """Comparison function ordering employers by descending signoff count."""
    first, second = e1.sobs, e2.sobs
    return second - first
|
||||
|
||||
def ReportByESOBs (elist):
    """Report the employers with the most signoffs.

    elist is sorted in place.  Each share is a percentage of the signoffs
    of all employers in elist.  The loop stops once ListCount entries
    have been examined.
    """
    elist.sort (CompareESOBs)
    totalsobs = sum ([e.sobs for e in elist])
    BeginReport ('Employers with the most signoffs (total %d)' % totalsobs)
    for position, e in enumerate (elist):
        if e.sobs > 0:
            ReportLine (e.name, e.sobs, (e.sobs*100.0)/totalsobs)
        if position + 1 >= ListCount:
            break
    EndReport ()
|
||||
|
||||
#
|
||||
# Here starts the real program. Read the config files.
|
||||
#
|
||||
#
# Handle the command line before anything else: the options choose the
# configuration file (CFName) read on the next line and set MapUnknown,
# which LookupStoreHacker consults when the hackers below are seeded.
# Parsing the options afterwards left both at their defaults.
#
ParseOpts ()
ConfigFile.ConfigFile (CFName)

#
# Let's pre-seed the database with a couple of hackers
# we want to remember.
#
Linus = ('torvalds@linux-foundation.org',
         LookupStoreHacker ('Linus Torvalds', 'torvalds@linux-foundation.org'))
Akpm = ('akpm@linux-foundation.org',
        LookupStoreHacker ('Andrew Morton', 'akpm@linux-foundation.org'))

# State shared with grabpatch(): the read-ahead input line and the
# running line totals.
NextLine = sys.stdin.readline ()
TotalChanged = TotalAdded = TotalRemoved = 0
|
||||
|
||||
#
|
||||
# Snarf changesets.
|
||||
#
|
||||
print 'Grabbing changesets...\r',
|
||||
sys.stdout.flush ()
|
||||
|
||||
printcount = CSCount = 0
|
||||
while (1):
|
||||
if (printcount % 50) == 0:
|
||||
print 'Grabbing changesets...%d\r' % printcount,
|
||||
sys.stdout.flush ()
|
||||
printcount += 1
|
||||
p = grabpatch()
|
||||
if not p:
|
||||
break
|
||||
if p.added > 100000 or p.removed > 100000:
|
||||
print 'Skipping massive add'
|
||||
continue
|
||||
if FileFilter and p.added == 0 and p.removed == 0:
|
||||
continue
|
||||
if not p.merge:
|
||||
p.author.addpatch (p)
|
||||
for sobemail, sob in p.sobs:
|
||||
sob.addsob (p)
|
||||
CSCount += 1
|
||||
print 'Grabbing changesets...done'
|
||||
|
||||
if DumpDB:
|
||||
database.DumpDB ()
|
||||
#
|
||||
# Say something
|
||||
#
|
||||
hlist = database.AllHackers ()
|
||||
elist = database.AllEmployers ()
|
||||
Outfile.write ('Processed %d csets from %d developers\n' % (CSCount,
|
||||
len (hlist)))
|
||||
Outfile.write ('%d employers found\n' % len (elist))
|
||||
Outfile.write ('A total of %d lines added, %d removed (delta %d)\n' %
|
||||
(TotalAdded, TotalRemoved, TotalAdded - TotalRemoved))
|
||||
if TotalChanged == 0:
|
||||
TotalChanged = 1 # HACK to avoid div by zero
|
||||
if DateStats:
|
||||
PrintDateStats ()
|
||||
sys.exit(0)
|
||||
|
||||
if DevReports:
|
||||
ReportByPCount (hlist)
|
||||
ReportByLChanged (hlist)
|
||||
ReportByLRemoved (hlist)
|
||||
ReportBySOBs (hlist)
|
||||
ReportByPCEmpl (elist)
|
||||
ReportByELChanged (elist)
|
||||
ReportByESOBs (elist)
|
|
@ -0,0 +1,22 @@
|
|||
#
|
||||
# This is a sample gitdm configuration file.
|
||||
#
|
||||
|
||||
#
|
||||
# EmailAliases lets us cope with developers who use more
|
||||
# than one address.
|
||||
#
|
||||
EmailAliases sample-config/aliases
|
||||
|
||||
#
|
||||
# EmailMap does the main work of mapping addresses onto
|
||||
# employers.
|
||||
#
|
||||
EmailMap sample-config/domain-map
|
||||
|
||||
#
|
||||
# Use GroupMap to map a file full of addresses to the
|
||||
# same employer
|
||||
#
|
||||
# GroupMap sample-config/illuminati The Illuminati
|
||||
#
|
|
@ -0,0 +1,5 @@
|
|||
#
|
||||
# This is the email aliases file, mapping secondary addresses
|
||||
# onto a single, canonical address.
|
||||
#
|
||||
corbet@eklektix.com corbet@lwn.net
|
|
@ -0,0 +1,242 @@
|
|||
#
|
||||
# Here is a set of mappings of domain names onto employer names.
|
||||
#
|
||||
8d.com 8D Technologies
|
||||
aconex.com Aconex
|
||||
adaptec.com Adaptec
|
||||
aist.go.jp National Institute of Advanced Industrial Science and Technology
|
||||
akamai.com Akamai Technologies
|
||||
am.sony.com Sony
|
||||
amd.com AMD
|
||||
analog.com Analog Devices
|
||||
arastra.com Arastra Inc
|
||||
arm.com ARM
|
||||
artecdesign.ee Artec Design
|
||||
arvoo.nl ARVOO Engineering
|
||||
atmel.com Atmel
|
||||
atomide.com Atomide
|
||||
avtrex.com Avtrex
|
||||
axis.com Axis Communications
|
||||
azingo.com Azingo
|
||||
balabit.com BalaBit
|
||||
balabit.hu BalaBit
|
||||
baslerweb.com Basler Vision Technologies
|
||||
bluehost.com Bluehost
|
||||
bluewatersys.com Bluewater Systems
|
||||
broadcom.com Broadcom
|
||||
brontes3d.com Brontes Technologies
|
||||
bull.net Bull SAS
|
||||
cam.ac.uk University of Cambridge
|
||||
ccur.com Concurrent Computer Corporation
|
||||
celunite.com Azingo
|
||||
chelsio.com Chelsio
|
||||
cisco.com Cisco
|
||||
citi.umich.edu Univ. of Michigan CITI
|
||||
clusterfs.com Sun
|
||||
cn.fujitsu.com Fujitsu
|
||||
compulab.co.il CompuLab
|
||||
computergmbh.de CC Computer Consultants
|
||||
comx.dk ComX Networks
|
||||
conectiva.com.br Mandriva
|
||||
coraid.com Coraid
|
||||
cosmosbay.com Cosmosbay~Vectis
|
||||
cozybit.com cozybit
|
||||
cray.com Cray
|
||||
csr.com CSR
|
||||
cyberguard.com Secure Computing
|
||||
cybernetics.com Cybernetics
|
||||
data.slu.se Uppsala University
|
||||
dave.eu Dave S.r.l.
|
||||
de.bosch.com Bosch
|
||||
dell.com Dell
|
||||
denx.de DENX Software Engineering
|
||||
devicescape.com Devicescape
|
||||
digi.com Digi International
|
||||
dti2.net DTI2 - Desarrollo de la tecnologia de las comunicaciones
|
||||
edesix.com Edesix Ltd
|
||||
elandigitalsystems.com Elan Digital Systems
|
||||
embeddedalley.com Embedded Alley Solutions
|
||||
empirix.com Empirix
|
||||
emulex.com Emulex
|
||||
endrelia.com Endrelia
|
||||
ericsson.com Ericsson
|
||||
fixstars.com Fixstars Technologies
|
||||
free-electrons.com Free Electrons
|
||||
freescale.com Freescale
|
||||
fujitsu.com Fujitsu
|
||||
gaisler.com Gaisler Research
|
||||
gefanuc.com GE Fanuc
|
||||
geomatys.fr Geomatys
|
||||
google.com Google
|
||||
gvs.co.yu GVS
|
||||
hansenpartnership.com Hansen Partnership
|
||||
harris.com Harris Corporation
|
||||
hauppauge.com Hauppauge
|
||||
hermes-softlab.com HERMES SoftLab
|
||||
hevs.ch HES-SO Valais Wallis
|
||||
highpoint-tech.com HighPoint Technologies
|
||||
hitachi.co.jp Hitachi
|
||||
hitachi.com Hitachi
|
||||
hitachisoft.jp Hitachi
|
||||
hp.com HP
|
||||
hvsistemas.es HV Sistemas
|
||||
ibm.com IBM
|
||||
ibp.de ipb (uk) Ltd.
|
||||
icplus.com.tw IC Plus
|
||||
igel.co.jp igel
|
||||
inl.fr INL
|
||||
inria.fr INRIA
|
||||
intel.com Intel
|
||||
iram.es IRAM
|
||||
jmicron.com jmicron.com
|
||||
jp.fujitsu.com Fujitsu
|
||||
katalix.com Katalix Systems
|
||||
keyspan.com InnoSys
|
||||
laptop.org OLPC
|
||||
laurelnetworks.com ECI Telecom
|
||||
linutronix.de linutronix
|
||||
linux-foundation.org Linux Foundation
|
||||
lippert-at.de LiPPERT Embedded Computers GmbH
|
||||
lippertembedded.de LiPPERT Embedded Computers GmbH
|
||||
llnl.gov Lawrence Livermore National Laboratory
|
||||
lnxi.com Linux Networx
|
||||
logitech.com Logitech
|
||||
lsi.com LSI Logic
|
||||
lsil.com LSI Logic
|
||||
lwn.net LWN.net
|
||||
macqel.be Macq Electronique
|
||||
macqel.com Macq Electronique
|
||||
mandriva.com Mandriva
|
||||
mandriva.com.br Mandriva
|
||||
marvell.com Marvell
|
||||
mellanox.co.il Mellanox
|
||||
melware.de Cytronics & Melware
|
||||
microgate.com MicroGate Systems
|
||||
mips.com MIPS
|
||||
miraclelinux.com Miracle Linux
|
||||
mn-solutions.de M&N Solutions
|
||||
moreton.com.au Secure Computing
|
||||
motorola.com Motorola
|
||||
movial.fi Movial
|
||||
mvista.com MontaVista
|
||||
myri.com Myricom
|
||||
namesys.com NameSys
|
||||
nec.co.jp NEC
|
||||
nec.com NEC
|
||||
netapp.com NetApp
|
||||
neterion.com Neterion
|
||||
netxen.com NetXen
|
||||
niif.hu NIIF Institute
|
||||
nokia.com Nokia
|
||||
nomadgs.com Nomad Global Solutions
|
||||
nortel.com Nortel
|
||||
novell.com Novell
|
||||
ntt.co.jp NTT
|
||||
ntts.co.jp NTT
|
||||
nuovasystems.com Nuova Systems
|
||||
nvidia.com NVidia
|
||||
obsidianresearch.com Obsidian Research
|
||||
octant-fr.com Octant Informatique
|
||||
onelan.co.uk ONELAN
|
||||
onstor.com Onstor
|
||||
openedhand.com OpenedHand
|
||||
opengridcomputing.com Open Grid Computing
|
||||
openmoko.org OpenMoko
|
||||
openvz.org Parallels
|
||||
oracle.com Oracle
|
||||
ornl.gov Oak Ridge National Laboratory
|
||||
osdl.org Linux Foundation
|
||||
ozlabs.org IBM
|
||||
panasas.com Panasas
|
||||
panasonic.com Panasonic
|
||||
papercut.bz PaperCut Software
|
||||
papercut.com PaperCut Software
|
||||
parallels.com Parallels
|
||||
pasemi.com PA Semi Corporation
|
||||
pengutronix.de Pengutronix
|
||||
pheonix.com Phoenix
|
||||
philosys.de Philosys Software
|
||||
pikron.com PiKRON s.r.o
|
||||
pmc-sierra.com PMC-Sierra
|
||||
promise.com Promise Technology
|
||||
qlogic.com QLogic
|
||||
qumranet.com Qumranet
|
||||
realtek.com.tw Realtek
|
||||
redhat.com Red Hat
|
||||
renesas.com Renesas Technology
|
||||
rockwell.com Rockwell
|
||||
rowland.harvard.edu Rowland Institute, Harvard
|
||||
rtr.ca Real-Time Remedies
|
||||
samsung.com Samsung
|
||||
sanpeople.com SANPeople
|
||||
savantav.com Savant Systems
|
||||
secretlab.ca Secretlab
|
||||
securecomputing.com Secure Computing
|
||||
semihalf.com Semihalf Embedded Systems
|
||||
sf-tec.de Science Fiction Technologies
|
||||
sgi.com SGI
|
||||
sicortex.com Sicortex
|
||||
siemens.com Siemens
|
||||
sierrawireless.com Sierra Wireless
|
||||
sigma-chemnitz.de SIGMA Chemnitz
|
||||
snapgear.com Snapgear
|
||||
solidboot.com Solid Boot Ltd.
|
||||
sony.co.jp Sony
|
||||
sony.com Sony
|
||||
sonycom.com Sony
|
||||
spidernet.net SpiderNet Services
|
||||
st.com ST Microelectronics
|
||||
stlinux.com ST Microelectronics
|
||||
starentnetworks.com Starent Networks
|
||||
steeleye.com SteelEye
|
||||
sun.com Sun
|
||||
suse.com Novell
|
||||
suse.cz Novell
|
||||
suse.de Novell
|
||||
sw.ru Parallels
|
||||
swsoft.com Parallels
|
||||
tapsys.com Tapestry Systems
|
||||
telargo.com Telargo
|
||||
tensilica.com Tensilica
|
||||
terascala.com Terascala
|
||||
thinktube.com Thinktube
|
||||
ti.com Texas Instruments
|
||||
til-technologies.fr TIL Technologies
|
||||
tls.msk.ru Telecom-Service
|
||||
toptica.com TOPTICA Photonics
|
||||
toshiba.co.jp Toshiba
|
||||
total-knowledge.com Total Knowledge
|
||||
towertech.it Tower Technologies
|
||||
tpi.com TriplePoint
|
||||
transitive.com Transitive
|
||||
transmode.se Transmode Systems
|
||||
tresys.com Tresys
|
||||
tripeaks.co.jp Tripeaks
|
||||
trustedcs.com Trusted Computer Solutions
|
||||
tungstengraphics.com Tungsten Graphics
|
||||
tycho.nsa.gov US National Security Agency
|
||||
ubuntu.com Canonical
|
||||
uhulinux.hu UHU-Linux
|
||||
unisys.com Unisys
|
||||
valinux.co.jp VA Linux Systems Japan
|
||||
verismonetworks.com Verismo
|
||||
veritas.com Veritas
|
||||
via.com.tw Via
|
||||
vivecode.com Vivecode
|
||||
vmware.com VMWare
|
||||
volkswagen.de Volkswagen
|
||||
voltaire.com Voltaire
|
||||
vyatta.com Vyatta
|
||||
wabtec.com Wabtec Railway Electronics
|
||||
wacom.com Wacom
|
||||
winbond.com Winbond Electronics
|
||||
winbond.com.tw Winbond Electronics
|
||||
wincor-nixdorf.com Wincor Nixdorf
|
||||
windriver.com Wind River
|
||||
wipro.com Wipro
|
||||
wolfsonmicro.com Wolfson Microelectronics
|
||||
xensource.com XenSource
|
||||
xiv.co.il XIV Information Systems
|
||||
xivstorage.com XIV Information Systems
|
||||
trinnov.com Trinnov Audio
|
||||
citrix.com Citrix
|
Loading…
Reference in New Issue