326 lines
9.1 KiB
Python
326 lines
9.1 KiB
Python
#
|
|
# The "database".
|
|
#
|
|
# This code is part of the LWN git data miner.
|
|
#
|
|
# Copyright 2007-11 Eklektix, Inc.
|
|
# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
|
|
#
|
|
# This file may be distributed under the terms of the GNU General
|
|
# Public License, version 2.
|
|
#
|
|
import sys, datetime
|
|
|
|
|
|
class Hacker:
    """One contributor: identity, email/employer history and activity tallies.

    `email` and `employer` are parallel lists: employer[i] is the employer
    list that goes with email[i].  Each employer list is iterated as
    (date, employer) pairs.
    """

    def __init__(self, name, id, elist, email):
        """Create a hacker known by one email address.

        name  -- the hacker's name.
        id    -- identifier stored on the instance.
        elist -- employer list for `email`: (date, employer) pairs.
        email -- the first known email address.
        """
        self.name = name
        self.id = id
        self.employer = [elist]
        self.email = [email]
        # Line counts accumulated by addpatch().
        self.added = self.removed = 0
        self.patches = []
        self.signoffs = []
        self.reviews = []
        self.tested = []
        self.reports = []
        self.bugsfixed = []
        # Counters bumped by testcredit() / reportcredit().
        self.testcred = self.repcred = 0
        self.versions = []

    def addemail(self, email, elist):
        """Record another address (with its employer list) for this hacker
        and index the hacker under it in HackersByEmail."""
        self.email.append(email)
        self.employer.append(elist)
        HackersByEmail[email] = self

    def emailemployer(self, email, date):
        """Return the employer tied to `email` at `date`.

        The employer lists of every address equal to `email` are scanned
        in order; the first entry whose date is strictly later than `date`
        wins.  If nothing matches, a diagnostic line is printed and None
        is returned.
        """
        for address, elist in zip(self.email, self.employer):
            if address != email:
                continue
            for edate, empl in elist:
                if edate > date:
                    return empl
        # A single pre-formatted argument keeps this valid on both
        # Python 2 and Python 3 while producing the same text as the old
        # comma-separated print statement.
        print('OOPS.  %s %s %s %s %s'
              % (self.name, self.employer, self.email, email, date))
        return None  # Should not happen

    def addpatch(self, patch):
        """Add the patch's added/removed counts to the totals and keep it."""
        self.added += patch.added
        self.removed += patch.removed
        self.patches.append(patch)

    #
    # Note that the author is represented in this release.
    #
    def addversion(self, release):
        """Remember `release` once; repeated calls do not duplicate it."""
        if release not in self.versions:
            self.versions.append(release)

    #
    # There's got to be a better way.
    #
    def addsob(self, patch):
        """Append `patch` to the signoffs list."""
        self.signoffs.append(patch)

    def addreview(self, patch):
        """Append `patch` to the reviews list."""
        self.reviews.append(patch)

    def addtested(self, patch):
        """Append `patch` to the tested list."""
        self.tested.append(patch)

    def addreport(self, patch):
        """Append `patch` to the reports list."""
        self.reports.append(patch)

    def reportcredit(self, patch):
        """Increment the report-credit counter (`patch` is not inspected)."""
        self.repcred += 1

    def testcredit(self, patch):
        """Increment the test-credit counter (`patch` is not inspected)."""
        self.testcred += 1

    def addbugfixed(self, bug):
        """Append `bug` to the list of fixed bugs."""
        self.bugsfixed.append(bug)
|
|
|
|
# Registries of every stored Hacker, keyed by name, by email address and
# by numeric id.  MaxID is the next id that StoreHacker will hand out.
HackersByName = dict()
HackersByEmail = dict()
HackersByID = dict()
MaxID = 0
|
|
|
|
def StoreHacker(name, elist, email):
    """Create a Hacker, give it the next free id and register it.

    The new object is indexed in HackersByName, HackersByEmail and
    HackersByID, and is returned to the caller.
    """
    global MaxID

    # Claim the current id and advance the counter before building the
    # object, exactly one id per call.
    new_id = MaxID
    MaxID = new_id + 1

    hacker = Hacker(name, new_id, elist, email)
    HackersByName[name] = hacker
    HackersByEmail[email] = hacker
    HackersByID[new_id] = hacker
    return hacker
|
|
|
|
def LookupEmail(addr):
    """Return the Hacker registered under email `addr`, or None if absent."""
    return HackersByEmail.get(addr)
|
|
|
|
def LookupName(name):
    """Return the Hacker registered under `name`, or None if absent."""
    return HackersByName.get(name)
|
|
|
|
def LookupID(id):
    """Return the Hacker registered under numeric `id`, or None if absent."""
    return HackersByID.get(id)
|
|
|
|
def AllHackers():
    """Return a list of every registered Hacker.

    The values are copied into a fresh list so the result can be sorted,
    indexed or iterated while hackers are being added.  On Python 2 this
    matches what dict.values() already returned; on Python 3 it avoids
    handing out a live dictionary view.
    """
    return list(HackersByID.values())
|
|
# return [h for h in HackersByID.values ()] # if (h.added + h.removed) > 0]
|
|
|
|
def DumpDB():
    """Write a text dump of every hacker to the file 'database.dump'.

    Hackers are written in sorted-name order.  For each one: a summary
    line (id, name, patch count, lines added/removed, signoff count),
    then each email address followed by its dated employer entries, then
    the version list when it is non-empty.
    """
    # The with-block guarantees the file is flushed and closed even if a
    # write fails part-way through.
    with open('database.dump', 'w') as out:
        # sorted() works on Python 2 and 3 alike; keys().sort() does not.
        for name in sorted(HackersByName):
            h = HackersByName[name]
            out.write('%4d %s %d p (+%d -%d) sob: %d\n'
                      % (h.id, h.name, len(h.patches), h.added, h.removed,
                         len(h.signoffs)))
            # email and employer are parallel lists on each hacker.
            for address, elist in zip(h.email, h.employer):
                out.write('\t%s -> \n' % (address,))
                for date, empl in elist:
                    out.write('\t\t %d-%d-%d %s\n'
                              % (date.year, date.month, date.day, empl.name))
            if h.versions:
                out.write('\tVersions: %s\n' % ','.join(h.versions))
|
|
|
|
#
|
|
# Hack: The first visible tag comes a ways into the stream; when we see it,
|
|
# push it backward through the changes we've already seen.
|
|
#
|
|
def ApplyFirstTag(tag):
    """Replace every non-empty version list with a list holding only `tag`.

    Hackers whose version list is still empty are left untouched.
    """
    for hacker in HackersByName.values():
        if not hacker.versions:
            continue
        hacker.versions = [tag]
|
|
|
|
#
|
|
# Employer info.
|
|
#
|
|
class Employer:
    """Per-employer totals: line counts, changesets, signoffs, bugs, reviews."""

    def __init__(self, name):
        """Start an employer called `name` with all tallies at zero."""
        self.name = name
        self.added = 0
        self.removed = 0
        self.count = 0
        self.changed = 0
        self.sobs = 0
        self.bugsfixed = []
        self.reviews = []
        self.hackers = []

    def _note_hacker(self, hacker):
        # Keep self.hackers free of duplicates while preserving the order
        # in which contributors were first seen.
        if hacker not in self.hackers:
            self.hackers.append(hacker)

    def AddCSet(self, patch):
        """Account for one changeset and remember its author.

        `changed` grows by the larger of the patch's added and removed
        counts.
        """
        self.added += patch.added
        self.removed += patch.removed
        self.changed += max(patch.added, patch.removed)
        self.count += 1
        self._note_hacker(patch.author)

    def AddSOB(self):
        """Count one more signoff."""
        self.sobs += 1

    def AddBug(self, bug):
        """Record a fixed bug and remember its owner as a contributor."""
        self.bugsfixed.append(bug)
        self._note_hacker(bug.owner)

    def AddReview(self, reviewer):
        """Record a review and remember the reviewer as a contributor."""
        self.reviews.append(reviewer)
        self._note_hacker(reviewer)
|
|
|
|
# Every known employer object, keyed by employer name.
Employers = dict()
|
|
|
|
def GetEmployer(name):
    """Return the employer registered as `name`, creating it on first use."""
    if name not in Employers:
        Employers[name] = Employer(name)
    return Employers[name]
|
|
|
|
def AllEmployers():
    """Return a list of every registered employer object.

    A fresh list is returned rather than a live dictionary view, so
    callers may sort it or register further employers while walking it.
    On Python 2 this is what dict.values() already provided.
    """
    return list(Employers.values())
|
|
|
|
#
|
|
# Certain obnoxious developers, who will remain nameless (because we
|
|
# would never want to run afoul of Thomas) want their work split among
|
|
# multiple companies. Let's try to cope with that. Let's also hope
|
|
# this doesn't spread.
|
|
#
|
|
class VirtualEmployer(Employer):
    """An employer whose credit is divided among other, real employers."""

    def __init__(self, name):
        """Create the virtual employer with zeroed tallies and no splits."""
        Employer.__init__(self, name)
        # (real employer name, fraction) pairs added by addsplit().
        self.splits = []

    def addsplit(self, name, fraction):
        """Direct `fraction` of this employer's credit to employer `name`."""
        self.splits.append((name, fraction))

    #
    # Go through and (destructively) apply our credits to the
    # real employer.  Only one level of weirdness is supported.
    #
    def applysplits(self):
        """Add each split's share of our counts to its real employer.

        Shares are truncated to integers.  Afterwards this object is
        re-initialised under its own name, which zeroes its counts and
        also empties its split list.
        """
        for name, fraction in self.splits:
            real = GetEmployer(name)
            real.added += int(self.added * fraction)
            real.removed += int(self.removed * fraction)
            real.changed += int(self.changed * fraction)
            real.count += int(self.count * fraction)
        # Reset counts just in case.  Use our own name, not the loop
        # variable: that one is undefined when there are no splits and
        # would otherwise rename us after the last real employer.
        self.__init__(self.name)

    def store(self):
        """Register this object in Employers under its name.

        Warnings go to stderr when an existing entry is replaced (the old
        entry is also printed to stdout) and when no splits are defined.
        """
        if self.name in Employers:
            print(Employers[self.name])
            sys.stderr.write('WARNING: Virtual empl %s overwrites another\n'
                             % (self.name))
        if len(self.splits) == 0:
            sys.stderr.write('WARNING: Virtual empl %s has no splits\n'
                             % (self.name))
            # Should check that they add up too, but I'm lazy
        Employers[self.name] = self
|
|
|
|
class FileType:
    """Classify file names by trying pattern lists in a fixed order."""

    def __init__(self, patterns=None, order=None):
        """Store the pattern table and the order in which types are tried.

        patterns -- dict mapping a file-type name to an iterable of
                    objects with a search() method (presumably compiled
                    regular expressions -- confirm against the caller).
        order    -- sequence of file-type names, tried first to last.

        Omitted arguments get a fresh empty container per instance, so
        instances never share state through a default argument.
        """
        self.patterns = {} if patterns is None else patterns
        self.order = [] if order is None else order

    def guess_file_type(self, filename, patterns=None, order=None):
        """Return the first type in `order` with a pattern matching `filename`.

        `patterns` and `order` override the instance's own tables for this
        call; a missing or empty override falls back to the instance
        value.  Types named in `order` but absent from `patterns` are
        skipped.  Returns 'unknown' when nothing matches.
        """
        patterns = patterns or self.patterns
        order = order or self.order

        for file_type in order:
            if file_type not in patterns:
                continue
            for patt in patterns[file_type]:
                if patt.search(filename):
                    return file_type

        return 'unknown'
|
|
|
|
#
|
|
# By default we recognize nothing.
|
|
#
|
|
# Module-wide classifier; it starts with an empty pattern table and an
# empty ordering, so it recognizes nothing.
FileTypes = FileType(patterns={}, order=[])
|
|
|
|
#
|
|
# Mix all the virtual employers into their real destinations.
|
|
#
|
|
def MixVirtuals():
    """Apply the splits of every VirtualEmployer to its real employers.

    The employer collection is copied before the walk: applysplits()
    looks its targets up through GetEmployer(), which registers an
    employer that does not exist yet, and the table must not change size
    underneath the loop.
    """
    for empl in list(AllEmployers()):
        if isinstance(empl, VirtualEmployer):
            empl.applysplits()
|
|
|
|
#
|
|
# The email map.
|
|
#
|
|
# Maps a variant email address to its canonical form.
EmailAliases = dict()
|
|
|
|
def AddEmailAlias(variant, canonical):
    """Map the address `variant` to `canonical` in EmailAliases.

    If `variant` already has an alias, a notice is written to stderr and
    the old mapping is overwritten.
    """
    # The `in` operator replaces dict.has_key(), which Python 3 removed.
    if variant in EmailAliases:
        sys.stderr.write('Duplicate email alias for %s\n' % (variant))
    EmailAliases[variant] = canonical
|
|
|
|
def RemapEmail(email):
    """Lower-case `email` and return its alias target, if one is recorded.

    Addresses without an alias come back lower-cased but otherwise
    unchanged.
    """
    lowered = email.lower()
    return EmailAliases.get(lowered, lowered)
|
|
|
|
#
|
|
# Email-to-employer mapping.
|
|
#
|
|
# Maps an email address (or a bare domain) to a list of
# (end date, employer) pairs.
EmailToEmployer = dict()
# Default end date for a mapping: 365 days after the day this module is
# loaded.
nextyear = datetime.timedelta(days=365) + datetime.date.today()
|
|
|
|
def AddEmailEmployerMapping(email, employer, end=nextyear):
    """Record that `email` belongs to `employer` until the date `end`.

    email    -- address (or domain); stored lower-cased.
    employer -- employer name, resolved through GetEmployer().
    end      -- end date of the association; None means `nextyear`.

    Each address keeps a list of (end date, employer) pairs.  The new
    pair goes in front of the first existing entry whose date is later
    than `end`, or at the tail if there is none.  A warning is printed
    whenever an existing entry has exactly the same end date.
    """
    if end is None:
        end = nextyear
    email = email.lower()
    empl = GetEmployer(employer)

    # Only the dictionary lookup can raise KeyError, so only it is guarded.
    try:
        history = EmailToEmployer[email]
    except KeyError:
        EmailToEmployer[email] = [(end, empl)]
        return

    for index, (date, _unused) in enumerate(history):
        if date == end:  # probably both nextyear
            # Single-argument print() is valid on Python 2 and Python 3.
            print('WARNING: duplicate email/empl for %s' % (email,))
        if date > end:
            history.insert(index, (end, empl))
            return
    history.append((end, empl))
|
|
|
|
def MapToEmployer(email, unknown=0):
    """Return the (end date, employer) list that applies to `email`.

    Lookup order in EmailToEmployer:
      1. the full address, lower-cased, with ' at ' turned back into '@';
      2. the domain's suffixes, from the last two labels outward to the
         full domain (a single-label domain is never tried).

    An address without '@' is reported on stdout and mapped to the
    'Funky' employer.  When nothing matches, the result is a single-entry
    list ending at `nextyear`: the '(Unknown)' employer if `unknown` is
    true, otherwise an employer named after the address itself.
    """
    # Somebody sometimes does s/@/ at /; let's fix it.
    email = email.lower().replace(' at ', '@')
    try:
        return EmailToEmployer[email]
    except KeyError:
        pass

    namedom = email.split('@')
    if len(namedom) < 2:
        # Single-argument print() is valid on Python 2 and Python 3.
        print('Oops...funky email %s' % (email,))
        return [(nextyear, GetEmployer('Funky'))]

    labels = namedom[1].split('.')
    for dots in range(len(labels) - 2, -1, -1):
        addr = '.'.join(labels[dots:])
        try:
            return EmailToEmployer[addr]
        except KeyError:
            pass

    #
    # We don't know who they work for.
    #
    if unknown:
        return [(nextyear, GetEmployer('(Unknown)'))]
    return [(nextyear, GetEmployer(email))]
|
|
|
|
|
|
def LookupEmployer(email, mapunknown=0):
    """Return the employer list for `email`, as computed by MapToEmployer.

    `mapunknown` is passed straight through as MapToEmployer's `unknown`
    argument.
    """
    return MapToEmployer(email, mapunknown)
|
|
|