6eb60baac3
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
202 lines
5.2 KiB
Python
202 lines
5.2 KiB
Python
#
# The "database".
#

#
# This code is part of the LWN git data miner.
#
# Copyright 2007 LWN.net
# Copyright 2007 Jonathan Corbet <corbet@lwn.net>
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
import sys, datetime
|
|
|
|
|
|
class Hacker:
    """One contributor, with the addresses and work credited to them.

    Attributes:
        name: the name given to the constructor.
        id: the identifier given to the constructor.
        email: list of addresses recorded for this person.
        employer: list kept parallel to ``email``; entry *i* is the
            employer list supplied together with ``email[i]`` and is
            iterated as ``(date, employer)`` pairs.
        added, removed: running totals of ``patch.added`` and
            ``patch.removed`` over every patch given to addpatch().
        patches: every patch given to addpatch(), in call order.
        signoffs: every patch given to addsob(), in call order.
    """

    def __init__(self, name, id, elist, email):
        """Start a record with a single address and its employer list."""
        self.name = name
        self.id = id
        self.employer = [elist]
        self.email = [email]
        self.added = self.removed = 0
        self.patches = []
        self.signoffs = []

    def addemail(self, email, elist):
        """Attach another address (and its employer list) to this person.

        The module-level HackersByEmail table is updated as well, so the
        new address resolves to this record.
        """
        self.email.append(email)
        self.employer.append(elist)
        HackersByEmail[email] = self

    def emailemployer(self, email, date):
        """Return the employer associated with ``email`` at ``date``.

        The employer list stored for the matching address is scanned in
        order, and the first entry whose date is strictly later than
        ``date`` is returned.  (Presumably each entry's date marks the
        end of that employment period -- confirm against the code that
        builds these lists.)

        Returns None, after printing a diagnostic to stdout, when the
        address is not recorded here or no entry is late enough.
        """
        # email and employer are parallel lists, so walk them in lockstep.
        for known, periods in zip(self.email, self.employer):
            if known != email:
                continue
            for edate, empl in periods:
                if edate > date:
                    return empl
        # A single pre-formatted argument prints the same text on both
        # Python 2 and Python 3.
        print('OOPS.  %s %s %s %s %s'
              % (self.name, self.employer, self.email, email, date))
        return None  # Should not happen

    def addpatch(self, patch):
        """Credit ``patch`` to this person and add its line counts."""
        self.added += patch.added
        self.removed += patch.removed
        self.patches.append(patch)

    def addsob(self, patch):
        """Record that this person signed off on ``patch``."""
        self.signoffs.append(patch)
# Tables giving access to every stored Hacker by name, by email
# address and by numeric ID.
HackersByName = {}
HackersByEmail = {}
HackersByID = {}

# The ID that StoreHacker() assigns to the next Hacker it creates.
MaxID = 0
def StoreHacker(name, elist, email):
    """Create a Hacker, index it by name, email and ID, and return it.

    The new record takes the current value of the module-level MaxID
    counter as its ID; the counter is advanced by one before the
    record is built.
    """
    global MaxID

    hacker_id = MaxID
    MaxID = hacker_id + 1

    hacker = Hacker(name, hacker_id, elist, email)
    # Register the same object in all three lookup tables.
    for table, key in ((HackersByName, name),
                       (HackersByEmail, email),
                       (HackersByID, hacker_id)):
        table[key] = hacker
    return hacker
def LookupEmail(addr):
    """Return the Hacker stored under email address ``addr``, or None."""
    return HackersByEmail.get(addr)
def LookupName(name):
    """Return the Hacker stored under ``name``, or None."""
    return HackersByName.get(name)
def LookupID(id):
    """Return the Hacker stored under numeric ``id``, or None."""
    return HackersByID.get(id)
def AllHackers():
    """Return the values of the HackersByID table (every stored Hacker)."""
    # No filtering is applied; a variant restricted to hackers with
    # (h.added + h.removed) > 0 is deliberately left disabled.
    everyone = HackersByID.values()
    return everyone
def DumpDB():
    """Write a plain-text summary of every hacker to 'database.dump'.

    Hackers are listed in sorted-name order.  Each one gets a summary
    line (ID, name, patch count, lines added and removed, sign-off
    count), then one line per recorded email address, each followed by
    one line per (date, employer) entry stored for that address.

    The path is relative, so the file is created in the current working
    directory; an existing file is overwritten.
    """
    # The context manager closes the file even if a write fails; the
    # previous version never closed it at all.
    with open('database.dump', 'w') as out:
        for name in sorted(HackersByName):
            h = HackersByName[name]
            out.write('%4d %s %d p (+%d -%d) sob: %d\n'
                      % (h.id, h.name, len(h.patches),
                         h.added, h.removed, len(h.signoffs)))
            # h.email and h.employer are parallel lists.
            for email, periods in zip(h.email, h.employer):
                out.write('\t%s -> \n' % (email,))
                for date, empl in periods:
                    out.write('\t\t %d-%d-%d %s\n'
                              % (date.year, date.month, date.day, empl.name))
#
# Employer info.
#
class Employer:
    """Running totals of the work attributed to one employer.

    Attributes:
        name: the employer name given to the constructor.
        added, removed: sums of ``patch.added`` / ``patch.removed`` over
            every changeset passed to AddCSet().
        changed: sum, per changeset, of the larger of its added and
            removed counts.
        count: number of changesets passed to AddCSet().
        sobs: number of AddSOB() calls.
        hackers: distinct ``patch.author`` values seen by AddCSet(), in
            first-seen order.
    """

    def __init__(self, name):
        """Start with every counter at zero and no hackers."""
        self.name = name
        self.added = 0
        self.removed = 0
        self.count = 0
        self.changed = 0
        self.sobs = 0
        self.hackers = []

    def AddCSet(self, patch):
        """Fold one changeset into this employer's totals."""
        self.added += patch.added
        self.removed += patch.removed
        self.changed += max(patch.added, patch.removed)
        self.count += 1

        author = patch.author
        if author in self.hackers:
            return
        self.hackers.append(author)

    def AddSOB(self):
        """Count one more sign-off for this employer."""
        self.sobs += 1
# Every known Employer object, keyed by employer name.
Employers = {}
def GetEmployer(name):
    """Return the Employer registered under ``name``, creating it if needed.

    A newly created Employer is stored in the module-level Employers
    table, so later calls with the same name return the same object.
    """
    if name in Employers:
        return Employers[name]
    created = Employer(name)
    Employers[name] = created
    return created
def AllEmployers():
    """Return the values of the Employers table (every known Employer)."""
    everyone = Employers.values()
    return everyone
#
# The email map.
#
# Maps a variant email address to the canonical address it stands for.
EmailAliases = {}
def AddEmailAlias(variant, canonical):
    """Record that address ``variant`` should be rewritten to ``canonical``.

    If ``variant`` already has an alias, a warning is written to stderr;
    the new mapping replaces the old one either way.
    """
    # "in" replaces dict.has_key(), which no longer exists in Python 3.
    if variant in EmailAliases:
        sys.stderr.write('Duplicate email alias for %s\n' % (variant))
    EmailAliases[variant] = canonical
def RemapEmail(email):
    """Return the canonical form of ``email``.

    The address is lowercased first.  If the lowercased form has an
    entry in EmailAliases, that entry is returned; otherwise the
    lowercased address itself is.
    """
    lowered = email.lower()
    return EmailAliases.get(lowered, lowered)
#
# Email-to-employer mapping.
#
# Maps a lowercased email address (or a bare domain suffix) to a list
# of (end date, Employer) tuples.
EmailToEmployer = {}

# The date 365 days after the day this module was loaded; used as the
# end date for mappings that do not specify one.
nextyear = datetime.date.today() + datetime.timedelta(days=365)
def AddEmailEmployerMapping(email, employer, end=nextyear):
    """Record that ``email`` maps to ``employer`` until ``end``.

    Args:
        email: address (or domain) to map; it is lowercased before use.
        employer: employer name, resolved through GetEmployer().
        end: end date of the mapping.  None, like omitting the argument,
            means the module-level ``nextyear``.

    The entries for one address are kept as a list of (end date,
    Employer) tuples.  A new entry is inserted in front of the first
    existing entry whose date is later than ``end``, or appended if
    there is none.  If an existing entry has exactly the same end date,
    a warning is printed to stdout and the new entry is still added.
    """
    if end is None:
        end = nextyear
    email = email.lower()
    empl = GetEmployer(employer)

    # Only the table lookup can raise KeyError, so only it is guarded.
    try:
        periods = EmailToEmployer[email]
    except KeyError:
        EmailToEmployer[email] = [(end, empl)]
        return

    for index, (date, _) in enumerate(periods):
        if date == end:  # probably both nextyear
            # Single pre-formatted argument: same output on Python 2 and 3.
            print('WARNING: duplicate email/empl for %s' % (email))
        if date > end:
            periods.insert(index, (end, empl))
            return
    periods.append((end, empl))
def MapToEmployer(email, unknown=0):
    """Return the list of (end date, Employer) tuples for ``email``.

    The address is lowercased, then resolved in this order:

    1. An exact entry for the address in EmailToEmployer.
    2. If the address contains no '@', a warning is printed to stdout
       and a one-entry list for the employer named 'Funky' is returned.
    3. An entry for a suffix of the domain part, trying the last two
       labels first and then progressively longer suffixes up to the
       whole domain.  A single-label domain is never looked up.
    4. Otherwise a one-entry list ending at ``nextyear``: for the
       employer named '(Unknown)' when ``unknown`` is true, else for an
       employer named after the address itself.

    When a table entry is found, the stored list itself is returned,
    not a copy.
    """
    email = email.lower()
    if email in EmailToEmployer:
        return EmailToEmployer[email]

    pieces = email.split('@')
    if len(pieces) < 2:
        # Single pre-formatted argument: same output on Python 2 and 3.
        print('Oops...funky email %s' % email)
        return [(nextyear, GetEmployer('Funky'))]

    labels = pieces[1].split('.')
    # Start with the shortest two-label suffix and grow leftwards.
    for start in range(len(labels) - 2, -1, -1):
        suffix = '.'.join(labels[start:])
        if suffix in EmailToEmployer:
            return EmailToEmployer[suffix]

    if unknown:
        return [(nextyear, GetEmployer('(Unknown)'))]
    return [(nextyear, GetEmployer(email))]
def LookupEmployer(email, mapunknown=0):
    """Return MapToEmployer()'s result for ``email``.

    ``mapunknown`` is passed through as MapToEmployer's ``unknown``
    argument.
    """
    return MapToEmployer(email, mapunknown)