#
# The "database".
#
# This code is part of the LWN git data miner.
#
# Copyright 2007-11 Eklektix, Inc.
# Copyright 2007-11 Jonathan Corbet
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
#
import datetime
import sys


class Hacker:
    """One contributor, with the emails, employers and patches tied to them.

    ``email`` and ``employer`` are parallel lists: ``employer[i]`` is the
    employer list registered for ``email[i]``.  Each employer list is
    iterated as ``(end_date, employer)`` pairs by ``emailemployer`` and
    ``DumpDB``.
    """

    def __init__(self, name, id, elist, email):
        """Create a hacker known by ``name``/``id`` with one initial email.

        ``elist`` is the employer list that goes with ``email``.
        """
        self.name = name
        self.id = id
        self.employer = [elist]
        self.email = [email]
        self.added = self.removed = 0
        self.patches = []
        self.signoffs = []
        self.reviews = []
        self.tested = []
        self.reports = []
        self.testcred = self.repcred = 0

    def addemail(self, email, elist):
        """Attach another address (and its employer list) to this hacker.

        Also registers the address in the module-level ``HackersByEmail``.
        """
        self.email.append(email)
        self.employer.append(elist)
        HackersByEmail[email] = self

    def emailemployer(self, email, date):
        """Return the employer in effect for ``email`` on ``date``.

        The employer list of every matching address is scanned in order and
        the first entry whose end date is later than ``date`` wins.  If
        nothing matches, a diagnostic is printed and None is returned.
        """
        for addr, elist in zip(self.email, self.employer):
            if addr != email:
                continue
            for edate, empl in elist:
                if edate > date:
                    return empl
        # A single pre-formatted string prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('OOPS.  %s %s %s %s %s'
              % (self.name, self.employer, self.email, email, date))
        return None  # Should not happen

    def addpatch(self, patch):
        """Record ``patch`` and add its line counts to the running totals."""
        self.added += patch.added
        self.removed += patch.removed
        self.patches.append(patch)

    #
    # There's got to be a better way.
    #
    def addsob(self, patch):
        """Record ``patch`` in the signoffs list."""
        self.signoffs.append(patch)

    def addreview(self, patch):
        """Record ``patch`` in the reviews list."""
        self.reviews.append(patch)

    def addtested(self, patch):
        """Record ``patch`` in the tested list."""
        self.tested.append(patch)

    def addreport(self, patch):
        """Record ``patch`` in the reports list."""
        self.reports.append(patch)

    def reportcredit(self, patch):
        """Bump the report-credit counter; ``patch`` itself is not stored."""
        self.repcred += 1

    def testcredit(self, patch):
        """Bump the test-credit counter; ``patch`` itself is not stored."""
        self.testcred += 1


HackersByName = {}
HackersByEmail = {}
HackersByID = {}
MaxID = 0


def StoreHacker(name, elist, email):
    """Create a Hacker, give it the next free ID and index it everywhere.

    Returns the new Hacker.  An existing entry under the same name or email
    is replaced in the corresponding index.
    """
    global MaxID

    id = MaxID
    MaxID += 1
    h = Hacker(name, id, elist, email)
    HackersByName[name] = h
    HackersByEmail[email] = h
    HackersByID[id] = h
    return h


def LookupEmail(addr):
    """Return the Hacker registered under ``addr``, or None."""
    return HackersByEmail.get(addr)


def LookupName(name):
    """Return the Hacker registered under ``name``, or None."""
    return HackersByName.get(name)


def LookupID(id):
    """Return the Hacker registered under ``id``, or None."""
    return HackersByID.get(id)


def AllHackers():
    """Return a list of every known Hacker.

    A real list is returned (not a dict view) so the result is the same
    kind of object on Python 2 and Python 3.
    """
    return list(HackersByID.values())
#    return [h for h in HackersByID.values ()] #  if (h.added + h.removed) > 0]


def DumpDB():
    """Write every hacker, sorted by name, to ``database.dump``.

    For each hacker a summary line is written, followed by each email and
    the dated employer entries attached to it.
    """
    # The with-block guarantees the dump is flushed and closed even if a
    # write fails part-way through.
    with open('database.dump', 'w') as out:
        for name in sorted(HackersByName):
            h = HackersByName[name]
            out.write('%4d %s %d p (+%d -%d) sob: %d\n'
                      % (h.id, h.name, len(h.patches), h.added, h.removed,
                         len(h.signoffs)))
            for addr, elist in zip(h.email, h.employer):
                out.write('\t%s -> \n' % (addr))
                for date, empl in elist:
                    out.write('\t\t %d-%d-%d %s\n'
                              % (date.year, date.month, date.day, empl.name))


#
# Employer info.
#
class Employer:
    """Per-employer totals: line counts, changesets, signoffs and authors."""

    def __init__(self, name):
        """Create an employer called ``name`` with all counters at zero."""
        self.name = name
        self.added = self.removed = self.count = self.changed = 0
        self.sobs = 0
        self.hackers = []

    def AddCSet(self, patch):
        """Account for one changeset.

        Adds the patch's added/removed line counts, adds the larger of the
        two to ``changed``, counts the changeset, and remembers its author
        if not already listed.
        """
        self.added += patch.added
        self.removed += patch.removed
        self.changed += max(patch.added, patch.removed)
        self.count += 1
        if patch.author not in self.hackers:
            self.hackers.append(patch.author)

    def AddSOB(self):
        """Count one signoff for this employer."""
        self.sobs += 1


Employers = {}


def GetEmployer(name):
    """Return the Employer called ``name``, creating it on first use."""
    try:
        return Employers[name]
    except KeyError:
        e = Employer(name)
        Employers[name] = e
        return e


def AllEmployers():
    """Return a list of every known Employer.

    A real list is returned (not a dict view) so the result is the same
    kind of object on Python 2 and Python 3.
    """
    return list(Employers.values())


#
# The email map.
#
EmailAliases = {}


def AddEmailAlias(variant, canonical):
    """Map the address ``variant`` onto ``canonical``.

    A warning goes to stderr when ``variant`` already has an alias; the new
    mapping still replaces the old one.
    """
    if variant in EmailAliases:
        sys.stderr.write('Duplicate email alias for %s\n' % (variant))
    EmailAliases[variant] = canonical


def RemapEmail(email):
    """Lower-case ``email`` and return its alias target, if it has one.

    Addresses with no alias are returned lower-cased.
    """
    email = email.lower()
    return EmailAliases.get(email, email)


#
# Email-to-employer mapping.
#
EmailToEmployer = {}
nextyear = datetime.date.today() + datetime.timedelta(days=365)


def AddEmailEmployerMapping(email, employer, end=nextyear):
    """Record that ``email`` belonged to ``employer`` until ``end``.

    ``email`` may be whatever key ``MapToEmployer`` looks up: a full
    address or a bare domain.  ``end`` defaults to ``nextyear`` (also used
    when None is passed).  The per-email list is kept ordered by end date:
    the new entry is inserted before the first entry with a later date, or
    appended.  An entry with the same end date triggers a printed warning
    but the new entry is still added.
    """
    if end is None:
        end = nextyear
    email = email.lower()
    entry = (end, GetEmployer(employer))
    # Only the dictionary lookup can legitimately raise KeyError, so keep
    # the try body down to that one statement.
    try:
        history = EmailToEmployer[email]
    except KeyError:
        EmailToEmployer[email] = [entry]
        return
    for i, (date, _) in enumerate(history):
        if date == end:  # probably both nextyear
            print('WARNING: duplicate email/empl for %s' % (email))
        if date > end:
            history.insert(i, entry)
            return
    history.append(entry)


def MapToEmployer(email, unknown=0):
    """Return the ``(end_date, Employer)`` list for ``email``.

    Lookup order: the full lower-cased address, then domain suffixes of the
    host part, starting from the last two components and growing leftwards
    to the full host.  If nothing matches, a one-entry list ending at
    ``nextyear`` is returned, naming the employer ``'(Unknown)'`` when
    ``unknown`` is true and the address itself otherwise.  An address with
    no ``@`` is reported and mapped to ``'Funky'``.
    """
    # Somebody sometimes does s/@/ at /; let's fix it.
    email = email.lower().replace(' at ', '@')
    try:
        return EmailToEmployer[email]
    except KeyError:
        pass
    namedom = email.split('@')
    if len(namedom) < 2:
        print('Oops...funky email %s' % email)
        return [(nextyear, GetEmployer('Funky'))]
    labels = namedom[1].split('.')
    for dots in range(len(labels) - 2, -1, -1):
        addr = '.'.join(labels[dots:])
        try:
            return EmailToEmployer[addr]
        except KeyError:
            pass
    if unknown:
        return [(nextyear, GetEmployer('(Unknown)'))]
    return [(nextyear, GetEmployer(email))]


def LookupEmployer(email, mapunknown=0):
    """Return the employer list for ``email``; see ``MapToEmployer``."""
    return MapToEmployer(email, mapunknown)