0cc9c05b3a
The utility as a whole is still somewhat on the fragile side, though.
334 lines
8.4 KiB
Python
Executable File
334 lines
8.4 KiB
Python
Executable File
#!/usr/bin/python
|
|
#
|
|
# Create a graph of patch flow into the mainline.
|
|
#
|
|
# This code is part of the LWN git data miner.
|
|
#
|
|
# Copyright 2007-11 Eklektix, Inc.
|
|
# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
|
|
#
|
|
# This file may be distributed under the terms of the GNU General
|
|
# Public License, version 2.
|
|
#
|
|
import sys
|
|
from patterns import patterns
|
|
|
|
#
|
|
# The various types of commit we understand.
|
|
#
|
|
class Commit:
    """A plain commit read from the git log.

    Attributes:
        id: the commit's own id, as passed to the constructor.
        parent: the id of its parent, as passed to the constructor.
        ismerge: 0 for a plain commit (the Merge subclass sets it to 1).
        treepriority: initialised to 0.
    """

    def __init__(self, id, parent):
        self.id = id
        self.parent = parent
        self.ismerge = 0
        self.treepriority = 0
|
|
#
|
|
# Merges are special
|
|
#
|
|
class Merge (Commit):
    """A commit with more than one parent.

    A merge starts out flagged as internal (internal = 1).  Calling
    addtree() records the tree that was pulled from and clears the flag.

    Attributes:
        parents: list of parent ids, starting with the one given to the
            constructor; further ones are appended with addparent().
        internal: 1 until addtree() is called, 0 afterwards.
        tree: the tree passed to addtree(), or None if it was never called.
    """

    def __init__(self, id, parent):
        Commit.__init__(self, id, parent)
        self.ismerge = 1
        self.internal = 1  # Two branches within a repo?
        self.parents = [ parent ]
        # Defined up front so that reading .tree on a merge that never saw
        # addtree() yields None instead of raising AttributeError.
        self.tree = None

    def addparent(self, parentid):
        """Append one more parent id to this merge."""
        self.parents.append(parentid)

    def addtree(self, tree):
        """Record the tree this merge pulled from and mark it non-internal."""
        self.tree = tree
        self.internal = 0
|
|
|
|
#
|
|
# Trees: where the commits come from.
|
|
#
|
|
class Tree:
    """A repository that commits come from.

    Attributes:
        name: display name of the tree.
        url: the tree's URL.
        inputs: trees added through addinput(), without duplicates and in
            first-seen order.
        commits: commit ids added through addcommit().
    """

    def __init__(self, name, url):
        self.name = name
        self.url = url
        self.inputs = [ ]
        self.commits = [ ]

    def addcommit(self, id):
        """Record a commit id as belonging to this tree."""
        self.commits.append(id)

    def addinput(self, tree):
        """Record `tree` as an input of this tree; repeats are ignored."""
        if tree not in self.inputs:
            self.inputs.append(tree)
|
|
|
|
# The mainline tree; it is pre-registered so lookups of its URL find it.
Mainline = Tree('Mainline',
                'git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git')
# Every tree known so far, keyed by URL.
KnownTrees = { Mainline.url: Mainline }
|
|
|
|
def NormalizeURL(url):
    """Return a canonical form of a tree URL taken from a merge message.

    URLs already starting with 'git:' are returned untouched.  Otherwise a
    handful of known spellings are rewritten, in this order:

      * the literal '../net-2.6/' becomes the full davem/net-2.6 URL;
      * 'master.kernel.org:' is replaced by 'git://git.kernel.org';
      * a URL ending in 'torvalds/linux-2.6' gets '.git' appended;
      * a bare '/pub/scm...' path is prefixed with 'git://git.kernel.org'.

    Anything else comes back unchanged.
    """
    if url.startswith('git:'):
        return url
    if url == '../net-2.6/':
        url = 'git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6'
    url = url.replace('master.kernel.org:', 'git://git.kernel.org')
    if url.endswith('torvalds/linux-2.6'):
        url += '.git'
    if url.startswith('/pub/scm'):
        url = 'git://git.kernel.org' + url
    return url
|
|
|
|
def LookupTree(url):
    """Return the Tree for `url`, creating and registering it if needed.

    The URL is normalized first, so different spellings of the same
    location share one Tree.  A newly created tree uses the normalized URL
    as both its name and its url.
    """
    url = NormalizeURL(url)
    tree = KnownTrees.get(url)
    if tree is None:
        tree = KnownTrees[url] = Tree(url, url)
    return tree
|
|
|
|
#
|
|
# We track which tree every commit belongs to.
|
|
#
|
|
# Maps a commit id to the CTEntry recorded for it.
CommitTrees = { }
|
|
class CTEntry:
    """What is known about where one commit came from.

    Attributes:
        tree: the tree the commit is attributed to.
        priority: a number compared between competing entries for the same
            commit; AddCommitTree keeps the entry with the smaller value.
        path: list of trees associated with the commit.
    """

    def __init__ (self, tree, priority, path):
        self.tree = tree
        self.priority = priority
        self.path = path
|
|
|
|
def AddCommitTree(id, entry):
    """Record `entry` as the tree information for commit `id`.

    If the commit already has an entry, it is replaced only when the new
    entry's priority is strictly lower; ties keep the existing entry.
    """
    oldentry = CommitTrees.get(id)
    if oldentry is None or entry.priority < oldentry.priority:
        CommitTrees[id] = entry
|
|
|
|
|
|
def LookupCommitTree(id):
    """Return the CTEntry recorded for commit `id`.

    When nothing was recorded, a complaint is printed and a fresh entry
    attributing the commit to Mainline (priority 0, empty path) is returned.
    The fallback entry is not stored.
    """
    try:
        return CommitTrees[id]
    except KeyError:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Unfound commit %s' % (id,))
        return CTEntry (Mainline, 0, [])
|
|
|
|
#
|
|
# Input handling with one-line pushback.
|
|
#
|
|
# The pushed-back line, if any; GetLine() hands it out before reading more.
SavedLine = None
# Where GetLine() reads from.
Input = sys.stdin
|
|
|
|
def GetLine():
    """Return the next input line, honouring a one-line pushback.

    A line stored by SaveLine() is returned first (and forgotten);
    otherwise the next line is read from Input.  Like readline(), an empty
    string signals end of input.
    """
    global SavedLine
    if SavedLine:
        line, SavedLine = SavedLine, None
        return line
    return Input.readline()
|
|
|
|
def SaveLine(line):
    """Push `line` back so that the next GetLine() call returns it.

    Only one line is remembered; saving again overwrites the previous one.
    """
    global SavedLine
    SavedLine = line
|
|
|
|
#
|
|
# Pull in a commit and see what it is.
|
|
#
|
|
def GetCommit():
    """Read the next commit from the input and classify it.

    Returns:
        A Commit for a single-parent commit, a Merge for a commit with
        several parents, or None when the input is exhausted (including
        when it runs out while a merge description is still being read).

    A commit line without any parent id is reported and skipped once some
    commits have been recorded.  If nothing has been recorded yet, the
    input probably lacks parent ids altogether, so the program exits with
    status 1.

    A merge whose description reaches the next commit line without a
    recognised merge line is reported and dropped; that next commit is
    processed instead.
    """
    #
    # Skip junk up to the next commit.
    #
    while 1:
        line = GetLine()
        if not line:
            return None
        m = patterns['commit'].match(line)
        if m:
            break

    #
    # Look at the commit and see how many parents we have.
    #
    ids = m.group(1).split()
    if len(ids) <= 1:
        if CommitTrees:
            print('No-Parent commit: %s' % (ids[0],))
            return GetCommit()
        print('Did you run git with --parents?')
        print(ids)
        sys.exit(1)
    if len(ids) == 2:  # Simple commit
        return Commit(ids[0], ids[1])
    #
    # OK, we have a merge.
    #
    merge = Merge(ids[0], ids[1])
    for parentid in ids[2:]:
        merge.addparent(parentid)
    #
    # We need to figure out what kind of merge it is, so read through the
    # descriptive text to the merge line.
    #
    while 1:
        line = GetLine()
        if not line:
            print('EOF looking for merge line')
            return None
        #
        # Maybe it's an external merge?
        #
        m = patterns['ExtMerge'].match(line)
        if m:
            merge.addtree(LookupTree(m.group(2)))
            return merge
        #
        # OK, maybe it's internal.  A new Merge is already flagged as
        # internal, so there is nothing to set here.
        #
        if patterns['IntMerge'].match(line) or patterns['IntMerge2'].match(line):
            return merge
        m = patterns['commit'].match(line)
        if m:
            print('Hit next commit (%s) looking for merge line' % (m.group(1)))
            # Push the commit line back so the recursive call starts on it.
            SaveLine(line)
            return GetCommit()
|
|
|
|
#
|
|
# Print out a tree and its inputs
|
|
#
|
|
def PrintTree(tree, indent = ''):
    """Print `tree` and, recursively, its inputs as an indented outline.

    Each line shows the number of commits recorded on the tree followed by
    its name; every nesting level adds to `indent`.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('%s%4d %s' % (indent, len(tree.commits), tree.name))
    for child in tree.inputs:
        PrintTree(child, indent + ' ')
|
|
|
|
#
|
|
# Let's try to build a data structure giving the patch flows.
|
|
#
|
|
class FlowNode:
    """One node of the patch-flow tree.

    Attributes:
        tree: the Tree this node stands for.
        inputs: child FlowNodes, keyed by tree name.
        commits: number of commit paths that passed through this node.
    """

    def __init__(self, tree):
        self.tree = tree
        self.inputs = { }
        self.commits = 0
|
|
|
|
def BuildFlowTree():
    """Build the flow tree from everything recorded in CommitTrees.

    Returns:
        The root FlowNode (for Mainline).  Each recorded commit's path is
        threaded through the tree; commits with an empty path are filed
        under a placeholder tree named '[No tree]'.
    """
    rootnode = FlowNode(Mainline)
    notree = Tree('[No tree]', '')
    for centry in CommitTrees.values():
        FillFlowPath(centry.path or [ notree ], rootnode)
    return rootnode
|
|
|
|
def FillFlowPath(path, node):
    """Count one commit along `path`, starting at `node`.

    `node` itself is counted, then each tree in `path` in turn: the child
    node for that tree (looked up by name, created on first use) is
    counted and becomes the starting point for the next tree.

    Returns None.
    """
    node.commits += 1
    # Walk the path iteratively; this avoids copying path[1:] at every step
    # and the recursion depth that a long path would otherwise need.
    for tree in path:
        child = node.inputs.get(tree.name)
        if child is None:
            child = node.inputs[tree.name] = FlowNode(tree)
        child.commits += 1
        node = child
|
|
|
|
def PrintFlowTree(ftree, indent = ''):
    """Print a flow tree as an indented outline, busiest inputs first.

    Each line shows a node's commit count and tree name; the node's inputs
    follow, sorted by descending commit count, one indent level deeper.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('%s%3d %s' % (indent, ftree.commits, ftree.tree.name))
    # A key-based, stable sort gives the same descending order as the
    # cmp-style comparison on commit counts and works on Python 2 and 3.
    children = sorted(ftree.inputs.values(), key=lambda node: -node.commits)
    for child in children:
        PrintFlowTree(child, indent + ' ')
|
|
|
|
#
|
|
# Something for graphviz
|
|
#
|
|
# Opening of the graphviz file: graph-wide and default node attributes.
# The closing '}' is written separately, after all edges.
GVHeader = '''digraph "runtree" {
graph [ label = "Patch flow into the mainline",
concentrate = true,
nodesep = 0.1,
rankdir = LR ];
node [shape = polygon,
sides = 4,
height = 0.3
fontsize = 8];
'''
|
|
|
|
|
|
# Commit count of the flow tree's root node; set by GVTree().
MainlineCommits = 0
|
|
|
|
def GVTree(ftree):
    """Write the flow tree rooted at `ftree` to 'runtree.gv' in DOT format.

    Also stores the root's commit count in the global MainlineCommits.
    The root's inputs are emitted busiest first, each as an edge into a
    node named 'Mainline'.
    """
    global MainlineCommits
    MainlineCommits = ftree.commits
    # The with-block closes (and flushes) the file even if a write fails.
    with open('runtree.gv', 'w') as gvf:
        gvf.write(GVHeader)
        # Key-based stable sort: descending commit count, valid on
        # Python 2 and 3.
        for node in sorted(ftree.inputs.values(), key=lambda n: -n.commits):
            GVPrintNode(gvf, node, 'Mainline')
        gvf.write('}\n')
|
|
|
|
def GVNodeName(treename):
    """Return a shortened tree name for use as a graph node label.

    Names mentioning 'kernel.org' are reduced to their last two
    '/'-separated components.  Other names containing '://' lose
    everything up to and including it.  Anything else is returned
    unchanged.
    """
    parts = treename.split('/')
    # Require two components so a slash-less kernel.org name falls through
    # to the generic handling instead of raising IndexError.
    if 'kernel.org' in treename and len(parts) >= 2:
        return '%s/%s' % (parts[-2], parts[-1])
    sep = treename.find('://')
    if sep > 0:
        return treename[sep+3:]
    return treename
|
|
|
|
def GVSort(n1, n2):
    """cmp-style comparison that orders nodes by descending commit count.

    Returns a negative number when `n1` has more commits than `n2`, zero
    when they are equal, and a positive number otherwise.
    """
    return n2.commits - n1.commits
|
|
|
|
def GVPrintNode(gvf, node, parent):
    """Write the edge from `node` to `parent`, then recurse into its inputs.

    Args:
        gvf: writable file object receiving the DOT text.
        node: the FlowNode to emit.
        parent: node name (already shortened) the edge points to.

    The edge is labelled with the node's commit count.  It is coloured red
    when the node carries more than 1/20 of MainlineCommits, orange when
    it carries more than 1/100.  Inputs are emitted busiest first.
    """
    name = GVNodeName(node.tree.name)
    gvf.write ('"%s" -> "%s" [taillabel = "%d", labelfontsize = 8' % (name, parent, node.commits))
    gvf.write (', arrowsize = 0.5')
    # Multiplying instead of dividing gives the same answer as integer
    # division for positive counts, does not depend on the Python version,
    # and cannot divide by zero.
    if MainlineCommits < 20 * node.commits:
        gvf.write(', color = red')
    elif MainlineCommits < 100 * node.commits:
        gvf.write(', color = orange')
    gvf.write(']\n')
    # Key-based stable sort: descending commit count on Python 2 and 3.
    for child in sorted(node.inputs.values(), key=lambda n: -n.commits):
        GVPrintNode(gvf, child, name)
|
|
|
|
#
|
|
# Main code.
|
|
#
|
|
#
# Walk the commits in input order.  Each commit is attributed to the tree
# recorded for it, and that attribution is passed on to its parents.
#
commit = GetCommit()
ncommits = 0
while commit:
    ncommits += 1
    entry = LookupCommitTree(commit.id)
    tree = entry.tree
    priority = entry.priority
    tree.addcommit(commit.id)
    #
    # For regular commits, just remember the tree involved
    #
    if not commit.ismerge:
        AddCommitTree(commit.parent, entry)
    #
    # For merges we have to deal with all the parents.
    #
    else:
        # The first parent stays in the same tree, with the same path.
        AddCommitTree(commit.parents[0], CTEntry (tree, priority, entry.path))
        if commit.internal:
            # Internal merge: the other parents also stay in this tree and
            # keep the unchanged path.
            for p in commit.parents[1:]:
                AddCommitTree(p, CTEntry (tree, priority, entry.path))
        else:
            # External merge: the other parents belong to the merged tree,
            # one step further along the path and at a higher priority
            # value.
            for p in commit.parents[1:]:
                path = entry.path + [commit.tree]
                AddCommitTree(p, CTEntry (commit.tree, priority + 1, path))
                if commit.tree is not Mainline:
                    tree.addinput(commit.tree)
    commit = GetCommit()

#PrintTree(Mainline)
ftree = BuildFlowTree()
PrintFlowTree(ftree)
GVTree(ftree)
# Single-argument print(...) behaves the same on Python 2 and 3.
print('%d commits total, %d trees' % (MainlineCommits, len(KnownTrees)))
|