gitdm/treeplot
Jonathan Corbet 0cc9c05b3a Fix up pattern use in treeplot
The utility as a whole is still somewhat on the fragile side, though.
2011-12-13 12:21:18 -07:00

334 lines
8.4 KiB
Python
Executable File

#!/usr/bin/python
#
# Create a graph of patch flow into the mainline.
#
# This code is part of the LWN git data miner.
#
# Copyright 2007-11 Eklektix, Inc.
# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
#
import sys
from patterns import patterns
#
# The various types of commit we understand.
#
class Commit:
    """One commit from the log: its own ID plus the ID of its first parent."""

    def __init__(self, id, parent):
        # Plain commits are not merges; the Merge subclass overrides this flag.
        self.ismerge = 0
        self.treepriority = 0
        self.parent = parent
        self.id = id
#
# Merges are special
#
class Merge (Commit):
    """A commit with several parents.

    A merge starts out flagged as internal (two branches of one repository)
    and is turned into an external merge by addtree(), which also records
    the tree that was pulled from.  The `tree` attribute exists only after
    addtree() has been called.
    """

    def __init__(self, id, parent):
        Commit.__init__(self, id, parent)
        # The first parent is always present; further ones come via addparent().
        self.parents = [ parent ]
        self.internal = 1
        self.ismerge = 1

    def addparent(self, parentid):
        """Append one more parent ID to this merge."""
        self.parents += [ parentid ]

    def addtree(self, tree):
        """Mark this merge as a pull from the external `tree`."""
        self.internal = 0
        self.tree = tree
#
# Trees: where the commits come from.
#
class Tree:
    """A repository that commits come from.

    Attributes:
        name, url -- as passed to the constructor.
        commits   -- IDs added through addcommit(), in order, duplicates kept.
        inputs    -- other trees added through addinput(), without duplicates.
    """

    def __init__(self, name, url):
        self.name, self.url = name, url
        self.commits = [ ]
        self.inputs = [ ]

    def addcommit(self, id):
        """Record that commit `id` belongs to this tree."""
        self.commits.append(id)

    def addinput(self, tree):
        """Record `tree` as feeding into this one; repeated calls are ignored."""
        if tree in self.inputs:
            return
        self.inputs.append(tree)
# The tree that everything flows into.  Commits whose tree cannot be found
# are attributed to it (see LookupCommitTree).
Mainline = Tree('Mainline',
'git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git')
# All trees seen so far, keyed by URL.  LookupTree normalizes a URL before
# consulting or extending this table.
KnownTrees = { Mainline.url: Mainline }
def NormalizeURL(url):
    """Fold alternative spellings of a repository location into one URL.

    Anything already starting with 'git:' is returned untouched.  Otherwise
    the rewrites below are applied in order: the relative '../net-2.6/'
    shorthand, the 'master.kernel.org:' host prefix, a missing '.git' on
    torvalds/linux-2.6, and bare '/pub/scm' paths.
    """
    if url.startswith('git:'):
        return url
    if url == '../net-2.6/':
        url = 'git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6'
    url = url.replace('master.kernel.org:', 'git://git.kernel.org')
    if url.endswith('torvalds/linux-2.6'):
        url = url + '.git'
    if url.startswith('/pub/scm'):
        url = 'git://git.kernel.org' + url
    return url
def LookupTree(url):
    """Return the Tree registered for `url`, creating it on first sight.

    The URL is normalized first; a newly created tree uses the normalized
    URL as both its name and its url and is added to KnownTrees.
    """
    key = NormalizeURL(url)
    if key not in KnownTrees:
        KnownTrees[key] = Tree(key, key)
    return KnownTrees[key]
#
# We track which tree every commit belongs to.
#
# Maps a commit ID to the CTEntry recording which tree it is attributed to.
CommitTrees = { }
class CTEntry:
    """Attribution of one commit to a tree.

    tree     -- the tree the commit is credited to.
    priority -- rank of this attribution; AddCommitTree keeps the entry
                with the lowest value.
    path     -- list of trees the commit passed through; BuildFlowTree
                walks it starting from the root node.
    """

    def __init__ (self, tree, priority, path):
        self.tree, self.priority, self.path = tree, priority, path
def AddCommitTree(id, entry):
    """Attribute commit `id` to `entry` unless a better attribution exists.

    An existing entry is replaced only when the new one has a strictly
    lower priority value; otherwise the existing entry is kept.
    """
    current = CommitTrees.get(id)
    if current is None or entry.priority < current.priority:
        CommitTrees[id] = entry
def LookupCommitTree(id):
    """Return the CTEntry recorded for commit `id`.

    A commit with no recorded entry is reported on stdout and given a fresh
    entry attributing it to Mainline with priority 0 and an empty path; that
    fallback entry is not stored in CommitTrees.
    """
    if id in CommitTrees:
        return CommitTrees[id]
    print('Unfound commit %s' % (id))
    return CTEntry (Mainline, 0, [])
#
# Input handling with one-line pushback.
#
# The one line pushed back by SaveLine(), or None when nothing is pending.
SavedLine = None
# The stream GetLine() reads from.
Input = sys.stdin
def GetLine():
    """Return the next line of input, honouring a pushed-back line.

    If SaveLine() left a (non-empty) line pending, that line is returned
    and the pushback slot is cleared; otherwise a fresh line is read from
    Input.  Callers treat a false result as end of input.
    """
    global SavedLine
    if not SavedLine:
        return Input.readline()
    pending, SavedLine = SavedLine, None
    return pending
def SaveLine(line):
    """Push `line` back so that the next GetLine() call returns it.

    Only one line is held; a second call overwrites the first.
    """
    global SavedLine
    SavedLine = line
#
# Pull in a commit and see what it is.
#
def GetCommit():
    """Read the next commit from the input and classify it.

    Returns a Commit for a commit line listing exactly one parent, a Merge
    for one listing several, or None when the input runs out (including
    when it runs out while a merge's description is still being scanned).

    A commit line with no parent at all is skipped once some attribution
    has already been recorded; if nothing has been recorded yet the script
    assumes git was run without --parents and exits with status 1.
    """
    #
    # Skip junk up to the next commit.
    #
    m = None
    while not m:
        line = GetLine()
        if not line:
            return None
        m = patterns['commit'].match(line)
    #
    # The first ID is the commit itself, the rest are its parents.
    #
    ids = m.group(1).split()
    if len(ids) < 2:
        if CommitTrees:
            print('No-Parent commit: %s' % ids[0])
            return GetCommit()
        print('Did you run git with --parents?')
        print(ids)
        sys.exit(1)
    if len(ids) == 2:
        return Commit(ids[0], ids[1])
    #
    # More than one parent: a merge.
    #
    merge = Merge(ids[0], ids[1])
    for extra in ids[2:]:
        merge.addparent(extra)
    #
    # Scan the descriptive text for the line saying what was merged; that
    # tells us whether this is an external pull or an internal merge.
    #
    while True:
        line = GetLine()
        if not line:
            print('EOF looking for merge line')
            return None
        external = patterns['ExtMerge'].match(line)
        if external:
            merge.addtree(LookupTree(external.group(2)))
            return merge
        if patterns['IntMerge'].match(line) or patterns['IntMerge2'].match(line):
            merge.internal = 1
            return merge
        #
        # Running into the next commit line means no merge line was found:
        # this merge is dropped and parsing restarts at that commit.
        #
        following = patterns['commit'].match(line)
        if following:
            print('Hit next commit (%s) looking for merge line' % (following.group(1)))
            SaveLine(line)
            return GetCommit()
#
# Print out a tree and its inputs
#
def PrintTree(tree, indent = ''):
    """Print `tree` (commit count and name), then its inputs recursively.

    Each nesting level is indented by one more space than its parent.
    """
    print('%s%4d %s' % (indent, len(tree.commits), tree.name))
    deeper = indent + ' '
    for source in tree.inputs:
        PrintTree(source, deeper)
#
# Let's try to build a data structure giving the patch flows.
#
class FlowNode:
    """Node of the patch-flow structure built by BuildFlowTree().

    tree    -- the tree this node stands for.
    inputs  -- child nodes, keyed by tree name.
    commits -- number of commit paths counted through this node.
    """

    def __init__(self, tree):
        self.commits = 0
        self.inputs = { }
        self.tree = tree
def BuildFlowTree():
    """Build the flow structure from everything recorded in CommitTrees.

    Returns the root FlowNode (for Mainline).  Entries with an empty path
    are counted under a placeholder tree named '[No tree]'.
    """
    root = FlowNode(Mainline)
    placeholder = Tree('[No tree]', '')
    for attribution in CommitTrees.values():
        FillFlowPath(attribution.path or [ placeholder ], root)
    return root
def FillFlowPath(path, node):
    """Count one commit along `path`, starting at `node`.

    `node` and every node reached by following the trees in `path` (child
    lookup is by tree name) get their commit count incremented by one.
    Missing child nodes are created on the way.  Returns None.
    """
    node.commits += 1
    for tree in path:
        try:
            child = node.inputs[tree.name]
        except KeyError:
            child = node.inputs[tree.name] = FlowNode(tree)
        child.commits += 1
        node = child
def PrintFlowTree(ftree, indent = ''):
    """Print the flow node `ftree` and, recursively, its inputs.

    Inputs are listed by descending commit count (ties keep their existing
    order); each level is indented by one more space.
    """
    print('%s%3d %s' % (indent, ftree.commits, ftree.tree.name))
    ranked = sorted(ftree.inputs.values(),
                    key=lambda node: node.commits, reverse=True)
    for child in ranked:
        PrintFlowTree(child, indent + ' ')
#
# Something for graphviz
#
# Fixed preamble of the graphviz file produced by GVTree(): graph-wide and
# node-wide attributes.  The closing brace is written separately by GVTree().
GVHeader = '''digraph "runtree" {
graph [ label = "Patch flow into the mainline",
concentrate = true,
nodesep = 0.1,
rankdir = LR ];
node [shape = polygon,
sides = 4,
height = 0.3
fontsize = 8];
'''
# Commit count of the root flow node; set by GVTree() and read by
# GVPrintNode() when choosing edge colours.
MainlineCommits = 0
def GVTree(ftree):
    """Write the flow tree rooted at `ftree` to 'runtree.gv' for graphviz.

    The file is created (or overwritten) relative to the current directory.
    Every input of the root is emitted as an edge into "Mainline", largest
    commit count first, followed recursively by its own inputs.  As a side
    effect the global MainlineCommits is set to the root's commit count,
    which GVPrintNode() uses for colouring.
    """
    global MainlineCommits
    MainlineCommits = ftree.commits
    inputs = sorted(ftree.inputs.values(),
                    key=lambda node: node.commits, reverse=True)
    # The context manager guarantees the file is flushed and closed even if
    # writing a node fails part-way through.
    with open('runtree.gv', 'w') as gvf:
        gvf.write(GVHeader)
        for input in inputs:
            GVPrintNode(gvf, input, 'Mainline')
        gvf.write('}\n')
def GVNodeName(treename):
    """Shorten a tree name for use as a graph node label.

    Names mentioning 'kernel.org' are reduced to their last two '/'-separated
    components.  Other names lose everything up to and including the first
    '://', provided something precedes it.  Anything else is returned as is.
    """
    if 'kernel.org' in treename:
        pieces = treename.rsplit('/', 2)
        return '%s/%s' % (pieces[-2], pieces[-1])
    scheme, marker, remainder = treename.partition('://')
    if marker and scheme:
        return remainder
    return treename
def GVSort(n1, n2):
    """cmp-style comparator that orders nodes by descending commit count.

    Negative when n1 has more commits than n2, positive when it has fewer,
    zero when the counts are equal.
    """
    return -(n1.commits - n2.commits)
def GVPrintNode(gvf, node, parent):
    """Write the edge from `node` to `parent`, then recurse into its inputs.

    gvf    -- open, writable file object receiving graphviz text.
    node   -- flow node to emit; its shortened tree name labels the graph node.
    parent -- graph-node name of the tree this node feeds into.

    The edge is labelled with the node's commit count.  It is coloured red
    when MainlineCommits divided by that count (integer division) is below
    20, orange when below 100, and left uncoloured otherwise.  Inputs are
    emitted by descending commit count.
    """
    name = GVNodeName(node.tree.name)
    gvf.write ('"%s" -> "%s" [taillabel = "%d", labelfontsize = 8' % (name, parent, node.commits))
    gvf.write (', arrowsize = 0.5')
    ratio = MainlineCommits // node.commits
    if ratio < 20:
        gvf.write(', color = red')
    elif ratio < 100:
        gvf.write(', color = orange')
    gvf.write(']\n')
    ranked = sorted(node.inputs.values(),
                    key=lambda child: child.commits, reverse=True)
    for child in ranked:
        GVPrintNode(gvf, child, name)
#
# Main code.
#
#
# Walk the log commit by commit.  Each commit is credited to the tree already
# recorded for it, and that attribution is then pushed on to its parents.
#
commit = GetCommit()
ncommits = 0
while commit:
    ncommits += 1
    entry = LookupCommitTree(commit.id)
    tree = entry.tree
    priority = entry.priority
    tree.addcommit(commit.id)
    #
    # For regular commits, just remember the tree involved
    #
    if not commit.ismerge:
        AddCommitTree(commit.parent, entry)
    #
    # For merges we have to deal with all the parents.
    #
    else:
        # The first parent stays in the same tree as the merge itself.
        AddCommitTree(commit.parents[0], CTEntry (tree, priority, entry.path))
        if commit.internal:
            # Internal merge: the other parents inherit the same tree,
            # priority and path unchanged.
            for p in commit.parents[1:]:
                AddCommitTree(p, CTEntry (tree, priority, entry.path))
        else:
            # External pull: the other parents belong to the pulled tree,
            # one step further along the path and with a higher priority
            # number (AddCommitTree keeps the lowest).
            path = entry.path + [commit.tree]
            for p in commit.parents[1:]:
                AddCommitTree(p, CTEntry (commit.tree, priority + 1, path))
            if commit.tree is not Mainline:
                tree.addinput(commit.tree)
    commit = GetCommit()
#PrintTree(Mainline)
ftree = BuildFlowTree()
PrintFlowTree(ftree)
GVTree(ftree)
print('%d commits total, %d trees' % (MainlineCommits, len(KnownTrees)))