334 lines
		
	
	
		
			8.3 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			334 lines
		
	
	
		
			8.3 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/python
 | |
| #
 | |
| # Create a graph of patch flow into the mainline.
 | |
| #
 | |
| # This code is part of the LWN git data miner.
 | |
| #
 | |
| # Copyright 2007-8 LWN.net
 | |
| # Copyright 2007-8 Jonathan Corbet <corbet@lwn.net>
 | |
| #
 | |
| # This file may be distributed under the terms of the GNU General
 | |
| # Public License, version 2.
 | |
| #
 | |
| import sys
 | |
| import patterns
 | |
| 
 | |
| #
 | |
| # The various types of commit we understand.
 | |
| #
 | |
| class Commit:
 | |
|     def __init__(self, id, parent):
 | |
|         self.id = id
 | |
|         self.parent = parent
 | |
|         self.ismerge = 0
 | |
|         self.treepriority = 0
 | |
| #
 | |
| # Merges are special 
 | |
| #
 | |
| class Merge (Commit):
 | |
|     def __init__(self, id, parent):
 | |
|         Commit.__init__(self, id, parent)
 | |
|         self.ismerge = 1
 | |
|         self.internal = 1 # Two branches within a repo?
 | |
|         self.parents = [ parent ]
 | |
|         
 | |
|     def addparent(self, parentid):
 | |
|         self.parents.append(parentid)
 | |
| 
 | |
|     def addtree(self, tree):
 | |
|         self.tree = tree
 | |
|         self.internal = 0
 | |
| 
 | |
| #
 | |
| # Trees: where the commits come from.
 | |
| #
 | |
| class Tree:
 | |
|     def __init__(self, name, url):
 | |
|         self.name = name
 | |
|         self.url = url
 | |
|         self.inputs = [ ]
 | |
|         self.commits = [ ]
 | |
| 
 | |
|     def addcommit(self, id):
 | |
|         self.commits.append(id)
 | |
| 
 | |
|     def addinput(self, tree):
 | |
|         if tree not in self.inputs:
 | |
|             self.inputs.append(tree)
 | |
|             # print '%s -> %s' % (tree.name, self.name)
 | |
| 
 | |
| Mainline = Tree('Mainline',
 | |
|                 'git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git')
 | |
| KnownTrees = { Mainline.url: Mainline }
 | |
| 
 | |
| def NormalizeURL(url):
 | |
|     if url[:4] == 'git:':
 | |
|         return url
 | |
|     if url == '../net-2.6/':
 | |
|         url = 'git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6'
 | |
|     url = url.replace('master.kernel.org:', 'git://git.kernel.org')
 | |
|     if url[-18:] == 'torvalds/linux-2.6':
 | |
|         url += '.git'
 | |
|     if url[:8] == '/pub/scm':
 | |
|         url = 'git://git.kernel.org' + url
 | |
|     return url
 | |
| 
 | |
| def LookupTree(url):
 | |
|     url = NormalizeURL(url)
 | |
|     try:
 | |
|         return KnownTrees[url]
 | |
|     except KeyError:
 | |
|         tree = Tree(url, url)
 | |
|         KnownTrees[url] = tree
 | |
|         return tree
 | |
| 
 | |
| #
 | |
| # We track which tree every commit belongs to.
 | |
| #
 | |
| CommitTrees = { }
 | |
| class CTEntry:
 | |
|     def __init__ (self, tree, priority, path):
 | |
|         self.tree = tree
 | |
|         self.priority = priority
 | |
|         self.path = path
 | |
| 
 | |
| def AddCommitTree(id, entry):
 | |
| #    print 'add: ', id, '[',
 | |
| #    for tree in entry.path:
 | |
| #        print tree.name,
 | |
| #    print ']'
 | |
|     try:
 | |
|         oldentry = CommitTrees[id]
 | |
|         if entry.priority < oldentry.priority:
 | |
|             CommitTrees[id] = entry
 | |
|     except KeyError:
 | |
|         CommitTrees[id] = entry
 | |
| 
 | |
| 
 | |
| def LookupCommitTree(id):
 | |
|     try:
 | |
|         return CommitTrees[id]
 | |
|     except KeyError:
 | |
|         print 'Unfound commit %s' % (id)
 | |
|         return CTEntry (Mainline, 0, [])
 | |
| 
 | |
| #
 | |
| # Input handling with one-line pushback.
 | |
| #
 | |
| SavedLine = None
 | |
| Input = sys.stdin
 | |
| 
 | |
| def GetLine():
 | |
|     global SavedLine
 | |
|     if SavedLine:
 | |
|         ret = SavedLine
 | |
|         SavedLine = None
 | |
|         return ret
 | |
|     return Input.readline()
 | |
| 
 | |
| def SaveLine(line):
 | |
|     global SavedLine
 | |
|     SavedLine = line
 | |
| 
 | |
| #
 | |
| # Pull in a commit and see what it is.
 | |
| #
 | |
| def GetCommit():
 | |
|     #
 | |
|     # Skip junk up to the next commit.
 | |
|     #
 | |
|     while 1:
 | |
|         line = GetLine()
 | |
|         if not line:
 | |
|             return None
 | |
|         m = patterns.Pcommit.match(line)
 | |
|         if m:
 | |
|             break
 | |
| 
 | |
|     #
 | |
|     # Look at the commit and see how many parents we have.
 | |
|     #
 | |
|     ids = m.group(1).split()
 | |
|     if len(ids) <= 1:
 | |
|         if len(CommitTrees.values()) > 0:
 | |
|             print 'No-Parent commit:', ids[0]
 | |
|             return GetCommit()
 | |
|         print 'Did you run git with --parents?'
 | |
|         print ids
 | |
|         sys.exit(1)
 | |
|     if len(ids) == 2:  # Simple commit
 | |
|         return Commit(ids[0], ids[1])
 | |
|     #
 | |
|     # OK, we have a merge.
 | |
|     #
 | |
|     merge = Merge(ids[0], ids[1])
 | |
|     for id in ids[2:]:
 | |
|         merge.addparent(id)
 | |
|     #
 | |
|     # We need to figure out what kind of merge it is, so read through the
 | |
|     # descriptive text to the merge line.
 | |
|     #
 | |
|     while 1:
 | |
|         line = GetLine()
 | |
|         if not line:
 | |
|             print 'EOF looking for merge line'
 | |
|             return None
 | |
|         #
 | |
|         # Maybe it's an external merge?
 | |
|         #
 | |
|         m = patterns.PExtMerge.match(line)
 | |
|         if m:
 | |
|             merge.addtree(LookupTree(m.group(2)))
 | |
|             return merge
 | |
|         #
 | |
|         # OK, maybe it's internal
 | |
|         #
 | |
|         if patterns.PIntMerge.match(line) or patterns.PIntMerge2.match(line):
 | |
|             #print 'Internal:', line[:-1]
 | |
|             merge.internal = 1
 | |
|             return merge
 | |
|         m = patterns.Pcommit.match(line)
 | |
|         if m:
 | |
|             print 'Hit next commit (%s) looking for merge line' % (m.group(1))
 | |
|             SaveLine(line)
 | |
|             return GetCommit()
 | |
| 
 | |
| #
 | |
| # Print out a tree and its inputs
 | |
| #
 | |
| def PrintTree(tree, indent = ''):
 | |
|     print '%s%4d %s' % (indent, len(tree.commits), tree.name)
 | |
|     for input in tree.inputs:
 | |
|         PrintTree(input, indent + '    ')
 | |
| 
 | |
| #
 | |
| # Let's try to build a data structure giving the patch flows.
 | |
| #
 | |
| class FlowNode:
 | |
|     def __init__(self, tree):
 | |
|         self.tree = tree
 | |
|         self.inputs = { }
 | |
|         self.commits = 0
 | |
| 
 | |
| def BuildFlowTree():
 | |
|     rootnode = FlowNode(Mainline)
 | |
|     notree = Tree('[No tree]', '')
 | |
|     for centry in CommitTrees.values():
 | |
|         path = centry.path
 | |
|         if not path:
 | |
|             path = [ notree ]
 | |
|         FillFlowPath(path, rootnode)
 | |
|     return rootnode
 | |
| 
 | |
| def FillFlowPath(path, node):
 | |
|     node.commits += 1
 | |
|     if len(path) == 0:
 | |
|         return
 | |
|     next, rest = path[0], path[1:]
 | |
|     try:
 | |
|         nextnode = node.inputs[next.name]
 | |
|     except KeyError:
 | |
|         nextnode = node.inputs[next.name] = FlowNode(next)
 | |
|     return FillFlowPath(rest, nextnode)
 | |
| 
 | |
| def PrintFlowTree(ftree, indent = ''):
 | |
|     print '%s%3d %s' % (indent, ftree.commits, ftree.tree.name)
 | |
|     inputs = ftree.inputs.values()
 | |
|     inputs.sort(GVSort)
 | |
|     for input in inputs:
 | |
|         PrintFlowTree(input, indent + '  ')
 | |
| 
 | |
| #
 | |
| # Something for graphviz
 | |
| #
 | |
| GVHeader = '''digraph "runtree" {
 | |
| graph [ label = "Patch flow into the mainline",
 | |
|         concentrate = true,
 | |
|         nodesep = 0.1,
 | |
|         rankdir = LR ];
 | |
| node [shape = polygon,
 | |
|       sides = 4,
 | |
|       height = 0.3
 | |
|       fontsize = 8];
 | |
| '''
 | |
| 
 | |
| 
 | |
| MainlineCommits = 0
 | |
| 
 | |
| def GVTree(ftree):
 | |
|     global MainlineCommits
 | |
|     MainlineCommits = ftree.commits
 | |
|     gvf = open('runtree.gv', 'w')
 | |
|     gvf.write(GVHeader)
 | |
|     inputs = ftree.inputs.values()
 | |
|     inputs.sort(GVSort)
 | |
|     for input in inputs:
 | |
|         GVPrintNode(gvf, input, 'Mainline')
 | |
|     gvf.write('}\n')
 | |
| 
 | |
| def GVNodeName(treename):
 | |
|     sname = treename.split('/')
 | |
|     if treename.find('kernel.org') >= 0:
 | |
|         return '%s/%s' % (sname[-2], sname[-1])
 | |
|     sep = treename.find ('://')
 | |
|     if sep > 0:
 | |
|         return treename[sep+3:]
 | |
|     return treename
 | |
| 
 | |
| def GVSort(n1, n2):
 | |
|     return n2.commits - n1.commits
 | |
| 
 | |
| def GVPrintNode(gvf, node, parent):
 | |
|     name = GVNodeName(node.tree.name)
 | |
|     gvf.write ('"%s" -> "%s" [taillabel = "%d", labelfontsize = 8' % (name, parent, node.commits))
 | |
|     gvf.write (', arrowsize = 0.5')
 | |
|     if MainlineCommits/node.commits < 20:
 | |
|         gvf.write(', color = red')
 | |
|     elif MainlineCommits/node.commits < 100:
 | |
|         gvf.write(', color = orange');
 | |
|     gvf.write(']\n')
 | |
|     inputs = node.inputs.values()
 | |
|     if inputs:
 | |
|         inputs.sort(GVSort)
 | |
|         for input in inputs:
 | |
|             GVPrintNode(gvf, input, name)
 | |
| 
 | |
| #
 | |
| # Main code.
 | |
| #
 | |
| commit = GetCommit()
 | |
| ncommits = 0
 | |
| while commit:
 | |
|     ncommits += 1
 | |
|     entry = LookupCommitTree(commit.id)
 | |
|     tree = entry.tree
 | |
|     priority = entry.priority
 | |
|     tree.addcommit(commit.id)
 | |
|     #
 | |
|     # For regular commits, just remember the tree involved
 | |
|     #
 | |
|     if not commit.ismerge:
 | |
|         AddCommitTree(commit.parent, entry)
 | |
|     #
 | |
|     # For merges we have to deal with all the parents.
 | |
|     #
 | |
|     else:
 | |
|         AddCommitTree(commit.parents[0], CTEntry (tree, priority, entry.path))
 | |
|         if commit.internal:
 | |
|             for p in commit.parents[1:]:
 | |
|                 path = entry.path + [tree]
 | |
|                 AddCommitTree(p, CTEntry (tree, priority, entry.path))
 | |
|         else:
 | |
|             for p in commit.parents[1:]:
 | |
|                 path = entry.path + [commit.tree]
 | |
|                 AddCommitTree(p, CTEntry (commit.tree, priority + 1,  path))
 | |
|                 if commit.tree is not Mainline:
 | |
|                     tree.addinput(commit.tree)
 | |
|     commit = GetCommit()
 | |
|         
 | |
| #PrintTree(Mainline)
 | |
| ftree = BuildFlowTree()
 | |
| PrintFlowTree(ftree)
 | |
| GVTree(ftree)
 | |
| print '%d commits total, %d trees' % (MainlineCommits, len (KnownTrees.keys()))
 | 
