Move out the grabpatch from the parser

The class LogPatchSplitter provides an iterator per patch.  This
makes the code cleaner, easier to read and more pythonic.
The class only splits the log into changesets, returning each one as a list of lines.

It is possible to test it separately by:
   $ git log | python logparser.py | more

Signed-off-by: Germán Póo-Caamaño <gpoo@gnome.org>
This commit is contained in:
Germán Póo-Caamaño 2011-06-22 18:57:02 -07:00
parent efcc420153
commit 7b26ae2109
2 changed files with 102 additions and 24 deletions

36
gitdm
View File

@ -1,11 +1,12 @@
#!/usr/bin/python
#
#-*- coding:utf-8 -*-
#
# This code is part of the LWN git data miner.
#
# Copyright 2007-11 Eklektix, Inc.
# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
# Copyright 2011 Germán Póo-Caamaño <gpoo@gnome.org>
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
@ -15,6 +16,7 @@ import database, csvdump, ConfigFile, reports
import getopt, datetime
import os, re, sys, rfc822, string
import file_types
import logparser
from patterns import patterns
Today = datetime.date.today()
@ -204,29 +206,14 @@ def parse_numstat(line, file_filter):
#
# The core hack for grabbing the information about a changeset.
#
def grabpatch():
global NextLine
while (1):
m = patterns['commit'].match (NextLine)
if m:
break;
NextLine = sys.stdin.readline ()
if not NextLine:
return
def grabpatch(logpatch):
m = patterns['commit'].match (logpatch[0])
if not m:
return None
p = patch(m.group (1))
NextLine = sys.stdin.readline ()
ignore = (FileFilter is not None)
while NextLine:
Line = NextLine
#
# If this line starts a new commit, drop out.
#
m = patterns['commit'].match (Line)
if m:
break
NextLine = sys.stdin.readline ()
for Line in logpatch[1:]:
#
# Maybe it's an author line?
#
@ -379,7 +366,6 @@ if AkpmOverLt == 1:
Akpm = ('akpm@linux-foundation.org',
LookupStoreHacker ('Andrew Morton', 'akpm@linux-foundation.org'))
NextLine = sys.stdin.readline ()
TotalChanged = TotalAdded = TotalRemoved = 0
#
@ -387,12 +373,14 @@ TotalChanged = TotalAdded = TotalRemoved = 0
#
print >> sys.stderr, 'Grabbing changesets...\r',
patches = logparser.LogPatchSplitter(sys.stdin)
printcount = CSCount = 0
while (1):
for logpatch in patches:
if (printcount % 50) == 0:
print >> sys.stderr, 'Grabbing changesets...%d\r' % printcount,
printcount += 1
p = grabpatch()
p = grabpatch(logpatch)
if not p:
break
# if p.added > 100000 or p.removed > 100000:

90
logparser.py Normal file
View File

@ -0,0 +1,90 @@
#!/usr/bin/env python
#-*- coding:utf-8 -*-
#
# Copyright © 2009 Germán Póo-Caamaño <gpoo@gnome.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
import sys
from patterns import patterns
class LogPatchSplitter:
    """
    Iterator that splits a git log output into changesets.

    Each iteration returns one changeset as a list of lines.  The first
    element is the line that matched the commit pattern; the remaining
    elements are the lines that follow it, up to (but not including) the
    next line matching the commit pattern.  Lines are returned exactly
    as fd.readline() produced them.  Any input that precedes the first
    commit line is discarded.

    Typical use case:

        patches = LogPatchSplitter(sys.stdin)

        for patch in patches:
            parse_patch(patch)
    """

    def __init__(self, fd, commit_pattern=None):
        """
        fd             -- object with a readline() method that returns an
                          empty (false) value at end of input.
        commit_pattern -- object with a match(line) method used to detect
                          the first line of a changeset.  Defaults to
                          patterns['commit'].
        """
        self.fd = fd
        # Commit line read ahead while finishing the previous changeset;
        # it becomes the first line of the next one.
        self.buffer = None
        # Not used by this class; kept so that code reading the
        # attribute keeps working.
        self.patch = []
        if commit_pattern is None:
            commit_pattern = patterns['commit']
        self.commit_pattern = commit_pattern

    def __iter__(self):
        return self

    def next(self):
        """
        Return the next changeset as a list of lines.

        Raises StopIteration once the input holds no more commits.
        """
        patch = self.__grab_patch__()
        if not patch:
            raise StopIteration
        return patch

    # Python 3 spells the iterator method __next__; keep both names.
    __next__ = next

    def __grab_patch__(self):
        """
        Extract a patch from the file descriptor.

        Returns the patch as a list of lines, or None when no further
        commit line can be found.
        """
        is_commit = self.commit_pattern.match

        # Consume the read-ahead line right away.  If it were cleared only
        # while collecting body lines, a commit line sitting at the very
        # end of the input would be handed out again on every call and
        # the iteration would never finish.
        line = self.buffer or self.fd.readline()
        self.buffer = None

        # Skip everything up to the line that opens a changeset.
        while line and not is_commit(line):
            line = self.fd.readline()

        if not line:
            return None

        patch = [line]

        line = self.fd.readline()
        while line:
            # If this line starts a new commit, keep it for the next
            # call and drop out.
            if is_commit(line):
                self.buffer = line
                break

            patch.append(line)
            line = self.fd.readline()

        return patch
if __name__ == '__main__':
patches = LogPatchSplitter(sys.stdin)
for patch in patches:
print '---------- NEW PATCH ----------'
for line in patch:
print line,