Added option to get the stats from numstat instead of diff

The --numstat option of git log gives the number of lines added and removed per file. Hence, it is not necessary to parse a raw diff. Another benefit is that the log is less verbose, which helps when processing long logs. This also prepares the code for counting the changes per file type. Signed-off-by: Germán Póo-Caamaño <gpoo@gnome.org>
2011-06-22 17:25:17 -07:00
parent 4a729f1d72
commit 935be113b3
3 changed files with 50 additions and 16 deletions
--- a/10
+++ b/10
@@ -20,6 +20,10 @@ Run it like this:

   git log -p -M [details] | gitdm [options]

+Alternatively, you can run with:
+
+   git log --numstat -M [details] | gitdm -n [options]
+
 The [details] tell git which changesets are of interest; the [options] can
 be:

@@ -44,6 +48,8 @@ be:

 	-l num	Only list the top <num> entries in each report.

+    -n  Use --numstat instead of generated patches to get the statistics.
+
 	-o file	Write text output to the given file (default is stdout).

 	-r pat	Only generate statistics for changes to files whose 
@@ -68,6 +74,10 @@ looks like:
    git log -p -M v2.6.19..v2.6.20 | \
 	gitdm -u -s -a -o results -h results.html

+or:
+
+    git log --numstat -M v2.6.19..v2.6.20 | \
+	gitdm -u -s -a -n -o results -h results.html

 CONFIGURATION FILE

--- a/52
+++ b/52
@@ -37,6 +37,7 @@ DumpDB = 0
 CFName = 'gitdm.config'
 DirName = ''
 Aggregate = 'month'
+Numstat = 0

 #
 # Options:
@@ -48,6 +49,7 @@ Aggregate = 'month'
 # -D		Output date statistics
 # -h hfile	HTML output to hfile
 # -l count	Maximum length for output lists
+# -n        Use --numstat output instead of a generated patch from git log
 # -o file	File for text output
 # -r pattern	Restrict to files matching pattern
 # -s		Ignore author SOB lines
@@ -59,9 +61,9 @@ Aggregate = 'month'
 def ParseOpts ():
    global MapUnknown, DevReports
    global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
-    global CFName, CSVFile, DirName, Aggregate
+    global CFName, CSVFile, DirName, Aggregate, Numstat

-    opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:o:r:suwx:z')
+    opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:r:suwx:z')
    for opt in opts:
        if opt[0] == '-a':
            AkpmOverLt = 1
@@ -77,6 +79,8 @@ def ParseOpts ():
            reports.SetHTMLOutput (open (opt[1], 'w'))
        elif opt[0] == '-l':
            reports.SetMaxList (int (opt[1]))
+        elif opt[0] == '-n':
+            Numstat = 1
        elif opt[0] == '-o':
            reports.SetOutput (open (opt[1], 'w'))
        elif opt[0] == '-r':
@@ -248,20 +252,36 @@ def grabpatch():
                sys.stderr.write ('Funky date: %s\n' % p.date)
                p.date = Today
            continue
-        #
-        # If we have a file filter, check for file lines.
-        #
-        if FileFilter:
-            ignore = ApplyFileFilter (Line, ignore)
-        #
-        # OK, maybe it's part of the diff itself.
-        #
-        if not ignore:
-            if Padd.match (Line):
-                p.added += 1
-                continue
-            if Prem.match (Line):
-                p.removed += 1
+        if not Numstat:
+            #
+            # If we have a file filter, check for file lines.
+            #
+            if FileFilter:
+                ignore = ApplyFileFilter (Line, ignore)
+            #
+            # OK, maybe it's part of the diff itself.
+            #
+            if not ignore:
+                if Padd.match (Line):
+                    p.added += 1
+                    continue
+                if Prem.match (Line):
+                    p.removed += 1
+        else:
+            # Get the statistics (lines added/removed) using --numstat
+            # output, without requiring a diff (--numstat instead of -p)
+            m = Pnumstat.match (Line)
+            if m:
+                # If we have a file filter, check for file lines.
+                if FileFilter and not FileFilter.search (m.group(3)):
+                    continue
+
+                try:
+                    p.added += int(m.group(1))
+                    p.removed += int(m.group(2))
+                except ValueError:
+                    # A binary file (image, etc.) is marked with '-'
+                    pass

    if '@' in p.author.name:
        GripeAboutAuthorName (p.author.name)
--- a/patterns.py
+++ b/patterns.py
@@ -37,3 +37,7 @@ PExtMerge = re.compile(r'^ +Merge( branch .* of)? ([^ ]+:[^ ]+)\n$')
 PIntMerge = re.compile(r'^ +(Merge|Pull) .* into .*$')
 # PIntMerge2 = re.compile(r"^ +Merge branch(es)? '.*$")
 PIntMerge2 = re.compile(r"^ +Merge .*$")
+#
+# Another way to get the statistics (per file).  It implies --numstat
+Pnumstat = re.compile('^(\d+|-)\s+(\d+|-)\s+(.*)$')
+Prename = re.compile('(.*)\{(.*) => (.*)\}(.*)')