# -*- coding: iso-8859-1 -*-
# Copyright (C) 2006 Libresoft
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Authors : Gregorio Robles

"""
This module contains configuration parameters regarding file types
(documentation, development, sound, images...).

Each ``config_files_*`` list holds compiled regular expressions; a file
belongs to a category when any expression of that category's list is found
in its name.  ``config_files`` fixes the order in which the categories are
tried and ``guess_file_type`` performs the lookup.

@author:       Gregorio Robles
@organization: Grupo de Sistemas y Comunicaciones, Universidad Rey Juan Carlos
@copyright:    Universidad Rey Juan Carlos (Madrid, Spain)
@license:      GNU GPL version 2 or any later version
@contact:      grex@gsyc.escet.urjc.es
"""

import re

# NOTE: guess_file_type() lower-cases the name before searching, so the
# upper-case variants kept below (.C, .S, .F, .COB, .CBL) only matter to
# callers that search these lists directly with the original name.

# Code files (headers and the like included)
# (most common languages first)

config_files_code = [
    re.compile(r'\.c$'),          # C
    re.compile(r'\.pc$'),         # C
    re.compile(r'\.ec$'),         # C
    re.compile(r'\.ecp$'),        # C
    re.compile(r'\.C$'),          # C++
    re.compile(r'\.cpp$'),        # C++
    re.compile(r'\.c\+\+$'),      # C++
    re.compile(r'\.cxx$'),        # C++
    re.compile(r'\.cc$'),         # C++
    re.compile(r'\.pcc$'),        # C++
    re.compile(r'\.cpy$'),        # C++
    re.compile(r'\.h$'),          # C or C++ header
    re.compile(r'\.hh$'),         # C++ header
    re.compile(r'\.hpp$'),        # C++ header
    re.compile(r'\.hxx$'),        # C++ header
    re.compile(r'\.sh$'),         # Shell
    re.compile(r'\.pl$'),         # Perl
    re.compile(r'\.pm$'),         # Perl
    re.compile(r'\.pod$'),        # Perl
    re.compile(r'\.perl$'),       # Perl
    re.compile(r'\.cgi$'),        # CGI
    re.compile(r'\.php$'),        # PHP
    re.compile(r'\.php3$'),       # PHP
    re.compile(r'\.php4$'),       # PHP
    re.compile(r'\.inc$'),        # PHP
    re.compile(r'\.py$'),         # Python
    re.compile(r'\.java$'),       # Java
    re.compile(r'\.class$'),      # Java Class (or at least a class in some OOPL)
    re.compile(r'\.ada$'),        # ADA
    re.compile(r'\.ads$'),        # ADA
    re.compile(r'\.adb$'),        # ADA
    re.compile(r'\.pad$'),        # ADA
    re.compile(r'\.s$'),          # Assembly
    re.compile(r'\.S$'),          # Assembly
    re.compile(r'\.asm$'),        # Assembly
    re.compile(r'\.awk$'),        # awk
    re.compile(r'\.cs$'),         # C#
    re.compile(r'\.csh$'),        # CShell (including tcsh)
    re.compile(r'\.cob$'),        # COBOL
    re.compile(r'\.cbl$'),        # COBOL
    re.compile(r'\.COB$'),        # COBOL
    re.compile(r'\.CBL$'),        # COBOL
    re.compile(r'\.exp$'),        # Expect
    re.compile(r'\.l$'),          # (F)lex
    re.compile(r'\.ll$'),         # (F)lex
    re.compile(r'\.lex$'),        # (F)lex
    re.compile(r'\.f$'),          # Fortran
    re.compile(r'\.f77$'),        # Fortran
    re.compile(r'\.F$'),          # Fortran
    re.compile(r'\.hs$'),         # Haskell
    re.compile(r'\.lhs$'),        # Not preprocessed Haskell
    re.compile(r'\.el$'),         # LISP (including Scheme)
    re.compile(r'\.scm$'),        # LISP (including Scheme)
    re.compile(r'\.lsp$'),        # LISP (including Scheme)
    re.compile(r'\.jl$'),         # LISP (including Scheme)
    re.compile(r'\.ml$'),         # ML
    re.compile(r'\.ml3$'),        # ML
    re.compile(r'\.m3$'),         # Modula3
    re.compile(r'\.i3$'),         # Modula3
    re.compile(r'\.m$'),          # Objective-C
    re.compile(r'\.p$'),          # Pascal
    re.compile(r'\.pas$'),        # Pascal
    re.compile(r'\.rb$'),         # Ruby
    re.compile(r'\.sed$'),        # sed
    re.compile(r'\.tcl$'),        # TCL
    re.compile(r'\.tk$'),         # TCL
    re.compile(r'\.itk$'),        # TCL
    re.compile(r'\.y$'),          # Yacc
    re.compile(r'\.yy$'),         # Yacc
    re.compile(r'\.idl$'),        # CORBA IDL
    re.compile(r'\.gnorba$'),     # GNOME CORBA IDL
    re.compile(r'\.oafinfo$'),    # GNOME OAF
    re.compile(r'\.mcopclass$'),  # MCOP IDL compiler generated class
    re.compile(r'\.autoforms$'),  # Autoform
    re.compile(r'\.atf$'),        # Autoform
    re.compile(r'\.gnuplot$'),
    re.compile(r'\.xs$'),         # Shared library? Seen a lot of them in gnome-perl
    re.compile(r'\.js$'),         # JavaScript (and who knows, maybe more)
    re.compile(r'\.patch$'),
    re.compile(r'\.diff$'),       # Sometimes patches appear this way
    re.compile(r'\.ids$'),        # Not really sure what this means
    re.compile(r'\.upd$'),        # Unknown purpose (from Kcontrol)
    # The next three were written '$.ad$', '$.i$' and '$.pri$'; a pattern
    # that starts with the end-of-string anchor and then requires more
    # characters can never match, so the leading '$' is now an escaped dot.
    re.compile(r'\.ad$'),         # Unknown purpose (from Kdisplay and mc)
    re.compile(r'\.i$'),          # Appears in the kbindings for Qt
    re.compile(r'\.pri$'),        # from Qt
    re.compile(r'\.schema$'),     # Not really sure what this means
    re.compile(r'\.fd$'),         # Something to do with latex
    re.compile(r'\.cls$'),        # Something to do with latex
    re.compile(r'\.pro$'),        # Postscript generation
    re.compile(r'\.ppd$'),        # PDF generation
    re.compile(r'\.dlg$'),        # Not really sure what this means
    re.compile(r'\.plugin$'),     # Plug-in file
    # '.dsp', '.dtd', '.bat' and '.vala' were unanchored and therefore also
    # matched in the middle of a name (e.g. 'notes.batch.txt'); they are now
    # anchored at the end like every other extension pattern.
    re.compile(r'\.dsp$'),        # Microsoft Developer Studio Project File
    re.compile(r'\.vim$'),        # vim syntax file
    re.compile(r'\.trm$'),        # gnuplot term file
    re.compile(r'\.font$'),       # Font mapping
    re.compile(r'\.ccg$'),        # C++ files - Found in gtkmm*
    re.compile(r'\.hg$'),         # C++ headers - Found in gtkmm*
    re.compile(r'\.dtd$'),        # XML Document Type Definition
    re.compile(r'\.bat$'),        # DOS batch files
    re.compile(r'\.vala$'),       # Vala
    re.compile(r'\.py\.in$'),
    re.compile(r'\.rhtml$'),      # eRuby
    re.compile(r'\.sql$'),        # SQL script
]

# Development documentation files (for hacking generally)

config_files_devel_doc = [
    re.compile(r'^readme.*$'),
    re.compile(r'^changelog.*'),
    re.compile(r'^todo.*$'),
    re.compile(r'^credits.*$'),
    re.compile(r'^authors.*$'),
    re.compile(r'^changes.*$'),
    re.compile(r'^news.*$'),
    re.compile(r'^install.*$'),
    re.compile(r'^hacking.*$'),
    re.compile(r'^copyright.*$'),
    re.compile(r'^licen(s|c)e.*$'),
    re.compile(r'^copying.*$'),
    re.compile(r'manifest$'),
    re.compile(r'faq$'),
    re.compile(r'building$'),
    re.compile(r'howto$'),
    re.compile(r'design$'),
    re.compile(r'\.files$'),
    re.compile(r'files$'),
    re.compile(r'subdirs$'),
    re.compile(r'maintainers$'),
    re.compile(r'developers$'),
    re.compile(r'contributors$'),
    re.compile(r'thanks$'),
    re.compile(r'releasing$'),
    re.compile(r'test$'),
    re.compile(r'testing$'),
    re.compile(r'build$'),
    re.compile(r'comments?$'),
    re.compile(r'bugs$'),
    re.compile(r'buglist$'),
    re.compile(r'problems$'),
    re.compile(r'debug$'),
    re.compile(r'hacks$'),
    re.compile(r'hacking$'),
    re.compile(r'versions?$'),
    re.compile(r'mappings$'),
    re.compile(r'tips$'),
    re.compile(r'ideas?$'),
    re.compile(r'spec$'),
    re.compile(r'compiling$'),
    re.compile(r'notes$'),
    re.compile(r'missing$'),
    re.compile(r'done$'),
    re.compile(r'\.omf$'),        # XML-based format used in GNOME
    re.compile(r'\.lsm$'),
    re.compile(r'^doxyfile$'),
    re.compile(r'\.kdevprj$'),
    re.compile(r'\.directory$'),
    re.compile(r'\.dox$'),
    re.compile(r'\.doap$'),
]

# Building, compiling, configuration and CVS admin files

config_files_building = [
    # NOTE(review): '\.in.*$' matches '.in' followed by anything, so names
    # such as 'foo.inc', 'foo.ini' or 'foo.py.in' are classified as build
    # before the code list is consulted -- confirm this is intended.
    re.compile(r'\.in.*$'),
    re.compile(r'configure.*$'),
    re.compile(r'makefile.*$'),
    re.compile(r'config\.sub$'),
    re.compile(r'config\.guess$'),
    re.compile(r'config\.status$'),
    re.compile(r'ltmain\.sh$'),
    re.compile(r'autogen\.sh$'),
    re.compile(r'config$'),
    re.compile(r'conf$'),
    re.compile(r'cvsignore$'),
    re.compile(r'\.cfg$'),
    re.compile(r'\.m4$'),
    re.compile(r'\.mk$'),
    re.compile(r'\.mak$'),
    re.compile(r'\.make$'),
    re.compile(r'\.mbx$'),
    re.compile(r'\.protocol$'),
    re.compile(r'\.version$'),
    re.compile(r'mkinstalldirs$'),
    re.compile(r'install-sh$'),
    re.compile(r'rules$'),
    re.compile(r'\.kdelnk$'),
    re.compile(r'\.menu$'),
    re.compile(r'linguas$'),      # Build translations
    re.compile(r'potfiles.*$'),   # Build translations
    re.compile(r'\.shlibs$'),     # Shared libraries
#    re.compile('%debian%'),
#    re.compile('%specs/%'),
    re.compile(r'\.spec$'),       # It seems they're necessary for RPM building
    re.compile(r'\.def$'),        # build bootstrap for DLLs on win32
]

# Documentation files

config_files_documentation = [
#    'doc/%'),
#    re.compile('%HOWTO%'),
    re.compile(r'\.html$'),
    re.compile(r'\.txt$'),
    re.compile(r'\.ps(\.gz|\.bz2)?$'),
    re.compile(r'\.dvi(\.gz|\.bz2)?$'),
    re.compile(r'\.lyx$'),
    re.compile(r'\.tex$'),
    re.compile(r'\.texi$'),
    re.compile(r'\.pdf(\.gz|\.bz2)?$'),
    re.compile(r'\.djvu$'),
    re.compile(r'\.epub$'),
    re.compile(r'\.sgml$'),
    re.compile(r'\.docbook$'),
    re.compile(r'\.wml$'),
    re.compile(r'\.xhtml$'),
    re.compile(r'\.phtml$'),
    re.compile(r'\.shtml$'),
    re.compile(r'\.htm$'),
    re.compile(r'\.rdf$'),
    re.compile(r'\.phtm$'),
    re.compile(r'\.tmpl$'),
    re.compile(r'\.ref$'),        # References
    re.compile(r'\.css$'),
#    re.compile('%tutorial%'),
    re.compile(r'\.templates$'),
    re.compile(r'\.dsl$'),
    re.compile(r'\.ent$'),
    re.compile(r'\.xml$'),
    re.compile(r'\.xmi$'),
    re.compile(r'\.xsl$'),
    re.compile(r'\.entities$'),
    re.compile(r'\.[1-7]$'),      # Man pages
    re.compile(r'\.man$'),
    re.compile(r'\.manpages$'),
    re.compile(r'\.doc$'),
    re.compile(r'\.rtf$'),
    re.compile(r'\.wpd$'),
    re.compile(r'\.qt3$'),
    re.compile(r'man\d?/.*\.\d$'),
    re.compile(r'\.docs$'),
    re.compile(r'\.sdw$'),        # OpenOffice.org Writer document
    re.compile(r'\.odt$'),        # OpenOffice.org document
    re.compile(r'\.en$'),         # Files in English language
    re.compile(r'\.de$'),         # Files in German
    re.compile(r'\.es$'),         # Files in Spanish
    re.compile(r'\.fr$'),         # Files in French
    re.compile(r'\.it$'),         # Files in Italian
    re.compile(r'\.cz$'),         # Files in Czech
]

# Images

config_files_images = [
    re.compile(r'\.png$'),
    re.compile(r'\.jpg$'),
    re.compile(r'\.jpeg$'),
    re.compile(r'\.bmp$'),
    re.compile(r'\.gif$'),
    re.compile(r'\.xbm$'),
    re.compile(r'\.eps$'),
    re.compile(r'\.mng$'),
    re.compile(r'\.pnm$'),
    re.compile(r'\.pbm$'),
    re.compile(r'\.ppm$'),
    re.compile(r'\.pgm$'),
    re.compile(r'\.gbr$'),
    re.compile(r'\.svg$'),
    re.compile(r'\.fig$'),
    re.compile(r'\.tif$'),
    re.compile(r'\.swf$'),
    re.compile(r'\.svgz$'),
    re.compile(r'\.shape$'),      # XML files used for shapes for instance in Kivio
    re.compile(r'\.sml$'),        # XML files used for shapes for instance in Kivio
    re.compile(r'\.bdf$'),        # vfontcap - Vector Font Capability Database (VFlib Version 2)
    re.compile(r'\.ico$'),
    re.compile(r'\.dia$'),        # We consider .dia as images, I don't want them in unknown
]

# Translation files

config_files_translation = [
    re.compile(r'\.po$'),
    re.compile(r'\.pot$'),
    re.compile(r'\.charset$'),
    re.compile(r'\.mo$'),
]

# User interface files

config_files_ui = [
    re.compile(r'\.desktop$'),
    re.compile(r'\.ui$'),
    re.compile(r'\.xpm$'),
    re.compile(r'\.xcf$'),
    re.compile(r'\.3ds$'),
    re.compile(r'\.theme$'),
    re.compile(r'\.kimap$'),
    re.compile(r'\.glade$'),
    re.compile(r'\.gtkbuilder$'),
    re.compile(r'rc$'),
]

# Sound files

config_files_sound = [
    re.compile(r'\.mp3$'),
    re.compile(r'\.ogg$'),
    re.compile(r'\.wav$'),
    re.compile(r'\.au$'),
    re.compile(r'\.mid$'),
    re.compile(r'\.vorbis$'),
    re.compile(r'\.midi$'),
    re.compile(r'\.arts$'),
]

# Packages (yes, there are people who upload packages to the repo)

config_files_packages = [
    re.compile(r'\.tar$'),
    re.compile(r'\.tar\.gz$'),    # the dot before 'gz' is now escaped
    re.compile(r'\.tar\.bz2$'),   # the dot before 'bz2' is now escaped
    re.compile(r'\.tgz$'),
    re.compile(r'\.deb$'),
    re.compile(r'\.rpm$'),
    re.compile(r'\.srpm$'),
    re.compile(r'\.ebuild$'),
]

# The list should keep this order
# ie. we want ltmain.sh -> build instead of code
config_files = [
    ('image',         config_files_images),
    ('i18n',          config_files_translation),
    ('ui',            config_files_ui),
    ('multimedia',    config_files_sound),
    ('package',       config_files_packages),
    ('build',         config_files_building),
    ('code',          config_files_code),
    ('documentation', config_files_documentation),
    ('devel-doc',     config_files_devel_doc),
]


def guess_file_type(filename):
    """
    Return the category name of the first entry in ``config_files`` that
    has a pattern matching *filename*, or ``'unknown'`` when none matches.

    The name is lower-cased before it is searched, so matching is
    effectively case-insensitive.  Categories are tried in the order given
    by ``config_files``; the first hit wins.

    @param filename: name of the file to classify.  Several patterns are
        anchored with ``^`` and therefore only match when the name carries
        no leading directory part.
    @return: one of the category names listed in ``config_files`` or the
        string ``'unknown'``.
    """
    # Lower-case once instead of once per pattern.
    lowered = filename.lower()
    for file_type, patt_list in config_files:
        if any(patt.search(lowered) for patt in patt_list):
            return file_type

    return 'unknown'


if __name__ == '__main__':
    import os
    import sys

    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s <file-or-directory>\n' % sys.argv[0])
        sys.exit(1)

    path = sys.argv[1]
    if os.path.isdir(path):
        for root, dirs, files in os.walk(path):
            # Drop version-control metadata directories in place so that
            # os.walk does not descend into them.
            for skip in ('.svn', 'CVS', '.git'):
                if skip in dirs:
                    dirs.remove(skip)

            for name in files:
                # Single-argument print(...) is valid on Python 2 and 3.
                print("%s: %s" % (os.path.join(root, name),
                                  guess_file_type(name)))
    else:
        print(guess_file_type(path))
a/gitdm +++ b/gitdm @@ -14,6 +14,7 @@ import database, csvdump, ConfigFile, reports import getopt, datetime import os, re, sys, rfc822, string +import file_types from patterns import patterns Today = datetime.date.today() @@ -143,6 +144,8 @@ def PrintDateStats(): # Let's slowly try to move some smarts into this class. # class patch: + (ADDED, REMOVED) = range (2) + def __init__ (self, commit): self.commit = commit self.merge = self.added = self.removed = 0 @@ -152,6 +155,7 @@ class patch: self.reviews = [ ] self.testers = [ ] self.reports = [ ] + self.filetypes = {} def addreviewer (self, reviewer): self.reviews.append (reviewer) @@ -162,6 +166,13 @@ class patch: def addreporter (self, reporter): self.reports.append (reporter) + def addfiletype (self, filetype, added, removed): + if self.filetypes.has_key (filetype): + self.filetypes[filetype][self.ADDED] += added + self.filetypes[filetype][self.REMOVED] += removed + else: + self.filetypes[filetype] = [added, removed] + def parse_numstat(line, file_filter): """ Receive a line of text, determine if fits a numstat line and @@ -172,7 +183,7 @@ def parse_numstat(line, file_filter): filename = m.group (3) # If we have a file filter, check for file lines. if file_filter and not file_filter.search (filename): - return None, None, None + return None, None, None, None try: added = int (m.group (1)) @@ -181,9 +192,14 @@ def parse_numstat(line, file_filter): # A binary file (image, etc.) is marked with '-' added = removed = 0 - return filename, added, removed + m = patterns['rename'].match (filename) + if m: + filename = '%s%s%s' % (m.group (1), m.group (3), m.group (4)) + + filetype = file_types.guess_file_type (os.path.basename(filename)) + return filename, filetype, added, removed else: - return None, None, None + return None, None, None, None # # The core hack for grabbing the information about a changeset. 
@@ -296,10 +312,11 @@ def grabpatch(): else: # Get the statistics (lines added/removes) using numstats # and without requiring a diff (--numstat instead -p) - (filename, added, removed) = parse_numstat (Line, FileFilter) - if filename: - p.added += added - p.removed += removed + (filename, filetype, added, removed) = parse_numstat (Line, FileFilter) + if filename: + p.added += added + p.removed += removed + p.addfiletype (filetype, added, removed) if '@' in p.author.name: GripeAboutAuthorName (p.author.name)