Improve validation tools

To use test.py in gating jobs, two options (--job-niceness and --job-build) have been added that set default values. That way, the Jenkins files only need to contain the master switch, and we can control ourselves what exactly is run as part of such a gate job. For validation, we no longer walk the whole tree when only individual files are validated. A verbose mode has been added: each pass now shows the number of files processed and only outputs filenames if --verbose is given. Additionally, formatting has been cleaned up by using pylint and pep8. pep8 reports no warnings or errors now. Example outputs of test.py: $ tools/test.py --job-niceness Validating files... config-reference/ch_networkingconfigure.xml: trailing or unnecessary whitespaces found in lines: 13 ... Validation failed, validated 14 files. $ tools/test.py --job-build Checking that no removed files are referenced... Passed removed file check, 41 files were removed, 627 files checked. Validating files... Validation passed, validated 14 files. Queuing the following books for building: config-reference install-guide admin-guide-network Building all books now... >>> Build of book admin-guide-network succeeded. >>> Build of book install-guide succeeded. >>> Build of book config-reference succeeded. Change-Id: Ia443553b6c774df7e72ef7c0d8768badfb34bc6d
2013-09-22 14:18:08 +02:00 · 2013-09-22 14:18:08 +02:00 · 6d555db21a
parent 99b93dc27e
commit 6d555db21a
2 changed files with 324 additions and 146 deletions
--- a/test.py
+++ b/test.py
@ -29,13 +29,17 @@ import sys
 import urllib2
 # These are files that are known to not be in DocBook format
-FILE_EXCEPTIONS = ['st-training-guides.xml', 'ha-guide-docinfo.xml', 'bk001-ch003-associate-general.xml', 'basic-install-pom.xml']
+FILE_EXCEPTIONS = ['st-training-guides.xml',
                   'ha-guide-docinfo.xml',
                   'bk001-ch003-associate-general.xml',
                   'basic-install-pom.xml']
 # These are books that we aren't checking yet
 BOOK_EXCEPTIONS = []
 RESULTS_OF_BUILDS = []
 # NOTE(berendt): check_output as provided in Python 2.7.5 to make script
 #                usable with Python < 2.7
 def check_output(*popenargs, **kwargs):
@ -84,7 +88,7 @@ def verify_section_tags_have_xmid(doc):
    for node in doc.xpath('//docbook:section', namespaces=ns):
        if "{http://www.w3.org/XML/1998/namespace}id" not in node.attrib:
            raise ValueError("section missing xml:id attribute, line %d" %
-                            node.sourceline)
+                             node.sourceline)
 def verify_nice_usage_of_whitespaces(docfile):
@ -108,8 +112,10 @@ def verify_nice_usage_of_whitespaces(docfile):
    ]
    for element in elements:
-        checks.append(re.compile(".*<%s>\s+[\w\-().:!?{}\[\]]+.*\n" % element)),
+        checks.append(re.compile(".*<%s>\s+[\w\-().:!?{}\[\]]+.*\n"
-        checks.append(re.compile(".*[\w\-().:!?{}\[\]]+\s+<\/%s>.*\n" % element))
+                                 % element)),
        checks.append(re.compile(".*[\w\-().:!?{}\[\]]+\s+<\/%s>.*\n"
                                 % element))
    lc = 0
    affected_lines = []
@ -121,7 +127,8 @@ def verify_nice_usage_of_whitespaces(docfile):
    if len(affected_lines) > 0:
        raise ValueError("trailing or unnecessary whitespaces "
-            "in following lines: %s" % ", ".join(affected_lines))
+                         "found in lines: %s"
                         % ", ".join(affected_lines))
 def error_message(error_log):
@ -138,10 +145,12 @@ def error_message(error_log):
 # Check whether only files in www got updated
 def only_www_touched():
    """Check whether only files in www directory are touched"""
    try:
-        args = ["git", "diff", "--name-only", "HEAD", "HEAD~1"]
+        args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
        modified_files = check_output(args).strip().split()
-    except (CalledProcessError, OSError) as e:
+    except (subprocess.CalledProcessError, OSError) as e:
        print("git failed: %s" % e)
        sys.exit(1)
@ -155,39 +164,44 @@ def only_www_touched():
    return www_changed and not other_changed
-def get_modified_files(rootdir, filter=None):
+
 def get_modified_files(rootdir, filtering=None):
    """Get modified files below doc directory"""
    # There are several tree traversals in this program that do a
-    # chroot, we need to run this git command always from the rootdir,
+    # chdir, we need to run this git command always from the rootdir,
    # so assure that.
    os.chdir(rootdir)
    try:
-        args = ["git", "diff", "--name-only", "--relative", "HEAD", "HEAD~1"]
+        args = ["git", "diff", "--name-only", "--relative", "HEAD~1", "HEAD"]
-        if filter != None:
+        if filtering is not None:
-            args.append(filter)
+            args.append(filtering)
        modified_files = check_output(args).strip().split()
    except (subprocess.CalledProcessError, OSError) as e:
        print("git failed: %s" % e)
        sys.exit(1)
    return modified_files
-def check_deleted_files(rootdir, file_exceptions):
+
-    """ Check whether files got deleted and verify that no other file references them.
+def check_deleted_files(rootdir, file_exceptions, verbose):
    """ Check whether files got deleted and verify that no other file
    references them.
    """
-    print("\nChecking for removed files")
+    print("\nChecking that no removed files are referenced...")
    deleted_files = get_modified_files(rootdir, "--diff-filter=D")
    if not deleted_files:
        print("No files were removed.")
        return
-    print(" Removed files:")
+    if verbose:
-    for f in deleted_files:
+        print(" Removed files:")
-        print ("   %s" % f)
+        for f in deleted_files:
- 
+            print ("   %s" % f)
    deleted_files = map(lambda x: os.path.abspath(x), deleted_files)
    no_checked_files = 0
    # Figure out whether files were included anywhere
    missing_reference = False
@ -204,17 +218,20 @@ def check_deleted_files(rootdir, file_exceptions):
        for f in files:
            if (f.endswith('.xml') and
-                f != 'pom.xml' and
+                    f != 'pom.xml' and
-                f not in file_exceptions):
+                    f not in file_exceptions):
                path = os.path.abspath(os.path.join(root, f))
                doc = etree.parse(path)
                no_checked_files = no_checked_files + 1
                # Check for inclusion of files as part of imagedata
-                for node in doc.findall('//{http://docbook.org/ns/docbook}imagedata'):
+                for node in doc.findall(
                        '//{http://docbook.org/ns/docbook}imagedata'):
                    href = node.get('fileref')
                    if (f not in file_exceptions and
-                        os.path.abspath(href) in deleted_files):
+                            os.path.abspath(href) in deleted_files):
-                        print("  File %s has an imagedata href for deleted file %s " % (f, href))
+                        print("  File %s has imagedata href for deleted "
                              "file %s" % (f, href))
                        missing_reference = True
                        break
@ -227,25 +244,92 @@ def check_deleted_files(rootdir, file_exceptions):
                for node in doc.xpath('//xi:include', namespaces=ns):
                    href = node.get('href')
                    if (os.path.abspath(href) in deleted_files):
-                        print("  File %s has an xi:include on deleted file %s " % (f, href))
+                        print("  File %s has an xi:include on deleted file %s"
                              % (f, href))
                        missing_reference = True
    if missing_reference:
        print("Failed removed file check, %d files were removed, "
              "%d files checked."
              % (len(deleted_files), no_checked_files))
        sys.exit(1)
-    print("Passed removed file check.")
+    print("Passed removed file check, %d files were removed, "
          "%d files checked."
          % (len(deleted_files), no_checked_files))
 def validate_one_file(schema, rootdir, path, verbose,
                      any_failures, check_syntax, check_niceness):
    """Run the selected checks against one file.

    The already-built schema is handed in by the caller so that it only
    has to be generated once; generating it is expensive.

    :param schema: schema object given to validation_failed(); its
        error_log is printed when the validation fails
    :param rootdir: directory that reported file names are relative to
    :param path: file to check
    :param verbose: print the file name before checking it
    :param any_failures: failure flag accumulated by the caller
    :param check_syntax: parse the file, validate it against the schema
        and verify the section tags
    :param check_niceness: run the whitespace check on the file
    :returns: True if a check failed for this file, otherwise the
        any_failures value that was passed in
    """
    if verbose:
        print(" Validating %s" % os.path.relpath(path, rootdir))

    try:
        if check_syntax:
            tree = etree.parse(path)
            if validation_failed(schema, tree):
                any_failures = True
                print(error_message(schema.error_log))
            verify_section_tags_have_xmid(tree)
        if check_niceness:
            verify_nice_usage_of_whitespaces(path)
    except (etree.XMLSyntaxError, ValueError) as problem:
        # Both the parser and the verify_* helpers report a broken file
        # by raising; the file is named and counted as failed.
        any_failures = True
        print("  %s: %s" % (os.path.relpath(path, rootdir), problem))

    return any_failures
 def is_xml(filename):
    """Return True if filename is an XML file other than a maven pom.xml.

    The decision is made on the last path component, so a pom.xml is
    rejected both below a directory ("book/pom.xml") and when the name
    has no directory part at all ("pom.xml"), which is how a file
    located directly in the root directory is listed by
    "git diff --relative".

    :param filename: file name, optionally with leading directories
    :returns: True for *.xml files that are not called pom.xml
    """
    basename = os.path.basename(filename)
    return basename.endswith('.xml') and basename != 'pom.xml'
 def validate_individual_files(rootdir, exceptions, verbose,
                              check_syntax=False, check_niceness=False,
                              voting=True):
    """Validate list of modified files."""
 def validate_individual_files(rootdir, exceptions, force=False, niceness=False, voting=True):
    schema = get_schema()
    any_failures = False
-    if force:
+    no_validated = 0
-        print("\nValidating all files")
+
-    else:
+    # Do not select delete files, just Added, Copied, Modified, Renamed,
-        modified_files = get_modified_files(rootdir)
+    # or Type changed
-        print("\nFollowing files will be validated:")
+    modified_files = get_modified_files(rootdir, "--diff-filter=ACMRT")
-        for f in modified_files:
+
-            print(">>> %s" % f)
+    modified_files = filter(is_xml, modified_files)
-        modified_files = map(lambda x: os.path.abspath(x), modified_files)
+    print("\nValidating files...")
    modified_files = map(lambda x: os.path.abspath(x), modified_files)
    for f in modified_files:
        if os.path.basename(f) in exceptions:
            continue
        any_failures = validate_one_file(
            schema, rootdir, f, verbose, any_failures,
            check_syntax, check_niceness)
        no_validated = no_validated + 1
    if voting and any_failures:
        print("Validation failed, validated %d files.\n" % no_validated)
        sys.exit(1)
    print("Validation passed, validated %d files.\n" % no_validated)
 def validate_all_files(rootdir, exceptions, verbose,
                       check_syntax, check_niceness=False, voting=True):
    """Validate all xml files."""
    schema = get_schema()
    any_failures = False
    no_validated = 0
    print("\nValidating all files...")
    for root, dirs, files in os.walk(rootdir):
        # Don't descend into 'target' subdirectories
@ -258,37 +342,28 @@ def validate_individual_files(rootdir, exceptions, force=False, niceness=False,
        for f in files:
            # Ignore maven files, which are called pom.xml
            if (f.endswith('.xml') and
-                f != 'pom.xml' and
+                    f != 'pom.xml' and
-                f not in exceptions):
+                    f not in exceptions):
-                try:
+                path = os.path.abspath(os.path.join(root, f))
-                    path = os.path.abspath(os.path.join(root, f))
+                any_failures = validate_one_file(
-                    if not force and path not in modified_files:
+                    schema, rootdir, path, verbose, any_failures,
-                        continue
+                    check_syntax, check_niceness)
-                    doc = etree.parse(path)
+                no_validated = no_validated + 1
                    if validation_failed(schema, doc):
                        any_failures = True
                        print(error_message(schema.error_log))
                    verify_section_tags_have_xmid(doc)
                    if niceness:
                        verify_nice_usage_of_whitespaces(os.path.join(root, f))
                except etree.XMLSyntaxError as e:
                    any_failures = True
                    print("%s: %s" % (path, e))
                except ValueError as e:
                    any_failures = True
                    print("%s: %s" % (path, e))
    if voting and any_failures:
        print("Validation failed, validated %d files.\n" % no_validated)
        sys.exit(1)
-    print("Validation passed.\n")
+    print("Validation passed, validated %d files.\n" % no_validated)
 def logging_build_book(result):
    """Collect the outcome of one finished book build.

    Registered as the completion callback of the asynchronous build
    jobs; each result is stored in the module-level RESULTS_OF_BUILDS
    list so that it can be reported once all builds are done.

    :param result: value returned by the build job
    """
    collected = RESULTS_OF_BUILDS
    collected.append(result)
-def build_book(rootdir, book):
+def build_book(book):
    """Build a single book"""
    os.chdir(book)
    result = True
    returncode = 0
@ -305,7 +380,8 @@ def build_book(rootdir, book):
    return (os.path.basename(book), result, output, returncode)
-def build_affected_books(rootdir, book_exceptions, file_exceptions, force=False, voting=True):
+def build_affected_books(rootdir, book_exceptions, file_exceptions,
                         force=False, voting=True):
    """Build all the books which are affected by modified files.
    Looks for all directories with "pom.xml" and checks if a
@ -338,16 +414,17 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force=False,
        for f in files:
            if (f.endswith('.xml') and
-                f != 'pom.xml' and
+                    f != 'pom.xml' and
-                f not in file_exceptions):
+                    f not in file_exceptions):
                path = os.path.abspath(os.path.join(root, f))
                doc = etree.parse(path)
                # Check for inclusion of files as part of imagedata
-                for node in doc.findall('//{http://docbook.org/ns/docbook}imagedata'):
+                for node in doc.findall(
                        '//{http://docbook.org/ns/docbook}imagedata'):
                    href = node.get('fileref')
                    if (f not in file_exceptions and
-                        os.path.abspath(href) in modified_files):
+                            os.path.abspath(href) in modified_files):
                        affected_books.append(book_root)
                        break
@ -359,7 +436,7 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force=False,
                for node in doc.xpath('//xi:include', namespaces=ns):
                    href = node.get('href')
                    if (f not in file_exceptions and
-                        os.path.abspath(href) in modified_files):
+                            os.path.abspath(href) in modified_files):
                        affected_books.append(book_root)
                        break
            if book_root in affected_books:
@ -380,7 +457,8 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force=False,
    print("Queuing the following books for building:")
    for book in books:
        print("  %s" % os.path.basename(book))
-        pool.apply_async(build_book, (rootdir, book), callback = logging_build_book)
+        pool.apply_async(build_book, book,
                         callback=logging_build_book)
    pool.close()
    print("Building all books now...")
    pool.join()
@ -391,7 +469,8 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force=False,
            print(">>> Build of book %s succeeded." % book)
        else:
            any_failures = True
-            print(">>> Build of book %s failed (returncode = %d)." % (book, returncode))
+            print(">>> Build of book %s failed (returncode = %d)."
                  % (book, returncode))
            print("\n%s" % output)
    if voting and any_failures:
@ -404,14 +483,29 @@ def main(args):
        print("Only files in www directory changed, nothing to do.")
        return
-    if args.check_syntax:
+    if args.job_build:
-        validate_individual_files(args.path, FILE_EXCEPTIONS, args.force, args.with_niceness, args.non_voting)
+        args.check_delete = True
        args.check_syntax = True
        args.check_build = True
    if args.job_niceness:
        args.check_niceness = True
    if args.check_delete:
-        check_deleted_files(args.path, FILE_EXCEPTIONS)
+        check_deleted_files(args.path, FILE_EXCEPTIONS, args.verbose)
    if args.check_syntax or args.check_niceness:
        if args.force:
            validate_all_files(args.path, FILE_EXCEPTIONS, args.verbose,
                               args.check_niceness, args.non_voting)
        else:
            validate_individual_files(args.path, FILE_EXCEPTIONS,
                                      args.verbose, args.check_syntax,
                                      args.check_niceness, args.non_voting)
    if args.check_build:
-        build_affected_books(args.path, BOOK_EXCEPTIONS, FILE_EXCEPTIONS, args.force, args.non_voting)
+        build_affected_books(args.path, BOOK_EXCEPTIONS, FILE_EXCEPTIONS,
                             args.force, args.non_voting)
 def default_root():
@ -422,7 +516,7 @@ def default_root():
    try:
        args = ["git", "rev-parse", "--show-toplevel"]
        gitroot = check_output(args).rstrip()
-    except (CalledProcessError, OSError) as e:
+    except (subprocess.CalledProcessError, OSError) as e:
        print("git failed: %s" % e)
        sys.exit(1)
@ -434,18 +528,26 @@ if __name__ == '__main__':
    parser.add_argument('path', nargs='?', default=default_root(),
                        help="Root directory that contains DocBook files, "
                        "defaults to `git rev-parse --show-toplevel`/doc")
-    parser.add_argument("--force", help="force the validation of all files "
+    parser.add_argument("--force", help="Force the validation of all files "
                        "and build all books", action="store_true")
-    parser.add_argument("--check-build", help="try to build books using "
+    parser.add_argument("--check-build", help="Try to build books using "
                        "modified files", action="store_true")
-    parser.add_argument("--check-syntax", help="check the syntax of modified "
+    parser.add_argument("--check-syntax", help="Check the syntax of modified "
                        "files", action="store_true")
-    parser.add_argument("--check-delete", help="check that deleted files "
+    parser.add_argument("--check-delete", help="Check that deleted files "
-                        "are not used", action="store_true")
+                        "are not used.", action="store_true")
-    parser.add_argument("--with-niceness", help="when checking the syntax "
+    parser.add_argument("--check-niceness", help="Check the niceness of "
-                        "also check the niceness of the syntax",
+                        "files, for example whitespace.",
                        action="store_true")
-    parser.add_argument("--non-voting", help="do not exit on failures",
+    parser.add_argument("--non-voting", help="Do not exit on failures",
                        action="store_false")
    parser.add_argument("--verbose", help="Verbose execution",
                        action="store_true")
    parser.add_argument("--job-niceness", help="Override values "
                        "for running as niceness gate-job",
                        action="store_true")
    parser.add_argument("--job-build", help="Override values "
                        "for running as build gate-job",
                        action="store_true")
    args = parser.parse_args()
    main(args)
--- a/validate.py
+++ b/validate.py
@ -29,13 +29,17 @@ import sys
 import urllib2
 # These are files that are known to not be in DocBook format
-FILE_EXCEPTIONS = ['st-training-guides.xml', 'ha-guide-docinfo.xml', 'bk001-ch003-associate-general.xml', 'basic-install-pom.xml']
+FILE_EXCEPTIONS = ['st-training-guides.xml',
                   'ha-guide-docinfo.xml',
                   'bk001-ch003-associate-general.xml',
                   'basic-install-pom.xml']
 # These are books that we aren't checking yet
 BOOK_EXCEPTIONS = []
 RESULTS_OF_BUILDS = []
 # NOTE(berendt): check_output as provided in Python 2.7.5 to make script
 #                usable with Python < 2.7
 def check_output(*popenargs, **kwargs):
@ -108,8 +112,10 @@ def verify_nice_usage_of_whitespaces(rootdir, docfile, found_extra_whitespace):
    ]
    for element in elements:
-        checks.append(re.compile(".*<%s>\s+[\w\-().:!?{}\[\]]+.*\n" % element)),
+        checks.append(re.compile(".*<%s>\s+[\w\-().:!?{}\[\]]+.*\n"
-        checks.append(re.compile(".*[\w\-().:!?{}\[\]]+\s+<\/%s>.*\n" % element))
+                                 % element)),
        checks.append(re.compile(".*[\w\-().:!?{}\[\]]+\s+<\/%s>.*\n"
                                 % element))
    lc = 0
    affected_lines = []
@ -127,6 +133,7 @@ def verify_nice_usage_of_whitespaces(rootdir, docfile, found_extra_whitespace):
                                   ", ".join(affected_lines)))
    return found_extra_whitespace
 def error_message(error_log):
    """Return a string that contains the error message.
@ -138,12 +145,15 @@ def error_message(error_log):
    errs.reverse()
    return "\n".join(errs)
 # Check whether only files in www got updated
 def only_www_touched():
    """Check whether only files in www directory are touched"""
    try:
        args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
        modified_files = check_output(args).strip().split()
-    except (CalledProcessError, OSError) as e:
+    except (subprocess.CalledProcessError, OSError) as e:
        print("git failed: %s" % e)
        sys.exit(1)
@ -157,38 +167,41 @@ def only_www_touched():
    return www_changed and not other_changed
-def get_modified_files(rootdir, filter=None):
+
 def get_modified_files(rootdir, filtering=None):
    """Get modified files below doc directory"""
    # There are several tree traversals in this program that do a
-    # chroot, we need to run this git command always from the rootdir,
+    # chdir, we need to run this git command always from the rootdir,
    # so assure that.
    os.chdir(rootdir)
    try:
        args = ["git", "diff", "--name-only", "--relative", "HEAD~1", "HEAD"]
-        if filter != None:
+        if filtering is not None:
-            args.append(filter)
+            args.append(filtering)
        modified_files = check_output(args).strip().split()
-    except (CalledProcessError, OSError) as e:
+    except (subprocess.CalledProcessError, OSError) as e:
        print("git failed: %s" % e)
        sys.exit(1)
    return modified_files
-def check_deleted_files(rootdir, file_exceptions):
+def check_deleted_files(rootdir, file_exceptions, verbose):
-    """ Check whether files got deleted and verify that no other file references them.
+    """ Check whether files got deleted and verify that no other file
    references them.
    """
-    print("\nChecking for removed files")
+    print("\nChecking that no removed files are referenced...")
    deleted_files = get_modified_files(rootdir, "--diff-filter=D")
    if not deleted_files:
        print("No files were removed.")
        return
-    print(" Removed files:")
+    if verbose:
-    for f in deleted_files:
+        print(" Removed files:")
-        print ("   %s" % f)
+        for f in deleted_files:
- 
+            print ("   %s" % f)
    deleted_files = map(lambda x: os.path.abspath(x), deleted_files)
    # Figure out whether files were included anywhere
@ -206,17 +219,19 @@ def check_deleted_files(rootdir, file_exceptions):
        for f in files:
            if (f.endswith('.xml') and
-                f != 'pom.xml' and
+                    f != 'pom.xml' and
-                f not in file_exceptions):
+                    f not in file_exceptions):
                path = os.path.abspath(os.path.join(root, f))
                doc = etree.parse(path)
                # Check for inclusion of files as part of imagedata
-                for node in doc.findall('//{http://docbook.org/ns/docbook}imagedata'):
+                for node in doc.findall(
                        '//{http://docbook.org/ns/docbook}imagedata'):
                    href = node.get('fileref')
                    if (f not in file_exceptions and
-                        os.path.abspath(href) in deleted_files):
+                            os.path.abspath(href) in deleted_files):
-                        print("  File %s has an imagedata href for deleted file %s " % (f, href))
+                        print("  File %s has imagedata href for deleted "
                              "file %s" % (f, href))
                        missing_reference = True
                        break
@ -229,27 +244,86 @@ def check_deleted_files(rootdir, file_exceptions):
                for node in doc.xpath('//xi:include', namespaces=ns):
                    href = node.get('href')
                    if (os.path.abspath(href) in deleted_files):
-                        print("  File %s has an xi:include on deleted file %s " % (f, href))
+                        print("  File %s has an xi:include on deleted file %s "
                              % (f, href))
                        missing_reference = True
    if missing_reference:
        print("Failed removed file check, %d files were removed."
              % len(deleted_files))
        sys.exit(1)
-    print("Passed removed file check.")
+    print("Passed removed file check, %d files were removed."
          % len(deleted_files))
-def validate_individual_files(rootdir, exceptions, force):
+def validate_one_file(schema, rootdir, path, verbose,
                      any_failures, found_extra_whitespace):
    """Validate a single file.

    Parses the file, validates it against the schema, verifies the
    section tags and runs the whitespace check.

    :param schema: schema object given to validation_failed(); its
        error_log is printed when the validation fails
    :param rootdir: forwarded to verify_nice_usage_of_whitespaces()
    :param path: file to validate
    :param verbose: print the path before validating it
    :param any_failures: failure flag accumulated by the caller
    :param found_extra_whitespace: whitespace flag accumulated by the
        caller, updated by verify_nice_usage_of_whitespaces()
    :returns: tuple (any_failures, found_extra_whitespace) with the
        updated flags
    """
    # We pass schema in as a way of caching it, generating it is expensive
    if verbose:
        print(" Validating %s" % path)
    try:
        doc = etree.parse(path)
        if validation_failed(schema, doc):
            any_failures = True
            print(error_message(schema.error_log))
        # NOTE(review): presumably raises ValueError for a section without
        # xml:id, which the handler below reports - confirm against the
        # helper in this file.
        verify_section_tags_have_xmid(doc)
        # The whitespace flag is threaded through the helper so that it
        # accumulates over all validated files.
        found_extra_whitespace = verify_nice_usage_of_whitespaces(
            rootdir, path, found_extra_whitespace)
    except etree.XMLSyntaxError as e:
        # The file could not be parsed as XML at all.
        any_failures = True
        print("%s: %s" % (path, e))
    except ValueError as e:
        # Raised by one of the checks in the try block; reported in the
        # same way as a syntax error.
        any_failures = True
        print("%s: %s" % (path, e))
    return any_failures, found_extra_whitespace
 def is_xml(filename):
    """Return True if filename is an XML file other than a maven pom.xml.

    Only the last path component is inspected, so a pom.xml is rejected
    regardless of whether the name carries a directory part
    ("book/pom.xml") or not ("pom.xml"); the latter is how a file
    located directly in the root directory is listed by
    "git diff --relative".

    :param filename: file name, optionally with leading directories
    :returns: True for *.xml files that are not called pom.xml
    """
    basename = os.path.basename(filename)
    return basename.endswith('.xml') and basename != 'pom.xml'
 def validate_individual_files(rootdir, exceptions, verbose):
    """Validate list of modified files."""
    schema = get_schema()
-    found_extra_whitespace = False
+    extra_whitespace = False
    any_failures = False
-    if force:
+    no_validated = 0
-        print("\nValidating all files")
+
-    else:
+    # Do not select delete files, just Added, Copied, Modified, Renamed,
-        modified_files = get_modified_files(rootdir)
+    # or Type changed
-        print("\nFollowing files will be validated:")
+    modified_files = get_modified_files(rootdir, "--diff-filter=ACMRT")
-        for f in modified_files:
+
-            print(">>> %s" % f)
+    modified_files = filter(is_xml, modified_files)
-        modified_files = map(lambda x: os.path.abspath(x), modified_files)
+    print("\nValidating files...")
    modified_files = map(lambda x: os.path.abspath(x), modified_files)
    for f in modified_files:
        if os.path.basename(f) in exceptions:
            continue
        any_failures, extra_whitespace = validate_one_file(
            schema, rootdir, f, verbose, any_failures, extra_whitespace)
        no_validated = no_validated + 1
    if any_failures:
        sys.exit(1)
    print("Validation passed, validated %d files.\n" % no_validated)
 def validate_all_files(rootdir, exceptions, verbose):
    """Validate all xml files."""
    schema = get_schema()
    extra_whitespace = False
    any_failures = False
    no_validated = 0
    print("\nValidating all files")
    for root, dirs, files in os.walk(rootdir):
        # Don't descend into 'target' subdirectories
@ -262,34 +336,27 @@ def validate_individual_files(rootdir, exceptions, force):
        for f in files:
            # Ignore maven files, which are called pom.xml
            if (f.endswith('.xml') and
-                f != 'pom.xml' and
+                    f != 'pom.xml' and
-                f not in exceptions):
+                    f not in exceptions):
-                try:
+                path = os.path.abspath(os.path.join(root, f))
-                    path = os.path.abspath(os.path.join(root, f))
+                any_failures, extra_whitespace = validate_one_file(
-                    if not force and path not in modified_files:
+                    schema, rootdir, path, verbose, any_failures,
-                        continue
+                    extra_whitespace)
-                    doc = etree.parse(path)
+                no_validated = no_validated + 1
                    if validation_failed(schema, doc):
                        any_failures = True
                        print(error_message(schema.error_log))
                    verify_section_tags_have_xmid(doc)
                    found_extra_whitespace = verify_nice_usage_of_whitespaces(rootdir, path, found_extra_whitespace)
                except etree.XMLSyntaxError as e:
                    any_failures = True
                    print("%s: %s" % (path, e))
                except ValueError as e:
                    any_failures = True
                    print("%s: %s" % (path, e))
    if any_failures:
        sys.exit(1)
-    print("Validation passed.\n")
+    print("Validation passed, validated %d files.\n" % no_validated)
 def logging_build_book(result):
    """Collect the outcome of one finished book build.

    Registered as the completion callback of the asynchronous build
    jobs; each result is stored in the module-level RESULTS_OF_BUILDS
    list so that it can be reported once all builds are done.

    :param result: value returned by the build job
    """
    collected = RESULTS_OF_BUILDS
    collected.append(result)
-def build_book(rootdir, book):
+def build_book(book):
    """Build a single book"""
    os.chdir(book)
    result = True
    returncode = 0
@ -339,16 +406,17 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force):
        for f in files:
            if (f.endswith('.xml') and
-                f != 'pom.xml' and
+                    f != 'pom.xml' and
-                f not in file_exceptions):
+                    f not in file_exceptions):
                path = os.path.abspath(os.path.join(root, f))
                doc = etree.parse(path)
                # Check for inclusion of files as part of imagedata
-                for node in doc.findall('//{http://docbook.org/ns/docbook}imagedata'):
+                for node in doc.findall(
                        '//{http://docbook.org/ns/docbook}imagedata'):
                    href = node.get('fileref')
                    if (f not in file_exceptions and
-                        os.path.abspath(href) in modified_files):
+                            os.path.abspath(href) in modified_files):
                        affected_books.append(book_root)
                        break
@ -360,7 +428,7 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force):
                for node in doc.xpath('//xi:include', namespaces=ns):
                    href = node.get('href')
                    if (f not in file_exceptions and
-                        os.path.abspath(href) in modified_files):
+                            os.path.abspath(href) in modified_files):
                        affected_books.append(book_root)
                        break
            if book_root in affected_books:
@ -381,7 +449,8 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force):
    print("Queuing the following books for building:")
    for book in books:
        print("  %s" % os.path.basename(book))
-        pool.apply_async(build_book, (rootdir, book), callback = logging_build_book)
+        pool.apply_async(build_book, book,
                         callback=logging_build_book)
    pool.close()
    print("Building all queued books now...")
    pool.join()
@ -392,14 +461,15 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force):
            print(">>> Build of book %s succeeded." % book)
        else:
            any_failures = True
-            print(">>> Build of book %s failed (returncode = %d)." % (book, returncode))
+            print(">>> Build of book %s failed (returncode = %d)."
                  % (book, returncode))
            print("\n%s" % output)
    if any_failures:
        sys.exit(1)
-def main(rootdir, force):
+def main(rootdir, force, verbose):
    if force:
        print("Validation of all files and build of all books will be forced.")
@ -407,8 +477,12 @@ def main(rootdir, force):
        print("Only files in www directory changed, no validation done.")
        return
-    check_deleted_files(rootdir, FILE_EXCEPTIONS)
+    check_deleted_files(rootdir, FILE_EXCEPTIONS, verbose)
-    validate_individual_files(rootdir, FILE_EXCEPTIONS, force)
+    if force:
        validate_all_files(rootdir, FILE_EXCEPTIONS, verbose)
    else:
        validate_individual_files(rootdir, FILE_EXCEPTIONS, verbose)
    build_affected_books(rootdir, BOOK_EXCEPTIONS, FILE_EXCEPTIONS, force)
@ -420,7 +494,7 @@ def default_root():
    try:
        args = ["git", "rev-parse", "--show-toplevel"]
        gitroot = check_output(args).rstrip()
-    except (CalledProcessError, OSError) as e:
+    except (subprocess.CalledProcessError, OSError) as e:
        print("git failed: %s" % e)
        sys.exit(1)
@ -432,7 +506,9 @@ if __name__ == '__main__':
    parser.add_argument('path', nargs='?', default=default_root(),
                        help="Root directory that contains DocBook files, "
                        "defaults to `git rev-parse --show-toplevel`/doc/")
-    parser.add_argument("--force", help="force the validation of all files "
+    parser.add_argument("--force", help="Force the validation of all files "
                        "and build all books", action="store_true")
    parser.add_argument("--verbose", help="Verbose execution",
                        action="store_true")
    args = parser.parse_args()
-    main(args.path, args.force)
+    main(args.path, args.force, args.verbose)