Merge "added parameters to control the behaviour"

2013-09-04 15:18:48 +00:00 · 2013-09-04 15:18:48 +00:00 · 7a42fcd01a
commit 7a42fcd01a
parent a2c5930630 03e400ba70
1 changed files with 322 additions and 0 deletions
--- a/tools/test.py
+++ b/tools/test.py
@ -0,0 +1,322 @@
+#!/usr/bin/env python
+'''
+
+Usage:
+    test.py [path]
+
+Validates all xml files against the DocBook 5 RELAX NG schema, and
+attempts to build all books.
+
+Options:
+    path     Root directory, defaults to <repo root>/doc/src/doc/docbkx
+
+Ignores pom.xml files and subdirectories named "target".
+
+Requires:
+    - Python 2.7 or greater (for argparse)
+    - lxml Python library
+    - Maven
+
+'''
+from lxml import etree
+
+import argparse
+import multiprocessing
+import os
+import re
+import subprocess
+import sys
+import urllib2
+
+# These are files that are known to not be in DocBook format
+FILE_EXCEPTIONS = ['ha-guide-docinfo.xml','bk001-ch003-associate-general.xml']
+
+# These are books that we aren't checking yet
+BOOK_EXCEPTIONS = []
+
+RESULTS_OF_BUILDS = []
+
+# NOTE(berendt): check_output as provided in Python 2.7.5 to make script
+#                usable with Python < 2.7
+def check_output(*popenargs, **kwargs):
+    """Run command with arguments and return its output as a byte string.
+
+    If the exit code was non-zero it raises a CalledProcessError.  The
+    CalledProcessError object will have the return code in the returncode
+    attribute and output in the output attribute.
+    """
+    if 'stdout' in kwargs:
+        raise ValueError('stdout argument not allowed, it will be overridden.')
+    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
+    output, unused_err = process.communicate()
+    retcode = process.poll()
+    if retcode:
+        cmd = kwargs.get("args")
+        if cmd is None:
+            cmd = popenargs[0]
+        raise subprocess.CalledProcessError(retcode, cmd, output=output)
+    return output
+
+
+def get_schema():
+    """Return the DocBook RELAX NG schema"""
+    url = "http://www.oasis-open.org/docbook/xml/5.0b5/rng/docbookxi.rng"
+    relaxng_doc = etree.parse(urllib2.urlopen(url))
+    return etree.RelaxNG(relaxng_doc)
+
+
+def validation_failed(schema, doc):
+    """Return True if the parsed doc fails against the schema
+
+    This will ignore validation failures of the type: IDREF attribute linkend
+    references an unknown ID. This is because we are validating individual
+    files that are being imported, and sometimes the reference isn't present
+    in the current file."""
+    return not schema.validate(doc) and \
+        any(log.type_name != "DTD_UNKNOWN_ID" for log in schema.error_log)
+
+
+def verify_section_tags_have_xmid(doc):
+    """Check that all section tags have an xml:id attribute
+
+    Will throw an exception if there's at least one missing"""
+    ns = {"docbook": "http://docbook.org/ns/docbook"}
+    for node in doc.xpath('//docbook:section', namespaces=ns):
+        if "{http://www.w3.org/XML/1998/namespace}id" not in node.attrib:
+            raise ValueError("section missing xml:id attribute, line %d" %
+                            node.sourceline)
+
+
+def verify_nice_usage_of_whitespaces(docfile):
+    """Check that no unnecessary whitespaces are used"""
+    checks = [
+        re.compile(".*\s+\n$"),
+    ]
+
+    elements = [
+        'listitem',
+        'para',
+        'td',
+        'th',
+        'command',
+        'literal',
+        'title',
+        'caption',
+        'filename',
+        'userinput',
+        'programlisting'
+    ]
+
+    for element in elements:
+        checks.append(re.compile(".*<%s>\s+[\w\-().:!?{}\[\]]+.*\n" % element)),
+        checks.append(re.compile(".*[\w\-().:!?{}\[\]]+\s+<\/%s>.*\n" % element))
+
+    lc = 0
+    affected_lines = []
+    for line in open(docfile, 'r'):
+        lc = lc + 1
+        for check in checks:
+            if check.match(line) and lc not in affected_lines:
+                affected_lines.append(str(lc))
+
+    if len(affected_lines) > 0:
+        raise ValueError("trailing or unnecessary whitespaces "
+            "in following lines: %s" % ", ".join(affected_lines))
+
+
+def error_message(error_log):
+    """Return a string that contains the error message.
+
+    We use this to filter out false positives related to IDREF attributes
+    """
+    errs = [str(x) for x in error_log if x.type_name != 'DTD_UNKNOWN_ID']
+
+    # Reverse output so that earliest failures are reported first
+    errs.reverse()
+    return "\n".join(errs)
+
+
+def get_modified_files():
+    try:
+        args = ["git", "diff", "--name-only", "--relative", "HEAD", "HEAD~1"]
+        modified_files = check_output(args).strip().split()
+    except (subprocess.CalledProcessError, OSError) as e:
+        print("git failed: %s" % e)
+        sys.exit(1)
+    return modified_files
+
+
+def validate_individual_files(rootdir, exceptions, force=False, niceness=False, voting=True):
+    schema = get_schema()
+
+    any_failures = False
+    modified_files = get_modified_files()
+    print("\nFollowing files will be validated:")
+    for f in modified_files:
+        print(">>> %s" % f)
+    print("")
+    modified_files = map(lambda x: os.path.abspath(x), modified_files)
+
+    for root, dirs, files in os.walk(rootdir):
+        # Don't descend into 'target' subdirectories
+        try:
+            ind = dirs.index('target')
+            del dirs[ind]
+        except ValueError:
+            pass
+
+        for f in files:
+            # Ignore maven files, which are called pom.xml
+            if (f.endswith('.xml') and
+                f != 'pom.xml' and
+                f not in exceptions):
+                try:
+                    path = os.path.abspath(os.path.join(root, f))
+                    if not force and path not in modified_files:
+                        continue
+                    doc = etree.parse(path)
+                    if validation_failed(schema, doc):
+                        any_failures = True
+                        print(error_message(schema.error_log))
+                    verify_section_tags_have_xmid(doc)
+                    if niceness:
+                        verify_nice_usage_of_whitespaces(os.path.join(root, f))
+                except etree.XMLSyntaxError as e:
+                    any_failures = True
+                    print("%s: %s" % (path, e))
+                except ValueError as e:
+                    any_failures = True
+                    print("%s: %s" % (path, e))
+
+    if voting and any_failures:
+        sys.exit(1)
+
+
+def logging_build_book(result):
+    RESULTS_OF_BUILDS.append(result)
+
+
+def build_book(rootdir, book):
+    os.chdir(book)
+    result = True
+    returncode = 0
+    try:
+        output = subprocess.check_output(
+            ["mvn", "clean", "generate-sources"],
+            stderr=subprocess.STDOUT
+        )
+    except subprocess.CalledProcessError as e:
+        output = e.output
+        returncode = e.returncode
+        result = False
+
+    return (os.path.basename(book), result, output, returncode)
+
+
+def build_affected_books(rootdir, book_exceptions, file_exceptions, force=False, voting=True):
+    """Build all the books which are affected by modified files.
+
+    Looks for all directories with "pom.xml" and checks if a
+    XML file in the directory includes a modified file. If at least
+    one XML file includes a modified file the method calls
+    "mvn clean generate-sources" in that directory.
+
+    This will throw an exception if a book fails to build
+    """
+    modified_files = get_modified_files()
+    modified_files = map(lambda x: os.path.abspath(x), modified_files)
+    affected_books = []
+    books = []
+    book_root = rootdir
+    for root, dirs, files in os.walk(rootdir):
+        if os.path.basename(root) in book_exceptions:
+            break
+        elif "pom.xml" in files:
+            books.append(root)
+            book_root = root
+
+        os.chdir(root)
+
+        for f in files:
+            if (f.endswith('.xml') and
+                f != 'pom.xml' and
+                f not in file_exceptions):
+                path = os.path.abspath(os.path.join(root, f))
+                doc = etree.parse(path)
+                ns = {"xi": "http://www.w3.org/2001/XInclude"}
+                for node in doc.xpath('//xi:include', namespaces=ns):
+                    href = node.get('href')
+                    if (href.endswith('.xml') and
+                        f not in file_exceptions and
+                        os.path.abspath(href) in modified_files):
+                        affected_books.append(book_root)
+                        break
+            if book_root in affected_books:
+                break
+
+    if not force and affected_books:
+        books = affected_books
+    else:
+        print("No books are affected by modified files. Building all books.")
+
+    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
+    for book in books:
+        pool.apply_async(build_book, (rootdir, book), callback = logging_build_book)
+    pool.close()
+    pool.join()
+
+    any_failures = False
+    for book, result, output, returncode in RESULTS_OF_BUILDS:
+        if result:
+            print(">>> Build of book %s succeeded." % book)
+        else:
+            any_failures = True
+            print(">>> Build of book %s failed (returncode = %d)." % (book, returncode))
+            print("\n%s" % output)
+
+    if voting and any_failures:
+        sys.exit(1)
+
+
+def main(args):
+    if args.check_syntax:
+        validate_individual_files(args.path, FILE_EXCEPTIONS, args.force, args.with_niceness, args.non_voting)
+
+    if args.check_build:
+        build_affected_books(args.path, BOOK_EXCEPTIONS, FILE_EXCEPTIONS, args.force, args.non_voting)
+
+
+def default_root():
+    """Return the location of openstack-manuals/doc/src/docbkx
+
+    The current working directory must be inside of the openstack-manuals
+    repository for this method to succeed"""
+    try:
+        args = ["git", "rev-parse", "--show-toplevel"]
+        gitroot = check_output(args).rstrip()
+    except (CalledProcessError, OSError) as e:
+        print("git failed: %s" % e)
+        sys.exit(1)
+
+    return os.path.join(gitroot, "doc/src/docbkx")
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Validate XML files against "
+                                     "the DocBook 5 RELAX NG schema")
+    parser.add_argument('path', nargs='?', default=default_root(),
+                        help="Root directory that contains DocBook files, "
+                        "defaults to `git rev-parse --show-toplevel`/doc/src/"
+                        "docbkx")
+    parser.add_argument("--force", help="force the validation of all files "
+                        "and build all books", action="store_true")
+    parser.add_argument("--check-build", help="try to build books using "
+                        "modified files", action="store_true")
+    parser.add_argument("--check-syntax", help="check the syntax of modified "
+                        "files", action="store_true")
+    parser.add_argument("--with-niceness", help="when checking the syntax "
+                        "also check the niceness of the syntax",
+                        action="store_true")
+    parser.add_argument("--non-voting", help="do not exit on failures",
+                        action="store_false")
+    args = parser.parse_args()
+    main(args)