From fe34b9bcb61fe44e1fe983aef5d1dd06a74104a7 Mon Sep 17 00:00:00 2001
From: Andreas Jaeger <aj@suse.de>
Date: Mon, 7 Oct 2013 23:12:30 +0200
Subject: [PATCH] Remove tools/validate.py

The gates are using tools/test.py now and thus we can
delete tools/validate.py.

Change-Id: I38969ac13b78ec7429b73c32bfc9a159fabcd556
---
 validate.py | 642 ----------------------------------------------------
 1 file changed, 642 deletions(-)
 delete mode 100755 validate.py
diff --git a/validate.py b/validate.py
deleted file mode 100755
index 2bd39bfe..00000000
--- a/validate.py
+++ /dev/null
@@ -1,642 +0,0 @@
-#!/usr/bin/env python
-'''
-
-Usage:
-    validate.py [path]
-
-Validates all xml files against the DocBook 5 RELAX NG schema, and
-attempts to build all books.
-
-Options:
-    path     Root directory, defaults to <repo root>/doc
-
-Ignores pom.xml files and subdirectories named "target".
-
-Requires:
-    - Python 2.7 or greater (for argparse)
-    - lxml Python library
-    - Maven
-
-'''
-from lxml import etree
-
-import argparse
-import multiprocessing
-import os
-import re
-import shutil
-import subprocess
-import sys
-import urllib2
-
-# These are files that are known to not be in DocBook format
-FILE_EXCEPTIONS = ['st-training-guides.xml',
-                   'ha-guide-docinfo.xml',
-                   'basic-install-pom.xml']
-
-# These are books that we aren't checking yet
-BOOK_EXCEPTIONS = []
-
-RESULTS_OF_BUILDS = []
-
-
-# NOTE(berendt): check_output as provided in Python 2.7.5 to make script
-#                usable with Python < 2.7
-def check_output(*popenargs, **kwargs):
-    """Run command with arguments and return its output as a byte string.
-
-    If the exit code was non-zero it raises a CalledProcessError.  The
-    CalledProcessError object will have the return code in the returncode
-    attribute and output in the output attribute.
-    """
-    if 'stdout' in kwargs:
-        raise ValueError('stdout argument not allowed, it will be overridden.')
-    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
-    output, unused_err = process.communicate()
-    retcode = process.poll()
-    if retcode:
-        cmd = kwargs.get("args")
-        if cmd is None:
-            cmd = popenargs[0]
-        raise subprocess.CalledProcessError(retcode, cmd, output=output)
-    return output
-
-
-def get_schema():
-    """Return the DocBook RELAX NG schema"""
-    url = "http://docbook.org/xml/5.1CR1/rng/docbookxi.rng"
-    relaxng_doc = etree.parse(urllib2.urlopen(url))
-    return etree.RelaxNG(relaxng_doc)
-
-
-def validation_failed(schema, doc):
-    """Return True if the parsed doc fails against the schema
-
-    This will ignore validation failures of the type: IDREF attribute linkend
-    references an unknown ID. This is because we are validating individual
-    files that are being imported, and sometimes the reference isn't present
-    in the current file."""
-    return not schema.validate(doc) and \
-        any(log.type_name != "DTD_UNKNOWN_ID" for log in schema.error_log)
-
-
-def verify_section_tags_have_xmid(doc):
-    """Check that all section tags have an xml:id attribute
-
-    Will throw an exception if there's at least one missing"""
-    ns = {"docbook": "http://docbook.org/ns/docbook"}
-    for node in doc.xpath('//docbook:section', namespaces=ns):
-        if "{http://www.w3.org/XML/1998/namespace}id" not in node.attrib:
-            raise ValueError("section missing xml:id attribute, line %d" %
-                             node.sourceline)
-
-
-def verify_nice_usage_of_whitespaces(rootdir, docfile, found_extra_whitespace):
-    """Check that no unnecessary whitespaces are used"""
-    checks = [
-        re.compile(".*\s+\n$"),
-    ]
-
-    elements = [
-        'listitem',
-        'para',
-        'td',
-        'th',
-        'command',
-        'literal',
-        'title',
-        'caption',
-        'filename',
-        'userinput',
-        'programlisting'
-    ]
-
-    for element in elements:
-        checks.append(re.compile(".*<%s>\s+[\w\-().:!?{}\[\]]+.*\n"
-                                 % element)),
-        checks.append(re.compile(".*[\w\-().:!?{}\[\]]+\s+<\/%s>.*\n"
-                                 % element))
-
-    lc = 0
-    affected_lines = []
-    for line in open(docfile, 'r'):
-        lc = lc + 1
-        for check in checks:
-            if check.match(line) and lc not in affected_lines:
-                affected_lines.append(str(lc))
-
-    if len(affected_lines) > 0:
-        if not found_extra_whitespace:
-            print("  Trailing or unnessary whitespaces found:")
-        found_extra_whitespace = True
-        print("   %s lines: %s" % (os.path.relpath(docfile, rootdir),
-                                   ", ".join(affected_lines)))
-    return found_extra_whitespace
-
-
-def error_message(error_log):
-    """Return a string that contains the error message.
-
-    We use this to filter out false positives related to IDREF attributes
-    """
-    errs = [str(x) for x in error_log if x.type_name != 'DTD_UNKNOWN_ID']
-
-    # Reverse output so that earliest failures are reported first
-    errs.reverse()
-    return "\n".join(errs)
-
-
-# Check whether only files in www got updated
-def only_www_touched():
-    """Check whether only files in www directory are touched"""
-
-    try:
-        args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
-        modified_files = check_output(args).strip().split()
-    except (subprocess.CalledProcessError, OSError) as e:
-        print("git failed: %s" % e)
-        sys.exit(1)
-
-    www_changed = False
-    other_changed = False
-    for f in modified_files:
-        if f.startswith("www/"):
-            www_changed = True
-        else:
-            other_changed = True
-
-    return www_changed and not other_changed
-
-
-def ha_guide_touched():
-    """Check whether files in high-availability-guide directory are touched"""
-
-    try:
-        args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
-        modified_files = check_output(args).strip().split()
-    except (subprocess.CalledProcessError, OSError) as e:
-        print("git failed: %s" % e)
-        sys.exit(1)
-
-    ha_changed = False
-    for f in modified_files:
-        if f.startswith("doc/high-availability-guide/"):
-            ha_changed = True
-
-    return ha_changed
-
-
-def check_modified_affects_all(rootdir, verbose):
-    """Check whether special files were modified.
-
-    There are some special files where we should rebuild all books
-    if either of these is touched.
-    """
-
-    os.chdir(rootdir)
-
-    try:
-        args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
-        modified_files = check_output(args).strip().split()
-    except (subprocess.CalledProcessError, OSError) as e:
-        print("git failed: %s" % e)
-        sys.exit(1)
-
-    special_files = [
-        "tools/validate.py",
-        "tools/test.py",
-        "doc/pom.xml"
-    ]
-    for f in modified_files:
-        if f in special_files:
-            if verbose:
-                print("File %s modified, this affects all books." % f)
-            return True
-
-    return False
-
-
-def get_modified_files(rootdir, filtering=None):
-    """Get modified files below doc directory"""
-
-    # There are several tree traversals in this program that do a
-    # chdir, we need to run this git command always from the rootdir,
-    # so assure that.
-    os.chdir(rootdir)
-    try:
-        args = ["git", "diff", "--name-only", "--relative", "HEAD~1", "HEAD"]
-        if filtering is not None:
-            args.append(filtering)
-        modified_files = check_output(args).strip().split()
-    except (subprocess.CalledProcessError, OSError) as e:
-        print("git failed: %s" % e)
-        sys.exit(1)
-    return modified_files
-
-
-def check_deleted_files(rootdir, file_exceptions, verbose):
-    """ Check whether files got deleted and verify that no other file
-    references them.
-
-    """
-    print("\nChecking that no removed files are referenced...")
-    deleted_files = get_modified_files(rootdir, "--diff-filter=D")
-    if not deleted_files:
-        print("No files were removed.")
-        return
-
-    if verbose:
-        print(" Removed files:")
-        for f in deleted_files:
-            print ("   %s" % f)
-
-    deleted_files = map(lambda x: os.path.abspath(x), deleted_files)
-
-    # Figure out whether files were included anywhere
-    missing_reference = False
-
-    for root, dirs, files in os.walk(rootdir):
-        # Don't descend into 'target' subdirectories
-        try:
-            ind = dirs.index('target')
-            del dirs[ind]
-        except ValueError:
-            pass
-
-        os.chdir(root)
-
-        for f in files:
-            if (f.endswith('.xml') and
-                    f != 'pom.xml' and
-                    f not in file_exceptions):
-                path = os.path.abspath(os.path.join(root, f))
-                doc = etree.parse(path)
-
-                # Check for inclusion of files as part of imagedata
-                for node in doc.findall(
-                        '//{http://docbook.org/ns/docbook}imagedata'):
-                    href = node.get('fileref')
-                    if (f not in file_exceptions and
-                            os.path.abspath(href) in deleted_files):
-                        print("  File %s has imagedata href for deleted "
-                              "file %s" % (f, href))
-                        missing_reference = True
-
-                        break
-
-                if missing_reference:
-                    break
-
-                # Check for inclusion of files as part of xi:include
-                ns = {"xi": "http://www.w3.org/2001/XInclude"}
-                for node in doc.xpath('//xi:include', namespaces=ns):
-                    href = node.get('href')
-                    if (os.path.abspath(href) in deleted_files):
-                        print("  File %s has an xi:include on deleted file %s "
-                              % (f, href))
-                        missing_reference = True
-    if missing_reference:
-        print("Failed removed file check, %d files were removed."
-              % len(deleted_files))
-        sys.exit(1)
-
-    print("Passed removed file check, %d files were removed."
-          % len(deleted_files))
-
-
-def validate_one_file(schema, rootdir, path, verbose,
-                      any_failures, found_extra_whitespace):
-    """Validate a single file"""
-    # We pass schema in as a way of caching it, generating it is expensive
-
-    if verbose:
-        print(" Validating %s" % path)
-    try:
-        doc = etree.parse(path)
-        if validation_failed(schema, doc):
-            any_failures = True
-            print(error_message(schema.error_log))
-        verify_section_tags_have_xmid(doc)
-        found_extra_whitespace = verify_nice_usage_of_whitespaces(
-            rootdir, path, found_extra_whitespace)
-    except etree.XMLSyntaxError as e:
-        any_failures = True
-        print("%s: %s" % (path, e))
-    except ValueError as e:
-        any_failures = True
-        print("%s: %s" % (path, e))
-
-    return any_failures, found_extra_whitespace
-
-
-def is_xml(filename):
-    """Returns true if file ends with .xml and is not a pom.xml file"""
-
-    return filename.endswith('.xml') and not filename.endswith('/pom.xml')
-
-
-def validate_individual_files(rootdir, exceptions, verbose):
-    """Validate list of modified files."""
-
-    schema = get_schema()
-    extra_whitespace = False
-    any_failures = False
-    no_validated = 0
-
-    # Do not select delete files, just Added, Copied, Modified, Renamed,
-    # or Type changed
-    modified_files = get_modified_files(rootdir, "--diff-filter=ACMRT")
-
-    modified_files = filter(is_xml, modified_files)
-    print("\nValidating files...")
-    modified_files = map(lambda x: os.path.abspath(x), modified_files)
-
-    for f in modified_files:
-        base_f = os.path.basename(f)
-        if (base_f == "pom.xml" or
-                f in exceptions):
-            continue
-        any_failures, extra_whitespace = validate_one_file(
-            schema, rootdir, f, verbose, any_failures, extra_whitespace)
-        no_validated = no_validated + 1
-
-    if any_failures:
-        sys.exit(1)
-
-    print("Validation passed, validated %d files.\n" % no_validated)
-
-
-def validate_all_files(rootdir, exceptions, verbose):
-    """Validate all xml files."""
-
-    schema = get_schema()
-    extra_whitespace = False
-    any_failures = False
-    no_validated = 0
-    print("\nValidating all files")
-
-    for root, dirs, files in os.walk(rootdir):
-        # Don't descend into 'target' subdirectories
-        try:
-            ind = dirs.index('target')
-            del dirs[ind]
-        except ValueError:
-            pass
-
-        for f in files:
-            # Ignore maven files, which are called pom.xml
-            if (f.endswith('.xml') and
-                    f != 'pom.xml' and
-                    f not in exceptions):
-                path = os.path.abspath(os.path.join(root, f))
-                any_failures, extra_whitespace = validate_one_file(
-                    schema, rootdir, path, verbose, any_failures,
-                    extra_whitespace)
-                no_validated = no_validated + 1
-
-    if any_failures:
-        sys.exit(1)
-    print("Validation passed, validated %d files.\n" % no_validated)
-
-
-def logging_build_book(result):
-    """Callback for book building"""
-    RESULTS_OF_BUILDS.append(result)
-
-
-def build_book(book):
-    """Build book(s) in directory book"""
-
-    os.chdir(book)
-    result = True
-    returncode = 0
-    base_book = os.path.basename(book)
-    try:
-        shutil.rmtree(os.path.expanduser("~/.fop"),
-                      ignore_errors=True)
-        # Clean first and then build so that the output of all guides
-        # is available
-        output = subprocess.check_output(
-            ["mvn", "clean"],
-            stderr=subprocess.STDOUT
-        )
-        if base_book == "install-guide":
-            # Build Fedora
-            base_book = "install-guide (for Fedora)"
-            output = subprocess.check_output(
-                ["mvn", "generate-sources", "-B",
-                 "-Doperating.system=yum",
-                 "-Dprofile.os='centos;fedora;rhel'"],
-                stderr=subprocess.STDOUT
-            )
-            # Build openSUSE
-            base_book = "install-guide (for openSUSE)"
-            output = subprocess.check_output(
-                ["mvn", "generate-sources", "-B",
-                 "-Doperating.system=zypper", "-Dprofile.os=opensuse"],
-                stderr=subprocess.STDOUT
-            )
-            # Build Ubuntu
-            base_book = "install-guide (for Ubuntu)"
-            output = subprocess.check_output(
-                ["mvn", "generate-sources", "-B",
-                 "-Doperating.system=apt", "-Dprofile.os=ubuntu"],
-                stderr=subprocess.STDOUT
-            )
-            # Success
-            base_book = "install-guide (for Fedora, openSUSE, Ubuntu)"
-        elif base_book == "high-availability-guide":
-            output = subprocess.check_output(
-                ["../../tools/build-ha-guide.sh", ],
-                stderr=subprocess.STDOUT
-            )
-            output = subprocess.check_output(
-                ["mvn", "generate-sources", "-B"],
-                stderr=subprocess.STDOUT
-            )
-        else:
-            output = subprocess.check_output(
-                ["mvn", "generate-sources", "-B"],
-                stderr=subprocess.STDOUT
-            )
-    except subprocess.CalledProcessError as e:
-        output = e.output
-        returncode = e.returncode
-        result = False
-
-    return (base_book, result, output, returncode)
-
-
-def build_affected_books(rootdir, book_exceptions, file_exceptions, verbose,
-                         force):
-    """Build all the books which are affected by modified files.
-
-    Looks for all directories with "pom.xml" and checks if a
-    XML file in the directory includes a modified file. If at least
-    one XML file includes a modified file the method calls
-    "mvn clean generate-sources" in that directory.
-
-    This will throw an exception if a book fails to build
-    """
-    modified_files = get_modified_files(rootdir)
-    modified_files = map(lambda x: os.path.abspath(x), modified_files)
-    build_all_books = force or check_modified_affects_all(rootdir, verbose)
-    affected_books = []
-    books = []
-    book_root = rootdir
-    for root, dirs, files in os.walk(rootdir):
-        # Don't descend into 'target' subdirectories
-        try:
-            ind = dirs.index('target')
-            del dirs[ind]
-        except ValueError:
-            pass
-
-        if os.path.basename(root) in book_exceptions:
-            break
-        # Do not process files in doc itself
-        elif root.endswith('doc'):
-            continue
-        elif "pom.xml" in files:
-            books.append(root)
-            book_root = root
-
-        os.chdir(root)
-
-        # No need to check single books if we build all, we just
-        # collect list of books
-        if build_all_books:
-            continue
-
-        # ha-guide uses asciidoc which we do not track.
-        # Just check whether any file is touched in that directory
-        if root.endswith('doc/high-availability-guide'):
-            if ha_guide_touched():
-                affected_books.append(book_root)
-        # We can scan only for depth of one of inclusion
-        # therefore skip the common directory since there's no
-        # book build in it.
-        elif not root.endswith('doc/common'):
-            for f in files:
-                if (f.endswith('.xml') and
-                        f not in file_exceptions):
-                    path = os.path.abspath(os.path.join(root, f))
-
-                    # If the file itself is modified, build the book.
-                    # Note this is an optimization in most cases but
-                    # needed for bk-*.xml since those are included by
-                    # pom.xml and pom.xml is not checked for
-                    # modification of included files.
-                    if path in modified_files:
-                        affected_books.append(book_root)
-                        break
-
-                    # Now check whether the file includes a file that
-                    # was modified (scanning one level only)
-
-                    doc = etree.parse(path)
-
-                    # Check for inclusion of files as part of imagedata
-                    for node in doc.findall(
-                            '//{http://docbook.org/ns/docbook}imagedata'):
-                        href = node.get('fileref')
-                        if (f not in file_exceptions and
-                                os.path.abspath(href) in modified_files):
-                            affected_books.append(book_root)
-                            break
-
-                    if book_root in affected_books:
-                        break
-
-                    # Check for inclusion of files as part of xi:include
-                    ns = {"xi": "http://www.w3.org/2001/XInclude"}
-                    for node in doc.xpath('//xi:include', namespaces=ns):
-                        href = node.get('href')
-                        if (f not in file_exceptions and
-                                os.path.abspath(href) in modified_files):
-                            affected_books.append(book_root)
-                            break
-                if book_root in affected_books:
-                    break
-
-    if build_all_books:
-        print("Building all books.")
-    elif affected_books:
-        books = affected_books
-    else:
-        print("No books are affected by modified files. Building all books.")
-
-    maxjobs = multiprocessing.cpu_count()
-    # Jenkins fails sometimes with errors if too many jobs run, artificially
-    # limit to 4 for now.
-    # See https://bugs.launchpad.net/openstack-manuals/+bug/1221721
-    if maxjobs > 4:
-        maxjobs = 4
-    pool = multiprocessing.Pool(maxjobs)
-    print("Queuing the following books for building:")
-    for book in books:
-        print("  %s" % os.path.basename(book))
-        pool.apply_async(build_book, (book, ),
-                         callback=logging_build_book)
-    pool.close()
-    print("Building all queued %d books now..." % len(books))
-    pool.join()
-
-    any_failures = False
-    for book, result, output, returncode in RESULTS_OF_BUILDS:
-        if result:
-            print(">>> Build of book %s succeeded." % book)
-        else:
-            any_failures = True
-            print(">>> Build of book %s failed (returncode = %d)."
-                  % (book, returncode))
-            print("\n%s" % output)
-    if any_failures:
-        sys.exit(1)
-    print("Building finished.")
-
-
-def main(rootdir, force, verbose):
-    if force:
-        print("Validation of all files and build of all books will be forced.")
-
-    if not force and only_www_touched():
-        print("Only files in www directory changed, no validation done.")
-        return
-
-    check_deleted_files(rootdir, FILE_EXCEPTIONS, verbose)
-    if force:
-        validate_all_files(rootdir, FILE_EXCEPTIONS, verbose)
-    else:
-        validate_individual_files(rootdir, FILE_EXCEPTIONS, verbose)
-
-    build_affected_books(rootdir, BOOK_EXCEPTIONS, FILE_EXCEPTIONS, verbose,
-                         force)
-
-
-def default_root():
-    """Return the location of openstack-manuals/doc/
-
-    The current working directory must be inside of the openstack-manuals
-    repository for this method to succeed"""
-    try:
-        args = ["git", "rev-parse", "--show-toplevel"]
-        gitroot = check_output(args).rstrip()
-    except (subprocess.CalledProcessError, OSError) as e:
-        print("git failed: %s" % e)
-        sys.exit(1)
-
-    return os.path.join(gitroot, "doc")
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description="Validate XML files against "
-                                     "the DocBook 5 RELAX NG schema")
-    parser.add_argument('path', nargs='?', default=default_root(),
-                        help="Root directory that contains DocBook files, "
-                        "defaults to `git rev-parse --show-toplevel`/doc/")
-    parser.add_argument("--force", help="Force the validation of all files "
-                        "and build all books", action="store_true")
-    parser.add_argument("--verbose", help="Verbose execution",
-                        action="store_true")
-    args = parser.parse_args()
-    main(args.path, args.force, args.verbose)