Improve validation tools

To use test.py in gating jobs, two options (--job-niceness and
--job-build) have been added that set default values. That way, the
Jenkins files only need to contain the master switch, and we can
decide ourselves what exactly is run as part of such a gate job.

When only individual files are validated, we no longer walk the
whole tree.

A verbose mode has been added.
Each pass now shows the number of files processed and only outputs
filenames if --verbose is given.
Additionally, formatting has been cleaned up by using pylint and pep8.
pep8 reports no warnings or errors now.

Example outputs of test.py:
$ tools/test.py --job-niceness

Validating files...
  config-reference/ch_networkingconfigure.xml: trailing or unnecessary
whitespaces found in lines: 13
...
Validation failed, validated 14 files.

$ tools/test.py --job-build

Checking that no removed files are referenced...
Passed removed file check, 41 files were removed, 627 files checked.

Validating files...
Validation passed, validated 14 files.

Queuing the following books for building:
  config-reference
  install-guide
  admin-guide-network
Building all books now...
>>> Build of book admin-guide-network succeeded.
>>> Build of book install-guide succeeded.
>>> Build of book config-reference succeeded.

Change-Id: Ia443553b6c774df7e72ef7c0d8768badfb34bc6d
This commit is contained in:
Andreas Jaeger 2013-09-22 14:18:08 +02:00
parent 99b93dc27e
commit 6d555db21a
2 changed files with 324 additions and 146 deletions

260
test.py
View File

@ -29,13 +29,17 @@ import sys
import urllib2 import urllib2
# These are files that are known to not be in DocBook format # These are files that are known to not be in DocBook format
FILE_EXCEPTIONS = ['st-training-guides.xml', 'ha-guide-docinfo.xml', 'bk001-ch003-associate-general.xml', 'basic-install-pom.xml'] FILE_EXCEPTIONS = ['st-training-guides.xml',
'ha-guide-docinfo.xml',
'bk001-ch003-associate-general.xml',
'basic-install-pom.xml']
# These are books that we aren't checking yet # These are books that we aren't checking yet
BOOK_EXCEPTIONS = [] BOOK_EXCEPTIONS = []
RESULTS_OF_BUILDS = [] RESULTS_OF_BUILDS = []
# NOTE(berendt): check_output as provided in Python 2.7.5 to make script # NOTE(berendt): check_output as provided in Python 2.7.5 to make script
# usable with Python < 2.7 # usable with Python < 2.7
def check_output(*popenargs, **kwargs): def check_output(*popenargs, **kwargs):
@ -84,7 +88,7 @@ def verify_section_tags_have_xmid(doc):
for node in doc.xpath('//docbook:section', namespaces=ns): for node in doc.xpath('//docbook:section', namespaces=ns):
if "{http://www.w3.org/XML/1998/namespace}id" not in node.attrib: if "{http://www.w3.org/XML/1998/namespace}id" not in node.attrib:
raise ValueError("section missing xml:id attribute, line %d" % raise ValueError("section missing xml:id attribute, line %d" %
node.sourceline) node.sourceline)
def verify_nice_usage_of_whitespaces(docfile): def verify_nice_usage_of_whitespaces(docfile):
@ -108,8 +112,10 @@ def verify_nice_usage_of_whitespaces(docfile):
] ]
for element in elements: for element in elements:
checks.append(re.compile(".*<%s>\s+[\w\-().:!?{}\[\]]+.*\n" % element)), checks.append(re.compile(".*<%s>\s+[\w\-().:!?{}\[\]]+.*\n"
checks.append(re.compile(".*[\w\-().:!?{}\[\]]+\s+<\/%s>.*\n" % element)) % element)),
checks.append(re.compile(".*[\w\-().:!?{}\[\]]+\s+<\/%s>.*\n"
% element))
lc = 0 lc = 0
affected_lines = [] affected_lines = []
@ -121,7 +127,8 @@ def verify_nice_usage_of_whitespaces(docfile):
if len(affected_lines) > 0: if len(affected_lines) > 0:
raise ValueError("trailing or unnecessary whitespaces " raise ValueError("trailing or unnecessary whitespaces "
"in following lines: %s" % ", ".join(affected_lines)) "found in lines: %s"
% ", ".join(affected_lines))
def error_message(error_log): def error_message(error_log):
@ -138,10 +145,12 @@ def error_message(error_log):
# Check whether only files in www got updated # Check whether only files in www got updated
def only_www_touched(): def only_www_touched():
"""Check whether only files in www directory are touched"""
try: try:
args = ["git", "diff", "--name-only", "HEAD", "HEAD~1"] args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
modified_files = check_output(args).strip().split() modified_files = check_output(args).strip().split()
except (CalledProcessError, OSError) as e: except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e) print("git failed: %s" % e)
sys.exit(1) sys.exit(1)
@ -155,39 +164,44 @@ def only_www_touched():
return www_changed and not other_changed return www_changed and not other_changed
def get_modified_files(rootdir, filter=None):
def get_modified_files(rootdir, filtering=None):
"""Get modified files below doc directory""" """Get modified files below doc directory"""
# There are several tree traversals in this program that do a # There are several tree traversals in this program that do a
# chroot, we need to run this git command always from the rootdir, # chdir, we need to run this git command always from the rootdir,
# so assure that. # so assure that.
os.chdir(rootdir) os.chdir(rootdir)
try: try:
args = ["git", "diff", "--name-only", "--relative", "HEAD", "HEAD~1"] args = ["git", "diff", "--name-only", "--relative", "HEAD~1", "HEAD"]
if filter != None: if filtering is not None:
args.append(filter) args.append(filtering)
modified_files = check_output(args).strip().split() modified_files = check_output(args).strip().split()
except (subprocess.CalledProcessError, OSError) as e: except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e) print("git failed: %s" % e)
sys.exit(1) sys.exit(1)
return modified_files return modified_files
def check_deleted_files(rootdir, file_exceptions):
""" Check whether files got deleted and verify that no other file references them. def check_deleted_files(rootdir, file_exceptions, verbose):
""" Check whether files got deleted and verify that no other file
references them.
""" """
print("\nChecking for removed files") print("\nChecking that no removed files are referenced...")
deleted_files = get_modified_files(rootdir, "--diff-filter=D") deleted_files = get_modified_files(rootdir, "--diff-filter=D")
if not deleted_files: if not deleted_files:
print("No files were removed.") print("No files were removed.")
return return
print(" Removed files:") if verbose:
for f in deleted_files: print(" Removed files:")
print (" %s" % f) for f in deleted_files:
print (" %s" % f)
deleted_files = map(lambda x: os.path.abspath(x), deleted_files) deleted_files = map(lambda x: os.path.abspath(x), deleted_files)
no_checked_files = 0
# Figure out whether files were included anywhere # Figure out whether files were included anywhere
missing_reference = False missing_reference = False
@ -204,17 +218,20 @@ def check_deleted_files(rootdir, file_exceptions):
for f in files: for f in files:
if (f.endswith('.xml') and if (f.endswith('.xml') and
f != 'pom.xml' and f != 'pom.xml' and
f not in file_exceptions): f not in file_exceptions):
path = os.path.abspath(os.path.join(root, f)) path = os.path.abspath(os.path.join(root, f))
doc = etree.parse(path) doc = etree.parse(path)
no_checked_files = no_checked_files + 1
# Check for inclusion of files as part of imagedata # Check for inclusion of files as part of imagedata
for node in doc.findall('//{http://docbook.org/ns/docbook}imagedata'): for node in doc.findall(
'//{http://docbook.org/ns/docbook}imagedata'):
href = node.get('fileref') href = node.get('fileref')
if (f not in file_exceptions and if (f not in file_exceptions and
os.path.abspath(href) in deleted_files): os.path.abspath(href) in deleted_files):
print(" File %s has an imagedata href for deleted file %s " % (f, href)) print(" File %s has imagedata href for deleted "
"file %s" % (f, href))
missing_reference = True missing_reference = True
break break
@ -227,25 +244,92 @@ def check_deleted_files(rootdir, file_exceptions):
for node in doc.xpath('//xi:include', namespaces=ns): for node in doc.xpath('//xi:include', namespaces=ns):
href = node.get('href') href = node.get('href')
if (os.path.abspath(href) in deleted_files): if (os.path.abspath(href) in deleted_files):
print(" File %s has an xi:include on deleted file %s " % (f, href)) print(" File %s has an xi:include on deleted file %s"
% (f, href))
missing_reference = True missing_reference = True
if missing_reference: if missing_reference:
print("Failed removed file check, %d files were removed, "
"%d files checked."
% (len(deleted_files), no_checked_files))
sys.exit(1) sys.exit(1)
print("Passed removed file check.") print("Passed removed file check, %d files were removed, "
"%d files checked."
% (len(deleted_files), no_checked_files))
def validate_one_file(schema, rootdir, path, verbose,
                      any_failures, check_syntax, check_niceness):
    """Run the requested checks on one file and return the failure flag.

    The schema is handed in by the caller so that it is built only once;
    generating it is expensive.

    :param schema: schema object given to validation_failed(); its
        error_log is printed when validation fails
    :param rootdir: directory that printed file names are made relative to
    :param path: path of the file to check
    :param verbose: if true, print the file name before checking it
    :param any_failures: failure flag accumulated by the caller so far
    :param check_syntax: if true, parse the file, validate it against
        the schema and check that sections carry an xml:id
    :param check_niceness: if true, run the whitespace niceness check
    :returns: any_failures, set to True if a check on this file failed
    """
    if verbose:
        print(" Validating %s" % os.path.relpath(path, rootdir))

    try:
        if check_syntax:
            tree = etree.parse(path)
            if validation_failed(schema, tree):
                any_failures = True
                print(error_message(schema.error_log))
            verify_section_tags_have_xmid(tree)
        if check_niceness:
            verify_nice_usage_of_whitespaces(path)
    except (etree.XMLSyntaxError, ValueError) as err:
        # Both kinds of error are reported the same way: mark the run as
        # failed and print the offending file relative to rootdir.
        any_failures = True
        print(" %s: %s" % (os.path.relpath(path, rootdir), err))

    return any_failures
def is_xml(filename):
    """Return True if filename ends with .xml and is not a pom.xml file.

    The Maven check compares the base name, so a ``pom.xml`` given
    without any directory component is rejected just like
    ``some/dir/pom.xml``.

    :param filename: file name or path, relative or absolute
    :returns: True for XML files other than pom.xml, False otherwise
    """
    return (filename.endswith('.xml') and
            os.path.basename(filename) != 'pom.xml')
def validate_individual_files(rootdir, exceptions, verbose,
check_syntax=False, check_niceness=False,
voting=True):
"""Validate list of modified files."""
def validate_individual_files(rootdir, exceptions, force=False, niceness=False, voting=True):
schema = get_schema() schema = get_schema()
any_failures = False any_failures = False
if force: no_validated = 0
print("\nValidating all files")
else: # Do not select delete files, just Added, Copied, Modified, Renamed,
modified_files = get_modified_files(rootdir) # or Type changed
print("\nFollowing files will be validated:") modified_files = get_modified_files(rootdir, "--diff-filter=ACMRT")
for f in modified_files:
print(">>> %s" % f) modified_files = filter(is_xml, modified_files)
modified_files = map(lambda x: os.path.abspath(x), modified_files) print("\nValidating files...")
modified_files = map(lambda x: os.path.abspath(x), modified_files)
for f in modified_files:
if os.path.basename(f) in exceptions:
continue
any_failures = validate_one_file(
schema, rootdir, f, verbose, any_failures,
check_syntax, check_niceness)
no_validated = no_validated + 1
if voting and any_failures:
print("Validation failed, validated %d files.\n" % no_validated)
sys.exit(1)
print("Validation passed, validated %d files.\n" % no_validated)
def validate_all_files(rootdir, exceptions, verbose,
check_syntax, check_niceness=False, voting=True):
"""Validate all xml files."""
schema = get_schema()
any_failures = False
no_validated = 0
print("\nValidating all files...")
for root, dirs, files in os.walk(rootdir): for root, dirs, files in os.walk(rootdir):
# Don't descend into 'target' subdirectories # Don't descend into 'target' subdirectories
@ -258,37 +342,28 @@ def validate_individual_files(rootdir, exceptions, force=False, niceness=False,
for f in files: for f in files:
# Ignore maven files, which are called pom.xml # Ignore maven files, which are called pom.xml
if (f.endswith('.xml') and if (f.endswith('.xml') and
f != 'pom.xml' and f != 'pom.xml' and
f not in exceptions): f not in exceptions):
try: path = os.path.abspath(os.path.join(root, f))
path = os.path.abspath(os.path.join(root, f)) any_failures = validate_one_file(
if not force and path not in modified_files: schema, rootdir, path, verbose, any_failures,
continue check_syntax, check_niceness)
doc = etree.parse(path) no_validated = no_validated + 1
if validation_failed(schema, doc):
any_failures = True
print(error_message(schema.error_log))
verify_section_tags_have_xmid(doc)
if niceness:
verify_nice_usage_of_whitespaces(os.path.join(root, f))
except etree.XMLSyntaxError as e:
any_failures = True
print("%s: %s" % (path, e))
except ValueError as e:
any_failures = True
print("%s: %s" % (path, e))
if voting and any_failures: if voting and any_failures:
print("Validation failed, validated %d files.\n" % no_validated)
sys.exit(1) sys.exit(1)
print("Validation passed.\n") print("Validation passed, validated %d files.\n" % no_validated)
def logging_build_book(result): def logging_build_book(result):
"""Callback for book building"""
RESULTS_OF_BUILDS.append(result) RESULTS_OF_BUILDS.append(result)
def build_book(rootdir, book): def build_book(book):
"""Build a single book"""
os.chdir(book) os.chdir(book)
result = True result = True
returncode = 0 returncode = 0
@ -305,7 +380,8 @@ def build_book(rootdir, book):
return (os.path.basename(book), result, output, returncode) return (os.path.basename(book), result, output, returncode)
def build_affected_books(rootdir, book_exceptions, file_exceptions, force=False, voting=True): def build_affected_books(rootdir, book_exceptions, file_exceptions,
force=False, voting=True):
"""Build all the books which are affected by modified files. """Build all the books which are affected by modified files.
Looks for all directories with "pom.xml" and checks if a Looks for all directories with "pom.xml" and checks if a
@ -338,16 +414,17 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force=False,
for f in files: for f in files:
if (f.endswith('.xml') and if (f.endswith('.xml') and
f != 'pom.xml' and f != 'pom.xml' and
f not in file_exceptions): f not in file_exceptions):
path = os.path.abspath(os.path.join(root, f)) path = os.path.abspath(os.path.join(root, f))
doc = etree.parse(path) doc = etree.parse(path)
# Check for inclusion of files as part of imagedata # Check for inclusion of files as part of imagedata
for node in doc.findall('//{http://docbook.org/ns/docbook}imagedata'): for node in doc.findall(
'//{http://docbook.org/ns/docbook}imagedata'):
href = node.get('fileref') href = node.get('fileref')
if (f not in file_exceptions and if (f not in file_exceptions and
os.path.abspath(href) in modified_files): os.path.abspath(href) in modified_files):
affected_books.append(book_root) affected_books.append(book_root)
break break
@ -359,7 +436,7 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force=False,
for node in doc.xpath('//xi:include', namespaces=ns): for node in doc.xpath('//xi:include', namespaces=ns):
href = node.get('href') href = node.get('href')
if (f not in file_exceptions and if (f not in file_exceptions and
os.path.abspath(href) in modified_files): os.path.abspath(href) in modified_files):
affected_books.append(book_root) affected_books.append(book_root)
break break
if book_root in affected_books: if book_root in affected_books:
@ -380,7 +457,8 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force=False,
print("Queuing the following books for building:") print("Queuing the following books for building:")
for book in books: for book in books:
print(" %s" % os.path.basename(book)) print(" %s" % os.path.basename(book))
pool.apply_async(build_book, (rootdir, book), callback = logging_build_book) pool.apply_async(build_book, book,
callback=logging_build_book)
pool.close() pool.close()
print("Building all books now...") print("Building all books now...")
pool.join() pool.join()
@ -391,7 +469,8 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force=False,
print(">>> Build of book %s succeeded." % book) print(">>> Build of book %s succeeded." % book)
else: else:
any_failures = True any_failures = True
print(">>> Build of book %s failed (returncode = %d)." % (book, returncode)) print(">>> Build of book %s failed (returncode = %d)."
% (book, returncode))
print("\n%s" % output) print("\n%s" % output)
if voting and any_failures: if voting and any_failures:
@ -404,14 +483,29 @@ def main(args):
print("Only files in www directory changed, nothing to do.") print("Only files in www directory changed, nothing to do.")
return return
if args.check_syntax: if args.job_build:
validate_individual_files(args.path, FILE_EXCEPTIONS, args.force, args.with_niceness, args.non_voting) args.check_delete = True
args.check_syntax = True
args.check_build = True
if args.job_niceness:
args.check_niceness = True
if args.check_delete: if args.check_delete:
check_deleted_files(args.path, FILE_EXCEPTIONS) check_deleted_files(args.path, FILE_EXCEPTIONS, args.verbose)
if args.check_syntax or args.check_niceness:
if args.force:
validate_all_files(args.path, FILE_EXCEPTIONS, args.verbose,
args.check_niceness, args.non_voting)
else:
validate_individual_files(args.path, FILE_EXCEPTIONS,
args.verbose, args.check_syntax,
args.check_niceness, args.non_voting)
if args.check_build: if args.check_build:
build_affected_books(args.path, BOOK_EXCEPTIONS, FILE_EXCEPTIONS, args.force, args.non_voting) build_affected_books(args.path, BOOK_EXCEPTIONS, FILE_EXCEPTIONS,
args.force, args.non_voting)
def default_root(): def default_root():
@ -422,7 +516,7 @@ def default_root():
try: try:
args = ["git", "rev-parse", "--show-toplevel"] args = ["git", "rev-parse", "--show-toplevel"]
gitroot = check_output(args).rstrip() gitroot = check_output(args).rstrip()
except (CalledProcessError, OSError) as e: except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e) print("git failed: %s" % e)
sys.exit(1) sys.exit(1)
@ -434,18 +528,26 @@ if __name__ == '__main__':
parser.add_argument('path', nargs='?', default=default_root(), parser.add_argument('path', nargs='?', default=default_root(),
help="Root directory that contains DocBook files, " help="Root directory that contains DocBook files, "
"defaults to `git rev-parse --show-toplevel`/doc") "defaults to `git rev-parse --show-toplevel`/doc")
parser.add_argument("--force", help="force the validation of all files " parser.add_argument("--force", help="Force the validation of all files "
"and build all books", action="store_true") "and build all books", action="store_true")
parser.add_argument("--check-build", help="try to build books using " parser.add_argument("--check-build", help="Try to build books using "
"modified files", action="store_true") "modified files", action="store_true")
parser.add_argument("--check-syntax", help="check the syntax of modified " parser.add_argument("--check-syntax", help="Check the syntax of modified "
"files", action="store_true") "files", action="store_true")
parser.add_argument("--check-delete", help="check that deleted files " parser.add_argument("--check-delete", help="Check that deleted files "
"are not used", action="store_true") "are not used.", action="store_true")
parser.add_argument("--with-niceness", help="when checking the syntax " parser.add_argument("--check-niceness", help="Check the niceness of "
"also check the niceness of the syntax", "files, for example whitespace.",
action="store_true") action="store_true")
parser.add_argument("--non-voting", help="do not exit on failures", parser.add_argument("--non-voting", help="Do not exit on failures",
action="store_false") action="store_false")
parser.add_argument("--verbose", help="Verbose execution",
action="store_true")
parser.add_argument("--job-niceness", help="Override values "
"for running as niceness gate-job",
action="store_true")
parser.add_argument("--job-build", help="Override values "
"for running as build gate-job",
action="store_true")
args = parser.parse_args() args = parser.parse_args()
main(args) main(args)

View File

@ -29,13 +29,17 @@ import sys
import urllib2 import urllib2
# These are files that are known to not be in DocBook format # These are files that are known to not be in DocBook format
FILE_EXCEPTIONS = ['st-training-guides.xml', 'ha-guide-docinfo.xml', 'bk001-ch003-associate-general.xml', 'basic-install-pom.xml'] FILE_EXCEPTIONS = ['st-training-guides.xml',
'ha-guide-docinfo.xml',
'bk001-ch003-associate-general.xml',
'basic-install-pom.xml']
# These are books that we aren't checking yet # These are books that we aren't checking yet
BOOK_EXCEPTIONS = [] BOOK_EXCEPTIONS = []
RESULTS_OF_BUILDS = [] RESULTS_OF_BUILDS = []
# NOTE(berendt): check_output as provided in Python 2.7.5 to make script # NOTE(berendt): check_output as provided in Python 2.7.5 to make script
# usable with Python < 2.7 # usable with Python < 2.7
def check_output(*popenargs, **kwargs): def check_output(*popenargs, **kwargs):
@ -108,8 +112,10 @@ def verify_nice_usage_of_whitespaces(rootdir, docfile, found_extra_whitespace):
] ]
for element in elements: for element in elements:
checks.append(re.compile(".*<%s>\s+[\w\-().:!?{}\[\]]+.*\n" % element)), checks.append(re.compile(".*<%s>\s+[\w\-().:!?{}\[\]]+.*\n"
checks.append(re.compile(".*[\w\-().:!?{}\[\]]+\s+<\/%s>.*\n" % element)) % element)),
checks.append(re.compile(".*[\w\-().:!?{}\[\]]+\s+<\/%s>.*\n"
% element))
lc = 0 lc = 0
affected_lines = [] affected_lines = []
@ -127,6 +133,7 @@ def verify_nice_usage_of_whitespaces(rootdir, docfile, found_extra_whitespace):
", ".join(affected_lines))) ", ".join(affected_lines)))
return found_extra_whitespace return found_extra_whitespace
def error_message(error_log): def error_message(error_log):
"""Return a string that contains the error message. """Return a string that contains the error message.
@ -138,12 +145,15 @@ def error_message(error_log):
errs.reverse() errs.reverse()
return "\n".join(errs) return "\n".join(errs)
# Check whether only files in www got updated # Check whether only files in www got updated
def only_www_touched(): def only_www_touched():
"""Check whether only files in www directory are touched"""
try: try:
args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"] args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
modified_files = check_output(args).strip().split() modified_files = check_output(args).strip().split()
except (CalledProcessError, OSError) as e: except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e) print("git failed: %s" % e)
sys.exit(1) sys.exit(1)
@ -157,38 +167,41 @@ def only_www_touched():
return www_changed and not other_changed return www_changed and not other_changed
def get_modified_files(rootdir, filter=None):
def get_modified_files(rootdir, filtering=None):
"""Get modified files below doc directory""" """Get modified files below doc directory"""
# There are several tree traversals in this program that do a # There are several tree traversals in this program that do a
# chroot, we need to run this git command always from the rootdir, # chdir, we need to run this git command always from the rootdir,
# so assure that. # so assure that.
os.chdir(rootdir) os.chdir(rootdir)
try: try:
args = ["git", "diff", "--name-only", "--relative", "HEAD~1", "HEAD"] args = ["git", "diff", "--name-only", "--relative", "HEAD~1", "HEAD"]
if filter != None: if filtering is not None:
args.append(filter) args.append(filtering)
modified_files = check_output(args).strip().split() modified_files = check_output(args).strip().split()
except (CalledProcessError, OSError) as e: except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e) print("git failed: %s" % e)
sys.exit(1) sys.exit(1)
return modified_files return modified_files
def check_deleted_files(rootdir, file_exceptions): def check_deleted_files(rootdir, file_exceptions, verbose):
""" Check whether files got deleted and verify that no other file references them. """ Check whether files got deleted and verify that no other file
references them.
""" """
print("\nChecking for removed files") print("\nChecking that no removed files are referenced...")
deleted_files = get_modified_files(rootdir, "--diff-filter=D") deleted_files = get_modified_files(rootdir, "--diff-filter=D")
if not deleted_files: if not deleted_files:
print("No files were removed.") print("No files were removed.")
return return
print(" Removed files:") if verbose:
for f in deleted_files: print(" Removed files:")
print (" %s" % f) for f in deleted_files:
print (" %s" % f)
deleted_files = map(lambda x: os.path.abspath(x), deleted_files) deleted_files = map(lambda x: os.path.abspath(x), deleted_files)
# Figure out whether files were included anywhere # Figure out whether files were included anywhere
@ -206,17 +219,19 @@ def check_deleted_files(rootdir, file_exceptions):
for f in files: for f in files:
if (f.endswith('.xml') and if (f.endswith('.xml') and
f != 'pom.xml' and f != 'pom.xml' and
f not in file_exceptions): f not in file_exceptions):
path = os.path.abspath(os.path.join(root, f)) path = os.path.abspath(os.path.join(root, f))
doc = etree.parse(path) doc = etree.parse(path)
# Check for inclusion of files as part of imagedata # Check for inclusion of files as part of imagedata
for node in doc.findall('//{http://docbook.org/ns/docbook}imagedata'): for node in doc.findall(
'//{http://docbook.org/ns/docbook}imagedata'):
href = node.get('fileref') href = node.get('fileref')
if (f not in file_exceptions and if (f not in file_exceptions and
os.path.abspath(href) in deleted_files): os.path.abspath(href) in deleted_files):
print(" File %s has an imagedata href for deleted file %s " % (f, href)) print(" File %s has imagedata href for deleted "
"file %s" % (f, href))
missing_reference = True missing_reference = True
break break
@ -229,27 +244,86 @@ def check_deleted_files(rootdir, file_exceptions):
for node in doc.xpath('//xi:include', namespaces=ns): for node in doc.xpath('//xi:include', namespaces=ns):
href = node.get('href') href = node.get('href')
if (os.path.abspath(href) in deleted_files): if (os.path.abspath(href) in deleted_files):
print(" File %s has an xi:include on deleted file %s " % (f, href)) print(" File %s has an xi:include on deleted file %s "
% (f, href))
missing_reference = True missing_reference = True
if missing_reference: if missing_reference:
print("Failed removed file check, %d files were removed."
% len(deleted_files))
sys.exit(1) sys.exit(1)
print("Passed removed file check.") print("Passed removed file check, %d files were removed."
% len(deleted_files))
def validate_individual_files(rootdir, exceptions, force): def validate_one_file(schema, rootdir, path, verbose,
any_failures, found_extra_whitespace):
"""Validate a single file"""
# We pass schema in as a way of caching it, generating it is expensive
if verbose:
print(" Validating %s" % path)
try:
doc = etree.parse(path)
if validation_failed(schema, doc):
any_failures = True
print(error_message(schema.error_log))
verify_section_tags_have_xmid(doc)
found_extra_whitespace = verify_nice_usage_of_whitespaces(
rootdir, path, found_extra_whitespace)
except etree.XMLSyntaxError as e:
any_failures = True
print("%s: %s" % (path, e))
except ValueError as e:
any_failures = True
print("%s: %s" % (path, e))
return any_failures, found_extra_whitespace
def is_xml(filename):
    """Return True if filename ends with .xml and is not a pom.xml file.

    The Maven check compares the base name, so a ``pom.xml`` given
    without any directory component is rejected just like
    ``some/dir/pom.xml``.

    :param filename: file name or path, relative or absolute
    :returns: True for XML files other than pom.xml, False otherwise
    """
    return (filename.endswith('.xml') and
            os.path.basename(filename) != 'pom.xml')
def validate_individual_files(rootdir, exceptions, verbose):
"""Validate list of modified files."""
schema = get_schema() schema = get_schema()
found_extra_whitespace = False extra_whitespace = False
any_failures = False any_failures = False
if force: no_validated = 0
print("\nValidating all files")
else: # Do not select delete files, just Added, Copied, Modified, Renamed,
modified_files = get_modified_files(rootdir) # or Type changed
print("\nFollowing files will be validated:") modified_files = get_modified_files(rootdir, "--diff-filter=ACMRT")
for f in modified_files:
print(">>> %s" % f) modified_files = filter(is_xml, modified_files)
modified_files = map(lambda x: os.path.abspath(x), modified_files) print("\nValidating files...")
modified_files = map(lambda x: os.path.abspath(x), modified_files)
for f in modified_files:
if os.path.basename(f) in exceptions:
continue
any_failures, extra_whitespace = validate_one_file(
schema, rootdir, f, verbose, any_failures, extra_whitespace)
no_validated = no_validated + 1
if any_failures:
sys.exit(1)
print("Validation passed, validated %d files.\n" % no_validated)
def validate_all_files(rootdir, exceptions, verbose):
"""Validate all xml files."""
schema = get_schema()
extra_whitespace = False
any_failures = False
no_validated = 0
print("\nValidating all files")
for root, dirs, files in os.walk(rootdir): for root, dirs, files in os.walk(rootdir):
# Don't descend into 'target' subdirectories # Don't descend into 'target' subdirectories
@ -262,34 +336,27 @@ def validate_individual_files(rootdir, exceptions, force):
for f in files: for f in files:
# Ignore maven files, which are called pom.xml # Ignore maven files, which are called pom.xml
if (f.endswith('.xml') and if (f.endswith('.xml') and
f != 'pom.xml' and f != 'pom.xml' and
f not in exceptions): f not in exceptions):
try: path = os.path.abspath(os.path.join(root, f))
path = os.path.abspath(os.path.join(root, f)) any_failures, extra_whitespace = validate_one_file(
if not force and path not in modified_files: schema, rootdir, path, verbose, any_failures,
continue extra_whitespace)
doc = etree.parse(path) no_validated = no_validated + 1
if validation_failed(schema, doc):
any_failures = True
print(error_message(schema.error_log))
verify_section_tags_have_xmid(doc)
found_extra_whitespace = verify_nice_usage_of_whitespaces(rootdir, path, found_extra_whitespace)
except etree.XMLSyntaxError as e:
any_failures = True
print("%s: %s" % (path, e))
except ValueError as e:
any_failures = True
print("%s: %s" % (path, e))
if any_failures: if any_failures:
sys.exit(1) sys.exit(1)
print("Validation passed.\n") print("Validation passed, validated %d files.\n" % no_validated)
def logging_build_book(result): def logging_build_book(result):
"""Callback for book building"""
RESULTS_OF_BUILDS.append(result) RESULTS_OF_BUILDS.append(result)
def build_book(rootdir, book): def build_book(book):
"""Build a single book"""
os.chdir(book) os.chdir(book)
result = True result = True
returncode = 0 returncode = 0
@ -339,16 +406,17 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force):
for f in files: for f in files:
if (f.endswith('.xml') and if (f.endswith('.xml') and
f != 'pom.xml' and f != 'pom.xml' and
f not in file_exceptions): f not in file_exceptions):
path = os.path.abspath(os.path.join(root, f)) path = os.path.abspath(os.path.join(root, f))
doc = etree.parse(path) doc = etree.parse(path)
# Check for inclusion of files as part of imagedata # Check for inclusion of files as part of imagedata
for node in doc.findall('//{http://docbook.org/ns/docbook}imagedata'): for node in doc.findall(
'//{http://docbook.org/ns/docbook}imagedata'):
href = node.get('fileref') href = node.get('fileref')
if (f not in file_exceptions and if (f not in file_exceptions and
os.path.abspath(href) in modified_files): os.path.abspath(href) in modified_files):
affected_books.append(book_root) affected_books.append(book_root)
break break
@ -360,7 +428,7 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force):
for node in doc.xpath('//xi:include', namespaces=ns): for node in doc.xpath('//xi:include', namespaces=ns):
href = node.get('href') href = node.get('href')
if (f not in file_exceptions and if (f not in file_exceptions and
os.path.abspath(href) in modified_files): os.path.abspath(href) in modified_files):
affected_books.append(book_root) affected_books.append(book_root)
break break
if book_root in affected_books: if book_root in affected_books:
@ -381,7 +449,8 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force):
print("Queuing the following books for building:") print("Queuing the following books for building:")
for book in books: for book in books:
print(" %s" % os.path.basename(book)) print(" %s" % os.path.basename(book))
pool.apply_async(build_book, (rootdir, book), callback = logging_build_book) pool.apply_async(build_book, book,
callback=logging_build_book)
pool.close() pool.close()
print("Building all queued books now...") print("Building all queued books now...")
pool.join() pool.join()
@ -392,14 +461,15 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, force):
print(">>> Build of book %s succeeded." % book) print(">>> Build of book %s succeeded." % book)
else: else:
any_failures = True any_failures = True
print(">>> Build of book %s failed (returncode = %d)." % (book, returncode)) print(">>> Build of book %s failed (returncode = %d)."
% (book, returncode))
print("\n%s" % output) print("\n%s" % output)
if any_failures: if any_failures:
sys.exit(1) sys.exit(1)
def main(rootdir, force): def main(rootdir, force, verbose):
if force: if force:
print("Validation of all files and build of all books will be forced.") print("Validation of all files and build of all books will be forced.")
@ -407,8 +477,12 @@ def main(rootdir, force):
print("Only files in www directory changed, no validation done.") print("Only files in www directory changed, no validation done.")
return return
check_deleted_files(rootdir, FILE_EXCEPTIONS) check_deleted_files(rootdir, FILE_EXCEPTIONS, verbose)
validate_individual_files(rootdir, FILE_EXCEPTIONS, force) if force:
validate_all_files(rootdir, FILE_EXCEPTIONS, verbose)
else:
validate_individual_files(rootdir, FILE_EXCEPTIONS, verbose)
build_affected_books(rootdir, BOOK_EXCEPTIONS, FILE_EXCEPTIONS, force) build_affected_books(rootdir, BOOK_EXCEPTIONS, FILE_EXCEPTIONS, force)
@ -420,7 +494,7 @@ def default_root():
try: try:
args = ["git", "rev-parse", "--show-toplevel"] args = ["git", "rev-parse", "--show-toplevel"]
gitroot = check_output(args).rstrip() gitroot = check_output(args).rstrip()
except (CalledProcessError, OSError) as e: except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e) print("git failed: %s" % e)
sys.exit(1) sys.exit(1)
@ -432,7 +506,9 @@ if __name__ == '__main__':
parser.add_argument('path', nargs='?', default=default_root(), parser.add_argument('path', nargs='?', default=default_root(),
help="Root directory that contains DocBook files, " help="Root directory that contains DocBook files, "
"defaults to `git rev-parse --show-toplevel`/doc/") "defaults to `git rev-parse --show-toplevel`/doc/")
parser.add_argument("--force", help="force the validation of all files " parser.add_argument("--force", help="Force the validation of all files "
"and build all books", action="store_true") "and build all books", action="store_true")
parser.add_argument("--verbose", help="Verbose execution",
action="store_true")
args = parser.parse_args() args = parser.parse_args()
main(args.path, args.force) main(args.path, args.force, args.verbose)