diff --git a/test.py b/test.py index 86a6f7c8..6b626912 100755 --- a/test.py +++ b/test.py @@ -147,8 +147,8 @@ def only_www_touched(): """Check whether only files in www directory are touched""" try: - args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"] - modified_files = check_output(args).strip().split() + git_args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"] + modified_files = check_output(git_args).strip().split() except (subprocess.CalledProcessError, OSError) as e: print("git failed: %s" % e) sys.exit(1) @@ -169,8 +169,8 @@ def ha_guide_touched(): """Check whether files in high-availability-guide directory are touched""" try: - args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"] - modified_files = check_output(args).strip().split() + git_args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"] + modified_files = check_output(git_args).strip().split() except (subprocess.CalledProcessError, OSError) as e: print("git failed: %s" % e) sys.exit(1) @@ -193,8 +193,8 @@ def check_modified_affects_all(rootdir, verbose): os.chdir(rootdir) try: - args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"] - modified_files = check_output(args).strip().split() + git_args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"] + modified_files = check_output(git_args).strip().split() except (subprocess.CalledProcessError, OSError) as e: print("git failed: %s" % e) sys.exit(1) @@ -222,10 +222,11 @@ def get_modified_files(rootdir, filtering=None): os.chdir(rootdir) try: - args = ["git", "diff", "--name-only", "--relative", "HEAD~1", "HEAD"] + git_args = ["git", "diff", "--name-only", "--relative", "HEAD~1", + "HEAD"] if filtering is not None: - args.append(filtering) - modified_files = check_output(args).strip().split() + git_args.append(filtering) + modified_files = check_output(git_args).strip().split() except (subprocess.CalledProcessError, OSError) as e: print("git failed: %s" % e) sys.exit(1) @@ -391,7 +392,6 @@ def validate_all_files(rootdir, exceptions, verbose, elif check_niceness: print("Checking niceness of all xml files...") - for root, dirs, files in os.walk(rootdir): # Don't descend into 'target' subdirectories try: @@ -485,23 +485,41 @@ def build_book(book): return (base_book, result, output, returncode) -def build_affected_books(rootdir, book_exceptions, file_exceptions, - verbose, force=False, voting=True): - """Build all the books which are affected by modified files. +def is_book_master(filename): + """Returns True if filename is one of the special filenames used for the + book master files. - Looks for all directories with "pom.xml" and checks if a - XML file in the directory includes a modified file. If at least - one XML file includes a modified file the method calls - "mvn clean generate-sources" in that directory. - - This will throw an exception if a book fails to build + We do not parse pom.xml for the includes directive to determine + the top-level files and thus have to use a heuristic. + """ + + return ((filename.startswith(('bk-', 'bk_', 'st-')) + and filename.endswith('.xml')) or + filename == 'openstack-glossary.xml') + + +def find_affected_books(rootdir, book_exceptions, verbose, + force): + """Check which books are affected by modified files. + + Returns a set with books. """ - modified_files = get_modified_files(rootdir) - modified_files = map(lambda x: os.path.abspath(x), modified_files) - build_all_books = force or check_modified_affects_all(rootdir, verbose) - affected_books = [] - books = [] book_root = rootdir + + books = [] + affected_books = set() + + build_all_books = force or check_modified_affects_all(rootdir, verbose) + + # Dictionary that contains a set of files. + # The key is a filename, the set contains files that include this file. + included_by = {} + + # Dictionary with books and their bk*.xml files + book_bk = {} + + # 1. Iterate over whole tree and analyze include files. + # This updates included_by, book_bk and books. for root, dirs, files in os.walk(rootdir): # Don't descend into 'target' subdirectories try: @@ -519,59 +537,77 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, books.append(root) book_root = root - os.chdir(root) - # No need to check single books if we build all, we just # collect list of books if build_all_books: continue + # ha-guide uses asciidoc which we do not track. # Just check whether any file is touched in that directory if root.endswith('doc/high-availability-guide'): if ha_guide_touched(): - affected_books.append(book_root) - # We can scan only for depth of one of inclusion - # therefore skip the common directory since there's no - # book build in it. - elif not root.endswith('doc/common'): - for f in files: - if (f.endswith('.xml') and - f not in file_exceptions): - path = os.path.abspath(os.path.join(root, f)) - # If the file itself is modified, build the book - if path in modified_files: - affected_books.append(book_root) - break + affected_books.add(book_root) - # If the file itself is modified, build the book. - # Note this is an optimization in most cases but - # needed for bk-*.xml since those are included by - # pom.xml and pom.xml is not checked for - # modification of included files. - doc = etree.parse(path) + for f in files: + f_base = os.path.basename(f) + f_abs = os.path.abspath(os.path.join(root, f)) + if is_book_master(f_base): + book_bk[f_abs] = book_root + if (f.endswith('.xml') and + f != "pom.xml" and + f != "ha-guide-docinfo.xml"): + doc = etree.parse(f_abs) + for node in doc.findall( + '//{http://docbook.org/ns/docbook}imagedata'): + href = node.get('fileref') + href_abs = os.path.abspath(os.path.join(root, href)) + if href_abs in included_by: + included_by[href_abs].add(f_abs) + else: + included_by[href_abs] = set([f_abs]) - # Check for inclusion of files as part of imagedata - for node in doc.findall( - '//{http://docbook.org/ns/docbook}imagedata'): - href = node.get('fileref') - if (f not in file_exceptions and - os.path.abspath(href) in modified_files): - affected_books.append(book_root) - break + ns = {"xi": "http://www.w3.org/2001/XInclude"} + for node in doc.xpath('//xi:include', namespaces=ns): + href = node.get('href') + href_abs = os.path.abspath(os.path.join(root, href)) + if href_abs in included_by: + included_by[href_abs].add(f_abs) + else: + included_by[href_abs] = set([f_abs]) - if book_root in affected_books: - break + if not build_all_books: + # Generate list of modified_files + modified_files = get_modified_files(rootdir) + modified_files = map(lambda x: os.path.abspath(x), modified_files) - # Check for inclusion of files as part of xi:include - ns = {"xi": "http://www.w3.org/2001/XInclude"} - for node in doc.xpath('//xi:include', namespaces=ns): - href = node.get('href') - if (f not in file_exceptions and - os.path.abspath(href) in modified_files): - affected_books.append(book_root) - break - if book_root in affected_books: - break + # 2. Find all modified files and where they are included + + # List of files that we have to iterate over, these are affected + # by some modification + new_files = modified_files + + # All files that are affected (either directly or indirectly) + affected_files = set(modified_files) + + # 3. Iterate over files that have includes on modified files + # and build a closure - the set of all files (affected_files) + # that have a path to a modified file via includes. + while len(new_files) > 0: + new_files_to_check = new_files + new_files = [] + for f in new_files_to_check: + # Skip bk*.xml files + if is_book_master(os.path.basename(f)): + book_modified = book_bk[f] + if book_modified not in affected_books: + affected_books.add(book_modified) + continue + if f not in included_by: + continue + for g in included_by[f]: + if g not in affected_files: + new_files.append(g) + affected_files.add(g) if build_all_books: print("Building all books.") @@ -580,6 +616,23 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions, else: print("No books are affected by modified files. Building all books.") + return books + + +def build_affected_books(rootdir, book_exceptions, + verbose, force=False, voting=True): + """Build all the books which are affected by modified files. + + Looks for all directories with "pom.xml" and checks if a + XML file in the directory includes a modified file. If at least + one XML file includes a modified file the method calls + "mvn clean generate-sources" in that directory. + + This will throw an exception if a book fails to build + """ + + books = find_affected_books(rootdir, book_exceptions, + verbose, force) maxjobs = multiprocessing.cpu_count() # Jenkins fails sometimes with errors if too many jobs run, artificially # limit to 4 for now. @@ -637,7 +690,7 @@ def main(args): args.check_niceness, args.non_voting) if args.check_build: - build_affected_books(args.path, BOOK_EXCEPTIONS, FILE_EXCEPTIONS, + build_affected_books(args.path, BOOK_EXCEPTIONS, args.verbose, args.force, args.non_voting) @@ -647,8 +700,8 @@ def default_root(): The current working directory must be inside of the openstack-manuals repository for this method to succeed""" try: - args = ["git", "rev-parse", "--show-toplevel"] - gitroot = check_output(args).rstrip() + git_args = ["git", "rev-parse", "--show-toplevel"] + gitroot = check_output(git_args).rstrip() except (subprocess.CalledProcessError, OSError) as e: print("git failed: %s" % e) sys.exit(1) @@ -679,8 +732,8 @@ if __name__ == '__main__': action="store_false") parser.add_argument("--verbose", help="Verbose execution", action="store_true") - args = parser.parse_args() + prog_args = parser.parse_args() if (len(sys.argv) == 1): # No arguments given, use check-all - args.check_all = True - main(args) + prog_args.check_all = True + main(prog_args)