Search all dependencies to check which books to build

So far we only checked one level of depth for inclusion of modified files.
This led to missed book builds.

Now we check all dependencies, even across multiple levels of includes.

Rename args to silence pylint warnings.

Change-Id: I78df8803f17cdb736230c2137277bb3886071e85
This commit is contained in:
Andreas Jaeger 2013-10-07 15:20:21 +02:00
parent b2bf6d88fb
commit e9dc8cdc1d
1 changed files with 124 additions and 71 deletions

195
test.py
View File

@ -147,8 +147,8 @@ def only_www_touched():
"""Check whether only files in www directory are touched""" """Check whether only files in www directory are touched"""
try: try:
args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"] git_args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
modified_files = check_output(args).strip().split() modified_files = check_output(git_args).strip().split()
except (subprocess.CalledProcessError, OSError) as e: except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e) print("git failed: %s" % e)
sys.exit(1) sys.exit(1)
@ -169,8 +169,8 @@ def ha_guide_touched():
"""Check whether files in high-availability-guide directory are touched""" """Check whether files in high-availability-guide directory are touched"""
try: try:
args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"] git_args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
modified_files = check_output(args).strip().split() modified_files = check_output(git_args).strip().split()
except (subprocess.CalledProcessError, OSError) as e: except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e) print("git failed: %s" % e)
sys.exit(1) sys.exit(1)
@ -193,8 +193,8 @@ def check_modified_affects_all(rootdir, verbose):
os.chdir(rootdir) os.chdir(rootdir)
try: try:
args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"] git_args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
modified_files = check_output(args).strip().split() modified_files = check_output(git_args).strip().split()
except (subprocess.CalledProcessError, OSError) as e: except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e) print("git failed: %s" % e)
sys.exit(1) sys.exit(1)
@ -222,10 +222,11 @@ def get_modified_files(rootdir, filtering=None):
os.chdir(rootdir) os.chdir(rootdir)
try: try:
args = ["git", "diff", "--name-only", "--relative", "HEAD~1", "HEAD"] git_args = ["git", "diff", "--name-only", "--relative", "HEAD~1",
"HEAD"]
if filtering is not None: if filtering is not None:
args.append(filtering) git_args.append(filtering)
modified_files = check_output(args).strip().split() modified_files = check_output(git_args).strip().split()
except (subprocess.CalledProcessError, OSError) as e: except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e) print("git failed: %s" % e)
sys.exit(1) sys.exit(1)
@ -391,7 +392,6 @@ def validate_all_files(rootdir, exceptions, verbose,
elif check_niceness: elif check_niceness:
print("Checking niceness of all xml files...") print("Checking niceness of all xml files...")
for root, dirs, files in os.walk(rootdir): for root, dirs, files in os.walk(rootdir):
# Don't descend into 'target' subdirectories # Don't descend into 'target' subdirectories
try: try:
@ -485,23 +485,41 @@ def build_book(book):
return (base_book, result, output, returncode) return (base_book, result, output, returncode)
def build_affected_books(rootdir, book_exceptions, file_exceptions, def is_book_master(filename):
verbose, force=False, voting=True): """Returns True if filename is one of the special filenames used for the
"""Build all the books which are affected by modified files. book master files.
Looks for all directories with "pom.xml" and checks if a We do not parse pom.xml for the includes directive to determine
XML file in the directory includes a modified file. If at least the top-level files and thus have to use a heuristic.
one XML file includes a modified file the method calls """
"mvn clean generate-sources" in that directory.
return ((filename.startswith(('bk-', 'bk_', 'st-'))
This will throw an exception if a book fails to build and filename.endswith('.xml')) or
filename == 'openstack-glossary.xml')
def find_affected_books(rootdir, book_exceptions, verbose,
force):
"""Check which books are affected by modified files.
Returns a set with books.
""" """
modified_files = get_modified_files(rootdir)
modified_files = map(lambda x: os.path.abspath(x), modified_files)
build_all_books = force or check_modified_affects_all(rootdir, verbose)
affected_books = []
books = []
book_root = rootdir book_root = rootdir
books = []
affected_books = set()
build_all_books = force or check_modified_affects_all(rootdir, verbose)
# Dictionary that contains a set of files.
# The key is a filename, the set contains files that include this file.
included_by = {}
# Dictionary with books and their bk*.xml files
book_bk = {}
# 1. Iterate over whole tree and analyze include files.
# This updates included_by, book_bk and books.
for root, dirs, files in os.walk(rootdir): for root, dirs, files in os.walk(rootdir):
# Don't descend into 'target' subdirectories # Don't descend into 'target' subdirectories
try: try:
@ -519,59 +537,77 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions,
books.append(root) books.append(root)
book_root = root book_root = root
os.chdir(root)
# No need to check single books if we build all, we just # No need to check single books if we build all, we just
# collect list of books # collect list of books
if build_all_books: if build_all_books:
continue continue
# ha-guide uses asciidoc which we do not track. # ha-guide uses asciidoc which we do not track.
# Just check whether any file is touched in that directory # Just check whether any file is touched in that directory
if root.endswith('doc/high-availability-guide'): if root.endswith('doc/high-availability-guide'):
if ha_guide_touched(): if ha_guide_touched():
affected_books.append(book_root) affected_books.add(book_root)
# We can scan only for depth of one of inclusion
# therefore skip the common directory since there's no
# book build in it.
elif not root.endswith('doc/common'):
for f in files:
if (f.endswith('.xml') and
f not in file_exceptions):
path = os.path.abspath(os.path.join(root, f))
# If the file itself is modified, build the book
if path in modified_files:
affected_books.append(book_root)
break
# If the file itself is modified, build the book. for f in files:
# Note this is an optimization in most cases but f_base = os.path.basename(f)
# needed for bk-*.xml since those are included by f_abs = os.path.abspath(os.path.join(root, f))
# pom.xml and pom.xml is not checked for if is_book_master(f_base):
# modification of included files. book_bk[f_abs] = book_root
doc = etree.parse(path) if (f.endswith('.xml') and
f != "pom.xml" and
f != "ha-guide-docinfo.xml"):
doc = etree.parse(f_abs)
for node in doc.findall(
'//{http://docbook.org/ns/docbook}imagedata'):
href = node.get('fileref')
href_abs = os.path.abspath(os.path.join(root, href))
if href_abs in included_by:
included_by[href_abs].add(f_abs)
else:
included_by[href_abs] = set([f_abs])
# Check for inclusion of files as part of imagedata ns = {"xi": "http://www.w3.org/2001/XInclude"}
for node in doc.findall( for node in doc.xpath('//xi:include', namespaces=ns):
'//{http://docbook.org/ns/docbook}imagedata'): href = node.get('href')
href = node.get('fileref') href_abs = os.path.abspath(os.path.join(root, href))
if (f not in file_exceptions and if href_abs in included_by:
os.path.abspath(href) in modified_files): included_by[href_abs].add(f_abs)
affected_books.append(book_root) else:
break included_by[href_abs] = set([f_abs])
if book_root in affected_books: if not build_all_books:
break # Generate list of modified_files
modified_files = get_modified_files(rootdir)
modified_files = map(lambda x: os.path.abspath(x), modified_files)
# Check for inclusion of files as part of xi:include # 2. Find all modified files and where they are included
ns = {"xi": "http://www.w3.org/2001/XInclude"}
for node in doc.xpath('//xi:include', namespaces=ns): # List of files that we have to iterate over, these are affected
href = node.get('href') # by some modification
if (f not in file_exceptions and new_files = modified_files
os.path.abspath(href) in modified_files):
affected_books.append(book_root) # All files that are affected (either directly or indirectly)
break affected_files = set(modified_files)
if book_root in affected_books:
break # 3. Iterate over files that have includes on modified files
# and build a closure - the set of all files (affected_files)
# that have a path to a modified file via includes.
while len(new_files) > 0:
new_files_to_check = new_files
new_files = []
for f in new_files_to_check:
# Skip bk*.xml files
if is_book_master(os.path.basename(f)):
book_modified = book_bk[f]
if book_modified not in affected_books:
affected_books.add(book_modified)
continue
if f not in included_by:
continue
for g in included_by[f]:
if g not in affected_files:
new_files.append(g)
affected_files.add(g)
if build_all_books: if build_all_books:
print("Building all books.") print("Building all books.")
@ -580,6 +616,23 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions,
else: else:
print("No books are affected by modified files. Building all books.") print("No books are affected by modified files. Building all books.")
return books
def build_affected_books(rootdir, book_exceptions,
verbose, force=False, voting=True):
"""Build all the books which are affected by modified files.
Looks for all directories with "pom.xml" and checks if a
XML file in the directory includes a modified file. If at least
one XML file includes a modified file the method calls
"mvn clean generate-sources" in that directory.
This will throw an exception if a book fails to build
"""
books = find_affected_books(rootdir, book_exceptions,
verbose, force)
maxjobs = multiprocessing.cpu_count() maxjobs = multiprocessing.cpu_count()
# Jenkins fails sometimes with errors if too many jobs run, artificially # Jenkins fails sometimes with errors if too many jobs run, artificially
# limit to 4 for now. # limit to 4 for now.
@ -637,7 +690,7 @@ def main(args):
args.check_niceness, args.non_voting) args.check_niceness, args.non_voting)
if args.check_build: if args.check_build:
build_affected_books(args.path, BOOK_EXCEPTIONS, FILE_EXCEPTIONS, build_affected_books(args.path, BOOK_EXCEPTIONS,
args.verbose, args.force, args.non_voting) args.verbose, args.force, args.non_voting)
@ -647,8 +700,8 @@ def default_root():
The current working directory must be inside of the openstack-manuals The current working directory must be inside of the openstack-manuals
repository for this method to succeed""" repository for this method to succeed"""
try: try:
args = ["git", "rev-parse", "--show-toplevel"] git_args = ["git", "rev-parse", "--show-toplevel"]
gitroot = check_output(args).rstrip() gitroot = check_output(git_args).rstrip()
except (subprocess.CalledProcessError, OSError) as e: except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e) print("git failed: %s" % e)
sys.exit(1) sys.exit(1)
@ -679,8 +732,8 @@ if __name__ == '__main__':
action="store_false") action="store_false")
parser.add_argument("--verbose", help="Verbose execution", parser.add_argument("--verbose", help="Verbose execution",
action="store_true") action="store_true")
args = parser.parse_args() prog_args = parser.parse_args()
if (len(sys.argv) == 1): if (len(sys.argv) == 1):
# No arguments given, use check-all # No arguments given, use check-all
args.check_all = True prog_args.check_all = True
main(args) main(prog_args)