#!/usr/bin/env python
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import argparse
import glob
import logging
import os
import os.path
import re
import sys

from bs4 import BeautifulSoup
import jinja2
import jsonschema
import requests
import yaml

PAST_SERIES = [
    'kilo',
    'liberty',
    'mitaka',
    'newton',
]
RELEASED_SERIES = 'ocata'
SERIES_IN_DEVELOPMENT = 'pike'
FUTURE_SERIES = [
    'queens',
    'rocky',
]

ALL_SERIES = (
    PAST_SERIES + [RELEASED_SERIES, SERIES_IN_DEVELOPMENT] + FUTURE_SERIES
)

SERIES_PAT = re.compile('^(' + '|'.join(ALL_SERIES) + ')/')


def initialize_logging(debug, verbose):
    """Initialize the Logger."""
    logger = logging.getLogger()

    formatter = logging.Formatter('%(asctime)s %(levelname)-8s %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    if verbose:
        logger.setLevel(logging.INFO)

    if debug:
        logger.setLevel(logging.DEBUG)

    return logger


def parse_command_line_arguments():
    """Parse the command line arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--debug", help="Print debugging messages.",
                        action="store_true", default=False)
    parser.add_argument("--verbose", help="Be more verbose.",
                        action="store_true", default=False)
    parser.add_argument("--source-directory", type=str,
                        default='www', help='Set source directory.')
    parser.add_argument("--output-directory", type=str,
                        default='publish-docs/www',
                        help='Set output directory.')
    parser.add_argument("--check-all-links", action="store_true",
                        default=False,
                        help='Also check links for guides whose flags '
                             'are set to false.')
    parser.add_argument("--skip-links", action="store_true",
                        default=False,
                        help='Skip link checks.')
    return parser.parse_args()


def _check_url(url):
    "Return a (exists, status_code) tuple for the URL."
    try:
        resp = requests.get(url)
    except requests.exceptions.TooManyRedirects:
        # Treat a redirect loop as the page not existing.
        return False, 301
    return (resp.status_code // 100) == 2, resp.status_code

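
# A hypothetical illustration of how _check_url() is used later in this
# script; the URL below is an example only and nothing is fetched at
# import time:
#
#   exists, status = _check_url(
#       'https://docs.openstack.org/nova/pike/index.html')
#   # exists is True when the response status is 2xx, e.g. (True, 200);
#   # a missing page would give something like (False, 404).
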
# NOTE(dhellmann): List of tuple of flag name and URL template. None
# for the flag name means always apply the URL, otherwise look for a
# True value associated with the flag in the project data.
#
# NOTE(dhellmann): We use URLs with explicit index.html to ensure that
# a real page is published to the location, and we are not retrieving
# a file list generated by the web server.
_URLS = [
    (None,
     'https://docs.openstack.org/{name}/{series}/index.html'),
    ('has_install_guide',
     'https://docs.openstack.org/{name}/{series}/install/index.html'),
    ('has_admin_guide',
     'https://docs.openstack.org/{name}/{series}/admin/index.html'),
    ('has_config_ref',
     'https://docs.openstack.org/{name}/{series}/configuration/index.html'),
    ('has_in_tree_api_docs',
     'https://docs.openstack.org/{name}/{series}/api/index.html'),
    ('has_user_guide',
     'https://docs.openstack.org/{name}/{series}/user/index.html'),
    ('has_api_ref',
     'https://developer.openstack.org/api-ref/{service_type}/index.html'),
    ('has_api_guide',
     'https://developer.openstack.org/api-guide/{service_type}/index.html'),
]

_SERVICE_TYPES_URL = 'http://git.openstack.org/cgit/openstack/service-types-authority/plain/service-types.yaml'  # noqa


def _get_service_types():
    "Return a map between repo base name and service type."
    raw = requests.get(_SERVICE_TYPES_URL)
    data = yaml.safe_load(raw.text)
    service_types = {
        d['project'].rsplit('/')[-1]: d['service_type']
        for d in data['services']
    }
    return service_types


def load_project_data(source_directory,
                      check_all_links=False,
                      skip_links=False):
    "Return a dict with project data grouped by series."
    logger = logging.getLogger()
    project_data = {}
    fail = False
    service_types = _get_service_types()

    # Set up a schema validator so we can quickly check that the input
    # data conforms.
    project_schema_filename = os.path.join(
        source_directory,
        'project-data',
        'schema.yaml',
    )
    with open(project_schema_filename, 'r') as f:
        project_schema = yaml.safe_load(f.read())
        validator = jsonschema.Draft4Validator(project_schema)

    # Load the data files, using the file basename as the release
    # series name.
    for filename in glob.glob(
            os.path.join(source_directory, 'project-data', '*.yaml')):
        if filename.endswith('schema.yaml'):
            continue
        series, _ = os.path.splitext(os.path.basename(filename))

        logger.info('loading %s project data from %s', series, filename)
        with open(filename, 'r') as f:
            data = yaml.safe_load(f.read())
        for error in validator.iter_errors(data):
            logger.error(str(error))
            fail = True

        for project in data:
            # If the project has a service_type set, ensure it matches
            # the value in the service-types-authority database.
            st = project.get('service_type')
            if st is not None:
                if project['name'] not in service_types:
                    logger.error(
                        'did not find %s in %s',
                        project['name'], _SERVICE_TYPES_URL,
                    )
                    fail = True
                elif project['service_type'] != service_types[project['name']]:
                    logger.error(
                        'expected service_type %r for %s but got %r',
                        service_types[project['name']], project['name'],
                        project['service_type'],
                    )
                    fail = True

            # Client projects must have a description.
            project_type = project.get('type')
            if project_type == 'client' and not project.get('description'):
                logger.error(
                    'client project %s has no description',
                    project['name'],
                )
                fail = True

            # If the project claims to have a separately published guide
            # of some sort, look for it before allowing the flag to stand.
            if not skip_links:
                for flag, url_template in _URLS:
                    if flag is None:
                        flag_val = True
                    else:
                        flag_val = project.get(flag, False)
                    try:
                        url = url_template.format(series=series, **project)
                    except KeyError:
                        # The project data does not include a field needed
                        # to build the URL (typically the
                        # service_type). Ignore this URL, unless the flag
                        # is set.
                        if flag_val:
                            raise
                        continue

                    # Only try to fetch the URL if we're going to do
                    # something with the result.
                    if flag_val or check_all_links:
                        logger.info('%s:%s looking for %s',
                                    series, project['name'], url)
                        exists, status = _check_url(url)
                        if flag_val and not exists:
                            logger.error(
                                '%s set for %s but %s does not exist (%s)',
                                flag, project['name'], url, status,
                            )
                            fail = True
                        elif (not flag_val) and check_all_links and exists:
                            logger.warning(
                                '%s not set for %s but %s does exist',
                                flag, project['name'], url,
                            )

        if fail:
            raise ValueError('invalid input in %s' % filename)
        project_data[series] = data
    return project_data


_GOVERNANCE_URL = 'http://git.openstack.org/cgit/openstack/governance/plain/reference/projects.yaml'  # noqa


def _get_official_repos():
    """Return a tuple containing lists of all official repos.

    The first member is the list of regular project repos. The second
    member is the list of infra repos.
    """
    raw = requests.get(_GOVERNANCE_URL)
    data = yaml.safe_load(raw.text)
    regular_repos = []
    infra_repos = []
    for t_name, team in data.items():
        for d_name, d_data in team.get('deliverables', {}).items():
            if t_name == 'Infrastructure':
                add = infra_repos.append
            else:
                add = regular_repos.append
            for repo in d_data.get('repos', []):
                add({'name': repo, 'base': repo.rsplit('/')[-1]})
    return (regular_repos, infra_repos)


def render_template(environment, project_data, regular_repos, infra_repos,
                    template_file, output_directory, extra={}):
    logger = logging.getLogger()
    logger.info("generating %s", template_file)

    # Determine the relative path to a few common directories so
    # we don't need to set them in the templates.
    topdir = os.path.relpath(
        '.', os.path.dirname(template_file),
    ).rstrip('/') + '/'
    scriptdir = os.path.join(topdir, 'common', 'js').rstrip('/') + '/'
    cssdir = os.path.join(topdir, 'common', 'css').rstrip('/') + '/'
    imagedir = os.path.join(topdir, 'common', 'images').rstrip('/') + '/'

    series_match = SERIES_PAT.match(template_file)
    if series_match:
        series = series_match.groups()[0]
        if series == SERIES_IN_DEVELOPMENT:
            series = 'latest'
    else:
        series = None
    logger.info('series = %s', series)

    try:
        template = environment.get_template(template_file)
    except Exception as e:
        logger.error("parsing template %s failed: %s" % (template_file, e))
        raise

    try:
        output = template.render(
            PROJECT_DATA=project_data,
            TEMPLATE_FILE=template_file,
            REGULAR_REPOS=regular_repos,
            INFRA_REPOS=infra_repos,
            ALL_SERIES=ALL_SERIES,
            RELEASED_SERIES=RELEASED_SERIES,
            SERIES_IN_DEVELOPMENT=SERIES_IN_DEVELOPMENT,
            topdir=topdir,
            scriptdir=scriptdir,
            cssdir=cssdir,
            imagedir=imagedir,
            series=series,
            **extra
        )
        if template_file.endswith('.html'):
            soup = BeautifulSoup(output, "lxml")
            output = soup.prettify()
    except Exception as e:
        logger.error("rendering template %s failed: %s" % (template_file, e))
        raise

    try:
        target_directory = os.path.join(output_directory,
                                        os.path.dirname(template_file))
        target_file = os.path.join(output_directory, template_file)
        if not os.path.isdir(target_directory):
            logger.debug("creating target directory %s" % target_directory)
            os.makedirs(target_directory)
        logger.debug("writing %s" % target_file)
        with open(target_file, 'wb') as fh:
            fh.write(output.encode('utf8'))
    except (IOError, OSError, UnicodeEncodeError) as e:
        logger.error("writing %s failed: %s" % (target_file, e))


def main():
    """Entry point for this script."""
    args = parse_command_line_arguments()
    logger = initialize_logging(args.debug, args.verbose)

    project_data = load_project_data(
        args.source_directory,
        args.check_all_links,
        args.skip_links,
    )
    regular_repos, infra_repos = _get_official_repos()

    # Set up jinja to discover the templates.
    try:
        logger.info('looking for templates in %s', args.source_directory)
        loader = jinja2.FileSystemLoader(args.source_directory)
        environment = jinja2.Environment(loader=loader)
    except Exception as e:
        logger.error("initialising template environment failed: %s" % e)
        return 1

    # Render the templates.
    output_pages = []
    page_list_template = None
    for template_file in environment.list_templates():
        if not (template_file.endswith('.html') or
                template_file.endswith('.htaccess')):
            logger.info('ignoring %s', template_file)
            continue
        if template_file.endswith('www-index.html'):
            # Process this one at the end, so we have the full list of
            # other output files.
            page_list_template = template_file
            continue
        render_template(
            environment,
            project_data,
            regular_repos,
            infra_repos,
            template_file,
            args.output_directory,
        )
        output_pages.append(template_file)

    if page_list_template is not None:
        output_pages.sort()
        render_template(
            environment,
            project_data,
            regular_repos,
            infra_repos,
            page_list_template,
            args.output_directory,
            extra={
                'file_list': output_pages,
            },
        )

    return 0


if __name__ == '__main__':
    sys.exit(main())
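
# Example invocation (illustrative only; the script name is an assumption,
# while the option names come from parse_command_line_arguments() and the
# paths shown are that parser's defaults):
#
#   python www-generator.py --source-directory www \
#       --output-directory publish-docs/www --check-all-links --verbose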