From 4a53589fea53ed8ab262c3fb7372566af6d043e0 Mon Sep 17 00:00:00 2001 From: Adam Spiers Date: Fri, 4 Jan 2019 16:39:25 +0000 Subject: [PATCH] www-generator.py: cache expensive repo/project metadata to disk Gathering the metadata of repos and projects requires a significant number of HTTP calls and is therefore slow and expensive. When hacking on www-generator.py, the ability to skip these calls can save a lot of time, allowing much more rapid iteration. So add a persistent cache for this data, using the percache Python module. This is mostly run in the gate, where the cache will be deleted on each run. Documentation contributors running this locally may occasionally need to manually remove the cache file, although governance data generally changes slowly enough that this is expected to be a rare corner case. As noted by the comment in _get_official_repos(), in the future we may add support for loading governance data from a local repo, thereby skipping the need for slow HTTP calls. But for now this is a simpler solution. 
Change-Id: Ib52171625e37169d00da03f436b58456c103b04d --- .gitignore | 2 ++ test-requirements.txt | 1 + tools/www-generator.py | 7 +++++++ 3 files changed, 10 insertions(+) diff --git a/.gitignore b/.gitignore index a717535898..ee266cf6c8 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,5 @@ boxes/* .*.swo .bak /.project + +OS_GOVERNANCE_DATA_CACHE diff --git a/test-requirements.txt b/test-requirements.txt index 4715cd1bfa..d70b9491c6 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -19,6 +19,7 @@ PyYAML>=3.10.0 # MIT jsonschema>=2.6.0 os-service-types openstack-governance>=0.1.0 +percache>=0.3.0 # For translations # Babel 2.4.0 is broken, fixed in 2.4.1 diff --git a/tools/www-generator.py b/tools/www-generator.py index f07f800710..087d070013 100755 --- a/tools/www-generator.py +++ b/tools/www-generator.py @@ -29,6 +29,7 @@ from bs4 import BeautifulSoup import jinja2 import jsonschema import os_service_types +import percache import requests import yaml @@ -94,6 +95,8 @@ ALL_SERIES = list(sorted(SERIES_INFO.keys())) SERIES_PAT = re.compile('^(' + '|'.join(ALL_SERIES) + ')/') +cache = percache.Cache("./OS_GOVERNANCE_DATA_CACHE") + def initialize_logging(debug, verbose): """Initialize the Logger.""" @@ -238,6 +241,7 @@ _URLS = [ ] +@cache def load_project_data(source_directory, check_all_links=False, skip_links=False, @@ -424,6 +428,7 @@ _INFRA_REPOS_EXCEPTION = [ ] +@cache def _get_official_repos(): """Return a tuple containing lists of all official repos. @@ -552,7 +557,9 @@ def main(): args = parse_command_line_arguments() logger = initialize_logging(args.debug, args.verbose) + logger.debug("getting official repos ...") regular_repos, infra_repos, deliverables = _get_official_repos() + logger.debug("loading project data ...") project_data = load_project_data( source_directory=args.source_directory, check_all_links=args.check_all_links,