From 39b1e030470d40a775683937056748f31c025ddf Mon Sep 17 00:00:00 2001
From: Ben Nemec
Date: Tue, 16 Apr 2019 15:12:28 +0000
Subject: [PATCH] Load subproject data from governance

Instead of manually curating a list of all the subprojects in a project,
we should use the governance repo that already includes most, if not
all, of this information.

For the moment, this change just adds the governance data to the data
already in the json files. This is because I'm not sure whether there
is a 1:1 relationship between what is in governance and what is in the
json files. It's possible we could eliminate the json files entirely
at some point, but for the moment this gets us updated subproject lists
for essentially free, since it's likely that many projects' json files
are missing subprojects added in the past few years.

Change-Id: Ia3992dd30f83294a96ace155691e50838cd5543b
---
Reviewer notes (not part of the commit message):

* This patch was recovered from a copy whose line breaks and indentation
  had been collapsed. The layout below, including the indentation of the
  unchanged context lines, is a best-effort reconstruction; check it
  against the target tree before applying.
* get_remote_data() now reads the response body as bytes, closes the
  response, and unpacks gzip through io.BytesIO. The earlier version
  appended bytes to a text string, which fails on Python 3, and called
  cStringIO.StringIO on six.moves.cStringIO, which is already the
  StringIO callable.
* The governance YAML is downloaded once per process through
  get_governance_projects() instead of once per project file.
* The blob ids on the reviewstats/utils.py index line still describe
  the originally submitted revision of this patch.

 requirements.txt                      |  1 +
 reviewstats/tests/test_projectinfo.py | 10 +++-
 reviewstats/utils.py                  | 73 +++++++++++++++++++++++++++
 test-requirements.txt                 |  1 +
 4 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index fb6ddd6..c3b7d7e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ Babel>=1.3
 pytz>=2010h
 requests>=2.2.0,!=2.4.0
 six>=1.7.0
+PyYAML>=3.1.0
diff --git a/reviewstats/tests/test_projectinfo.py b/reviewstats/tests/test_projectinfo.py
index 6bc59a8..736df08 100644
--- a/reviewstats/tests/test_projectinfo.py
+++ b/reviewstats/tests/test_projectinfo.py
@@ -10,19 +10,23 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+import mock
+
 from reviewstats.tests import base
 from reviewstats import utils
 
 
+@mock.patch('reviewstats.utils.get_remote_data', return_value={})
 class TestProjectInfo(base.TestCase):
 
-    def test_project_definitions_load(self):
+    def test_project_definitions_load(self, get_remote_data):
         utils.get_projects_info('', True)
 
-    def test_get_projects_info_single_name(self):
+    def test_get_projects_info_single_name(self, get_remote_data):
         projects = utils.get_projects_info('nova')
         self.assertEqual(1, len(projects))
 
-    def test_get_projects_info_single_name_projects_prefixed(self):
+    def test_get_projects_info_single_name_projects_prefixed(self,
+                                                             get_remote_data):
         projects = utils.get_projects_info('projects/stable.json')
         self.assertEqual(1, len(projects))
diff --git a/reviewstats/utils.py b/reviewstats/utils.py
index c4eb657..2a24d5a 100644
--- a/reviewstats/utils.py
+++ b/reviewstats/utils.py
@@ -18,6 +18,8 @@
 """
 
 import glob
+import gzip
+import io
 import json
 import logging
 import os
@@ -25,12 +27,74 @@ import requests
 import requests.auth
 from six.moves import cPickle as pickle
 import time
+import yaml
 
 import paramiko
+from six.moves import urllib
 
 LOG = logging.getLogger(__name__)
 
 
+PROJECTS_YAML = ('http://git.openstack.org/cgit/openstack/governance/plain/'
+                 'reference/projects.yaml')
+
+# Parsed copy of PROJECTS_YAML; filled in by get_governance_projects().
+_governance_projects = None
+
+
+class DataRetrievalFailed(Exception):
+    """Raised when a remote document cannot be downloaded."""
+
+
+# Adapted from https://github.com/cybertron/zuul-status/blob/master/app.py
+def get_remote_data(address, datatype='json'):
+    """Download ``address`` and return its parsed content.
+
+    :param str address: URL of the document to retrieve.
+    :param str datatype:
+        ``'json'`` to parse the body as JSON; any other value parses it
+        as YAML.
+    :returns: the deserialized document.
+    :raises DataRetrievalFailed: if the document cannot be requested.
+    """
+    req = urllib.request.Request(address)
+    req.add_header('Accept-encoding', 'gzip')
+    try:
+        remote_data = urllib.request.urlopen(req, timeout=10)
+    except Exception as e:
+        msg = 'Failed to retrieve data from %s: %s' % (address, str(e))
+        raise DataRetrievalFailed(msg)
+    try:
+        # The body arrives as bytes on Python 3, so it cannot be appended
+        # to a text string; a bare read() already returns all of it.
+        data = remote_data.read()
+        content_encoding = remote_data.info().get('Content-Encoding')
+    finally:
+        remote_data.close()
+
+    if content_encoding == 'gzip':
+        data = gzip.GzipFile(fileobj=io.BytesIO(data)).read()
+
+    if datatype == 'json':
+        # json.loads() only accepts bytes from Python 3.6 onwards.
+        return json.loads(data.decode('utf-8'))
+    else:
+        return yaml.safe_load(data)
+
+
+def get_governance_projects():
+    """Return the parsed governance projects.yaml, fetching it only once.
+
+    :returns: the cached document, or ``{}`` if the document was empty.
+    :raises DataRetrievalFailed: if the first download fails.
+    """
+    global _governance_projects
+    if _governance_projects is None:
+        # An empty document parses to None; store {} so it is not refetched.
+        _governance_projects = get_remote_data(PROJECTS_YAML, 'yaml') or {}
+    return _governance_projects
+
+
 def get_projects_info(project=None, all_projects=False,
                       base_dir='./projects/'):
     """Return the list of project dict objects.
@@ -78,6 +142,15 @@ def get_projects_info(project=None, all_projects=False,
                     raise
                 if not (all_projects and project.get('unofficial')):
                     projects.append(project)
+                    # The governance entry is looked up by the base name
+                    # of the project file.
+                    project_name = os.path.splitext(os.path.basename(fn))[0]
+                    entry = get_governance_projects().get(project_name)
+                    if entry:
+                        repos = set(project.get('subprojects', []))
+                        for deliverable in entry['deliverables'].values():
+                            repos.update(deliverable['repos'])
+                        project['subprojects'] = sorted(repos)
 
     return projects
 
diff --git a/test-requirements.txt b/test-requirements.txt
index 857cca0..2cb6ece 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -6,3 +6,4 @@ python-subunit
 testrepository>=0.0.17
 testscenarios>=0.4,<0.5
 testtools>=0.9.32
+mock