Use GitHub lib to retrieve project list

* The project list is retrieved with the help of PyGithub
* Changes in default data are detected by comparing SHA-1 digests
* Fixed an issue with invalid UTF-8 characters in git log parsing

Partially implements blueprint add-more-projects.

Change-Id: I7cc9bc758c1f0d522af403a2b41c8298d80d843c
This commit is contained in:
Ilya Shakhat 2013-08-05 15:10:01 +04:00
parent c6f76c154d
commit de7e8f297c
7 changed files with 48 additions and 58 deletions

View File

@ -14083,26 +14083,26 @@
}
],
"project_types": [
"project_sources": [
{
"organization": "openstack",
"project_type": "openstack",
"project_group": "other",
"uri": "https://api.github.com/orgs/openstack/repos"
"project_group": "other"
},
{
"organization": "openstack-dev",
"project_type": "openstack",
"project_group": "other",
"uri": "https://api.github.com/orgs/openstack-dev/repos"
"project_group": "other"
},
{
"organization": "openstack-infra",
"project_type": "openstack",
"project_group": "infrastructure",
"uri": "https://api.github.com/orgs/openstack-infra/repos"
"project_group": "infrastructure"
},
{
"organization": "stackforge",
"project_type": "stackforge",
"project_group": null,
"uri": "https://api.github.com/orgs/stackforge/repos"
}
],

View File

@ -45,11 +45,11 @@
}
],
"project_types": [
"project_sources": [
{
"project_type": "openstack",
"project_group": null,
"uri": "https://api.github.com/orgs/openstack-dev/repos"
"organization": "stackforge",
"project_type": "stackforge",
"project_group": null
}
],

View File

@ -7,6 +7,7 @@ oslo.config
paramiko>=1.8.0
pbr>=0.5.16,<0.6
psutil
PyGithub
python-memcached
pymongo
sh

View File

@ -12,79 +12,67 @@
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib
import json
import urllib
from github import MainClass
from stackalytics.openstack.common import log as logging
from stackalytics.processor import normalizer
from stackalytics.processor import persistent_storage
from stackalytics.processor import record_processor
from stackalytics.processor import vcs
LOG = logging.getLogger(__name__)
def items_match(item, p_item):
    """Tell whether every field of ``item`` is present and equal in ``p_item``.

    :param item: dict holding the expected key/value pairs
    :param p_item: dict to compare against; a falsy value (``None`` or an
        empty dict) is treated as a match
    :returns: ``True`` when ``p_item`` is falsy or holds every key of
        ``item`` with an equal value, ``False`` otherwise
    """
    if not p_item:
        return True
    # ``items()`` behaves the same as ``iteritems()`` for this read-only scan
    # and also exists on Python 3 dicts.
    for key, value in item.items():
        if key not in p_item or p_item[key] != value:
            return False
    # Every pair matched. Without this explicit return the function falls off
    # the end and yields None, which a ``not items_match(...)`` check treats
    # as a mismatch.
    return True
def _check_default_data_change(runtime_storage_inst, default_data):
    """Detect whether default data differs from the last recorded digest.

    The data is serialized to JSON and hashed with SHA-1; the digest is
    compared with the value stored under ``'default_data_digest'``. When the
    digests differ the new one is stored.

    :param runtime_storage_inst: object providing ``get_last_id(name)`` and
        ``set_last_id(name, value)``
    :param default_data: JSON-serializable default data structure
    :returns: ``True`` if the data changed (digest was updated), ``False``
        if the digest matches the stored one
    """
    # sort_keys keeps the serialization, and therefore the digest, independent
    # of dict ordering, so identical data is never reported as changed.
    serialized = json.dumps(default_data, sort_keys=True)
    # Hash explicit bytes: hashlib rejects text strings on Python 3.
    digest = hashlib.sha1(serialized.encode('utf-8')).hexdigest()

    p_digest = runtime_storage_inst.get_last_id('default_data_digest')
    if digest == p_digest:
        LOG.debug('No changes in default data detected, sha1: %s', digest)
        return False

    LOG.debug('Default data has changes, sha1: %s', digest)
    runtime_storage_inst.set_last_id('default_data_digest', digest)
    return True
def _update_persistent_storage(persistent_storage_inst, default_data):
    """Push default data records that differ from the stored ones.

    For every table named in ``persistent_storage.PRIMARY_KEYS`` that is also
    present in ``default_data``, each item is looked up by its primary key
    and compared with the first record found via ``items_match``.

    :param persistent_storage_inst: object providing ``find``, ``update``
        and ``insert``
    :param default_data: dict mapping table names to lists of item dicts
    :returns: ``True`` if at least one item did not match its stored record
    """
    changed = False
    for table, primary_key in persistent_storage.PRIMARY_KEYS.iteritems():
        if table not in default_data:
            continue
        for item in default_data[table]:
            lookup = {primary_key: item[primary_key]}
            found = persistent_storage_inst.find(table, **lookup)
            # Only the first record found is considered; None if there is none.
            stored = next(iter(found), None)
            if items_match(item, stored):
                continue
            changed = True
            # NOTE(review): whether the insert branch is reachable depends on
            # how items_match treats a missing record -- confirm.
            if stored:
                persistent_storage_inst.update(table, item)
            else:
                persistent_storage_inst.insert(table, item)
    return changed
def _retrieve_project_list(default_data):
    """Extend ``default_data['repos']`` with repositories listed on GitHub.

    For every entry of ``default_data['project_sources']`` the repositories
    of the named GitHub organization are fetched through PyGithub. Each
    repository whose git URL is not yet among the known repos is appended
    with the ``master`` branch and the project type and group of its source.

    Does nothing when ``default_data`` has no ``'project_sources'`` key.

    :param default_data: default data dict; its ``'repos'`` list is modified
        in place
    """
    if 'project_sources' not in default_data:
        return

    LOG.info('Retrieving project list from GitHub')

    # Index already-known repos by URI so they are not added twice.
    repo_index = {}
    for repo in default_data['repos']:
        repo_index[repo['uri']] = repo

    github = MainClass.Github()
    for project_source in default_data['project_sources']:
        organization = project_source['organization']
        repos = github.get_organization(organization).get_repos()
        LOG.debug('Get list of projects for organization %s', organization)

        for repo in repos:
            repo_uri = repo.git_url
            repo_name = repo.name

            if repo_uri not in repo_index:
                r = {
                    'branches': ['master'],
                    'module': repo_name,
                    'project_type': project_source['project_type'],
                    'project_group': project_source['project_group'],
                    'uri': repo_uri
                }
                default_data['repos'].append(r)
                # Record the new entry so a repeated listing of the same URI
                # does not append a duplicate.
                repo_index[repo_uri] = r
                LOG.debug('Project is added to default data: %s', r)
def process(persistent_storage_inst, runtime_storage_inst, default_data,
@ -94,7 +82,7 @@ def process(persistent_storage_inst, runtime_storage_inst, default_data,
normalizer.normalize_default_data(default_data)
if _update_persistent_storage(persistent_storage_inst, default_data):
if _check_default_data_change(runtime_storage_inst, default_data):
persistent_storage_inst.reset(default_data)

View File

@ -136,7 +136,7 @@ def apply_corrections(uri, runtime_storage_inst):
valid_corrections.append(c)
else:
LOG.warn('Correction misses primary key: %s', c)
runtime_storage_inst.apply_corrections(corrections)
runtime_storage_inst.apply_corrections(valid_corrections)
def _read_default_data(uri):

View File

@ -25,7 +25,7 @@ PRIMARY_KEYS = {
'repos': 'uri',
'users': 'user_id',
'releases': 'release_name',
'project_types': 'uri',
'project_sources': 'organization',
}

View File

@ -126,7 +126,8 @@ class Git(Vcs):
if head_commit_id:
commit_range = head_commit_id + '..HEAD'
output = sh.git('log', '--pretty=%s' % GIT_LOG_FORMAT, '--shortstat',
'-M', '--no-merges', commit_range, _tty_out=False)
'-M', '--no-merges', commit_range, _tty_out=False,
_decode_errors='ignore')
for rec in re.finditer(GIT_LOG_PATTERN, str(output)):
i = 1