add an in-memory cache
Processing large data sets with frequent duplication performs better when we use an in-memory cache to supplement the on-disk cache. The lru_cache decorator in Python requires all arguments to the decorated function to be hashable, so this patch converts the existing factory functions to classes so the fetch() methods can be decorated and still have access to the underlying cache object. Change-Id: I1144874e76b5db8fd125679da3a4b5b9ec4540cd Signed-off-by: Doug Hellmann <doug@doughellmann.com>
This commit is contained in:
parent
6e76c3a5a0
commit
505c77728a
@ -11,6 +11,7 @@
|
||||
# under the License.
|
||||
|
||||
import datetime
|
||||
import functools
|
||||
import logging
|
||||
|
||||
from goal_tools import apis
|
||||
@ -130,23 +131,29 @@ def lookup_member(email):
|
||||
return None
|
||||
|
||||
|
||||
def fetch_member(email, cache):
|
||||
"""Find the member in the cache or look it up in the API.
|
||||
class MemberFactory:
|
||||
|
||||
:param email: Email address of the member to look for.
|
||||
:type email: str
|
||||
:param cache: Storage for repeated lookups.
|
||||
:type cache: goal_tools.cache.Cache
|
||||
def __init__(self, cache):
|
||||
self._cache = cache
|
||||
|
||||
"""
|
||||
key = ('member', email)
|
||||
if key in cache:
|
||||
LOG.debug('found %s cached', email)
|
||||
data = cache[key]
|
||||
else:
|
||||
data = lookup_member(email)
|
||||
@functools.lru_cache(maxsize=1024)
|
||||
def fetch(self, email):
|
||||
"""Find the member in the cache or look it up in the API.
|
||||
|
||||
:param email: Email address of the member to look for.
|
||||
:type email: str
|
||||
:param cache: Storage for repeated lookups.
|
||||
:type cache: goal_tools.cache.Cache
|
||||
|
||||
"""
|
||||
key = ('member', email)
|
||||
if key in self._cache:
|
||||
LOG.debug('found %s cached', email)
|
||||
data = self._cache[key]
|
||||
else:
|
||||
data = lookup_member(email)
|
||||
if data:
|
||||
self._cache[key] = data
|
||||
if data:
|
||||
cache[key] = data
|
||||
if data:
|
||||
return Member(email, data)
|
||||
return None
|
||||
return Member(email, data)
|
||||
return None
|
||||
|
@ -13,6 +13,7 @@
|
||||
import collections
|
||||
import datetime
|
||||
import fileinput
|
||||
import functools
|
||||
import logging
|
||||
import urllib.parse
|
||||
|
||||
@ -113,6 +114,8 @@ class Review:
|
||||
@property
|
||||
def owner(self):
|
||||
owner = self._data['owner']
|
||||
if 'email' not in owner:
|
||||
owner['email'] = owner.get('email', 'no-reply@openstack.org')
|
||||
return Participant(
|
||||
'owner',
|
||||
owner['name'],
|
||||
@ -142,6 +145,8 @@ class Review:
|
||||
|
||||
for revision in revisions:
|
||||
uploader = revision['uploader']
|
||||
if 'email' not in uploader:
|
||||
uploader['email'] = 'no-reply@openstack.org'
|
||||
if uploader['email'] in known_uploaders:
|
||||
# Ignore duplicates
|
||||
continue
|
||||
@ -202,28 +207,34 @@ def cache_review(review_id, data, cache):
|
||||
cache[('review', str(review_id))] = data
|
||||
|
||||
|
||||
def fetch_review(review_id, cache):
|
||||
"""Find the review in the cache or look it up in the API.
|
||||
class ReviewFactory:
|
||||
|
||||
Review data is only cached if the review is MERGED because
|
||||
otherwise it is more likely to change.
|
||||
def __init__(self, cache):
|
||||
self._cache = cache
|
||||
|
||||
:param review_id: Review ID of the review to look for.
|
||||
:type review_id: str
|
||||
:param cache: Storage for repeated lookups.
|
||||
:type cache: goal_tools.cache.Cache
|
||||
@functools.lru_cache(maxsize=1024)
|
||||
def fetch(self, review_id):
|
||||
"""Find the review in the cache or look it up in the API.
|
||||
|
||||
"""
|
||||
key = ('review', str(review_id))
|
||||
if key in cache:
|
||||
LOG.debug('found %s cached', review_id)
|
||||
return Review(review_id, cache[key])
|
||||
data = query_gerrit(
|
||||
'changes/' + review_id + '/detail',
|
||||
params={
|
||||
'o': QUERY_OPTIONS,
|
||||
},
|
||||
)
|
||||
response = Review(review_id, data)
|
||||
cache_review(review_id, data, cache)
|
||||
return response
|
||||
Review data is only cached if the review is MERGED because
|
||||
otherwise it is more likely to change.
|
||||
|
||||
:param review_id: Review ID of the review to look for.
|
||||
:type review_id: str
|
||||
:param cache: Storage for repeated lookups.
|
||||
:type cache: goal_tools.cache.Cache
|
||||
|
||||
"""
|
||||
key = ('review', str(review_id))
|
||||
if key in self._cache:
|
||||
LOG.debug('found %s cached', review_id)
|
||||
return Review(review_id, self._cache[key])
|
||||
data = query_gerrit(
|
||||
'changes/' + review_id + '/detail',
|
||||
params={
|
||||
'o': QUERY_OPTIONS,
|
||||
},
|
||||
)
|
||||
response = Review(review_id, data)
|
||||
cache_review(review_id, data, self._cache)
|
||||
return response
|
||||
|
@ -79,24 +79,24 @@ class TestFoundationMember(base.TestCase):
|
||||
|
||||
class TestFetchMember(base.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
self.cache = {}
|
||||
self.f = foundation.MemberFactory(self.cache)
|
||||
|
||||
def test_not_in_cache(self):
|
||||
cache = {}
|
||||
with mock.patch('goal_tools.foundation.lookup_member') as f:
|
||||
f.return_value = _member_data
|
||||
results = foundation.fetch_member(
|
||||
'doug@doughellmann.com', cache)
|
||||
self.assertIn(('member', 'doug@doughellmann.com'), cache)
|
||||
results = self.f.fetch('doug@doughellmann.com')
|
||||
self.assertIn(('member', 'doug@doughellmann.com'), self.cache)
|
||||
self.assertEqual(_member_data, results._data)
|
||||
self.assertEqual(_member_data,
|
||||
cache[('member', 'doug@doughellmann.com')])
|
||||
self.cache[('member', 'doug@doughellmann.com')])
|
||||
|
||||
def test_in_cache(self):
|
||||
cache = {
|
||||
('member', 'doug@doughellmann.com'): _member_data,
|
||||
}
|
||||
self.cache[('member', 'doug@doughellmann.com')] = _member_data
|
||||
with mock.patch('goal_tools.foundation.lookup_member') as f:
|
||||
f.side_effect = AssertionError('should not be called')
|
||||
results = foundation.fetch_member(
|
||||
'doug@doughellmann.com', cache)
|
||||
self.assertIn(('member', 'doug@doughellmann.com'), cache)
|
||||
results = self.f.fetch('doug@doughellmann.com')
|
||||
self.assertIn(('member', 'doug@doughellmann.com'), self.cache)
|
||||
self.assertEqual(_member_data, results._data)
|
||||
|
@ -154,29 +154,30 @@ class TestReview(base.TestCase):
|
||||
|
||||
class TestFetchReview(base.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
self.cache = {}
|
||||
self.f = gerrit.ReviewFactory(self.cache)
|
||||
|
||||
def test_not_in_cache_new(self):
|
||||
cache = {}
|
||||
with mock.patch('goal_tools.gerrit.query_gerrit') as f:
|
||||
f.return_value = _data_55535
|
||||
results = gerrit.fetch_review('55535', cache)
|
||||
self.assertNotIn(('review', '55535'), cache)
|
||||
results = self.f.fetch('55535')
|
||||
self.assertNotIn(('review', '55535'), self.cache)
|
||||
self.assertEqual(_data_55535, results._data)
|
||||
|
||||
def test_not_in_cache_merged(self):
|
||||
cache = {}
|
||||
with mock.patch('goal_tools.gerrit.query_gerrit') as f:
|
||||
f.return_value = _data_561507
|
||||
results = gerrit.fetch_review('561507', cache)
|
||||
self.assertIn(('review', '561507'), cache)
|
||||
results = self.f.fetch('561507')
|
||||
self.assertIn(('review', '561507'), self.cache)
|
||||
self.assertEqual(_data_561507, results._data)
|
||||
self.assertEqual(_data_561507, cache[('review', '561507')])
|
||||
self.assertEqual(_data_561507, self.cache[('review', '561507')])
|
||||
|
||||
def test_in_cache(self):
|
||||
cache = {
|
||||
('review', '561507'): _data_561507,
|
||||
}
|
||||
self.cache[('review', '561507')] = _data_561507
|
||||
with mock.patch('goal_tools.gerrit.query_gerrit') as f:
|
||||
f.side_effect = AssertionError('should not be called')
|
||||
results = gerrit.fetch_review('561507', cache)
|
||||
self.assertIn(('review', '561507'), cache)
|
||||
results = self.f.fetch('561507')
|
||||
self.assertIn(('review', '561507'), self.cache)
|
||||
self.assertEqual(_data_561507, results._data)
|
||||
|
@ -76,8 +76,9 @@ class ListContributions(lister.Lister):
|
||||
parsed_args.governance_project_list)
|
||||
|
||||
roles = parsed_args.role
|
||||
cache = self.app.cache
|
||||
include_unofficial = parsed_args.include_unofficial
|
||||
member_factory = foundation.MemberFactory(self.app.cache)
|
||||
review_factory = gerrit.ReviewFactory(self.app.cache)
|
||||
|
||||
review_ids = utils.unique(
|
||||
gerrit.parse_review_lists(parsed_args.review_list)
|
||||
@ -85,7 +86,7 @@ class ListContributions(lister.Lister):
|
||||
|
||||
for review_id in review_ids:
|
||||
|
||||
review = gerrit.fetch_review(review_id, cache)
|
||||
review = review_factory.fetch(review_id)
|
||||
|
||||
for participant in review.participants:
|
||||
|
||||
@ -106,7 +107,7 @@ class ListContributions(lister.Lister):
|
||||
# Figure out which organization the user was
|
||||
# affiliated with at the time of the work.
|
||||
organization = None
|
||||
member = foundation.fetch_member(participant.email, cache)
|
||||
member = member_factory.fetch(participant.email)
|
||||
if member:
|
||||
affiliation = member.find_affiliation(participant.date)
|
||||
if affiliation:
|
||||
|
Loading…
Reference in New Issue
Block a user