add an in-memory cache

Processing large data sets with frequent duplication performs better
when we use an in-memory cache to supplement the on-disk cache. The
lru_cache decorator in Python requires all arguments to the decorated
function to be hashable, so this patch converts the existing factory
functions to classes so the fetch() methods can be decorated and still
have access to the underlying cache object.

Change-Id: I1144874e76b5db8fd125679da3a4b5b9ec4540cd
Signed-off-by: Doug Hellmann <doug@doughellmann.com>
This commit is contained in:
Doug Hellmann 2018-04-30 15:40:02 -04:00
parent 6e76c3a5a0
commit 505c77728a
5 changed files with 85 additions and 65 deletions

View File

@ -11,6 +11,7 @@
# under the License.
import datetime
import functools
import logging
from goal_tools import apis
@ -130,23 +131,29 @@ def lookup_member(email):
return None
def fetch_member(email, cache):
"""Find the member in the cache or look it up in the API.
class MemberFactory:
:param email: Email address of the member to look for.
:type email: str
:param cache: Storage for repeated lookups.
:type cache: goal_tools.cache.Cache
def __init__(self, cache):
self._cache = cache
"""
key = ('member', email)
if key in cache:
LOG.debug('found %s cached', email)
data = cache[key]
else:
data = lookup_member(email)
@functools.lru_cache(maxsize=1024)
def fetch(self, email):
"""Find the member in the cache or look it up in the API.
:param email: Email address of the member to look for.
:type email: str
:param cache: Storage for repeated lookups.
:type cache: goal_tools.cache.Cache
"""
key = ('member', email)
if key in self._cache:
LOG.debug('found %s cached', email)
data = self._cache[key]
else:
data = lookup_member(email)
if data:
self._cache[key] = data
if data:
cache[key] = data
if data:
return Member(email, data)
return None
return Member(email, data)
return None

View File

@ -13,6 +13,7 @@
import collections
import datetime
import fileinput
import functools
import logging
import urllib.parse
@ -113,6 +114,8 @@ class Review:
@property
def owner(self):
owner = self._data['owner']
if 'email' not in owner:
owner['email'] = owner.get('email', 'no-reply@openstack.org')
return Participant(
'owner',
owner['name'],
@ -142,6 +145,8 @@ class Review:
for revision in revisions:
uploader = revision['uploader']
if 'email' not in uploader:
uploader['email'] = 'no-reply@openstack.org'
if uploader['email'] in known_uploaders:
# Ignore duplicates
continue
@ -202,28 +207,34 @@ def cache_review(review_id, data, cache):
cache[('review', str(review_id))] = data
def fetch_review(review_id, cache):
"""Find the review in the cache or look it up in the API.
class ReviewFactory:
Review data is only cached if the review is MERGED because
otherwise it is more likely to change.
def __init__(self, cache):
self._cache = cache
:param review_id: Review ID of the review to look for.
:type review_id: str
:param cache: Storage for repeated lookups.
:type cache: goal_tools.cache.Cache
@functools.lru_cache(maxsize=1024)
def fetch(self, review_id):
"""Find the review in the cache or look it up in the API.
"""
key = ('review', str(review_id))
if key in cache:
LOG.debug('found %s cached', review_id)
return Review(review_id, cache[key])
data = query_gerrit(
'changes/' + review_id + '/detail',
params={
'o': QUERY_OPTIONS,
},
)
response = Review(review_id, data)
cache_review(review_id, data, cache)
return response
Review data is only cached if the review is MERGED because
otherwise it is more likely to change.
:param review_id: Review ID of the review to look for.
:type review_id: str
:param cache: Storage for repeated lookups.
:type cache: goal_tools.cache.Cache
"""
key = ('review', str(review_id))
if key in self._cache:
LOG.debug('found %s cached', review_id)
return Review(review_id, self._cache[key])
data = query_gerrit(
'changes/' + review_id + '/detail',
params={
'o': QUERY_OPTIONS,
},
)
response = Review(review_id, data)
cache_review(review_id, data, self._cache)
return response

View File

@ -79,24 +79,24 @@ class TestFoundationMember(base.TestCase):
class TestFetchMember(base.TestCase):
def setUp(self):
super().setUp()
self.cache = {}
self.f = foundation.MemberFactory(self.cache)
def test_not_in_cache(self):
cache = {}
with mock.patch('goal_tools.foundation.lookup_member') as f:
f.return_value = _member_data
results = foundation.fetch_member(
'doug@doughellmann.com', cache)
self.assertIn(('member', 'doug@doughellmann.com'), cache)
results = self.f.fetch('doug@doughellmann.com')
self.assertIn(('member', 'doug@doughellmann.com'), self.cache)
self.assertEqual(_member_data, results._data)
self.assertEqual(_member_data,
cache[('member', 'doug@doughellmann.com')])
self.cache[('member', 'doug@doughellmann.com')])
def test_in_cache(self):
cache = {
('member', 'doug@doughellmann.com'): _member_data,
}
self.cache[('member', 'doug@doughellmann.com')] = _member_data
with mock.patch('goal_tools.foundation.lookup_member') as f:
f.side_effect = AssertionError('should not be called')
results = foundation.fetch_member(
'doug@doughellmann.com', cache)
self.assertIn(('member', 'doug@doughellmann.com'), cache)
results = self.f.fetch('doug@doughellmann.com')
self.assertIn(('member', 'doug@doughellmann.com'), self.cache)
self.assertEqual(_member_data, results._data)

View File

@ -154,29 +154,30 @@ class TestReview(base.TestCase):
class TestFetchReview(base.TestCase):
def setUp(self):
super().setUp()
self.cache = {}
self.f = gerrit.ReviewFactory(self.cache)
def test_not_in_cache_new(self):
cache = {}
with mock.patch('goal_tools.gerrit.query_gerrit') as f:
f.return_value = _data_55535
results = gerrit.fetch_review('55535', cache)
self.assertNotIn(('review', '55535'), cache)
results = self.f.fetch('55535')
self.assertNotIn(('review', '55535'), self.cache)
self.assertEqual(_data_55535, results._data)
def test_not_in_cache_merged(self):
cache = {}
with mock.patch('goal_tools.gerrit.query_gerrit') as f:
f.return_value = _data_561507
results = gerrit.fetch_review('561507', cache)
self.assertIn(('review', '561507'), cache)
results = self.f.fetch('561507')
self.assertIn(('review', '561507'), self.cache)
self.assertEqual(_data_561507, results._data)
self.assertEqual(_data_561507, cache[('review', '561507')])
self.assertEqual(_data_561507, self.cache[('review', '561507')])
def test_in_cache(self):
cache = {
('review', '561507'): _data_561507,
}
self.cache[('review', '561507')] = _data_561507
with mock.patch('goal_tools.gerrit.query_gerrit') as f:
f.side_effect = AssertionError('should not be called')
results = gerrit.fetch_review('561507', cache)
self.assertIn(('review', '561507'), cache)
results = self.f.fetch('561507')
self.assertIn(('review', '561507'), self.cache)
self.assertEqual(_data_561507, results._data)

View File

@ -76,8 +76,9 @@ class ListContributions(lister.Lister):
parsed_args.governance_project_list)
roles = parsed_args.role
cache = self.app.cache
include_unofficial = parsed_args.include_unofficial
member_factory = foundation.MemberFactory(self.app.cache)
review_factory = gerrit.ReviewFactory(self.app.cache)
review_ids = utils.unique(
gerrit.parse_review_lists(parsed_args.review_list)
@ -85,7 +86,7 @@ class ListContributions(lister.Lister):
for review_id in review_ids:
review = gerrit.fetch_review(review_id, cache)
review = review_factory.fetch(review_id)
for participant in review.participants:
@ -106,7 +107,7 @@ class ListContributions(lister.Lister):
# Figure out which organization the user was
# affiliated with at the time of the work.
organization = None
member = foundation.fetch_member(participant.email, cache)
member = member_factory.fetch(participant.email)
if member:
affiliation = member.find_affiliation(participant.date)
if affiliation: