Rework node cache clean up according to recent changes

* After a timeout, set an error status instead of deleting the node from the cache
* Drop outdated status information from the database

Change-Id: Ice29e04033e4be8e776cce9816c21421f21f30dd
Implements: blueprint get-status-api
Dmitry Tantsur 2015-01-09 18:49:57 +01:00
parent 3f903a7b1e
commit 4ba0dbb78e
5 changed files with 49 additions and 13 deletions


@@ -34,7 +34,11 @@
 ;ports_for_inactive_interfaces = false
 ; Timeout after which discovery is considered failed, set to 0 to disable.
 ;timeout = 3600
-; Amount of time in seconds, after which repeat clean up of timed out nodes.
+; For how much time (in seconds) to keep status information about nodes after
+; discovery was finished for them. Default value is 1 week.
+;node_status_keep_time = 604800
+; Amount of time in seconds, after which repeat clean up of timed out nodes
+; and old nodes status information.
 ;clean_up_period = 60
 ; Whether to overwrite existing values in node database. In the future
 ; non-matching ports will be deleted as well. Setting this to true makes
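For example, a deployment that wants to keep node status for one day and run the clean up sweep every five minutes would uncomment both options and set (illustrative values, not part of this patch):

    node_status_keep_time = 86400
    clean_up_period = 300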


@@ -28,6 +28,7 @@ DEFAULTS = {
     # Discovery process settings
     'ports_for_inactive_interfaces': 'false',
     'timeout': '3600',
+    'node_status_keep_time': '604800',
     'clean_up_period': '60',
     'overwrite_existing': 'false',
     'enable_setting_ipmi_credentials': 'false',
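Both new defaults are stored as strings here and read back as integers by the clean up code further down via conf.getint(). A tiny illustration of that equivalence (the conf module itself is not part of this diff, so the reduction shown is an assumption):

    DEFAULTS = {'node_status_keep_time': '604800', 'clean_up_period': '60'}

    # conf.getint('discoverd', 'node_status_keep_time') presumably boils down to:
    node_status_keep_time = int(DEFAULTS['node_status_keep_time'])
    assert node_status_keep_time == 7 * 24 * 3600  # one week, matching the config comment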


@@ -92,12 +92,12 @@ def periodic_update(period):
 def periodic_clean_up(period):
     while True:
-        LOG.debug('Running periodic clean up of timed out nodes')
+        LOG.debug('Running periodic clean up of node cache')
         try:
             if node_cache.clean_up():
                 firewall.update_filters()
         except Exception:
-            LOG.exception('Periodic clean up failed')
+            LOG.exception('Periodic clean up of node cache failed')
         eventlet.greenthread.sleep(period)
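The loop sleeps clean_up_period seconds between sweeps. How the service spawns the loop is not shown in this diff; a minimal, self-contained sketch of the green-thread pattern it relies on (the task body, iteration count and period are stand-ins):

    import eventlet

    def periodic_task(period):
        # Stand-in for periodic_clean_up(period) above.
        for _ in range(3):
            print('running clean up sweep')
            eventlet.greenthread.sleep(period)

    # In the real service the period would come from
    # conf.getint('discoverd', 'clean_up_period'), 60 seconds by default.
    eventlet.spawn(periodic_task, 0.01).wait()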


@@ -191,10 +191,20 @@ def find_node(**attributes):
 def clean_up():
-    """Reset discovery for timed out nodes.
+    """Clean up the cache.
+
+    * Finish discovery for timed out nodes.
+    * Drop outdated node status information.
 
     :return: list of timed out node UUID's
     """
+    status_keep_threshold = (time.time() -
+                             conf.getint('discoverd', 'node_status_keep_time'))
+    with _db() as db:
+        db.execute('delete from nodes where finished_at < ?',
+                   (status_keep_threshold,))
+
     timeout = conf.getint('discoverd', 'timeout')
     if timeout <= 0:
         return []
@@ -208,8 +218,9 @@
             return []
 
         LOG.error('Discovery for nodes %s has timed out', uuids)
-        db.execute('delete from nodes where started_at < ?',
-                   (threshold,))
+        db.execute('update nodes set finished_at=?, error=? '
+                   'where started_at < ?',
+                   (time.time(), 'Discovery timed out', threshold))
         db.executemany('delete from attributes where uuid=?',
                        [(u,) for u in uuids])
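Taken together, the two hunks above change what clean_up() does to the database. A self-contained sketch of the effect against an in-memory SQLite database (the stand-in schema uses only the column names visible above; types and any other columns are assumptions, and the query collecting uuids is a guess since the diff does not show it):

    import sqlite3
    import time

    db = sqlite3.connect(':memory:')
    db.executescript('''
        create table nodes(uuid text, started_at real, finished_at real, error text);
        create table attributes(name text, value text, uuid text);
    ''')

    now = time.time()
    timeout = 3600
    node_status_keep_time = 604800

    # One node stuck in discovery, one node that finished long ago.
    db.execute('insert into nodes(uuid, started_at) values(?, ?)',
               ('stuck', now - 2 * timeout))
    db.execute('insert into attributes(name, value, uuid) values(?, ?, ?)',
               ('mac', '11:22:33:44:55:66', 'stuck'))
    db.execute('insert into nodes(uuid, started_at, finished_at) values(?, ?, ?)',
               ('ancient', now - 2 * node_status_keep_time,
                now - 2 * node_status_keep_time))

    # New behaviour: status rows older than node_status_keep_time are dropped.
    db.execute('delete from nodes where finished_at < ?',
               (now - node_status_keep_time,))

    # Changed behaviour: timed out nodes keep their row and get an error
    # recorded; only their lookup attributes are removed.
    threshold = now - timeout
    uuids = [row[0] for row in db.execute(
        'select uuid from nodes where started_at < ? and finished_at is null',
        (threshold,))]  # guessed query; the diff only shows how uuids is used
    db.execute('update nodes set finished_at=?, error=? where started_at < ?',
               (now, 'Discovery timed out', threshold))
    db.executemany('delete from attributes where uuid=?', [(u,) for u in uuids])

    print(db.execute('select uuid, error from nodes').fetchall())
    # -> [('stuck', 'Discovery timed out')]; the 'ancient' row is gone.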


@@ -121,9 +121,10 @@ class TestNodeCachePop(test_base.NodeTest):
 class TestNodeCacheCleanUp(test_base.NodeTest):
     def setUp(self):
         super(TestNodeCacheCleanUp, self).setUp()
+        self.started_at = 100.0
         with self.db:
             self.db.execute('insert into nodes(uuid, started_at) '
-                            'values(?, ?)', (self.uuid, time.time() - 3600000))
+                            'values(?, ?)', (self.uuid, self.started_at))
             self.db.executemany('insert into attributes(name, value, uuid) '
                                 'values(?, ?, ?)',
                                 [('mac', v, self.uuid) for v in self.macs])
@@ -133,8 +134,9 @@ class TestNodeCacheCleanUp(test_base.NodeTest):
         self.assertFalse(node_cache.clean_up())
-        self.assertEqual(1, len(self.db.execute(
-            'select * from nodes').fetchall()))
+        res = [tuple(row) for row in self.db.execute(
+            'select finished_at, error from nodes').fetchall()]
+        self.assertEqual([(None, None)], res)
         self.assertEqual(len(self.macs), len(self.db.execute(
             'select * from attributes').fetchall()))
@@ -144,18 +146,36 @@ class TestNodeCacheCleanUp(test_base.NodeTest):
         self.assertFalse(node_cache.clean_up())
-        self.assertEqual(1, len(self.db.execute(
-            'select * from nodes').fetchall()))
+        res = [tuple(row) for row in self.db.execute(
+            'select finished_at, error from nodes').fetchall()]
+        self.assertEqual([(None, None)], res)
         self.assertEqual(len(self.macs), len(self.db.execute(
             'select * from attributes').fetchall()))
 
-    def test_cleaned(self):
+    @mock.patch.object(time, 'time')
+    def test_timeout(self, time_mock):
+        conf.CONF.set('discoverd', 'timeout', '99')
+        time_mock.return_value = self.started_at + 100
+
         self.assertEqual([self.uuid], node_cache.clean_up())
-        self.assertEqual([], self.db.execute('select * from nodes').fetchall())
+
+        res = [tuple(row) for row in self.db.execute(
+            'select finished_at, error from nodes').fetchall()]
+        self.assertEqual([(self.started_at + 100, 'Discovery timed out')], res)
         self.assertEqual([], self.db.execute(
             'select * from attributes').fetchall())
+
+    def test_old_status(self):
+        conf.CONF.set('discoverd', 'node_status_keep_time', '42')
+        with self.db:
+            self.db.execute('update nodes set finished_at=?',
+                            (time.time() - 100,))
+
+        self.assertEqual([], node_cache.clean_up())
+
+        self.assertEqual([], self.db.execute(
+            'select * from nodes').fetchall())
 
 
 class TestNodeCacheGetNode(test_base.NodeTest):
     def test_ok(self):
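The threshold arithmetic behind the new tests, spelled out with the values used above (nothing here beyond what the tests already assert):

    started_at = 100.0                 # set in setUp()
    now = started_at + 100             # what the mocked time.time() returns
    timeout = 99

    # test_timeout: the node started more than `timeout` seconds before the
    # mocked "now", so it is marked as timed out and finished_at records "now".
    threshold = now - timeout          # 101.0
    assert started_at < threshold      # 100.0 < 101.0 -> timed out
    assert now == started_at + 100     # the expected finished_at value

    # test_old_status: finished_at was set 100 seconds in the past, which is
    # older than node_status_keep_time=42, so the whole row is dropped.
    node_status_keep_time = 42
    assert 100 > node_status_keep_time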