Rework node cache clean up according to recent changes
* After timeout, set error status rather than delete the node from the cache
* Drop too-old status information from the database

Change-Id: Ice29e04033e4be8e776cce9816c21421f21f30dd
Implements: blueprint get-status-api
This commit is contained in:
parent
3f903a7b1e
commit
4ba0dbb78e
|
@ -34,7 +34,11 @@
|
|||
;ports_for_inactive_interfaces = false
|
||||
; Timeout after which discovery is considered failed, set to 0 to disable.
|
||||
;timeout = 3600
|
||||
; Amount of time in seconds after which to repeat clean-up of timed-out nodes.
|
||||
; How long (in seconds) to keep status information about nodes after
|
||||
; discovery was finished for them. Default value is 1 week.
|
||||
;node_status_keep_time = 604800
|
||||
; Amount of time in seconds after which to repeat clean-up of timed-out nodes
|
||||
; and of stale node status information.
|
||||
;clean_up_period = 60
|
||||
; Whether to overwrite existing values in node database. In the future
|
||||
; non-matching ports will be deleted as well. Setting this to true makes
|
||||
|
|
|
@ -28,6 +28,7 @@ DEFAULTS = {
|
|||
# Discovery process settings
|
||||
'ports_for_inactive_interfaces': 'false',
|
||||
'timeout': '3600',
|
||||
'node_status_keep_time': '604800',
|
||||
'clean_up_period': '60',
|
||||
'overwrite_existing': 'false',
|
||||
'enable_setting_ipmi_credentials': 'false',
|
||||
|
|
|
@ -92,12 +92,12 @@ def periodic_update(period):
|
|||
|
||||
def periodic_clean_up(period):
|
||||
while True:
|
||||
LOG.debug('Running periodic clean up of timed out nodes')
|
||||
LOG.debug('Running periodic clean up of node cache')
|
||||
try:
|
||||
if node_cache.clean_up():
|
||||
firewall.update_filters()
|
||||
except Exception:
|
||||
LOG.exception('Periodic clean up failed')
|
||||
LOG.exception('Periodic clean up of node cache failed')
|
||||
eventlet.greenthread.sleep(period)
|
||||
|
||||
|
||||
|
|
|
@ -191,10 +191,20 @@ def find_node(**attributes):
|
|||
|
||||
|
||||
def clean_up():
|
||||
"""Reset discovery for timed out nodes.
|
||||
"""Clean up the cache.
|
||||
|
||||
* Finish discovery for timed out nodes.
|
||||
* Drop outdated node status information.
|
||||
|
||||
:return: list of timed out node UUID's
|
||||
"""
|
||||
status_keep_threshold = (time.time() -
|
||||
conf.getint('discoverd', 'node_status_keep_time'))
|
||||
|
||||
with _db() as db:
|
||||
db.execute('delete from nodes where finished_at < ?',
|
||||
(status_keep_threshold,))
|
||||
|
||||
timeout = conf.getint('discoverd', 'timeout')
|
||||
if timeout <= 0:
|
||||
return []
|
||||
|
@ -208,8 +218,9 @@ def clean_up():
|
|||
return []
|
||||
|
||||
LOG.error('Discovery for nodes %s has timed out', uuids)
|
||||
db.execute('delete from nodes where started_at < ?',
|
||||
(threshold,))
|
||||
db.execute('update nodes set finished_at=?, error=? '
|
||||
'where started_at < ?',
|
||||
(time.time(), 'Discovery timed out', threshold))
|
||||
db.executemany('delete from attributes where uuid=?',
|
||||
[(u,) for u in uuids])
|
||||
|
||||
|
|
|
@ -121,9 +121,10 @@ class TestNodeCachePop(test_base.NodeTest):
|
|||
class TestNodeCacheCleanUp(test_base.NodeTest):
|
||||
def setUp(self):
|
||||
super(TestNodeCacheCleanUp, self).setUp()
|
||||
self.started_at = 100.0
|
||||
with self.db:
|
||||
self.db.execute('insert into nodes(uuid, started_at) '
|
||||
'values(?, ?)', (self.uuid, time.time() - 3600000))
|
||||
'values(?, ?)', (self.uuid, self.started_at))
|
||||
self.db.executemany('insert into attributes(name, value, uuid) '
|
||||
'values(?, ?, ?)',
|
||||
[('mac', v, self.uuid) for v in self.macs])
|
||||
|
@ -133,8 +134,9 @@ class TestNodeCacheCleanUp(test_base.NodeTest):
|
|||
|
||||
self.assertFalse(node_cache.clean_up())
|
||||
|
||||
self.assertEqual(1, len(self.db.execute(
|
||||
'select * from nodes').fetchall()))
|
||||
res = [tuple(row) for row in self.db.execute(
|
||||
'select finished_at, error from nodes').fetchall()]
|
||||
self.assertEqual([(None, None)], res)
|
||||
self.assertEqual(len(self.macs), len(self.db.execute(
|
||||
'select * from attributes').fetchall()))
|
||||
|
||||
|
@ -144,18 +146,36 @@ class TestNodeCacheCleanUp(test_base.NodeTest):
|
|||
|
||||
self.assertFalse(node_cache.clean_up())
|
||||
|
||||
self.assertEqual(1, len(self.db.execute(
|
||||
'select * from nodes').fetchall()))
|
||||
res = [tuple(row) for row in self.db.execute(
|
||||
'select finished_at, error from nodes').fetchall()]
|
||||
self.assertEqual([(None, None)], res)
|
||||
self.assertEqual(len(self.macs), len(self.db.execute(
|
||||
'select * from attributes').fetchall()))
|
||||
|
||||
def test_cleaned(self):
|
||||
@mock.patch.object(time, 'time')
|
||||
def test_timeout(self, time_mock):
|
||||
conf.CONF.set('discoverd', 'timeout', '99')
|
||||
time_mock.return_value = self.started_at + 100
|
||||
|
||||
self.assertEqual([self.uuid], node_cache.clean_up())
|
||||
|
||||
self.assertEqual([], self.db.execute('select * from nodes').fetchall())
|
||||
res = [tuple(row) for row in self.db.execute(
|
||||
'select finished_at, error from nodes').fetchall()]
|
||||
self.assertEqual([(self.started_at + 100, 'Discovery timed out')], res)
|
||||
self.assertEqual([], self.db.execute(
|
||||
'select * from attributes').fetchall())
|
||||
|
||||
def test_old_status(self):
|
||||
conf.CONF.set('discoverd', 'node_status_keep_time', '42')
|
||||
with self.db:
|
||||
self.db.execute('update nodes set finished_at=?',
|
||||
(time.time() - 100,))
|
||||
|
||||
self.assertEqual([], node_cache.clean_up())
|
||||
|
||||
self.assertEqual([], self.db.execute(
|
||||
'select * from nodes').fetchall())
|
||||
|
||||
|
||||
class TestNodeCacheGetNode(test_base.NodeTest):
|
||||
def test_ok(self):
|
||||
|
|
Loading…
Reference in New Issue