Implement timeout for discovery

Unfortunately, we can't really notify that we timed out, because
we can't set Ironic internal fields like 'last_error'. So we rely
on Ironic to do its own timeout.

Change-Id: I5901f8e80b59d53975b6b265fba61e965884bb31
Closes-Bug: #1391871
This commit is contained in:
Dmitry Tantsur 2014-11-26 18:09:49 +01:00
parent dafa4d0013
commit 5d51e82c50
6 changed files with 88 additions and 0 deletions

View File

@ -192,6 +192,7 @@ Change Log
v1.0.0
~~~~~~
* Discovery now times out by default.
* Add support for plugins that hook into data processing pipeline, see
`plugin-architecture blueprint`_ for details.
* Cache nodes under discovery in a local SQLite database. Set ``database``

View File

@ -24,6 +24,10 @@
; for all interfaces. You should leave it as false, unless you encounter any
; bugs with this behavior.
;ports_for_inactive_interfaces = false
; Timeout in seconds after which discovery is considered failed, set to 0 to
; disable.
;timeout = 3600
; Interval in seconds between repeated clean ups of timed out nodes.
;clean_up_period = 60
; IP to listen on.
;listen_address = 0.0.0.0

View File

@ -26,6 +26,8 @@ DEFAULTS = {
'ironic_retry_period': '5',
'database': '',
'processing_hooks': '',
'timeout': '3600',
'clean_up_period': '60',
}

View File

@ -73,6 +73,17 @@ def periodic_update(period):
eventlet.greenthread.sleep(period)
def periodic_clean_up(period):
    """Repeatedly purge timed out nodes from the node cache, forever.

    On every iteration ``node_cache.clean_up()`` is called; when it reports
    any timed out nodes (a truthy result), the firewall filters are
    refreshed via ``firewall.update_filters()``. Any exception raised by
    either call is logged and swallowed so that the loop keeps running.

    :param period: delay passed to ``eventlet.greenthread.sleep`` between
                   two consecutive iterations
    """
    while True:
        LOG.debug('Running periodic clean up of timed out nodes')
        try:
            timed_out = node_cache.clean_up()
            if timed_out:
                # Only touch the firewall when something was dropped.
                firewall.update_filters()
        except Exception:
            # Best effort: a failed pass must not kill the green thread.
            LOG.exception('Periodic clean up failed')
        eventlet.greenthread.sleep(period)
def main():
if len(sys.argv) < 2:
sys.exit("Usage: %s config-file" % sys.argv[0])
@ -94,6 +105,8 @@ def main():
period = conf.getint('discoverd', 'firewall_update_period')
eventlet.greenthread.spawn_n(periodic_update, period)
period = conf.getint('discoverd', 'clean_up_period')
eventlet.greenthread.spawn_n(periodic_clean_up, period)
app.run(debug=debug,
host=conf.get('discoverd', 'listen_address'),

View File

@ -142,3 +142,30 @@ def pop_node(**attributes):
uuid = found.pop()
drop_node(uuid)
return uuid
def clean_up():
    """Remove nodes whose discovery started longer ago than the timeout.

    The timeout is read from the ``timeout`` option of the ``discoverd``
    configuration section; a value of zero or less disables the check.
    For every expired node both its row in ``nodes`` and its rows in
    ``attributes`` are deleted.

    :return: list of UUIDs of the timed out nodes, empty if there are none
             or if the timeout is disabled
    """
    max_age = conf.getint('discoverd', 'timeout')
    if max_age <= 0:
        LOG.debug('Timeout is disabled')
        return []

    # Nodes that started before this moment are considered timed out.
    deadline = time.time() - max_age
    # NOTE(review): _db() presumably yields a connection that commits on
    # leaving the ``with`` block - confirm against its definition.
    with _db() as db:
        rows = db.execute('select uuid from nodes '
                          'where started_at < ?',
                          (deadline,))
        expired = [row[0] for row in rows]
        if expired:
            LOG.error('Discovery for nodes %s has timed out', expired)
            db.execute('delete from nodes where started_at < ?',
                       (deadline,))
            db.executemany('delete from attributes where uuid=?',
                           [(uuid,) for uuid in expired])

    return expired

View File

@ -579,5 +579,46 @@ class TestPlugins(unittest.TestCase):
mock_post.assert_called_once_with(ANY, 'node', ['port'], 'node_info')
class TestNodeCacheCleanUp(BaseTest):
    """Tests for ``node_cache.clean_up``."""

    def setUp(self):
        """Store one node, started 3600000 seconds ago, with two MACs."""
        super(TestNodeCacheCleanUp, self).setUp()
        self.uuid = 'uuid'
        self.macs = ['11:22:33:44:55:66', '66:55:44:33:22:11']
        started_at = time.time() - 3600000
        mac_rows = [('mac', mac, self.uuid) for mac in self.macs]
        with self.db:
            self.db.execute('insert into nodes(uuid, started_at) '
                            'values(?, ?)', (self.uuid, started_at))
            self.db.executemany('insert into attributes(name, value, uuid) '
                                'values(?, ?, ?)',
                                mac_rows)

    def _fetch_all(self, query):
        """Run ``query`` against the test database and return all rows."""
        return self.db.execute(query).fetchall()

    def test_no_timeout(self):
        """A timeout of 0 disables clean up, so nothing is removed."""
        conf.CONF.set('discoverd', 'timeout', '0')
        self.assertFalse(node_cache.clean_up())

        nodes = self._fetch_all('select * from nodes')
        self.assertEqual(1, len(nodes))
        attributes = self._fetch_all('select * from attributes')
        self.assertEqual(len(self.macs), len(attributes))

    @patch.object(time, 'time')
    def test_ok(self, time_mock):
        """With the clock mocked to 1000 the stored node is not expired."""
        # setUp used the real clock, so the stored start time is later
        # than any threshold derived from this mocked value.
        time_mock.return_value = 1000
        self.assertFalse(node_cache.clean_up())

        nodes = self._fetch_all('select * from nodes')
        self.assertEqual(1, len(nodes))
        attributes = self._fetch_all('select * from attributes')
        self.assertEqual(len(self.macs), len(attributes))

    def test_cleaned(self):
        """An expired node and all of its attributes are removed."""
        # NOTE(review): relies on the configured timeout being shorter
        # than 3600000 seconds - presumably set up by BaseTest; confirm.
        self.assertEqual([self.uuid], node_cache.clean_up())

        self.assertEqual([], self._fetch_all('select * from nodes'))
        self.assertEqual([], self._fetch_all('select * from attributes'))
if __name__ == '__main__':
unittest.main()