Implement timeout for discovery
Unfortunately, we can't really notify that we timed out, because we can't set Ironic internal fields like 'last_error'. So we rely on Ironic to do its own timeout. Change-Id: I5901f8e80b59d53975b6b265fba61e965884bb31 Closes-Bug: #1391871
This commit is contained in:
parent
dafa4d0013
commit
5d51e82c50
|
@ -192,6 +192,7 @@ Change Log
|
|||
v1.0.0
|
||||
~~~~~~
|
||||
|
||||
* Discovery now times out by default.
|
||||
* Add support for plugins that hook into data processing pipeline, see
|
||||
`plugin-architecture blueprint`_ for details.
|
||||
* Cache nodes under discovery in a local SQLite database. Set ``database``
|
||||
|
|
|
@ -24,6 +24,10 @@
|
|||
; for all interfaces. You should leave it as false, unless you encounter any
|
||||
; bugs with this behavior.
|
||||
;ports_for_inactive_interfaces = false
|
||||
; Timeout in seconds after which discovery is considered failed, set to 0 to
; disable.
|
||||
;timeout = 3600
|
||||
; Amount of time in seconds between periodic clean ups of timed out nodes.
|
||||
;clean_up_period = 60
|
||||
|
||||
; IP to listen on.
|
||||
;listen_address = 0.0.0.0
|
||||
|
|
|
@ -26,6 +26,8 @@ DEFAULTS = {
|
|||
'ironic_retry_period': '5',
|
||||
'database': '',
|
||||
'processing_hooks': '',
|
||||
'timeout': '3600',
|
||||
'clean_up_period': '60',
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -73,6 +73,17 @@ def periodic_update(period):
|
|||
eventlet.greenthread.sleep(period)
|
||||
|
||||
|
||||
def periodic_clean_up(period):
    """Run the node cache clean up forever, pausing between rounds.

    Each round calls ``node_cache.clean_up()``; when it reports any timed
    out nodes the firewall filters are refreshed. Any exception raised
    during a round is logged and the loop carries on.

    :param period: pause between two consecutive rounds, passed as is to
                   ``eventlet.greenthread.sleep``
    """
    while True:
        LOG.debug('Running periodic clean up of timed out nodes')
        try:
            timed_out = node_cache.clean_up()
            if timed_out:
                # The cache changed, so the filters have to follow.
                firewall.update_filters()
        except Exception:
            # Never let a failed round kill the periodic task.
            LOG.exception('Periodic clean up failed')
        eventlet.greenthread.sleep(period)
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) < 2:
|
||||
sys.exit("Usage: %s config-file" % sys.argv[0])
|
||||
|
@ -94,6 +105,8 @@ def main():
|
|||
|
||||
period = conf.getint('discoverd', 'firewall_update_period')
|
||||
eventlet.greenthread.spawn_n(periodic_update, period)
|
||||
period = conf.getint('discoverd', 'clean_up_period')
|
||||
eventlet.greenthread.spawn_n(periodic_clean_up, period)
|
||||
|
||||
app.run(debug=debug,
|
||||
host=conf.get('discoverd', 'listen_address'),
|
||||
|
|
|
@ -142,3 +142,30 @@ def pop_node(**attributes):
|
|||
uuid = found.pop()
|
||||
drop_node(uuid)
|
||||
return uuid
|
||||
|
||||
|
||||
def clean_up():
    """Drop nodes whose discovery has been running longer than the timeout.

    The timeout is read from the ``timeout`` option of the ``discoverd``
    section; a value of zero or less disables the check. Nodes whose
    ``started_at`` is older than ``now - timeout`` are removed from the
    ``nodes`` table together with their rows in ``attributes``.

    :return: list of UUIDs of the timed out nodes, empty list if there are
             none or the timeout is disabled
    """
    timeout = conf.getint('discoverd', 'timeout')
    if timeout <= 0:
        LOG.debug('Timeout is disabled')
        return []

    # Anything started before this moment is considered timed out.
    deadline = time.time() - timeout
    with _db() as db:
        cursor = db.execute('select uuid from nodes '
                            'where started_at < ?',
                            (deadline,))
        timed_out = [record[0] for record in cursor]
        if not timed_out:
            return []

        LOG.error('Discovery for nodes %s has timed out', timed_out)
        db.execute('delete from nodes where started_at < ?',
                   (deadline,))
        # Attributes are keyed by node UUID, remove them node by node.
        params = [(node_uuid,) for node_uuid in timed_out]
        db.executemany('delete from attributes where uuid=?', params)

    return timed_out
|
||||
|
|
|
@ -579,5 +579,46 @@ class TestPlugins(unittest.TestCase):
|
|||
mock_post.assert_called_once_with(ANY, 'node', ['port'], 'node_info')
|
||||
|
||||
|
||||
class TestNodeCacheCleanUp(BaseTest):
    """Tests for ``node_cache.clean_up``."""

    def setUp(self):
        super(TestNodeCacheCleanUp, self).setUp()
        self.uuid = 'uuid'
        self.macs = ['11:22:33:44:55:66', '66:55:44:33:22:11']
        # One node that started discovery a very long time ago, with one
        # 'mac' attribute per MAC address.
        started_at = time.time() - 3600000
        attributes = [('mac', mac, self.uuid) for mac in self.macs]
        with self.db:
            self.db.execute('insert into nodes(uuid, started_at) '
                            'values(?, ?)', (self.uuid, started_at))
            self.db.executemany('insert into attributes(name, value, uuid) '
                                'values(?, ?, ?)',
                                attributes)

    def _assert_nothing_removed(self):
        """Check that the node and all its attributes are still cached."""
        nodes = self.db.execute('select * from nodes').fetchall()
        self.assertEqual(1, len(nodes))
        attributes = self.db.execute('select * from attributes').fetchall()
        self.assertEqual(len(self.macs), len(attributes))

    def test_no_timeout(self):
        # A zero timeout disables the clean up altogether.
        conf.CONF.set('discoverd', 'timeout', '0')

        self.assertFalse(node_cache.clean_up())

        self._assert_nothing_removed()

    @patch.object(time, 'time')
    def test_ok(self, time_mock):
        # With the clock moved far into the past the node is not timed out.
        time_mock.return_value = 1000

        self.assertFalse(node_cache.clean_up())

        self._assert_nothing_removed()

    def test_cleaned(self):
        self.assertEqual([self.uuid], node_cache.clean_up())

        remaining_nodes = self.db.execute('select * from nodes').fetchall()
        self.assertEqual([], remaining_nodes)
        remaining_attributes = self.db.execute(
            'select * from attributes').fetchall()
        self.assertEqual([], remaining_attributes)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
Loading…
Reference in New Issue