Rework node cache clean up according to recent changes

* After a timeout, set an error status instead of deleting the node from the cache
* Drop outdated status information from the database

Change-Id: Ice29e04033e4be8e776cce9816c21421f21f30dd
Implements: blueprint get-status-api
Dmitry Tantsur 2015-01-09 18:49:57 +01:00
parent 3f903a7b1e
commit 4ba0dbb78e
5 changed files with 49 additions and 13 deletions


@@ -34,7 +34,11 @@
 ;ports_for_inactive_interfaces = false
 ; Timeout after which discovery is considered failed, set to 0 to disable.
 ;timeout = 3600
-; Amount of time in seconds, after which repeat clean up of timed out nodes.
+; For how much time (in seconds) to keep status information about nodes after
+; discovery was finished for them. Default value is 1 week.
+;node_status_keep_time = 604800
+; Amount of time in seconds, after which repeat clean up of timed out nodes
+; and old nodes status information.
 ;clean_up_period = 60
 ; Whether to overwrite existing values in node database. In the future
 ; non-matching ports will be deleted as well. Setting this to true makes
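For example, a deployment that wants to keep node status for one day and run the clean up sweep every five minutes would uncomment both options and set (illustrative values, not part of this patch):

    node_status_keep_time = 86400
    clean_up_period = 300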


@@ -28,6 +28,7 @@ DEFAULTS = {
     # Discovery process settings
     'ports_for_inactive_interfaces': 'false',
     'timeout': '3600',
+    'node_status_keep_time': '604800',
     'clean_up_period': '60',
     'overwrite_existing': 'false',
     'enable_setting_ipmi_credentials': 'false',
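Both new defaults are stored as strings here and read back as integers by the clean up code further down via conf.getint(). A tiny illustration of that equivalence (the conf module itself is not part of this diff, so the reduction shown is an assumption):

    DEFAULTS = {'node_status_keep_time': '604800', 'clean_up_period': '60'}

    # conf.getint('discoverd', 'node_status_keep_time') presumably boils down to:
    node_status_keep_time = int(DEFAULTS['node_status_keep_time'])
    assert node_status_keep_time == 7 * 24 * 3600  # one week, matching the config comment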


@@ -92,12 +92,12 @@ def periodic_update(period):
 def periodic_clean_up(period):
     while True:
-        LOG.debug('Running periodic clean up of timed out nodes')
+        LOG.debug('Running periodic clean up of node cache')
         try:
             if node_cache.clean_up():
                 firewall.update_filters()
         except Exception:
-            LOG.exception('Periodic clean up failed')
+            LOG.exception('Periodic clean up of node cache failed')
         eventlet.greenthread.sleep(period)
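The loop sleeps clean_up_period seconds between sweeps. How the service spawns the loop is not shown in this diff; a minimal, self-contained sketch of the green-thread pattern it relies on (the task body, iteration count and period are stand-ins):

    import eventlet

    def periodic_task(period):
        # Stand-in for periodic_clean_up(period) above.
        for _ in range(3):
            print('running clean up sweep')
            eventlet.greenthread.sleep(period)

    # In the real service the period would come from
    # conf.getint('discoverd', 'clean_up_period'), 60 seconds by default.
    eventlet.spawn(periodic_task, 0.01).wait()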


@@ -191,10 +191,20 @@ def find_node(**attributes):
 def clean_up():
-    """Reset discovery for timed out nodes.
+    """Clean up the cache.
+
+    * Finish discovery for timed out nodes.
+    * Drop outdated node status information.
 
     :return: list of timed out node UUID's
     """
+    status_keep_threshold = (time.time() -
+                             conf.getint('discoverd', 'node_status_keep_time'))
+    with _db() as db:
+        db.execute('delete from nodes where finished_at < ?',
+                   (status_keep_threshold,))
+
     timeout = conf.getint('discoverd', 'timeout')
     if timeout <= 0:
         return []
@@ -208,8 +218,9 @@
             return []
 
         LOG.error('Discovery for nodes %s has timed out', uuids)
-        db.execute('delete from nodes where started_at < ?',
-                   (threshold,))
+        db.execute('update nodes set finished_at=?, error=? '
+                   'where started_at < ?',
+                   (time.time(), 'Discovery timed out', threshold))
         db.executemany('delete from attributes where uuid=?',
                        [(u,) for u in uuids])
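Taken together, the two hunks above change what clean_up() does to the database. A self-contained sketch of the effect against an in-memory SQLite database (the stand-in schema uses only the column names visible above; types and any other columns are assumptions, and the query collecting uuids is a guess since the diff does not show it):

    import sqlite3
    import time

    db = sqlite3.connect(':memory:')
    db.executescript('''
        create table nodes(uuid text, started_at real, finished_at real, error text);
        create table attributes(name text, value text, uuid text);
    ''')

    now = time.time()
    timeout = 3600
    node_status_keep_time = 604800

    # One node stuck in discovery, one node that finished long ago.
    db.execute('insert into nodes(uuid, started_at) values(?, ?)',
               ('stuck', now - 2 * timeout))
    db.execute('insert into attributes(name, value, uuid) values(?, ?, ?)',
               ('mac', '11:22:33:44:55:66', 'stuck'))
    db.execute('insert into nodes(uuid, started_at, finished_at) values(?, ?, ?)',
               ('ancient', now - 2 * node_status_keep_time,
                now - 2 * node_status_keep_time))

    # New behaviour: status rows older than node_status_keep_time are dropped.
    db.execute('delete from nodes where finished_at < ?',
               (now - node_status_keep_time,))

    # Changed behaviour: timed out nodes keep their row and get an error
    # recorded; only their lookup attributes are removed.
    threshold = now - timeout
    uuids = [row[0] for row in db.execute(
        'select uuid from nodes where started_at < ? and finished_at is null',
        (threshold,))]  # guessed query; the diff only shows how uuids is used
    db.execute('update nodes set finished_at=?, error=? where started_at < ?',
               (now, 'Discovery timed out', threshold))
    db.executemany('delete from attributes where uuid=?', [(u,) for u in uuids])

    print(db.execute('select uuid, error from nodes').fetchall())
    # -> [('stuck', 'Discovery timed out')]; the 'ancient' row is gone.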


@@ -121,9 +121,10 @@ class TestNodeCachePop(test_base.NodeTest):
 class TestNodeCacheCleanUp(test_base.NodeTest):
     def setUp(self):
         super(TestNodeCacheCleanUp, self).setUp()
+        self.started_at = 100.0
         with self.db:
             self.db.execute('insert into nodes(uuid, started_at) '
-                            'values(?, ?)', (self.uuid, time.time() - 3600000))
+                            'values(?, ?)', (self.uuid, self.started_at))
             self.db.executemany('insert into attributes(name, value, uuid) '
                                 'values(?, ?, ?)',
                                 [('mac', v, self.uuid) for v in self.macs])
@@ -133,8 +134,9 @@ class TestNodeCacheCleanUp(test_base.NodeTest):
         self.assertFalse(node_cache.clean_up())
-        self.assertEqual(1, len(self.db.execute(
-            'select * from nodes').fetchall()))
+        res = [tuple(row) for row in self.db.execute(
+            'select finished_at, error from nodes').fetchall()]
+        self.assertEqual([(None, None)], res)
         self.assertEqual(len(self.macs), len(self.db.execute(
             'select * from attributes').fetchall()))
@@ -144,18 +146,36 @@ class TestNodeCacheCleanUp(test_base.NodeTest):
         self.assertFalse(node_cache.clean_up())
-        self.assertEqual(1, len(self.db.execute(
-            'select * from nodes').fetchall()))
+        res = [tuple(row) for row in self.db.execute(
+            'select finished_at, error from nodes').fetchall()]
+        self.assertEqual([(None, None)], res)
         self.assertEqual(len(self.macs), len(self.db.execute(
             'select * from attributes').fetchall()))
 
-    def test_cleaned(self):
+    @mock.patch.object(time, 'time')
+    def test_timeout(self, time_mock):
+        conf.CONF.set('discoverd', 'timeout', '99')
+        time_mock.return_value = self.started_at + 100
+
         self.assertEqual([self.uuid], node_cache.clean_up())
-        self.assertEqual([], self.db.execute('select * from nodes').fetchall())
+
+        res = [tuple(row) for row in self.db.execute(
+            'select finished_at, error from nodes').fetchall()]
+        self.assertEqual([(self.started_at + 100, 'Discovery timed out')], res)
         self.assertEqual([], self.db.execute(
             'select * from attributes').fetchall())
+
+    def test_old_status(self):
+        conf.CONF.set('discoverd', 'node_status_keep_time', '42')
+        with self.db:
+            self.db.execute('update nodes set finished_at=?',
+                            (time.time() - 100,))
+
+        self.assertEqual([], node_cache.clean_up())
+
+        self.assertEqual([], self.db.execute(
+            'select * from nodes').fetchall())
 
 
 class TestNodeCacheGetNode(test_base.NodeTest):
     def test_ok(self):
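The threshold arithmetic behind the new tests, spelled out with the values used above (nothing here beyond what the tests already assert):

    started_at = 100.0                 # set in setUp()
    now = started_at + 100             # what the mocked time.time() returns
    timeout = 99

    # test_timeout: the node started more than `timeout` seconds before the
    # mocked "now", so it is marked as timed out and finished_at records "now".
    threshold = now - timeout          # 101.0
    assert started_at < threshold      # 100.0 < 101.0 -> timed out
    assert now == started_at + 100     # the expected finished_at value

    # test_old_status: finished_at was set 100 seconds in the past, which is
    # older than node_status_keep_time=42, so the whole row is dropped.
    node_status_keep_time = 42
    assert 100 > node_status_keep_time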