Show an engine as down only if its service record misses two consecutive updates

We use the same periodic_interval both to update the service record and as the threshold it is compared against (relative to the current time) when doing 'service list'. So it's possible there will be a small window, just before the next update lands, where 'service list' would show the engine as down. Tools that use service list for monitoring would wrongly assume the service is down. Let's change service list to report the service as down only if it has not been updated within 2*periodic_interval. Change-Id: I0f6a30e06bb214bb673930b31a2db946600926b0 Task: 35946
2019-07-22 16:36:05 +05:30 · 2019-07-22 16:36:05 +05:30 · fd23308f6e
commit fd23308f6e
parent 5a403d7090
2 changed files with 10 additions and 14 deletions
--- a/heat/common/service_utils.py
+++ b/heat/common/service_utils.py
@@ -51,14 +51,10 @@ def format_service(service):
        return

    status = 'down'
-    if service.updated_at is not None:
-        if ((timeutils.utcnow() - service.updated_at).total_seconds()
-                <= service.report_interval):
-            status = 'up'
-    else:
-        if ((timeutils.utcnow() - service.created_at).total_seconds()
-                <= service.report_interval):
-            status = 'up'
+    last_updated = service.updated_at or service.created_at
+    check_interval = (timeutils.utcnow() - last_updated).total_seconds()
+    if check_interval <= 2 * service.report_interval:
+        status = 'up'

    result = {
        SERVICE_ID: service.id,
--- a/heat/tests/test_common_service_utils.py
+++ b/heat/tests/test_common_service_utils.py
@@ -51,23 +51,23 @@ class TestServiceUtils(common.HeatTestCase):

        self.assertEqual(service_dict['status'], 'up')

-        # check again within first report_interval time (60)
+        # check again within first report_interval time
        service_dict = service_utils.format_service(service)
        self.assertEqual(service_dict['status'], 'up')

-        # check update not happen within report_interval time (60+)
+        # check update not happen within 2*report_interval time
        service.created_at = (timeutils.utcnow() -
-                              datetime.timedelta(0, 70))
+                              datetime.timedelta(0, 130))
        service_dict = service_utils.format_service(service)
        self.assertEqual(service_dict['status'], 'down')

-        # check update happened after report_interval time (60+)
+        # check update happened after 2* report_interval time
        service.updated_at = (timeutils.utcnow() -
-                              datetime.timedelta(0, 70))
+                              datetime.timedelta(0, 130))
        service_dict = service_utils.format_service(service)
        self.assertEqual(service_dict['status'], 'down')

-        # check update happened within report_interval time (60)
+        # check update happened within report_interval time
        service.updated_at = (timeutils.utcnow() -
                              datetime.timedelta(0, 50))
        service_dict = service_utils.format_service(service)