From fd23308f6ec20e4441d8da486a4f6930a2ba366d Mon Sep 17 00:00:00 2001 From: Rabi Mishra Date: Mon, 22 Jul 2019 16:36:05 +0530 Subject: [PATCH] Show an engine as down if service record is not updated twice We use the same periodic_interval to update the service record and compare it with the current time when doing 'service list'. So, it's possible there will be a small window where 'service list' would show the engine as down. Tools that use service list for monitoring would wrongly assume the service is down. Let's change service list to report the service as down if it's not updated in 2*periodic_interval. Change-Id: I0f6a30e06bb214bb673930b31a2db946600926b0 Task: 35946 --- heat/common/service_utils.py | 12 ++++-------- heat/tests/test_common_service_utils.py | 12 ++++++------ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/heat/common/service_utils.py b/heat/common/service_utils.py index b5cf004851..a59fef8e8b 100644 --- a/heat/common/service_utils.py +++ b/heat/common/service_utils.py @@ -51,14 +51,10 @@ def format_service(service): return status = 'down' - if service.updated_at is not None: - if ((timeutils.utcnow() - service.updated_at).total_seconds() - <= service.report_interval): - status = 'up' - else: - if ((timeutils.utcnow() - service.created_at).total_seconds() - <= service.report_interval): - status = 'up' + last_updated = service.updated_at or service.created_at + check_interval = (timeutils.utcnow() - last_updated).total_seconds() + if check_interval <= 2 * service.report_interval: + status = 'up' result = { SERVICE_ID: service.id, diff --git a/heat/tests/test_common_service_utils.py b/heat/tests/test_common_service_utils.py index 6bad2a83d2..0745e8f858 100644 --- a/heat/tests/test_common_service_utils.py +++ b/heat/tests/test_common_service_utils.py @@ -51,23 +51,23 @@ class TestServiceUtils(common.HeatTestCase): self.assertEqual(service_dict['status'], 'up') - # check again within first report_interval time (60) + # check again 
within first report_interval time service_dict = service_utils.format_service(service) self.assertEqual(service_dict['status'], 'up') - # check update not happen within report_interval time (60+) + # check update not happen within 2*report_interval time service.created_at = (timeutils.utcnow() - - datetime.timedelta(0, 70)) + datetime.timedelta(0, 130)) service_dict = service_utils.format_service(service) self.assertEqual(service_dict['status'], 'down') - # check update happened after report_interval time (60+) + # check update happened after 2* report_interval time service.updated_at = (timeutils.utcnow() - - datetime.timedelta(0, 70)) + datetime.timedelta(0, 130)) service_dict = service_utils.format_service(service) self.assertEqual(service_dict['status'], 'down') - # check update happened within report_interval time (60) + # check update happened within report_interval time service.updated_at = (timeutils.utcnow() - datetime.timedelta(0, 50)) service_dict = service_utils.format_service(service)