Browse Source

Show an engine as down only if its service record has missed two updates

We use the same periodic_interval both to update the service record
and to compare it with the current time when doing 'service list'. So
it's possible there will be a small window where 'service list' shows
the engine as down. Tools that use service list for monitoring would
wrongly assume the service is down. Let's change service list to
report the service as down only if it has not been updated within
2*periodic_interval.

Change-Id: I0f6a30e06bb214bb673930b31a2db946600926b0
Task: 35946
changes/16/672016/4
Rabi Mishra 2 years ago
parent
commit
fd23308f6e
  1. 12
      heat/common/service_utils.py
  2. 12
      heat/tests/test_common_service_utils.py

12
heat/common/service_utils.py

@ -51,14 +51,10 @@ def format_service(service):
return
status = 'down'
if service.updated_at is not None:
if ((timeutils.utcnow() - service.updated_at).total_seconds()
<= service.report_interval):
status = 'up'
else:
if ((timeutils.utcnow() - service.created_at).total_seconds()
<= service.report_interval):
status = 'up'
last_updated = service.updated_at or service.created_at
check_interval = (timeutils.utcnow() - last_updated).total_seconds()
if check_interval <= 2 * service.report_interval:
status = 'up'
result = {
SERVICE_ID: service.id,

12
heat/tests/test_common_service_utils.py

@ -51,23 +51,23 @@ class TestServiceUtils(common.HeatTestCase):
self.assertEqual(service_dict['status'], 'up')
# check again within first report_interval time (60)
# check again within first report_interval time
service_dict = service_utils.format_service(service)
self.assertEqual(service_dict['status'], 'up')
# check update not happen within report_interval time (60+)
# check update not happen within 2*report_interval time
service.created_at = (timeutils.utcnow() -
datetime.timedelta(0, 70))
datetime.timedelta(0, 130))
service_dict = service_utils.format_service(service)
self.assertEqual(service_dict['status'], 'down')
# check update happened after report_interval time (60+)
# check update happened after 2* report_interval time
service.updated_at = (timeutils.utcnow() -
datetime.timedelta(0, 70))
datetime.timedelta(0, 130))
service_dict = service_utils.format_service(service)
self.assertEqual(service_dict['status'], 'down')
# check update happened within report_interval time (60)
# check update happened within report_interval time
service.updated_at = (timeutils.utcnow() -
datetime.timedelta(0, 50))
service_dict = service_utils.format_service(service)

Loading…
Cancel
Save