Fix service filtering when claiming clusters
The health manager is not claiming clusters accurately when service is restarted. The reason is that there are some dead service records to be removed next time one of the engines gets restarted. This patch adds a check about the service's status and filters the service records based on the check result. Change-Id: Id482e74068cc1cb4394c79f14f1c6442cf9f717b Partial-Bug: #1613552
This commit is contained in:
parent
7a47a3fd8d
commit
ad8a538094
@ -1188,11 +1188,12 @@ def service_get_all(context):
|
|||||||
# HealthRegistry
|
# HealthRegistry
|
||||||
def registry_claim(context, engine_id):
|
def registry_claim(context, engine_id):
|
||||||
with session_for_write() as session:
|
with session_for_write() as session:
|
||||||
q_eng = session.query(models.Service)
|
engines = session.query(models.Service).all()
|
||||||
svc_ids = [s.id for s in q_eng.all()]
|
svc_ids = [e.id for e in engines if not utils.is_service_dead(e)]
|
||||||
|
|
||||||
q_reg = session.query(models.HealthRegistry)
|
q_reg = session.query(models.HealthRegistry)
|
||||||
q_reg = q_reg.filter(models.HealthRegistry.engine_id.notin_(svc_ids))
|
if svc_ids:
|
||||||
|
q_reg = q_reg.filter(
|
||||||
|
models.HealthRegistry.engine_id.notin_(svc_ids))
|
||||||
q_reg.update({'engine_id': engine_id}, synchronize_session=False)
|
q_reg.update({'engine_id': engine_id}, synchronize_session=False)
|
||||||
result = q_reg.all()
|
result = q_reg.all()
|
||||||
return result
|
return result
|
||||||
|
@ -12,6 +12,9 @@
|
|||||||
|
|
||||||
import six
|
import six
|
||||||
|
|
||||||
|
from oslo_config import cfg
|
||||||
|
from oslo_utils import timeutils
|
||||||
|
|
||||||
|
|
||||||
def exact_filter(query, model, filters):
|
def exact_filter(query, model, filters):
|
||||||
"""Applies exact match filtering to a query.
|
"""Applies exact match filtering to a query.
|
||||||
@ -73,3 +76,11 @@ def get_sort_params(value, default_key=None):
|
|||||||
dirs.append('asc')
|
dirs.append('asc')
|
||||||
|
|
||||||
return keys, dirs
|
return keys, dirs
|
||||||
|
|
||||||
|
|
||||||
|
def is_service_dead(service):
|
||||||
|
"""Check if a given service is dead."""
|
||||||
|
cfg.CONF.import_opt("periodic_interval", "senlin.common.config")
|
||||||
|
max_elapse = 2 * cfg.CONF.periodic_interval
|
||||||
|
|
||||||
|
return timeutils.is_older_than(service.updated_at, max_elapse)
|
||||||
|
@ -10,8 +10,10 @@
|
|||||||
# License for the specific language governing permissions and limitations
|
# License for the specific language governing permissions and limitations
|
||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
|
import mock
|
||||||
|
|
||||||
from senlin.db.sqlalchemy import api as db_api
|
from senlin.db.sqlalchemy import api as db_api
|
||||||
|
from senlin.db.sqlalchemy import utils as db_utils
|
||||||
from senlin.tests.unit.common import base
|
from senlin.tests.unit.common import base
|
||||||
from senlin.tests.unit.common import utils
|
from senlin.tests.unit.common import utils
|
||||||
|
|
||||||
@ -58,6 +60,23 @@ class DBAPIRegistryTest(base.SenlinTestCase):
|
|||||||
self.assertEqual('ENGINE_ID', registries[0].engine_id)
|
self.assertEqual('ENGINE_ID', registries[0].engine_id)
|
||||||
self.assertEqual('ENGINE_ID', registries[1].engine_id)
|
self.assertEqual('ENGINE_ID', registries[1].engine_id)
|
||||||
|
|
||||||
|
@mock.patch.object(db_utils, 'is_service_dead')
|
||||||
|
def test_registry_claim_with_dead_engine(self, mock_check):
|
||||||
|
db_api.service_create(self.ctx, 'SERVICE_ID_DEAD')
|
||||||
|
self._create_registry(
|
||||||
|
cluster_id='CLUSTER_1', check_type='NODE_STATUS_POLLING',
|
||||||
|
interval=60, params={}, engine_id='SERVICE_ID')
|
||||||
|
self._create_registry(
|
||||||
|
cluster_id='CLUSTER_1', check_type='NODE_STATUS_POLLING',
|
||||||
|
interval=60, params={}, engine_id='SERVICE_ID_DEAD')
|
||||||
|
|
||||||
|
mock_check.side_effect = [False, True]
|
||||||
|
|
||||||
|
registries = db_api.registry_claim(self.ctx, engine_id='ENGINE_ID')
|
||||||
|
|
||||||
|
self.assertEqual(1, len(registries))
|
||||||
|
self.assertEqual('ENGINE_ID', registries[0].engine_id)
|
||||||
|
|
||||||
def test_registry_delete(self):
|
def test_registry_delete(self):
|
||||||
registry = self._create_registry('CLUSTER_ID',
|
registry = self._create_registry('CLUSTER_ID',
|
||||||
check_type='NODE_STATUS_POLLING',
|
check_type='NODE_STATUS_POLLING',
|
||||||
|
@ -11,6 +11,8 @@
|
|||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
import mock
|
import mock
|
||||||
|
from oslo_config import cfg
|
||||||
|
from oslo_utils import timeutils
|
||||||
|
|
||||||
from senlin.db.sqlalchemy import utils
|
from senlin.db.sqlalchemy import utils
|
||||||
from senlin.tests.unit.common import base
|
from senlin.tests.unit.common import base
|
||||||
@ -95,3 +97,22 @@ class SortParamTest(base.SenlinTestCase):
|
|||||||
self.assertEqual(['foo', 'bar', 'id'], keys)
|
self.assertEqual(['foo', 'bar', 'id'], keys)
|
||||||
self.assertEqual(['asc-nullsfirst', 'asc-nullsfirst',
|
self.assertEqual(['asc-nullsfirst', 'asc-nullsfirst',
|
||||||
'asc-nullsfirst'], dirs)
|
'asc-nullsfirst'], dirs)
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceAliveTest(base.SenlinTestCase):
|
||||||
|
|
||||||
|
def test_alive(self):
|
||||||
|
cfg.CONF.set_override('periodic_interval', 100)
|
||||||
|
service = mock.Mock(updated_at=timeutils.utcnow())
|
||||||
|
|
||||||
|
res = utils.is_service_dead(service)
|
||||||
|
|
||||||
|
self.assertFalse(res)
|
||||||
|
|
||||||
|
def test_dead(self):
|
||||||
|
cfg.CONF.set_override('periodic_interval', 0)
|
||||||
|
service = mock.Mock(updated_at=timeutils.utcnow())
|
||||||
|
|
||||||
|
res = utils.is_service_dead(service)
|
||||||
|
|
||||||
|
self.assertTrue(res)
|
||||||
|
Loading…
Reference in New Issue
Block a user