Fix performance of housekeeping DB clean up
The Housekeeping service grows in utilization of CPU as more amphorae are created and/or marked as DELETED. The problem lays on the SELECT statement constructed in get_all_deleted_expiring_amphora via the ORM -- it is joined eager loading all relationships. The task does not need such amount of information, only the amphora ID. The statement could be simplified by not loading any relationship or, at most, lazy loading them. This patch also fixes performance of cleaning up deleted and expired load balancers. The code was doing multiple round-trips to the database unecessarily: 1. retrieving all deleted LBs 2. for each LB, retrieving it again from DB to check expired date 3. delete LB Step 1 and 2 are now condensed in get_all_deleted_expiring(), making it a single round-trip. Story: 2004665 Task: 28643 Change-Id: Iffc960c7c3a986328cfded1b4e408931ab0a7877
This commit is contained in:
parent
9ce614ad84
commit
be552d99a4
@ -19,7 +19,6 @@ from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
from sqlalchemy.orm import exc as sqlalchemy_exceptions
|
||||
|
||||
from octavia.common import constants
|
||||
from octavia.controller.worker import controller_worker as cw
|
||||
from octavia.db import api as db_api
|
||||
from octavia.db import repositories as repo
|
||||
@ -72,24 +71,24 @@ class DatabaseCleanup(object):
|
||||
seconds=CONF.house_keeping.amphora_expiry_age)
|
||||
|
||||
session = db_api.get_session()
|
||||
expiring_amphora = self.amp_repo.get_all_deleted_expiring_amphora(
|
||||
session, exp_age=exp_age)
|
||||
amp_ids = self.amp_repo.get_all_deleted_expiring(session,
|
||||
exp_age=exp_age)
|
||||
|
||||
for amp in expiring_amphora:
|
||||
for amp_id in amp_ids:
|
||||
# If we're here, we already think the amp is expiring according to
|
||||
# the amphora table. Now check it is expired in the health table.
|
||||
# In this way, we ensure that amps aren't deleted unless they are
|
||||
# both expired AND no longer receiving zombie heartbeats.
|
||||
if self.amp_health_repo.check_amphora_health_expired(
|
||||
session, amp.id, exp_age):
|
||||
session, amp_id, exp_age):
|
||||
LOG.debug('Attempting to purge db record for Amphora ID: %s',
|
||||
amp.id)
|
||||
self.amp_repo.delete(session, id=amp.id)
|
||||
amp_id)
|
||||
self.amp_repo.delete(session, id=amp_id)
|
||||
try:
|
||||
self.amp_health_repo.delete(session, amphora_id=amp.id)
|
||||
self.amp_health_repo.delete(session, amphora_id=amp_id)
|
||||
except sqlalchemy_exceptions.NoResultFound:
|
||||
pass # Best effort delete, this record might not exist
|
||||
LOG.info('Purged db record for Amphora ID: %s', amp.id)
|
||||
LOG.info('Purged db record for Amphora ID: %s', amp_id)
|
||||
|
||||
def cleanup_load_balancers(self):
|
||||
"""Checks the DB for old load balancers and triggers their removal."""
|
||||
@ -97,15 +96,13 @@ class DatabaseCleanup(object):
|
||||
seconds=CONF.house_keeping.load_balancer_expiry_age)
|
||||
|
||||
session = db_api.get_session()
|
||||
load_balancers, _ = self.lb_repo.get_all(
|
||||
session, provisioning_status=constants.DELETED)
|
||||
lb_ids = self.lb_repo.get_all_deleted_expiring(session,
|
||||
exp_age=exp_age)
|
||||
|
||||
for lb in load_balancers:
|
||||
if self.lb_repo.check_load_balancer_expired(session, lb.id,
|
||||
exp_age):
|
||||
LOG.info('Attempting to delete load balancer id : %s', lb.id)
|
||||
self.lb_repo.delete(session, id=lb.id)
|
||||
LOG.info('Deleted load balancer id : %s', lb.id)
|
||||
for lb_id in lb_ids:
|
||||
LOG.info('Attempting to delete load balancer id : %s', lb_id)
|
||||
self.lb_repo.delete(session, id=lb_id)
|
||||
LOG.info('Deleted load balancer id : %s', lb_id)
|
||||
|
||||
|
||||
class CertRotation(object):
|
||||
|
@ -179,6 +179,31 @@ class BaseRepository(object):
|
||||
"""
|
||||
return bool(session.query(self.model_class).filter_by(id=id).first())
|
||||
|
||||
def get_all_deleted_expiring(self, session, exp_age):
|
||||
"""Get all previously deleted resources that are now expiring.
|
||||
|
||||
:param session: A Sql Alchemy database session.
|
||||
:param exp_age: A standard datetime delta which is used to see for how
|
||||
long can a resource live without updates before
|
||||
it is considered expired
|
||||
:returns: A list of resource IDs
|
||||
"""
|
||||
|
||||
expiry_time = datetime.datetime.utcnow() - exp_age
|
||||
|
||||
query = session.query(self.model_class).filter(
|
||||
self.model_class.updated_at < expiry_time)
|
||||
if hasattr(self.model_class, 'status'):
|
||||
query = query.filter_by(status=consts.DELETED)
|
||||
else:
|
||||
query = query.filter_by(operating_status=consts.DELETED)
|
||||
# Do not load any relationship
|
||||
query = query.options(noload('*'))
|
||||
model_list = query.all()
|
||||
|
||||
id_list = [model.id for model in model_list]
|
||||
return id_list
|
||||
|
||||
|
||||
class Repositories(object):
|
||||
def __init__(self):
|
||||
@ -809,30 +834,6 @@ class LoadBalancerRepository(BaseRepository):
|
||||
session.add(lb)
|
||||
return True
|
||||
|
||||
def check_load_balancer_expired(self, session, lb_id, exp_age=None):
|
||||
"""Checks if a given load balancer is expired.
|
||||
|
||||
:param session: A Sql Alchemy database session.
|
||||
:param lb_id: id of an load balancer object
|
||||
:param exp_age: A standard datetime delta which is used to see for how
|
||||
long can a load balancer live without updates before
|
||||
it is considered expired (default:
|
||||
CONF.house_keeping.load_balancer_expiry_age)
|
||||
:returns: boolean
|
||||
"""
|
||||
if not exp_age:
|
||||
exp_age = datetime.timedelta(
|
||||
seconds=CONF.house_keeping.load_balancer_expiry_age)
|
||||
|
||||
timestamp = datetime.datetime.utcnow() - exp_age
|
||||
lb = self.get(session, id=lb_id)
|
||||
if lb:
|
||||
# If a load balancer was never updated use its creation timestamp
|
||||
last_update = lb.updated_at or lb.created_at
|
||||
return last_update < timestamp
|
||||
# Load balancer was just deleted.
|
||||
return True
|
||||
|
||||
|
||||
class VipRepository(BaseRepository):
|
||||
model_class = models.Vip
|
||||
@ -1187,33 +1188,6 @@ class AmphoraRepository(BaseRepository):
|
||||
return db_lb.to_data_model()
|
||||
return None
|
||||
|
||||
def get_all_deleted_expiring_amphora(self, session, exp_age=None):
|
||||
|
||||
"""Get all previously deleted amphora that are now expiring.
|
||||
|
||||
:param session: A Sql Alchemy database session.
|
||||
:param exp_age: A standard datetime delta which is used to see for how
|
||||
long can an amphora live without updates before it is
|
||||
considered expired (default:
|
||||
CONF.house_keeping.amphora_expiry_age)
|
||||
:returns: [octavia.common.data_model]
|
||||
"""
|
||||
if not exp_age:
|
||||
exp_age = datetime.timedelta(
|
||||
seconds=CONF.house_keeping.amphora_expiry_age)
|
||||
|
||||
expiry_time = datetime.datetime.utcnow() - exp_age
|
||||
|
||||
query = session.query(self.model_class).filter_by(
|
||||
status=consts.DELETED).filter(
|
||||
self.model_class.updated_at < expiry_time)
|
||||
# Only make one trip to the database
|
||||
query = query.options(joinedload('*'))
|
||||
model_list = query.all()
|
||||
|
||||
data_model_list = [model.to_data_model() for model in model_list]
|
||||
return data_model_list
|
||||
|
||||
def get_spare_amphora_count(self, session):
|
||||
"""Get the count of the spare amphora.
|
||||
|
||||
|
@ -2881,36 +2881,6 @@ class LoadBalancerRepositoryTest(BaseRepositoryTest):
|
||||
lb = self.lb_repo.get(self.session, id=lb_id)
|
||||
self.assertEqual(constants.PENDING_UPDATE, lb.provisioning_status)
|
||||
|
||||
def test_check_load_balancer_expired_default_exp_age(self):
|
||||
"""When exp_age defaults to load_balancer_expiry_age."""
|
||||
newdate = datetime.datetime.utcnow() - datetime.timedelta(minutes=10)
|
||||
self.lb_repo.create(self.session, id=self.FAKE_UUID_1,
|
||||
project_id=self.FAKE_UUID_2,
|
||||
provisioning_status=constants.ACTIVE,
|
||||
operating_status=constants.ONLINE,
|
||||
enabled=True,
|
||||
updated_at=newdate)
|
||||
check_res = self.lb_repo.check_load_balancer_expired(
|
||||
self.session, self.FAKE_UUID_1)
|
||||
# Default load_balancer_expiry_age value is 1 week so load balancer
|
||||
# shouldn't be considered expired.
|
||||
self.assertFalse(check_res)
|
||||
|
||||
def test_check_load_balancer_expired_with_exp_age(self):
|
||||
"""When exp_age is passed as an argument."""
|
||||
exp_age = datetime.timedelta(
|
||||
seconds=self.FAKE_EXP_AGE)
|
||||
newdate = datetime.datetime.utcnow() - datetime.timedelta(minutes=10)
|
||||
self.lb_repo.create(self.session, id=self.FAKE_UUID_1,
|
||||
project_id=self.FAKE_UUID_2,
|
||||
provisioning_status=constants.ACTIVE,
|
||||
operating_status=constants.ONLINE,
|
||||
enabled=True,
|
||||
updated_at=newdate)
|
||||
check_res = self.lb_repo.check_load_balancer_expired(
|
||||
self.session, self.FAKE_UUID_1, exp_age)
|
||||
self.assertTrue(check_res)
|
||||
|
||||
|
||||
class VipRepositoryTest(BaseRepositoryTest):
|
||||
|
||||
@ -3149,7 +3119,7 @@ class AmphoraRepositoryTest(BaseRepositoryTest):
|
||||
amphora2 = self.create_amphora(
|
||||
self.FAKE_UUID_2, status=constants.DELETED)
|
||||
|
||||
expiring_list = self.amphora_repo.get_all_deleted_expiring_amphora(
|
||||
expiring_list = self.amphora_repo.get_all_deleted_expiring(
|
||||
self.session, exp_age=exp_age)
|
||||
expiring_ids = [amp.id for amp in expiring_list]
|
||||
self.assertIn(amphora1.id, expiring_ids)
|
||||
|
@ -117,10 +117,10 @@ class TestDatabaseCleanup(base.TestCase):
|
||||
vrrp_ip=self.FAKE_IP,
|
||||
ha_ip=self.FAKE_IP,
|
||||
updated_at=expired_time)
|
||||
self.amp_repo.get_all_deleted_expiring_amphora.return_value = [amphora]
|
||||
self.amp_repo.get_all_deleted_expiring.return_value = [amphora.id]
|
||||
self.amp_health_repo.check_amphora_health_expired.return_value = True
|
||||
self.dbclean.delete_old_amphorae()
|
||||
self.assertTrue(self.amp_repo.get_all_deleted_expiring_amphora.called)
|
||||
self.assertTrue(self.amp_repo.get_all_deleted_expiring.called)
|
||||
self.assertTrue(
|
||||
self.amp_health_repo.check_amphora_health_expired.called)
|
||||
self.assertTrue(self.amp_repo.delete.called)
|
||||
@ -138,9 +138,9 @@ class TestDatabaseCleanup(base.TestCase):
|
||||
vrrp_ip=self.FAKE_IP,
|
||||
ha_ip=self.FAKE_IP,
|
||||
updated_at=datetime.datetime.now())
|
||||
self.amp_repo.get_all_deleted_expiring_amphora.return_value = []
|
||||
self.amp_repo.get_all_deleted_expiring.return_value = []
|
||||
self.dbclean.delete_old_amphorae()
|
||||
self.assertTrue(self.amp_repo.get_all_deleted_expiring_amphora.called)
|
||||
self.assertTrue(self.amp_repo.get_all_deleted_expiring.called)
|
||||
self.assertFalse(
|
||||
self.amp_health_repo.check_amphora_health_expired.called)
|
||||
self.assertFalse(self.amp_repo.delete.called)
|
||||
@ -165,10 +165,10 @@ class TestDatabaseCleanup(base.TestCase):
|
||||
vrrp_ip=self.FAKE_IP,
|
||||
ha_ip=self.FAKE_IP,
|
||||
updated_at=expired_time)
|
||||
self.amp_repo.get_all_deleted_expiring_amphora.return_value = [amphora]
|
||||
self.amp_repo.get_all_deleted_expiring.return_value = [amphora.id]
|
||||
self.amp_health_repo.check_amphora_health_expired.return_value = False
|
||||
self.dbclean.delete_old_amphorae()
|
||||
self.assertTrue(self.amp_repo.get_all_deleted_expiring_amphora.called)
|
||||
self.assertTrue(self.amp_repo.get_all_deleted_expiring.called)
|
||||
self.assertTrue(
|
||||
self.amp_health_repo.check_amphora_health_expired.called)
|
||||
self.assertFalse(self.amp_repo.delete.called)
|
||||
@ -187,12 +187,13 @@ class TestDatabaseCleanup(base.TestCase):
|
||||
for expired_status in [True, False]:
|
||||
lb_repo = mock.MagicMock()
|
||||
self.dbclean.lb_repo = lb_repo
|
||||
lb_repo.get_all.return_value = ([load_balancer], None)
|
||||
lb_repo.check_load_balancer_expired.return_value = (
|
||||
expired_status)
|
||||
if expired_status:
|
||||
expiring_lbs = [load_balancer.id]
|
||||
else:
|
||||
expiring_lbs = []
|
||||
lb_repo.get_all_deleted_expiring.return_value = expiring_lbs
|
||||
self.dbclean.cleanup_load_balancers()
|
||||
self.assertTrue(lb_repo.get_all.called)
|
||||
self.assertTrue(lb_repo.check_load_balancer_expired.called)
|
||||
self.assertTrue(lb_repo.get_all_deleted_expiring.called)
|
||||
if expired_status:
|
||||
self.assertTrue(lb_repo.delete.called)
|
||||
else:
|
||||
|
@ -0,0 +1,6 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixed a performance issue where the Housekeeping service could
|
||||
significantly and incrementally utilize CPU as more amphorae and load
|
||||
balancers are created and/or marked as DELETED.
|
Loading…
Reference in New Issue
Block a user