Merge "Fix performance of housekeeping DB clean up"

This commit is contained in:
Zuul 2019-03-11 18:56:32 +00:00 committed by Gerrit Code Review
commit 7fc5406bd2
5 changed files with 58 additions and 110 deletions

View File

@ -19,7 +19,6 @@ from oslo_config import cfg
from oslo_log import log as logging
from sqlalchemy.orm import exc as sqlalchemy_exceptions
from octavia.common import constants
from octavia.controller.worker import controller_worker as cw
from octavia.db import api as db_api
from octavia.db import repositories as repo
@ -72,24 +71,24 @@ class DatabaseCleanup(object):
seconds=CONF.house_keeping.amphora_expiry_age)
session = db_api.get_session()
expiring_amphora = self.amp_repo.get_all_deleted_expiring_amphora(
session, exp_age=exp_age)
amp_ids = self.amp_repo.get_all_deleted_expiring(session,
exp_age=exp_age)
for amp in expiring_amphora:
for amp_id in amp_ids:
# If we're here, we already think the amp is expiring according to
# the amphora table. Now check it is expired in the health table.
# In this way, we ensure that amps aren't deleted unless they are
# both expired AND no longer receiving zombie heartbeats.
if self.amp_health_repo.check_amphora_health_expired(
session, amp.id, exp_age):
session, amp_id, exp_age):
LOG.debug('Attempting to purge db record for Amphora ID: %s',
amp.id)
self.amp_repo.delete(session, id=amp.id)
amp_id)
self.amp_repo.delete(session, id=amp_id)
try:
self.amp_health_repo.delete(session, amphora_id=amp.id)
self.amp_health_repo.delete(session, amphora_id=amp_id)
except sqlalchemy_exceptions.NoResultFound:
pass # Best effort delete, this record might not exist
LOG.info('Purged db record for Amphora ID: %s', amp.id)
LOG.info('Purged db record for Amphora ID: %s', amp_id)
def cleanup_load_balancers(self):
"""Checks the DB for old load balancers and triggers their removal."""
@ -97,15 +96,13 @@ class DatabaseCleanup(object):
seconds=CONF.house_keeping.load_balancer_expiry_age)
session = db_api.get_session()
load_balancers, _ = self.lb_repo.get_all(
session, provisioning_status=constants.DELETED)
lb_ids = self.lb_repo.get_all_deleted_expiring(session,
exp_age=exp_age)
for lb in load_balancers:
if self.lb_repo.check_load_balancer_expired(session, lb.id,
exp_age):
LOG.info('Attempting to delete load balancer id : %s', lb.id)
self.lb_repo.delete(session, id=lb.id)
LOG.info('Deleted load balancer id : %s', lb.id)
for lb_id in lb_ids:
LOG.info('Attempting to delete load balancer id : %s', lb_id)
self.lb_repo.delete(session, id=lb_id)
LOG.info('Deleted load balancer id : %s', lb_id)
class CertRotation(object):

View File

@ -179,6 +179,31 @@ class BaseRepository(object):
"""
return bool(session.query(self.model_class).filter_by(id=id).first())
def get_all_deleted_expiring(self, session, exp_age):
"""Get all previously deleted resources that are now expiring.
:param session: A Sql Alchemy database session.
:param exp_age: A standard datetime delta which is used to see for how
long can a resource live without updates before
it is considered expired
:returns: A list of resource IDs
"""
expiry_time = datetime.datetime.utcnow() - exp_age
query = session.query(self.model_class).filter(
self.model_class.updated_at < expiry_time)
if hasattr(self.model_class, 'status'):
query = query.filter_by(status=consts.DELETED)
else:
query = query.filter_by(operating_status=consts.DELETED)
# Do not load any relationship
query = query.options(noload('*'))
model_list = query.all()
id_list = [model.id for model in model_list]
return id_list
class Repositories(object):
def __init__(self):
@ -809,30 +834,6 @@ class LoadBalancerRepository(BaseRepository):
session.add(lb)
return True
def check_load_balancer_expired(self, session, lb_id, exp_age=None):
"""Checks if a given load balancer is expired.
:param session: A Sql Alchemy database session.
:param lb_id: id of an load balancer object
:param exp_age: A standard datetime delta which is used to see for how
long can a load balancer live without updates before
it is considered expired (default:
CONF.house_keeping.load_balancer_expiry_age)
:returns: boolean
"""
if not exp_age:
exp_age = datetime.timedelta(
seconds=CONF.house_keeping.load_balancer_expiry_age)
timestamp = datetime.datetime.utcnow() - exp_age
lb = self.get(session, id=lb_id)
if lb:
# If a load balancer was never updated use its creation timestamp
last_update = lb.updated_at or lb.created_at
return last_update < timestamp
# Load balancer was just deleted.
return True
class VipRepository(BaseRepository):
model_class = models.Vip
@ -1187,33 +1188,6 @@ class AmphoraRepository(BaseRepository):
return db_lb.to_data_model()
return None
def get_all_deleted_expiring_amphora(self, session, exp_age=None):
"""Get all previously deleted amphora that are now expiring.
:param session: A Sql Alchemy database session.
:param exp_age: A standard datetime delta which is used to see for how
long can an amphora live without updates before it is
considered expired (default:
CONF.house_keeping.amphora_expiry_age)
:returns: [octavia.common.data_model]
"""
if not exp_age:
exp_age = datetime.timedelta(
seconds=CONF.house_keeping.amphora_expiry_age)
expiry_time = datetime.datetime.utcnow() - exp_age
query = session.query(self.model_class).filter_by(
status=consts.DELETED).filter(
self.model_class.updated_at < expiry_time)
# Only make one trip to the database
query = query.options(joinedload('*'))
model_list = query.all()
data_model_list = [model.to_data_model() for model in model_list]
return data_model_list
def get_spare_amphora_count(self, session):
"""Get the count of the spare amphora.

View File

@ -2936,36 +2936,6 @@ class LoadBalancerRepositoryTest(BaseRepositoryTest):
lb = self.lb_repo.get(self.session, id=lb_id)
self.assertEqual(constants.PENDING_UPDATE, lb.provisioning_status)
def test_check_load_balancer_expired_default_exp_age(self):
"""When exp_age defaults to load_balancer_expiry_age."""
newdate = datetime.datetime.utcnow() - datetime.timedelta(minutes=10)
self.lb_repo.create(self.session, id=self.FAKE_UUID_1,
project_id=self.FAKE_UUID_2,
provisioning_status=constants.ACTIVE,
operating_status=constants.ONLINE,
enabled=True,
updated_at=newdate)
check_res = self.lb_repo.check_load_balancer_expired(
self.session, self.FAKE_UUID_1)
# Default load_balancer_expiry_age value is 1 week so load balancer
# shouldn't be considered expired.
self.assertFalse(check_res)
def test_check_load_balancer_expired_with_exp_age(self):
"""When exp_age is passed as an argument."""
exp_age = datetime.timedelta(
seconds=self.FAKE_EXP_AGE)
newdate = datetime.datetime.utcnow() - datetime.timedelta(minutes=10)
self.lb_repo.create(self.session, id=self.FAKE_UUID_1,
project_id=self.FAKE_UUID_2,
provisioning_status=constants.ACTIVE,
operating_status=constants.ONLINE,
enabled=True,
updated_at=newdate)
check_res = self.lb_repo.check_load_balancer_expired(
self.session, self.FAKE_UUID_1, exp_age)
self.assertTrue(check_res)
class VipRepositoryTest(BaseRepositoryTest):
@ -3213,7 +3183,7 @@ class AmphoraRepositoryTest(BaseRepositoryTest):
amphora2 = self.create_amphora(
self.FAKE_UUID_2, status=constants.DELETED)
expiring_list = self.amphora_repo.get_all_deleted_expiring_amphora(
expiring_list = self.amphora_repo.get_all_deleted_expiring(
self.session, exp_age=exp_age)
expiring_ids = [amp.id for amp in expiring_list]
self.assertIn(amphora1.id, expiring_ids)

View File

@ -117,10 +117,10 @@ class TestDatabaseCleanup(base.TestCase):
vrrp_ip=self.FAKE_IP,
ha_ip=self.FAKE_IP,
updated_at=expired_time)
self.amp_repo.get_all_deleted_expiring_amphora.return_value = [amphora]
self.amp_repo.get_all_deleted_expiring.return_value = [amphora.id]
self.amp_health_repo.check_amphora_health_expired.return_value = True
self.dbclean.delete_old_amphorae()
self.assertTrue(self.amp_repo.get_all_deleted_expiring_amphora.called)
self.assertTrue(self.amp_repo.get_all_deleted_expiring.called)
self.assertTrue(
self.amp_health_repo.check_amphora_health_expired.called)
self.assertTrue(self.amp_repo.delete.called)
@ -138,9 +138,9 @@ class TestDatabaseCleanup(base.TestCase):
vrrp_ip=self.FAKE_IP,
ha_ip=self.FAKE_IP,
updated_at=datetime.datetime.now())
self.amp_repo.get_all_deleted_expiring_amphora.return_value = []
self.amp_repo.get_all_deleted_expiring.return_value = []
self.dbclean.delete_old_amphorae()
self.assertTrue(self.amp_repo.get_all_deleted_expiring_amphora.called)
self.assertTrue(self.amp_repo.get_all_deleted_expiring.called)
self.assertFalse(
self.amp_health_repo.check_amphora_health_expired.called)
self.assertFalse(self.amp_repo.delete.called)
@ -165,10 +165,10 @@ class TestDatabaseCleanup(base.TestCase):
vrrp_ip=self.FAKE_IP,
ha_ip=self.FAKE_IP,
updated_at=expired_time)
self.amp_repo.get_all_deleted_expiring_amphora.return_value = [amphora]
self.amp_repo.get_all_deleted_expiring.return_value = [amphora.id]
self.amp_health_repo.check_amphora_health_expired.return_value = False
self.dbclean.delete_old_amphorae()
self.assertTrue(self.amp_repo.get_all_deleted_expiring_amphora.called)
self.assertTrue(self.amp_repo.get_all_deleted_expiring.called)
self.assertTrue(
self.amp_health_repo.check_amphora_health_expired.called)
self.assertFalse(self.amp_repo.delete.called)
@ -187,12 +187,13 @@ class TestDatabaseCleanup(base.TestCase):
for expired_status in [True, False]:
lb_repo = mock.MagicMock()
self.dbclean.lb_repo = lb_repo
lb_repo.get_all.return_value = ([load_balancer], None)
lb_repo.check_load_balancer_expired.return_value = (
expired_status)
if expired_status:
expiring_lbs = [load_balancer.id]
else:
expiring_lbs = []
lb_repo.get_all_deleted_expiring.return_value = expiring_lbs
self.dbclean.cleanup_load_balancers()
self.assertTrue(lb_repo.get_all.called)
self.assertTrue(lb_repo.check_load_balancer_expired.called)
self.assertTrue(lb_repo.get_all_deleted_expiring.called)
if expired_status:
self.assertTrue(lb_repo.delete.called)
else:

View File

@ -0,0 +1,6 @@
---
fixes:
- |
Fixed a performance issue where the Housekeeping service could
significantly and incrementally utilize CPU as more amphorae and load
balancers are created and/or marked as DELETED.