diff --git a/octavia/controller/housekeeping/house_keeping.py b/octavia/controller/housekeeping/house_keeping.py index 48bc475b65..991b535953 100644 --- a/octavia/controller/housekeeping/house_keeping.py +++ b/octavia/controller/housekeeping/house_keeping.py @@ -19,7 +19,6 @@ from oslo_config import cfg from oslo_log import log as logging from sqlalchemy.orm import exc as sqlalchemy_exceptions -from octavia.common import constants from octavia.controller.worker import controller_worker as cw from octavia.db import api as db_api from octavia.db import repositories as repo @@ -72,24 +71,24 @@ class DatabaseCleanup(object): seconds=CONF.house_keeping.amphora_expiry_age) session = db_api.get_session() - expiring_amphora = self.amp_repo.get_all_deleted_expiring_amphora( - session, exp_age=exp_age) + amp_ids = self.amp_repo.get_all_deleted_expiring(session, + exp_age=exp_age) - for amp in expiring_amphora: + for amp_id in amp_ids: # If we're here, we already think the amp is expiring according to # the amphora table. Now check it is expired in the health table. # In this way, we ensure that amps aren't deleted unless they are # both expired AND no longer receiving zombie heartbeats. if self.amp_health_repo.check_amphora_health_expired( - session, amp.id, exp_age): + session, amp_id, exp_age): LOG.debug('Attempting to purge db record for Amphora ID: %s', - amp.id) - self.amp_repo.delete(session, id=amp.id) + amp_id) + self.amp_repo.delete(session, id=amp_id) try: - self.amp_health_repo.delete(session, amphora_id=amp.id) + self.amp_health_repo.delete(session, amphora_id=amp_id) except sqlalchemy_exceptions.NoResultFound: pass # Best effort delete, this record might not exist - LOG.info('Purged db record for Amphora ID: %s', amp.id) + LOG.info('Purged db record for Amphora ID: %s', amp_id) def cleanup_load_balancers(self): """Checks the DB for old load balancers and triggers their removal.""" @@ -97,15 +96,13 @@ class DatabaseCleanup(object): seconds=CONF.house_keeping.load_balancer_expiry_age) session = db_api.get_session() - load_balancers, _ = self.lb_repo.get_all( - session, provisioning_status=constants.DELETED) + lb_ids = self.lb_repo.get_all_deleted_expiring(session, + exp_age=exp_age) - for lb in load_balancers: - if self.lb_repo.check_load_balancer_expired(session, lb.id, - exp_age): - LOG.info('Attempting to delete load balancer id : %s', lb.id) - self.lb_repo.delete(session, id=lb.id) - LOG.info('Deleted load balancer id : %s', lb.id) + for lb_id in lb_ids: + LOG.info('Attempting to delete load balancer id : %s', lb_id) + self.lb_repo.delete(session, id=lb_id) + LOG.info('Deleted load balancer id : %s', lb_id) class CertRotation(object): diff --git a/octavia/db/repositories.py b/octavia/db/repositories.py index 28ea5e643c..8fcf6a54e9 100644 --- a/octavia/db/repositories.py +++ b/octavia/db/repositories.py @@ -179,6 +179,31 @@ class BaseRepository(object): """ return bool(session.query(self.model_class).filter_by(id=id).first()) + def get_all_deleted_expiring(self, session, exp_age): + """Get all previously deleted resources that are now expiring. + + :param session: A Sql Alchemy database session. + :param exp_age: A standard datetime delta which is used to see for how + long can a resource live without updates before + it is considered expired + :returns: A list of resource IDs + """ + + expiry_time = datetime.datetime.utcnow() - exp_age + + query = session.query(self.model_class).filter( + self.model_class.updated_at < expiry_time) + if hasattr(self.model_class, 'status'): + query = query.filter_by(status=consts.DELETED) + else: + query = query.filter_by(operating_status=consts.DELETED) + # Do not load any relationship + query = query.options(noload('*')) + model_list = query.all() + + id_list = [model.id for model in model_list] + return id_list + class Repositories(object): def __init__(self): @@ -809,30 +834,6 @@ class LoadBalancerRepository(BaseRepository): session.add(lb) return True - def check_load_balancer_expired(self, session, lb_id, exp_age=None): - """Checks if a given load balancer is expired. - - :param session: A Sql Alchemy database session. - :param lb_id: id of an load balancer object - :param exp_age: A standard datetime delta which is used to see for how - long can a load balancer live without updates before - it is considered expired (default: - CONF.house_keeping.load_balancer_expiry_age) - :returns: boolean - """ - if not exp_age: - exp_age = datetime.timedelta( - seconds=CONF.house_keeping.load_balancer_expiry_age) - - timestamp = datetime.datetime.utcnow() - exp_age - lb = self.get(session, id=lb_id) - if lb: - # If a load balancer was never updated use its creation timestamp - last_update = lb.updated_at or lb.created_at - return last_update < timestamp - # Load balancer was just deleted. - return True - class VipRepository(BaseRepository): model_class = models.Vip @@ -1187,33 +1188,6 @@ class AmphoraRepository(BaseRepository): return db_lb.to_data_model() return None - def get_all_deleted_expiring_amphora(self, session, exp_age=None): - - """Get all previously deleted amphora that are now expiring. - - :param session: A Sql Alchemy database session. - :param exp_age: A standard datetime delta which is used to see for how - long can an amphora live without updates before it is - considered expired (default: - CONF.house_keeping.amphora_expiry_age) - :returns: [octavia.common.data_model] - """ - if not exp_age: - exp_age = datetime.timedelta( - seconds=CONF.house_keeping.amphora_expiry_age) - - expiry_time = datetime.datetime.utcnow() - exp_age - - query = session.query(self.model_class).filter_by( - status=consts.DELETED).filter( - self.model_class.updated_at < expiry_time) - # Only make one trip to the database - query = query.options(joinedload('*')) - model_list = query.all() - - data_model_list = [model.to_data_model() for model in model_list] - return data_model_list - def get_spare_amphora_count(self, session): """Get the count of the spare amphora. diff --git a/octavia/tests/functional/db/test_repositories.py b/octavia/tests/functional/db/test_repositories.py index 9059a40c3a..003901e907 100644 --- a/octavia/tests/functional/db/test_repositories.py +++ b/octavia/tests/functional/db/test_repositories.py @@ -2936,36 +2936,6 @@ class LoadBalancerRepositoryTest(BaseRepositoryTest): lb = self.lb_repo.get(self.session, id=lb_id) self.assertEqual(constants.PENDING_UPDATE, lb.provisioning_status) - def test_check_load_balancer_expired_default_exp_age(self): - """When exp_age defaults to load_balancer_expiry_age.""" - newdate = datetime.datetime.utcnow() - datetime.timedelta(minutes=10) - self.lb_repo.create(self.session, id=self.FAKE_UUID_1, - project_id=self.FAKE_UUID_2, - provisioning_status=constants.ACTIVE, - operating_status=constants.ONLINE, - enabled=True, - updated_at=newdate) - check_res = self.lb_repo.check_load_balancer_expired( - self.session, self.FAKE_UUID_1) - # Default load_balancer_expiry_age value is 1 week so load balancer - # shouldn't be considered expired. - self.assertFalse(check_res) - - def test_check_load_balancer_expired_with_exp_age(self): - """When exp_age is passed as an argument.""" - exp_age = datetime.timedelta( - seconds=self.FAKE_EXP_AGE) - newdate = datetime.datetime.utcnow() - datetime.timedelta(minutes=10) - self.lb_repo.create(self.session, id=self.FAKE_UUID_1, - project_id=self.FAKE_UUID_2, - provisioning_status=constants.ACTIVE, - operating_status=constants.ONLINE, - enabled=True, - updated_at=newdate) - check_res = self.lb_repo.check_load_balancer_expired( - self.session, self.FAKE_UUID_1, exp_age) - self.assertTrue(check_res) - class VipRepositoryTest(BaseRepositoryTest): @@ -3213,7 +3183,7 @@ class AmphoraRepositoryTest(BaseRepositoryTest): amphora2 = self.create_amphora( self.FAKE_UUID_2, status=constants.DELETED) - expiring_list = self.amphora_repo.get_all_deleted_expiring_amphora( + expiring_list = self.amphora_repo.get_all_deleted_expiring( self.session, exp_age=exp_age) expiring_ids = [amp.id for amp in expiring_list] self.assertIn(amphora1.id, expiring_ids) diff --git a/octavia/tests/unit/controller/housekeeping/test_house_keeping.py b/octavia/tests/unit/controller/housekeeping/test_house_keeping.py index 402e5037f1..4022d485a1 100644 --- a/octavia/tests/unit/controller/housekeeping/test_house_keeping.py +++ b/octavia/tests/unit/controller/housekeeping/test_house_keeping.py @@ -117,10 +117,10 @@ class TestDatabaseCleanup(base.TestCase): vrrp_ip=self.FAKE_IP, ha_ip=self.FAKE_IP, updated_at=expired_time) - self.amp_repo.get_all_deleted_expiring_amphora.return_value = [amphora] + self.amp_repo.get_all_deleted_expiring.return_value = [amphora.id] self.amp_health_repo.check_amphora_health_expired.return_value = True self.dbclean.delete_old_amphorae() - self.assertTrue(self.amp_repo.get_all_deleted_expiring_amphora.called) + self.assertTrue(self.amp_repo.get_all_deleted_expiring.called) self.assertTrue( self.amp_health_repo.check_amphora_health_expired.called) self.assertTrue(self.amp_repo.delete.called) @@ -138,9 +138,9 @@ class TestDatabaseCleanup(base.TestCase): vrrp_ip=self.FAKE_IP, ha_ip=self.FAKE_IP, updated_at=datetime.datetime.now()) - self.amp_repo.get_all_deleted_expiring_amphora.return_value = [] + self.amp_repo.get_all_deleted_expiring.return_value = [] self.dbclean.delete_old_amphorae() - self.assertTrue(self.amp_repo.get_all_deleted_expiring_amphora.called) + self.assertTrue(self.amp_repo.get_all_deleted_expiring.called) self.assertFalse( self.amp_health_repo.check_amphora_health_expired.called) self.assertFalse(self.amp_repo.delete.called) @@ -165,10 +165,10 @@ class TestDatabaseCleanup(base.TestCase): vrrp_ip=self.FAKE_IP, ha_ip=self.FAKE_IP, updated_at=expired_time) - self.amp_repo.get_all_deleted_expiring_amphora.return_value = [amphora] + self.amp_repo.get_all_deleted_expiring.return_value = [amphora.id] self.amp_health_repo.check_amphora_health_expired.return_value = False self.dbclean.delete_old_amphorae() - self.assertTrue(self.amp_repo.get_all_deleted_expiring_amphora.called) + self.assertTrue(self.amp_repo.get_all_deleted_expiring.called) self.assertTrue( self.amp_health_repo.check_amphora_health_expired.called) self.assertFalse(self.amp_repo.delete.called) @@ -187,12 +187,13 @@ class TestDatabaseCleanup(base.TestCase): for expired_status in [True, False]: lb_repo = mock.MagicMock() self.dbclean.lb_repo = lb_repo - lb_repo.get_all.return_value = ([load_balancer], None) - lb_repo.check_load_balancer_expired.return_value = ( - expired_status) + if expired_status: + expiring_lbs = [load_balancer.id] + else: + expiring_lbs = [] + lb_repo.get_all_deleted_expiring.return_value = expiring_lbs self.dbclean.cleanup_load_balancers() - self.assertTrue(lb_repo.get_all.called) - self.assertTrue(lb_repo.check_load_balancer_expired.called) + self.assertTrue(lb_repo.get_all_deleted_expiring.called) if expired_status: self.assertTrue(lb_repo.delete.called) else: diff --git a/releasenotes/notes/fix-housekeeping-db-performance-b0d0fcfcce696314.yaml b/releasenotes/notes/fix-housekeeping-db-performance-b0d0fcfcce696314.yaml new file mode 100644 index 0000000000..1e09933afb --- /dev/null +++ b/releasenotes/notes/fix-housekeeping-db-performance-b0d0fcfcce696314.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fixed a performance issue where the Housekeeping service could + significantly and incrementally utilize CPU as more amphorae and load + balancers are created and/or marked as DELETED.