Merge "Fix driver task pattern to reduce periodic db load" into stable/wallaby

Zuul 2021-09-16 16:51:54 +00:00 committed by Gerrit Code Review
commit 3699365a81
7 changed files with 76 additions and 30 deletions


@@ -201,6 +201,14 @@ class DracWSManBIOS(base.BIOSInterface):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
# NOTE(TheJulia): Evaluate if work is actually required before
# creating a task for every node in the deployment that does
# not have a lock and is not in maintenance mode.
if (not driver_internal_info.get("bios_config_job_ids")
and not driver_internal_info.get(
"factory_reset_time_before_reboot")):
continue
lock_purpose = 'checking async bios configuration jobs'
# Performing read-only/non-destructive work with shared lock
with task_manager.acquire(context, node_uuid,


@@ -478,6 +478,17 @@ class DracRedfishManagement(redfish_management.RedfishManagement):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
task_monitor_url = driver_internal_info.get(
'import_task_monitor_url')
# NOTE(TheJulia): Evaluate if a task monitor URL exists
# based upon our initial DB query before pulling a task for
# every node in the deployment, which reduces the overall
# number of background DB queries issued where no work is
# required.
if not task_monitor_url:
continue
lock_purpose = 'checking async import configuration task'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -485,10 +496,6 @@ class DracRedfishManagement(redfish_management.RedfishManagement):
if not isinstance(task.driver.management,
DracRedfishManagement):
continue
task_monitor_url = driver_internal_info.get(
'import_task_monitor_url')
if not task_monitor_url:
continue
self._check_import_configuration_task(
task, task_monitor_url)
except exception.NodeNotFound:
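
For clarity, the following is a simplified reconstruction of how the DracRedfishManagement periodic loop reads once both hunks above are applied. The enclosing method, its decorators, and imports are omitted; the shared=True argument and the body of the except branch are not visible in the hunks and are assumptions, not taken verbatim from this commit.

for (node_uuid, driver, conductor_group,
     driver_internal_info) in node_list:
    try:
        # Cheap pre-check against the data already returned by the
        # initial node query, before any task is created.
        task_monitor_url = driver_internal_info.get(
            'import_task_monitor_url')
        if not task_monitor_url:
            continue
        lock_purpose = 'checking async import configuration task'
        with task_manager.acquire(context, node_uuid,
                                  purpose=lock_purpose,
                                  shared=True) as task:
            if not isinstance(task.driver.management,
                              DracRedfishManagement):
                continue
            self._check_import_configuration_task(
                task, task_monitor_url)
    except exception.NodeNotFound:
        # Assumed handling: the node vanished between the query and
        # the lock attempt, so there is nothing left to do.
        continue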


@@ -1482,6 +1482,14 @@ class DracWSManRAID(base.RAIDInterface):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
job_ids = driver_internal_info.get('raid_config_job_ids')
# NOTE(TheJulia): Evaluate if there is work to be done
# based upon the original DB query's results, so we don't
# create tasks for every node in the deployment.
if not job_ids:
continue
lock_purpose = 'checking async raid configuration jobs'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -1489,10 +1497,6 @@ class DracWSManRAID(base.RAIDInterface):
if not isinstance(task.driver.raid, DracWSManRAID):
continue
job_ids = driver_internal_info.get('raid_config_job_ids')
if not job_ids:
continue
self._check_node_raid_jobs(task)
except exception.NodeNotFound:


@@ -434,6 +434,13 @@ class IRMCRAID(base.RAIDInterface):
node_list = manager.iter_nodes(fields=fields, filters=filters)
for (node_uuid, driver, conductor_group, raid_config) in node_list:
try:
# NOTE(TheJulia): Evaluate based upon the presence of RAID
# configuration before acquiring a task, rather than after,
# so we don't create excess node task objects and their
# related DB queries.
if not raid_config or raid_config.get('fgi_status'):
continue
lock_purpose = 'checking async RAID configuration tasks'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -444,8 +451,6 @@ class IRMCRAID(base.RAIDInterface):
continue
if task.node.target_raid_config is None:
continue
if not raid_config or raid_config.get('fgi_status'):
continue
task.upgrade_lock()
if node.provision_state != states.CLEANWAIT:
continue


@@ -872,6 +872,15 @@ class RedfishManagement(base.ManagementInterface):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
firmware_updates = driver_internal_info.get(
'firmware_updates')
# NOTE(TheJulia): If we don't have an entry upfront, we can
# safely skip past the node as we know no work is required
# here, minimizing the number of potential nodes to visit.
if not firmware_updates:
continue
lock_purpose = 'checking async firmware update failed.'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -880,11 +889,6 @@ class RedfishManagement(base.ManagementInterface):
RedfishManagement):
continue
firmware_updates = driver_internal_info.get(
'firmware_updates')
if not firmware_updates:
continue
node = task.node
# A firmware update failed. Discard any remaining firmware
@@ -921,6 +925,14 @@ class RedfishManagement(base.ManagementInterface):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
firmware_updates = driver_internal_info.get(
'firmware_updates')
# NOTE(TheJulia): Check and skip upfront before creating a
# task so we don't generate additional tasks and DB queries
# for every unlocked node in CLEANWAIT.
if not firmware_updates:
continue
lock_purpose = 'checking async firmware update tasks.'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -929,11 +941,6 @@ class RedfishManagement(base.ManagementInterface):
RedfishManagement):
continue
firmware_updates = driver_internal_info.get(
'firmware_updates')
if not firmware_updates:
continue
self._check_node_firmware_update(task)
except exception.NodeNotFound:


@@ -1033,6 +1033,15 @@ class RedfishRAID(base.RAIDInterface):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
raid_configs = driver_internal_info.get(
'raid_configs')
# NOTE(TheJulia): Evaluate the presence of RAID configuration
# activity before pulling a task, so we don't needlessly
# issue database queries for tasks that would be skipped
# anyhow.
if not raid_configs:
continue
lock_purpose = 'checking async RAID config failed.'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -1040,11 +1049,6 @@ class RedfishRAID(base.RAIDInterface):
if not isinstance(task.driver.raid, RedfishRAID):
continue
raid_configs = driver_internal_info.get(
'raid_configs')
if not raid_configs:
continue
node = task.node
# A RAID config failed. Discard any remaining RAID
@@ -1081,6 +1085,14 @@ class RedfishRAID(base.RAIDInterface):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
raid_configs = driver_internal_info.get(
'raid_configs')
# NOTE(TheJulia): Skip to the next record if we do not
# have RAID configuration tasks, so we don't pull tasks
# for every unrelated node in CLEANWAIT.
if not raid_configs:
continue
lock_purpose = 'checking async RAID config tasks.'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -1088,11 +1100,6 @@ class RedfishRAID(base.RAIDInterface):
if not isinstance(task.driver.raid, RedfishRAID):
continue
raid_configs = driver_internal_info.get(
'raid_configs')
if not raid_configs:
continue
self._check_node_raid_config(task)
except exception.NodeNotFound:


@@ -0,0 +1,8 @@
---
fixes:
- |
Fixes the pattern of execution for periodic tasks such that the majority
of drivers now evaluate *if* work needs to be performed in advance of
creating a node task. Depending on the individual driver query pattern,
this prevents excess database queries from being triggered with every
task execution.
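
As an illustration of the pattern described in the release note, here is a minimal before/after sketch. The 'pending_job_ids' field and the _process() helper are hypothetical placeholders standing in for the per-driver specifics shown in the diff; they are not names from Ironic itself.

# Before: a task object (and its extra DB reads) was created for
# every unlocked, non-maintenance node, and only then was the
# "is there any work?" check performed.
# NOTE: 'pending_job_ids' and _process() are illustrative placeholders.
for (node_uuid, driver, conductor_group,
     driver_internal_info) in node_list:
    with task_manager.acquire(context, node_uuid,
                              purpose='checking async jobs',
                              shared=True) as task:
        if not driver_internal_info.get('pending_job_ids'):
            continue  # the task above was created for nothing
        _process(task)

# After: the check runs first, against the fields already returned
# by the initial node query, so no task or additional DB query is
# created when there is no work to do.
for (node_uuid, driver, conductor_group,
     driver_internal_info) in node_list:
    if not driver_internal_info.get('pending_job_ids'):
        continue  # skip without touching the DB again
    with task_manager.acquire(context, node_uuid,
                              purpose='checking async jobs',
                              shared=True) as task:
        _process(task)

Because the pre-check only inspects data already returned by the initial iter_nodes() query, skipping a node costs no more than a dictionary lookup.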