From f4e9e7d00fd561fc0cd60aab1cfb6819cb2f453b Mon Sep 17 00:00:00 2001
From: Jiping Ma
Date: Mon, 15 Dec 2025 02:29:34 +0000
Subject: [PATCH] kernel-rt: dm-snapshot: fix 'scheduling while atomic' on rt
 kernel

We observed "BUG: scheduling while atomic: lvm/1380/0x00000003" on the
rt kernel during the upgrade and rollback tests on 6.6, 6.12 and
mainline. The issue is related to dm_exception_table_lock(&lock), in
which preempt_disable() is called twice. If the code between
dm_exception_table_lock(&lock) and dm_exception_table_unlock(&lock)
takes an rt_spin_lock, it triggers a splat such as "BUG: scheduling
while atomic: kworker/u72:11/349/0x00000003", because the preempt count
is 3 at that point. Several places in dm-snap.c are affected in the
same way, such as dm_add_exception(), pending_complete() and
snapshot_map().

Fix this by converting a hlist_bl spinlock into a regular spinlock.

Cherry-pick the upstream commit
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8581b19eb2c5
("dm-snapshot: fix 'scheduling while atomic' on real-time kernels").

Also remove the patch
0015-sched-fair-Block-delayed-tasks-on-throttled-hierarch.patch, which
is no longer required after the kernel upgrade to 6.12.57.

Verification:
- ISO builds successfully for rt and std.
- Ran the upgrade and rollback tests for more than one day.

Closes-Bug: 2136084
Change-Id: I7a7d094c60f8f8fd9f4da3c441fcda89f6048241
Signed-off-by: Jiping Ma
---
 ...scheduling-while-atomic-on-real-time.patch | 236 ++++++++++++++++++
 kernel-rt/debian/bullseye/patches/series      |   1 +
 ...scheduling-while-atomic-on-real-time.patch | 236 ++++++++++++++++++
 ...-delayed-tasks-on-throttled-hierarch.patch |  97 -------
 kernel-rt/debian/trixie/patches/series        |   1 +
 5 files changed, 474 insertions(+), 97 deletions(-)
 create mode 100644 kernel-rt/debian/bullseye/patches/0015-dm-snapshot-fix-scheduling-while-atomic-on-real-time.patch
 create mode 100644 kernel-rt/debian/trixie/patches/0015-dm-snapshot-fix-scheduling-while-atomic-on-real-time.patch
 delete mode 100644 kernel-rt/debian/trixie/patches/0015-sched-fair-Block-delayed-tasks-on-throttled-hierarch.patch

diff --git a/kernel-rt/debian/bullseye/patches/0015-dm-snapshot-fix-scheduling-while-atomic-on-real-time.patch b/kernel-rt/debian/bullseye/patches/0015-dm-snapshot-fix-scheduling-while-atomic-on-real-time.patch
new file mode 100644
index 00000000..cfbc1d1f
--- /dev/null
+++ b/kernel-rt/debian/bullseye/patches/0015-dm-snapshot-fix-scheduling-while-atomic-on-real-time.patch
@@ -0,0 +1,236 @@
+From e94787a61d0f7617801bafaf5e63474683d76cd8 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka
+Date: Mon, 1 Dec 2025 22:13:10 +0100
+Subject: [PATCH] dm-snapshot: fix 'scheduling while atomic' on real-time
+ kernels
+
+There is reported 'scheduling while atomic' bug when using dm-snapshot on
+real-time kernels. The reason for the bug is that the hlist_bl code does
+preempt_disable() when taking the lock and the kernel attempts to take
+other spinlocks while holding the hlist_bl lock.
+
+Fix this by converting a hlist_bl spinlock into a regular spinlock.
+ +Signed-off-by: Mikulas Patocka +Reported-by: Jiping Ma +(cherry picked from commit 8581b19eb2c5ccf06c195d3b5468c3c9d17a5020) +Signed-off-by: Jiping Ma +--- + drivers/md/dm-exception-store.h | 2 +- + drivers/md/dm-snap.c | 73 +++++++++++++++------------------ + 2 files changed, 35 insertions(+), 40 deletions(-) + +diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h +index b67976637538..061b4d310813 100644 +--- a/drivers/md/dm-exception-store.h ++++ b/drivers/md/dm-exception-store.h +@@ -29,7 +29,7 @@ typedef sector_t chunk_t; + * chunk within the device. + */ + struct dm_exception { +- struct hlist_bl_node hash_list; ++ struct hlist_node hash_list; + + chunk_t old_chunk; + chunk_t new_chunk; +diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c +index f40c18da4000..dbd148967de4 100644 +--- a/drivers/md/dm-snap.c ++++ b/drivers/md/dm-snap.c +@@ -40,10 +40,15 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; + #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ + (DM_TRACKED_CHUNK_HASH_SIZE - 1)) + ++struct dm_hlist_head { ++ struct hlist_head head; ++ spinlock_t lock; ++}; ++ + struct dm_exception_table { + uint32_t hash_mask; + unsigned int hash_shift; +- struct hlist_bl_head *table; ++ struct dm_hlist_head *table; + }; + + struct dm_snapshot { +@@ -628,8 +633,8 @@ static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk); + + /* Lock to protect access to the completed and pending exception hash tables. */ + struct dm_exception_table_lock { +- struct hlist_bl_head *complete_slot; +- struct hlist_bl_head *pending_slot; ++ spinlock_t *complete_slot; ++ spinlock_t *pending_slot; + }; + + static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk, +@@ -638,20 +643,20 @@ static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk, + struct dm_exception_table *complete = &s->complete; + struct dm_exception_table *pending = &s->pending; + +- lock->complete_slot = &complete->table[exception_hash(complete, chunk)]; +- lock->pending_slot = &pending->table[exception_hash(pending, chunk)]; ++ lock->complete_slot = &complete->table[exception_hash(complete, chunk)].lock; ++ lock->pending_slot = &pending->table[exception_hash(pending, chunk)].lock; + } + + static void dm_exception_table_lock(struct dm_exception_table_lock *lock) + { +- hlist_bl_lock(lock->complete_slot); +- hlist_bl_lock(lock->pending_slot); ++ spin_lock_nested(lock->complete_slot, 1); ++ spin_lock_nested(lock->pending_slot, 2); + } + + static void dm_exception_table_unlock(struct dm_exception_table_lock *lock) + { +- hlist_bl_unlock(lock->pending_slot); +- hlist_bl_unlock(lock->complete_slot); ++ spin_unlock(lock->pending_slot); ++ spin_unlock(lock->complete_slot); + } + + static int dm_exception_table_init(struct dm_exception_table *et, +@@ -661,13 +666,15 @@ static int dm_exception_table_init(struct dm_exception_table *et, + + et->hash_shift = hash_shift; + et->hash_mask = size - 1; +- et->table = kvmalloc_array(size, sizeof(struct hlist_bl_head), ++ et->table = kvmalloc_array(size, sizeof(struct dm_hlist_head), + GFP_KERNEL); + if (!et->table) + return -ENOMEM; + +- for (i = 0; i < size; i++) +- INIT_HLIST_BL_HEAD(et->table + i); ++ for (i = 0; i < size; i++) { ++ INIT_HLIST_HEAD(&et->table[i].head); ++ spin_lock_init(&et->table[i].lock); ++ } + + return 0; + } +@@ -675,16 +682,17 @@ static int dm_exception_table_init(struct dm_exception_table *et, + static void dm_exception_table_exit(struct dm_exception_table *et, + 
struct kmem_cache *mem) + { +- struct hlist_bl_head *slot; ++ struct dm_hlist_head *slot; + struct dm_exception *ex; +- struct hlist_bl_node *pos, *n; ++ struct hlist_node *pos; + int i, size; + + size = et->hash_mask + 1; + for (i = 0; i < size; i++) { + slot = et->table + i; + +- hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) { ++ hlist_for_each_entry_safe(ex, pos, &slot->head, hash_list) { ++ hlist_del(&ex->hash_list); + kmem_cache_free(mem, ex); + cond_resched(); + } +@@ -700,7 +708,7 @@ static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) + + static void dm_remove_exception(struct dm_exception *e) + { +- hlist_bl_del(&e->hash_list); ++ hlist_del(&e->hash_list); + } + + /* +@@ -710,12 +718,11 @@ static void dm_remove_exception(struct dm_exception *e) + static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, + chunk_t chunk) + { +- struct hlist_bl_head *slot; +- struct hlist_bl_node *pos; ++ struct hlist_head *slot; + struct dm_exception *e; + +- slot = &et->table[exception_hash(et, chunk)]; +- hlist_bl_for_each_entry(e, pos, slot, hash_list) ++ slot = &et->table[exception_hash(et, chunk)].head; ++ hlist_for_each_entry(e, slot, hash_list) + if (chunk >= e->old_chunk && + chunk <= e->old_chunk + dm_consecutive_chunk_count(e)) + return e; +@@ -762,18 +769,17 @@ static void free_pending_exception(struct dm_snap_pending_exception *pe) + static void dm_insert_exception(struct dm_exception_table *eh, + struct dm_exception *new_e) + { +- struct hlist_bl_head *l; +- struct hlist_bl_node *pos; ++ struct hlist_head *l; + struct dm_exception *e = NULL; + +- l = &eh->table[exception_hash(eh, new_e->old_chunk)]; ++ l = &eh->table[exception_hash(eh, new_e->old_chunk)].head; + + /* Add immediately if this table doesn't support consecutive chunks */ + if (!eh->hash_shift) + goto out; + + /* List is ordered by old_chunk */ +- hlist_bl_for_each_entry(e, pos, l, hash_list) { ++ hlist_for_each_entry(e, l, hash_list) { + /* Insert after an existing chunk? */ + if (new_e->old_chunk == (e->old_chunk + + dm_consecutive_chunk_count(e) + 1) && +@@ -804,13 +810,13 @@ static void dm_insert_exception(struct dm_exception_table *eh, + * Either the table doesn't support consecutive chunks or slot + * l is empty. + */ +- hlist_bl_add_head(&new_e->hash_list, l); ++ hlist_add_head(&new_e->hash_list, l); + } else if (new_e->old_chunk < e->old_chunk) { + /* Add before an existing exception */ +- hlist_bl_add_before(&new_e->hash_list, &e->hash_list); ++ hlist_add_before(&new_e->hash_list, &e->hash_list); + } else { + /* Add to l's tail: e is the last exception in this slot */ +- hlist_bl_add_behind(&new_e->hash_list, &e->hash_list); ++ hlist_add_behind(&new_e->hash_list, &e->hash_list); + } + } + +@@ -820,7 +826,6 @@ static void dm_insert_exception(struct dm_exception_table *eh, + */ + static int dm_add_exception(void *context, chunk_t old, chunk_t new) + { +- struct dm_exception_table_lock lock; + struct dm_snapshot *s = context; + struct dm_exception *e; + +@@ -833,17 +838,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) + /* Consecutive_count is implicitly initialised to zero */ + e->new_chunk = new; + +- /* +- * Although there is no need to lock access to the exception tables +- * here, if we don't then hlist_bl_add_head(), called by +- * dm_insert_exception(), will complain about accessing the +- * corresponding list without locking it first. 
+- */ +- dm_exception_table_lock_init(s, old, &lock); +- +- dm_exception_table_lock(&lock); + dm_insert_exception(&s->complete, e); +- dm_exception_table_unlock(&lock); + + return 0; + } +@@ -873,7 +868,7 @@ static int calc_max_buckets(void) + /* use a fixed size of 2MB */ + unsigned long mem = 2 * 1024 * 1024; + +- mem /= sizeof(struct hlist_bl_head); ++ mem /= sizeof(struct dm_hlist_head); + + return mem; + } +-- +2.49.0 + diff --git a/kernel-rt/debian/bullseye/patches/series b/kernel-rt/debian/bullseye/patches/series index 29b00354..369588db 100644 --- a/kernel-rt/debian/bullseye/patches/series +++ b/kernel-rt/debian/bullseye/patches/series @@ -38,3 +38,4 @@ zl3073x-backport/0003-devlink-introduce-devlink_nl_put_u64.patch zl3073x-backport/0004-dpll-zl3073x-Fix-missing-header-build-error-on-older.patch zl3073x-backport/0005-dpll-add-phase-offset-monitor-feature-to-netlink-spe.patch zl3073x-backport/0006-dpll-add-phase_offset_monitor_get-set-callback-ops.patch +0015-dm-snapshot-fix-scheduling-while-atomic-on-real-time.patch diff --git a/kernel-rt/debian/trixie/patches/0015-dm-snapshot-fix-scheduling-while-atomic-on-real-time.patch b/kernel-rt/debian/trixie/patches/0015-dm-snapshot-fix-scheduling-while-atomic-on-real-time.patch new file mode 100644 index 00000000..cfbc1d1f --- /dev/null +++ b/kernel-rt/debian/trixie/patches/0015-dm-snapshot-fix-scheduling-while-atomic-on-real-time.patch @@ -0,0 +1,236 @@ +From e94787a61d0f7617801bafaf5e63474683d76cd8 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Mon, 1 Dec 2025 22:13:10 +0100 +Subject: [PATCH] dm-snapshot: fix 'scheduling while atomic' on real-time + kernels + +There is reported 'scheduling while atomic' bug when using dm-snapshot on +real-time kernels. The reason for the bug is that the hlist_bl code does +preempt_disable() when taking the lock and the kernel attempts to take +other spinlocks while holding the hlist_bl lock. + +Fix this by converting a hlist_bl spinlock into a regular spinlock. + +Signed-off-by: Mikulas Patocka +Reported-by: Jiping Ma +(cherry picked from commit 8581b19eb2c5ccf06c195d3b5468c3c9d17a5020) +Signed-off-by: Jiping Ma +--- + drivers/md/dm-exception-store.h | 2 +- + drivers/md/dm-snap.c | 73 +++++++++++++++------------------ + 2 files changed, 35 insertions(+), 40 deletions(-) + +diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h +index b67976637538..061b4d310813 100644 +--- a/drivers/md/dm-exception-store.h ++++ b/drivers/md/dm-exception-store.h +@@ -29,7 +29,7 @@ typedef sector_t chunk_t; + * chunk within the device. + */ + struct dm_exception { +- struct hlist_bl_node hash_list; ++ struct hlist_node hash_list; + + chunk_t old_chunk; + chunk_t new_chunk; +diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c +index f40c18da4000..dbd148967de4 100644 +--- a/drivers/md/dm-snap.c ++++ b/drivers/md/dm-snap.c +@@ -40,10 +40,15 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; + #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ + (DM_TRACKED_CHUNK_HASH_SIZE - 1)) + ++struct dm_hlist_head { ++ struct hlist_head head; ++ spinlock_t lock; ++}; ++ + struct dm_exception_table { + uint32_t hash_mask; + unsigned int hash_shift; +- struct hlist_bl_head *table; ++ struct dm_hlist_head *table; + }; + + struct dm_snapshot { +@@ -628,8 +633,8 @@ static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk); + + /* Lock to protect access to the completed and pending exception hash tables. 
*/ + struct dm_exception_table_lock { +- struct hlist_bl_head *complete_slot; +- struct hlist_bl_head *pending_slot; ++ spinlock_t *complete_slot; ++ spinlock_t *pending_slot; + }; + + static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk, +@@ -638,20 +643,20 @@ static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk, + struct dm_exception_table *complete = &s->complete; + struct dm_exception_table *pending = &s->pending; + +- lock->complete_slot = &complete->table[exception_hash(complete, chunk)]; +- lock->pending_slot = &pending->table[exception_hash(pending, chunk)]; ++ lock->complete_slot = &complete->table[exception_hash(complete, chunk)].lock; ++ lock->pending_slot = &pending->table[exception_hash(pending, chunk)].lock; + } + + static void dm_exception_table_lock(struct dm_exception_table_lock *lock) + { +- hlist_bl_lock(lock->complete_slot); +- hlist_bl_lock(lock->pending_slot); ++ spin_lock_nested(lock->complete_slot, 1); ++ spin_lock_nested(lock->pending_slot, 2); + } + + static void dm_exception_table_unlock(struct dm_exception_table_lock *lock) + { +- hlist_bl_unlock(lock->pending_slot); +- hlist_bl_unlock(lock->complete_slot); ++ spin_unlock(lock->pending_slot); ++ spin_unlock(lock->complete_slot); + } + + static int dm_exception_table_init(struct dm_exception_table *et, +@@ -661,13 +666,15 @@ static int dm_exception_table_init(struct dm_exception_table *et, + + et->hash_shift = hash_shift; + et->hash_mask = size - 1; +- et->table = kvmalloc_array(size, sizeof(struct hlist_bl_head), ++ et->table = kvmalloc_array(size, sizeof(struct dm_hlist_head), + GFP_KERNEL); + if (!et->table) + return -ENOMEM; + +- for (i = 0; i < size; i++) +- INIT_HLIST_BL_HEAD(et->table + i); ++ for (i = 0; i < size; i++) { ++ INIT_HLIST_HEAD(&et->table[i].head); ++ spin_lock_init(&et->table[i].lock); ++ } + + return 0; + } +@@ -675,16 +682,17 @@ static int dm_exception_table_init(struct dm_exception_table *et, + static void dm_exception_table_exit(struct dm_exception_table *et, + struct kmem_cache *mem) + { +- struct hlist_bl_head *slot; ++ struct dm_hlist_head *slot; + struct dm_exception *ex; +- struct hlist_bl_node *pos, *n; ++ struct hlist_node *pos; + int i, size; + + size = et->hash_mask + 1; + for (i = 0; i < size; i++) { + slot = et->table + i; + +- hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) { ++ hlist_for_each_entry_safe(ex, pos, &slot->head, hash_list) { ++ hlist_del(&ex->hash_list); + kmem_cache_free(mem, ex); + cond_resched(); + } +@@ -700,7 +708,7 @@ static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) + + static void dm_remove_exception(struct dm_exception *e) + { +- hlist_bl_del(&e->hash_list); ++ hlist_del(&e->hash_list); + } + + /* +@@ -710,12 +718,11 @@ static void dm_remove_exception(struct dm_exception *e) + static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, + chunk_t chunk) + { +- struct hlist_bl_head *slot; +- struct hlist_bl_node *pos; ++ struct hlist_head *slot; + struct dm_exception *e; + +- slot = &et->table[exception_hash(et, chunk)]; +- hlist_bl_for_each_entry(e, pos, slot, hash_list) ++ slot = &et->table[exception_hash(et, chunk)].head; ++ hlist_for_each_entry(e, slot, hash_list) + if (chunk >= e->old_chunk && + chunk <= e->old_chunk + dm_consecutive_chunk_count(e)) + return e; +@@ -762,18 +769,17 @@ static void free_pending_exception(struct dm_snap_pending_exception *pe) + static void dm_insert_exception(struct dm_exception_table *eh, + struct dm_exception 
*new_e) + { +- struct hlist_bl_head *l; +- struct hlist_bl_node *pos; ++ struct hlist_head *l; + struct dm_exception *e = NULL; + +- l = &eh->table[exception_hash(eh, new_e->old_chunk)]; ++ l = &eh->table[exception_hash(eh, new_e->old_chunk)].head; + + /* Add immediately if this table doesn't support consecutive chunks */ + if (!eh->hash_shift) + goto out; + + /* List is ordered by old_chunk */ +- hlist_bl_for_each_entry(e, pos, l, hash_list) { ++ hlist_for_each_entry(e, l, hash_list) { + /* Insert after an existing chunk? */ + if (new_e->old_chunk == (e->old_chunk + + dm_consecutive_chunk_count(e) + 1) && +@@ -804,13 +810,13 @@ static void dm_insert_exception(struct dm_exception_table *eh, + * Either the table doesn't support consecutive chunks or slot + * l is empty. + */ +- hlist_bl_add_head(&new_e->hash_list, l); ++ hlist_add_head(&new_e->hash_list, l); + } else if (new_e->old_chunk < e->old_chunk) { + /* Add before an existing exception */ +- hlist_bl_add_before(&new_e->hash_list, &e->hash_list); ++ hlist_add_before(&new_e->hash_list, &e->hash_list); + } else { + /* Add to l's tail: e is the last exception in this slot */ +- hlist_bl_add_behind(&new_e->hash_list, &e->hash_list); ++ hlist_add_behind(&new_e->hash_list, &e->hash_list); + } + } + +@@ -820,7 +826,6 @@ static void dm_insert_exception(struct dm_exception_table *eh, + */ + static int dm_add_exception(void *context, chunk_t old, chunk_t new) + { +- struct dm_exception_table_lock lock; + struct dm_snapshot *s = context; + struct dm_exception *e; + +@@ -833,17 +838,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) + /* Consecutive_count is implicitly initialised to zero */ + e->new_chunk = new; + +- /* +- * Although there is no need to lock access to the exception tables +- * here, if we don't then hlist_bl_add_head(), called by +- * dm_insert_exception(), will complain about accessing the +- * corresponding list without locking it first. +- */ +- dm_exception_table_lock_init(s, old, &lock); +- +- dm_exception_table_lock(&lock); + dm_insert_exception(&s->complete, e); +- dm_exception_table_unlock(&lock); + + return 0; + } +@@ -873,7 +868,7 @@ static int calc_max_buckets(void) + /* use a fixed size of 2MB */ + unsigned long mem = 2 * 1024 * 1024; + +- mem /= sizeof(struct hlist_bl_head); ++ mem /= sizeof(struct dm_hlist_head); + + return mem; + } +-- +2.49.0 + diff --git a/kernel-rt/debian/trixie/patches/0015-sched-fair-Block-delayed-tasks-on-throttled-hierarch.patch b/kernel-rt/debian/trixie/patches/0015-sched-fair-Block-delayed-tasks-on-throttled-hierarch.patch deleted file mode 100644 index dc020249..00000000 --- a/kernel-rt/debian/trixie/patches/0015-sched-fair-Block-delayed-tasks-on-throttled-hierarch.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 753594c69939fb8fbf6971e7f4858052e0978bf1 Mon Sep 17 00:00:00 2001 -From: Jiping Ma -Date: Mon, 13 Oct 2025 07:28:52 +0000 -Subject: [PATCH] sched/fair: Block delayed tasks on throttled hierarchy during - dequeue - -Dequeuing a fair task on a throttled hierarchy returns early on -encountering a throttled cfs_rq since the throttle path has already -dequeued the hierarchy above and has adjusted the h_nr_* accounting till -the root cfs_rq. 
- -dequeue_entities() crucially misses calling __block_task() for delayed -tasks being dequeued on the throttled hierarchies, but this was mostly -harmless until commit b7ca5743a260 ("sched/core: Tweak -wait_task_inactive() to force dequeue sched_delayed tasks") since all -existing cases would re-enqueue the task if task_on_rq_queued() returned -true and the task would eventually be blocked at pick after the -hierarchy was unthrottled. - -wait_task_inactive() is special as it expects the delayed task on -throttled hierarchy to reach the blocked state on dequeue but since -__block_task() is never called, task_on_rq_queued() continues to return -true. Furthermore, since the task is now off the hierarchy, the pick -never reaches it to fully block the task even after unthrottle leading -to wait_task_inactive() looping endlessly. - -Remedy this by calling __block_task() if a delayed task is being -dequeued on a throttled hierarchy. - -This fix is only required for stabled kernels implementing delay dequeue -(>= v6.12) before v6.18 since upstream commit e1fad12dcb66 ("sched/fair: -Switch to task based throttle model") indirectly fixes this by removing -the early return conditions in dequeue_entities() as part of the per-task -throttle feature. - -Cc: stable@vger.kernel.org -Reported-by: Matt Fleming -Closes: https://lore.kernel.org/all/20250925133310.1843863-1-matt@readmodwrite.com/ -Fixes: b7ca5743a260 ("sched/core: Tweak wait_task_inactive() to force dequeue sched_delayed tasks") -Tested-by: Matt Fleming -Signed-off-by: K Prateek Nayak -Signed-off-by: Jiping Ma ---- - kernel/sched/fair.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index d894ccc8cb8f..64fd09e5bb79 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -7187,6 +7187,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) - int h_nr_delayed = 0; - struct cfs_rq *cfs_rq; - u64 slice = 0; -+ int ret = 0; - - if (entity_is_task(se)) { - p = task_of(se); -@@ -7218,7 +7219,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) - - /* end evaluation on encountering a throttled cfs_rq */ - if (cfs_rq_throttled(cfs_rq)) -- return 0; -+ goto out; - - /* Don't dequeue parent if it has other entities besides us */ - if (cfs_rq->load.weight) { -@@ -7261,7 +7262,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) - - /* end evaluation on encountering a throttled cfs_rq */ - if (cfs_rq_throttled(cfs_rq)) -- return 0; -+ goto out; - } - - sub_nr_running(rq, h_nr_queued); -@@ -7273,6 +7274,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) - if (unlikely(!was_sched_idle && sched_idle_rq(rq))) - rq->next_balance = jiffies; - -+ ret = 1; -+out: - if (p && task_delayed) { - SCHED_WARN_ON(!task_sleep); - SCHED_WARN_ON(p->on_rq != 1); -@@ -7288,7 +7291,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) - __block_task(rq, p); - } - -- return 1; -+ return ret; - } - - /* --- -2.49.0 - diff --git a/kernel-rt/debian/trixie/patches/series b/kernel-rt/debian/trixie/patches/series index 29b00354..369588db 100644 --- a/kernel-rt/debian/trixie/patches/series +++ b/kernel-rt/debian/trixie/patches/series @@ -38,3 +38,4 @@ zl3073x-backport/0003-devlink-introduce-devlink_nl_put_u64.patch zl3073x-backport/0004-dpll-zl3073x-Fix-missing-header-build-error-on-older.patch 
zl3073x-backport/0005-dpll-add-phase-offset-monitor-feature-to-netlink-spe.patch zl3073x-backport/0006-dpll-add-phase_offset_monitor_get-set-callback-ops.patch +0015-dm-snapshot-fix-scheduling-while-atomic-on-real-time.patch
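
Note for reviewers: the sketch below is a minimal userspace analogue of the
locking pattern the cherry-picked commit adopts, added only as an
illustration. Each hash bucket pairs its list head with its own ordinary,
sleepable lock (a pthread mutex stands in for the kernel's spinlock_t, which
becomes a sleeping rtmutex-based lock on PREEMPT_RT), instead of an hlist_bl
bit-spinlock that disables preemption while held. All names, types and the
bucket count here are illustrative assumptions and are not taken from
dm-snap.c.

/*
 * Userspace sketch (not kernel code) of per-bucket locking with regular,
 * sleepable locks instead of bit-spinlocks.
 *
 * Build: cc -pthread bucket_lock_sketch.c -o bucket_lock_sketch
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NBUCKETS 16	/* must stay a power of two for the mask below */

struct node {
	uint64_t key;
	struct node *next;
};

struct bucket {
	struct node *head;	/* singly linked list, like hlist_head */
	pthread_mutex_t lock;	/* protects only this bucket's list */
};

static struct bucket table[NBUCKETS];

static struct bucket *hash_bucket(uint64_t key)
{
	return &table[key & (NBUCKETS - 1)];
}

static void table_init(void)
{
	for (int i = 0; i < NBUCKETS; i++) {
		table[i].head = NULL;
		pthread_mutex_init(&table[i].lock, NULL);
	}
}

static void table_insert(uint64_t key)
{
	struct bucket *b = hash_bucket(key);
	struct node *n = malloc(sizeof(*n));

	if (!n)
		abort();
	n->key = key;

	/*
	 * Taking the bucket lock may sleep; no atomic context is entered,
	 * so acquiring further sleeping locks inside the critical section
	 * is legal, unlike under a held bit-spinlock.
	 */
	pthread_mutex_lock(&b->lock);
	n->next = b->head;
	b->head = n;
	pthread_mutex_unlock(&b->lock);
}

static int table_lookup(uint64_t key)
{
	struct bucket *b = hash_bucket(key);
	int found = 0;

	pthread_mutex_lock(&b->lock);
	for (struct node *n = b->head; n; n = n->next) {
		if (n->key == key) {
			found = 1;
			break;
		}
	}
	pthread_mutex_unlock(&b->lock);
	return found;
}

int main(void)
{
	table_init();
	table_insert(42);
	printf("lookup(42) = %d\n", table_lookup(42));	/* prints 1 */
	return 0;
}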