kernel-rt:dm-snapshot: fix 'scheduling while atomic' on rt kernel
We observed "BUG: scheduling while atomic: lvm/1380/0x00000003" on the rt
kernel during the upgrade-and-rollback test on 6.6, 6.12 and mainline. The
issue is caused by dm_exception_table_lock(&lock), which disables preemption
twice (one hlist_bl_lock() for the complete slot and another for the pending
slot). If the code between dm_exception_table_lock(&lock) and
dm_exception_table_unlock(&lock) then takes an rt_spin_lock, it triggers a
splat such as "BUG: scheduling while atomic: kworker/u72:11/349/0x00000003",
because the preempt count is 3 at that point. Several places in dm-snap.c hit
the same issue, such as dm_add_exception(), pending_complete() and
snapshot_map(). Fix this by converting the hlist_bl bit spinlock into a
regular spinlock.

Cherry-pick the upstream commit
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8581b19eb2c5
("dm-snapshot: fix 'scheduling while atomic' on real-time kernels").

This change also removes
0015-sched-fair-Block-delayed-tasks-on-throttled-hierarch.patch, which is no
longer required after the kernel upgrade to 6.12.57.

Verification:
- Built the ISO successfully for rt and std.
- Ran the upgrade and rollback tests for more than one day.

Closes-Bug: 2136084
Change-Id: I7a7d094c60f8f8fd9f4da3c441fcda89f6048241
Signed-off-by: Jiping Ma <jiping.ma2@windriver.com>
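For context, a minimal sketch of the failure mode on PREEMPT_RT (illustrative
only; demo_slot, demo_inner_lock and rt_failure_sketch() are made-up names,
not the actual dm-snap.c call chain). hlist_bl_lock() is a bit spinlock that
disables preemption, while a regular spinlock_t becomes a sleeping
rtmutex-based lock on RT, so nesting the latter inside the former schedules
while atomic:

#include <linux/list_bl.h>
#include <linux/spinlock.h>

static struct hlist_bl_head demo_slot;    /* bucket head; bit 0 doubles as the lock */
static DEFINE_SPINLOCK(demo_inner_lock);  /* ordinary spinlock_t */

static void rt_failure_sketch(void)
{
	hlist_bl_lock(&demo_slot);        /* bit_spin_lock() -> preempt_disable() */

	/*
	 * On PREEMPT_RT a spinlock_t is backed by an rtmutex and may sleep;
	 * taking it here, with preemption disabled by the bit spinlock
	 * above, produces "BUG: scheduling while atomic".
	 */
	spin_lock(&demo_inner_lock);
	spin_unlock(&demo_inner_lock);

	hlist_bl_unlock(&demo_slot);
}

The upstream fix below avoids this by giving each hash bucket its own
spinlock_t, which on RT is itself a sleeping lock and can safely nest with
the other spinlocks taken under it.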
@@ -0,0 +1,236 @@
From e94787a61d0f7617801bafaf5e63474683d76cd8 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Mon, 1 Dec 2025 22:13:10 +0100
Subject: [PATCH] dm-snapshot: fix 'scheduling while atomic' on real-time
 kernels

There is reported 'scheduling while atomic' bug when using dm-snapshot on
real-time kernels. The reason for the bug is that the hlist_bl code does
preempt_disable() when taking the lock and the kernel attempts to take
other spinlocks while holding the hlist_bl lock.

Fix this by converting a hlist_bl spinlock into a regular spinlock.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reported-by: Jiping Ma <jiping.ma2@windriver.com>
(cherry picked from commit 8581b19eb2c5ccf06c195d3b5468c3c9d17a5020)
Signed-off-by: Jiping Ma <jiping.ma2@windriver.com>
---
 drivers/md/dm-exception-store.h |  2 +-
 drivers/md/dm-snap.c            | 73 +++++++++++++++------------------
 2 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index b67976637538..061b4d310813 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -29,7 +29,7 @@ typedef sector_t chunk_t;
 * chunk within the device.
 */
struct dm_exception {
- struct hlist_bl_node hash_list;
+ struct hlist_node hash_list;

chunk_t old_chunk;
chunk_t new_chunk;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index f40c18da4000..dbd148967de4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -40,10 +40,15 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge";
#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
(DM_TRACKED_CHUNK_HASH_SIZE - 1))

+struct dm_hlist_head {
+ struct hlist_head head;
+ spinlock_t lock;
+};
+
struct dm_exception_table {
uint32_t hash_mask;
unsigned int hash_shift;
- struct hlist_bl_head *table;
+ struct dm_hlist_head *table;
};

struct dm_snapshot {
@@ -628,8 +633,8 @@ static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk);

/* Lock to protect access to the completed and pending exception hash tables. */
struct dm_exception_table_lock {
- struct hlist_bl_head *complete_slot;
- struct hlist_bl_head *pending_slot;
+ spinlock_t *complete_slot;
+ spinlock_t *pending_slot;
};

static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
@@ -638,20 +643,20 @@ static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
struct dm_exception_table *complete = &s->complete;
struct dm_exception_table *pending = &s->pending;

- lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
- lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
+ lock->complete_slot = &complete->table[exception_hash(complete, chunk)].lock;
+ lock->pending_slot = &pending->table[exception_hash(pending, chunk)].lock;
}

static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
{
- hlist_bl_lock(lock->complete_slot);
- hlist_bl_lock(lock->pending_slot);
+ spin_lock_nested(lock->complete_slot, 1);
+ spin_lock_nested(lock->pending_slot, 2);
}

static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
{
- hlist_bl_unlock(lock->pending_slot);
- hlist_bl_unlock(lock->complete_slot);
+ spin_unlock(lock->pending_slot);
+ spin_unlock(lock->complete_slot);
}

static int dm_exception_table_init(struct dm_exception_table *et,
@@ -661,13 +666,15 @@ static int dm_exception_table_init(struct dm_exception_table *et,

et->hash_shift = hash_shift;
et->hash_mask = size - 1;
- et->table = kvmalloc_array(size, sizeof(struct hlist_bl_head),
+ et->table = kvmalloc_array(size, sizeof(struct dm_hlist_head),
GFP_KERNEL);
if (!et->table)
return -ENOMEM;

- for (i = 0; i < size; i++)
-  INIT_HLIST_BL_HEAD(et->table + i);
+ for (i = 0; i < size; i++) {
+  INIT_HLIST_HEAD(&et->table[i].head);
+  spin_lock_init(&et->table[i].lock);
+ }

return 0;
}
@@ -675,16 +682,17 @@ static int dm_exception_table_init(struct dm_exception_table *et,
static void dm_exception_table_exit(struct dm_exception_table *et,
struct kmem_cache *mem)
{
- struct hlist_bl_head *slot;
+ struct dm_hlist_head *slot;
struct dm_exception *ex;
- struct hlist_bl_node *pos, *n;
+ struct hlist_node *pos;
int i, size;

size = et->hash_mask + 1;
for (i = 0; i < size; i++) {
slot = et->table + i;

-  hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) {
+  hlist_for_each_entry_safe(ex, pos, &slot->head, hash_list) {
+   hlist_del(&ex->hash_list);
kmem_cache_free(mem, ex);
cond_resched();
}
@@ -700,7 +708,7 @@ static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk)

static void dm_remove_exception(struct dm_exception *e)
{
- hlist_bl_del(&e->hash_list);
+ hlist_del(&e->hash_list);
}

/*
@@ -710,12 +718,11 @@ static void dm_remove_exception(struct dm_exception *e)
static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et,
chunk_t chunk)
{
- struct hlist_bl_head *slot;
- struct hlist_bl_node *pos;
+ struct hlist_head *slot;
struct dm_exception *e;

- slot = &et->table[exception_hash(et, chunk)];
- hlist_bl_for_each_entry(e, pos, slot, hash_list)
+ slot = &et->table[exception_hash(et, chunk)].head;
+ hlist_for_each_entry(e, slot, hash_list)
if (chunk >= e->old_chunk &&
chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
return e;
@@ -762,18 +769,17 @@ static void free_pending_exception(struct dm_snap_pending_exception *pe)
static void dm_insert_exception(struct dm_exception_table *eh,
struct dm_exception *new_e)
{
- struct hlist_bl_head *l;
- struct hlist_bl_node *pos;
+ struct hlist_head *l;
struct dm_exception *e = NULL;

- l = &eh->table[exception_hash(eh, new_e->old_chunk)];
+ l = &eh->table[exception_hash(eh, new_e->old_chunk)].head;

/* Add immediately if this table doesn't support consecutive chunks */
if (!eh->hash_shift)
goto out;

/* List is ordered by old_chunk */
- hlist_bl_for_each_entry(e, pos, l, hash_list) {
+ hlist_for_each_entry(e, l, hash_list) {
/* Insert after an existing chunk? */
if (new_e->old_chunk == (e->old_chunk +
dm_consecutive_chunk_count(e) + 1) &&
@@ -804,13 +810,13 @@ static void dm_insert_exception(struct dm_exception_table *eh,
 * Either the table doesn't support consecutive chunks or slot
 * l is empty.
 */
-  hlist_bl_add_head(&new_e->hash_list, l);
+  hlist_add_head(&new_e->hash_list, l);
} else if (new_e->old_chunk < e->old_chunk) {
/* Add before an existing exception */
-  hlist_bl_add_before(&new_e->hash_list, &e->hash_list);
+  hlist_add_before(&new_e->hash_list, &e->hash_list);
} else {
/* Add to l's tail: e is the last exception in this slot */
-  hlist_bl_add_behind(&new_e->hash_list, &e->hash_list);
+  hlist_add_behind(&new_e->hash_list, &e->hash_list);
}
}

@@ -820,7 +826,6 @@ static void dm_insert_exception(struct dm_exception_table *eh,
 */
static int dm_add_exception(void *context, chunk_t old, chunk_t new)
{
- struct dm_exception_table_lock lock;
struct dm_snapshot *s = context;
struct dm_exception *e;

@@ -833,17 +838,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new)
/* Consecutive_count is implicitly initialised to zero */
e->new_chunk = new;

- /*
-  * Although there is no need to lock access to the exception tables
-  * here, if we don't then hlist_bl_add_head(), called by
-  * dm_insert_exception(), will complain about accessing the
-  * corresponding list without locking it first.
-  */
- dm_exception_table_lock_init(s, old, &lock);
-
- dm_exception_table_lock(&lock);
dm_insert_exception(&s->complete, e);
- dm_exception_table_unlock(&lock);

return 0;
}
@@ -873,7 +868,7 @@ static int calc_max_buckets(void)
/* use a fixed size of 2MB */
unsigned long mem = 2 * 1024 * 1024;

- mem /= sizeof(struct hlist_bl_head);
+ mem /= sizeof(struct dm_hlist_head);

return mem;
}
--
2.49.0

@@ -1,97 +0,0 @@
From 753594c69939fb8fbf6971e7f4858052e0978bf1 Mon Sep 17 00:00:00 2001
From: Jiping Ma <jiping.ma2@windriver.com>
Date: Mon, 13 Oct 2025 07:28:52 +0000
Subject: [PATCH] sched/fair: Block delayed tasks on throttled hierarchy during
 dequeue

Dequeuing a fair task on a throttled hierarchy returns early on
encountering a throttled cfs_rq since the throttle path has already
dequeued the hierarchy above and has adjusted the h_nr_* accounting till
the root cfs_rq.

dequeue_entities() crucially misses calling __block_task() for delayed
tasks being dequeued on the throttled hierarchies, but this was mostly
harmless until commit b7ca5743a260 ("sched/core: Tweak
wait_task_inactive() to force dequeue sched_delayed tasks") since all
existing cases would re-enqueue the task if task_on_rq_queued() returned
true and the task would eventually be blocked at pick after the
hierarchy was unthrottled.

wait_task_inactive() is special as it expects the delayed task on
throttled hierarchy to reach the blocked state on dequeue but since
__block_task() is never called, task_on_rq_queued() continues to return
true. Furthermore, since the task is now off the hierarchy, the pick
never reaches it to fully block the task even after unthrottle leading
to wait_task_inactive() looping endlessly.

Remedy this by calling __block_task() if a delayed task is being
dequeued on a throttled hierarchy.

This fix is only required for stabled kernels implementing delay dequeue
(>= v6.12) before v6.18 since upstream commit e1fad12dcb66 ("sched/fair:
Switch to task based throttle model") indirectly fixes this by removing
the early return conditions in dequeue_entities() as part of the per-task
throttle feature.

Cc: stable@vger.kernel.org
Reported-by: Matt Fleming <matt@readmodwrite.com>
Closes: https://lore.kernel.org/all/20250925133310.1843863-1-matt@readmodwrite.com/
Fixes: b7ca5743a260 ("sched/core: Tweak wait_task_inactive() to force dequeue sched_delayed tasks")
Tested-by: Matt Fleming <mfleming@cloudflare.com>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Jiping Ma <jiping.ma2@windriver.com>
---
 kernel/sched/fair.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d894ccc8cb8f..64fd09e5bb79 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7187,6 +7187,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
int h_nr_delayed = 0;
struct cfs_rq *cfs_rq;
u64 slice = 0;
+ int ret = 0;

if (entity_is_task(se)) {
p = task_of(se);
@@ -7218,7 +7219,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)

/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
-  return 0;
+  goto out;

/* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight) {
@@ -7261,7 +7262,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)

/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
-  return 0;
+  goto out;
}

sub_nr_running(rq, h_nr_queued);
@@ -7273,6 +7274,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
rq->next_balance = jiffies;

+ ret = 1;
+out:
if (p && task_delayed) {
SCHED_WARN_ON(!task_sleep);
SCHED_WARN_ON(p->on_rq != 1);
@@ -7288,7 +7291,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
__block_task(rq, p);
}

- return 1;
+ return ret;
}

/*
--
2.49.0

@@ -38,3 +38,4 @@ zl3073x-backport/0003-devlink-introduce-devlink_nl_put_u64.patch
zl3073x-backport/0004-dpll-zl3073x-Fix-missing-header-build-error-on-older.patch
zl3073x-backport/0005-dpll-add-phase-offset-monitor-feature-to-netlink-spe.patch
zl3073x-backport/0006-dpll-add-phase_offset_monitor_get-set-callback-ops.patch
0015-dm-snapshot-fix-scheduling-while-atomic-on-real-time.patch