* [PATCH] lib/sort.c: Add _nonatomic() variants with cond_resched()
@ 2025-03-26 15:26 Kent Overstreet
2025-03-26 15:54 ` Kuan-Wei Chiu
2025-03-27 18:09 ` Andrew Morton
0 siblings, 2 replies; 5+ messages in thread
From: Kent Overstreet @ 2025-03-26 15:26 UTC (permalink / raw)
To: linux-bcachefs, linux-kernel
Cc: Kent Overstreet, Kuan-Wei Chiu, Andrew Morton
Andrew - if you're ok with this patch I'd like to get it in soon as a
bugfix, I've been getting quite a few reports on this one.
I don't much care for the naming though, thoughts there?
-- >8 --
bcachefs calls sort() during recovery to sort all keys it found in the
journal, and this may be very large - gigabytes on large machines.
This has been causing "task blocked" warnings, so needs a
cond_resched().
Cc: Kuan-Wei Chiu <visitorckw@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
include/linux/sort.h | 11 +++++++++++
lib/sort.c | 46 +++++++++++++++++++++++++++++++++++++++-----
2 files changed, 52 insertions(+), 5 deletions(-)
diff --git a/include/linux/sort.h b/include/linux/sort.h
index e163287ac6c1..8e5603b10941 100644
--- a/include/linux/sort.h
+++ b/include/linux/sort.h
@@ -13,4 +13,15 @@ void sort(void *base, size_t num, size_t size,
cmp_func_t cmp_func,
swap_func_t swap_func);
+/* Versions that periodically call cond_resched(): */
+
+void sort_r_nonatomic(void *base, size_t num, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv);
+
+void sort_nonatomic(void *base, size_t num, size_t size,
+ cmp_func_t cmp_func,
+ swap_func_t swap_func);
+
#endif
diff --git a/lib/sort.c b/lib/sort.c
index 8e73dc55476b..60b51dac00ec 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -186,6 +186,8 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size)
return i / 2;
}
+#include <linux/sched.h>
+
/**
* sort_r - sort an array of elements
* @base: pointer to data to sort
@@ -212,10 +214,11 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size)
* O(n*n) worst-case behavior and extra memory requirements that make
* it less suitable for kernel use.
*/
-void sort_r(void *base, size_t num, size_t size,
- cmp_r_func_t cmp_func,
- swap_r_func_t swap_func,
- const void *priv)
+static void __sort_r(void *base, size_t num, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv,
+ bool may_schedule)
{
/* pre-scale counters for performance */
size_t n = num * size, a = (num/2) * size;
@@ -286,6 +289,9 @@ void sort_r(void *base, size_t num, size_t size,
b = parent(b, lsbit, size);
do_swap(base + b, base + c, size, swap_func, priv);
}
+
+ if (may_schedule)
+ cond_resched();
}
n -= size;
@@ -293,8 +299,25 @@ void sort_r(void *base, size_t num, size_t size,
if (n == size * 2 && do_cmp(base, base + size, cmp_func, priv) > 0)
do_swap(base, base + size, size, swap_func, priv);
}
+
+void sort_r(void *base, size_t num, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv)
+{
+ __sort_r(base, num, size, cmp_func, swap_func, priv, false);
+}
EXPORT_SYMBOL(sort_r);
+void sort_r_nonatomic(void *base, size_t num, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv)
+{
+ __sort_r(base, num, size, cmp_func, swap_func, priv, true);
+}
+EXPORT_SYMBOL(sort_r_nonatomic);
+
void sort(void *base, size_t num, size_t size,
cmp_func_t cmp_func,
swap_func_t swap_func)
@@ -304,6 +327,19 @@ void sort(void *base, size_t num, size_t size,
.swap = swap_func,
};
- return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
+ return __sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w, false);
}
EXPORT_SYMBOL(sort);
+
+void sort_nonatomic(void *base, size_t num, size_t size,
+ cmp_func_t cmp_func,
+ swap_func_t swap_func)
+{
+ struct wrapper w = {
+ .cmp = cmp_func,
+ .swap = swap_func,
+ };
+
+ return __sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w, true);
+}
+EXPORT_SYMBOL(sort_nonatomic);
--
2.49.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] lib/sort.c: Add _nonatomic() variants with cond_resched()
2025-03-26 15:26 [PATCH] lib/sort.c: Add _nonatomic() variants with cond_resched() Kent Overstreet
@ 2025-03-26 15:54 ` Kuan-Wei Chiu
2025-03-26 16:09 ` Kent Overstreet
2025-03-27 18:09 ` Andrew Morton
1 sibling, 1 reply; 5+ messages in thread
From: Kuan-Wei Chiu @ 2025-03-26 15:54 UTC (permalink / raw)
To: Kent Overstreet; +Cc: linux-bcachefs, linux-kernel, Andrew Morton
On Wed, Mar 26, 2025 at 11:26:06AM -0400, Kent Overstreet wrote:
> Andrew - if you're ok with this patch I'd like to get it in soon as a
> bugfix, I've been getting quite a few reports on this one.
>
> I don't much care for the naming though, thoughts there?
>
> -- >8 --
>
> bcachefs calls sort() during recovery to sort all keys it found in the
> journal, and this may be very large - gigabytes on large machines.
>
> This has been causing "task blocked" warnings, so needs a
> cond_resched().
>
> Cc: Kuan-Wei Chiu <visitorckw@gmail.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
> include/linux/sort.h | 11 +++++++++++
> lib/sort.c | 46 +++++++++++++++++++++++++++++++++++++++-----
> 2 files changed, 52 insertions(+), 5 deletions(-)
>
I don't have strong opinions on this, but I recall that UBIFS had a
similar issue with list_sort(), and they addressed it by calling
cond_resched() within the compare function. Would that approach be
simpler and more appropriate than introducing a new API in the library
code?
Regards,
Kuan-Wei
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] lib/sort.c: Add _nonatomic() variants with cond_resched()
2025-03-26 15:54 ` Kuan-Wei Chiu
@ 2025-03-26 16:09 ` Kent Overstreet
0 siblings, 0 replies; 5+ messages in thread
From: Kent Overstreet @ 2025-03-26 16:09 UTC (permalink / raw)
To: Kuan-Wei Chiu; +Cc: linux-bcachefs, linux-kernel, Andrew Morton
On Wed, Mar 26, 2025 at 11:54:15PM +0800, Kuan-Wei Chiu wrote:
> On Wed, Mar 26, 2025 at 11:26:06AM -0400, Kent Overstreet wrote:
> > Andrew - if you're ok with this patch I'd like to get it in soon as a
> > bugfix, I've been getting quite a few reports on this one.
> >
> > I don't much care for the naming though, thoughts there?
> >
> > -- >8 --
> >
> > bcachefs calls sort() during recovery to sort all keys it found in the
> > journal, and this may be very large - gigabytes on large machines.
> >
> > This has been causing "task blocked" warnings, so needs a
> > cond_resched().
> >
> > Cc: Kuan-Wei Chiu <visitorckw@gmail.com>
> > Cc: Andrew Morton <akpm@linux-foundation.org>
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > ---
> > include/linux/sort.h | 11 +++++++++++
> > lib/sort.c | 46 +++++++++++++++++++++++++++++++++++++++-----
> > 2 files changed, 52 insertions(+), 5 deletions(-)
> >
>
> I don't have strong opinions on this, but I recall that UBIFS had a
> similar issue with list_sort(), and they addressed it by calling
> cond_resched() within the compare function. Would that approach be
> simpler and more appropriate than introducing a new API in the library
> code?
That'd be an option, but it would be heavier; sort() has nested loops so
it has a more natural place to put it.
And I'd say the nonatomic scheduling version should likely be the
default, anyways; even if other users aren't hitting the 10 second
warning, going 1 second without scheduling isn't good.
Not going to audit all the existing callers, but we should probably
provide it.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] lib/sort.c: Add _nonatomic() variants with cond_resched()
2025-03-26 15:26 [PATCH] lib/sort.c: Add _nonatomic() variants with cond_resched() Kent Overstreet
2025-03-26 15:54 ` Kuan-Wei Chiu
@ 2025-03-27 18:09 ` Andrew Morton
2025-03-27 19:47 ` Kent Overstreet
1 sibling, 1 reply; 5+ messages in thread
From: Andrew Morton @ 2025-03-27 18:09 UTC (permalink / raw)
To: Kent Overstreet; +Cc: linux-bcachefs, linux-kernel, Kuan-Wei Chiu
On Wed, 26 Mar 2025 11:26:06 -0400 Kent Overstreet <kent.overstreet@linux.dev> wrote:
> bcachefs calls sort() during recovery to sort all keys it found in the
> journal, and this may be very large - gigabytes on large machines.
>
> This has been causing "task blocked" warnings, so needs a
> cond_resched().
I assume this has been happening for a while, so a cc:stable is appropriate?
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] lib/sort.c: Add _nonatomic() variants with cond_resched()
2025-03-27 18:09 ` Andrew Morton
@ 2025-03-27 19:47 ` Kent Overstreet
0 siblings, 0 replies; 5+ messages in thread
From: Kent Overstreet @ 2025-03-27 19:47 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-bcachefs, linux-kernel, Kuan-Wei Chiu
On Thu, Mar 27, 2025 at 11:09:23AM -0700, Andrew Morton wrote:
> On Wed, 26 Mar 2025 11:26:06 -0400 Kent Overstreet <kent.overstreet@linux.dev> wrote:
>
> > bcachefs calls sort() during recovery to sort all keys it found in the
> > journal, and this may be very large - gigabytes on large machines.
> >
> > This has been causing "task blocked" warnings, so needs a
> > cond_resched().
>
> I assume this has been happening for a while, so a cc:stable is appropriate?
Well, right now I'm only doing backports for the most critical stuff,
and this doesn't meet that bar. I'm planning on doing normal backports
after I take the experimental label off, in at most another year.
And I've told the stable team I don't want them touching fs/bcachefs/, I
want to be doing those backports and running them through my normal QA
process. We could for this, but perhaps better not to let the process
get confused :)
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-03-27 19:47 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-03-26 15:26 [PATCH] lib/sort.c: Add _nonatomic() variants with cond_resched() Kent Overstreet
2025-03-26 15:54 ` Kuan-Wei Chiu
2025-03-26 16:09 ` Kent Overstreet
2025-03-27 18:09 ` Andrew Morton
2025-03-27 19:47 ` Kent Overstreet
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.