aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
authorLinus Torvalds <torvalds@linux-foundation.org>2026-06-15 13:48:52 +0530
committerLinus Torvalds <torvalds@linux-foundation.org>2026-06-15 13:48:52 +0530
commita53fcff8fc7530f59a8171824ed586200df724a0 (patch)
treed8ebac39633a9cfd9cebbf8de7f6e0df3aab61ee /arch
parenta60ce761d99ff2d9eefe33374c5f20726465a140 (diff)
parent6199f9999a9b62b2b84a1bf5b52a9fd0bb8de5af (diff)
downloadath-a53fcff8fc7530f59a8171824ed586200df724a0.tar.gz
Merge tag 'timers-nohz-2026-06-13' of gitolite.kernel.org:pub/scm/linux/kernel/git/tip/tip
Pull NOHZ updates from Thomas Gleixner: - Fix a long standing TOCTOU in get_cpu_sleep_time_us() - Make the CPU offline NOHZ handling more robust by disabling NOHZ on the outgoing CPU early instead of creating unneeded state which needs to be undone. - Unify idle CPU time accounting instead of having two different accounting mechanisms. These two different mechanisms are not really independent, but the different properties can in the worst case cause that gloabl idle time can be observed going backwards. - Consolidate the idle/iowait time retrieval interfaces instead of converting back and forth between them. - Make idle interrupt time accounting more robust. The original code assumes that interrupt time accouting is enabled and therefore stops elapsing idle time while an interrupt is handled in NOHZ dyntick state. That assumption is not correct as interrupt time accounting can be disabled at compile and runtime. - Fix an accounting error between dyntick idle time and dyntick idle steal time. The stolen time is not accounted and therefore idle time becomes inaccurate. The stolen time is now accounted after the fact as there is no way to predict the steal time upfront. * tag 'timers-nohz-2026-06-13' of gitolite.kernel.org:pub/scm/linux/kernel/git/tip/tip: sched/cputime: Handle dyntick-idle steal time correctly sched/cputime: Handle idle irqtime gracefully sched/cputime: Provide get_cpu_[idle|iowait]_time_us() off-case tick/sched: Consolidate idle time fetching APIs tick/sched: Account tickless idle cputime only when tick is stopped tick/sched: Remove unused fields tick/sched: Move dyntick-idle cputime accounting to cputime code tick/sched: Remove nohz disabled special case in cputime fetch tick/sched: Unify idle cputime accounting s390/time: Prepare to stop elapsing in dynticks-idle powerpc/time: Prepare to stop elapsing in dynticks-idle sched/cputime: Correctly support generic vtime idle time sched/cputime: Remove superfluous and error prone kcpustat_field() parameter sched/idle: Handle offlining first in idle loop tick/sched: Fix TOCTOU in nohz idle time fetch
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/kernel/time.c41
-rw-r--r--arch/s390/include/asm/idle.h2
-rw-r--r--arch/s390/kernel/idle.c5
-rw-r--r--arch/s390/kernel/vtime.c75
4 files changed, 114 insertions, 9 deletions
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b4472288e0d43..3460d1a5a97c2 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -376,6 +376,47 @@ void vtime_task_switch(struct task_struct *prev)
acct->starttime = acct0->starttime;
}
}
+
+#ifdef CONFIG_NO_HZ_COMMON
+/**
+ * vtime_reset - Fast forward vtime entry clocks
+ *
+ * Called from dynticks idle IRQ entry to fast-forward the clocks to current time
+ * so that the IRQ time is still accounted by vtime while nohz cputime is paused.
+ */
+void vtime_reset(void)
+{
+ struct cpu_accounting_data *acct = get_accounting(current);
+
+ acct->starttime = mftb();
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ acct->startspurr = read_spurr(acct->starttime);
+#endif
+}
+
+/**
+ * vtime_dyntick_start - Inform vtime about entry to idle-dynticks
+ *
+ * Called when idle enters in dyntick mode. The idle cputime that elapsed so far
+ * is accumulated and the tick subsystem takes over the idle cputime accounting.
+ */
+void vtime_dyntick_start(void)
+{
+ vtime_account_idle(current);
+}
+
+/**
+ * vtime_dyntick_stop - Inform vtime about exit from idle-dynticks
+ *
+ * Called when idle exits from dyntick mode. The vtime entry clocks are
+ * fast-forward to current time so that idle accounting restarts elapsing from
+ * now.
+ */
+void vtime_dyntick_stop(void)
+{
+ vtime_reset();
+}
+#endif /* CONFIG_NO_HZ_COMMON */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
void __no_kcsan __delay(unsigned long loops)
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index 32536ee34aa09..e4ad09a22400b 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -8,10 +8,12 @@
#ifndef _S390_IDLE_H
#define _S390_IDLE_H
+#include <linux/percpu-defs.h>
#include <linux/types.h>
#include <linux/device.h>
struct s390_idle_data {
+ bool idle_dyntick;
unsigned long idle_count;
unsigned long idle_time;
unsigned long clock_idle_enter;
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 1f1b06b6b4efa..4685d7c5bc518 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -31,7 +31,10 @@ void account_idle_time_irq(void)
/* Account time spent with enabled wait psw loaded as idle time. */
__atomic64_add(idle_time, &idle->idle_time);
__atomic64_add_const(1, &idle->idle_count);
- account_idle_time(cputime_to_nsecs(idle_time));
+
+ /* Dyntick idle time accounted by nohz/scheduler */
+ if (!idle->idle_dyntick)
+ account_idle_time(cputime_to_nsecs(idle_time));
}
void noinstr arch_cpu_idle(void)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index bf48744d09123..d1102a6f80bda 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -17,6 +17,7 @@
#include <asm/vtimer.h>
#include <asm/vtime.h>
#include <asm/cpu_mf.h>
+#include <asm/idle.h>
#include <asm/smp.h>
#include "entry.h"
@@ -110,6 +111,16 @@ static void account_system_index_scaled(struct task_struct *p, u64 cputime,
account_system_index_time(p, cputime_to_nsecs(cputime), index);
}
+static inline void vtime_reset_last_update(struct lowcore *lc)
+{
+ asm volatile(
+ " stpt %0\n" /* Store current cpu timer value */
+ " stckf %1" /* Store current tod clock value */
+ : "=Q" (lc->last_update_timer),
+ "=Q" (lc->last_update_clock)
+ : : "cc");
+}
+
/*
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
@@ -121,17 +132,16 @@ static int do_account_vtime(struct task_struct *tsk)
timer = lc->last_update_timer;
clock = lc->last_update_clock;
- asm volatile(
- " stpt %0\n" /* Store current cpu timer value */
- " stckf %1" /* Store current tod clock value */
- : "=Q" (lc->last_update_timer),
- "=Q" (lc->last_update_clock)
- : : "cc");
+
+ vtime_reset_last_update(lc);
+
clock = lc->last_update_clock - clock;
timer -= lc->last_update_timer;
if (hardirq_count())
lc->hardirq_timer += timer;
+ else if (in_serving_softirq())
+ lc->softirq_timer += timer;
else
lc->system_timer += timer;
@@ -231,13 +241,62 @@ EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_softirq(struct task_struct *tsk)
{
- get_lowcore()->softirq_timer += vtime_delta();
+ if (!__this_cpu_read(s390_idle.idle_dyntick))
+ get_lowcore()->softirq_timer += vtime_delta();
+ else
+ vtime_flush(tsk);
}
void vtime_account_hardirq(struct task_struct *tsk)
{
- get_lowcore()->hardirq_timer += vtime_delta();
+ if (!__this_cpu_read(s390_idle.idle_dyntick)) {
+ get_lowcore()->hardirq_timer += vtime_delta();
+ } else {
+ /*
+ * In dynticks mode, the idle cputime is accounted by the nohz
+ * subsystem. Therefore the s390 timer/clocks are reset on IRQ
+ * entry and steal time must be accounted now.
+ */
+ vtime_flush(tsk);
+ }
+}
+
+#ifdef CONFIG_NO_HZ_COMMON
+/**
+ * vtime_reset - Fast forward vtime entry clocks
+ *
+ * Called from dynticks idle IRQ entry to fast-forward the clocks to current time
+ * so that the IRQ time is still accounted by vtime while nohz cputime is paused.
+ */
+void vtime_reset(void)
+{
+ vtime_reset_last_update(get_lowcore());
+}
+
+/**
+ * vtime_dyntick_start - Inform vtime about entry to idle-dynticks
+ *
+ * Called when idle enters in dyntick mode. The idle cputime that elapsed so far
+ * is flushed and the tick subsystem takes over the idle cputime accounting.
+ */
+void vtime_dyntick_start(void)
+{
+ __this_cpu_write(s390_idle.idle_dyntick, true);
+ vtime_flush(current);
+}
+
+/**
+ * vtime_dyntick_stop - Inform vtime about exit from idle-dynticks
+ *
+ * Called when idle exits from dyntick mode. The vtime entry clocks are
+ * fast-forward to current time and idle accounting resumes.
+ */
+void vtime_dyntick_stop(void)
+{
+ vtime_reset_last_update(get_lowcore());
+ __this_cpu_write(s390_idle.idle_dyntick, false);
}
+#endif /* CONFIG_NO_HZ_COMMON */
/*
* Sorted add to a list. List is linear searched until first bigger