From a9ec7ed936e9341de5d462c3630fec5c9c0f714b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:22 -0800 Subject: [PATCH] BACKPORT: timers: implement usleep_idle_range() Patch series "mm/damon: Fix fake /proc/loadavg reports", v3. This patchset fixes DAMON's fake load report issue. The first patch makes yet another variant of usleep_range() for this fix, and the second patch fixes the issue of DAMON by making it using the newly introduced function. This patch (of 2): Some kernel threads such as DAMON could need to repeatedly sleep in micro seconds level. Because usleep_range() sleeps in uninterruptible state, however, such threads would make /proc/loadavg reports fake load. To help such cases, this commit implements a variant of usleep_range() called usleep_idle_range(). It is same to usleep_range() but sets the state of the current task as TASK_IDLE while sleeping. Link: https://lkml.kernel.org/r/20211126145015.15862-1-sj@kernel.org Link: https://lkml.kernel.org/r/20211126145015.15862-2-sj@kernel.org Signed-off-by: SeongJae Park Suggested-by: Andrew Morton Reviewed-by: Thomas Gleixner Tested-by: Oleksandr Natalenko Cc: John Stultz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit e4779015fd5d2fb8390c258268addff24d6077c7) Bug: 228223814 Signed-off-by: Hailong Tu Change-Id: Ie590ba5fcff22c981d0a7ecae6d8e551160136f3 --- include/linux/delay.h | 8 ++++++++ kernel/time/timer.c | 36 +++++++++++++++++++++++++++--------- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/include/linux/delay.h b/include/linux/delay.h index 1d0e2ce6b6d9..abecbccab6e4 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -20,6 +20,7 @@ */ #include +#include extern unsigned long loops_per_jiffy; @@ -58,8 +59,15 @@ void calibrate_delay(void); void __attribute__((weak)) calibration_delay_done(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); +void usleep_range_state(unsigned long min, unsigned long max, + unsigned int state); void usleep_range(unsigned long min, unsigned long max); +static inline void usleep_idle_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_IDLE); +} + static inline void ssleep(unsigned int seconds) { msleep(seconds * 1000); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ed71c4141047..1535065a7d76 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -2052,6 +2052,32 @@ unsigned long msleep_interruptible(unsigned int msecs) EXPORT_SYMBOL(msleep_interruptible); +/** + * usleep_range_state - Sleep for an approximate time in a given state + * @min: Minimum time in usecs to sleep + * @max: Maximum time in usecs to sleep + * @state: State of the current task that will be while sleeping + * + * In non-atomic context where the exact wakeup time is flexible, use + * usleep_range_state() instead of udelay(). The sleep improves responsiveness + * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces + * power usage by allowing hrtimers to take advantage of an already- + * scheduled interrupt instead of scheduling a new one just for this sleep. + */ +void __sched usleep_range_state(unsigned long min, unsigned long max, + unsigned int state) +{ + ktime_t exp = ktime_add_us(ktime_get(), min); + u64 delta = (u64)(max - min) * NSEC_PER_USEC; + + for (;;) { + __set_current_state(state); + /* Do not return before the requested sleep time has elapsed */ + if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) + break; + } +} + /** * usleep_range - Sleep for an approximate time * @min: Minimum time in usecs to sleep @@ -2065,14 +2091,6 @@ EXPORT_SYMBOL(msleep_interruptible); */ void __sched usleep_range(unsigned long min, unsigned long max) { - ktime_t exp = ktime_add_us(ktime_get(), min); - u64 delta = (u64)(max - min) * NSEC_PER_USEC; - - for (;;) { - __set_current_state(TASK_UNINTERRUPTIBLE); - /* Do not return before the requested sleep time has elapsed */ - if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) - break; - } + usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(usleep_range);