Merge tag 'sched_ext-for-6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext updates from Tejun Heo:

 - scx_bpf_now() added so that BPF scheduler can access the cached timestamp
   in struct rq to avoid reading TSC multiple times within a locked
   scheduling operation.

 - Minor updates to the built-in idle CPU selection logic.

 - tool/sched_ext updates and other misc changes.

* tag 'sched_ext-for-6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: fix kernel-doc warnings
  sched_ext: Use time helpers in BPF schedulers
  sched_ext: Replace bpf_ktime_get_ns() to scx_bpf_now()
  sched_ext: Add time helpers for BPF schedulers
  sched_ext: Add scx_bpf_now() for BPF scheduler
  sched_ext: Implement scx_bpf_now()
  sched_ext: Relocate scx_enabled() related code
  sched_ext: Add option -l in selftest runner to list all available tests
  sched_ext: Include remaining task time slice in error state dump
  sched_ext: update scx_bpf_dsq_insert() doc for SCX_DSQ_LOCAL_ON
  sched_ext: idle: small CPU iteration refactoring
  sched_ext: idle: introduce check_builtin_idle_enabled() helper
  sched_ext: idle: clarify comments
  sched_ext: idle: use assign_cpu() to update the idle cpumask
  sched_ext: Use str_enabled_disabled() helper in update_selcpu_topology()
  sched_ext: Use sizeof_field for key_len in dsq_hash_params
  tools/sched_ext: Receive updates from SCX repo
  sched_ext: Use the NUMA scheduling domain for NUMA optimizations
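For context, a minimal sketch of how a BPF scheduler might use the new scx_bpf_now() kfunc together with the time_delta() helper added by this pull, instead of calling bpf_ktime_get_ns() in locked scheduling paths. The demo_running/demo_stopping callbacks and the started_at map are hypothetical and only for illustration; this is not part of the diff below.

/* Sketch only: assumes the tools/sched_ext scx headers and a sched_ext-enabled kernel. */
#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

/* Hypothetical per-task start timestamps, keyed by PID. */
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, 8192);
        __type(key, s32);
        __type(value, u64);
} started_at SEC(".maps");

void BPF_STRUCT_OPS(demo_running, struct task_struct *p)
{
        /* scx_bpf_now() reads the rq-cached clock; no extra TSC read here. */
        u64 now = scx_bpf_now();
        s32 pid = p->pid;

        bpf_map_update_elem(&started_at, &pid, &now, BPF_ANY);
}

void BPF_STRUCT_OPS(demo_stopping, struct task_struct *p, bool runnable)
{
        s32 pid = p->pid;
        u64 *since = bpf_map_lookup_elem(&started_at, &pid);

        /* time_delta() clamps a negative difference to 0. */
        if (since)
                bpf_printk("pid %d ran for %llu ns", pid, time_delta(scx_bpf_now(), *since));
}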
@@ -242,9 +242,9 @@ The following briefly shows how a waking task is scheduled and executed.
    task was inserted directly from ``ops.select_cpu()``). ``ops.enqueue()``
    can make one of the following decisions:
 
-   * Immediately insert the task into either the global or local DSQ by
-     calling ``scx_bpf_dsq_insert()`` with ``SCX_DSQ_GLOBAL`` or
-     ``SCX_DSQ_LOCAL``, respectively.
+   * Immediately insert the task into either the global or a local DSQ by
+     calling ``scx_bpf_dsq_insert()`` with one of the following options:
+     ``SCX_DSQ_GLOBAL``, ``SCX_DSQ_LOCAL``, or ``SCX_DSQ_LOCAL_ON | cpu``.
 
    * Immediately insert the task into a custom DSQ by calling
      ``scx_bpf_dsq_insert()`` with a DSQ ID which is smaller than 2^63.
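To illustrate the insertion options described in the updated documentation above, a minimal, hypothetical ops.enqueue() sketch (the demo_enqueue name and its pinned-task policy are invented for illustration; it assumes <scx/common.bpf.h> and the SCX_SLICE_DFL default slice):

void BPF_STRUCT_OPS(demo_enqueue, struct task_struct *p, u64 enq_flags)
{
        /* Tasks pinned to a single CPU go straight to that CPU's local DSQ. */
        if (p->nr_cpus_allowed == 1) {
                s32 cpu = scx_bpf_task_cpu(p);

                scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_DFL, enq_flags);
                return;
        }

        /* Everything else is queued on the global DSQ. */
        scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
}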
@@ -793,6 +793,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 void update_rq_clock(struct rq *rq)
 {
         s64 delta;
+        u64 clock;
 
         lockdep_assert_rq_held(rq);
 
@@ -804,11 +805,14 @@ void update_rq_clock(struct rq *rq)
         SCHED_WARN_ON(rq->clock_update_flags & RQCF_UPDATED);
         rq->clock_update_flags |= RQCF_UPDATED;
 #endif
+        clock = sched_clock_cpu(cpu_of(rq));
+        scx_rq_clock_update(rq, clock);
 
-        delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
+        delta = clock - rq->clock;
         if (delta < 0)
                 return;
         rq->clock += delta;
+
         update_rq_clock_task(rq, delta);
 }
[File diff suppressed because it is too large]
@@ -759,6 +759,7 @@ enum scx_rq_flags {
         SCX_RQ_BAL_PENDING      = 1 << 2, /* balance hasn't run yet */
         SCX_RQ_BAL_KEEP         = 1 << 3, /* balance decided to keep current */
         SCX_RQ_BYPASSING        = 1 << 4,
+        SCX_RQ_CLK_VALID        = 1 << 5, /* RQ clock is fresh and valid */
 
         SCX_RQ_IN_WAKEUP        = 1 << 16,
         SCX_RQ_IN_BALANCE       = 1 << 17,
@@ -771,9 +772,10 @@ struct scx_rq {
         unsigned long           ops_qseq;
         u64                     extra_enq_flags;  /* see move_task_to_local_dsq() */
         u32                     nr_running;
-        u32                     flags;
         u32                     cpuperf_target;   /* [0, SCHED_CAPACITY_SCALE] */
         bool                    cpu_released;
+        u32                     flags;
+        u64                     clock;            /* current per-rq clock -- see scx_bpf_now() */
         cpumask_var_t           cpus_to_kick;
         cpumask_var_t           cpus_to_kick_if_idle;
         cpumask_var_t           cpus_to_preempt;
@@ -1722,6 +1724,38 @@ struct rq_flags {
 
 extern struct balance_callback balance_push_callback;
 
+#ifdef CONFIG_SCHED_CLASS_EXT
+extern const struct sched_class ext_sched_class;
+
+DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled);    /* SCX BPF scheduler loaded */
+DECLARE_STATIC_KEY_FALSE(__scx_switched_all);   /* all fair class tasks on SCX */
+
+#define scx_enabled()           static_branch_unlikely(&__scx_ops_enabled)
+#define scx_switched_all()      static_branch_unlikely(&__scx_switched_all)
+
+static inline void scx_rq_clock_update(struct rq *rq, u64 clock)
+{
+        if (!scx_enabled())
+                return;
+        WRITE_ONCE(rq->scx.clock, clock);
+        smp_store_release(&rq->scx.flags, rq->scx.flags | SCX_RQ_CLK_VALID);
+}
+
+static inline void scx_rq_clock_invalidate(struct rq *rq)
+{
+        if (!scx_enabled())
+                return;
+        WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID);
+}
+
+#else /* !CONFIG_SCHED_CLASS_EXT */
+#define scx_enabled()           false
+#define scx_switched_all()      false
+
+static inline void scx_rq_clock_update(struct rq *rq, u64 clock) {}
+static inline void scx_rq_clock_invalidate(struct rq *rq) {}
+#endif /* !CONFIG_SCHED_CLASS_EXT */
+
 /*
  * Lockdep annotation that avoids accidental unlocks; it's like a
  * sticky/continuous lockdep_assert_held().
@@ -1751,7 +1785,7 @@ static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
         if (rq->clock_update_flags > RQCF_ACT_SKIP)
                 rf->clock_update_flags = RQCF_UPDATED;
 #endif
-
+        scx_rq_clock_invalidate(rq);
         lockdep_unpin_lock(__rq_lockp(rq), rf->cookie);
 }
 
@@ -2510,19 +2544,6 @@ extern const struct sched_class rt_sched_class;
 extern const struct sched_class fair_sched_class;
 extern const struct sched_class idle_sched_class;
 
-#ifdef CONFIG_SCHED_CLASS_EXT
-extern const struct sched_class ext_sched_class;
-
-DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled);    /* SCX BPF scheduler loaded */
-DECLARE_STATIC_KEY_FALSE(__scx_switched_all);   /* all fair class tasks on SCX */
-
-#define scx_enabled()           static_branch_unlikely(&__scx_ops_enabled)
-#define scx_switched_all()      static_branch_unlikely(&__scx_switched_all)
-#else /* !CONFIG_SCHED_CLASS_EXT */
-#define scx_enabled()           false
-#define scx_switched_all()      false
-#endif /* !CONFIG_SCHED_CLASS_EXT */
-
 /*
  * Iterate only active classes. SCX can take over all fair tasks or be
  * completely disabled. If the former, skip fair. If the latter, skip SCX.
@@ -9,7 +9,7 @@
 
 #ifdef LSP
 #define __bpf__
-#include "../vmlinux/vmlinux.h"
+#include "../vmlinux.h"
 #else
 #include "vmlinux.h"
 #endif
@@ -24,6 +24,10 @@
 #define PF_EXITING                      0x00000004
 #define CLOCK_MONOTONIC                 1
 
+extern int LINUX_KERNEL_VERSION __kconfig;
+extern const char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak;
+extern const char CONFIG_LOCALVERSION[64] __kconfig __weak;
+
 /*
  * Earlier versions of clang/pahole lost upper 32bits in 64bit enums which can
  * lead to really confusing misbehaviors. Let's trigger a build failure.
@@ -72,6 +76,7 @@ bool scx_bpf_task_running(const struct task_struct *p) __ksym;
 s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
 struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
 struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
+u64 scx_bpf_now(void) __ksym __weak;
 
 /*
  * Use the following as @it__iter when calling scx_bpf_dsq_move[_vtime]() from
@@ -98,7 +103,7 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
         _Pragma("GCC diagnostic push")                                  \
         _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")          \
         ___bpf_fill(___param, args);                                    \
-        _Pragma("GCC diagnostic pop")                                   \
+        _Pragma("GCC diagnostic pop")
 
 /*
  * scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments
@@ -136,6 +141,20 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
         ___scx_bpf_bstr_format_checker(fmt, ##args);                    \
 })
 
+/*
+ * scx_bpf_dump_header() is a wrapper around scx_bpf_dump that adds a header
+ * of system information for debugging.
+ */
+#define scx_bpf_dump_header()                                           \
+({                                                                      \
+        scx_bpf_dump("kernel: %d.%d.%d %s\ncc: %s\n",                   \
+                     LINUX_KERNEL_VERSION >> 16,                        \
+                     LINUX_KERNEL_VERSION >> 8 & 0xFF,                  \
+                     LINUX_KERNEL_VERSION & 0xFF,                       \
+                     CONFIG_LOCALVERSION,                               \
+                     CONFIG_CC_VERSION_TEXT);                           \
+})
+
 #define BPF_STRUCT_OPS(name, args...)                                   \
 SEC("struct_ops/"#name)                                                 \
 BPF_PROG(name, ##args)
@@ -317,6 +336,66 @@ u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
                                    const struct cpumask *src2) __ksym;
 u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym;
 
+int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words) __ksym;
+int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym;
+void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym;
+
+#define def_iter_struct(name)                                           \
+struct bpf_iter_##name {                                                \
+        struct bpf_iter_bits it;                                        \
+        const struct cpumask *bitmap;                                   \
+};
+
+#define def_iter_new(name)                                              \
+static inline int bpf_iter_##name##_new(                                \
+        struct bpf_iter_##name *it, const u64 *unsafe_ptr__ign, u32 nr_words)  \
+{                                                                       \
+        it->bitmap = scx_bpf_get_##name##_cpumask();                    \
+        return bpf_iter_bits_new(&it->it, (const u64 *)it->bitmap,      \
+                                 sizeof(struct cpumask) / 8);           \
+}
+
+#define def_iter_next(name)                                             \
+static inline int *bpf_iter_##name##_next(struct bpf_iter_##name *it) { \
+        return bpf_iter_bits_next(&it->it);                             \
+}
+
+#define def_iter_destroy(name)                                          \
+static inline void bpf_iter_##name##_destroy(struct bpf_iter_##name *it) {     \
+        scx_bpf_put_cpumask(it->bitmap);                                \
+        bpf_iter_bits_destroy(&it->it);                                 \
+}
+#define def_for_each_cpu(cpu, name) for_each_##name##_cpu(cpu)
+
+/// Provides iterator for possible and online cpus.
+///
+/// # Example
+///
+/// ```
+/// static inline void example_use() {
+///     int *cpu;
+///
+///     for_each_possible_cpu(cpu){
+///         bpf_printk("CPU %d is possible", *cpu);
+///     }
+///
+///     for_each_online_cpu(cpu){
+///         bpf_printk("CPU %d is online", *cpu);
+///     }
+/// }
+/// ```
+def_iter_struct(possible);
+def_iter_new(possible);
+def_iter_next(possible);
+def_iter_destroy(possible);
+#define for_each_possible_cpu(cpu) bpf_for_each(possible, cpu, NULL, 0)
+
+def_iter_struct(online);
+def_iter_new(online);
+def_iter_next(online);
+def_iter_destroy(online);
+#define for_each_online_cpu(cpu) bpf_for_each(online, cpu, NULL, 0)
+
 /*
  * Access a cpumask in read-only mode (typically to check bits).
  */
@@ -329,6 +408,100 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask)
 void bpf_rcu_read_lock(void) __ksym;
 void bpf_rcu_read_unlock(void) __ksym;
 
+/*
+ * Time helpers, most of which are from jiffies.h.
+ */
+
+/**
+ * time_delta - Calculate the delta between new and old time stamp
+ * @after: first comparable as u64
+ * @before: second comparable as u64
+ *
+ * Return: the time difference, which is >= 0
+ */
+static inline s64 time_delta(u64 after, u64 before)
+{
+        return (s64)(after - before) > 0 ? (s64)(after - before) : 0;
+}
+
+/**
+ * time_after - returns true if the time a is after time b.
+ * @a: first comparable as u64
+ * @b: second comparable as u64
+ *
+ * Do this with "<0" and ">=0" to only test the sign of the result. A
+ * good compiler would generate better code (and a really good compiler
+ * wouldn't care). Gcc is currently neither.
+ *
+ * Return: %true is time a is after time b, otherwise %false.
+ */
+static inline bool time_after(u64 a, u64 b)
+{
+        return (s64)(b - a) < 0;
+}
+
+/**
+ * time_before - returns true if the time a is before time b.
+ * @a: first comparable as u64
+ * @b: second comparable as u64
+ *
+ * Return: %true is time a is before time b, otherwise %false.
+ */
+static inline bool time_before(u64 a, u64 b)
+{
+        return time_after(b, a);
+}
+
+/**
+ * time_after_eq - returns true if the time a is after or the same as time b.
+ * @a: first comparable as u64
+ * @b: second comparable as u64
+ *
+ * Return: %true is time a is after or the same as time b, otherwise %false.
+ */
+static inline bool time_after_eq(u64 a, u64 b)
+{
+        return (s64)(a - b) >= 0;
+}
+
+/**
+ * time_before_eq - returns true if the time a is before or the same as time b.
+ * @a: first comparable as u64
+ * @b: second comparable as u64
+ *
+ * Return: %true is time a is before or the same as time b, otherwise %false.
+ */
+static inline bool time_before_eq(u64 a, u64 b)
+{
+        return time_after_eq(b, a);
+}
+
+/**
+ * time_in_range - Calculate whether a is in the range of [b, c].
+ * @a: time to test
+ * @b: beginning of the range
+ * @c: end of the range
+ *
+ * Return: %true is time a is in the range [b, c], otherwise %false.
+ */
+static inline bool time_in_range(u64 a, u64 b, u64 c)
+{
+        return time_after_eq(a, b) && time_before_eq(a, c);
+}
+
+/**
+ * time_in_range_open - Calculate whether a is in the range of [b, c).
+ * @a: time to test
+ * @b: beginning of the range
+ * @c: end of the range
+ *
+ * Return: %true is time a is in the range [b, c), otherwise %false.
+ */
+static inline bool time_in_range_open(u64 a, u64 b, u64 c)
+{
+        return time_after_eq(a, b) && time_before(a, c);
+}
+
 /*
  * Other helpers
@@ -423,5 +596,6 @@ static inline u32 log2_u64(u64 v)
 }
 
 #include "compat.bpf.h"
+#include "enums.bpf.h"
 
 #endif  /* __SCX_COMMON_BPF_H */
@@ -71,5 +71,11 @@ typedef int64_t s64;
 
 #include "user_exit_info.h"
 #include "compat.h"
+#include "enums.h"
+
+/* not available when building kernel tools/sched_ext */
+#if __has_include(<lib/sdt_task.h>)
+#include <lib/sdt_task.h>
+#endif
 
 #endif  /* __SCHED_EXT_COMMON_H */
@@ -125,6 +125,11 @@ bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter,
         false;                                                          \
 })
 
+#define scx_bpf_now()                                                   \
+        (bpf_ksym_exists(scx_bpf_now) ?                                 \
+         scx_bpf_now() :                                                \
+         bpf_ktime_get_ns())
+
 /*
  * Define sched_ext_ops. This may be expanded to define multiple variants for
  * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
@@ -149,6 +149,7 @@ static inline long scx_hotplug_seq(void)
         __skel = __scx_name##__open();                                  \
         SCX_BUG_ON(!__skel, "Could not open " #__scx_name);             \
         __skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq(); \
+        SCX_ENUM_INIT(__skel);                                          \
         __skel;                                                         \
 })
tools/sched_ext/include/scx/enums.autogen.bpf.h (new file, 105 lines)
@@ -0,0 +1,105 @@
+/*
+ * WARNING: This file is autogenerated from scripts/gen_enums.py. If you would
+ * like to access an enum that is currently missing, add it to the script
+ * and run it from the root directory to update this file.
+ */
+
+const volatile u64 __SCX_OPS_NAME_LEN __weak;
+#define SCX_OPS_NAME_LEN __SCX_OPS_NAME_LEN
+
+const volatile u64 __SCX_SLICE_DFL __weak;
+#define SCX_SLICE_DFL __SCX_SLICE_DFL
+
+const volatile u64 __SCX_SLICE_INF __weak;
+#define SCX_SLICE_INF __SCX_SLICE_INF
+
+const volatile u64 __SCX_DSQ_FLAG_BUILTIN __weak;
+#define SCX_DSQ_FLAG_BUILTIN __SCX_DSQ_FLAG_BUILTIN
+
+const volatile u64 __SCX_DSQ_FLAG_LOCAL_ON __weak;
+#define SCX_DSQ_FLAG_LOCAL_ON __SCX_DSQ_FLAG_LOCAL_ON
+
+const volatile u64 __SCX_DSQ_INVALID __weak;
+#define SCX_DSQ_INVALID __SCX_DSQ_INVALID
+
+const volatile u64 __SCX_DSQ_GLOBAL __weak;
+#define SCX_DSQ_GLOBAL __SCX_DSQ_GLOBAL
+
+const volatile u64 __SCX_DSQ_LOCAL __weak;
+#define SCX_DSQ_LOCAL __SCX_DSQ_LOCAL
+
+const volatile u64 __SCX_DSQ_LOCAL_ON __weak;
+#define SCX_DSQ_LOCAL_ON __SCX_DSQ_LOCAL_ON
+
+const volatile u64 __SCX_DSQ_LOCAL_CPU_MASK __weak;
+#define SCX_DSQ_LOCAL_CPU_MASK __SCX_DSQ_LOCAL_CPU_MASK
+
+const volatile u64 __SCX_TASK_QUEUED __weak;
+#define SCX_TASK_QUEUED __SCX_TASK_QUEUED
+
+const volatile u64 __SCX_TASK_RESET_RUNNABLE_AT __weak;
+#define SCX_TASK_RESET_RUNNABLE_AT __SCX_TASK_RESET_RUNNABLE_AT
+
+const volatile u64 __SCX_TASK_DEQD_FOR_SLEEP __weak;
+#define SCX_TASK_DEQD_FOR_SLEEP __SCX_TASK_DEQD_FOR_SLEEP
+
+const volatile u64 __SCX_TASK_STATE_SHIFT __weak;
+#define SCX_TASK_STATE_SHIFT __SCX_TASK_STATE_SHIFT
+
+const volatile u64 __SCX_TASK_STATE_BITS __weak;
+#define SCX_TASK_STATE_BITS __SCX_TASK_STATE_BITS
+
+const volatile u64 __SCX_TASK_STATE_MASK __weak;
+#define SCX_TASK_STATE_MASK __SCX_TASK_STATE_MASK
+
+const volatile u64 __SCX_TASK_CURSOR __weak;
+#define SCX_TASK_CURSOR __SCX_TASK_CURSOR
+
+const volatile u64 __SCX_TASK_NONE __weak;
+#define SCX_TASK_NONE __SCX_TASK_NONE
+
+const volatile u64 __SCX_TASK_INIT __weak;
+#define SCX_TASK_INIT __SCX_TASK_INIT
+
+const volatile u64 __SCX_TASK_READY __weak;
+#define SCX_TASK_READY __SCX_TASK_READY
+
+const volatile u64 __SCX_TASK_ENABLED __weak;
+#define SCX_TASK_ENABLED __SCX_TASK_ENABLED
+
+const volatile u64 __SCX_TASK_NR_STATES __weak;
+#define SCX_TASK_NR_STATES __SCX_TASK_NR_STATES
+
+const volatile u64 __SCX_TASK_DSQ_ON_PRIQ __weak;
+#define SCX_TASK_DSQ_ON_PRIQ __SCX_TASK_DSQ_ON_PRIQ
+
+const volatile u64 __SCX_KICK_IDLE __weak;
+#define SCX_KICK_IDLE __SCX_KICK_IDLE
+
+const volatile u64 __SCX_KICK_PREEMPT __weak;
+#define SCX_KICK_PREEMPT __SCX_KICK_PREEMPT
+
+const volatile u64 __SCX_KICK_WAIT __weak;
+#define SCX_KICK_WAIT __SCX_KICK_WAIT
+
+const volatile u64 __SCX_ENQ_WAKEUP __weak;
+#define SCX_ENQ_WAKEUP __SCX_ENQ_WAKEUP
+
+const volatile u64 __SCX_ENQ_HEAD __weak;
+#define SCX_ENQ_HEAD __SCX_ENQ_HEAD
+
+const volatile u64 __SCX_ENQ_PREEMPT __weak;
+#define SCX_ENQ_PREEMPT __SCX_ENQ_PREEMPT
+
+const volatile u64 __SCX_ENQ_REENQ __weak;
+#define SCX_ENQ_REENQ __SCX_ENQ_REENQ
+
+const volatile u64 __SCX_ENQ_LAST __weak;
+#define SCX_ENQ_LAST __SCX_ENQ_LAST
+
+const volatile u64 __SCX_ENQ_CLEAR_OPSS __weak;
+#define SCX_ENQ_CLEAR_OPSS __SCX_ENQ_CLEAR_OPSS
+
+const volatile u64 __SCX_ENQ_DSQ_PRIQ __weak;
+#define SCX_ENQ_DSQ_PRIQ __SCX_ENQ_DSQ_PRIQ
tools/sched_ext/include/scx/enums.autogen.h (new file, 41 lines)
@@ -0,0 +1,41 @@
+/*
+ * WARNING: This file is autogenerated from scripts/gen_enums.py. If you would
+ * like to access an enum that is currently missing, add it to the script
+ * and run it from the root directory to update this file.
+ */
+
+#define SCX_ENUM_INIT(skel) do {                                        \
+        SCX_ENUM_SET(skel, scx_public_consts, SCX_OPS_NAME_LEN);        \
+        SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_DFL);           \
+        SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_INF);           \
+        SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_BUILTIN);     \
+        SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_LOCAL_ON);    \
+        SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_INVALID);          \
+        SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_GLOBAL);           \
+        SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL);            \
+        SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_ON);         \
+        SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_CPU_MASK);   \
+        SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_QUEUED);             \
+        SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_RESET_RUNNABLE_AT);  \
+        SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_DEQD_FOR_SLEEP);     \
+        SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_SHIFT);        \
+        SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_BITS);         \
+        SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_MASK);         \
+        SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_CURSOR);             \
+        SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NONE);              \
+        SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_INIT);              \
+        SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_READY);             \
+        SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_ENABLED);           \
+        SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NR_STATES);         \
+        SCX_ENUM_SET(skel, scx_ent_dsq_flags, SCX_TASK_DSQ_ON_PRIQ);    \
+        SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_IDLE);              \
+        SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_PREEMPT);           \
+        SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_WAIT);              \
+        SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_WAKEUP);              \
+        SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_HEAD);                \
+        SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_PREEMPT);             \
+        SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_REENQ);               \
+        SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_LAST);                \
+        SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_CLEAR_OPSS);          \
+        SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_DSQ_PRIQ);            \
+} while (0)
tools/sched_ext/include/scx/enums.bpf.h (new file, 12 lines)
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Convenience macros for getting/setting struct scx_enums instances.
+ *
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ */
+#ifndef __SCX_ENUMS_BPF_H
+#define __SCX_ENUMS_BPF_H
+
+#include "enums.autogen.bpf.h"
+
+#endif /* __SCX_ENUMS_BPF_H */
tools/sched_ext/include/scx/enums.h (new file, 27 lines)
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Define struct scx_enums that stores the load-time values of enums
+ * used by the BPF program.
+ *
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ */
+
+#ifndef __SCX_ENUMS_H
+#define __SCX_ENUMS_H
+
+static inline void __ENUM_set(u64 *val, char *type, char *name)
+{
+        bool res;
+
+        res = __COMPAT_read_enum(type, name, val);
+        SCX_BUG_ON(!res, "enum not found(%s)", name);
+}
+
+#define SCX_ENUM_SET(skel, type, name) do {                     \
+        __ENUM_set(&skel->rodata->__##name, #type, #name);      \
+} while (0)
+
+
+#include "enums.autogen.h"
+
+#endif /* __SCX_ENUMS_H */
@@ -10,6 +10,11 @@
 #ifndef __USER_EXIT_INFO_H
 #define __USER_EXIT_INFO_H
 
+#ifdef LSP
+#define __bpf__
+#include "../vmlinux.h"
+#endif
+
 enum uei_sizes {
         UEI_REASON_LEN          = 128,
         UEI_MSG_LEN             = 1024,
@@ -25,9 +30,7 @@ struct user_exit_info {
 
 #ifdef __bpf__
 
-#ifdef LSP
-#include "../vmlinux/vmlinux.h"
-#else
+#ifndef LSP
 #include "vmlinux.h"
 #endif
 #include <bpf/bpf_core_read.h>
 
@@ -57,7 +57,7 @@ enum {
 const volatile s32 central_cpu;
 const volatile u32 nr_cpu_ids = 1;      /* !0 for veristat, set during init */
-const volatile u64 slice_ns = SCX_SLICE_DFL;
+const volatile u64 slice_ns;
 
 bool timer_pinned = true;
 u64 nr_total, nr_locals, nr_queued, nr_lost_pids;
@@ -87,11 +87,6 @@ struct {
         __type(value, struct central_timer);
 } central_timer SEC(".maps");
 
-static bool vtime_before(u64 a, u64 b)
-{
-        return (s64)(a - b) < 0;
-}
-
 s32 BPF_STRUCT_OPS(central_select_cpu, struct task_struct *p,
                    s32 prev_cpu, u64 wake_flags)
 {
@@ -245,7 +240,7 @@ void BPF_STRUCT_OPS(central_running, struct task_struct *p)
         s32 cpu = scx_bpf_task_cpu(p);
         u64 *started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
         if (started_at)
-                *started_at = bpf_ktime_get_ns() ?: 1;  /* 0 indicates idle */
+                *started_at = scx_bpf_now() ?: 1;       /* 0 indicates idle */
 }
 
 void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable)
@@ -258,7 +253,7 @@ void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable)
 
 static int central_timerfn(void *map, int *key, struct bpf_timer *timer)
 {
-        u64 now = bpf_ktime_get_ns();
+        u64 now = scx_bpf_now();
         u64 nr_to_kick = nr_queued;
         s32 i, curr_cpu;
 
@@ -279,7 +274,7 @@ static int central_timerfn(void *map, int *key, struct bpf_timer *timer)
                 /* kick iff the current one exhausted its slice */
                 started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
                 if (started_at && *started_at &&
-                    vtime_before(now, *started_at + slice_ns))
+                    time_before(now, *started_at + slice_ns))
                         continue;
 
                 /* and there's something pending */
@@ -58,6 +58,7 @@ restart:
 
         skel->rodata->central_cpu = 0;
         skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
+        skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
 
         while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
                 switch (opt) {
@@ -57,7 +57,7 @@ enum {
 char _license[] SEC("license") = "GPL";
 
 const volatile u32 nr_cpus = 32;        /* !0 for veristat, set during init */
-const volatile u64 cgrp_slice_ns = SCX_SLICE_DFL;
+const volatile u64 cgrp_slice_ns;
 const volatile bool fifo_sched;
 
 u64 cvtime_now;
@@ -137,11 +137,6 @@ static u64 div_round_up(u64 dividend, u64 divisor)
         return (dividend + divisor - 1) / divisor;
 }
 
-static bool vtime_before(u64 a, u64 b)
-{
-        return (s64)(a - b) < 0;
-}
-
 static bool cgv_node_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 {
         struct cgv_node *cgc_a, *cgc_b;
@@ -271,7 +266,7 @@ static void cgrp_cap_budget(struct cgv_node *cgv_node, struct fcg_cgrp_ctx *cgc)
          */
         max_budget = (cgrp_slice_ns * nr_cpus * cgc->hweight) /
                 (2 * FCG_HWEIGHT_ONE);
-        if (vtime_before(cvtime, cvtime_now - max_budget))
+        if (time_before(cvtime, cvtime_now - max_budget))
                 cvtime = cvtime_now - max_budget;
 
         cgv_node->cvtime = cvtime;
@@ -401,7 +396,7 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags)
                  * Limit the amount of budget that an idling task can accumulate
                  * to one slice.
                  */
-                if (vtime_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL))
+                if (time_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL))
                         tvtime = cgc->tvtime_now - SCX_SLICE_DFL;
 
                 scx_bpf_dsq_insert_vtime(p, cgrp->kn->id, SCX_SLICE_DFL,
@@ -535,7 +530,7 @@ void BPF_STRUCT_OPS(fcg_running, struct task_struct *p)
                  * from multiple CPUs and thus racy. Any error should be
                  * contained and temporary. Let's just live with it.
                  */
-                if (vtime_before(cgc->tvtime_now, p->scx.dsq_vtime))
+                if (time_before(cgc->tvtime_now, p->scx.dsq_vtime))
                         cgc->tvtime_now = p->scx.dsq_vtime;
         }
         bpf_cgroup_release(cgrp);
@@ -645,7 +640,7 @@ static bool try_pick_next_cgroup(u64 *cgidp)
         cgv_node = container_of(rb_node, struct cgv_node, rb_node);
         cgid = cgv_node->cgid;
 
-        if (vtime_before(cvtime_now, cgv_node->cvtime))
+        if (time_before(cvtime_now, cgv_node->cvtime))
                 cvtime_now = cgv_node->cvtime;
 
         /*
@@ -734,7 +729,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev)
         struct fcg_cpu_ctx *cpuc;
         struct fcg_cgrp_ctx *cgc;
         struct cgroup *cgrp;
-        u64 now = bpf_ktime_get_ns();
+        u64 now = scx_bpf_now();
         bool picked_next = false;
 
         cpuc = find_cpu_ctx();
@@ -744,7 +739,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev)
         if (!cpuc->cur_cgid)
                 goto pick_next_cgroup;
 
-        if (vtime_before(now, cpuc->cur_at + cgrp_slice_ns)) {
+        if (time_before(now, cpuc->cur_at + cgrp_slice_ns)) {
                 if (scx_bpf_dsq_move_to_local(cpuc->cur_cgid)) {
                         stat_inc(FCG_STAT_CNS_KEEP);
                         return;
@@ -920,14 +915,14 @@ void BPF_STRUCT_OPS(fcg_cgroup_move, struct task_struct *p,
                     struct cgroup *from, struct cgroup *to)
 {
         struct fcg_cgrp_ctx *from_cgc, *to_cgc;
-        s64 vtime_delta;
+        s64 delta;
 
         /* find_cgrp_ctx() triggers scx_ops_error() on lookup failures */
         if (!(from_cgc = find_cgrp_ctx(from)) || !(to_cgc = find_cgrp_ctx(to)))
                 return;
 
-        vtime_delta = p->scx.dsq_vtime - from_cgc->tvtime_now;
-        p->scx.dsq_vtime = to_cgc->tvtime_now + vtime_delta;
+        delta = time_delta(p->scx.dsq_vtime, from_cgc->tvtime_now);
+        p->scx.dsq_vtime = to_cgc->tvtime_now + delta;
 }
 
 s32 BPF_STRUCT_OPS_SLEEPABLE(fcg_init)
@@ -137,6 +137,7 @@ restart:
         skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
 
         skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+        skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
 
         while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
                 double v;
@@ -33,7 +33,7 @@ enum consts {
 
 char _license[] SEC("license") = "GPL";
 
-const volatile u64 slice_ns = SCX_SLICE_DFL;
+const volatile u64 slice_ns;
 const volatile u32 stall_user_nth;
 const volatile u32 stall_kernel_nth;
 const volatile u32 dsp_inf_loop_after;
@@ -64,6 +64,8 @@ int main(int argc, char **argv)
 
         skel = SCX_OPS_OPEN(qmap_ops, scx_qmap);
 
+        skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
+
         while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PHd:D:Spvh")) != -1) {
                 switch (opt) {
                 case 's':
@@ -52,11 +52,6 @@ static void stat_inc(u32 idx)
                 (*cnt_p)++;
 }
 
-static inline bool vtime_before(u64 a, u64 b)
-{
-        return (s64)(a - b) < 0;
-}
-
 s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
 {
         bool is_idle = false;
@@ -84,7 +79,7 @@ void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags)
                  * Limit the amount of budget that an idling task can accumulate
                  * to one slice.
                  */
-                if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL))
+                if (time_before(vtime, vtime_now - SCX_SLICE_DFL))
                         vtime = vtime_now - SCX_SLICE_DFL;
 
                 scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime,
@@ -108,7 +103,7 @@ void BPF_STRUCT_OPS(simple_running, struct task_struct *p)
          * thus racy. Any error should be contained and temporary. Let's just
          * live with it.
          */
-        if (vtime_before(vtime_now, p->scx.dsq_vtime))
+        if (time_before(vtime_now, p->scx.dsq_vtime))
                 vtime_now = p->scx.dsq_vtime;
 }
[Some files were not shown because too many files have changed in this diff]