You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge branch 'perf/jump-labels' into perf/core
Merge reason: After much naming discussion, there seems to be consensus
now - queue it up for v3.4.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
@@ -0,0 +1,286 @@
|
||||
Static Keys
|
||||
-----------
|
||||
|
||||
By: Jason Baron <jbaron@redhat.com>
|
||||
|
||||
0) Abstract
|
||||
|
||||
Static keys allow the inclusion of seldom used features in
|
||||
performance-sensitive fast-path kernel code, via a GCC feature and a code
|
||||
patching technique. A quick example:
|
||||
|
||||
struct static_key key = STATIC_KEY_INIT_FALSE;
|
||||
|
||||
...
|
||||
|
||||
if (static_key_false(&key))
|
||||
do unlikely code
|
||||
else
|
||||
do likely code
|
||||
|
||||
...
|
||||
static_key_slow_inc(&key);
|
||||
...
|
||||
static_key_slow_dec(&key);
|
||||
...
|
||||
|
||||
The static_key_false() branch will be generated into the code with as little
|
||||
impact to the likely code path as possible.
|
||||
|
||||
|
||||
1) Motivation
|
||||
|
||||
|
||||
Currently, tracepoints are implemented using a conditional branch. The
|
||||
conditional check requires checking a global variable for each tracepoint.
|
||||
Although the overhead of this check is small, it increases when the memory
|
||||
cache comes under pressure (memory cache lines for these global variables may
|
||||
be shared with other memory accesses). As we increase the number of tracepoints
|
||||
in the kernel this overhead may become more of an issue. In addition,
|
||||
tracepoints are often dormant (disabled) and provide no direct kernel
|
||||
functionality. Thus, it is highly desirable to reduce their impact as much as
|
||||
possible. Although tracepoints are the original motivation for this work, other
|
||||
kernel code paths should be able to make use of the static keys facility.
|
||||
|
||||
|
||||
2) Solution
|
||||
|
||||
|
||||
gcc (v4.5) adds a new 'asm goto' statement that allows branching to a label:
|
||||
|
||||
http://gcc.gnu.org/ml/gcc-patches/2009-07/msg01556.html
|
||||
|
||||
Using the 'asm goto', we can create branches that are either taken or not taken
|
||||
by default, without the need to check memory. Then, at run-time, we can patch
|
||||
the branch site to change the branch direction.
|
||||
|
||||
For example, if we have a simple branch that is disabled by default:
|
||||
|
||||
if (static_key_false(&key))
|
||||
printk("I am the true branch\n");
|
||||
|
||||
Thus, by default the 'printk' will not be emitted. And the code generated will
|
||||
consist of a single atomic 'no-op' instruction (5 bytes on x86), in the
|
||||
straight-line code path. When the branch is 'flipped', we will patch the
|
||||
'no-op' in the straight-line codepath with a 'jump' instruction to the
|
||||
out-of-line true branch. Thus, changing branch direction is expensive but
|
||||
branch selection is basically 'free'. That is the basic tradeoff of this
|
||||
optimization.
|
||||
|
||||
This lowlevel patching mechanism is called 'jump label patching', and it gives
|
||||
the basis for the static keys facility.
|
||||
|
||||
3) Static key label API, usage and examples:
|
||||
|
||||
|
||||
In order to make use of this optimization you must first define a key:
|
||||
|
||||
struct static_key key;
|
||||
|
||||
Which is initialized as:
|
||||
|
||||
struct static_key key = STATIC_KEY_INIT_TRUE;
|
||||
|
||||
or:
|
||||
|
||||
struct static_key key = STATIC_KEY_INIT_FALSE;
|
||||
|
||||
If the key is not initialized, it defaults to false. The 'struct static_key'
|
||||
must be a 'global'. That is, it can't be allocated on the stack or dynamically
|
||||
allocated at run-time.
|
||||
|
||||
The key is then used in code as:
|
||||
|
||||
if (static_key_false(&key))
|
||||
do unlikely code
|
||||
else
|
||||
do likely code
|
||||
|
||||
Or:
|
||||
|
||||
if (static_key_true(&key))
|
||||
do likely code
|
||||
else
|
||||
do unlikely code
|
||||
|
||||
A key that is initialized via 'STATIC_KEY_INIT_FALSE', must be used in a
|
||||
'static_key_false()' construct. Likewise, a key initialized via
|
||||
'STATIC_KEY_INIT_TRUE' must be used in a 'static_key_true()' construct. A
|
||||
single key can be used in many branches, but all the branches must match the
|
||||
way that the key has been initialized.
|
||||
|
||||
The branch(es) can then be switched via:
|
||||
|
||||
static_key_slow_inc(&key);
|
||||
...
|
||||
static_key_slow_dec(&key);
|
||||
|
||||
Thus, 'static_key_slow_inc()' means 'make the branch true', and
|
||||
'static_key_slow_dec()' means 'make the branch false' with appropriate
|
||||
reference counting. For example, if the key is initialized true, a
|
||||
static_key_slow_dec(), will switch the branch to false. And a subsequent
|
||||
static_key_slow_inc(), will change the branch back to true. Likewise, if the
|
||||
key is initialized false, a 'static_key_slow_inc()', will change the branch to
|
||||
true. And then a 'static_key_slow_dec()', will again make the branch false.
|
||||
|
||||
An example usage in the kernel is the implementation of tracepoints:
|
||||
|
||||
static inline void trace_##name(proto) \
|
||||
{ \
|
||||
if (static_key_false(&__tracepoint_##name.key)) \
|
||||
__DO_TRACE(&__tracepoint_##name, \
|
||||
TP_PROTO(data_proto), \
|
||||
TP_ARGS(data_args), \
|
||||
TP_CONDITION(cond)); \
|
||||
}
|
||||
|
||||
Tracepoints are disabled by default, and can be placed in performance critical
|
||||
pieces of the kernel. Thus, by using a static key, the tracepoints can have
|
||||
absolutely minimal impact when not in use.
|
||||
|
||||
|
||||
4) Architecture level code patching interface, 'jump labels'
|
||||
|
||||
|
||||
There are a few functions and macros that architectures must implement in order
|
||||
to take advantage of this optimization. If there is no architecture support, we
|
||||
simply fall back to a traditional load, test, and jump sequence.
|
||||
|
||||
* select HAVE_ARCH_JUMP_LABEL, see: arch/x86/Kconfig
|
||||
|
||||
* #define JUMP_LABEL_NOP_SIZE, see: arch/x86/include/asm/jump_label.h
|
||||
|
||||
* __always_inline bool arch_static_branch(struct static_key *key), see:
|
||||
arch/x86/include/asm/jump_label.h
|
||||
|
||||
* void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type),
|
||||
see: arch/x86/kernel/jump_label.c
|
||||
|
||||
* __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type),
|
||||
see: arch/x86/kernel/jump_label.c
|
||||
|
||||
|
||||
* struct jump_entry, see: arch/x86/include/asm/jump_label.h
|
||||
|
||||
|
||||
5) Static keys / jump label analysis, results (x86_64):
|
||||
|
||||
|
||||
As an example, let's add the following branch to 'getppid()', such that the
|
||||
system call now looks like:
|
||||
|
||||
SYSCALL_DEFINE0(getppid)
|
||||
{
|
||||
int pid;
|
||||
|
||||
+ if (static_key_false(&key))
|
||||
+ printk("I am the true branch\n");
|
||||
|
||||
rcu_read_lock();
|
||||
pid = task_tgid_vnr(rcu_dereference(current->real_parent));
|
||||
rcu_read_unlock();
|
||||
|
||||
return pid;
|
||||
}
|
||||
|
||||
The resulting instructions with jump labels generated by GCC are:
|
||||
|
||||
ffffffff81044290 <sys_getppid>:
|
||||
ffffffff81044290: 55 push %rbp
|
||||
ffffffff81044291: 48 89 e5 mov %rsp,%rbp
|
||||
ffffffff81044294: e9 00 00 00 00 jmpq ffffffff81044299 <sys_getppid+0x9>
|
||||
ffffffff81044299: 65 48 8b 04 25 c0 b6 mov %gs:0xb6c0,%rax
|
||||
ffffffff810442a0: 00 00
|
||||
ffffffff810442a2: 48 8b 80 80 02 00 00 mov 0x280(%rax),%rax
|
||||
ffffffff810442a9: 48 8b 80 b0 02 00 00 mov 0x2b0(%rax),%rax
|
||||
ffffffff810442b0: 48 8b b8 e8 02 00 00 mov 0x2e8(%rax),%rdi
|
||||
ffffffff810442b7: e8 f4 d9 00 00 callq ffffffff81051cb0 <pid_vnr>
|
||||
ffffffff810442bc: 5d pop %rbp
|
||||
ffffffff810442bd: 48 98 cltq
|
||||
ffffffff810442bf: c3 retq
|
||||
ffffffff810442c0: 48 c7 c7 e3 54 98 81 mov $0xffffffff819854e3,%rdi
|
||||
ffffffff810442c7: 31 c0 xor %eax,%eax
|
||||
ffffffff810442c9: e8 71 13 6d 00 callq ffffffff8171563f <printk>
|
||||
ffffffff810442ce: eb c9 jmp ffffffff81044299 <sys_getppid+0x9>
|
||||
|
||||
Without the jump label optimization it looks like:
|
||||
|
||||
ffffffff810441f0 <sys_getppid>:
|
||||
ffffffff810441f0: 8b 05 8a 52 d8 00 mov 0xd8528a(%rip),%eax # ffffffff81dc9480 <key>
|
||||
ffffffff810441f6: 55 push %rbp
|
||||
ffffffff810441f7: 48 89 e5 mov %rsp,%rbp
|
||||
ffffffff810441fa: 85 c0 test %eax,%eax
|
||||
ffffffff810441fc: 75 27 jne ffffffff81044225 <sys_getppid+0x35>
|
||||
ffffffff810441fe: 65 48 8b 04 25 c0 b6 mov %gs:0xb6c0,%rax
|
||||
ffffffff81044205: 00 00
|
||||
ffffffff81044207: 48 8b 80 80 02 00 00 mov 0x280(%rax),%rax
|
||||
ffffffff8104420e: 48 8b 80 b0 02 00 00 mov 0x2b0(%rax),%rax
|
||||
ffffffff81044215: 48 8b b8 e8 02 00 00 mov 0x2e8(%rax),%rdi
|
||||
ffffffff8104421c: e8 2f da 00 00 callq ffffffff81051c50 <pid_vnr>
|
||||
ffffffff81044221: 5d pop %rbp
|
||||
ffffffff81044222: 48 98 cltq
|
||||
ffffffff81044224: c3 retq
|
||||
ffffffff81044225: 48 c7 c7 13 53 98 81 mov $0xffffffff81985313,%rdi
|
||||
ffffffff8104422c: 31 c0 xor %eax,%eax
|
||||
ffffffff8104422e: e8 60 0f 6d 00 callq ffffffff81715193 <printk>
|
||||
ffffffff81044233: eb c9 jmp ffffffff810441fe <sys_getppid+0xe>
|
||||
ffffffff81044235: 66 66 2e 0f 1f 84 00 data32 nopw %cs:0x0(%rax,%rax,1)
|
||||
ffffffff8104423c: 00 00 00 00
|
||||
|
||||
Thus, the disabled jump label case adds a 'mov', 'test' and 'jne' instruction
|
||||
vs. the jump label case just has a 'no-op' or 'jmp 0'. (The jmp 0, is patched
|
||||
to a 5 byte atomic no-op instruction at boot-time.) Thus, the disabled jump
|
||||
label case adds:
|
||||
|
||||
6 (mov) + 2 (test) + 2 (jne) = 10 - 5 (5 byte jump 0) = 5 additional bytes.
|
||||
|
||||
If we then include the padding bytes, the jump label code saves, 16 total bytes
|
||||
of instruction memory for this small function. In this case the non-jump label
|
||||
function is 80 bytes long. Thus, we have saved 20% of the instruction
|
||||
footprint. We can in fact improve this even further, since the 5-byte no-op
|
||||
really can be a 2-byte no-op since we can reach the branch with a 2-byte jmp.
|
||||
However, we have not yet implemented optimal no-op sizes (they are currently
|
||||
hard-coded).
|
||||
|
||||
Since there are a number of static key API uses in the scheduler paths,
|
||||
'pipe-test' (also known as 'perf bench sched pipe') can be used to show the
|
||||
performance improvement. Testing done on 3.3.0-rc2:
|
||||
|
||||
jump label disabled:
|
||||
|
||||
Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs):
|
||||
|
||||
855.700314 task-clock # 0.534 CPUs utilized ( +- 0.11% )
|
||||
200,003 context-switches # 0.234 M/sec ( +- 0.00% )
|
||||
0 CPU-migrations # 0.000 M/sec ( +- 39.58% )
|
||||
487 page-faults # 0.001 M/sec ( +- 0.02% )
|
||||
1,474,374,262 cycles # 1.723 GHz ( +- 0.17% )
|
||||
<not supported> stalled-cycles-frontend
|
||||
<not supported> stalled-cycles-backend
|
||||
1,178,049,567 instructions # 0.80 insns per cycle ( +- 0.06% )
|
||||
208,368,926 branches # 243.507 M/sec ( +- 0.06% )
|
||||
5,569,188 branch-misses # 2.67% of all branches ( +- 0.54% )
|
||||
|
||||
1.601607384 seconds time elapsed ( +- 0.07% )
|
||||
|
||||
jump label enabled:
|
||||
|
||||
Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs):
|
||||
|
||||
841.043185 task-clock # 0.533 CPUs utilized ( +- 0.12% )
|
||||
200,004 context-switches # 0.238 M/sec ( +- 0.00% )
|
||||
0 CPU-migrations # 0.000 M/sec ( +- 40.87% )
|
||||
487 page-faults # 0.001 M/sec ( +- 0.05% )
|
||||
1,432,559,428 cycles # 1.703 GHz ( +- 0.18% )
|
||||
<not supported> stalled-cycles-frontend
|
||||
<not supported> stalled-cycles-backend
|
||||
1,175,363,994 instructions # 0.82 insns per cycle ( +- 0.04% )
|
||||
206,859,359 branches # 245.956 M/sec ( +- 0.04% )
|
||||
4,884,119 branch-misses # 2.36% of all branches ( +- 0.85% )
|
||||
|
||||
1.579384366 seconds time elapsed
|
||||
|
||||
The percentage of saved branches is .7%, and we've saved 12% on
|
||||
'branch-misses'. This is where we would expect to get the most savings, since
|
||||
this optimization is about reducing the number of branches. In addition, we've
|
||||
saved .2% on instructions, and 2.8% on cycles and 1.4% on elapsed time.
|
||||
+20
-9
@@ -47,18 +47,29 @@ config KPROBES
|
||||
If in doubt, say "N".
|
||||
|
||||
config JUMP_LABEL
|
||||
bool "Optimize trace point call sites"
|
||||
bool "Optimize very unlikely/likely branches"
|
||||
depends on HAVE_ARCH_JUMP_LABEL
|
||||
help
|
||||
If it is detected that the compiler has support for "asm goto",
|
||||
the kernel will compile trace point locations with just a
|
||||
nop instruction. When trace points are enabled, the nop will
|
||||
be converted to a jump to the trace function. This technique
|
||||
lowers overhead and stress on the branch prediction of the
|
||||
processor.
|
||||
This option enables a transparent branch optimization that
|
||||
makes certain almost-always-true or almost-always-false branch
|
||||
conditions even cheaper to execute within the kernel.
|
||||
|
||||
On i386, options added to the compiler flags may increase
|
||||
the size of the kernel slightly.
|
||||
Certain performance-sensitive kernel code, such as trace points,
|
||||
scheduler functionality, networking code and KVM have such
|
||||
branches and include support for this optimization technique.
|
||||
|
||||
If it is detected that the compiler has support for "asm goto",
|
||||
the kernel will compile such branches with just a nop
|
||||
instruction. When the condition flag is toggled to true, the
|
||||
nop will be converted to a jump instruction to execute the
|
||||
conditional block of instructions.
|
||||
|
||||
This technique lowers overhead and stress on the branch prediction
|
||||
of the processor and generally makes the kernel faster. The update
|
||||
of the condition is slower, but those are always very rare.
|
||||
|
||||
( On 32-bit x86, the necessary options added to the compiler
|
||||
flags may increase the size of the kernel slightly. )
|
||||
|
||||
config OPTPROBES
|
||||
def_bool y
|
||||
|
||||
@@ -281,9 +281,9 @@ paravirt_init_missing_ticks_accounting(int cpu)
|
||||
pv_time_ops.init_missing_ticks_accounting(cpu);
|
||||
}
|
||||
|
||||
struct jump_label_key;
|
||||
extern struct jump_label_key paravirt_steal_enabled;
|
||||
extern struct jump_label_key paravirt_steal_rq_enabled;
|
||||
struct static_key;
|
||||
extern struct static_key paravirt_steal_enabled;
|
||||
extern struct static_key paravirt_steal_rq_enabled;
|
||||
|
||||
static inline int
|
||||
paravirt_do_steal_accounting(unsigned long *new_itm)
|
||||
|
||||
@@ -634,8 +634,8 @@ struct pv_irq_ops pv_irq_ops = {
|
||||
* pv_time_ops
|
||||
* time operations
|
||||
*/
|
||||
struct jump_label_key paravirt_steal_enabled;
|
||||
struct jump_label_key paravirt_steal_rq_enabled;
|
||||
struct static_key paravirt_steal_enabled;
|
||||
struct static_key paravirt_steal_rq_enabled;
|
||||
|
||||
static int
|
||||
ia64_native_do_steal_accounting(unsigned long *new_itm)
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
#define WORD_INSN ".word"
|
||||
#endif
|
||||
|
||||
static __always_inline bool arch_static_branch(struct jump_label_key *key)
|
||||
static __always_inline bool arch_static_branch(struct static_key *key)
|
||||
{
|
||||
asm goto("1:\tnop\n\t"
|
||||
"nop\n\t"
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
#define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG)
|
||||
#define JUMP_LABEL_NOP_SIZE 4
|
||||
|
||||
static __always_inline bool arch_static_branch(struct jump_label_key *key)
|
||||
static __always_inline bool arch_static_branch(struct static_key *key)
|
||||
{
|
||||
asm goto("1:\n\t"
|
||||
"nop\n\t"
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#define ASM_ALIGN ".balign 4"
|
||||
#endif
|
||||
|
||||
static __always_inline bool arch_static_branch(struct jump_label_key *key)
|
||||
static __always_inline bool arch_static_branch(struct static_key *key)
|
||||
{
|
||||
asm goto("0: brcl 0,0\n"
|
||||
".pushsection __jump_table, \"aw\"\n"
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#define JUMP_LABEL_NOP_SIZE 4
|
||||
|
||||
static __always_inline bool arch_static_branch(struct jump_label_key *key)
|
||||
static __always_inline bool arch_static_branch(struct static_key *key)
|
||||
{
|
||||
asm goto("1:\n\t"
|
||||
"nop\n\t"
|
||||
|
||||
@@ -9,12 +9,12 @@
|
||||
|
||||
#define JUMP_LABEL_NOP_SIZE 5
|
||||
|
||||
#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
|
||||
#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
|
||||
|
||||
static __always_inline bool arch_static_branch(struct jump_label_key *key)
|
||||
static __always_inline bool arch_static_branch(struct static_key *key)
|
||||
{
|
||||
asm goto("1:"
|
||||
JUMP_LABEL_INITIAL_NOP
|
||||
STATIC_KEY_INITIAL_NOP
|
||||
".pushsection __jump_table, \"aw\" \n\t"
|
||||
_ASM_ALIGN "\n\t"
|
||||
_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
|
||||
|
||||
@@ -230,9 +230,9 @@ static inline unsigned long long paravirt_sched_clock(void)
|
||||
return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
|
||||
}
|
||||
|
||||
struct jump_label_key;
|
||||
extern struct jump_label_key paravirt_steal_enabled;
|
||||
extern struct jump_label_key paravirt_steal_rq_enabled;
|
||||
struct static_key;
|
||||
extern struct static_key paravirt_steal_enabled;
|
||||
extern struct static_key paravirt_steal_rq_enabled;
|
||||
|
||||
static inline u64 paravirt_steal_clock(int cpu)
|
||||
{
|
||||
|
||||
@@ -438,9 +438,9 @@ void __init kvm_guest_init(void)
|
||||
static __init int activate_jump_labels(void)
|
||||
{
|
||||
if (has_steal_clock) {
|
||||
jump_label_inc(&paravirt_steal_enabled);
|
||||
static_key_slow_inc(&paravirt_steal_enabled);
|
||||
if (steal_acc)
|
||||
jump_label_inc(&paravirt_steal_rq_enabled);
|
||||
static_key_slow_inc(&paravirt_steal_rq_enabled);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -202,8 +202,8 @@ static void native_flush_tlb_single(unsigned long addr)
|
||||
__native_flush_tlb_single(addr);
|
||||
}
|
||||
|
||||
struct jump_label_key paravirt_steal_enabled;
|
||||
struct jump_label_key paravirt_steal_rq_enabled;
|
||||
struct static_key paravirt_steal_enabled;
|
||||
struct static_key paravirt_steal_rq_enabled;
|
||||
|
||||
static u64 native_steal_clock(int cpu)
|
||||
{
|
||||
|
||||
@@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
static bool mmu_audit;
|
||||
static struct jump_label_key mmu_audit_key;
|
||||
static struct static_key mmu_audit_key;
|
||||
|
||||
static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
|
||||
{
|
||||
@@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
|
||||
|
||||
static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
|
||||
{
|
||||
if (static_branch((&mmu_audit_key)))
|
||||
if (static_key_false((&mmu_audit_key)))
|
||||
__kvm_mmu_audit(vcpu, point);
|
||||
}
|
||||
|
||||
@@ -259,7 +259,7 @@ static void mmu_audit_enable(void)
|
||||
if (mmu_audit)
|
||||
return;
|
||||
|
||||
jump_label_inc(&mmu_audit_key);
|
||||
static_key_slow_inc(&mmu_audit_key);
|
||||
mmu_audit = true;
|
||||
}
|
||||
|
||||
@@ -268,7 +268,7 @@ static void mmu_audit_disable(void)
|
||||
if (!mmu_audit)
|
||||
return;
|
||||
|
||||
jump_label_dec(&mmu_audit_key);
|
||||
static_key_slow_dec(&mmu_audit_key);
|
||||
mmu_audit = false;
|
||||
}
|
||||
|
||||
|
||||
+100
-39
@@ -9,15 +9,15 @@
|
||||
*
|
||||
* Jump labels provide an interface to generate dynamic branches using
|
||||
* self-modifying code. Assuming toolchain and architecture support the result
|
||||
* of a "if (static_branch(&key))" statement is a unconditional branch (which
|
||||
* of a "if (static_key_false(&key))" statement is an unconditional branch (which
|
||||
* defaults to false - and the true block is placed out of line).
|
||||
*
|
||||
* However at runtime we can change the 'static' branch target using
|
||||
* jump_label_{inc,dec}(). These function as a 'reference' count on the key
|
||||
* However at runtime we can change the branch target using
|
||||
* static_key_slow_{inc,dec}(). These function as a 'reference' count on the key
|
||||
* object and for as long as there are references all branches referring to
|
||||
* that particular key will point to the (out of line) true block.
|
||||
*
|
||||
* Since this relies on modifying code the jump_label_{inc,dec}() functions
|
||||
* Since this relies on modifying code the static_key_slow_{inc,dec}() functions
|
||||
* must be considered absolute slow paths (machine wide synchronization etc.).
|
||||
* OTOH, since the affected branches are unconditional their runtime overhead
|
||||
* will be absolutely minimal, esp. in the default (off) case where the total
|
||||
@@ -26,12 +26,26 @@
|
||||
*
|
||||
* When the control is directly exposed to userspace it is prudent to delay the
|
||||
* decrement to avoid high frequency code modifications which can (and do)
|
||||
* cause significant performance degradation. Struct jump_label_key_deferred and
|
||||
* jump_label_dec_deferred() provide for this.
|
||||
* cause significant performance degradation. Struct static_key_deferred and
|
||||
* static_key_slow_dec_deferred() provide for this.
|
||||
*
|
||||
* Lacking toolchain and or architecture support, it falls back to a simple
|
||||
* conditional branch.
|
||||
*/
|
||||
*
|
||||
* struct static_key my_key = STATIC_KEY_INIT_TRUE;
|
||||
*
|
||||
* if (static_key_true(&my_key)) {
|
||||
* }
|
||||
*
|
||||
* will result in the true case being in-line and starts the key with a single
|
||||
* reference. Mixing static_key_true() and static_key_false() on the same key is not
|
||||
* allowed.
|
||||
*
|
||||
* Not initializing the key (static data is initialized to 0s anyway) is the
|
||||
* same as using STATIC_KEY_INIT_FALSE and static_key_false() is
|
||||
* equivalent with static_branch().
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/compiler.h>
|
||||
@@ -39,16 +53,17 @@
|
||||
|
||||
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
|
||||
|
||||
struct jump_label_key {
|
||||
struct static_key {
|
||||
atomic_t enabled;
|
||||
/* Set lsb bit to 1 if branch is default true, 0 otherwise */
|
||||
struct jump_entry *entries;
|
||||
#ifdef CONFIG_MODULES
|
||||
struct jump_label_mod *next;
|
||||
struct static_key_mod *next;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct jump_label_key_deferred {
|
||||
struct jump_label_key key;
|
||||
struct static_key_deferred {
|
||||
struct static_key key;
|
||||
unsigned long timeout;
|
||||
struct delayed_work work;
|
||||
};
|
||||
@@ -66,13 +81,34 @@ struct module;
|
||||
|
||||
#ifdef HAVE_JUMP_LABEL
|
||||
|
||||
#ifdef CONFIG_MODULES
|
||||
#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL, NULL}
|
||||
#else
|
||||
#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL}
|
||||
#endif
|
||||
#define JUMP_LABEL_TRUE_BRANCH 1UL
|
||||
|
||||
static __always_inline bool static_branch(struct jump_label_key *key)
|
||||
static
|
||||
inline struct jump_entry *jump_label_get_entries(struct static_key *key)
|
||||
{
|
||||
return (struct jump_entry *)((unsigned long)key->entries
|
||||
& ~JUMP_LABEL_TRUE_BRANCH);
|
||||
}
|
||||
|
||||
static inline bool jump_label_get_branch_default(struct static_key *key)
|
||||
{
|
||||
if ((unsigned long)key->entries & JUMP_LABEL_TRUE_BRANCH)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static __always_inline bool static_key_false(struct static_key *key)
|
||||
{
|
||||
return arch_static_branch(key);
|
||||
}
|
||||
|
||||
static __always_inline bool static_key_true(struct static_key *key)
|
||||
{
|
||||
return !static_key_false(key);
|
||||
}
|
||||
|
||||
/* Deprecated. Please use 'static_key_false()' instead. */
|
||||
static __always_inline bool static_branch(struct static_key *key)
|
||||
{
|
||||
return arch_static_branch(key);
|
||||
}
|
||||
@@ -88,21 +124,24 @@ extern void arch_jump_label_transform(struct jump_entry *entry,
|
||||
extern void arch_jump_label_transform_static(struct jump_entry *entry,
|
||||
enum jump_label_type type);
|
||||
extern int jump_label_text_reserved(void *start, void *end);
|
||||
extern void jump_label_inc(struct jump_label_key *key);
|
||||
extern void jump_label_dec(struct jump_label_key *key);
|
||||
extern void jump_label_dec_deferred(struct jump_label_key_deferred *key);
|
||||
extern bool jump_label_enabled(struct jump_label_key *key);
|
||||
extern void static_key_slow_inc(struct static_key *key);
|
||||
extern void static_key_slow_dec(struct static_key *key);
|
||||
extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
|
||||
extern bool static_key_enabled(struct static_key *key);
|
||||
extern void jump_label_apply_nops(struct module *mod);
|
||||
extern void jump_label_rate_limit(struct jump_label_key_deferred *key,
|
||||
unsigned long rl);
|
||||
extern void
|
||||
jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
|
||||
|
||||
#define STATIC_KEY_INIT_TRUE ((struct static_key) \
|
||||
{ .enabled = ATOMIC_INIT(1), .entries = (void *)1 })
|
||||
#define STATIC_KEY_INIT_FALSE ((struct static_key) \
|
||||
{ .enabled = ATOMIC_INIT(0), .entries = (void *)0 })
|
||||
|
||||
#else /* !HAVE_JUMP_LABEL */
|
||||
|
||||
#include <linux/atomic.h>
|
||||
|
||||
#define JUMP_LABEL_INIT {ATOMIC_INIT(0)}
|
||||
|
||||
struct jump_label_key {
|
||||
struct static_key {
|
||||
atomic_t enabled;
|
||||
};
|
||||
|
||||
@@ -110,30 +149,45 @@ static __always_inline void jump_label_init(void)
|
||||
{
|
||||
}
|
||||
|
||||
struct jump_label_key_deferred {
|
||||
struct jump_label_key key;
|
||||
struct static_key_deferred {
|
||||
struct static_key key;
|
||||
};
|
||||
|
||||
static __always_inline bool static_branch(struct jump_label_key *key)
|
||||
static __always_inline bool static_key_false(struct static_key *key)
|
||||
{
|
||||
if (unlikely(atomic_read(&key->enabled)))
|
||||
if (unlikely(atomic_read(&key->enabled)) > 0)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void jump_label_inc(struct jump_label_key *key)
|
||||
/*
 * Fallback used when jump labels are not available: test a key that is
 * expected to be true most of the time.
 *
 * Reads the key's 'enabled' counter and returns true only when it is
 * greater than zero, the same test static_key_enabled() applies.  The
 * comparison is placed inside likely() so the branch hint covers the
 * whole condition rather than the raw counter value.
 */
static __always_inline bool static_key_true(struct static_key *key)
{
	if (likely(atomic_read(&key->enabled) > 0))
		return true;
	return false;
}
|
||||
|
||||
/* Deprecated. Please use 'static_key_false() instead. */
|
||||
static __always_inline bool static_branch(struct static_key *key)
|
||||
{
|
||||
if (unlikely(atomic_read(&key->enabled)) > 0)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void static_key_slow_inc(struct static_key *key)
|
||||
{
|
||||
atomic_inc(&key->enabled);
|
||||
}
|
||||
|
||||
static inline void jump_label_dec(struct jump_label_key *key)
|
||||
static inline void static_key_slow_dec(struct static_key *key)
|
||||
{
|
||||
atomic_dec(&key->enabled);
|
||||
}
|
||||
|
||||
static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key)
|
||||
static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
|
||||
{
|
||||
jump_label_dec(&key->key);
|
||||
static_key_slow_dec(&key->key);
|
||||
}
|
||||
|
||||
static inline int jump_label_text_reserved(void *start, void *end)
|
||||
@@ -144,9 +198,9 @@ static inline int jump_label_text_reserved(void *start, void *end)
|
||||
static inline void jump_label_lock(void) {}
|
||||
static inline void jump_label_unlock(void) {}
|
||||
|
||||
static inline bool jump_label_enabled(struct jump_label_key *key)
|
||||
static inline bool static_key_enabled(struct static_key *key)
|
||||
{
|
||||
return !!atomic_read(&key->enabled);
|
||||
return (atomic_read(&key->enabled) > 0);
|
||||
}
|
||||
|
||||
static inline int jump_label_apply_nops(struct module *mod)
|
||||
@@ -154,13 +208,20 @@ static inline int jump_label_apply_nops(struct module *mod)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void jump_label_rate_limit(struct jump_label_key_deferred *key,
|
||||
static inline void
|
||||
jump_label_rate_limit(struct static_key_deferred *key,
|
||||
unsigned long rl)
|
||||
{
|
||||
}
|
||||
|
||||
#define STATIC_KEY_INIT_TRUE ((struct static_key) \
|
||||
{ .enabled = ATOMIC_INIT(1) })
|
||||
#define STATIC_KEY_INIT_FALSE ((struct static_key) \
|
||||
{ .enabled = ATOMIC_INIT(0) })
|
||||
|
||||
#endif /* HAVE_JUMP_LABEL */
|
||||
|
||||
#define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), })
|
||||
#define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), })
|
||||
#define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE
|
||||
#define jump_label_enabled static_key_enabled
|
||||
|
||||
#endif /* _LINUX_JUMP_LABEL_H */
|
||||
|
||||
@@ -214,8 +214,8 @@ enum {
|
||||
#include <linux/skbuff.h>
|
||||
|
||||
#ifdef CONFIG_RPS
|
||||
#include <linux/jump_label.h>
|
||||
extern struct jump_label_key rps_needed;
|
||||
#include <linux/static_key.h>
|
||||
extern struct static_key rps_needed;
|
||||
#endif
|
||||
|
||||
struct neighbour;
|
||||
|
||||
@@ -163,13 +163,13 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[];
|
||||
extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
|
||||
|
||||
#if defined(CONFIG_JUMP_LABEL)
|
||||
#include <linux/jump_label.h>
|
||||
extern struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
|
||||
#include <linux/static_key.h>
|
||||
extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
|
||||
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
|
||||
{
|
||||
if (__builtin_constant_p(pf) &&
|
||||
__builtin_constant_p(hook))
|
||||
return static_branch(&nf_hooks_needed[pf][hook]);
|
||||
return static_key_false(&nf_hooks_needed[pf][hook]);
|
||||
|
||||
return !list_empty(&nf_hooks[pf][hook]);
|
||||
}
|
||||
|
||||
@@ -514,7 +514,7 @@ struct perf_guest_info_callbacks {
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/static_key.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/local.h>
|
||||
|
||||
@@ -1041,7 +1041,7 @@ static inline int is_software_event(struct perf_event *event)
|
||||
return event->pmu->task_ctx_nr == perf_sw_context;
|
||||
}
|
||||
|
||||
extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
|
||||
extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
|
||||
|
||||
extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
|
||||
|
||||
@@ -1069,7 +1069,7 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
|
||||
{
|
||||
struct pt_regs hot_regs;
|
||||
|
||||
if (static_branch(&perf_swevent_enabled[event_id])) {
|
||||
if (static_key_false(&perf_swevent_enabled[event_id])) {
|
||||
if (!regs) {
|
||||
perf_fetch_caller_regs(&hot_regs);
|
||||
regs = &hot_regs;
|
||||
@@ -1078,12 +1078,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
|
||||
}
|
||||
}
|
||||
|
||||
extern struct jump_label_key_deferred perf_sched_events;
|
||||
extern struct static_key_deferred perf_sched_events;
|
||||
|
||||
static inline void perf_event_task_sched_in(struct task_struct *prev,
|
||||
struct task_struct *task)
|
||||
{
|
||||
if (static_branch(&perf_sched_events.key))
|
||||
if (static_key_false(&perf_sched_events.key))
|
||||
__perf_event_task_sched_in(prev, task);
|
||||
}
|
||||
|
||||
@@ -1092,7 +1092,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
|
||||
{
|
||||
perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
|
||||
|
||||
if (static_branch(&perf_sched_events.key))
|
||||
if (static_key_false(&perf_sched_events.key))
|
||||
__perf_event_task_sched_out(prev, next);
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
#include <linux/jump_label.h>
|
||||
@@ -17,7 +17,7 @@
|
||||
#include <linux/errno.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/static_key.h>
|
||||
|
||||
struct module;
|
||||
struct tracepoint;
|
||||
@@ -29,7 +29,7 @@ struct tracepoint_func {
|
||||
|
||||
struct tracepoint {
|
||||
const char *name; /* Tracepoint name */
|
||||
struct jump_label_key key;
|
||||
struct static_key key;
|
||||
void (*regfunc)(void);
|
||||
void (*unregfunc)(void);
|
||||
struct tracepoint_func __rcu *funcs;
|
||||
@@ -145,7 +145,7 @@ static inline void tracepoint_synchronize_unregister(void)
|
||||
extern struct tracepoint __tracepoint_##name; \
|
||||
static inline void trace_##name(proto) \
|
||||
{ \
|
||||
if (static_branch(&__tracepoint_##name.key)) \
|
||||
if (static_key_false(&__tracepoint_##name.key)) \
|
||||
__DO_TRACE(&__tracepoint_##name, \
|
||||
TP_PROTO(data_proto), \
|
||||
TP_ARGS(data_args), \
|
||||
@@ -188,7 +188,7 @@ static inline void tracepoint_synchronize_unregister(void)
|
||||
__attribute__((section("__tracepoints_strings"))) = #name; \
|
||||
struct tracepoint __tracepoint_##name \
|
||||
__attribute__((section("__tracepoints"))) = \
|
||||
{ __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\
|
||||
{ __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
|
||||
static struct tracepoint * const __tracepoint_ptr_##name __used \
|
||||
__attribute__((section("__tracepoints_ptrs"))) = \
|
||||
&__tracepoint_##name;
|
||||
|
||||
+3
-3
@@ -55,7 +55,7 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/memcontrol.h>
|
||||
#include <linux/res_counter.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/static_key.h>
|
||||
|
||||
#include <linux/filter.h>
|
||||
#include <linux/rculist_nulls.h>
|
||||
@@ -924,13 +924,13 @@ inline void sk_refcnt_debug_release(const struct sock *sk)
|
||||
#endif /* SOCK_REFCNT_DEBUG */
|
||||
|
||||
#if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET)
|
||||
extern struct jump_label_key memcg_socket_limit_enabled;
|
||||
extern struct static_key memcg_socket_limit_enabled;
|
||||
static inline struct cg_proto *parent_cg_proto(struct proto *proto,
|
||||
struct cg_proto *cg_proto)
|
||||
{
|
||||
return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
|
||||
}
|
||||
#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled)
|
||||
#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled)
|
||||
#else
|
||||
#define mem_cgroup_sockets_enabled 0
|
||||
static inline struct cg_proto *parent_cg_proto(struct proto *proto,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user