You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge branch 'core-rseq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull restartable sequence support from Thomas Gleixner: "The restartable sequences syscall (finally): After a lot of back and forth discussion and massive delays caused by the speculative distraction of maintainers, the core set of restartable sequences has finally reached a consensus. It comes with the basic non disputed core implementation along with support for arm, powerpc and x86 and a full set of selftests. It was exposed to linux-next earlier this week, so it does not fully comply with the merge window requirements, but there is really no point to drag it out for yet another cycle" * 'core-rseq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: rseq/selftests: Provide Makefile, scripts, gitignore rseq/selftests: Provide parametrized tests rseq/selftests: Provide basic percpu ops test rseq/selftests: Provide basic test rseq/selftests: Provide rseq library selftests/lib.mk: Introduce OVERRIDE_TARGETS powerpc: Wire up restartable sequences system call powerpc: Add syscall detection for restartable sequences powerpc: Add support for restartable sequences x86: Wire up restartable sequence system call x86: Add support for restartable sequences arm: Wire up restartable sequences system call arm: Add syscall detection for restartable sequences arm: Add restartable sequences support rseq: Introduce restartable sequences system call uapi/headers: Provide types_32_64.h
This commit is contained in:
+12
@@ -12134,6 +12134,18 @@ F: include/dt-bindings/reset/
|
||||
F: include/linux/reset.h
|
||||
F: include/linux/reset-controller.h
|
||||
|
||||
RESTARTABLE SEQUENCES SUPPORT
|
||||
M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
|
||||
M: Peter Zijlstra <peterz@infradead.org>
|
||||
M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
|
||||
M: Boqun Feng <boqun.feng@gmail.com>
|
||||
L: linux-kernel@vger.kernel.org
|
||||
S: Supported
|
||||
F: kernel/rseq.c
|
||||
F: include/uapi/linux/rseq.h
|
||||
F: include/trace/events/rseq.h
|
||||
F: tools/testing/selftests/rseq/
|
||||
|
||||
RFKILL
|
||||
M: Johannes Berg <johannes@sipsolutions.net>
|
||||
L: linux-wireless@vger.kernel.org
|
||||
|
||||
@@ -272,6 +272,13 @@ config HAVE_REGS_AND_STACK_ACCESS_API
|
||||
declared in asm/ptrace.h
|
||||
For example the kprobes-based event tracer needs this API.
|
||||
|
||||
config HAVE_RSEQ
|
||||
bool
|
||||
depends on HAVE_REGS_AND_STACK_ACCESS_API
|
||||
help
|
||||
This symbol should be selected by an architecture if it
|
||||
supports an implementation of restartable sequences.
|
||||
|
||||
config HAVE_CLK
|
||||
bool
|
||||
help
|
||||
|
||||
@@ -91,6 +91,7 @@ config ARM
|
||||
select HAVE_PERF_USER_STACK_DUMP
|
||||
select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
|
||||
select HAVE_REGS_AND_STACK_ACCESS_API
|
||||
select HAVE_RSEQ
|
||||
select HAVE_SYSCALL_TRACEPOINTS
|
||||
select HAVE_UID16
|
||||
select HAVE_VIRT_CPU_ACCOUNTING_GEN
|
||||
|
||||
@@ -39,12 +39,13 @@ saved_pc .req lr
|
||||
|
||||
.section .entry.text,"ax",%progbits
|
||||
.align 5
|
||||
#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING))
|
||||
#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING) || \
|
||||
IS_ENABLED(CONFIG_DEBUG_RSEQ))
|
||||
/*
|
||||
* This is the fast syscall return path. We do as little as possible here,
|
||||
* such as avoiding writing r0 to the stack. We only use this path if we
|
||||
* have tracing and context tracking disabled - the overheads from those
|
||||
* features make this path too inefficient.
|
||||
* have tracing, context tracking and rseq debug disabled - the overheads
|
||||
* from those features make this path too inefficient.
|
||||
*/
|
||||
ret_fast_syscall:
|
||||
UNWIND(.fnstart )
|
||||
@@ -71,14 +72,20 @@ fast_work_pending:
|
||||
/* fall through to work_pending */
|
||||
#else
|
||||
/*
|
||||
* The "replacement" ret_fast_syscall for when tracing or context tracking
|
||||
* is enabled. As we will need to call out to some C functions, we save
|
||||
* r0 first to avoid needing to save registers around each C function call.
|
||||
* The "replacement" ret_fast_syscall for when tracing, context tracking,
|
||||
* or rseq debug is enabled. As we will need to call out to some C functions,
|
||||
* we save r0 first to avoid needing to save registers around each C function
|
||||
* call.
|
||||
*/
|
||||
ret_fast_syscall:
|
||||
UNWIND(.fnstart )
|
||||
UNWIND(.cantunwind )
|
||||
str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
|
||||
#if IS_ENABLED(CONFIG_DEBUG_RSEQ)
|
||||
/* do_rseq_syscall needs interrupts enabled. */
|
||||
mov r0, sp @ 'regs'
|
||||
bl do_rseq_syscall
|
||||
#endif
|
||||
disable_irq_notrace @ disable interrupts
|
||||
ldr r2, [tsk, #TI_ADDR_LIMIT]
|
||||
cmp r2, #TASK_SIZE
|
||||
@@ -113,6 +120,12 @@ ENDPROC(ret_fast_syscall)
|
||||
*/
|
||||
ENTRY(ret_to_user)
|
||||
ret_slow_syscall:
|
||||
#if IS_ENABLED(CONFIG_DEBUG_RSEQ)
|
||||
/* do_rseq_syscall needs interrupts enabled. */
|
||||
enable_irq_notrace @ enable interrupts
|
||||
mov r0, sp @ 'regs'
|
||||
bl do_rseq_syscall
|
||||
#endif
|
||||
disable_irq_notrace @ disable interrupts
|
||||
ENTRY(ret_to_user_from_irq)
|
||||
ldr r2, [tsk, #TI_ADDR_LIMIT]
|
||||
|
||||
@@ -540,6 +540,12 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
|
||||
sigset_t *oldset = sigmask_to_save();
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Increment event counter and perform fixup for the pre-signal
|
||||
* frame.
|
||||
*/
|
||||
rseq_signal_deliver(regs);
|
||||
|
||||
/*
|
||||
* Set up the stack frame
|
||||
*/
|
||||
@@ -660,6 +666,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
|
||||
} else {
|
||||
clear_thread_flag(TIF_NOTIFY_RESUME);
|
||||
tracehook_notify_resume(regs);
|
||||
rseq_handle_notify_resume(regs);
|
||||
}
|
||||
}
|
||||
local_irq_disable();
|
||||
@@ -703,3 +710,10 @@ asmlinkage void addr_limit_check_failed(void)
|
||||
{
|
||||
addr_limit_user_check();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_RSEQ
|
||||
asmlinkage void do_rseq_syscall(struct pt_regs *regs)
|
||||
{
|
||||
rseq_syscall(regs);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -412,3 +412,4 @@
|
||||
395 common pkey_alloc sys_pkey_alloc
|
||||
396 common pkey_free sys_pkey_free
|
||||
397 common statx sys_statx
|
||||
398 common rseq sys_rseq
|
||||
|
||||
@@ -220,6 +220,7 @@ config PPC
|
||||
select HAVE_SYSCALL_TRACEPOINTS
|
||||
select HAVE_VIRT_CPU_ACCOUNTING
|
||||
select HAVE_IRQ_TIME_ACCOUNTING
|
||||
select HAVE_RSEQ
|
||||
select IOMMU_HELPER if PPC64
|
||||
select IRQ_DOMAIN
|
||||
select IRQ_FORCED_THREADING
|
||||
|
||||
@@ -392,3 +392,4 @@ SYSCALL(statx)
|
||||
SYSCALL(pkey_alloc)
|
||||
SYSCALL(pkey_free)
|
||||
SYSCALL(pkey_mprotect)
|
||||
SYSCALL(rseq)
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#include <uapi/asm/unistd.h>
|
||||
|
||||
|
||||
#define NR_syscalls 387
|
||||
#define NR_syscalls 388
|
||||
|
||||
#define __NR__exit __NR_exit
|
||||
|
||||
|
||||
@@ -398,5 +398,6 @@
|
||||
#define __NR_pkey_alloc 384
|
||||
#define __NR_pkey_free 385
|
||||
#define __NR_pkey_mprotect 386
|
||||
#define __NR_rseq 387
|
||||
|
||||
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
|
||||
|
||||
@@ -365,6 +365,13 @@ syscall_dotrace_cont:
|
||||
blrl /* Call handler */
|
||||
.globl ret_from_syscall
|
||||
ret_from_syscall:
|
||||
#ifdef CONFIG_DEBUG_RSEQ
|
||||
/* Check whether the syscall is issued inside a restartable sequence */
|
||||
stw r3,GPR3(r1)
|
||||
addi r3,r1,STACK_FRAME_OVERHEAD
|
||||
bl rseq_syscall
|
||||
lwz r3,GPR3(r1)
|
||||
#endif
|
||||
mr r6,r3
|
||||
CURRENT_THREAD_INFO(r12, r1)
|
||||
/* disable interrupts so current_thread_info()->flags can't change */
|
||||
|
||||
@@ -194,6 +194,14 @@ system_call: /* label this so stack traces look sane */
|
||||
|
||||
.Lsyscall_exit:
|
||||
std r3,RESULT(r1)
|
||||
|
||||
#ifdef CONFIG_DEBUG_RSEQ
|
||||
/* Check whether the syscall is issued inside a restartable sequence */
|
||||
addi r3,r1,STACK_FRAME_OVERHEAD
|
||||
bl rseq_syscall
|
||||
ld r3,RESULT(r1)
|
||||
#endif
|
||||
|
||||
CURRENT_THREAD_INFO(r12, r1)
|
||||
|
||||
ld r8,_MSR(r1)
|
||||
|
||||
@@ -134,6 +134,8 @@ static void do_signal(struct task_struct *tsk)
|
||||
/* Re-enable the breakpoints for the signal stack */
|
||||
thread_change_pc(tsk, tsk->thread.regs);
|
||||
|
||||
rseq_signal_deliver(tsk->thread.regs);
|
||||
|
||||
if (is32) {
|
||||
if (ksig.ka.sa.sa_flags & SA_SIGINFO)
|
||||
ret = handle_rt_signal32(&ksig, oldset, tsk);
|
||||
@@ -168,6 +170,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
|
||||
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
|
||||
clear_thread_flag(TIF_NOTIFY_RESUME);
|
||||
tracehook_notify_resume(regs);
|
||||
rseq_handle_notify_resume(regs);
|
||||
}
|
||||
|
||||
user_enter();
|
||||
|
||||
@@ -183,6 +183,7 @@ config X86
|
||||
select HAVE_REGS_AND_STACK_ACCESS_API
|
||||
select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
|
||||
select HAVE_STACK_VALIDATION if X86_64
|
||||
select HAVE_RSEQ
|
||||
select HAVE_SYSCALL_TRACEPOINTS
|
||||
select HAVE_UNSTABLE_SCHED_CLOCK
|
||||
select HAVE_USER_RETURN_NOTIFIER
|
||||
|
||||
@@ -164,6 +164,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
|
||||
if (cached_flags & _TIF_NOTIFY_RESUME) {
|
||||
clear_thread_flag(TIF_NOTIFY_RESUME);
|
||||
tracehook_notify_resume(regs);
|
||||
rseq_handle_notify_resume(regs);
|
||||
}
|
||||
|
||||
if (cached_flags & _TIF_USER_RETURN_NOTIFY)
|
||||
@@ -254,6 +255,8 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
|
||||
WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax))
|
||||
local_irq_enable();
|
||||
|
||||
rseq_syscall(regs);
|
||||
|
||||
/*
|
||||
* First do one-time work. If these work items are enabled, we
|
||||
* want to run them exactly once per syscall exit with IRQs on.
|
||||
|
||||
@@ -397,3 +397,4 @@
|
||||
383 i386 statx sys_statx __ia32_sys_statx
|
||||
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
|
||||
385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents
|
||||
386 i386 rseq sys_rseq __ia32_sys_rseq
|
||||
|
||||
@@ -342,6 +342,7 @@
|
||||
331 common pkey_free __x64_sys_pkey_free
|
||||
332 common statx __x64_sys_statx
|
||||
333 common io_pgetevents __x64_sys_io_pgetevents
|
||||
334 common rseq __x64_sys_rseq
|
||||
|
||||
#
|
||||
# x32-specific system call numbers start at 512 to avoid cache impact
|
||||
|
||||
@@ -688,6 +688,12 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
|
||||
sigset_t *set = sigmask_to_save();
|
||||
compat_sigset_t *cset = (compat_sigset_t *) set;
|
||||
|
||||
/*
|
||||
* Increment event counter and perform fixup for the pre-signal
|
||||
* frame.
|
||||
*/
|
||||
rseq_signal_deliver(regs);
|
||||
|
||||
/* Set up the stack frame */
|
||||
if (is_ia32_frame(ksig)) {
|
||||
if (ksig->ka.sa.sa_flags & SA_SIGINFO)
|
||||
|
||||
@@ -1824,6 +1824,7 @@ static int __do_execve_file(int fd, struct filename *filename,
|
||||
current->fs->in_exec = 0;
|
||||
current->in_execve = 0;
|
||||
membarrier_execve(current);
|
||||
rseq_execve(current);
|
||||
acct_update_integrals(current);
|
||||
task_numa_free(current);
|
||||
free_bprm(bprm);
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include <linux/signal_types.h>
|
||||
#include <linux/mm_types_task.h>
|
||||
#include <linux/task_io_accounting.h>
|
||||
#include <linux/rseq.h>
|
||||
|
||||
/* task_struct member predeclarations (sorted alphabetically): */
|
||||
struct audit_context;
|
||||
@@ -1047,6 +1048,17 @@ struct task_struct {
|
||||
unsigned long numa_pages_migrated;
|
||||
#endif /* CONFIG_NUMA_BALANCING */
|
||||
|
||||
#ifdef CONFIG_RSEQ
|
||||
struct rseq __user *rseq;
|
||||
u32 rseq_len;
|
||||
u32 rseq_sig;
|
||||
/*
|
||||
* RmW on rseq_event_mask must be performed atomically
|
||||
* with respect to preemption.
|
||||
*/
|
||||
unsigned long rseq_event_mask;
|
||||
#endif
|
||||
|
||||
struct tlbflush_unmap_batch tlb_ubc;
|
||||
|
||||
struct rcu_head rcu;
|
||||
@@ -1757,4 +1769,126 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
|
||||
#define TASK_SIZE_OF(tsk) TASK_SIZE
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RSEQ
|
||||
|
||||
/*
|
||||
* Map the event mask on the user-space ABI enum rseq_cs_flags
|
||||
* for direct mask checks.
|
||||
*/
|
||||
enum rseq_event_mask_bits {
|
||||
RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
|
||||
RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
|
||||
RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
|
||||
};
|
||||
|
||||
enum rseq_event_mask {
|
||||
RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT),
|
||||
RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT),
|
||||
RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT),
|
||||
};
|
||||
|
||||
static inline void rseq_set_notify_resume(struct task_struct *t)
|
||||
{
|
||||
if (t->rseq)
|
||||
set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
|
||||
}
|
||||
|
||||
void __rseq_handle_notify_resume(struct pt_regs *regs);
|
||||
|
||||
static inline void rseq_handle_notify_resume(struct pt_regs *regs)
|
||||
{
|
||||
if (current->rseq)
|
||||
__rseq_handle_notify_resume(regs);
|
||||
}
|
||||
|
||||
static inline void rseq_signal_deliver(struct pt_regs *regs)
|
||||
{
|
||||
preempt_disable();
|
||||
__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
|
||||
preempt_enable();
|
||||
rseq_handle_notify_resume(regs);
|
||||
}
|
||||
|
||||
/* rseq_preempt() requires preemption to be disabled. */
|
||||
static inline void rseq_preempt(struct task_struct *t)
|
||||
{
|
||||
__set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
|
||||
rseq_set_notify_resume(t);
|
||||
}
|
||||
|
||||
/* rseq_migrate() requires preemption to be disabled. */
|
||||
static inline void rseq_migrate(struct task_struct *t)
|
||||
{
|
||||
__set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
|
||||
rseq_set_notify_resume(t);
|
||||
}
|
||||
|
||||
/*
|
||||
* If parent process has a registered restartable sequences area, the
|
||||
* child inherits. Only applies when forking a process, not a thread. In
|
||||
* case a parent calls fork() in the middle of a restartable sequence, set the
|
||||
* resume notifier to force the child to retry.
|
||||
*/
|
||||
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
|
||||
{
|
||||
if (clone_flags & CLONE_THREAD) {
|
||||
t->rseq = NULL;
|
||||
t->rseq_len = 0;
|
||||
t->rseq_sig = 0;
|
||||
t->rseq_event_mask = 0;
|
||||
} else {
|
||||
t->rseq = current->rseq;
|
||||
t->rseq_len = current->rseq_len;
|
||||
t->rseq_sig = current->rseq_sig;
|
||||
t->rseq_event_mask = current->rseq_event_mask;
|
||||
rseq_preempt(t);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void rseq_execve(struct task_struct *t)
|
||||
{
|
||||
t->rseq = NULL;
|
||||
t->rseq_len = 0;
|
||||
t->rseq_sig = 0;
|
||||
t->rseq_event_mask = 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline void rseq_set_notify_resume(struct task_struct *t)
|
||||
{
|
||||
}
|
||||
static inline void rseq_handle_notify_resume(struct pt_regs *regs)
|
||||
{
|
||||
}
|
||||
static inline void rseq_signal_deliver(struct pt_regs *regs)
|
||||
{
|
||||
}
|
||||
static inline void rseq_preempt(struct task_struct *t)
|
||||
{
|
||||
}
|
||||
static inline void rseq_migrate(struct task_struct *t)
|
||||
{
|
||||
}
|
||||
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
|
||||
{
|
||||
}
|
||||
static inline void rseq_execve(struct task_struct *t)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DEBUG_RSEQ
|
||||
|
||||
void rseq_syscall(struct pt_regs *regs);
|
||||
|
||||
#else
|
||||
|
||||
static inline void rseq_syscall(struct pt_regs *regs)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user