Merge branch 'core-rseq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull restartable sequence support from Thomas Gleixner:
 "The restartable sequences syscall (finally):

  After a lot of back and forth discussion and massive delays caused by
  the speculative distraction of maintainers, the core set of
  restartable sequences has finally reached a consensus.

  It comes with the basic undisputed core implementation along with
  support for arm, powerpc and x86 and a full set of selftests.

  It was exposed to linux-next earlier this week, so it does not fully
  comply with the merge window requirements, but there is really no
  point in dragging it out for yet another cycle"

* 'core-rseq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  rseq/selftests: Provide Makefile, scripts, gitignore
  rseq/selftests: Provide parametrized tests
  rseq/selftests: Provide basic percpu ops test
  rseq/selftests: Provide basic test
  rseq/selftests: Provide rseq library
  selftests/lib.mk: Introduce OVERRIDE_TARGETS
  powerpc: Wire up restartable sequences system call
  powerpc: Add syscall detection for restartable sequences
  powerpc: Add support for restartable sequences
  x86: Wire up restartable sequence system call
  x86: Add support for restartable sequences
  arm: Wire up restartable sequences system call
  arm: Add syscall detection for restartable sequences
  arm: Add restartable sequences support
  rseq: Introduce restartable sequences system call
  uapi/headers: Provide types_32_64.h
This commit is contained in:
Linus Torvalds
2018-06-10 10:17:09 -07:00
44 changed files with 5491 additions and 8 deletions
+12
View File
@@ -12134,6 +12134,18 @@ F: include/dt-bindings/reset/
F: include/linux/reset.h
F: include/linux/reset-controller.h
RESTARTABLE SEQUENCES SUPPORT
M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
M: Peter Zijlstra <peterz@infradead.org>
M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
M: Boqun Feng <boqun.feng@gmail.com>
L: linux-kernel@vger.kernel.org
S: Supported
F: kernel/rseq.c
F: include/uapi/linux/rseq.h
F: include/trace/events/rseq.h
F: tools/testing/selftests/rseq/
RFKILL
M: Johannes Berg <johannes@sipsolutions.net>
L: linux-wireless@vger.kernel.org
+7
View File
@@ -272,6 +272,13 @@ config HAVE_REGS_AND_STACK_ACCESS_API
declared in asm/ptrace.h
For example the kprobes-based event tracer needs this API.
config HAVE_RSEQ
bool
depends on HAVE_REGS_AND_STACK_ACCESS_API
help
This symbol should be selected by an architecture if it
supports an implementation of restartable sequences.
config HAVE_CLK
bool
help
+1
View File
@@ -91,6 +91,7 @@ config ARM
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UID16
select HAVE_VIRT_CPU_ACCOUNTING_GEN
+19 -6
View File
@@ -39,12 +39,13 @@ saved_pc .req lr
.section .entry.text,"ax",%progbits
.align 5
#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING))
#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING) || \
IS_ENABLED(CONFIG_DEBUG_RSEQ))
/*
* This is the fast syscall return path. We do as little as possible here,
* such as avoiding writing r0 to the stack. We only use this path if we
* have tracing and context tracking disabled - the overheads from those
* features make this path too inefficient.
* have tracing, context tracking and rseq debug disabled - the overheads
* from those features make this path too inefficient.
*/
ret_fast_syscall:
UNWIND(.fnstart )
@@ -71,14 +72,20 @@ fast_work_pending:
/* fall through to work_pending */
#else
/*
* The "replacement" ret_fast_syscall for when tracing or context tracking
* is enabled. As we will need to call out to some C functions, we save
* r0 first to avoid needing to save registers around each C function call.
* The "replacement" ret_fast_syscall for when tracing, context tracking,
* or rseq debug is enabled. As we will need to call out to some C functions,
* we save r0 first to avoid needing to save registers around each C function
* call.
*/
ret_fast_syscall:
UNWIND(.fnstart )
UNWIND(.cantunwind )
str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
#if IS_ENABLED(CONFIG_DEBUG_RSEQ)
/* do_rseq_syscall needs interrupts enabled. */
mov r0, sp @ 'regs'
bl do_rseq_syscall
#endif
disable_irq_notrace @ disable interrupts
ldr r2, [tsk, #TI_ADDR_LIMIT]
cmp r2, #TASK_SIZE
@@ -113,6 +120,12 @@ ENDPROC(ret_fast_syscall)
*/
ENTRY(ret_to_user)
ret_slow_syscall:
#if IS_ENABLED(CONFIG_DEBUG_RSEQ)
/* do_rseq_syscall needs interrupts enabled. */
enable_irq_notrace @ enable interrupts
mov r0, sp @ 'regs'
bl do_rseq_syscall
#endif
disable_irq_notrace @ disable interrupts
ENTRY(ret_to_user_from_irq)
ldr r2, [tsk, #TI_ADDR_LIMIT]
+14
View File
@@ -540,6 +540,12 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
sigset_t *oldset = sigmask_to_save();
int ret;
/*
 * Record the signal event in the rseq event mask and perform fixup
 * for the pre-signal frame.
 */
rseq_signal_deliver(regs);
/*
* Set up the stack frame
*/
@@ -660,6 +666,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
} else {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
rseq_handle_notify_resume(regs);
}
}
local_irq_disable();
@@ -703,3 +710,10 @@ asmlinkage void addr_limit_check_failed(void)
{
addr_limit_user_check();
}
#ifdef CONFIG_DEBUG_RSEQ
/*
 * C entry point for the CONFIG_DEBUG_RSEQ check on the syscall return
 * paths: the entry assembly passes the saved register frame in r0 and
 * branches here with "bl do_rseq_syscall"; this simply forwards @regs
 * to rseq_syscall().
 *
 * The assembly callers note that this must run with interrupts enabled.
 */
asmlinkage void do_rseq_syscall(struct pt_regs *regs)
{
rseq_syscall(regs);
}
#endif
+1
View File
@@ -412,3 +412,4 @@
395 common pkey_alloc sys_pkey_alloc
396 common pkey_free sys_pkey_free
397 common statx sys_statx
398 common rseq sys_rseq
+1
View File
@@ -220,6 +220,7 @@ config PPC
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_RSEQ
select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
+1
View File
@@ -392,3 +392,4 @@ SYSCALL(statx)
SYSCALL(pkey_alloc)
SYSCALL(pkey_free)
SYSCALL(pkey_mprotect)
SYSCALL(rseq)
+1 -1
View File
@@ -12,7 +12,7 @@
#include <uapi/asm/unistd.h>
#define NR_syscalls 387
#define NR_syscalls 388
#define __NR__exit __NR_exit
+1
View File
@@ -398,5 +398,6 @@
#define __NR_pkey_alloc 384
#define __NR_pkey_free 385
#define __NR_pkey_mprotect 386
#define __NR_rseq 387
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
+7
View File
@@ -365,6 +365,13 @@ syscall_dotrace_cont:
blrl /* Call handler */
.globl ret_from_syscall
ret_from_syscall:
#ifdef CONFIG_DEBUG_RSEQ
/* Check whether the syscall is issued inside a restartable sequence */
stw r3,GPR3(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl rseq_syscall
lwz r3,GPR3(r1)
#endif
mr r6,r3
CURRENT_THREAD_INFO(r12, r1)
/* disable interrupts so current_thread_info()->flags can't change */
+8
View File
@@ -194,6 +194,14 @@ system_call: /* label this so stack traces look sane */
.Lsyscall_exit:
std r3,RESULT(r1)
#ifdef CONFIG_DEBUG_RSEQ
/* Check whether the syscall is issued inside a restartable sequence */
addi r3,r1,STACK_FRAME_OVERHEAD
bl rseq_syscall
ld r3,RESULT(r1)
#endif
CURRENT_THREAD_INFO(r12, r1)
ld r8,_MSR(r1)
+3
View File
@@ -134,6 +134,8 @@ static void do_signal(struct task_struct *tsk)
/* Re-enable the breakpoints for the signal stack */
thread_change_pc(tsk, tsk->thread.regs);
rseq_signal_deliver(tsk->thread.regs);
if (is32) {
if (ksig.ka.sa.sa_flags & SA_SIGINFO)
ret = handle_rt_signal32(&ksig, oldset, tsk);
@@ -168,6 +170,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
rseq_handle_notify_resume(regs);
}
user_enter();
+1
View File
@@ -183,6 +183,7 @@ config X86
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
select HAVE_STACK_VALIDATION if X86_64
select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
+3
View File
@@ -164,6 +164,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
if (cached_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
rseq_handle_notify_resume(regs);
}
if (cached_flags & _TIF_USER_RETURN_NOTIFY)
@@ -254,6 +255,8 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax))
local_irq_enable();
rseq_syscall(regs);
/*
* First do one-time work. If these work items are enabled, we
* want to run them exactly once per syscall exit with IRQs on.
+1
View File
@@ -397,3 +397,4 @@
383 i386 statx sys_statx __ia32_sys_statx
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents
386 i386 rseq sys_rseq __ia32_sys_rseq
+1
View File
@@ -342,6 +342,7 @@
331 common pkey_free __x64_sys_pkey_free
332 common statx __x64_sys_statx
333 common io_pgetevents __x64_sys_io_pgetevents
334 common rseq __x64_sys_rseq
#
# x32-specific system call numbers start at 512 to avoid cache impact
+6
View File
@@ -688,6 +688,12 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
sigset_t *set = sigmask_to_save();
compat_sigset_t *cset = (compat_sigset_t *) set;
/*
 * Record the signal event in the rseq event mask and perform fixup
 * for the pre-signal frame.
 */
rseq_signal_deliver(regs);
/* Set up the stack frame */
if (is_ia32_frame(ksig)) {
if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+1
View File
@@ -1824,6 +1824,7 @@ static int __do_execve_file(int fd, struct filename *filename,
current->fs->in_exec = 0;
current->in_execve = 0;
membarrier_execve(current);
rseq_execve(current);
acct_update_integrals(current);
task_numa_free(current);
free_bprm(bprm);
+134
View File
@@ -27,6 +27,7 @@
#include <linux/signal_types.h>
#include <linux/mm_types_task.h>
#include <linux/task_io_accounting.h>
#include <linux/rseq.h>
/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
@@ -1047,6 +1048,17 @@ struct task_struct {
unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_RSEQ
struct rseq __user *rseq;
u32 rseq_len;
u32 rseq_sig;
/*
* RmW on rseq_event_mask must be performed atomically
* with respect to preemption.
*/
unsigned long rseq_event_mask;
#endif
struct tlbflush_unmap_batch tlb_ubc;
struct rcu_head rcu;
@@ -1757,4 +1769,126 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
#define TASK_SIZE_OF(tsk) TASK_SIZE
#endif
#ifdef CONFIG_RSEQ
/*
 * Bit positions used in task_struct::rseq_event_mask.
 *
 * Each position is defined as the matching
 * RSEQ_CS_FLAG_NO_RESTART_ON_*_BIT value of the user-space ABI
 * enum rseq_cs_flags, so the event mask maps onto those flags
 * for direct mask checks.
 */
enum rseq_event_mask_bits {
RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
};
/*
 * Mask form of enum rseq_event_mask_bits: each value is 1U shifted by
 * the corresponding *_BIT position.
 */
enum rseq_event_mask {
RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT),
RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT),
RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT),
};
/*
 * Set TIF_NOTIFY_RESUME on task @t, but only if @t has a registered
 * restartable sequences area (t->rseq is non-NULL). Tasks without a
 * registration are left untouched.
 */
static inline void rseq_set_notify_resume(struct task_struct *t)
{
	if (!t->rseq)
		return;

	set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
}
/* Out-of-line worker, only reached when current has an rseq area. */
void __rseq_handle_notify_resume(struct pt_regs *regs);

/*
 * Inline front end for the notify-resume handling: bail out right away
 * when the current task has no registered rseq area, otherwise hand
 * @regs to __rseq_handle_notify_resume().
 */
static inline void rseq_handle_notify_resume(struct pt_regs *regs)
{
	if (!current->rseq)
		return;

	__rseq_handle_notify_resume(regs);
}
/*
 * Called by the architecture signal code before the signal frame is
 * set up: marks a signal event in current->rseq_event_mask, then runs
 * the rseq notify-resume handling on @regs (the pre-signal frame).
 */
static inline void rseq_signal_deliver(struct pt_regs *regs)
{
/*
 * __set_bit() is a non-atomic RmW; preemption is disabled around it
 * because RmW on rseq_event_mask must be atomic with respect to
 * preemption.
 */
preempt_disable();
__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
preempt_enable();
rseq_handle_notify_resume(regs);
}
/*
 * Record a preemption event for task @t: set RSEQ_EVENT_PREEMPT_BIT in
 * t->rseq_event_mask and, if @t has an rseq area, flag it with
 * TIF_NOTIFY_RESUME.
 *
 * rseq_preempt() requires preemption to be disabled, since the
 * non-atomic __set_bit() must not race with other updates of the mask.
 */
static inline void rseq_preempt(struct task_struct *t)
{
__set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
rseq_set_notify_resume(t);
}
/*
 * Record a migration event for task @t: set RSEQ_EVENT_MIGRATE_BIT in
 * t->rseq_event_mask and, if @t has an rseq area, flag it with
 * TIF_NOTIFY_RESUME.
 *
 * rseq_migrate() requires preemption to be disabled, since the
 * non-atomic __set_bit() must not race with other updates of the mask.
 */
static inline void rseq_migrate(struct task_struct *t)
{
__set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
rseq_set_notify_resume(t);
}
/*
* If parent process has a registered restartable sequences area, the
* child inherits. Only applies when forking a process, not a thread. In
* case a parent fork() in the middle of a restartable sequence, set the
* resume notifier to force the child to retry.
*/
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{
if (clone_flags & CLONE_THREAD) {
t->rseq = NULL;
t->rseq_len = 0;
t->rseq_sig = 0;
t->rseq_event_mask = 0;
} else {
t->rseq = current->rseq;
t->rseq_len = current->rseq_len;
t->rseq_sig = current->rseq_sig;
t->rseq_event_mask = current->rseq_event_mask;
rseq_preempt(t);
}
}
/*
 * Drop the rseq registration of task @t: clears the user-space area
 * pointer, its length, the signature and any pending event bits.
 * Invoked on the execve path as rseq_execve(current).
 */
static inline void rseq_execve(struct task_struct *t)
{
t->rseq = NULL;
t->rseq_len = 0;
t->rseq_sig = 0;
t->rseq_event_mask = 0;
}
#else
/*
 * CONFIG_RSEQ is not set: provide empty inline stubs with the same
 * signatures so that callers do not need their own #ifdef guards.
 */
static inline void rseq_set_notify_resume(struct task_struct *t)
{
}
static inline void rseq_handle_notify_resume(struct pt_regs *regs)
{
}
static inline void rseq_signal_deliver(struct pt_regs *regs)
{
}
static inline void rseq_preempt(struct task_struct *t)
{
}
static inline void rseq_migrate(struct task_struct *t)
{
}
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{
}
static inline void rseq_execve(struct task_struct *t)
{
}
#endif
/*
 * rseq_syscall() is the debug check invoked on syscall return paths
 * (per the callers' comments: whether the syscall was issued inside a
 * restartable sequence). It is only built with CONFIG_DEBUG_RSEQ;
 * otherwise it is an empty inline stub.
 */
#ifdef CONFIG_DEBUG_RSEQ
void rseq_syscall(struct pt_regs *regs);
#else
static inline void rseq_syscall(struct pt_regs *regs)
{
}
#endif
#endif

Some files were not shown because too many files have changed in this diff Show More