seccomp: add "seccomp" syscall

This adds the new "seccomp" syscall with both an "operation" and "flags"
parameter for future expansion. The third argument is a pointer value,
used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must
be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...).

In addition to the TSYNC flag later in this patch series, there is a
non-zero chance that this syscall could be used for configuring a fixed
argument area for seccomp-tracer-aware processes to pass syscall arguments
in the future. Hence, the use of "seccomp" not simply "seccomp_add_filter"
for this syscall. Additionally, this syscall uses operation, flags,
and user pointer for arguments because strictly passing arguments via
a user pointer would mean seccomp itself would be unable to trivially
filter the seccomp syscall itself.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>

Conflicts:
	arch/x86/syscalls/syscall_32.tbl
	arch/x86/syscalls/syscall_64.tbl
	include/uapi/asm-generic/unistd.h
	kernel/seccomp.c

And fixup of unistd32.h to truly enable sys_secomp.

Change-Id: I95bea02382c52007d22e5e9dc563c7d055c2c83f
This commit is contained in:
Kees Cook
2014-06-25 16:08:24 -07:00
committed by JP Abgrall
parent 8908dde5a7
commit e985fd474d
9 changed files with 74 additions and 7 deletions

View File

@@ -331,6 +331,7 @@ config HAVE_ARCH_SECCOMP_FILTER
- secure_computing is called from a ptrace_event()-safe context
- secure_computing return value is checked and a return value of -1
results in the system call being skipped immediately.
- seccomp syscall wired up
config SECCOMP_FILTER
def_bool y

View File

@@ -788,4 +788,4 @@ __SYSCALL(381, sys_ni_syscall)
/* #define __NR_renameat2 382 */
__SYSCALL(382, sys_ni_syscall)
#define __NR_seccomp 383
__SYSCALL(__NR_seccomp, sys_ni_syscall)
__SYSCALL(__NR_seccomp, sys_seccomp)

View File

@@ -357,3 +357,4 @@
348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
349 i386 kcmp sys_kcmp
350 i386 finit_module sys_finit_module
351 i386 seccomp sys_seccomp

View File

@@ -320,6 +320,7 @@
311 64 process_vm_writev sys_process_vm_writev
312 common kcmp sys_kcmp
313 common finit_module sys_finit_module
314 common seccomp sys_seccomp
#
# x32-specific system call numbers start at 512 to avoid cache impact

View File

@@ -841,4 +841,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
unsigned long idx1, unsigned long idx2);
asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
const char __user *uargs);
#endif

View File

@@ -692,9 +692,19 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
__SYSCALL(__NR_kcmp, sys_kcmp)
#define __NR_finit_module 273
__SYSCALL(__NR_finit_module, sys_finit_module)
/* Backporting seccomp, skip a few ...
* #define __NR_sched_setattr 274
__SYSCALL(__NR_sched_setattr, sys_sched_setattr)
* #define __NR_sched_getattr 275
__SYSCALL(__NR_sched_getattr, sys_sched_getattr)
* #define __NR_renameat2 276
__SYSCALL(__NR_renameat2, sys_renameat2)
*/
#define __NR_seccomp 277
__SYSCALL(__NR_seccomp, sys_seccomp)
#undef __NR_syscalls
#define __NR_syscalls 274
#define __NR_syscalls 278
/*
* All syscalls below here should go away really,

View File

@@ -10,6 +10,10 @@
#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */
#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
/* Valid operations for seccomp syscall. */
#define SECCOMP_SET_MODE_STRICT 0
#define SECCOMP_SET_MODE_FILTER 1
/*
* All BPF programs must return a 32-bit value.
* The bottom 16-bits are for optional return data.

View File

@@ -18,6 +18,7 @@
#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/seccomp.h>
#include <linux/syscalls.h>
/* #define SECCOMP_DEBUG 1 */
@@ -309,7 +310,7 @@ fail:
*
* Returns 0 on success and non-zero otherwise.
*/
long seccomp_attach_user_filter(char __user *user_filter)
static long seccomp_attach_user_filter(const char __user *user_filter)
{
struct sock_fprog fprog;
long ret = -EFAULT;
@@ -511,6 +512,7 @@ out:
#ifdef CONFIG_SECCOMP_FILTER
/**
* seccomp_set_mode_filter: internal function for setting seccomp filter
* @flags: flags to change filter behavior
* @filter: struct sock_fprog containing filter
*
* This function may be called repeatedly to install additional filters.
@@ -521,11 +523,16 @@ out:
*
* Returns 0 on success or -EINVAL on failure.
*/
static long seccomp_set_mode_filter(char __user *filter)
static long seccomp_set_mode_filter(unsigned int flags,
const char __user *filter)
{
const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
long ret = -EINVAL;
/* Validate flags. */
if (flags != 0)
goto out;
if (!seccomp_may_assign_mode(seccomp_mode))
goto out;
@@ -538,12 +545,35 @@ out:
return ret;
}
#else
static inline long seccomp_set_mode_filter(char __user *filter)
static inline long seccomp_set_mode_filter(unsigned int flags,
const char __user *filter)
{
return -EINVAL;
}
#endif
/* Common entry point for both prctl and syscall. */
static long do_seccomp(unsigned int op, unsigned int flags,
const char __user *uargs)
{
switch (op) {
case SECCOMP_SET_MODE_STRICT:
if (flags != 0 || uargs != NULL)
return -EINVAL;
return seccomp_set_mode_strict();
case SECCOMP_SET_MODE_FILTER:
return seccomp_set_mode_filter(flags, uargs);
default:
return -EINVAL;
}
}
SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
const char __user *, uargs)
{
return do_seccomp(op, flags, uargs);
}
/**
* prctl_set_seccomp: configures current->seccomp.mode
* @seccomp_mode: requested mode to use
@@ -553,12 +583,27 @@ static inline long seccomp_set_mode_filter(char __user *filter)
*/
long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
{
unsigned int op;
char __user *uargs;
switch (seccomp_mode) {
case SECCOMP_MODE_STRICT:
return seccomp_set_mode_strict();
op = SECCOMP_SET_MODE_STRICT;
/*
* Setting strict mode through prctl always ignored filter,
* so make sure it is always NULL here to pass the internal
* check in do_seccomp().
*/
uargs = NULL;
break;
case SECCOMP_MODE_FILTER:
return seccomp_set_mode_filter(filter);
op = SECCOMP_SET_MODE_FILTER;
uargs = filter;
break;
default:
return -EINVAL;
}
/* prctl interface doesn't have flags, so they are always zero. */
return do_seccomp(op, 0, uargs);
}

View File

@@ -209,3 +209,6 @@ cond_syscall(compat_sys_open_by_handle_at);
/* compare kernel pointers */
cond_syscall(sys_kcmp);
/* operate on Secure Computing state */
cond_syscall(sys_seccomp);