s390: convert to generic entry

This patch converts s390 to use the generic entry infrastructure from
kernel/entry/*.

There are a few special things on s390:

- PIF_PER_TRAP is moved to TIF_PER_TRAP as the generic code doesn't
  know about our PIF flags in exit_to_user_mode_loop().

- The old code had several ways to restart syscalls:

  a) PIF_SYSCALL_RESTART, which was only set during execve to force a
     restart after upgrading a process (usually qemu-kvm) to pgste page
     table extensions.

  b) PIF_SYSCALL, which is set by do_signal() to indicate that the
     current syscall should be restarted. This is changed so that
     do_signal() now also uses PIF_SYSCALL_RESTART. Continuing to use
     PIF_SYSCALL doesn't work with the generic code, and changing it
     to PIF_SYSCALL_RESTART makes PIF_SYSCALL and PIF_SYSCALL_RESTART
     more unique.

- On s390 calling sys_sigreturn or sys_rt_sigreturn is implemented by
executing a svc instruction on the process stack which causes a fault.
While handling that fault the fault code sets PIF_SYSCALL to hand over
processing to the syscall code on exit to usermode.

The patch introduces PIF_SYSCALL_RET_SET, which is set if ptrace sets
a return value for a syscall. The s390x ptrace ABI uses r2 both for the
syscall number and return value, so ptrace cannot set the syscall number +
return value at the same time. The flag makes handling that a bit easier.
do_syscall() will just skip executing the syscall if PIF_SYSCALL_RET_SET
is set.

CONFIG_DEBUG_ASCE was removd in favour of the generic CONFIG_DEBUG_ENTRY.
CR1/7/13 will be checked both on kernel entry and exit to contain the
correct asces.

Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
This commit is contained in:
Sven Schnelle
2020-11-21 11:14:56 +01:00
committed by Vasily Gorbik
parent ac94a2911e
commit 56e62a7370
39 changed files with 652 additions and 920 deletions

View File

@@ -123,6 +123,7 @@ config S390
select GENERIC_ALLOCATOR
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_ENTRY
select GENERIC_FIND_FIRST_BIT
select GENERIC_GETTIMEOFDAY
select GENERIC_PTDUMP

View File

@@ -6,10 +6,12 @@ config TRACE_IRQFLAGS_SUPPORT
config EARLY_PRINTK
def_bool y
config DEBUG_USER_ASCE
bool "Debug User ASCE"
config DEBUG_ENTRY
bool "Debug low-level entry code"
depends on DEBUG_KERNEL
help
Check on exit to user space that address space control
elements are setup correctly.
This option enables sanity checks in s390 low-level entry code.
Some of these sanity checks may slow down kernel entries and
exits or otherwise impact performance.
If unsure, say N.

View File

@@ -833,7 +833,6 @@ CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
CONFIG_FTRACE_STARTUP_TEST=y
# CONFIG_EVENT_TRACE_STARTUP_TEST is not set
CONFIG_DEBUG_USER_ASCE=y
CONFIG_NOTIFIER_ERROR_INJECTION=m
CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
CONFIG_FAULT_INJECTION=y
@@ -857,3 +856,4 @@ CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
CONFIG_TEST_BITOPS=m
CONFIG_TEST_BPF=m
CONFIG_DEBUG_ENTRY=y

View File

@@ -781,7 +781,6 @@ CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
CONFIG_DEBUG_USER_ASCE=y
CONFIG_LKDTM=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y

View File

@@ -35,4 +35,6 @@ u64 arch_cpu_idle_time(int cpu);
#define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
void account_idle_time_irq(void);
#endif /* _S390_CPUTIME_H */

View File

@@ -233,8 +233,7 @@ extern char elf_platform[];
do { \
set_personality(PER_LINUX | \
(current->personality & (~PER_MASK))); \
current->thread.sys_call_table = \
(unsigned long) &sys_call_table; \
current->thread.sys_call_table = sys_call_table; \
} while (0)
#else /* CONFIG_COMPAT */
#define SET_PERSONALITY(ex) \
@@ -245,11 +244,11 @@ do { \
if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \
set_thread_flag(TIF_31BIT); \
current->thread.sys_call_table = \
(unsigned long) &sys_call_table_emu; \
sys_call_table_emu; \
} else { \
clear_thread_flag(TIF_31BIT); \
current->thread.sys_call_table = \
(unsigned long) &sys_call_table; \
sys_call_table; \
} \
} while (0)
#endif /* CONFIG_COMPAT */

View File

@@ -0,0 +1,60 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ARCH_S390_ENTRY_COMMON_H
#define ARCH_S390_ENTRY_COMMON_H
#include <linux/sched.h>
#include <linux/audit.h>
#include <linux/tracehook.h>
#include <linux/processor.h>
#include <linux/uaccess.h>
#include <asm/fpu/api.h>
#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP)
void do_per_trap(struct pt_regs *regs);
void do_syscall(struct pt_regs *regs);
typedef void (*pgm_check_func)(struct pt_regs *regs);
extern pgm_check_func pgm_check_table[128];
#ifdef CONFIG_DEBUG_ENTRY
static __always_inline void arch_check_user_regs(struct pt_regs *regs)
{
debug_user_asce(0);
}
#define arch_check_user_regs arch_check_user_regs
#endif /* CONFIG_DEBUG_ENTRY */
static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
unsigned long ti_work)
{
if (ti_work & _TIF_PER_TRAP) {
clear_thread_flag(TIF_PER_TRAP);
do_per_trap(regs);
}
if (ti_work & _TIF_GUARDED_STORAGE)
gs_load_bc_cb(regs);
}
#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work
static __always_inline void arch_exit_to_user_mode(void)
{
if (test_cpu_flag(CIF_FPU))
__load_fpu_regs();
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
debug_user_asce(1);
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
static inline bool on_thread_stack(void)
{
return !(((unsigned long)(current->stack) ^ current_stack_pointer()) & ~(THREAD_SIZE - 1));
}
#endif

View File

@@ -47,6 +47,8 @@
#include <linux/preempt.h>
void save_fpu_regs(void);
void load_fpu_regs(void);
void __load_fpu_regs(void);
static inline int test_fp_ctl(u32 fpc)
{

View File

@@ -20,11 +20,13 @@ struct s390_idle_data {
unsigned long long clock_idle_exit;
unsigned long long timer_idle_enter;
unsigned long long timer_idle_exit;
unsigned long mt_cycles_enter[8];
};
extern struct device_attribute dev_attr_idle_count;
extern struct device_attribute dev_attr_idle_time_us;
void psw_idle(struct s390_idle_data *, unsigned long);
void psw_idle(struct s390_idle_data *data, unsigned long psw_mask);
void psw_idle_exit(void);
#endif /* _S390_IDLE_H */

View File

@@ -81,8 +81,8 @@ struct lowcore {
psw_t return_mcck_psw; /* 0x02a0 */
/* CPU accounting and timing values. */
__u64 sync_enter_timer; /* 0x02b0 */
__u64 async_enter_timer; /* 0x02b8 */
__u64 sys_enter_timer; /* 0x02b0 */
__u8 pad_0x02b8[0x02c0-0x02b8]; /* 0x02b8 */
__u64 mcck_enter_timer; /* 0x02c0 */
__u64 exit_timer; /* 0x02c8 */
__u64 user_timer; /* 0x02d0 */

View File

@@ -99,6 +99,7 @@ int nmi_alloc_per_cpu(struct lowcore *lc);
void nmi_free_per_cpu(struct lowcore *lc);
void s390_handle_mcck(void);
void __s390_handle_mcck(void);
int s390_do_machine_check(struct pt_regs *regs);
#endif /* __ASSEMBLY__ */

View File

@@ -38,6 +38,11 @@
#include <asm/runtime_instr.h>
#include <asm/fpu/types.h>
#include <asm/fpu/internal.h>
#include <asm/irqflags.h>
typedef long (*sys_call_ptr_t)(unsigned long, unsigned long,
unsigned long, unsigned long,
unsigned long, unsigned long);
static inline void set_cpu_flag(int flag)
{
@@ -101,31 +106,32 @@ extern void __bpon(void);
*/
struct thread_struct {
unsigned int acrs[NUM_ACRS];
unsigned long ksp; /* kernel stack pointer */
unsigned long user_timer; /* task cputime in user space */
unsigned long guest_timer; /* task cputime in kvm guest */
unsigned long system_timer; /* task cputime in kernel space */
unsigned long hardirq_timer; /* task cputime in hardirq context */
unsigned long softirq_timer; /* task cputime in softirq context */
unsigned long sys_call_table; /* system call table address */
unsigned long gmap_addr; /* address of last gmap fault. */
unsigned int gmap_write_flag; /* gmap fault write indication */
unsigned int gmap_int_code; /* int code of last gmap fault */
unsigned int gmap_pfault; /* signal of a pending guest pfault */
unsigned long ksp; /* kernel stack pointer */
unsigned long user_timer; /* task cputime in user space */
unsigned long guest_timer; /* task cputime in kvm guest */
unsigned long system_timer; /* task cputime in kernel space */
unsigned long hardirq_timer; /* task cputime in hardirq context */
unsigned long softirq_timer; /* task cputime in softirq context */
const sys_call_ptr_t *sys_call_table; /* system call table address */
unsigned long gmap_addr; /* address of last gmap fault. */
unsigned int gmap_write_flag; /* gmap fault write indication */
unsigned int gmap_int_code; /* int code of last gmap fault */
unsigned int gmap_pfault; /* signal of a pending guest pfault */
/* Per-thread information related to debugging */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */
unsigned int system_call; /* system call number in signal */
unsigned long last_break; /* last breaking-event-address. */
/* pfault_wait is used to block the process on a pfault event */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */
unsigned int system_call; /* system call number in signal */
unsigned long last_break; /* last breaking-event-address. */
/* pfault_wait is used to block the process on a pfault event */
unsigned long pfault_wait;
struct list_head list;
/* cpu runtime instrumentation */
struct runtime_instr_cb *ri_cb;
struct gs_cb *gs_cb; /* Current guarded storage cb */
struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
struct gs_cb *gs_cb; /* Current guarded storage cb */
struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
/*
* Warning: 'fpu' is dynamically-sized. It *MUST* be at
* the end.
@@ -184,6 +190,7 @@ static inline void release_thread(struct task_struct *tsk) { }
/* Free guarded storage control block */
void guarded_storage_release(struct task_struct *tsk);
void gs_load_bc_cb(struct pt_regs *regs);
unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(tsk) ((struct pt_regs *) \
@@ -324,6 +331,11 @@ extern void memcpy_absolute(void *, void *, size_t);
extern int s390_isolate_bp(void);
extern int s390_isolate_bp_guest(void);
static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
{
return arch_irqs_disabled_flags(regs->psw.mask);
}
#endif /* __ASSEMBLY__ */
#endif /* __ASM_S390_PROCESSOR_H */

View File

@@ -11,13 +11,13 @@
#include <uapi/asm/ptrace.h>
#define PIF_SYSCALL 0 /* inside a system call */
#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */
#define PIF_SYSCALL_RESTART 2 /* restart the current system call */
#define PIF_SYSCALL_RESTART 1 /* restart the current system call */
#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */
#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */
#define _PIF_SYSCALL BIT(PIF_SYSCALL)
#define _PIF_PER_TRAP BIT(PIF_PER_TRAP)
#define _PIF_SYSCALL_RESTART BIT(PIF_SYSCALL_RESTART)
#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET)
#define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT)
#ifndef __ASSEMBLY__
@@ -68,6 +68,9 @@ enum {
&(*(struct psw_bits *)(&(__psw))); \
}))
#define PGM_INT_CODE_MASK 0x7f
#define PGM_INT_CODE_PER 0x80
/*
* The pt_regs struct defines the way the registers are stored on
* the stack during a system call.

View File

@@ -14,8 +14,8 @@
#include <linux/err.h>
#include <asm/ptrace.h>
extern const unsigned long sys_call_table[];
extern const unsigned long sys_call_table_emu[];
extern const sys_call_ptr_t sys_call_table[];
extern const sys_call_ptr_t sys_call_table_emu[];
static inline long syscall_get_nr(struct task_struct *task,
struct pt_regs *regs)
@@ -56,6 +56,7 @@ static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
set_pt_regs_flag(regs, PIF_SYSCALL_RET_SET);
regs->gprs[2] = error ? error : val;
}
@@ -97,4 +98,10 @@ static inline int syscall_get_arch(struct task_struct *task)
#endif
return AUDIT_ARCH_S390X;
}
static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
{
return false;
}
#endif /* _ASM_SYSCALL_H */

View File

@@ -36,6 +36,7 @@
*/
struct thread_info {
unsigned long flags; /* low level flags */
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
};
/*
@@ -68,6 +69,7 @@ void arch_setup_new_exec(void);
#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
#define TIF_ISOLATE_BP 8 /* Run process with isolated BP */
#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
#define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */
#define TIF_31BIT 16 /* 32bit process */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
@@ -91,6 +93,7 @@ void arch_setup_new_exec(void);
#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
#define _TIF_ISOLATE_BP BIT(TIF_ISOLATE_BP)
#define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST)
#define _TIF_PER_TRAP BIT(TIF_PER_TRAP)
#define _TIF_31BIT BIT(TIF_31BIT)
#define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP)

View File

@@ -18,7 +18,7 @@
#include <asm/extable.h>
#include <asm/facility.h>
void debug_user_asce(void);
void debug_user_asce(int exit);
static inline int __range_ok(unsigned long addr, unsigned long size)
{

View File

@@ -4,4 +4,18 @@
#define __ARCH_HAS_VTIME_TASK_SWITCH
static inline void update_timer_sys(void)
{
S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.sys_enter_timer;
S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
}
static inline void update_timer_mcck(void)
{
S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.mcck_enter_timer;
S390_lowcore.last_update_timer = S390_lowcore.mcck_enter_timer;
}
#endif /* _S390_VTIME_H */

View File

@@ -179,8 +179,9 @@
#define ACR_SIZE 4
#define PTRACE_OLDSETOPTIONS 21
#define PTRACE_OLDSETOPTIONS 21
#define PTRACE_SYSEMU 31
#define PTRACE_SYSEMU_SINGLESTEP 32
#ifndef __ASSEMBLY__
#include <linux/stddef.h>
#include <linux/types.h>

View File

@@ -34,7 +34,7 @@ CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o
obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o

View File

@@ -26,26 +26,14 @@ int main(void)
BLANK();
/* thread struct offsets */
OFFSET(__THREAD_ksp, thread_struct, ksp);
OFFSET(__THREAD_sysc_table, thread_struct, sys_call_table);
OFFSET(__THREAD_last_break, thread_struct, last_break);
OFFSET(__THREAD_FPU_fpc, thread_struct, fpu.fpc);
OFFSET(__THREAD_FPU_regs, thread_struct, fpu.regs);
OFFSET(__THREAD_per_cause, thread_struct, per_event.cause);
OFFSET(__THREAD_per_address, thread_struct, per_event.address);
OFFSET(__THREAD_per_paid, thread_struct, per_event.paid);
OFFSET(__THREAD_trap_tdb, thread_struct, trap_tdb);
BLANK();
/* thread info offsets */
OFFSET(__TI_flags, task_struct, thread_info.flags);
BLANK();
/* pt_regs offsets */
OFFSET(__PT_ARGS, pt_regs, args);
OFFSET(__PT_PSW, pt_regs, psw);
OFFSET(__PT_GPRS, pt_regs, gprs);
OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
OFFSET(__PT_INT_CODE, pt_regs, int_code);
OFFSET(__PT_INT_PARM, pt_regs, int_parm);
OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long);
OFFSET(__PT_FLAGS, pt_regs, flags);
OFFSET(__PT_CR1, pt_regs, cr1);
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
@@ -64,6 +52,7 @@ int main(void)
OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit);
OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter);
OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit);
OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter);
BLANK();
/* hardware defined lowcore locations 0x000 - 0x1ff */
OFFSET(__LC_EXT_PARAMS, lowcore, ext_params);
@@ -115,13 +104,9 @@ int main(void)
OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags);
OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
OFFSET(__LC_SYNC_ENTER_TIMER, lowcore, sync_enter_timer);
OFFSET(__LC_ASYNC_ENTER_TIMER, lowcore, async_enter_timer);
OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer);
OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer);
OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer);
OFFSET(__LC_USER_TIMER, lowcore, user_timer);
OFFSET(__LC_SYSTEM_TIMER, lowcore, system_timer);
OFFSET(__LC_STEAL_TIMER, lowcore, steal_timer);
OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer);
OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
OFFSET(__LC_INT_CLOCK, lowcore, int_clock);

Some files were not shown because too many files have changed in this diff Show More