s390: remove critical section cleanup from entry.S

The current code is rather complex and caused a lot of subtle
and hard to debug bugs in the past. Simplify the code by calling
the system_call handler with interrupts disabled, save
machine state, and re-enable them later.

This requires significant changes to the machine check handling code
as well. When the machine check interrupt arrived while being in kernel
mode the new code will signal pending machine checks with a SIGP external
call. When userspace was interrupted, the handler will switch to the
kernel stack and directly execute s390_handle_mcck().

Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
This commit is contained in:
Sven Schnelle
2020-02-20 12:09:36 +01:00
committed by Vasily Gorbik
parent 11886c199d
commit 0b0ed657fe
9 changed files with 143 additions and 390 deletions

View File

@@ -99,7 +99,7 @@ int nmi_alloc_per_cpu(struct lowcore *lc);
void nmi_free_per_cpu(struct lowcore *lc);
void s390_handle_mcck(void);
void s390_do_machine_check(struct pt_regs *regs);
int s390_do_machine_check(struct pt_regs *regs);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_NMI_H */

View File

@@ -14,17 +14,15 @@
#include <linux/bits.h>
#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
#define CIF_ASCE_PRIMARY 1 /* primary asce needs fixup / uaccess */
#define CIF_ASCE_SECONDARY 2 /* secondary asce needs fixup / uaccess */
#define CIF_NOHZ_DELAY 3 /* delay HZ disable for a tick */
#define CIF_FPU 4 /* restore FPU registers */
#define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */
#define CIF_ENABLED_WAIT 6 /* in enabled wait state */
#define CIF_MCCK_GUEST 7 /* machine check happening in guest */
#define CIF_DEDICATED_CPU 8 /* this CPU is dedicated */
#define CIF_ASCE_PRIMARY 0 /* primary asce needs fixup / uaccess */
#define CIF_ASCE_SECONDARY 1 /* secondary asce needs fixup / uaccess */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define CIF_FPU 3 /* restore FPU registers */
#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */
#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
#define CIF_MCCK_GUEST 6 /* machine check happening in guest */
#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */
#define _CIF_MCCK_PENDING BIT(CIF_MCCK_PENDING)
#define _CIF_ASCE_PRIMARY BIT(CIF_ASCE_PRIMARY)
#define _CIF_ASCE_SECONDARY BIT(CIF_ASCE_SECONDARY)
#define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY)

File diff suppressed because it is too large Load Diff

View File

@@ -24,19 +24,19 @@ void enabled_wait(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
unsigned long long idle_time;
unsigned long psw_mask;
unsigned long psw_mask, flags;
trace_hardirqs_on();
/* Wait for external, I/O or machine check interrupt. */
psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
clear_cpu_flag(CIF_NOHZ_DELAY);
local_irq_save(flags);
/* Call the assembler magic in entry.S */
psw_idle(idle, psw_mask);
local_irq_restore(flags);
trace_hardirqs_off();
/* Account time spent with enabled wait psw loaded as idle time. */
write_seqcount_begin(&idle->seqcount);
@@ -118,22 +118,16 @@ u64 arch_cpu_idle_time(int cpu)
void arch_cpu_idle_enter(void)
{
local_mcck_disable();
}
void arch_cpu_idle(void)
{
if (!test_cpu_flag(CIF_MCCK_PENDING))
/* Halt the cpu and keep track of cpu time accounting. */
enabled_wait();
enabled_wait();
local_irq_enable();
}
void arch_cpu_idle_exit(void)
{
local_mcck_enable();
if (test_cpu_flag(CIF_MCCK_PENDING))
s390_handle_mcck();
}
void arch_cpu_idle_dead(void)

View File

@@ -148,7 +148,6 @@ void s390_handle_mcck(void)
local_mcck_disable();
mcck = *this_cpu_ptr(&cpu_mcck);
memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
clear_cpu_flag(CIF_MCCK_PENDING);
local_mcck_enable();
local_irq_restore(flags);
@@ -333,7 +332,7 @@ NOKPROBE_SYMBOL(s390_backup_mcck_info);
/*
* machine check handler.
*/
void notrace s390_do_machine_check(struct pt_regs *regs)
int notrace s390_do_machine_check(struct pt_regs *regs)
{
static int ipd_count;
static DEFINE_SPINLOCK(ipd_lock);
@@ -342,6 +341,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
unsigned long long tmp;
union mci mci;
unsigned long mcck_dam_code;
int mcck_pending = 0;
nmi_enter();
inc_irq_stat(NMI_NMI);
@@ -400,7 +400,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
*/
mcck->kill_task = 1;
mcck->mcck_code = mci.val;
set_cpu_flag(CIF_MCCK_PENDING);
mcck_pending = 1;
}
/*
@@ -420,8 +420,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
mcck->stp_queue |= stp_sync_check();
if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
mcck->stp_queue |= stp_island_check();
if (mcck->stp_queue)
set_cpu_flag(CIF_MCCK_PENDING);
mcck_pending = 1;
}
/*
@@ -442,12 +441,12 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
if (mci.cp) {
/* Channel report word pending */
mcck->channel_report = 1;
set_cpu_flag(CIF_MCCK_PENDING);
mcck_pending = 1;
}
if (mci.w) {
/* Warning pending */
mcck->warning = 1;
set_cpu_flag(CIF_MCCK_PENDING);
mcck_pending = 1;
}
/*
@@ -462,7 +461,17 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
*((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
}
clear_cpu_flag(CIF_MCCK_GUEST);
if (user_mode(regs) && mcck_pending) {
nmi_exit();
return 1;
}
if (mcck_pending)
schedule_mcck_handler();
nmi_exit();
return 0;
}
NOKPROBE_SYMBOL(s390_do_machine_check);

View File

@@ -384,8 +384,7 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_psw.addr = (unsigned long) restart_int_handler;
lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->external_new_psw.addr = (unsigned long) ext_int_handler;
lc->svc_new_psw.mask = PSW_KERNEL_BITS |
PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->svc_new_psw.addr = (unsigned long) system_call;
lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->program_new_psw.addr = (unsigned long) pgm_check_handler;

View File

@@ -3995,9 +3995,6 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
if (need_resched())
schedule();
if (test_cpu_flag(CIF_MCCK_PENDING))
s390_handle_mcck();
if (!kvm_is_ucontrol(vcpu->kvm)) {
rc = kvm_s390_deliver_pending_interrupts(vcpu);
if (rc)

View File

@@ -1002,8 +1002,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (need_resched())
schedule();
if (test_cpu_flag(CIF_MCCK_PENDING))
s390_handle_mcck();
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);

View File

@@ -33,7 +33,7 @@ EXPORT_SYMBOL(__delay);
static void __udelay_disabled(unsigned long long usecs)
{
unsigned long cr0, cr0_new, psw_mask;
unsigned long cr0, cr0_new, psw_mask, flags;
struct s390_idle_data idle;
u64 end;
@@ -45,7 +45,9 @@ static void __udelay_disabled(unsigned long long usecs)
psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT;
set_clock_comparator(end);
set_cpu_flag(CIF_IGNORE_IRQ);
local_irq_save(flags);
psw_idle(&idle, psw_mask);
local_irq_restore(flags);
clear_cpu_flag(CIF_IGNORE_IRQ);
set_clock_comparator(S390_lowcore.clock_comparator);
__ctl_load(cr0, 0, 0);