mirror of
https://github.com/ukui/kernel.git
synced 2026-03-09 10:07:04 -07:00
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Overlapping changes all over. The mini-qdisc bits were a little bit tricky, however. Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
@@ -375,3 +375,19 @@ Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description: information about CPUs heterogeneity.
|
||||
|
||||
cpu_capacity: capacity of cpu#.
|
||||
|
||||
What: /sys/devices/system/cpu/vulnerabilities
|
||||
/sys/devices/system/cpu/vulnerabilities/meltdown
|
||||
/sys/devices/system/cpu/vulnerabilities/spectre_v1
|
||||
/sys/devices/system/cpu/vulnerabilities/spectre_v2
|
||||
Date: January 2018
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description: Information about CPU vulnerabilities
|
||||
|
||||
The files are named after the code names of CPU
|
||||
vulnerabilities. The output of those files reflects the
|
||||
state of the CPUs in the system. Possible output values:
|
||||
|
||||
"Not affected" CPU is not affected by the vulnerability
|
||||
"Vulnerable" CPU is affected and no mitigation in effect
|
||||
"Mitigation: $M" CPU is affected and mitigation $M is in effect
|
||||
|
||||
@@ -713,9 +713,6 @@
|
||||
It will be ignored when crashkernel=X,high is not used
|
||||
or memory reserved is below 4G.
|
||||
|
||||
crossrelease_fullstack
|
||||
[KNL] Allow to record full stack trace in cross-release
|
||||
|
||||
cryptomgr.notests
|
||||
[KNL] Disable crypto self-tests
|
||||
|
||||
@@ -2626,6 +2623,11 @@
|
||||
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
|
||||
Equivalent to smt=1.
|
||||
|
||||
nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
|
||||
(indirect branch prediction) vulnerability. System may
|
||||
allow data leaks with this option, which is equivalent
|
||||
to spectre_v2=off.
|
||||
|
||||
noxsave [BUGS=X86] Disables x86 extended register state save
|
||||
and restore using xsave. The kernel will fallback to
|
||||
enabling legacy floating-point and sse state.
|
||||
@@ -2712,8 +2714,6 @@
|
||||
steal time is computed, but won't influence scheduler
|
||||
behaviour
|
||||
|
||||
nopti [X86-64] Disable kernel page table isolation
|
||||
|
||||
nolapic [X86-32,APIC] Do not enable or use the local APIC.
|
||||
|
||||
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
|
||||
@@ -3100,6 +3100,12 @@
|
||||
pcie_scan_all Scan all possible PCIe devices. Otherwise we
|
||||
only look for one device below a PCIe downstream
|
||||
port.
|
||||
big_root_window Try to add a big 64bit memory window to the PCIe
|
||||
root complex on AMD CPUs. Some GFX hardware
|
||||
can resize a BAR to allow access to all VRAM.
|
||||
Adding the window is slightly risky (it may
|
||||
conflict with unreported devices), so this
|
||||
taints the kernel.
|
||||
|
||||
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
|
||||
Management.
|
||||
@@ -3288,11 +3294,20 @@
|
||||
pt. [PARIDE]
|
||||
See Documentation/blockdev/paride.txt.
|
||||
|
||||
pti= [X86_64]
|
||||
Control user/kernel address space isolation:
|
||||
on - enable
|
||||
off - disable
|
||||
auto - default setting
|
||||
pti= [X86_64] Control Page Table Isolation of user and
|
||||
kernel address spaces. Disabling this feature
|
||||
removes hardening, but improves performance of
|
||||
system calls and interrupts.
|
||||
|
||||
on - unconditionally enable
|
||||
off - unconditionally disable
|
||||
auto - kernel detects whether your CPU model is
|
||||
vulnerable to issues that PTI mitigates
|
||||
|
||||
Not specifying this option is equivalent to pti=auto.
|
||||
|
||||
nopti [X86_64]
|
||||
Equivalent to pti=off
|
||||
|
||||
pty.legacy_count=
|
||||
[KNL] Number of legacy pty's. Overwrites compiled-in
|
||||
@@ -3943,6 +3958,29 @@
|
||||
sonypi.*= [HW] Sony Programmable I/O Control Device driver
|
||||
See Documentation/laptops/sonypi.txt
|
||||
|
||||
spectre_v2= [X86] Control mitigation of Spectre variant 2
|
||||
(indirect branch speculation) vulnerability.
|
||||
|
||||
on - unconditionally enable
|
||||
off - unconditionally disable
|
||||
auto - kernel detects whether your CPU model is
|
||||
vulnerable
|
||||
|
||||
Selecting 'on' will, and 'auto' may, choose a
|
||||
mitigation method at run time according to the
|
||||
CPU, the available microcode, the setting of the
|
||||
CONFIG_RETPOLINE configuration option, and the
|
||||
compiler with which the kernel was built.
|
||||
|
||||
Specific mitigations can also be selected manually:
|
||||
|
||||
retpoline - replace indirect branches
|
||||
retpoline,generic - google's original retpoline
|
||||
retpoline,amd - AMD-specific minimal thunk
|
||||
|
||||
Not specifying this option is equivalent to
|
||||
spectre_v2=auto.
|
||||
|
||||
spia_io_base= [HW,MTD]
|
||||
spia_fio_base=
|
||||
spia_pedr=
|
||||
|
||||
@@ -25,8 +25,8 @@ available from the following download page. At least "mkfs.nilfs2",
|
||||
cleaner or garbage collector) are required. Details on the tools are
|
||||
described in the man pages included in the package.
|
||||
|
||||
Project web page: http://nilfs.sourceforge.net/
|
||||
Download page: http://nilfs.sourceforge.net/en/download.html
|
||||
Project web page: https://nilfs.sourceforge.io/
|
||||
Download page: https://nilfs.sourceforge.io/en/download.html
|
||||
List info: http://vger.kernel.org/vger-lists.html#linux-nilfs
|
||||
|
||||
Caveats
|
||||
|
||||
@@ -200,10 +200,14 @@ module state. Dependency expressions have the following syntax:
|
||||
<expr> ::= <symbol> (1)
|
||||
<symbol> '=' <symbol> (2)
|
||||
<symbol> '!=' <symbol> (3)
|
||||
'(' <expr> ')' (4)
|
||||
'!' <expr> (5)
|
||||
<expr> '&&' <expr> (6)
|
||||
<expr> '||' <expr> (7)
|
||||
<symbol1> '<' <symbol2> (4)
|
||||
<symbol1> '>' <symbol2> (4)
|
||||
<symbol1> '<=' <symbol2> (4)
|
||||
<symbol1> '>=' <symbol2> (4)
|
||||
'(' <expr> ')' (5)
|
||||
'!' <expr> (6)
|
||||
<expr> '&&' <expr> (7)
|
||||
<expr> '||' <expr> (8)
|
||||
|
||||
Expressions are listed in decreasing order of precedence.
|
||||
|
||||
@@ -214,10 +218,13 @@ Expressions are listed in decreasing order of precedence.
|
||||
otherwise 'n'.
|
||||
(3) If the values of both symbols are equal, it returns 'n',
|
||||
otherwise 'y'.
|
||||
(4) Returns the value of the expression. Used to override precedence.
|
||||
(5) Returns the result of (2-/expr/).
|
||||
(6) Returns the result of min(/expr/, /expr/).
|
||||
(7) Returns the result of max(/expr/, /expr/).
|
||||
(4) If value of <symbol1> is respectively lower, greater, lower-or-equal,
|
||||
or greater-or-equal than value of <symbol2>, it returns 'y',
|
||||
otherwise 'n'.
|
||||
(5) Returns the value of the expression. Used to override precedence.
|
||||
(6) Returns the result of (2-/expr/).
|
||||
(7) Returns the result of min(/expr/, /expr/).
|
||||
(8) Returns the result of max(/expr/, /expr/).
|
||||
|
||||
An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
|
||||
respectively for calculations). A menu entry becomes visible when its
|
||||
|
||||
@@ -693,7 +693,7 @@ such specification consists of a number of lines with an inverval value
|
||||
in each line. The rules stated above are best illustrated with an example:
|
||||
|
||||
# mkdir functions/uvc.usb0/control/header/h
|
||||
# cd functions/uvc.usb0/control/header/h
|
||||
# cd functions/uvc.usb0/control/
|
||||
# ln -s header/h class/fs
|
||||
# ln -s header/h class/ss
|
||||
# mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p
|
||||
|
||||
186
Documentation/x86/pti.txt
Normal file
186
Documentation/x86/pti.txt
Normal file
@@ -0,0 +1,186 @@
|
||||
Overview
|
||||
========
|
||||
|
||||
Page Table Isolation (pti, previously known as KAISER[1]) is a
|
||||
countermeasure against attacks on the shared user/kernel address
|
||||
space such as the "Meltdown" approach[2].
|
||||
|
||||
To mitigate this class of attacks, we create an independent set of
|
||||
page tables for use only when running userspace applications. When
|
||||
the kernel is entered via syscalls, interrupts or exceptions, the
|
||||
page tables are switched to the full "kernel" copy. When the system
|
||||
switches back to user mode, the user copy is used again.
|
||||
|
||||
The userspace page tables contain only a minimal amount of kernel
|
||||
data: only what is needed to enter/exit the kernel such as the
|
||||
entry/exit functions themselves and the interrupt descriptor table
|
||||
(IDT). There are a few strictly unnecessary things that get mapped
|
||||
such as the first C function when entering an interrupt (see
|
||||
comments in pti.c).
|
||||
|
||||
This approach helps to ensure that side-channel attacks leveraging
|
||||
the paging structures do not function when PTI is enabled. It can be
|
||||
enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
|
||||
Once enabled at compile-time, it can be disabled at boot with the
|
||||
'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
|
||||
|
||||
Page Table Management
|
||||
=====================
|
||||
|
||||
When PTI is enabled, the kernel manages two sets of page tables.
|
||||
The first set is very similar to the single set which is present in
|
||||
kernels without PTI. This includes a complete mapping of userspace
|
||||
that the kernel can use for things like copy_to_user().
|
||||
|
||||
Although _complete_, the user portion of the kernel page tables is
|
||||
crippled by setting the NX bit in the top level. This ensures
|
||||
that any missed kernel->user CR3 switch will immediately crash
|
||||
userspace upon executing its first instruction.
|
||||
|
||||
The userspace page tables map only the kernel data needed to enter
|
||||
and exit the kernel. This data is entirely contained in the 'struct
|
||||
cpu_entry_area' structure which is placed in the fixmap which gives
|
||||
each CPU's copy of the area a compile-time-fixed virtual address.
|
||||
|
||||
For new userspace mappings, the kernel makes the entries in its
|
||||
page tables like normal. The only difference is when the kernel
|
||||
makes entries in the top (PGD) level. In addition to setting the
|
||||
entry in the main kernel PGD, a copy of the entry is made in the
|
||||
userspace page tables' PGD.
|
||||
|
||||
This sharing at the PGD level also inherently shares all the lower
|
||||
layers of the page tables. This leaves a single, shared set of
|
||||
userspace page tables to manage. One PTE to lock, one set of
|
||||
accessed bits, dirty bits, etc...
|
||||
|
||||
Overhead
|
||||
========
|
||||
|
||||
Protection against side-channel attacks is important. But,
|
||||
this protection comes at a cost:
|
||||
|
||||
1. Increased Memory Use
|
||||
a. Each process now needs an order-1 PGD instead of order-0.
|
||||
(Consumes an additional 4k per process).
|
||||
b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
|
||||
aligned so that it can be mapped by setting a single PMD
|
||||
entry. This consumes nearly 2MB of RAM once the kernel
|
||||
is decompressed, but no space in the kernel image itself.
|
||||
|
||||
2. Runtime Cost
|
||||
a. CR3 manipulation to switch between the page table copies
|
||||
must be done at interrupt, syscall, and exception entry
|
||||
and exit (it can be skipped when the kernel is interrupted,
|
||||
though.) Moves to CR3 are on the order of a hundred
|
||||
cycles, and are required at every entry and exit.
|
||||
b. A "trampoline" must be used for SYSCALL entry. This
|
||||
trampoline depends on a smaller set of resources than the
|
||||
non-PTI SYSCALL entry code, so requires mapping fewer
|
||||
things into the userspace page tables. The downside is
|
||||
that stacks must be switched at entry time.
|
||||
d. Global pages are disabled for all kernel structures not
|
||||
mapped into both kernel and userspace page tables. This
|
||||
feature of the MMU allows different processes to share TLB
|
||||
entries mapping the kernel. Losing the feature means more
|
||||
TLB misses after a context switch. The actual loss of
|
||||
performance is very small, however, never exceeding 1%.
|
||||
d. Process Context IDentifiers (PCID) is a CPU feature that
|
||||
allows us to skip flushing the entire TLB when switching page
|
||||
tables by setting a special bit in CR3 when the page tables
|
||||
are changed. This makes switching the page tables (at context
|
||||
switch, or kernel entry/exit) cheaper. But, on systems with
|
||||
PCID support, the context switch code must flush both the user
|
||||
and kernel entries out of the TLB. The user PCID TLB flush is
|
||||
deferred until the exit to userspace, minimizing the cost.
|
||||
See intel.com/sdm for the gory PCID/INVPCID details.
|
||||
e. The userspace page tables must be populated for each new
|
||||
process. Even without PTI, the shared kernel mappings
|
||||
are created by copying top-level (PGD) entries into each
|
||||
new process. But, with PTI, there are now *two* kernel
|
||||
mappings: one in the kernel page tables that maps everything
|
||||
and one for the entry/exit structures. At fork(), we need to
|
||||
copy both.
|
||||
f. In addition to the fork()-time copying, there must also
|
||||
be an update to the userspace PGD any time a set_pgd() is done
|
||||
on a PGD used to map userspace. This ensures that the kernel
|
||||
and userspace copies always map the same userspace
|
||||
memory.
|
||||
g. On systems without PCID support, each CR3 write flushes
|
||||
the entire TLB. That means that each syscall, interrupt
|
||||
or exception flushes the TLB.
|
||||
h. INVPCID is a TLB-flushing instruction which allows flushing
|
||||
of TLB entries for non-current PCIDs. Some systems support
|
||||
PCIDs, but do not support INVPCID. On these systems, addresses
|
||||
can only be flushed from the TLB for the current PCID. When
|
||||
flushing a kernel address, we need to flush all PCIDs, so a
|
||||
single kernel address flush will require a TLB-flushing CR3
|
||||
write upon the next use of every PCID.
|
||||
|
||||
Possible Future Work
|
||||
====================
|
||||
1. We can be more careful about not actually writing to CR3
|
||||
unless its value is actually changed.
|
||||
2. Allow PTI to be enabled/disabled at runtime in addition to the
|
||||
boot-time switching.
|
||||
|
||||
Testing
|
||||
========
|
||||
|
||||
To test stability of PTI, the following test procedure is recommended,
|
||||
ideally doing all of these in parallel:
|
||||
|
||||
1. Set CONFIG_DEBUG_ENTRY=y
|
||||
2. Run several copies of all of the tools/testing/selftests/x86/ tests
|
||||
(excluding MPX and protection_keys) in a loop on multiple CPUs for
|
||||
several minutes. These tests frequently uncover corner cases in the
|
||||
kernel entry code. In general, old kernels might cause these tests
|
||||
themselves to crash, but they should never crash the kernel.
|
||||
3. Run the 'perf' tool in a mode (top or record) that generates many
|
||||
frequent performance monitoring non-maskable interrupts (see "NMI"
|
||||
in /proc/interrupts). This exercises the NMI entry/exit code which
|
||||
is known to trigger bugs in code paths that did not expect to be
|
||||
interrupted, including nested NMIs. Using "-c" boosts the rate of
|
||||
NMIs, and using two -c with separate counters encourages nested NMIs
|
||||
and less deterministic behavior.
|
||||
|
||||
while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
|
||||
|
||||
4. Launch a KVM virtual machine.
|
||||
5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
|
||||
This has been a lightly-tested code path and needs extra scrutiny.
|
||||
|
||||
Debugging
|
||||
=========
|
||||
|
||||
Bugs in PTI cause a few different signatures of crashes
|
||||
that are worth noting here.
|
||||
|
||||
* Failures of the selftests/x86 code. Usually a bug in one of the
|
||||
more obscure corners of entry_64.S
|
||||
* Crashes in early boot, especially around CPU bringup. Bugs
|
||||
in the trampoline code or mappings cause these.
|
||||
* Crashes at the first interrupt. Caused by bugs in entry_64.S,
|
||||
like screwing up a page table switch. Also caused by
|
||||
incorrectly mapping the IRQ handler entry code.
|
||||
* Crashes at the first NMI. The NMI code is separate from main
|
||||
interrupt handlers and can have bugs that do not affect
|
||||
normal interrupts. Also caused by incorrectly mapping NMI
|
||||
code. NMIs that interrupt the entry code must be very
|
||||
careful and can be the cause of crashes that show up when
|
||||
running perf.
|
||||
* Kernel crashes at the first exit to userspace. entry_64.S
|
||||
bugs, or failing to map some of the exit code.
|
||||
* Crashes at first interrupt that interrupts userspace. The paths
|
||||
in entry_64.S that return to userspace are sometimes separate
|
||||
from the ones that return to the kernel.
|
||||
* Double faults: overflowing the kernel stack because of page
|
||||
faults upon page faults. Caused by touching non-pti-mapped
|
||||
data in the entry code, or forgetting to switch to kernel
|
||||
CR3 before calling into C functions which are not pti-mapped.
|
||||
* Userspace segfaults early in boot, sometimes manifesting
|
||||
as mount(8) failing to mount the rootfs. These have
|
||||
tended to be TLB invalidation issues. Usually invalidating
|
||||
the wrong PCID, or otherwise missing an invalidation.
|
||||
|
||||
1. https://gruss.cc/files/kaiser.pdf
|
||||
2. https://meltdownattack.com/meltdown.pdf
|
||||
@@ -9660,8 +9660,8 @@ F: include/uapi/linux/sunrpc/
|
||||
NILFS2 FILESYSTEM
|
||||
M: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
|
||||
L: linux-nilfs@vger.kernel.org
|
||||
W: http://nilfs.sourceforge.net/
|
||||
W: http://nilfs.osdn.jp/
|
||||
W: https://nilfs.sourceforge.io/
|
||||
W: https://nilfs.osdn.jp/
|
||||
T: git git://github.com/konis/nilfs2.git
|
||||
S: Supported
|
||||
F: Documentation/filesystems/nilfs2.txt
|
||||
|
||||
45
Makefile
45
Makefile
@@ -2,7 +2,7 @@
|
||||
VERSION = 4
|
||||
PATCHLEVEL = 15
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION = -rc7
|
||||
EXTRAVERSION = -rc8
|
||||
NAME = Fearless Coyote
|
||||
|
||||
# *DOCUMENTATION*
|
||||
@@ -484,26 +484,6 @@ CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN)
|
||||
endif
|
||||
KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
|
||||
KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
|
||||
KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
|
||||
# Quiet clang warning: comparison of unsigned expression < 0 is always false
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
|
||||
# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
|
||||
# source of a reference will be _MergedGlobals and not on of the whitelisted names.
|
||||
# See modpost pattern 2
|
||||
KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
|
||||
KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
|
||||
KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
|
||||
KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
|
||||
else
|
||||
|
||||
# These warnings generated too much noise in a regular build.
|
||||
# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
|
||||
endif
|
||||
|
||||
ifeq ($(config-targets),1)
|
||||
@@ -716,6 +696,29 @@ ifdef CONFIG_CC_STACKPROTECTOR
|
||||
endif
|
||||
KBUILD_CFLAGS += $(stackp-flag)
|
||||
|
||||
ifeq ($(cc-name),clang)
|
||||
KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
|
||||
# Quiet clang warning: comparison of unsigned expression < 0 is always false
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
|
||||
# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
|
||||
# source of a reference will be _MergedGlobals and not on of the whitelisted names.
|
||||
# See modpost pattern 2
|
||||
KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
|
||||
KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
|
||||
KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
|
||||
KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
|
||||
else
|
||||
|
||||
# These warnings generated too much noise in a regular build.
|
||||
# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
|
||||
endif
|
||||
|
||||
ifdef CONFIG_FRAME_POINTER
|
||||
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
|
||||
else
|
||||
|
||||
@@ -88,7 +88,7 @@ void vtime_flush(struct task_struct *tsk)
|
||||
}
|
||||
|
||||
if (ti->softirq_time) {
|
||||
delta = cycle_to_nsec(ti->softirq_time));
|
||||
delta = cycle_to_nsec(ti->softirq_time);
|
||||
account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ);
|
||||
}
|
||||
|
||||
|
||||
@@ -209,5 +209,11 @@ exc_##label##_book3e:
|
||||
ori r3,r3,vector_offset@l; \
|
||||
mtspr SPRN_IVOR##vector_number,r3;
|
||||
|
||||
#define RFI_TO_KERNEL \
|
||||
rfi
|
||||
|
||||
#define RFI_TO_USER \
|
||||
rfi
|
||||
|
||||
#endif /* _ASM_POWERPC_EXCEPTION_64E_H */
|
||||
|
||||
|
||||
@@ -74,6 +74,59 @@
|
||||
*/
|
||||
#define EX_R3 EX_DAR
|
||||
|
||||
/*
|
||||
* Macros for annotating the expected destination of (h)rfid
|
||||
*
|
||||
* The nop instructions allow us to insert one or more instructions to flush the
|
||||
* L1-D cache when returning to userspace or a guest.
|
||||
*/
|
||||
#define RFI_FLUSH_SLOT \
|
||||
RFI_FLUSH_FIXUP_SECTION; \
|
||||
nop; \
|
||||
nop; \
|
||||
nop
|
||||
|
||||
#define RFI_TO_KERNEL \
|
||||
rfid
|
||||
|
||||
#define RFI_TO_USER \
|
||||
RFI_FLUSH_SLOT; \
|
||||
rfid; \
|
||||
b rfi_flush_fallback
|
||||
|
||||
#define RFI_TO_USER_OR_KERNEL \
|
||||
RFI_FLUSH_SLOT; \
|
||||
rfid; \
|
||||
b rfi_flush_fallback
|
||||
|
||||
#define RFI_TO_GUEST \
|
||||
RFI_FLUSH_SLOT; \
|
||||
rfid; \
|
||||
b rfi_flush_fallback
|
||||
|
||||
#define HRFI_TO_KERNEL \
|
||||
hrfid
|
||||
|
||||
#define HRFI_TO_USER \
|
||||
RFI_FLUSH_SLOT; \
|
||||
hrfid; \
|
||||
b hrfi_flush_fallback
|
||||
|
||||
#define HRFI_TO_USER_OR_KERNEL \
|
||||
RFI_FLUSH_SLOT; \
|
||||
hrfid; \
|
||||
b hrfi_flush_fallback
|
||||
|
||||
#define HRFI_TO_GUEST \
|
||||
RFI_FLUSH_SLOT; \
|
||||
hrfid; \
|
||||
b hrfi_flush_fallback
|
||||
|
||||
#define HRFI_TO_UNKNOWN \
|
||||
RFI_FLUSH_SLOT; \
|
||||
hrfid; \
|
||||
b hrfi_flush_fallback
|
||||
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
|
||||
mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
|
||||
@@ -218,7 +271,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
|
||||
mtspr SPRN_##h##SRR0,r12; \
|
||||
mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
|
||||
mtspr SPRN_##h##SRR1,r10; \
|
||||
h##rfid; \
|
||||
h##RFI_TO_KERNEL; \
|
||||
b . /* prevent speculative execution */
|
||||
#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
|
||||
__EXCEPTION_PROLOG_PSERIES_1(label, h)
|
||||
@@ -232,7 +285,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
|
||||
mtspr SPRN_##h##SRR0,r12; \
|
||||
mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
|
||||
mtspr SPRN_##h##SRR1,r10; \
|
||||
h##rfid; \
|
||||
h##RFI_TO_KERNEL; \
|
||||
b . /* prevent speculative execution */
|
||||
|
||||
#define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \
|
||||
|
||||
@@ -187,7 +187,20 @@ label##3: \
|
||||
FTR_ENTRY_OFFSET label##1b-label##3b; \
|
||||
.popsection;
|
||||
|
||||
#define RFI_FLUSH_FIXUP_SECTION \
|
||||
951: \
|
||||
.pushsection __rfi_flush_fixup,"a"; \
|
||||
.align 2; \
|
||||
952: \
|
||||
FTR_ENTRY_OFFSET 951b-952b; \
|
||||
.popsection;
|
||||
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <linux/types.h>
|
||||
|
||||
extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
|
||||
|
||||
void apply_feature_fixups(void);
|
||||
void setup_feature_keys(void);
|
||||
#endif
|
||||
|
||||
@@ -241,6 +241,7 @@
|
||||
#define H_GET_HCA_INFO 0x1B8
|
||||
#define H_GET_PERF_COUNT 0x1BC
|
||||
#define H_MANAGE_TRACE 0x1C0
|
||||
#define H_GET_CPU_CHARACTERISTICS 0x1C8
|
||||
#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
|
||||
#define H_QUERY_INT_STATE 0x1E4
|
||||
#define H_POLL_PENDING 0x1D8
|
||||
@@ -330,6 +331,17 @@
|
||||
#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
|
||||
/* >= 0 values are CPU number */
|
||||
|
||||
/* H_GET_CPU_CHARACTERISTICS return values */
|
||||
#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0
|
||||
#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1
|
||||
#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
|
||||
#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
|
||||
#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
|
||||
|
||||
#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
|
||||
#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
|
||||
#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
|
||||
|
||||
/* Flag values used in H_REGISTER_PROC_TBL hcall */
|
||||
#define PROC_TABLE_OP_MASK 0x18
|
||||
#define PROC_TABLE_DEREG 0x10
|
||||
@@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
|
||||
}
|
||||
}
|
||||
|
||||
struct h_cpu_char_result {
|
||||
u64 character;
|
||||
u64 behaviour;
|
||||
};
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* _ASM_POWERPC_HVCALL_H */
|
||||
|
||||
@@ -232,6 +232,16 @@ struct paca_struct {
|
||||
struct sibling_subcore_state *sibling_subcore_state;
|
||||
#endif
|
||||
#endif
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
/*
|
||||
* rfi fallback flush must be in its own cacheline to prevent
|
||||
* other paca data leaking into the L1d
|
||||
*/
|
||||
u64 exrfi[EX_SIZE] __aligned(0x80);
|
||||
void *rfi_flush_fallback_area;
|
||||
u64 l1d_flush_congruence;
|
||||
u64 l1d_flush_sets;
|
||||
#endif
|
||||
};
|
||||
|
||||
extern void copy_mm_to_paca(struct mm_struct *mm);
|
||||
|
||||
@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
|
||||
return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
|
||||
}
|
||||
|
||||
static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
|
||||
{
|
||||
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
|
||||
long rc;
|
||||
|
||||
rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
|
||||
if (rc == H_SUCCESS) {
|
||||
p->character = retbuf[0];
|
||||
p->behaviour = retbuf[1];
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
|
||||
|
||||
@@ -39,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
|
||||
static inline void pseries_little_endian_exceptions(void) {}
|
||||
#endif /* CONFIG_PPC_PSERIES */
|
||||
|
||||
void rfi_flush_enable(bool enable);
|
||||
|
||||
/* These are bit flags */
|
||||
enum l1d_flush_type {
|
||||
L1D_FLUSH_NONE = 0x1,
|
||||
L1D_FLUSH_FALLBACK = 0x2,
|
||||
L1D_FLUSH_ORI = 0x4,
|
||||
L1D_FLUSH_MTTRIG = 0x8,
|
||||
};
|
||||
|
||||
void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
|
||||
void do_rfi_flush_fixups(enum l1d_flush_type types);
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_POWERPC_SETUP_H */
|
||||
|
||||
@@ -237,6 +237,11 @@ int main(void)
|
||||
OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
|
||||
OFFSET(PACA_IN_MCE, paca_struct, in_mce);
|
||||
OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
|
||||
OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
|
||||
OFFSET(PACA_EXRFI, paca_struct, exrfi);
|
||||
OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
|
||||
OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
|
||||
|
||||
#endif
|
||||
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
|
||||
OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
|
||||
|
||||
@@ -37,6 +37,11 @@
|
||||
#include <asm/tm.h>
|
||||
#include <asm/ppc-opcode.h>
|
||||
#include <asm/export.h>
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
#include <asm/exception-64s.h>
|
||||
#else
|
||||
#include <asm/exception-64e.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* System calls.
|
||||
@@ -262,13 +267,23 @@ BEGIN_FTR_SECTION
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||
|
||||
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
|
||||
ld r2,GPR2(r1)
|
||||
ld r1,GPR1(r1)
|
||||
mtlr r4
|
||||
mtcr r5
|
||||
mtspr SPRN_SRR0,r7
|
||||
mtspr SPRN_SRR1,r8
|
||||
RFI_TO_USER
|
||||
b . /* prevent speculative execution */
|
||||
|
||||
/* exit to kernel */
|
||||
1: ld r2,GPR2(r1)
|
||||
ld r1,GPR1(r1)
|
||||
mtlr r4
|
||||
mtcr r5
|
||||
mtspr SPRN_SRR0,r7
|
||||
mtspr SPRN_SRR1,r8
|
||||
RFI
|
||||
RFI_TO_KERNEL
|
||||
b . /* prevent speculative execution */
|
||||
|
||||
.Lsyscall_error:
|
||||
@@ -397,8 +412,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||
mtmsrd r10, 1
|
||||
mtspr SPRN_SRR0, r11
|
||||
mtspr SPRN_SRR1, r12
|
||||
|
||||
rfid
|
||||
RFI_TO_USER
|
||||
b . /* prevent speculative execution */
|
||||
#endif
|
||||
_ASM_NOKPROBE_SYMBOL(system_call_common);
|
||||
@@ -878,7 +892,7 @@ BEGIN_FTR_SECTION
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
|
||||
REST_GPR(13, r1)
|
||||
1:
|
||||
|
||||
mtspr SPRN_SRR1,r3
|
||||
|
||||
ld r2,_CCR(r1)
|
||||
@@ -891,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||
ld r3,GPR3(r1)
|
||||
ld r4,GPR4(r1)
|
||||
ld r1,GPR1(r1)
|
||||
RFI_TO_USER
|
||||
b . /* prevent speculative execution */
|
||||
|
||||
rfid
|
||||
1: mtspr SPRN_SRR1,r3
|
||||
|
||||
ld r2,_CCR(r1)
|
||||
mtcrf 0xFF,r2
|
||||
ld r2,_NIP(r1)
|
||||
mtspr SPRN_SRR0,r2
|
||||
|
||||
ld r0,GPR0(r1)
|
||||
ld r2,GPR2(r1)
|
||||
ld r3,GPR3(r1)
|
||||
ld r4,GPR4(r1)
|
||||
ld r1,GPR1(r1)
|
||||
RFI_TO_KERNEL
|
||||
b . /* prevent speculative execution */
|
||||
|
||||
#endif /* CONFIG_PPC_BOOK3E */
|
||||
@@ -1073,7 +1101,7 @@ __enter_rtas:
|
||||
|
||||
mtspr SPRN_SRR0,r5
|
||||
mtspr SPRN_SRR1,r6
|
||||
rfid
|
||||
RFI_TO_KERNEL
|
||||
b . /* prevent speculative execution */
|
||||
|
||||
rtas_return_loc:
|
||||
@@ -1098,7 +1126,7 @@ rtas_return_loc:
|
||||
|
||||
mtspr SPRN_SRR0,r3
|
||||
mtspr SPRN_SRR1,r4
|
||||
rfid
|
||||
RFI_TO_KERNEL
|
||||
b . /* prevent speculative execution */
|
||||
_ASM_NOKPROBE_SYMBOL(__enter_rtas)
|
||||
_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
|
||||
@@ -1171,7 +1199,7 @@ _GLOBAL(enter_prom)
|
||||
LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
|
||||
andc r11,r11,r12
|
||||
mtsrr1 r11
|
||||
rfid
|
||||
RFI_TO_KERNEL
|
||||
#endif /* CONFIG_PPC_BOOK3E */
|
||||
|
||||
1: /* Return from OF */
|
||||
|
||||
@@ -256,7 +256,7 @@ BEGIN_FTR_SECTION
|
||||
LOAD_HANDLER(r12, machine_check_handle_early)
|
||||
1: mtspr SPRN_SRR0,r12
|
||||
mtspr SPRN_SRR1,r11
|
||||
rfid
|
||||
RFI_TO_KERNEL
|
||||
b . /* prevent speculative execution */
|
||||
2:
|
||||
/* Stack overflow. Stay on emergency stack and panic.
|
||||
@@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
|
||||
li r3,MSR_ME
|
||||
andc r10,r10,r3 /* Turn off MSR_ME */
|
||||
mtspr SPRN_SRR1,r10
|
||||
rfid
|
||||
RFI_TO_KERNEL
|
||||
b .
|
||||
2:
|
||||
/*
|
||||
@@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
|
||||
*/
|
||||
bl machine_check_queue_event
|
||||
MACHINE_CHECK_HANDLER_WINDUP
|
||||
rfid
|
||||
RFI_TO_USER_OR_KERNEL
|
||||
9:
|
||||
/* Deliver the machine check to host kernel in V mode. */
|
||||
MACHINE_CHECK_HANDLER_WINDUP
|
||||
@@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common)
|
||||
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
|
||||
std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
|
||||
|
||||
andi. r9,r11,MSR_PR // Check for exception from userspace
|
||||
cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later
|
||||
|
||||
/*
|
||||
* Test MSR_RI before calling slb_allocate_realmode, because the
|
||||
* MSR in r11 gets clobbered. However we still want to allocate
|
||||
@@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
|
||||
|
||||
/* All done -- return from exception. */
|
||||
|
||||
bne cr4,1f /* returning to kernel */
|
||||
|
||||
.machine push
|
||||
.machine "power4"
|
||||
mtcrf 0x80,r9
|
||||
mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
|
||||
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
|
||||
mtcrf 0x02,r9 /* I/D indication is in cr6 */
|
||||
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
|
||||
@@ -640,8 +646,29 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
|
||||
ld r11,PACA_EXSLB+EX_R11(r13)
|
||||
ld r12,PACA_EXSLB+EX_R12(r13)
|
||||
ld r13,PACA_EXSLB+EX_R13(r13)
|
||||
rfid
|
||||
RFI_TO_USER
|
||||
b . /* prevent speculative execution */
|
||||
1:
|
||||
.machine push
|
||||
.machine "power4"
|
||||
mtcrf 0x80,r9
|
||||
mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
|
||||
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
|
||||
mtcrf 0x02,r9 /* I/D indication is in cr6 */
|
||||
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
|
||||
.machine pop
|
||||
|
||||
RESTORE_CTR(r9, PACA_EXSLB)
|
||||
RESTORE_PPR_PACA(PACA_EXSLB, r9)
|
||||
mr r3,r12
|
||||
ld r9,PACA_EXSLB+EX_R9(r13)
|
||||
ld r10,PACA_EXSLB+EX_R10(r13)
|
||||
ld r11,PACA_EXSLB+EX_R11(r13)
|
||||
ld r12,PACA_EXSLB+EX_R12(r13)
|
||||
ld r13,PACA_EXSLB+EX_R13(r13)
|
||||
RFI_TO_KERNEL
|
||||
b . /* prevent speculative execution */
|
||||
|
||||
|
||||
2: std r3,PACA_EXSLB+EX_DAR(r13)
|
||||
mr r3,r12
|
||||
@@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
|
||||
mtspr SPRN_SRR0,r10
|
||||
ld r10,PACAKMSR(r13)
|
||||
mtspr SPRN_SRR1,r10
|
||||
rfid
|
||||
RFI_TO_KERNEL
|
||||
b .
|
||||
|
||||
8: std r3,PACA_EXSLB+EX_DAR(r13)
|
||||
@@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
|
||||
mtspr SPRN_SRR0,r10
|
||||
ld r10,PACAKMSR(r13)
|
||||
mtspr SPRN_SRR1,r10
|
||||
rfid
|
||||
RFI_TO_KERNEL
|
||||
b .
|
||||
|
||||
EXC_COMMON_BEGIN(unrecov_slb)
|
||||
@@ -901,7 +928,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
|
||||
mtspr SPRN_SRR0,r10 ; \
|
||||
ld r10,PACAKMSR(r13) ; \
|
||||
mtspr SPRN_SRR1,r10 ; \
|
||||
rfid ; \
|
||||
RFI_TO_KERNEL ; \
|
||||
b . ; /* prevent speculative execution */
|
||||
|
||||
#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
|
||||
@@ -917,7 +944,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
|
||||
xori r12,r12,MSR_LE ; \
|
||||
mtspr SPRN_SRR1,r12 ; \
|
||||
mr r13,r9 ; \
|
||||
rfid ; /* return to userspace */ \
|
||||
RFI_TO_USER ; /* return to userspace */ \
|
||||
b . ; /* prevent speculative execution */
|
||||
#else
|
||||
#define SYSCALL_FASTENDIAN_TEST
|
||||
@@ -1063,7 +1090,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
|
||||
mtcr r11
|
||||
REST_GPR(11, r1)
|
||||
ld r1,GPR1(r1)
|
||||
hrfid
|
||||
HRFI_TO_USER_OR_KERNEL
|
||||
|
||||
1: mtcr r11
|
||||
REST_GPR(11, r1)
|
||||
@@ -1314,7 +1341,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
|
||||
ld r11,PACA_EXGEN+EX_R11(r13)
|
||||
ld r12,PACA_EXGEN+EX_R12(r13)
|
||||
ld r13,PACA_EXGEN+EX_R13(r13)
|
||||
HRFID
|
||||
HRFI_TO_UNKNOWN
|
||||
b .
|
||||
#endif
|
||||
|
||||
@@ -1418,10 +1445,94 @@ masked_##_H##interrupt: \
|
||||
ld r10,PACA_EXGEN+EX_R10(r13); \
|
||||
ld r11,PACA_EXGEN+EX_R11(r13); \
|
||||
/* returns to kernel where r13 must be set up, so don't restore it */ \
|
||||
##_H##rfid; \
|
||||
##_H##RFI_TO_KERNEL; \
|
||||
b .; \
|
||||
MASKED_DEC_HANDLER(_H)
|
||||
|
||||
TRAMP_REAL_BEGIN(rfi_flush_fallback)
|
||||
SET_SCRATCH0(r13);
|
||||
GET_PACA(r13);
|
||||
std r9,PACA_EXRFI+EX_R9(r13)
|
||||
std r10,PACA_EXRFI+EX_R10(r13)
|
||||
std r11,PACA_EXRFI+EX_R11(r13)
|
||||
std r12,PACA_EXRFI+EX_R12(r13)
|
||||
std r8,PACA_EXRFI+EX_R13(r13)
|
||||
mfctr r9
|
||||
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
|
||||
ld r11,PACA_L1D_FLUSH_SETS(r13)
|
||||
ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
|
||||
/*
|
||||
* The load adresses are at staggered offsets within cachelines,
|
||||
* which suits some pipelines better (on others it should not
|
||||
* hurt).
|
||||
*/
|
||||
addi r12,r12,8
|
||||
mtctr r11
|
||||
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
|
||||
|
||||
/* order ld/st prior to dcbt stop all streams with flushing */
|
||||
sync
|
||||
1: li r8,0
|
||||
.rept 8 /* 8-way set associative */
|
||||
ldx r11,r10,r8
|
||||
add r8,r8,r12
|
||||
xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
|
||||
add r8,r8,r11 // Add 0, this creates a dependency on the ldx
|
||||
.endr
|
||||
addi r10,r10,128 /* 128 byte cache line */
|
||||
bdnz 1b
|
||||
|
||||
mtctr r9
|
||||
ld r9,PACA_EXRFI+EX_R9(r13)
|
||||
ld r10,PACA_EXRFI+EX_R10(r13)
|
||||
ld r11,PACA_EXRFI+EX_R11(r13)
|
||||
ld r12,PACA_EXRFI+EX_R12(r13)
|
||||
ld r8,PACA_EXRFI+EX_R13(r13)
|
||||
GET_SCRATCH0(r13);
|
||||
rfid
|
||||
|
||||
TRAMP_REAL_BEGIN(hrfi_flush_fallback)
|
||||
SET_SCRATCH0(r13);
|
||||
GET_PACA(r13);
|
||||
std r9,PACA_EXRFI+EX_R9(r13)
|
||||
std r10,PACA_EXRFI+EX_R10(r13)
|
||||
std r11,PACA_EXRFI+EX_R11(r13)
|
||||
std r12,PACA_EXRFI+EX_R12(r13)
|
||||
std r8,PACA_EXRFI+EX_R13(r13)
|
||||
mfctr r9
|
||||
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
|
||||
ld r11,PACA_L1D_FLUSH_SETS(r13)
|
||||
ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
|
||||
/*
|
||||
* The load adresses are at staggered offsets within cachelines,
|
||||
* which suits some pipelines better (on others it should not
|
||||
* hurt).
|
||||
*/
|
||||
addi r12,r12,8
|
||||
mtctr r11
|
||||
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
|
||||
|
||||
/* order ld/st prior to dcbt stop all streams with flushing */
|
||||
sync
|
||||
1: li r8,0
|
||||
.rept 8 /* 8-way set associative */
|
||||
ldx r11,r10,r8
|
||||
add r8,r8,r12
|
||||
xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
|
||||
add r8,r8,r11 // Add 0, this creates a dependency on the ldx
|
||||
.endr
|
||||
addi r10,r10,128 /* 128 byte cache line */
|
||||
bdnz 1b
|
||||
|
||||
mtctr r9
|
||||
ld r9,PACA_EXRFI+EX_R9(r13)
|
||||
ld r10,PACA_EXRFI+EX_R10(r13)
|
||||
ld r11,PACA_EXRFI+EX_R11(r13)
|
||||
ld r12,PACA_EXRFI+EX_R12(r13)
|
||||
ld r8,PACA_EXRFI+EX_R13(r13)
|
||||
GET_SCRATCH0(r13);
|
||||
hrfid
|
||||
|
||||
/*
|
||||
* Real mode exceptions actually use this too, but alternate
|
||||
* instruction code patches (which end up in the common .text area)
|
||||
@@ -1441,7 +1552,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
|
||||
addi r13, r13, 4
|
||||
mtspr SPRN_SRR0, r13
|
||||
GET_SCRATCH0(r13)
|
||||
rfid
|
||||
RFI_TO_KERNEL
|
||||
b .
|
||||
|
||||
TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
|
||||
@@ -1453,7 +1564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
|
||||
addi r13, r13, 4
|
||||
mtspr SPRN_HSRR0, r13
|
||||
GET_SCRATCH0(r13)
|
||||
hrfid
|
||||
HRFI_TO_KERNEL
|
||||
b .
|
||||
#endif
|
||||
|
||||
|
||||
@@ -801,3 +801,104 @@ static int __init disable_hardlockup_detector(void)
|
||||
return 0;
|
||||
}
|
||||
early_initcall(disable_hardlockup_detector);
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
static enum l1d_flush_type enabled_flush_types;
|
||||
static void *l1d_flush_fallback_area;
|
||||
static bool no_rfi_flush;
|
||||
bool rfi_flush;
|
||||
|
||||
static int __init handle_no_rfi_flush(char *p)
|
||||
{
|
||||
pr_info("rfi-flush: disabled on command line.");
|
||||
no_rfi_flush = true;
|
||||
return 0;
|
||||
}
|
||||
early_param("no_rfi_flush", handle_no_rfi_flush);
|
||||
|
||||
/*
|
||||
* The RFI flush is not KPTI, but because users will see doco that says to use
|
||||
* nopti we hijack that option here to also disable the RFI flush.
|
||||
*/
|
||||
static int __init handle_no_pti(char *p)
|
||||
{
|
||||
pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
|
||||
handle_no_rfi_flush(NULL);
|
||||
return 0;
|
||||
}
|
||||
early_param("nopti", handle_no_pti);
|
||||
|
||||
static void do_nothing(void *unused)
|
||||
{
|
||||
/*
|
||||
* We don't need to do the flush explicitly, just enter+exit kernel is
|
||||
* sufficient, the RFI exit handlers will do the right thing.
|
||||
*/
|
||||
}
|
||||
|
||||
void rfi_flush_enable(bool enable)
|
||||
{
|
||||
if (rfi_flush == enable)
|
||||
return;
|
||||
|
||||
if (enable) {
|
||||
do_rfi_flush_fixups(enabled_flush_types);
|
||||
on_each_cpu(do_nothing, NULL, 1);
|
||||
} else
|
||||
do_rfi_flush_fixups(L1D_FLUSH_NONE);
|
||||
|
||||
rfi_flush = enable;
|
||||
}
|
||||
|
||||
static void init_fallback_flush(void)
|
||||
{
|
||||
u64 l1d_size, limit;
|
||||
int cpu;
|
||||
|
||||
l1d_size = ppc64_caches.l1d.size;
|
||||
limit = min(safe_stack_limit(), ppc64_rma_size);
|
||||
|
||||
/*
|
||||
* Align to L1d size, and size it at 2x L1d size, to catch possible
|
||||
* hardware prefetch runoff. We don't have a recipe for load patterns to
|
||||
* reliably avoid the prefetcher.
|
||||
*/
|
||||
l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
|
||||
memset(l1d_flush_fallback_area, 0, l1d_size * 2);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
/*
|
||||
* The fallback flush is currently coded for 8-way
|
||||
* associativity. Different associativity is possible, but it
|
||||
* will be treated as 8-way and may not evict the lines as
|
||||
* effectively.
|
||||
*
|
||||
* 128 byte lines are mandatory.
|
||||
*/
|
||||
u64 c = l1d_size / 8;
|
||||
|
||||
paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
|
||||
paca[cpu].l1d_flush_congruence = c;
|
||||
paca[cpu].l1d_flush_sets = c / 128;
|
||||
}
|
||||
}
|
||||
|
||||
void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
|
||||
{
|
||||
if (types & L1D_FLUSH_FALLBACK) {
|
||||
pr_info("rfi-flush: Using fallback displacement flush\n");
|
||||
init_fallback_flush();
|
||||
}
|
||||
|
||||
if (types & L1D_FLUSH_ORI)
|
||||
pr_info("rfi-flush: Using ori type flush\n");
|
||||
|
||||
if (types & L1D_FLUSH_MTTRIG)
|
||||
pr_info("rfi-flush: Using mttrig type flush\n");
|
||||
|
||||
enabled_flush_types = types;
|
||||
|
||||
if (!no_rfi_flush)
|
||||
rfi_flush_enable(enable);
|
||||
}
|
||||
#endif /* CONFIG_PPC_BOOK3S_64 */
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user