Merge tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 core updates from Borislav Petkov:

 - Add the call depth tracking mitigation for Retbleed which has been
   long in the making. It is a lighterweight software-only fix for
   Skylake-based cores where enabling IBRS is a big hammer and causes a
   significant performance impact.

   What it basically does is, it aligns all kernel functions to 16 bytes
   boundary and adds a 16-byte padding before the function, objtool
   collects all functions' locations and when the mitigation gets
   applied, it patches a call accounting thunk which is used to track
   the call depth of the stack at any time.

   When that call depth reaches a magical, microarchitecture-specific
   value for the Return Stack Buffer, the code stuffs that RSB and
   avoids its underflow which could otherwise lead to the Intel variant
   of Retbleed.

   This software-only solution brings a lot of the lost performance
   back, as benchmarks suggest:

       https://lore.kernel.org/all/20220915111039.092790446@infradead.org/

   That page above also contains a lot more detailed explanation of the
   whole mechanism

 - Implement a new control flow integrity scheme called FineIBT which is
   based on the software kCFI implementation and uses hardware IBT
   support where present to annotate and track indirect branches using a
   hash to validate them

 - Other misc fixes and cleanups

* tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (80 commits)
  x86/paravirt: Use common macro for creating simple asm paravirt functions
  x86/paravirt: Remove clobber bitmask from .parainstructions
  x86/debug: Include percpu.h in debugreg.h to get DECLARE_PER_CPU() et al
  x86/cpufeatures: Move X86_FEATURE_CALL_DEPTH from bit 18 to bit 19 of word 11, to leave space for WIP X86_FEATURE_SGX_EDECCSSA bit
  x86/Kconfig: Enable kernel IBT by default
  x86,pm: Force out-of-line memcpy()
  objtool: Fix weak hole vs prefix symbol
  objtool: Optimize elf_dirty_reloc_sym()
  x86/cfi: Add boot time hash randomization
  x86/cfi: Boot time selection of CFI scheme
  x86/ibt: Implement FineIBT
  objtool: Add --cfi to generate the .cfi_sites section
  x86: Add prefix symbols for function padding
  objtool: Add option to generate prefix symbols
  objtool: Avoid O(bloody terrible) behaviour -- an ode to libelf
  objtool: Slice up elf_create_section_symbol()
  kallsyms: Revert "Take callthunks into account"
  x86: Unconfuse CONFIG_ and X86_FEATURE_ namespaces
  x86/retpoline: Fix crash printing warning
  x86/paravirt: Fix a !PARAVIRT build warning
  ...
This commit is contained in:
Linus Torvalds
2022-12-14 15:03:00 -08:00
103 changed files with 2709 additions and 597 deletions

View File

@@ -1006,8 +1006,8 @@ KBUILD_CFLAGS += $(CC_FLAGS_CFI)
export CC_FLAGS_CFI
endif
ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B
KBUILD_CFLAGS += -falign-functions=64
ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0)
KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT)
endif
# arch Makefile may override CC so keep this after arch Makefile is included

View File

@@ -1438,4 +1438,28 @@ source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
config FUNCTION_ALIGNMENT_4B
bool
config FUNCTION_ALIGNMENT_8B
bool
config FUNCTION_ALIGNMENT_16B
bool
config FUNCTION_ALIGNMENT_32B
bool
config FUNCTION_ALIGNMENT_64B
bool
config FUNCTION_ALIGNMENT
int
default 64 if FUNCTION_ALIGNMENT_64B
default 32 if FUNCTION_ALIGNMENT_32B
default 16 if FUNCTION_ALIGNMENT_16B
default 8 if FUNCTION_ALIGNMENT_8B
default 4 if FUNCTION_ALIGNMENT_4B
default 0
endmenu

View File

@@ -63,6 +63,7 @@ config IA64
select NUMA if !FLATMEM
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
select ZONE_DMA32
select FUNCTION_ALIGNMENT_32B
default y
help
The Itanium Processor Family is Intel's 64-bit successor to

View File

@@ -23,7 +23,7 @@ KBUILD_AFLAGS_KERNEL := -mconstant-gp
EXTRA :=
cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
-falign-functions=32 -frename-registers -fno-optimize-sibling-calls
-frename-registers -fno-optimize-sibling-calls
KBUILD_CFLAGS_KERNEL := -mconstant-gp
GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")

View File

@@ -444,6 +444,11 @@ void apply_returns(s32 *start, s32 *end)
{
}
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi)
{
}
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}

View File

@@ -292,6 +292,8 @@ config X86
select X86_FEATURE_NAMES if PROC_FS
select PROC_PID_ARCH_STATUS if PROC_FS
select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX
select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16
select FUNCTION_ALIGNMENT_4B
imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI
select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
@@ -1855,7 +1857,7 @@ config CC_HAS_IBT
config X86_KERNEL_IBT
prompt "Indirect Branch Tracking"
bool
def_bool y
depends on X86_64 && CC_HAS_IBT && HAVE_OBJTOOL
# https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f
depends on !LD_IS_LLD || LLD_VERSION >= 140000
@@ -2492,6 +2494,46 @@ config CC_HAS_SLS
config CC_HAS_RETURN_THUNK
def_bool $(cc-option,-mfunction-return=thunk-extern)
config CC_HAS_ENTRY_PADDING
def_bool $(cc-option,-fpatchable-function-entry=16,16)
config FUNCTION_PADDING_CFI
int
default 59 if FUNCTION_ALIGNMENT_64B
default 27 if FUNCTION_ALIGNMENT_32B
default 11 if FUNCTION_ALIGNMENT_16B
default 3 if FUNCTION_ALIGNMENT_8B
default 0
# Basically: FUNCTION_ALIGNMENT - 5*CFI_CLANG
# except Kconfig can't do arithmetic :/
config FUNCTION_PADDING_BYTES
int
default FUNCTION_PADDING_CFI if CFI_CLANG
default FUNCTION_ALIGNMENT
config CALL_PADDING
def_bool n
depends on CC_HAS_ENTRY_PADDING && OBJTOOL
select FUNCTION_ALIGNMENT_16B
config FINEIBT
def_bool y
depends on X86_KERNEL_IBT && CFI_CLANG && RETPOLINE
select CALL_PADDING
config HAVE_CALL_THUNKS
def_bool y
depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL
config CALL_THUNKS
def_bool n
select CALL_PADDING
config PREFIX_SYMBOLS
def_bool y
depends on CALL_PADDING && !CFI_CLANG
menuconfig SPECULATION_MITIGATIONS
bool "Mitigations for speculative execution vulnerabilities"
default y
@@ -2543,6 +2585,37 @@ config CPU_UNRET_ENTRY
help
Compile the kernel with support for the retbleed=unret mitigation.
config CALL_DEPTH_TRACKING
bool "Mitigate RSB underflow with call depth tracking"
depends on CPU_SUP_INTEL && HAVE_CALL_THUNKS
select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
select CALL_THUNKS
default y
help
Compile the kernel with call depth tracking to mitigate the Intel
SKL Return-Speculation-Buffer (RSB) underflow issue. The
mitigation is off by default and needs to be enabled on the
kernel command line via the retbleed=stuff option. For
non-affected systems the overhead of this option is marginal as
the call depth tracking is using run-time generated call thunks
in a compiler generated padding area and call patching. This
increases text size by ~5%. For non affected systems this space
is unused. On affected SKL systems this results in a significant
performance gain over the IBRS mitigation.
config CALL_THUNKS_DEBUG
bool "Enable call thunks and call depth tracking debugging"
depends on CALL_DEPTH_TRACKING
select FUNCTION_ALIGNMENT_32B
default n
help
Enable call/ret counters for imbalance detection and build in
a noisy dmesg about callthunks generation and call patching for
trouble shooting. The debug prints need to be enabled on the
kernel command line with 'debug-callthunks'.
Only enable this, when you are debugging call thunks as this
creates a noticable runtime overhead. If unsure say N.
config CPU_IBPB_ENTRY
bool "Enable IBPB on kernel entry"
depends on CPU_SUP_AMD && X86_64

View File

@@ -208,6 +208,12 @@ ifdef CONFIG_SLS
KBUILD_CFLAGS += -mharden-sls=all
endif
ifdef CONFIG_CALL_PADDING
PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
KBUILD_CFLAGS += $(PADDING_CFLAGS)
export PADDING_CFLAGS
endif
KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
ifdef CONFIG_LTO_CLANG

View File

@@ -37,6 +37,14 @@
#include <asm/trapnr.h>
#include "pgtable.h"
/*
* Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
* in assembly errors due to trying to move .org backward due to the excessive
* alignment.
*/
#undef __ALIGN
#define __ALIGN .balign 16, 0x90
/*
* Locally defined symbols should be marked hidden:
*/

View File

@@ -712,7 +712,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.text
.align 8
SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
/* input:
* %rdi: ctx, CTX
@@ -799,7 +798,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
jmp .Lenc_done;
SYM_FUNC_END(__camellia_enc_blk16)
.align 8
SYM_FUNC_START_LOCAL(__camellia_dec_blk16)
/* input:
* %rdi: ctx, CTX

View File

@@ -221,7 +221,6 @@
* Size optimization... with inlined roundsm32 binary would be over 5 times
* larger and would only marginally faster.
*/
.align 8
SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
@@ -229,7 +228,6 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c
RET;
SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
.align 8
SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
%ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
@@ -748,7 +746,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.text
.align 8
SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
/* input:
* %rdi: ctx, CTX
@@ -835,7 +832,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
jmp .Lenc_done;
SYM_FUNC_END(__camellia_enc_blk32)
.align 8
SYM_FUNC_START_LOCAL(__camellia_dec_blk32)
/* input:
* %rdi: ctx, CTX

View File

@@ -208,7 +208,6 @@
.text
.align 16
SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
/* input:
* %rdi: ctx
@@ -282,7 +281,6 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
RET;
SYM_FUNC_END(__cast5_enc_blk16)
.align 16
SYM_FUNC_START_LOCAL(__cast5_dec_blk16)
/* input:
* %rdi: ctx

View File

@@ -94,7 +94,6 @@
#
# Assumes len >= 16.
#
.align 16
SYM_FUNC_START(crc_t10dif_pcl)
movdqa .Lbswap_mask(%rip), BSWAP_MASK

View File

@@ -108,7 +108,6 @@ if (!$kernel) {
sub declare_function() {
my ($name, $align, $nargs) = @_;
if($kernel) {
$code .= ".align $align\n";
$code .= "SYM_FUNC_START($name)\n";
$code .= ".L$name:\n";
} else {

View File

@@ -550,7 +550,6 @@
#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
.align 8
SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
/* input:
* %rdi: ctx, CTX
@@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
RET;
SYM_FUNC_END(__serpent_enc_blk8_avx)
.align 8
SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx)
/* input:
* %rdi: ctx, CTX

View File

@@ -550,7 +550,6 @@
#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
.align 8
SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
/* input:
* %rdi: ctx, CTX
@@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
RET;
SYM_FUNC_END(__serpent_enc_blk16)
.align 8
SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
/* input:
* %rdi: ctx, CTX

View File

@@ -93,7 +93,6 @@
* numBlocks: Number of blocks to process
*/
.text
.align 32
SYM_TYPED_FUNC_START(sha1_ni_transform)
push %rbp
mov %rsp, %rbp

View File

@@ -348,7 +348,6 @@ a = TMP_
########################################################################
.text
SYM_TYPED_FUNC_START(sha256_transform_avx)
.align 32
pushq %rbx
pushq %r12
pushq %r13

View File

@@ -525,7 +525,6 @@ STACK_SIZE = _CTX + _CTX_SIZE
########################################################################
.text
SYM_TYPED_FUNC_START(sha256_transform_rorx)
.align 32
pushq %rbx
pushq %r12
pushq %r13

View File

@@ -357,7 +357,6 @@ a = TMP_
########################################################################
.text
SYM_TYPED_FUNC_START(sha256_transform_ssse3)
.align 32
pushq %rbx
pushq %r12
pushq %r13

View File

@@ -97,7 +97,6 @@
*/
.text
.align 32
SYM_TYPED_FUNC_START(sha256_ni_transform)
shl $6, NUM_BLKS /* convert to bytes */

Some files were not shown because too many files have changed in this diff Show More