mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
Merge tag 's390-6.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Heiko Carstens:
- Various virtual vs physical address usage fixes
- Fix error handling in Processor Activity Instrumentation device
driver, and export number of counters with a sysfs file
- Allow for multiple events when Processor Activity Instrumentation
counters are monitored in system wide sampling
- Change multiplier and shift values of the Time-of-Day clock source to
improve steering precision
- Remove a couple of unneeded GFP_DMA flags from allocations
- Disable mmap alignment if randomize_va_space is also disabled, to
avoid a too small heap
- Various changes to allow s390 to be compiled with LLVM=1, since
ld.lld and llvm-objcopy will have proper s390 support with clang 19
- Add __uninitialized macro to Compiler Attributes. This is helpful
with s390's FPU code where some users have up to 520 byte stack
frames. Clearing such stack frames (if INIT_STACK_ALL_PATTERN or
INIT_STACK_ALL_ZERO is enabled) before they are used contradicts the
intention (performance improvement) of such code sections.
- Convert switch_to() to an out-of-line function, and use the generic
switch_to header file
- Replace the usage of s390's debug feature with pr_debug() calls
within the zcrypt device driver
- Improve hotplug support of the Adjunct Processor device driver
- Improve retry handling in the zcrypt device driver
- Various changes to the in-kernel FPU code:
- Make in-kernel FPU sections preemptible
- Convert various larger inline assemblies and assembler files to
C, mainly by using single instruction inline assemblies. This
increases readability, but also makes it easier to add
proper instrumentation hooks
- Cleanup of the header files
- Provide fast variants of csum_partial() and
csum_partial_copy_nocheck() based on vector instructions
- Introduce and use a lock to synchronize accesses to zpci device data
structures to avoid inconsistent states caused by concurrent accesses
- Compile the kernel without -fPIE. This addresses the following
problems if the kernel is compiled with -fPIE:
- It uses dynamic symbols (.dynsym), for which the linker refuses
to allow more than 64k sections. This can break features which
use '-ffunction-sections' and '-fdata-sections', including
kpatch-build and function granular KASLR
- It unnecessarily uses GOT relocations, adding an extra layer of
indirection for many memory accesses
- Fix shared_cpu_list for CPU private L2 caches, which incorrectly were
reported as globally shared
* tag 's390-6.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (117 commits)
s390/tools: handle rela R_390_GOTPCDBL/R_390_GOTOFF64
s390/cache: prevent rebuild of shared_cpu_list
s390/crypto: remove retry loop with sleep from PAES pkey invocation
s390/pkey: improve pkey retry behavior
s390/zcrypt: improve zcrypt retry behavior
s390/zcrypt: introduce retries on in-kernel send CPRB functions
s390/ap: introduce mutex to lock the AP bus scan
s390/ap: rework ap_scan_bus() to return true on config change
s390/ap: clarify AP scan bus related functions and variables
s390/ap: rearm APQNs bindings complete completion
s390/configs: increase number of LOCKDEP_BITS
s390/vfio-ap: handle hardware checkstop state on queue reset operation
s390/pai: change sampling event assignment for PMU device driver
s390/boot: fix minor comment style damages
s390/boot: do not check for zero-termination relocation entry
s390/boot: make type of __vmlinux_relocs_64_start|end consistent
s390/boot: sanitize kaslr_adjust_relocs() function prototype
s390/boot: simplify GOT handling
s390: vmlinux.lds.S: fix .got.plt assertion
s390/boot: workaround current 'llvm-objdump -t -j ...' behavior
...
This commit is contained in:
@@ -127,6 +127,7 @@ config S390
|
||||
select ARCH_WANT_DEFAULT_BPF_JIT
|
||||
select ARCH_WANT_IPC_PARSE_VERSION
|
||||
select ARCH_WANT_KERNEL_PMD_MKWRITE
|
||||
select ARCH_WANT_LD_ORPHAN_WARN
|
||||
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
|
||||
select BUILDTIME_TABLE_SORT
|
||||
select CLONE_BACKWARDS2
|
||||
@@ -448,7 +449,7 @@ config COMPAT
|
||||
select COMPAT_OLD_SIGACTION
|
||||
select HAVE_UID16
|
||||
depends on MULTIUSER
|
||||
depends on !CC_IS_CLANG
|
||||
depends on !CC_IS_CLANG && !LD_IS_LLD
|
||||
help
|
||||
Select this option if you want to enable your system kernel to
|
||||
handle system-calls from ELF binaries for 31 bit ESA. This option
|
||||
@@ -582,14 +583,23 @@ config RELOCATABLE
|
||||
help
|
||||
This builds a kernel image that retains relocation information
|
||||
so it can be loaded at an arbitrary address.
|
||||
The kernel is linked as a position-independent executable (PIE)
|
||||
and contains dynamic relocations which are processed early in the
|
||||
bootup process.
|
||||
The relocations make the kernel image about 15% larger (compressed
|
||||
10%), but are discarded at runtime.
|
||||
Note: this option exists only for documentation purposes, please do
|
||||
not remove it.
|
||||
|
||||
config PIE_BUILD
|
||||
def_bool CC_IS_CLANG && !$(cc-option,-munaligned-symbols)
|
||||
help
|
||||
If the compiler is unable to generate code that can manage unaligned
|
||||
symbols, the kernel is linked as a position-independent executable
|
||||
(PIE) and includes dynamic relocations that are processed early
|
||||
during bootup.
|
||||
|
||||
For kpatch functionality, it is recommended to build the kernel
|
||||
without the PIE_BUILD option. PIE_BUILD is only enabled when the
|
||||
compiler lacks proper support for handling unaligned symbols.
|
||||
|
||||
config RANDOMIZE_BASE
|
||||
bool "Randomize the address of the kernel image (KASLR)"
|
||||
default y
|
||||
|
||||
@@ -14,8 +14,14 @@ KBUILD_AFLAGS_MODULE += -fPIC
|
||||
KBUILD_CFLAGS_MODULE += -fPIC
|
||||
KBUILD_AFLAGS += -m64
|
||||
KBUILD_CFLAGS += -m64
|
||||
ifdef CONFIG_PIE_BUILD
|
||||
KBUILD_CFLAGS += -fPIE
|
||||
LDFLAGS_vmlinux := -pie
|
||||
LDFLAGS_vmlinux := -pie -z notext
|
||||
else
|
||||
KBUILD_CFLAGS += $(call cc-option,-munaligned-symbols,)
|
||||
LDFLAGS_vmlinux := --emit-relocs --discard-none
|
||||
extra_tools := relocs
|
||||
endif
|
||||
aflags_dwarf := -Wa,-gdwarf-2
|
||||
KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
|
||||
ifndef CONFIG_AS_IS_LLVM
|
||||
@@ -143,7 +149,7 @@ archheaders:
|
||||
|
||||
archprepare:
|
||||
$(Q)$(MAKE) $(build)=$(syscalls) kapi
|
||||
$(Q)$(MAKE) $(build)=$(tools) kapi
|
||||
$(Q)$(MAKE) $(build)=$(tools) kapi $(extra_tools)
|
||||
ifeq ($(KBUILD_EXTMOD),)
|
||||
# We need to generate vdso-offsets.h before compiling certain files in kernel/.
|
||||
# In order to do that, we should use the archprepare target, but we can't since
|
||||
|
||||
1
arch/s390/boot/.gitignore
vendored
1
arch/s390/boot/.gitignore
vendored
@@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
image
|
||||
bzImage
|
||||
relocs.S
|
||||
section_cmp.*
|
||||
vmlinux
|
||||
vmlinux.lds
|
||||
|
||||
@@ -37,7 +37,8 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
|
||||
|
||||
obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
|
||||
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
|
||||
obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o
|
||||
obj-y += version.o pgm_check_info.o ctype.o ipl_data.o
|
||||
obj-y += $(if $(CONFIG_PIE_BUILD),machine_kexec_reloc.o,relocs.o)
|
||||
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
|
||||
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
|
||||
obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
|
||||
@@ -48,6 +49,9 @@ targets := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y
|
||||
targets += vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
|
||||
targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
|
||||
targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all)
|
||||
ifndef CONFIG_PIE_BUILD
|
||||
targets += relocs.S
|
||||
endif
|
||||
|
||||
OBJECTS := $(addprefix $(obj)/,$(obj-y))
|
||||
OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
|
||||
@@ -56,9 +60,9 @@ clean-files += vmlinux.map
|
||||
|
||||
quiet_cmd_section_cmp = SECTCMP $*
|
||||
define cmd_section_cmp
|
||||
s1=`$(OBJDUMP) -t -j "$*" "$<" | sort | \
|
||||
s1=`$(OBJDUMP) -t "$<" | grep "\s$*\s\+" | sort | \
|
||||
sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \
|
||||
s2=`$(OBJDUMP) -t -j "$*" "$(word 2,$^)" | sort | \
|
||||
s2=`$(OBJDUMP) -t "$(word 2,$^)" | grep "\s$*\s\+" | sort | \
|
||||
sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \
|
||||
if [ "$$s1" != "$$s2" ]; then \
|
||||
echo "error: section $* differs between $< and $(word 2,$^)" >&2; \
|
||||
@@ -73,11 +77,12 @@ $(obj)/bzImage: $(obj)/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.b
|
||||
$(obj)/section_cmp%: vmlinux $(obj)/vmlinux FORCE
|
||||
$(call if_changed,section_cmp)
|
||||
|
||||
LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup $(if $(CONFIG_VMLINUX_MAP),-Map=$(obj)/vmlinux.map) --build-id=sha1 -T
|
||||
LDFLAGS_vmlinux-$(CONFIG_LD_ORPHAN_WARN) := --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
|
||||
LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) --oformat $(LD_BFD) -e startup $(if $(CONFIG_VMLINUX_MAP),-Map=$(obj)/vmlinux.map) --build-id=sha1 -T
|
||||
$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS_ALL) FORCE
|
||||
$(call if_changed,ld)
|
||||
|
||||
LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T
|
||||
LDFLAGS_vmlinux.syms := $(LDFLAGS_vmlinux-y) --oformat $(LD_BFD) -e startup -T
|
||||
$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(OBJECTS) FORCE
|
||||
$(call if_changed,ld)
|
||||
|
||||
@@ -93,7 +98,7 @@ OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .
|
||||
$(obj)/syms.o: $(obj)/syms.bin FORCE
|
||||
$(call if_changed,objcopy)
|
||||
|
||||
OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
|
||||
OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=alloc,load
|
||||
$(obj)/info.bin: vmlinux FORCE
|
||||
$(call if_changed,objcopy)
|
||||
|
||||
@@ -105,6 +110,14 @@ OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section
|
||||
$(obj)/vmlinux.bin: vmlinux FORCE
|
||||
$(call if_changed,objcopy)
|
||||
|
||||
ifndef CONFIG_PIE_BUILD
|
||||
CMD_RELOCS=arch/s390/tools/relocs
|
||||
quiet_cmd_relocs = RELOCS $@
|
||||
cmd_relocs = $(CMD_RELOCS) $< > $@
|
||||
$(obj)/relocs.S: vmlinux FORCE
|
||||
$(call if_changed,relocs)
|
||||
endif
|
||||
|
||||
suffix-$(CONFIG_KERNEL_GZIP) := .gz
|
||||
suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
|
||||
suffix-$(CONFIG_KERNEL_LZ4) := .lz4
|
||||
|
||||
@@ -25,9 +25,14 @@ struct vmlinux_info {
|
||||
unsigned long bootdata_size;
|
||||
unsigned long bootdata_preserved_off;
|
||||
unsigned long bootdata_preserved_size;
|
||||
#ifdef CONFIG_PIE_BUILD
|
||||
unsigned long dynsym_start;
|
||||
unsigned long rela_dyn_start;
|
||||
unsigned long rela_dyn_end;
|
||||
#else
|
||||
unsigned long got_start;
|
||||
unsigned long got_end;
|
||||
#endif
|
||||
unsigned long amode31_size;
|
||||
unsigned long init_mm_off;
|
||||
unsigned long swapper_pg_dir_off;
|
||||
@@ -83,6 +88,7 @@ extern unsigned long vmalloc_size;
|
||||
extern int vmalloc_size_set;
|
||||
extern char __boot_data_start[], __boot_data_end[];
|
||||
extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
|
||||
extern char __vmlinux_relocs_64_start[], __vmlinux_relocs_64_end[];
|
||||
extern char _decompressor_syms_start[], _decompressor_syms_end[];
|
||||
extern char _stack_start[], _stack_end[];
|
||||
extern char _end[], _decompressor_end[];
|
||||
|
||||
@@ -141,7 +141,8 @@ static void copy_bootdata(void)
|
||||
memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
|
||||
}
|
||||
|
||||
static void handle_relocs(unsigned long offset)
|
||||
#ifdef CONFIG_PIE_BUILD
|
||||
static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, unsigned long offset)
|
||||
{
|
||||
Elf64_Rela *rela_start, *rela_end, *rela;
|
||||
int r_type, r_sym, rc;
|
||||
@@ -172,6 +173,54 @@ static void handle_relocs(unsigned long offset)
|
||||
}
|
||||
}
|
||||
|
||||
static void kaslr_adjust_got(unsigned long offset) {}
|
||||
static void rescue_relocs(void) {}
|
||||
static void free_relocs(void) {}
|
||||
#else
|
||||
static int *vmlinux_relocs_64_start;
|
||||
static int *vmlinux_relocs_64_end;
|
||||
|
||||
static void rescue_relocs(void)
|
||||
{
|
||||
unsigned long size = __vmlinux_relocs_64_end - __vmlinux_relocs_64_start;
|
||||
|
||||
vmlinux_relocs_64_start = (void *)physmem_alloc_top_down(RR_RELOC, size, 0);
|
||||
vmlinux_relocs_64_end = (void *)vmlinux_relocs_64_start + size;
|
||||
memmove(vmlinux_relocs_64_start, __vmlinux_relocs_64_start, size);
|
||||
}
|
||||
|
||||
static void free_relocs(void)
|
||||
{
|
||||
physmem_free(RR_RELOC);
|
||||
}
|
||||
|
||||
static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, unsigned long offset)
|
||||
{
|
||||
int *reloc;
|
||||
long loc;
|
||||
|
||||
/* Adjust R_390_64 relocations */
|
||||
for (reloc = vmlinux_relocs_64_start; reloc < vmlinux_relocs_64_end; reloc++) {
|
||||
loc = (long)*reloc + offset;
|
||||
if (loc < min_addr || loc > max_addr)
|
||||
error("64-bit relocation outside of kernel!\n");
|
||||
*(u64 *)loc += offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void kaslr_adjust_got(unsigned long offset)
|
||||
{
|
||||
u64 *entry;
|
||||
|
||||
/*
|
||||
* Even without -fPIE, Clang still uses a global offset table for some
|
||||
* reason. Adjust the GOT entries.
|
||||
*/
|
||||
for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++)
|
||||
*entry += offset;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Merge information from several sources into a single ident_map_size value.
|
||||
* "ident_map_size" represents the upper limit of physical memory we may ever
|
||||
@@ -299,14 +348,19 @@ static void setup_vmalloc_size(void)
|
||||
vmalloc_size = max(size, vmalloc_size);
|
||||
}
|
||||
|
||||
static void offset_vmlinux_info(unsigned long offset)
|
||||
static void kaslr_adjust_vmlinux_info(unsigned long offset)
|
||||
{
|
||||
*(unsigned long *)(&vmlinux.entry) += offset;
|
||||
vmlinux.bootdata_off += offset;
|
||||
vmlinux.bootdata_preserved_off += offset;
|
||||
#ifdef CONFIG_PIE_BUILD
|
||||
vmlinux.rela_dyn_start += offset;
|
||||
vmlinux.rela_dyn_end += offset;
|
||||
vmlinux.dynsym_start += offset;
|
||||
#else
|
||||
vmlinux.got_start += offset;
|
||||
vmlinux.got_end += offset;
|
||||
#endif
|
||||
vmlinux.init_mm_off += offset;
|
||||
vmlinux.swapper_pg_dir_off += offset;
|
||||
vmlinux.invalid_pg_dir_off += offset;
|
||||
@@ -361,6 +415,7 @@ void startup_kernel(void)
|
||||
detect_physmem_online_ranges(max_physmem_end);
|
||||
save_ipl_cert_comp_list();
|
||||
rescue_initrd(safe_addr, ident_map_size);
|
||||
rescue_relocs();
|
||||
|
||||
if (kaslr_enabled()) {
|
||||
vmlinux_lma = randomize_within_range(vmlinux.image_size + vmlinux.bss_size,
|
||||
@@ -368,7 +423,7 @@ void startup_kernel(void)
|
||||
ident_map_size);
|
||||
if (vmlinux_lma) {
|
||||
__kaslr_offset = vmlinux_lma - vmlinux.default_lma;
|
||||
offset_vmlinux_info(__kaslr_offset);
|
||||
kaslr_adjust_vmlinux_info(__kaslr_offset);
|
||||
}
|
||||
}
|
||||
vmlinux_lma = vmlinux_lma ?: vmlinux.default_lma;
|
||||
@@ -393,18 +448,20 @@ void startup_kernel(void)
|
||||
/*
|
||||
* The order of the following operations is important:
|
||||
*
|
||||
* - handle_relocs() must follow clear_bss_section() to establish static
|
||||
* memory references to data in .bss to be used by setup_vmem()
|
||||
* - kaslr_adjust_relocs() must follow clear_bss_section() to establish
|
||||
* static memory references to data in .bss to be used by setup_vmem()
|
||||
* (i.e init_mm.pgd)
|
||||
*
|
||||
* - setup_vmem() must follow handle_relocs() to be able using
|
||||
* - setup_vmem() must follow kaslr_adjust_relocs() to be able using
|
||||
* static memory references to data in .bss (i.e init_mm.pgd)
|
||||
*
|
||||
* - copy_bootdata() must follow setup_vmem() to propagate changes to
|
||||
* bootdata made by setup_vmem()
|
||||
* - copy_bootdata() must follow setup_vmem() to propagate changes
|
||||
* to bootdata made by setup_vmem()
|
||||
*/
|
||||
clear_bss_section(vmlinux_lma);
|
||||
handle_relocs(__kaslr_offset);
|
||||
kaslr_adjust_relocs(vmlinux_lma, vmlinux_lma + vmlinux.image_size, __kaslr_offset);
|
||||
kaslr_adjust_got(__kaslr_offset);
|
||||
free_relocs();
|
||||
setup_vmem(asce_limit);
|
||||
copy_bootdata();
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ SECTIONS
|
||||
_text = .; /* Text */
|
||||
*(.text)
|
||||
*(.text.*)
|
||||
INIT_TEXT
|
||||
_etext = . ;
|
||||
}
|
||||
.rodata : {
|
||||
@@ -39,6 +40,9 @@ SECTIONS
|
||||
*(.rodata.*)
|
||||
_erodata = . ;
|
||||
}
|
||||
.got : {
|
||||
*(.got)
|
||||
}
|
||||
NOTES
|
||||
.data : {
|
||||
_data = . ;
|
||||
@@ -106,6 +110,24 @@ SECTIONS
|
||||
_compressed_end = .;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_PIE_BUILD
|
||||
/*
|
||||
* When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire
|
||||
* uncompressed vmlinux.bin is positioned in the bzImage decompressor
|
||||
* image at the default kernel LMA of 0x100000, enabling it to be
|
||||
* executed in-place. However, the size of .vmlinux.relocs could be
|
||||
* large enough to cause an overlap with the uncompressed kernel at the
|
||||
* address 0x100000. To address this issue, .vmlinux.relocs is
|
||||
* positioned after the .rodata.compressed.
|
||||
*/
|
||||
. = ALIGN(4);
|
||||
.vmlinux.relocs : {
|
||||
__vmlinux_relocs_64_start = .;
|
||||
*(.vmlinux.relocs_64)
|
||||
__vmlinux_relocs_64_end = .;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define SB_TRAILER_SIZE 32
|
||||
/* Trailer needed for Secure Boot */
|
||||
. += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */
|
||||
@@ -118,8 +140,34 @@ SECTIONS
|
||||
}
|
||||
_end = .;
|
||||
|
||||
DWARF_DEBUG
|
||||
ELF_DETAILS
|
||||
|
||||
/*
|
||||
* Make sure that the .got.plt is either completely empty or it
|
||||
* contains only the three reserved double words.
|
||||
*/
|
||||
.got.plt : {
|
||||
*(.got.plt)
|
||||
}
|
||||
ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
|
||||
|
||||
/*
|
||||
* Sections that should stay zero sized, which is safer to
|
||||
* explicitly check instead of blindly discarding.
|
||||
*/
|
||||
.plt : {
|
||||
*(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
|
||||
}
|
||||
ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
|
||||
.rela.dyn : {
|
||||
*(.rela.*) *(.rela_*)
|
||||
}
|
||||
ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
|
||||
|
||||
/* Sections to be discarded */
|
||||
/DISCARD/ : {
|
||||
COMMON_DISCARDS
|
||||
*(.eh_frame)
|
||||
*(__ex_table)
|
||||
*(*__ksymtab*)
|
||||
|
||||
@@ -824,6 +824,8 @@ CONFIG_TEST_LOCKUP=m
|
||||
CONFIG_DEBUG_PREEMPT=y
|
||||
CONFIG_PROVE_LOCKING=y
|
||||
CONFIG_LOCK_STAT=y
|
||||
CONFIG_LOCKDEP_BITS=16
|
||||
CONFIG_LOCKDEP_CHAINS_BITS=17
|
||||
CONFIG_DEBUG_ATOMIC_SLEEP=y
|
||||
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
|
||||
CONFIG_DEBUG_IRQFLAGS=y
|
||||
|
||||
@@ -15,14 +15,14 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/fpu.h>
|
||||
#include "chacha-s390.h"
|
||||
|
||||
static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int nbytes, const u32 *key,
|
||||
u32 *counter)
|
||||
{
|
||||
struct kernel_fpu vxstate;
|
||||
DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
|
||||
|
||||
kernel_fpu_begin(&vxstate, KERNEL_VXR);
|
||||
chacha20_vx(dst, src, nbytes, key, counter);
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/nospec-insn.h>
|
||||
#include <asm/vx-insn.h>
|
||||
#include <asm/fpu-insn.h>
|
||||
|
||||
#define SP %r15
|
||||
#define FRAME (16 * 8 + 4 * 8)
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
#include <linux/cpufeature.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <asm/fpu/api.h>
|
||||
|
||||
#include <asm/fpu.h>
|
||||
#include "crc32-vx.h"
|
||||
|
||||
#define CRC32_BLOCK_SIZE 1
|
||||
#define CRC32_DIGEST_SIZE 4
|
||||
@@ -31,11 +31,6 @@ struct crc_desc_ctx {
|
||||
u32 crc;
|
||||
};
|
||||
|
||||
/* Prototypes for functions in assembly files */
|
||||
u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
|
||||
u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
|
||||
u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
|
||||
|
||||
/*
|
||||
* DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
|
||||
*
|
||||
@@ -49,8 +44,8 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
|
||||
static u32 __pure ___fname(u32 crc, \
|
||||
unsigned char const *data, size_t datalen) \
|
||||
{ \
|
||||
struct kernel_fpu vxstate; \
|
||||
unsigned long prealign, aligned, remaining; \
|
||||
DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \
|
||||
\
|
||||
if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \
|
||||
return ___crc32_sw(crc, data, datalen); \
|
||||
|
||||
12
arch/s390/crypto/crc32-vx.h
Normal file
12
arch/s390/crypto/crc32-vx.h
Normal file
@@ -0,0 +1,12 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef _CRC32_VX_S390_H
|
||||
#define _CRC32_VX_S390_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
|
||||
u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
|
||||
u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
|
||||
|
||||
#endif /* _CRC32_VX_S390_H */
|
||||
@@ -12,20 +12,17 @@
|
||||
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/nospec-insn.h>
|
||||
#include <asm/vx-insn.h>
|
||||
#include <linux/types.h>
|
||||
#include <asm/fpu.h>
|
||||
#include "crc32-vx.h"
|
||||
|
||||
/* Vector register range containing CRC-32 constants */
|
||||
#define CONST_R1R2 %v9
|
||||
#define CONST_R3R4 %v10
|
||||
#define CONST_R5 %v11
|
||||
#define CONST_R6 %v12
|
||||
#define CONST_RU_POLY %v13
|
||||
#define CONST_CRC_POLY %v14
|
||||
|
||||
.data
|
||||
.balign 8
|
||||
#define CONST_R1R2 9
|
||||
#define CONST_R3R4 10
|
||||
#define CONST_R5 11
|
||||
#define CONST_R6 12
|
||||
#define CONST_RU_POLY 13
|
||||
#define CONST_CRC_POLY 14
|
||||
|
||||
/*
|
||||
* The CRC-32 constant block contains reduction constants to fold and
|
||||
@@ -58,105 +55,74 @@
|
||||
* P'(x) = 0xEDB88320
|
||||
*/
|
||||
|
||||
SYM_DATA_START_LOCAL(constants_CRC_32_BE)
|
||||
.quad 0x08833794c, 0x0e6228b11 # R1, R2
|
||||
.quad 0x0c5b9cd4c, 0x0e8a45605 # R3, R4
|
||||
.quad 0x0f200aa66, 1 << 32 # R5, x32
|
||||
.quad 0x0490d678d, 1 # R6, 1
|
||||
.quad 0x104d101df, 0 # u
|
||||
.quad 0x104C11DB7, 0 # P(x)
|
||||
SYM_DATA_END(constants_CRC_32_BE)
|
||||
static unsigned long constants_CRC_32_BE[] = {
|
||||
0x08833794c, 0x0e6228b11, /* R1, R2 */
|
||||
0x0c5b9cd4c, 0x0e8a45605, /* R3, R4 */
|
||||
0x0f200aa66, 1UL << 32, /* R5, x32 */
|
||||
0x0490d678d, 1, /* R6, 1 */
|
||||
0x104d101df, 0, /* u */
|
||||
0x104C11DB7, 0, /* P(x) */
|
||||
};
|
||||
|
||||
.previous
|
||||
|
||||
GEN_BR_THUNK %r14
|
||||
|
||||
.text
|
||||
/*
|
||||
* The CRC-32 function(s) use these calling conventions:
|
||||
*
|
||||
* Parameters:
|
||||
*
|
||||
* %r2: Initial CRC value, typically ~0; and final CRC (return) value.
|
||||
* %r3: Input buffer pointer, performance might be improved if the
|
||||
* buffer is on a doubleword boundary.
|
||||
* %r4: Length of the buffer, must be 64 bytes or greater.
|
||||
/**
|
||||
* crc32_be_vgfm_16 - Compute CRC-32 (BE variant) with vector registers
|
||||
* @crc: Initial CRC value, typically ~0.
|
||||
* @buf: Input buffer pointer, performance might be improved if the
|
||||
* buffer is on a doubleword boundary.
|
||||
* @size: Size of the buffer, must be 64 bytes or greater.
|
||||
*
|
||||
* Register usage:
|
||||
*
|
||||
* %r5: CRC-32 constant pool base pointer.
|
||||
* V0: Initial CRC value and intermediate constants and results.
|
||||
* V1..V4: Data for CRC computation.
|
||||
* V5..V8: Next data chunks that are fetched from the input buffer.
|
||||
*
|
||||
* V9..V14: CRC-32 constants.
|
||||
*/
|
||||
SYM_FUNC_START(crc32_be_vgfm_16)
|
||||
u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
|
||||
{
|
||||
/* Load CRC-32 constants */
|
||||
larl %r5,constants_CRC_32_BE
|
||||
VLM CONST_R1R2,CONST_CRC_POLY,0,%r5
|
||||
fpu_vlm(CONST_R1R2, CONST_CRC_POLY, &constants_CRC_32_BE);
|
||||
fpu_vzero(0);
|
||||
|
||||
/* Load the initial CRC value into the leftmost word of V0. */
|
||||
VZERO %v0
|
||||
VLVGF %v0,%r2,0
|
||||
fpu_vlvgf(0, crc, 0);
|
||||
|
||||
/* Load a 64-byte data chunk and XOR with CRC */
|
||||
VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */
|
||||
VX %v1,%v0,%v1 /* V1 ^= CRC */
|
||||
aghi %r3,64 /* BUF = BUF + 64 */
|
||||
aghi %r4,-64 /* LEN = LEN - 64 */
|
||||
fpu_vlm(1, 4, buf);
|
||||
fpu_vx(1, 0, 1);
|
||||
buf += 64;
|
||||
size -= 64;
|
||||
|
||||
/* Check remaining buffer size and jump to proper folding method */
|
||||
cghi %r4,64
|
||||
jl .Lless_than_64bytes
|
||||
while (size >= 64) {
|
||||
/* Load the next 64-byte data chunk into V5 to V8 */
|
||||
fpu_vlm(5, 8, buf);
|
||||
|
||||
.Lfold_64bytes_loop:
|
||||
/* Load the next 64-byte data chunk into V5 to V8 */
|
||||
VLM %v5,%v8,0,%r3
|
||||
/*
|
||||
* Perform a GF(2) multiplication of the doublewords in V1 with
|
||||
* the reduction constants in V0. The intermediate result is
|
||||
* then folded (accumulated) with the next data chunk in V5 and
|
||||
* stored in V1. Repeat this step for the register contents
|
||||
* in V2, V3, and V4 respectively.
|
||||
*/
|
||||
fpu_vgfmag(1, CONST_R1R2, 1, 5);
|
||||
fpu_vgfmag(2, CONST_R1R2, 2, 6);
|
||||
fpu_vgfmag(3, CONST_R1R2, 3, 7);
|
||||
fpu_vgfmag(4, CONST_R1R2, 4, 8);
|
||||
buf += 64;
|
||||
size -= 64;
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform a GF(2) multiplication of the doublewords in V1 with
|
||||
* the reduction constants in V0. The intermediate result is
|
||||
* then folded (accumulated) with the next data chunk in V5 and
|
||||
* stored in V1. Repeat this step for the register contents
|
||||
* in V2, V3, and V4 respectively.
|
||||
*/
|
||||
VGFMAG %v1,CONST_R1R2,%v1,%v5
|
||||
VGFMAG %v2,CONST_R1R2,%v2,%v6
|
||||
VGFMAG %v3,CONST_R1R2,%v3,%v7
|
||||
VGFMAG %v4,CONST_R1R2,%v4,%v8
|
||||
|
||||
/* Adjust buffer pointer and length for next loop */
|
||||
aghi %r3,64 /* BUF = BUF + 64 */
|
||||
aghi %r4,-64 /* LEN = LEN - 64 */
|
||||
|
||||
cghi %r4,64
|
||||
jnl .Lfold_64bytes_loop
|
||||
|
||||
.Lless_than_64bytes:
|
||||
/* Fold V1 to V4 into a single 128-bit value in V1 */
|
||||
VGFMAG %v1,CONST_R3R4,%v1,%v2
|
||||
VGFMAG %v1,CONST_R3R4,%v1,%v3
|
||||
VGFMAG %v1,CONST_R3R4,%v1,%v4
|
||||
fpu_vgfmag(1, CONST_R3R4, 1, 2);
|
||||
fpu_vgfmag(1, CONST_R3R4, 1, 3);
|
||||
fpu_vgfmag(1, CONST_R3R4, 1, 4);
|
||||
|
||||
/* Check whether to continue with 64-bit folding */
|
||||
cghi %r4,16
|
||||
jl .Lfinal_fold
|
||||
while (size >= 16) {
|
||||
fpu_vl(2, buf);
|
||||
fpu_vgfmag(1, CONST_R3R4, 1, 2);
|
||||
buf += 16;
|
||||
size -= 16;
|
||||
}
|
||||
|
||||
.Lfold_16bytes_loop:
|
||||
|
||||
VL %v2,0,,%r3 /* Load next data chunk */
|
||||
VGFMAG %v1,CONST_R3R4,%v1,%v2 /* Fold next data chunk */
|
||||
|
||||
/* Adjust buffer pointer and size for folding next data chunk */
|
||||
aghi %r3,16
|
||||
aghi %r4,-16
|
||||
|
||||
/* Process remaining data chunks */
|
||||
cghi %r4,16
|
||||
jnl .Lfold_16bytes_loop
|
||||
|
||||
.Lfinal_fold:
|
||||
/*
|
||||
* The R5 constant is used to fold a 128-bit value into an 96-bit value
|
||||
* that is XORed with the next 96-bit input data chunk. To use a single
|
||||
@@ -164,7 +130,7 @@ SYM_FUNC_START(crc32_be_vgfm_16)
|
||||
* form an intermediate 96-bit value (with appended zeros) which is then
|
||||
* XORed with the intermediate reduction result.
|
||||
*/
|
||||
VGFMG %v1,CONST_R5,%v1
|
||||
fpu_vgfmg(1, CONST_R5, 1);
|
||||
|
||||
/*
|
||||
* Further reduce the remaining 96-bit value to a 64-bit value using a
|
||||
@@ -173,7 +139,7 @@ SYM_FUNC_START(crc32_be_vgfm_16)
|
||||
* doubleword with R6. The result is a 64-bit value and is subject to
|
||||
* the Barret reduction.
|
||||
*/
|
||||
VGFMG %v1,CONST_R6,%v1
|
||||
fpu_vgfmg(1, CONST_R6, 1);
|
||||
|
||||
/*
|
||||
* The input values to the Barret reduction are the degree-63 polynomial
|
||||
@@ -194,20 +160,15 @@ SYM_FUNC_START(crc32_be_vgfm_16)
|
||||
*/
|
||||
|
||||
/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
|
||||
VUPLLF %v2,%v1
|
||||
VGFMG %v2,CONST_RU_POLY,%v2
|
||||
fpu_vupllf(2, 1);
|
||||
fpu_vgfmg(2, CONST_RU_POLY, 2);
|
||||
|
||||
/*
|
||||
* Compute the GF(2) product of the CRC polynomial in VO with T1(x) in
|
||||
* V2 and XOR the intermediate result, T2(x), with the value in V1.
|
||||
* The final result is in the rightmost word of V2.
|
||||
*/
|
||||
VUPLLF %v2,%v2
|
||||
VGFMAG %v2,CONST_CRC_POLY,%v2,%v1
|
||||
|
||||
.Ldone:
|
||||
VLGVF %r2,%v2,3
|
||||
BR_EX %r14
|
||||
SYM_FUNC_END(crc32_be_vgfm_16)
|
||||
|
||||
.previous
|
||||
fpu_vupllf(2, 2);
|
||||
fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
|
||||
return fpu_vlgvf(2, 3);
|
||||
}
|
||||
@@ -13,20 +13,17 @@
|
||||
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/nospec-insn.h>
|
||||
#include <asm/vx-insn.h>
|
||||
#include <linux/types.h>
|
||||
#include <asm/fpu.h>
|
||||
#include "crc32-vx.h"
|
||||
|
||||
/* Vector register range containing CRC-32 constants */
|
||||
#define CONST_PERM_LE2BE %v9
|
||||
#define CONST_R2R1 %v10
|
||||
#define CONST_R4R3 %v11
|
||||
#define CONST_R5 %v12
|
||||
#define CONST_RU_POLY %v13
|
||||
#define CONST_CRC_POLY %v14
|
||||
|
||||
.data
|
||||
.balign 8
|
||||
#define CONST_PERM_LE2BE 9
|
||||
#define CONST_R2R1 10
|
||||
#define CONST_R4R3 11
|
||||
#define CONST_R5 12
|
||||
#define CONST_RU_POLY 13
|
||||
#define CONST_CRC_POLY 14
|
||||
|
||||
/*
|
||||
* The CRC-32 constant block contains reduction constants to fold and
|
||||
@@ -59,64 +56,43 @@
|
||||
* P'(x) = 0x82F63B78
|
||||
*/
|
||||
|
||||
SYM_DATA_START_LOCAL(constants_CRC_32_LE)
|
||||
.octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask
|
||||
.quad 0x1c6e41596, 0x154442bd4 # R2, R1
|
||||
.quad 0x0ccaa009e, 0x1751997d0 # R4, R3
|
||||
.octa 0x163cd6124 # R5
|
||||
.octa 0x1F7011641 # u'
|
||||
.octa 0x1DB710641 # P'(x) << 1
|
||||
SYM_DATA_END(constants_CRC_32_LE)
|
||||
static unsigned long constants_CRC_32_LE[] = {
|
||||
0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */
|
||||
0x1c6e41596, 0x154442bd4, /* R2, R1 */
|
||||
0x0ccaa009e, 0x1751997d0, /* R4, R3 */
|
||||
0x0, 0x163cd6124, /* R5 */
|
||||
0x0, 0x1f7011641, /* u' */
|
||||
0x0, 0x1db710641 /* P'(x) << 1 */
|
||||
};
|
||||
|
||||
SYM_DATA_START_LOCAL(constants_CRC_32C_LE)
|
||||
.octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask
|
||||
.quad 0x09e4addf8, 0x740eef02 # R2, R1
|
||||
.quad 0x14cd00bd6, 0xf20c0dfe # R4, R3
|
||||
.octa 0x0dd45aab8 # R5
|
||||
.octa 0x0dea713f1 # u'
|
||||
.octa 0x105ec76f0 # P'(x) << 1
|
||||
SYM_DATA_END(constants_CRC_32C_LE)
|
||||
static unsigned long constants_CRC_32C_LE[] = {
|
||||
0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */
|
||||
0x09e4addf8, 0x740eef02, /* R2, R1 */
|
||||
0x14cd00bd6, 0xf20c0dfe, /* R4, R3 */
|
||||
0x0, 0x0dd45aab8, /* R5 */
|
||||
0x0, 0x0dea713f1, /* u' */
|
||||
0x0, 0x105ec76f0 /* P'(x) << 1 */
|
||||
};
|
||||
|
||||
.previous
|
||||
|
||||
GEN_BR_THUNK %r14
|
||||
|
||||
.text
|
||||
|
||||
/*
|
||||
* The CRC-32 functions use these calling conventions:
|
||||
*
|
||||
* Parameters:
|
||||
*
|
||||
* %r2: Initial CRC value, typically ~0; and final CRC (return) value.
|
||||
* %r3: Input buffer pointer, performance might be improved if the
|
||||
* buffer is on a doubleword boundary.
|
||||
* %r4: Length of the buffer, must be 64 bytes or greater.
|
||||
/**
|
||||
* crc32_le_vgfm_generic - Compute CRC-32 (LE variant) with vector registers
|
||||
* @crc: Initial CRC value, typically ~0.
|
||||
* @buf: Input buffer pointer, performance might be improved if the
|
||||
* buffer is on a doubleword boundary.
|
||||
* @size: Size of the buffer, must be 64 bytes or greater.
|
||||
* @constants: CRC-32 constant pool base pointer.
|
||||
*
|
||||
* Register usage:
|
||||
*
|
||||
* %r5: CRC-32 constant pool base pointer.
|
||||
* V0: Initial CRC value and intermediate constants and results.
|
||||
* V1..V4: Data for CRC computation.
|
||||
* V5..V8: Next data chunks that are fetched from the input buffer.
|
||||
* V9: Constant for BE->LE conversion and shift operations
|
||||
*
|
||||
* V0: Initial CRC value and intermediate constants and results.
|
||||
* V1..V4: Data for CRC computation.
|
||||
* V5..V8: Next data chunks that are fetched from the input buffer.
|
||||
* V9: Constant for BE->LE conversion and shift operations
|
||||
* V10..V14: CRC-32 constants.
|
||||
*/
|
||||
|
||||
SYM_FUNC_START(crc32_le_vgfm_16)
|
||||
larl %r5,constants_CRC_32_LE
|
||||
j crc32_le_vgfm_generic
|
||||
SYM_FUNC_END(crc32_le_vgfm_16)
|
||||
|
||||
SYM_FUNC_START(crc32c_le_vgfm_16)
|
||||
larl %r5,constants_CRC_32C_LE
|
||||
j crc32_le_vgfm_generic
|
||||
SYM_FUNC_END(crc32c_le_vgfm_16)
|
||||
|
||||
SYM_FUNC_START(crc32_le_vgfm_generic)
|
||||
static u32 crc32_le_vgfm_generic(u32 crc, unsigned char const *buf, size_t size, unsigned long *constants)
|
||||
{
|
||||
/* Load CRC-32 constants */
|
||||
VLM CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5
|
||||
fpu_vlm(CONST_PERM_LE2BE, CONST_CRC_POLY, constants);
|
||||
|
||||
/*
|
||||
* Load the initial CRC value.
|
||||
@@ -125,90 +101,73 @@ SYM_FUNC_START(crc32_le_vgfm_generic)
|
||||
* vector register and is later XORed with the LSB portion
|
||||
* of the loaded input data.
|
||||
*/
|
||||
VZERO %v0 /* Clear V0 */
|
||||
VLVGF %v0,%r2,3 /* Load CRC into rightmost word */
|
||||
fpu_vzero(0); /* Clear V0 */
|
||||
fpu_vlvgf(0, crc, 3); /* Load CRC into rightmost word */
|
||||
|
||||
/* Load a 64-byte data chunk and XOR with CRC */
|
||||
VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */
|
||||
VPERM %v1,%v1,%v1,CONST_PERM_LE2BE
|
||||
VPERM %v2,%v2,%v2,CONST_PERM_LE2BE
|
||||
VPERM %v3,%v3,%v3,CONST_PERM_LE2BE
|
||||
VPERM %v4,%v4,%v4,CONST_PERM_LE2BE
|
||||
fpu_vlm(1, 4, buf);
|
||||
fpu_vperm(1, 1, 1, CONST_PERM_LE2BE);
|
||||
fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
|
||||
fpu_vperm(3, 3, 3, CONST_PERM_LE2BE);
|
||||
fpu_vperm(4, 4, 4, CONST_PERM_LE2BE);
|
||||
|
||||
VX %v1,%v0,%v1 /* V1 ^= CRC */
|
||||
aghi %r3,64 /* BUF = BUF + 64 */
|
||||
aghi %r4,-64 /* LEN = LEN - 64 */
|
||||
fpu_vx(1, 0, 1); /* V1 ^= CRC */
|
||||
buf += 64;
|
||||
size -= 64;
|
||||
|
||||
cghi %r4,64
|
||||
jl .Lless_than_64bytes
|
||||
while (size >= 64) {
|
||||
fpu_vlm(5, 8, buf);
|
||||
fpu_vperm(5, 5, 5, CONST_PERM_LE2BE);
|
||||
fpu_vperm(6, 6, 6, CONST_PERM_LE2BE);
|
||||
fpu_vperm(7, 7, 7, CONST_PERM_LE2BE);
|
||||
fpu_vperm(8, 8, 8, CONST_PERM_LE2BE);
|
||||
/*
|
||||
* Perform a GF(2) multiplication of the doublewords in V1 with
|
||||
* the R1 and R2 reduction constants in V0. The intermediate
|
||||
* result is then folded (accumulated) with the next data chunk
|
||||
* in V5 and stored in V1. Repeat this step for the register
|
||||
* contents in V2, V3, and V4 respectively.
|
||||
*/
|
||||
fpu_vgfmag(1, CONST_R2R1, 1, 5);
|
||||
fpu_vgfmag(2, CONST_R2R1, 2, 6);
|
||||
fpu_vgfmag(3, CONST_R2R1, 3, 7);
|
||||
fpu_vgfmag(4, CONST_R2R1, 4, 8);
|
||||
buf += 64;
|
||||
size -= 64;
|
||||
}
|
||||
|
||||
.Lfold_64bytes_loop:
|
||||
/* Load the next 64-byte data chunk into V5 to V8 */
|
||||
VLM %v5,%v8,0,%r3
|
||||
VPERM %v5,%v5,%v5,CONST_PERM_LE2BE
|
||||
VPERM %v6,%v6,%v6,CONST_PERM_LE2BE
|
||||
VPERM %v7,%v7,%v7,CONST_PERM_LE2BE
|
||||
VPERM %v8,%v8,%v8,CONST_PERM_LE2BE
|
||||
|
||||
/*
|
||||
* Perform a GF(2) multiplication of the doublewords in V1 with
|
||||
* the R1 and R2 reduction constants in V0. The intermediate result
|
||||
* is then folded (accumulated) with the next data chunk in V5 and
|
||||
* stored in V1. Repeat this step for the register contents
|
||||
* in V2, V3, and V4 respectively.
|
||||
*/
|
||||
VGFMAG %v1,CONST_R2R1,%v1,%v5
|
||||
VGFMAG %v2,CONST_R2R1,%v2,%v6
|
||||
VGFMAG %v3,CONST_R2R1,%v3,%v7
|
||||
VGFMAG %v4,CONST_R2R1,%v4,%v8
|
||||
|
||||
aghi %r3,64 /* BUF = BUF + 64 */
|
||||
aghi %r4,-64 /* LEN = LEN - 64 */
|
||||
|
||||
cghi %r4,64
|
||||
jnl .Lfold_64bytes_loop
|
||||
|
||||
.Lless_than_64bytes:
|
||||
/*
|
||||
* Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3
|
||||
* and R4 and accumulating the next 128-bit chunk until a single 128-bit
|
||||
* value remains.
|
||||
*/
|
||||
VGFMAG %v1,CONST_R4R3,%v1,%v2
|
||||
VGFMAG %v1,CONST_R4R3,%v1,%v3
|
||||
VGFMAG %v1,CONST_R4R3,%v1,%v4
|
||||
fpu_vgfmag(1, CONST_R4R3, 1, 2);
|
||||
fpu_vgfmag(1, CONST_R4R3, 1, 3);
|
||||
fpu_vgfmag(1, CONST_R4R3, 1, 4);
|
||||
|
||||
cghi %r4,16
|
||||
jl .Lfinal_fold
|
||||
while (size >= 16) {
|
||||
fpu_vl(2, buf);
|
||||
fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
|
||||
fpu_vgfmag(1, CONST_R4R3, 1, 2);
|
||||
buf += 16;
|
||||
size -= 16;
|
||||
}
|
||||
|
||||
.Lfold_16bytes_loop:
|
||||
|
||||
VL %v2,0,,%r3 /* Load next data chunk */
|
||||
VPERM %v2,%v2,%v2,CONST_PERM_LE2BE
|
||||
VGFMAG %v1,CONST_R4R3,%v1,%v2 /* Fold next data chunk */
|
||||
|
||||
aghi %r3,16
|
||||
aghi %r4,-16
|
||||
|
||||
cghi %r4,16
|
||||
jnl .Lfold_16bytes_loop
|
||||
|
||||
.Lfinal_fold:
|
||||
/*
|
||||
* Set up a vector register for byte shifts. The shift value must
|
||||
* be loaded in bits 1-4 in byte element 7 of a vector register.
|
||||
* Shift by 8 bytes: 0x40
|
||||
* Shift by 4 bytes: 0x20
|
||||
*/
|
||||
VLEIB %v9,0x40,7
|
||||
fpu_vleib(9, 0x40, 7);
|
||||
|
||||
/*
|
||||
* Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
|
||||
* to move R4 into the rightmost doubleword and set the leftmost
|
||||
* doubleword to 0x1.
|
||||
*/
|
||||
VSRLB %v0,CONST_R4R3,%v9
|
||||
VLEIG %v0,1,0
|
||||
fpu_vsrlb(0, CONST_R4R3, 9);
|
||||
fpu_vleig(0, 1, 0);
|
||||
|
||||
/*
|
||||
* Compute GF(2) product of V1 and V0. The rightmost doubleword
|
||||
@@ -216,7 +175,7 @@ SYM_FUNC_START(crc32_le_vgfm_generic)
|
||||
* multiplied by 0x1 and is then XORed with rightmost product.
|
||||
* Implicitly, the intermediate leftmost product becomes padded
|
||||
*/
|
||||
VGFMG %v1,%v0,%v1
|
||||
fpu_vgfmg(1, 0, 1);
|
||||
|
||||
/*
|
||||
* Now do the final 32-bit fold by multiplying the rightmost word
|
||||
@@ -231,10 +190,10 @@ SYM_FUNC_START(crc32_le_vgfm_generic)
|
||||
* rightmost doubleword and the leftmost doubleword is zero to ignore
|
||||
* the leftmost product of V1.
|
||||
*/
|
||||
VLEIB %v9,0x20,7 /* Shift by words */
|
||||
VSRLB %v2,%v1,%v9 /* Store remaining bits in V2 */
|
||||
VUPLLF %v1,%v1 /* Split rightmost doubleword */
|
||||
VGFMAG %v1,CONST_R5,%v1,%v2 /* V1 = (V1 * R5) XOR V2 */
|
||||
fpu_vleib(9, 0x20, 7); /* Shift by words */
|
||||
fpu_vsrlb(2, 1, 9); /* Store remaining bits in V2 */
|
||||
fpu_vupllf(1, 1); /* Split rightmost doubleword */
|
||||
fpu_vgfmag(1, CONST_R5, 1, 2); /* V1 = (V1 * R5) XOR V2 */
|
||||
|
||||
/*
|
||||
* Apply a Barret reduction to compute the final 32-bit CRC value.
|
||||
@@ -256,20 +215,26 @@ SYM_FUNC_START(crc32_le_vgfm_generic)
|
||||
*/
|
||||
|
||||
/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
|
||||
VUPLLF %v2,%v1
|
||||
VGFMG %v2,CONST_RU_POLY,%v2
|
||||
fpu_vupllf(2, 1);
|
||||
fpu_vgfmg(2, CONST_RU_POLY, 2);
|
||||
|
||||
/*
|
||||
* Compute the GF(2) product of the CRC polynomial with T1(x) in
|
||||
* V2 and XOR the intermediate result, T2(x), with the value in V1.
|
||||
* The final result is stored in word element 2 of V2.
|
||||
*/
|
||||
VUPLLF %v2,%v2
|
||||
VGFMAG %v2,CONST_CRC_POLY,%v2,%v1
|
||||
fpu_vupllf(2, 2);
|
||||
fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
|
||||
|
||||
.Ldone:
|
||||
VLGVF %r2,%v2,2
|
||||
BR_EX %r14
|
||||
SYM_FUNC_END(crc32_le_vgfm_generic)
|
||||
return fpu_vlgvf(2, 2);
|
||||
}
|
||||
|
||||
.previous
|
||||
u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
|
||||
{
|
||||
return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32_LE[0]);
|
||||
}
|
||||
|
||||
u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
|
||||
{
|
||||
return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32C_LE[0]);
|
||||
}
|
||||
@@ -125,20 +125,8 @@ struct s390_pxts_ctx {
|
||||
static inline int __paes_keyblob2pkey(struct key_blob *kb,
|
||||
struct pkey_protkey *pk)
|
||||
{
|
||||
int i, ret;
|
||||
|
||||
/* try three times in case of failure */
|
||||
for (i = 0; i < 3; i++) {
|
||||
if (i > 0 && ret == -EAGAIN && in_task())
|
||||
if (msleep_interruptible(1000))
|
||||
return -EINTR;
|
||||
ret = pkey_keyblob2pkey(kb->key, kb->keylen,
|
||||
pk->protkey, &pk->len, &pk->type);
|
||||
if (ret == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return pkey_keyblob2pkey(kb->key, kb->keylen,
|
||||
pk->protkey, &pk->len, &pk->type);
|
||||
}
|
||||
|
||||
static inline int __paes_convert_key(struct s390_paes_ctx *ctx)
|
||||
|
||||
@@ -20,8 +20,7 @@
|
||||
*/
|
||||
static void diag0c_fn(void *data)
|
||||
{
|
||||
diag_stat_inc(DIAG_STAT_X00C);
|
||||
diag_amode31_ops.diag0c(((void **)data)[smp_processor_id()]);
|
||||
diag0c(((void **)data)[smp_processor_id()]);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
|
||||
static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd)
|
||||
{
|
||||
union register_pair r1 = { .even = (unsigned long)data, };
|
||||
union register_pair r1 = { .even = virt_to_phys(data), };
|
||||
|
||||
asm volatile("diag %[r1],%[r3],0x304\n"
|
||||
: [r1] "+&d" (r1.pair)
|
||||
@@ -74,7 +74,7 @@ static int __hypfs_sprp_ioctl(void __user *user_area)
|
||||
int rc;
|
||||
|
||||
rc = -ENOMEM;
|
||||
data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
|
||||
data = (void *)get_zeroed_page(GFP_KERNEL);
|
||||
diag304 = kzalloc(sizeof(*diag304), GFP_KERNEL);
|
||||
if (!data || !diag304)
|
||||
goto out;
|
||||
|
||||
38
arch/s390/include/asm/access-regs.h
Normal file
38
arch/s390/include/asm/access-regs.h
Normal file
@@ -0,0 +1,38 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright IBM Corp. 1999, 2024
|
||||
*/
|
||||
|
||||
#ifndef __ASM_S390_ACCESS_REGS_H
|
||||
#define __ASM_S390_ACCESS_REGS_H
|
||||
|
||||
#include <linux/instrumented.h>
|
||||
#include <asm/sigcontext.h>
|
||||
|
||||
struct access_regs {
|
||||
unsigned int regs[NUM_ACRS];
|
||||
};
|
||||
|
||||
static inline void save_access_regs(unsigned int *acrs)
|
||||
{
|
||||
struct access_regs *regs = (struct access_regs *)acrs;
|
||||
|
||||
instrument_write(regs, sizeof(*regs));
|
||||
asm volatile("stamy 0,15,%[regs]"
|
||||
: [regs] "=QS" (*regs)
|
||||
:
|
||||
: "memory");
|
||||
}
|
||||
|
||||
static inline void restore_access_regs(unsigned int *acrs)
|
||||
{
|
||||
struct access_regs *regs = (struct access_regs *)acrs;
|
||||
|
||||
instrument_read(regs, sizeof(*regs));
|
||||
asm volatile("lamy 0,15,%[regs]"
|
||||
:
|
||||
: [regs] "QS" (*regs)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
#endif /* __ASM_S390_ACCESS_REGS_H */
|
||||
@@ -54,13 +54,13 @@ static inline int appldata_asm(struct appldata_parameter_list *parm_list,
|
||||
parm_list->function = fn;
|
||||
parm_list->parlist_length = sizeof(*parm_list);
|
||||
parm_list->buffer_length = length;
|
||||
parm_list->product_id_addr = (unsigned long) id;
|
||||
parm_list->product_id_addr = virt_to_phys(id);
|
||||
parm_list->buffer_addr = virt_to_phys(buffer);
|
||||
diag_stat_inc(DIAG_STAT_X0DC);
|
||||
asm volatile(
|
||||
" diag %1,%0,0xdc"
|
||||
: "=d" (ry)
|
||||
: "d" (parm_list), "m" (*parm_list), "m" (*id)
|
||||
: "d" (virt_to_phys(parm_list)), "m" (*parm_list), "m" (*id)
|
||||
: "cc");
|
||||
return ry;
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/fpu.h>
|
||||
#include <asm-generic/asm-prototypes.h>
|
||||
|
||||
__int128_t __ashlti3(__int128_t a, int b);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user