You've already forked linux-packaging-mono
Imported Upstream version 5.18.0.205
Former-commit-id: 7f59f7e792705db773f1caecdaa823092f4e2927
This commit is contained in:
parent
5cd5df71cc
commit
8e12397d70
27
external/llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll
vendored
Normal file
27
external/llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll
vendored
Normal file
@@ -0,0 +1,27 @@
; Test that the LoadStoreVectorizer emits the vectorized loads/stores in the
; correct order relative to the remaining scalar accesses.
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; CHECK-LABEL: @correct_order(
; CHECK: [[LOAD_PTR:%[0-9]+]] = bitcast i32* %next.gep1
; CHECK: load <2 x i32>, <2 x i32>* [[LOAD_PTR]]
; CHECK: load i32, i32* %next.gep
; CHECK: [[STORE_PTR:%[0-9]+]] = bitcast i32* %next.gep
; CHECK: store <2 x i32>
; CHECK-SAME: <2 x i32>* [[STORE_PTR]]
; CHECK: load i32, i32* %next.gep1
define void @correct_order(i32* noalias %ptr) {
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  %l1 = load i32, i32* %next.gep1, align 4
  %l2 = load i32, i32* %next.gep, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}
3
external/llvm/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg
vendored
Normal file
3
external/llvm/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg
vendored
Normal file
@@ -0,0 +1,3 @@
# Mark these tests unsupported unless the X86 backend was built into LLVM;
# the RUN lines target x86 triples and need the X86 target registered.
if 'X86' not in config.root.targets:
    config.unsupported = True
38
external/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll
vendored
Normal file
38
external/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll
vendored
Normal file
@@ -0,0 +1,38 @@
; Check that the LSV picks a vector width appropriate for the target CPU:
; Haswell (256-bit) merges the four <2 x double> loads into two <4 x double>
; loads, while KNL (512-bit) merges them into a single <8 x double> load.
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s

define <8 x double> @loadwidth_insert_extract(double* %ptr) {
  %a = bitcast double* %ptr to <2 x double> *
  %b = getelementptr <2 x double>, <2 x double>* %a, i32 1
  %c = getelementptr <2 x double>, <2 x double>* %a, i32 2
  %d = getelementptr <2 x double>, <2 x double>* %a, i32 3
; CHECK-HSW: load <4 x double>
; CHECK-HSW: load <4 x double>
; CHECK-HSW-NOT: load
; CHECK-KNL: load <8 x double>
; CHECK-KNL-NOT: load
  %la = load <2 x double>, <2 x double> *%a
  %lb = load <2 x double>, <2 x double> *%b
  %lc = load <2 x double>, <2 x double> *%c
  %ld = load <2 x double>, <2 x double> *%d
; Scalarize everything - Explicitly not a shufflevector to test this code
; path in the LSV
  %v1 = extractelement <2 x double> %la, i32 0
  %v2 = extractelement <2 x double> %la, i32 1
  %v3 = extractelement <2 x double> %lb, i32 0
  %v4 = extractelement <2 x double> %lb, i32 1
  %v5 = extractelement <2 x double> %lc, i32 0
  %v6 = extractelement <2 x double> %lc, i32 1
  %v7 = extractelement <2 x double> %ld, i32 0
  %v8 = extractelement <2 x double> %ld, i32 1
; Make a vector again
  %i1 = insertelement <8 x double> undef, double %v1, i32 0
  %i2 = insertelement <8 x double> %i1, double %v2, i32 1
  %i3 = insertelement <8 x double> %i2, double %v3, i32 2
  %i4 = insertelement <8 x double> %i3, double %v4, i32 3
  %i5 = insertelement <8 x double> %i4, double %v5, i32 4
  %i6 = insertelement <8 x double> %i5, double %v6, i32 5
  %i7 = insertelement <8 x double> %i6, double %v7, i32 6
  %i8 = insertelement <8 x double> %i7, double %v8, i32 7
  ret <8 x double> %i8
}
46
external/llvm/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll
vendored
Normal file
46
external/llvm/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll
vendored
Normal file
@@ -0,0 +1,46 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S < %s | \
; RUN: FileCheck %s
;
; The GPU Load & Store Vectorizer may merge differently-typed accesses into a
; single instruction. This test checks that we merge TBAA tags for such
; accesses correctly.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; struct S {
;   float f;
;   int i;
; };
%struct.S = type { float, i32 }

; float foo(S *p) {
;   p->f -= 1;
;   p->i -= 1;
;   return p->f;
; }
define float @foo(%struct.S* %p) {
entry:
; CHECK-LABEL: foo
; CHECK: load <2 x i32>, {{.*}}, !tbaa [[TAG_char:!.*]]
; CHECK: store <2 x i32> {{.*}}, !tbaa [[TAG_char]]
  %f = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0
  %0 = load float, float* %f, align 4, !tbaa !2
  %sub = fadd float %0, -1.000000e+00
  store float %sub, float* %f, align 4, !tbaa !2
  %i = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1
  %1 = load i32, i32* %i, align 4, !tbaa !8
  %sub1 = add nsw i32 %1, -1
  store i32 %sub1, i32* %i, align 4, !tbaa !8
  ret float %sub
}

!2 = !{!3, !4, i64 0}
!3 = !{!"_ZTS1S", !4, i64 0, !7, i64 4}
!4 = !{!"float", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C++ TBAA"}
!7 = !{!"int", !5, i64 0}
!8 = !{!3, !7, i64 4}

; CHECK-DAG: [[TYPE_char:!.*]] = !{!"omnipotent char", {{.*}}, i64 0}
; Was misspelled "CHECK-FAG", which FileCheck silently ignores, so the
; merged-tag check below never actually ran. Corrected to CHECK-DAG.
; CHECK-DAG: [[TAG_char]] = !{[[TYPE_char]], [[TYPE_char]], i64 0}
30
external/llvm/test/Transforms/LoadStoreVectorizer/X86/non-byte-size.ll
vendored
Normal file
30
external/llvm/test/Transforms/LoadStoreVectorizer/X86/non-byte-size.ll
vendored
Normal file
@@ -0,0 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -load-store-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s

%rec = type { i32, i28 }

; We currently do not optimize this scenario.
; But we verify that we no longer crash when compiling this.
define void @test1(%rec* %out, %rec* %in) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    [[IN1:%.*]] = getelementptr [[REC:%.*]], %rec* [[IN:%.*]], i16 0, i32 0
; CHECK-NEXT:    [[IN2:%.*]] = getelementptr [[REC]], %rec* [[IN]], i16 0, i32 1
; CHECK-NEXT:    [[VAL1:%.*]] = load i32, i32* [[IN1]], align 8
; CHECK-NEXT:    [[VAL2:%.*]] = load i28, i28* [[IN2]]
; CHECK-NEXT:    [[OUT1:%.*]] = getelementptr [[REC]], %rec* [[OUT:%.*]], i16 0, i32 0
; CHECK-NEXT:    [[OUT2:%.*]] = getelementptr [[REC]], %rec* [[OUT]], i16 0, i32 1
; CHECK-NEXT:    store i32 [[VAL1]], i32* [[OUT1]], align 8
; CHECK-NEXT:    store i28 [[VAL2]], i28* [[OUT2]]
; CHECK-NEXT:    ret void
;
  %in1 = getelementptr %rec, %rec* %in, i16 0, i32 0
  %in2 = getelementptr %rec, %rec* %in, i16 0, i32 1
  %val1 = load i32, i32* %in1, align 8
  %val2 = load i28, i28* %in2
  %out1 = getelementptr %rec, %rec* %out, i16 0, i32 0
  %out2 = getelementptr %rec, %rec* %out, i16 0, i32 1
  store i32 %val1, i32* %out1, align 8
  store i28 %val2, i28* %out2
  ret void
}
28
external/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll
vendored
Normal file
28
external/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll
vendored
Normal file
@@ -0,0 +1,28 @@
; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s | FileCheck %s

target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

%struct.buffer_t = type { i32, i8* }

; Check an i32 and i8* get vectorized, and that the two accesses
; (load into buff.val and store to buff.p) preserve their order.
; Vectorized loads should be inserted at the position of the first load,
; and instructions which were between the first and last load should be
; reordered preserving their relative order inasmuch as possible.

; CHECK-LABEL: @preserve_order_32(
; CHECK: load <2 x i32>
; CHECK: %buff.val = load i8
; CHECK: store i8 0
define void @preserve_order_32(%struct.buffer_t* noalias %buff) #0 {
entry:
  %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 1
  %buff.p = load i8*, i8** %tmp1
  %buff.val = load i8, i8* %buff.p
  store i8 0, i8* %buff.p, align 8
  %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 0
  %buff.int = load i32, i32* %tmp0, align 8
  ret void
}

attributes #0 = { nounwind }
77
external/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll
vendored
Normal file
77
external/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll
vendored
Normal file
@@ -0,0 +1,77 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

%struct.buffer_t = type { i64, i8* }
%struct.nested.buffer = type { %struct.buffer_t, %struct.buffer_t }

; Check an i64 and i8* get vectorized, and that the two accesses
; (load into buff.val and store to buff.p) preserve their order.
; Vectorized loads should be inserted at the position of the first load,
; and instructions which were between the first and last load should be
; reordered preserving their relative order inasmuch as possible.

; CHECK-LABEL: @preserve_order_64(
; CHECK: load <2 x i64>
; CHECK: %buff.val = load i8
; CHECK: store i8 0
define void @preserve_order_64(%struct.buffer_t* noalias %buff) #0 {
entry:
  %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
  %buff.p = load i8*, i8** %tmp1
  %buff.val = load i8, i8* %buff.p
  store i8 0, i8* %buff.p, align 8
  %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
  %buff.int = load i64, i64* %tmp0, align 16
  ret void
}

; Check reordering recurses correctly.

; CHECK-LABEL: @transitive_reorder(
; CHECK: load <2 x i64>
; CHECK: %buff.val = load i8
; CHECK: store i8 0
define void @transitive_reorder(%struct.buffer_t* noalias %buff, %struct.nested.buffer* noalias %nest) #0 {
entry:
  %nest0_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
  %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest0_0, i64 0, i32 1
  %buff.p = load i8*, i8** %tmp1
  %buff.val = load i8, i8* %buff.p
  store i8 0, i8* %buff.p, align 8
  %nest1_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
  %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest1_0, i64 0, i32 0
  %buff.int = load i64, i64* %tmp0, align 16
  ret void
}

; Check for no vectorization over phi node

; CHECK-LABEL: @no_vect_phi(
; CHECK: load i8*
; CHECK: load i8
; CHECK: store i8 0
; CHECK: load i64
define void @no_vect_phi(i32* noalias %ptr, %struct.buffer_t* noalias %buff) {
entry:
  %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
  %buff.p = load i8*, i8** %tmp1
  %buff.val = load i8, i8* %buff.p
  store i8 0, i8* %buff.p, align 8
  br label %"for something"

"for something":
  %index = phi i64 [ 0, %entry ], [ %index.next, %"for something" ]

  %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
  %buff.int = load i64, i64* %tmp0, align 16

  %index.next = add i64 %index, 8
  %cmp_res = icmp eq i64 %index.next, 8
  br i1 %cmp_res, label %ending, label %"for something"

ending:
  ret void
}

attributes #0 = { nounwind }
117
external/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
vendored
Normal file
117
external/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
vendored
Normal file
@@ -0,0 +1,117 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Vectorized subsets of the load/store chains in the presence of
; interleaved loads/stores

; CHECK-LABEL: @interleave_2L_2S(
; CHECK: load <2 x i32>
; CHECK: load i32
; CHECK: store <2 x i32>
; CHECK: load i32
define void @interleave_2L_2S(i32* noalias %ptr) {
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  %l1 = load i32, i32* %next.gep1, align 4
  %l2 = load i32, i32* %next.gep, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}

; CHECK-LABEL: @interleave_3L_2S_1L(
; CHECK: load <3 x i32>
; CHECK: store <2 x i32>
; CHECK: load i32

define void @interleave_3L_2S_1L(i32* noalias %ptr) {
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  %l2 = load i32, i32* %next.gep, align 4
  %l1 = load i32, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}

; CHECK-LABEL: @chain_suffix(
; CHECK: load i32
; CHECK: store <2 x i32>
; CHECK: load <2 x i32>
define void @chain_suffix(i32* noalias %ptr) {
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  %l2 = load i32, i32* %next.gep, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}


; CHECK-LABEL: @chain_prefix_suffix(
; CHECK: load <2 x i32>
; CHECK: store <2 x i32>
; CHECK: load <3 x i32>
define void @chain_prefix_suffix(i32* noalias %ptr) {
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2
  %next.gep3 = getelementptr i32, i32* %ptr, i64 3

  %l1 = load i32, i32* %next.gep, align 4
  %l2 = load i32, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep2, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4
  %l5 = load i32, i32* %next.gep3, align 4

  ret void
}

; FIXME: If the chain is too long and TLI says misaligned is not fast,
; then LSV fails to vectorize anything in that chain.
; To reproduce below, add a tmp5 (ptr+4) and load tmp5 into l6 and l7.

; CHECK-LABEL: @interleave_get_longest
; CHECK: load <3 x i32>
; CHECK: load i32
; CHECK: store <2 x i32> zeroinitializer
; CHECK: load i32
; CHECK: load i32
; CHECK: load i32

define void @interleave_get_longest(i32* noalias %ptr) {
  %tmp1 = getelementptr i32, i32* %ptr, i64 0
  %tmp2 = getelementptr i32, i32* %ptr, i64 1
  %tmp3 = getelementptr i32, i32* %ptr, i64 2
  %tmp4 = getelementptr i32, i32* %ptr, i64 3

  %l1 = load i32, i32* %tmp2, align 4
  %l2 = load i32, i32* %tmp1, align 4
  store i32 0, i32* %tmp2, align 4
  store i32 0, i32* %tmp1, align 4
  %l3 = load i32, i32* %tmp2, align 4
  %l4 = load i32, i32* %tmp3, align 4
  %l5 = load i32, i32* %tmp4, align 4
  %l6 = load i32, i32* %tmp4, align 4
  %l7 = load i32, i32* %tmp4, align 4

  ret void
}
Reference in New Issue
Block a user