Imported Upstream version 5.18.0.205

Former-commit-id: 7f59f7e792705db773f1caecdaa823092f4e2927
Xamarin Public Jenkins (auto-signing)
2018-11-16 08:20:38 +00:00
parent 5cd5df71cc
commit 8e12397d70
28486 changed files with 3867013 additions and 66 deletions

@@ -0,0 +1,27 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; CHECK-LABEL: @correct_order(
; CHECK: [[LOAD_PTR:%[0-9]+]] = bitcast i32* %next.gep1
; CHECK: load <2 x i32>, <2 x i32>* [[LOAD_PTR]]
; CHECK: load i32, i32* %next.gep
; CHECK: [[STORE_PTR:%[0-9]+]] = bitcast i32* %next.gep
; CHECK: store <2 x i32>
; CHECK-SAME: <2 x i32>* [[STORE_PTR]]
; CHECK: load i32, i32* %next.gep1
define void @correct_order(i32* noalias %ptr) {
%next.gep = getelementptr i32, i32* %ptr, i64 0
%next.gep1 = getelementptr i32, i32* %ptr, i64 1
%next.gep2 = getelementptr i32, i32* %ptr, i64 2
%l1 = load i32, i32* %next.gep1, align 4
%l2 = load i32, i32* %next.gep, align 4
store i32 0, i32* %next.gep1, align 4
store i32 0, i32* %next.gep, align 4
%l3 = load i32, i32* %next.gep1, align 4
%l4 = load i32, i32* %next.gep2, align 4
ret void
}

@@ -0,0 +1,3 @@
if 'X86' not in config.root.targets:
config.unsupported = True

@@ -0,0 +1,38 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
define <8 x double> @loadwidth_insert_extract(double* %ptr) {
%a = bitcast double* %ptr to <2 x double>*
%b = getelementptr <2 x double>, <2 x double>* %a, i32 1
%c = getelementptr <2 x double>, <2 x double>* %a, i32 2
%d = getelementptr <2 x double>, <2 x double>* %a, i32 3
; CHECK-HSW: load <4 x double>
; CHECK-HSW: load <4 x double>
; CHECK-HSW-NOT: load
; CHECK-KNL: load <8 x double>
; CHECK-KNL-NOT: load
%la = load <2 x double>, <2 x double>* %a
%lb = load <2 x double>, <2 x double>* %b
%lc = load <2 x double>, <2 x double>* %c
%ld = load <2 x double>, <2 x double>* %d
; Scalarize everything. This is deliberately not a shufflevector, so that we
; exercise that code path in the LSV.
%v1 = extractelement <2 x double> %la, i32 0
%v2 = extractelement <2 x double> %la, i32 1
%v3 = extractelement <2 x double> %lb, i32 0
%v4 = extractelement <2 x double> %lb, i32 1
%v5 = extractelement <2 x double> %lc, i32 0
%v6 = extractelement <2 x double> %lc, i32 1
%v7 = extractelement <2 x double> %ld, i32 0
%v8 = extractelement <2 x double> %ld, i32 1
; Make a vector again
%i1 = insertelement <8 x double> undef, double %v1, i32 0
%i2 = insertelement <8 x double> %i1, double %v2, i32 1
%i3 = insertelement <8 x double> %i2, double %v3, i32 2
%i4 = insertelement <8 x double> %i3, double %v4, i32 3
%i5 = insertelement <8 x double> %i4, double %v5, i32 4
%i6 = insertelement <8 x double> %i5, double %v6, i32 5
%i7 = insertelement <8 x double> %i6, double %v7, i32 6
%i8 = insertelement <8 x double> %i7, double %v8, i32 7
ret <8 x double> %i8
}

@@ -0,0 +1,46 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S < %s | \
; RUN: FileCheck %s
;
; The GPU Load & Store Vectorizer may merge differently-typed accesses into a
; single instruction. This test checks that we merge TBAA tags for such
; accesses correctly.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; struct S {
; float f;
; int i;
; };
%struct.S = type { float, i32 }
; float foo(S *p) {
; p->f -= 1;
; p->i -= 1;
; return p->f;
; }
define float @foo(%struct.S* %p) {
entry:
; CHECK-LABEL: foo
; CHECK: load <2 x i32>, {{.*}}, !tbaa [[TAG_char:!.*]]
; CHECK: store <2 x i32> {{.*}}, !tbaa [[TAG_char]]
%f = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0
%0 = load float, float* %f, align 4, !tbaa !2
%sub = fadd float %0, -1.000000e+00
store float %sub, float* %f, align 4, !tbaa !2
%i = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1
%1 = load i32, i32* %i, align 4, !tbaa !8
%sub1 = add nsw i32 %1, -1
store i32 %sub1, i32* %i, align 4, !tbaa !8
ret float %sub
}
!2 = !{!3, !4, i64 0}
!3 = !{!"_ZTS1S", !4, i64 0, !7, i64 4}
!4 = !{!"float", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C++ TBAA"}
!7 = !{!"int", !5, i64 0}
!8 = !{!3, !7, i64 4}
; CHECK-DAG: [[TYPE_char:!.*]] = !{!"omnipotent char", {{.*}}, i64 0}
; CHECK-DAG: [[TAG_char]] = !{[[TYPE_char]], [[TYPE_char]], i64 0}
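
; Editor's sketch (not part of the upstream test): roughly the merged form
; that the CHECK lines above anticipate. The function below is illustrative
; only; its value names and the metadata id !12 are assumptions. The vector
; access carries the "omnipotent char" access tag, the common ancestor of the
; float tag !2 and the int tag !8 in the TBAA tree.
define void @foo_sketch(%struct.S* %p) {
entry:
%f = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0
%vp = bitcast float* %f to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %vp, align 4, !tbaa !12
store <2 x i32> %v, <2 x i32>* %vp, align 4, !tbaa !12
ret void
}
!12 = !{!5, !5, i64 0}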

@@ -0,0 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -load-store-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
%rec = type { i32, i28 }
; We currently do not optimize this scenario, but we verify that it no longer
; crashes the compiler.
define void @test1(%rec* %out, %rec* %in) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[IN1:%.*]] = getelementptr [[REC:%.*]], %rec* [[IN:%.*]], i16 0, i32 0
; CHECK-NEXT: [[IN2:%.*]] = getelementptr [[REC]], %rec* [[IN]], i16 0, i32 1
; CHECK-NEXT: [[VAL1:%.*]] = load i32, i32* [[IN1]], align 8
; CHECK-NEXT: [[VAL2:%.*]] = load i28, i28* [[IN2]]
; CHECK-NEXT: [[OUT1:%.*]] = getelementptr [[REC]], %rec* [[OUT:%.*]], i16 0, i32 0
; CHECK-NEXT: [[OUT2:%.*]] = getelementptr [[REC]], %rec* [[OUT]], i16 0, i32 1
; CHECK-NEXT: store i32 [[VAL1]], i32* [[OUT1]], align 8
; CHECK-NEXT: store i28 [[VAL2]], i28* [[OUT2]]
; CHECK-NEXT: ret void
;
%in1 = getelementptr %rec, %rec* %in, i16 0, i32 0
%in2 = getelementptr %rec, %rec* %in, i16 0, i32 1
%val1 = load i32, i32* %in1, align 8
%val2 = load i28, i28* %in2
%out1 = getelementptr %rec, %rec* %out, i16 0, i32 0
%out2 = getelementptr %rec, %rec* %out, i16 0, i32 1
store i32 %val1, i32* %out1, align 8
store i28 %val2, i28* %out2
ret void
}

@@ -0,0 +1,28 @@
; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s | FileCheck %s
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
%struct.buffer_t = type { i32, i8* }
; Check that an i32 and an i8* get vectorized, and that the two accesses
; (the load into %buff.val and the store to %buff.p) preserve their order.
; Vectorized loads should be inserted at the position of the first load,
; and instructions which were between the first and last load should be
; reordered, preserving their relative order as far as possible. A sketch of
; the expected post-LSV shape follows the function below.
; CHECK-LABEL: @preserve_order_32(
; CHECK: load <2 x i32>
; CHECK: %buff.val = load i8
; CHECK: store i8 0
define void @preserve_order_32(%struct.buffer_t* noalias %buff) #0 {
entry:
%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 1
%buff.p = load i8*, i8** %tmp1
%buff.val = load i8, i8* %buff.p
store i8 0, i8* %buff.p, align 8
%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 0
%buff.int = load i32, i32* %tmp0, align 8
ret void
}
attributes #0 = { nounwind }
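
; Editor's sketch (not part of the upstream test): a plausible shape for
; @preserve_order_32 after the LSV runs, illustrating the insertion-point
; rule described above. The exact casts, the extractelement/inttoptr
; sequence, and all value names are assumptions, not verified output.
define void @preserve_order_32_sketch(%struct.buffer_t* noalias %buff) #0 {
entry:
%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 0
; The <2 x i32> load is emitted at the position of the first scalar load.
%vecptr = bitcast i32* %tmp0 to <2 x i32>*
%vec = load <2 x i32>, <2 x i32>* %vecptr, align 8
%p.int = extractelement <2 x i32> %vec, i32 1
%buff.p = inttoptr i32 %p.int to i8*
; Instructions that sat between the first and last load keep their order.
%buff.val = load i8, i8* %buff.p
store i8 0, i8* %buff.p, align 8
%buff.int = extractelement <2 x i32> %vec, i32 0
ret void
}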

@@ -0,0 +1,77 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
%struct.buffer_t = type { i64, i8* }
%struct.nested.buffer = type { %struct.buffer_t, %struct.buffer_t }
; Check that an i64 and an i8* get vectorized, and that the two accesses
; (the load into %buff.val and the store to %buff.p) preserve their order.
; Vectorized loads should be inserted at the position of the first load,
; and instructions which were between the first and last load should be
; reordered, preserving their relative order as far as possible.
; CHECK-LABEL: @preserve_order_64(
; CHECK: load <2 x i64>
; CHECK: %buff.val = load i8
; CHECK: store i8 0
define void @preserve_order_64(%struct.buffer_t* noalias %buff) #0 {
entry:
%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
%buff.p = load i8*, i8** %tmp1
%buff.val = load i8, i8* %buff.p
store i8 0, i8* %buff.p, align 8
%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
%buff.int = load i64, i64* %tmp0, align 16
ret void
}
; Check reordering recurses correctly.
; CHECK-LABEL: @transitive_reorder(
; CHECK: load <2 x i64>
; CHECK: %buff.val = load i8
; CHECK: store i8 0
define void @transitive_reorder(%struct.buffer_t* noalias %buff, %struct.nested.buffer* noalias %nest) #0 {
entry:
%nest0_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest0_0, i64 0, i32 1
%buff.p = load i8*, i8** %tmp1
%buff.val = load i8, i8* %buff.p
store i8 0, i8* %buff.p, align 8
%nest1_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest1_0, i64 0, i32 0
%buff.int = load i64, i64* %tmp0, align 16
ret void
}
; Check that there is no vectorization across a phi node.
; CHECK-LABEL: @no_vect_phi(
; CHECK: load i8*
; CHECK: load i8
; CHECK: store i8 0
; CHECK: load i64
define void @no_vect_phi(i32* noalias %ptr, %struct.buffer_t* noalias %buff) {
entry:
%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
%buff.p = load i8*, i8** %tmp1
%buff.val = load i8, i8* %buff.p
store i8 0, i8* %buff.p, align 8
br label %"for something"
"for something":
%index = phi i64 [ 0, %entry ], [ %index.next, %"for something" ]
%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
%buff.int = load i64, i64* %tmp0, align 16
%index.next = add i64 %index, 8
%cmp_res = icmp eq i64 %index.next, 8
br i1 %cmp_res, label %ending, label %"for something"
ending:
ret void
}
attributes #0 = { nounwind }

@@ -0,0 +1,117 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; Check that subsets of the load/store chains are vectorized in the presence
; of interleaved loads/stores.
; CHECK-LABEL: @interleave_2L_2S(
; CHECK: load <2 x i32>
; CHECK: load i32
; CHECK: store <2 x i32>
; CHECK: load i32
define void @interleave_2L_2S(i32* noalias %ptr) {
%next.gep = getelementptr i32, i32* %ptr, i64 0
%next.gep1 = getelementptr i32, i32* %ptr, i64 1
%next.gep2 = getelementptr i32, i32* %ptr, i64 2
%l1 = load i32, i32* %next.gep1, align 4
%l2 = load i32, i32* %next.gep, align 4
store i32 0, i32* %next.gep1, align 4
store i32 0, i32* %next.gep, align 4
%l3 = load i32, i32* %next.gep1, align 4
%l4 = load i32, i32* %next.gep2, align 4
ret void
}
; CHECK-LABEL: @interleave_3L_2S_1L(
; CHECK: load <3 x i32>
; CHECK: store <2 x i32>
; CHECK: load i32
define void @interleave_3L_2S_1L(i32* noalias %ptr) {
%next.gep = getelementptr i32, i32* %ptr, i64 0
%next.gep1 = getelementptr i32, i32* %ptr, i64 1
%next.gep2 = getelementptr i32, i32* %ptr, i64 2
%l2 = load i32, i32* %next.gep, align 4
%l1 = load i32, i32* %next.gep1, align 4
store i32 0, i32* %next.gep1, align 4
store i32 0, i32* %next.gep, align 4
%l3 = load i32, i32* %next.gep1, align 4
%l4 = load i32, i32* %next.gep2, align 4
ret void
}
; CHECK-LABEL: @chain_suffix(
; CHECK: load i32
; CHECK: store <2 x i32>
; CHECK: load <2 x i32>
define void @chain_suffix(i32* noalias %ptr) {
%next.gep = getelementptr i32, i32* %ptr, i64 0
%next.gep1 = getelementptr i32, i32* %ptr, i64 1
%next.gep2 = getelementptr i32, i32* %ptr, i64 2
%l2 = load i32, i32* %next.gep, align 4
store i32 0, i32* %next.gep1, align 4
store i32 0, i32* %next.gep, align 4
%l3 = load i32, i32* %next.gep1, align 4
%l4 = load i32, i32* %next.gep2, align 4
ret void
}
; CHECK-LABEL: @chain_prefix_suffix(
; CHECK: load <2 x i32>
; CHECK: store <2 x i32>
; CHECK: load <3 x i32>
define void @chain_prefix_suffix(i32* noalias %ptr) {
%next.gep = getelementptr i32, i32* %ptr, i64 0
%next.gep1 = getelementptr i32, i32* %ptr, i64 1
%next.gep2 = getelementptr i32, i32* %ptr, i64 2
%next.gep3 = getelementptr i32, i32* %ptr, i64 3
%l1 = load i32, i32* %next.gep, align 4
%l2 = load i32, i32* %next.gep1, align 4
store i32 0, i32* %next.gep1, align 4
store i32 0, i32* %next.gep2, align 4
%l3 = load i32, i32* %next.gep1, align 4
%l4 = load i32, i32* %next.gep2, align 4
%l5 = load i32, i32* %next.gep3, align 4
ret void
}
; FIXME: If the chain is too long and TLI says misaligned accesses are not
; fast, then LSV fails to vectorize anything in that chain.
; To reproduce, add a tmp5 (ptr+4) and load tmp5 into l6 and l7; see the
; editor's sketch after this function.
; CHECK-LABEL: @interleave_get_longest
; CHECK: load <3 x i32>
; CHECK: load i32
; CHECK: store <2 x i32> zeroinitializer
; CHECK: load i32
; CHECK: load i32
; CHECK: load i32
define void @interleave_get_longest(i32* noalias %ptr) {
%tmp1 = getelementptr i32, i32* %ptr, i64 0
%tmp2 = getelementptr i32, i32* %ptr, i64 1
%tmp3 = getelementptr i32, i32* %ptr, i64 2
%tmp4 = getelementptr i32, i32* %ptr, i64 3
%l1 = load i32, i32* %tmp2, align 4
%l2 = load i32, i32* %tmp1, align 4
store i32 0, i32* %tmp2, align 4
store i32 0, i32* %tmp1, align 4
%l3 = load i32, i32* %tmp2, align 4
%l4 = load i32, i32* %tmp3, align 4
%l5 = load i32, i32* %tmp4, align 4
%l6 = load i32, i32* %tmp4, align 4
%l7 = load i32, i32* %tmp4, align 4
ret void
}
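
; Editor's sketch (not part of the upstream test): the reproducer described
; by the FIXME above. The chain is extended with a tmp5 at ptr+4, and %l6
; and %l7 load from tmp5 instead of tmp4. The function name is illustrative.
define void @interleave_get_longest_repro(i32* noalias %ptr) {
%tmp1 = getelementptr i32, i32* %ptr, i64 0
%tmp2 = getelementptr i32, i32* %ptr, i64 1
%tmp3 = getelementptr i32, i32* %ptr, i64 2
%tmp4 = getelementptr i32, i32* %ptr, i64 3
%tmp5 = getelementptr i32, i32* %ptr, i64 4
%l1 = load i32, i32* %tmp2, align 4
%l2 = load i32, i32* %tmp1, align 4
store i32 0, i32* %tmp2, align 4
store i32 0, i32* %tmp1, align 4
%l3 = load i32, i32* %tmp2, align 4
%l4 = load i32, i32* %tmp3, align 4
%l5 = load i32, i32* %tmp4, align 4
%l6 = load i32, i32* %tmp5, align 4
%l7 = load i32, i32* %tmp5, align 4
ret void
}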