Imported Upstream version 5.18.0.167

Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
Xamarin Public Jenkins (auto-signing)
2018-10-20 08:25:10 +00:00
parent e19d552987
commit b084638f15
28489 changed files with 184 additions and 3866856 deletions

View File

@@ -1,50 +0,0 @@
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
; In this loop just because we access A through different types (int, float)
; we still have a dependence cycle:
;
; for (i = 0; i < n; i++) {
; A_float = (float *) A;
; A_float[i + 1] = A[i] * B[i];
; }
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
; CHECK: Report: unsafe dependent memory operations in loop
; CHECK-NOT: Memory dependences are safe
@B = common global i32* null, align 8
@A = common global i32* null, align 8
define void @f() {
entry:
%a = load i32*, i32** @A, align 8
%b = load i32*, i32** @B, align 8
br label %for.body
for.body: ; preds = %for.body, %entry
%storemerge3 = phi i64 [ 0, %entry ], [ %add, %for.body ]
%arrayidxA = getelementptr inbounds i32, i32* %a, i64 %storemerge3
%loadA = load i32, i32* %arrayidxA, align 2
%arrayidxB = getelementptr inbounds i32, i32* %b, i64 %storemerge3
%loadB = load i32, i32* %arrayidxB, align 2
%mul = mul i32 %loadB, %loadA
%add = add nuw nsw i64 %storemerge3, 1
%a_float = bitcast i32* %a to float*
%arrayidxA_plus_2 = getelementptr inbounds float, float* %a_float, i64 %add
%mul_float = sitofp i32 %mul to float
store float %mul_float, float* %arrayidxA_plus_2, align 2
%exitcond = icmp eq i64 %add, 20
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}

View File

@@ -1,45 +0,0 @@
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
; for (unsigned i = 0; i < 100; i++) {
; A[i+8] = B[i] + 2;
; C[i] = A[i] * 2;
; }
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
define void @f(i32* %A, i32* %B, i32* %C, i64 %N) {
; CHECK: Dependences:
; CHECK-NEXT: Forward:
; CHECK-NEXT: store i32 %a_p1, i32* %Aidx_ahead, align 4 ->
; CHECK-NEXT: %a = load i32, i32* %Aidx, align 4
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%idx = add nuw nsw i64 %indvars.iv, 8
%Aidx_ahead = getelementptr inbounds i32, i32* %A, i64 %idx
%Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
%Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
%Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%b = load i32, i32* %Bidx, align 4
%a_p1 = add i32 %b, 2
store i32 %a_p1, i32* %Aidx_ahead, align 4
%a = load i32, i32* %Aidx, align 4
%c = mul i32 %a, 2
store i32 %c, i32* %Cidx, align 4
%exitcond = icmp eq i64 %indvars.iv.next, %N
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}

View File

@@ -1,65 +0,0 @@
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
; Check that loop-independent forward dependences are discovered properly.
;
; FIXME: This does not actually always work which is pretty confusing. Right
; now there is a hack in LAA that tries to figure out loop-independent forward
; dependences *outside* of the MemoryDepChecker logic (i.e. proper dependence
; analysis).
;
; Therefore if there is only loop-independent dependences for an array
; (i.e. the same index is used), we don't discover the forward dependence.
; So, at ***, we add another non-I-based access of A to trigger
; MemoryDepChecker analysis for accesses of A.
;
; for (unsigned i = 0; i < 100; i++) {
; A[i + 1] = B[i] + 1; // ***
; A[i] = B[i] + 2;
; C[i] = A[i] * 2;
; }
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
define void @f(i32* noalias %A, i32* noalias %B, i32* noalias %C, i64 %N) {
; CHECK: Dependences:
; CHECK-NEXT: Forward:
; CHECK-NEXT: store i32 %b_p1, i32* %Aidx, align 4 ->
; CHECK-NEXT: %a = load i32, i32* %Aidx, align 4
; CHECK: ForwardButPreventsForwarding:
; CHECK-NEXT: store i32 %b_p2, i32* %Aidx_next, align 4 ->
; CHECK-NEXT: %a = load i32, i32* %Aidx, align 4
; CHECK: Forward:
; CHECK-NEXT: store i32 %b_p2, i32* %Aidx_next, align 4 ->
; CHECK-NEXT: store i32 %b_p1, i32* %Aidx, align 4
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
%Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
%Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
%Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%b = load i32, i32* %Bidx, align 4
%b_p2 = add i32 %b, 1
store i32 %b_p2, i32* %Aidx_next, align 4
%b_p1 = add i32 %b, 2
store i32 %b_p1, i32* %Aidx, align 4
%a = load i32, i32* %Aidx, align 4
%c = mul i32 %a, 2
store i32 %c, i32* %Cidx, align 4
%exitcond = icmp eq i64 %indvars.iv.next, %N
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}

View File

@@ -1,46 +0,0 @@
; RUN: opt < %s -store-to-load-forwarding-conflict-detection=false -loop-accesses -analyze | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -store-to-load-forwarding-conflict-detection=false -disable-output < %s 2>&1 | FileCheck %s
; This test checks that we prove the strided accesses to be independent before
; concluding that there is a forward dependence.
; struct pair {
; int x;
; int y;
; };
;
; int independent_interleaved(struct pair *p, int z, int n) {
; int s = 0;
; for (int i = 0; i < n; i++) {
; p[i].y = z;
; s += p[i].x;
; }
; return s;
; }
; CHECK: for.body:
; CHECK-NOT: Forward:
; CHECK-NOT: store i32 %z, i32* %p_i.y, align 8 ->
; CHECK-NOT: %0 = load i32, i32* %p_i.x, align 8
%pair = type { i32, i32 }
define i32 @independent_interleaved(%pair *%p, i64 %n, i32 %z) {
entry:
br label %for.body
for.body:
%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
%s = phi i32 [ %1, %for.body ], [ 0, %entry ]
%p_i.x = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
%p_i.y = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
store i32 %z, i32* %p_i.y, align 8
%0 = load i32, i32* %p_i.x, align 8
%1 = add nsw i32 %0, %s
%i.next = add nuw nsw i64 %i, 1
%cond = icmp slt i64 %i.next, %n
br i1 %cond, label %for.body, label %for.end
for.end:
%2 = phi i32 [ %1, %for.body ]
ret i32 %2
}

View File

@@ -1,29 +0,0 @@
; RUN: opt -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
; CHECK-LABEL: TestFoo
; CHECK-NOT: %wide.vec
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
define void @TestFoo(i1 %X, i1 %Y) {
bb:
br label %.loopexit5.outer
.loopexit5.outer:
br label %.lr.ph12
.loopexit:
br i1 %X, label %.loopexit5.outer, label %.lr.ph12
.lr.ph12:
%f.110 = phi i32* [ %tmp1, %.loopexit ], [ null, %.loopexit5.outer ]
%tmp1 = getelementptr inbounds i32, i32* %f.110, i64 -2
br i1 %Y, label %bb4, label %.loopexit
bb4:
%j.27 = phi i32 [ 0, %.lr.ph12 ], [ %tmp7, %bb4 ]
%tmp5 = load i32, i32* %f.110, align 4
%tmp7 = add nsw i32 %j.27, 1
%exitcond = icmp eq i32 %tmp7, 0
br i1 %exitcond, label %.loopexit, label %bb4
}

View File

@@ -1,39 +0,0 @@
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
; Handle memchecks involving loop-invariant addresses:
;
; extern int *A, *b;
; for (i = 0; i < N; ++i) {
; A[i] = b;
; }
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: Memory dependences are safe with run-time checks
; CHECK: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group ({{.*}}):
; CHECK-NEXT: %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
; CHECK-NEXT: Against group ({{.*}}):
; CHECK-NEXT: i32* %b
define void @f(i32* %a, i32* %b) {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
%loadB = load i32, i32* %b, align 4
store i32 %loadB, i32* %arrayidxA, align 4
%inc = add nuw nsw i64 %ind, 1
%exitcond = icmp eq i64 %inc, 20
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}

View File

@@ -1,51 +0,0 @@
; RUN: opt -analyze --loop-accesses %s | FileCheck %s
; This test verifies run-time boundary check of memory accesses.
; The original loop:
; void fastCopy(const char* src, char* op) {
; int len = 32;
; while (len > 0) {
; *(reinterpret_cast<long long*>(op)) = *(reinterpret_cast<const long long*>(src));
; src += 8;
; op += 8;
; len -= 8;
; }
; }
; Boundaries calculations before this patch:
; (Low: %src High: (24 + %src))
; and the actual distance between two pointers was 31, (%op - %src = 31)
; IsConflict = (24 > 31) = false -> execution is directed to the vectorized loop.
; The loop was vectorized to 4, 32 byte memory access ( <4 x i64> ),
; store a value at *%op touched memory under *%src.
;CHECK: Printing analysis 'Loop Access Analysis' for function 'fastCopy'
;CHECK: (Low: %op High: (32 + %op))
;CHECK: (Low: %src High: (32 + %src))
define void @fastCopy(i8* nocapture readonly %src, i8* nocapture %op) {
entry:
br label %while.body.preheader
while.body.preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
%len.addr.07 = phi i32 [ %sub, %while.body ], [ 32, %while.body.preheader ]
%op.addr.06 = phi i8* [ %add.ptr1, %while.body ], [ %op, %while.body.preheader ]
%src.addr.05 = phi i8* [ %add.ptr, %while.body ], [ %src, %while.body.preheader ]
%0 = bitcast i8* %src.addr.05 to i64*
%1 = load i64, i64* %0, align 8
%2 = bitcast i8* %op.addr.06 to i64*
store i64 %1, i64* %2, align 8
%add.ptr = getelementptr inbounds i8, i8* %src.addr.05, i64 8
%add.ptr1 = getelementptr inbounds i8, i8* %op.addr.06, i64 8
%sub = add nsw i32 %len.addr.07, -8
%cmp = icmp sgt i32 %len.addr.07, 8
br i1 %cmp, label %while.body, label %while.end.loopexit
while.end.loopexit: ; preds = %while.body
br label %while.end
while.end: ; preds = %while.end.loopexit, %entry
ret void
}

View File

@@ -1,107 +0,0 @@
; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; i and i + 1 can overflow in the following kernel:
; void test1(unsigned long long x, int *a, int *b) {
; for (unsigned i = 0; i < x; ++i)
; b[i] = a[i+1] + 1;
; }
;
; If accesses to a and b can alias, we need to emit a run-time alias check
; between accesses to a and b. However, when i and i + 1 can wrap, their
; SCEV expression is not an AddRec. We need to create SCEV predicates and
; coerce the expressions to AddRecs in order to be able to emit the run-time
; alias check.
;
; The accesses at b[i] and a[i+1] correspond to the addresses %arrayidx and
; %arrayidx4 in the test. The SCEV expressions for these are:
; ((4 * (zext i32 {1,+,1}<%for.body> to i64))<nuw><nsw> + %a)<nsw>
; ((4 * (zext i32 {0,+,1}<%for.body> to i64))<nuw><nsw> + %b)<nsw>
;
; The transformed expressions are:
; i64 {(4 + %a),+,4}<%for.body>
; i64 {(4 + %b),+,4}<%for.body>
; CHECK-LABEL: test1
; CHECK: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
; CHECK-NEXT: Against group
; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group
; CHECK-NEXT: (Low: (4 + %a) High: (4 + (4 * (1 umax %x)) + %a))
; CHECK-NEXT: Member: {(4 + %a),+,4}<%for.body>
; CHECK-NEXT: Group
; CHECK-NEXT: (Low: %b High: ((4 * (1 umax %x)) + %b))
; CHECK-NEXT: Member: {%b,+,4}<%for.body>
; CHECK: Store to invariant address was not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
; CHECK: Expressions re-written:
; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom:
; CHECK-NEXT: ((4 * (zext i32 {1,+,1}<%for.body> to i64))<nuw><nsw> + %a)<nsw>
; CHECK-NEXT: --> {(4 + %a),+,4}<%for.body>
; CHECK-NEXT: [PSE] %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11:
; CHECK-NEXT: ((4 * (zext i32 {0,+,1}<%for.body> to i64))<nuw><nsw> + %b)<nsw>
; CHECK-NEXT: --> {%b,+,4}<%for.body>
define void @test1(i64 %x, i32* %a, i32* %b) {
entry:
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
%conv11 = phi i64 [ %conv, %for.body ], [ 0, %entry ]
%i.010 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%add = add i32 %i.010, 1
%idxprom = zext i32 %add to i64
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
%ld = load i32, i32* %arrayidx, align 4
%add2 = add nsw i32 %ld, 1
%arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11
store i32 %add2, i32* %arrayidx4, align 4
%conv = zext i32 %add to i64
%cmp = icmp ult i64 %conv, %x
br i1 %cmp, label %for.body, label %exit
exit:
ret void
}
; i can overflow in the following kernel:
; void test2(unsigned long long x, int *a) {
; for (unsigned i = 0; i < x; ++i)
; a[i] = a[i] + 1;
; }
;
; We need to check that i doesn't wrap, but we don't need a run-time alias
; check. We also need an extra no-wrap check to get the backedge taken count.
; CHECK-LABEL: test2
; CHECK: Memory dependences are safe
; CHECK: SCEV assumptions:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
define void @test2(i64 %x, i32* %a) {
entry:
br label %for.body
for.body:
%conv11 = phi i64 [ %conv, %for.body ], [ 0, %entry ]
%i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %conv11
%ld = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %ld, 1
store i32 %add, i32* %arrayidx, align 4
%inc = add i32 %i.010, 1
%conv = zext i32 %inc to i64
%cmp = icmp ult i64 %conv, %x
br i1 %cmp, label %for.body, label %exit
exit:
ret void
}

View File

@@ -1,68 +0,0 @@
; RUN: opt -loop-accesses -analyze -S < %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
; This is the test case from PR26314.
; When we were retrying dependence checking with memchecks only,
; the loop-invariant access in the inner loop was incorrectly determined to be wrapping
; because it was not strided in the inner loop.
; #define Z 32
; typedef struct s {
; int v1[Z];
; int v2[Z];
; int v3[Z][Z];
; } s;
;
; void slow_function (s* const obj, int z) {
; for (int j=0; j<Z; j++) {
; for (int k=0; k<z; k++) {
; int x = obj->v1[k] + obj->v2[j];
; obj->v3[j][k] += x;
; }
; }
; }
; CHECK: function 'Test':
; CHECK: .inner:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK: Check 0:
; CHECK: Check 1:
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct.s = type { [32 x i32], [32 x i32], [32 x [32 x i32]] }
define void @Test(%struct.s* nocapture %obj, i64 %z) #0 {
br label %.outer.preheader
.outer.preheader:
%i = phi i64 [ 0, %0 ], [ %i.next, %.outer ]
%1 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 1, i64 %i
br label %.inner
.exit:
ret void
.outer:
%i.next = add nuw nsw i64 %i, 1
%exitcond.outer = icmp eq i64 %i.next, 32
br i1 %exitcond.outer, label %.exit, label %.outer.preheader
.inner:
%j = phi i64 [ 0, %.outer.preheader ], [ %j.next, %.inner ]
%2 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 0, i64 %j
%3 = load i32, i32* %2
%4 = load i32, i32* %1
%5 = add nsw i32 %4, %3
%6 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 2, i64 %i, i64 %j
%7 = load i32, i32* %6
%8 = add nsw i32 %5, %7
store i32 %8, i32* %6
%j.next = add nuw nsw i64 %j, 1
%exitcond.inner = icmp eq i64 %j.next, %z
br i1 %exitcond.inner, label %.outer, label %.inner
}

View File

@@ -1,42 +0,0 @@
; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<aa>,require<scalar-evolution>,require<aa>,loop(print-access-info)' -aa-pipeline='basic-aa' -disable-output < %s 2>&1 | FileCheck %s
; For this loop:
; for (int i = 0; i < n; i++)
; A[2 * i] = A[2 * i] + B[i];
;
; , SCEV is unable to prove that A[2 * i] does not overflow. However,
; analyzing the IR helps us to conclude it and in turn allow dependence
; analysis.
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: Memory dependences are safe{{$}}
define void @f(i16* noalias %a,
i16* noalias %b, i64 %N) {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%mul = mul nuw nsw i64 %ind, 2
%arrayidxA = getelementptr inbounds i16, i16* %a, i64 %mul
%loadA = load i16, i16* %arrayidxA, align 2
%arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
%loadB = load i16, i16* %arrayidxB, align 2
%add = mul i16 %loadA, %loadB
store i16 %add, i16* %arrayidxA, align 2
%inc = add nuw nsw i64 %ind, 1
%exitcond = icmp eq i64 %inc, %N
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}

View File

@@ -1,39 +0,0 @@
; RUN: opt -loop-accesses -analyze %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
; Test that the loop accesses are proven safe in this case.
; The analyzer used to be confused by the "diamond" because GetUnderlyingObjects
; is saying that the two pointers can both point to null. The loop analyzer
; needs to ignore null in the results returned by GetUnderlyingObjects.
; CHECK: Memory dependences are safe with run-time checks
; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"
; Function Attrs: ssp uwtable
define void @foo(i1 %cond, i32* %ptr1, i32* %ptr2) {
br i1 %cond, label %.preheader, label %diamond
diamond: ; preds = %.noexc.i.i
br label %.preheader
.preheader: ; preds = %diamond, %0
%ptr1_or_null = phi i32* [ null, %0 ], [ %ptr1, %diamond ]
%ptr2_or_null = phi i32* [ null, %0 ], [ %ptr2, %diamond ]
br label %.lr.ph
.lr.ph: ; preds = %.lr.ph, %.preheader
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 10, %.preheader ]
%indvars.iv.next = add nsw i64 %indvars.iv, -1
%tmp4 = getelementptr inbounds i32, i32* %ptr2_or_null, i64 %indvars.iv.next
%tmp5 = load i32, i32* %tmp4, align 4
%tmp6 = getelementptr inbounds i32, i32* %ptr1_or_null, i64 %indvars.iv.next
store i32 undef, i32* %tmp6, align 4
br i1 false, label %.lr.ph, label %.end
.end:
ret void
}

View File

@@ -1,288 +0,0 @@
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnueabi"
; 3 reads and 3 writes should need 12 memchecks
; CHECK: function 'testf':
; CHECK: Memory dependences are safe with run-time checks
; Memory dependencies have labels starting from 0, so in
; order to verify that we have n checks, we look for
; (n-1): and not n:.
; CHECK: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK: Check 11:
; CHECK-NOT: Check 12:
define void @testf(i16* %a,
i16* %b,
i16* %c,
i16* %d,
i16* %e,
i16* %f) {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
%add = add nuw nsw i64 %ind, 1
%arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
%loadA = load i16, i16* %arrayidxA, align 2
%arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
%loadB = load i16, i16* %arrayidxB, align 2
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %ind
%loadC = load i16, i16* %arrayidxC, align 2
%mul = mul i16 %loadB, %loadA
%mul1 = mul i16 %mul, %loadC
%arrayidxD = getelementptr inbounds i16, i16* %d, i64 %ind
store i16 %mul1, i16* %arrayidxD, align 2
%arrayidxE = getelementptr inbounds i16, i16* %e, i64 %ind
store i16 %mul, i16* %arrayidxE, align 2
%arrayidxF = getelementptr inbounds i16, i16* %f, i64 %ind
store i16 %mul1, i16* %arrayidxF, align 2
%exitcond = icmp eq i64 %add, 20
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; The following (testg and testh) check that we can group
; memory checks of accesses which differ by a constant value.
; Both tests are based on the following C code:
;
; void testh(short *a, short *b, short *c) {
; unsigned long ind = 0;
; for (unsigned long ind = 0; ind < 20; ++ind) {
; c[2 * ind] = a[ind] * a[ind + 1];
; c[2 * ind + 1] = a[ind] * a[ind + 1] * b[ind];
; }
; }
;
; It is sufficient to check the intervals
; [a, a + 21], [b, b + 20] against [c, c + 41].
; 3 reads and 2 writes - two of the reads can be merged,
; and the writes can be merged as well. This gives us a
; total of 2 memory checks.
; CHECK: function 'testg':
; CHECK: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group ([[ZERO:.+]]):
; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
; CHECK-NEXT: Against group ([[ONE:.+]]):
; CHECK-NEXT: %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
; CHECK-NEXT: Check 1:
; CHECK-NEXT: Comparing group ({{.*}}[[ZERO]]):
; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
; CHECK-NEXT: Against group ([[TWO:.+]]):
; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
; CHECK-NEXT: (Low: %c High: (80 + %c))
; CHECK-NEXT: Member: {(2 + %c)<nsw>,+,4}
; CHECK-NEXT: Member: {%c,+,4}
; CHECK-NEXT: Group {{.*}}[[ONE]]:
; CHECK-NEXT: (Low: %a High: (42 + %a))
; CHECK-NEXT: Member: {(2 + %a)<nsw>,+,2}
; CHECK-NEXT: Member: {%a,+,2}
; CHECK-NEXT: Group {{.*}}[[TWO]]:
; CHECK-NEXT: (Low: %b High: (40 + %b))
; CHECK-NEXT: Member: {%b,+,2}
define void @testg(i16* %a,
i16* %b,
i16* %c) {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
%store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
%add = add nuw nsw i64 %ind, 1
%store_ind_inc = add nuw nsw i64 %store_ind, 1
%store_ind_next = add nuw nsw i64 %store_ind_inc, 1
%arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
%loadA = load i16, i16* %arrayidxA, align 2
%arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
%loadA1 = load i16, i16* %arrayidxA1, align 2
%arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
%loadB = load i16, i16* %arrayidxB, align 2
%mul = mul i16 %loadA, %loadA1
%mul1 = mul i16 %mul, %loadB
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
store i16 %mul1, i16* %arrayidxC, align 2
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
store i16 %mul, i16* %arrayidxC1, align 2
%exitcond = icmp eq i64 %add, 20
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; 3 reads and 2 writes - the writes can be merged into a single
; group, but the GEPs used for the reads are not marked as inbounds.
; We can still merge them because we are using a unit stride for
; accesses, so we cannot overflow the GEPs.
; CHECK: function 'testh':
; CHECK: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group ([[ZERO:.+]]):
; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
; CHECK-NEXT: Against group ([[ONE:.+]]):
; CHECK-NEXT: %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
; CHECK-NEXT: %arrayidxA = getelementptr i16, i16* %a, i64 %ind
; CHECK-NEXT: Check 1:
; CHECK-NEXT: Comparing group ({{.*}}[[ZERO]]):
; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
; CHECK-NEXT: Against group ([[TWO:.+]]):
; CHECK-NEXT: %arrayidxB = getelementptr i16, i16* %b, i64 %ind
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
; CHECK-NEXT: (Low: %c High: (80 + %c))
; CHECK-NEXT: Member: {(2 + %c)<nsw>,+,4}
; CHECK-NEXT: Member: {%c,+,4}
; CHECK-NEXT: Group {{.*}}[[ONE]]:
; CHECK-NEXT: (Low: %a High: (42 + %a))
; CHECK-NEXT: Member: {(2 + %a),+,2}
; CHECK-NEXT: Member: {%a,+,2}
; CHECK-NEXT: Group {{.*}}[[TWO]]:
; CHECK-NEXT: (Low: %b High: (40 + %b))
; CHECK-NEXT: Member: {%b,+,2}
define void @testh(i16* %a,
i16* %b,
i16* %c) {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
%store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
%add = add nuw nsw i64 %ind, 1
%store_ind_inc = add nuw nsw i64 %store_ind, 1
%store_ind_next = add nuw nsw i64 %store_ind_inc, 1
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%loadA = load i16, i16* %arrayidxA, align 2
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%loadA1 = load i16, i16* %arrayidxA1, align 2
%arrayidxB = getelementptr i16, i16* %b, i64 %ind
%loadB = load i16, i16* %arrayidxB, align 2
%mul = mul i16 %loadA, %loadA1
%mul1 = mul i16 %mul, %loadB
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
store i16 %mul1, i16* %arrayidxC, align 2
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
store i16 %mul, i16* %arrayidxC1, align 2
%exitcond = icmp eq i64 %add, 20
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; Don't merge pointers if we need to perform a check against a pointer
; to the same underlying object (doing so would emit a check that could be
; falsely invalidated) For example, in the following loop:
;
; for (i = 0; i < 5000; ++i)
; a[i + offset] = a[i] + a[i + 10000]
;
; we should not merge the intervals associated with the reads (0,5000) and
; (10000, 15000) into (0, 15000) as this will potentially fail the check
; against the interval associated with the write.
;
; We cannot have this check unless ShouldRetryWithRuntimeCheck is set,
; and therefore the grouping algorithm would create a separate group for
; each pointer.
; CHECK: function 'testi':
; CHECK: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group ([[ZERO:.+]]):
; CHECK-NEXT: %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
; CHECK-NEXT: Against group ([[ONE:.+]]):
; CHECK-NEXT: %arrayidxA1 = getelementptr i16, i16* %a, i64 %ind
; CHECK-NEXT: Check 1:
; CHECK-NEXT: Comparing group ({{.*}}[[ZERO]]):
; CHECK-NEXT: %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
; CHECK-NEXT: Against group ([[TWO:.+]]):
; CHECK-NEXT: %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
; CHECK-NEXT: (Low: ((2 * %offset) + %a)<nsw> High: (10000 + (2 * %offset) + %a))
; CHECK-NEXT: Member: {((2 * %offset) + %a)<nsw>,+,2}<nsw><%for.body>
; CHECK-NEXT: Group {{.*}}[[ONE]]:
; CHECK-NEXT: (Low: %a High: (10000 + %a))
; CHECK-NEXT: Member: {%a,+,2}<%for.body>
; CHECK-NEXT: Group {{.*}}[[TWO]]:
; CHECK-NEXT: (Low: (20000 + %a) High: (30000 + %a))
; CHECK-NEXT: Member: {(20000 + %a),+,2}<%for.body>
define void @testi(i16* %a,
i64 %offset) {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
%store_ind = phi i64 [ %offset, %entry ], [ %store_ind_inc, %for.body ]
%add = add nuw nsw i64 %ind, 1
%store_ind_inc = add nuw nsw i64 %store_ind, 1
%arrayidxA1 = getelementptr i16, i16* %a, i64 %ind
%ind2 = add nuw nsw i64 %ind, 10000
%arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
%loadA1 = load i16, i16* %arrayidxA1, align 2
%loadA2 = load i16, i16* %arrayidxA2, align 2
%addres = add i16 %loadA1, %loadA2
%storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
store i16 %addres, i16* %storeidx, align 2
%exitcond = icmp eq i64 %add, 5000
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}

View File

@@ -1,43 +0,0 @@
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; We shouldn't quit the analysis if we encounter a pointer without known
; bounds *unless* we actually need to emit a memcheck for it. (We only
; compute bounds for SCEVAddRecs so A[i*i] is deemed not having known bounds.)
;
; for (i = 0; i < 20; ++i)
; A[i*i] *= 2;
; CHECK: for.body:
; CHECK: Report: unsafe dependent memory operations in loop
; CHECK-NOT: Report: cannot identify array bounds
; CHECK: Dependences:
; CHECK: Unknown:
; CHECK: %loadA = load i16, i16* %arrayidxA, align 2 ->
; CHECK: store i16 %mul, i16* %arrayidxA, align 2
define void @f(i16* %a) {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
%access_ind = mul i64 %ind, %ind
%arrayidxA = getelementptr inbounds i16, i16* %a, i64 %access_ind
%loadA = load i16, i16* %arrayidxA, align 2
%mul = mul i16 %loadA, 2
store i16 %mul, i16* %arrayidxA, align 2
%add = add nuw nsw i64 %ind, 1
%exitcond = icmp eq i64 %add, 20
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}

View File

@@ -1,99 +0,0 @@
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Check that the compile-time-unknown dependence-distance is resolved
; statically. Due to the non-unit stride of the accesses in this testcase
; we are currently not able to create runtime dependence checks, and therefore
; if we don't resolve the dependence statically we cannot vectorize the loop.
;
; Specifically in this example, during dependence analysis we get 6 unknown
; dependence distances between the 8 real/imaginary accesses below:
; dist = 8*D, 4+8*D, -4+8*D, -8*D, 4-8*D, -4-8*D.
; At compile time we can prove for all of the above that |dist|>loopBound*step
; (where the step is 8bytes, and the loopBound is D-1), and thereby conclude
; that there are no dependencies (without runtime tests):
; |8*D|>8*D-8, |4+8*D|>8*D-8, |-4+8*D|>8*D-8, etc.
; #include <stdlib.h>
; class Complex {
; private:
; float real_;
; float imaginary_;
;
; public:
; Complex() : real_(0), imaginary_(0) { }
; Complex(float real, float imaginary) : real_(real), imaginary_(imaginary) { }
; Complex(const Complex &rhs) : real_(rhs.real()), imaginary_(rhs.imaginary()) { }
;
; inline float real() const { return real_; }
; inline float imaginary() const { return imaginary_; }
;
; Complex operator+(const Complex& rhs) const
; {
; return Complex(real_ + rhs.real_, imaginary_ + rhs.imaginary_);
; }
;
; Complex operator-(const Complex& rhs) const
; {
; return Complex(real_ - rhs.real_, imaginary_ - rhs.imaginary_);
; }
; };
;
; void Test(Complex *out, size_t size)
; {
; size_t D = size / 2;
; for (size_t offset = 0; offset < D; ++offset)
; {
; Complex t0 = out[offset];
; Complex t1 = out[offset + D];
; out[offset] = t1 + t0;
; out[offset + D] = t0 - t1;
; }
; }
; CHECK-LABEL: Test
; CHECK: Memory dependences are safe
%class.Complex = type { float, float }
define void @Test(%class.Complex* nocapture %out, i64 %size) local_unnamed_addr {
entry:
%div = lshr i64 %size, 1
%cmp47 = icmp eq i64 %div, 0
br i1 %cmp47, label %for.cond.cleanup, label %for.body.preheader
for.body.preheader:
br label %for.body
for.cond.cleanup.loopexit:
br label %for.cond.cleanup
for.cond.cleanup:
ret void
for.body:
%offset.048 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%0 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %offset.048, i32 0
%1 = load float, float* %0, align 4
%imaginary_.i.i = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %offset.048, i32 1
%2 = load float, float* %imaginary_.i.i, align 4
%add = add nuw i64 %offset.048, %div
%3 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %add, i32 0
%4 = load float, float* %3, align 4
%imaginary_.i.i28 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %add, i32 1
%5 = load float, float* %imaginary_.i.i28, align 4
%add.i = fadd fast float %4, %1
%add4.i = fadd fast float %5, %2
store float %add.i, float* %0, align 4
store float %add4.i, float* %imaginary_.i.i, align 4
%sub.i = fsub fast float %1, %4
%sub4.i = fsub fast float %2, %5
store float %sub.i, float* %3, align 4
store float %sub4.i, float* %imaginary_.i.i28, align 4
%inc = add nuw nsw i64 %offset.048, 1
%exitcond = icmp eq i64 %inc, %div
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

View File

@@ -1,60 +0,0 @@
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
; We give up analyzing the dependences in this loop due to non-constant
; distance between A[i+offset] and A[i] and add memchecks to prove
; independence. Make sure that no dependences are reported in
; this case.
;
; for (i = 0; i < n; i++)
; A[i + offset] = A[i] * B[i] * C[i];
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
; CHECK: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: 0:
; CHECK-NEXT: Comparing group
; CHECK-NEXT: %arrayidxA2 = getelementptr inbounds i16, i16* %a, i64 %idx
; CHECK-NEXT: Against group
; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %indvar
; Arrays are reached through global pointers, so LAA cannot prove %a and the
; other bases don't alias statically and must emit runtime checks (see CHECKs).
@B = common global i16* null, align 8
@A = common global i16* null, align 8
@C = common global i16* null, align 8
; A[i + offset] = A[i] * B[i] * C[i] for i in [0, 20); the A[i] load vs.
; A[i+offset] store have a non-constant distance (%offset is a parameter),
; so dependence analysis gives up and run-time checks prove independence.
define void @f(i64 %offset) {
entry:
%a = load i16*, i16** @A, align 8
%b = load i16*, i16** @B, align 8
%c = load i16*, i16** @C, align 8
br label %for.body
for.body:                                         ; preds = %for.body, %entry
%indvar = phi i64 [ 0, %entry ], [ %add, %for.body ]
; loadA = A[i], loadB = B[i], loadC = C[i].
%arrayidxA = getelementptr inbounds i16, i16* %a, i64 %indvar
%loadA = load i16, i16* %arrayidxA, align 2
%arrayidxB = getelementptr inbounds i16, i16* %b, i64 %indvar
%loadB = load i16, i16* %arrayidxB, align 2
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %indvar
%loadC = load i16, i16* %arrayidxC, align 2
%mul = mul i16 %loadB, %loadA
%mul1 = mul i16 %mul, %loadC
; Store target A[i + offset] — distance from the A[i] load is unknown.
%idx = add i64 %indvar, %offset
%arrayidxA2 = getelementptr inbounds i16, i16* %a, i64 %idx
store i16 %mul1, i16* %arrayidxA2, align 2
; Fixed trip count of 20 iterations.
%add = add nuw nsw i64 %indvar, 1
%exitcond = icmp eq i64 %add, 20
br i1 %exitcond, label %for.end, label %for.body
for.end:                                          ; preds = %for.body
ret void
}

View File

@@ -1,90 +0,0 @@
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
; The runtime memory check code and the access grouping
; algorithm both assume that the start and end values
; for an access range are ordered (start <= stop).
; When generating checks for accesses with negative stride
; we need to take this into account and swap the interval
; ends.
;
; for (i = 0; i < 10000; i++) {
; B[i] = A[15000 - i] * 3;
; }
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnueabi"
; CHECK: function 'f':
; CHECK: (Low: (20000 + %a) High: (60004 + %a))
; Bases come from global pointers, forcing runtime checks whose interval
; bounds are what this test inspects (see the Low/High CHECK above).
@B = common global i32* null, align 8
@A = common global i32* null, align 8
; B[i] = A[15000 - i] * 3 for i in [0, 10000]: A is accessed with a negative
; stride, so the runtime-check interval ends must be swapped to keep
; start <= end (the Low/High values checked above).
define void @f() {
entry:
%a = load i32*, i32** @A, align 8
%b = load i32*, i32** @B, align 8
br label %for.body
for.body:                                         ; preds = %for.body, %entry
%idx = phi i64 [ 0, %entry ], [ %add, %for.body ]
; Descending index into A: 15000 - idx.
%negidx = sub i64 15000, %idx
%arrayidxA0 = getelementptr inbounds i32, i32* %a, i64 %negidx
%loadA0 = load i32, i32* %arrayidxA0, align 2
%res = mul i32 %loadA0, 3
%add = add nuw nsw i64 %idx, 1
; Ascending store into B at the original index.
%arrayidxB = getelementptr inbounds i32, i32* %b, i64 %idx
store i32 %res, i32* %arrayidxB, align 2
; Note: compares %idx (not %add), so the body runs for idx = 0..10000.
%exitcond = icmp eq i64 %idx, 10000
br i1 %exitcond, label %for.end, label %for.body
for.end:                                          ; preds = %for.body
ret void
}
; CHECK: function 'g':
; When the stride is not constant, we are forced to do umin/umax to get
; the interval limits.
; for (i = 0; i < 10000; i++) {
; B[i] = A[15000 - step * i] * 3;
; }
; Here it is not obvious what the limits are, since 'step' could be negative.
; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a)))))
; CHECK: High: (4 + ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a)))
; B[i] = A[15000 - step * i] * 3: the stride of the A access is the unknown
; parameter %step (possibly negative), so the runtime-check bounds must be
; expressed with umin/umax of both possible interval ends (see CHECKs above).
define void @g(i64 %step) {
entry:
%a = load i32*, i32** @A, align 8
%b = load i32*, i32** @B, align 8
br label %for.body
for.body:                                         ; preds = %for.body, %entry
%idx = phi i64 [ 0, %entry ], [ %add, %for.body ]
; A index: 15000 - step * idx; direction depends on the sign of %step.
%idx_mul = mul i64 %idx, %step
%negidx = sub i64 15000, %idx_mul
%arrayidxA0 = getelementptr inbounds i32, i32* %a, i64 %negidx
%loadA0 = load i32, i32* %arrayidxA0, align 2
%res = mul i32 %loadA0, 3
%add = add nuw nsw i64 %idx, 1
; Unit-stride store into B.
%arrayidxB = getelementptr inbounds i32, i32* %b, i64 %idx
store i32 %res, i32* %arrayidxB, align 2
; Compares %idx, so the body executes for idx = 0..10000 inclusive.
%exitcond = icmp eq i64 %idx, 10000
br i1 %exitcond, label %for.end, label %for.body
for.end:                                          ; preds = %for.body
ret void
}

View File

@@ -1,52 +0,0 @@
; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<aa>,require<scalar-evolution>,require<aa>,loop(print-access-info)' -aa-pipeline='basic-aa' -disable-output < %s 2>&1 | FileCheck %s
; If the arrays don't alias this loop is safe with no memchecks:
; for (i = 0; i < n; i++)
; A[i] = A[i+1] * B[i] * C[i];
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
; Check the loop-carried forward anti-dep between the load of A[i+1] and the
; store of A[i];
; CHECK: Memory dependences are safe{{$}}
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Forward:
; CHECK-NEXT: %loadA_plus_2 = load i16, i16* %arrayidxA_plus_2, align 2 ->
; CHECK-NEXT: store i16 %mul1, i16* %arrayidxA, align 2
; A[i] = A[i+1] * B[i] * C[i] for i in [0, 20). The noalias arguments make
; memchecks unnecessary; the only dependence is the loop-carried forward
; anti-dependence between the A[i+1] load and the A[i] store (CHECKed above).
define void @f(i16* noalias %a,
i16* noalias %b,
i16* noalias %c) {
entry:
br label %for.body
for.body:                                         ; preds = %for.body, %entry
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
%add = add nuw nsw i64 %ind, 1
; Load A[i+1] — one element ahead of this iteration's store.
%arrayidxA_plus_2 = getelementptr inbounds i16, i16* %a, i64 %add
%loadA_plus_2 = load i16, i16* %arrayidxA_plus_2, align 2
%arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
%loadB = load i16, i16* %arrayidxB, align 2
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %ind
%loadC = load i16, i16* %arrayidxC, align 2
%mul = mul i16 %loadB, %loadA_plus_2
%mul1 = mul i16 %mul, %loadC
; Store A[i] — read in the *previous* iteration, hence a forward dependence.
%arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
store i16 %mul1, i16* %arrayidxA, align 2
; Fixed trip count of 20.
%exitcond = icmp eq i64 %add, 20
br i1 %exitcond, label %for.end, label %for.body
for.end:                                          ; preds = %for.body
ret void
}

View File

@@ -1,38 +0,0 @@
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
; Analyze this loop:
; for (i = 0; i < n; i++)
; A[i + 4] = A[i] * 2;
; CHECK: Memory dependences are safe with a maximum dependence distance of 8 bytes
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
@A = common global i16* null, align 8
; A[i + 4] = A[i] * 2 for i in [0, 20): a backward dependence at a constant
; distance of 4 i16 elements = 8 bytes, which LAA reports as the maximum
; safe dependence distance (see the CHECK above).
define void @f() {
entry:
%a = load i16*, i16** @A, align 8
br label %for.body
for.body:                                         ; preds = %for.body, %entry
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
; Load A[i].
%arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
%loadA = load i16, i16* %arrayidxA, align 2
%mul = mul i16 %loadA, 2
; Store A[i + 4]: 4 elements * 2 bytes = the 8-byte dependence distance.
%next = add nuw nsw i64 %ind, 4
%arrayidxA_next = getelementptr inbounds i16, i16* %a, i64 %next
store i16 %mul, i16* %arrayidxA_next, align 2
; Fixed trip count of 20.
%add = add nuw nsw i64 %ind, 1
%exitcond = icmp eq i64 %add, 20
br i1 %exitcond, label %for.end, label %for.body
for.end:                                          ; preds = %for.body
ret void
}

View File

@@ -1,63 +0,0 @@
; RUN: opt < %s -loop-accesses -analyze | FileCheck -check-prefix=OLDPM %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck -check-prefix=NEWPM %s
; Test to confirm LAA will find store to invariant address.
; Inner loop has a store to invariant address.
;
; for(; i < itr; i++) {
; for(; j < itr; j++) {
; var1[i] = var2[j] + var1[i];
; }
; }
; The LAA with the new PM is a loop pass so we go from inner to outer loops.
; OLDPM: for.cond1.preheader:
; OLDPM: Store to invariant address was not found in loop.
; OLDPM: for.body3:
; OLDPM: Store to invariant address was found in loop.
; NEWPM: for.body3:
; NEWPM: Store to invariant address was found in loop.
; NEWPM: for.cond1.preheader:
; NEWPM: Store to invariant address was not found in loop.
; Nested loops: var1[i] = var2[j] + var1[i]. The inner-loop store target
; %arrayidx5 is computed in the inner preheader from the *outer* induction
; variable, so it is invariant in the inner loop — LAA must report the store
; to an invariant address for the inner loop but not the outer (CHECKs above).
define i32 @foo(i32* nocapture %var1, i32* nocapture readonly %var2, i32 %itr) #0 {
entry:
; Skip everything when itr == 0.
%cmp20 = icmp eq i32 %itr, 0
br i1 %cmp20, label %for.end10, label %for.cond1.preheader
for.cond1.preheader:                              ; preds = %entry, %for.inc8
; Outer loop over i (%indvars.iv23); j resumes from where it last stopped.
%indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
%j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
%cmp218 = icmp ult i32 %j.022, %itr
br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8
for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
; &var1[i]: hoisted out of the inner loop, hence inner-loop invariant.
%arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23
%0 = zext i32 %j.022 to i64
br label %for.body3
for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
; Inner loop over j (%indvars.iv).
%indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
%arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv
%1 = load i32, i32* %arrayidx, align 4
%2 = load i32, i32* %arrayidx5, align 4
%add = add nsw i32 %2, %1
; Store to the invariant address &var1[i] on every inner iteration.
store i32 %add, i32* %arrayidx5, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %itr
br i1 %exitcond, label %for.inc8, label %for.body3
for.inc8:                                         ; preds = %for.body3, %for.cond1.preheader
; LCSSA merge of j; advance i and exit when i == itr.
%j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
%lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
%exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
br i1 %exitcond26, label %for.end10, label %for.cond1.preheader
for.end10:                                        ; preds = %for.inc8, %entry
; Return value is unused by the test; only the analysis output matters.
ret i32 undef
}

View File

@@ -1,55 +0,0 @@
; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
; Test to confirm LAA will not find store to invariant address.
; Inner loop has no store to invariant address.
;
; for(; i < itr; i++) {
; for(; j < itr; j++) {
; var2[j] = var2[j] + var1[i];
; }
; }
; CHECK: Store to invariant address was not found in loop.
; CHECK-NOT: Store to invariant address was found in loop.
; Nested loops: var2[j] = var2[j] + var1[i]. Unlike the sibling test, the
; inner-loop store goes to %arrayidx = &var2[j], which varies with the inner
; induction variable — so LAA must NOT report a store to an invariant
; address (see the CHECK / CHECK-NOT pair above).
define i32 @foo(i32* nocapture readonly %var1, i32* nocapture %var2, i32 %itr) #0 {
entry:
; Skip everything when itr == 0.
%cmp20 = icmp eq i32 %itr, 0
br i1 %cmp20, label %for.end10, label %for.cond1.preheader
for.cond1.preheader:                              ; preds = %entry, %for.inc8
; Outer loop over i (%indvars.iv23); j resumes from where it last stopped.
%indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
%j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
%cmp218 = icmp ult i32 %j.022, %itr
br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8
for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
; &var1[i] is inner-loop invariant, but it is only *loaded* here.
%arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23
%0 = zext i32 %j.022 to i64
br label %for.body3
for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
; Inner loop over j (%indvars.iv).
%indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
%arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv
%1 = load i32, i32* %arrayidx, align 4
%2 = load i32, i32* %arrayidx5, align 4
%add = add nsw i32 %2, %1
; Store target &var2[j] changes every iteration — not invariant.
store i32 %add, i32* %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %itr
br i1 %exitcond, label %for.inc8, label %for.body3
for.inc8:                                         ; preds = %for.body3, %for.cond1.preheader
; LCSSA merge of j; advance i and exit when i == itr.
%j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
%lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
%exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
br i1 %exitcond26, label %for.end10, label %for.cond1.preheader
for.end10:                                        ; preds = %for.inc8, %entry
; Return value is unused by the test; only the analysis output matters.
ret i32 undef
}

Some files were not shown because too many files have changed in this diff Show More