You've already forked linux-packaging-mono
Imported Upstream version 5.18.0.167
Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
parent
e19d552987
commit
b084638f15
@ -1,3 +0,0 @@
|
||||
if not 'AMDGPU' in config.root.targets:
|
||||
config.unsupported = True
|
||||
|
@ -1,127 +0,0 @@
|
||||
; RUN: opt -loop-idiom -mtriple=amdgcn-- -S < %s | FileCheck %s
|
||||
|
||||
; Mostly copied from x86 version.
|
||||
|
||||
;To recognize this pattern:
|
||||
;int popcount(unsigned long long a) {
|
||||
; int c = 0;
|
||||
; while (a) {
|
||||
; c++;
|
||||
; a &= a - 1;
|
||||
; }
|
||||
; return c;
|
||||
;}
|
||||
;
|
||||
|
||||
; CHECK-LABEL: @popcount_i64
|
||||
; CHECK: entry
|
||||
; CHECK: llvm.ctpop.i64
|
||||
; CHECK: ret
|
||||
define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%tobool3 = icmp eq i64 %a, 0
|
||||
br i1 %tobool3, label %while.end, label %while.body
|
||||
|
||||
while.body: ; preds = %entry, %while.body
|
||||
%c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
|
||||
%a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
|
||||
%inc = add nsw i32 %c.05, 1
|
||||
%sub = add i64 %a.addr.04, -1
|
||||
%and = and i64 %sub, %a.addr.04
|
||||
%tobool = icmp eq i64 %and, 0
|
||||
br i1 %tobool, label %while.end, label %while.body
|
||||
|
||||
while.end: ; preds = %while.body, %entry
|
||||
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
|
||||
ret i32 %c.0.lcssa
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @popcount_i32
|
||||
; CHECK: entry
|
||||
; CHECK: llvm.ctpop.i32
|
||||
; CHECK: ret
|
||||
define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%tobool3 = icmp eq i32 %a, 0
|
||||
br i1 %tobool3, label %while.end, label %while.body
|
||||
|
||||
while.body: ; preds = %entry, %while.body
|
||||
%c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
|
||||
%a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ]
|
||||
%inc = add nsw i32 %c.05, 1
|
||||
%sub = add i32 %a.addr.04, -1
|
||||
%and = and i32 %sub, %a.addr.04
|
||||
%tobool = icmp eq i32 %and, 0
|
||||
br i1 %tobool, label %while.end, label %while.body
|
||||
|
||||
while.end: ; preds = %while.body, %entry
|
||||
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
|
||||
ret i32 %c.0.lcssa
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @popcount_i128
|
||||
; CHECK: entry
|
||||
; CHECK: llvm.ctpop.i128
|
||||
; CHECK: ret
|
||||
define i32 @popcount_i128(i128 %a) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%tobool3 = icmp eq i128 %a, 0
|
||||
br i1 %tobool3, label %while.end, label %while.body
|
||||
|
||||
while.body: ; preds = %entry, %while.body
|
||||
%c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
|
||||
%a.addr.04 = phi i128 [ %and, %while.body ], [ %a, %entry ]
|
||||
%inc = add nsw i32 %c.05, 1
|
||||
%sub = add i128 %a.addr.04, -1
|
||||
%and = and i128 %sub, %a.addr.04
|
||||
%tobool = icmp eq i128 %and, 0
|
||||
br i1 %tobool, label %while.end, label %while.body
|
||||
|
||||
while.end: ; preds = %while.body, %entry
|
||||
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
|
||||
ret i32 %c.0.lcssa
|
||||
}
|
||||
|
||||
; To recognize this pattern:
|
||||
;int popcount(unsigned long long a, int mydata1, int mydata2) {
|
||||
; int c = 0;
|
||||
; while (a) {
|
||||
; c++;
|
||||
; a &= a - 1;
|
||||
; mydata1 *= c;
|
||||
; mydata2 *= (int)a;
|
||||
; }
|
||||
; return c + mydata1 + mydata2;
|
||||
;}
|
||||
|
||||
; CHECK-LABEL: @popcount2
|
||||
; CHECK: entry
|
||||
; CHECK: llvm.ctpop.i64
|
||||
; CHECK: ret
|
||||
define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%tobool9 = icmp eq i64 %a, 0
|
||||
br i1 %tobool9, label %while.end, label %while.body
|
||||
|
||||
while.body: ; preds = %entry, %while.body
|
||||
%c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
|
||||
%mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
|
||||
%mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
|
||||
%a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
|
||||
%inc = add nsw i32 %c.013, 1
|
||||
%sub = add i64 %a.addr.010, -1
|
||||
%and = and i64 %sub, %a.addr.010
|
||||
%mul = mul nsw i32 %inc, %mydata1.addr.011
|
||||
%conv = trunc i64 %and to i32
|
||||
%mul1 = mul nsw i32 %conv, %mydata2.addr.012
|
||||
%tobool = icmp eq i64 %and, 0
|
||||
br i1 %tobool, label %while.end, label %while.body
|
||||
|
||||
while.end: ; preds = %while.body, %entry
|
||||
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
|
||||
%mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
|
||||
%mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
|
||||
%add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
|
||||
%add2 = add i32 %add, %c.0.lcssa
|
||||
ret i32 %add2
|
||||
}
|
185
external/llvm/test/Transforms/LoopIdiom/ARM/ctlz.ll
vendored
185
external/llvm/test/Transforms/LoopIdiom/ARM/ctlz.ll
vendored
@ -1,185 +0,0 @@
|
||||
; RUN: opt -loop-idiom -mtriple=armv7a < %s -S | FileCheck -check-prefix=LZCNT --check-prefix=ALL %s
|
||||
; RUN: opt -loop-idiom -mtriple=armv4t < %s -S | FileCheck -check-prefix=NOLZCNT --check-prefix=ALL %s
|
||||
|
||||
; Recognize CTLZ builtin pattern.
|
||||
; Here we'll just convert loop to countable,
|
||||
; so do not insert builtin if CPU do not support CTLZ
|
||||
;
|
||||
; int ctlz_and_other(int n, char *a)
|
||||
; {
|
||||
; int i = 0, n0 = n;
|
||||
; while(n >>= 1) {
|
||||
; a[i] = (n0 & (1 << i)) ? 1 : 0;
|
||||
; i++;
|
||||
; }
|
||||
; return i;
|
||||
; }
|
||||
;
|
||||
; LZCNT: entry
|
||||
; LZCNT: %0 = call i32 @llvm.ctlz.i32(i32 %shr8, i1 true)
|
||||
; LZCNT-NEXT: %1 = sub i32 32, %0
|
||||
; LZCNT-NEXT: %2 = zext i32 %1 to i64
|
||||
; LZCNT: %indvars.iv.next.lcssa = phi i64 [ %2, %while.body ]
|
||||
; LZCNT: %4 = trunc i64 %indvars.iv.next.lcssa to i32
|
||||
; LZCNT: %i.0.lcssa = phi i32 [ 0, %entry ], [ %4, %while.end.loopexit ]
|
||||
; LZCNT: ret i32 %i.0.lcssa
|
||||
|
||||
; NOLZCNT: entry
|
||||
; NOLZCNT-NOT: @llvm.ctlz
|
||||
|
||||
; Function Attrs: norecurse nounwind uwtable
|
||||
define i32 @ctlz_and_other(i32 %n, i8* nocapture %a) {
|
||||
entry:
|
||||
%shr8 = ashr i32 %n, 1
|
||||
%tobool9 = icmp eq i32 %shr8, 0
|
||||
br i1 %tobool9, label %while.end, label %while.body.preheader
|
||||
|
||||
while.body.preheader: ; preds = %entry
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.preheader, %while.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %while.body ], [ 0, %while.body.preheader ]
|
||||
%shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
|
||||
%0 = trunc i64 %indvars.iv to i32
|
||||
%shl = shl i32 1, %0
|
||||
%and = and i32 %shl, %n
|
||||
%tobool1 = icmp ne i32 %and, 0
|
||||
%conv = zext i1 %tobool1 to i8
|
||||
%arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
|
||||
store i8 %conv, i8* %arrayidx, align 1
|
||||
%indvars.iv.next = add nuw i64 %indvars.iv, 1
|
||||
%shr = ashr i32 %shr11, 1
|
||||
%tobool = icmp eq i32 %shr, 0
|
||||
br i1 %tobool, label %while.end.loopexit, label %while.body
|
||||
|
||||
while.end.loopexit: ; preds = %while.body
|
||||
%1 = trunc i64 %indvars.iv.next to i32
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.end.loopexit, %entry
|
||||
%i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %while.end.loopexit ]
|
||||
ret i32 %i.0.lcssa
|
||||
}
|
||||
|
||||
; Recognize CTLZ builtin pattern.
|
||||
; Here it will replace the loop -
|
||||
; assume builtin is always profitable.
|
||||
;
|
||||
; int ctlz_zero_check(int n)
|
||||
; {
|
||||
; int i = 0;
|
||||
; while(n) {
|
||||
; n >>= 1;
|
||||
; i++;
|
||||
; }
|
||||
; return i;
|
||||
; }
|
||||
;
|
||||
; ALL: entry
|
||||
; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
|
||||
; ALL-NEXT: %1 = sub i32 32, %0
|
||||
; ALL: %inc.lcssa = phi i32 [ %1, %while.body ]
|
||||
; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
|
||||
; ALL: ret i32 %i.0.lcssa
|
||||
|
||||
; Function Attrs: norecurse nounwind readnone uwtable
|
||||
define i32 @ctlz_zero_check(i32 %n) {
|
||||
entry:
|
||||
%tobool4 = icmp eq i32 %n, 0
|
||||
br i1 %tobool4, label %while.end, label %while.body.preheader
|
||||
|
||||
while.body.preheader: ; preds = %entry
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.preheader, %while.body
|
||||
%i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
|
||||
%n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ]
|
||||
%shr = ashr i32 %n.addr.05, 1
|
||||
%inc = add nsw i32 %i.06, 1
|
||||
%tobool = icmp eq i32 %shr, 0
|
||||
br i1 %tobool, label %while.end.loopexit, label %while.body
|
||||
|
||||
while.end.loopexit: ; preds = %while.body
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.end.loopexit, %entry
|
||||
%i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
|
||||
ret i32 %i.0.lcssa
|
||||
}
|
||||
|
||||
; Recognize CTLZ builtin pattern.
|
||||
; Here it will replace the loop -
|
||||
; assume builtin is always profitable.
|
||||
;
|
||||
; int ctlz(int n)
|
||||
; {
|
||||
; int i = 0;
|
||||
; while(n >>= 1) {
|
||||
; i++;
|
||||
; }
|
||||
; return i;
|
||||
; }
|
||||
;
|
||||
; ALL: entry
|
||||
; ALL: %0 = ashr i32 %n, 1
|
||||
; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
|
||||
; ALL-NEXT: %2 = sub i32 32, %1
|
||||
; ALL-NEXT: %3 = add i32 %2, 1
|
||||
; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
|
||||
; ALL: ret i32 %i.0.lcssa
|
||||
|
||||
; Function Attrs: norecurse nounwind readnone uwtable
|
||||
define i32 @ctlz(i32 %n) {
|
||||
entry:
|
||||
br label %while.cond
|
||||
|
||||
while.cond: ; preds = %while.cond, %entry
|
||||
%n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
|
||||
%shr = ashr i32 %n.addr.0, 1
|
||||
%tobool = icmp eq i32 %shr, 0
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br i1 %tobool, label %while.end, label %while.cond
|
||||
|
||||
while.end: ; preds = %while.cond
|
||||
ret i32 %i.0
|
||||
}
|
||||
|
||||
; Recognize CTLZ builtin pattern.
|
||||
; Here it will replace the loop -
|
||||
; assume builtin is always profitable.
|
||||
;
|
||||
; int ctlz_add(int n, int i0)
|
||||
; {
|
||||
; int i = i0;
|
||||
; while(n >>= 1) {
|
||||
; i++;
|
||||
; }
|
||||
; return i;
|
||||
; }
|
||||
;
|
||||
; ALL: entry
|
||||
; ALL: %0 = ashr i32 %n, 1
|
||||
; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
|
||||
; ALL-NEXT: %2 = sub i32 32, %1
|
||||
; ALL-NEXT: %3 = add i32 %2, 1
|
||||
; ALL-NEXT: %4 = add i32 %2, %i0
|
||||
; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ]
|
||||
; ALL: ret i32 %i.0.lcssa
|
||||
;
|
||||
; Function Attrs: norecurse nounwind readnone uwtable
|
||||
define i32 @ctlz_add(i32 %n, i32 %i0) {
|
||||
entry:
|
||||
br label %while.cond
|
||||
|
||||
while.cond: ; preds = %while.cond, %entry
|
||||
%n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
|
||||
%i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
|
||||
%shr = ashr i32 %n.addr.0, 1
|
||||
%tobool = icmp eq i32 %shr, 0
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br i1 %tobool, label %while.end, label %while.cond
|
||||
|
||||
while.end: ; preds = %while.cond
|
||||
ret i32 %i.0
|
||||
}
|
185
external/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
vendored
185
external/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
vendored
@ -1,185 +0,0 @@
|
||||
; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck -check-prefix=LZCNT --check-prefix=ALL %s
|
||||
; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=corei7 < %s -S | FileCheck -check-prefix=NOLZCNT --check-prefix=ALL %s
|
||||
|
||||
; Recognize CTLZ builtin pattern.
|
||||
; Here we'll just convert loop to countable,
|
||||
; so do not insert builtin if CPU do not support CTLZ
|
||||
;
|
||||
; int ctlz_and_other(int n, char *a)
|
||||
; {
|
||||
; int i = 0, n0 = n;
|
||||
; while(n >>= 1) {
|
||||
; a[i] = (n0 & (1 << i)) ? 1 : 0;
|
||||
; i++;
|
||||
; }
|
||||
; return i;
|
||||
; }
|
||||
;
|
||||
; LZCNT: entry
|
||||
; LZCNT: %0 = call i32 @llvm.ctlz.i32(i32 %shr8, i1 true)
|
||||
; LZCNT-NEXT: %1 = sub i32 32, %0
|
||||
; LZCNT-NEXT: %2 = zext i32 %1 to i64
|
||||
; LZCNT: %indvars.iv.next.lcssa = phi i64 [ %2, %while.body ]
|
||||
; LZCNT: %4 = trunc i64 %indvars.iv.next.lcssa to i32
|
||||
; LZCNT: %i.0.lcssa = phi i32 [ 0, %entry ], [ %4, %while.end.loopexit ]
|
||||
; LZCNT: ret i32 %i.0.lcssa
|
||||
|
||||
; NOLZCNT: entry
|
||||
; NOLZCNT-NOT: @llvm.ctlz
|
||||
|
||||
; Function Attrs: norecurse nounwind uwtable
|
||||
define i32 @ctlz_and_other(i32 %n, i8* nocapture %a) {
|
||||
entry:
|
||||
%shr8 = ashr i32 %n, 1
|
||||
%tobool9 = icmp eq i32 %shr8, 0
|
||||
br i1 %tobool9, label %while.end, label %while.body.preheader
|
||||
|
||||
while.body.preheader: ; preds = %entry
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.preheader, %while.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %while.body ], [ 0, %while.body.preheader ]
|
||||
%shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
|
||||
%0 = trunc i64 %indvars.iv to i32
|
||||
%shl = shl i32 1, %0
|
||||
%and = and i32 %shl, %n
|
||||
%tobool1 = icmp ne i32 %and, 0
|
||||
%conv = zext i1 %tobool1 to i8
|
||||
%arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
|
||||
store i8 %conv, i8* %arrayidx, align 1
|
||||
%indvars.iv.next = add nuw i64 %indvars.iv, 1
|
||||
%shr = ashr i32 %shr11, 1
|
||||
%tobool = icmp eq i32 %shr, 0
|
||||
br i1 %tobool, label %while.end.loopexit, label %while.body
|
||||
|
||||
while.end.loopexit: ; preds = %while.body
|
||||
%1 = trunc i64 %indvars.iv.next to i32
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.end.loopexit, %entry
|
||||
%i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %while.end.loopexit ]
|
||||
ret i32 %i.0.lcssa
|
||||
}
|
||||
|
||||
; Recognize CTLZ builtin pattern.
|
||||
; Here it will replace the loop -
|
||||
; assume builtin is always profitable.
|
||||
;
|
||||
; int ctlz_zero_check(int n)
|
||||
; {
|
||||
; int i = 0;
|
||||
; while(n) {
|
||||
; n >>= 1;
|
||||
; i++;
|
||||
; }
|
||||
; return i;
|
||||
; }
|
||||
;
|
||||
; ALL: entry
|
||||
; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
|
||||
; ALL-NEXT: %1 = sub i32 32, %0
|
||||
; ALL: %inc.lcssa = phi i32 [ %1, %while.body ]
|
||||
; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
|
||||
; ALL: ret i32 %i.0.lcssa
|
||||
|
||||
; Function Attrs: norecurse nounwind readnone uwtable
|
||||
define i32 @ctlz_zero_check(i32 %n) {
|
||||
entry:
|
||||
%tobool4 = icmp eq i32 %n, 0
|
||||
br i1 %tobool4, label %while.end, label %while.body.preheader
|
||||
|
||||
while.body.preheader: ; preds = %entry
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.preheader, %while.body
|
||||
%i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
|
||||
%n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ]
|
||||
%shr = ashr i32 %n.addr.05, 1
|
||||
%inc = add nsw i32 %i.06, 1
|
||||
%tobool = icmp eq i32 %shr, 0
|
||||
br i1 %tobool, label %while.end.loopexit, label %while.body
|
||||
|
||||
while.end.loopexit: ; preds = %while.body
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.end.loopexit, %entry
|
||||
%i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
|
||||
ret i32 %i.0.lcssa
|
||||
}
|
||||
|
||||
; Recognize CTLZ builtin pattern.
|
||||
; Here it will replace the loop -
|
||||
; assume builtin is always profitable.
|
||||
;
|
||||
; int ctlz(int n)
|
||||
; {
|
||||
; int i = 0;
|
||||
; while(n >>= 1) {
|
||||
; i++;
|
||||
; }
|
||||
; return i;
|
||||
; }
|
||||
;
|
||||
; ALL: entry
|
||||
; ALL: %0 = ashr i32 %n, 1
|
||||
; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
|
||||
; ALL-NEXT: %2 = sub i32 32, %1
|
||||
; ALL-NEXT: %3 = add i32 %2, 1
|
||||
; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
|
||||
; ALL: ret i32 %i.0.lcssa
|
||||
|
||||
; Function Attrs: norecurse nounwind readnone uwtable
|
||||
define i32 @ctlz(i32 %n) {
|
||||
entry:
|
||||
br label %while.cond
|
||||
|
||||
while.cond: ; preds = %while.cond, %entry
|
||||
%n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
|
||||
%shr = ashr i32 %n.addr.0, 1
|
||||
%tobool = icmp eq i32 %shr, 0
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br i1 %tobool, label %while.end, label %while.cond
|
||||
|
||||
while.end: ; preds = %while.cond
|
||||
ret i32 %i.0
|
||||
}
|
||||
|
||||
; Recognize CTLZ builtin pattern.
|
||||
; Here it will replace the loop -
|
||||
; assume builtin is always profitable.
|
||||
;
|
||||
; int ctlz_add(int n, int i0)
|
||||
; {
|
||||
; int i = i0;
|
||||
; while(n >>= 1) {
|
||||
; i++;
|
||||
; }
|
||||
; return i;
|
||||
; }
|
||||
;
|
||||
; ALL: entry
|
||||
; ALL: %0 = ashr i32 %n, 1
|
||||
; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
|
||||
; ALL-NEXT: %2 = sub i32 32, %1
|
||||
; ALL-NEXT: %3 = add i32 %2, 1
|
||||
; ALL-NEXT: %4 = add i32 %2, %i0
|
||||
; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ]
|
||||
; ALL: ret i32 %i.0.lcssa
|
||||
;
|
||||
; Function Attrs: norecurse nounwind readnone uwtable
|
||||
define i32 @ctlz_add(i32 %n, i32 %i0) {
|
||||
entry:
|
||||
br label %while.cond
|
||||
|
||||
while.cond: ; preds = %while.cond, %entry
|
||||
%n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
|
||||
%i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
|
||||
%shr = ashr i32 %n.addr.0, 1
|
||||
%tobool = icmp eq i32 %shr, 0
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br i1 %tobool, label %while.end, label %while.cond
|
||||
|
||||
while.end: ; preds = %while.cond
|
||||
ret i32 %i.0
|
||||
}
|
@ -1,3 +0,0 @@
|
||||
if not 'X86' in config.root.targets:
|
||||
config.unsupported = True
|
||||
|
@ -1,140 +0,0 @@
|
||||
; RUN: opt -loop-idiom < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -S | FileCheck %s
|
||||
|
||||
;To recognize this pattern:
|
||||
;int popcount(unsigned long long a) {
|
||||
; int c = 0;
|
||||
; while (a) {
|
||||
; c++;
|
||||
; a &= a - 1;
|
||||
; }
|
||||
; return c;
|
||||
;}
|
||||
;
|
||||
; CHECK: entry
|
||||
; CHECK: llvm.ctpop.i64
|
||||
; CHECK: ret
|
||||
define i32 @popcount(i64 %a) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%tobool3 = icmp eq i64 %a, 0
|
||||
br i1 %tobool3, label %while.end, label %while.body
|
||||
|
||||
while.body: ; preds = %entry, %while.body
|
||||
%c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
|
||||
%a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
|
||||
%inc = add nsw i32 %c.05, 1
|
||||
%sub = add i64 %a.addr.04, -1
|
||||
%and = and i64 %sub, %a.addr.04
|
||||
%tobool = icmp eq i64 %and, 0
|
||||
br i1 %tobool, label %while.end, label %while.body
|
||||
|
||||
while.end: ; preds = %while.body, %entry
|
||||
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
|
||||
ret i32 %c.0.lcssa
|
||||
}
|
||||
|
||||
; To recognize this pattern:
|
||||
;int popcount(unsigned long long a, int mydata1, int mydata2) {
|
||||
; int c = 0;
|
||||
; while (a) {
|
||||
; c++;
|
||||
; a &= a - 1;
|
||||
; mydata1 *= c;
|
||||
; mydata2 *= (int)a;
|
||||
; }
|
||||
; return c + mydata1 + mydata2;
|
||||
;}
|
||||
; CHECK: entry
|
||||
; CHECK: llvm.ctpop.i64
|
||||
; CHECK: ret
|
||||
define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%tobool9 = icmp eq i64 %a, 0
|
||||
br i1 %tobool9, label %while.end, label %while.body
|
||||
|
||||
while.body: ; preds = %entry, %while.body
|
||||
%c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
|
||||
%mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
|
||||
%mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
|
||||
%a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
|
||||
%inc = add nsw i32 %c.013, 1
|
||||
%sub = add i64 %a.addr.010, -1
|
||||
%and = and i64 %sub, %a.addr.010
|
||||
%mul = mul nsw i32 %inc, %mydata1.addr.011
|
||||
%conv = trunc i64 %and to i32
|
||||
%mul1 = mul nsw i32 %conv, %mydata2.addr.012
|
||||
%tobool = icmp eq i64 %and, 0
|
||||
br i1 %tobool, label %while.end, label %while.body
|
||||
|
||||
while.end: ; preds = %while.body, %entry
|
||||
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
|
||||
%mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
|
||||
%mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
|
||||
%add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
|
||||
%add2 = add i32 %add, %c.0.lcssa
|
||||
ret i32 %add2
|
||||
}
|
||||
|
||||
; Some variants once cause crash
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
define i32 @PopCntCrash1(i64 %a) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%tobool3 = icmp eq i64 %a, 0
|
||||
br i1 %tobool3, label %while.end, label %while.body
|
||||
|
||||
while.body: ; preds = %entry, %while.body
|
||||
%c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
|
||||
%a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
|
||||
%t = add i32 %c.05, %c.05
|
||||
%inc = add nsw i32 %t, 1
|
||||
%sub = add i64 %a.addr.04, -1
|
||||
%and = and i64 %sub, %a.addr.04
|
||||
%tobool = icmp eq i64 %and, 0
|
||||
br i1 %tobool, label %while.end, label %while.body
|
||||
|
||||
while.end: ; preds = %while.body, %entry
|
||||
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
|
||||
ret i32 %c.0.lcssa
|
||||
|
||||
; CHECK: entry
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
define i32 @PopCntCrash2(i64 %a, i32 %b) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%tobool3 = icmp eq i64 %a, 0
|
||||
br i1 %tobool3, label %while.end, label %while.body
|
||||
|
||||
while.body: ; preds = %entry, %while.body
|
||||
%c.05 = phi i32 [ %inc, %while.body ], [ %b, %entry ]
|
||||
%a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
|
||||
%inc = add nsw i32 %c.05, 1
|
||||
%sub = add i64 %a.addr.04, -1
|
||||
%and = and i64 %sub, %a.addr.04
|
||||
%tobool = icmp eq i64 %and, 0
|
||||
br i1 %tobool, label %while.end, label %while.body
|
||||
|
||||
while.end: ; preds = %while.body, %entry
|
||||
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
|
||||
ret i32 %c.0.lcssa
|
||||
}
|
||||
|
||||
define i32 @PopCntCrash3(i64 %a, i32 %x) {
|
||||
entry:
|
||||
%tobool3 = icmp eq i64 %a, 0
|
||||
%cmp = icmp eq i32 %x, 0
|
||||
br i1 %tobool3, label %while.end, label %while.body
|
||||
|
||||
while.body: ; preds = %entry, %while.body
|
||||
%c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
|
||||
%a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
|
||||
%inc = add nsw i32 %c.05, 1
|
||||
%sub = add i64 %a.addr.04, -1
|
||||
%and = and i64 %sub, %a.addr.04
|
||||
%tobool = icmp eq i64 %and, 0
|
||||
br i1 %cmp, label %while.end, label %while.body
|
||||
|
||||
while.end: ; preds = %while.body, %entry
|
||||
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
|
||||
ret i32 %c.0.lcssa
|
||||
}
|
@ -1,456 +0,0 @@
|
||||
; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
;; memcpy.atomic formation (atomic load & store)
|
||||
define void @test1(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test1(
|
||||
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
|
||||
; CHECK-NOT: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i8, i32 10000
|
||||
%Dest = alloca i8, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
||||
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
|
||||
%V = load atomic i8, i8* %I.0.014 unordered, align 1
|
||||
store atomic i8 %V, i8* %DestI unordered, align 1
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation (atomic store, normal load)
|
||||
define void @test2(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test2(
|
||||
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
|
||||
; CHECK-NOT: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i8, i32 10000
|
||||
%Dest = alloca i8, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
||||
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
|
||||
%V = load i8, i8* %I.0.014, align 1
|
||||
store atomic i8 %V, i8* %DestI unordered, align 1
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation rejection (atomic store, normal load w/ no align)
|
||||
define void @test2b(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test2b(
|
||||
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
|
||||
; CHECK: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i8, i32 10000
|
||||
%Dest = alloca i8, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
||||
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
|
||||
%V = load i8, i8* %I.0.014
|
||||
store atomic i8 %V, i8* %DestI unordered, align 1
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation rejection (atomic store, normal load w/ bad align)
|
||||
define void @test2c(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test2c(
|
||||
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
|
||||
; CHECK: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i32, i32 10000
|
||||
%Dest = alloca i32, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
|
||||
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
|
||||
%V = load i32, i32* %I.0.014, align 2
|
||||
store atomic i32 %V, i32* %DestI unordered, align 4
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation rejection (atomic store w/ bad align, normal load)
|
||||
define void @test2d(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test2d(
|
||||
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
|
||||
; CHECK: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i32, i32 10000
|
||||
%Dest = alloca i32, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
|
||||
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
|
||||
%V = load i32, i32* %I.0.014, align 4
|
||||
store atomic i32 %V, i32* %DestI unordered, align 2
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;; memcpy.atomic formation (normal store, atomic load)
|
||||
define void @test3(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test3(
|
||||
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
|
||||
; CHECK-NOT: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i8, i32 10000
|
||||
%Dest = alloca i8, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
||||
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
|
||||
%V = load atomic i8, i8* %I.0.014 unordered, align 1
|
||||
store i8 %V, i8* %DestI, align 1
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation rejection (normal store w/ no align, atomic load)
|
||||
define void @test3b(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test3b(
|
||||
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
|
||||
; CHECK: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i8, i32 10000
|
||||
%Dest = alloca i8, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
||||
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
|
||||
%V = load atomic i8, i8* %I.0.014 unordered, align 1
|
||||
store i8 %V, i8* %DestI
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation rejection (normal store, atomic load w/ bad align)
|
||||
define void @test3c(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test3c(
|
||||
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
|
||||
; CHECK: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i32, i32 10000
|
||||
%Dest = alloca i32, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
|
||||
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
|
||||
%V = load atomic i32, i32* %I.0.014 unordered, align 2
|
||||
store i32 %V, i32* %DestI, align 4
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation rejection (normal store w/ bad align, atomic load)
|
||||
define void @test3d(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test3d(
|
||||
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
|
||||
; CHECK: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i32, i32 10000
|
||||
%Dest = alloca i32, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
|
||||
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
|
||||
%V = load atomic i32, i32* %I.0.014 unordered, align 4
|
||||
store i32 %V, i32* %DestI, align 2
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;; memcpy.atomic formation rejection (atomic load, ordered-atomic store)
|
||||
define void @test4(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test4(
|
||||
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
|
||||
; CHECK: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i8, i32 10000
|
||||
%Dest = alloca i8, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
||||
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
|
||||
%V = load atomic i8, i8* %I.0.014 unordered, align 1
|
||||
store atomic i8 %V, i8* %DestI monotonic, align 1
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation rejection (ordered-atomic load, unordered-atomic store)
|
||||
define void @test5(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test5(
|
||||
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
|
||||
; CHECK: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i8, i32 10000
|
||||
%Dest = alloca i8, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
||||
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
|
||||
%V = load atomic i8, i8* %I.0.014 monotonic, align 1
|
||||
store atomic i8 %V, i8* %DestI unordered, align 1
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation (atomic load & store) -- element size 2
|
||||
define void @test6(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test6(
|
||||
; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 1
|
||||
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 2 %Dest{{[0-9]*}}, i8* align 2 %Base{{[0-9]*}}, i64 [[Sz]], i32 2)
|
||||
; CHECK-NOT: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i16, i32 10000
|
||||
%Dest = alloca i16, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
|
||||
%DestI = getelementptr i16, i16* %Dest, i64 %indvar
|
||||
%V = load atomic i16, i16* %I.0.014 unordered, align 2
|
||||
store atomic i16 %V, i16* %DestI unordered, align 2
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation (atomic load & store) -- element size 4
|
||||
define void @test7(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test7(
|
||||
; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 2
|
||||
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %Dest{{[0-9]*}}, i8* align 4 %Base{{[0-9]*}}, i64 [[Sz]], i32 4)
|
||||
; CHECK-NOT: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i32, i32 10000
|
||||
%Dest = alloca i32, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
|
||||
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
|
||||
%V = load atomic i32, i32* %I.0.014 unordered, align 4
|
||||
store atomic i32 %V, i32* %DestI unordered, align 4
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation (atomic load & store) -- element size 8
|
||||
define void @test8(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test8(
|
||||
; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 3
|
||||
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 8 %Dest{{[0-9]*}}, i8* align 8 %Base{{[0-9]*}}, i64 [[Sz]], i32 8)
|
||||
; CHECK-NOT: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i64, i32 10000
|
||||
%Dest = alloca i64, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i64, i64* %Base, i64 %indvar
|
||||
%DestI = getelementptr i64, i64* %Dest, i64 %indvar
|
||||
%V = load atomic i64, i64* %I.0.014 unordered, align 8
|
||||
store atomic i64 %V, i64* %DestI unordered, align 8
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation (atomic load & store) -- element size 16
|
||||
define void @test9(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test9(
|
||||
; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 4
|
||||
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %Dest{{[0-9]*}}, i8* align 16 %Base{{[0-9]*}}, i64 [[Sz]], i32 16)
|
||||
; CHECK-NOT: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i128, i32 10000
|
||||
%Dest = alloca i128, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i128, i128* %Base, i64 %indvar
|
||||
%DestI = getelementptr i128, i128* %Dest, i64 %indvar
|
||||
%V = load atomic i128, i128* %I.0.014 unordered, align 16
|
||||
store atomic i128 %V, i128* %DestI unordered, align 16
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
;; memcpy.atomic formation rejection (atomic load & store) -- element size 32
|
||||
define void @test10(i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test10(
|
||||
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
|
||||
; CHECK: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
%Base = alloca i256, i32 10000
|
||||
%Dest = alloca i256, i32 10000
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i256, i256* %Base, i64 %indvar
|
||||
%DestI = getelementptr i256, i256* %Dest, i64 %indvar
|
||||
%V = load atomic i256, i256* %I.0.014 unordered, align 32
|
||||
store atomic i256 %V, i256* %DestI unordered, align 32
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
|
||||
; Make sure that atomic memset doesn't get recognized by mistake
|
||||
define void @test_nomemset(i8* %Base, i64 %Size) nounwind ssp {
|
||||
; CHECK-LABEL: @test_nomemset(
|
||||
; CHECK-NOT: call void @llvm.memset
|
||||
; CHECK: store
|
||||
; CHECK: ret void
|
||||
bb.nph:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
||||
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
||||
store atomic i8 0, i8* %I.0.014 unordered, align 1
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
; Verify that unordered memset_pattern isn't recognized.
|
||||
; This is a replica of test11_pattern from basic.ll
|
||||
define void @test_nomemset_pattern(i32* nocapture %P) nounwind ssp {
|
||||
; CHECK-LABEL: @test_nomemset_pattern(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NOT: bitcast
|
||||
; CHECK-NOT: memset_pattern
|
||||
; CHECK: store atomic
|
||||
; CHECK: ret void
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
||||
%arrayidx = getelementptr i32, i32* %P, i64 %indvar
|
||||
store atomic i32 1, i32* %arrayidx unordered, align 4
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, 10000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
@ -1,91 +0,0 @@
|
||||
; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32:32-p1:64:64:64-p2:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-n8:16:32:64"
|
||||
target triple = "x86_64-apple-darwin10.0.0"
|
||||
|
||||
; Two dimensional nested loop should be promoted to one big memset.
|
||||
define void @test10(i8 addrspace(2)* %X) nounwind ssp {
|
||||
; CHECK-LABEL: @test10(
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: call void @llvm.memset.p2i8.i16(i8 addrspace(2)* %X, i8 0, i16 10000, i32 1, i1 false)
|
||||
; CHECK-NOT: store
|
||||
; CHECK: ret void
|
||||
|
||||
entry:
|
||||
br label %bb.nph
|
||||
|
||||
bb.nph: ; preds = %entry, %for.inc10
|
||||
%i.04 = phi i16 [ 0, %entry ], [ %inc12, %for.inc10 ]
|
||||
br label %for.body5
|
||||
|
||||
for.body5: ; preds = %for.body5, %bb.nph
|
||||
%j.02 = phi i16 [ 0, %bb.nph ], [ %inc, %for.body5 ]
|
||||
%mul = mul nsw i16 %i.04, 100
|
||||
%add = add nsw i16 %j.02, %mul
|
||||
%arrayidx = getelementptr inbounds i8, i8 addrspace(2)* %X, i16 %add
|
||||
store i8 0, i8 addrspace(2)* %arrayidx, align 1
|
||||
%inc = add nsw i16 %j.02, 1
|
||||
%cmp4 = icmp eq i16 %inc, 100
|
||||
br i1 %cmp4, label %for.inc10, label %for.body5
|
||||
|
||||
for.inc10: ; preds = %for.body5
|
||||
%inc12 = add nsw i16 %i.04, 1
|
||||
%cmp = icmp eq i16 %inc12, 100
|
||||
br i1 %cmp, label %for.end13, label %bb.nph
|
||||
|
||||
for.end13: ; preds = %for.inc10
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test11_pattern(i32 addrspace(2)* nocapture %P) nounwind ssp {
|
||||
; CHECK-LABEL: @test11_pattern(
|
||||
; CHECK-NOT: memset_pattern
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
||||
%arrayidx = getelementptr i32, i32 addrspace(2)* %P, i64 %indvar
|
||||
store i32 1, i32 addrspace(2)* %arrayidx, align 4
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, 10000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
; PR9815 - This is a partial overlap case that cannot be safely transformed
|
||||
; into a memcpy.
|
||||
@g_50 = addrspace(2) global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
|
||||
|
||||
|
||||
define i32 @test14() nounwind {
|
||||
; CHECK-LABEL: @test14(
|
||||
; CHECK: for.body:
|
||||
; CHECK: load i32
|
||||
; CHECK: store i32
|
||||
; CHECK: br i1 %cmp
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
||||
%add = add nsw i32 %tmp5, 4
|
||||
%idxprom = sext i32 %add to i64
|
||||
%arrayidx = getelementptr inbounds [7 x i32], [7 x i32] addrspace(2)* @g_50, i32 0, i64 %idxprom
|
||||
%tmp2 = load i32, i32 addrspace(2)* %arrayidx, align 4
|
||||
%add4 = add nsw i32 %tmp5, 5
|
||||
%idxprom5 = sext i32 %add4 to i64
|
||||
%arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32] addrspace(2)* @g_50, i32 0, i64 %idxprom5
|
||||
store i32 %tmp2, i32 addrspace(2)* %arrayidx6, align 4
|
||||
%inc = add nsw i32 %tmp5, 1
|
||||
%cmp = icmp slt i32 %inc, 2
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
%tmp8 = load i32, i32 addrspace(2)* getelementptr inbounds ([7 x i32], [7 x i32] addrspace(2)* @g_50, i32 0, i64 6), align 4
|
||||
ret i32 %tmp8
|
||||
}
|
||||
|
637
external/llvm/test/Transforms/LoopIdiom/basic.ll
vendored
637
external/llvm/test/Transforms/LoopIdiom/basic.ll
vendored
File diff suppressed because it is too large
Load Diff
25
external/llvm/test/Transforms/LoopIdiom/crash.ll
vendored
25
external/llvm/test/Transforms/LoopIdiom/crash.ll
vendored
@ -1,25 +0,0 @@
|
||||
; RUN: opt -basicaa -loop-idiom -S < %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; Don't crash inside DependenceAnalysis
|
||||
; PR14219
|
||||
define void @test1(i64* %iwork, i64 %x) {
|
||||
bb0:
|
||||
%mul116 = mul nsw i64 %x, %x
|
||||
%incdec.ptr6.sum175 = add i64 42, %x
|
||||
%arrayidx135 = getelementptr inbounds i64, i64* %iwork, i64 %incdec.ptr6.sum175
|
||||
br label %bb1
|
||||
bb1:
|
||||
%storemerge4226 = phi i64 [ 0, %bb0 ], [ %inc139, %bb1 ]
|
||||
store i64 1, i64* %arrayidx135, align 8
|
||||
%incdec.ptr6.sum176 = add i64 %mul116, %storemerge4226
|
||||
%arrayidx137 = getelementptr inbounds i64, i64* %iwork, i64 %incdec.ptr6.sum176
|
||||
store i64 1, i64* %arrayidx137, align 8
|
||||
%inc139 = add nsw i64 %storemerge4226, 1
|
||||
%cmp131 = icmp sgt i64 %storemerge4226, 42
|
||||
br i1 %cmp131, label %bb2, label %bb1
|
||||
bb2:
|
||||
ret void
|
||||
}
|
||||
|
@ -1,34 +0,0 @@
|
||||
; RUN: opt -loop-idiom -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "arm64-apple-ios8.0.0"
|
||||
|
||||
; When we replace the precondition with a ctpop, we need to ensure
|
||||
; that only the first branch reads the ctpop. The store prior
|
||||
; to that should continue to read from the original compare.
|
||||
|
||||
; CHECK: %tobool.5 = icmp ne i32 %num, 0
|
||||
; CHECK: store i1 %tobool.5, i1* %ptr
|
||||
|
||||
define internal fastcc i32 @num_bits_set(i32 %num, i1* %ptr) #1 {
|
||||
entry:
|
||||
%tobool.5 = icmp ne i32 %num, 0
|
||||
store i1 %tobool.5, i1* %ptr
|
||||
br i1 %tobool.5, label %for.body.lr.ph, label %for.end
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.lr.ph, %for.body
|
||||
%count.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
|
||||
%num.addr.06 = phi i32 [ %num, %for.body.lr.ph ], [ %and, %for.body ]
|
||||
%sub = add i32 %num.addr.06, -1
|
||||
%and = and i32 %sub, %num.addr.06
|
||||
%inc = add nsw i32 %count.07, 1
|
||||
%tobool = icmp ne i32 %and, 0
|
||||
br i1 %tobool, label %for.body, label %for.end
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
%count.0.lcssa = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
||||
ret i32 %count.0.lcssa
|
||||
}
|
@ -1,52 +0,0 @@
|
||||
; RUN: opt -loop-idiom < %s -S | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-darwin10.0.0"
|
||||
|
||||
|
||||
define void @foo(double* nocapture %a) nounwind ssp !dbg !0 {
|
||||
entry:
|
||||
tail call void @llvm.dbg.value(metadata double* %a, metadata !5, metadata !DIExpression()), !dbg !8
|
||||
tail call void @llvm.dbg.value(metadata i32 0, metadata !10, metadata !DIExpression()), !dbg !14
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
||||
%arrayidx = getelementptr double, double* %a, i64 %indvar
|
||||
; CHECK: call void @llvm.memset{{.+}} !dbg
|
||||
store double 0.000000e+00, double* %arrayidx, align 8, !dbg !15
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp ne i64 %indvar.next, 1000
|
||||
br i1 %exitcond, label %for.body, label %for.end, !dbg !14
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
tail call void @llvm.dbg.value(metadata !{null}, metadata !10, metadata !DIExpression()), !dbg !16
|
||||
ret void, !dbg !17
|
||||
}
|
||||
|
||||
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
|
||||
|
||||
declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone
|
||||
|
||||
!llvm.module.flags = !{!19}
|
||||
!llvm.dbg.cu = !{!2}
|
||||
|
||||
!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !18, scope: !1, type: !3)
|
||||
!1 = !DIFile(filename: "li.c", directory: "/private/tmp")
|
||||
!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127165:127174)", isOptimized: true, emissionKind: FullDebug, file: !18, enums: !9, retainedTypes: !9)
|
||||
!3 = !DISubroutineType(types: !4)
|
||||
!4 = !{null}
|
||||
!5 = !DILocalVariable(name: "a", line: 2, arg: 1, scope: !0, file: !1, type: !6)
|
||||
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !2, baseType: !7)
|
||||
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
|
||||
!8 = !DILocation(line: 2, column: 18, scope: !0)
|
||||
!9 = !{}
|
||||
!10 = !DILocalVariable(name: "i", line: 3, scope: !11, file: !1, type: !13)
|
||||
!11 = distinct !DILexicalBlock(line: 3, column: 3, file: !18, scope: !12)
|
||||
!12 = distinct !DILexicalBlock(line: 2, column: 21, file: !18, scope: !0)
|
||||
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
|
||||
!14 = !DILocation(line: 3, column: 3, scope: !12)
|
||||
!15 = !DILocation(line: 4, column: 5, scope: !11)
|
||||
!16 = !DILocation(line: 3, column: 29, scope: !11)
|
||||
!17 = !DILocation(line: 5, column: 1, scope: !12)
|
||||
!18 = !DIFile(filename: "li.c", directory: "/private/tmp")
|
||||
!19 = !{i32 1, !"Debug Info Version", i32 3}
|
@ -1,23 +0,0 @@
|
||||
; RUN: opt -S < %s -loop-idiom | FileCheck %s
|
||||
|
||||
declare void @llvm.sideeffect()
|
||||
|
||||
; Loop idiom recognition across a @llvm.sideeffect.
|
||||
|
||||
; CHECK-LABEL: zero
|
||||
; CHECK: llvm.memset
|
||||
define void @zero(float* %p, i64 %n) nounwind {
|
||||
bb7.lr.ph:
|
||||
br label %bb7
|
||||
|
||||
bb7:
|
||||
%i.02 = phi i64 [ 0, %bb7.lr.ph ], [ %tmp13, %bb7 ]
|
||||
%tmp10 = getelementptr inbounds float, float* %p, i64 %i.02
|
||||
store float 0.000000e+00, float* %tmp10, align 4
|
||||
%tmp13 = add i64 %i.02, 1
|
||||
%tmp6 = icmp ult i64 %tmp13, %n
|
||||
br i1 %tmp6, label %bb7, label %bb14
|
||||
|
||||
bb14:
|
||||
ret void
|
||||
}
|
@ -1,182 +0,0 @@
|
||||
; RUN: opt -basicaa -loop-idiom -use-lir-code-size-heurs=true < %s -S | FileCheck %s
|
||||
|
||||
; When compiling for codesize we avoid idiom recognition for a
|
||||
; multi-block loop unless it is one of
|
||||
; - a loop_memset idiom, or
|
||||
; - a memset/memcpy idiom in a nested loop.
|
||||
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
|
||||
@APPLES = common global i32 0, align 4
|
||||
@ORANGES = common global i32 0, align 4
|
||||
|
||||
; LIR allowed: loop_memset idiom in multi-block loop.
|
||||
; ===================================================
|
||||
; CHECK-LABEL: @LoopMemset
|
||||
; CHECK: for.body.preheader:
|
||||
; CHECK: call void @llvm.memset
|
||||
; CHECK: for.body:
|
||||
;
|
||||
define i32 @LoopMemset([2048 x i8]* noalias nocapture %DST, i32 %SIZE) local_unnamed_addr optsize {
|
||||
entry:
|
||||
%cmp12 = icmp sgt i32 %SIZE, 0
|
||||
br i1 %cmp12, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.inc
|
||||
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
|
||||
%BASKET.013 = phi i32 [ %BASKET.1, %for.inc ], [ 0, %for.body.preheader ]
|
||||
%arraydecay = getelementptr inbounds [2048 x i8], [2048 x i8]* %DST, i64 %indvars.iv, i64 0
|
||||
tail call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 -1, i64 2048, i32 1, i1 false)
|
||||
%0 = trunc i64 %indvars.iv to i32
|
||||
%rem11 = and i32 %0, 1
|
||||
%cmp1 = icmp eq i32 %rem11, 0
|
||||
%1 = load i32, i32* @ORANGES, align 4
|
||||
%2 = load i32, i32* @APPLES, align 4
|
||||
br i1 %cmp1, label %if.then, label %if.else
|
||||
|
||||
if.else: ; preds = %for.body
|
||||
%dec3 = add nsw i32 %2, -1
|
||||
store i32 %dec3, i32* @APPLES, align 4
|
||||
br label %for.inc
|
||||
|
||||
if.then: ; preds = %for.body
|
||||
%dec = add nsw i32 %1, -1
|
||||
store i32 %dec, i32* @ORANGES, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %if.then, %if.else
|
||||
%.pn = phi i32 [ %2, %if.then ], [ %1, %if.else ]
|
||||
%BASKET.1 = add nsw i32 %.pn, %BASKET.013
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp ne i32 %lftr.wideiv, %SIZE
|
||||
br i1 %exitcond, label %for.body, label %for.end.loopexit
|
||||
|
||||
for.end.loopexit: ; preds = %for.inc
|
||||
%BASKET.1.lcssa = phi i32 [ %BASKET.1, %for.inc ]
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
%BASKET.0.lcssa = phi i32 [ 0, %entry ], [ %BASKET.1.lcssa, %for.end.loopexit ]
|
||||
ret i32 %BASKET.0.lcssa
|
||||
}
|
||||
|
||||
; LIR allowed: memset idiom in multi-block nested loop,
|
||||
; which is recognized as a loop_memset in its turn.
|
||||
; =====================================================
|
||||
; CHECK-LABEL: @NestedMemset_LoopMemset
|
||||
; CHECK: for.cond1.preheader.preheader:
|
||||
; CHECK: call void @llvm.memset
|
||||
; CHECK: for.cond1.preheader:
|
||||
;
|
||||
define i32 @NestedMemset_LoopMemset([2046 x i8]* noalias nocapture %DST, i32 %SIZE) local_unnamed_addr optsize {
|
||||
entry:
|
||||
%cmp25 = icmp sgt i32 %SIZE, 0
|
||||
br i1 %cmp25, label %for.cond1.preheader.preheader, label %for.end11
|
||||
|
||||
for.cond1.preheader.preheader: ; preds = %entry
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc9
|
||||
%i.027 = phi i32 [ %inc10, %for.inc9 ], [ 0, %for.cond1.preheader.preheader ]
|
||||
%BASKET.026 = phi i32 [ %BASKET.2.lcssa, %for.inc9 ], [ 0, %for.cond1.preheader.preheader ]
|
||||
%idxprom4 = sext i32 %i.027 to i64
|
||||
%rem22 = and i32 %i.027, 1
|
||||
%cmp6 = icmp eq i32 %rem22, 0
|
||||
br label %for.body3
|
||||
|
||||
for.body3: ; preds = %for.cond1.preheader, %for.inc
|
||||
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
|
||||
%BASKET.123 = phi i32 [ %BASKET.026, %for.cond1.preheader ], [ %BASKET.2, %for.inc ]
|
||||
%arrayidx5 = getelementptr inbounds [2046 x i8], [2046 x i8]* %DST, i64 %idxprom4, i64 %indvars.iv
|
||||
store i8 -1, i8* %arrayidx5, align 1
|
||||
%0 = load i32, i32* @APPLES, align 4
|
||||
%1 = load i32, i32* @ORANGES, align 4
|
||||
br i1 %cmp6, label %if.then, label %if.else
|
||||
|
||||
if.else: ; preds = %for.body3
|
||||
%dec8 = add nsw i32 %0, -1
|
||||
store i32 %dec8, i32* @APPLES, align 4
|
||||
br label %for.inc
|
||||
|
||||
if.then: ; preds = %for.body3
|
||||
%dec = add nsw i32 %1, -1
|
||||
store i32 %dec, i32* @ORANGES, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %if.then, %if.else
|
||||
%.pn = phi i32 [ %0, %if.then ], [ %1, %if.else ]
|
||||
%BASKET.2 = add nsw i32 %.pn, %BASKET.123
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, 2046
|
||||
br i1 %exitcond, label %for.body3, label %for.inc9
|
||||
|
||||
for.inc9: ; preds = %for.inc
|
||||
%BASKET.2.lcssa = phi i32 [ %BASKET.2, %for.inc ]
|
||||
%inc10 = add nsw i32 %i.027, 1
|
||||
%cmp = icmp slt i32 %inc10, %SIZE
|
||||
br i1 %cmp, label %for.cond1.preheader, label %for.end11.loopexit
|
||||
|
||||
for.end11.loopexit: ; preds = %for.inc9
|
||||
%BASKET.2.lcssa.lcssa = phi i32 [ %BASKET.2.lcssa, %for.inc9 ]
|
||||
br label %for.end11
|
||||
|
||||
for.end11: ; preds = %for.end11.loopexit, %entry
|
||||
%BASKET.0.lcssa = phi i32 [ 0, %entry ], [ %BASKET.2.lcssa.lcssa, %for.end11.loopexit ]
|
||||
ret i32 %BASKET.0.lcssa
|
||||
}
|
||||
|
||||
; LIR avoided: memset idiom in multi-block top-level loop.
|
||||
; ========================================================
|
||||
; CHECK-LABEL: @Non_NestedMemset
|
||||
; CHECK-NOT: call void @llvm.memset
|
||||
;
|
||||
define i32 @Non_NestedMemset(i8* noalias nocapture %DST, i32 %SIZE) local_unnamed_addr optsize {
|
||||
entry:
|
||||
%cmp12 = icmp sgt i32 %SIZE, 0
|
||||
br i1 %cmp12, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.inc
|
||||
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
|
||||
%BASKET.013 = phi i32 [ %BASKET.1, %for.inc ], [ 0, %for.body.preheader ]
|
||||
%arrayidx = getelementptr inbounds i8, i8* %DST, i64 %indvars.iv
|
||||
store i8 -1, i8* %arrayidx, align 1
|
||||
%0 = trunc i64 %indvars.iv to i32
|
||||
%rem11 = and i32 %0, 1
|
||||
%cmp1 = icmp eq i32 %rem11, 0
|
||||
%1 = load i32, i32* @ORANGES, align 4
|
||||
%2 = load i32, i32* @APPLES, align 4
|
||||
br i1 %cmp1, label %if.then, label %if.else
|
||||
|
||||
if.else: ; preds = %for.body
|
||||
%dec3 = add nsw i32 %2, -1
|
||||
store i32 %dec3, i32* @APPLES, align 4
|
||||
br label %for.inc
|
||||
|
||||
if.then: ; preds = %for.body
|
||||
%dec = add nsw i32 %1, -1
|
||||
store i32 %dec, i32* @ORANGES, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %if.then, %if.else
|
||||
%.pn = phi i32 [ %2, %if.then ], [ %1, %if.else ]
|
||||
%BASKET.1 = add nsw i32 %.pn, %BASKET.013
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp ne i32 %lftr.wideiv, %SIZE
|
||||
br i1 %exitcond, label %for.body, label %for.end.loopexit
|
||||
|
||||
for.end.loopexit: ; preds = %for.inc
|
||||
%BASKET.1.lcssa = phi i32 [ %BASKET.1, %for.inc ]
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
%BASKET.0.lcssa = phi i32 [ 0, %entry ], [ %BASKET.1.lcssa, %for.end.loopexit ]
|
||||
ret i32 %BASKET.0.lcssa
|
||||
}
|
||||
|
@ -1,30 +0,0 @@
|
||||
; RUN: opt -loop-idiom < %s -S | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-darwin10.0.0"
|
||||
|
||||
; CHECK-LABEL: @memset(
|
||||
; CHECK-NOT: llvm.memset
|
||||
define i8* @memset(i8* %b, i32 %c, i64 %len) nounwind uwtable ssp {
|
||||
entry:
|
||||
%cmp1 = icmp ult i64 0, %len
|
||||
br i1 %cmp1, label %for.body.lr.ph, label %for.end
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
%conv6 = trunc i32 %c to i8
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.lr.ph, %for.body
|
||||
%indvar = phi i64 [ 0, %for.body.lr.ph ], [ %indvar.next, %for.body ]
|
||||
%p.02 = getelementptr i8, i8* %b, i64 %indvar
|
||||
store i8 %conv6, i8* %p.02, align 1
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp ne i64 %indvar.next, %len
|
||||
br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
|
||||
|
||||
for.cond.for.end_crit_edge: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.cond.for.end_crit_edge, %entry
|
||||
ret i8* %b
|
||||
}
|
||||
|
@ -1,34 +0,0 @@
|
||||
; RUN: opt -S -loop-idiom < %s
|
||||
; Don't crash
|
||||
; PR13892
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test(i32* %currMB) nounwind uwtable {
|
||||
entry:
|
||||
br i1 undef, label %start.exit, label %if.then.i
|
||||
|
||||
if.then.i: ; preds = %entry
|
||||
unreachable
|
||||
|
||||
start.exit: ; preds = %entry
|
||||
indirectbr i8* undef, [label %0, label %for.bodyprime]
|
||||
|
||||
; <label>:0 ; preds = %start.exit
|
||||
unreachable
|
||||
|
||||
for.bodyprime: ; preds = %for.bodyprime, %start.exit
|
||||
%i.057375 = phi i32 [ 0, %start.exit ], [ %1, %for.bodyprime ]
|
||||
%arrayidx8prime = getelementptr inbounds i32, i32* %currMB, i32 %i.057375
|
||||
store i32 0, i32* %arrayidx8prime, align 4
|
||||
%1 = add i32 %i.057375, 1
|
||||
%cmp5prime = icmp slt i32 %1, 4
|
||||
br i1 %cmp5prime, label %for.bodyprime, label %for.endprime
|
||||
|
||||
for.endprime: ; preds = %for.bodyprime
|
||||
br label %for.body23prime
|
||||
|
||||
for.body23prime: ; preds = %for.body23prime, %for.endprime
|
||||
br label %for.body23prime
|
||||
}
|
@ -1,48 +0,0 @@
|
||||
; RUN: opt -S -basicaa -loop-idiom < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @f_0(i8 addrspace(3)** %ptr) {
|
||||
; CHECK-LABEL: @f_0(
|
||||
; CHECK: call{{.*}}memset
|
||||
|
||||
; LIR'ing stores of pointers with address space 3 is fine, since
|
||||
; they're integral pointers.
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
||||
%arrayidx = getelementptr i8 addrspace(3)*, i8 addrspace(3)** %ptr, i64 %indvar
|
||||
store i8 addrspace(3)* null, i8 addrspace(3)** %arrayidx, align 4
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, 10000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @f_1(i8 addrspace(4)** %ptr) {
|
||||
; CHECK-LABEL: @f_1(
|
||||
; CHECK-NOT: call{{.*}}memset
|
||||
|
||||
; LIR'ing stores of pointers with address space 4 is not ok, since
|
||||
; they're non-integral pointers.
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
||||
%arrayidx = getelementptr i8 addrspace(4)*, i8 addrspace(4)** %ptr, i64 %indvar
|
||||
store i8 addrspace(4)* null, i8 addrspace(4)** %arrayidx, align 4
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, 10000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
; RUN: opt -loop-idiom < %s -S | FileCheck %s
|
||||
; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(loop-idiom)' < %s -S | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
%struct.bigBlock_t = type { [256 x <4 x float>] }
|
||||
|
||||
; CHECK-LABEL: @test(
|
||||
; CHECK-NOT: llvm.memset
|
||||
; test: zero-fills the memory behind %p with <4 x float> stores tagged
; !nontemporal. Each iteration writes two adjacent 16-byte vectors (floats
; 0-3 and 4-7 relative to %dst.01) and then advances the destination by
; 8 floats. The FileCheck directives above the function require that no
; llvm.memset appears in the output.
define void @test(%struct.bigBlock_t* %p) {
|
||||
entry:
|
||||
; Pointer to the first float of the first vector in the struct's array.
%0 = getelementptr inbounds %struct.bigBlock_t, %struct.bigBlock_t* %p, i64 0, i32 0, i64 0, i64 0
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%index.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
|
||||
%dst.01 = phi float* [ %0, %entry ], [ %add.ptr2, %for.body ]
|
||||
%cast.i5 = bitcast float* %dst.01 to <4 x float>*
|
||||
; First vector store of the iteration, at %dst.01.
store <4 x float> zeroinitializer, <4 x float>* %cast.i5, align 16, !nontemporal !0
|
||||
%add.ptr1 = getelementptr inbounds float, float* %dst.01, i64 4
|
||||
%cast.i = bitcast float* %add.ptr1 to <4 x float>*
|
||||
; Second vector store of the iteration, 4 floats past %dst.01.
store <4 x float> zeroinitializer, <4 x float>* %cast.i, align 16, !nontemporal !0
|
||||
; Next iteration's destination: 8 floats past the current one.
%add.ptr2 = getelementptr inbounds float, float* %dst.01, i64 8
|
||||
; The counter steps by 32 and the loop continues while it is below 4096,
; i.e. 128 iterations.
%add = add nuw nsw i32 %index.02, 32
|
||||
%cmp = icmp ult i32 %add, 4096
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = !{i32 1}
|
@ -1,26 +0,0 @@
|
||||
; RUN: opt -loop-idiom -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; test1: the inner loop copies six i32 values, each from element %indvars.iv+1
; to element %indvars.iv, with both addresses computed from a null base
; pointer. When the inner loop finishes, control returns to
; %for.body.preheader, so the function has no return. The FileCheck
; directives after the function expect a 24-byte llvm.memcpy and no remaining
; store.
define void @test1() {
|
||||
entry:
|
||||
br label %for.body.preheader
|
||||
|
||||
for.body.preheader: ; preds = %for.body, %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %for.body.preheader
|
||||
%indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
|
||||
; Destination element: null + %indvars.iv.
%add.ptr3 = getelementptr inbounds i32, i32* null, i32 %indvars.iv
|
||||
; Source element: one i32 past the destination.
%add.ptr4 = getelementptr inbounds i32, i32* %add.ptr3, i32 1
|
||||
%0 = load i32, i32* %add.ptr4, align 4
|
||||
store i32 %0, i32* %add.ptr3, align 4
|
||||
%indvars.iv.next = add nsw i32 %indvars.iv, 1
|
||||
; Stay in the inner loop until six elements have been copied.
%exitcond = icmp ne i32 %indvars.iv.next, 6
|
||||
br i1 %exitcond, label %for.body, label %for.body.preheader
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define void @test1(
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* inttoptr (i64 4 to i8*), i64 24, i32 4, i1 false)
|
||||
; CHECK-NOT: store
|
@ -1,35 +0,0 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; Check that we're not crashing while looking at the recurrence variable.
|
||||
; RUN: opt -S -loop-idiom %s | FileCheck %s
|
||||
|
||||
; tinkywinky: entry branches on the constant `true`, so the edge to %ph is
; never taken. %ph defines a single-input phi that feeds a self-looping block
; (%if.end), which shifts it right by an undef amount. The autogenerated
; FileCheck directives below describe the expected output, including an added
; exit.loopexit block.
define void @tinkywinky() {
|
||||
; CHECK-LABEL: @tinkywinky(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[PH:%.*]]
|
||||
; CHECK: ph:
|
||||
; CHECK-NEXT: [[MYPHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: br label [[IF_END:%.*]]
|
||||
; CHECK: if.end:
|
||||
; CHECK-NEXT: [[PATATINO:%.*]] = ashr i32 [[MYPHI]], undef
|
||||
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[PATATINO]], 0
|
||||
; CHECK-NEXT: br i1 [[TOBOOL]], label [[EXIT_LOOPEXIT:%.*]], label [[IF_END]]
|
||||
; CHECK: exit.loopexit:
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
; Constant condition: control always goes to %exit.
br i1 true, label %exit, label %ph
|
||||
|
||||
ph:
|
||||
; Phi with a single incoming value (1 from %entry), defined outside the loop.
%myphi = phi i32 [ 1, %entry ]
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
; The shift amount is undef; the loop exits when the shifted value is 0.
%patatino = ashr i32 %myphi, undef
|
||||
%tobool = icmp eq i32 %patatino, 0
|
||||
br i1 %tobool, label %exit, label %if.end
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user