Imported Upstream version 5.18.0.167

Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
Xamarin Public Jenkins (auto-signing)
2018-10-20 08:25:10 +00:00
parent e19d552987
commit b084638f15
28489 changed files with 184 additions and 3866856 deletions

@@ -1,117 +0,0 @@
; RUN: opt -codegenprepare -S < %s | FileCheck %s
; The following target lines are needed for the test to exercise what it should.
; Without these lines, CodeGenPrepare does not try to sink the bitcasts.
target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc"
declare i32 @__CxxFrameHandler3(...)
declare void @f()
declare void @g(i8*)
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
; CodeGenPrepare will want to sink these bitcasts, but it selects the catchpad
; blocks as the place to which the bitcast should be sunk. Since catchpads
; do not allow non-phi instructions before the terminator, this isn't possible.
; CHECK-LABEL: @test(
define void @test(i32* %addr) personality i32 (...)* @__CxxFrameHandler3 {
entry:
%x = getelementptr i32, i32* %addr, i32 1
%p1 = bitcast i32* %x to i8*
invoke void @f()
to label %invoke.cont unwind label %catch1
; CHECK: invoke.cont:
; CHECK-NEXT: %y = getelementptr i32, i32* %addr, i32 2
invoke.cont:
%y = getelementptr i32, i32* %addr, i32 2
%p2 = bitcast i32* %y to i8*
invoke void @f()
to label %done unwind label %catch2
done:
ret void
catch1:
%cs1 = catchswitch within none [label %handler1] unwind to caller
handler1:
%cp1 = catchpad within %cs1 []
br label %catch.shared
; CHECK: handler1:
; CHECK-NEXT: catchpad within %cs1
; CHECK: %[[p1:[0-9]+]] = bitcast i32* %x to i8*
catch2:
%cs2 = catchswitch within none [label %handler2] unwind to caller
handler2:
%cp2 = catchpad within %cs2 []
br label %catch.shared
; CHECK: handler2:
; CHECK: catchpad within %cs2
; CHECK: %[[p2:[0-9]+]] = bitcast i32* %y to i8*
; CHECK: catch.shared:
; CHECK-NEXT: %p = phi i8* [ %[[p1]], %handler1 ], [ %[[p2]], %handler2 ]
catch.shared:
%p = phi i8* [ %p1, %handler1 ], [ %p2, %handler2 ]
call void @g(i8* %p)
unreachable
}
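; Illustrative sketch with made-up names, added for this write-up and not part
; of the upstream test: an EH pad such as a catchpad must be the first non-PHI
; instruction of its block, so there is no legal point above it at which a sunk
; bitcast (or, in the next test, a hoisted llvm.dbg.value) could be inserted.
define void @sketch_catchpad_layout() personality i32 (...)* @__CxxFrameHandler3 {
entry:
invoke void @f()
to label %done unwind label %dispatch
dispatch:
%cs = catchswitch within none [label %pad] unwind to caller
pad:
%cp = catchpad within %cs [] ; must lead the block; nothing may be placed before it
catchret from %cp to label %done
done:
ret void
}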
; CodeGenPrepare will want to hoist these llvm.dbg.value calls to the phi, but
; there is no insertion point in a catchpad block.
; CHECK-LABEL: @test_dbg_value(
define void @test_dbg_value() personality i32 (...)* @__CxxFrameHandler3 {
entry:
%a = alloca i8
%b = alloca i8
invoke void @f() to label %next unwind label %catch.dispatch
next:
invoke void @f() to label %ret unwind label %catch.dispatch
ret:
ret void
catch.dispatch:
%p = phi i8* [%a, %entry], [%b, %next]
%cs1 = catchswitch within none [label %catch] unwind to caller
catch:
%cp1 = catchpad within %cs1 []
tail call void @llvm.dbg.value(metadata i8* %p, i64 0, metadata !11, metadata !13), !dbg !14
call void @g(i8* %p)
catchret from %cp1 to label %ret
; CHECK: catch.dispatch:
; CHECK-NEXT: phi i8
; CHECK-NEXT: catchswitch
; CHECK-NOT: llvm.dbg.value
; CHECK: catch:
; CHECK-NEXT: catchpad
; CHECK-NEXT: call void @llvm.dbg.value
}
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8, !9}
!llvm.ident = !{!10}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: null)
!1 = !DIFile(filename: "t.c", directory: "D:\5Csrc\5Cllvm\5Cbuild")
!4 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, variables: null)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
!7 = !{i32 2, !"Dwarf Version", i32 4}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{i32 1, !"PIC Level", i32 2}
!10 = !{!"clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)"}
!11 = !DILocalVariable(name: "p", scope: !4, file: !1, line: 2, type: !12)
!12 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!13 = !DIExpression(DW_OP_deref)
!14 = !DILocation(line: 2, column: 8, scope: !4)
!15 = !DILocation(line: 3, column: 1, scope: !4)

@@ -1,294 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -codegenprepare -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
declare void @use(i32) local_unnamed_addr
declare void @useptr([2 x i8*]*) local_unnamed_addr
; CHECK: @simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16
@simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16
; CHECK: @multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16
@multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16
; CHECK: @loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16
@loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16
; CHECK: @nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16
@nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16
; CHECK: @noncritical.targets = constant [2 x i8*] [i8* blockaddress(@noncritical, %bb0), i8* blockaddress(@noncritical, %bb1)], align 16
@noncritical.targets = constant [2 x i8*] [i8* blockaddress(@noncritical, %bb0), i8* blockaddress(@noncritical, %bb1)], align 16
; Check that we break the critical edge when a jump table has only one use.
define void @simple(i32* nocapture readonly %p) {
; CHECK-LABEL: @simple(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
; CHECK-NEXT: [[INITVAL:%.*]] = load i32, i32* [[P]], align 4
; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
; CHECK-NEXT: i32 0, label [[BB0_CLONE:%.*]]
; CHECK-NEXT: i32 1, label [[BB1_CLONE:%.*]]
; CHECK-NEXT: ]
; CHECK: bb0:
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: .split:
; CHECK-NEXT: [[MERGE:%.*]] = phi i32* [ [[PTR:%.*]], [[BB0:%.*]] ], [ [[INCDEC_PTR]], [[BB0_CLONE]] ]
; CHECK-NEXT: [[MERGE2:%.*]] = phi i32 [ 0, [[BB0]] ], [ [[INITVAL]], [[BB0_CLONE]] ]
; CHECK-NEXT: tail call void @use(i32 [[MERGE2]])
; CHECK-NEXT: br label [[INDIRECTGOTO:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[DOTSPLIT3:%.*]]
; CHECK: .split3:
; CHECK-NEXT: [[MERGE5:%.*]] = phi i32* [ [[PTR]], [[BB1:%.*]] ], [ [[INCDEC_PTR]], [[BB1_CLONE]] ]
; CHECK-NEXT: [[MERGE7:%.*]] = phi i32 [ 1, [[BB1]] ], [ [[INITVAL]], [[BB1_CLONE]] ]
; CHECK-NEXT: tail call void @use(i32 [[MERGE7]])
; CHECK-NEXT: br label [[INDIRECTGOTO]]
; CHECK: indirectgoto:
; CHECK-NEXT: [[P_ADDR_SINK:%.*]] = phi i32* [ [[MERGE5]], [[DOTSPLIT3]] ], [ [[MERGE]], [[DOTSPLIT]] ]
; CHECK-NEXT: [[PTR]] = getelementptr inbounds i32, i32* [[P_ADDR_SINK]], i64 1
; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[P_ADDR_SINK]], align 4
; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 [[IDX]]
; CHECK-NEXT: [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
; CHECK-NEXT: indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1]
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK: bb0.clone:
; CHECK-NEXT: br label [[DOTSPLIT]]
; CHECK: bb1.clone:
; CHECK-NEXT: br label [[DOTSPLIT3]]
;
entry:
%incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
%initval = load i32, i32* %p, align 4
%initop = load i32, i32* %incdec.ptr, align 4
switch i32 %initop, label %exit [
i32 0, label %bb0
i32 1, label %bb1
]
bb0:
%p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ]
%opcode.0 = phi i32 [ %initval, %entry ], [ 0, %indirectgoto ]
tail call void @use(i32 %opcode.0)
br label %indirectgoto
bb1:
%p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ]
%opcode.1 = phi i32 [ %initval, %entry ], [ 1, %indirectgoto ]
tail call void @use(i32 %opcode.1)
br label %indirectgoto
indirectgoto:
%p.addr.sink = phi i32* [ %p.addr.1, %bb1 ], [ %p.addr.0, %bb0 ]
%ptr = getelementptr inbounds i32, i32* %p.addr.sink, i64 1
%newp = load i32, i32* %p.addr.sink, align 4
%idx = sext i32 %newp to i64
%arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 %idx
%newop = load i8*, i8** %arrayidx, align 8
indirectbr i8* %newop, [label %bb0, label %bb1]
exit:
ret void
}
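; Illustrative sketch with made-up names, added for this write-up and not part
; of the upstream test: an edge is "critical" when its source block has several
; successors and its destination has several predecessors, as entry -> merge is
; below; a value can only be placed on such an edge after the edge is split.
define void @sketch_critical_edge(i8* %target, i32 %x, i32 %y) {
entry:
indirectbr i8* %target, [label %merge, label %other]
other:
br label %merge
merge:
%v = phi i32 [ %x, %entry ], [ %y, %other ]
tail call void @use(i32 %v)
ret void
}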
; Don't try to break critical edges when several indirectbr instructions point to a single block.
define void @multi(i32* nocapture readonly %p) {
; CHECK-LABEL: @multi(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
; CHECK-NEXT: [[INITVAL:%.*]] = load i32, i32* [[P]], align 4
; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
; CHECK-NEXT: i32 0, label [[BB0:%.*]]
; CHECK-NEXT: i32 1, label [[BB1:%.*]]
; CHECK-NEXT: ]
; CHECK: bb0:
; CHECK-NEXT: [[P_ADDR_0:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY:%.*]] ], [ [[NEXT0:%.*]], [[BB0]] ], [ [[NEXT1:%.*]], [[BB1]] ]
; CHECK-NEXT: [[OPCODE_0:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ]
; CHECK-NEXT: tail call void @use(i32 [[OPCODE_0]])
; CHECK-NEXT: [[NEXT0]] = getelementptr inbounds i32, i32* [[P_ADDR_0]], i64 1
; CHECK-NEXT: [[NEWP0:%.*]] = load i32, i32* [[P_ADDR_0]], align 4
; CHECK-NEXT: [[IDX0:%.*]] = sext i32 [[NEWP0]] to i64
; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX0]]
; CHECK-NEXT: [[NEWOP0:%.*]] = load i8*, i8** [[ARRAYIDX0]], align 8
; CHECK-NEXT: indirectbr i8* [[NEWOP0]], [label [[BB0]], label %bb1]
; CHECK: bb1:
; CHECK-NEXT: [[P_ADDR_1:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY]] ], [ [[NEXT0]], [[BB0]] ], [ [[NEXT1]], [[BB1]] ]
; CHECK-NEXT: [[OPCODE_1:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ]
; CHECK-NEXT: tail call void @use(i32 [[OPCODE_1]])
; CHECK-NEXT: [[NEXT1]] = getelementptr inbounds i32, i32* [[P_ADDR_1]], i64 1
; CHECK-NEXT: [[NEWP1:%.*]] = load i32, i32* [[P_ADDR_1]], align 4
; CHECK-NEXT: [[IDX1:%.*]] = sext i32 [[NEWP1]] to i64
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX1]]
; CHECK-NEXT: [[NEWOP1:%.*]] = load i8*, i8** [[ARRAYIDX1]], align 8
; CHECK-NEXT: indirectbr i8* [[NEWOP1]], [label [[BB0]], label %bb1]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
%initval = load i32, i32* %p, align 4
%initop = load i32, i32* %incdec.ptr, align 4
switch i32 %initop, label %exit [
i32 0, label %bb0
i32 1, label %bb1
]
bb0:
%p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ]
%opcode.0 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ]
tail call void @use(i32 %opcode.0)
%next0 = getelementptr inbounds i32, i32* %p.addr.0, i64 1
%newp0 = load i32, i32* %p.addr.0, align 4
%idx0 = sext i32 %newp0 to i64
%arrayidx0 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx0
%newop0 = load i8*, i8** %arrayidx0, align 8
indirectbr i8* %newop0, [label %bb0, label %bb1]
bb1:
%p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ]
%opcode.1 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ]
tail call void @use(i32 %opcode.1)
%next1 = getelementptr inbounds i32, i32* %p.addr.1, i64 1
%newp1 = load i32, i32* %p.addr.1, align 4
%idx1 = sext i32 %newp1 to i64
%arrayidx1 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx1
%newop1 = load i8*, i8** %arrayidx1, align 8
indirectbr i8* %newop1, [label %bb0, label %bb1]
exit:
ret void
}
; Make sure we do the right thing for cases where the indirectbr branches back
; to the block that contains it.
define void @loop(i64* nocapture readonly %p) {
; CHECK-LABEL: @loop(
; CHECK-NEXT: bb0.clone:
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: bb0:
; CHECK-NEXT: br label [[DOTSPLIT]]
; CHECK: .split:
; CHECK-NEXT: [[MERGE:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[BB0:%.*]] ], [ 0, [[BB0_CLONE:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[MERGE]]
; CHECK-NEXT: store i64 [[MERGE]], i64* [[TMP0]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[MERGE]], 1
; CHECK-NEXT: [[IDX:%.*]] = srem i64 [[MERGE]], 2
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 [[IDX]]
; CHECK-NEXT: [[TARGET:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
; CHECK-NEXT: indirectbr i8* [[TARGET]], [label [[BB0]], label %bb1]
; CHECK: bb1:
; CHECK-NEXT: ret void
;
entry:
br label %bb0
bb0:
%i = phi i64 [ %i.next, %bb0 ], [ 0, %entry ]
%tmp0 = getelementptr inbounds i64, i64* %p, i64 %i
store i64 %i, i64* %tmp0, align 4
%i.next = add nuw nsw i64 %i, 1
%idx = srem i64 %i, 2
%arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 %idx
%target = load i8*, i8** %arrayidx, align 8
indirectbr i8* %target, [label %bb0, label %bb1]
bb1:
ret void
}
; Don't do anything for cases that contain no phis.
define void @nophi(i32* %p) {
; CHECK-LABEL: @nophi(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
; CHECK-NEXT: i32 0, label [[BB0:%.*]]
; CHECK-NEXT: i32 1, label [[BB1:%.*]]
; CHECK-NEXT: ]
; CHECK: bb0:
; CHECK-NEXT: tail call void @use(i32 0)
; CHECK-NEXT: br label [[INDIRECTGOTO:%.*]]
; CHECK: bb1:
; CHECK-NEXT: tail call void @use(i32 1)
; CHECK-NEXT: br label [[INDIRECTGOTO]]
; CHECK: indirectgoto:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to i8*
; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, i8* [[TMP0]], i64 4
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SUNKADDR]] to i32*
; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[TMP1]], align 4
; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 [[IDX]]
; CHECK-NEXT: [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
; CHECK-NEXT: indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
%initop = load i32, i32* %incdec.ptr, align 4
switch i32 %initop, label %exit [
i32 0, label %bb0
i32 1, label %bb1
]
bb0:
tail call void @use(i32 0)
br label %indirectgoto
bb1:
tail call void @use(i32 1)
br label %indirectgoto
indirectgoto:
%newp = load i32, i32* %incdec.ptr, align 4
%idx = sext i32 %newp to i64
%arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 %idx
%newop = load i8*, i8** %arrayidx, align 8
indirectbr i8* %newop, [label %bb0, label %bb1]
exit:
ret void
}
; Don't do anything if the edge isn't critical.
define i32 @noncritical(i32 %k, i8* %p)
; CHECK-LABEL: @noncritical(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[D:%.*]] = add i32 [[K:%.*]], 1
; CHECK-NEXT: indirectbr i8* [[P:%.*]], [label [[BB0:%.*]], label %bb1]
; CHECK: bb0:
; CHECK-NEXT: [[R0:%.*]] = sub i32 [[K]], [[D]]
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[R1:%.*]] = sub i32 [[D]], [[K]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[R0]], [[BB0]] ], [ [[R1]], [[BB1:%.*]] ]
; CHECK-NEXT: ret i32 0
;
{
entry:
%d = add i32 %k, 1
indirectbr i8* %p, [label %bb0, label %bb1]
bb0:
%v00 = phi i32 [%k, %entry]
%v01 = phi i32 [%d, %entry]
%r0 = sub i32 %v00, %v01
br label %exit
bb1:
%v10 = phi i32 [%d, %entry]
%v11 = phi i32 [%k, %entry]
%r1 = sub i32 %v10, %v11
br label %exit
exit:
%v = phi i32 [%r0, %bb0], [%r1, %bb1]
ret i32 0
}

@@ -1,56 +0,0 @@
; RUN: opt -S -codegenprepare < %s | FileCheck %s --check-prefix=SLOW
; RUN: opt -S -codegenprepare -mattr=+bmi < %s | FileCheck %s --check-prefix=FAST_TZ
; RUN: opt -S -codegenprepare -mattr=+lzcnt < %s | FileCheck %s --check-prefix=FAST_LZ
target triple = "x86_64-unknown-unknown"
target datalayout = "e-n32:64"
; If the intrinsic is cheap, nothing should change.
; If the intrinsic is expensive, check if the input is zero to avoid the call.
; This is undoing speculation that may have been created by SimplifyCFG + InstCombine.
define i64 @cttz(i64 %A) {
entry:
%z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
ret i64 %z
; SLOW-LABEL: @cttz(
; SLOW: entry:
; SLOW: %cmpz = icmp eq i64 %A, 0
; SLOW: br i1 %cmpz, label %cond.end, label %cond.false
; SLOW: cond.false:
; SLOW: %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
; SLOW: br label %cond.end
; SLOW: cond.end:
; SLOW: %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
; SLOW: ret i64 %ctz
; FAST_TZ-LABEL: @cttz(
; FAST_TZ: %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
; FAST_TZ: ret i64 %z
}
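; Illustrative sketch with made-up names, added for this write-up and not part
; of the upstream test: the guarded form checked for above is equivalent to the
; unguarded call with the is-zero-undef flag cleared, which is what makes the
; despeculation legal.
define i64 @sketch_despeculated_cttz(i64 %A) {
entry:
%cmpz = icmp eq i64 %A, 0
br i1 %cmpz, label %cond.end, label %cond.false
cond.false:
%z = call i64 @llvm.cttz.i64(i64 %A, i1 true) ; flag may be set: %A is known non-zero here
br label %cond.end
cond.end:
%ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
ret i64 %ctz
}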
define i64 @ctlz(i64 %A) {
entry:
%z = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
ret i64 %z
; SLOW-LABEL: @ctlz(
; SLOW: entry:
; SLOW: %cmpz = icmp eq i64 %A, 0
; SLOW: br i1 %cmpz, label %cond.end, label %cond.false
; SLOW: cond.false:
; SLOW: %z = call i64 @llvm.ctlz.i64(i64 %A, i1 true)
; SLOW: br label %cond.end
; SLOW: cond.end:
; SLOW: %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
; SLOW: ret i64 %ctz
; FAST_LZ-LABEL: @ctlz(
; FAST_LZ: %z = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
; FAST_LZ: ret i64 %z
}
declare i64 @llvm.cttz.i64(i64, i1)
declare i64 @llvm.ctlz.i64(i64, i1)

@@ -1,64 +0,0 @@
; RUN: opt -codegenprepare -disable-cgp-branch-opts -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; The first cast should be sunk into block2, in order that the
; instruction selector can form an efficient
; i64 * i64 -> i128 multiplication.
define i128 @sink(i64* %mem1, i64* %mem2) {
; CHECK-LABEL: block1:
; CHECK-NEXT: load
block1:
%l1 = load i64, i64* %mem1
%s1 = sext i64 %l1 to i128
br label %block2
; CHECK-LABEL: block2:
; CHECK-NEXT: sext
; CHECK-NEXT: load
; CHECK-NEXT: sext
block2:
%l2 = load i64, i64* %mem2
%s2 = sext i64 %l2 to i128
%res = mul i128 %s1, %s2
ret i128 %res
}
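; Illustrative sketch with made-up names, added for this write-up and not part
; of the upstream test: SelectionDAG selects one basic block at a time, so the
; widening multiply is only formed when both extensions sit in the same block
; as the mul:
define i128 @sketch_widening_mul(i64 %a, i64 %b) {
entry:
%sa = sext i64 %a to i128
%sb = sext i64 %b to i128
%res = mul i128 %sa, %sb
ret i128 %res
}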
; The first cast should be hoisted into block1, in order that the
; instruction selector can form an extend-load.
define i64 @hoist(i32* %mem1, i32* %mem2) {
; CHECK-LABEL: block1:
; CHECK-NEXT: load
; CHECK-NEXT: sext
block1:
%l1 = load i32, i32* %mem1
br label %block2
; CHECK-LABEL: block2:
; CHECK-NEXT: load
; CHECK-NEXT: sext
block2:
%s1 = sext i32 %l1 to i64
%l2 = load i32, i32* %mem2
%s2 = sext i32 %l2 to i64
%res = mul i64 %s1, %s2
ret i64 %res
}
; Make sure the cast sink logic and OptimizeExtUses don't end up in an infinite
; loop.
define i128 @use_ext_source() {
block1:
%v1 = or i64 undef, undef
%v2 = zext i64 %v1 to i128
br i1 undef, label %block2, label %block3
block2:
%v3 = add i64 %v1, 1
%v4 = zext i64 %v3 to i128
br label %block3
block3:
%res = phi i128 [ %v2, %block1 ], [ %v4, %block2 ]
ret i128 %res
}

@@ -1,29 +0,0 @@
; RUN: opt %s -codegenprepare -mattr=+soft-float -S | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFP
; RUN: opt %s -codegenprepare -mattr=-soft-float -S | FileCheck %s -check-prefix=CHECK -check-prefix=HARDFP
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: @foo
; CHECK: entry:
; SOFTFP: fcmp
; HARDFP-NOT: fcmp
; CHECK: body:
; SOFTFP-NOT: fcmp
; HARDFP: fcmp
define void @foo(float %a, float %b) {
entry:
%c = fcmp oeq float %a, %b
br label %head
head:
%IND = phi i32 [ 0, %entry ], [ %IND.new, %body1 ]
%CMP = icmp slt i32 %IND, 1250
br i1 %CMP, label %body, label %tail
body:
br i1 %c, label %body1, label %tail
body1:
%IND.new = add i32 %IND, 1
br label %head
tail:
ret void
}

@@ -1,3 +0,0 @@
if not 'X86' in config.root.targets:
config.unsupported = True

@@ -1,18 +0,0 @@
; RUN: opt -S -disable-simplify-libcalls -codegenprepare < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; This is a workaround for PR23093: when building with -mkernel/-fno-builtin,
; we still generate fortified library calls.
; Check that we ignore two things:
; - attribute nobuiltin
; - TLI::has (always returns false thanks to -disable-simplify-libcalls)
; CHECK-NOT: _chk
; CHECK: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %len, i32 1, i1 false)
define void @test_nobuiltin(i8* %dst, i64 %len) {
call i8* @__memset_chk(i8* %dst, i32 0, i64 %len, i64 -1) nobuiltin
ret void
}
declare i8* @__memset_chk(i8*, i32, i64, i64)

@@ -1,32 +0,0 @@
; RUN: opt -S -codegenprepare < %s | FileCheck %s
target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc"
@rtti = external global i8
define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
entry:
%e = alloca i8
%tmpcast = bitcast i8* %e to i16*
invoke void @_CxxThrowException(i8* null, i8* null)
to label %catchret.dest unwind label %catch.dispatch
catch.dispatch: ; preds = %entry
%0 = catchswitch within none [label %catch] unwind to caller
catch: ; preds = %catch.dispatch
%1 = catchpad within %0 [i8* @rtti, i32 0, i16* %tmpcast]
catchret from %1 to label %catchret.dest
catchret.dest: ; preds = %catch
ret void
}
; CHECK-LABEL: define void @test1(
; CHECK: %[[alloca:.*]] = alloca i8
; CHECK-NEXT: %[[bc:.*]] = bitcast i8* %[[alloca]] to i16*
; CHECK: catchpad within {{.*}} [i8* @rtti, i32 0, i16* %[[bc]]]
declare void @_CxxThrowException(i8*, i8*)
declare i32 @__CxxFrameHandler3(...)

@@ -1,21 +0,0 @@
; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=true -addr-sink-new-select=true %s | FileCheck %s
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
define void @f2() {
entry:
%arraydecay = getelementptr inbounds [2 x i16], [2 x i16]* undef, i16 0, i16 0
%arrayidx1 = getelementptr inbounds [2 x i16], [2 x i16]* undef, i16 0, i16 1
br label %for.body
for.body: ; preds = %for.body, %entry
%e.03 = phi i16* [ %arraydecay, %entry ], [ %arrayidx1, %for.body ]
%tobool = icmp eq i16 undef, 0
br i1 undef, label %for.body, label %for.end
for.end: ; preds = %for.body
; CHECK: sunkaddr
%e.1.le = select i1 %tobool, i16* %arrayidx1, i16* %e.03
store i16 0, i16* %e.1.le, align 1
ret void
}

@@ -1,155 +0,0 @@
; RUN: opt -codegenprepare -S < %s | FileCheck %s
target triple = "x86_64-unknown-unknown"
; Nothing to sink and convert here.
define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y) {
entry:
%load = load double, double* %b, align 8
%cmp = fcmp olt double %load, %a
%sel = select i1 %cmp, i32 %x, i32 %y
ret i32 %sel
; CHECK-LABEL: @no_sink(
; CHECK: %load = load double, double* %b, align 8
; CHECK: %cmp = fcmp olt double %load, %a
; CHECK: %sel = select i1 %cmp, i32 %x, i32 %y
; CHECK: ret i32 %sel
}
; An 'fdiv' is expensive, so sink it rather than speculatively execute it.
define float @fdiv_true_sink(float %a, float %b) {
entry:
%div = fdiv float %a, %b
%cmp = fcmp ogt float %a, 1.0
%sel = select i1 %cmp, float %div, float 2.0
ret float %sel
; CHECK-LABEL: @fdiv_true_sink(
; CHECK: %cmp = fcmp ogt float %a, 1.0
; CHECK: br i1 %cmp, label %select.true.sink, label %select.end
; CHECK: select.true.sink:
; CHECK: %div = fdiv float %a, %b
; CHECK: br label %select.end
; CHECK: select.end:
; CHECK: %sel = phi float [ %div, %select.true.sink ], [ 2.000000e+00, %entry ]
; CHECK: ret float %sel
}
define float @fdiv_false_sink(float %a, float %b) {
entry:
%div = fdiv float %a, %b
%cmp = fcmp ogt float %a, 3.0
%sel = select i1 %cmp, float 4.0, float %div
ret float %sel
; CHECK-LABEL: @fdiv_false_sink(
; CHECK: %cmp = fcmp ogt float %a, 3.0
; CHECK: br i1 %cmp, label %select.end, label %select.false.sink
; CHECK: select.false.sink:
; CHECK: %div = fdiv float %a, %b
; CHECK: br label %select.end
; CHECK: select.end:
; CHECK: %sel = phi float [ 4.000000e+00, %entry ], [ %div, %select.false.sink ]
; CHECK: ret float %sel
}
define float @fdiv_both_sink(float %a, float %b) {
entry:
%div1 = fdiv float %a, %b
%div2 = fdiv float %b, %a
%cmp = fcmp ogt float %a, 5.0
%sel = select i1 %cmp, float %div1, float %div2
ret float %sel
; CHECK-LABEL: @fdiv_both_sink(
; CHECK: %cmp = fcmp ogt float %a, 5.0
; CHECK: br i1 %cmp, label %select.true.sink, label %select.false.sink
; CHECK: select.true.sink:
; CHECK: %div1 = fdiv float %a, %b
; CHECK: br label %select.end
; CHECK: select.false.sink:
; CHECK: %div2 = fdiv float %b, %a
; CHECK: br label %select.end
; CHECK: select.end:
; CHECK: %sel = phi float [ %div1, %select.true.sink ], [ %div2, %select.false.sink ]
; CHECK: ret float %sel
}
; But if the select is marked unpredictable, then don't turn it into a branch.
define float @unpredictable_select(float %a, float %b) {
; CHECK-LABEL: @unpredictable_select(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DIV:%.*]] = fdiv float %a, %b
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float %a, 1.000000e+00
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[DIV]], float 2.000000e+00, !unpredictable !0
; CHECK-NEXT: ret float [[SEL]]
;
entry:
%div = fdiv float %a, %b
%cmp = fcmp ogt float %a, 1.0
%sel = select i1 %cmp, float %div, float 2.0, !unpredictable !0
ret float %sel
}
!0 = !{}
; An 'fadd' is not too expensive, so it's ok to speculate.
define float @fadd_no_sink(float %a, float %b) {
%add = fadd float %a, %b
%cmp = fcmp ogt float 6.0, %a
%sel = select i1 %cmp, float %add, float 7.0
ret float %sel
; CHECK-LABEL: @fadd_no_sink(
; CHECK: %sel = select i1 %cmp, float %add, float 7.0
}
; Possible enhancement: sinkability is only calculated with the direct
; operand of the select, so we don't try to sink this. The fdiv cost is not
; taken into account.
define float @fdiv_no_sink(float %a, float %b) {
entry:
%div = fdiv float %a, %b
%add = fadd float %div, %b
%cmp = fcmp ogt float %a, 1.0
%sel = select i1 %cmp, float %add, float 8.0
ret float %sel
; CHECK-LABEL: @fdiv_no_sink(
; CHECK: %sel = select i1 %cmp, float %add, float 8.0
}
; Do not transform the CFG if the select operands may have side effects.
declare i64* @bar(i32, i32, i32)
declare i64* @baz(i32, i32, i32)
define i64* @calls_no_sink(i32 %in) {
%call1 = call i64* @bar(i32 1, i32 2, i32 3)
%call2 = call i64* @baz(i32 1, i32 2, i32 3)
%tobool = icmp ne i32 %in, 0
%sel = select i1 %tobool, i64* %call1, i64* %call2
ret i64* %sel
; CHECK-LABEL: @calls_no_sink(
; CHECK: %sel = select i1 %tobool, i64* %call1, i64* %call2
}
define i32 @sdiv_no_sink(i32 %a, i32 %b) {
%div1 = sdiv i32 %a, %b
%div2 = sdiv i32 %b, %a
%cmp = icmp sgt i32 %a, 5
%sel = select i1 %cmp, i32 %div1, i32 %div2
ret i32 %sel
; CHECK-LABEL: @sdiv_no_sink(
; CHECK: %sel = select i1 %cmp, i32 %div1, i32 %div2
}

File diff suppressed because it is too large.

@@ -1,19 +0,0 @@
; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
; Select when both offset and scale reg are present.
define i64 @test1(i1 %c, i64* %b, i64 %scale) {
; CHECK-LABEL: @test1
entry:
; CHECK-LABEL: entry:
%g = getelementptr inbounds i64, i64* %b, i64 %scale
%g1 = getelementptr inbounds i64, i64* %g, i64 8
%g2 = getelementptr inbounds i64, i64* %g, i64 16
%s = select i1 %c, i64* %g1, i64* %g2
; CHECK-NOT: sunkaddr
%v = load i64 , i64* %s, align 8
ret i64 %v
}

@@ -1,280 +0,0 @@
; RUN: opt -S -codegenprepare < %s | FileCheck %s
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
@x = external global [1 x [2 x <4 x float>]]
; Can we sink a single addressing mode computation to its use?
define void @test1(i1 %cond, i64* %base) {
; CHECK-LABEL: @test1
; CHECK: getelementptr i8, {{.+}} 40
entry:
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
%v = load i32, i32* %casted, align 4
br label %fallthrough
fallthrough:
ret void
}
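; Illustrative sketch with made-up names, added for this write-up and not part
; of the upstream test: the "getelementptr i8, {{.+}} 40" patterns in this file
; match the sunk form of the address, i64 element 5 re-expressed as a byte
; offset of 5 * 8 = 40 directly in the block that uses it, roughly:
define i32 @sketch_sunk_addr(i64* %base) {
entry:
%0 = bitcast i64* %base to i8*
%sunkaddr = getelementptr i8, i8* %0, i64 40
%1 = bitcast i8* %sunkaddr to i32*
%v = load i32, i32* %1, align 4
ret i32 %v
}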
declare void @foo(i32)
; Make sure sinking two copies of addressing mode into different blocks works
define void @test2(i1 %cond, i64* %base) {
; CHECK-LABEL: @test2
entry:
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
br i1 %cmp, label %next, label %fallthrough
next:
; CHECK-LABEL: next:
; CHECK: getelementptr i8, {{.+}} 40
%v2 = load i32, i32* %casted, align 4
call void @foo(i32 %v2)
br label %fallthrough
fallthrough:
ret void
}
; If we have two loads in the same block, we only need one copy of the
; addressing mode - instruction selection will duplicate it if needed.
define void @test3(i1 %cond, i64* %base) {
; CHECK-LABEL: @test3
entry:
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
; CHECK-NOT: getelementptr i8, {{.+}} 40
%v2 = load i32, i32* %casted, align 4
call void @foo(i32 %v2)
br label %fallthrough
fallthrough:
ret void
}
; Can we still sink addressing mode if there's a cold use of the
; address itself?
define void @test4(i1 %cond, i64* %base) {
; CHECK-LABEL: @test4
entry:
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
br i1 %cmp, label %rare.1, label %fallthrough
fallthrough:
ret void
rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr i8, {{.+}} 40
call void @slowpath(i32 %v1, i32* %casted) cold
br label %fallthrough
}
; Negative test - don't want to duplicate addressing into hot path
define void @test5(i1 %cond, i64* %base) {
; CHECK-LABEL: @test5
entry:
; CHECK: %addr = getelementptr
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
br i1 %cmp, label %rare.1, label %fallthrough
fallthrough:
ret void
rare.1:
call void @slowpath(i32 %v1, i32* %casted) ;; NOT COLD
br label %fallthrough
}
; Negative test - opt for size
define void @test6(i1 %cond, i64* %base) minsize {
; CHECK-LABEL: @test6
entry:
; CHECK: %addr = getelementptr
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
br i1 %cmp, label %rare.1, label %fallthrough
fallthrough:
ret void
rare.1:
call void @slowpath(i32 %v1, i32* %casted) cold
br label %fallthrough
}
; Make sure sinking two copies of addressing mode into different blocks works
; when there are cold paths for each.
define void @test7(i1 %cond, i64* %base) {
; CHECK-LABEL: @test7
entry:
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
br i1 %cmp, label %rare.1, label %next
next:
; CHECK-LABEL: next:
; CHECK: getelementptr i8, {{.+}} 40
%v2 = load i32, i32* %casted, align 4
call void @foo(i32 %v2)
%cmp2 = icmp eq i32 %v2, 0
br i1 %cmp2, label %rare.1, label %fallthrough
fallthrough:
ret void
rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr i8, {{.+}} 40
call void @slowpath(i32 %v1, i32* %casted) cold
br label %next
rare.2:
; CHECK-LABEL: rare.2:
; CHECK: getelementptr i8, {{.+}} 40
call void @slowpath(i32 %v2, i32* %casted) cold
br label %fallthrough
}
declare void @slowpath(i32, i32*)
; Make sure we don't end up in an infinite loop after we fail to sink.
; CHECK-LABEL: define void @test8
; CHECK: %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
define void @test8() {
allocas:
%aFOO_load = load float*, float** undef
%aFOO_load_ptr2int = ptrtoint float* %aFOO_load to i64
%aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
%aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to i8*
%ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
br label %load.i145
load.i145:
%ptr.i143 = bitcast i8* %ptr to <4 x float>*
%valall.i144 = load <4 x float>, <4 x float>* %ptr.i143, align 4
%x_offset = getelementptr [1 x [2 x <4 x float>]], [1 x [2 x <4 x float>]]* @x, i32 0, i64 0
br label %pl_loop.i.i122
pl_loop.i.i122:
br label %pl_loop.i.i122
}
; Make sure we can sink address computation even
; if there is a cycle in phi nodes.
define void @test9(i1 %cond, i64* %base) {
; CHECK-LABEL: @test9
entry:
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br label %header
header:
%iv = phi i32 [0, %entry], [%iv.inc, %backedge]
%casted.loop = phi i32* [%casted, %entry], [%casted.merged, %backedge]
br i1 %cond, label %if.then, label %backedge
if.then:
call void @foo(i32 %iv)
%addr.1 = getelementptr inbounds i64, i64* %base, i64 5
%casted.1 = bitcast i64* %addr.1 to i32*
br label %backedge
backedge:
; CHECK-LABEL: backedge:
; CHECK: getelementptr i8, {{.+}} 40
%casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then]
%v = load i32, i32* %casted.merged, align 4
call void @foo(i32 %v)
%iv.inc = add i32 %iv, 1
%cmp = icmp slt i32 %iv.inc, 1000
br i1 %cmp, label %header, label %exit
exit:
ret void
}
; Make sure we can eliminate a select when both arguments perform equivalent
; address computation.
define void @test10(i1 %cond, i64* %base) {
; CHECK-LABEL: @test10
; CHECK: getelementptr i8, {{.+}} 40
; CHECK-NOT: select
entry:
%gep1 = getelementptr inbounds i64, i64* %base, i64 5
%gep1.casted = bitcast i64* %gep1 to i32*
%base.casted = bitcast i64* %base to i32*
%gep2 = getelementptr inbounds i32, i32* %base.casted, i64 10
%casted.merged = select i1 %cond, i32* %gep1.casted, i32* %gep2
%v = load i32, i32* %casted.merged, align 4
call void @foo(i32 %v)
ret void
}
; Found by fuzzer: getSExtValue() on a constant wider than 64 bits.
define void @i96_mul(i1* %base, i96 %offset) {
BB:
;; RHS = 0x7FFFFFFFFFFFFFFFFFFFFFFF
%B84 = mul i96 %offset, 39614081257132168796771975167
%G23 = getelementptr i1, i1* %base, i96 %B84
store i1 false, i1* %G23
ret void
}

@@ -1,39 +0,0 @@
; RUN: opt -S -codegenprepare < %s | FileCheck %s -check-prefix=CHECK -check-prefix=GEP
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: @load_cast_gep
; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
; GEP: getelementptr i8, i8 addrspace(1)* [[CAST]], i64 40
define void @load_cast_gep(i1 %cond, i64* %base) {
entry:
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = addrspacecast i64* %addr to i32 addrspace(1)*
br i1 %cond, label %if.then, label %fallthrough
if.then:
%v = load i32, i32 addrspace(1)* %casted, align 4
br label %fallthrough
fallthrough:
ret void
}
; CHECK-LABEL: @store_gep_cast
; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
; GEP: getelementptr i8, i8 addrspace(1)* [[CAST]], i64 20
define void @store_gep_cast(i1 %cond, i64* %base) {
entry:
%casted = addrspacecast i64* %base to i32 addrspace(1)*
%addr = getelementptr inbounds i32, i32 addrspace(1)* %casted, i64 5
br i1 %cond, label %if.then, label %fallthrough
if.then:
store i32 0, i32 addrspace(1)* %addr, align 4
br label %fallthrough
fallthrough:
ret void
}

@@ -1,95 +0,0 @@
;; x86 is chosen to show the transform when 8-bit and 16-bit registers are available.
; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X86
; No change for x86 because 16-bit registers are part of the architecture.
define i32 @widen_switch_i16(i32 %a) {
entry:
%trunc = trunc i32 %a to i16
switch i16 %trunc, label %sw.default [
i16 1, label %sw.bb0
i16 -1, label %sw.bb1
]
sw.bb0:
br label %return
sw.bb1:
br label %return
sw.default:
br label %return
return:
%retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
ret i32 %retval
; X86-LABEL: @widen_switch_i16(
; X86: %trunc = trunc i32 %a to i16
; X86-NEXT: switch i16 %trunc, label %sw.default [
; X86-NEXT: i16 1, label %sw.bb0
; X86-NEXT: i16 -1, label %sw.bb1
}
; Widen to 32-bit from a smaller, non-native type.
define i32 @widen_switch_i17(i32 %a) {
entry:
%trunc = trunc i32 %a to i17
switch i17 %trunc, label %sw.default [
i17 10, label %sw.bb0
i17 -1, label %sw.bb1
]
sw.bb0:
br label %return
sw.bb1:
br label %return
sw.default:
br label %return
return:
%retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
ret i32 %retval
; X86-LABEL: @widen_switch_i17(
; X86: %0 = zext i17 %trunc to i32
; X86-NEXT: switch i32 %0, label %sw.default [
; X86-NEXT: i32 10, label %sw.bb0
; X86-NEXT: i32 131071, label %sw.bb1
}
; If the switch condition is a sign-extended function argument, then the
; condition and cases should be sign-extended rather than zero-extended
; because the sign-extension can be optimized away.
define i32 @widen_switch_i16_sext(i2 signext %a) {
entry:
switch i2 %a, label %sw.default [
i2 1, label %sw.bb0
i2 -1, label %sw.bb1
]
sw.bb0:
br label %return
sw.bb1:
br label %return
sw.default:
br label %return
return:
%retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
ret i32 %retval
; X86-LABEL: @widen_switch_i16_sext(
; X86: %0 = sext i2 %a to i8
; X86-NEXT: switch i8 %0, label %sw.default [
; X86-NEXT: i8 1, label %sw.bb0
; X86-NEXT: i8 -1, label %sw.bb1
}
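; Illustrative sketch with made-up names, added for this write-up and not part
; of the upstream test: with a zero-extending widening the same switch would
; need a real zext of the already sign-extended argument, and the -1 case
; would become the unsigned value 3:
define i32 @sketch_widen_zext(i2 signext %a) {
entry:
%0 = zext i2 %a to i8
switch i8 %0, label %sw.default [
i8 1, label %sw.bb0
i8 3, label %sw.bb1
]
sw.bb0:
br label %return
sw.bb1:
br label %return
sw.default:
br label %return
return:
%retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
ret i32 %retval
}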

@@ -1,105 +0,0 @@
; RUN: opt -S -codegenprepare -mcpu=core-avx2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
; RUN: opt -S -codegenprepare -mcpu=corei7 %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SSE2
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin10.9.0"
define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) {
; CHECK-LABEL: @test_8bit
; CHECK: if_true:
; CHECK-NOT: shufflevector
; CHECK: if_false:
; CHECK-NOT: shufflevector
; CHECK: shl <16 x i8> %lhs, %mask
%mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer
br i1 %tst, label %if_true, label %if_false
if_true:
ret <16 x i8> %mask
if_false:
%res = shl <16 x i8> %lhs, %mask
ret <16 x i8> %res
}
define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) {
; CHECK-LABEL: @test_16bit
; CHECK: if_true:
; CHECK-NOT: shufflevector
; CHECK: if_false:
; CHECK: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector
; CHECK: shl <8 x i16> %lhs, [[SPLAT]]
%mask = shufflevector <8 x i16> %tmp, <8 x i16> undef, <8 x i32> zeroinitializer
br i1 %tst, label %if_true, label %if_false
if_true:
ret <8 x i16> %mask
if_false:
%res = shl <8 x i16> %lhs, %mask
ret <8 x i16> %res
}
define <4 x i32> @test_notsplat(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) {
; CHECK-LABEL: @test_notsplat
; CHECK: if_true:
; CHECK-NOT: shufflevector
; CHECK: if_false:
; CHECK-NOT: shufflevector
; CHECK: shl <4 x i32> %lhs, %mask
%mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
br i1 %tst, label %if_true, label %if_false
if_true:
ret <4 x i32> %mask
if_false:
%res = shl <4 x i32> %lhs, %mask
ret <4 x i32> %res
}
define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) {
; CHECK-AVX2-LABEL: @test_32bit
; CHECK-AVX2: if_false:
; CHECK-AVX2-NOT: shufflevector
; CHECK-AVX2: ashr <4 x i32> %lhs, %mask
; CHECK-SSE2-LABEL: @test_32bit
; CHECK-SSE2: if_false:
; CHECK-SSE2: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector
; CHECK-SSE2: ashr <4 x i32> %lhs, [[SPLAT]]
%mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
br i1 %tst, label %if_true, label %if_false
if_true:
ret <4 x i32> %mask
if_false:
%res = ashr <4 x i32> %lhs, %mask
ret <4 x i32> %res
}
define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) {
; CHECK-AVX2-LABEL: @test_64bit
; CHECK-AVX2: if_false:
; CHECK-AVX2-NOT: shufflevector
; CHECK-AVX2: lshr <2 x i64> %lhs, %mask
; CHECK-SSE2-LABEL: @test_64bit
; CHECK-SSE2: if_false:
; CHECK-SSE2: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector
; CHECK-SSE2: lshr <2 x i64> %lhs, [[SPLAT]]
%mask = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
br i1 %tst, label %if_true, label %if_false
if_true:
ret <2 x i64> %mask
if_false:
%res = lshr <2 x i64> %lhs, %mask
ret <2 x i64> %res
}