Imported Upstream version 6.4.0.137

Former-commit-id: 943baa9f16a098c33e129777827f3a9d20da00d6
Xamarin Public Jenkins (auto-signing)
2019-07-26 19:53:28 +00:00
parent e9207cf623
commit ef583813eb
2712 changed files with 74169 additions and 40587 deletions


@@ -28,6 +28,28 @@ return: ; preds = %if.then172, %cond.e
ret void
}
; Avoid an assert/bad codegen in LD1LANEPOST lowering by not forming
; LD1LANEPOST ISD nodes with a non-constant lane index.
define <4 x i32> @f2(i32 *%p, <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2, i32 %idx) {
%L0 = load i32, i32* %p
%p1 = getelementptr i32, i32* %p, i64 1
%L1 = load i32, i32* %p1
%v = select <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2
%vret = insertelement <4 x i32> %v, i32 %L0, i32 %idx
store i32 %L1, i32 *%p
ret <4 x i32> %vret
}
; Check that a cycle is avoided during isel between the LD1LANEPOST instruction and the load of %L1.
define <4 x i32> @f3(i32 *%p, <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2) {
%L0 = load i32, i32* %p
%p1 = getelementptr i32, i32* %p, i64 1
%L1 = load i32, i32* %p1
%v = select <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2
%vret = insertelement <4 x i32> %v, i32 %L0, i32 %L1
ret <4 x i32> %vret
}
; Function Attrs: nounwind readnone
declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #1
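For contrast with @f2 and @f3 above, the pattern LD1LANEPOST formation does handle is a lane insert whose index is a compile-time constant. A minimal sketch, using the same typed-pointer IR style as the tests above (the function name @f_const is illustrative, not part of this diff):

define <4 x i32> @f_const(i32* %p, <4 x i32> %v) {
; Illustrative sketch, not part of the diff. With a constant lane
; index, ISel may fold the load (plus a following pointer increment)
; into a single post-indexed LD1 lane load.
  %L0 = load i32, i32* %p
  %vret = insertelement <4 x i32> %v, i32 %L0, i32 0
  ret <4 x i32> %vret
}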


@@ -1,27 +1,31 @@
; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefix=CYCLONE --check-prefix=ALL
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefix=KRYO --check-prefix=ALL
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefix=FALKOR --check-prefix=ALL
; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefixes=ALL,CYCLONE
; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 < %s | FileCheck %s -check-prefixes=CYCLONE-FULLFP16
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m1 < %s | FileCheck %s -check-prefixes=ALL,OTHERS
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m3 < %s | FileCheck %s -check-prefixes=ALL,OTHERS
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefixes=ALL,OTHERS
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefixes=ALL,OTHERS
; rdar://11481771
; rdar://13713797
declare void @bar(half, float, double, <2 x double>)
declare void @bari(i32, i32)
declare void @barl(i64, i64)
declare void @barf(float, float)
define void @t1() nounwind ssp {
entry:
; ALL-LABEL: t1:
; ALL-NOT: fmov
; CYCLONE: fmov d0, xzr
; CYCLONE: fmov d1, xzr
; ALL: ldr h0,{{.*}}
; CYCLONE: fmov s1, wzr
; CYCLONE: fmov d2, xzr
; CYCLONE: fmov d3, xzr
; KRYO: movi v0.2d, #0000000000000000
; KRYO: movi v1.2d, #0000000000000000
; KRYO: movi v2.2d, #0000000000000000
; KRYO: movi v3.2d, #0000000000000000
; FALKOR: movi v0.2d, #0000000000000000
; FALKOR: movi v1.2d, #0000000000000000
; FALKOR: movi v2.2d, #0000000000000000
; FALKOR: movi v3.2d, #0000000000000000
tail call void @bar(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00) nounwind
; CYCLONE: movi.16b v3, #0
; CYCLONE-FULLFP16: fmov h0, wzr
; CYCLONE-FULLFP16: fmov s1, wzr
; CYCLONE-FULLFP16: fmov d2, xzr
; CYCLONE-FULLFP16: movi.16b v3, #0
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
tail call void @bar(half 0.000000e+00, float 0.000000e+00, double 0.000000e+00, <2 x double> <double 0.000000e+00, double 0.000000e+00>) nounwind
ret void
}
@@ -29,8 +33,8 @@ define void @t2() nounwind ssp {
entry:
; ALL-LABEL: t2:
; ALL-NOT: mov w0, wzr
; ALL: mov w0, #0
; ALL: mov w1, #0
; ALL: mov w{{[0-3]+}}, #0
; ALL: mov w{{[0-3]+}}, #0
tail call void @bari(i32 0, i32 0) nounwind
ret void
}
@@ -39,8 +43,8 @@ define void @t3() nounwind ssp {
entry:
; ALL-LABEL: t3:
; ALL-NOT: mov x0, xzr
; ALL: mov x0, #0
; ALL: mov x1, #0
; ALL: mov x{{[0-3]+}}, #0
; ALL: mov x{{[0-3]+}}, #0
tail call void @barl(i64 0, i64 0) nounwind
ret void
}
@@ -48,26 +52,21 @@ entry:
define void @t4() nounwind ssp {
; ALL-LABEL: t4:
; ALL-NOT: fmov
; CYCLONE: fmov s0, wzr
; CYCLONE: fmov s1, wzr
; KRYO: movi v0.2d, #0000000000000000
; KRYO: movi v1.2d, #0000000000000000
; FALKOR: movi v0.2d, #0000000000000000
; FALKOR: movi v1.2d, #0000000000000000
; CYCLONE: fmov s{{[0-3]+}}, wzr
; CYCLONE: fmov s{{[0-3]+}}, wzr
; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr
; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind
ret void
}
declare void @bar(double, double, double, double)
declare void @bari(i32, i32)
declare void @barl(i64, i64)
declare void @barf(float, float)
; We used to produce spills+reloads for a Q register with zero cycle zeroing
; enabled.
; ALL-LABEL: foo:
; ALL-NOT: str {{q[0-9]+}}
; ALL-NOT: ldr {{q[0-9]+}}
; ALL-NOT: str q{{[0-9]+}}
; ALL-NOT: ldr q{{[0-9]+}}
define double @foo(i32 %n) {
entry:
br label %for.body
@@ -90,8 +89,7 @@ for.end:
define <2 x i64> @t6() {
; ALL-LABEL: t6:
; CYCLONE: movi.16b v0, #0
; KRYO: movi v0.2d, #0000000000000000
; FALKOR: movi v0.2d, #0000000000000000
; OTHERS: movi v0.2d, #0000000000000000
ret <2 x i64> zeroinitializer
}


@@ -353,3 +353,28 @@ body: |
bb.1:
RET_ReallyLR
...
---
# Check that non-base registers are considered live when finding a
# scratch register by making sure we don't use %x2 for the scratch
# register for the inserted ORRXrs.
# CHECK-LABEL: name: hwpf_offreg
# CHECK: %x3 = ORRXrs %xzr, %x1, 0
# CHECK: %w10 = LDRWroX %x3, %x2, 0, 0
name: hwpf_offreg
tracksRegLiveness: true
body: |
bb.0:
liveins: %w0, %x1, %x2, %x17, %x18
%w10 = LDRWroX %x1, %x2, 0, 0 :: ("aarch64-strided-access" load 4)
%x2 = ORRXrs %xzr, %x10, 0
%w26 = LDRWroX %x1, %x2, 0, 0
%w0 = SUBWri %w0, 1, 0
%wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
Bcc 9, %bb.0, implicit %nzcv
bb.1:
RET_ReallyLR
...


@@ -0,0 +1,20 @@
;RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
@var = global i32 0
define void @test_inline_constraint_S() {
; CHECK-LABEL: test_inline_constraint_S:
call void asm sideeffect "adrp x0, $0", "S"(i32* @var)
call void asm sideeffect "add x0, x0, :lo12:$0", "S"(i32* @var)
; CHECK: adrp x0, var
; CHECK: add x0, x0, :lo12:var
ret void
}
define i32 @test_inline_constraint_S_label(i1 %in) {
; CHECK-LABEL: test_inline_constraint_S_label:
call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc))
; CHECK: adr x0, .Ltmp{{[0-9]+}}
br i1 %in, label %loc, label %loc2
loc:
ret i32 0
loc2:
ret i32 42
}


@@ -0,0 +1,35 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
# Ensure references to scavenged stack slots in the CSR area use the
# FP as a base when the stack pointer must be aligned to something
# larger than required by the target. This is necessary because the
# alignment padding area is between the CSR area and the SP, so the SP
# cannot be used to reference the CSR area.
name: test
tracksRegLiveness: true
frameInfo:
maxAlignment: 64
# CHECK: stack:
# CHECK: id: 0, name: '', type: default, offset: -64, size: 4, alignment: 64
# CHECK-NEXT: stack-id: 0
# CHECK-NEXT: local-offset: -64
# CHECK: id: 1, name: '', type: default, offset: -20, size: 4, alignment: 4
# CHECK-NEXT: stack-id: 0
# CHECK-NEXT: local-offset: -68
stack:
- { id: 0, size: 4, alignment: 64, local-offset: -64 }
- { id: 1, size: 4, alignment: 4, local-offset: -68 }
# CHECK: body:
# CHECK: %sp = ANDXri killed %{{x[0-9]+}}, 7865
# CHECK: STRSui %s0, %sp, 0
# CHECK: STURSi %s0, %fp, -4
body: |
bb.0.entry:
liveins: %s0
STRSui %s0, %stack.0, 0
STRSui %s0, %stack.1, 0
; Force preserve a CSR to create a hole in the CSR stack region.
%x28 = IMPLICIT_DEF
RET_ReallyLR
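At the IR level, the situation this MIR exercises arises whenever a local needs more alignment than the target stack guarantees. A minimal sketch, assuming AArch64's default 16-byte stack alignment (names are illustrative, not part of this diff):

define void @overaligned_local() {
; Illustrative sketch, not part of the diff. align 64 exceeds the
; default stack alignment, so the prologue must realign SP; slots
; placed near the CSR area then have to be addressed off the FP,
; because the realignment padding sits between the CSR area and SP.
  %slot = alloca i32, align 64
  store volatile i32 0, i32* %slot
  ret void
}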


@@ -0,0 +1,25 @@
# RUN: llc -march=hexagon -run-pass if-converter %s -o - | FileCheck %s
# Make sure this gets if-converted and it doesn't crash.
# CHECK-LABEL: bb.0
# CHECK: PS_jmpret %r31
# CHECK-NOT: bb.{{[1-9]+}}:
---
name: fred
tracksRegLiveness: true
body: |
bb.0:
successors: %bb.1, %bb.2
liveins: %r0
renamable %p0 = C2_cmpeqi killed renamable %r0, 0
J2_jumpf killed renamable %p0, %bb.2, implicit-def dead %pc
bb.1:
S4_storeiri_io undef renamable %r0, 0, 32768 :: (store 4 into `i32* undef`)
PS_jmpret %r31, implicit-def dead %pc
bb.2:
S4_storeiri_io undef renamable %r0, 0, 32768 :: (store 4 into `i32* undef`)
PS_jmpret %r31, implicit-def dead %pc
...


@@ -0,0 +1,34 @@
# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -run-pass=if-converter %s -o - | FileCheck %s
---
name: foo
body: |
bb.0:
liveins: %x0, %x3
successors: %bb.1(0x40000000), %bb.2(0x40000000)
dead renamable %x3 = ANDIo8 killed renamable %x3, 1, implicit-def dead %cr0, implicit-def %cr0gt
%cr2lt = CROR %cr0gt, %cr0gt
BCn killed renamable %cr2lt, %bb.2
B %bb.1
bb.1:
renamable %x3 = LIS8 4096
MTLR8 %x0, implicit-def %lr8
BLR8 implicit %lr8, implicit %rm, implicit %x3
bb.2:
renamable %x3 = LIS8 4096
MTLR8 %x0, implicit-def %lr8
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
# Diamond testcase with equivalent branches terminating in returns.
# CHECK: body: |
# CHECK: bb.0:
# CHECK: dead renamable %x3 = ANDIo8 killed renamable %x3, 1, implicit-def dead %cr0, implicit-def %cr0gt
# CHECK: %cr2lt = CROR %cr0gt, %cr0gt
# CHECK: renamable %x3 = LIS8 4096
# CHECK: MTLR8 %x0, implicit-def %lr8
# CHECK: BLR8 implicit %lr8, implicit %rm, implicit %x3

File diff suppressed because it is too large.


@@ -0,0 +1,13 @@
; Negative test. The constraint 'l' represents the register 'lo'.
; Check error message in case of invalid usage.
;
; RUN: not llc -march=mips -filetype=obj < %s 2>&1 | FileCheck %s
define void @constraint_l() nounwind {
entry:
; CHECK: error: invalid operand for instruction
tail call i16 asm sideeffect "addiu $0,$1,$2", "=l,r,r,~{$1}"(i16 0, i16 0)
ret void
}


@@ -41,5 +41,15 @@ entry:
%4 = call i32 asm sideeffect "\09mtlo $3 \0A\09\09madd $1, $2 ", "=l,r,r,r"(i32 7, i32 6, i32 44) nounwind
store volatile i32 %4, i32* %bosco, align 4
; Check the 'l' constraint for 16-bit type.
; CHECK: #APP
; CHECK: mtlo ${{[0-9]+}}
; CHECK-NEXT: madd ${{[0-9]+}}, ${{[0-9]+}}
; CHECK: #NO_APP
; CHECK-NEXT: mflo ${{[0-9]+}}
%bosco16 = alloca i16, align 4
%5 = call i16 asm sideeffect "\09mtlo $3 \0A\09\09madd $1, $2 ", "=l,r,r,r"(i32 7, i32 6, i32 44) nounwind
store volatile i16 %5, i16* %bosco16, align 4
ret i32 0
}


@@ -1 +1 @@
67733795ed5de0cf155b38a0ff3bb1a92a429e27
f2ca07367b991eaea539df025ca1dc1a8d20b401


@@ -0,0 +1,75 @@
; RUN: opt -early-cse-memssa -loop-rotate -licm -loop-rotate -S %s -o - | FileCheck %s
; ModuleID = 'bugpoint-reduced-simplified.bc'
source_filename = "bugpoint-output-8903f29.bc"
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
define void @test(i64 %arg.ssa, i64 %arg.nb) local_unnamed_addr {
; Ensure that loop rotation doesn't duplicate the call to
; llvm.ppc.is.decremented.ctr.nonzero
; CHECK-LABEL: test
; CHECK: call i1 @llvm.ppc.is.decremented.ctr.nonzero
; CHECK-NOT: call i1 @llvm.ppc.is.decremented.ctr.nonzero
; CHECK: declare i1 @llvm.ppc.is.decremented.ctr.nonzero
entry:
switch i32 undef, label %BB_8 [
i32 -2, label %BB_9
i32 0, label %BB_9
]
BB_1: ; preds = %BB_12, %BB_4
%bcount.1.us = phi i64 [ %.810.us, %BB_4 ], [ 0, %BB_12 ]
%0 = add i64 %arg.ssa, %bcount.1.us
%.568.us = load i32, i32* undef, align 4
%.15.i.us = icmp slt i32 0, %.568.us
br i1 %.15.i.us, label %BB_3, label %BB_2
BB_2: ; preds = %BB_1
%.982.us = add nsw i64 %0, 1
unreachable
BB_3: ; preds = %BB_1
%1 = add i64 %arg.ssa, %bcount.1.us
%2 = add i64 %1, 1
%3 = call i1 @llvm.ppc.is.decremented.ctr.nonzero()
br i1 %3, label %BB_4, label %BB_7
BB_4: ; preds = %BB_3
%.810.us = add nuw nsw i64 %bcount.1.us, 1
br label %BB_1
BB_5: ; preds = %BB_7, %BB_5
%lsr.iv20.i116 = phi i64 [ %2, %BB_7 ], [ %lsr.iv.next21.i126, %BB_5 ]
%lsr.iv.next21.i126 = add i64 %lsr.iv20.i116, 1
br i1 undef, label %BB_5, label %BB_6
BB_6: ; preds = %BB_5
ret void
BB_7: ; preds = %BB_3
br label %BB_5
BB_8: ; preds = %entry
ret void
BB_9: ; preds = %entry, %entry
br label %BB_10
BB_10: ; preds = %BB_9
br label %BB_11
BB_11: ; preds = %BB_11, %BB_10
br i1 undef, label %BB_11, label %BB_12
BB_12: ; preds = %BB_11
call void @llvm.ppc.mtctr.i64(i64 %arg.nb)
br label %BB_1
}
; Function Attrs: nounwind
declare void @llvm.ppc.mtctr.i64(i64) #0
; Function Attrs: nounwind
declare i1 @llvm.ppc.is.decremented.ctr.nonzero() #0
attributes #0 = { nounwind }
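The intrinsic pair declared above has a simple canonical shape. A minimal sketch of a CTR-style countdown loop (illustrative only, not part of this diff; the declarations match those in the test):

declare void @llvm.ppc.mtctr.i64(i64)
declare i1 @llvm.ppc.is.decremented.ctr.nonzero()

define void @ctr_loop(i64 %n) {
entry:
  ; Load the trip count into CTR once, before entering the loop.
  call void @llvm.ppc.mtctr.i64(i64 %n)
  br label %loop
loop:
  ; Each call decrements CTR and tests it; duplicating the call
  ; (e.g. via loop rotation) would decrement CTR twice per
  ; iteration, which is exactly what the CHECK-NOT above guards.
  %nonzero = call i1 @llvm.ppc.is.decremented.ctr.nonzero()
  br i1 %nonzero, label %loop, label %exit
exit:
  ret void
}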


@@ -10,16 +10,10 @@ define i64 @test_add_i64(i64 %arg1, i64 %arg2) {
;
; X32-LABEL: test_add_i64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: movl 16(%ebp), %eax
; X32-NEXT: movl 20(%ebp), %edx
; X32-NEXT: addl 8(%ebp), %eax
; X32-NEXT: adcl 12(%ebp), %edx
; X32-NEXT: popl %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X32-NEXT: retl
%ret = add i64 %arg1, %arg2
ret i64 %ret


@@ -37,6 +37,8 @@
; CHECK-NEXT: X86 PIC Global Base Reg Initialization
; CHECK-NEXT: Expand ISel Pseudo-instructions
; CHECK-NEXT: Local Stack Slot Allocation
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: X86 EFLAGS copy lowering
; CHECK-NEXT: X86 WinAlloca Expander
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass


@@ -1,37 +0,0 @@
; RUN: llc < %s -verify-machineinstrs -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.7.0"
; In the code below we need to copy the EFLAGS because of scheduling constraints.
; When copying the EFLAGS we need to write to the stack with push/pop. This forces
; us to emit the prolog.
; CHECK: main
; CHECK: subq{{.*}}rsp
; CHECK: ret
define i32 @main(i32 %arg, i8** %arg1) nounwind {
bb:
%tmp = alloca i32, align 4 ; [#uses=3 type=i32*]
%tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*]
%tmp3 = alloca i32 ; [#uses=1 type=i32*]
store volatile i32 1, i32* %tmp, align 4
store volatile i32 1, i32* %tmp2, align 4
br label %bb4
bb4: ; preds = %bb4, %bb
%tmp6 = load volatile i32, i32* %tmp2, align 4 ; [#uses=1 type=i32]
%tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32]
store volatile i32 %tmp7, i32* %tmp2, align 4
%tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1]
%tmp9 = load volatile i32, i32* %tmp ; [#uses=1 type=i32]
%tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32]
store volatile i32 %tmp10, i32* %tmp3
br i1 %tmp8, label %bb11, label %bb4
bb11: ; preds = %bb4
%tmp12 = load volatile i32, i32* %tmp, align 4 ; [#uses=1 type=i32]
ret i32 %tmp12
}


@@ -1,100 +1,110 @@
; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386
; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664
; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664
; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s -check-prefix=x8664-sahf
; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664-sahf
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=corei7 %s -o - | FileCheck %s -check-prefix=x8664-sahf
; TODO: Reenable verify-machineinstr once the if (!AXDead) // FIXME
; in X86InstrInfo::copyPhysReg() is resolved.
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
declare i32 @foo()
declare i32 @bar(i64)
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) {
; i386-LABEL: test_intervening_call:
; i386: cmpxchg8b
; i386-NEXT: pushl %eax
; i386-NEXT: seto %al
; i386-NEXT: lahf
; i386-NEXT: movl %eax, [[FLAGS:%.*]]
; i386-NEXT: popl %eax
; i386-NEXT: subl $8, %esp
; i386-NEXT: pushl %edx
; i386-NEXT: pushl %eax
; i386-NEXT: calll bar
; i386-NEXT: addl $16, %esp
; i386-NEXT: movl [[FLAGS]], %eax
; i386-NEXT: addb $127, %al
; i386-NEXT: sahf
; i386-NEXT: jne
; In the following case we get a long chain of EFLAGS save/restore due to
; a sequence of:
; In the following case when using fast scheduling we get a long chain of
; EFLAGS save/restore due to a sequence of:
; cmpxchg8b (implicit-def eflags)
; eax = copy eflags
; adjcallstackdown32
; ...
; use of eax
; During PEI the adjcallstackdown32 is replaced with the subl which
; clobbers eflags, effectively interfering in the liveness interval.
; Is this a case we care about? Maybe not, considering this issue
; only happens when the fast pre-regalloc scheduler is enforced. A more
; performant scheduler would move the adjcallstackdown32 out of the
; eflags liveness interval.
; i386f-LABEL: test_intervening_call:
; i386f: cmpxchg8b
; i386f-NEXT: pushl %eax
; i386f-NEXT: seto %al
; i386f-NEXT: lahf
; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
; i386f-NEXT: popl %eax
; i386f-NEXT: subl $8, %esp
; i386f-NEXT: pushl %eax
; i386f-NEXT: movl %ecx, %eax
; i386f-NEXT: addb $127, %al
; i386f-NEXT: sahf
; i386f-NEXT: popl %eax
; i386f-NEXT: pushl %eax
; i386f-NEXT: seto %al
; i386f-NEXT: lahf
; i386f-NEXT: movl %eax, %esi
; i386f-NEXT: popl %eax
; i386f-NEXT: pushl %edx
; i386f-NEXT: pushl %eax
; i386f-NEXT: calll bar
; i386f-NEXT: addl $16, %esp
; i386f-NEXT: movl %esi, %eax
; i386f-NEXT: addb $127, %al
; x8664-LABEL: test_intervening_call:
; x8664: cmpxchgq
; x8664: pushfq
; x8664-NEXT: popq [[FLAGS:%.*]]
; x8664-NEXT: movq %rax, %rdi
; x8664-NEXT: callq bar
; x8664-NEXT: pushq [[FLAGS]]
; x8664-NEXT: popfq
; x8664-NEXT: jne
; x8664-sahf-LABEL: test_intervening_call:
; x8664-sahf: cmpxchgq
; x8664-sahf: pushq %rax
; x8664-sahf-NEXT: seto %al
; x8664-sahf-NEXT: lahf
; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]]
; x8664-sahf-NEXT: popq %rax
; x8664-sahf-NEXT: movq %rax, %rdi
; x8664-sahf-NEXT: callq bar
; RAX is dead, no need to push and pop it.
; x8664-sahf-NEXT: movq [[FLAGS]], %rax
; x8664-sahf-NEXT: addb $127, %al
; x8664-sahf-NEXT: sahf
; x8664-sahf-NEXT: jne
; clobbers eflags, effectively interfering in the liveness interval. However,
; we then promote these copies into independent conditions in GPRs that avoids
; repeated saving and restoring logic and can be trivially managed by the
; register allocator.
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; 32-GOOD-RA-LABEL: test_intervening_call:
; 32-GOOD-RA: # %bb.0: # %entry
; 32-GOOD-RA-NEXT: pushl %ebx
; 32-GOOD-RA-NEXT: pushl %esi
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
; 32-GOOD-RA-NEXT: setne %bl
; 32-GOOD-RA-NEXT: subl $8, %esp
; 32-GOOD-RA-NEXT: pushl %edx
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: calll bar
; 32-GOOD-RA-NEXT: addl $16, %esp
; 32-GOOD-RA-NEXT: testb %bl, %bl
; 32-GOOD-RA-NEXT: jne .LBB0_3
; 32-GOOD-RA-NEXT: # %bb.1: # %t
; 32-GOOD-RA-NEXT: movl $42, %eax
; 32-GOOD-RA-NEXT: jmp .LBB0_2
; 32-GOOD-RA-NEXT: .LBB0_3: # %f
; 32-GOOD-RA-NEXT: xorl %eax, %eax
; 32-GOOD-RA-NEXT: .LBB0_2: # %t
; 32-GOOD-RA-NEXT: xorl %edx, %edx
; 32-GOOD-RA-NEXT: addl $4, %esp
; 32-GOOD-RA-NEXT: popl %esi
; 32-GOOD-RA-NEXT: popl %ebx
; 32-GOOD-RA-NEXT: retl
;
; 32-FAST-RA-LABEL: test_intervening_call:
; 32-FAST-RA: # %bb.0: # %entry
; 32-FAST-RA-NEXT: pushl %ebx
; 32-FAST-RA-NEXT: pushl %esi
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi)
; 32-FAST-RA-NEXT: setne %bl
; 32-FAST-RA-NEXT: subl $8, %esp
; 32-FAST-RA-NEXT: pushl %edx
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: calll bar
; 32-FAST-RA-NEXT: addl $16, %esp
; 32-FAST-RA-NEXT: testb %bl, %bl
; 32-FAST-RA-NEXT: jne .LBB0_3
; 32-FAST-RA-NEXT: # %bb.1: # %t
; 32-FAST-RA-NEXT: movl $42, %eax
; 32-FAST-RA-NEXT: jmp .LBB0_2
; 32-FAST-RA-NEXT: .LBB0_3: # %f
; 32-FAST-RA-NEXT: xorl %eax, %eax
; 32-FAST-RA-NEXT: .LBB0_2: # %t
; 32-FAST-RA-NEXT: xorl %edx, %edx
; 32-FAST-RA-NEXT: addl $4, %esp
; 32-FAST-RA-NEXT: popl %esi
; 32-FAST-RA-NEXT: popl %ebx
; 32-FAST-RA-NEXT: retl
;
; 64-ALL-LABEL: test_intervening_call:
; 64-ALL: # %bb.0: # %entry
; 64-ALL-NEXT: pushq %rbx
; 64-ALL-NEXT: movq %rsi, %rax
; 64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi)
; 64-ALL-NEXT: setne %bl
; 64-ALL-NEXT: movq %rax, %rdi
; 64-ALL-NEXT: callq bar
; 64-ALL-NEXT: testb %bl, %bl
; 64-ALL-NEXT: jne .LBB0_2
; 64-ALL-NEXT: # %bb.1: # %t
; 64-ALL-NEXT: movl $42, %eax
; 64-ALL-NEXT: popq %rbx
; 64-ALL-NEXT: retq
; 64-ALL-NEXT: .LBB0_2: # %f
; 64-ALL-NEXT: xorl %eax, %eax
; 64-ALL-NEXT: popq %rbx
; 64-ALL-NEXT: retq
entry:
%cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
%v = extractvalue { i64, i1 } %cx, 0
%p = extractvalue { i64, i1 } %cx, 1
@@ -109,23 +119,62 @@ f:
}
; Interesting in producing a clobber without any function calls.
define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) {
; i386-LABEL: test_control_flow:
; i386: cmpxchg
; i386-NEXT: jne
; i386f-LABEL: test_control_flow:
; i386f: cmpxchg
; i386f-NEXT: jne
; x8664-LABEL: test_control_flow:
; x8664: cmpxchg
; x8664-NEXT: jne
; x8664-sahf-LABEL: test_control_flow:
; x8664-sahf: cmpxchg
; x8664-sahf-NEXT: jne
define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) nounwind {
; 32-ALL-LABEL: test_control_flow:
; 32-ALL: # %bb.0: # %entry
; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-ALL-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; 32-ALL-NEXT: jle .LBB1_6
; 32-ALL-NEXT: # %bb.1: # %loop_start
; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-ALL-NEXT: .p2align 4, 0x90
; 32-ALL-NEXT: .LBB1_2: # %while.condthread-pre-split.i
; 32-ALL-NEXT: # =>This Loop Header: Depth=1
; 32-ALL-NEXT: # Child Loop BB1_3 Depth 2
; 32-ALL-NEXT: movl (%ecx), %edx
; 32-ALL-NEXT: .p2align 4, 0x90
; 32-ALL-NEXT: .LBB1_3: # %while.cond.i
; 32-ALL-NEXT: # Parent Loop BB1_2 Depth=1
; 32-ALL-NEXT: # => This Inner Loop Header: Depth=2
; 32-ALL-NEXT: movl %edx, %eax
; 32-ALL-NEXT: xorl %edx, %edx
; 32-ALL-NEXT: testl %eax, %eax
; 32-ALL-NEXT: je .LBB1_3
; 32-ALL-NEXT: # %bb.4: # %while.body.i
; 32-ALL-NEXT: # in Loop: Header=BB1_2 Depth=1
; 32-ALL-NEXT: lock cmpxchgl %eax, (%ecx)
; 32-ALL-NEXT: jne .LBB1_2
; 32-ALL-NEXT: # %bb.5:
; 32-ALL-NEXT: xorl %eax, %eax
; 32-ALL-NEXT: .LBB1_6: # %cond.end
; 32-ALL-NEXT: retl
;
; 64-ALL-LABEL: test_control_flow:
; 64-ALL: # %bb.0: # %entry
; 64-ALL-NEXT: cmpl %edx, %esi
; 64-ALL-NEXT: jle .LBB1_5
; 64-ALL-NEXT: .p2align 4, 0x90
; 64-ALL-NEXT: .LBB1_1: # %while.condthread-pre-split.i
; 64-ALL-NEXT: # =>This Loop Header: Depth=1
; 64-ALL-NEXT: # Child Loop BB1_2 Depth 2
; 64-ALL-NEXT: movl (%rdi), %ecx
; 64-ALL-NEXT: .p2align 4, 0x90
; 64-ALL-NEXT: .LBB1_2: # %while.cond.i
; 64-ALL-NEXT: # Parent Loop BB1_1 Depth=1
; 64-ALL-NEXT: # => This Inner Loop Header: Depth=2
; 64-ALL-NEXT: movl %ecx, %eax
; 64-ALL-NEXT: xorl %ecx, %ecx
; 64-ALL-NEXT: testl %eax, %eax
; 64-ALL-NEXT: je .LBB1_2
; 64-ALL-NEXT: # %bb.3: # %while.body.i
; 64-ALL-NEXT: # in Loop: Header=BB1_1 Depth=1
; 64-ALL-NEXT: lock cmpxchgl %eax, (%rdi)
; 64-ALL-NEXT: jne .LBB1_1
; 64-ALL-NEXT: # %bb.4:
; 64-ALL-NEXT: xorl %esi, %esi
; 64-ALL-NEXT: .LBB1_5: # %cond.end
; 64-ALL-NEXT: movl %esi, %eax
; 64-ALL-NEXT: retq
entry:
%cmp = icmp sgt i32 %i, %j
br i1 %cmp, label %loop_start, label %cond.end
@@ -158,52 +207,68 @@ cond.end:
; This one is an interesting case because CMOV doesn't have a chain
; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) {
; i386-LABEL: test_feed_cmov:
; i386: cmpxchgl
; i386-NEXT: seto %al
; i386-NEXT: lahf
; i386-NEXT: movl %eax, [[FLAGS:%.*]]
; i386-NEXT: calll foo
; i386-NEXT: pushl %eax
; i386-NEXT: movl [[FLAGS]], %eax
; i386-NEXT: addb $127, %al
; i386-NEXT: sahf
; i386-NEXT: popl %eax
; i386f-LABEL: test_feed_cmov:
; i386f: cmpxchgl
; i386f-NEXT: seto %al
; i386f-NEXT: lahf
; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
; i386f-NEXT: calll foo
; i386f-NEXT: pushl %eax
; i386f-NEXT: movl [[FLAGS]], %eax
; i386f-NEXT: addb $127, %al
; i386f-NEXT: sahf
; i386f-NEXT: popl %eax
; x8664-LABEL: test_feed_cmov:
; x8664: cmpxchg
; x8664: pushfq
; x8664-NEXT: popq [[FLAGS:%.*]]
; x8664-NEXT: callq foo
; x8664-NEXT: pushq [[FLAGS]]
; x8664-NEXT: popfq
; x8664-sahf-LABEL: test_feed_cmov:
; x8664-sahf: cmpxchgl
; RAX is dead, do not push or pop it.
; x8664-sahf-NEXT: seto %al
; x8664-sahf-NEXT: lahf
; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]]
; x8664-sahf-NEXT: callq foo
; x8664-sahf-NEXT: pushq %rax
; x8664-sahf-NEXT: movq [[FLAGS]], %rax
; x8664-sahf-NEXT: addb $127, %al
; x8664-sahf-NEXT: sahf
; x8664-sahf-NEXT: popq %rax
define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind {
; 32-GOOD-RA-LABEL: test_feed_cmov:
; 32-GOOD-RA: # %bb.0: # %entry
; 32-GOOD-RA-NEXT: pushl %ebx
; 32-GOOD-RA-NEXT: pushl %esi
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx)
; 32-GOOD-RA-NEXT: sete %bl
; 32-GOOD-RA-NEXT: calll foo
; 32-GOOD-RA-NEXT: testb %bl, %bl
; 32-GOOD-RA-NEXT: jne .LBB2_2
; 32-GOOD-RA-NEXT: # %bb.1: # %entry
; 32-GOOD-RA-NEXT: movl %eax, %esi
; 32-GOOD-RA-NEXT: .LBB2_2: # %entry
; 32-GOOD-RA-NEXT: movl %esi, %eax
; 32-GOOD-RA-NEXT: addl $4, %esp
; 32-GOOD-RA-NEXT: popl %esi
; 32-GOOD-RA-NEXT: popl %ebx
; 32-GOOD-RA-NEXT: retl
;
; 32-FAST-RA-LABEL: test_feed_cmov:
; 32-FAST-RA: # %bb.0: # %entry
; 32-FAST-RA-NEXT: pushl %ebx
; 32-FAST-RA-NEXT: pushl %esi
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx)
; 32-FAST-RA-NEXT: sete %bl
; 32-FAST-RA-NEXT: calll foo
; 32-FAST-RA-NEXT: testb %bl, %bl
; 32-FAST-RA-NEXT: jne .LBB2_2
; 32-FAST-RA-NEXT: # %bb.1: # %entry
; 32-FAST-RA-NEXT: movl %eax, %esi
; 32-FAST-RA-NEXT: .LBB2_2: # %entry
; 32-FAST-RA-NEXT: movl %esi, %eax
; 32-FAST-RA-NEXT: addl $4, %esp
; 32-FAST-RA-NEXT: popl %esi
; 32-FAST-RA-NEXT: popl %ebx
; 32-FAST-RA-NEXT: retl
;
; 64-ALL-LABEL: test_feed_cmov:
; 64-ALL: # %bb.0: # %entry
; 64-ALL-NEXT: pushq %rbp
; 64-ALL-NEXT: pushq %rbx
; 64-ALL-NEXT: pushq %rax
; 64-ALL-NEXT: movl %edx, %ebx
; 64-ALL-NEXT: movl %esi, %eax
; 64-ALL-NEXT: lock cmpxchgl %ebx, (%rdi)
; 64-ALL-NEXT: sete %bpl
; 64-ALL-NEXT: callq foo
; 64-ALL-NEXT: testb %bpl, %bpl
; 64-ALL-NEXT: cmovnel %ebx, %eax
; 64-ALL-NEXT: addq $8, %rsp
; 64-ALL-NEXT: popq %rbx
; 64-ALL-NEXT: popq %rbp
; 64-ALL-NEXT: retq
entry:
%res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
%success = extractvalue { i32, i1 } %res, 1


@@ -1,6 +1,8 @@
; RUN: llc -o - %s | FileCheck %s
; This tests for the problem originally reported in http://llvm.org/PR25951
target triple = "i686-unknown-linux-gnu"
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X64
;
; Test patterns that require preserving and restoring flags.
@b = common global i8 0, align 1
@c = common global i32 0, align 4
@@ -8,13 +10,61 @@ target triple = "i686-unknown-linux-gnu"
@d = common global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
; CHECK-LABEL: func:
; This tests whether eax is properly saved/restored around the
; lahf/sahf instruction sequences. We make the memory ops volatile to
; prevent their reordering and thereby avoid spills.
declare void @external(i32)
define i32 @func() {
; A test that re-uses flags in interesting ways due to volatile accesses.
; Specifically, the first increment's flags are reused for the branch despite
; being clobbered by the second increment.
define i32 @test1() nounwind {
; X32-LABEL: test1:
; X32: # %bb.0: # %entry
; X32-NEXT: movb b, %cl
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: incb %al
; X32-NEXT: movb %al, b
; X32-NEXT: incl c
; X32-NEXT: sete %dl
; X32-NEXT: movb a, %ah
; X32-NEXT: movb %ah, %ch
; X32-NEXT: incb %ch
; X32-NEXT: cmpb %cl, %ah
; X32-NEXT: sete d
; X32-NEXT: movb %ch, a
; X32-NEXT: testb %dl, %dl
; X32-NEXT: jne .LBB0_2
; X32-NEXT: # %bb.1: # %if.then
; X32-NEXT: movsbl %al, %eax
; X32-NEXT: pushl %eax
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
; X32-NEXT: .LBB0_2: # %if.end
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: test1:
; X64: # %bb.0: # %entry
; X64-NEXT: movb {{.*}}(%rip), %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: incb %al
; X64-NEXT: movb %al, {{.*}}(%rip)
; X64-NEXT: incl {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: movb {{.*}}(%rip), %cl
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: incb %dl
; X64-NEXT: cmpb %dil, %cl
; X64-NEXT: sete {{.*}}(%rip)
; X64-NEXT: movb %dl, {{.*}}(%rip)
; X64-NEXT: testb %sil, %sil
; X64-NEXT: jne .LBB0_2
; X64-NEXT: # %bb.1: # %if.then
; X64-NEXT: pushq %rax
; X64-NEXT: movsbl %al, %edi
; X64-NEXT: callq external
; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB0_2: # %if.end
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
entry:
%bval = load i8, i8* @b
%inc = add i8 %bval, 1
@@ -25,33 +75,290 @@ entry:
%aval = load volatile i8, i8* @a
%inc2 = add i8 %aval, 1
store volatile i8 %inc2, i8* @a
; Copy flags produced by the incb of %inc1 to a register, need to save+restore
; eax around it. The flags will be reused by %tobool.
; CHECK: pushl %eax
; CHECK: seto %al
; CHECK: lahf
; CHECK: movl %eax, [[REG:%[a-z]+]]
; CHECK: popl %eax
%cmp = icmp eq i8 %aval, %bval
%conv5 = zext i1 %cmp to i8
store i8 %conv5, i8* @d
%tobool = icmp eq i32 %inc1, 0
; We restore flags with an 'addb, sahf' sequence, need to save+restore eax
; around it.
; CHECK: pushl %eax
; CHECK: movl [[REG]], %eax
; CHECK: addb $127, %al
; CHECK: sahf
; CHECK: popl %eax
br i1 %tobool, label %if.end, label %if.then
if.then:
%conv6 = sext i8 %inc to i32
%call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %conv6)
call void @external(i32 %conv6)
br label %if.end
if.end:
ret i32 0
}
declare i32 @printf(i8* nocapture readonly, ...)
; Preserve increment flags across a call.
define i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
; X32-NEXT: setne %bl
; X32-NEXT: pushl $42
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
; X32-NEXT: testb %bl, %bl
; X32-NEXT: je .LBB1_1
; X32-NEXT: # %bb.2: # %else
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
; X32-NEXT: .LBB1_1: # %then
; X32-NEXT: movl $64, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: test2:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: incl (%rdi)
; X64-NEXT: setne %bl
; X64-NEXT: movl $42, %edi
; X64-NEXT: callq external
; X64-NEXT: testb %bl, %bl
; X64-NEXT: je .LBB1_1
; X64-NEXT: # %bb.2: # %else
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
; X64-NEXT: .LBB1_1: # %then
; X64-NEXT: movl $64, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
entry:
%val = load i32, i32* %ptr
%inc = add i32 %val, 1
store i32 %inc, i32* %ptr
%cmp = icmp eq i32 %inc, 0
call void @external(i32 42)
br i1 %cmp, label %then, label %else
then:
ret i32 64
else:
ret i32 0
}
declare void @external_a()
declare void @external_b()
; This lowers to a conditional tail call instead of a conditional branch. This
; is tricky because we can only do this from a leaf function, and so we have to
; use volatile stores similar to test1 to force the save and restore of
; a condition without calling another function. We then set up subsequent calls
; in tail position.
define void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
; X32-NEXT: setne %al
; X32-NEXT: incb a
; X32-NEXT: sete d
; X32-NEXT: testb %al, %al
; X32-NEXT: jne external_b # TAILCALL
; X32-NEXT: # %bb.1: # %then
; X32-NEXT: jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64: # %bb.0: # %entry
; X64-NEXT: incl (%rdi)
; X64-NEXT: setne %al
; X64-NEXT: incb {{.*}}(%rip)
; X64-NEXT: sete {{.*}}(%rip)
; X64-NEXT: testb %al, %al
; X64-NEXT: jne external_b # TAILCALL
; X64-NEXT: # %bb.1: # %then
; X64-NEXT: jmp external_a # TAILCALL
entry:
%val = load i32, i32* %ptr
%inc = add i32 %val, 1
store i32 %inc, i32* %ptr
%cmp = icmp eq i32 %inc, 0
%aval = load volatile i8, i8* @a
%inc2 = add i8 %aval, 1
store volatile i8 %inc2, i8* @a
%cmp2 = icmp eq i8 %inc2, 0
%conv5 = zext i1 %cmp2 to i8
store i8 %conv5, i8* @d
br i1 %cmp, label %then, label %else
then:
tail call void @external_a()
ret void
else:
tail call void @external_b()
ret void
}
; Test a function that gets special select lowering into CFG with copied EFLAGS
; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
; cross-block rewrites in at least some narrow cases.
define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
; X32-LABEL: PR37100:
; X32: # %bb.0: # %bb
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pushl %ebx
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: pushl %edi
; X32-NEXT: .cfi_def_cfa_offset 16
; X32-NEXT: pushl %esi
; X32-NEXT: .cfi_def_cfa_offset 20
; X32-NEXT: .cfi_offset %esi, -20
; X32-NEXT: .cfi_offset %edi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-NEXT: jmp .LBB3_1
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB3_5: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: idivl %ebp
; X32-NEXT: .LBB3_1: # %bb1
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movsbl %cl, %eax
; X32-NEXT: movl %eax, %edx
; X32-NEXT: sarl $31, %edx
; X32-NEXT: cmpl %eax, %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: sbbl %edx, %eax
; X32-NEXT: setl %al
; X32-NEXT: setl %dl
; X32-NEXT: movzbl %dl, %ebp
; X32-NEXT: negl %ebp
; X32-NEXT: testb %al, %al
; X32-NEXT: jne .LBB3_3
; X32-NEXT: # %bb.2: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movb %ch, %cl
; X32-NEXT: .LBB3_3: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movb %cl, (%ebx)
; X32-NEXT: movl (%edi), %edx
; X32-NEXT: testb %al, %al
; X32-NEXT: jne .LBB3_5
; X32-NEXT: # %bb.4: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64: # %bb.0: # %bb
; X64-NEXT: movq %rdx, %r10
; X64-NEXT: jmp .LBB3_1
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_5: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: idivl %esi
; X64-NEXT: .LBB3_1: # %bb1
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movsbq %dil, %rax
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: cmpq %rax, %r10
; X64-NEXT: setl %sil
; X64-NEXT: negl %esi
; X64-NEXT: cmpq %rax, %r10
; X64-NEXT: jl .LBB3_3
; X64-NEXT: # %bb.2: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movl %ecx, %edi
; X64-NEXT: .LBB3_3: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movb %dil, (%r8)
; X64-NEXT: jl .LBB3_5
; X64-NEXT: # %bb.4: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movl (%r9), %esi
; X64-NEXT: jmp .LBB3_5
bb:
br label %bb1
bb1:
%tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
%tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
%tmp3 = icmp sgt i16 %tmp2, 7
%tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
%tmp5 = sext i8 %tmp to i64
%tmp6 = icmp slt i64 %arg3, %tmp5
%tmp7 = sext i1 %tmp6 to i32
%tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
store volatile i8 %tmp8, i8* %ptr1
%tmp9 = load volatile i32, i32* %ptr2
%tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
%tmp11 = srem i32 0, %tmp10
%tmp12 = trunc i32 %tmp11 to i16
br label %bb1
}
; Use a particular instruction pattern in order to lower to the post-RA pseudo
; used to lower SETB into an SBB pattern in order to make sure that kind of
; usage of a copied EFLAGS continues to work.
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
; X32-LABEL: PR37431:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %esi, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %eax
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: sarl $31, %ecx
; X32-NEXT: cmpl %eax, %eax
; X32-NEXT: sbbl %ecx, %eax
; X32-NEXT: setb %al
; X32-NEXT: sbbb %cl, %cl
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movb %cl, (%edx)
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: subl %eax, %ecx
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: idivl %ecx
; X32-NEXT: movb %dl, (%esi)
; X32-NEXT: popl %esi
; X32-NEXT: retl
;
; X64-LABEL: PR37431:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movslq (%rdi), %rax
; X64-NEXT: cmpq %rax, %rax
; X64-NEXT: sbbb %dl, %dl
; X64-NEXT: cmpq %rax, %rax
; X64-NEXT: movb %dl, (%rsi)
; X64-NEXT: sbbl %esi, %esi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: idivl %esi
; X64-NEXT: movb %dl, (%rcx)
; X64-NEXT: retq
entry:
%tmp = load i32, i32* %arg1
%tmp1 = sext i32 %tmp to i64
%tmp2 = icmp ugt i64 %tmp1, undef
%tmp3 = zext i1 %tmp2 to i8
%tmp4 = sub i8 0, %tmp3
store i8 %tmp4, i8* %arg2
%tmp5 = sext i8 %tmp4 to i32
%tmp6 = srem i32 0, %tmp5
%tmp7 = trunc i32 %tmp6 to i8
store i8 %tmp7, i8* %arg3
ret void
}


@@ -0,0 +1,24 @@
; RUN: llc -mcpu=skylake-avx512 -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
; Check that the X86 Domain Reassignment pass doesn't drop IMPLICIT_DEF nodes,
; which would later cause crashes (e.g. in LiveVariables) - see PR37430
define void @domain_reassignment_implicit_def(i1 %cond, i8 *%mem, float %arg) {
; CHECK: vxorps %xmm1, %xmm1, %xmm1
; CHECK: vcmpneqss %xmm1, %xmm0, %k0
; CHECK: kmovb %k0, (%rsi)
top:
br i1 %cond, label %L19, label %L15
L15: ; preds = %top
%tmp47 = fcmp une float 0.000000e+00, %arg
%tmp48 = zext i1 %tmp47 to i8
br label %L21
L19: ; preds = %top
br label %L21
L21: ; preds = %L19, %L15
%.sroa.0.0 = phi i8 [ undef, %L19 ], [ %tmp48, %L15 ]
store i8 %.sroa.0.0, i8* %mem, align 1
ret void
}


@@ -0,0 +1,37 @@
; RUN: llc -mcpu=skylake-avx512 -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
; RUN: llc -mcpu=skylake-avx512 -mtriple=x86_64-unknown-linux-gnu %s -o - | llvm-mc -triple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512
; Check that the X86 domain reassignment pass doesn't introduce an illegal
; test instruction. See PR37396
define void @japi1_foo2_34617() {
pass2:
br label %if5
L174:
%tmp = icmp sgt <2 x i64> undef, zeroinitializer
%tmp1 = icmp sle <2 x i64> undef, undef
%tmp2 = and <2 x i1> %tmp, %tmp1
%tmp3 = extractelement <2 x i1> %tmp2, i32 0
%tmp4 = extractelement <2 x i1> %tmp2, i32 1
%tmp106 = and i1 %tmp4, %tmp3
%tmp107 = zext i1 %tmp106 to i8
%tmp108 = and i8 %tmp122, %tmp107
%tmp109 = icmp eq i8 %tmp108, 0
; CHECK-NOT: testb {{%k[0-7]}}
br i1 %tmp109, label %L188, label %L190
if5:
%b.055 = phi i8 [ 1, %pass2 ], [ %tmp122, %if5 ]
%tmp118 = icmp sgt i64 undef, 0
%tmp119 = icmp sle i64 undef, undef
%tmp120 = and i1 %tmp118, %tmp119
%tmp121 = zext i1 %tmp120 to i8
%tmp122 = and i8 %b.055, %tmp121
br i1 undef, label %L174, label %if5
L188:
unreachable
L190:
ret void
}


@@ -1,64 +0,0 @@
# RUN: llc -run-pass postrapseudos -mtriple=i386-apple-macosx -o - %s | FileCheck %s
# Verify that we correctly save and restore eax when copying eflags,
# even when only a smaller alias of eax is used. We used to check only
# eax and not its aliases.
# PR27624.
--- |
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
define void @foo() {
entry:
br label %false
false:
ret void
}
...
---
name: foo
tracksRegLiveness: true
liveins:
- { reg: '%edi' }
body: |
bb.0.entry:
liveins: %edi
NOOP implicit-def %al
; The bug was triggered only when LivePhysReg is used, which
; happens only when the heuristic for the liveness computation
; failed. The liveness computation heuristic looks at 10 instructions
; before and after the copy. Make sure we do not reach the definition of
; AL in 10 instructions, otherwise the heuristic will see that it is live.
NOOP
NOOP
NOOP
NOOP
NOOP
NOOP
NOOP
NOOP
NOOP
NOOP
NOOP
NOOP
NOOP
; Save AL.
; CHECK: PUSH32r killed %eax
; Copy edi into EFLAGS
; CHECK-NEXT: %eax = MOV32rr %edi
; CHECK-NEXT: %al = ADD8ri %al, 127, implicit-def %eflags
; CHECK-NEXT: SAHF implicit-def %eflags, implicit %ah
%eflags = COPY %edi
; Restore AL.
; CHECK-NEXT: %eax = POP32r
bb.1.false:
liveins: %al
NOOP implicit %al
RETQ
...

Some files were not shown because too many files have changed in this diff.