Imported Upstream version 5.18.0.167
Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
parent e19d552987, commit b084638f15
@@ -1,167 +0,0 @@
; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s

target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
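; Note: in this datalayout addrspace(1) (global) pointers are 64-bit (p1:64:64) while
; addrspace(3) (local) pointers are 32-bit (p3:32:32), which is why the checks below use
; i32 offsets for the addrspace(3) accesses.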
; Make sure the pointer / address space of AtomicRMW is considered
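; The loops below add a constant 16383 to the index. With 4-byte i32 elements that is a
; 65532-byte offset (16383 * 4), which presumably still fits the immediate offset field of
; a DS (local memory) instruction on this target, so LSR is expected to keep it folded as
; the single "getelementptr ... i32 16383" off the pointer induction variable checked for
; here, rather than introducing extra pointer arithmetic.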
; OPT-LABEL: @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(
; OPT-NOT: getelementptr

; OPT: .lr.ph:
; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383
; OPT: %tmp4 = atomicrmw add i32 addrspace(3)* %scevgep4, i32 undef seq_cst
; OPT: %tmp7 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 undef seq_cst
; OPT: %0 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 %tmp8 seq_cst
; OPT: br i1 %exitcond
define amdgpu_kernel void @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
bb:
  %tmp = icmp sgt i32 %n, 0
  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader: ; preds = %bb
  br label %.lr.ph

._crit_edge.loopexit: ; preds = %.lr.ph
  br label %._crit_edge

._crit_edge: ; preds = %._crit_edge.loopexit, %bb
  ret void

.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %tmp1 = add nuw nsw i32 %indvars.iv, 16383
  %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
  %tmp4 = atomicrmw add i32 addrspace(3)* %tmp3, i32 undef seq_cst
  %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
  %tmp7 = atomicrmw add i32 addrspace(3)* %tmp6, i32 undef seq_cst
  %tmp8 = add nsw i32 %tmp7, %tmp4
  atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %n
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}
; OPT-LABEL: @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(
; OPT-NOT: getelementptr

; OPT: .lr.ph:
; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383
; OPT: %tmp4 = cmpxchg i32 addrspace(3)* %scevgep4, i32 undef, i32 undef seq_cst monotonic
define amdgpu_kernel void @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
bb:
  %tmp = icmp sgt i32 %n, 0
  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader: ; preds = %bb
  br label %.lr.ph

._crit_edge.loopexit: ; preds = %.lr.ph
  br label %._crit_edge

._crit_edge: ; preds = %._crit_edge.loopexit, %bb
  ret void

.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %tmp1 = add nuw nsw i32 %indvars.iv, 16383
  %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
  %tmp4 = cmpxchg i32 addrspace(3)* %tmp3, i32 undef, i32 undef seq_cst monotonic
  %tmp4.0 = extractvalue { i32, i1 } %tmp4, 0
  %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
  %tmp7 = cmpxchg i32 addrspace(3)* %tmp6, i32 undef, i32 undef seq_cst monotonic
  %tmp7.0 = extractvalue { i32, i1 } %tmp7, 0
  %tmp8 = add nsw i32 %tmp7.0, %tmp4.0
  atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %n
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}
; OPT-LABEL: @test_local_atomicinc_addressing_loop_uniform_index_max_offset_i32(
; OPT-NOT: getelementptr

; OPT: .lr.ph:
; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383
; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %scevgep4, i32 undef, i32 0, i32 0, i1 false)
; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
define amdgpu_kernel void @test_local_atomicinc_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
bb:
  %tmp = icmp sgt i32 %n, 0
  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader: ; preds = %bb
  br label %.lr.ph

._crit_edge.loopexit: ; preds = %.lr.ph
  br label %._crit_edge

._crit_edge: ; preds = %._crit_edge.loopexit, %bb
  ret void

.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %tmp1 = add nuw nsw i32 %indvars.iv, 16383
  %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
  %tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %tmp3, i32 undef, i32 0, i32 0, i1 false)
  %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
  %tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %tmp6, i32 undef, i32 0, i32 0, i1 false)
  %tmp8 = add nsw i32 %tmp7, %tmp4
  atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %n
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}
; OPT-LABEL: @test_local_atomicdec_addressing_loop_uniform_index_max_offset_i32(
; OPT-NOT: getelementptr

; OPT: .lr.ph:
; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383
; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %scevgep4, i32 undef, i32 0, i32 0, i1 false)
; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
define amdgpu_kernel void @test_local_atomicdec_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
bb:
  %tmp = icmp sgt i32 %n, 0
  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader: ; preds = %bb
  br label %.lr.ph

._crit_edge.loopexit: ; preds = %.lr.ph
  br label %._crit_edge

._crit_edge: ; preds = %._crit_edge.loopexit, %bb
  ret void

.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %tmp1 = add nuw nsw i32 %indvars.iv, 16383
  %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
  %tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %tmp3, i32 undef, i32 0, i32 0, i1 false)
  %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
  %tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %tmp6, i32 undef, i32 0, i32 0, i1 false)
  %tmp8 = add nsw i32 %tmp7, %tmp4
  atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %n
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #1
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind argmemonly }
@@ -1,156 +0,0 @@
; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s

; Test that loops with different maximum offsets for different address
; spaces are correctly handled.
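; Presumably the limits exercised here are about 4095 bytes of immediate offset for global
; (addrspace(1)) accesses and 65535 bytes for local (addrspace(3)) accesses: 4095 and
; 65535 are the in-range "max offset" cases that stay folded into the loop's addressing,
; while 4096 and 65536 are one past the limit and force LSR to materialize the offset in
; the preheader instead.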
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_i32(
; OPT: {{^}}.lr.ph:
; OPT: %lsr.iv2 = phi i8 addrspace(1)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv2, i64 4095
; OPT: load i8, i8 addrspace(1)* %scevgep4, align 1
define amdgpu_kernel void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
bb:
  %tmp = icmp sgt i32 %n, 0
  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader: ; preds = %bb
  br label %.lr.ph

._crit_edge.loopexit: ; preds = %.lr.ph
  br label %._crit_edge

._crit_edge: ; preds = %._crit_edge.loopexit, %bb
  ret void

.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %tmp1 = add nuw nsw i64 %indvars.iv, 4095
  %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1
  %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
  %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
  %tmp7 = add nsw i32 %tmp6, %tmp4
  store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}
; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_p1_i32(
; OPT: {{^}}.lr.ph.preheader:
; OPT: %scevgep2 = getelementptr i8, i8 addrspace(1)* %arg1, i64 4096
; OPT: br label %.lr.ph

; OPT: {{^}}.lr.ph:
; OPT: %lsr.iv3 = phi i8 addrspace(1)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv3, i64 1
define amdgpu_kernel void @test_global_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
bb:
  %tmp = icmp sgt i32 %n, 0
  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader: ; preds = %bb
  br label %.lr.ph

._crit_edge.loopexit: ; preds = %.lr.ph
  br label %._crit_edge

._crit_edge: ; preds = %._crit_edge.loopexit, %bb
  ret void

.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %tmp1 = add nuw nsw i64 %indvars.iv, 4096
  %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1
  %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
  %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
  %tmp7 = add nsw i32 %tmp6, %tmp4
  store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}
; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_i32(
; OPT: {{^}}.lr.ph
; OPT: %lsr.iv2 = phi i8 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv2, i32 65535
; OPT: %tmp4 = load i8, i8 addrspace(3)* %scevgep4, align 1
define amdgpu_kernel void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
bb:
  %tmp = icmp sgt i32 %n, 0
  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader: ; preds = %bb
  br label %.lr.ph

._crit_edge.loopexit: ; preds = %.lr.ph
  br label %._crit_edge

._crit_edge: ; preds = %._crit_edge.loopexit, %bb
  ret void

.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %tmp1 = add nuw nsw i64 %indvars.iv, 65535
  %tmp2 = trunc i64 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2
  %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = add nsw i32 %tmp7, %tmp5
  store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}
; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_p1_i32(
; OPT: {{^}}.lr.ph.preheader:
; OPT: %scevgep2 = getelementptr i8, i8 addrspace(3)* %arg1, i32 65536
; OPT: br label %.lr.ph

; OPT: {{^}}.lr.ph:
; OPT: %lsr.iv3 = phi i8 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv3, i32 1
define amdgpu_kernel void @test_local_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
bb:
  %tmp = icmp sgt i32 %n, 0
  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader: ; preds = %bb
  br label %.lr.ph

._crit_edge.loopexit: ; preds = %.lr.ph
  br label %._crit_edge

._crit_edge: ; preds = %._crit_edge.loopexit, %bb
  ret void

.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %tmp1 = add nuw nsw i64 %indvars.iv, 65536
  %tmp2 = trunc i64 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2
  %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = add nsw i32 %tmp7, %tmp5
  store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hawaii" "unsafe-fp-math"="false" "use-soft-float"="false" }
@@ -1,31 +0,0 @@
; RUN: llc < %s | FileCheck %s

target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"

; We need to compile this for a target where we have different address spaces,
; and where pointers in those address spaces have different size.
; E.g. for amdgcn-- pointers in address space 0 are 32 bits and pointers in
; address space 1 are 64 bits.

; We shouldn't crash. Check that we get a loop with the two stores.
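; The two geps index pointers of different widths (32-bit addrspace(0) vs 64-bit
; addrspace(1)), so LSR presumably has to keep separate induction-variable formulas per
; address space; the test itself only requires that compilation succeeds and that both
; stores remain inside the loop.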
;CHECK-LABEL: foo:
;CHECK: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]:
;CHECK: buffer_store_dword
;CHECK: buffer_store_dword
;CHECK: s_branch [[LOOP_LABEL]]

define amdgpu_kernel void @foo() {
entry:
  br label %loop

loop:
  %idx0 = phi i32 [ %next_idx0, %loop ], [ 0, %entry ]
  %0 = getelementptr inbounds i32, i32* null, i32 %idx0
  %1 = getelementptr inbounds i32, i32 addrspace(1)* null, i32 %idx0
  store i32 1, i32* %0
  store i32 7, i32 addrspace(1)* %1
  %next_idx0 = add nuw nsw i32 %idx0, 1
  br label %loop
}
@@ -1,3 +0,0 @@
if 'AMDGPU' not in config.root.targets:
    config.unsupported = True
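# config.unsupported = True makes lit report the tests in this directory as unsupported
# (skipped) when the AMDGPU backend is not among the configured targets.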
@@ -1,131 +0,0 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -print-lsr-output < %s 2>&1 | FileCheck %s

; Test various conditions where OptimizeLoopTermCond doesn't look at a
; memory instruction use and fails to find the address space.
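; In each function below the post-increment value feeds only a compare or a plain gep,
; never a load or store, so when OptimizeLoopTermCond rewrites the loop's exit condition
; it presumably has no memory operand from which to recover an address space; the checks
; only pin down the induction-variable formulas LSR settles on.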
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
; CHECK-LABEL: @local_cmp_user(
; CHECK: bb11:
; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 2, %entry ]
; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, -2
; CHECK: br i1

; CHECK: bb:
; CHECK: inttoptr i32 %lsr.iv.next2 to i8 addrspace(3)*
; CHECK: %c1 = icmp ne i8 addrspace(3)*
define amdgpu_kernel void @local_cmp_user(i32 %arg0) nounwind {
entry:
  br label %bb11

bb11:
  %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
  %ii = shl i32 %i, 1
  %c0 = icmp eq i32 %i, %arg0
  br i1 %c0, label %bb13, label %bb

bb:
  %t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef
  %p = getelementptr i8, i8 addrspace(3)* %t, i32 %ii
  %c1 = icmp ne i8 addrspace(3)* %p, null
  %i.next = add i32 %i, 1
  br i1 %c1, label %bb11, label %bb13

bb13:
  unreachable
}
; CHECK-LABEL: @global_cmp_user(
; CHECK: %lsr.iv1 = phi i64
; CHECK: %lsr.iv = phi i64
; CHECK: %lsr.iv.next = add i64 %lsr.iv, -1
; CHECK: %lsr.iv.next2 = add i64 %lsr.iv1, -2
; CHECK: br i1

; CHECK: bb:
; CHECK: inttoptr i64 %lsr.iv.next2 to i8 addrspace(1)*
; CHECK: icmp ne i8 addrspace(1)* %t
define amdgpu_kernel void @global_cmp_user(i64 %arg0) nounwind {
entry:
  br label %bb11

bb11:
  %i = phi i64 [ 0, %entry ], [ %i.next, %bb ]
  %ii = shl i64 %i, 1
  %c0 = icmp eq i64 %i, %arg0
  br i1 %c0, label %bb13, label %bb

bb:
  %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
  %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii
  %c1 = icmp ne i8 addrspace(1)* %p, null
  %i.next = add i64 %i, 1
  br i1 %c1, label %bb11, label %bb13

bb13:
  unreachable
}
; CHECK-LABEL: @global_gep_user(
; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 0, %entry ]
; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
; CHECK: br i1

; CHECK: bb:
; CHECK: %idxprom = sext i32 %lsr.iv1 to i64
; CHECK: getelementptr i8, i8 addrspace(1)* %t, i64 %idxprom
define amdgpu_kernel void @global_gep_user(i32 %arg0) nounwind {
entry:
  br label %bb11

bb11:
  %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
  %ii = shl i32 %i, 1
  %c0 = icmp eq i32 %i, %arg0
  br i1 %c0, label %bb13, label %bb

bb:
  %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
  %p = getelementptr i8, i8 addrspace(1)* %t, i32 %ii
  %c1 = icmp ne i8 addrspace(1)* %p, null
  %i.next = add i32 %i, 1
  br i1 %c1, label %bb11, label %bb13

bb13:
  unreachable
}
; CHECK-LABEL: @global_sext_scale_user(
; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 0, %entry ]
; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
; CHECK: br i1

; CHECK: bb
; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
define amdgpu_kernel void @global_sext_scale_user(i32 %arg0) nounwind {
entry:
  br label %bb11

bb11:
  %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
  %ii = shl i32 %i, 1
  %ii.ext = sext i32 %ii to i64
  %c0 = icmp eq i32 %i, %arg0
  br i1 %c0, label %bb13, label %bb

bb:
  %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
  %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
  %c1 = icmp ne i8 addrspace(1)* %p, null
  %i.next = add i32 %i, 1
  br i1 %c1, label %bb11, label %bb13

bb13:
  unreachable
}
@@ -1,54 +0,0 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -loop-reduce %s | FileCheck %s

; Test for assert resulting from inconsistent isLegalAddressingMode
; answers when the address space was dropped from the query.
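; LSR asks the target's isLegalAddressingMode about each candidate base/offset/scale
; formula; if the address space is dropped from some of those queries, the answers for
; the same formula can disagree and trip an assertion. The struct with mixed member
; types is presumably what makes the folded access type ambiguous here.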
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

%0 = type { i32, double, i32, float }
; CHECK-LABEL: @lsr_crash_preserve_addrspace_unknown_type(
; CHECK: %tmp4 = bitcast %0 addrspace(3)* %tmp to double addrspace(3)*
; CHECK: %scevgep5 = getelementptr double, double addrspace(3)* %tmp4, i32 1
; CHECK: load double, double addrspace(3)* %scevgep5

; CHECK: %scevgep = getelementptr i32, i32 addrspace(3)* %tmp1, i32 4
; CHECK: %tmp14 = load i32, i32 addrspace(3)* %scevgep
define amdgpu_kernel void @lsr_crash_preserve_addrspace_unknown_type() #0 {
bb:
  br label %bb1

bb1: ; preds = %bb17, %bb
  %tmp = phi %0 addrspace(3)* [ undef, %bb ], [ %tmp18, %bb17 ]
  %tmp2 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 0, i32 1
  %tmp3 = load double, double addrspace(3)* %tmp2, align 8
  br label %bb4

bb4: ; preds = %bb1
  br i1 undef, label %bb8, label %bb5

bb5: ; preds = %bb4
  unreachable

bb8: ; preds = %bb4
  %tmp9 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 0, i32 0
  %tmp10 = load i32, i32 addrspace(3)* %tmp9, align 4
  %tmp11 = icmp eq i32 0, %tmp10
  br i1 %tmp11, label %bb12, label %bb17

bb12: ; preds = %bb8
  %tmp13 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 0, i32 2
  %tmp14 = load i32, i32 addrspace(3)* %tmp13, align 4
  %tmp15 = icmp eq i32 0, %tmp14
  br i1 %tmp15, label %bb16, label %bb17

bb16: ; preds = %bb12
  unreachable

bb17: ; preds = %bb12, %bb8
  %tmp18 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 2
  br label %bb1
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }