Imported Upstream version 5.18.0.167

Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
Xamarin Public Jenkins (auto-signing)
2018-10-20 08:25:10 +00:00
parent e19d552987
commit b084638f15
28489 changed files with 184 additions and 3866856 deletions

View File

@@ -1,45 +0,0 @@
; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
define i32 @test1(i32* %ptr, i32 %val) #0 {
%orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
ret i32 %orig
}
; CHECK: DIVERGENT: %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
define {i32, i1} @test2(i32* %ptr, i32 %cmp, i32 %new) {
%orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
ret {i32, i1} %orig
}
; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false)
define i32 @test_atomic_inc_i32(i32 addrspace(1)* %ptr, i32 %val) #0 {
%ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false)
define i64 @test_atomic_inc_i64(i64 addrspace(1)* %ptr, i64 %val) #0 {
%ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false)
define i32 @test_atomic_dec_i32(i32 addrspace(1)* %ptr, i32 %val) #0 {
%ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false)
define i64 @test_atomic_dec_i64(i64 addrspace(1)* %ptr, i64 %val) #0 {
%ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false)
ret i64 %ret
}
declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #1
declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #1
declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #1
declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind argmemonly }
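Each of these deleted tests drove the divergence printer through its RUN line, with lit substituting %s for the test file's own path. Expanded by hand, the invocation for the file above would have looked roughly like the following sketch (the file name is hypothetical, since the diff viewer dropped the real paths; -analyze -divergence is the legacy-pass-manager spelling used throughout this commit):

    opt -mtriple=amdgcn-- -analyze -divergence atomics.ll | FileCheck atomics.ll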

View File

@@ -1,13 +0,0 @@
; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {
%swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
store i32 %swizzle, i32 addrspace(1)* %out, align 4
ret void
}
declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1
attributes #0 = { nounwind convergent }
attributes #1 = { nounwind readnone convergent }

View File

@@ -1,41 +0,0 @@
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_amdgpu_ps':
; CHECK: DIVERGENT:
; CHECK-NOT: %arg0
; CHECK-NOT: %arg1
; CHECK-NOT: %arg2
; CHECK: <2 x i32> %arg3
; CHECK: DIVERGENT: <3 x i32> %arg4
; CHECK: DIVERGENT: float %arg5
; CHECK: DIVERGENT: i32 %arg6
define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
ret void
}
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_amdgpu_kernel':
; CHECK-NOT: %arg0
; CHECK-NOT: %arg1
; CHECK-NOT: %arg2
; CHECK-NOT: %arg3
; CHECK-NOT: %arg4
; CHECK-NOT: %arg5
; CHECK-NOT: %arg6
define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
ret void
}
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_c':
; CHECK: DIVERGENT:
; CHECK: DIVERGENT:
; CHECK: DIVERGENT:
; CHECK: DIVERGENT:
; CHECK: DIVERGENT:
; CHECK: DIVERGENT:
; CHECK: DIVERGENT:
define void @test_c([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
ret void
}
attributes #0 = { nounwind }
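A note on what the three functions above establish, inferred from their CHECK lines: under amdgpu_ps, the inreg and byval arguments stay uniform while the remaining arguments are divergent; under amdgpu_kernel, every argument is uniform; and under the default C calling convention (@test_c), every argument is conservatively divergent.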

View File

@@ -1,2 +0,0 @@
if 'AMDGPU' not in config.root.targets:
config.unsupported = True
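lit evaluates this per-directory config before running anything beneath it, so builds without the AMDGPU backend report these tests as unsupported rather than failed. An equivalent one-line form of the same guard, shown only as a sketch (the two-line version above is what the tree actually carried):

    config.unsupported = 'AMDGPU' not in config.root.targets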

View File

@@ -1,103 +0,0 @@
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap(
define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.add(
define float @buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.sub(
define float @buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smin(
define float @buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.buffer.atomic.smin(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umin(
define float @buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.buffer.atomic.umin(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smax(
define float @buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.buffer.atomic.smax(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umax(
define float @buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.buffer.atomic.umax(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.and(
define float @buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.buffer.atomic.and(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.or(
define float @buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.buffer.atomic.or(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.xor(
define float @buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.buffer.atomic.xor(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(
define float @buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #0
declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #0
declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1) #0
declare i32 @llvm.amdgcn.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i1) #0
declare i32 @llvm.amdgcn.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i1) #0
declare i32 @llvm.amdgcn.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i1) #0
declare i32 @llvm.amdgcn.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i1) #0
declare i32 @llvm.amdgcn.buffer.atomic.and(i32, <4 x i32>, i32, i32, i1) #0
declare i32 @llvm.amdgcn.buffer.atomic.or(i32, <4 x i32>, i32, i32, i1) #0
declare i32 @llvm.amdgcn.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i1) #0
declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
attributes #0 = { nounwind }

View File

@@ -1,121 +0,0 @@
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(
define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.i32(
define float @image_atomic_add(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.add.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(
define float @image_atomic_sub(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(
define float @image_atomic_smin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(
define float @image_atomic_umin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(
define float @image_atomic_smax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(
define float @image_atomic_umax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.i32(
define float @image_atomic_and(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.and.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.i32(
define float @image_atomic_or(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.or.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(
define float @image_atomic_xor(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(
define float @image_atomic_inc(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(
define float @image_atomic_dec(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(
define float @image_atomic_cmpswap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data, i32 inreg %cmp) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%r = bitcast i32 %orig to float
ret float %r
}
declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.add.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.sub.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.smin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.umin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.smax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.umax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.and.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.or.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.xor.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.inc.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.dec.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32, i32, i32, <8 x i32>, i1, i1, i1) #0
attributes #0 = { nounwind }

View File

@@ -1,30 +0,0 @@
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4
; CHECK: DIVERGENT: %tmp11 = load volatile float, float addrspace(1)* %tmp5, align 4
; The post dominator tree does not have a root node in this case
define amdgpu_kernel void @no_return_blocks(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1) #0 {
bb0:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%tmp2 = sext i32 %tmp to i64
%tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
%tmp6 = load volatile float, float addrspace(1)* %tmp5, align 4
%tmp8 = fcmp olt float %tmp6, 0.000000e+00
br i1 %tmp8, label %bb1, label %bb2
bb1:
%tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4
br label %bb2
bb2:
%tmp11 = load volatile float, float addrspace(1)* %tmp5, align 4
br label %bb1
}
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@@ -1,28 +0,0 @@
; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
; CHECK-LABEL: 'test1':
; CHECK-NEXT: DIVERGENT: i32 %bound
; CHECK-NEXT: DIVERGENT: %break = icmp sge i32 %counter, %bound
; CHECK-NEXT: DIVERGENT: br i1 %break, label %footer, label %body
; CHECK-NEXT: DIVERGENT: br i1 %break, label %end, label %header
; Note: %counter is not divergent!
define amdgpu_ps void @test1(i32 %bound) {
entry:
br label %header
header:
%counter = phi i32 [ 0, %entry ], [ %counter.footer, %footer ]
%break = icmp sge i32 %counter, %bound
br i1 %break, label %footer, label %body
body:
%counter.next = add i32 %counter, 1
br label %footer
footer:
%counter.footer = phi i32 [ %counter.next, %body ], [ undef, %header ]
br i1 %break, label %end, label %header
end:
ret void
}
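For contrast with the @lcssa test in the NVPTX file later in this commit: if %counter itself escaped the loop, the escaping value would be reported divergent, because threads stop iterating at different counter values. A hypothetical rewrite of the end block above, not part of the original file:

    end:
      %counter.end = phi i32 [ %counter, %footer ]   ; would be printed as DIVERGENT
      ret void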

View File

@@ -1,17 +0,0 @@
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
; CHECK: DIVERGENT: %tmp = cmpxchg volatile
define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {
entry:
unreachable
unreachable_loop: ; preds = %do.body.i, %if.then11
%tmp = cmpxchg volatile i32 addrspace(1)* null, i32 0, i32 0 seq_cst seq_cst
%cmp.i = extractvalue { i32, i1 } %tmp, 1
br i1 %cmp.i, label %unreachable_loop, label %end
end: ; preds = %do.body.i51, %atomicAdd_g_f.exit
unreachable
}
attributes #0 = { norecurse nounwind }

View File

@@ -1,45 +0,0 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence %s | FileCheck %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
define amdgpu_kernel void @workitem_id_x() #1 {
%id.x = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %id.x, i32 addrspace(1)* undef
ret void
}
; CHECK: DIVERGENT: %id.y = call i32 @llvm.amdgcn.workitem.id.y()
define amdgpu_kernel void @workitem_id_y() #1 {
%id.y = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %id.y, i32 addrspace(1)* undef
ret void
}
; CHECK: DIVERGENT: %id.z = call i32 @llvm.amdgcn.workitem.id.z()
define amdgpu_kernel void @workitem_id_z() #1 {
%id.z = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %id.z, i32 addrspace(1)* undef
ret void
}
; CHECK: DIVERGENT: %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0)
define amdgpu_kernel void @mbcnt_lo() #1 {
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0)
store volatile i32 %mbcnt.lo, i32 addrspace(1)* undef
ret void
}
; CHECK: DIVERGENT: %mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0)
define amdgpu_kernel void @mbcnt_hi() #1 {
%mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0)
store volatile i32 %mbcnt.hi, i32 addrspace(1)* undef
ret void
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@@ -1,219 +0,0 @@
; RUN: opt %s -analyze -divergence | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'no_diverge'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%cond = icmp slt i32 %n, 0
br i1 %cond, label %then, label %else ; uniform
; CHECK-NOT: DIVERGENT: br i1 %cond,
then:
%a1 = add i32 %a, %tid
br label %merge
else:
%b2 = add i32 %b, %tid
br label %merge
merge:
%c = phi i32 [ %a1, %then ], [ %b2, %else ]
ret i32 %c
}
; c = a;
; if (threadIdx.x < 5) // divergent: data dependent
; c = b;
; return c; // c is divergent: sync dependent
define i32 @sync(i32 %a, i32 %b) {
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'sync'
bb1:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
%cond = icmp slt i32 %tid, 5
br i1 %cond, label %bb2, label %bb3
; CHECK: DIVERGENT: br i1 %cond,
bb2:
br label %bb3
bb3:
%c = phi i32 [ %a, %bb1 ], [ %b, %bb2 ] ; sync dependent on tid
; CHECK: DIVERGENT: %c =
ret i32 %c
}
; c = 0;
; if (threadIdx.x >= 5) { // divergent
; c = (n < 0 ? a : b); // c here is uniform because n is uniform
; }
; // c here is divergent because it is sync dependent on threadIdx.x >= 5
; return c;
define i32 @mixed(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'mixed'
bb1:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
%cond = icmp slt i32 %tid, 5
br i1 %cond, label %bb6, label %bb2
; CHECK: DIVERGENT: br i1 %cond,
bb2:
%cond2 = icmp slt i32 %n, 0
br i1 %cond2, label %bb4, label %bb3
bb3:
br label %bb5
bb4:
br label %bb5
bb5:
%c = phi i32 [ %a, %bb3 ], [ %b, %bb4 ]
; CHECK-NOT: DIVERGENT: %c =
br label %bb6
bb6:
%c2 = phi i32 [ 0, %bb1], [ %c, %bb5 ]
; CHECK: DIVERGENT: %c2 =
ret i32 %c2
}
; We conservatively treat all parameters of a __device__ function as divergent.
define i32 @device(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'device'
; CHECK: DIVERGENT: i32 %n
; CHECK: DIVERGENT: i32 %a
; CHECK: DIVERGENT: i32 %b
entry:
%cond = icmp slt i32 %n, 0
br i1 %cond, label %then, label %else
; CHECK: DIVERGENT: br i1 %cond,
then:
br label %merge
else:
br label %merge
merge:
%c = phi i32 [ %a, %then ], [ %b, %else ]
ret i32 %c
}
; int i = 0;
; do {
; i++; // i here is uniform
; } while (i < laneid);
; return i == 10 ? 0 : 1; // i here is divergent
;
; The i defined in the loop is used outside.
define i32 @loop() {
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'loop'
entry:
%laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
br label %loop
loop:
%i = phi i32 [ 0, %entry ], [ %i1, %loop ]
; CHECK-NOT: DIVERGENT: %i =
%i1 = add i32 %i, 1
%exit_cond = icmp sge i32 %i1, %laneid
br i1 %exit_cond, label %loop_exit, label %loop
loop_exit:
%cond = icmp eq i32 %i, 10
br i1 %cond, label %then, label %else
; CHECK: DIVERGENT: br i1 %cond,
then:
ret i32 0
else:
ret i32 1
}
; Same as @loop, but the loop is in the LCSSA form.
define i32 @lcssa() {
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'lcssa'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
br label %loop
loop:
%i = phi i32 [ 0, %entry ], [ %i1, %loop ]
; CHECK-NOT: DIVERGENT: %i =
%i1 = add i32 %i, 1
%exit_cond = icmp sge i32 %i1, %tid
br i1 %exit_cond, label %loop_exit, label %loop
loop_exit:
%i.lcssa = phi i32 [ %i, %loop ]
; CHECK: DIVERGENT: %i.lcssa =
%cond = icmp eq i32 %i.lcssa, 10
br i1 %cond, label %then, label %else
; CHECK: DIVERGENT: br i1 %cond,
then:
ret i32 0
else:
ret i32 1
}
; This test contains an unstructured loop.
;           +-------------- entry ----------------+
;           |                                      |
;           V                                      V
; i1 = phi(0, i3)                            i2 = phi(0, i3)
;     j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2
;           ^                |                     ^
;           |                V                     |
;           +-------- switch (tid / i3) ----------+
;                            |
;                            V
;                      if (i3 == 5) // divergent
;          because sync dependent on (tid / i3).
define i32 @unstructured_loop(i1 %entry_cond) {
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'unstructured_loop'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
loop_entry_1:
%i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
%j1 = add i32 %i1, 1
br label %loop_body
loop_entry_2:
%i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
%j2 = add i32 %i2, 2
br label %loop_body
loop_body:
%i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ]
br label %loop_latch
loop_latch:
%div = sdiv i32 %tid, %i3
switch i32 %div, label %branch [ i32 1, label %loop_entry_1
                                 i32 2, label %loop_entry_2 ]
branch:
%cmp = icmp eq i32 %i3, 5
br i1 %cmp, label %then, label %else
; CHECK: DIVERGENT: br i1 %cmp,
then:
ret i32 0
else:
ret i32 1
}
; Verifies sync-dependence is computed correctly in the absence of loops.
define i32 @sync_no_loop(i32 %arg) {
entry:
%0 = add i32 %arg, 1
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%1 = icmp sge i32 %tid, 10
br i1 %1, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
%2 = add i32 %0, 2
; CHECK-NOT: DIVERGENT: %2
ret i32 %2
}
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
!nvvm.annotations = !{!0, !1, !2, !3, !4, !5}
!0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1}
!1 = !{i32 (i32, i32)* @sync, !"kernel", i32 1}
!2 = !{i32 (i32, i32, i32)* @mixed, !"kernel", i32 1}
!3 = !{i32 ()* @loop, !"kernel", i32 1}
!4 = !{i32 (i1)* @unstructured_loop, !"kernel", i32 1}
!5 = !{i32 (i32)* @sync_no_loop, !"kernel", i32 1}

View File

@@ -1,2 +0,0 @@
if 'NVPTX' not in config.root.targets:
config.unsupported = True