You've already forked linux-packaging-mono
Imported Upstream version 5.18.0.167
Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
parent
e19d552987
commit
b084638f15
@ -1,45 +0,0 @@
|
||||
; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
|
||||
|
||||
; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
|
||||
define i32 @test1(i32* %ptr, i32 %val) #0 {
|
||||
%orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
|
||||
ret i32 %orig
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
|
||||
define {i32, i1} @test2(i32* %ptr, i32 %cmp, i32 %new) {
|
||||
%orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
|
||||
ret {i32, i1} %orig
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false)
|
||||
define i32 @test_atomic_inc_i32(i32 addrspace(1)* %ptr, i32 %val) #0 {
|
||||
%ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false)
|
||||
define i64 @test_atomic_inc_i64(i64 addrspace(1)* %ptr, i64 %val) #0 {
|
||||
%ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false)
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false)
|
||||
define i32 @test_atomic_dec_i32(i32 addrspace(1)* %ptr, i32 %val) #0 {
|
||||
%ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false)
|
||||
define i64 @test_atomic_dec_i64(i64 addrspace(1)* %ptr, i64 %val) #0 {
|
||||
%ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false)
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #1
|
||||
declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #1
|
||||
declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #1
|
||||
declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind argmemonly }
|
@ -1,13 +0,0 @@
|
||||
; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
|
||||
|
||||
; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
|
||||
define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {
|
||||
%swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
|
||||
store i32 %swizzle, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind convergent }
|
||||
attributes #1 = { nounwind readnone convergent }
|
@ -1,41 +0,0 @@
|
||||
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_amdgpu_ps':
|
||||
; CHECK: DIVERGENT:
|
||||
; CHECK-NOT: %arg0
|
||||
; CHECK-NOT: %arg1
|
||||
; CHECK-NOT: %arg2
|
||||
; CHECK: <2 x i32> %arg3
|
||||
; CHECK: DIVERGENT: <3 x i32> %arg4
|
||||
; CHECK: DIVERGENT: float %arg5
|
||||
; CHECK: DIVERGENT: i32 %arg6
|
||||
|
||||
define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_amdgpu_kernel':
|
||||
; CHECK-NOT: %arg0
|
||||
; CHECK-NOT: %arg1
|
||||
; CHECK-NOT: %arg2
|
||||
; CHECK-NOT: %arg3
|
||||
; CHECK-NOT: %arg4
|
||||
; CHECK-NOT: %arg5
|
||||
; CHECK-NOT: %arg6
|
||||
define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_c':
|
||||
; CHECK: DIVERGENT:
|
||||
; CHECK: DIVERGENT:
|
||||
; CHECK: DIVERGENT:
|
||||
; CHECK: DIVERGENT:
|
||||
; CHECK: DIVERGENT:
|
||||
; CHECK: DIVERGENT:
|
||||
; CHECK: DIVERGENT:
|
||||
define void @test_c([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
@ -1,2 +0,0 @@
|
||||
if not 'AMDGPU' in config.root.targets:
|
||||
config.unsupported = True
|
@ -1,103 +0,0 @@
|
||||
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap(
|
||||
define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.add(
|
||||
define float @buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.sub(
|
||||
define float @buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smin(
|
||||
define float @buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.buffer.atomic.smin(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umin(
|
||||
define float @buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.buffer.atomic.umin(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smax(
|
||||
define float @buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.buffer.atomic.smax(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umax(
|
||||
define float @buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.buffer.atomic.umax(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.and(
|
||||
define float @buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.buffer.atomic.and(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.or(
|
||||
define float @buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.buffer.atomic.or(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.xor(
|
||||
define float @buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.buffer.atomic.xor(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(
|
||||
define float @buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.and(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.or(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
@ -1,121 +0,0 @@
|
||||
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(
|
||||
define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.i32(
|
||||
define float @image_atomic_add(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.add.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(
|
||||
define float @image_atomic_sub(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(
|
||||
define float @image_atomic_smin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(
|
||||
define float @image_atomic_umin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(
|
||||
define float @image_atomic_smax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(
|
||||
define float @image_atomic_umax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.i32(
|
||||
define float @image_atomic_and(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.and.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.i32(
|
||||
define float @image_atomic_or(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.or.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(
|
||||
define float @image_atomic_xor(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(
|
||||
define float @image_atomic_inc(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(
|
||||
define float @image_atomic_dec(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(
|
||||
define float @image_atomic_cmpswap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data, i32 inreg %cmp) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.add.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.sub.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.smin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.umin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.smax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.umax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.and.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.or.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.xor.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.inc.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.dec.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32, i32, i32, <8 x i32>,i1, i1, i1) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
@ -1,30 +0,0 @@
|
||||
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
|
||||
|
||||
; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
|
||||
; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4
|
||||
; CHECK: DIVERGENT: %tmp11 = load volatile float, float addrspace(1)* %tmp5, align 4
|
||||
|
||||
; The post dominator tree does not have a root node in this case
|
||||
define amdgpu_kernel void @no_return_blocks(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1) #0 {
|
||||
bb0:
|
||||
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tmp2 = sext i32 %tmp to i64
|
||||
%tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
|
||||
%tmp6 = load volatile float, float addrspace(1)* %tmp5, align 4
|
||||
%tmp8 = fcmp olt float %tmp6, 0.000000e+00
|
||||
br i1 %tmp8, label %bb1, label %bb2
|
||||
|
||||
bb1:
|
||||
%tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4
|
||||
br label %bb2
|
||||
|
||||
bb2:
|
||||
%tmp11 = load volatile float, float addrspace(1)* %tmp5, align 4
|
||||
br label %bb1
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
@ -1,28 +0,0 @@
|
||||
; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: 'test1':
|
||||
; CHECK-NEXT: DIVERGENT: i32 %bound
|
||||
; CHECK-NEXT: DIVERGENT: %break = icmp sge i32 %counter, %bound
|
||||
; CHECK-NEXT: DIVERGENT: br i1 %break, label %footer, label %body
|
||||
; CHECK-NEXT: DIVERGENT: br i1 %break, label %end, label %header
|
||||
; Note: %counter is not divergent!
|
||||
define amdgpu_ps void @test1(i32 %bound) {
|
||||
entry:
|
||||
br label %header
|
||||
|
||||
header:
|
||||
%counter = phi i32 [ 0, %entry ], [ %counter.footer, %footer ]
|
||||
%break = icmp sge i32 %counter, %bound
|
||||
br i1 %break, label %footer, label %body
|
||||
|
||||
body:
|
||||
%counter.next = add i32 %counter, 1
|
||||
br label %footer
|
||||
|
||||
footer:
|
||||
%counter.footer = phi i32 [ %counter.next, %body ], [ undef, %header ]
|
||||
br i1 %break, label %end, label %header
|
||||
|
||||
end:
|
||||
ret void
|
||||
}
|
@ -1,17 +0,0 @@
|
||||
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
|
||||
|
||||
; CHECK: DIVERGENT: %tmp = cmpxchg volatile
|
||||
define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {
|
||||
entry:
|
||||
unreachable
|
||||
|
||||
unreachable_loop: ; preds = %do.body.i, %if.then11
|
||||
%tmp = cmpxchg volatile i32 addrspace(1)* null, i32 0, i32 0 seq_cst seq_cst
|
||||
%cmp.i = extractvalue { i32, i1 } %tmp, 1
|
||||
br i1 %cmp.i, label %unreachable_loop, label %end
|
||||
|
||||
end: ; preds = %do.body.i51, %atomicAdd_g_f.exit
|
||||
unreachable
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse nounwind }
|
@ -1,45 +0,0 @@
|
||||
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence %s | FileCheck %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.z() #0
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
|
||||
|
||||
; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
define amdgpu_kernel void @workitem_id_x() #1 {
|
||||
%id.x = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
store volatile i32 %id.x, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %id.y = call i32 @llvm.amdgcn.workitem.id.y()
|
||||
define amdgpu_kernel void @workitem_id_y() #1 {
|
||||
%id.y = call i32 @llvm.amdgcn.workitem.id.y()
|
||||
store volatile i32 %id.y, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %id.z = call i32 @llvm.amdgcn.workitem.id.z()
|
||||
define amdgpu_kernel void @workitem_id_z() #1 {
|
||||
%id.z = call i32 @llvm.amdgcn.workitem.id.z()
|
||||
store volatile i32 %id.z, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0)
|
||||
define amdgpu_kernel void @mbcnt_lo() #1 {
|
||||
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0)
|
||||
store volatile i32 %mbcnt.lo, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0)
|
||||
define amdgpu_kernel void @mbcnt_hi() #1 {
|
||||
%mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0)
|
||||
store volatile i32 %mbcnt.hi, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
@ -1,219 +0,0 @@
|
||||
; RUN: opt %s -analyze -divergence | FileCheck %s
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
|
||||
define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'no_diverge'
|
||||
entry:
|
||||
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
%cond = icmp slt i32 %n, 0
|
||||
br i1 %cond, label %then, label %else ; uniform
|
||||
; CHECK-NOT: DIVERGENT: br i1 %cond,
|
||||
then:
|
||||
%a1 = add i32 %a, %tid
|
||||
br label %merge
|
||||
else:
|
||||
%b2 = add i32 %b, %tid
|
||||
br label %merge
|
||||
merge:
|
||||
%c = phi i32 [ %a1, %then ], [ %b2, %else ]
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
; c = a;
|
||||
; if (threadIdx.x < 5) // divergent: data dependent
|
||||
; c = b;
|
||||
; return c; // c is divergent: sync dependent
|
||||
define i32 @sync(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'sync'
|
||||
bb1:
|
||||
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
|
||||
%cond = icmp slt i32 %tid, 5
|
||||
br i1 %cond, label %bb2, label %bb3
|
||||
; CHECK: DIVERGENT: br i1 %cond,
|
||||
bb2:
|
||||
br label %bb3
|
||||
bb3:
|
||||
%c = phi i32 [ %a, %bb1 ], [ %b, %bb2 ] ; sync dependent on tid
|
||||
; CHECK: DIVERGENT: %c =
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
; c = 0;
|
||||
; if (threadIdx.x >= 5) { // divergent
|
||||
; c = (n < 0 ? a : b); // c here is uniform because n is uniform
|
||||
; }
|
||||
; // c here is divergent because it is sync dependent on threadIdx.x >= 5
|
||||
; return c;
|
||||
define i32 @mixed(i32 %n, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'mixed'
|
||||
bb1:
|
||||
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
|
||||
%cond = icmp slt i32 %tid, 5
|
||||
br i1 %cond, label %bb6, label %bb2
|
||||
; CHECK: DIVERGENT: br i1 %cond,
|
||||
bb2:
|
||||
%cond2 = icmp slt i32 %n, 0
|
||||
br i1 %cond2, label %bb4, label %bb3
|
||||
bb3:
|
||||
br label %bb5
|
||||
bb4:
|
||||
br label %bb5
|
||||
bb5:
|
||||
%c = phi i32 [ %a, %bb3 ], [ %b, %bb4 ]
|
||||
; CHECK-NOT: DIVERGENT: %c =
|
||||
br label %bb6
|
||||
bb6:
|
||||
%c2 = phi i32 [ 0, %bb1], [ %c, %bb5 ]
|
||||
; CHECK: DIVERGENT: %c2 =
|
||||
ret i32 %c2
|
||||
}
|
||||
|
||||
; We conservatively treats all parameters of a __device__ function as divergent.
|
||||
define i32 @device(i32 %n, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'device'
|
||||
; CHECK: DIVERGENT: i32 %n
|
||||
; CHECK: DIVERGENT: i32 %a
|
||||
; CHECK: DIVERGENT: i32 %b
|
||||
entry:
|
||||
%cond = icmp slt i32 %n, 0
|
||||
br i1 %cond, label %then, label %else
|
||||
; CHECK: DIVERGENT: br i1 %cond,
|
||||
then:
|
||||
br label %merge
|
||||
else:
|
||||
br label %merge
|
||||
merge:
|
||||
%c = phi i32 [ %a, %then ], [ %b, %else ]
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
; int i = 0;
|
||||
; do {
|
||||
; i++; // i here is uniform
|
||||
; } while (i < laneid);
|
||||
; return i == 10 ? 0 : 1; // i here is divergent
|
||||
;
|
||||
; The i defined in the loop is used outside.
|
||||
define i32 @loop() {
|
||||
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'loop'
|
||||
entry:
|
||||
%laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
|
||||
br label %loop
|
||||
loop:
|
||||
%i = phi i32 [ 0, %entry ], [ %i1, %loop ]
|
||||
; CHECK-NOT: DIVERGENT: %i =
|
||||
%i1 = add i32 %i, 1
|
||||
%exit_cond = icmp sge i32 %i1, %laneid
|
||||
br i1 %exit_cond, label %loop_exit, label %loop
|
||||
loop_exit:
|
||||
%cond = icmp eq i32 %i, 10
|
||||
br i1 %cond, label %then, label %else
|
||||
; CHECK: DIVERGENT: br i1 %cond,
|
||||
then:
|
||||
ret i32 0
|
||||
else:
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
; Same as @loop, but the loop is in the LCSSA form.
|
||||
define i32 @lcssa() {
|
||||
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'lcssa'
|
||||
entry:
|
||||
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
br label %loop
|
||||
loop:
|
||||
%i = phi i32 [ 0, %entry ], [ %i1, %loop ]
|
||||
; CHECK-NOT: DIVERGENT: %i =
|
||||
%i1 = add i32 %i, 1
|
||||
%exit_cond = icmp sge i32 %i1, %tid
|
||||
br i1 %exit_cond, label %loop_exit, label %loop
|
||||
loop_exit:
|
||||
%i.lcssa = phi i32 [ %i, %loop ]
|
||||
; CHECK: DIVERGENT: %i.lcssa =
|
||||
%cond = icmp eq i32 %i.lcssa, 10
|
||||
br i1 %cond, label %then, label %else
|
||||
; CHECK: DIVERGENT: br i1 %cond,
|
||||
then:
|
||||
ret i32 0
|
||||
else:
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
; This test contains an unstructured loop.
|
||||
; +-------------- entry ----------------+
|
||||
; | |
|
||||
; V V
|
||||
; i1 = phi(0, i3) i2 = phi(0, i3)
|
||||
; j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2
|
||||
; ^ | ^
|
||||
; | V |
|
||||
; +-------- switch (tid / i3) ----------+
|
||||
; |
|
||||
; V
|
||||
; if (i3 == 5) // divergent
|
||||
; because sync dependent on (tid / i3).
|
||||
define i32 @unstructured_loop(i1 %entry_cond) {
|
||||
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'unstructured_loop'
|
||||
entry:
|
||||
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
|
||||
loop_entry_1:
|
||||
%i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
|
||||
%j1 = add i32 %i1, 1
|
||||
br label %loop_body
|
||||
loop_entry_2:
|
||||
%i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
|
||||
%j2 = add i32 %i2, 2
|
||||
br label %loop_body
|
||||
loop_body:
|
||||
%i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ]
|
||||
br label %loop_latch
|
||||
loop_latch:
|
||||
%div = sdiv i32 %tid, %i3
|
||||
switch i32 %div, label %branch [ i32 1, label %loop_entry_1
|
||||
i32 2, label %loop_entry_2 ]
|
||||
branch:
|
||||
%cmp = icmp eq i32 %i3, 5
|
||||
br i1 %cmp, label %then, label %else
|
||||
; CHECK: DIVERGENT: br i1 %cmp,
|
||||
then:
|
||||
ret i32 0
|
||||
else:
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
; Verifies sync-dependence is computed correctly in the absense of loops.
|
||||
define i32 @sync_no_loop(i32 %arg) {
|
||||
entry:
|
||||
%0 = add i32 %arg, 1
|
||||
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
%1 = icmp sge i32 %tid, 10
|
||||
br i1 %1, label %bb1, label %bb2
|
||||
|
||||
bb1:
|
||||
br label %bb3
|
||||
|
||||
bb2:
|
||||
br label %bb3
|
||||
|
||||
bb3:
|
||||
%2 = add i32 %0, 2
|
||||
; CHECK-NOT: DIVERGENT: %2
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
|
||||
|
||||
!nvvm.annotations = !{!0, !1, !2, !3, !4, !5}
|
||||
!0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1}
|
||||
!1 = !{i32 (i32, i32)* @sync, !"kernel", i32 1}
|
||||
!2 = !{i32 (i32, i32, i32)* @mixed, !"kernel", i32 1}
|
||||
!3 = !{i32 ()* @loop, !"kernel", i32 1}
|
||||
!4 = !{i32 (i1)* @unstructured_loop, !"kernel", i32 1}
|
||||
!5 = !{i32 (i32)* @sync_no_loop, !"kernel", i32 1}
|
@ -1,2 +0,0 @@
|
||||
if not 'NVPTX' in config.root.targets:
|
||||
config.unsupported = True
|
Reference in New Issue
Block a user