You've already forked linux-packaging-mono
Imported Upstream version 5.18.0.167
Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
parent
e19d552987
commit
b084638f15
@ -1,3 +0,0 @@
|
||||
if not 'NVPTX' in config.root.targets:
|
||||
config.unsupported = True
|
||||
|
@ -1,209 +0,0 @@
|
||||
; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s
|
||||
|
||||
; Check that the load/store vectorizer is willing to move loads/stores across
|
||||
; intervening instructions only if it's safe.
|
||||
;
|
||||
; - Loads can be moved across instructions that don't write or throw.
|
||||
; - Stores can only be moved across instructions which don't read, write, or
|
||||
; throw.
|
||||
|
||||
declare void @fn()
|
||||
declare void @fn_nounwind() #0
|
||||
declare void @fn_nounwind_writeonly() #1
|
||||
declare void @fn_nounwind_readonly() #2
|
||||
declare void @fn_writeonly() #3
|
||||
declare void @fn_readonly() #4
|
||||
declare void @fn_readnone() #5
|
||||
|
||||
; CHECK-LABEL: @load_fn
|
||||
; CHECK: load
|
||||
; CHECK: call void @fn()
|
||||
; CHECK: load
|
||||
define void @load_fn(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
%v0 = load i32, i32* %p, align 8
|
||||
call void @fn()
|
||||
%v1 = load i32, i32* %p.1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @load_fn_nounwind
|
||||
; CHECK: load
|
||||
; CHECK: call void @fn_nounwind()
|
||||
; CHECK: load
|
||||
define void @load_fn_nounwind(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
%v0 = load i32, i32* %p, align 8
|
||||
call void @fn_nounwind() #0
|
||||
%v1 = load i32, i32* %p.1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @load_fn_nounwind_writeonly
|
||||
; CHECK: load
|
||||
; CHECK: call void @fn_nounwind_writeonly()
|
||||
; CHECK: load
|
||||
define void @load_fn_nounwind_writeonly(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
%v0 = load i32, i32* %p, align 8
|
||||
call void @fn_nounwind_writeonly() #1
|
||||
%v1 = load i32, i32* %p.1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @load_fn_nounwind_readonly
|
||||
; CHECK-DAG: load <2 x i32>
|
||||
; CHECK-DAG: call void @fn_nounwind_readonly()
|
||||
define void @load_fn_nounwind_readonly(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
%v0 = load i32, i32* %p, align 8
|
||||
call void @fn_nounwind_readonly() #2
|
||||
%v1 = load i32, i32* %p.1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @load_fn_readonly
|
||||
; CHECK: load
|
||||
; CHECK: call void @fn_readonly
|
||||
; CHECK: load
|
||||
define void @load_fn_readonly(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
%v0 = load i32, i32* %p, align 8
|
||||
call void @fn_readonly() #4
|
||||
%v1 = load i32, i32* %p.1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @load_fn_writeonly
|
||||
; CHECK: load
|
||||
; CHECK: call void @fn_writeonly()
|
||||
; CHECK: load
|
||||
define void @load_fn_writeonly(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
%v0 = load i32, i32* %p, align 8
|
||||
call void @fn_writeonly() #3
|
||||
%v1 = load i32, i32* %p.1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @load_fn_readnone
|
||||
; CHECK-DAG: load <2 x i32>
|
||||
; CHECK-DAG: call void @fn_readnone()
|
||||
define void @load_fn_readnone(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
%v0 = load i32, i32* %p, align 8
|
||||
call void @fn_readnone() #5
|
||||
%v1 = load i32, i32* %p.1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; ------------------------------------------------
|
||||
; Same tests, but now for stores instead of loads.
|
||||
; ------------------------------------------------
|
||||
|
||||
; CHECK-LABEL: @store_fn
|
||||
; CHECK: store
|
||||
; CHECK: call void @fn()
|
||||
; CHECK: store
|
||||
define void @store_fn(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
store i32 0, i32* %p
|
||||
call void @fn()
|
||||
store i32 0, i32* %p.1
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @store_fn_nounwind
|
||||
; CHECK: store
|
||||
; CHECK: call void @fn_nounwind()
|
||||
; CHECK: store
|
||||
define void @store_fn_nounwind(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
store i32 0, i32* %p
|
||||
call void @fn_nounwind() #0
|
||||
store i32 0, i32* %p.1
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @store_fn_nounwind_writeonly
|
||||
; CHECK: store
|
||||
; CHECK: call void @fn_nounwind_writeonly()
|
||||
; CHECK: store
|
||||
define void @store_fn_nounwind_writeonly(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
store i32 0, i32* %p
|
||||
call void @fn_nounwind_writeonly() #1
|
||||
store i32 0, i32* %p.1
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @store_fn_nounwind_readonly
|
||||
; CHECK: store
|
||||
; CHECK: call void @fn_nounwind_readonly()
|
||||
; CHECK: store
|
||||
define void @store_fn_nounwind_readonly(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
store i32 0, i32* %p
|
||||
call void @fn_nounwind_readonly() #2
|
||||
store i32 0, i32* %p.1
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @store_fn_readonly
|
||||
; CHECK: store
|
||||
; CHECK: call void @fn_readonly
|
||||
; CHECK: store
|
||||
define void @store_fn_readonly(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
store i32 0, i32* %p
|
||||
call void @fn_readonly() #4
|
||||
store i32 0, i32* %p.1
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @store_fn_writeonly
|
||||
; CHECK: store
|
||||
; CHECK: call void @fn_writeonly()
|
||||
; CHECK: store
|
||||
define void @store_fn_writeonly(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
store i32 0, i32* %p
|
||||
call void @fn_writeonly() #3
|
||||
store i32 0, i32* %p.1
|
||||
ret void
|
||||
}
|
||||
|
||||
; This is the only store idiom we can vectorize.
|
||||
; CHECK-LABEL: @store_fn_readnone
|
||||
; CHECK-DAG: store <2 x i32>
|
||||
; CHECK-DAG: call void @fn_readnone()
|
||||
define void @store_fn_readnone(i32* %p) #0 {
|
||||
%p.1 = getelementptr i32, i32* %p, i32 1
|
||||
|
||||
store i32 0, i32* %p, align 8
|
||||
call void @fn_readnone() #5
|
||||
store i32 0, i32* %p.1, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind writeonly }
|
||||
attributes #2 = { nounwind readonly }
|
||||
attributes #3 = { writeonly }
|
||||
attributes #4 = { readonly }
|
||||
; readnone implies nounwind, so no need to test separately
|
||||
attributes #5 = { nounwind readnone }
|
@ -1,14 +0,0 @@
|
||||
; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s
|
||||
|
||||
; Load from a constant. This can be vectorized, but shouldn't crash us.
|
||||
|
||||
@global = internal addrspace(1) constant [4 x float] [float 0xBF71111120000000, float 0x3F70410420000000, float 0xBF81111120000000, float 0x3FB5555560000000], align 4
|
||||
|
||||
define void @foo() {
|
||||
; CHECK: load <4 x float>
|
||||
%a = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 0), align 16
|
||||
%b = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 1), align 4
|
||||
%c = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 2), align 4
|
||||
%d = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 3), align 4
|
||||
ret void
|
||||
}
|
@ -1,17 +0,0 @@
|
||||
; RUN: opt -load-store-vectorizer -march=nvptx64 -mcpu=sm_35 -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
define i32 @foo(i32* %ptr) {
|
||||
%ptr1 = getelementptr i32, i32* %ptr, i32 1
|
||||
%p1 = addrspacecast i32* %ptr1 to i32 addrspace(1)*
|
||||
; CHECK: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 8, !invariant.load !0
|
||||
%v0 = load i32, i32* %ptr, align 8, !invariant.load !0
|
||||
%v1 = load i32, i32* %ptr1, align 4, !invariant.load !0
|
||||
%sum = add i32 %v0, %v1
|
||||
ret i32 %sum
|
||||
}
|
||||
|
||||
!0 = !{}
|
Reference in New Issue
Block a user