You've already forked linux-packaging-mono
Imported Upstream version 5.18.0.167
Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
parent
e19d552987
commit
b084638f15
@ -1,2 +0,0 @@
|
||||
if not 'AMDGPU' in config.root.targets:
|
||||
config.unsupported = True
|
@ -1,20 +0,0 @@
|
||||
; RUN: opt < %s -slsr -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
target triple = "amdgcn--"
|
||||
|
||||
%struct.Matrix4x4 = type { [4 x [4 x float]] }
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define fastcc void @Accelerator_Intersect(%struct.Matrix4x4 addrspace(1)* nocapture readonly %leafTransformations) #0 {
|
||||
; CHECK-LABEL: @Accelerator_Intersect(
|
||||
entry:
|
||||
%tmp = sext i32 undef to i64
|
||||
%arrayidx114 = getelementptr inbounds %struct.Matrix4x4, %struct.Matrix4x4 addrspace(1)* %leafTransformations, i64 %tmp
|
||||
%tmp1 = getelementptr %struct.Matrix4x4, %struct.Matrix4x4 addrspace(1)* %leafTransformations, i64 %tmp, i32 0, i64 0, i64 0
|
||||
; CHECK: %tmp1 = getelementptr %struct.Matrix4x4, %struct.Matrix4x4 addrspace(1)* %leafTransformations, i64 %tmp, i32 0, i64 0, i64 0
|
||||
%tmp2 = load <4 x float>, <4 x float> addrspace(1)* undef, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "target-cpu"="tahiti" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
@ -1,107 +0,0 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-- -separate-const-offset-from-gep -slsr -gvn < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
|
||||
|
||||
; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset(
|
||||
; CHECK: [[b1:%[0-9]+]] = getelementptr float, float addrspace(1)* %arr, i64 [[bump:%[0-9]+]]
|
||||
; CHECK: [[b2:%[0-9]+]] = getelementptr float, float addrspace(1)* [[b1]], i64 [[bump]]
|
||||
define amdgpu_kernel void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
|
||||
bb:
|
||||
%i2 = shl nsw i32 %i, 1
|
||||
%j1 = add nsw i32 %i, 1023
|
||||
%tmp = sext i32 %j1 to i64
|
||||
%p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
|
||||
%tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
|
||||
%v11 = load i32, i32 addrspace(1)* %tmp3, align 4
|
||||
%tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
|
||||
store i32 %v11, i32 addrspace(1)* %tmp4, align 4
|
||||
|
||||
%j2 = add nsw i32 %i2, 1023
|
||||
%tmp5 = sext i32 %j2 to i64
|
||||
%p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
|
||||
%tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
|
||||
%v22 = load i32, i32 addrspace(1)* %tmp6, align 4
|
||||
%tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
|
||||
store i32 %v22, i32 addrspace(1)* %tmp7, align 4
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @slsr_after_reassociate_global_geps_over_mubuf_max_offset(
|
||||
; CHECK: %j1 = add nsw i32 %i, 1024
|
||||
; CHECK: %tmp = sext i32 %j1 to i64
|
||||
; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
|
||||
; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
|
||||
define amdgpu_kernel void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
|
||||
bb:
|
||||
%i2 = shl nsw i32 %i, 1
|
||||
%j1 = add nsw i32 %i, 1024
|
||||
%tmp = sext i32 %j1 to i64
|
||||
%p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
|
||||
%tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
|
||||
%v11 = load i32, i32 addrspace(1)* %tmp3, align 4
|
||||
%tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
|
||||
store i32 %v11, i32 addrspace(1)* %tmp4, align 4
|
||||
|
||||
%j2 = add nsw i32 %i2, 1024
|
||||
%tmp5 = sext i32 %j2 to i64
|
||||
%p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
|
||||
%tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
|
||||
%v22 = load i32, i32 addrspace(1)* %tmp6, align 4
|
||||
%tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
|
||||
store i32 %v22, i32 addrspace(1)* %tmp7, align 4
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
|
||||
; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
|
||||
; CHECK: getelementptr inbounds float, float addrspace(3)* [[B1]], i32 16383
|
||||
|
||||
; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
|
||||
; CHECK: getelementptr inbounds float, float addrspace(3)* [[B2]], i32 16383
|
||||
define amdgpu_kernel void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
|
||||
bb:
|
||||
%i2 = shl nsw i32 %i, 1
|
||||
%j1 = add nsw i32 %i, 16383
|
||||
%p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
|
||||
%tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
|
||||
%v11 = load i32, i32 addrspace(3)* %tmp3, align 4
|
||||
%tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
|
||||
store i32 %v11, i32 addrspace(1)* %tmp4, align 4
|
||||
|
||||
%j2 = add nsw i32 %i2, 16383
|
||||
%p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
|
||||
%tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
|
||||
%v22 = load i32, i32 addrspace(3)* %tmp6, align 4
|
||||
%tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
|
||||
store i32 %v22, i32 addrspace(1)* %tmp7, align 4
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @slsr_after_reassociate_lds_geps_over_ds_max_offset(
|
||||
; CHECK: %j1 = add nsw i32 %i, 16384
|
||||
; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
|
||||
; CHECK: %j2 = add i32 %j1, %i
|
||||
; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
|
||||
define amdgpu_kernel void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
|
||||
bb:
|
||||
%i2 = shl nsw i32 %i, 1
|
||||
%j1 = add nsw i32 %i, 16384
|
||||
%p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
|
||||
%tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
|
||||
%v11 = load i32, i32 addrspace(3)* %tmp3, align 4
|
||||
%tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
|
||||
store i32 %v11, i32 addrspace(1)* %tmp4, align 4
|
||||
|
||||
%j2 = add nsw i32 %i2, 16384
|
||||
%p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
|
||||
%tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
|
||||
%v22 = load i32, i32 addrspace(3)* %tmp6, align 4
|
||||
%tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
|
||||
store i32 %v22, i32 addrspace(1)* %tmp7, align 4
|
||||
|
||||
ret void
|
||||
}
|
@ -1,2 +0,0 @@
|
||||
if not 'NVPTX' in config.root.targets:
|
||||
config.unsupported = True
|
@ -1,74 +0,0 @@
|
||||
; RUN: opt < %s -separate-const-offset-from-gep -slsr -gvn -S | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix=PTX
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-unknown-unknown"
|
||||
|
||||
; arr[i + 5]
|
||||
; arr[i * 2 + 5]
|
||||
; arr[i * 3 + 5]
|
||||
; arr[i * 4 + 5]
|
||||
;
|
||||
; => reassociate-geps
|
||||
;
|
||||
; *(&arr[i] + 5)
|
||||
; *(&arr[i * 2] + 5)
|
||||
; *(&arr[i * 3] + 5)
|
||||
; *(&arr[i * 4] + 5)
|
||||
;
|
||||
; => slsr
|
||||
;
|
||||
; p1 = &arr[i]
|
||||
; *(p1 + 5)
|
||||
; p2 = p1 + i
|
||||
; *(p2 + 5)
|
||||
; p3 = p2 + i
|
||||
; *(p3 + 5)
|
||||
; p4 = p3 + i
|
||||
; *(p4 + 5)
|
||||
define void @slsr_after_reassociate_geps(float* %arr, i32 %i) {
|
||||
; CHECK-LABEL: @slsr_after_reassociate_geps(
|
||||
; PTX-LABEL: .visible .func slsr_after_reassociate_geps(
|
||||
; PTX: ld.param.u64 [[arr:%rd[0-9]+]], [slsr_after_reassociate_geps_param_0];
|
||||
; PTX: ld.param.u32 [[i:%r[0-9]+]], [slsr_after_reassociate_geps_param_1];
|
||||
%i2 = shl nsw i32 %i, 1
|
||||
%i3 = mul nsw i32 %i, 3
|
||||
%i4 = shl nsw i32 %i, 2
|
||||
|
||||
%j1 = add nsw i32 %i, 5
|
||||
%p1 = getelementptr inbounds float, float* %arr, i32 %j1
|
||||
; CHECK: [[b1:%[0-9]+]] = getelementptr float, float* %arr, i64 [[bump:%[0-9]+]]
|
||||
; PTX: mul.wide.s32 [[i4:%rd[0-9]+]], [[i]], 4;
|
||||
; PTX: add.s64 [[base1:%rd[0-9]+]], [[arr]], [[i4]];
|
||||
%v1 = load float, float* %p1, align 4
|
||||
; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base1]]+20];
|
||||
call void @foo(float %v1)
|
||||
|
||||
%j2 = add nsw i32 %i2, 5
|
||||
%p2 = getelementptr inbounds float, float* %arr, i32 %j2
|
||||
; CHECK: [[b2:%[0-9]+]] = getelementptr float, float* [[b1]], i64 [[bump]]
|
||||
; PTX: add.s64 [[base2:%rd[0-9]+]], [[base1]], [[i4]];
|
||||
%v2 = load float, float* %p2, align 4
|
||||
; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base2]]+20];
|
||||
call void @foo(float %v2)
|
||||
|
||||
%j3 = add nsw i32 %i3, 5
|
||||
%p3 = getelementptr inbounds float, float* %arr, i32 %j3
|
||||
; CHECK: [[b3:%[0-9]+]] = getelementptr float, float* [[b2]], i64 [[bump]]
|
||||
; PTX: add.s64 [[base3:%rd[0-9]+]], [[base2]], [[i4]];
|
||||
%v3 = load float, float* %p3, align 4
|
||||
; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base3]]+20];
|
||||
call void @foo(float %v3)
|
||||
|
||||
%j4 = add nsw i32 %i4, 5
|
||||
%p4 = getelementptr inbounds float, float* %arr, i32 %j4
|
||||
; CHECK: [[b4:%[0-9]+]] = getelementptr float, float* [[b3]], i64 [[bump]]
|
||||
; PTX: add.s64 [[base4:%rd[0-9]+]], [[base3]], [[i4]];
|
||||
%v4 = load float, float* %p4, align 4
|
||||
; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base4]]+20];
|
||||
call void @foo(float %v4)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @foo(float)
|
@ -1,71 +0,0 @@
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
; CUDA code
|
||||
; __global__ void foo(int b, int s) {
|
||||
; #pragma unroll
|
||||
; for (int i = 0; i < 4; ++i) {
|
||||
; if (cond(i))
|
||||
; use((b + i) * s);
|
||||
; }
|
||||
; }
|
||||
define void @foo(i32 %b, i32 %s) {
|
||||
; CHECK-LABEL: .visible .entry foo(
|
||||
entry:
|
||||
; CHECK: ld.param.u32 [[s:%r[0-9]+]], [foo_param_1];
|
||||
; CHECK: ld.param.u32 [[b:%r[0-9]+]], [foo_param_0];
|
||||
%call = tail call zeroext i1 @cond(i32 0)
|
||||
br i1 %call, label %if.then, label %for.inc
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%mul = mul nsw i32 %b, %s
|
||||
; CHECK: mul.lo.s32 [[a0:%r[0-9]+]], [[b]], [[s]]
|
||||
tail call void @use(i32 %mul)
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %entry, %if.then
|
||||
%call.1 = tail call zeroext i1 @cond(i32 1)
|
||||
br i1 %call.1, label %if.then.1, label %for.inc.1
|
||||
|
||||
if.then.1: ; preds = %for.inc
|
||||
%add.1 = add nsw i32 %b, 1
|
||||
%mul.1 = mul nsw i32 %add.1, %s
|
||||
; CHECK: add.s32 [[a1:%r[0-9]+]], [[a0]], [[s]]
|
||||
tail call void @use(i32 %mul.1)
|
||||
br label %for.inc.1
|
||||
|
||||
for.inc.1: ; preds = %if.then.1, %for.inc
|
||||
%call.2 = tail call zeroext i1 @cond(i32 2)
|
||||
br i1 %call.2, label %if.then.2, label %for.inc.2
|
||||
|
||||
if.then.2: ; preds = %for.inc.1
|
||||
%add.2 = add nsw i32 %b, 2
|
||||
%mul.2 = mul nsw i32 %add.2, %s
|
||||
; CHECK: add.s32 [[a2:%r[0-9]+]], [[a1]], [[s]]
|
||||
tail call void @use(i32 %mul.2)
|
||||
br label %for.inc.2
|
||||
|
||||
for.inc.2: ; preds = %if.then.2, %for.inc.1
|
||||
%call.3 = tail call zeroext i1 @cond(i32 3)
|
||||
br i1 %call.3, label %if.then.3, label %for.inc.3
|
||||
|
||||
if.then.3: ; preds = %for.inc.2
|
||||
%add.3 = add nsw i32 %b, 3
|
||||
%mul.3 = mul nsw i32 %add.3, %s
|
||||
; CHECK: add.s32 [[a3:%r[0-9]+]], [[a2]], [[s]]
|
||||
tail call void @use(i32 %mul.3)
|
||||
br label %for.inc.3
|
||||
|
||||
for.inc.3: ; preds = %if.then.3, %for.inc.2
|
||||
ret void
|
||||
}
|
||||
|
||||
declare zeroext i1 @cond(i32)
|
||||
|
||||
declare void @use(i32)
|
||||
|
||||
!nvvm.annotations = !{!0}
|
||||
|
||||
!0 = !{void (i32, i32)* @foo, !"kernel", i32 1}
|
@ -1,2 +0,0 @@
|
||||
if not 'X86' in config.root.targets:
|
||||
config.unsupported = True
|
@ -1,44 +0,0 @@
|
||||
; RUN: opt < %s -slsr -gvn -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Do not perform SLSR on &input[s] and &input[s * 2] which fit into addressing
|
||||
; modes of X86.
|
||||
define i32 @no_slsr_gep(i32* %input, i64 %s) {
|
||||
; CHECK-LABEL: @no_slsr_gep(
|
||||
; v0 = input[0];
|
||||
%p0 = getelementptr inbounds i32, i32* %input, i64 0
|
||||
%v0 = load i32, i32* %p0
|
||||
|
||||
; v1 = input[s];
|
||||
%p1 = getelementptr inbounds i32, i32* %input, i64 %s
|
||||
; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %s
|
||||
%v1 = load i32, i32* %p1
|
||||
|
||||
; v2 = input[s * 2];
|
||||
%s2 = mul nsw i64 %s, 2
|
||||
%p2 = getelementptr inbounds i32, i32* %input, i64 %s2
|
||||
; CHECK: %p2 = getelementptr inbounds i32, i32* %input, i64 %s2
|
||||
%v2 = load i32, i32* %p2
|
||||
|
||||
; return v0 + v1 + v2;
|
||||
%1 = add i32 %v0, %v1
|
||||
%2 = add i32 %1, %v2
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
define void @no_slsr_add(i32 %b, i32 %s) {
|
||||
; CHECK-LABEL: @no_slsr_add(
|
||||
%1 = add i32 %b, %s
|
||||
; CHECK: add i32 %b, %s
|
||||
call void @foo(i32 %1)
|
||||
%s2 = mul i32 %s, 2
|
||||
; CHECK: %s2 = mul i32 %s, 2
|
||||
%2 = add i32 %b, %s2
|
||||
; CHECK: add i32 %b, %s2
|
||||
call void @foo(i32 %2)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @foo(i32 %a)
|
@ -1,116 +0,0 @@
|
||||
; RUN: opt < %s -slsr -gvn -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
|
||||
define void @shl(i32 %b, i32 %s) {
|
||||
; CHECK-LABEL: @shl(
|
||||
%1 = add i32 %b, %s
|
||||
; [[BASIS:%[a-zA-Z0-9]+]] = add i32 %b, %s
|
||||
call void @foo(i32 %1)
|
||||
%s2 = shl i32 %s, 1
|
||||
%2 = add i32 %b, %s2
|
||||
; add i32 [[BASIS]], %s
|
||||
call void @foo(i32 %2)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @stride_is_2s(i32 %b, i32 %s) {
|
||||
; CHECK-LABEL: @stride_is_2s(
|
||||
%s2 = shl i32 %s, 1
|
||||
; CHECK: %s2 = shl i32 %s, 1
|
||||
%1 = add i32 %b, %s2
|
||||
; CHECK: [[t1:%[a-zA-Z0-9]+]] = add i32 %b, %s2
|
||||
call void @foo(i32 %1)
|
||||
%s4 = shl i32 %s, 2
|
||||
%2 = add i32 %b, %s4
|
||||
; CHECK: [[t2:%[a-zA-Z0-9]+]] = add i32 [[t1]], %s2
|
||||
call void @foo(i32 %2)
|
||||
%s6 = mul i32 %s, 6
|
||||
%3 = add i32 %b, %s6
|
||||
; CHECK: add i32 [[t2]], %s2
|
||||
call void @foo(i32 %3)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @stride_is_3s(i32 %b, i32 %s) {
|
||||
; CHECK-LABEL: @stride_is_3s(
|
||||
%1 = add i32 %s, %b
|
||||
; CHECK: [[t1:%[a-zA-Z0-9]+]] = add i32 %s, %b
|
||||
call void @foo(i32 %1)
|
||||
%s4 = shl i32 %s, 2
|
||||
%2 = add i32 %s4, %b
|
||||
; CHECK: [[bump:%[a-zA-Z0-9]+]] = mul i32 %s, 3
|
||||
; CHECK: [[t2:%[a-zA-Z0-9]+]] = add i32 [[t1]], [[bump]]
|
||||
call void @foo(i32 %2)
|
||||
%s7 = mul i32 %s, 7
|
||||
%3 = add i32 %s7, %b
|
||||
; CHECK: add i32 [[t2]], [[bump]]
|
||||
call void @foo(i32 %3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; foo(b + 6 * s);
|
||||
; foo(b + 4 * s);
|
||||
; foo(b + 2 * s);
|
||||
; =>
|
||||
; t1 = b + 6 * s;
|
||||
; foo(t1);
|
||||
; s2 = 2 * s;
|
||||
; t2 = t1 - s2;
|
||||
; foo(t2);
|
||||
; t3 = t2 - s2;
|
||||
; foo(t3);
|
||||
define void @stride_is_minus_2s(i32 %b, i32 %s) {
|
||||
; CHECK-LABEL: @stride_is_minus_2s(
|
||||
%s6 = mul i32 %s, 6
|
||||
%1 = add i32 %b, %s6
|
||||
; CHECK: [[t1:%[a-zA-Z0-9]+]] = add i32 %b, %s6
|
||||
; CHECK: call void @foo(i32 [[t1]])
|
||||
call void @foo(i32 %1)
|
||||
%s4 = shl i32 %s, 2
|
||||
%2 = add i32 %b, %s4
|
||||
; CHECK: [[bump:%[a-zA-Z0-9]+]] = shl i32 %s, 1
|
||||
; CHECK: [[t2:%[a-zA-Z0-9]+]] = sub i32 [[t1]], [[bump]]
|
||||
call void @foo(i32 %2)
|
||||
; CHECK: call void @foo(i32 [[t2]])
|
||||
%s2 = shl i32 %s, 1
|
||||
%3 = add i32 %b, %s2
|
||||
; CHECK: [[t3:%[a-zA-Z0-9]+]] = sub i32 [[t2]], [[bump]]
|
||||
call void @foo(i32 %3)
|
||||
; CHECK: call void @foo(i32 [[t3]])
|
||||
ret void
|
||||
}
|
||||
|
||||
; t = b + (s << 3);
|
||||
; foo(t);
|
||||
; foo(b + s);
|
||||
;
|
||||
; do not rewrite b + s to t - 7 * s because the latter is more complicated.
|
||||
define void @simple_enough(i32 %b, i32 %s) {
|
||||
; CHECK-LABEL: @simple_enough(
|
||||
%s8 = shl i32 %s, 3
|
||||
%1 = add i32 %b, %s8
|
||||
call void @foo(i32 %1)
|
||||
%2 = add i32 %b, %s
|
||||
; CHECK: [[t:%[a-zA-Z0-9]+]] = add i32 %b, %s{{$}}
|
||||
call void @foo(i32 %2)
|
||||
; CHECK: call void @foo(i32 [[t]])
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @slsr_strided_add_128bit(i128 %b, i128 %s) {
|
||||
; CHECK-LABEL: @slsr_strided_add_128bit(
|
||||
%s125 = shl i128 %s, 125
|
||||
%s126 = shl i128 %s, 126
|
||||
%1 = add i128 %b, %s125
|
||||
; CHECK: [[t1:%[a-zA-Z0-9]+]] = add i128 %b, %s125
|
||||
call void @bar(i128 %1)
|
||||
%2 = add i128 %b, %s126
|
||||
; CHECK: [[t2:%[a-zA-Z0-9]+]] = add i128 [[t1]], %s125
|
||||
call void @bar(i128 %2)
|
||||
; CHECK: call void @bar(i128 [[t2]])
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @foo(i32)
|
||||
declare void @bar(i128)
|
@ -1,191 +0,0 @@
|
||||
; RUN: opt < %s -slsr -gvn -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64-p:64:64:64-p1:32:32:32"
|
||||
|
||||
; foo(input[0]);
|
||||
; foo(input[s]);
|
||||
; foo(input[s * 2]);
|
||||
; =>
|
||||
; p0 = &input[0];
|
||||
; foo(*p);
|
||||
; p1 = p0 + s;
|
||||
; foo(*p1);
|
||||
; p2 = p1 + s;
|
||||
; foo(*p2);
|
||||
define void @slsr_gep(i32* %input, i64 %s) {
|
||||
; CHECK-LABEL: @slsr_gep(
|
||||
; v0 = input[0];
|
||||
%p0 = getelementptr inbounds i32, i32* %input, i64 0
|
||||
call void @foo(i32* %p0)
|
||||
|
||||
; v1 = input[s];
|
||||
%p1 = getelementptr inbounds i32, i32* %input, i64 %s
|
||||
; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %s
|
||||
call void @foo(i32* %p1)
|
||||
|
||||
; v2 = input[s * 2];
|
||||
%s2 = shl nsw i64 %s, 1
|
||||
%p2 = getelementptr inbounds i32, i32* %input, i64 %s2
|
||||
; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 %s
|
||||
call void @foo(i32* %p2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; foo(input[0]);
|
||||
; foo(input[(long)s]);
|
||||
; foo(input[(long)(s * 2)]);
|
||||
; =>
|
||||
; p0 = &input[0];
|
||||
; foo(*p);
|
||||
; p1 = p0 + (long)s;
|
||||
; foo(*p1);
|
||||
; p2 = p1 + (long)s;
|
||||
; foo(*p2);
|
||||
define void @slsr_gep_sext(i32* %input, i32 %s) {
|
||||
; CHECK-LABEL: @slsr_gep_sext(
|
||||
; v0 = input[0];
|
||||
%p0 = getelementptr inbounds i32, i32* %input, i64 0
|
||||
call void @foo(i32* %p0)
|
||||
|
||||
; v1 = input[s];
|
||||
%t = sext i32 %s to i64
|
||||
%p1 = getelementptr inbounds i32, i32* %input, i64 %t
|
||||
; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %t
|
||||
call void @foo(i32* %p1)
|
||||
|
||||
; v2 = input[s * 2];
|
||||
%s2 = shl nsw i32 %s, 1
|
||||
%t2 = sext i32 %s2 to i64
|
||||
%p2 = getelementptr inbounds i32, i32* %input, i64 %t2
|
||||
; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 %t
|
||||
call void @foo(i32* %p2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; int input[10][5];
|
||||
; foo(input[s][t]);
|
||||
; foo(input[s * 2][t]);
|
||||
; foo(input[s * 3][t]);
|
||||
; =>
|
||||
; p0 = &input[s][t];
|
||||
; foo(*p0);
|
||||
; p1 = p0 + 5s;
|
||||
; foo(*p1);
|
||||
; p2 = p1 + 5s;
|
||||
; foo(*p2);
|
||||
define void @slsr_gep_2d([10 x [5 x i32]]* %input, i64 %s, i64 %t) {
|
||||
; CHECK-LABEL: @slsr_gep_2d(
|
||||
; v0 = input[s][t];
|
||||
%p0 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s, i64 %t
|
||||
call void @foo(i32* %p0)
|
||||
|
||||
; v1 = input[s * 2][t];
|
||||
%s2 = shl nsw i64 %s, 1
|
||||
; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = mul i64 %s, 5
|
||||
%p1 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s2, i64 %t
|
||||
; CHECK: %p1 = getelementptr inbounds i32, i32* %p0, i64 [[BUMP]]
|
||||
call void @foo(i32* %p1)
|
||||
|
||||
; v3 = input[s * 3][t];
|
||||
%s3 = mul nsw i64 %s, 3
|
||||
%p2 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s3, i64 %t
|
||||
; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 [[BUMP]]
|
||||
call void @foo(i32* %p2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
%struct.S = type <{ i64, i32 }>
|
||||
|
||||
; In this case, the bump
|
||||
; = (char *)&input[s * 2][t].f1 - (char *)&input[s][t].f1
|
||||
; = 60 * s
|
||||
; which may not be divisible by typeof(input[s][t].f1) = 8. Therefore, we
|
||||
; rewrite the candidates using byte offset instead of index offset as in
|
||||
; @slsr_gep_2d.
|
||||
define void @slsr_gep_uglygep([10 x [5 x %struct.S]]* %input, i64 %s, i64 %t) {
|
||||
; CHECK-LABEL: @slsr_gep_uglygep(
|
||||
; v0 = input[s][t].f1;
|
||||
%p0 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s, i64 %t, i32 0
|
||||
call void @bar(i64* %p0)
|
||||
|
||||
; v1 = input[s * 2][t].f1;
|
||||
%s2 = shl nsw i64 %s, 1
|
||||
; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = mul i64 %s, 60
|
||||
%p1 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s2, i64 %t, i32 0
|
||||
; CHECK: getelementptr inbounds i8, i8* %{{[0-9]+}}, i64 [[BUMP]]
|
||||
call void @bar(i64* %p1)
|
||||
|
||||
; v2 = input[s * 3][t].f1;
|
||||
%s3 = mul nsw i64 %s, 3
|
||||
%p2 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s3, i64 %t, i32 0
|
||||
; CHECK: getelementptr inbounds i8, i8* %{{[0-9]+}}, i64 [[BUMP]]
|
||||
call void @bar(i64* %p2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @slsr_out_of_bounds_gep(i32* %input, i32 %s) {
|
||||
; CHECK-LABEL: @slsr_out_of_bounds_gep(
|
||||
; v0 = input[0];
|
||||
%p0 = getelementptr i32, i32* %input, i64 0
|
||||
call void @foo(i32* %p0)
|
||||
|
||||
; v1 = input[(long)s];
|
||||
%t = sext i32 %s to i64
|
||||
%p1 = getelementptr i32, i32* %input, i64 %t
|
||||
; CHECK: %p1 = getelementptr i32, i32* %input, i64 %t
|
||||
call void @foo(i32* %p1)
|
||||
|
||||
; v2 = input[(long)(s * 2)];
|
||||
%s2 = shl nsw i32 %s, 1
|
||||
%t2 = sext i32 %s2 to i64
|
||||
%p2 = getelementptr i32, i32* %input, i64 %t2
|
||||
; CHECK: %p2 = getelementptr i32, i32* %p1, i64 %t
|
||||
call void @foo(i32* %p2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @slsr_gep_128bit_index(i32* %input, i128 %s) {
|
||||
; CHECK-LABEL: @slsr_gep_128bit_index(
|
||||
; p0 = &input[0]
|
||||
%p0 = getelementptr inbounds i32, i32* %input, i128 0
|
||||
call void @foo(i32* %p0)
|
||||
|
||||
; p1 = &input[s << 125]
|
||||
%s125 = shl nsw i128 %s, 125
|
||||
%p1 = getelementptr inbounds i32, i32* %input, i128 %s125
|
||||
; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i128 %s125
|
||||
call void @foo(i32* %p1)
|
||||
|
||||
; p2 = &input[s << 126]
|
||||
%s126 = shl nsw i128 %s, 126
|
||||
%p2 = getelementptr inbounds i32, i32* %input, i128 %s126
|
||||
; CHECK: %p2 = getelementptr inbounds i32, i32* %input, i128 %s126
|
||||
call void @foo(i32* %p2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @slsr_gep_32bit_pointer(i32 addrspace(1)* %input, i64 %s) {
|
||||
; CHECK-LABEL: @slsr_gep_32bit_pointer(
|
||||
; p1 = &input[s]
|
||||
%p1 = getelementptr inbounds i32, i32 addrspace(1)* %input, i64 %s
|
||||
call void @baz(i32 addrspace(1)* %p1)
|
||||
|
||||
; p2 = &input[s * 2]
|
||||
%s2 = mul nsw i64 %s, 2
|
||||
%p2 = getelementptr inbounds i32, i32 addrspace(1)* %input, i64 %s2
|
||||
; %s2 is wider than the pointer size of addrspace(1), so do not factor it.
|
||||
; CHECK: %p2 = getelementptr inbounds i32, i32 addrspace(1)* %input, i64 %s2
|
||||
call void @baz(i32 addrspace(1)* %p2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @foo(i32*)
|
||||
declare void @bar(i64*)
|
||||
declare void @baz(i32 addrspace(1)*)
|
@ -1,147 +0,0 @@
|
||||
; RUN: opt < %s -slsr -gvn -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
|
||||
define void @slsr1(i32 %b, i32 %s) {
|
||||
; CHECK-LABEL: @slsr1(
|
||||
; foo(b * s);
|
||||
%mul0 = mul i32 %b, %s
|
||||
; CHECK: mul i32
|
||||
; CHECK-NOT: mul i32
|
||||
call void @foo(i32 %mul0)
|
||||
|
||||
; foo((b + 1) * s);
|
||||
%b1 = add i32 %b, 1
|
||||
%mul1 = mul i32 %b1, %s
|
||||
call void @foo(i32 %mul1)
|
||||
|
||||
; foo((b + 2) * s);
|
||||
%b2 = add i32 %b, 2
|
||||
%mul2 = mul i32 %b2, %s
|
||||
call void @foo(i32 %mul2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @non_canonicalized(i32 %b, i32 %s) {
|
||||
; CHECK-LABEL: @non_canonicalized(
|
||||
; foo(b * s);
|
||||
%mul0 = mul i32 %b, %s
|
||||
; CHECK: mul i32
|
||||
; CHECK-NOT: mul i32
|
||||
call void @foo(i32 %mul0)
|
||||
|
||||
; foo((1 + b) * s);
|
||||
%b1 = add i32 1, %b
|
||||
%mul1 = mul i32 %b1, %s
|
||||
call void @foo(i32 %mul1)
|
||||
|
||||
; foo((2 + b) * s);
|
||||
%b2 = add i32 2, %b
|
||||
%mul2 = mul i32 %b2, %s
|
||||
call void @foo(i32 %mul2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @or(i32 %a, i32 %s) {
|
||||
%b = shl i32 %a, 1
|
||||
; CHECK-LABEL: @or(
|
||||
; foo(b * s);
|
||||
%mul0 = mul i32 %b, %s
|
||||
; CHECK: [[base:[^ ]+]] = mul i32
|
||||
call void @foo(i32 %mul0)
|
||||
|
||||
; foo((b | 1) * s);
|
||||
%b1 = or i32 %b, 1
|
||||
%mul1 = mul i32 %b1, %s
|
||||
; CHECK: add i32 [[base]], %s
|
||||
call void @foo(i32 %mul1)
|
||||
|
||||
; foo((b | 2) * s);
|
||||
%b2 = or i32 %b, 2
|
||||
%mul2 = mul i32 %b2, %s
|
||||
; CHECK: mul i32 %b2, %s
|
||||
call void @foo(i32 %mul2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; foo(a * b)
|
||||
; foo((a + 1) * b)
|
||||
; foo(a * (b + 1))
|
||||
; foo((a + 1) * (b + 1))
|
||||
define void @slsr2(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @slsr2(
|
||||
%a1 = add i32 %a, 1
|
||||
%b1 = add i32 %b, 1
|
||||
%mul0 = mul i32 %a, %b
|
||||
; CHECK: mul i32
|
||||
; CHECK-NOT: mul i32
|
||||
%mul1 = mul i32 %a1, %b
|
||||
%mul2 = mul i32 %a, %b1
|
||||
%mul3 = mul i32 %a1, %b1
|
||||
|
||||
call void @foo(i32 %mul0)
|
||||
call void @foo(i32 %mul1)
|
||||
call void @foo(i32 %mul2)
|
||||
call void @foo(i32 %mul3)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; The bump is a multiple of the stride.
|
||||
;
|
||||
; foo(b * s);
|
||||
; foo((b + 2) * s);
|
||||
; foo((b + 4) * s);
|
||||
; =>
|
||||
; mul0 = b * s;
|
||||
; bump = s * 2;
|
||||
; mul1 = mul0 + bump; // GVN ensures mul1 and mul2 use the same bump.
|
||||
; mul2 = mul1 + bump;
|
||||
define void @slsr3(i32 %b, i32 %s) {
|
||||
; CHECK-LABEL: @slsr3(
|
||||
%mul0 = mul i32 %b, %s
|
||||
; CHECK: mul i32
|
||||
call void @foo(i32 %mul0)
|
||||
|
||||
%b1 = add i32 %b, 2
|
||||
%mul1 = mul i32 %b1, %s
|
||||
; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = shl i32 %s, 1
|
||||
; CHECK: %mul1 = add i32 %mul0, [[BUMP]]
|
||||
call void @foo(i32 %mul1)
|
||||
|
||||
%b2 = add i32 %b, 4
|
||||
%mul2 = mul i32 %b2, %s
|
||||
; CHECK: %mul2 = add i32 %mul1, [[BUMP]]
|
||||
call void @foo(i32 %mul2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; Do not rewrite a candidate if its potential basis does not dominate it.
|
||||
;
|
||||
; if (cond)
|
||||
; foo(a * b);
|
||||
; foo((a + 1) * b);
|
||||
define void @not_dominate(i1 %cond, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @not_dominate(
|
||||
entry:
|
||||
%a1 = add i32 %a, 1
|
||||
br i1 %cond, label %then, label %merge
|
||||
|
||||
then:
|
||||
%mul0 = mul i32 %a, %b
|
||||
; CHECK: %mul0 = mul i32 %a, %b
|
||||
call void @foo(i32 %mul0)
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
%mul1 = mul i32 %a1, %b
|
||||
; CHECK: %mul1 = mul i32 %a1, %b
|
||||
call void @foo(i32 %mul1)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @foo(i32)
|
Reference in New Issue
Block a user