Imported Upstream version 5.18.0.167

Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
Xamarin Public Jenkins (auto-signing)
2018-10-20 08:25:10 +00:00
parent e19d552987
commit b084638f15
28489 changed files with 184 additions and 3866856 deletions


@@ -1,2 +0,0 @@
if 'AMDGPU' not in config.root.targets:
    config.unsupported = True


@@ -1,20 +0,0 @@
; RUN: opt < %s -slsr -S | FileCheck %s
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
%struct.Matrix4x4 = type { [4 x [4 x float]] }
; Function Attrs: nounwind
define fastcc void @Accelerator_Intersect(%struct.Matrix4x4 addrspace(1)* nocapture readonly %leafTransformations) #0 {
; CHECK-LABEL: @Accelerator_Intersect(
entry:
%tmp = sext i32 undef to i64
%arrayidx114 = getelementptr inbounds %struct.Matrix4x4, %struct.Matrix4x4 addrspace(1)* %leafTransformations, i64 %tmp
%tmp1 = getelementptr %struct.Matrix4x4, %struct.Matrix4x4 addrspace(1)* %leafTransformations, i64 %tmp, i32 0, i64 0, i64 0
; CHECK: %tmp1 = getelementptr %struct.Matrix4x4, %struct.Matrix4x4 addrspace(1)* %leafTransformations, i64 %tmp, i32 0, i64 0, i64 0
%tmp2 = load <4 x float>, <4 x float> addrspace(1)* undef, align 4
ret void
}
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "target-cpu"="tahiti" "unsafe-fp-math"="false" "use-soft-float"="false" }


@@ -1,107 +0,0 @@
; RUN: opt -S -mtriple=amdgcn-- -separate-const-offset-from-gep -slsr -gvn < %s | FileCheck %s
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset(
; CHECK: [[b1:%[0-9]+]] = getelementptr float, float addrspace(1)* %arr, i64 [[bump:%[0-9]+]]
; CHECK: [[b2:%[0-9]+]] = getelementptr float, float addrspace(1)* [[b1]], i64 [[bump]]
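; A quick sanity check on the constants (assuming the 12-bit MUBUF immediate
; offset of GCN targets, i.e. a 4095-byte limit): the constant index 1023
; below is 1023 * 4 = 4092 bytes, which fits the immediate field, so
; -separate-const-offset-from-gep peels the constant off and -slsr chains the
; remaining variable GEPs as the CHECK lines above expect. The next test uses
; 1024 * 4 = 4096 bytes, one past the limit, so its GEPs are expected to
; survive untouched.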
define amdgpu_kernel void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
bb:
%i2 = shl nsw i32 %i, 1
%j1 = add nsw i32 %i, 1023
%tmp = sext i32 %j1 to i64
%p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
%tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
%v11 = load i32, i32 addrspace(1)* %tmp3, align 4
%tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
store i32 %v11, i32 addrspace(1)* %tmp4, align 4
%j2 = add nsw i32 %i2, 1023
%tmp5 = sext i32 %j2 to i64
%p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
%tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
%v22 = load i32, i32 addrspace(1)* %tmp6, align 4
%tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
store i32 %v22, i32 addrspace(1)* %tmp7, align 4
ret void
}
; CHECK-LABEL: @slsr_after_reassociate_global_geps_over_mubuf_max_offset(
; CHECK: %j1 = add nsw i32 %i, 1024
; CHECK: %tmp = sext i32 %j1 to i64
; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
define amdgpu_kernel void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
bb:
%i2 = shl nsw i32 %i, 1
%j1 = add nsw i32 %i, 1024
%tmp = sext i32 %j1 to i64
%p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
%tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
%v11 = load i32, i32 addrspace(1)* %tmp3, align 4
%tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
store i32 %v11, i32 addrspace(1)* %tmp4, align 4
%j2 = add nsw i32 %i2, 1024
%tmp5 = sext i32 %j2 to i64
%p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
%tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
%v22 = load i32, i32 addrspace(1)* %tmp6, align 4
%tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
store i32 %v22, i32 addrspace(1)* %tmp7, align 4
ret void
}
; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
; CHECK: getelementptr inbounds float, float addrspace(3)* [[B1]], i32 16383
; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
; CHECK: getelementptr inbounds float, float addrspace(3)* [[B2]], i32 16383
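; Same arithmetic for LDS (assuming the 16-bit DS instruction offset, i.e. a
; 65535-byte limit): 16383 * 4 = 65532 bytes fits, so the constant offset is
; split out and the variable parts are strength-reduced; the next test uses
; 16384 * 4 = 65536 bytes, which does not fit, so no rewrite is expected.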
define amdgpu_kernel void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
bb:
%i2 = shl nsw i32 %i, 1
%j1 = add nsw i32 %i, 16383
%p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
%tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
%v11 = load i32, i32 addrspace(3)* %tmp3, align 4
%tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
store i32 %v11, i32 addrspace(1)* %tmp4, align 4
%j2 = add nsw i32 %i2, 16383
%p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
%tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
%v22 = load i32, i32 addrspace(3)* %tmp6, align 4
%tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
store i32 %v22, i32 addrspace(1)* %tmp7, align 4
ret void
}
; CHECK-LABEL: @slsr_after_reassociate_lds_geps_over_ds_max_offset(
; CHECK: %j1 = add nsw i32 %i, 16384
; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
; CHECK: %j2 = add i32 %j1, %i
; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
define amdgpu_kernel void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
bb:
%i2 = shl nsw i32 %i, 1
%j1 = add nsw i32 %i, 16384
%p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
%tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
%v11 = load i32, i32 addrspace(3)* %tmp3, align 4
%tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
store i32 %v11, i32 addrspace(1)* %tmp4, align 4
%j2 = add nsw i32 %i2, 16384
%p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
%tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
%v22 = load i32, i32 addrspace(3)* %tmp6, align 4
%tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
store i32 %v22, i32 addrspace(1)* %tmp7, align 4
ret void
}


@@ -1,2 +0,0 @@
if 'NVPTX' not in config.root.targets:
    config.unsupported = True


@@ -1,74 +0,0 @@
; RUN: opt < %s -separate-const-offset-from-gep -slsr -gvn -S | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix=PTX
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-unknown-unknown"
; arr[i + 5]
; arr[i * 2 + 5]
; arr[i * 3 + 5]
; arr[i * 4 + 5]
;
; => reassociate-geps
;
; *(&arr[i] + 5)
; *(&arr[i * 2] + 5)
; *(&arr[i * 3] + 5)
; *(&arr[i * 4] + 5)
;
; => slsr
;
; p1 = &arr[i]
; *(p1 + 5)
; p2 = p1 + i
; *(p2 + 5)
; p3 = p2 + i
; *(p3 + 5)
; p4 = p3 + i
; *(p4 + 5)
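; -gvn then merges the per-candidate bump computations, so a single [[bump]]
; value should feed all four rewritten GEPs checked below.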
define void @slsr_after_reassociate_geps(float* %arr, i32 %i) {
; CHECK-LABEL: @slsr_after_reassociate_geps(
; PTX-LABEL: .visible .func slsr_after_reassociate_geps(
; PTX: ld.param.u64 [[arr:%rd[0-9]+]], [slsr_after_reassociate_geps_param_0];
; PTX: ld.param.u32 [[i:%r[0-9]+]], [slsr_after_reassociate_geps_param_1];
%i2 = shl nsw i32 %i, 1
%i3 = mul nsw i32 %i, 3
%i4 = shl nsw i32 %i, 2
%j1 = add nsw i32 %i, 5
%p1 = getelementptr inbounds float, float* %arr, i32 %j1
; CHECK: [[b1:%[0-9]+]] = getelementptr float, float* %arr, i64 [[bump:%[0-9]+]]
; PTX: mul.wide.s32 [[i4:%rd[0-9]+]], [[i]], 4;
; PTX: add.s64 [[base1:%rd[0-9]+]], [[arr]], [[i4]];
%v1 = load float, float* %p1, align 4
; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base1]]+20];
call void @foo(float %v1)
%j2 = add nsw i32 %i2, 5
%p2 = getelementptr inbounds float, float* %arr, i32 %j2
; CHECK: [[b2:%[0-9]+]] = getelementptr float, float* [[b1]], i64 [[bump]]
; PTX: add.s64 [[base2:%rd[0-9]+]], [[base1]], [[i4]];
%v2 = load float, float* %p2, align 4
; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base2]]+20];
call void @foo(float %v2)
%j3 = add nsw i32 %i3, 5
%p3 = getelementptr inbounds float, float* %arr, i32 %j3
; CHECK: [[b3:%[0-9]+]] = getelementptr float, float* [[b2]], i64 [[bump]]
; PTX: add.s64 [[base3:%rd[0-9]+]], [[base2]], [[i4]];
%v3 = load float, float* %p3, align 4
; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base3]]+20];
call void @foo(float %v3)
%j4 = add nsw i32 %i4, 5
%p4 = getelementptr inbounds float, float* %arr, i32 %j4
; CHECK: [[b4:%[0-9]+]] = getelementptr float, float* [[b3]], i64 [[bump]]
; PTX: add.s64 [[base4:%rd[0-9]+]], [[base3]], [[i4]];
%v4 = load float, float* %p4, align 4
; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base4]]+20];
call void @foo(float %v4)
ret void
}
declare void @foo(float)


@@ -1,71 +0,0 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
; CUDA code
; __global__ void foo(int b, int s) {
; #pragma unroll
; for (int i = 0; i < 4; ++i) {
; if (cond(i))
; use((b + i) * s);
; }
; }
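; Note that only llc is run here, so the rewrite depends on the NVPTX codegen
; pipeline: presumably SpeculativeExecution first hoists the guarded
; multiplies so each candidate dominates the next, and SLSR then turns them
; into the add.s32 chain the CHECK lines expect.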
define void @foo(i32 %b, i32 %s) {
; CHECK-LABEL: .visible .entry foo(
entry:
; CHECK: ld.param.u32 [[s:%r[0-9]+]], [foo_param_1];
; CHECK: ld.param.u32 [[b:%r[0-9]+]], [foo_param_0];
%call = tail call zeroext i1 @cond(i32 0)
br i1 %call, label %if.then, label %for.inc
if.then: ; preds = %entry
%mul = mul nsw i32 %b, %s
; CHECK: mul.lo.s32 [[a0:%r[0-9]+]], [[b]], [[s]]
tail call void @use(i32 %mul)
br label %for.inc
for.inc: ; preds = %entry, %if.then
%call.1 = tail call zeroext i1 @cond(i32 1)
br i1 %call.1, label %if.then.1, label %for.inc.1
if.then.1: ; preds = %for.inc
%add.1 = add nsw i32 %b, 1
%mul.1 = mul nsw i32 %add.1, %s
; CHECK: add.s32 [[a1:%r[0-9]+]], [[a0]], [[s]]
tail call void @use(i32 %mul.1)
br label %for.inc.1
for.inc.1: ; preds = %if.then.1, %for.inc
%call.2 = tail call zeroext i1 @cond(i32 2)
br i1 %call.2, label %if.then.2, label %for.inc.2
if.then.2: ; preds = %for.inc.1
%add.2 = add nsw i32 %b, 2
%mul.2 = mul nsw i32 %add.2, %s
; CHECK: add.s32 [[a2:%r[0-9]+]], [[a1]], [[s]]
tail call void @use(i32 %mul.2)
br label %for.inc.2
for.inc.2: ; preds = %if.then.2, %for.inc.1
%call.3 = tail call zeroext i1 @cond(i32 3)
br i1 %call.3, label %if.then.3, label %for.inc.3
if.then.3: ; preds = %for.inc.2
%add.3 = add nsw i32 %b, 3
%mul.3 = mul nsw i32 %add.3, %s
; CHECK: add.s32 [[a3:%r[0-9]+]], [[a2]], [[s]]
tail call void @use(i32 %mul.3)
br label %for.inc.3
for.inc.3: ; preds = %if.then.3, %for.inc.2
ret void
}
declare zeroext i1 @cond(i32)
declare void @use(i32)
!nvvm.annotations = !{!0}
!0 = !{void (i32, i32)* @foo, !"kernel", i32 1}


@@ -1,2 +0,0 @@
if 'X86' not in config.root.targets:
    config.unsupported = True


@@ -1,44 +0,0 @@
; RUN: opt < %s -slsr -gvn -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Do not perform SLSR on &input[s] and &input[s * 2], both of which fit into
; the addressing modes of X86.
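; For reference, both loads below can use a single scaled-index addressing
; mode, something like (registers hypothetical):
;   mov eax, [rdi + rsi*4]  ; input[s]
;   mov eax, [rdi + rsi*8]  ; input[s * 2]
; so rewriting &input[s * 2] as &input[s] + s would only add a dependency.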
define i32 @no_slsr_gep(i32* %input, i64 %s) {
; CHECK-LABEL: @no_slsr_gep(
; v0 = input[0];
%p0 = getelementptr inbounds i32, i32* %input, i64 0
%v0 = load i32, i32* %p0
; v1 = input[s];
%p1 = getelementptr inbounds i32, i32* %input, i64 %s
; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %s
%v1 = load i32, i32* %p1
; v2 = input[s * 2];
%s2 = mul nsw i64 %s, 2
%p2 = getelementptr inbounds i32, i32* %input, i64 %s2
; CHECK: %p2 = getelementptr inbounds i32, i32* %input, i64 %s2
%v2 = load i32, i32* %p2
; return v0 + v1 + v2;
%1 = add i32 %v0, %v1
%2 = add i32 %1, %v2
ret i32 %2
}
define void @no_slsr_add(i32 %b, i32 %s) {
; CHECK-LABEL: @no_slsr_add(
%1 = add i32 %b, %s
; CHECK: add i32 %b, %s
call void @foo(i32 %1)
%s2 = mul i32 %s, 2
; CHECK: %s2 = mul i32 %s, 2
%2 = add i32 %b, %s2
; CHECK: add i32 %b, %s2
call void @foo(i32 %2)
ret void
}
declare void @foo(i32 %a)


@@ -1,116 +0,0 @@
; RUN: opt < %s -slsr -gvn -S | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
define void @shl(i32 %b, i32 %s) {
; CHECK-LABEL: @shl(
%1 = add i32 %b, %s
; CHECK: [[BASIS:%[a-zA-Z0-9]+]] = add i32 %b, %s
call void @foo(i32 %1)
%s2 = shl i32 %s, 1
%2 = add i32 %b, %s2
; CHECK: add i32 [[BASIS]], %s
call void @foo(i32 %2)
ret void
}
define void @stride_is_2s(i32 %b, i32 %s) {
; CHECK-LABEL: @stride_is_2s(
%s2 = shl i32 %s, 1
; CHECK: %s2 = shl i32 %s, 1
%1 = add i32 %b, %s2
; CHECK: [[t1:%[a-zA-Z0-9]+]] = add i32 %b, %s2
call void @foo(i32 %1)
%s4 = shl i32 %s, 2
%2 = add i32 %b, %s4
; CHECK: [[t2:%[a-zA-Z0-9]+]] = add i32 [[t1]], %s2
call void @foo(i32 %2)
%s6 = mul i32 %s, 6
%3 = add i32 %b, %s6
; CHECK: add i32 [[t2]], %s2
call void @foo(i32 %3)
ret void
}
define void @stride_is_3s(i32 %b, i32 %s) {
; CHECK-LABEL: @stride_is_3s(
%1 = add i32 %s, %b
; CHECK: [[t1:%[a-zA-Z0-9]+]] = add i32 %s, %b
call void @foo(i32 %1)
%s4 = shl i32 %s, 2
%2 = add i32 %s4, %b
; CHECK: [[bump:%[a-zA-Z0-9]+]] = mul i32 %s, 3
; CHECK: [[t2:%[a-zA-Z0-9]+]] = add i32 [[t1]], [[bump]]
call void @foo(i32 %2)
%s7 = mul i32 %s, 7
%3 = add i32 %s7, %b
; CHECK: add i32 [[t2]], [[bump]]
call void @foo(i32 %3)
ret void
}
; foo(b + 6 * s);
; foo(b + 4 * s);
; foo(b + 2 * s);
; =>
; t1 = b + 6 * s;
; foo(t1);
; s2 = 2 * s;
; t2 = t1 - s2;
; foo(t2);
; t3 = t2 - s2;
; foo(t3);
define void @stride_is_minus_2s(i32 %b, i32 %s) {
; CHECK-LABEL: @stride_is_minus_2s(
%s6 = mul i32 %s, 6
%1 = add i32 %b, %s6
; CHECK: [[t1:%[a-zA-Z0-9]+]] = add i32 %b, %s6
; CHECK: call void @foo(i32 [[t1]])
call void @foo(i32 %1)
%s4 = shl i32 %s, 2
%2 = add i32 %b, %s4
; CHECK: [[bump:%[a-zA-Z0-9]+]] = shl i32 %s, 1
; CHECK: [[t2:%[a-zA-Z0-9]+]] = sub i32 [[t1]], [[bump]]
call void @foo(i32 %2)
; CHECK: call void @foo(i32 [[t2]])
%s2 = shl i32 %s, 1
%3 = add i32 %b, %s2
; CHECK: [[t3:%[a-zA-Z0-9]+]] = sub i32 [[t2]], [[bump]]
call void @foo(i32 %3)
; CHECK: call void @foo(i32 [[t3]])
ret void
}
; t = b + (s << 3);
; foo(t);
; foo(b + s);
;
; Do not rewrite b + s to t - 7 * s, because the latter is more complicated.
define void @simple_enough(i32 %b, i32 %s) {
; CHECK-LABEL: @simple_enough(
%s8 = shl i32 %s, 3
%1 = add i32 %b, %s8
call void @foo(i32 %1)
%2 = add i32 %b, %s
; CHECK: [[t:%[a-zA-Z0-9]+]] = add i32 %b, %s{{$}}
call void @foo(i32 %2)
; CHECK: call void @foo(i32 [[t]])
ret void
}
define void @slsr_strided_add_128bit(i128 %b, i128 %s) {
; CHECK-LABEL: @slsr_strided_add_128bit(
%s125 = shl i128 %s, 125
%s126 = shl i128 %s, 126
%1 = add i128 %b, %s125
; CHECK: [[t1:%[a-zA-Z0-9]+]] = add i128 %b, %s125
call void @bar(i128 %1)
%2 = add i128 %b, %s126
; CHECK: [[t2:%[a-zA-Z0-9]+]] = add i128 [[t1]], %s125
call void @bar(i128 %2)
; CHECK: call void @bar(i128 [[t2]])
ret void
}
declare void @foo(i32)
declare void @bar(i128)


@@ -1,191 +0,0 @@
; RUN: opt < %s -slsr -gvn -S | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64-p:64:64:64-p1:32:32:32"
; foo(input[0]);
; foo(input[s]);
; foo(input[s * 2]);
; =>
; p0 = &input[0];
; foo(*p0);
; p1 = p0 + s;
; foo(*p1);
; p2 = p1 + s;
; foo(*p2);
define void @slsr_gep(i32* %input, i64 %s) {
; CHECK-LABEL: @slsr_gep(
; v0 = input[0];
%p0 = getelementptr inbounds i32, i32* %input, i64 0
call void @foo(i32* %p0)
; v1 = input[s];
%p1 = getelementptr inbounds i32, i32* %input, i64 %s
; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %s
call void @foo(i32* %p1)
; v2 = input[s * 2];
%s2 = shl nsw i64 %s, 1
%p2 = getelementptr inbounds i32, i32* %input, i64 %s2
; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 %s
call void @foo(i32* %p2)
ret void
}
; foo(input[0]);
; foo(input[(long)s]);
; foo(input[(long)(s * 2)]);
; =>
; p0 = &input[0];
; foo(*p0);
; p1 = p0 + (long)s;
; foo(*p1);
; p2 = p1 + (long)s;
; foo(*p2);
define void @slsr_gep_sext(i32* %input, i32 %s) {
; CHECK-LABEL: @slsr_gep_sext(
; v0 = input[0];
%p0 = getelementptr inbounds i32, i32* %input, i64 0
call void @foo(i32* %p0)
; v1 = input[s];
%t = sext i32 %s to i64
%p1 = getelementptr inbounds i32, i32* %input, i64 %t
; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %t
call void @foo(i32* %p1)
; v2 = input[s * 2];
%s2 = shl nsw i32 %s, 1
%t2 = sext i32 %s2 to i64
%p2 = getelementptr inbounds i32, i32* %input, i64 %t2
; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 %t
call void @foo(i32* %p2)
ret void
}
; int input[10][5];
; foo(input[s][t]);
; foo(input[s * 2][t]);
; foo(input[s * 3][t]);
; =>
; p0 = &input[s][t];
; foo(*p0);
; p1 = p0 + 5s;
; foo(*p1);
; p2 = p1 + 5s;
; foo(*p2);
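; (The bump is 5 * s because each row of the array holds 5 i32 elements, as
; the mul by 5 in the CHECK line below confirms.)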
define void @slsr_gep_2d([10 x [5 x i32]]* %input, i64 %s, i64 %t) {
; CHECK-LABEL: @slsr_gep_2d(
; v0 = input[s][t];
%p0 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s, i64 %t
call void @foo(i32* %p0)
; v1 = input[s * 2][t];
%s2 = shl nsw i64 %s, 1
; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = mul i64 %s, 5
%p1 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s2, i64 %t
; CHECK: %p1 = getelementptr inbounds i32, i32* %p0, i64 [[BUMP]]
call void @foo(i32* %p1)
; v3 = input[s * 3][t];
%s3 = mul nsw i64 %s, 3
%p2 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s3, i64 %t
; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 [[BUMP]]
call void @foo(i32* %p2)
ret void
}
%struct.S = type <{ i64, i32 }>
; In this case, the bump
; = (char *)&input[s * 2][t].f1 - (char *)&input[s][t].f1
; = 60 * s
; which may not be divisible by sizeof(input[s][t].f1) = 8. Therefore, we
; rewrite the candidates using byte offset instead of index offset as in
; @slsr_gep_2d.
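; Worked out: %struct.S is packed, so its size is 8 + 4 = 12 bytes and a row
; of 5 elements is 60 bytes, giving the 60 * s bump above. Since 60 is not a
; multiple of 8, the bump cannot be expressed as a multiple of an i64 index,
; and the pass emits i8 ("uglygep") arithmetic instead, as checked below.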
define void @slsr_gep_uglygep([10 x [5 x %struct.S]]* %input, i64 %s, i64 %t) {
; CHECK-LABEL: @slsr_gep_uglygep(
; v0 = input[s][t].f1;
%p0 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s, i64 %t, i32 0
call void @bar(i64* %p0)
; v1 = input[s * 2][t].f1;
%s2 = shl nsw i64 %s, 1
; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = mul i64 %s, 60
%p1 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s2, i64 %t, i32 0
; CHECK: getelementptr inbounds i8, i8* %{{[0-9]+}}, i64 [[BUMP]]
call void @bar(i64* %p1)
; v2 = input[s * 3][t].f1;
%s3 = mul nsw i64 %s, 3
%p2 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s3, i64 %t, i32 0
; CHECK: getelementptr inbounds i8, i8* %{{[0-9]+}}, i64 [[BUMP]]
call void @bar(i64* %p2)
ret void
}
define void @slsr_out_of_bounds_gep(i32* %input, i32 %s) {
; CHECK-LABEL: @slsr_out_of_bounds_gep(
; v0 = input[0];
%p0 = getelementptr i32, i32* %input, i64 0
call void @foo(i32* %p0)
; v1 = input[(long)s];
%t = sext i32 %s to i64
%p1 = getelementptr i32, i32* %input, i64 %t
; CHECK: %p1 = getelementptr i32, i32* %input, i64 %t
call void @foo(i32* %p1)
; v2 = input[(long)(s * 2)];
%s2 = shl nsw i32 %s, 1
%t2 = sext i32 %s2 to i64
%p2 = getelementptr i32, i32* %input, i64 %t2
; CHECK: %p2 = getelementptr i32, i32* %p1, i64 %t
call void @foo(i32* %p2)
ret void
}
define void @slsr_gep_128bit_index(i32* %input, i128 %s) {
; CHECK-LABEL: @slsr_gep_128bit_index(
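; The indices here are i128, wider than this datalayout's 64-bit pointers, so
; the GEPs should not be factored; contrast @slsr_strided_add_128bit earlier
; in this diff, where the same strides are rewritten because plain adds carry
; no pointer-width restriction.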
; p0 = &input[0]
%p0 = getelementptr inbounds i32, i32* %input, i128 0
call void @foo(i32* %p0)
; p1 = &input[s << 125]
%s125 = shl nsw i128 %s, 125
%p1 = getelementptr inbounds i32, i32* %input, i128 %s125
; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i128 %s125
call void @foo(i32* %p1)
; p2 = &input[s << 126]
%s126 = shl nsw i128 %s, 126
%p2 = getelementptr inbounds i32, i32* %input, i128 %s126
; CHECK: %p2 = getelementptr inbounds i32, i32* %input, i128 %s126
call void @foo(i32* %p2)
ret void
}
define void @slsr_gep_32bit_pointer(i32 addrspace(1)* %input, i64 %s) {
; CHECK-LABEL: @slsr_gep_32bit_pointer(
; p1 = &input[s]
%p1 = getelementptr inbounds i32, i32 addrspace(1)* %input, i64 %s
call void @baz(i32 addrspace(1)* %p1)
; p2 = &input[s * 2]
%s2 = mul nsw i64 %s, 2
%p2 = getelementptr inbounds i32, i32 addrspace(1)* %input, i64 %s2
; %s2 is an i64, wider than the 32-bit pointers of addrspace(1)
; (p1:32:32:32 in the datalayout), so do not factor it.
; CHECK: %p2 = getelementptr inbounds i32, i32 addrspace(1)* %input, i64 %s2
call void @baz(i32 addrspace(1)* %p2)
ret void
}
declare void @foo(i32*)
declare void @bar(i64*)
declare void @baz(i32 addrspace(1)*)


@@ -1,147 +0,0 @@
; RUN: opt < %s -slsr -gvn -S | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
define void @slsr1(i32 %b, i32 %s) {
; CHECK-LABEL: @slsr1(
; foo(b * s);
%mul0 = mul i32 %b, %s
; CHECK: mul i32
; CHECK-NOT: mul i32
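; Only the first multiply should survive: (b + 1) * s = b * s + s and
; (b + 2) * s = (b + 1) * s + s, so the later candidates become adds off the
; previous result.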
call void @foo(i32 %mul0)
; foo((b + 1) * s);
%b1 = add i32 %b, 1
%mul1 = mul i32 %b1, %s
call void @foo(i32 %mul1)
; foo((b + 2) * s);
%b2 = add i32 %b, 2
%mul2 = mul i32 %b2, %s
call void @foo(i32 %mul2)
ret void
}
define void @non_canonicalized(i32 %b, i32 %s) {
; CHECK-LABEL: @non_canonicalized(
; foo(b * s);
%mul0 = mul i32 %b, %s
; CHECK: mul i32
; CHECK-NOT: mul i32
call void @foo(i32 %mul0)
; foo((1 + b) * s);
%b1 = add i32 1, %b
%mul1 = mul i32 %b1, %s
call void @foo(i32 %mul1)
; foo((2 + b) * s);
%b2 = add i32 2, %b
%mul2 = mul i32 %b2, %s
call void @foo(i32 %mul2)
ret void
}
define void @or(i32 %a, i32 %s) {
%b = shl i32 %a, 1
; CHECK-LABEL: @or(
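; b = a << 1 has bit 0 clear, so (b | 1) equals b + 1 and is a valid SLSR
; candidate, while bit 1 of b may already be set, so (b | 2) need not equal
; b + 2 and must stay a multiply.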
; foo(b * s);
%mul0 = mul i32 %b, %s
; CHECK: [[base:[^ ]+]] = mul i32
call void @foo(i32 %mul0)
; foo((b | 1) * s);
%b1 = or i32 %b, 1
%mul1 = mul i32 %b1, %s
; CHECK: add i32 [[base]], %s
call void @foo(i32 %mul1)
; foo((b | 2) * s);
%b2 = or i32 %b, 2
%mul2 = mul i32 %b2, %s
; CHECK: mul i32 %b2, %s
call void @foo(i32 %mul2)
ret void
}
; foo(a * b)
; foo((a + 1) * b)
; foo(a * (b + 1))
; foo((a + 1) * (b + 1))
define void @slsr2(i32 %a, i32 %b) {
; CHECK-LABEL: @slsr2(
%a1 = add i32 %a, 1
%b1 = add i32 %b, 1
%mul0 = mul i32 %a, %b
; CHECK: mul i32
; CHECK-NOT: mul i32
%mul1 = mul i32 %a1, %b
%mul2 = mul i32 %a, %b1
%mul3 = mul i32 %a1, %b1
call void @foo(i32 %mul0)
call void @foo(i32 %mul1)
call void @foo(i32 %mul2)
call void @foo(i32 %mul3)
ret void
}
; The bump is a multiple of the stride.
;
; foo(b * s);
; foo((b + 2) * s);
; foo((b + 4) * s);
; =>
; mul0 = b * s;
; bump = s * 2;
; mul1 = mul0 + bump; // GVN ensures mul1 and mul2 use the same bump.
; mul2 = mul1 + bump;
define void @slsr3(i32 %b, i32 %s) {
; CHECK-LABEL: @slsr3(
%mul0 = mul i32 %b, %s
; CHECK: mul i32
call void @foo(i32 %mul0)
%b1 = add i32 %b, 2
%mul1 = mul i32 %b1, %s
; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = shl i32 %s, 1
; CHECK: %mul1 = add i32 %mul0, [[BUMP]]
call void @foo(i32 %mul1)
%b2 = add i32 %b, 4
%mul2 = mul i32 %b2, %s
; CHECK: %mul2 = add i32 %mul1, [[BUMP]]
call void @foo(i32 %mul2)
ret void
}
; Do not rewrite a candidate if its potential basis does not dominate it.
;
; if (cond)
; foo(a * b);
; foo((a + 1) * b);
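; Here %mul0 is defined in %then, which does not dominate %merge, so %mul1
; cannot be rewritten in terms of %mul0 even though it is otherwise a perfect
; candidate.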
define void @not_dominate(i1 %cond, i32 %a, i32 %b) {
; CHECK-LABEL: @not_dominate(
entry:
%a1 = add i32 %a, 1
br i1 %cond, label %then, label %merge
then:
%mul0 = mul i32 %a, %b
; CHECK: %mul0 = mul i32 %a, %b
call void @foo(i32 %mul0)
br label %merge
merge:
%mul1 = mul i32 %a1, %b
; CHECK: %mul1 = mul i32 %a1, %b
call void @foo(i32 %mul1)
ret void
}
declare void @foo(i32)