Imported Upstream version 5.18.0.167

Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
Xamarin Public Jenkins (auto-signing)
2018-10-20 08:25:10 +00:00
parent e19d552987
commit b084638f15
28489 changed files with 184 additions and 3866856 deletions

View File

@ -1,131 +0,0 @@
; RUN: opt < %s -sroa -S | FileCheck %s
; SROA test: memcpy between allocas and pointers living in address
; spaces 0 and 1 (p1 is 16-bit per the datalayout below).  SROA must
; rewrite these copies into direct loads/stores without introducing a
; bitcast that changes the address space, which would be illegal IR.
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
; memcpy intrinsic variants, one per (dest, src) address-space pairing.
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture readonly, i32, i32, i1)
declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
; Make sure an illegal bitcast isn't introduced
; addrspace(1) source copied through an addrspace(0) alloca to an
; addrspace(1) destination; the alloca must fold away into a direct
; load/store pair that stays in addrspace(1).
define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)* %b) {
; CHECK-LABEL: @test_address_space_1_1(
; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2
; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
; CHECK: ret void
%aa = alloca <2 x i64>, align 16
%aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
%aaptr = bitcast <2 x i64>* %aa to i8*
call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
%bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
ret void
}
; Same shape as above, but the final copy targets an addrspace(0)
; destination (%b is a plain i16*).
define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) {
; CHECK-LABEL: @test_address_space_1_0(
; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2
; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
; CHECK: ret void
%aa = alloca <2 x i64>, align 16
%aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
%aaptr = bitcast <2 x i64>* %aa to i8*
call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
%bptr = bitcast i16* %b to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
ret void
}
; Mirror case: addrspace(0) source, addrspace(1) destination.
define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) {
; CHECK-LABEL: @test_address_space_0_1(
; CHECK: load <2 x i64>, <2 x i64>* %a, align 2
; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
; CHECK: ret void
%aa = alloca <2 x i64>, align 16
%aptr = bitcast <2 x i64>* %a to i8*
%aaptr = bitcast <2 x i64>* %aa to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false)
%bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
ret void
}
%struct.struct_test_27.0.13 = type { i32, float, i64, i8, [4 x i32] }
; Function Attrs: nounwind
; A struct coerced in as [5 x i64] and partially copied out to an
; addrspace(1) destination; SROA must eliminate the memcpy entirely.
define void @copy_struct([5 x i64] %in.coerce) {
; CHECK-LABEL: @copy_struct(
; CHECK-NOT: memcpy
for.end:
%in = alloca %struct.struct_test_27.0.13, align 8
%0 = bitcast %struct.struct_test_27.0.13* %in to [5 x i64]*
store [5 x i64] %in.coerce, [5 x i64]* %0, align 8
%scevgep9 = getelementptr %struct.struct_test_27.0.13, %struct.struct_test_27.0.13* %in, i32 0, i32 4, i32 0
%scevgep910 = bitcast i32* %scevgep9 to i8*
call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* undef, i8* %scevgep910, i32 16, i32 4, i1 false)
ret void
}
%union.anon = type { i32* }
@g = common global i32 0, align 4
@l = common addrspace(3) global i32 0, align 4
; Make sure an illegal bitcast isn't introduced
; PR27557: the union alloca is stored through two pointer types whose
; pointees differ in address space; the rewrite must cast the slot
; pointer (i32** -> i32 addrspace(3)**), not the stored value.
define void @pr27557() {
; CHECK-LABEL: @pr27557(
; CHECK: %[[CAST:.*]] = bitcast i32** {{.*}} to i32 addrspace(3)**
; CHECK: store i32 addrspace(3)* @l, i32 addrspace(3)** %[[CAST]]
%1 = alloca %union.anon, align 8
%2 = bitcast %union.anon* %1 to i32**
store i32* @g, i32** %2, align 8
%3 = bitcast %union.anon* %1 to i32 addrspace(3)**
store i32 addrspace(3)* @l, i32 addrspace(3)** %3, align 8
ret void
}
; Make sure pre-splitting doesn't try to introduce an illegal bitcast
define float @presplit(i64 addrspace(1)* %p) {
entry:
; CHECK-LABEL: @presplit(
; CHECK: %[[CAST:.*]] = bitcast i64 addrspace(1)* {{.*}} to i32 addrspace(1)*
; CHECK: load i32, i32 addrspace(1)* %[[CAST]]
%b = alloca i64
%b.cast = bitcast i64* %b to [2 x float]*
%b.gep1 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 0
%b.gep2 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 1
%l = load i64, i64 addrspace(1)* %p
store i64 %l, i64* %b
%f1 = load float, float* %b.gep1
%f2 = load float, float* %b.gep2
%ret = fadd float %f1, %f2
ret float %ret
}
; Test load from and store to non-zero address space.
define void @test_load_store_diff_addr_space([2 x float] addrspace(1)* %complex1, [2 x float] addrspace(1)* %complex2) {
; CHECK-LABEL: @test_load_store_diff_addr_space
; CHECK-NOT: alloca
; CHECK: load i32, i32 addrspace(1)*
; CHECK: load i32, i32 addrspace(1)*
; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
%a = alloca i64
%a.cast = bitcast i64* %a to [2 x float]*
%a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0
%a.gep2 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 1
%complex1.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex1, i32 0, i32 0
%p1 = bitcast float addrspace(1)* %complex1.gep to i64 addrspace(1)*
%v1 = load i64, i64 addrspace(1)* %p1
store i64 %v1, i64* %a
%f1 = load float, float* %a.gep1
%f2 = load float, float* %a.gep2
%sum = fadd float %f1, %f2
store float %sum, float* %a.gep1
store float %sum, float* %a.gep2
%v2 = load i64, i64* %a
%complex2.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex2, i32 0, i32 0
%p2 = bitcast float addrspace(1)* %complex2.gep to i64 addrspace(1)*
store i64 %v2, i64 addrspace(1)* %p2
ret void
}

View File

@ -1,174 +0,0 @@
; RUN: opt < %s -sroa -S | FileCheck %s
; SROA alignment tests: when allocas are split and their memcpys are
; rewritten as scalar loads/stores, the alignment of each new access
; must be computed correctly from the alloca alignment and the offset
; of the slice within it.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
; Copy { i8, i8 } through an align-16 alloca: the field at offset 0
; keeps align 16, the field at offset 1 drops to align 1.
define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) {
; CHECK-LABEL: @test1(
; CHECK: %[[gep_a0:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %a, i64 0, i32 0
; CHECK: %[[a0:.*]] = load i8, i8* %[[gep_a0]], align 16
; CHECK: %[[gep_a1:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %a, i64 0, i32 1
; CHECK: %[[a1:.*]] = load i8, i8* %[[gep_a1]], align 1
; CHECK: %[[gep_b0:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %b, i64 0, i32 0
; CHECK: store i8 %[[a0]], i8* %[[gep_b0]], align 16
; CHECK: %[[gep_b1:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %b, i64 0, i32 1
; CHECK: store i8 %[[a1]], i8* %[[gep_b1]], align 1
; CHECK: ret void
entry:
%alloca = alloca { i8, i8 }, align 16
%gep_a = getelementptr { i8, i8 }, { i8, i8 }* %a, i32 0, i32 0
%gep_alloca = getelementptr { i8, i8 }, { i8, i8 }* %alloca, i32 0, i32 0
%gep_b = getelementptr { i8, i8 }, { i8, i8 }* %b, i32 0, i32 0
store i8 420, i8* %gep_alloca, align 16
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_alloca, i8* %gep_a, i32 2, i32 16, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_b, i8* %gep_alloca, i32 2, i32 16, i1 false)
ret void
}
; A volatile i16 store at offset 1 of an align-2 alloca keeps a
; (misaligned) i16 slice alive; the i8 accesses around it survive.
define void @test2() {
; CHECK-LABEL: @test2(
; CHECK: alloca i16
; CHECK: load i8, i8* %{{.*}}
; CHECK: store i8 42, i8* %{{.*}}
; CHECK: ret void
entry:
%a = alloca { i8, i8, i8, i8 }, align 2
%gep1 = getelementptr { i8, i8, i8, i8 }, { i8, i8, i8, i8 }* %a, i32 0, i32 1
%cast1 = bitcast i8* %gep1 to i16*
store volatile i16 0, i16* %cast1
%gep2 = getelementptr { i8, i8, i8, i8 }, { i8, i8, i8, i8 }* %a, i32 0, i32 2
%result = load i8, i8* %gep2
store i8 42, i8* %gep2
ret void
}
define void @PR13920(<2 x i64>* %a, i16* %b) {
; Test that alignments on memcpy intrinsics get propagated to loads and stores.
; CHECK-LABEL: @PR13920(
; CHECK: load <2 x i64>, <2 x i64>* %a, align 2
; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
; CHECK: ret void
entry:
%aa = alloca <2 x i64>, align 16
%aptr = bitcast <2 x i64>* %a to i8*
%aaptr = bitcast <2 x i64>* %aa to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false)
%bptr = bitcast i16* %b to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
ret void
}
define void @test3(i8* %x) {
; Test that when we promote an alloca to a type with lower ABI alignment, we
; provide the needed explicit alignment that code using the alloca may be
; expecting. However, also check that any offset within an alloca can in turn
; reduce the alignment.
; CHECK-LABEL: @test3(
; CHECK: alloca [22 x i8], align 8
; CHECK: alloca [18 x i8], align 2
; CHECK: ret void
entry:
%a = alloca { i8*, i8*, i8* }
%b = alloca { i8*, i8*, i8* }
%a_raw = bitcast { i8*, i8*, i8* }* %a to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a_raw, i8* %x, i32 22, i32 8, i1 false)
%b_raw = bitcast { i8*, i8*, i8* }* %b to i8*
%b_gep = getelementptr i8, i8* %b_raw, i32 6
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_gep, i8* %x, i32 18, i32 2, i1 false)
ret void
}
define void @test5() {
; Test that we preserve underaligned loads and stores when splitting. The use
; of volatile in this test case is just to force the loads and stores to not be
; split or promoted out of existence.
;
; CHECK-LABEL: @test5(
; CHECK: alloca [9 x i8]
; CHECK: alloca [9 x i8]
; CHECK: store volatile double 0.0{{.*}}, double* %{{.*}}, align 1
; CHECK: load volatile i16, i16* %{{.*}}, align 1
; CHECK: load double, double* %{{.*}}, align 1
; CHECK: store volatile double %{{.*}}, double* %{{.*}}, align 1
; CHECK: load volatile i16, i16* %{{.*}}, align 1
; CHECK: ret void
entry:
%a = alloca [18 x i8]
%raw1 = getelementptr inbounds [18 x i8], [18 x i8]* %a, i32 0, i32 0
%ptr1 = bitcast i8* %raw1 to double*
store volatile double 0.0, double* %ptr1, align 1
%weird_gep1 = getelementptr inbounds [18 x i8], [18 x i8]* %a, i32 0, i32 7
%weird_cast1 = bitcast i8* %weird_gep1 to i16*
%weird_load1 = load volatile i16, i16* %weird_cast1, align 1
%raw2 = getelementptr inbounds [18 x i8], [18 x i8]* %a, i32 0, i32 9
%ptr2 = bitcast i8* %raw2 to double*
%d1 = load double, double* %ptr1, align 1
store volatile double %d1, double* %ptr2, align 1
%weird_gep2 = getelementptr inbounds [18 x i8], [18 x i8]* %a, i32 0, i32 16
%weird_cast2 = bitcast i8* %weird_gep2 to i16*
%weird_load2 = load volatile i16, i16* %weird_cast2, align 1
ret void
}
define void @test6() {
; Test that we promote alignment when the underlying alloca switches to one
; that innately provides it.
; CHECK-LABEL: @test6(
; CHECK: alloca double
; CHECK: alloca double
; CHECK-NOT: align
; CHECK: ret void
entry:
%a = alloca [16 x i8]
%raw1 = getelementptr inbounds [16 x i8], [16 x i8]* %a, i32 0, i32 0
%ptr1 = bitcast i8* %raw1 to double*
store volatile double 0.0, double* %ptr1, align 1
%raw2 = getelementptr inbounds [16 x i8], [16 x i8]* %a, i32 0, i32 8
%ptr2 = bitcast i8* %raw2 to double*
%val = load double, double* %ptr1, align 1
store volatile double %val, double* %ptr2, align 1
ret void
}
define void @test7(i8* %out) {
; Test that we properly compute the destination alignment when rewriting
; memcpys as direct loads or stores.
; CHECK-LABEL: @test7(
; CHECK-NOT: alloca
entry:
%a = alloca [16 x i8]
%raw1 = getelementptr inbounds [16 x i8], [16 x i8]* %a, i32 0, i32 0
%ptr1 = bitcast i8* %raw1 to double*
%raw2 = getelementptr inbounds [16 x i8], [16 x i8]* %a, i32 0, i32 8
%ptr2 = bitcast i8* %raw2 to double*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %raw1, i8* %out, i32 16, i32 0, i1 false)
; CHECK: %[[val2:.*]] = load double, double* %{{.*}}, align 1
; CHECK: %[[val1:.*]] = load double, double* %{{.*}}, align 1
%val1 = load double, double* %ptr2, align 1
%val2 = load double, double* %ptr1, align 1
store double %val1, double* %ptr1, align 1
store double %val2, double* %ptr2, align 1
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %raw1, i32 16, i32 0, i1 false)
; CHECK: store double %[[val1]], double* %{{.*}}, align 1
; CHECK: store double %[[val2]], double* %{{.*}}, align 1
ret void
; CHECK: ret void
}

View File

@ -1,113 +0,0 @@
; RUN: opt < %s -sroa -S | FileCheck %s
; SROA test with a non-zero alloca address space: the datalayout below
; declares "-A2", so every alloca in this file lives in addrspace(2).
; Rewritten loads/stores must keep the correct address space on each
; side of every memcpy, without illegal addrspace-changing bitcasts.
target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64-A2"
; memcpy intrinsic variants, one per (dest, src) address-space pairing.
declare void @llvm.memcpy.p2i8.p2i8.i32(i8 addrspace(2)* nocapture, i8 addrspace(2)* nocapture readonly, i32, i32, i1)
declare void @llvm.memcpy.p1i8.p2i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture readonly, i32, i32, i1)
declare void @llvm.memcpy.p2i8.p1i8.i32(i8 addrspace(2)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
; CHECK-LABEL: @test_address_space_1_1(
; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2
; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
; CHECK: ret void
; addrspace(1) -> addrspace(2) alloca -> addrspace(1): the alloca must
; fold away into a direct addrspace(1) load/store pair.
define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)* %b) {
%aa = alloca <2 x i64>, align 16, addrspace(2)
%aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
%aaptr = bitcast <2 x i64> addrspace(2)* %aa to i8 addrspace(2)*
call void @llvm.memcpy.p2i8.p1i8.i32(i8 addrspace(2)* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
%bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
call void @llvm.memcpy.p1i8.p2i8.i32(i8 addrspace(1)* %bptr, i8 addrspace(2)* %aaptr, i32 16, i32 2, i1 false)
ret void
}
; CHECK-LABEL: @test_address_space_1_0(
; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2
; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(2)* {{.*}}, align 2
; CHECK: ret void
; Same shape, but the final copy targets an addrspace(2) destination.
define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16 addrspace(2)* %b) {
%aa = alloca <2 x i64>, align 16, addrspace(2)
%aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
%aaptr = bitcast <2 x i64> addrspace(2)* %aa to i8 addrspace(2)*
call void @llvm.memcpy.p2i8.p1i8.i32(i8 addrspace(2)* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
%bptr = bitcast i16 addrspace(2)* %b to i8 addrspace(2)*
call void @llvm.memcpy.p2i8.p2i8.i32(i8 addrspace(2)* %bptr, i8 addrspace(2)* %aaptr, i32 16, i32 2, i1 false)
ret void
}
; CHECK-LABEL: @test_address_space_0_1(
; CHECK: load <2 x i64>, <2 x i64> addrspace(2)* %a, align 2
; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
; CHECK: ret void
; Mirror case: addrspace(2) source, addrspace(1) destination.
define void @test_address_space_0_1(<2 x i64> addrspace(2)* %a, i16 addrspace(1)* %b) {
%aa = alloca <2 x i64>, align 16, addrspace(2)
%aptr = bitcast <2 x i64> addrspace(2)* %a to i8 addrspace(2)*
%aaptr = bitcast <2 x i64> addrspace(2)* %aa to i8 addrspace(2)*
call void @llvm.memcpy.p2i8.p2i8.i32(i8 addrspace(2)* %aaptr, i8 addrspace(2)* %aptr, i32 16, i32 2, i1 false)
%bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
call void @llvm.memcpy.p1i8.p2i8.i32(i8 addrspace(1)* %bptr, i8 addrspace(2)* %aaptr, i32 16, i32 2, i1 false)
ret void
}
%struct.struct_test_27.0.13 = type { i32, float, i64, i8, [4 x i32] }
; CHECK-LABEL: @copy_struct(
; CHECK-NOT: memcpy
; A struct coerced in as [5 x i64] and partially copied out of an
; addrspace(2) alloca; SROA must eliminate the memcpy entirely.
define void @copy_struct([5 x i64] %in.coerce) {
for.end:
%in = alloca %struct.struct_test_27.0.13, align 8, addrspace(2)
%0 = bitcast %struct.struct_test_27.0.13 addrspace(2)* %in to [5 x i64] addrspace(2)*
store [5 x i64] %in.coerce, [5 x i64] addrspace(2)* %0, align 8
%scevgep9 = getelementptr %struct.struct_test_27.0.13, %struct.struct_test_27.0.13 addrspace(2)* %in, i32 0, i32 4, i32 0
%scevgep910 = bitcast i32 addrspace(2)* %scevgep9 to i8 addrspace(2)*
call void @llvm.memcpy.p1i8.p2i8.i32(i8 addrspace(1)* undef, i8 addrspace(2)* %scevgep910, i32 16, i32 4, i1 false)
ret void
}
%union.anon = type { i32* }
@g = common global i32 0, align 4
@l = common addrspace(3) global i32 0, align 4
; Make sure an illegal bitcast isn't introduced
; CHECK-LABEL: @pr27557(
; CHECK: %[[CAST:.*]] = bitcast i32* addrspace(2)* {{.*}} to i32 addrspace(3)* addrspace(2)*
; CHECK: store i32 addrspace(3)* @l, i32 addrspace(3)* addrspace(2)* %[[CAST]]
; PR27557 variant with the union alloca itself in addrspace(2): the
; rewrite must cast the slot pointer, not the stored value.
define void @pr27557() {
%1 = alloca %union.anon, align 8, addrspace(2)
%2 = bitcast %union.anon addrspace(2)* %1 to i32* addrspace(2)*
store i32* @g, i32* addrspace(2)* %2, align 8
%3 = bitcast %union.anon addrspace(2)* %1 to i32 addrspace(3)* addrspace(2)*
store i32 addrspace(3)* @l, i32 addrspace(3)* addrspace(2)* %3, align 8
ret void
}
; Test load from and store to non-zero address space.
define void @test_load_store_diff_addr_space([2 x float] addrspace(1)* %complex1, [2 x float] addrspace(1)* %complex2) {
; CHECK-LABEL: @test_load_store_diff_addr_space
; CHECK-NOT: alloca
; CHECK: load i32, i32 addrspace(1)*
; CHECK: load i32, i32 addrspace(1)*
; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
%a0 = alloca [2 x i64], align 8, addrspace(2)
%a = getelementptr [2 x i64], [2 x i64] addrspace(2)* %a0, i32 0, i32 0
%a.cast = bitcast i64 addrspace(2)* %a to [2 x float] addrspace(2)*
%a.gep1 = getelementptr [2 x float], [2 x float] addrspace(2)* %a.cast, i32 0, i32 0
%a.gep2 = getelementptr [2 x float], [2 x float] addrspace(2)* %a.cast, i32 0, i32 1
%complex1.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex1, i32 0, i32 0
%p1 = bitcast float addrspace(1)* %complex1.gep to i64 addrspace(1)*
%v1 = load i64, i64 addrspace(1)* %p1
store i64 %v1, i64 addrspace(2)* %a
%f1 = load float, float addrspace(2)* %a.gep1
%f2 = load float, float addrspace(2)* %a.gep2
%sum = fadd float %f1, %f2
store float %sum, float addrspace(2)* %a.gep1
store float %sum, float addrspace(2)* %a.gep2
%v2 = load i64, i64 addrspace(2)* %a
%complex2.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex2, i32 0, i32 0
%p2 = bitcast float addrspace(1)* %complex2.gep to i64 addrspace(1)*
store i64 %v2, i64 addrspace(1)* %p2
ret void
}

File diff suppressed because it is too large Load Diff

View File

@ -1,236 +0,0 @@
; RUN: opt < %s -sroa -S | FileCheck %s
; Big-endian SROA tests: note the "E" in the datalayout.  When SROA
; promotes sub-byte-offset slices into a wider integer, the shift
; amounts and masks it emits depend on byte order; these tests pin the
; big-endian lowering.
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
define i8 @test1() {
; We fully promote these to the i24 load or store size, resulting in just masks
; and other operations that instcombine will fold, but no alloca. Note this is
; the same as test12 in basictest.ll, but here we assert big-endian byte
; ordering.
;
; CHECK-LABEL: @test1(
entry:
%a = alloca [3 x i8]
%b = alloca [3 x i8]
; CHECK-NOT: alloca
%a0ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 0
store i8 0, i8* %a0ptr
%a1ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 1
store i8 0, i8* %a1ptr
%a2ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 2
store i8 0, i8* %a2ptr
%aiptr = bitcast [3 x i8]* %a to i24*
%ai = load i24, i24* %aiptr
; CHECK-NOT: store
; CHECK-NOT: load
; CHECK: %[[ext2:.*]] = zext i8 0 to i24
; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, -256
; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[ext2]]
; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24
; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8
; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281
; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]]
; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24
; CHECK-NEXT: %[[shift0:.*]] = shl i24 %[[ext0]], 16
; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], 65535
; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[shift0]]
%biptr = bitcast [3 x i8]* %b to i24*
store i24 %ai, i24* %biptr
%b0ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 0
%b0 = load i8, i8* %b0ptr
%b1ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 1
%b1 = load i8, i8* %b1ptr
%b2ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 2
%b2 = load i8, i8* %b2ptr
; CHECK-NOT: store
; CHECK-NOT: load
; CHECK: %[[shift0:.*]] = lshr i24 %[[insert0]], 16
; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8
; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[insert0]] to i8
%bsum0 = add i8 %b0, %b1
%bsum1 = add i8 %bsum0, %b2
ret i8 %bsum1
; CHECK: %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
; CHECK-NEXT: ret i8 %[[sum1]]
}
define i64 @test2() {
; Test for various mixed sizes of integer loads and stores all getting
; promoted.
;
; CHECK-LABEL: @test2(
entry:
%a = alloca [7 x i8]
; CHECK-NOT: alloca
%a0ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 0
%a1ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 1
%a2ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 2
%a3ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 3
; CHECK-NOT: store
; CHECK-NOT: load
%a0i16ptr = bitcast i8* %a0ptr to i16*
store i16 1, i16* %a0i16ptr
store i8 1, i8* %a2ptr
; CHECK: %[[mask1:.*]] = and i40 undef, 4294967295
; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], 4294967296
%a3i24ptr = bitcast i8* %a3ptr to i24*
store i24 1, i24* %a3i24ptr
; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041
; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], 256
%a2i40ptr = bitcast i8* %a2ptr to i40*
store i40 1, i40* %a2i40ptr
; CHECK-NEXT: %[[ext3:.*]] = zext i40 1 to i56
; CHECK-NEXT: %[[mask3:.*]] = and i56 undef, -1099511627776
; CHECK-NEXT: %[[insert3:.*]] = or i56 %[[mask3]], %[[ext3]]
; CHECK-NOT: store
; CHECK-NOT: load
%aiptr = bitcast [7 x i8]* %a to i56*
%ai = load i56, i56* %aiptr
%ret = zext i56 %ai to i64
ret i64 %ret
; CHECK-NEXT: %[[ext4:.*]] = zext i16 1 to i56
; CHECK-NEXT: %[[shift4:.*]] = shl i56 %[[ext4]], 40
; CHECK-NEXT: %[[mask4:.*]] = and i56 %[[insert3]], 1099511627775
; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[shift4]]
; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert4]] to i64
; CHECK-NEXT: ret i64 %[[ret]]
}
define i64 @PR14132(i1 %flag) {
; CHECK-LABEL: @PR14132(
; Here we form a PHI-node by promoting the pointer alloca first, and then in
; order to promote the other two allocas, we speculate the load of the
; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
; alloca. While this is a bit dubious, we were asserting on trying to
; rewrite it. The trick is that the code using the value may carefully take
; steps to only use the not-undef bits, and so we need to at least loosely
; support this. This test is particularly interesting because how we handle
; a load of an i64 from an i8 alloca is dependent on endianness.
entry:
%a = alloca i64, align 8
%b = alloca i8, align 8
%ptr = alloca i64*, align 8
; CHECK-NOT: alloca
%ptr.cast = bitcast i64** %ptr to i8**
store i64 0, i64* %a
store i8 1, i8* %b
store i64* %a, i64** %ptr
br i1 %flag, label %if.then, label %if.end
if.then:
store i8* %b, i8** %ptr.cast
br label %if.end
; CHECK-NOT: store
; CHECK: %[[ext:.*]] = zext i8 1 to i64
; CHECK: %[[shift:.*]] = shl i64 %[[ext]], 56
if.end:
%tmp = load i64*, i64** %ptr
%result = load i64, i64* %tmp
; CHECK-NOT: load
; CHECK: %[[result:.*]] = phi i64 [ %[[shift]], %if.then ], [ 0, %entry ]
ret i64 %result
; CHECK-NEXT: ret i64 %[[result]]
}
declare void @f(i64 %x, i32 %y)
define void @test3() {
; CHECK-LABEL: @test3(
;
; This is a test that specifically exercises the big-endian lowering because it
; ends up splitting a 64-bit integer into two smaller integers and has a number
; of tricky aspects (the i24 type) that make that hard. Historically, SROA
; would miscompile this by either dropping a most significant byte or least
; significant byte due to shrinking the [4,8) slice to an i24, or by failing to
; move the bytes around correctly.
;
; The magical number 34494054408 is used because it has bits set in various
; bytes so that it is clear if those bytes fail to be propagated.
;
; If you're debugging this, rather than using the direct magical numbers, run
; the IR through '-sroa -instcombine'. With '-instcombine' these will be
; constant folded, and if the i64 doesn't round-trip correctly, you've found
; a bug!
;
entry:
%a = alloca { i32, i24 }, align 4
; CHECK-NOT: alloca
%tmp0 = bitcast { i32, i24 }* %a to i64*
store i64 34494054408, i64* %tmp0
%tmp1 = load i64, i64* %tmp0, align 4
%tmp2 = bitcast { i32, i24 }* %a to i32*
%tmp3 = load i32, i32* %tmp2, align 4
; CHECK: %[[HI_EXT:.*]] = zext i32 134316040 to i64
; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296
; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]]
; CHECK: %[[LO_EXT:.*]] = zext i32 8 to i64
; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32
; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295
; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]]
call void @f(i64 %tmp1, i32 %tmp3)
; CHECK: call void @f(i64 %[[LO_MERGE]], i32 8)
ret void
; CHECK: ret void
}
define void @test4() {
; CHECK-LABEL: @test4
;
; Much like @test3, this is specifically testing big-endian management of data.
; Also similarly, it uses constants with particular bits set to help track
; whether values are corrupted, and can be easily evaluated by running through
; -instcombine to see that the i64 round-trips.
;
entry:
%a = alloca { i32, i24 }, align 4
%a2 = alloca i64, align 4
; CHECK-NOT: alloca
store i64 34494054408, i64* %a2
%tmp0 = bitcast { i32, i24 }* %a to i8*
%tmp1 = bitcast i64* %a2 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp0, i8* %tmp1, i64 8, i32 4, i1 false)
; CHECK: %[[LO_SHR:.*]] = lshr i64 34494054408, 32
; CHECK: %[[LO_START:.*]] = trunc i64 %[[LO_SHR]] to i32
; CHECK: %[[HI_START:.*]] = trunc i64 34494054408 to i32
%tmp2 = bitcast { i32, i24 }* %a to i64*
%tmp3 = load i64, i64* %tmp2, align 4
%tmp4 = bitcast { i32, i24 }* %a to i32*
%tmp5 = load i32, i32* %tmp4, align 4
; CHECK: %[[HI_EXT:.*]] = zext i32 %[[HI_START]] to i64
; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296
; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]]
; CHECK: %[[LO_EXT:.*]] = zext i32 %[[LO_START]] to i64
; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32
; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295
; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]]
call void @f(i64 %tmp3, i32 %tmp5)
; CHECK: call void @f(i64 %[[LO_MERGE]], i32 %[[LO_START]])
ret void
; CHECK: ret void
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)

View File

@ -1,127 +0,0 @@
; RUN: opt -use-dbg-addr -sroa -S < %s | FileCheck %s
; Debug-info test: when SROA splits the %p struct alloca, the single
; llvm.dbg.addr for the whole variable must be rewritten into
; llvm.dbg.value calls carrying DW_OP_LLVM_fragment expressions, one
; per field, in every block that stores to the alloca (see the CHECK
; lines after the function body).
; ModuleID = '<stdin>'
source_filename = "newvars.c"
target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc19.0.24215"
%struct.Pair = type { i32, i32 }
@pair = internal global %struct.Pair zeroinitializer
; Function Attrs: nounwind uwtable
define void @if_else(i32 %cond, i32 %a, i32 %b) !dbg !8 {
entry:
%p = alloca %struct.Pair, align 4
%0 = bitcast %struct.Pair* %p to i8*, !dbg !25
call void @llvm.dbg.addr(metadata %struct.Pair* %p, metadata !20, metadata !DIExpression()), !dbg !26
%x = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 0, !dbg !27
store i32 %a, i32* %x, align 4, !dbg !28
%y = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 1, !dbg !34
store i32 %b, i32* %y, align 4, !dbg !35
%tobool = icmp ne i32 %cond, 0, !dbg !37
br i1 %tobool, label %if.then, label %if.else, !dbg !39
if.then: ; preds = %entry
%x1 = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 0, !dbg !40
store i32 0, i32* %x1, align 4, !dbg !42
%y2 = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 1, !dbg !43
store i32 %a, i32* %y2, align 4, !dbg !44
br label %if.end, !dbg !45
if.else: ; preds = %entry
%x3 = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 0, !dbg !46
store i32 %b, i32* %x3, align 4, !dbg !48
%y4 = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 1, !dbg !49
store i32 0, i32* %y4, align 4, !dbg !50
br label %if.end
if.end: ; preds = %if.else, %if.then
%1 = bitcast %struct.Pair* %p to i8*, !dbg !51
%2 = bitcast %struct.Pair* @pair to i8*, !dbg !51
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %1, i64 8, i32 4, i1 false), !dbg !51
ret void
}
; CHECK-LABEL: define void @if_else(i32 %cond, i32 %a, i32 %b)
; CHECK: entry:
; CHECK: call void @llvm.dbg.value(metadata i32 %a, metadata ![[PVAR:[0-9]+]], metadata ![[XFRAG:DIExpression\(DW_OP_LLVM_fragment, 0, 32\)]])
; CHECK: call void @llvm.dbg.value(metadata i32 %b, metadata ![[PVAR]], metadata ![[YFRAG:DIExpression\(DW_OP_LLVM_fragment, 32, 32\)]])
; CHECK: if.then:
; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[PVAR]], metadata ![[XFRAG]])
; CHECK: call void @llvm.dbg.value(metadata i32 %a, metadata ![[PVAR]], metadata ![[YFRAG]])
; CHECK: if.else:
; CHECK: call void @llvm.dbg.value(metadata i32 %b, metadata ![[PVAR]], metadata ![[XFRAG]])
; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[PVAR]], metadata ![[YFRAG]])
; CHECK: if.end:
; CHECK: %p.sroa.4.0 = phi i32 [ %a, %if.then ], [ 0, %if.else ]
; CHECK: %p.sroa.0.0 = phi i32 [ 0, %if.then ], [ %b, %if.else ]
; CHECK: call void @llvm.dbg.value(metadata i32 %p.sroa.0.0, metadata ![[PVAR]], metadata ![[XFRAG]])
; CHECK: call void @llvm.dbg.value(metadata i32 %p.sroa.4.0, metadata ![[PVAR]], metadata ![[YFRAG]])
; CHECK: ![[PVAR]] = !DILocalVariable(name: "p", {{.*}})
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #2
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.addr(metadata, metadata, metadata)
; Debug metadata: compile unit, the @if_else subprogram (!8), the Pair
; struct type (!11), the local variable "p" (!20), and the source
; locations referenced by the !dbg attachments above.
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
!llvm.ident = !{!7}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "newvars.c", directory: "C:\5Csrc\5Cllvm-project\5Cbuild")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 2}
!6 = !{i32 7, !"PIC Level", i32 2}
!7 = !{!"clang version 6.0.0 "}
!8 = distinct !DISubprogram(name: "if_else", scope: !1, file: !1, line: 2, type: !9, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !16)
!9 = !DISubroutineType(types: !10)
!10 = !{!11, !14, !14, !14}
!11 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Pair", file: !1, line: 1, size: 64, elements: !12)
!12 = !{!13, !15}
!13 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !11, file: !1, line: 1, baseType: !14, size: 32)
!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!15 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !11, file: !1, line: 1, baseType: !14, size: 32, offset: 32)
!16 = !{!17, !18, !19, !20}
!17 = !DILocalVariable(name: "b", arg: 3, scope: !8, file: !1, line: 2, type: !14)
!18 = !DILocalVariable(name: "a", arg: 2, scope: !8, file: !1, line: 2, type: !14)
!19 = !DILocalVariable(name: "cond", arg: 1, scope: !8, file: !1, line: 2, type: !14)
!20 = !DILocalVariable(name: "p", scope: !8, file: !1, line: 3, type: !11)
!22 = !DILocation(line: 2, column: 42, scope: !8)
!23 = !DILocation(line: 2, column: 35, scope: !8)
!24 = !DILocation(line: 2, column: 25, scope: !8)
!25 = !DILocation(line: 3, column: 3, scope: !8)
!26 = !DILocation(line: 3, column: 15, scope: !8)
!27 = !DILocation(line: 4, column: 5, scope: !8)
!28 = !DILocation(line: 4, column: 7, scope: !8)
!29 = !{!30, !31, i64 0}
!30 = !{!"Pair", !31, i64 0, !31, i64 4}
!31 = !{!"int", !32, i64 0}
!32 = !{!"omnipotent char", !33, i64 0}
!33 = !{!"Simple C/C++ TBAA"}
!34 = !DILocation(line: 5, column: 5, scope: !8)
!35 = !DILocation(line: 5, column: 7, scope: !8)
!36 = !{!30, !31, i64 4}
!37 = !DILocation(line: 6, column: 7, scope: !38)
!38 = distinct !DILexicalBlock(scope: !8, file: !1, line: 6, column: 7)
!39 = !DILocation(line: 6, column: 7, scope: !8)
!40 = !DILocation(line: 7, column: 7, scope: !41)
!41 = distinct !DILexicalBlock(scope: !38, file: !1, line: 6, column: 13)
!42 = !DILocation(line: 7, column: 9, scope: !41)
!43 = !DILocation(line: 8, column: 7, scope: !41)
!44 = !DILocation(line: 8, column: 9, scope: !41)
!45 = !DILocation(line: 9, column: 3, scope: !41)
!46 = !DILocation(line: 10, column: 7, scope: !47)
!47 = distinct !DILexicalBlock(scope: !38, file: !1, line: 9, column: 10)
!48 = !DILocation(line: 10, column: 9, scope: !47)
!49 = !DILocation(line: 11, column: 7, scope: !47)
!50 = !DILocation(line: 11, column: 9, scope: !47)
!51 = !DILocation(line: 13, column: 10, scope: !8)
!52 = !{i64 0, i64 4, !53, i64 4, i64 4, !53}
!53 = !{!31, !31, i64 0}
!54 = !DILocation(line: 14, column: 1, scope: !8)

View File

@ -1,37 +0,0 @@
; RUN: opt -sroa %s -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
%foo = type { [8 x i8], [8 x i8] }
declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
define void @_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE() {
entry:
%retval = alloca %foo, align 8
call void @llvm.dbg.declare(metadata %foo* %retval, metadata !1, metadata !7), !dbg !8
; Checks that SROA still inserts a bit_piece expression, even if it produces only one piece
; (as long as that piece is smaller than the whole thing)
; CHECK-NOT: call void @llvm.dbg.value
; CHECK: call void @llvm.dbg.value(metadata %foo* undef, {{.*}}, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg
; CHECK-NOT: call void @llvm.dbg.value
%0 = bitcast %foo* %retval to i8*
%1 = getelementptr inbounds i8, i8* %0, i64 8
%2 = bitcast i8* %1 to %foo**
store %foo* undef, %foo** %2, align 8
ret void
}
attributes #0 = { nounwind readnone }
!llvm.dbg.cu = !{!9}
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !DILocalVariable(name: "I", scope: !2, file: !3, line: 947, type: !4)
!2 = distinct !DISubprogram(name: "findInsertLocation", linkageName: "_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE", scope: !3, file: !3, line: 937, isLocal: true, isDefinition: true, scopeLine: 938, flags: DIFlagPrototyped, isOptimized: true, unit: !9)
!3 = !DIFile(filename: "none", directory: ".")
!4 = !DICompositeType(tag: DW_TAG_class_type, name: "bundle_iterator<llvm::MachineInstr, llvm::ilist_iterator<llvm::MachineInstr> >", scope: !5, file: !3, line: 163, size: 128, align: 64, elements: !6, templateParams: !6, identifier: "_ZTSN4llvm17MachineBasicBlock15bundle_iteratorINS_12MachineInstrENS_14ilist_iteratorIS2_EEEE")
!5 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "MachineBasicBlock", file: !3, line: 68, size: 1408, align: 64, identifier: "_ZTSN4llvm17MachineBasicBlockE")
!6 = !{}
!7 = !DIExpression()
!8 = !DILocation(line: 947, column: 35, scope: !2)
!9 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3)

View File

@ -1,97 +0,0 @@
; SROA fails to rewrite allocs but does rewrite some phis and delete
; dead instructions. Ensure that this invalidates analyses required
; for other passes.
; RUN: opt < %s -passes=bdce,sroa,bdce -o %t -debug-pass-manager 2>&1 | FileCheck %s
; CHECK: Running pass: BDCEPass on H
; CHECK: Running analysis: DemandedBitsAnalysis on H
; CHECK: Running pass: SROA on H
; CHECK: Invalidating all non-preserved analyses for: H
; CHECK: Invalidating analysis: DemandedBitsAnalysis on H
; CHECK: Running pass: BDCEPass on H
; CHECK: Running analysis: DemandedBitsAnalysis on H
; CHECK: Finished llvm::Function pass manager run.
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-grtev4-linux-gnu"
%class.b = type { i64 }
declare void @D(%class.b* sret, %class.b* dereferenceable(32)) local_unnamed_addr
; Function Attrs: nounwind
define hidden fastcc void @H(%class.b* noalias nocapture readnone, [2 x i64]) unnamed_addr {
%3 = alloca %class.b, align 8
%.sroa.0 = alloca i64, align 8
store i64 0, i64* %.sroa.0, align 8
%4 = extractvalue [2 x i64] %1, 1
switch i64 %4, label %6 [
i64 4, label %foo
i64 5, label %5
]
; <label>:5:
%.sroa.0.0..sroa_cast3 = bitcast i64* %.sroa.0 to i8**
br label %12
; <label>:6:
%7 = icmp ugt i64 %4, 5
%.sroa.0.0..sroa_cast5 = bitcast i64* %.sroa.0 to i8**
br i1 %7, label %8, label %12
; <label>:8:
%9 = load i8, i8* inttoptr (i64 4 to i8*), align 4
%10 = icmp eq i8 %9, 47
%11 = select i1 %10, i64 5, i64 4
br label %12
; <label>:12:
%13 = phi i8** [ %.sroa.0.0..sroa_cast3, %5 ], [ %.sroa.0.0..sroa_cast5, %8 ], [ %.sroa.0.0..sroa_cast5, %6 ]
%14 = phi i64 [ 4, %5 ], [ %11, %8 ], [ 4, %6 ]
%15 = icmp ne i64 %4, 0
%16 = icmp ugt i64 %4, %14
%17 = and i1 %15, %16
br i1 %17, label %18, label %a.exit
; <label>:18:
%19 = tail call i8* @memchr(i8* undef, i32 signext undef, i64 undef)
%20 = icmp eq i8* %19, null
%21 = sext i1 %20 to i64
br label %a.exit
a.exit:
%22 = phi i64 [ -1, %12 ], [ %21, %18 ]
%23 = load i8*, i8** %13, align 8
%24 = sub nsw i64 %22, %14
%25 = bitcast %class.b* %3 to i8*
call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %25)
%26 = icmp ult i64 %24, 2
br i1 %26, label %G.exit, label %27
; <label>:27:
%28 = getelementptr inbounds i8, i8* %23, i64 undef
%29 = icmp eq i8* %28, null
br i1 %29, label %30, label %31
; <label>:30:
unreachable
; <label>:31:
call void @D(%class.b* nonnull sret %3, %class.b* nonnull dereferenceable(32) undef)
br label %G.exit
G.exit:
call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %25)
br label %foo
foo:
ret void
}
; Function Attrs: nounwind readonly
declare i8* @memchr(i8*, i32 signext, i64) local_unnamed_addr
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)

View File

@ -1,48 +0,0 @@
; RUN: opt < %s -sroa -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
define { i32, i32 } @test0(i32 %x, i32 %y) {
; CHECK-LABEL: @test0(
; CHECK-NOT: alloca
; CHECK: insertvalue { i32, i32 }
; CHECK: insertvalue { i32, i32 }
; CHECK: ret { i32, i32 }
entry:
%a = alloca { i32, i32 }
store { i32, i32 } undef, { i32, i32 }* %a
%gep1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 0
store i32 %x, i32* %gep1
%gep2 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 1
store i32 %y, i32* %gep2
%result = load { i32, i32 }, { i32, i32 }* %a
ret { i32, i32 } %result
}
define { i32, i32 } @test1(i32 %x, i32 %y) {
; FIXME: This may be too conservative. Duncan argues that we are allowed to
; split the volatile load and store here but must produce volatile scalar loads
; and stores from them.
; CHECK-LABEL: @test1(
; CHECK: alloca
; CHECK: alloca
; CHECK: load volatile { i32, i32 }, { i32, i32 }*
; CHECK: store volatile { i32, i32 }
; CHECK: ret { i32, i32 }
entry:
%a = alloca { i32, i32 }
%b = alloca { i32, i32 }
%gep1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 0
store i32 %x, i32* %gep1
%gep2 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 1
store i32 %y, i32* %gep2
%result = load volatile { i32, i32 }, { i32, i32 }* %a
store volatile { i32, i32 } %result, { i32, i32 }* %b
ret { i32, i32 } %result
}

View File

@ -1,110 +0,0 @@
; RUN: opt < %s -sroa -S | FileCheck %s
;
; Make sure the llvm.mem.parallel_loop_access meta-data is preserved
; when a load/store is replaced with another load/store by sroa
;
; class Complex {
; private:
; float real_;
; float imaginary_;
;
; public:
; Complex() : real_(0), imaginary_(0) { }
; Complex(float real, float imaginary) : real_(real), imaginary_(imaginary) { }
; Complex(const Complex &rhs) : real_(rhs.real()), imaginary_(rhs.imaginary()) { }
;
; inline float real() const { return real_; }
; inline float imaginary() const { return imaginary_; }
;
; Complex operator+(const Complex& rhs) const
; {
; return Complex(real_ + rhs.real_, imaginary_ + rhs.imaginary_);
; }
; };
;
; void test(Complex *out, long size)
; {
; #pragma clang loop vectorize(assume_safety)
; for (long offset = 0; offset < size; ++offset) {
; Complex t0 = out[offset];
; out[offset] = t0 + t0;
; }
; }
; CHECK: for.body:
; CHECK-NOT: store i32 %{{.*}}, i32* %{{.*}}, align 4
; CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 4, !llvm.mem.parallel_loop_access !1
; CHECK-NOT: store i32 %{{.*}}, i32* %{{.*}}, align 4
; CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 4, !llvm.mem.parallel_loop_access !1
; CHECK-NOT: store i32 %{{.*}}, i32* %{{.*}}, align 4
; CHECK: br label
; ModuleID = '<stdin>'
source_filename = "mem-par-metadata-sroa1.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%class.Complex = type { float, float }
; Function Attrs: norecurse nounwind uwtable
define void @_Z4testP7Complexl(%class.Complex* nocapture %out, i64 %size) local_unnamed_addr #0 {
entry:
%t0 = alloca %class.Complex, align 4
%ref.tmp = alloca i64, align 8
%tmpcast = bitcast i64* %ref.tmp to %class.Complex*
br label %for.cond
for.cond: ; preds = %for.body, %entry
%offset.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp slt i64 %offset.0, %size
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
%arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %offset.0
%real_.i = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 0
%real_.i.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx, i64 0, i32 0
%0 = load float, float* %real_.i.i, align 4, !llvm.mem.parallel_loop_access !1
store float %0, float* %real_.i, align 4, !llvm.mem.parallel_loop_access !1
%imaginary_.i = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 1
%imaginary_.i.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx, i64 0, i32 1
%1 = load float, float* %imaginary_.i.i, align 4, !llvm.mem.parallel_loop_access !1
store float %1, float* %imaginary_.i, align 4, !llvm.mem.parallel_loop_access !1
%arrayidx1 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %offset.0
%real_.i1 = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 0
%2 = load float, float* %real_.i1, align 4, !noalias !3, !llvm.mem.parallel_loop_access !1
%real_2.i = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 0
%3 = load float, float* %real_2.i, align 4, !noalias !3, !llvm.mem.parallel_loop_access !1
%add.i = fadd float %2, %3
%imaginary_.i2 = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 1
%4 = load float, float* %imaginary_.i2, align 4, !noalias !3, !llvm.mem.parallel_loop_access !1
%imaginary_3.i = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 1
%5 = load float, float* %imaginary_3.i, align 4, !noalias !3, !llvm.mem.parallel_loop_access !1
%add4.i = fadd float %4, %5
%real_.i.i3 = getelementptr inbounds %class.Complex, %class.Complex* %tmpcast, i64 0, i32 0
store float %add.i, float* %real_.i.i3, align 4, !alias.scope !3, !llvm.mem.parallel_loop_access !1
%imaginary_.i.i4 = getelementptr inbounds %class.Complex, %class.Complex* %tmpcast, i64 0, i32 1
store float %add4.i, float* %imaginary_.i.i4, align 4, !alias.scope !3, !llvm.mem.parallel_loop_access !1
%6 = bitcast %class.Complex* %arrayidx1 to i64*
%7 = load i64, i64* %ref.tmp, align 8, !llvm.mem.parallel_loop_access !1
store i64 %7, i64* %6, align 4, !llvm.mem.parallel_loop_access !1
%inc = add nsw i64 %offset.0, 1
br label %for.cond, !llvm.loop !1
for.end: ; preds = %for.cond
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
!llvm.ident = !{!0}
!0 = !{!"clang version 4.0.0 (cfe/trunk 277751)"}
!1 = distinct !{!1, !2}
!2 = !{!"llvm.loop.vectorize.enable", i1 true}
!3 = !{!4}
!4 = distinct !{!4, !5, !"_ZNK7ComplexplERKS_: %agg.result"}
!5 = distinct !{!5, !"_ZNK7ComplexplERKS_"}

View File

@ -1,46 +0,0 @@
; RUN: opt -sroa -S < %s | FileCheck %s
; This test checks that SROA does not introduce ptrtoint and inttoptr
; casts from and to non-integral pointers. The "ni:4" bit in the
; datalayout states that pointers of address space 4 are to be
; considered "non-integral".
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
target triple = "x86_64-unknown-linux-gnu"
define void @f0(i1 %alwaysFalse, i64 %val) {
; CHECK-LABEL: @f0(
; CHECK-NOT: inttoptr
; CHECK-NOT: ptrtoint
entry:
%loc = alloca i64
store i64 %val, i64* %loc
br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
neverTaken:
%loc.bc = bitcast i64* %loc to i8 addrspace(4)**
%ptr = load i8 addrspace(4)*, i8 addrspace(4)** %loc.bc
store i8 5, i8 addrspace(4)* %ptr
ret void
alwaysTaken:
ret void
}
define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val) {
; CHECK-LABEL: @f1(
; CHECK-NOT: inttoptr
; CHECK-NOT: ptrtoint
entry:
%loc = alloca i8 addrspace(4)*
store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
neverTaken:
%loc.bc = bitcast i8 addrspace(4)** %loc to i64*
%int = load i64, i64* %loc.bc
ret i64 %int
alwaysTaken:
ret i64 42
}

File diff suppressed because it is too large Load Diff

View File

@ -1,36 +0,0 @@
; RUN: opt < %s -sroa -S | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
%struct.ld2 = type { [2 x ppc_fp128] }
declare void @bar(i8*, [2 x i128])
define void @foo(i8* %v) #0 {
entry:
%v.addr = alloca i8*, align 8
%z = alloca %struct.ld2, align 16
store i8* %v, i8** %v.addr, align 8
%dat = getelementptr inbounds %struct.ld2, %struct.ld2* %z, i32 0, i32 0
%arrayidx = getelementptr inbounds [2 x ppc_fp128], [2 x ppc_fp128]* %dat, i32 0, i64 0
store ppc_fp128 0xM403B0000000000000000000000000000, ppc_fp128* %arrayidx, align 16
%dat1 = getelementptr inbounds %struct.ld2, %struct.ld2* %z, i32 0, i32 0
%arrayidx2 = getelementptr inbounds [2 x ppc_fp128], [2 x ppc_fp128]* %dat1, i32 0, i64 1
store ppc_fp128 0xM4093B400000000000000000000000000, ppc_fp128* %arrayidx2, align 16
%0 = load i8*, i8** %v.addr, align 8
%coerce.dive = getelementptr %struct.ld2, %struct.ld2* %z, i32 0, i32 0
%1 = bitcast [2 x ppc_fp128]* %coerce.dive to [2 x i128]*
%2 = load [2 x i128], [2 x i128]* %1, align 1
call void @bar(i8* %0, [2 x i128] %2)
ret void
}
; CHECK-LABEL: @foo
; CHECK-NOT: i128 4628293042053316608
; CHECK-NOT: i128 4653260752096854016
; CHECK-DAG: i128 bitcast (ppc_fp128 0xM403B0000000000000000000000000000 to i128)
; CHECK-DAG: i128 bitcast (ppc_fp128 0xM4093B400000000000000000000000000 to i128)
; CHECK: call void @bar(i8* %v, [2 x i128]
; CHECK: ret void
attributes #0 = { nounwind }

View File

@ -1,17 +0,0 @@
; RUN: opt < %s -sroa -S | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux"
; Make sure we properly handle allocas where the allocated
; size overflows a uint32_t. This specific constant results in
; the size in bits being 32 after truncation to a 32-bit int.
; CHECK-LABEL: fn1
; CHECK-NEXT: ret void
define void @fn1() {
%a = alloca [1073741825 x i32], align 16
%t0 = bitcast [1073741825 x i32]* %a to i8*
call void @llvm.lifetime.end.p0i8(i64 4294967300, i8* %t0)
ret void
}
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)

View File

@ -1,92 +0,0 @@
; RUN: opt < %s -sroa -S | FileCheck %s
;
; Make sure that SROA doesn't lose nonnull metadata
; on loads from allocas that get optimized out.
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
; Check that we do basic propagation of nonnull when rewriting.
define i8* @propagate_nonnull(i32* %v) {
; CHECK-LABEL: define i8* @propagate_nonnull(
; CHECK-NEXT: entry:
; CHECK-NEXT: %[[A:.*]] = alloca i8*
; CHECK-NEXT: %[[V_CAST:.*]] = bitcast i32* %v to i8*
; CHECK-NEXT: store i8* %[[V_CAST]], i8** %[[A]]
; CHECK-NEXT: %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], !nonnull !0
; CHECK-NEXT: ret i8* %[[LOAD]]
entry:
%a = alloca [2 x i8*]
%a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
%a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
%a.gep0.cast = bitcast i8** %a.gep0 to i32**
%a.gep1.cast = bitcast i8** %a.gep1 to i32**
store i32* %v, i32** %a.gep1.cast
store i32* null, i32** %a.gep0.cast
%load = load volatile i8*, i8** %a.gep1, !nonnull !0
ret i8* %load
}
define float* @turn_nonnull_into_assume(float** %arg) {
; CHECK-LABEL: define float* @turn_nonnull_into_assume(
; CHECK-NEXT: entry:
; CHECK-NEXT: %[[RETURN:.*]] = load float*, float** %arg, align 8
; CHECK-NEXT: %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
; CHECK-NEXT: call void @llvm.assume(i1 %[[ASSUME]])
; CHECK-NEXT: ret float* %[[RETURN]]
entry:
%buf = alloca float*
%_arg_i8 = bitcast float** %arg to i8*
%_buf_i8 = bitcast float** %buf to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
%ret = load float*, float** %buf, align 8, !nonnull !0
ret float* %ret
}
; Make sure we properly handle the !nonnull attribute when we convert
; a pointer load to an integer load.
; FIXME: While this doesn't do anythnig actively harmful today, it really
; should propagate the !nonnull metadata to range metadata. The irony is, it
; *does* initially, but then we lose that !range metadata before we finish
; SROA.
define i8* @propagate_nonnull_to_int() {
; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
; CHECK-NEXT: entry:
; CHECK-NEXT: %[[A:.*]] = alloca i64
; CHECK-NEXT: store i64 42, i64* %[[A]]
; CHECK-NEXT: %[[LOAD:.*]] = load volatile i64, i64* %[[A]]
; CHECK-NEXT: %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8*
; CHECK-NEXT: ret i8* %[[CAST]]
entry:
%a = alloca [2 x i8*]
%a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
%a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
%a.gep0.cast = bitcast i8** %a.gep0 to i64*
%a.gep1.cast = bitcast i8** %a.gep1 to i64*
store i64 42, i64* %a.gep1.cast
store i64 0, i64* %a.gep0.cast
%load = load volatile i8*, i8** %a.gep1, !nonnull !0
ret i8* %load
}
; Make sure we properly handle the !nonnull attribute when we convert
; a pointer load to an integer load and immediately promote it to an SSA
; register. This can fail in interesting ways due to the rewrite iteration of
; SROA, resulting in PR32902.
define i8* @propagate_nonnull_to_int_and_promote() {
; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
; CHECK-NEXT: entry:
; CHECK-NEXT: %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8*
; CHECK-NEXT: ret i8* %[[PROMOTED_VALUE]]
entry:
%a = alloca [2 x i8*], align 8
%a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
%a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
%a.gep0.cast = bitcast i8** %a.gep0 to i64*
%a.gep1.cast = bitcast i8** %a.gep1 to i64*
store i64 42, i64* %a.gep1.cast
store i64 0, i64* %a.gep0.cast
%load = load i8*, i8** %a.gep1, align 8, !nonnull !0
ret i8* %load
}
!0 = !{}

View File

@ -1,37 +0,0 @@
; RUN: opt < %s -sroa -S | FileCheck %s
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
; Check that the chosen type for a split is independent from the order of
; slices even in case of types that are skipped because their width is not a
; byte width multiple
define void @skipped_inttype_first({ i16*, i32 }*) {
; CHECK-LABEL: @skipped_inttype_first
; CHECK: alloca i8*
%arg = alloca { i16*, i32 }, align 8
%2 = bitcast { i16*, i32 }* %0 to i8*
%3 = bitcast { i16*, i32 }* %arg to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
%b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0
%pb0 = bitcast i16** %b to i63*
%b0 = load i63, i63* %pb0
%pb1 = bitcast i16** %b to i8**
%b1 = load i8*, i8** %pb1
ret void
}
define void @skipped_inttype_last({ i16*, i32 }*) {
; CHECK-LABEL: @skipped_inttype_last
; CHECK: alloca i8*
%arg = alloca { i16*, i32 }, align 8
%2 = bitcast { i16*, i32 }* %0 to i8*
%3 = bitcast { i16*, i32 }* %arg to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
%b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0
%pb1 = bitcast i16** %b to i8**
%b1 = load i8*, i8** %pb1
%pb0 = bitcast i16** %b to i63*
%b0 = load i63, i63* %pb0
ret void
}

View File

@ -1,106 +0,0 @@
; RUN: opt < %s -sroa -S | FileCheck %s
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
define void @no_split_on_non_byte_width(i32) {
; This tests that allocas are not split into slices that are not byte width multiple
%arg = alloca i32 , align 8
store i32 %0, i32* %arg
br label %load_i32
load_i32:
; CHECK-LABEL: load_i32:
; CHECK-NOT: bitcast {{.*}} to i1
; CHECK-NOT: zext i1
%r0 = load i32, i32* %arg
br label %load_i1
load_i1:
; CHECK-LABEL: load_i1:
; CHECK: bitcast {{.*}} to i1
%p1 = bitcast i32* %arg to i1*
%t1 = load i1, i1* %p1
ret void
}
; PR18726: Check that we use memcpy and memset to fill out padding when we have
; a slice with a simple single type whose store size is smaller than the slice
; size.
%union.Foo = type { x86_fp80, i64, i64 }
@foo_copy_source = external constant %union.Foo
@i64_sink = global i64 0
define void @memcpy_fp80_padding() {
%x = alloca %union.Foo
; Copy from a global.
%x_i8 = bitcast %union.Foo* %x to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x_i8, i8* bitcast (%union.Foo* @foo_copy_source to i8*), i32 32, i32 16, i1 false)
; Access a slice of the alloca to trigger SROA.
%mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1
%elt = load i64, i64* %mid_p
store i64 %elt, i64* @i64_sink
ret void
}
; CHECK-LABEL: define void @memcpy_fp80_padding
; CHECK: alloca x86_fp80
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32
; CHECK: load i64, i64* getelementptr inbounds (%union.Foo, %union.Foo* @foo_copy_source, i64 0, i32 1)
; CHECK: load i64, i64* getelementptr inbounds (%union.Foo, %union.Foo* @foo_copy_source, i64 0, i32 2)
define void @memset_fp80_padding() {
%x = alloca %union.Foo
; Set to all ones.
%x_i8 = bitcast %union.Foo* %x to i8*
call void @llvm.memset.p0i8.i32(i8* %x_i8, i8 -1, i32 32, i32 16, i1 false)
; Access a slice of the alloca to trigger SROA.
%mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1
%elt = load i64, i64* %mid_p
store i64 %elt, i64* @i64_sink
ret void
}
; CHECK-LABEL: define void @memset_fp80_padding
; CHECK: alloca x86_fp80
; CHECK: call void @llvm.memset.p0i8.i32(i8* %{{.*}}, i8 -1, i32 16, i32 16, i1 false)
; CHECK: store i64 -1, i64* @i64_sink
%S.vec3float = type { float, float, float }
%U.vec3float = type { <4 x float> }
declare i32 @memcpy_vec3float_helper(%S.vec3float*)
define i32 @memcpy_vec3float_widening(%S.vec3float* %x) {
; CHECK-LABEL: @memcpy_vec3float_widening(
; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte
; vector store, hence accidentally putting gibberish onto the stack.
entry:
; Create a temporary variable %tmp1 and copy %x[0] into it
%tmp1 = alloca %S.vec3float, align 4
%0 = bitcast %S.vec3float* %tmp1 to i8*
%1 = bitcast %S.vec3float* %x to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 12, i32 4, i1 false)
; The following block does nothing; but appears to confuse SROA
%unused1 = bitcast %S.vec3float* %tmp1 to %U.vec3float*
%unused2 = getelementptr inbounds %U.vec3float, %U.vec3float* %unused1, i32 0, i32 0
%unused3 = load <4 x float>, <4 x float>* %unused2, align 1
; Create a second temporary and copy %tmp1 into it
%tmp2 = alloca %S.vec3float, align 4
%2 = bitcast %S.vec3float* %tmp2 to i8*
%3 = bitcast %S.vec3float* %tmp1 to i8*
; CHECK: alloca
; CHECK-NOT: store <4 x float>
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %3, i32 12, i32 4, i1 false)
%result = call i32 @memcpy_vec3float_helper(%S.vec3float* %tmp2)
ret i32 %result
; CHECK: ret i32 %result
}

View File

@ -1,53 +0,0 @@
; RUN: opt < %s -sroa -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
define <4 x i64> @vector_ptrtoint({<2 x i32*>, <2 x i32*>} %x) {
; CHECK-LABEL: @vector_ptrtoint
%a = alloca {<2 x i32*>, <2 x i32*>}
; CHECK-NOT: alloca
store {<2 x i32*>, <2 x i32*>} %x, {<2 x i32*>, <2 x i32*>}* %a
; CHECK-NOT: store
%cast = bitcast {<2 x i32*>, <2 x i32*>}* %a to <4 x i64>*
%vec = load <4 x i64>, <4 x i64>* %cast
; CHECK-NOT: load
; CHECK: ptrtoint
ret <4 x i64> %vec
}
define <4 x i32*> @vector_inttoptr({<2 x i64>, <2 x i64>} %x) {
; CHECK-LABEL: @vector_inttoptr
%a = alloca {<2 x i64>, <2 x i64>}
; CHECK-NOT: alloca
store {<2 x i64>, <2 x i64>} %x, {<2 x i64>, <2 x i64>}* %a
; CHECK-NOT: store
%cast = bitcast {<2 x i64>, <2 x i64>}* %a to <4 x i32*>*
%vec = load <4 x i32*>, <4 x i32*>* %cast
; CHECK-NOT: load
; CHECK: inttoptr
ret <4 x i32*> %vec
}
define <2 x i64> @vector_ptrtointbitcast({<1 x i32*>, <1 x i32*>} %x) {
; CHECK-LABEL: @vector_ptrtointbitcast
%a = alloca {<1 x i32*>, <1 x i32*>}
; CHECK-NOT: alloca
store {<1 x i32*>, <1 x i32*>} %x, {<1 x i32*>, <1 x i32*>}* %a
; CHECK-NOT: store
%cast = bitcast {<1 x i32*>, <1 x i32*>}* %a to <2 x i64>*
%vec = load <2 x i64>, <2 x i64>* %cast
; CHECK-NOT: load
; CHECK: ptrtoint
; CHECK: bitcast
; CHECK: ptrtoint
; CHECK: bitcast
ret <2 x i64> %vec
}

View File

@ -1,31 +0,0 @@
; RUN: opt -sroa -S < %s | FileCheck %s
target datalayout = "e-p:64:32-i64:32-v32:32-n32-S64"
; Function Attrs: nounwind
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0
; Function Attrs: nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
; CHECK: @wombat
; CHECK-NOT: alloca
; CHECK: ret void
define void @wombat(<4 x float> %arg1) {
bb:
%tmp = alloca <4 x float>, align 16
%tmp8 = bitcast <4 x float>* %tmp to i8*
call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp8)
store <4 x float> %arg1, <4 x float>* %tmp, align 16
%tmp17 = bitcast <4 x float>* %tmp to <3 x float>*
%tmp18 = load <3 x float>, <3 x float>* %tmp17
%tmp20 = bitcast <4 x float>* %tmp to i8*
call void @llvm.lifetime.end.p0i8(i64 16, i8* %tmp20)
call void @wombat3(<3 x float> %tmp18)
ret void
}
; Function Attrs: nounwind
declare void @wombat3(<3 x float>) #0
attributes #0 = { nounwind }

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More