Imported Upstream version 5.18.0.167
Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
parent e19d552987
commit b084638f15
@ -1,62 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s

define void @i64_simplified(i64* noalias %st, i64* noalias %ld) {
; CHECK-LABEL: @i64_simplified(
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
; CHECK-NEXT: [[T0:%.*]] = load i64, i64* [[LD]], align 8
; CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
; CHECK-NEXT: store i64 [[T0]], i64* [[ST]], align 8
; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX3]], align 8
; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX4]], align 8
; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX5]], align 8
; CHECK-NEXT: ret void
;
%arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1

%t0 = load i64, i64* %ld, align 8
%t1 = load i64, i64* %arrayidx1, align 8

%arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
%arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
%arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3

store i64 %t0, i64* %st, align 8
store i64 %t1, i64* %arrayidx3, align 8
store i64 %t0, i64* %arrayidx4, align 8
store i64 %t1, i64* %arrayidx5, align 8
ret void
}

define void @i64_simplifiedi_reversed(i64* noalias %st, i64* noalias %ld) {
; CHECK-LABEL: @i64_simplifiedi_reversed(
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
; CHECK-NEXT: [[T0:%.*]] = load i64, i64* [[LD]], align 8
; CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
; CHECK-NEXT: store i64 [[T1]], i64* [[ST]], align 8
; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX3]], align 8
; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX4]], align 8
; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX5]], align 8
; CHECK-NEXT: ret void
;
%arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1

%t0 = load i64, i64* %ld, align 8
%t1 = load i64, i64* %arrayidx1, align 8

%arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
%arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
%arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3

store i64 %t1, i64* %st, align 8
store i64 %t0, i64* %arrayidx3, align 8
store i64 %t1, i64* %arrayidx4, align 8
store i64 %t0, i64* %arrayidx5, align 8
ret void
}
@ -1,98 +0,0 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux -slp-vectorizer -S -mcpu=corei7 | FileCheck %s
|
||||
|
||||
define i32 @main() {
|
||||
; CHECK-LABEL: @main(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i32>, align 32
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i32>* [[TMP]] to [8 x i32]*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32>* [[TMP]] to i8*
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <8 x i32>, <8 x i32>* [[TMP]], i64 0, i64 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[TMP1]], i64 0, i64 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[TMP1]], i64 0, i64 2
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[TMP1]], i64 0, i64 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[TMP1]], i64 0, i64 4
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[TMP1]], i64 0, i64 6
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[TMP1]], i64 0, i64 5
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[TMP1]], i64 0, i64 7
|
||||
; CHECK-NEXT: store <8 x i32> <i32 -221320154, i32 -756426931, i32 563883532, i32 382683935, i32 144890241, i32 -1052877364, i32 -1052877364, i32 -1016007675>, <8 x i32>* [[TMP]], align 32
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP2]] to i8*
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP4]], align 32
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP5]], align 4
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = icmp slt i32 [[TMP14]], [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = zext i1 [[TMP15]] to i32
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP6]], align 8
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = icmp slt i32 [[TMP18]], [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP19]], i32 2, i32 [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP7]], align 4
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = icmp slt i32 [[TMP22]], [[TMP20]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP20]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i32 3, i32 [[TMP21]]
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP8]], align 16
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = icmp slt i32 [[TMP26]], [[TMP24]]
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 [[TMP24]]
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP27]], i32 4, i32 [[TMP25]]
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP10]], align 4
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = icmp slt i32 [[TMP30]], [[TMP28]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 [[TMP28]]
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 5, i32 [[TMP29]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP9]], align 8
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = icmp slt i32 [[TMP34]], [[TMP32]]
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 [[TMP32]]
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = select i1 [[TMP35]], i32 6, i32 [[TMP33]]
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP11]], align 4
|
||||
; CHECK-NEXT: [[TMP39:%.*]] = icmp slt i32 [[TMP38]], [[TMP36]]
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 7, i32 [[TMP37]]
|
||||
; CHECK-NEXT: store i32 [[TMP40]], i32* [[TMP2]], align 4
|
||||
; CHECK-NEXT: ret i32 0
|
||||
;
|
||||
bb:
|
||||
%tmp = alloca <8 x i32>, align 32
|
||||
%tmp1 = bitcast <8 x i32>* %tmp to [8 x i32]*
|
||||
%tmp2 = alloca i32, align 4
|
||||
%tmp3 = bitcast <8 x i32>* %tmp to i8*
|
||||
%tmp4 = getelementptr inbounds <8 x i32>, <8 x i32>* %tmp, i64 0, i64 0
|
||||
%tmp5 = getelementptr inbounds [8 x i32], [8 x i32]* %tmp1, i64 0, i64 1
|
||||
%tmp6 = getelementptr inbounds [8 x i32], [8 x i32]* %tmp1, i64 0, i64 2
|
||||
%tmp7 = getelementptr inbounds [8 x i32], [8 x i32]* %tmp1, i64 0, i64 3
|
||||
%tmp8 = getelementptr inbounds [8 x i32], [8 x i32]* %tmp1, i64 0, i64 4
|
||||
%tmp9 = getelementptr inbounds [8 x i32], [8 x i32]* %tmp1, i64 0, i64 6
|
||||
%tmp10 = getelementptr inbounds [8 x i32], [8 x i32]* %tmp1, i64 0, i64 5
|
||||
%tmp11 = getelementptr inbounds [8 x i32], [8 x i32]* %tmp1, i64 0, i64 7
|
||||
store <8 x i32> <i32 -221320154, i32 -756426931, i32 563883532, i32 382683935, i32 144890241, i32 -1052877364, i32 -1052877364, i32 -1016007675>, <8 x i32>* %tmp, align 32
|
||||
%tmp12 = bitcast i32* %tmp2 to i8*
|
||||
%tmp13 = load i32, i32* %tmp4, align 32
|
||||
%tmp14 = load i32, i32* %tmp5, align 4
|
||||
%tmp15 = icmp slt i32 %tmp14, %tmp13
|
||||
%tmp16 = select i1 %tmp15, i32 %tmp14, i32 %tmp13
|
||||
%tmp17 = zext i1 %tmp15 to i32
|
||||
%tmp18 = load i32, i32* %tmp6, align 8
|
||||
%tmp19 = icmp slt i32 %tmp18, %tmp16
|
||||
%tmp20 = select i1 %tmp19, i32 %tmp18, i32 %tmp16
|
||||
%tmp21 = select i1 %tmp19, i32 2, i32 %tmp16
|
||||
%tmp22 = load i32, i32* %tmp7, align 4
|
||||
%tmp23 = icmp slt i32 %tmp22, %tmp20
|
||||
%tmp24 = select i1 %tmp23, i32 %tmp22, i32 %tmp20
|
||||
%tmp25 = select i1 %tmp23, i32 3, i32 %tmp21
|
||||
%tmp26 = load i32, i32* %tmp8, align 16
|
||||
%tmp27 = icmp slt i32 %tmp26, %tmp24
|
||||
%tmp28 = select i1 %tmp27, i32 %tmp26, i32 %tmp24
|
||||
%tmp29 = select i1 %tmp27, i32 4, i32 %tmp25
|
||||
%tmp30 = load i32, i32* %tmp10, align 4
|
||||
%tmp31 = icmp slt i32 %tmp30, %tmp28
|
||||
%tmp32 = select i1 %tmp31, i32 %tmp30, i32 %tmp28
|
||||
%tmp33 = select i1 %tmp31, i32 5, i32 %tmp29
|
||||
%tmp34 = load i32, i32* %tmp9, align 8
|
||||
%tmp35 = icmp slt i32 %tmp34, %tmp32
|
||||
%tmp36 = select i1 %tmp35, i32 %tmp34, i32 %tmp32
|
||||
%tmp37 = select i1 %tmp35, i32 6, i32 %tmp33
|
||||
%tmp38 = load i32, i32* %tmp11, align 4
|
||||
%tmp39 = icmp slt i32 %tmp38, %tmp36
|
||||
%tmp40 = select i1 %tmp39, i32 7, i32 %tmp37
|
||||
store i32 %tmp40, i32* %tmp2, align 4
|
||||
ret i32 0
|
||||
}
|
||||
|
@ -1,74 +0,0 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
|
||||
|
||||
define void @mainTest(i32* %ptr) #0 {
|
||||
; CHECK-LABEL: @mainTest(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR:%.*]], null
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[BAIL_OUT:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA5:%.*]], [[LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i32 1, undef
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], undef
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], undef
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], undef
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], undef
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP6]] to i64
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], undef
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
|
||||
; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP16]], 1
|
||||
; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]]
|
||||
; CHECK-NEXT: [[OP_EXTRA4:%.*]] = add i32 [[OP_EXTRA3]], [[TMP6]]
|
||||
; CHECK-NEXT: [[OP_EXTRA5]] = add i32 [[OP_EXTRA4]], [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP15]], undef
|
||||
; CHECK-NEXT: br label [[LOOP]]
|
||||
; CHECK: bail_out:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp eq i32* %ptr, null
|
||||
br i1 %cmp, label %loop, label %bail_out
|
||||
|
||||
loop:
|
||||
%dummy_phi = phi i32 [ 1, %entry ], [ %18, %loop ]
|
||||
%0 = load i32, i32 * %ptr , align 4
|
||||
%1 = mul i32 %0, %0
|
||||
%2 = add i32 1, %1
|
||||
%3 = getelementptr inbounds i32, i32 * %ptr, i64 1
|
||||
%4 = load i32, i32 * %3 , align 4
|
||||
%5 = mul i32 %4, %4
|
||||
%6 = add i32 %2, %4
|
||||
%7 = add i32 %6, %5
|
||||
%8 = getelementptr inbounds i32, i32 *%ptr, i64 2
|
||||
%9 = load i32, i32 * %8 , align 4
|
||||
%10 = mul i32 %9, %9
|
||||
%11 = add i32 %7, %9
|
||||
%12 = add i32 %11, %10
|
||||
%13 = sext i32 %9 to i64
|
||||
%14 = getelementptr inbounds i32, i32 *%ptr, i64 3
|
||||
%15 = load i32, i32 * %14 , align 4
|
||||
%16 = mul i32 %15, %15
|
||||
%17 = add i32 %12, %15
|
||||
%18 = add i32 %17, %16
|
||||
br label %loop
|
||||
|
||||
bail_out:
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "target-cpu"="westmere" }
|
||||
|
@ -1,64 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"

define void @test() #0 {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA3:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP6:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP0]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP0]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> <i64 3, i64 2, i64 1, i64 0>, [[TMP4]]
; CHECK-NEXT: [[TMP6]] = extractelement <4 x i64> [[TMP5]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], <i64 32, i64 32, i64 32, i64 32>
; CHECK-NEXT: [[SUM1:%.*]] = add i64 undef, undef
; CHECK-NEXT: [[SUM2:%.*]] = add i64 [[SUM1]], undef
; CHECK-NEXT: [[ZSUM:%.*]] = add i64 [[SUM2]], 0
; CHECK-NEXT: [[JOIN:%.*]] = add i64 undef, [[ZSUM]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i64> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[OP_EXTRA3]] = add i64 [[OP_EXTRA]], [[TMP6]]
; CHECK-NEXT: [[LAST:%.*]] = add i64 [[JOIN]], undef
; CHECK-NEXT: br label [[LOOP]]
;
entry:
br label %loop

loop:
%dummy_phi = phi i64 [ 1, %entry ], [ %last, %loop ]
%0 = phi i64 [ 2, %entry ], [ %fork, %loop ]
%inc1 = add i64 %0, 1
%inc2 = add i64 %0, 2
%inc11 = add i64 1, %inc1
%exact1 = ashr exact i64 %inc11, 32
%inc3 = add i64 %0, 3
%dummy_add = add i16 0, 0
%inc12 = add i64 1, %inc2
%exact2 = ashr exact i64 %inc12, 32
%dummy_shl = shl i64 %inc3, 32
%inc13 = add i64 1, %inc3
%exact3 = ashr exact i64 %inc13, 32
%fork = add i64 %0, 0
%sum1 = add i64 %exact3, %exact2
%sum2 = add i64 %sum1, %exact1
%zsum = add i64 %sum2, 0
%sext22 = add i64 1, %fork
%exact4 = ashr exact i64 %sext22, 32
%join = add i64 %fork, %zsum
%last = add i64 %join, %exact4
br label %loop
}
@ -1,48 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -verify -slp-vectorizer -o - -S -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s

@global = local_unnamed_addr global [6 x double] zeroinitializer, align 16

define { i64, i64 } @patatino(double %arg) {
; CHECK-LABEL: @patatino(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[ARG:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
; CHECK-NEXT: [[TMP10:%.*]] = trunc <2 x i64> [[TMP9]] to <2 x i32>
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP12]], 0
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i64, i64 } [[TMP16]], i64 [[TMP14]], 1
; CHECK-NEXT: ret { i64, i64 } [[TMP17]]
;
bb:
%tmp = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 0), align 16
%tmp1 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2), align 16
%tmp2 = fmul double %tmp1, %arg
%tmp3 = fadd double %tmp, %tmp2
%tmp4 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4), align 16
%tmp5 = fadd double %tmp4, %tmp3
%tmp6 = fptosi double %tmp5 to i32
%tmp7 = sext i32 %tmp6 to i64
%tmp8 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 1), align 8
%tmp9 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 3), align 8
%tmp10 = fmul double %tmp9, %arg
%tmp11 = fadd double %tmp8, %tmp10
%tmp12 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 5), align 8
%tmp13 = fadd double %tmp12, %tmp11
%tmp14 = fptosi double %tmp13 to i32
%tmp15 = sext i32 %tmp14 to i64
%tmp16 = insertvalue { i64, i64 } undef, i64 %tmp7, 0
%tmp17 = insertvalue { i64, i64 } %tmp16, i64 %tmp15, 1
ret { i64, i64 } %tmp17
}
@ -1,27 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer < %s -S -o - -mtriple=x86_64-apple-macosx10.10.0 -mcpu=core2 | FileCheck %s

define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
; CHECK-LABEL: @_Z10fooConvertPDv4_xS0_S0_PKS_(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> undef, i32 4
; CHECK-NEXT: [[CONV_I_4_I:%.*]] = fpext half [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[CONV_I_4_I]] to i32
; CHECK-NEXT: [[VECINS_I_4_I:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 4
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x half> undef, i32 5
; CHECK-NEXT: [[CONV_I_5_I:%.*]] = fpext half [[TMP2]] to float
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[CONV_I_5_I]] to i32
; CHECK-NEXT: [[VECINS_I_5_I:%.*]] = insertelement <8 x i32> [[VECINS_I_4_I]], i32 [[TMP3]], i32 5
; CHECK-NEXT: ret void
;
entry:
%0 = extractelement <16 x half> undef, i32 4
%conv.i.4.i = fpext half %0 to float
%1 = bitcast float %conv.i.4.i to i32
%vecins.i.4.i = insertelement <8 x i32> undef, i32 %1, i32 4
%2 = extractelement <16 x half> undef, i32 5
%conv.i.5.i = fpext half %2 to float
%3 = bitcast float %conv.i.5.i to i32
%vecins.i.5.i = insertelement <8 x i32> %vecins.i.4.i, i32 %3, i32 5
ret void
}
@ -1,314 +0,0 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@b = common global [4 x i32] zeroinitializer, align 16
|
||||
@c = common global [4 x i32] zeroinitializer, align 16
|
||||
@d = common global [4 x i32] zeroinitializer, align 16
|
||||
@e = common global [4 x i32] zeroinitializer, align 16
|
||||
@a = common global [4 x i32] zeroinitializer, align 16
|
||||
@fb = common global [4 x float] zeroinitializer, align 16
|
||||
@fc = common global [4 x float] zeroinitializer, align 16
|
||||
@fa = common global [4 x float] zeroinitializer, align 16
|
||||
@fd = common global [4 x float] zeroinitializer, align 16
|
||||
|
||||
; CHECK-LABEL: @addsub
|
||||
; CHECK: %5 = add nsw <4 x i32> %3, %4
|
||||
; CHECK: %6 = add nsw <4 x i32> %2, %5
|
||||
; CHECK: %7 = sub nsw <4 x i32> %2, %5
|
||||
; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @addsub() #0 {
|
||||
entry:
|
||||
%0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4
|
||||
%1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4
|
||||
%add = add nsw i32 %0, %1
|
||||
%2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4
|
||||
%3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4
|
||||
%add1 = add nsw i32 %2, %3
|
||||
%add2 = add nsw i32 %add, %add1
|
||||
store i32 %add2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4
|
||||
%4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4
|
||||
%5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4
|
||||
%add3 = add nsw i32 %4, %5
|
||||
%6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4
|
||||
%7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4
|
||||
%add4 = add nsw i32 %6, %7
|
||||
%sub = sub nsw i32 %add3, %add4
|
||||
store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4
|
||||
%8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4
|
||||
%9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4
|
||||
%add5 = add nsw i32 %8, %9
|
||||
%10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4
|
||||
%11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4
|
||||
%add6 = add nsw i32 %10, %11
|
||||
%add7 = add nsw i32 %add5, %add6
|
||||
store i32 %add7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4
|
||||
%12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4
|
||||
%13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4
|
||||
%add8 = add nsw i32 %12, %13
|
||||
%14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4
|
||||
%15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4
|
||||
%add9 = add nsw i32 %14, %15
|
||||
%sub10 = sub nsw i32 %add8, %add9
|
||||
store i32 %sub10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @subadd
|
||||
; CHECK: %5 = add nsw <4 x i32> %3, %4
|
||||
; CHECK: %6 = sub nsw <4 x i32> %2, %5
|
||||
; CHECK: %7 = add nsw <4 x i32> %2, %5
|
||||
; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @subadd() #0 {
|
||||
entry:
|
||||
%0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4
|
||||
%1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4
|
||||
%add = add nsw i32 %0, %1
|
||||
%2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4
|
||||
%3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4
|
||||
%add1 = add nsw i32 %2, %3
|
||||
%sub = sub nsw i32 %add, %add1
|
||||
store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4
|
||||
%4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4
|
||||
%5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4
|
||||
%add2 = add nsw i32 %4, %5
|
||||
%6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4
|
||||
%7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4
|
||||
%add3 = add nsw i32 %6, %7
|
||||
%add4 = add nsw i32 %add2, %add3
|
||||
store i32 %add4, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4
|
||||
%8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4
|
||||
%9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4
|
||||
%add5 = add nsw i32 %8, %9
|
||||
%10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4
|
||||
%11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4
|
||||
%add6 = add nsw i32 %10, %11
|
||||
%sub7 = sub nsw i32 %add5, %add6
|
||||
store i32 %sub7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4
|
||||
%12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4
|
||||
%13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4
|
||||
%add8 = add nsw i32 %12, %13
|
||||
%14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4
|
||||
%15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4
|
||||
%add9 = add nsw i32 %14, %15
|
||||
%add10 = add nsw i32 %add8, %add9
|
||||
store i32 %add10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @faddfsub
|
||||
; CHECK: %2 = fadd <4 x float> %0, %1
|
||||
; CHECK: %3 = fsub <4 x float> %0, %1
|
||||
; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @faddfsub() #0 {
|
||||
entry:
|
||||
%0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
|
||||
%1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
|
||||
%add = fadd float %0, %1
|
||||
store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
|
||||
%2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
|
||||
%3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
|
||||
%sub = fsub float %2, %3
|
||||
store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
|
||||
%4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
|
||||
%5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
|
||||
%add1 = fadd float %4, %5
|
||||
store float %add1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
|
||||
%6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
|
||||
%7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
|
||||
%sub2 = fsub float %6, %7
|
||||
store float %sub2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fsubfadd
|
||||
; CHECK: %2 = fsub <4 x float> %0, %1
|
||||
; CHECK: %3 = fadd <4 x float> %0, %1
|
||||
; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @fsubfadd() #0 {
|
||||
entry:
|
||||
%0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
|
||||
%1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
|
||||
%sub = fsub float %0, %1
|
||||
store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
|
||||
%2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
|
||||
%3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
|
||||
%add = fadd float %2, %3
|
||||
store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
|
||||
%4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
|
||||
%5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
|
||||
%sub1 = fsub float %4, %5
|
||||
store float %sub1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
|
||||
%6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
|
||||
%7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
|
||||
%add2 = fadd float %6, %7
|
||||
store float %add2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @No_faddfsub
|
||||
; CHECK-NOT: fadd <4 x float>
|
||||
; CHECK-NOT: fsub <4 x float>
|
||||
; CHECK-NOT: shufflevector
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @No_faddfsub() #0 {
|
||||
entry:
|
||||
%0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
|
||||
%1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
|
||||
%add = fadd float %0, %1
|
||||
store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
|
||||
%2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
|
||||
%3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
|
||||
%add1 = fadd float %2, %3
|
||||
store float %add1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
|
||||
%4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
|
||||
%5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
|
||||
%add2 = fadd float %4, %5
|
||||
store float %add2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
|
||||
%6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
|
||||
%7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
|
||||
%sub = fsub float %6, %7
|
||||
store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check vectorization of following code for float data type-
|
||||
; fc[0] = fb[0]+fa[0]; //swapped fb and fa
|
||||
; fc[1] = fa[1]-fb[1];
|
||||
; fc[2] = fa[2]+fb[2];
|
||||
; fc[3] = fa[3]-fb[3];
|
||||
|
||||
; CHECK-LABEL: @reorder_alt
|
||||
; CHECK: %3 = fadd <4 x float> %1, %2
|
||||
; CHECK: %4 = fsub <4 x float> %1, %2
|
||||
; CHECK: %5 = shufflevector <4 x float> %3, <4 x float> %4, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
define void @reorder_alt() #0 {
|
||||
%1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
|
||||
%2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
|
||||
%3 = fadd float %1, %2
|
||||
store float %3, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
|
||||
%4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
|
||||
%5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
|
||||
%6 = fsub float %4, %5
|
||||
store float %6, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
|
||||
%7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
|
||||
%8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
|
||||
%9 = fadd float %7, %8
|
||||
store float %9, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
|
||||
%10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
|
||||
%11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
|
||||
%12 = fsub float %10, %11
|
||||
store float %12, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check vectorization of following code for float data type-
|
||||
; fc[0] = fa[0]+(fb[0]-fd[0]);
|
||||
; fc[1] = fa[1]-(fb[1]+fd[1]);
|
||||
; fc[2] = fa[2]+(fb[2]-fd[2]);
|
||||
; fc[3] = fa[3]-(fd[3]+fb[3]); //swapped fd and fb
|
||||
|
||||
; CHECK-LABEL: @reorder_alt_subTree
|
||||
; CHECK: %4 = fsub <4 x float> %3, %2
|
||||
; CHECK: %5 = fadd <4 x float> %3, %2
|
||||
; CHECK: %6 = shufflevector <4 x float> %4, <4 x float> %5, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
; CHECK: %7 = fadd <4 x float> %1, %6
|
||||
; CHECK: %8 = fsub <4 x float> %1, %6
|
||||
; CHECK: %9 = shufflevector <4 x float> %7, <4 x float> %8, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
define void @reorder_alt_subTree() #0 {
|
||||
%1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
|
||||
%2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
|
||||
%3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 0), align 4
|
||||
%4 = fsub float %2, %3
|
||||
%5 = fadd float %1, %4
|
||||
store float %5, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
|
||||
%6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
|
||||
%7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
|
||||
%8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 1), align 4
|
||||
%9 = fadd float %7, %8
|
||||
%10 = fsub float %6, %9
|
||||
store float %10, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
|
||||
%11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
|
||||
%12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
|
||||
%13 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 2), align 4
|
||||
%14 = fsub float %12, %13
|
||||
%15 = fadd float %11, %14
|
||||
store float %15, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
|
||||
%16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
|
||||
%17 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 3), align 4
|
||||
%18 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
|
||||
%19 = fadd float %17, %18
|
||||
%20 = fsub float %16, %19
|
||||
store float %20, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check vectorization of following code for double data type-
|
||||
; c[0] = (a[0]+b[0])-d[0];
|
||||
; c[1] = d[1]+(a[1]+b[1]); //swapped d[1] and (a[1]+b[1])
|
||||
|
||||
; CHECK-LABEL: @reorder_alt_rightsubTree
|
||||
; CHECK: fadd <2 x double>
|
||||
; CHECK: fsub <2 x double>
|
||||
; CHECK: shufflevector <2 x double>
|
||||
define void @reorder_alt_rightsubTree(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %d) {
|
||||
%1 = load double, double* %a
|
||||
%2 = load double, double* %b
|
||||
%3 = fadd double %1, %2
|
||||
%4 = load double, double* %d
|
||||
%5 = fsub double %3, %4
|
||||
store double %5, double* %c
|
||||
%6 = getelementptr inbounds double, double* %d, i64 1
|
||||
%7 = load double, double* %6
|
||||
%8 = getelementptr inbounds double, double* %a, i64 1
|
||||
%9 = load double, double* %8
|
||||
%10 = getelementptr inbounds double, double* %b, i64 1
|
||||
%11 = load double, double* %10
|
||||
%12 = fadd double %9, %11
|
||||
%13 = fadd double %7, %12
|
||||
%14 = getelementptr inbounds double, double* %c, i64 1
|
||||
store double %13, double* %14
|
||||
ret void
|
||||
}
|
||||
|
||||
; Dont vectorization of following code for float data type as sub is not commutative-
|
||||
; fc[0] = fb[0]+fa[0];
|
||||
; fc[1] = fa[1]-fb[1];
|
||||
; fc[2] = fa[2]+fb[2];
|
||||
; fc[3] = fb[3]-fa[3];
|
||||
; In the above code we can swap the 1st and 2nd operation as fadd is commutative
|
||||
; but not 2nd or 4th as fsub is not commutative.
|
||||
|
||||
; CHECK-LABEL: @no_vec_shuff_reorder
|
||||
; CHECK-NOT: fadd <4 x float>
|
||||
; CHECK-NOT: fsub <4 x float>
|
||||
; CHECK-NOT: shufflevector
|
||||
define void @no_vec_shuff_reorder() #0 {
|
||||
%1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
|
||||
%2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
|
||||
%3 = fadd float %1, %2
|
||||
store float %3, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
|
||||
%4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
|
||||
%5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
|
||||
%6 = fsub float %4, %5
|
||||
store float %6, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
|
||||
%7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
|
||||
%8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
|
||||
%9 = fadd float %7, %8
|
||||
store float %9, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
|
||||
%10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
|
||||
%11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
|
||||
%12 = fsub float %10, %11
|
||||
store float %12, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -1,19 +0,0 @@
; RUN: opt -S -mtriple=x86_64-unknown-linux -mcpu=corei7 -slp-vectorizer < %s | FileCheck %s

%struct.S = type { i8*, i8* }

@kS0 = common global %struct.S zeroinitializer, align 8

define { i64, i64 } @getS() {
entry:
%0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
%1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
%2 = insertvalue { i64, i64 } undef, i64 %0, 0
%3 = insertvalue { i64, i64 } %2, i64 %1, 1
ret { i64, i64 } %3
}

; CHECK: load i64
; CHECK-NOT: load <2 x i64>
; CHECK-NOT: extractelement
@ -1,55 +0,0 @@
; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Simple 3-pair chain with loads and stores
; CHECK-LABEL: @test1
define void @test1(double* %a, double* %b, double* %c) {
entry:
%agg.tmp.i.i.sroa.0 = alloca [3 x double], align 16
; CHECK: %[[V0:[0-9]+]] = load <2 x double>, <2 x double>* %[[V2:[0-9]+]], align 8
%i0 = load double, double* %a
%i1 = load double, double* %b
%mul = fmul double %i0, %i1
%store1 = getelementptr inbounds [3 x double], [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 1
%store2 = getelementptr inbounds [3 x double], [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 2
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
; CHECK: store <2 x double> %[[V1:[0-9]+]], <2 x double>* %[[V2:[0-9]+]], align 8
store double %mul, double* %store1
store double %mul5, double* %store2, align 16
; CHECK: ret
ret void
}

; Float has 4 byte abi alignment on x86_64. We must use the alignmnet of the
; value being loaded/stored not the alignment of the pointer type.

; CHECK-LABEL: @test2
; CHECK-NOT: align 8
; CHECK: load <4 x float>{{.*}}, align 4
; CHECK: store <4 x float>{{.*}}, align 4
; CHECK: ret

define void @test2(float * %a, float * %b) {
entry:
%l0 = load float, float* %a
%a1 = getelementptr inbounds float, float* %a, i64 1
%l1 = load float, float* %a1
%a2 = getelementptr inbounds float, float* %a, i64 2
%l2 = load float, float* %a2
%a3 = getelementptr inbounds float, float* %a, i64 3
%l3 = load float, float* %a3
store float %l0, float* %b
%b1 = getelementptr inbounds float, float* %b, i64 1
store float %l1, float* %b1
%b2 = getelementptr inbounds float, float* %b, i64 2
store float %l2, float* %b2
%b3 = getelementptr inbounds float, float* %b, i64 3
store float %l3, float* %b3
ret void
}
4 file diffs suppressed because they are too large
@ -1,31 +0,0 @@
; RUN: opt < %s -basicaa -slp-vectorizer -S |FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

@x = global [4 x i32] zeroinitializer, align 16
@a = global [4 x i32] zeroinitializer, align 16

; The SLPVectorizer should not vectorize atomic stores and it should not
; schedule regular stores around atomic stores.

; CHECK-LABEL: test
; CHECK: store i32
; CHECK: store atomic i32
; CHECK: store i32
; CHECK: store atomic i32
; CHECK: store i32
; CHECK: store atomic i32
; CHECK: store i32
; CHECK: store atomic i32
define void @test() {
entry:
store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 0), align 16
store atomic i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 0) release, align 16
store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 1), align 4
store atomic i32 1, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 1) release, align 4
store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 2), align 8
store atomic i32 2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 2) release, align 8
store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 3), align 4
store atomic i32 3, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 3) release, align 4
ret void
}
@ -1,76 +0,0 @@
; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define void @test1(x86_mmx %a, x86_mmx %b, i64* %ptr) {
; Ensure we can handle x86_mmx values which are primitive and can be bitcast
; with integer types but can't be put into a vector.
;
; CHECK-LABEL: @test1
; CHECK: store i64
; CHECK: store i64
; CHECK: ret void
entry:
%a.cast = bitcast x86_mmx %a to i64
%b.cast = bitcast x86_mmx %b to i64
%a.and = and i64 %a.cast, 42
%b.and = and i64 %b.cast, 42
%gep = getelementptr i64, i64* %ptr, i32 1
store i64 %a.and, i64* %ptr
store i64 %b.and, i64* %gep
ret void
}

define void @test2(x86_mmx %a, x86_mmx %b) {
; Same as @test1 but using phi-input vectorization instead of store
; vectorization.
;
; CHECK-LABEL: @test2
; CHECK: and i64
; CHECK: and i64
; CHECK: ret void
entry:
br i1 undef, label %if.then, label %exit

if.then:
%a.cast = bitcast x86_mmx %a to i64
%b.cast = bitcast x86_mmx %b to i64
%a.and = and i64 %a.cast, 42
%b.and = and i64 %b.cast, 42
br label %exit

exit:
%a.phi = phi i64 [ 0, %entry ], [ %a.and, %if.then ]
%b.phi = phi i64 [ 0, %entry ], [ %b.and, %if.then ]
tail call void @f(i64 %a.phi, i64 %b.phi)
ret void
}

define i8 @test3(i8 *%addr) {
; Check that we do not vectorize types that are padded to a bigger ones.
;
; CHECK-LABEL: @test3
; CHECK-NOT: <4 x i2>
; CHECK: ret i8
entry:
%a = bitcast i8* %addr to i2*
%a0 = getelementptr inbounds i2, i2* %a, i64 0
%a1 = getelementptr inbounds i2, i2* %a, i64 1
%a2 = getelementptr inbounds i2, i2* %a, i64 2
%a3 = getelementptr inbounds i2, i2* %a, i64 3
%l0 = load i2, i2* %a0, align 1
%l1 = load i2, i2* %a1, align 1
%l2 = load i2, i2* %a2, align 1
%l3 = load i2, i2* %a3, align 1
br label %bb1
bb1: ; preds = %entry
%p0 = phi i2 [ %l0, %entry ]
%p1 = phi i2 [ %l1, %entry ]
%p2 = phi i2 [ %l2, %entry ]
%p3 = phi i2 [ %l3, %entry ]
%r = zext i2 %p2 to i8
ret i8 %r
}

declare void @f(i64, i64)
@ -1,32 +0,0 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

;CHECK-LABEL: @foo(
;CHECK: store <4 x i32>
;CHECK: ret
define i32 @foo(i32* nocapture %A, i32 %n) {
entry:
%call = tail call i32 (...) @bar() #2
%mul = mul nsw i32 %n, 5
%add = add nsw i32 %mul, 9
store i32 %add, i32* %A, align 4
%mul1 = mul nsw i32 %n, 9
%add2 = add nsw i32 %mul1, 9
%arrayidx3 = getelementptr inbounds i32, i32* %A, i64 1
store i32 %add2, i32* %arrayidx3, align 4
%mul4 = shl i32 %n, 3
%add5 = add nsw i32 %mul4, 9
%arrayidx6 = getelementptr inbounds i32, i32* %A, i64 2
store i32 %add5, i32* %arrayidx6, align 4
%mul7 = mul nsw i32 %n, 10
%add8 = add nsw i32 %mul7, 9
%arrayidx9 = getelementptr inbounds i32, i32* %A, i64 3
store i32 %add8, i32* %arrayidx9, align 4
ret i32 undef
}

; We can still vectorize the stores below.

declare i32 @bar(...)
@ -1,423 +0,0 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=XOP
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver4 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=XOP
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
@src64 = common global [4 x i64] zeroinitializer, align 32
|
||||
@dst64 = common global [4 x i64] zeroinitializer, align 32
|
||||
@src32 = common global [8 x i32] zeroinitializer, align 32
|
||||
@dst32 = common global [8 x i32] zeroinitializer, align 32
|
||||
@src16 = common global [16 x i16] zeroinitializer, align 32
|
||||
@dst16 = common global [16 x i16] zeroinitializer, align 32
|
||||
@src8 = common global [32 x i8] zeroinitializer, align 32
|
||||
@dst8 = common global [32 x i8] zeroinitializer, align 32
|
||||
|
||||
declare i64 @llvm.bitreverse.i64(i64)
|
||||
declare i32 @llvm.bitreverse.i32(i32)
|
||||
declare i16 @llvm.bitreverse.i16(i16)
|
||||
declare i8 @llvm.bitreverse.i8(i8)
|
||||
|
||||
define void @bitreverse_2i64() #0 {
|
||||
; CHECK-LABEL: @bitreverse_2i64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]])
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
|
||||
%ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
|
||||
%bitreverse0 = call i64 @llvm.bitreverse.i64(i64 %ld0)
|
||||
%bitreverse1 = call i64 @llvm.bitreverse.i64(i64 %ld1)
|
||||
store i64 %bitreverse0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
|
||||
store i64 %bitreverse1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bitreverse_4i64() #0 {
|
||||
; SSE-LABEL: @bitreverse_4i64(
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 4
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2) to <2 x i64>*), align 4
|
||||
; SSE-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]])
|
||||
; SSE-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP2]])
|
||||
; SSE-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 4
|
||||
; SSE-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* bitcast (i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2) to <2 x i64>*), align 4
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @bitreverse_4i64(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
|
||||
; AVX-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> [[TMP1]])
|
||||
; AVX-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
|
||||
; AVX-NEXT: ret void
|
||||
;
|
||||
; XOP-LABEL: @bitreverse_4i64(
|
||||
; XOP-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
|
||||
; XOP-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> [[TMP1]])
|
||||
; XOP-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
|
||||
; XOP-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
|
||||
%ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
|
||||
%ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
|
||||
%ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
|
||||
%bitreverse0 = call i64 @llvm.bitreverse.i64(i64 %ld0)
|
||||
%bitreverse1 = call i64 @llvm.bitreverse.i64(i64 %ld1)
|
||||
%bitreverse2 = call i64 @llvm.bitreverse.i64(i64 %ld2)
|
||||
%bitreverse3 = call i64 @llvm.bitreverse.i64(i64 %ld3)
|
||||
store i64 %bitreverse0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
|
||||
store i64 %bitreverse1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
|
||||
store i64 %bitreverse2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
|
||||
store i64 %bitreverse3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bitreverse_4i32() #0 {
|
||||
; CHECK-LABEL: @bitreverse_4i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> [[TMP1]])
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
|
||||
%ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
|
||||
%ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
%ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
%bitreverse0 = call i32 @llvm.bitreverse.i32(i32 %ld0)
%bitreverse1 = call i32 @llvm.bitreverse.i32(i32 %ld1)
%bitreverse2 = call i32 @llvm.bitreverse.i32(i32 %ld2)
%bitreverse3 = call i32 @llvm.bitreverse.i32(i32 %ld3)
store i32 %bitreverse0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
store i32 %bitreverse1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
store i32 %bitreverse2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
store i32 %bitreverse3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
ret void
}
define void @bitreverse_8i32() #0 {
; SSE-LABEL: @bitreverse_8i32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2
; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> [[TMP1]])
; SSE-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> [[TMP2]])
; SSE-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2
; SSE-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2
; SSE-NEXT: ret void
;
; AVX-LABEL: @bitreverse_8i32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> [[TMP1]])
; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
; AVX-NEXT: ret void
;
; XOP-LABEL: @bitreverse_8i32(
; XOP-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
; XOP-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> [[TMP1]])
; XOP-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
; XOP-NEXT: ret void
;
%ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
%ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
%ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
%ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
%ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
%ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
%ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
%ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
%bitreverse0 = call i32 @llvm.bitreverse.i32(i32 %ld0)
%bitreverse1 = call i32 @llvm.bitreverse.i32(i32 %ld1)
%bitreverse2 = call i32 @llvm.bitreverse.i32(i32 %ld2)
%bitreverse3 = call i32 @llvm.bitreverse.i32(i32 %ld3)
%bitreverse4 = call i32 @llvm.bitreverse.i32(i32 %ld4)
%bitreverse5 = call i32 @llvm.bitreverse.i32(i32 %ld5)
%bitreverse6 = call i32 @llvm.bitreverse.i32(i32 %ld6)
%bitreverse7 = call i32 @llvm.bitreverse.i32(i32 %ld7)
store i32 %bitreverse0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
store i32 %bitreverse1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
store i32 %bitreverse2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
store i32 %bitreverse3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
store i32 %bitreverse4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
store i32 %bitreverse5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
store i32 %bitreverse6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
store i32 %bitreverse7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
ret void
}
define void @bitreverse_8i16() #0 {
; CHECK-LABEL: @bitreverse_8i16(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[TMP1]])
; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
; CHECK-NEXT: ret void
;
%ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
%ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
%ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
%ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
%ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
%ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
%ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
%ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
%bitreverse0 = call i16 @llvm.bitreverse.i16(i16 %ld0)
%bitreverse1 = call i16 @llvm.bitreverse.i16(i16 %ld1)
%bitreverse2 = call i16 @llvm.bitreverse.i16(i16 %ld2)
%bitreverse3 = call i16 @llvm.bitreverse.i16(i16 %ld3)
%bitreverse4 = call i16 @llvm.bitreverse.i16(i16 %ld4)
%bitreverse5 = call i16 @llvm.bitreverse.i16(i16 %ld5)
%bitreverse6 = call i16 @llvm.bitreverse.i16(i16 %ld6)
%bitreverse7 = call i16 @llvm.bitreverse.i16(i16 %ld7)
store i16 %bitreverse0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
store i16 %bitreverse1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
store i16 %bitreverse2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
store i16 %bitreverse3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
store i16 %bitreverse4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
store i16 %bitreverse5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
store i16 %bitreverse6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
store i16 %bitreverse7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
ret void
}
define void @bitreverse_16i16() #0 {
; SSE-LABEL: @bitreverse_16i16(
; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8) to <8 x i16>*), align 2
; SSE-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[TMP1]])
; SSE-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[TMP2]])
; SSE-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
; SSE-NEXT: store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8) to <8 x i16>*), align 2
; SSE-NEXT: ret void
;
; AVX-LABEL: @bitreverse_16i16(
; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
; AVX-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> [[TMP1]])
; AVX-NEXT: store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
; AVX-NEXT: ret void
;
; XOP-LABEL: @bitreverse_16i16(
; XOP-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
; XOP-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> [[TMP1]])
; XOP-NEXT: store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
; XOP-NEXT: ret void
;
%ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
%ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
%ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
%ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
%ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
%ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
%ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
%ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
%ld8 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
%ld9 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
%ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
%ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
%ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
%ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
%ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
%ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
%bitreverse0 = call i16 @llvm.bitreverse.i16(i16 %ld0)
%bitreverse1 = call i16 @llvm.bitreverse.i16(i16 %ld1)
%bitreverse2 = call i16 @llvm.bitreverse.i16(i16 %ld2)
%bitreverse3 = call i16 @llvm.bitreverse.i16(i16 %ld3)
%bitreverse4 = call i16 @llvm.bitreverse.i16(i16 %ld4)
%bitreverse5 = call i16 @llvm.bitreverse.i16(i16 %ld5)
%bitreverse6 = call i16 @llvm.bitreverse.i16(i16 %ld6)
%bitreverse7 = call i16 @llvm.bitreverse.i16(i16 %ld7)
%bitreverse8 = call i16 @llvm.bitreverse.i16(i16 %ld8)
%bitreverse9 = call i16 @llvm.bitreverse.i16(i16 %ld9)
%bitreverse10 = call i16 @llvm.bitreverse.i16(i16 %ld10)
%bitreverse11 = call i16 @llvm.bitreverse.i16(i16 %ld11)
%bitreverse12 = call i16 @llvm.bitreverse.i16(i16 %ld12)
%bitreverse13 = call i16 @llvm.bitreverse.i16(i16 %ld13)
%bitreverse14 = call i16 @llvm.bitreverse.i16(i16 %ld14)
%bitreverse15 = call i16 @llvm.bitreverse.i16(i16 %ld15)
store i16 %bitreverse0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
store i16 %bitreverse1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
store i16 %bitreverse2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
store i16 %bitreverse3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
store i16 %bitreverse4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
store i16 %bitreverse5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
store i16 %bitreverse6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
store i16 %bitreverse7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
store i16 %bitreverse8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
store i16 %bitreverse9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
store i16 %bitreverse10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
store i16 %bitreverse11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
store i16 %bitreverse12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
store i16 %bitreverse13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
store i16 %bitreverse14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
store i16 %bitreverse15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
ret void
}
define void @bitreverse_16i8() #0 {
; CHECK-LABEL: @bitreverse_16i8(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP1]])
; CHECK-NEXT: store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
; CHECK-NEXT: ret void
;
%ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
%ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
%ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
%ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
%ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
%ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
%ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
%ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
%ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
%ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
%ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
%ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
%ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
%ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
%ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
%ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
%bitreverse0 = call i8 @llvm.bitreverse.i8(i8 %ld0)
%bitreverse1 = call i8 @llvm.bitreverse.i8(i8 %ld1)
%bitreverse2 = call i8 @llvm.bitreverse.i8(i8 %ld2)
%bitreverse3 = call i8 @llvm.bitreverse.i8(i8 %ld3)
%bitreverse4 = call i8 @llvm.bitreverse.i8(i8 %ld4)
%bitreverse5 = call i8 @llvm.bitreverse.i8(i8 %ld5)
%bitreverse6 = call i8 @llvm.bitreverse.i8(i8 %ld6)
%bitreverse7 = call i8 @llvm.bitreverse.i8(i8 %ld7)
%bitreverse8 = call i8 @llvm.bitreverse.i8(i8 %ld8)
%bitreverse9 = call i8 @llvm.bitreverse.i8(i8 %ld9)
%bitreverse10 = call i8 @llvm.bitreverse.i8(i8 %ld10)
%bitreverse11 = call i8 @llvm.bitreverse.i8(i8 %ld11)
%bitreverse12 = call i8 @llvm.bitreverse.i8(i8 %ld12)
%bitreverse13 = call i8 @llvm.bitreverse.i8(i8 %ld13)
%bitreverse14 = call i8 @llvm.bitreverse.i8(i8 %ld14)
%bitreverse15 = call i8 @llvm.bitreverse.i8(i8 %ld15)
store i8 %bitreverse0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
store i8 %bitreverse1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
store i8 %bitreverse2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
store i8 %bitreverse3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
store i8 %bitreverse4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
store i8 %bitreverse5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
store i8 %bitreverse6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
store i8 %bitreverse7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
store i8 %bitreverse8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
store i8 %bitreverse9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
store i8 %bitreverse10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
store i8 %bitreverse11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
store i8 %bitreverse12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
store i8 %bitreverse13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
store i8 %bitreverse14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
store i8 %bitreverse15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
ret void
}
define void @bitreverse_32i8() #0 {
; CHECK-LABEL: @bitreverse_32i8(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16) to <16 x i8>*), align 1
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP2]])
; CHECK-NEXT: store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
; CHECK-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16) to <16 x i8>*), align 1
; CHECK-NEXT: ret void
;
%ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
%ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
%ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
%ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
%ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
%ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
%ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
%ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
%ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
%ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
%ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
%ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
%ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
%ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
%ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
%ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
%ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
%ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
%ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
%ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
%ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
%ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
%ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
%ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
%ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
%ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
%ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
%ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
%ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
%ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
%ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
%ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
%bitreverse0 = call i8 @llvm.bitreverse.i8(i8 %ld0)
%bitreverse1 = call i8 @llvm.bitreverse.i8(i8 %ld1)
%bitreverse2 = call i8 @llvm.bitreverse.i8(i8 %ld2)
%bitreverse3 = call i8 @llvm.bitreverse.i8(i8 %ld3)
%bitreverse4 = call i8 @llvm.bitreverse.i8(i8 %ld4)
%bitreverse5 = call i8 @llvm.bitreverse.i8(i8 %ld5)
%bitreverse6 = call i8 @llvm.bitreverse.i8(i8 %ld6)
%bitreverse7 = call i8 @llvm.bitreverse.i8(i8 %ld7)
%bitreverse8 = call i8 @llvm.bitreverse.i8(i8 %ld8)
%bitreverse9 = call i8 @llvm.bitreverse.i8(i8 %ld9)
%bitreverse10 = call i8 @llvm.bitreverse.i8(i8 %ld10)
%bitreverse11 = call i8 @llvm.bitreverse.i8(i8 %ld11)
%bitreverse12 = call i8 @llvm.bitreverse.i8(i8 %ld12)
%bitreverse13 = call i8 @llvm.bitreverse.i8(i8 %ld13)
%bitreverse14 = call i8 @llvm.bitreverse.i8(i8 %ld14)
%bitreverse15 = call i8 @llvm.bitreverse.i8(i8 %ld15)
%bitreverse16 = call i8 @llvm.bitreverse.i8(i8 %ld16)
%bitreverse17 = call i8 @llvm.bitreverse.i8(i8 %ld17)
%bitreverse18 = call i8 @llvm.bitreverse.i8(i8 %ld18)
%bitreverse19 = call i8 @llvm.bitreverse.i8(i8 %ld19)
%bitreverse20 = call i8 @llvm.bitreverse.i8(i8 %ld20)
%bitreverse21 = call i8 @llvm.bitreverse.i8(i8 %ld21)
%bitreverse22 = call i8 @llvm.bitreverse.i8(i8 %ld22)
%bitreverse23 = call i8 @llvm.bitreverse.i8(i8 %ld23)
%bitreverse24 = call i8 @llvm.bitreverse.i8(i8 %ld24)
%bitreverse25 = call i8 @llvm.bitreverse.i8(i8 %ld25)
%bitreverse26 = call i8 @llvm.bitreverse.i8(i8 %ld26)
%bitreverse27 = call i8 @llvm.bitreverse.i8(i8 %ld27)
%bitreverse28 = call i8 @llvm.bitreverse.i8(i8 %ld28)
%bitreverse29 = call i8 @llvm.bitreverse.i8(i8 %ld29)
%bitreverse30 = call i8 @llvm.bitreverse.i8(i8 %ld30)
%bitreverse31 = call i8 @llvm.bitreverse.i8(i8 %ld31)
store i8 %bitreverse0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
store i8 %bitreverse1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
store i8 %bitreverse2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
store i8 %bitreverse3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
store i8 %bitreverse4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
store i8 %bitreverse5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
store i8 %bitreverse6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
store i8 %bitreverse7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
store i8 %bitreverse8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
store i8 %bitreverse9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
store i8 %bitreverse10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
store i8 %bitreverse11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
store i8 %bitreverse12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
store i8 %bitreverse13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
store i8 %bitreverse14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
store i8 %bitreverse15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
store i8 %bitreverse16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
store i8 %bitreverse17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
store i8 %bitreverse18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
store i8 %bitreverse19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
store i8 %bitreverse20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
store i8 %bitreverse21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
store i8 %bitreverse22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
store i8 %bitreverse23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
store i8 %bitreverse24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
store i8 %bitreverse25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
store i8 %bitreverse26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
store i8 %bitreverse27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
store i8 %bitreverse28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
store i8 %bitreverse29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
store i8 %bitreverse30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
store i8 %bitreverse31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
ret void
}
attributes #0 = { nounwind }
@ -1,166 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S -o - -mtriple=x86-64-unknown-linux -mcpu=bdver2 -instcombine | FileCheck %s
define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @g(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]]
; CHECK-NEXT: ret <2 x i8> [[TMP2]]
;
%x0 = extractelement <2 x i8> %x, i32 0
%y1 = extractelement <2 x i8> %y, i32 1
%x0x0 = mul i8 %x0, %x0
%y1y1 = mul i8 %y1, %y1
%ins1 = insertelement <2 x i8> undef, i8 %x0x0, i32 0
%ins2 = insertelement <2 x i8> %ins1, i8 %y1y1, i32 1
ret <2 x i8> %ins2
}
define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @h(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
; CHECK-NEXT: ret <4 x i8> [[TMP2]]
;
%x0 = extractelement <4 x i8> %x, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
%y1 = extractelement <4 x i8> %y, i32 1
%y2 = extractelement <4 x i8> %y, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%y1y1 = mul i8 %y1, %y1
%y2y2 = mul i8 %y2, %y2
%ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0
%ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
%ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
%ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
ret <4 x i8> %ins4
}
define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @h_undef(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 undef, i32 3, i32 5, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
; CHECK-NEXT: ret <4 x i8> [[TMP2]]
;
%x0 = extractelement <4 x i8> undef, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
%y1 = extractelement <4 x i8> %y, i32 1
%y2 = extractelement <4 x i8> %y, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%y1y1 = mul i8 %y1, %y1
%y2y2 = mul i8 %y2, %y2
%ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0
%ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
%ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
%ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
ret <4 x i8> %ins4
}
define i8 @i(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @i(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i8> [[TMP2]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i8> [[BIN_RDX]], <4 x i8> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i8> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[BIN_RDX2]], i32 0
; CHECK-NEXT: ret i8 [[TMP3]]
;
%x0 = extractelement <4 x i8> %x, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
%y1 = extractelement <4 x i8> %y, i32 1
%y2 = extractelement <4 x i8> %y, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%y1y1 = mul i8 %y1, %y1
%y2y2 = mul i8 %y2, %y2
%1 = add i8 %x0x0, %x3x3
%2 = add i8 %y1y1, %y2y2
%3 = add i8 %1, %2
ret i8 %3
}
define i8 @j(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @j(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 5>
; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <2 x i32> <i32 3, i32 6>
; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i8> [[TMP3]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
; CHECK-NEXT: ret i8 [[TMP8]]
;
%x0 = extractelement <4 x i8> %x, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
%y1 = extractelement <4 x i8> %y, i32 1
%y2 = extractelement <4 x i8> %y, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%y1y1 = mul i8 %y1, %y1
%y2y2 = mul i8 %y2, %y2
%1 = add i8 %x0x0, %x3x3
%2 = add i8 %y1y1, %y2y2
%3 = sdiv i8 %1, %2
ret i8 %3
}
define i8 @k(<4 x i8> %x) {
; CHECK-LABEL: @k(
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> undef, <2 x i32> <i32 3, i32 2>
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
; CHECK-NEXT: ret i8 [[TMP8]]
;
%x0 = extractelement <4 x i8> %x, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
%x1 = extractelement <4 x i8> %x, i32 1
%x2 = extractelement <4 x i8> %x, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%x1x1 = mul i8 %x1, %x1
%x2x2 = mul i8 %x2, %x2
%1 = add i8 %x0x0, %x3x3
%2 = add i8 %x1x1, %x2x2
%3 = sdiv i8 %1, %2
ret i8 %3
}
define i8 @k_bb(<4 x i8> %x) {
; CHECK-LABEL: @k_bb(
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> undef, <2 x i32> <i32 3, i32 2>
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
; CHECK-NEXT: ret i8 [[TMP8]]
;
%x0 = extractelement <4 x i8> %x, i32 0
br label %bb1
bb1:
%x3 = extractelement <4 x i8> %x, i32 3
%x1 = extractelement <4 x i8> %x, i32 1
%x2 = extractelement <4 x i8> %x, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%x1x1 = mul i8 %x1, %x1
%x2x2 = mul i8 %x2, %x2
%1 = add i8 %x0x0, %x3x3
%2 = add i8 %x1x1, %x2x2
%3 = sdiv i8 %1, %2
ret i8 %3
}
@ -1,247 +0,0 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
@src64 = common global [4 x i64] zeroinitializer, align 32
|
||||
@dst64 = common global [4 x i64] zeroinitializer, align 32
|
||||
@src32 = common global [8 x i32] zeroinitializer, align 32
|
||||
@dst32 = common global [8 x i32] zeroinitializer, align 32
|
||||
@src16 = common global [16 x i16] zeroinitializer, align 32
|
||||
@dst16 = common global [16 x i16] zeroinitializer, align 32
|
||||
|
||||
declare i64 @llvm.bswap.i64(i64)
|
||||
declare i32 @llvm.bswap.i32(i32)
|
||||
declare i16 @llvm.bswap.i16(i16)
|
||||
|
||||
define void @bswap_2i64() #0 {
|
||||
; SSE-LABEL: @bswap_2i64(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: [[BSWAP0:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD0]])
|
||||
; SSE-NEXT: [[BSWAP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD1]])
|
||||
; SSE-NEXT: store i64 [[BSWAP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
|
||||
; SSE-NEXT: store i64 [[BSWAP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @bswap_2i64(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
|
||||
; AVX-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP1]])
|
||||
; AVX-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
|
||||
; AVX-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
|
||||
%ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
|
||||
%bswap0 = call i64 @llvm.bswap.i64(i64 %ld0)
|
||||
%bswap1 = call i64 @llvm.bswap.i64(i64 %ld1)
|
||||
store i64 %bswap0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
|
||||
store i64 %bswap1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bswap_4i64() #0 {
|
||||
; SSE-LABEL: @bswap_4i64(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
|
||||
; SSE-NEXT: [[BSWAP0:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD0]])
|
||||
; SSE-NEXT: [[BSWAP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD1]])
|
||||
; SSE-NEXT: [[BSWAP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD2]])
|
||||
; SSE-NEXT: [[BSWAP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD3]])
|
||||
; SSE-NEXT: store i64 [[BSWAP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
|
||||
; SSE-NEXT: store i64 [[BSWAP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
|
||||
; SSE-NEXT: store i64 [[BSWAP2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
|
||||
; SSE-NEXT: store i64 [[BSWAP3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @bswap_4i64(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
|
||||
; AVX-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> [[TMP1]])
|
||||
; AVX-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
|
||||
; AVX-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
|
||||
%ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
|
||||
%ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
|
||||
%ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
|
||||
%bswap0 = call i64 @llvm.bswap.i64(i64 %ld0)
|
||||
%bswap1 = call i64 @llvm.bswap.i64(i64 %ld1)
|
||||
%bswap2 = call i64 @llvm.bswap.i64(i64 %ld2)
|
||||
%bswap3 = call i64 @llvm.bswap.i64(i64 %ld3)
|
||||
store i64 %bswap0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
|
||||
store i64 %bswap1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
|
||||
store i64 %bswap2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
|
||||
store i64 %bswap3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bswap_4i32() #0 {
|
||||
; CHECK-LABEL: @bswap_4i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP1]])
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
|
||||
%ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
|
||||
%ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
|
||||
%ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
|
||||
%bswap0 = call i32 @llvm.bswap.i32(i32 %ld0)
|
||||
%bswap1 = call i32 @llvm.bswap.i32(i32 %ld1)
|
||||
%bswap2 = call i32 @llvm.bswap.i32(i32 %ld2)
|
||||
%bswap3 = call i32 @llvm.bswap.i32(i32 %ld3)
|
||||
store i32 %bswap0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
|
||||
store i32 %bswap1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
|
||||
store i32 %bswap2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
|
||||
store i32 %bswap3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bswap_8i32() #0 {
|
||||
; SSE-LABEL: @bswap_8i32(
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2
|
||||
; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP1]])
|
||||
; SSE-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP2]])
|
||||
; SSE-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2
|
||||
; SSE-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @bswap_8i32(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
|
||||
; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> [[TMP1]])
|
||||
; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
|
||||
; AVX-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
|
||||
%ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
|
||||
%ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
|
||||
%ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
|
||||
%ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
|
||||
%ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
|
||||
%ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
|
||||
%ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
|
||||
%bswap0 = call i32 @llvm.bswap.i32(i32 %ld0)
|
||||
%bswap1 = call i32 @llvm.bswap.i32(i32 %ld1)
|
||||
%bswap2 = call i32 @llvm.bswap.i32(i32 %ld2)
|
||||
%bswap3 = call i32 @llvm.bswap.i32(i32 %ld3)
|
||||
%bswap4 = call i32 @llvm.bswap.i32(i32 %ld4)
|
||||
%bswap5 = call i32 @llvm.bswap.i32(i32 %ld5)
|
||||
%bswap6 = call i32 @llvm.bswap.i32(i32 %ld6)
|
||||
%bswap7 = call i32 @llvm.bswap.i32(i32 %ld7)
|
||||
store i32 %bswap0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
|
||||
store i32 %bswap1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
|
||||
store i32 %bswap2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
|
||||
store i32 %bswap3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
|
||||
store i32 %bswap4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
|
||||
store i32 %bswap5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
|
||||
store i32 %bswap6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
|
||||
store i32 %bswap7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bswap_8i16() #0 {
|
||||
; CHECK-LABEL: @bswap_8i16(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP1]])
|
||||
; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
|
||||
%ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
|
||||
%ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
|
||||
%ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
|
||||
%ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
|
||||
%ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
|
||||
%ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
|
||||
%ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
|
||||
%bswap0 = call i16 @llvm.bswap.i16(i16 %ld0)
|
||||
%bswap1 = call i16 @llvm.bswap.i16(i16 %ld1)
|
||||
%bswap2 = call i16 @llvm.bswap.i16(i16 %ld2)
|
||||
%bswap3 = call i16 @llvm.bswap.i16(i16 %ld3)
|
||||
%bswap4 = call i16 @llvm.bswap.i16(i16 %ld4)
|
||||
%bswap5 = call i16 @llvm.bswap.i16(i16 %ld5)
|
||||
%bswap6 = call i16 @llvm.bswap.i16(i16 %ld6)
|
||||
%bswap7 = call i16 @llvm.bswap.i16(i16 %ld7)
|
||||
store i16 %bswap0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
|
||||
store i16 %bswap1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
|
||||
store i16 %bswap2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
|
||||
store i16 %bswap3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
|
||||
store i16 %bswap4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
|
||||
store i16 %bswap5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
|
||||
store i16 %bswap6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
|
||||
store i16 %bswap7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bswap_16i16() #0 {
|
||||
; SSE-LABEL: @bswap_16i16(
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8) to <8 x i16>*), align 2
|
||||
; SSE-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP1]])
|
||||
; SSE-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP2]])
|
||||
; SSE-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
|
||||
; SSE-NEXT: store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8) to <8 x i16>*), align 2
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @bswap_16i16(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
|
||||
; AVX-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> [[TMP1]])
|
||||
; AVX-NEXT: store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
|
||||
; AVX-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
|
||||
%ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
|
||||
%ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
|
||||
%ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
|
||||
%ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
|
||||
%ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
|
||||
%ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
|
||||
%ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
|
||||
%ld8 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
|
||||
%ld9 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
|
||||
%ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
|
||||
%ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
|
||||
%ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
|
||||
%ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
|
||||
%ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
|
||||
%ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
|
||||
%bswap0 = call i16 @llvm.bswap.i16(i16 %ld0)
|
||||
%bswap1 = call i16 @llvm.bswap.i16(i16 %ld1)
|
||||
%bswap2 = call i16 @llvm.bswap.i16(i16 %ld2)
|
||||
%bswap3 = call i16 @llvm.bswap.i16(i16 %ld3)
|
||||
%bswap4 = call i16 @llvm.bswap.i16(i16 %ld4)
|
||||
%bswap5 = call i16 @llvm.bswap.i16(i16 %ld5)
|
||||
%bswap6 = call i16 @llvm.bswap.i16(i16 %ld6)
|
||||
%bswap7 = call i16 @llvm.bswap.i16(i16 %ld7)
|
||||
%bswap8 = call i16 @llvm.bswap.i16(i16 %ld8)
|
||||
%bswap9 = call i16 @llvm.bswap.i16(i16 %ld9)
|
||||
%bswap10 = call i16 @llvm.bswap.i16(i16 %ld10)
|
||||
%bswap11 = call i16 @llvm.bswap.i16(i16 %ld11)
|
||||
%bswap12 = call i16 @llvm.bswap.i16(i16 %ld12)
|
||||
%bswap13 = call i16 @llvm.bswap.i16(i16 %ld13)
|
||||
%bswap14 = call i16 @llvm.bswap.i16(i16 %ld14)
|
||||
%bswap15 = call i16 @llvm.bswap.i16(i16 %ld15)
|
||||
store i16 %bswap0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
|
||||
store i16 %bswap1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
|
||||
store i16 %bswap2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
|
||||
store i16 %bswap3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
|
||||
store i16 %bswap4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
|
||||
store i16 %bswap5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
|
||||
store i16 %bswap6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
|
||||
store i16 %bswap7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
|
||||
store i16 %bswap8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
|
||||
store i16 %bswap9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
|
||||
store i16 %bswap10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
|
||||
store i16 %bswap11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
|
||||
store i16 %bswap12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
|
||||
store i16 %bswap13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
|
||||
store i16 %bswap14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
|
||||
store i16 %bswap15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
@ -1,176 +0,0 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
declare double @sin(double)
|
||||
declare double @cos(double)
|
||||
declare double @pow(double, double)
|
||||
declare double @exp2(double)
|
||||
declare double @sqrt(double)
|
||||
declare i64 @round(i64)
|
||||
|
||||
|
||||
define void @sin_libm(double* %a, double* %b) {
|
||||
; CHECK-LABEL: @sin_libm(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
|
||||
; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%a0 = load double, double* %a, align 8
|
||||
%idx1 = getelementptr inbounds double, double* %a, i64 1
|
||||
%a1 = load double, double* %idx1, align 8
|
||||
%sin1 = tail call double @sin(double %a0) nounwind readnone
|
||||
%sin2 = tail call double @sin(double %a1) nounwind readnone
|
||||
store double %sin1, double* %b, align 8
|
||||
%idx2 = getelementptr inbounds double, double* %b, i64 1
|
||||
store double %sin2, double* %idx2, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @cos_libm(double* %a, double* %b) {
; CHECK-LABEL: @cos_libm(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT: ret void
;
%a0 = load double, double* %a, align 8
%idx1 = getelementptr inbounds double, double* %a, i64 1
%a1 = load double, double* %idx1, align 8
%cos1 = tail call double @cos(double %a0) nounwind readnone
%cos2 = tail call double @cos(double %a1) nounwind readnone
store double %cos1, double* %b, align 8
%idx2 = getelementptr inbounds double, double* %b, i64 1
store double %cos2, double* %idx2, align 8
ret void
}

define void @pow_libm(double* %a, double* %b) {
; CHECK-LABEL: @pow_libm(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT: ret void
;
%a0 = load double, double* %a, align 8
%idx1 = getelementptr inbounds double, double* %a, i64 1
%a1 = load double, double* %idx1, align 8
%pow1 = tail call double @pow(double %a0, double %a0) nounwind readnone
%pow2 = tail call double @pow(double %a1, double %a1) nounwind readnone
store double %pow1, double* %b, align 8
%idx2 = getelementptr inbounds double, double* %b, i64 1
store double %pow2, double* %idx2, align 8
ret void
}

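; Note: despite its name, the next test exercises exp2; the two scalar @exp2 calls are
; expected to become a single @llvm.exp2.v2f64 call.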
define void @exp_libm(double* %a, double* %b) {
; CHECK-LABEL: @exp_libm(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT: ret void
;
%a0 = load double, double* %a, align 8
%idx1 = getelementptr inbounds double, double* %a, i64 1
%a1 = load double, double* %idx1, align 8
%exp1 = tail call double @exp2(double %a0) nounwind readnone
%exp2 = tail call double @exp2(double %a1) nounwind readnone
store double %exp1, double* %b, align 8
%idx2 = getelementptr inbounds double, double* %b, i64 1
store double %exp2, double* %idx2, align 8
ret void
}

; No fast-math-flags are required to convert sqrt library calls to an intrinsic.
; We just need to know that errno is not set (readnone).

define void @sqrt_libm_no_errno(double* %a, double* %b) {
; CHECK-LABEL: @sqrt_libm_no_errno(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT: ret void
;
%a0 = load double, double* %a, align 8
%idx1 = getelementptr inbounds double, double* %a, i64 1
%a1 = load double, double* %idx1, align 8
%sqrt1 = tail call double @sqrt(double %a0) nounwind readnone
%sqrt2 = tail call double @sqrt(double %a1) nounwind readnone
store double %sqrt1, double* %b, align 8
%idx2 = getelementptr inbounds double, double* %b, i64 1
store double %sqrt2, double* %idx2, align 8
ret void
}

; The sqrt intrinsic does not set errno, but a non-constant sqrt call might, so this can't vectorize.
; The nnan on the call does not matter because there's no guarantee in the C standard that a negative
; input would result in a nan output ("On a domain error, the function returns an
; implementation-defined value.")

define void @sqrt_libm_errno(double* %a, double* %b) {
; CHECK-LABEL: @sqrt_libm_errno(
; CHECK-NEXT: [[A0:%.*]] = load double, double* %a, align 8
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* %a, i64 1
; CHECK-NEXT: [[A1:%.*]] = load double, double* [[IDX1]], align 8
; CHECK-NEXT: [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #2
; CHECK-NEXT: [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #2
; CHECK-NEXT: store double [[SQRT1]], double* %b, align 8
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* %b, i64 1
; CHECK-NEXT: store double [[SQRT2]], double* [[IDX2]], align 8
; CHECK-NEXT: ret void
;
%a0 = load double, double* %a, align 8
%idx1 = getelementptr inbounds double, double* %a, i64 1
%a1 = load double, double* %idx1, align 8
%sqrt1 = tail call nnan double @sqrt(double %a0) nounwind
%sqrt2 = tail call nnan double @sqrt(double %a1) nounwind
store double %sqrt1, double* %b, align 8
%idx2 = getelementptr inbounds double, double* %b, i64 1
store double %sqrt2, double* %idx2, align 8
ret void
}

; Negative test case
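; Here @round is declared above as i64 (i64), i.e. a user-defined function rather than
; libm's double round(double), so there is no vector intrinsic to map it to and the
; calls are expected to stay scalar.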
define void @round_custom(i64* %a, i64* %b) {
; CHECK-LABEL: @round_custom(
; CHECK-NEXT: [[A0:%.*]] = load i64, i64* %a, align 8
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i64, i64* %a, i64 1
; CHECK-NEXT: [[A1:%.*]] = load i64, i64* [[IDX1]], align 8
; CHECK-NEXT: [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #3
; CHECK-NEXT: [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #3
; CHECK-NEXT: store i64 [[ROUND1]], i64* %b, align 8
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i64, i64* %b, i64 1
; CHECK-NEXT: store i64 [[ROUND2]], i64* [[IDX2]], align 8
; CHECK-NEXT: ret void
;
%a0 = load i64, i64* %a, align 8
%idx1 = getelementptr inbounds i64, i64* %a, i64 1
%a1 = load i64, i64* %idx1, align 8
%round1 = tail call i64 @round(i64 %a0) nounwind readnone
%round2 = tail call i64 @round(i64 %a1) nounwind readnone
store i64 %round1, i64* %b, align 8
%idx2 = getelementptr inbounds i64, i64* %b, i64 1
store i64 %round2, i64* %idx2, align 8
ret void
}

; CHECK: declare <2 x double> @llvm.sin.v2f64(<2 x double>) [[ATTR0:#[0-9]+]]
; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) [[ATTR0]]
; CHECK: declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) [[ATTR0]]
; CHECK: declare <2 x double> @llvm.exp2.v2f64(<2 x double>) [[ATTR0]]

; CHECK: attributes [[ATTR0]] = { nounwind readnone speculatable }