commit b084638f15 (parent e19d552987)

    Imported Upstream version 5.18.0.167

    Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
@@ -1,22 +0,0 @@
; RUN: opt -S -slp-vectorizer -mtriple=aarch64--linux-gnu -mcpu=generic < %s | FileCheck %s
; RUN: opt -S -slp-vectorizer -mtriple=aarch64-apple-ios -mcpu=cyclone < %s | FileCheck %s
; Currently disabled for a few subtargets (e.g. Kryo):
; RUN: opt -S -slp-vectorizer -mtriple=aarch64--linux-gnu -mcpu=kryo < %s | FileCheck --check-prefix=NO_SLP %s
; RUN: opt -S -slp-vectorizer -mtriple=aarch64--linux-gnu -mcpu=generic -slp-min-reg-size=128 < %s | FileCheck --check-prefix=NO_SLP %s

define void @f(float* %r, float* %w) {
  %r0 = getelementptr inbounds float, float* %r, i64 0
  %r1 = getelementptr inbounds float, float* %r, i64 1
  %f0 = load float, float* %r0
  %f1 = load float, float* %r1
  %add0 = fadd float %f0, %f0
; CHECK: fadd <2 x float>
; NO_SLP: fadd float
; NO_SLP: fadd float
  %add1 = fadd float %f1, %f1
  %w0 = getelementptr inbounds float, float* %w, i64 0
  %w1 = getelementptr inbounds float, float* %w, i64 1
  store float %add0, float* %w0
  store float %add1, float* %w1
  ret void
}
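Editorial note, for orientation only (not part of the deleted file): a rough C analogue of @f. The CHECK prefix expects the two scalar fadds to fuse into one <2 x float> fadd on generic and cyclone, while NO_SLP pins the scalar form for kryo and for -slp-min-reg-size=128. The function name is hypothetical.

/* Hypothetical C equivalent of @f above; illustration only. */
void f_c(const float *r, float *w) {
  w[0] = r[0] + r[0]; /* %add0: pairs with the lane below */
  w[1] = r[1] + r[1]; /* %add1 */
}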
@@ -1,75 +0,0 @@
; RUN: opt -S -slp-vectorizer %s -slp-threshold=-10 | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

%structA = type { [2 x float] }

define void @test1(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
; CHECK-LABEL: test1
; CHECK: %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
; CHECK: %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
; CHECK: %4 = load <2 x float>, <2 x float>* %3, align 4
; CHECK: %5 = fsub fast <2 x float> %2, %4
; CHECK: %6 = fmul fast <2 x float> %5, %5
; CHECK: %7 = extractelement <2 x float> %6, i32 0
; CHECK: %8 = extractelement <2 x float> %6, i32 1
; CHECK: %add = fadd fast float %7, %8
; CHECK: %cmp = fcmp oeq float %add, 0.000000e+00

entry:
  br label %for.body3.lr.ph

for.body3.lr.ph:
  %conv5 = sitofp i32 %ymin to float
  %conv = sitofp i32 %xmin to float
  %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
  %0 = load float, float* %arrayidx4, align 4
  %sub = fsub fast float %conv, %0
  %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
  %1 = load float, float* %arrayidx9, align 4
  %sub10 = fsub fast float %conv5, %1
  %mul11 = fmul fast float %sub, %sub
  %mul12 = fmul fast float %sub10, %sub10
  %add = fadd fast float %mul11, %mul12
  %cmp = fcmp oeq float %add, 0.000000e+00
  br i1 %cmp, label %for.body3.lr.ph, label %for.end27

for.end27:
  ret void
}

define void @test2(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
; CHECK-LABEL: test2
; CHECK: %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
; CHECK: %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
; CHECK: %4 = load <2 x float>, <2 x float>* %3, align 4
; CHECK: %5 = fsub fast <2 x float> %2, %4
; CHECK: %6 = fmul fast <2 x float> %5, %5
; CHECK: %7 = extractelement <2 x float> %6, i32 0
; CHECK: %8 = extractelement <2 x float> %6, i32 1
; CHECK: %add = fadd fast float %8, %7
; CHECK: %cmp = fcmp oeq float %add, 0.000000e+00

entry:
  br label %for.body3.lr.ph

for.body3.lr.ph:
  %conv5 = sitofp i32 %ymin to float
  %conv = sitofp i32 %xmin to float
  %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
  %0 = load float, float* %arrayidx4, align 4
  %sub = fsub fast float %conv, %0
  %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
  %1 = load float, float* %arrayidx9, align 4
  %sub10 = fsub fast float %conv5, %1
  %mul11 = fmul fast float %sub, %sub
  %mul12 = fmul fast float %sub10, %sub10
  %add = fadd fast float %mul12, %mul11 ; <-- operands commuted!
  %cmp = fcmp oeq float %add, 0.000000e+00
  br i1 %cmp, label %for.body3.lr.ph, label %for.end27

for.end27:
  ret void
}
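Editorial note: test1 and test2 compute the same squared-distance expression; test2 only commutes the final fadd operands, and the CHECK lines verify SLP still forms the <2 x float> fsub/fmul either way. A hedged C sketch (names are illustrative, not from the source):

/* Illustrative C analogue of the test1/test2 loop body. */
struct A { float v[2]; };
int body(const struct A *J, int xmin, int ymin) {
  float dx = (float)xmin - J->v[0];
  float dy = (float)ymin - J->v[1];
  /* test2 sums dy*dy + dx*dx instead; fadd is commutative,
     so SLP must vectorize both orderings identically. */
  return (dx * dx + dy * dy) == 0.0f;
}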
@@ -1,258 +0,0 @@
; RUN: opt -S -slp-vectorizer -dce -instcombine < %s | FileCheck %s --check-prefix=GENERIC
; RUN: opt -S -mcpu=kryo -slp-vectorizer -dce -instcombine < %s | FileCheck %s --check-prefix=KRYO

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; These tests check that we vectorize the index calculations in the
; gather-reduce pattern shown below. We check cases having i32 and i64
; subtraction.
;
; int gather_reduce_8x16(short *a, short *b, short *g, int n) {
;   int sum = 0;
;   for (int i = 0; i < n ; ++i) {
;     sum += g[*a++ - b[0]]; sum += g[*a++ - b[1]];
;     sum += g[*a++ - b[2]]; sum += g[*a++ - b[3]];
;     sum += g[*a++ - b[4]]; sum += g[*a++ - b[5]];
;     sum += g[*a++ - b[6]]; sum += g[*a++ - b[7]];
;   }
;   return sum;
; }

; GENERIC-LABEL: @gather_reduce_8x16_i32
;
; GENERIC: [[L:%[a-zA-Z0-9.]+]] = load <8 x i16>
; GENERIC: zext <8 x i16> [[L]] to <8 x i32>
; GENERIC: [[S:%[a-zA-Z0-9.]+]] = sub nsw <8 x i32>
; GENERIC: [[X:%[a-zA-Z0-9.]+]] = extractelement <8 x i32> [[S]]
; GENERIC: sext i32 [[X]] to i64
;
define i32 @gather_reduce_8x16_i32(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %g, i32 %n) {
entry:
  %cmp.99 = icmp sgt i32 %n, 0
  br i1 %cmp.99, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add66, %for.cond.cleanup.loopexit ]
  ret i32 %sum.0.lcssa

for.body:
  %i.0103 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %sum.0102 = phi i32 [ %add66, %for.body ], [ 0, %for.body.preheader ]
  %a.addr.0101 = phi i16* [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
  %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.0101, i64 1
  %0 = load i16, i16* %a.addr.0101, align 2
  %conv = zext i16 %0 to i32
  %incdec.ptr1 = getelementptr inbounds i16, i16* %b, i64 1
  %1 = load i16, i16* %b, align 2
  %conv2 = zext i16 %1 to i32
  %sub = sub nsw i32 %conv, %conv2
  %arrayidx = getelementptr inbounds i16, i16* %g, i32 %sub
  %2 = load i16, i16* %arrayidx, align 2
  %conv3 = zext i16 %2 to i32
  %add = add nsw i32 %conv3, %sum.0102
  %incdec.ptr4 = getelementptr inbounds i16, i16* %a.addr.0101, i64 2
  %3 = load i16, i16* %incdec.ptr, align 2
  %conv5 = zext i16 %3 to i32
  %incdec.ptr6 = getelementptr inbounds i16, i16* %b, i64 2
  %4 = load i16, i16* %incdec.ptr1, align 2
  %conv7 = zext i16 %4 to i32
  %sub8 = sub nsw i32 %conv5, %conv7
  %arrayidx10 = getelementptr inbounds i16, i16* %g, i32 %sub8
  %5 = load i16, i16* %arrayidx10, align 2
  %conv11 = zext i16 %5 to i32
  %add12 = add nsw i32 %add, %conv11
  %incdec.ptr13 = getelementptr inbounds i16, i16* %a.addr.0101, i64 3
  %6 = load i16, i16* %incdec.ptr4, align 2
  %conv14 = zext i16 %6 to i32
  %incdec.ptr15 = getelementptr inbounds i16, i16* %b, i64 3
  %7 = load i16, i16* %incdec.ptr6, align 2
  %conv16 = zext i16 %7 to i32
  %sub17 = sub nsw i32 %conv14, %conv16
  %arrayidx19 = getelementptr inbounds i16, i16* %g, i32 %sub17
  %8 = load i16, i16* %arrayidx19, align 2
  %conv20 = zext i16 %8 to i32
  %add21 = add nsw i32 %add12, %conv20
  %incdec.ptr22 = getelementptr inbounds i16, i16* %a.addr.0101, i64 4
  %9 = load i16, i16* %incdec.ptr13, align 2
  %conv23 = zext i16 %9 to i32
  %incdec.ptr24 = getelementptr inbounds i16, i16* %b, i64 4
  %10 = load i16, i16* %incdec.ptr15, align 2
  %conv25 = zext i16 %10 to i32
  %sub26 = sub nsw i32 %conv23, %conv25
  %arrayidx28 = getelementptr inbounds i16, i16* %g, i32 %sub26
  %11 = load i16, i16* %arrayidx28, align 2
  %conv29 = zext i16 %11 to i32
  %add30 = add nsw i32 %add21, %conv29
  %incdec.ptr31 = getelementptr inbounds i16, i16* %a.addr.0101, i64 5
  %12 = load i16, i16* %incdec.ptr22, align 2
  %conv32 = zext i16 %12 to i32
  %incdec.ptr33 = getelementptr inbounds i16, i16* %b, i64 5
  %13 = load i16, i16* %incdec.ptr24, align 2
  %conv34 = zext i16 %13 to i32
  %sub35 = sub nsw i32 %conv32, %conv34
  %arrayidx37 = getelementptr inbounds i16, i16* %g, i32 %sub35
  %14 = load i16, i16* %arrayidx37, align 2
  %conv38 = zext i16 %14 to i32
  %add39 = add nsw i32 %add30, %conv38
  %incdec.ptr40 = getelementptr inbounds i16, i16* %a.addr.0101, i64 6
  %15 = load i16, i16* %incdec.ptr31, align 2
  %conv41 = zext i16 %15 to i32
  %incdec.ptr42 = getelementptr inbounds i16, i16* %b, i64 6
  %16 = load i16, i16* %incdec.ptr33, align 2
  %conv43 = zext i16 %16 to i32
  %sub44 = sub nsw i32 %conv41, %conv43
  %arrayidx46 = getelementptr inbounds i16, i16* %g, i32 %sub44
  %17 = load i16, i16* %arrayidx46, align 2
  %conv47 = zext i16 %17 to i32
  %add48 = add nsw i32 %add39, %conv47
  %incdec.ptr49 = getelementptr inbounds i16, i16* %a.addr.0101, i64 7
  %18 = load i16, i16* %incdec.ptr40, align 2
  %conv50 = zext i16 %18 to i32
  %incdec.ptr51 = getelementptr inbounds i16, i16* %b, i64 7
  %19 = load i16, i16* %incdec.ptr42, align 2
  %conv52 = zext i16 %19 to i32
  %sub53 = sub nsw i32 %conv50, %conv52
  %arrayidx55 = getelementptr inbounds i16, i16* %g, i32 %sub53
  %20 = load i16, i16* %arrayidx55, align 2
  %conv56 = zext i16 %20 to i32
  %add57 = add nsw i32 %add48, %conv56
  %incdec.ptr58 = getelementptr inbounds i16, i16* %a.addr.0101, i64 8
  %21 = load i16, i16* %incdec.ptr49, align 2
  %conv59 = zext i16 %21 to i32
  %22 = load i16, i16* %incdec.ptr51, align 2
  %conv61 = zext i16 %22 to i32
  %sub62 = sub nsw i32 %conv59, %conv61
  %arrayidx64 = getelementptr inbounds i16, i16* %g, i32 %sub62
  %23 = load i16, i16* %arrayidx64, align 2
  %conv65 = zext i16 %23 to i32
  %add66 = add nsw i32 %add57, %conv65
  %inc = add nuw nsw i32 %i.0103, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; KRYO-LABEL: @gather_reduce_8x16_i64
;
; KRYO: [[L:%[a-zA-Z0-9.]+]] = load <8 x i16>
; KRYO: zext <8 x i16> [[L]] to <8 x i32>
; KRYO: [[S:%[a-zA-Z0-9.]+]] = sub nsw <8 x i32>
; KRYO: [[X:%[a-zA-Z0-9.]+]] = extractelement <8 x i32> [[S]]
; KRYO: sext i32 [[X]] to i64
;
define i32 @gather_reduce_8x16_i64(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %g, i32 %n) {
entry:
  %cmp.99 = icmp sgt i32 %n, 0
  br i1 %cmp.99, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add66, %for.cond.cleanup.loopexit ]
  ret i32 %sum.0.lcssa

for.body:
  %i.0103 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %sum.0102 = phi i32 [ %add66, %for.body ], [ 0, %for.body.preheader ]
  %a.addr.0101 = phi i16* [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
  %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.0101, i64 1
  %0 = load i16, i16* %a.addr.0101, align 2
  %conv = zext i16 %0 to i64
  %incdec.ptr1 = getelementptr inbounds i16, i16* %b, i64 1
  %1 = load i16, i16* %b, align 2
  %conv2 = zext i16 %1 to i64
  %sub = sub nsw i64 %conv, %conv2
  %arrayidx = getelementptr inbounds i16, i16* %g, i64 %sub
  %2 = load i16, i16* %arrayidx, align 2
  %conv3 = zext i16 %2 to i32
  %add = add nsw i32 %conv3, %sum.0102
  %incdec.ptr4 = getelementptr inbounds i16, i16* %a.addr.0101, i64 2
  %3 = load i16, i16* %incdec.ptr, align 2
  %conv5 = zext i16 %3 to i64
  %incdec.ptr6 = getelementptr inbounds i16, i16* %b, i64 2
  %4 = load i16, i16* %incdec.ptr1, align 2
  %conv7 = zext i16 %4 to i64
  %sub8 = sub nsw i64 %conv5, %conv7
  %arrayidx10 = getelementptr inbounds i16, i16* %g, i64 %sub8
  %5 = load i16, i16* %arrayidx10, align 2
  %conv11 = zext i16 %5 to i32
  %add12 = add nsw i32 %add, %conv11
  %incdec.ptr13 = getelementptr inbounds i16, i16* %a.addr.0101, i64 3
  %6 = load i16, i16* %incdec.ptr4, align 2
  %conv14 = zext i16 %6 to i64
  %incdec.ptr15 = getelementptr inbounds i16, i16* %b, i64 3
  %7 = load i16, i16* %incdec.ptr6, align 2
  %conv16 = zext i16 %7 to i64
  %sub17 = sub nsw i64 %conv14, %conv16
  %arrayidx19 = getelementptr inbounds i16, i16* %g, i64 %sub17
  %8 = load i16, i16* %arrayidx19, align 2
  %conv20 = zext i16 %8 to i32
  %add21 = add nsw i32 %add12, %conv20
  %incdec.ptr22 = getelementptr inbounds i16, i16* %a.addr.0101, i64 4
  %9 = load i16, i16* %incdec.ptr13, align 2
  %conv23 = zext i16 %9 to i64
  %incdec.ptr24 = getelementptr inbounds i16, i16* %b, i64 4
  %10 = load i16, i16* %incdec.ptr15, align 2
  %conv25 = zext i16 %10 to i64
  %sub26 = sub nsw i64 %conv23, %conv25
  %arrayidx28 = getelementptr inbounds i16, i16* %g, i64 %sub26
  %11 = load i16, i16* %arrayidx28, align 2
  %conv29 = zext i16 %11 to i32
  %add30 = add nsw i32 %add21, %conv29
  %incdec.ptr31 = getelementptr inbounds i16, i16* %a.addr.0101, i64 5
  %12 = load i16, i16* %incdec.ptr22, align 2
  %conv32 = zext i16 %12 to i64
  %incdec.ptr33 = getelementptr inbounds i16, i16* %b, i64 5
  %13 = load i16, i16* %incdec.ptr24, align 2
  %conv34 = zext i16 %13 to i64
  %sub35 = sub nsw i64 %conv32, %conv34
  %arrayidx37 = getelementptr inbounds i16, i16* %g, i64 %sub35
  %14 = load i16, i16* %arrayidx37, align 2
  %conv38 = zext i16 %14 to i32
  %add39 = add nsw i32 %add30, %conv38
  %incdec.ptr40 = getelementptr inbounds i16, i16* %a.addr.0101, i64 6
  %15 = load i16, i16* %incdec.ptr31, align 2
  %conv41 = zext i16 %15 to i64
  %incdec.ptr42 = getelementptr inbounds i16, i16* %b, i64 6
  %16 = load i16, i16* %incdec.ptr33, align 2
  %conv43 = zext i16 %16 to i64
  %sub44 = sub nsw i64 %conv41, %conv43
  %arrayidx46 = getelementptr inbounds i16, i16* %g, i64 %sub44
  %17 = load i16, i16* %arrayidx46, align 2
  %conv47 = zext i16 %17 to i32
  %add48 = add nsw i32 %add39, %conv47
  %incdec.ptr49 = getelementptr inbounds i16, i16* %a.addr.0101, i64 7
  %18 = load i16, i16* %incdec.ptr40, align 2
  %conv50 = zext i16 %18 to i64
  %incdec.ptr51 = getelementptr inbounds i16, i16* %b, i64 7
  %19 = load i16, i16* %incdec.ptr42, align 2
  %conv52 = zext i16 %19 to i64
  %sub53 = sub nsw i64 %conv50, %conv52
  %arrayidx55 = getelementptr inbounds i16, i16* %g, i64 %sub53
  %20 = load i16, i16* %arrayidx55, align 2
  %conv56 = zext i16 %20 to i32
  %add57 = add nsw i32 %add48, %conv56
  %incdec.ptr58 = getelementptr inbounds i16, i16* %a.addr.0101, i64 8
  %21 = load i16, i16* %incdec.ptr49, align 2
  %conv59 = zext i16 %21 to i64
  %22 = load i16, i16* %incdec.ptr51, align 2
  %conv61 = zext i16 %22 to i64
  %sub62 = sub nsw i64 %conv59, %conv61
  %arrayidx64 = getelementptr inbounds i16, i16* %g, i64 %sub62
  %23 = load i16, i16* %arrayidx64, align 2
  %conv65 = zext i16 %23 to i32
  %add66 = add nsw i32 %add57, %conv65
  %inc = add nuw nsw i32 %i.0103, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}
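Editorial note: the i64 function above differs from the i32 one only in the width of the index subtraction. A minimal C sketch of that variant, assuming the same loop as the comment at the top of this file (illustration only; the rolled form and function name are hypothetical):

/* Hypothetical rolled form of @gather_reduce_8x16_i64: the index
 * difference is computed in 64 bits before indexing g. */
int gather_reduce_8x16_w(short *a, short *b, short *g, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i)
    for (int k = 0; k < 8; ++k) {
      long d = (long)*a++ - (long)b[k]; /* 64-bit subtraction */
      sum += g[d];
    }
  return sum;
}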
@@ -1,310 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S | FileCheck %s --check-prefix=DEFAULT
; RUN: opt < %s -slp-schedule-budget=0 -slp-min-tree-size=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=GATHER
; RUN: opt < %s -slp-schedule-budget=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=MAX-COST

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

@a = common global [80 x i8] zeroinitializer, align 16

define void @PR28330(i32 %n) {
; DEFAULT-LABEL: @PR28330(
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
; DEFAULT: for.body:
; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
; DEFAULT-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], undef
; DEFAULT-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef
; DEFAULT-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef
; DEFAULT-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], undef
; DEFAULT-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], undef
; DEFAULT-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], undef
; DEFAULT-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], undef
; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]])
; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], [[TMP17]]
; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef
; DEFAULT-NEXT: br label [[FOR_BODY]]
;
; GATHER-LABEL: @PR28330(
; GATHER-NEXT: entry:
; GATHER-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1
; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer
; GATHER-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
; GATHER-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
; GATHER-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
; GATHER-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0
; GATHER-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
; GATHER-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0
; GATHER-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
; GATHER-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
; GATHER-NEXT: [[TMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
; GATHER-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0
; GATHER-NEXT: [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
; GATHER-NEXT: br label [[FOR_BODY:%.*]]
; GATHER: for.body:
; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; GATHER-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -720, i32 -720>, <2 x i32> <i32 -80, i32 -80>
; GATHER-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
; GATHER-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], [[TMP3]]
; GATHER-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]]
; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]]
; GATHER-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]
; GATHER-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]
; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]]
; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0
; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1
; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP23]], i32 2
; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP25]], i32 3
; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP27]], i32 4
; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP29]], i32 5
; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6
; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7
; GATHER-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]])
; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP13]], [[TMP17]]
; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]
; GATHER-NEXT: br label [[FOR_BODY]]
;
; MAX-COST-LABEL: @PR28330(
; MAX-COST-NEXT: entry:
; MAX-COST-NEXT: [[TMP0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; MAX-COST-NEXT: [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
; MAX-COST-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0
; MAX-COST-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
; MAX-COST-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
; MAX-COST-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
; MAX-COST-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0
; MAX-COST-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
; MAX-COST-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0
; MAX-COST-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
; MAX-COST-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
; MAX-COST-NEXT: [[TMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
; MAX-COST-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0
; MAX-COST-NEXT: [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
; MAX-COST-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
; MAX-COST: for.body:
; MAX-COST-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; MAX-COST-NEXT: [[TMP19:%.*]] = select i1 [[TMP1]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], [[TMP19]]
; MAX-COST-NEXT: [[TMP21:%.*]] = select i1 [[TMP3]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]]
; MAX-COST-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
; MAX-COST-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]]
; MAX-COST-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]
; MAX-COST-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]
; MAX-COST-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]]
; MAX-COST-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP34]] = add i32 [[TMP32]], [[TMP33]]
; MAX-COST-NEXT: br label [[FOR_BODY]]
;
entry:
  %tmp0 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
  %tmp1 = icmp eq i8 %tmp0, 0
  %tmp2 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
  %tmp3 = icmp eq i8 %tmp2, 0
  %tmp4 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
  %tmp5 = icmp eq i8 %tmp4, 0
  %tmp6 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
  %tmp7 = icmp eq i8 %tmp6, 0
  %tmp8 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
  %tmp9 = icmp eq i8 %tmp8, 0
  %tmp10 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
  %tmp11 = icmp eq i8 %tmp10, 0
  %tmp12 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
  %tmp13 = icmp eq i8 %tmp12, 0
  %tmp14 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
  %tmp15 = icmp eq i8 %tmp14, 0
  br label %for.body

for.body:
  %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
  %tmp19 = select i1 %tmp1, i32 -720, i32 -80
  %tmp20 = add i32 %tmp17, %tmp19
  %tmp21 = select i1 %tmp3, i32 -720, i32 -80
  %tmp22 = add i32 %tmp20, %tmp21
  %tmp23 = select i1 %tmp5, i32 -720, i32 -80
  %tmp24 = add i32 %tmp22, %tmp23
  %tmp25 = select i1 %tmp7, i32 -720, i32 -80
  %tmp26 = add i32 %tmp24, %tmp25
  %tmp27 = select i1 %tmp9, i32 -720, i32 -80
  %tmp28 = add i32 %tmp26, %tmp27
  %tmp29 = select i1 %tmp11, i32 -720, i32 -80
  %tmp30 = add i32 %tmp28, %tmp29
  %tmp31 = select i1 %tmp13, i32 -720, i32 -80
  %tmp32 = add i32 %tmp30, %tmp31
  %tmp33 = select i1 %tmp15, i32 -720, i32 -80
  %tmp34 = add i32 %tmp32, %tmp33
  br label %for.body
}

define void @PR32038(i32 %n) {
; DEFAULT-LABEL: @PR32038(
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
; DEFAULT: for.body:
; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
; DEFAULT-NEXT: [[TMP20:%.*]] = add i32 -5, undef
; DEFAULT-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef
; DEFAULT-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef
; DEFAULT-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], undef
; DEFAULT-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], undef
; DEFAULT-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], undef
; DEFAULT-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], undef
; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]])
; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], -5
; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef
; DEFAULT-NEXT: br label [[FOR_BODY]]
;
; GATHER-LABEL: @PR32038(
; GATHER-NEXT: entry:
; GATHER-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1
; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer
; GATHER-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
; GATHER-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
; GATHER-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
; GATHER-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0
; GATHER-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
; GATHER-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0
; GATHER-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
; GATHER-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
; GATHER-NEXT: [[TMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
; GATHER-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0
; GATHER-NEXT: [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
; GATHER-NEXT: br label [[FOR_BODY:%.*]]
; GATHER: for.body:
; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; GATHER-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -720, i32 -720>, <2 x i32> <i32 -80, i32 -80>
; GATHER-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP3]]
; GATHER-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]]
; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]]
; GATHER-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]
; GATHER-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]
; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]]
; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0
; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1
; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP23]], i32 2
; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP25]], i32 3
; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP27]], i32 4
; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP29]], i32 5
; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6
; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7
; GATHER-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]])
; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP13]], -5
; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]
; GATHER-NEXT: br label [[FOR_BODY]]
;
; MAX-COST-LABEL: @PR32038(
; MAX-COST-NEXT: entry:
; MAX-COST-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1
; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer
; MAX-COST-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
; MAX-COST-NEXT: [[TMPP5:%.*]] = icmp eq i8 [[TMP4]], 0
; MAX-COST-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
; MAX-COST-NEXT: [[TMPP7:%.*]] = icmp eq i8 [[TMP6]], 0
; MAX-COST-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
; MAX-COST-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0
; MAX-COST-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
; MAX-COST-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
; MAX-COST-NEXT: [[TMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
; MAX-COST-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0
; MAX-COST-NEXT: [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
; MAX-COST-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
; MAX-COST: for.body:
; MAX-COST-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; MAX-COST-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> undef, i1 [[TMP2]], i32 0
; MAX-COST-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; MAX-COST-NEXT: [[TMP5:%.*]] = insertelement <4 x i1> [[TMP3]], i1 [[TMP4]], i32 1
; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[TMPP5]], i32 2
; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMPP7]], i32 3
; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
; MAX-COST-NEXT: [[TMP20:%.*]] = add i32 -5, undef
; MAX-COST-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef
; MAX-COST-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef
; MAX-COST-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], undef
; MAX-COST-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]
; MAX-COST-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP8]])
; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP27]]
; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[TMP29]]
; MAX-COST-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP11]], -5
; MAX-COST-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]
; MAX-COST-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP32:%.*]] = add i32 [[BIN_EXTRA]], [[TMP31]]
; MAX-COST-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP34]] = add i32 [[TMP32]], [[TMP33]]
; MAX-COST-NEXT: br label [[FOR_BODY]]
;
entry:
  %tmp0 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
  %tmp1 = icmp eq i8 %tmp0, 0
  %tmp2 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
  %tmp3 = icmp eq i8 %tmp2, 0
  %tmp4 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
  %tmp5 = icmp eq i8 %tmp4, 0
  %tmp6 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
  %tmp7 = icmp eq i8 %tmp6, 0
  %tmp8 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
  %tmp9 = icmp eq i8 %tmp8, 0
  %tmp10 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
  %tmp11 = icmp eq i8 %tmp10, 0
  %tmp12 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
  %tmp13 = icmp eq i8 %tmp12, 0
  %tmp14 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
  %tmp15 = icmp eq i8 %tmp14, 0
  br label %for.body

for.body:
  %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
  %tmp19 = select i1 %tmp1, i32 -720, i32 -80
  %tmp20 = add i32 -5, %tmp19
  %tmp21 = select i1 %tmp3, i32 -720, i32 -80
  %tmp22 = add i32 %tmp20, %tmp21
  %tmp23 = select i1 %tmp5, i32 -720, i32 -80
  %tmp24 = add i32 %tmp22, %tmp23
  %tmp25 = select i1 %tmp7, i32 -720, i32 -80
  %tmp26 = add i32 %tmp24, %tmp25
  %tmp27 = select i1 %tmp9, i32 -720, i32 -80
  %tmp28 = add i32 %tmp26, %tmp27
  %tmp29 = select i1 %tmp11, i32 -720, i32 -80
  %tmp30 = add i32 %tmp28, %tmp29
  %tmp31 = select i1 %tmp13, i32 -720, i32 -80
  %tmp32 = add i32 %tmp30, %tmp31
  %tmp33 = select i1 %tmp15, i32 -720, i32 -80
  %tmp34 = add i32 %tmp32, %tmp33
  br label %for.body
}
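Editorial note: PR28330 and PR32038 both reduce eight compare-fed selects over bytes of @a; PR32038 seeds the sum with the constant -5 instead of the loop phi. A hedged C analogue of one trip of the scalar pattern (illustration only; the function name is hypothetical):

/* Illustrative scalar shape of the PR28330 reduction body. */
extern char a[80];
int pr28330_step(int s) {
  for (int k = 1; k <= 8; ++k)
    s += (a[k] == 0) ? -720 : -80; /* the icmp+select chain */
  return s;
}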
@@ -1,155 +0,0 @@
; RUN: opt -S -slp-vectorizer -slp-threshold=-18 -dce -instcombine -pass-remarks-output=%t < %s | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=YAML %s
; RUN: opt -S -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-18 -pass-remarks-output=%t < %s | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=YAML %s


target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; These tests check that we remove from consideration pairs of seed
; getelementptrs when they are known to have a constant difference. Such pairs
; are likely not good candidates for vectorization since one can be computed
; from the other. We use an unprofitable threshold to force vectorization.
;
; int getelementptr(int *g, int n, int w, int x, int y, int z) {
;   int sum = 0;
;   for (int i = 0; i < n ; ++i) {
;     sum += g[2*i + w]; sum += g[2*i + x];
;     sum += g[2*i + y]; sum += g[2*i + z];
;   }
;   return sum;
; }
;

; CHECK-LABEL: @getelementptr_4x32
;
; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <4 x i32>
; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <4 x i32> [[A]]
; CHECK: sext i32 [[X]] to i64

; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_4x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '11'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '5'

; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_4x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '16'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '3'

define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
entry:
  %cmp31 = icmp sgt i32 %n, 0
  br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
  ret i32 %sum.0.lcssa

for.body:
  %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
  %t4 = shl nsw i32 %indvars.iv, 1
  %t5 = add nsw i32 %t4, 0
  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
  %t6 = load i32, i32* %arrayidx, align 4
  %add1 = add nsw i32 %t6, %sum.032
  %t7 = add nsw i32 %t4, %x
  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
  %t8 = load i32, i32* %arrayidx5, align 4
  %add6 = add nsw i32 %add1, %t8
  %t9 = add nsw i32 %t4, %y
  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
  %t10 = load i32, i32* %arrayidx10, align 4
  %add11 = add nsw i32 %add6, %t10
  %t11 = add nsw i32 %t4, %z
  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
  %t12 = load i32, i32* %arrayidx15, align 4
  %add16 = add nsw i32 %add11, %t12
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; CHECK-LABEL: @getelementptr_2x32
;
; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <2 x i32>
; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[A]]
; CHECK: sext i32 [[X]] to i64

; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_2x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '11'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '5'

; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_2x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '6'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '3'

define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
entry:
  %cmp31 = icmp sgt i32 %n, 0
  br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
  ret i32 %sum.0.lcssa

for.body:
  %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
  %t4 = shl nsw i32 %indvars.iv, 1
  %t5 = add nsw i32 %t4, 0
  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
  %t6 = load i32, i32* %arrayidx, align 4
  %add1 = add nsw i32 %t6, %sum.032
  %t7 = add nsw i32 %t4, 1
  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
  %t8 = load i32, i32* %arrayidx5, align 4
  %add6 = add nsw i32 %add1, %t8
  %t9 = add nsw i32 %t4, %y
  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
  %t10 = load i32, i32* %arrayidx10, align 4
  %add11 = add nsw i32 %add6, %t10
  %t11 = add nsw i32 %t4, %z
  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
  %t12 = load i32, i32* %arrayidx15, align 4
  %add16 = add nsw i32 %add11, %t12
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}
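Editorial note: getelementptr_2x32 above shows the heuristic the file's comment describes. Its indices 2*i+0 and 2*i+1 differ by a constant, so the second address is trivially derivable from the first and the pair is dropped from the seed list, leaving only a 2-wide vectorization. A small C sketch of that equivalence (illustration only):

/* Illustration: the two accesses need only one computed address,
 * which is what makes their GEPs poor vectorization seeds. */
int two_adjacent_loads(const int *g, int i) {
  const int *p = g + 2 * i; /* one address computation */
  return p[0] + p[1];       /* second access at constant offset +1 */
}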
|
@ -1,307 +0,0 @@
|
||||
; RUN: opt -slp-vectorizer -slp-threshold=-6 -S -pass-remarks-output=%t < %s | FileCheck %s
|
||||
; RUN: cat %t | FileCheck -check-prefix=YAML %s
|
||||
; RUN: opt -passes=slp-vectorizer -slp-threshold=-6 -S -pass-remarks-output=%t < %s | FileCheck %s
|
||||
; RUN: cat %t | FileCheck -check-prefix=YAML %s
|
||||
|
||||
|
||||
; FIXME: The threshold is changed to keep this test case a bit smaller.
|
||||
; The AArch64 cost model should not give such high costs to select statements.
|
||||
|
||||
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64--linux"
|
||||
|
||||
; CHECK-LABEL: test_select
|
||||
; CHECK: load <4 x i32>
|
||||
; CHECK: load <4 x i32>
|
||||
; CHECK: select <4 x i1>
|
||||
|
||||
; YAML: --- !Passed
|
||||
; YAML-NEXT: Pass: slp-vectorizer
|
||||
; YAML-NEXT: Name: VectorizedHorizontalReduction
|
||||
; YAML-NEXT: Function: test_select
|
||||
; YAML-NEXT: Args:
|
||||
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
|
||||
; YAML-NEXT: - Cost: '4'
|
||||
; YAML-NEXT: - String: ' and with tree size '
|
||||
; YAML-NEXT: - TreeSize: '8'
|
||||
|
||||
define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias nocapture readonly %blk2, i32 %lx, i32 %h) {
|
||||
entry:
|
||||
%cmp.22 = icmp sgt i32 %h, 0
|
||||
br i1 %cmp.22, label %for.body.lr.ph, label %for.end
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
%idx.ext = sext i32 %lx to i64
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %for.body.lr.ph
|
||||
%s.026 = phi i32 [ 0, %for.body.lr.ph ], [ %add27, %for.body ]
|
||||
%j.025 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
|
||||
%p2.024 = phi i32* [ %blk2, %for.body.lr.ph ], [ %add.ptr29, %for.body ]
|
||||
%p1.023 = phi i32* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %for.body ]
|
||||
%0 = load i32, i32* %p1.023, align 4
|
||||
%1 = load i32, i32* %p2.024, align 4
|
||||
%sub = sub nsw i32 %0, %1
|
||||
%cmp2 = icmp slt i32 %sub, 0
|
||||
%sub3 = sub nsw i32 0, %sub
|
||||
%sub3.sub = select i1 %cmp2, i32 %sub3, i32 %sub
|
||||
%add = add nsw i32 %sub3.sub, %s.026
|
||||
%arrayidx4 = getelementptr inbounds i32, i32* %p1.023, i64 1
|
||||
%2 = load i32, i32* %arrayidx4, align 4
|
||||
%arrayidx5 = getelementptr inbounds i32, i32* %p2.024, i64 1
|
||||
%3 = load i32, i32* %arrayidx5, align 4
|
||||
%sub6 = sub nsw i32 %2, %3
|
||||
%cmp7 = icmp slt i32 %sub6, 0
|
||||
%sub9 = sub nsw i32 0, %sub6
|
||||
%v.1 = select i1 %cmp7, i32 %sub9, i32 %sub6
|
||||
%add11 = add nsw i32 %add, %v.1
|
||||
%arrayidx12 = getelementptr inbounds i32, i32* %p1.023, i64 2
|
||||
%4 = load i32, i32* %arrayidx12, align 4
|
||||
%arrayidx13 = getelementptr inbounds i32, i32* %p2.024, i64 2
|
||||
%5 = load i32, i32* %arrayidx13, align 4
|
||||
%sub14 = sub nsw i32 %4, %5
|
||||
%cmp15 = icmp slt i32 %sub14, 0
|
||||
%sub17 = sub nsw i32 0, %sub14
|
||||
%sub17.sub14 = select i1 %cmp15, i32 %sub17, i32 %sub14
|
||||
%add19 = add nsw i32 %add11, %sub17.sub14
|
||||
%arrayidx20 = getelementptr inbounds i32, i32* %p1.023, i64 3
|
||||
%6 = load i32, i32* %arrayidx20, align 4
|
||||
%arrayidx21 = getelementptr inbounds i32, i32* %p2.024, i64 3
|
||||
%7 = load i32, i32* %arrayidx21, align 4
|
||||
%sub22 = sub nsw i32 %6, %7
|
||||
%cmp23 = icmp slt i32 %sub22, 0
|
||||
%sub25 = sub nsw i32 0, %sub22
|
||||
%v.3 = select i1 %cmp23, i32 %sub25, i32 %sub22
|
||||
%add27 = add nsw i32 %add19, %v.3
|
||||
%add.ptr = getelementptr inbounds i32, i32* %p1.023, i64 %idx.ext
|
||||
%add.ptr29 = getelementptr inbounds i32, i32* %p2.024, i64 %idx.ext
|
||||
%inc = add nuw nsw i32 %j.025, 1
|
||||
%exitcond = icmp eq i32 %inc, %h
|
||||
br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
%s.0.lcssa = phi i32 [ 0, %entry ], [ %add27, %for.end.loopexit ]
|
||||
ret i32 %s.0.lcssa
|
||||
}
|
||||
|
||||
;; Check whether SLP can find a reduction phi whose incoming blocks are not
|
||||
;; the same as the block containing the phi.
|
||||
;;
|
||||
;; Came from code like,
|
||||
;;
|
||||
;; int s = 0;
|
||||
;; for (int j = 0; j < h; j++) {
|
||||
;; s += p1[0] * p2[0]
|
||||
;; s += p1[1] * p2[1];
|
||||
;; s += p1[2] * p2[2];
|
||||
;; s += p1[3] * p2[3];
|
||||
;; if (s >= lim)
|
||||
;; break;
|
||||
;; p1 += lx;
|
||||
;; p2 += lx;
|
||||
;; }
|
||||
define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) {
|
||||
; CHECK-LABEL: reduction_with_br
|
||||
; CHECK: load <4 x i32>
|
||||
; CHECK: load <4 x i32>
|
||||
; CHECK: mul nsw <4 x i32>
|
||||
|
||||
; YAML: --- !Passed
|
||||
; YAML-NEXT: Pass: slp-vectorizer
|
||||
; YAML-NEXT: Name: VectorizedHorizontalReduction
|
||||
; YAML-NEXT: Function: reduction_with_br
|
||||
; YAML-NEXT: Args:
|
||||
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
|
||||
; YAML-NEXT: - Cost: '1'
|
||||
; YAML-NEXT: - String: ' and with tree size '
|
||||
; YAML-NEXT: - TreeSize: '3'
|
||||
|
||||
entry:
|
||||
%cmp.16 = icmp sgt i32 %h, 0
|
||||
br i1 %cmp.16, label %for.body.lr.ph, label %for.end
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
%idx.ext = sext i32 %lx to i64
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.lr.ph, %if.end
|
||||
%s.020 = phi i32 [ 0, %for.body.lr.ph ], [ %add13, %if.end ]
|
||||
%j.019 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
|
||||
%p2.018 = phi i32* [ %blk2, %for.body.lr.ph ], [ %add.ptr16, %if.end ]
|
||||
%p1.017 = phi i32* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %if.end ]
|
||||
%0 = load i32, i32* %p1.017, align 4
|
||||
%1 = load i32, i32* %p2.018, align 4
|
||||
%mul = mul nsw i32 %1, %0
|
||||
%add = add nsw i32 %mul, %s.020
|
||||
%arrayidx2 = getelementptr inbounds i32, i32* %p1.017, i64 1
|
||||
%2 = load i32, i32* %arrayidx2, align 4
|
||||
%arrayidx3 = getelementptr inbounds i32, i32* %p2.018, i64 1
|
||||
%3 = load i32, i32* %arrayidx3, align 4
|
||||
%mul4 = mul nsw i32 %3, %2
|
||||
%add5 = add nsw i32 %add, %mul4
|
||||
%arrayidx6 = getelementptr inbounds i32, i32* %p1.017, i64 2
|
||||
%4 = load i32, i32* %arrayidx6, align 4
|
||||
%arrayidx7 = getelementptr inbounds i32, i32* %p2.018, i64 2
|
||||
%5 = load i32, i32* %arrayidx7, align 4
|
||||
%mul8 = mul nsw i32 %5, %4
|
||||
%add9 = add nsw i32 %add5, %mul8
|
||||
%arrayidx10 = getelementptr inbounds i32, i32* %p1.017, i64 3
|
||||
%6 = load i32, i32* %arrayidx10, align 4
|
||||
%arrayidx11 = getelementptr inbounds i32, i32* %p2.018, i64 3
|
||||
%7 = load i32, i32* %arrayidx11, align 4
|
||||
%mul12 = mul nsw i32 %7, %6
|
||||
%add13 = add nsw i32 %add9, %mul12
|
||||
%cmp14 = icmp slt i32 %add13, %lim
|
||||
br i1 %cmp14, label %if.end, label %for.end.loopexit
|
||||
|
||||
if.end: ; preds = %for.body
|
||||
%add.ptr = getelementptr inbounds i32, i32* %p1.017, i64 %idx.ext
|
||||
%add.ptr16 = getelementptr inbounds i32, i32* %p2.018, i64 %idx.ext
|
||||
%inc = add nuw nsw i32 %j.019, 1
|
||||
%cmp = icmp slt i32 %inc, %h
|
||||
br i1 %cmp, label %for.body, label %for.end.loopexit
|
||||
|
||||
for.end.loopexit: ; preds = %for.body, %if.end
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
%s.1 = phi i32 [ 0, %entry ], [ %add13, %for.end.loopexit ]
|
||||
ret i32 %s.1
|
||||
}
|
||||
|
||||
; CHECK: test_unrolled_select
|
||||
; CHECK: load <8 x i8>
|
||||
; CHECK: load <8 x i8>
|
||||
; CHECK: select <8 x i1>
|
||||
|
||||
; YAML: --- !Passed
|
||||
; YAML-NEXT: Pass: slp-vectorizer
|
||||
; YAML-NEXT: Name: VectorizedHorizontalReduction
|
||||
; YAML-NEXT: Function: test_unrolled_select
|
||||
; YAML-NEXT: Args:
|
||||
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
|
||||
; YAML-NEXT: - Cost: '-33'
|
||||
; YAML-NEXT: - String: ' and with tree size '
|
||||
; YAML-NEXT: - TreeSize: '10'
|
||||
|
||||
define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) #0 {
entry:
%cmp.43 = icmp sgt i32 %h, 0
br i1 %cmp.43, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
%idx.ext = sext i32 %lx to i64
br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %if.end.86
%s.047 = phi i32 [ 0, %for.body.lr.ph ], [ %add82, %if.end.86 ]
%j.046 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end.86 ]
%p2.045 = phi i8* [ %blk2, %for.body.lr.ph ], [ %add.ptr88, %if.end.86 ]
%p1.044 = phi i8* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %if.end.86 ]
%0 = load i8, i8* %p1.044, align 1
%conv = zext i8 %0 to i32
%1 = load i8, i8* %p2.045, align 1
%conv2 = zext i8 %1 to i32
%sub = sub nsw i32 %conv, %conv2
%cmp3 = icmp slt i32 %sub, 0
%sub5 = sub nsw i32 0, %sub
%sub5.sub = select i1 %cmp3, i32 %sub5, i32 %sub
%add = add nsw i32 %sub5.sub, %s.047
%arrayidx6 = getelementptr inbounds i8, i8* %p1.044, i64 1
%2 = load i8, i8* %arrayidx6, align 1
%conv7 = zext i8 %2 to i32
%arrayidx8 = getelementptr inbounds i8, i8* %p2.045, i64 1
%3 = load i8, i8* %arrayidx8, align 1
%conv9 = zext i8 %3 to i32
%sub10 = sub nsw i32 %conv7, %conv9
%cmp11 = icmp slt i32 %sub10, 0
%sub14 = sub nsw i32 0, %sub10
%v.1 = select i1 %cmp11, i32 %sub14, i32 %sub10
%add16 = add nsw i32 %add, %v.1
%arrayidx17 = getelementptr inbounds i8, i8* %p1.044, i64 2
%4 = load i8, i8* %arrayidx17, align 1
%conv18 = zext i8 %4 to i32
%arrayidx19 = getelementptr inbounds i8, i8* %p2.045, i64 2
%5 = load i8, i8* %arrayidx19, align 1
%conv20 = zext i8 %5 to i32
%sub21 = sub nsw i32 %conv18, %conv20
%cmp22 = icmp slt i32 %sub21, 0
%sub25 = sub nsw i32 0, %sub21
%sub25.sub21 = select i1 %cmp22, i32 %sub25, i32 %sub21
%add27 = add nsw i32 %add16, %sub25.sub21
%arrayidx28 = getelementptr inbounds i8, i8* %p1.044, i64 3
%6 = load i8, i8* %arrayidx28, align 1
%conv29 = zext i8 %6 to i32
%arrayidx30 = getelementptr inbounds i8, i8* %p2.045, i64 3
%7 = load i8, i8* %arrayidx30, align 1
%conv31 = zext i8 %7 to i32
%sub32 = sub nsw i32 %conv29, %conv31
%cmp33 = icmp slt i32 %sub32, 0
%sub36 = sub nsw i32 0, %sub32
%v.3 = select i1 %cmp33, i32 %sub36, i32 %sub32
%add38 = add nsw i32 %add27, %v.3
%arrayidx39 = getelementptr inbounds i8, i8* %p1.044, i64 4
%8 = load i8, i8* %arrayidx39, align 1
%conv40 = zext i8 %8 to i32
%arrayidx41 = getelementptr inbounds i8, i8* %p2.045, i64 4
%9 = load i8, i8* %arrayidx41, align 1
%conv42 = zext i8 %9 to i32
%sub43 = sub nsw i32 %conv40, %conv42
%cmp44 = icmp slt i32 %sub43, 0
%sub47 = sub nsw i32 0, %sub43
%sub47.sub43 = select i1 %cmp44, i32 %sub47, i32 %sub43
%add49 = add nsw i32 %add38, %sub47.sub43
%arrayidx50 = getelementptr inbounds i8, i8* %p1.044, i64 5
%10 = load i8, i8* %arrayidx50, align 1
%conv51 = zext i8 %10 to i32
%arrayidx52 = getelementptr inbounds i8, i8* %p2.045, i64 5
%11 = load i8, i8* %arrayidx52, align 1
%conv53 = zext i8 %11 to i32
%sub54 = sub nsw i32 %conv51, %conv53
%cmp55 = icmp slt i32 %sub54, 0
%sub58 = sub nsw i32 0, %sub54
%v.5 = select i1 %cmp55, i32 %sub58, i32 %sub54
%add60 = add nsw i32 %add49, %v.5
%arrayidx61 = getelementptr inbounds i8, i8* %p1.044, i64 6
%12 = load i8, i8* %arrayidx61, align 1
%conv62 = zext i8 %12 to i32
%arrayidx63 = getelementptr inbounds i8, i8* %p2.045, i64 6
%13 = load i8, i8* %arrayidx63, align 1
%conv64 = zext i8 %13 to i32
%sub65 = sub nsw i32 %conv62, %conv64
%cmp66 = icmp slt i32 %sub65, 0
%sub69 = sub nsw i32 0, %sub65
%sub69.sub65 = select i1 %cmp66, i32 %sub69, i32 %sub65
%add71 = add nsw i32 %add60, %sub69.sub65
%arrayidx72 = getelementptr inbounds i8, i8* %p1.044, i64 7
%14 = load i8, i8* %arrayidx72, align 1
%conv73 = zext i8 %14 to i32
%arrayidx74 = getelementptr inbounds i8, i8* %p2.045, i64 7
%15 = load i8, i8* %arrayidx74, align 1
%conv75 = zext i8 %15 to i32
%sub76 = sub nsw i32 %conv73, %conv75
%cmp77 = icmp slt i32 %sub76, 0
%sub80 = sub nsw i32 0, %sub76
%v.7 = select i1 %cmp77, i32 %sub80, i32 %sub76
%add82 = add nsw i32 %add71, %v.7
%cmp83 = icmp slt i32 %add82, %lim
br i1 %cmp83, label %if.end.86, label %for.end.loopexit

if.end.86:                                        ; preds = %for.body
%add.ptr = getelementptr inbounds i8, i8* %p1.044, i64 %idx.ext
%add.ptr88 = getelementptr inbounds i8, i8* %p2.045, i64 %idx.ext
%inc = add nuw nsw i32 %j.046, 1
%cmp = icmp slt i32 %inc, %h
br i1 %cmp, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body, %if.end.86
br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
%s.1 = phi i32 [ 0, %entry ], [ %add82, %for.end.loopexit ]
ret i32 %s.1
}
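
; For orientation: the loop body above is a hand-unrolled sum of absolute
; differences over eight byte pairs. A hedged sketch of the vector shape SLP
; aims for here (value names invented for illustration, not checked by this
; test):
;
;   %va  = load <8 x i8>, <8 x i8>* %pa, align 1    ; %pa = bitcast of %p1.044
;   %vb  = load <8 x i8>, <8 x i8>* %pb, align 1    ; %pb = bitcast of %p2.045
;   %za  = zext <8 x i8> %va to <8 x i32>
;   %zb  = zext <8 x i8> %vb to <8 x i32>
;   %d   = sub nsw <8 x i32> %za, %zb
;   %neg = sub nsw <8 x i32> zeroinitializer, %d
;   %lt  = icmp slt <8 x i32> %d, zeroinitializer
;   %abs = select <8 x i1> %lt, <8 x i32> %neg, <8 x i32> %d
;   ... followed by a horizontal add of %abs accumulated into %add82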
@ -1,2 +0,0 @@
if not 'AArch64' in config.root.targets:
    config.unsupported = True
@ -1,46 +0,0 @@
; RUN: opt -S -basicaa -slp-vectorizer < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios5.0.0"

; Holding a value live over a call boundary may require
; spills and fills. This is the case for <2 x double>,
; as it occupies a Q register, of which there are no
; callee-saves.

; CHECK: load double
; CHECK: load double
; CHECK: call void @g
; CHECK: store double
; CHECK: store double
define void @f(double* %p, double* %q) {
%addr2 = getelementptr double, double* %q, i32 1
%addr = getelementptr double, double* %p, i32 1
%x = load double, double* %p
%y = load double, double* %addr
call void @g()
store double %x, double* %q
store double %y, double* %addr2
ret void
}
declare void @g()
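
; If SLP did vectorize across the call (a hypothetical sketch with invented
; value names), the <2 x double> would be live over @g and need a spill/fill,
; since no Q register is callee-saved:
;
;   %pv = bitcast double* %p to <2 x double>*
;   %v = load <2 x double>, <2 x double>* %pv
;   call void @g()
;   %qv = bitcast double* %q to <2 x double>*
;   store <2 x double> %v, <2 x double>* %qv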

; Check that we deal with loops correctly.
;
; CHECK: store <2 x double>
; CHECK: load <2 x double>
define void @f2(double* %p, double* %q) {
entry:
br label %loop

loop:
%p1 = phi double [0.0, %entry], [%x, %loop]
%p2 = phi double [0.0, %entry], [%y, %loop]
%addr2 = getelementptr double, double* %q, i32 1
%addr = getelementptr double, double* %p, i32 1
store double %p1, double* %q
store double %p2, double* %addr2

%x = load double, double* %p
%y = load double, double* %addr
br label %loop
}
@ -1,55 +0,0 @@
; RUN: opt -S -slp-vectorizer < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; This test ensures that we do not regress due to PR26364. The vectorizer
; should not compute a smaller size for %k.13 since it is in a use-def cycle
; and cannot be demoted.
;
; CHECK-LABEL: @PR26364
; CHECK: %k.13 = phi i32
;
define fastcc void @PR26364() {
entry:
br i1 undef, label %for.end11, label %for.cond4

for.cond4:
%k.13 = phi i32 [ undef, %entry ], [ %k.3, %for.cond4 ]
%e.02 = phi i32 [ 1, %entry ], [ 0, %for.cond4 ]
%e.1 = select i1 undef, i32 %e.02, i32 0
%k.3 = select i1 undef, i32 %k.13, i32 undef
br label %for.cond4

for.end11:
ret void
}
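
; For contrast, a minimal sketch (invented here, not part of the original
; test) of a value that can be demoted: every use of %x below needs only the
; low 8 bits, so the vectorizer may compute it in i8. %k.13 above has no such
; truncating use; it feeds itself through %k.3, so its full i32 width must be
; kept.
;
;   %x = add i32 %a, 1
;   %t = trunc i32 %x to i8
;   store i8 %t, i8* %p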

; This test ensures that we do not regress due to PR26629. We must look at
; every root in the vectorizable tree when computing minimum sizes since one
; root may require fewer bits than another.
;
; CHECK-LABEL: @PR26629
; CHECK-NOT: {{.*}} and <2 x i72>
;
define void @PR26629(i32* %c) {
entry:
br i1 undef, label %for.ph, label %for.end

for.ph:
%0 = load i32, i32* %c, align 4
br label %for.body

for.body:
%d = phi i72 [ 576507472957710340, %for.ph ], [ %bf.set17, %for.body ]
%sub = sub i32 %0, undef
%bf.clear13 = and i72 %d, -576460748008464384
%1 = zext i32 %sub to i72
%bf.value15 = and i72 %1, 8191
%bf.clear16 = or i72 %bf.value15, %bf.clear13
%bf.set17 = or i72 %bf.clear16, undef
br label %for.body

for.end:
ret void
}
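
; Worked detail (an inference from the masks above, not stated in the test):
; one root, 'and %1, 8191', needs only 13 significant bits (8191 = 2^13 - 1),
; while the other, 'and %d, -576460748008464384', keeps the high bits of the
; i72. Computing the minimum size from the first root alone would mis-type
; the whole tree, which is the PR26629 failure mode this test pins down.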
@ -1,18 +0,0 @@
; RUN: opt -S -slp-vectorizer %s | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios5.0.0"

define i64 @mismatched_intrinsics(<4 x i32> %in1, <2 x i32> %in2) nounwind {
; CHECK-LABEL: @mismatched_intrinsics
; CHECK: call i64 @llvm.arm64.neon.saddlv.i64.v4i32
; CHECK: call i64 @llvm.arm64.neon.saddlv.i64.v2i32

%vaddlvq_s32.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32> %in1) #2
%vaddlv_s32.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> %in2) #2
%tst = icmp sgt i64 %vaddlvq_s32.i, %vaddlv_s32.i
%equal = sext i1 %tst to i64
ret i64 %equal
}

declare i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32> %in1)
declare i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> %in1)
@ -1,76 +0,0 @@
; RUN: opt -S -basicaa -slp-vectorizer -dce < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios5.0.0"

; CHECK-LABEL: @foo
define void @foo(float* noalias %a, float* noalias %b, float* noalias %c) {
entry:
; Check that we don't lose the !nontemporal hint when vectorizing loads.
; CHECK: %{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4, !nontemporal !0
%b1 = load float, float* %b, align 4, !nontemporal !0
%arrayidx.1 = getelementptr inbounds float, float* %b, i64 1
%b2 = load float, float* %arrayidx.1, align 4, !nontemporal !0
%arrayidx.2 = getelementptr inbounds float, float* %b, i64 2
%b3 = load float, float* %arrayidx.2, align 4, !nontemporal !0
%arrayidx.3 = getelementptr inbounds float, float* %b, i64 3
%b4 = load float, float* %arrayidx.3, align 4, !nontemporal !0

; Check that we don't introduce the !nontemporal hint when the original scalar loads didn't have it.
; CHECK: %{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4{{$}}
%c1 = load float, float* %c, align 4
%arrayidx2.1 = getelementptr inbounds float, float* %c, i64 1
%c2 = load float, float* %arrayidx2.1, align 4
%arrayidx2.2 = getelementptr inbounds float, float* %c, i64 2
%c3 = load float, float* %arrayidx2.2, align 4
%arrayidx2.3 = getelementptr inbounds float, float* %c, i64 3
%c4 = load float, float* %arrayidx2.3, align 4

%a1 = fadd float %b1, %c1
%a2 = fadd float %b2, %c2
%a3 = fadd float %b3, %c3
%a4 = fadd float %b4, %c4

; Check that we don't lose the !nontemporal hint when vectorizing stores.
; CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 4, !nontemporal !0
store float %a1, float* %a, align 4, !nontemporal !0
%arrayidx3.1 = getelementptr inbounds float, float* %a, i64 1
store float %a2, float* %arrayidx3.1, align 4, !nontemporal !0
%arrayidx3.2 = getelementptr inbounds float, float* %a, i64 2
store float %a3, float* %arrayidx3.2, align 4, !nontemporal !0
%arrayidx3.3 = getelementptr inbounds float, float* %a, i64 3
store float %a4, float* %arrayidx3.3, align 4, !nontemporal !0

; CHECK: ret void
ret void
}

; CHECK-LABEL: @foo2
define void @foo2(float* noalias %a, float* noalias %b) {
entry:
; Check that we don't mark the vector load with the !nontemporal attribute if
; some of the original scalar loads don't have it.
; CHECK: %{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4{{$}}
%b1 = load float, float* %b, align 4, !nontemporal !0
%arrayidx.1 = getelementptr inbounds float, float* %b, i64 1
%b2 = load float, float* %arrayidx.1, align 4
%arrayidx.2 = getelementptr inbounds float, float* %b, i64 2
%b3 = load float, float* %arrayidx.2, align 4
%arrayidx.3 = getelementptr inbounds float, float* %b, i64 3
%b4 = load float, float* %arrayidx.3, align 4, !nontemporal !0

; Check that we don't mark the vector store with the !nontemporal attribute if
; some of the original scalar stores don't have it.
; CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 4{{$}}
store float %b1, float* %a, align 4, !nontemporal !0
%arrayidx3.1 = getelementptr inbounds float, float* %a, i64 1
store float %b2, float* %arrayidx3.1, align 4
%arrayidx3.2 = getelementptr inbounds float, float* %a, i64 2
store float %b3, float* %arrayidx3.2, align 4
%arrayidx3.3 = getelementptr inbounds float, float* %a, i64 3
store float %b4, float* %arrayidx3.3, align 4, !nontemporal !0

; CHECK: ret void
ret void
}

!0 = !{i32 1}
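
; For reference (general LangRef behavior, not specific to this test): the
; !nontemporal metadata must reference a node with a single i32 value of 1,
; as !0 above declares, and it is attached to an individual memory operation
; like so:
;
;   %v = load float, float* %p, align 4, !nontemporal !0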
@ -1,32 +0,0 @@
; RUN: opt -S -slp-vectorizer -mtriple=aarch64--linux-gnu -mcpu=generic -pass-remarks=slp-vectorizer -o /dev/null < %s 2>&1 | FileCheck %s

define void @f(double* %r, double* %w) {
%r0 = getelementptr inbounds double, double* %r, i64 0
%r1 = getelementptr inbounds double, double* %r, i64 1
%f0 = load double, double* %r0
%f1 = load double, double* %r1
%add0 = fadd double %f0, %f0
%add1 = fadd double %f1, %f1
%w0 = getelementptr inbounds double, double* %w, i64 0
%w1 = getelementptr inbounds double, double* %w, i64 1
; CHECK: remark: /tmp/s.c:5:10: Stores SLP vectorized with cost -4 and with tree size 3
store double %add0, double* %w0, !dbg !9
store double %add1, double* %w1
ret void
}


!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}

!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 4.0.0 (trunk 281293) (llvm/trunk 281290)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
!1 = !DIFile(filename: "/tmp/s.c", directory: "/tmp")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"PIC Level", i32 2}
!6 = !{!"clang version 4.0.0 (trunk 281293) (llvm/trunk 281290)"}
!7 = distinct !DISubprogram(name: "baz", scope: !1, file: !1, line: 4, type: !8, isLocal: false, isDefinition: true, scopeLine: 4, isOptimized: true, unit: !0, variables: !2)
!8 = !DISubroutineType(types: !2)
!9 = !DILocation(line: 5, column: 10, scope: !7)
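
; A note on reading the remark (my gloss on the SLP cost model, not test
; output beyond the CHECK line above): the reported cost is vector cost minus
; scalar cost, so "cost -4" means the vector form is estimated 4 units
; cheaper, and the tree size counts the vectorized groups in the bundle
; (here, presumably the loads, the fadds, and the stores).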
@ -1,42 +0,0 @@
; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; CHECK-LABEL: @test1
; CHECK: load <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: sdiv <4 x i32>

define void @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c) {
entry:
%0 = load i32, i32* %b, align 4
%1 = load i32, i32* %c, align 4
%add = add nsw i32 %1, %0
%div = sdiv i32 %add, 2
store i32 %div, i32* %a, align 4
%arrayidx3 = getelementptr inbounds i32, i32* %b, i64 1
%2 = load i32, i32* %arrayidx3, align 4
%arrayidx4 = getelementptr inbounds i32, i32* %c, i64 1
%3 = load i32, i32* %arrayidx4, align 4
%add5 = add nsw i32 %3, %2
%div6 = sdiv i32 %add5, 2
%arrayidx7 = getelementptr inbounds i32, i32* %a, i64 1
store i32 %div6, i32* %arrayidx7, align 4
%arrayidx8 = getelementptr inbounds i32, i32* %b, i64 2
%4 = load i32, i32* %arrayidx8, align 4
%arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2
%5 = load i32, i32* %arrayidx9, align 4
%add10 = add nsw i32 %5, %4
%div11 = sdiv i32 %add10, 2
%arrayidx12 = getelementptr inbounds i32, i32* %a, i64 2
store i32 %div11, i32* %arrayidx12, align 4
%arrayidx13 = getelementptr inbounds i32, i32* %b, i64 3
%6 = load i32, i32* %arrayidx13, align 4
%arrayidx14 = getelementptr inbounds i32, i32* %c, i64 3
%7 = load i32, i32* %arrayidx14, align 4
%add15 = add nsw i32 %7, %6
%div16 = sdiv i32 %add15, 2
%arrayidx17 = getelementptr inbounds i32, i32* %a, i64 3
store i32 %div16, i32* %arrayidx17, align 4
ret void
}
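
; Putting the CHECK lines together, the expected vector body is roughly (a
; sketch with invented value names):
;
;   %vb = load <4 x i32>, <4 x i32>* %pb, align 4
;   %vc = load <4 x i32>, <4 x i32>* %pc, align 4
;   %vadd = add nsw <4 x i32> %vc, %vb
;   %vdiv = sdiv <4 x i32> %vadd, <i32 2, i32 2, i32 2, i32 2>
;   store <4 x i32> %vdiv, <4 x i32>* %pa, align 4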
@ -1,3 +0,0 @@
if not 'AMDGPU' in config.root.targets:
    config.unsupported = True

@ -1,195 +0,0 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer -dce < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer -dce < %s | FileCheck -check-prefixes=GCN,VI %s

; FIXME: We would still like to vectorize the memory operations for VI

; Simple 3-pair chain with loads and stores
; GCN-LABEL: @test1_as_3_3_3_v2f16(
; GFX9: load <2 x half>, <2 x half> addrspace(3)*
; GFX9: load <2 x half>, <2 x half> addrspace(3)*
; GFX9: fmul <2 x half>
; GFX9: store <2 x half> %{{.*}}, <2 x half> addrspace(3)* %
; GFX9: ret

; VI: load half
; VI: load half
define amdgpu_kernel void @test1_as_3_3_3_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half addrspace(3)* %c) {
%i0 = load half, half addrspace(3)* %a, align 2
%i1 = load half, half addrspace(3)* %b, align 2
%mul = fmul half %i0, %i1
%arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
%i3 = load half, half addrspace(3)* %arrayidx3, align 2
%arrayidx4 = getelementptr inbounds half, half addrspace(3)* %b, i64 1
%i4 = load half, half addrspace(3)* %arrayidx4, align 2
%mul5 = fmul half %i3, %i4
store half %mul, half addrspace(3)* %c, align 2
%arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
store half %mul5, half addrspace(3)* %arrayidx5, align 2
ret void
}

; GCN-LABEL: @test1_as_3_0_0(
; GFX9: load <2 x half>, <2 x half> addrspace(3)*
; GFX9: load <2 x half>, <2 x half>*
; GFX9: fmul <2 x half>
; GFX9: store <2 x half> %{{.*}}, <2 x half>* %
; GFX9: ret

; VI: load half
; VI: load half
define amdgpu_kernel void @test1_as_3_0_0(half addrspace(3)* %a, half* %b, half* %c) {
%i0 = load half, half addrspace(3)* %a, align 2
%i1 = load half, half* %b, align 2
%mul = fmul half %i0, %i1
%arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
%i3 = load half, half addrspace(3)* %arrayidx3, align 2
%arrayidx4 = getelementptr inbounds half, half* %b, i64 1
%i4 = load half, half* %arrayidx4, align 2
%mul5 = fmul half %i3, %i4
store half %mul, half* %c, align 2
%arrayidx5 = getelementptr inbounds half, half* %c, i64 1
store half %mul5, half* %arrayidx5, align 2
ret void
}

; GCN-LABEL: @test1_as_0_0_3_v2f16(
; GFX9: load <2 x half>, <2 x half>*
; GFX9: load <2 x half>, <2 x half>*
; GFX9: fmul <2 x half>
; GFX9: store <2 x half> %{{.*}}, <2 x half> addrspace(3)* %
; GFX9: ret

; VI: load half
; VI: load half
define amdgpu_kernel void @test1_as_0_0_3_v2f16(half* %a, half* %b, half addrspace(3)* %c) {
%i0 = load half, half* %a, align 2
%i1 = load half, half* %b, align 2
%mul = fmul half %i0, %i1
%arrayidx3 = getelementptr inbounds half, half* %a, i64 1
%i3 = load half, half* %arrayidx3, align 2
%arrayidx4 = getelementptr inbounds half, half* %b, i64 1
%i4 = load half, half* %arrayidx4, align 2
%mul5 = fmul half %i3, %i4
store half %mul, half addrspace(3)* %c, align 2
%arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
store half %mul5, half addrspace(3)* %arrayidx5, align 2
ret void
}

; GCN-LABEL: @test1_fma_v2f16(
; GFX9: load <2 x half>
; GFX9: load <2 x half>
; GFX9: load <2 x half>
; GFX9: call <2 x half> @llvm.fma.v2f16(
; GFX9: store <2 x half>
define amdgpu_kernel void @test1_fma_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half addrspace(3)* %c, half addrspace(3)* %d) {
%i0 = load half, half addrspace(3)* %a, align 2
%i1 = load half, half addrspace(3)* %b, align 2
%i2 = load half, half addrspace(3)* %c, align 2
%fma0 = call half @llvm.fma.f16(half %i0, half %i1, half %i2)
%arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
%i3 = load half, half addrspace(3)* %arrayidx3, align 2
%arrayidx4 = getelementptr inbounds half, half addrspace(3)* %b, i64 1
%i4 = load half, half addrspace(3)* %arrayidx4, align 2
%arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
%i5 = load half, half addrspace(3)* %arrayidx5, align 2
%fma1 = call half @llvm.fma.f16(half %i3, half %i4, half %i5)
store half %fma0, half addrspace(3)* %d, align 2
%arrayidx6 = getelementptr inbounds half, half addrspace(3)* %d, i64 1
store half %fma1, half addrspace(3)* %arrayidx6, align 2
ret void
}

; GCN-LABEL: @mul_scalar_v2f16(
; GFX9: load <2 x half>
; GFX9: fmul <2 x half>
; GFX9: store <2 x half>
define amdgpu_kernel void @mul_scalar_v2f16(half addrspace(3)* %a, half %scalar, half addrspace(3)* %c) {
%i0 = load half, half addrspace(3)* %a, align 2
%mul = fmul half %i0, %scalar
%arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
%i3 = load half, half addrspace(3)* %arrayidx3, align 2
%mul5 = fmul half %i3, %scalar
store half %mul, half addrspace(3)* %c, align 2
%arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
store half %mul5, half addrspace(3)* %arrayidx5, align 2
ret void
}

; GCN-LABEL: @fabs_v2f16
; GFX9: load <2 x half>
; GFX9: call <2 x half> @llvm.fabs.v2f16(
; GFX9: store <2 x half>
define amdgpu_kernel void @fabs_v2f16(half addrspace(3)* %a, half addrspace(3)* %c) {
%i0 = load half, half addrspace(3)* %a, align 2
%fabs0 = call half @llvm.fabs.f16(half %i0)
%arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
%i3 = load half, half addrspace(3)* %arrayidx3, align 2
%fabs1 = call half @llvm.fabs.f16(half %i3)
store half %fabs0, half addrspace(3)* %c, align 2
%arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
store half %fabs1, half addrspace(3)* %arrayidx5, align 2
ret void
}

; GCN-LABEL: @test1_fabs_fma_v2f16(
; GFX9: load <2 x half>
; GFX9: call <2 x half> @llvm.fabs.v2f16(
; GFX9: call <2 x half> @llvm.fma.v2f16(
; GFX9: store <2 x half>
define amdgpu_kernel void @test1_fabs_fma_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half addrspace(3)* %c, half addrspace(3)* %d) {
%i0 = load half, half addrspace(3)* %a, align 2
%i1 = load half, half addrspace(3)* %b, align 2
%i2 = load half, half addrspace(3)* %c, align 2
%i0.fabs = call half @llvm.fabs.f16(half %i0)

%fma0 = call half @llvm.fma.f16(half %i0.fabs, half %i1, half %i2)
%arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
%i3 = load half, half addrspace(3)* %arrayidx3, align 2
%arrayidx4 = getelementptr inbounds half, half addrspace(3)* %b, i64 1
%i4 = load half, half addrspace(3)* %arrayidx4, align 2
%arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
%i5 = load half, half addrspace(3)* %arrayidx5, align 2
%i3.fabs = call half @llvm.fabs.f16(half %i3)

%fma1 = call half @llvm.fma.f16(half %i3.fabs, half %i4, half %i5)
store half %fma0, half addrspace(3)* %d, align 2
%arrayidx6 = getelementptr inbounds half, half addrspace(3)* %d, i64 1
store half %fma1, half addrspace(3)* %arrayidx6, align 2
ret void
}

; FIXME: Should do a vector load and extract the component for fabs
; GCN-LABEL: @test1_fabs_scalar_fma_v2f16(
; GFX9: load half
; GFX9: call half @llvm.fabs.f16(
; GFX9: load <2 x half>
; GFX9: load half
; GFX9: load <2 x half>
; GFX9: call <2 x half> @llvm.fma.v2f16(
; GFX9: store <2 x half>
define amdgpu_kernel void @test1_fabs_scalar_fma_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half addrspace(3)* %c, half addrspace(3)* %d) {
%i0 = load half, half addrspace(3)* %a, align 2
%i1 = load half, half addrspace(3)* %b, align 2
%i2 = load half, half addrspace(3)* %c, align 2
%i1.fabs = call half @llvm.fabs.f16(half %i1)

%fma0 = call half @llvm.fma.f16(half %i0, half %i1.fabs, half %i2)
%arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
%i3 = load half, half addrspace(3)* %arrayidx3, align 2
%arrayidx4 = getelementptr inbounds half, half addrspace(3)* %b, i64 1
%i4 = load half, half addrspace(3)* %arrayidx4, align 2
%arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
%i5 = load half, half addrspace(3)* %arrayidx5, align 2
%fma1 = call half @llvm.fma.f16(half %i3, half %i4, half %i5)
store half %fma0, half addrspace(3)* %d, align 2
%arrayidx6 = getelementptr inbounds half, half addrspace(3)* %d, i64 1
store half %fma1, half addrspace(3)* %arrayidx6, align 2
ret void
}

declare half @llvm.fabs.f16(half) #1
declare half @llvm.fma.f16(half, half, half) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
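
; A hypothetical shape for the scalar-fabs FIXME above (invented names, not
; checked by the test): reuse the vector load of %b and extract the lane
; instead of issuing an extra scalar load:
;
;   %bv = load <2 x half>, <2 x half> addrspace(3)* %b.v2, align 2
;   %b0 = extractelement <2 x half> %bv, i32 0
;   %b0.fabs = call half @llvm.fabs.f16(half %b0)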
@ -1,2 +0,0 @@
if not 'ARM' in config.root.targets:
    config.unsupported = True
@ -1,20 +0,0 @@
; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"

; On swift unaligned <2 x double> stores need 4 uops and it is therefore
; cheaper to do this scalar.

; CHECK-LABEL: expensive_double_store
; CHECK-NOT: load <2 x double>
; CHECK-NOT: store <2 x double>
define void @expensive_double_store(double* noalias %dst, double* noalias %src, i64 %count) {
entry:
%0 = load double, double* %src, align 8
store double %0, double* %dst, align 8
%arrayidx2 = getelementptr inbounds double, double* %src, i64 1
%1 = load double, double* %arrayidx2, align 8
%arrayidx3 = getelementptr inbounds double, double* %dst, i64 1
store double %1, double* %arrayidx3, align 8
ret void
}
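
; For contrast, the vector form this test rejects would be (illustrative
; only):
;
;   %ps = bitcast double* %src to <2 x double>*
;   %v = load <2 x double>, <2 x double>* %ps, align 8
;   %pd = bitcast double* %dst to <2 x double>*
;   store <2 x double> %v, <2 x double>* %pd, align 8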
@ -1,52 +0,0 @@
; RUN: opt -S -mcpu=swift -mtriple=thumbv7-apple-ios -basicaa -slp-vectorizer < %s | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"

%class.Complex = type { double, double }

; Code like this is the result of SROA. Make sure we don't vectorize this
; because the scalar versions of the shl/or are handled by the
; backend and disappear, while the vectorized code stays.

; CHECK-LABEL: SROAed
; CHECK-NOT: shl nuw <2 x i64>
; CHECK-NOT: or <2 x i64>

define void @SROAed(%class.Complex* noalias nocapture sret %agg.result, [4 x i32] %a.coerce, [4 x i32] %b.coerce) {
entry:
%a.coerce.fca.0.extract = extractvalue [4 x i32] %a.coerce, 0
%a.sroa.0.0.insert.ext = zext i32 %a.coerce.fca.0.extract to i64
%a.coerce.fca.1.extract = extractvalue [4 x i32] %a.coerce, 1
%a.sroa.0.4.insert.ext = zext i32 %a.coerce.fca.1.extract to i64
%a.sroa.0.4.insert.shift = shl nuw i64 %a.sroa.0.4.insert.ext, 32
%a.sroa.0.4.insert.insert = or i64 %a.sroa.0.4.insert.shift, %a.sroa.0.0.insert.ext
%0 = bitcast i64 %a.sroa.0.4.insert.insert to double
%a.coerce.fca.2.extract = extractvalue [4 x i32] %a.coerce, 2
%a.sroa.3.8.insert.ext = zext i32 %a.coerce.fca.2.extract to i64
%a.coerce.fca.3.extract = extractvalue [4 x i32] %a.coerce, 3
%a.sroa.3.12.insert.ext = zext i32 %a.coerce.fca.3.extract to i64
%a.sroa.3.12.insert.shift = shl nuw i64 %a.sroa.3.12.insert.ext, 32
%a.sroa.3.12.insert.insert = or i64 %a.sroa.3.12.insert.shift, %a.sroa.3.8.insert.ext
%1 = bitcast i64 %a.sroa.3.12.insert.insert to double
%b.coerce.fca.0.extract = extractvalue [4 x i32] %b.coerce, 0
%b.sroa.0.0.insert.ext = zext i32 %b.coerce.fca.0.extract to i64
%b.coerce.fca.1.extract = extractvalue [4 x i32] %b.coerce, 1
%b.sroa.0.4.insert.ext = zext i32 %b.coerce.fca.1.extract to i64
%b.sroa.0.4.insert.shift = shl nuw i64 %b.sroa.0.4.insert.ext, 32
%b.sroa.0.4.insert.insert = or i64 %b.sroa.0.4.insert.shift, %b.sroa.0.0.insert.ext
%2 = bitcast i64 %b.sroa.0.4.insert.insert to double
%b.coerce.fca.2.extract = extractvalue [4 x i32] %b.coerce, 2
%b.sroa.3.8.insert.ext = zext i32 %b.coerce.fca.2.extract to i64
%b.coerce.fca.3.extract = extractvalue [4 x i32] %b.coerce, 3
%b.sroa.3.12.insert.ext = zext i32 %b.coerce.fca.3.extract to i64
%b.sroa.3.12.insert.shift = shl nuw i64 %b.sroa.3.12.insert.ext, 32
%b.sroa.3.12.insert.insert = or i64 %b.sroa.3.12.insert.shift, %b.sroa.3.8.insert.ext
%3 = bitcast i64 %b.sroa.3.12.insert.insert to double
%add = fadd double %0, %2
%add3 = fadd double %1, %3
%re.i.i = getelementptr inbounds %class.Complex, %class.Complex* %agg.result, i32 0, i32 0
store double %add, double* %re.i.i, align 4
%im.i.i = getelementptr inbounds %class.Complex, %class.Complex* %agg.result, i32 0, i32 1
store double %add3, double* %im.i.i, align 4
ret void
}
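
; Why the scalar form wins here (an inference, not stated in the test): each
; (zext, shl nuw 32, or) triple only reassembles two i32 halves into an i64,
; e.g.
;
;   %hi = shl nuw i64 %h, 32
;   %word = or i64 %hi, %lo
;
; and on a 32-bit target the backend lowers that to a plain register pair at
; no cost, whereas the <2 x i64> shl/or would survive as real vector
; instructions.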
@ -1,19 +0,0 @@
; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr9 -mattr=+vsx -slp-vectorizer < %s | FileCheck %s

%struct.S = type { i8*, i8* }

@kS0 = common global %struct.S zeroinitializer, align 8

define { i64, i64 } @getS() {
entry:
%0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
%1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
%2 = insertvalue { i64, i64 } undef, i64 %0, 0
%3 = insertvalue { i64, i64 } %2, i64 %1, 1
ret { i64, i64 } %3
}

; CHECK: load i64
; CHECK-NOT: load <2 x i64>
; CHECK-NOT: extractelement

@ -1,2 +0,0 @@
if not 'PowerPC' in config.root.targets:
    config.unsupported = True
Some files were not shown because too many files have changed in this diff