You've already forked linux-packaging-mono
Imported Upstream version 5.18.0.167
Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
parent
e19d552987
commit
b084638f15
@ -1,72 +0,0 @@
|
||||
; RUN: opt -S -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize -dce \
|
||||
; RUN: -instcombine -force-vector-width=2 < %s | FileCheck %s
|
||||
;
|
||||
; Test that loop vectorizer does not generate vector addresses that must then
|
||||
; always be extracted.
|
||||
|
||||
; Check that the addresses for a scalarized memory access are not extracted
|
||||
; from a vector register.
|
||||
; Input loop: on every iteration stores the constant 4 to the i32 element of
; %A at index (induction variable << 2), and exits once the incremented
; induction variable, truncated to i32, equals 10000. Returns undef.
; The directives at the top of the body expect two scalar index computations,
; two scalar getelementptrs and two scalar stores in the vector body.
define i32 @foo(i32* nocapture %A) {
|
||||
;CHECK-LABEL: @foo(
|
||||
;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
|
||||
;CHECK: %0 = shl nsw i64 %index, 2
|
||||
;CHECK: %1 = shl i64 %index, 2
|
||||
;CHECK: %2 = or i64 %1, 4
|
||||
;CHECK: %3 = getelementptr inbounds i32, i32* %A, i64 %0
|
||||
;CHECK: %4 = getelementptr inbounds i32, i32* %A, i64 %2
|
||||
;CHECK: store i32 4, i32* %3, align 4
|
||||
;CHECK: store i32 4, i32* %4, align 4
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
; Element index is the induction variable shifted left by 2 (times 4).
%0 = shl nsw i64 %indvars.iv, 2
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
|
||||
store i32 4, i32* %arrayidx, align 4
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
; The exit test is done on the incremented induction variable truncated to i32.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 10000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
|
||||
; Check that a load of an address is scalarized.
|
||||
; Input loop: on every iteration loads a pointer from %PtrPtr at the induction
; variable, loads an i32 through that pointer, and stores the value to %A at
; the same index. Exits once the incremented induction variable, truncated to
; i32, equals 10000. Returns undef.
; The directives at the top of the body expect the pointer loads and the loads
; through them to be emitted per lane as scalars, merged with insertelement,
; and written with a single <2 x i32> store.
define i32 @foo1(i32* nocapture noalias %A, i32** nocapture %PtrPtr) {
|
||||
;CHECK-LABEL: @foo1(
|
||||
;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
|
||||
;CHECK: %0 = or i64 %index, 1
|
||||
;CHECK: %1 = getelementptr inbounds i32*, i32** %PtrPtr, i64 %index
|
||||
;CHECK: %2 = getelementptr inbounds i32*, i32** %PtrPtr, i64 %0
|
||||
;CHECK: %3 = load i32*, i32** %1, align 8
|
||||
;CHECK: %4 = load i32*, i32** %2, align 8
|
||||
;CHECK: %5 = load i32, i32* %3, align 4
|
||||
;CHECK: %6 = load i32, i32* %4, align 4
|
||||
;CHECK: %7 = insertelement <2 x i32> undef, i32 %5, i32 0
|
||||
;CHECK: %8 = insertelement <2 x i32> %7, i32 %6, i32 1
|
||||
;CHECK: %9 = getelementptr inbounds i32, i32* %A, i64 %index
|
||||
;CHECK: %10 = bitcast i32* %9 to <2 x i32>*
|
||||
;CHECK: store <2 x i32> %8, <2 x i32>* %10, align 4
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%ptr = getelementptr inbounds i32*, i32** %PtrPtr, i64 %indvars.iv
|
||||
; The loaded value is itself the address used by the next load.
%el = load i32*, i32** %ptr
|
||||
%v = load i32, i32* %el
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
store i32 %v, i32* %arrayidx, align 4
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 10000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 undef
|
||||
}
|
@ -1,38 +0,0 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
|
||||
; RUN: -force-vector-width=2 -debug-only=loop-vectorize \
|
||||
; RUN: -disable-output < %s 2>&1 | FileCheck %s
|
||||
|
||||
; Check costs for branches inside a vectorized loop around predicated
|
||||
; blocks. Each such branch will be guarded with an extractelement from the
|
||||
; vector compare plus a test under mask instruction. This cost is modelled on
|
||||
; the extractelement of i1.
|
||||
|
||||
; Input loop with a conditional store: loads the i32 at %arr[induction
; variable] and, only when the loaded value is greater than zero (signed
; compare), stores 0 minus that value back to the same address. The loop exits
; when the incremented induction variable equals %trip.count.
; The directives at the end of the body pin the estimated VF 2 cost reported
; for each of the three branch instructions.
define void @fun(i32* %arr, i64 %trip.count) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %arr, i64 %indvars.iv
|
||||
%l = load i32, i32* %arrayidx, align 4
|
||||
%cmp55 = icmp sgt i32 %l, 0
|
||||
; This conditional branch guards the block that contains the store.
br i1 %cmp55, label %if.then, label %for.inc
|
||||
|
||||
if.then:
|
||||
%sub = sub nsw i32 0, %l
|
||||
store i32 %sub, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc:
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, %trip.count
|
||||
br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.end.loopexit:
|
||||
ret void
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 5 for VF 2 For instruction: br i1 %cmp55, label %if.then, label %for.inc
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br label %for.inc
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||
}
|
@ -1,2 +0,0 @@
|
||||
# Set config.unsupported to True when 'SystemZ' is not listed in
# config.root.targets.
if not 'SystemZ' in config.root.targets:
|
||||
config.unsupported = True
|
@ -1,33 +0,0 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
|
||||
; RUN: -force-vector-width=4 -debug-only=loop-vectorize \
|
||||
; RUN: -disable-output -enable-interleaved-mem-accesses=false < %s 2>&1 | \
|
||||
; RUN: FileCheck %s
|
||||
;
|
||||
; Check that a scalarized load/store does not get a cost for inserts/
|
||||
; extracts, since z13 supports element load/store.
|
||||
|
||||
; Input loop: loads the i32 at %data[%i], adds 1 and stores it back to the same
; address. %i advances by 2 per iteration and the loop continues while the
; incremented value is less than %n (signed compare).
; The directives at the end of the body expect both the load and the store to
; be reported as scalarized, each with an estimated cost of 4 at VF 4.
define void @fun(i32* %data, i64 %n) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
|
||||
%tmp0 = getelementptr inbounds i32, i32* %data, i64 %i
|
||||
%tmp1 = load i32, i32* %tmp0, align 4
|
||||
%tmp2 = add i32 %tmp1, 1
|
||||
store i32 %tmp2, i32* %tmp0, align 4
|
||||
; The index advances by 2, so only every second element is accessed.
%i.next = add nuw nsw i64 %i, 2
|
||||
%cond = icmp slt i64 %i.next, %n
|
||||
br i1 %cond, label %for.body, label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
|
||||
; CHECK: LV: Scalarizing: %tmp1 = load i32, i32* %tmp0, align 4
|
||||
; CHECK: LV: Scalarizing: store i32 %tmp2, i32* %tmp0, align 4
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: %tmp1 = load i32, i32* %tmp0, align 4
|
||||
; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %tmp2, i32* %tmp0, align 4
|
||||
}
|
||||
|
@ -1,70 +0,0 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
|
||||
; RUN: -force-vector-width=4 -debug-only=loop-vectorize \
|
||||
; RUN: -disable-output < %s 2>&1 | FileCheck %s
|
||||
;
|
||||
; Check that the loop vectorizer performs memory interleaving with accurate
|
||||
; cost estimations.
|
||||
|
||||
|
||||
; Simple case where just the load is interleaved, because the store group
|
||||
; would have gaps.
|
||||
; Input loop: loads the i32 at %data[%i], adds 1 and stores it back to the same
; address. %i advances by 2 per iteration and the loop continues while the
; incremented value is less than %n (signed compare).
; The directives at the end of the body expect an interleave group to be
; created with the load, and an estimated cost of 3 for that load at VF 4.
define void @fun0(i32* %data, i64 %n) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
|
||||
%tmp0 = getelementptr inbounds i32, i32* %data, i64 %i
|
||||
%tmp1 = load i32, i32* %tmp0, align 4
|
||||
%tmp2 = add i32 %tmp1, 1
|
||||
store i32 %tmp2, i32* %tmp0, align 4
|
||||
; The index advances by 2, so only every second element is accessed.
%i.next = add nuw nsw i64 %i, 2
|
||||
%cond = icmp slt i64 %i.next, %n
|
||||
br i1 %cond, label %for.body, label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
|
||||
; CHECK: LV: Creating an interleave group with: %tmp1 = load i32, i32* %tmp0, align 4
|
||||
; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction: %tmp1 = load i32, i32* %tmp0, align 4
|
||||
; (vl; vl; vperm)
|
||||
}
|
||||
|
||||
; Interleaving of both load and stores.
|
||||
; Input loop: loads the i32 values at %data[%i] and %data[%i + 1] and stores
; each one to the other's address, exchanging the two elements. %i advances by
; 2 per iteration and the loop continues while the incremented value is less
; than %n (signed compare).
; The directives at the end of the body expect one interleave group holding
; both stores and one holding both loads. At VF 4 one member of each pair is
; expected to report a cost of 4 and the other a cost of 0.
define void @fun1(i32* %data, i64 %n) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
|
||||
%tmp0 = getelementptr inbounds i32, i32* %data, i64 %i
|
||||
%tmp1 = load i32, i32* %tmp0, align 4
|
||||
%i_1 = add i64 %i, 1
|
||||
%tmp2 = getelementptr inbounds i32, i32* %data, i64 %i_1
|
||||
%tmp3 = load i32, i32* %tmp2, align 4
|
||||
; Each loaded value is written to the address the other one was read from.
store i32 %tmp1, i32* %tmp2, align 4
|
||||
store i32 %tmp3, i32* %tmp0, align 4
|
||||
%i.next = add nuw nsw i64 %i, 2
|
||||
%cond = icmp slt i64 %i.next, %n
|
||||
br i1 %cond, label %for.body, label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
|
||||
; CHECK: LV: Creating an interleave group with: store i32 %tmp3, i32* %tmp0, align 4
|
||||
; CHECK: LV: Inserted: store i32 %tmp1, i32* %tmp2, align 4
|
||||
; CHECK: into the interleave group with store i32 %tmp3, i32* %tmp0, align 4
|
||||
; CHECK: LV: Creating an interleave group with: %tmp3 = load i32, i32* %tmp2, align 4
|
||||
; CHECK: LV: Inserted: %tmp1 = load i32, i32* %tmp0, align 4
|
||||
; CHECK: into the interleave group with %tmp3 = load i32, i32* %tmp2, align 4
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: %tmp1 = load i32, i32* %tmp0, align 4
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp2, align 4
|
||||
; (vl; vl; vperm, vpkg)
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp1, i32* %tmp2, align 4
|
||||
; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %tmp3, i32* %tmp0, align 4
|
||||
; (vmrlf; vmrhf; vst; vst)
|
||||
}
|
||||
|
Reference in New Issue
Block a user