Imported Upstream version 5.18.0.167

Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
Xamarin Public Jenkins (auto-signing)
2018-10-20 08:25:10 +00:00
parent e19d552987
commit b084638f15
28489 changed files with 184 additions and 3866856 deletions

View File

@ -1,72 +0,0 @@
; RUN: opt -S -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize -dce \
; RUN: -instcombine -force-vector-width=2 < %s | FileCheck %s
;
; Test that loop vectorizer does not generate vector addresses that must then
; always be extracted.
; Check that the addresses for a scalarized memory access are not extracted
; from a vector register.
;
; The loop below stores the constant 4 to %A[i << 2] for i = 0 .. 9999. The
; expected output shows two scalar getelementptr/store pairs whose offsets
; are both computed from the scalar %index (the opt invocation at the top of
; this file forces a vector width of 2).
define i32 @foo(i32* nocapture %A) {
;CHECK-LABEL: @foo(
;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; First offset: index << 2.
;CHECK: %0 = shl nsw i64 %index, 2
; Second offset: (index << 2) | 4 -- presumably the offset for lane 1,
; i.e. (index + 1) << 2, since the low bits of the shifted value are zero.
;CHECK: %1 = shl i64 %index, 2
;CHECK: %2 = or i64 %1, 4
; Both addresses are plain scalar GEPs off %A, followed by scalar stores.
;CHECK: %3 = getelementptr inbounds i32, i32* %A, i64 %0
;CHECK: %4 = getelementptr inbounds i32, i32* %A, i64 %2
;CHECK: store i32 4, i32* %3, align 4
;CHECK: store i32 4, i32* %4, align 4
entry:
br label %for.body
for.body:
; Induction variable: starts at 0 and is incremented by 1 per iteration.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; Element index is the induction variable shifted left by 2, so consecutive
; iterations touch every fourth i32 of %A.
%0 = shl nsw i64 %indvars.iv, 2
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
store i32 4, i32* %arrayidx, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
; Exit once the incremented counter, truncated to i32, equals 10000.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 10000
br i1 %exitcond, label %for.end, label %for.body
for.end:
; The return value is irrelevant to the test.
ret i32 undef
}
; Check that a load of an address is scalarized.
;
; Each iteration loads a pointer from %PtrPtr[i], loads an i32 through that
; pointer, and stores the value to %A[i]. The expected output keeps the
; pointer loads and the dependent i32 loads scalar, builds a <2 x i32> with
; insertelement, and performs a single vector store to %A.
define i32 @foo1(i32* nocapture noalias %A, i32** nocapture %PtrPtr) {
;CHECK-LABEL: @foo1(
;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; Second index is formed as index | 1 -- presumably lane 1 (index + 1).
;CHECK: %0 = or i64 %index, 1
; Scalar addresses into %PtrPtr for both indices.
;CHECK: %1 = getelementptr inbounds i32*, i32** %PtrPtr, i64 %index
;CHECK: %2 = getelementptr inbounds i32*, i32** %PtrPtr, i64 %0
; Scalar loads of the two pointers ...
;CHECK: %3 = load i32*, i32** %1, align 8
;CHECK: %4 = load i32*, i32** %2, align 8
; ... and scalar loads through them.
;CHECK: %5 = load i32, i32* %3, align 4
;CHECK: %6 = load i32, i32* %4, align 4
; The two loaded values are packed into a vector.
;CHECK: %7 = insertelement <2 x i32> undef, i32 %5, i32 0
;CHECK: %8 = insertelement <2 x i32> %7, i32 %6, i32 1
; The store to %A is a single <2 x i32> store.
;CHECK: %9 = getelementptr inbounds i32, i32* %A, i64 %index
;CHECK: %10 = bitcast i32* %9 to <2 x i32>*
;CHECK: store <2 x i32> %8, <2 x i32>* %10, align 4
entry:
br label %for.body
for.body:
; Induction variable: starts at 0 and is incremented by 1 per iteration.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; %el is the pointer stored at %PtrPtr[i]; %v is the i32 it points to.
%ptr = getelementptr inbounds i32*, i32** %PtrPtr, i64 %indvars.iv
%el = load i32*, i32** %ptr
%v = load i32, i32* %el
; Store the loaded value to %A[i].
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %v, i32* %arrayidx, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
; Exit once the incremented counter, truncated to i32, equals 10000.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 10000
br i1 %exitcond, label %for.end, label %for.body
for.end:
; The return value is irrelevant to the test.
ret i32 undef
}

View File

@ -1,38 +0,0 @@
; REQUIRES: asserts
; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
; RUN: -force-vector-width=2 -debug-only=loop-vectorize \
; RUN: -disable-output < %s 2>&1 | FileCheck %s
; Check costs for branches inside a vectorized loop around predicated
; blocks. Each such branch will be guarded with an extractelement from the
; vector compare plus a test under mask instruction. This cost is modelled on
; the extractelement of i1.
;
; The loop below negates every positive element of %arr in place: the store
; in %if.then only runs when the loaded value is greater than zero, which
; makes %if.then the conditionally executed block whose guarding branch is
; being costed.
define void @fun(i32* %arr, i64 %trip.count) {
entry:
br label %for.body
for.body:
; Induction variable: 0 on entry, otherwise the value computed in %for.inc.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %arr, i64 %indvars.iv
%l = load i32, i32* %arrayidx, align 4
; Only strictly positive elements take the %if.then path.
%cmp55 = icmp sgt i32 %l, 0
br i1 %cmp55, label %if.then, label %for.inc
if.then:
; Write 0 - %l back to the same element.
%sub = sub nsw i32 0, %l
store i32 %sub, i32* %arrayidx, align 4
br label %for.inc
for.inc:
; Advance by one and leave the loop when the counter equals %trip.count.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %trip.count
br i1 %exitcond, label %for.end.loopexit, label %for.body
for.end.loopexit:
ret void
; Expected debug output at VF 2: the branch on %cmp55 is costed at 5, while
; the unconditional branch out of %if.then and the loop latch branch are
; both costed at 0.
; CHECK: LV: Found an estimated cost of 5 for VF 2 For instruction: br i1 %cmp55, label %if.then, label %for.inc
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br label %for.inc
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br i1 %exitcond, label %for.end.loopexit, label %for.body
}

View File

@ -1,2 +0,0 @@
# Mark this directory's tests as unsupported when 'SystemZ' is not among
# config.root.targets. `config` is not defined in this file -- presumably it
# is the configuration object injected by the lit test runner.
if 'SystemZ' not in config.root.targets:
    config.unsupported = True

View File

@ -1,33 +0,0 @@
; REQUIRES: asserts
; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
; RUN: -force-vector-width=4 -debug-only=loop-vectorize \
; RUN: -disable-output -enable-interleaved-mem-accesses=false < %s 2>&1 | \
; RUN: FileCheck %s
;
; Check that a scalarized load/store does not get a cost for inserts/
; extracts, since z13 supports element load/store.
;
; The loop increments every second element of %data (the index advances by
; 2). The opt invocation at the top of this file turns interleaved memory
; accesses off, and the expected debug output reports both the load and the
; store as scalarized.
define void @fun(i32* %data, i64 %n) {
entry:
br label %for.body
for.body:
; Induction variable: starts at 0 and is advanced by 2 per iteration.
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
; Load data[i], add 1, and store the result back to the same address.
%tmp0 = getelementptr inbounds i32, i32* %data, i64 %i
%tmp1 = load i32, i32* %tmp0, align 4
%tmp2 = add i32 %tmp1, 1
store i32 %tmp2, i32* %tmp0, align 4
%i.next = add nuw nsw i64 %i, 2
; Keep looping while the next index is (signed) less than %n.
%cond = icmp slt i64 %i.next, %n
br i1 %cond, label %for.body, label %for.end
for.end:
ret void
; Expected debug output: both memory accesses are scalarized and each is
; costed at 4 for VF 4 -- presumably one unit per lane with nothing added
; for inserts or extracts.
; CHECK: LV: Scalarizing: %tmp1 = load i32, i32* %tmp0, align 4
; CHECK: LV: Scalarizing: store i32 %tmp2, i32* %tmp0, align 4
; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: %tmp1 = load i32, i32* %tmp0, align 4
; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %tmp2, i32* %tmp0, align 4
}

View File

@ -1,70 +0,0 @@
; REQUIRES: asserts
; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
; RUN: -force-vector-width=4 -debug-only=loop-vectorize \
; RUN: -disable-output < %s 2>&1 | FileCheck %s
;
; Check that the loop vectorizer performs memory interleaving with accurate
; cost estimations.
; Simple case where just the load is interleaved, because the store group
; would have gaps.
;
; The loop increments every second element of %data (the index advances by
; 2), so each iteration touches data[i] only and never data[i + 1].
define void @fun0(i32* %data, i64 %n) {
entry:
br label %for.body
for.body:
; Induction variable: starts at 0 and is advanced by 2 per iteration.
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
; Load data[i], add 1, and store the result back to the same address.
%tmp0 = getelementptr inbounds i32, i32* %data, i64 %i
%tmp1 = load i32, i32* %tmp0, align 4
%tmp2 = add i32 %tmp1, 1
store i32 %tmp2, i32* %tmp0, align 4
%i.next = add nuw nsw i64 %i, 2
; Keep looping while the next index is (signed) less than %n.
%cond = icmp slt i64 %i.next, %n
br i1 %cond, label %for.body, label %for.end
for.end:
ret void
; Expected debug output: an interleave group is created for the load, and
; the load is costed at 3 for VF 4. The parenthesised note that follows
; presumably lists the instructions that cost stands for.
; CHECK: LV: Creating an interleave group with: %tmp1 = load i32, i32* %tmp0, align 4
; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction: %tmp1 = load i32, i32* %tmp0, align 4
; (vl; vl; vperm)
}
; Interleaving of both load and stores.
;
; The loop swaps each pair data[i], data[i + 1] and advances the index by 2,
; so both elements of every pair are loaded and both are stored.
define void @fun1(i32* %data, i64 %n) {
entry:
br label %for.body
for.body:
; Induction variable: starts at 0 and is advanced by 2 per iteration.
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
; %tmp1 = data[i]
%tmp0 = getelementptr inbounds i32, i32* %data, i64 %i
%tmp1 = load i32, i32* %tmp0, align 4
; %tmp3 = data[i + 1]
%i_1 = add i64 %i, 1
%tmp2 = getelementptr inbounds i32, i32* %data, i64 %i_1
%tmp3 = load i32, i32* %tmp2, align 4
; Write the two values back crosswise: data[i + 1] = %tmp1, data[i] = %tmp3.
store i32 %tmp1, i32* %tmp2, align 4
store i32 %tmp3, i32* %tmp0, align 4
%i.next = add nuw nsw i64 %i, 2
; Keep looping while the next index is (signed) less than %n.
%cond = icmp slt i64 %i.next, %n
br i1 %cond, label %for.body, label %for.end
for.end:
ret void
; Expected debug output, part 1: one interleave group holding both stores
; and one holding both loads.
; CHECK: LV: Creating an interleave group with: store i32 %tmp3, i32* %tmp0, align 4
; CHECK: LV: Inserted: store i32 %tmp1, i32* %tmp2, align 4
; CHECK: into the interleave group with store i32 %tmp3, i32* %tmp0, align 4
; CHECK: LV: Creating an interleave group with: %tmp3 = load i32, i32* %tmp2, align 4
; CHECK: LV: Inserted: %tmp1 = load i32, i32* %tmp0, align 4
; CHECK: into the interleave group with %tmp3 = load i32, i32* %tmp2, align 4
; Expected debug output, part 2: within each group one access is costed at 4
; and the other at 0 for VF 4. The parenthesised notes presumably list the
; instructions each group's cost stands for.
; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: %tmp1 = load i32, i32* %tmp0, align 4
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp2, align 4
; (vl; vl; vperm, vpkg)
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp1, i32* %tmp2, align 4
; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %tmp3, i32* %tmp0, align 4
; (vmrlf; vmrhf; vst; vst)
}