Imported Upstream version 5.18.0.246

Former-commit-id: 0c7ce5b1a7851e13f22acfd379b7f9fb304e4833
Author: Xamarin Public Jenkins (auto-signing)
Date:   2019-01-23 08:21:40 +00:00
Parent: a7724cd563
Commit: 279aa8f685

28482 changed files with 3866972 additions and 44 deletions

File diff suppressed because it is too large.

@@ -0,0 +1,450 @@
; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 | FileCheck %s
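; The cost-model analysis pass prints one line per instruction of the form
; "Cost Model: Found an estimated cost of <N> for instruction: ...", which the
; CHECK patterns below match against. NEON has no integer vector divide or
; remainder instructions, so most of the costs here appear to reflect
; scalarization: they grow roughly linearly with the lane count (40 for 2
; lanes, 80 for 4, 160 for 8, 320 for 16, i.e. about 20 per lane), while the
; few cost-10 cases are narrow types the backend can presumably lower more
; cheaply without fully scalarizing.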
define <2 x i8> @sdiv_v2_i8(<2 x i8> %a, <2 x i8> %b) {
; CHECK: sdiv_v2_i8
; CHECK: cost of 40 {{.*}} sdiv
%1 = sdiv <2 x i8> %a, %b
ret <2 x i8> %1
}
define <2 x i16> @sdiv_v2_i16(<2 x i16> %a, <2 x i16> %b) {
; CHECK: sdiv_v2_i16
; CHECK: cost of 40 {{.*}} sdiv
%1 = sdiv <2 x i16> %a, %b
ret <2 x i16> %1
}
define <2 x i32> @sdiv_v2_i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: sdiv_v2_i32
; CHECK: cost of 40 {{.*}} sdiv
%1 = sdiv <2 x i32> %a, %b
ret <2 x i32> %1
}
define <2 x i64> @sdiv_v2_i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: sdiv_v2_i64
; CHECK: cost of 40 {{.*}} sdiv
%1 = sdiv <2 x i64> %a, %b
ret <2 x i64> %1
}
define <4 x i8> @sdiv_v4_i8(<4 x i8> %a, <4 x i8> %b) {
; CHECK: sdiv_v4_i8
; CHECK: cost of 10 {{.*}} sdiv
%1 = sdiv <4 x i8> %a, %b
ret <4 x i8> %1
}
define <4 x i16> @sdiv_v4_i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: sdiv_v4_i16
; CHECK: cost of 10 {{.*}} sdiv
%1 = sdiv <4 x i16> %a, %b
ret <4 x i16> %1
}
define <4 x i32> @sdiv_v4_i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: sdiv_v4_i32
; CHECK: cost of 80 {{.*}} sdiv
%1 = sdiv <4 x i32> %a, %b
ret <4 x i32> %1
}
define <4 x i64> @sdiv_v4_i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK: sdiv_v4_i64
; CHECK: cost of 80 {{.*}} sdiv
%1 = sdiv <4 x i64> %a, %b
ret <4 x i64> %1
}
define <8 x i8> @sdiv_v8_i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: sdiv_v8_i8
; CHECK: cost of 10 {{.*}} sdiv
%1 = sdiv <8 x i8> %a, %b
ret <8 x i8> %1
}
define <8 x i16> @sdiv_v8_i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: sdiv_v8_i16
; CHECK: cost of 160 {{.*}} sdiv
%1 = sdiv <8 x i16> %a, %b
ret <8 x i16> %1
}
define <8 x i32> @sdiv_v8_i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: sdiv_v8_i32
; CHECK: cost of 160 {{.*}} sdiv
%1 = sdiv <8 x i32> %a, %b
ret <8 x i32> %1
}
define <8 x i64> @sdiv_v8_i64(<8 x i64> %a, <8 x i64> %b) {
; CHECK: sdiv_v8_i64
; CHECK: cost of 160 {{.*}} sdiv
%1 = sdiv <8 x i64> %a, %b
ret <8 x i64> %1
}
define <16 x i8> @sdiv_v16_i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: sdiv_v16_i8
; CHECK: cost of 320 {{.*}} sdiv
%1 = sdiv <16 x i8> %a, %b
ret <16 x i8> %1
}
define <16 x i16> @sdiv_v16_i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: sdiv_v16_i16
; CHECK: cost of 320 {{.*}} sdiv
%1 = sdiv <16 x i16> %a, %b
ret <16 x i16> %1
}
define <16 x i32> @sdiv_v16_i32(<16 x i32> %a, <16 x i32> %b) {
; CHECK: sdiv_v16_i32
; CHECK: cost of 320 {{.*}} sdiv
%1 = sdiv <16 x i32> %a, %b
ret <16 x i32> %1
}
define <16 x i64> @sdiv_v16_i64(<16 x i64> %a, <16 x i64> %b) {
; CHECK: sdiv_v16_i64
; CHECK: cost of 320 {{.*}} sdiv
%1 = sdiv <16 x i64> %a, %b
ret <16 x i64> %1
}
define <2 x i8> @udiv_v2_i8(<2 x i8> %a, <2 x i8> %b) {
; CHECK: udiv_v2_i8
; CHECK: cost of 40 {{.*}} udiv
%1 = udiv <2 x i8> %a, %b
ret <2 x i8> %1
}
define <2 x i16> @udiv_v2_i16(<2 x i16> %a, <2 x i16> %b) {
; CHECK: udiv_v2_i16
; CHECK: cost of 40 {{.*}} udiv
%1 = udiv <2 x i16> %a, %b
ret <2 x i16> %1
}
define <2 x i32> @udiv_v2_i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: udiv_v2_i32
; CHECK: cost of 40 {{.*}} udiv
%1 = udiv <2 x i32> %a, %b
ret <2 x i32> %1
}
define <2 x i64> @udiv_v2_i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: udiv_v2_i64
; CHECK: cost of 40 {{.*}} udiv
%1 = udiv <2 x i64> %a, %b
ret <2 x i64> %1
}
define <4 x i8> @udiv_v4_i8(<4 x i8> %a, <4 x i8> %b) {
; CHECK: udiv_v4_i8
; CHECK: cost of 10 {{.*}} udiv
%1 = udiv <4 x i8> %a, %b
ret <4 x i8> %1
}
define <4 x i16> @udiv_v4_i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: udiv_v4_i16
; CHECK: cost of 10 {{.*}} udiv
%1 = udiv <4 x i16> %a, %b
ret <4 x i16> %1
}
define <4 x i32> @udiv_v4_i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: udiv_v4_i32
; CHECK: cost of 80 {{.*}} udiv
%1 = udiv <4 x i32> %a, %b
ret <4 x i32> %1
}
define <4 x i64> @udiv_v4_i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK: udiv_v4_i64
; CHECK: cost of 80 {{.*}} udiv
%1 = udiv <4 x i64> %a, %b
ret <4 x i64> %1
}
define <8 x i8> @udiv_v8_i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: udiv_v8_i8
; CHECK: cost of 10 {{.*}} udiv
%1 = udiv <8 x i8> %a, %b
ret <8 x i8> %1
}
define <8 x i16> @udiv_v8_i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: udiv_v8_i16
; CHECK: cost of 160 {{.*}} udiv
%1 = udiv <8 x i16> %a, %b
ret <8 x i16> %1
}
define <8 x i32> @udiv_v8_i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: udiv_v8_i32
; CHECK: cost of 160 {{.*}} udiv
%1 = udiv <8 x i32> %a, %b
ret <8 x i32> %1
}
define <8 x i64> @udiv_v8_i64(<8 x i64> %a, <8 x i64> %b) {
; CHECK: udiv_v8_i64
; CHECK: cost of 160 {{.*}} udiv
%1 = udiv <8 x i64> %a, %b
ret <8 x i64> %1
}
define <16 x i8> @udiv_v16_i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: udiv_v16_i8
; CHECK: cost of 320 {{.*}} udiv
%1 = udiv <16 x i8> %a, %b
ret <16 x i8> %1
}
define <16 x i16> @udiv_v16_i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: udiv_v16_i16
; CHECK: cost of 320 {{.*}} udiv
%1 = udiv <16 x i16> %a, %b
ret <16 x i16> %1
}
define <16 x i32> @udiv_v16_i32(<16 x i32> %a, <16 x i32> %b) {
; CHECK: udiv_v16_i32
; CHECK: cost of 320 {{.*}} udiv
%1 = udiv <16 x i32> %a, %b
ret <16 x i32> %1
}
define <16 x i64> @udiv_v16_i64(<16 x i64> %a, <16 x i64> %b) {
; CHECK: udiv_v16_i64
; CHECK: cost of 320 {{.*}} udiv
%1 = udiv <16 x i64> %a, %b
ret <16 x i64> %1
}
define <2 x i8> @srem_v2_i8(<2 x i8> %a, <2 x i8> %b) {
; CHECK: srem_v2_i8
; CHECK: cost of 40 {{.*}} srem
%1 = srem <2 x i8> %a, %b
ret <2 x i8> %1
}
define <2 x i16> @srem_v2_i16(<2 x i16> %a, <2 x i16> %b) {
; CHECK: srem_v2_i16
; CHECK: cost of 40 {{.*}} srem
%1 = srem <2 x i16> %a, %b
ret <2 x i16> %1
}
define <2 x i32> @srem_v2_i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: srem_v2_i32
; CHECK: cost of 40 {{.*}} srem
%1 = srem <2 x i32> %a, %b
ret <2 x i32> %1
}
define <2 x i64> @srem_v2_i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: srem_v2_i64
; CHECK: cost of 40 {{.*}} srem
%1 = srem <2 x i64> %a, %b
ret <2 x i64> %1
}
define <4 x i8> @srem_v4_i8(<4 x i8> %a, <4 x i8> %b) {
; CHECK: srem_v4_i8
; CHECK: cost of 80 {{.*}} srem
%1 = srem <4 x i8> %a, %b
ret <4 x i8> %1
}
define <4 x i16> @srem_v4_i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: srem_v4_i16
; CHECK: cost of 80 {{.*}} srem
%1 = srem <4 x i16> %a, %b
ret <4 x i16> %1
}
define <4 x i32> @srem_v4_i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: srem_v4_i32
; CHECK: cost of 80 {{.*}} srem
%1 = srem <4 x i32> %a, %b
ret <4 x i32> %1
}
define <4 x i64> @srem_v4_i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK: srem_v4_i64
; CHECK: cost of 80 {{.*}} srem
%1 = srem <4 x i64> %a, %b
ret <4 x i64> %1
}
define <8 x i8> @srem_v8_i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: srem_v8_i8
; CHECK: cost of 160 {{.*}} srem
%1 = srem <8 x i8> %a, %b
ret <8 x i8> %1
}
define <8 x i16> @srem_v8_i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: srem_v8_i16
; CHECK: cost of 160 {{.*}} srem
%1 = srem <8 x i16> %a, %b
ret <8 x i16> %1
}
define <8 x i32> @srem_v8_i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: srem_v8_i32
; CHECK: cost of 160 {{.*}} srem
%1 = srem <8 x i32> %a, %b
ret <8 x i32> %1
}
define <8 x i64> @srem_v8_i64(<8 x i64> %a, <8 x i64> %b) {
; CHECK: srem_v8_i64
; CHECK: cost of 160 {{.*}} srem
%1 = srem <8 x i64> %a, %b
ret <8 x i64> %1
}
define <16 x i8> @srem_v16_i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: srem_v16_i8
; CHECK: cost of 320 {{.*}} srem
%1 = srem <16 x i8> %a, %b
ret <16 x i8> %1
}
define <16 x i16> @srem_v16_i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: srem_v16_i16
; CHECK: cost of 320 {{.*}} srem
%1 = srem <16 x i16> %a, %b
ret <16 x i16> %1
}
define <16 x i32> @srem_v16_i32(<16 x i32> %a, <16 x i32> %b) {
; CHECK: srem_v16_i32
; CHECK: cost of 320 {{.*}} srem
%1 = srem <16 x i32> %a, %b
ret <16 x i32> %1
}
define <16 x i64> @srem_v16_i64(<16 x i64> %a, <16 x i64> %b) {
; CHECK: srem_v16_i64
; CHECK: cost of 320 {{.*}} srem
%1 = srem <16 x i64> %a, %b
ret <16 x i64> %1
}
define <2 x i8> @urem_v2_i8(<2 x i8> %a, <2 x i8> %b) {
; CHECK: urem_v2_i8
; CHECK: cost of 40 {{.*}} urem
%1 = urem <2 x i8> %a, %b
ret <2 x i8> %1
}
define <2 x i16> @urem_v2_i16(<2 x i16> %a, <2 x i16> %b) {
; CHECK: urem_v2_i16
; CHECK: cost of 40 {{.*}} urem
%1 = urem <2 x i16> %a, %b
ret <2 x i16> %1
}
define <2 x i32> @urem_v2_i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: urem_v2_i32
; CHECK: cost of 40 {{.*}} urem
%1 = urem <2 x i32> %a, %b
ret <2 x i32> %1
}
define <2 x i64> @urem_v2_i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: urem_v2_i64
; CHECK: cost of 40 {{.*}} urem
%1 = urem <2 x i64> %a, %b
ret <2 x i64> %1
}
define <4 x i8> @urem_v4_i8(<4 x i8> %a, <4 x i8> %b) {
; CHECK: urem_v4_i8
; CHECK: cost of 80 {{.*}} urem
%1 = urem <4 x i8> %a, %b
ret <4 x i8> %1
}
define <4 x i16> @urem_v4_i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: urem_v4_i16
; CHECK: cost of 80 {{.*}} urem
%1 = urem <4 x i16> %a, %b
ret <4 x i16> %1
}
define <4 x i32> @urem_v4_i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: urem_v4_i32
; CHECK: cost of 80 {{.*}} urem
%1 = urem <4 x i32> %a, %b
ret <4 x i32> %1
}
define <4 x i64> @urem_v4_i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK: urem_v4_i64
; CHECK: cost of 80 {{.*}} urem
%1 = urem <4 x i64> %a, %b
ret <4 x i64> %1
}
define <8 x i8> @urem_v8_i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: urem_v8_i8
; CHECK: cost of 160 {{.*}} urem
%1 = urem <8 x i8> %a, %b
ret <8 x i8> %1
}
define <8 x i16> @urem_v8_i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: urem_v8_i16
; CHECK: cost of 160 {{.*}} urem
%1 = urem <8 x i16> %a, %b
ret <8 x i16> %1
}
define <8 x i32> @urem_v8_i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: urem_v8_i32
; CHECK: cost of 160 {{.*}} urem
%1 = urem <8 x i32> %a, %b
ret <8 x i32> %1
}
define <8 x i64> @urem_v8_i64(<8 x i64> %a, <8 x i64> %b) {
; CHECK: urem_v8_i64
; CHECK: cost of 160 {{.*}} urem
%1 = urem <8 x i64> %a, %b
ret <8 x i64> %1
}
define <16 x i8> @urem_v16_i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: urem_v16_i8
; CHECK: cost of 320 {{.*}} urem
%1 = urem <16 x i8> %a, %b
ret <16 x i8> %1
}
define <16 x i16> @urem_v16_i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: urem_v16_i16
; CHECK: cost of 320 {{.*}} urem
%1 = urem <16 x i16> %a, %b
ret <16 x i16> %1
}
define <16 x i32> @urem_v16_i32(<16 x i32> %a, <16 x i32> %b) {
; CHECK: urem_v16_i32
; CHECK: cost of 320 {{.*}} urem
%1 = urem <16 x i32> %a, %b
ret <16 x i32> %1
}
define <16 x i64> @urem_v16_i64(<16 x i64> %a, <16 x i64> %b) {
; CHECK: urem_v16_i64
; CHECK: cost of 320 {{.*}} urem
%1 = urem <16 x i64> %a, %b
ret <16 x i64> %1
}

@@ -0,0 +1,90 @@
; RUN: opt -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios6.0.0"
define void @test_geps(i32 %i) {
; GEPs with index 0 are essentially NOOPs.
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a0 = getelementptr inbounds i8, i8* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a1 = getelementptr inbounds i16, i16* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a2 = getelementptr inbounds i32, i32* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a3 = getelementptr inbounds i64, i64* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
%a4 = getelementptr inbounds float, float* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
%a5 = getelementptr inbounds double, double* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
%a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
%a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
%a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
%a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
%a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
%a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
; Cost of GEPs is one if we cannot fold the address computation.
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%b0 = getelementptr inbounds i8, i8* undef, i32 1024
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%b1 = getelementptr inbounds i16, i16* undef, i32 1024
; Thumb-2 cannot fold offset >= 2^12 into address computation.
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
%b2 = getelementptr inbounds i32, i32* undef, i32 1024
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%b3 = getelementptr inbounds i64, i64* undef, i32 1024
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
%b4 = getelementptr inbounds float, float* undef, i32 1024
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
%b5 = getelementptr inbounds double, double* undef, i32 1024
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
%b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
%b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
%b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
%b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
%b11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
%b12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%c0 = getelementptr inbounds i8, i8* undef, i32 %i
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%c1 = getelementptr inbounds i16, i16* undef, i32 %i
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
%c2 = getelementptr inbounds i32, i32* undef, i32 %i
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%c3 = getelementptr inbounds i64, i64* undef, i32 %i
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
%c4 = getelementptr inbounds float, float* undef, i32 %i
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
%c5 = getelementptr inbounds double, double* undef, i32 %i
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
%c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
%c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
; Thumb-2 cannot fold scales larger than 8 to address computation.
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
%c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
%c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
%c11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
%c12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%d0 = getelementptr inbounds i8, i8* undef, i32 -1
ret void
}
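; A rough reading of the costs above: a GEP is free when the offset is expected
; to fold into the addressing mode of the eventual load/store, and costs 1 when
; a separate address computation is expected. For example, index 1024 on an
; i32* is a byte offset of 1024 * 4 = 4096 = 2^12, one past the largest
; immediate offset (4095) a Thumb-2 load/store can encode, hence the cost of 1;
; the same index on i8* (offset 1024) or i16* (offset 2048) still fits and
; stays at cost 0.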

@@ -0,0 +1,46 @@
; RUN: opt -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios6.0.0"
; Multiple insert elements from loads into d subregisters are expensive on swift
; due to renaming constraints.
%T_i8v = type <8 x i8>
%T_i8 = type i8
; CHECK: insertelement_i8
define void @insertelement_i8(%T_i8* %saddr,
%T_i8v* %vaddr) {
%v0 = load %T_i8v, %T_i8v* %vaddr
%v1 = load %T_i8, %T_i8* %saddr
;CHECK: estimated cost of 3 for {{.*}} insertelement <8 x i8>
%v2 = insertelement %T_i8v %v0, %T_i8 %v1, i32 1
store %T_i8v %v2, %T_i8v* %vaddr
ret void
}
%T_i16v = type <4 x i16>
%T_i16 = type i16
; CHECK: insertelement_i16
define void @insertelement_i16(%T_i16* %saddr,
%T_i16v* %vaddr) {
%v0 = load %T_i16v, %T_i16v* %vaddr
%v1 = load %T_i16, %T_i16* %saddr
;CHECK: estimated cost of 3 for {{.*}} insertelement <4 x i16>
%v2 = insertelement %T_i16v %v0, %T_i16 %v1, i32 1
store %T_i16v %v2, %T_i16v* %vaddr
ret void
}
%T_i32v = type <2 x i32>
%T_i32 = type i32
; CHECK: insertelement_i32
define void @insertelement_i32(%T_i32* %saddr,
%T_i32v* %vaddr) {
%v0 = load %T_i32v, %T_i32v* %vaddr
%v1 = load %T_i32, %T_i32* %saddr
;CHECK: estimated cost of 3 for {{.*}} insertelement <2 x i32>
%v2 = insertelement %T_i32v %v0, %T_i32 %v1, i32 1
store %T_i32v %v2, %T_i32v* %vaddr
ret void
}
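; The "estimated cost of 3" above appears to be the ARM cost model's charge for
; an integer insertelement, presumably reflecting the cross-register-bank move
; (core register to NEON lane) that the load-then-insert pattern above turns
; into; on Swift the resulting partial write of a d subregister is further
; penalized by register renaming, as the comment at the top of this file notes.
; Loading directly into the lane (a single-lane vld1) is the cheaper pattern.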

@@ -0,0 +1,3 @@
if not 'ARM' in config.root.targets:
config.unsupported = True

@@ -0,0 +1,75 @@
; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios6.0.0"
; CHECK: casts
define void @casts() {
; Scalar values
; CHECK: cost of 1 {{.*}} select
%v1 = select i1 undef, i8 undef, i8 undef
; CHECK: cost of 1 {{.*}} select
%v2 = select i1 undef, i16 undef, i16 undef
; CHECK: cost of 1 {{.*}} select
%v3 = select i1 undef, i32 undef, i32 undef
; CHECK: cost of 2 {{.*}} select
%v4 = select i1 undef, i64 undef, i64 undef
; CHECK: cost of 1 {{.*}} select
%v5 = select i1 undef, float undef, float undef
; CHECK: cost of 1 {{.*}} select
%v6 = select i1 undef, double undef, double undef
; Vector values
; CHECK: cost of 1 {{.*}} select
%v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
; CHECK: cost of 1 {{.*}} select
%v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
; CHECK: cost of 1 {{.*}} select
%v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
; CHECK: cost of 1 {{.*}} select
%v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
; CHECK: cost of 1 {{.*}} select
%v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
; CHECK: cost of 1 {{.*}} select
%v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
; CHECK: cost of 1 {{.*}} select
%v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
; CHECK: cost of 2 {{.*}} select
%v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
; CHECK: cost of 1 {{.*}} select
%v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
; CHECK: cost of 1 {{.*}} select
%v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
; CHECK: cost of 2 {{.*}} select
%v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
; CHECK: cost of 4 {{.*}} select
%v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
; CHECK: cost of 1 {{.*}} select
%v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
; CHECK: cost of 19 {{.*}} select
%v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
; CHECK: cost of 50 {{.*}} select
%v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
; CHECK: cost of 100 {{.*}} select
%v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
; CHECK: cost of 1 {{.*}} select
%v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
; CHECK: cost of 1 {{.*}} select
%v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
; CHECK: cost of 1 {{.*}} select
%v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
; odd vectors get legalized and should have similar costs
; CHECK: cost of 1 {{.*}} select
%v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
; CHECK: cost of 1 {{.*}} select
%v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
; CHECK: cost of 4 {{.*}} select
%v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
ret void
}
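; Most of the vector select costs above follow from type legalization: a select
; on a legal 64-bit or 128-bit NEON type costs 1, and wider vectors cost about
; one per legal piece they are split into (e.g. <8 x i32> becomes two <4 x i32>
; selects, cost 2, and <16 x i32> becomes four, cost 4). The odd-sized
; <5 x double> likewise appears to be widened and split into <2 x double>
; pieces, giving the cost of 4. The i64 selects with more than two lanes are
; the exception; their much larger costs suggest they are effectively
; scalarized.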

@@ -0,0 +1,40 @@
; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios6.0.0"
; CHECK: shuffle
define void @shuffle() {
;; Reverse shuffles should be lowered to vrev and possibly a vext (for
;; quadwords)
; Vector values
; CHECK: cost of 1 {{.*}} shuffle
%v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
; CHECK: cost of 1 {{.*}} shuffle
%v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: cost of 1 {{.*}} shuffle
%v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK: cost of 2 {{.*}} shuffle
%v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK: cost of 1 {{.*}} shuffle
%v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
; CHECK: cost of 1 {{.*}} shuffle
%v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: cost of 2 {{.*}} shuffle
%v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK: cost of 1 {{.*}} shuffle
%v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK: cost of 2 {{.*}} shuffle
%v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: cost of 1 {{.*}} shuffle
%v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK: cost of 2 {{.*}} shuffle
%v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret void
}
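; These costs line up with the lowering described in the comment at the top of
; the function: reversing a 64-bit (d-register) vector is a single vrev, cost 1,
; while a 128-bit (q-register) vector needs the vrev plus an extra vext/vswp to
; swap its two 64-bit halves, cost 2 (compare <8 x i8> at cost 1 with <16 x i8>
; at cost 2, or <4 x i16>/<2 x i32> at 1 with <8 x i16>/<4 x i32> at 2).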