Imported Upstream version 5.18.0.234

Former-commit-id: 8071ec1a8c5eaa9be24b41745add19297608001f
This commit is contained in:
Xamarin Public Jenkins (auto-signing)
2019-01-08 08:22:36 +00:00
parent f32dbaf0b2
commit 212f6bafcb
28494 changed files with 359 additions and 3867025 deletions

View File

@@ -1,347 +0,0 @@
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-ssse3 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+sse3,+ssse3 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSSE3
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
; Verify the cost model for alternate shuffles.
; Test shufflevector instructions with illegal 64-bit vector types.
; 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
; 64-bit packed float vectors (v2f32) are widened to type v4f32.
; Two-element alternate shuffle on <2 x i32> with mask <0, 3>, so result
; element 0 is %a[0] and result element 1 is %b[1]. The check lines that follow
; the function pin the cost reported for this shufflevector per feature set.
define <2 x i32> @test_v2i32(<2 x i32> %a, <2 x i32> %b) {
%1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i32> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2i32':
; SSE2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Two-element alternate shuffle on <2 x float> with mask <0, 3>, so result
; element 0 is %a[0] and result element 1 is %b[1].
define <2 x float> @test_v2f32(<2 x float> %a, <2 x float> %b) {
%1 = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 3>
ret <2 x float> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2f32':
; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Mirror of test_v2i32 with mask <2, 1>, so result element 0 is %b[0] and
; result element 1 is %a[1].
define <2 x i32> @test_v2i32_2(<2 x i32> %a, <2 x i32> %b) {
%1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 2, i32 1>
ret <2 x i32> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2i32_2':
; SSE2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Mirror of test_v2f32 with mask <2, 1>, so result element 0 is %b[0] and
; result element 1 is %a[1].
define <2 x float> @test_v2f32_2(<2 x float> %a, <2 x float> %b) {
%1 = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 2, i32 1>
ret <2 x float> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2f32_2':
; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Test shuffles on packed vectors of two elements.
; Alternate shuffle on <2 x i64> with mask <0, 3>, taking element 0 from %a and
; element 1 from %b.
define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) {
%1 = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2i64':
; SSE2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <2 x double> with mask <0, 3>, taking element 0 from %a
; and element 1 from %b.
define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
%1 = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2f64':
; SSE2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <2 x i64> with mask <2, 1>, taking element 0 from %b and
; element 1 from %a.
define <2 x i64> @test_v2i64_2(<2 x i64> %a, <2 x i64> %b) {
%1 = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
ret <2 x i64> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2i64_2':
; SSE2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <2 x double> with mask <2, 1>, taking element 0 from %b
; and element 1 from %a.
define <2 x double> @test_v2f64_2(<2 x double> %a, <2 x double> %b) {
%1 = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
ret <2 x double> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2f64_2':
; SSE2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Test shuffles on packed vectors of four elements.
; Alternate shuffle on <4 x i32> with mask <0, 5, 2, 7>, so even result
; elements come from %a and odd result elements come from %b.
define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) {
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x i32> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4i32':
; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <4 x i32> with mask <4, 1, 6, 3>, so even result
; elements come from %b and odd result elements come from %a.
define <4 x i32> @test_v4i32_2(<4 x i32> %a, <4 x i32> %b) {
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
ret <4 x i32> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4i32_2':
; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <4 x float> with mask <0, 5, 2, 7>, so even result
; elements come from %a and odd result elements come from %b.
define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) {
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x float> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4f32':
; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <4 x float> with mask <4, 1, 6, 3>, so even result
; elements come from %b and odd result elements come from %a.
define <4 x float> @test_v4f32_2(<4 x float> %a, <4 x float> %b) {
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
ret <4 x float> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4f32_2':
; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <4 x i64> with mask <0, 5, 2, 7>, so even result
; elements come from %a and odd result elements come from %b.
define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
%1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x i64> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4i64':
; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <4 x i64> with mask <4, 1, 6, 3>, so even result
; elements come from %b and odd result elements come from %a.
define <4 x i64> @test_v4i64_2(<4 x i64> %a, <4 x i64> %b) {
%1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
ret <4 x i64> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4i64_2':
; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <4 x double> with mask <0, 5, 2, 7>, so even result
; elements come from %a and odd result elements come from %b.
define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
%1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x double> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4f64':
; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <4 x double> with mask <4, 1, 6, 3>, so even result
; elements come from %b and odd result elements come from %a.
define <4 x double> @test_v4f64_2(<4 x double> %a, <4 x double> %b) {
%1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
ret <4 x double> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4f64_2':
; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Test shuffles on packed vectors of eight elements.
; Alternate shuffle on <8 x i16> where even result elements come from %a
; (indices 0, 2, 4, 6) and odd result elements come from %b (indices 9, 11,
; 13, 15 address elements 1, 3, 5, 7 of the second operand).
define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
%1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x i16> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16':
; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <8 x i16> where even result elements come from %b
; (indices 8, 10, 12, 14) and odd result elements come from %a.
define <8 x i16> @test_v8i16_2(<8 x i16> %a, <8 x i16> %b) {
%1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
ret <8 x i16> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16_2':
; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <8 x i32> where even result elements come from %a and
; odd result elements come from %b.
define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
%1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x i32> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i32':
; SSE2: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <8 x i32> where even result elements come from %b and
; odd result elements come from %a.
define <8 x i32> @test_v8i32_2(<8 x i32> %a, <8 x i32> %b) {
%1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
ret <8 x i32> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i32_2':
; SSE2: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <8 x float> where even result elements come from %a and
; odd result elements come from %b.
define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
%1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x float> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8f32':
; SSE2: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <8 x float> where even result elements come from %b and
; odd result elements come from %a.
define <8 x float> @test_v8f32_2(<8 x float> %a, <8 x float> %b) {
%1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
ret <8 x float> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8f32_2':
; SSE2: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Test shuffles on packed vectors of sixteen elements.
; Alternate shuffle on <16 x i8> where even result elements come from %a
; (indices 0..14) and odd result elements come from %b (indices 17..31 address
; the odd elements of the second operand).
define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) {
%1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
ret <16 x i8> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8':
; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <16 x i8> where even result elements come from %b
; (indices 16..30) and odd result elements come from %a.
define <16 x i8> @test_v16i8_2(<16 x i8> %a, <16 x i8> %b) {
%1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
ret <16 x i8> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8_2':
; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <16 x i16> where even result elements come from %a and
; odd result elements come from %b.
define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
%1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
ret <16 x i16> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16':
; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <16 x i16> where even result elements come from %b and
; odd result elements come from %a.
define <16 x i16> @test_v16i16_2(<16 x i16> %a, <16 x i16> %b) {
%1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
ret <16 x i16> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16_2':
; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <32 x i8> where even result elements come from %a
; (indices 0..30) and odd result elements come from %b (indices 33..63 address
; the odd elements of the second operand).
define <32 x i8> @test_v32i8(<32 x i8> %a, <32 x i8> %b) {
%1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 33, i32 2, i32 35, i32 4, i32 37, i32 6, i32 39, i32 8, i32 41, i32 10, i32 43, i32 12, i32 45, i32 14, i32 47, i32 16, i32 49, i32 18, i32 51, i32 20, i32 53, i32 22, i32 55, i32 24, i32 57, i32 26, i32 59, i32 28, i32 61, i32 30, i32 63>
ret <32 x i8> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8':
; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
; Alternate shuffle on <32 x i8> where even result elements come from %b
; (indices 32..62) and odd result elements come from %a.
define <32 x i8> @test_v32i8_2(<32 x i8> %a, <32 x i8> %b) {
%1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
ret <32 x i8> %1
}
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8_2':
; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,222 +0,0 @@
; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=SSE2
; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse4.2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=SSE42
; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX
; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX2
; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX512 -check-prefix=AVX512F
; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512vl,+avx512bw,+avx512dq -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX512 -check-prefix=AVX512BW
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=SSE42
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX512 -check-prefix=AVX512F
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512bw,+avx512dq -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX512 -check-prefix=AVX512BW
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
; Verify the cost of scalar bitreverse instructions.
declare i64 @llvm.bitreverse.i64(i64)
declare i32 @llvm.bitreverse.i32(i32)
declare i16 @llvm.bitreverse.i16(i16)
declare i8 @llvm.bitreverse.i8(i8)
; Scalar case for i64. Calls llvm.bitreverse.i64 once on %a and returns the
; result. The directives in the body give the expected cost of that call,
; with separate expectations for the 32-bit triple, the 64-bit triple and the
; XOP runs.
define i64 @var_bitreverse_i64(i64 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_i64':
; X86: Found an estimated cost of 28 for instruction: %bitreverse
; X64: Found an estimated cost of 14 for instruction: %bitreverse
; XOP: Found an estimated cost of 3 for instruction: %bitreverse
%bitreverse = call i64 @llvm.bitreverse.i64(i64 %a)
ret i64 %bitreverse
}
; Scalar case for i32. Calls llvm.bitreverse.i32 once on %a and returns the
; result; the directives in the body give the expected cost of that call.
define i32 @var_bitreverse_i32(i32 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_i32':
; X86: Found an estimated cost of 14 for instruction: %bitreverse
; X64: Found an estimated cost of 14 for instruction: %bitreverse
; XOP: Found an estimated cost of 3 for instruction: %bitreverse
%bitreverse = call i32 @llvm.bitreverse.i32(i32 %a)
ret i32 %bitreverse
}
; Scalar case for i16. Calls llvm.bitreverse.i16 once on %a and returns the
; result; the directives in the body give the expected cost of that call.
define i16 @var_bitreverse_i16(i16 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_i16':
; X86: Found an estimated cost of 14 for instruction: %bitreverse
; X64: Found an estimated cost of 14 for instruction: %bitreverse
; XOP: Found an estimated cost of 3 for instruction: %bitreverse
%bitreverse = call i16 @llvm.bitreverse.i16(i16 %a)
ret i16 %bitreverse
}
; Scalar case for i8. Calls llvm.bitreverse.i8 once on %a and returns the
; result; the directives in the body give the expected cost of that call.
define i8 @var_bitreverse_i8(i8 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_i8':
; X86: Found an estimated cost of 11 for instruction: %bitreverse
; X64: Found an estimated cost of 11 for instruction: %bitreverse
; XOP: Found an estimated cost of 3 for instruction: %bitreverse
%bitreverse = call i8 @llvm.bitreverse.i8(i8 %a)
ret i8 %bitreverse
}
; Verify the cost of vector bitreverse instructions.
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>)
declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)
declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>)
declare <8 x i64> @llvm.bitreverse.v8i64(<8 x i64>)
declare <16 x i32> @llvm.bitreverse.v16i32(<16 x i32>)
declare <32 x i16> @llvm.bitreverse.v32i16(<32 x i16>)
declare <64 x i8> @llvm.bitreverse.v64i8(<64 x i8>)
; Vector case for <2 x i64>. Calls llvm.bitreverse.v2i64 once on %a and
; returns the result; the directives in the body give the expected cost of
; that call for each feature-set prefix.
define <2 x i64> @var_bitreverse_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v2i64':
; SSE2: Found an estimated cost of 29 for instruction: %bitreverse
; SSE42: Found an estimated cost of 5 for instruction: %bitreverse
; AVX: Found an estimated cost of 5 for instruction: %bitreverse
; AVX2: Found an estimated cost of 5 for instruction: %bitreverse
; AVX512: Found an estimated cost of 5 for instruction: %bitreverse
; XOP: Found an estimated cost of 1 for instruction: %bitreverse
%bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
ret <2 x i64> %bitreverse
}
; Vector case for <4 x i64>. Calls llvm.bitreverse.v4i64 once on %a and
; returns the result; the directives in the body give the expected cost of
; that call for each feature-set prefix.
define <4 x i64> @var_bitreverse_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v4i64':
; SSE2: Found an estimated cost of 58 for instruction: %bitreverse
; SSE42: Found an estimated cost of 10 for instruction: %bitreverse
; AVX: Found an estimated cost of 12 for instruction: %bitreverse
; AVX2: Found an estimated cost of 5 for instruction: %bitreverse
; AVX512: Found an estimated cost of 5 for instruction: %bitreverse
; XOP: Found an estimated cost of 4 for instruction: %bitreverse
%bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
ret <4 x i64> %bitreverse
}
; Vector case for <8 x i64>. Calls llvm.bitreverse.v8i64 once on %a and
; returns the result. Unlike the narrower cases, the AVX-512 expectations are
; split here between the F-only prefix and the BW prefix.
define <8 x i64> @var_bitreverse_v8i64(<8 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v8i64':
; SSE2: Found an estimated cost of 116 for instruction: %bitreverse
; SSE42: Found an estimated cost of 20 for instruction: %bitreverse
; AVX: Found an estimated cost of 24 for instruction: %bitreverse
; AVX2: Found an estimated cost of 10 for instruction: %bitreverse
; AVX512F: Found an estimated cost of 36 for instruction: %bitreverse
; AVX512BW: Found an estimated cost of 5 for instruction: %bitreverse
; XOP: Found an estimated cost of 8 for instruction: %bitreverse
%bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
ret <8 x i64> %bitreverse
}
; Vector case for <4 x i32>. Calls llvm.bitreverse.v4i32 once on %a and
; returns the result; the directives in the body give the expected cost of
; that call for each feature-set prefix.
define <4 x i32> @var_bitreverse_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v4i32':
; SSE2: Found an estimated cost of 27 for instruction: %bitreverse
; SSE42: Found an estimated cost of 5 for instruction: %bitreverse
; AVX: Found an estimated cost of 5 for instruction: %bitreverse
; AVX2: Found an estimated cost of 5 for instruction: %bitreverse
; AVX512: Found an estimated cost of 5 for instruction: %bitreverse
; XOP: Found an estimated cost of 1 for instruction: %bitreverse
%bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
ret <4 x i32> %bitreverse
}
; Vector case for <8 x i32>. Calls llvm.bitreverse.v8i32 once on %a and
; returns the result; the directives in the body give the expected cost of
; that call for each feature-set prefix.
define <8 x i32> @var_bitreverse_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v8i32':
; SSE2: Found an estimated cost of 54 for instruction: %bitreverse
; SSE42: Found an estimated cost of 10 for instruction: %bitreverse
; AVX: Found an estimated cost of 12 for instruction: %bitreverse
; AVX2: Found an estimated cost of 5 for instruction: %bitreverse
; AVX512: Found an estimated cost of 5 for instruction: %bitreverse
; XOP: Found an estimated cost of 4 for instruction: %bitreverse
%bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
ret <8 x i32> %bitreverse
}
; Vector case for <16 x i32>. Calls llvm.bitreverse.v16i32 once on %a and
; returns the result. The AVX-512 expectations are split between the F-only
; prefix and the BW prefix.
define <16 x i32> @var_bitreverse_v16i32(<16 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v16i32':
; SSE2: Found an estimated cost of 108 for instruction: %bitreverse
; SSE42: Found an estimated cost of 20 for instruction: %bitreverse
; AVX: Found an estimated cost of 24 for instruction: %bitreverse
; AVX2: Found an estimated cost of 10 for instruction: %bitreverse
; AVX512F: Found an estimated cost of 24 for instruction: %bitreverse
; AVX512BW: Found an estimated cost of 5 for instruction: %bitreverse
; XOP: Found an estimated cost of 8 for instruction: %bitreverse
%bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
ret <16 x i32> %bitreverse
}
; Vector case for <8 x i16>. Calls llvm.bitreverse.v8i16 once on %a and
; returns the result; the directives in the body give the expected cost of
; that call for each feature-set prefix.
define <8 x i16> @var_bitreverse_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v8i16':
; SSE2: Found an estimated cost of 27 for instruction: %bitreverse
; SSE42: Found an estimated cost of 5 for instruction: %bitreverse
; AVX: Found an estimated cost of 5 for instruction: %bitreverse
; AVX2: Found an estimated cost of 5 for instruction: %bitreverse
; AVX512: Found an estimated cost of 5 for instruction: %bitreverse
; XOP: Found an estimated cost of 1 for instruction: %bitreverse
%bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
ret <8 x i16> %bitreverse
}
; Vector case for <16 x i16>. Calls llvm.bitreverse.v16i16 once on %a and
; returns the result; the directives in the body give the expected cost of
; that call for each feature-set prefix.
define <16 x i16> @var_bitreverse_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v16i16':
; SSE2: Found an estimated cost of 54 for instruction: %bitreverse
; SSE42: Found an estimated cost of 10 for instruction: %bitreverse
; AVX: Found an estimated cost of 12 for instruction: %bitreverse
; AVX2: Found an estimated cost of 5 for instruction: %bitreverse
; AVX512: Found an estimated cost of 5 for instruction: %bitreverse
; XOP: Found an estimated cost of 4 for instruction: %bitreverse
%bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
ret <16 x i16> %bitreverse
}
; Vector case for <32 x i16>. Calls llvm.bitreverse.v32i16 once on %a and
; returns the result. The AVX-512 expectations are split between the F-only
; prefix and the BW prefix.
define <32 x i16> @var_bitreverse_v32i16(<32 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v32i16':
; SSE2: Found an estimated cost of 108 for instruction: %bitreverse
; SSE42: Found an estimated cost of 20 for instruction: %bitreverse
; AVX: Found an estimated cost of 24 for instruction: %bitreverse
; AVX2: Found an estimated cost of 10 for instruction: %bitreverse
; AVX512F: Found an estimated cost of 10 for instruction: %bitreverse
; AVX512BW: Found an estimated cost of 5 for instruction: %bitreverse
; XOP: Found an estimated cost of 8 for instruction: %bitreverse
%bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
ret <32 x i16> %bitreverse
}
; Vector case for <16 x i8>. Calls llvm.bitreverse.v16i8 once on %a and
; returns the result; the directives in the body give the expected cost of
; that call for each feature-set prefix.
define <16 x i8> @var_bitreverse_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v16i8':
; SSE2: Found an estimated cost of 20 for instruction: %bitreverse
; SSE42: Found an estimated cost of 5 for instruction: %bitreverse
; AVX: Found an estimated cost of 5 for instruction: %bitreverse
; AVX2: Found an estimated cost of 5 for instruction: %bitreverse
; AVX512: Found an estimated cost of 5 for instruction: %bitreverse
; XOP: Found an estimated cost of 1 for instruction: %bitreverse
%bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
ret <16 x i8> %bitreverse
}
; Vector case for <32 x i8>. Calls llvm.bitreverse.v32i8 once on %a and
; returns the result; the directives in the body give the expected cost of
; that call for each feature-set prefix.
define <32 x i8> @var_bitreverse_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v32i8':
; SSE2: Found an estimated cost of 40 for instruction: %bitreverse
; SSE42: Found an estimated cost of 10 for instruction: %bitreverse
; AVX: Found an estimated cost of 12 for instruction: %bitreverse
; AVX2: Found an estimated cost of 5 for instruction: %bitreverse
; AVX512: Found an estimated cost of 5 for instruction: %bitreverse
; XOP: Found an estimated cost of 4 for instruction: %bitreverse
%bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a)
ret <32 x i8> %bitreverse
}
; Vector case for <64 x i8>. Calls llvm.bitreverse.v64i8 once on %a and
; returns the result. The AVX-512 expectations are split between the F-only
; prefix and the BW prefix.
define <64 x i8> @var_bitreverse_v64i8(<64 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v64i8':
; SSE2: Found an estimated cost of 80 for instruction: %bitreverse
; SSE42: Found an estimated cost of 20 for instruction: %bitreverse
; AVX: Found an estimated cost of 24 for instruction: %bitreverse
; AVX2: Found an estimated cost of 10 for instruction: %bitreverse
; AVX512F: Found an estimated cost of 10 for instruction: %bitreverse
; AVX512BW: Found an estimated cost of 5 for instruction: %bitreverse
; XOP: Found an estimated cost of 8 for instruction: %bitreverse
%bitreverse = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %a)
ret <64 x i8> %bitreverse
}

View File

@@ -1,82 +0,0 @@
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE42
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX1
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
; Verify the cost of vector bswap instructions.
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
; Expected cost of llvm.bswap on <2 x i64>, one directive per subtarget prefix.
define <2 x i64> @var_bswap_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bswap_v2i64':
; SSE2: Found an estimated cost of 7 for instruction: %bswap
; SSE42: Found an estimated cost of 1 for instruction: %bswap
; AVX: Found an estimated cost of 1 for instruction: %bswap
; XOP: Found an estimated cost of 1 for instruction: %bswap
%bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
ret <2 x i64> %bswap
}
; Expected cost of llvm.bswap on <4 x i64>; the AVX and XOP runs are split
; into their AVX1/AVX2 flavours for this 256-bit type.
define <4 x i64> @var_bswap_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bswap_v4i64':
; SSE2: Found an estimated cost of 14 for instruction: %bswap
; SSE42: Found an estimated cost of 2 for instruction: %bswap
; AVX1: Found an estimated cost of 4 for instruction: %bswap
; AVX2: Found an estimated cost of 1 for instruction: %bswap
; XOPAVX1: Found an estimated cost of 4 for instruction: %bswap
; XOPAVX2: Found an estimated cost of 1 for instruction: %bswap
%bswap = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a)
ret <4 x i64> %bswap
}
; Expected cost of llvm.bswap on <4 x i32>, one directive per subtarget prefix.
define <4 x i32> @var_bswap_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bswap_v4i32':
; SSE2: Found an estimated cost of 7 for instruction: %bswap
; SSE42: Found an estimated cost of 1 for instruction: %bswap
; AVX: Found an estimated cost of 1 for instruction: %bswap
; XOP: Found an estimated cost of 1 for instruction: %bswap
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
ret <4 x i32> %bswap
}
; Expected cost of llvm.bswap on <8 x i32>; the AVX and XOP runs are split
; into their AVX1/AVX2 flavours for this 256-bit type.
define <8 x i32> @var_bswap_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bswap_v8i32':
; SSE2: Found an estimated cost of 14 for instruction: %bswap
; SSE42: Found an estimated cost of 2 for instruction: %bswap
; AVX1: Found an estimated cost of 4 for instruction: %bswap
; AVX2: Found an estimated cost of 1 for instruction: %bswap
; XOPAVX1: Found an estimated cost of 4 for instruction: %bswap
; XOPAVX2: Found an estimated cost of 1 for instruction: %bswap
%bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a)
ret <8 x i32> %bswap
}
; Expected cost of llvm.bswap on <8 x i16>, one directive per subtarget prefix.
define <8 x i16> @var_bswap_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bswap_v8i16':
; SSE2: Found an estimated cost of 7 for instruction: %bswap
; SSE42: Found an estimated cost of 1 for instruction: %bswap
; AVX: Found an estimated cost of 1 for instruction: %bswap
; XOP: Found an estimated cost of 1 for instruction: %bswap
%bswap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
ret <8 x i16> %bswap
}
; Expected cost of llvm.bswap on <16 x i16>; the AVX and XOP runs are split
; into their AVX1/AVX2 flavours for this 256-bit type.
define <16 x i16> @var_bswap_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_bswap_v16i16':
; SSE2: Found an estimated cost of 14 for instruction: %bswap
; SSE42: Found an estimated cost of 2 for instruction: %bswap
; AVX1: Found an estimated cost of 4 for instruction: %bswap
; AVX2: Found an estimated cost of 1 for instruction: %bswap
; XOPAVX1: Found an estimated cost of 4 for instruction: %bswap
; XOPAVX2: Found an estimated cost of 1 for instruction: %bswap
%bswap = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a)
ret <16 x i16> %bswap
}

View File

@@ -1,258 +0,0 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Cast costs between i1 and i32 for <4 x ...>, <8 x ...> and scalar types,
; all on undef operands. The directives use the prefix shared by every run.
define i32 @add(i32 %arg) {
; CHECK-LABEL: for function 'add'
; -- Same size registers --
;CHECK: cost of 1 {{.*}} zext
%A = zext <4 x i1> undef to <4 x i32>
;CHECK: cost of 2 {{.*}} sext
%B = sext <4 x i1> undef to <4 x i32>
;CHECK: cost of 0 {{.*}} trunc
%C = trunc <4 x i32> undef to <4 x i1>
; -- Different size registers --
; The 8-element casts are only required NOT to report the costs listed below.
;CHECK-NOT: cost of 1 {{.*}} zext
%D = zext <8 x i1> undef to <8 x i32>
;CHECK-NOT: cost of 2 {{.*}} sext
%E = sext <8 x i1> undef to <8 x i32>
;CHECK-NOT: cost of 2 {{.*}} trunc
%F = trunc <8 x i32> undef to <8 x i1>
; -- scalars --
;CHECK: cost of 1 {{.*}} zext
%G = zext i1 undef to i32
;CHECK: cost of 0 {{.*}} trunc
%H = trunc i32 undef to i1
;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}
; Costs of vector zext/sext/trunc between integer element widths.
; 128/256-bit cases carry AVX2 and AVX expectations; the 512-bit cases
; (%D1..%D5, %G, %G1, %G2) carry AVX512 expectations that name the instruction
; so they cannot match an earlier cast with the same opcode.
define i32 @zext_sext(<8 x i1> %in) {
; CHECK-AVX2-LABEL: for function 'zext_sext'
; CHECK-AVX-LABEL: for function 'zext_sext'
;CHECK-AVX2: cost of 3 {{.*}} zext
;CHECK-AVX: cost of 4 {{.*}} zext
%Z = zext <8 x i1> %in to <8 x i32>
;CHECK-AVX2: cost of 3 {{.*}} sext
;CHECK-AVX: cost of 7 {{.*}} sext
%S = sext <8 x i1> %in to <8 x i32>
;CHECK-AVX2: cost of 1 {{.*}} zext
;CHECK-AVX: cost of 4 {{.*}} zext
%A1 = zext <16 x i8> undef to <16 x i16>
;CHECK-AVX2: cost of 1 {{.*}} sext
;CHECK-AVX: cost of 4 {{.*}} sext
%A2 = sext <16 x i8> undef to <16 x i16>
;CHECK-AVX2: cost of 1 {{.*}} sext
;CHECK-AVX: cost of 4 {{.*}} sext
%A = sext <8 x i16> undef to <8 x i32>
;CHECK-AVX2: cost of 1 {{.*}} zext
;CHECK-AVX: cost of 4 {{.*}} zext
%B = zext <8 x i16> undef to <8 x i32>
;CHECK-AVX2: cost of 1 {{.*}} sext
;CHECK-AVX: cost of 4 {{.*}} sext
%C = sext <4 x i32> undef to <4 x i64>
;CHECK-AVX2: cost of 3 {{.*}} zext
;CHECK-AVX: cost of 4 {{.*}} zext
%C.v8i8.z = zext <8 x i8> undef to <8 x i32>
;CHECK-AVX2: cost of 3 {{.*}} sext
;CHECK-AVX: cost of 7 {{.*}} sext
%C.v8i8.s = sext <8 x i8> undef to <8 x i32>
;CHECK-AVX2: cost of 3 {{.*}} zext
;CHECK-AVX: cost of 3 {{.*}} zext
%C.v4i16.z = zext <4 x i16> undef to <4 x i64>
;CHECK-AVX2: cost of 3 {{.*}} sext
;CHECK-AVX: cost of 6 {{.*}} sext
%C.v4i16.s = sext <4 x i16> undef to <4 x i64>
;CHECK-AVX2: cost of 3 {{.*}} zext
;CHECK-AVX: cost of 4 {{.*}} zext
%C.v4i8.z = zext <4 x i8> undef to <4 x i64>
;CHECK-AVX2: cost of 3 {{.*}} sext
;CHECK-AVX: cost of 6 {{.*}} sext
%C.v4i8.s = sext <4 x i8> undef to <4 x i64>
;CHECK-AVX2: cost of 1 {{.*}} zext
;CHECK-AVX: cost of 4 {{.*}} zext
%D = zext <4 x i32> undef to <4 x i64>
;CHECK-AVX512: cost of 1 {{.*}} %D1 = zext
%D1 = zext <8 x i32> undef to <8 x i64>
;CHECK-AVX512: cost of 1 {{.*}} %D2 = sext
%D2 = sext <8 x i32> undef to <8 x i64>
;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
%D3 = zext <16 x i16> undef to <16 x i32>
;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
%D4 = zext <16 x i8> undef to <16 x i32>
;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
%D5 = zext <16 x i1> undef to <16 x i32>
;CHECK-AVX2: cost of 2 {{.*}} trunc
;CHECK-AVX: cost of 4 {{.*}} trunc
%E = trunc <4 x i64> undef to <4 x i32>
;CHECK-AVX2: cost of 2 {{.*}} trunc
;CHECK-AVX: cost of 5 {{.*}} trunc
%F = trunc <8 x i32> undef to <8 x i16>
;CHECK-AVX2: cost of 4 {{.*}} trunc
;CHECK-AVX: cost of 4 {{.*}} trunc
%F1 = trunc <16 x i16> undef to <16 x i8>
;CHECK-AVX2: cost of 2 {{.*}} trunc
;CHECK-AVX: cost of 4 {{.*}} trunc
%F2 = trunc <8 x i32> undef to <8 x i8>
;CHECK-AVX2: cost of 2 {{.*}} trunc
;CHECK-AVX: cost of 4 {{.*}} trunc
%F3 = trunc <4 x i64> undef to <4 x i8>
; v8i64 -> v8i32 truncation, checked by all three runs.
;CHECK-AVX2: cost of 4 {{.*}} trunc
;CHECK-AVX: cost of 9 {{.*}} trunc
;CHECK-AVX512: cost of 1 {{.*}} %G = trunc
%G = trunc <8 x i64> undef to <8 x i32>
;CHECK-AVX512: cost of 1 {{.*}} %G1 = trunc
%G1 = trunc <16 x i32> undef to <16 x i16>
;CHECK-AVX512: cost of 1 {{.*}} %G2 = trunc
%G2 = trunc <16 x i32> undef to <16 x i8>
ret i32 undef
}
; zext/sext of an <8 x i1> argument to <8 x i32>; only the AVX2 and AVX runs
; carry expectations in this function.
define i32 @masks8(<8 x i1> %in) {
; CHECK-AVX2-LABEL: for function 'masks8'
; CHECK-AVX-LABEL: for function 'masks8'
;CHECK-AVX2: cost of 3 {{.*}} zext
;CHECK-AVX: cost of 4 {{.*}} zext
%Z = zext <8 x i1> %in to <8 x i32>
;CHECK-AVX2: cost of 3 {{.*}} sext
;CHECK-AVX: cost of 7 {{.*}} sext
%S = sext <8 x i1> %in to <8 x i32>
ret i32 undef
}
; zext/sext of a <4 x i1> argument to <4 x i64>; only the AVX2 and AVX runs
; carry expectations in this function.
define i32 @masks4(<4 x i1> %in) {
; CHECK-AVX2-LABEL: for function 'masks4'
; CHECK-AVX-LABEL: for function 'masks4'
;CHECK-AVX2: cost of 3 {{.*}} zext
;CHECK-AVX: cost of 4 {{.*}} zext
%Z = zext <4 x i1> %in to <4 x i64>
;CHECK-AVX2: cost of 3 {{.*}} sext
;CHECK-AVX: cost of 6 {{.*}} sext
%S = sext <4 x i1> %in to <4 x i64>
ret i32 undef
}
; sitofp from 4-element i1/i8/i16/i32 vectors to <4 x float> and <4 x double>.
; Every expectation uses the prefix shared by all runs of this test.
define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; CHECK-LABEL: for function 'sitofp4'
; CHECK: cost of 3 {{.*}} sitofp
%A1 = sitofp <4 x i1> %a to <4 x float>
; CHECK: cost of 3 {{.*}} sitofp
%A2 = sitofp <4 x i1> %a to <4 x double>
; CHECK: cost of 3 {{.*}} sitofp
%B1 = sitofp <4 x i8> %b to <4 x float>
; CHECK: cost of 3 {{.*}} sitofp
%B2 = sitofp <4 x i8> %b to <4 x double>
; CHECK: cost of 3 {{.*}} sitofp
%C1 = sitofp <4 x i16> %c to <4 x float>
; CHECK: cost of 3 {{.*}} sitofp
%C2 = sitofp <4 x i16> %c to <4 x double>
; CHECK: cost of 1 {{.*}} sitofp
%D1 = sitofp <4 x i32> %d to <4 x float>
; CHECK: cost of 1 {{.*}} sitofp
%D2 = sitofp <4 x i32> %d to <4 x double>
ret void
}
; sitofp from 8-element i1/i8/i16/i32 vectors to <8 x float>.
; Every expectation uses the prefix shared by all runs of this test.
define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
; CHECK-LABEL: for function 'sitofp8'
; CHECK: cost of 8 {{.*}} sitofp
%A1 = sitofp <8 x i1> %a to <8 x float>
; CHECK: cost of 8 {{.*}} sitofp
%B1 = sitofp <8 x i8> %b to <8 x float>
; CHECK: cost of 5 {{.*}} sitofp
%C1 = sitofp <8 x i16> %c to <8 x float>
; CHECK: cost of 1 {{.*}} sitofp
%D1 = sitofp <8 x i32> %d to <8 x float>
ret void
}
; uitofp from 4-element i1/i8/i16/i32 vectors to <4 x float> and <4 x double>.
; The i1/i8/i16 sources use the shared prefix; the i32 sources are only
; pinned for the AVX2 run.
define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; CHECK-LABEL: for function 'uitofp4'
; CHECK: cost of 7 {{.*}} uitofp
%A1 = uitofp <4 x i1> %a to <4 x float>
; CHECK: cost of 7 {{.*}} uitofp
%A2 = uitofp <4 x i1> %a to <4 x double>
; CHECK: cost of 2 {{.*}} uitofp
%B1 = uitofp <4 x i8> %b to <4 x float>
; CHECK: cost of 2 {{.*}} uitofp
%B2 = uitofp <4 x i8> %b to <4 x double>
; CHECK: cost of 2 {{.*}} uitofp
%C1 = uitofp <4 x i16> %c to <4 x float>
; CHECK: cost of 2 {{.*}} uitofp
%C2 = uitofp <4 x i16> %c to <4 x double>
; CHECK-AVX2: cost of 6 {{.*}} uitofp
%D1 = uitofp <4 x i32> %d to <4 x float>
; CHECK-AVX2: cost of 6 {{.*}} uitofp
%D2 = uitofp <4 x i32> %d to <4 x double>
ret void
}
; uitofp from 8-element i1/i8/i16/i32 vectors to <8 x float>, with
; per-subtarget expectations (not every run pins every instruction).
define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
; CHECK-LABEL: for function 'uitofp8'
; CHECK-AVX2: cost of 6 {{.*}} uitofp
%A1 = uitofp <8 x i1> %a to <8 x float>
; CHECK-AVX2: cost of 5 {{.*}} uitofp
; CHECK-AVX512: cost of 2 {{.*}} uitofp
%B1 = uitofp <8 x i8> %b to <8 x float>
; CHECK-AVX2: cost of 5 {{.*}} uitofp
; CHECK-AVX512: cost of 2 {{.*}} uitofp
%C1 = uitofp <8 x i16> %c to <8 x float>
; CHECK-AVX2: cost of 8 {{.*}} uitofp
; CHECK-AVX512: cost of 1 {{.*}} uitofp
; CHECK-AVX: cost of 9 {{.*}} uitofp
%D1 = uitofp <8 x i32> %d to <8 x float>
ret void
}
; fpext (float -> double) and fptrunc (double -> float) costs for 4- and
; 8-element vectors. The 4-element cases use the shared prefix; the 8-element
; cases have one expectation per subtarget.
define void @fp_conv(<8 x float> %a, <16 x float>%b, <4 x float> %c) {
;CHECK-LABEL: for function 'fp_conv'
; CHECK: cost of 1 {{.*}} %A1 = fpext
%A1 = fpext <4 x float> %c to <4 x double>
; CHECK-AVX: cost of 3 {{.*}} %A2 = fpext
; CHECK-AVX2: cost of 3 {{.*}} %A2 = fpext
; CHECK-AVX512: cost of 1 {{.*}} %A2 = fpext
%A2 = fpext <8 x float> %a to <8 x double>
; CHECK: cost of 1 {{.*}} %A3 = fptrunc
%A3 = fptrunc <4 x double> undef to <4 x float>
; CHECK-AVX: cost of 3 {{.*}} %A4 = fptrunc
; CHECK-AVX2: cost of 3 {{.*}} %A4 = fptrunc
; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
%A4 = fptrunc <8 x double> undef to <8 x float>
ret void
}

View File

@@ -1,147 +0,0 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=pentium4 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=yonah | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE3 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSSE3 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=penryn | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Vector compare costs on undef operands: fcmp olt over float/double vectors,
; then icmp eq over i8/i16/i32/i64 vectors, with one expectation per
; subtarget prefix. The 512-bit and wider cases (%E1..%E3, %M1..%M3) are
; pinned only for the AVX512 prefix and name the instruction explicitly.
define i32 @cmp(i32 %arg) {
; -- floats --
;SSE2: cost of 3 {{.*}} fcmp
;SSE3: cost of 3 {{.*}} fcmp
;SSSE3: cost of 3 {{.*}} fcmp
;SSE41: cost of 3 {{.*}} fcmp
;SSE42: cost of 1 {{.*}} fcmp
;AVX: cost of 1 {{.*}} fcmp
%A = fcmp olt <2 x float> undef, undef
;SSE2: cost of 7 {{.*}} fcmp
;SSE3: cost of 7 {{.*}} fcmp
;SSSE3: cost of 7 {{.*}} fcmp
;SSE41: cost of 7 {{.*}} fcmp
;SSE42: cost of 1 {{.*}} fcmp
;AVX: cost of 1 {{.*}} fcmp
%B = fcmp olt <4 x float> undef, undef
;SSE2: cost of 14 {{.*}} fcmp
;SSE3: cost of 14 {{.*}} fcmp
;SSSE3: cost of 14 {{.*}} fcmp
;SSE41: cost of 14 {{.*}} fcmp
;SSE42: cost of 2 {{.*}} fcmp
;AVX: cost of 1 {{.*}} fcmp
%C = fcmp olt <8 x float> undef, undef
;SSE2: cost of 3 {{.*}} fcmp
;SSE3: cost of 3 {{.*}} fcmp
;SSSE3: cost of 3 {{.*}} fcmp
;SSE41: cost of 3 {{.*}} fcmp
;SSE42: cost of 1 {{.*}} fcmp
;AVX: cost of 1 {{.*}} fcmp
%D = fcmp olt <2 x double> undef, undef
;SSE2: cost of 6 {{.*}} fcmp
;SSE3: cost of 6 {{.*}} fcmp
;SSSE3: cost of 6 {{.*}} fcmp
;SSE41: cost of 6 {{.*}} fcmp
;SSE42: cost of 2 {{.*}} fcmp
;AVX: cost of 1 {{.*}} fcmp
%E = fcmp olt <4 x double> undef, undef
; AVX512: cost of 1 {{.*}} %E1 = fcmp
%E1 = fcmp olt <16 x float> undef, undef
; AVX512: cost of 1 {{.*}} %E2 = fcmp
%E2 = fcmp olt <8 x double> undef, undef
; AVX512: cost of 2 {{.*}} %E3 = fcmp
%E3 = fcmp olt <16 x double> undef, undef
; -- integers --
;SSE2: cost of 1 {{.*}} icmp
;SSE3: cost of 1 {{.*}} icmp
;SSSE3: cost of 1 {{.*}} icmp
;SSE41: cost of 1 {{.*}} icmp
;SSE42: cost of 1 {{.*}} icmp
;AVX: cost of 1 {{.*}} icmp
%F = icmp eq <16 x i8> undef, undef
;SSE2: cost of 1 {{.*}} icmp
;SSE3: cost of 1 {{.*}} icmp
;SSSE3: cost of 1 {{.*}} icmp
;SSE41: cost of 1 {{.*}} icmp
;SSE42: cost of 1 {{.*}} icmp
;AVX: cost of 1 {{.*}} icmp
%G = icmp eq <8 x i16> undef, undef
;SSE2: cost of 1 {{.*}} icmp
;SSE3: cost of 1 {{.*}} icmp
;SSSE3: cost of 1 {{.*}} icmp
;SSE41: cost of 1 {{.*}} icmp
;SSE42: cost of 1 {{.*}} icmp
;AVX: cost of 1 {{.*}} icmp
%H = icmp eq <4 x i32> undef, undef
;SSE2: cost of 8 {{.*}} icmp
;SSE3: cost of 8 {{.*}} icmp
;SSSE3: cost of 8 {{.*}} icmp
;SSE41: cost of 8 {{.*}} icmp
;SSE42: cost of 1 {{.*}} icmp
;AVX: cost of 1 {{.*}} icmp
%I = icmp eq <2 x i64> undef, undef
;SSE2: cost of 16 {{.*}} icmp
;SSE3: cost of 16 {{.*}} icmp
;SSSE3: cost of 16 {{.*}} icmp
;SSE41: cost of 16 {{.*}} icmp
;SSE42: cost of 2 {{.*}} icmp
;AVX1: cost of 4 {{.*}} icmp
;AVX2: cost of 1 {{.*}} icmp
%J = icmp eq <4 x i64> undef, undef
;SSE2: cost of 2 {{.*}} icmp
;SSE3: cost of 2 {{.*}} icmp
;SSSE3: cost of 2 {{.*}} icmp
;SSE41: cost of 2 {{.*}} icmp
;SSE42: cost of 2 {{.*}} icmp
;AVX1: cost of 4 {{.*}} icmp
;AVX2: cost of 1 {{.*}} icmp
%K = icmp eq <8 x i32> undef, undef
;SSE2: cost of 2 {{.*}} icmp
;SSE3: cost of 2 {{.*}} icmp
;SSSE3: cost of 2 {{.*}} icmp
;SSE41: cost of 2 {{.*}} icmp
;SSE42: cost of 2 {{.*}} icmp
;AVX1: cost of 4 {{.*}} icmp
;AVX2: cost of 1 {{.*}} icmp
%L = icmp eq <16 x i16> undef, undef
;SSE2: cost of 2 {{.*}} icmp
;SSE3: cost of 2 {{.*}} icmp
;SSSE3: cost of 2 {{.*}} icmp
;SSE41: cost of 2 {{.*}} icmp
;SSE42: cost of 2 {{.*}} icmp
;AVX1: cost of 4 {{.*}} icmp
;AVX2: cost of 1 {{.*}} icmp
%M = icmp eq <32 x i8> undef, undef
; AVX512: cost of 1 {{.*}} %M1 = icmp
%M1 = icmp eq <16 x i32> undef, undef
; AVX512: cost of 1 {{.*}} %M2 = icmp
%M2 = icmp eq <8 x i64> undef, undef
; AVX512: cost of 2 {{.*}} %M3 = icmp
%M3 = icmp eq <16 x i64> undef, undef
;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}

View File

@@ -1,55 +0,0 @@
; RUN: opt < %s -cost-model -cost-kind=latency -analyze -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s --check-prefix=LATENCY
; RUN: opt < %s -cost-model -cost-kind=code-size -analyze -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s --check-prefix=CODESIZE
; Tests if the interface TargetTransformInfo::getInstructionCost() works correctly.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
; Mixed bag of scalar instructions (alloca, add, load, casts, intrinsic call,
; indirect call, ret). Each one carries a pair of expectations, one per
; prefix used in this test (latency and code size).
define i64 @foo(i64 %arg) {
; LATENCY: cost of 0 {{.*}} alloca i32
; CODESIZE: cost of 0 {{.*}} alloca i32
%A1 = alloca i32, align 8
; LATENCY: cost of 1 {{.*}} alloca i64, i64 undef
; CODESIZE: cost of 1 {{.*}} alloca i64, i64 undef
%A2 = alloca i64, i64 undef, align 8
; LATENCY: cost of 1 {{.*}} %I64 = add
; CODESIZE: cost of 1 {{.*}} %I64 = add
%I64 = add i64 undef, undef
; LATENCY: cost of 4 {{.*}} load
; CODESIZE: cost of 1 {{.*}} load
load i64, i64* undef, align 4
; LATENCY: cost of 0 {{.*}} bitcast
; CODESIZE: cost of 0 {{.*}} bitcast
%BC = bitcast i8* undef to i32*
; LATENCY: cost of 0 {{.*}} inttoptr
; CODESIZE: cost of 0 {{.*}} inttoptr
%I2P = inttoptr i64 undef to i8*
; LATENCY: cost of 0 {{.*}} ptrtoint
; CODESIZE: cost of 0 {{.*}} ptrtoint
%P2I = ptrtoint i8* undef to i64
; LATENCY: cost of 0 {{.*}} trunc
; CODESIZE: cost of 0 {{.*}} trunc
%TC = trunc i64 undef to i32
; LATENCY: cost of 1 {{.*}} call
; CODESIZE: cost of 1 {{.*}} call
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
; LATENCY: cost of 40 {{.*}} call void undef
; CODESIZE: cost of 1 {{.*}} call void undef
call void undef()
; LATENCY: cost of 1 {{.*}} ret
; CODESIZE: cost of 1 {{.*}} ret
ret i64 undef
}

View File

@@ -1,375 +0,0 @@
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=-avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512bw,+avx512dq -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512bw,+avx512dq,+avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512CD
; Verify the cost of scalar leading zero count instructions.
declare i64 @llvm.ctlz.i64(i64, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i8 @llvm.ctlz.i8(i8, i1)
; Scalar i64 llvm.ctlz with the second operand set to i1 0; the shared prefix
; expects cost 1.
define i64 @var_ctlz_i64(i64 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64':
; CHECK: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 0)
ret i64 %ctlz
}
; Scalar i64 llvm.ctlz with the second operand set to i1 1; the shared prefix
; expects cost 1.
define i64 @var_ctlz_i64u(i64 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64u':
; CHECK: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 1)
ret i64 %ctlz
}
; Scalar i32 llvm.ctlz with the second operand set to i1 0; the shared prefix
; expects cost 1.
define i32 @var_ctlz_i32(i32 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32':
; CHECK: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 0)
ret i32 %ctlz
}
; Scalar i32 llvm.ctlz with the second operand set to i1 1; the shared prefix
; expects cost 1.
define i32 @var_ctlz_i32u(i32 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32u':
; CHECK: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 1)
ret i32 %ctlz
}
; Scalar i16 llvm.ctlz with the second operand set to i1 0; the shared prefix
; expects cost 1.
define i16 @var_ctlz_i16(i16 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16':
; CHECK: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 0)
ret i16 %ctlz
}
; Scalar i16 llvm.ctlz with the second operand set to i1 1; the shared prefix
; expects cost 1.
define i16 @var_ctlz_i16u(i16 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16u':
; CHECK: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 1)
ret i16 %ctlz
}
; Scalar i8 llvm.ctlz with the second operand set to i1 0; the shared prefix
; expects cost 1.
define i8 @var_ctlz_i8(i8 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8':
; CHECK: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 0)
ret i8 %ctlz
}
; Scalar i8 llvm.ctlz with the second operand set to i1 1; the shared prefix
; expects cost 1.
define i8 @var_ctlz_i8u(i8 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8u':
; CHECK: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 1)
ret i8 %ctlz
}
; Verify the cost of vector leading zero count instructions.
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1)
declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>, i1)
declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1)
; llvm.ctlz on <2 x i64> with the second operand set to i1 0; one expected
; cost per subtarget prefix.
define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64':
; SSE2: Found an estimated cost of 25 for instruction: %ctlz
; SSE42: Found an estimated cost of 23 for instruction: %ctlz
; AVX: Found an estimated cost of 23 for instruction: %ctlz
; AVX512: Found an estimated cost of 23 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0)
ret <2 x i64> %ctlz
}
; llvm.ctlz on <2 x i64> with the second operand set to i1 1; one expected
; cost per subtarget prefix.
define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64u':
; SSE2: Found an estimated cost of 25 for instruction: %ctlz
; SSE42: Found an estimated cost of 23 for instruction: %ctlz
; AVX: Found an estimated cost of 23 for instruction: %ctlz
; AVX512: Found an estimated cost of 23 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1)
ret <2 x i64> %ctlz
}
; llvm.ctlz on <4 x i64> with the second operand set to i1 0; one expected
; cost per subtarget prefix.
define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64':
; SSE2: Found an estimated cost of 50 for instruction: %ctlz
; SSE42: Found an estimated cost of 46 for instruction: %ctlz
; AVX1: Found an estimated cost of 48 for instruction: %ctlz
; AVX2: Found an estimated cost of 23 for instruction: %ctlz
; AVX512: Found an estimated cost of 23 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0)
ret <4 x i64> %ctlz
}
; llvm.ctlz on <4 x i64> with the second operand set to i1 1; one expected
; cost per subtarget prefix.
define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64u':
; SSE2: Found an estimated cost of 50 for instruction: %ctlz
; SSE42: Found an estimated cost of 46 for instruction: %ctlz
; AVX1: Found an estimated cost of 48 for instruction: %ctlz
; AVX2: Found an estimated cost of 23 for instruction: %ctlz
; AVX512: Found an estimated cost of 23 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1)
ret <4 x i64> %ctlz
}
; llvm.ctlz on <8 x i64> with the second operand set to i1 0; the AVX512
; flavours (F, BW, CD) each have their own expectation here.
define <8 x i64> @var_ctlz_v8i64(<8 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i64':
; SSE2: Found an estimated cost of 100 for instruction: %ctlz
; SSE42: Found an estimated cost of 92 for instruction: %ctlz
; AVX1: Found an estimated cost of 96 for instruction: %ctlz
; AVX2: Found an estimated cost of 46 for instruction: %ctlz
; AVX512F: Found an estimated cost of 29 for instruction: %ctlz
; AVX512BW: Found an estimated cost of 23 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 0)
ret <8 x i64> %ctlz
}
; llvm.ctlz on <8 x i64> with the second operand set to i1 1; the AVX512
; flavours (F, BW, CD) each have their own expectation here.
define <8 x i64> @var_ctlz_v8i64u(<8 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i64u':
; SSE2: Found an estimated cost of 100 for instruction: %ctlz
; SSE42: Found an estimated cost of 92 for instruction: %ctlz
; AVX1: Found an estimated cost of 96 for instruction: %ctlz
; AVX2: Found an estimated cost of 46 for instruction: %ctlz
; AVX512F: Found an estimated cost of 29 for instruction: %ctlz
; AVX512BW: Found an estimated cost of 23 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 1)
ret <8 x i64> %ctlz
}
; llvm.ctlz on <4 x i32> with the second operand set to i1 0; one expected
; cost per subtarget prefix.
define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32':
; SSE2: Found an estimated cost of 26 for instruction: %ctlz
; SSE42: Found an estimated cost of 18 for instruction: %ctlz
; AVX: Found an estimated cost of 18 for instruction: %ctlz
; AVX512: Found an estimated cost of 18 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0)
ret <4 x i32> %ctlz
}
; llvm.ctlz on <4 x i32> with the second operand set to i1 1; one expected
; cost per subtarget prefix.
define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32u':
; SSE2: Found an estimated cost of 26 for instruction: %ctlz
; SSE42: Found an estimated cost of 18 for instruction: %ctlz
; AVX: Found an estimated cost of 18 for instruction: %ctlz
; AVX512: Found an estimated cost of 18 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1)
ret <4 x i32> %ctlz
}
; llvm.ctlz on <8 x i32> with the second operand set to i1 0; one expected
; cost per subtarget prefix.
define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32':
; SSE2: Found an estimated cost of 52 for instruction: %ctlz
; SSE42: Found an estimated cost of 36 for instruction: %ctlz
; AVX1: Found an estimated cost of 38 for instruction: %ctlz
; AVX2: Found an estimated cost of 18 for instruction: %ctlz
; AVX512: Found an estimated cost of 18 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0)
ret <8 x i32> %ctlz
}
; llvm.ctlz on <8 x i32> with the second operand set to i1 1; one expected
; cost per subtarget prefix.
define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32u':
; SSE2: Found an estimated cost of 52 for instruction: %ctlz
; SSE42: Found an estimated cost of 36 for instruction: %ctlz
; AVX1: Found an estimated cost of 38 for instruction: %ctlz
; AVX2: Found an estimated cost of 18 for instruction: %ctlz
; AVX512: Found an estimated cost of 18 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1)
ret <8 x i32> %ctlz
}
; llvm.ctlz on <16 x i32> with the second operand set to i1 0; the AVX512
; flavours (F, BW, CD) each have their own expectation here.
define <16 x i32> @var_ctlz_v16i32(<16 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i32':
; SSE2: Found an estimated cost of 104 for instruction: %ctlz
; SSE42: Found an estimated cost of 72 for instruction: %ctlz
; AVX1: Found an estimated cost of 76 for instruction: %ctlz
; AVX2: Found an estimated cost of 36 for instruction: %ctlz
; AVX512F: Found an estimated cost of 35 for instruction: %ctlz
; AVX512BW: Found an estimated cost of 22 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 0)
ret <16 x i32> %ctlz
}
; llvm.ctlz on <16 x i32> with the second operand set to i1 1; the AVX512
; flavours (F, BW, CD) each have their own expectation here.
define <16 x i32> @var_ctlz_v16i32u(<16 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i32u':
; SSE2: Found an estimated cost of 104 for instruction: %ctlz
; SSE42: Found an estimated cost of 72 for instruction: %ctlz
; AVX1: Found an estimated cost of 76 for instruction: %ctlz
; AVX2: Found an estimated cost of 36 for instruction: %ctlz
; AVX512F: Found an estimated cost of 35 for instruction: %ctlz
; AVX512BW: Found an estimated cost of 22 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
%ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 1)
ret <16 x i32> %ctlz
}
; llvm.ctlz on <8 x i16> with the second operand set to i1 0; one expected
; cost per subtarget prefix.
define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16':
; SSE2: Found an estimated cost of 20 for instruction: %ctlz
; SSE42: Found an estimated cost of 14 for instruction: %ctlz
; AVX: Found an estimated cost of 14 for instruction: %ctlz
; AVX512: Found an estimated cost of 14 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz
%ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0)
ret <8 x i16> %ctlz
}
; llvm.ctlz on <8 x i16> with the second operand set to i1 1; one expected
; cost per subtarget prefix.
define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16u':
; SSE2: Found an estimated cost of 20 for instruction: %ctlz
; SSE42: Found an estimated cost of 14 for instruction: %ctlz
; AVX: Found an estimated cost of 14 for instruction: %ctlz
; AVX512: Found an estimated cost of 14 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz
%ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1)
ret <8 x i16> %ctlz
}
; llvm.ctlz on <16 x i16> with the second operand set to i1 0; one expected
; cost per subtarget prefix.
define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16':
; SSE2: Found an estimated cost of 40 for instruction: %ctlz
; SSE42: Found an estimated cost of 28 for instruction: %ctlz
; AVX1: Found an estimated cost of 30 for instruction: %ctlz
; AVX2: Found an estimated cost of 14 for instruction: %ctlz
; AVX512: Found an estimated cost of 14 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz
%ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0)
ret <16 x i16> %ctlz
}
; llvm.ctlz on <16 x i16> with the second operand set to i1 1; one expected
; cost per subtarget prefix.
define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16u':
; SSE2: Found an estimated cost of 40 for instruction: %ctlz
; SSE42: Found an estimated cost of 28 for instruction: %ctlz
; AVX1: Found an estimated cost of 30 for instruction: %ctlz
; AVX2: Found an estimated cost of 14 for instruction: %ctlz
; AVX512: Found an estimated cost of 14 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz
%ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1)
ret <16 x i16> %ctlz
}
; Pins the cost-model estimate for llvm.ctlz on <32 x i16> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <32 x i16> @var_ctlz_v32i16(<32 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i16':
; SSE2: Found an estimated cost of 80 for instruction: %ctlz
; SSE42: Found an estimated cost of 56 for instruction: %ctlz
; AVX1: Found an estimated cost of 60 for instruction: %ctlz
; AVX2: Found an estimated cost of 28 for instruction: %ctlz
; AVX512F: Found an estimated cost of 28 for instruction: %ctlz
; AVX512BW: Found an estimated cost of 18 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 8 for instruction: %ctlz
%ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 0)
ret <32 x i16> %ctlz
}
; Pins the cost-model estimate for llvm.ctlz on <32 x i16> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <32 x i16> @var_ctlz_v32i16u(<32 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i16u':
; SSE2: Found an estimated cost of 80 for instruction: %ctlz
; SSE42: Found an estimated cost of 56 for instruction: %ctlz
; AVX1: Found an estimated cost of 60 for instruction: %ctlz
; AVX2: Found an estimated cost of 28 for instruction: %ctlz
; AVX512F: Found an estimated cost of 28 for instruction: %ctlz
; AVX512BW: Found an estimated cost of 18 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 8 for instruction: %ctlz
%ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 1)
ret <32 x i16> %ctlz
}
; Pins the cost-model estimate for llvm.ctlz on <16 x i8> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8':
; SSE2: Found an estimated cost of 17 for instruction: %ctlz
; SSE42: Found an estimated cost of 9 for instruction: %ctlz
; AVX: Found an estimated cost of 9 for instruction: %ctlz
; AVX512: Found an estimated cost of 9 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz
%ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0)
ret <16 x i8> %ctlz
}
; Pins the cost-model estimate for llvm.ctlz on <16 x i8> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8u':
; SSE2: Found an estimated cost of 17 for instruction: %ctlz
; SSE42: Found an estimated cost of 9 for instruction: %ctlz
; AVX: Found an estimated cost of 9 for instruction: %ctlz
; AVX512: Found an estimated cost of 9 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz
%ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1)
ret <16 x i8> %ctlz
}
; Pins the cost-model estimate for llvm.ctlz on <32 x i8> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8':
; SSE2: Found an estimated cost of 34 for instruction: %ctlz
; SSE42: Found an estimated cost of 18 for instruction: %ctlz
; AVX1: Found an estimated cost of 20 for instruction: %ctlz
; AVX2: Found an estimated cost of 9 for instruction: %ctlz
; AVX512: Found an estimated cost of 9 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 10 for instruction: %ctlz
%ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0)
ret <32 x i8> %ctlz
}
; Pins the cost-model estimate for llvm.ctlz on <32 x i8> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8u':
; SSE2: Found an estimated cost of 34 for instruction: %ctlz
; SSE42: Found an estimated cost of 18 for instruction: %ctlz
; AVX1: Found an estimated cost of 20 for instruction: %ctlz
; AVX2: Found an estimated cost of 9 for instruction: %ctlz
; AVX512: Found an estimated cost of 9 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 10 for instruction: %ctlz
%ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1)
ret <32 x i8> %ctlz
}
; Pins the cost-model estimate for llvm.ctlz on <64 x i8> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <64 x i8> @var_ctlz_v64i8(<64 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v64i8':
; SSE2: Found an estimated cost of 68 for instruction: %ctlz
; SSE42: Found an estimated cost of 36 for instruction: %ctlz
; AVX1: Found an estimated cost of 40 for instruction: %ctlz
; AVX2: Found an estimated cost of 18 for instruction: %ctlz
; AVX512F: Found an estimated cost of 18 for instruction: %ctlz
; AVX512BW: Found an estimated cost of 17 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 20 for instruction: %ctlz
%ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 0)
ret <64 x i8> %ctlz
}
; Pins the cost-model estimate for llvm.ctlz on <64 x i8> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <64 x i8> @var_ctlz_v64i8u(<64 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v64i8u':
; SSE2: Found an estimated cost of 68 for instruction: %ctlz
; SSE42: Found an estimated cost of 36 for instruction: %ctlz
; AVX1: Found an estimated cost of 40 for instruction: %ctlz
; AVX2: Found an estimated cost of 18 for instruction: %ctlz
; AVX512F: Found an estimated cost of 18 for instruction: %ctlz
; AVX512BW: Found an estimated cost of 17 for instruction: %ctlz
; AVX512CD: Found an estimated cost of 20 for instruction: %ctlz
%ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 1)
ret <64 x i8> %ctlz
}

View File

@@ -1,194 +0,0 @@
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+popcnt,+sse4.2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+popcnt,+avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+popcnt,+avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+popcnt,+avx512f -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F -check-prefix=POPCNT
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+popcnt,+avx512vl,+avx512bw,+avx512dq -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW -check-prefix=POPCNT
; Verify the cost of scalar population count instructions.
; Declarations of the scalar ctpop intrinsics (i64, i32, i16, i8) that the
; var_ctpop_i* functions below call.
declare i64 @llvm.ctpop.i64(i64)
declare i32 @llvm.ctpop.i32(i32)
declare i16 @llvm.ctpop.i16(i16)
declare i8 @llvm.ctpop.i8(i8)
; Pins the cost-model estimate for llvm.ctpop on i64. Two outcomes are expected,
; one for the run without the popcnt feature and one for the runs with it.
define i64 @var_ctpop_i64(i64 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i64':
; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop
; POPCNT: Found an estimated cost of 1 for instruction: %ctpop
%ctpop = call i64 @llvm.ctpop.i64(i64 %a)
ret i64 %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on i32. Two outcomes are expected,
; one for the run without the popcnt feature and one for the runs with it.
define i32 @var_ctpop_i32(i32 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i32':
; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop
; POPCNT: Found an estimated cost of 1 for instruction: %ctpop
%ctpop = call i32 @llvm.ctpop.i32(i32 %a)
ret i32 %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on i16. Two outcomes are expected,
; one for the run without the popcnt feature and one for the runs with it.
define i16 @var_ctpop_i16(i16 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i16':
; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop
; POPCNT: Found an estimated cost of 1 for instruction: %ctpop
%ctpop = call i16 @llvm.ctpop.i16(i16 %a)
ret i16 %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on i8. Two outcomes are expected,
; one for the run without the popcnt feature and one for the runs with it.
define i8 @var_ctpop_i8(i8 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i8':
; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop
; POPCNT: Found an estimated cost of 1 for instruction: %ctpop
%ctpop = call i8 @llvm.ctpop.i8(i8 %a)
ret i8 %ctpop
}
; Verify the cost of vector population count instructions.
; Declarations of the vector ctpop intrinsics used below, grouped by total
; vector width (128-bit, then 256-bit, then 512-bit types).
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
declare <32 x i16> @llvm.ctpop.v32i16(<32 x i16>)
declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>)
; Pins the cost-model estimate for llvm.ctpop on <2 x i64>, with one expected
; value per FileCheck prefix listed in the body.
define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v2i64':
; SSE2: Found an estimated cost of 12 for instruction: %ctpop
; SSE42: Found an estimated cost of 7 for instruction: %ctpop
; AVX: Found an estimated cost of 7 for instruction: %ctpop
; AVX512: Found an estimated cost of 7 for instruction: %ctpop
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
ret <2 x i64> %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on <4 x i64>, with one expected
; value per FileCheck prefix listed in the body.
define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i64':
; SSE2: Found an estimated cost of 24 for instruction: %ctpop
; SSE42: Found an estimated cost of 14 for instruction: %ctpop
; AVX1: Found an estimated cost of 16 for instruction: %ctpop
; AVX2: Found an estimated cost of 7 for instruction: %ctpop
; AVX512: Found an estimated cost of 7 for instruction: %ctpop
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
ret <4 x i64> %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on <8 x i64>, with one expected
; value per FileCheck prefix listed in the body.
define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i64':
; SSE2: Found an estimated cost of 48 for instruction: %ctpop
; SSE42: Found an estimated cost of 28 for instruction: %ctpop
; AVX1: Found an estimated cost of 32 for instruction: %ctpop
; AVX2: Found an estimated cost of 14 for instruction: %ctpop
; AVX512F: Found an estimated cost of 16 for instruction: %ctpop
; AVX512BW: Found an estimated cost of 7 for instruction: %ctpop
%ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
ret <8 x i64> %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on <4 x i32>, with one expected
; value per FileCheck prefix listed in the body.
define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i32':
; SSE2: Found an estimated cost of 15 for instruction: %ctpop
; SSE42: Found an estimated cost of 11 for instruction: %ctpop
; AVX: Found an estimated cost of 11 for instruction: %ctpop
; AVX512: Found an estimated cost of 11 for instruction: %ctpop
%ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
ret <4 x i32> %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on <8 x i32>, with one expected
; value per FileCheck prefix listed in the body.
define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i32':
; SSE2: Found an estimated cost of 30 for instruction: %ctpop
; SSE42: Found an estimated cost of 22 for instruction: %ctpop
; AVX1: Found an estimated cost of 24 for instruction: %ctpop
; AVX2: Found an estimated cost of 11 for instruction: %ctpop
; AVX512: Found an estimated cost of 11 for instruction: %ctpop
%ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
ret <8 x i32> %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on <16 x i32>, with one expected
; value per FileCheck prefix listed in the body.
define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i32':
; SSE2: Found an estimated cost of 60 for instruction: %ctpop
; SSE42: Found an estimated cost of 44 for instruction: %ctpop
; AVX1: Found an estimated cost of 48 for instruction: %ctpop
; AVX2: Found an estimated cost of 22 for instruction: %ctpop
; AVX512F: Found an estimated cost of 24 for instruction: %ctpop
; AVX512BW: Found an estimated cost of 11 for instruction: %ctpop
%ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
ret <16 x i32> %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on <8 x i16>, with one expected
; value per FileCheck prefix listed in the body.
define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i16':
; SSE2: Found an estimated cost of 13 for instruction: %ctpop
; SSE42: Found an estimated cost of 9 for instruction: %ctpop
; AVX: Found an estimated cost of 9 for instruction: %ctpop
; AVX512: Found an estimated cost of 9 for instruction: %ctpop
%ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
ret <8 x i16> %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on <16 x i16>, with one expected
; value per FileCheck prefix listed in the body.
define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i16':
; SSE2: Found an estimated cost of 26 for instruction: %ctpop
; SSE42: Found an estimated cost of 18 for instruction: %ctpop
; AVX1: Found an estimated cost of 20 for instruction: %ctpop
; AVX2: Found an estimated cost of 9 for instruction: %ctpop
; AVX512: Found an estimated cost of 9 for instruction: %ctpop
%ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
ret <16 x i16> %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on <32 x i16>, with one expected
; value per FileCheck prefix listed in the body.
define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v32i16':
; SSE2: Found an estimated cost of 52 for instruction: %ctpop
; SSE42: Found an estimated cost of 36 for instruction: %ctpop
; AVX1: Found an estimated cost of 40 for instruction: %ctpop
; AVX2: Found an estimated cost of 18 for instruction: %ctpop
; AVX512F: Found an estimated cost of 18 for instruction: %ctpop
; AVX512BW: Found an estimated cost of 9 for instruction: %ctpop
%ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
ret <32 x i16> %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on <16 x i8>, with one expected
; value per FileCheck prefix listed in the body.
define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i8':
; SSE2: Found an estimated cost of 10 for instruction: %ctpop
; SSE42: Found an estimated cost of 6 for instruction: %ctpop
; AVX: Found an estimated cost of 6 for instruction: %ctpop
; AVX512: Found an estimated cost of 6 for instruction: %ctpop
%ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
ret <16 x i8> %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on <32 x i8>, with one expected
; value per FileCheck prefix listed in the body.
define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v32i8':
; SSE2: Found an estimated cost of 20 for instruction: %ctpop
; SSE42: Found an estimated cost of 12 for instruction: %ctpop
; AVX1: Found an estimated cost of 14 for instruction: %ctpop
; AVX2: Found an estimated cost of 6 for instruction: %ctpop
; AVX512: Found an estimated cost of 6 for instruction: %ctpop
%ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
ret <32 x i8> %ctpop
}
; Pins the cost-model estimate for llvm.ctpop on <64 x i8>, with one expected
; value per FileCheck prefix listed in the body.
define <64 x i8> @var_ctpop_v64i8(<64 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v64i8':
; SSE2: Found an estimated cost of 40 for instruction: %ctpop
; SSE42: Found an estimated cost of 24 for instruction: %ctpop
; AVX1: Found an estimated cost of 28 for instruction: %ctpop
; AVX2: Found an estimated cost of 12 for instruction: %ctpop
; AVX512F: Found an estimated cost of 12 for instruction: %ctpop
; AVX512BW: Found an estimated cost of 6 for instruction: %ctpop
%ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
ret <64 x i8> %ctpop
}

View File

@@ -1,350 +0,0 @@
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512bw,+avx512dq -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW
; Verify the cost of scalar trailing zero count instructions.
; Declarations of the scalar cttz intrinsics (i64, i32, i16, i8). Each takes the
; value plus an i1 flag; the functions below call them with both i1 0 and i1 1.
declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i8 @llvm.cttz.i8(i8, i1)
; Pins the cost-model estimate for llvm.cttz on i64 with the second operand
; i1 0 (per the LangRef, a zero input then has a defined result). The common
; prefix is used, so every run configuration expects the same value.
define i64 @var_cttz_i64(i64 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64':
; CHECK: Found an estimated cost of 1 for instruction: %cttz
%cttz = call i64 @llvm.cttz.i64(i64 %a, i1 0)
ret i64 %cttz
}
; Pins the cost-model estimate for llvm.cttz on i64 with the second operand
; i1 1 (per the LangRef, a zero input then gives an undefined result). The
; common prefix is used, so every run configuration expects the same value.
define i64 @var_cttz_i64u(i64 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64u':
; CHECK: Found an estimated cost of 1 for instruction: %cttz
%cttz = call i64 @llvm.cttz.i64(i64 %a, i1 1)
ret i64 %cttz
}
; Pins the cost-model estimate for llvm.cttz on i32 with the second operand
; i1 0 (per the LangRef, a zero input then has a defined result). The common
; prefix is used, so every run configuration expects the same value.
define i32 @var_cttz_i32(i32 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32':
; CHECK: Found an estimated cost of 1 for instruction: %cttz
%cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0)
ret i32 %cttz
}
; Pins the cost-model estimate for llvm.cttz on i32 with the second operand
; i1 1 (per the LangRef, a zero input then gives an undefined result). The
; common prefix is used, so every run configuration expects the same value.
define i32 @var_cttz_i32u(i32 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32u':
; CHECK: Found an estimated cost of 1 for instruction: %cttz
%cttz = call i32 @llvm.cttz.i32(i32 %a, i1 1)
ret i32 %cttz
}
; Pins the cost-model estimate for llvm.cttz on i16 with the second operand
; i1 0 (per the LangRef, a zero input then has a defined result). The common
; prefix is used, so every run configuration expects the same value.
define i16 @var_cttz_i16(i16 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16':
; CHECK: Found an estimated cost of 1 for instruction: %cttz
%cttz = call i16 @llvm.cttz.i16(i16 %a, i1 0)
ret i16 %cttz
}
; Pins the cost-model estimate for llvm.cttz on i16 with the second operand
; i1 1 (per the LangRef, a zero input then gives an undefined result). The
; common prefix is used, so every run configuration expects the same value.
define i16 @var_cttz_i16u(i16 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16u':
; CHECK: Found an estimated cost of 1 for instruction: %cttz
%cttz = call i16 @llvm.cttz.i16(i16 %a, i1 1)
ret i16 %cttz
}
; Pins the cost-model estimate for llvm.cttz on i8 with the second operand
; i1 0 (per the LangRef, a zero input then has a defined result). The common
; prefix is used, so every run configuration expects the same value.
define i8 @var_cttz_i8(i8 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8':
; CHECK: Found an estimated cost of 1 for instruction: %cttz
%cttz = call i8 @llvm.cttz.i8(i8 %a, i1 0)
ret i8 %cttz
}
; Pins the cost-model estimate for llvm.cttz on i8 with the second operand
; i1 1 (per the LangRef, a zero input then gives an undefined result). The
; common prefix is used, so every run configuration expects the same value.
define i8 @var_cttz_i8u(i8 %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8u':
; CHECK: Found an estimated cost of 1 for instruction: %cttz
%cttz = call i8 @llvm.cttz.i8(i8 %a, i1 1)
ret i8 %cttz
}
; Verify the cost of vector trailing zero count instructions.
; Declarations of the vector cttz intrinsics used below, grouped by total
; vector width (128-bit, then 256-bit, then 512-bit types). Each takes the
; vector plus an i1 flag.
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
declare <8 x i64> @llvm.cttz.v8i64(<8 x i64>, i1)
declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>, i1)
declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1)
; Pins the cost-model estimate for llvm.cttz on <2 x i64> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64':
; SSE2: Found an estimated cost of 14 for instruction: %cttz
; SSE42: Found an estimated cost of 10 for instruction: %cttz
; AVX: Found an estimated cost of 10 for instruction: %cttz
; AVX512: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
ret <2 x i64> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <2 x i64> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64u':
; SSE2: Found an estimated cost of 14 for instruction: %cttz
; SSE42: Found an estimated cost of 10 for instruction: %cttz
; AVX: Found an estimated cost of 10 for instruction: %cttz
; AVX512: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
ret <2 x i64> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <4 x i64> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64':
; SSE2: Found an estimated cost of 28 for instruction: %cttz
; SSE42: Found an estimated cost of 20 for instruction: %cttz
; AVX1: Found an estimated cost of 22 for instruction: %cttz
; AVX2: Found an estimated cost of 10 for instruction: %cttz
; AVX512: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
ret <4 x i64> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <4 x i64> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64u':
; SSE2: Found an estimated cost of 28 for instruction: %cttz
; SSE42: Found an estimated cost of 20 for instruction: %cttz
; AVX1: Found an estimated cost of 22 for instruction: %cttz
; AVX2: Found an estimated cost of 10 for instruction: %cttz
; AVX512: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
ret <4 x i64> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <8 x i64> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i64':
; SSE2: Found an estimated cost of 56 for instruction: %cttz
; SSE42: Found an estimated cost of 40 for instruction: %cttz
; AVX1: Found an estimated cost of 44 for instruction: %cttz
; AVX2: Found an estimated cost of 20 for instruction: %cttz
; AVX512F: Found an estimated cost of 20 for instruction: %cttz
; AVX512BW: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0)
ret <8 x i64> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <8 x i64> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i64u':
; SSE2: Found an estimated cost of 56 for instruction: %cttz
; SSE42: Found an estimated cost of 40 for instruction: %cttz
; AVX1: Found an estimated cost of 44 for instruction: %cttz
; AVX2: Found an estimated cost of 20 for instruction: %cttz
; AVX512F: Found an estimated cost of 20 for instruction: %cttz
; AVX512BW: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1)
ret <8 x i64> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <4 x i32> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32':
; SSE2: Found an estimated cost of 18 for instruction: %cttz
; SSE42: Found an estimated cost of 14 for instruction: %cttz
; AVX: Found an estimated cost of 14 for instruction: %cttz
; AVX512: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
ret <4 x i32> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <4 x i32> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32u':
; SSE2: Found an estimated cost of 18 for instruction: %cttz
; SSE42: Found an estimated cost of 14 for instruction: %cttz
; AVX: Found an estimated cost of 14 for instruction: %cttz
; AVX512: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
ret <4 x i32> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <8 x i32> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32':
; SSE2: Found an estimated cost of 36 for instruction: %cttz
; SSE42: Found an estimated cost of 28 for instruction: %cttz
; AVX1: Found an estimated cost of 30 for instruction: %cttz
; AVX2: Found an estimated cost of 14 for instruction: %cttz
; AVX512: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
ret <8 x i32> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <8 x i32> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32u':
; SSE2: Found an estimated cost of 36 for instruction: %cttz
; SSE42: Found an estimated cost of 28 for instruction: %cttz
; AVX1: Found an estimated cost of 30 for instruction: %cttz
; AVX2: Found an estimated cost of 14 for instruction: %cttz
; AVX512: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
ret <8 x i32> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <16 x i32> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i32':
; SSE2: Found an estimated cost of 72 for instruction: %cttz
; SSE42: Found an estimated cost of 56 for instruction: %cttz
; AVX1: Found an estimated cost of 60 for instruction: %cttz
; AVX2: Found an estimated cost of 28 for instruction: %cttz
; AVX512F: Found an estimated cost of 28 for instruction: %cttz
; AVX512BW: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0)
ret <16 x i32> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <16 x i32> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i32u':
; SSE2: Found an estimated cost of 72 for instruction: %cttz
; SSE42: Found an estimated cost of 56 for instruction: %cttz
; AVX1: Found an estimated cost of 60 for instruction: %cttz
; AVX2: Found an estimated cost of 28 for instruction: %cttz
; AVX512F: Found an estimated cost of 28 for instruction: %cttz
; AVX512BW: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1)
ret <16 x i32> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <8 x i16> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16':
; SSE2: Found an estimated cost of 16 for instruction: %cttz
; SSE42: Found an estimated cost of 12 for instruction: %cttz
; AVX: Found an estimated cost of 12 for instruction: %cttz
; AVX512: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
ret <8 x i16> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <8 x i16> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16u':
; SSE2: Found an estimated cost of 16 for instruction: %cttz
; SSE42: Found an estimated cost of 12 for instruction: %cttz
; AVX: Found an estimated cost of 12 for instruction: %cttz
; AVX512: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
ret <8 x i16> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <16 x i16> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16':
; SSE2: Found an estimated cost of 32 for instruction: %cttz
; SSE42: Found an estimated cost of 24 for instruction: %cttz
; AVX1: Found an estimated cost of 26 for instruction: %cttz
; AVX2: Found an estimated cost of 12 for instruction: %cttz
; AVX512: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
ret <16 x i16> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <16 x i16> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16u':
; SSE2: Found an estimated cost of 32 for instruction: %cttz
; SSE42: Found an estimated cost of 24 for instruction: %cttz
; AVX1: Found an estimated cost of 26 for instruction: %cttz
; AVX2: Found an estimated cost of 12 for instruction: %cttz
; AVX512: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
ret <16 x i16> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <32 x i16> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i16':
; SSE2: Found an estimated cost of 64 for instruction: %cttz
; SSE42: Found an estimated cost of 48 for instruction: %cttz
; AVX1: Found an estimated cost of 52 for instruction: %cttz
; AVX2: Found an estimated cost of 24 for instruction: %cttz
; AVX512F: Found an estimated cost of 24 for instruction: %cttz
; AVX512BW: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0)
ret <32 x i16> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <32 x i16> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i16u':
; SSE2: Found an estimated cost of 64 for instruction: %cttz
; SSE42: Found an estimated cost of 48 for instruction: %cttz
; AVX1: Found an estimated cost of 52 for instruction: %cttz
; AVX2: Found an estimated cost of 24 for instruction: %cttz
; AVX512F: Found an estimated cost of 24 for instruction: %cttz
; AVX512BW: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1)
ret <32 x i16> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <16 x i8> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8':
; SSE2: Found an estimated cost of 13 for instruction: %cttz
; SSE42: Found an estimated cost of 9 for instruction: %cttz
; AVX: Found an estimated cost of 9 for instruction: %cttz
; AVX512: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
ret <16 x i8> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <16 x i8> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8u':
; SSE2: Found an estimated cost of 13 for instruction: %cttz
; SSE42: Found an estimated cost of 9 for instruction: %cttz
; AVX: Found an estimated cost of 9 for instruction: %cttz
; AVX512: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
ret <16 x i8> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <32 x i8> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8':
; SSE2: Found an estimated cost of 26 for instruction: %cttz
; SSE42: Found an estimated cost of 18 for instruction: %cttz
; AVX1: Found an estimated cost of 20 for instruction: %cttz
; AVX2: Found an estimated cost of 9 for instruction: %cttz
; AVX512: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
ret <32 x i8> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <32 x i8> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8u':
; SSE2: Found an estimated cost of 26 for instruction: %cttz
; SSE42: Found an estimated cost of 18 for instruction: %cttz
; AVX1: Found an estimated cost of 20 for instruction: %cttz
; AVX2: Found an estimated cost of 9 for instruction: %cttz
; AVX512: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
ret <32 x i8> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <64 x i8> with the second
; operand i1 0 (per the LangRef, a zero input then has a defined result).
define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v64i8':
; SSE2: Found an estimated cost of 52 for instruction: %cttz
; SSE42: Found an estimated cost of 36 for instruction: %cttz
; AVX1: Found an estimated cost of 40 for instruction: %cttz
; AVX2: Found an estimated cost of 18 for instruction: %cttz
; AVX512F: Found an estimated cost of 18 for instruction: %cttz
; AVX512BW: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0)
ret <64 x i8> %cttz
}
; Pins the cost-model estimate for llvm.cttz on <64 x i8> with the second
; operand i1 1 (per the LangRef, a zero input then gives an undefined result).
; Expected values are the same as for the i1 0 variant.
define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v64i8u':
; SSE2: Found an estimated cost of 52 for instruction: %cttz
; SSE42: Found an estimated cost of 36 for instruction: %cttz
; AVX1: Found an estimated cost of 40 for instruction: %cttz
; AVX2: Found an estimated cost of 18 for instruction: %cttz
; AVX512F: Found an estimated cost of 18 for instruction: %cttz
; AVX512BW: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1)
ret <64 x i8> %cttz
}

View File

@@ -1,376 +0,0 @@
; Cost-model expectations for the sdiv/udiv test functions that follow, on an
; x86-64 macOS triple. Each FileCheck invocation below enables three prefixes,
; namely the common CHECK prefix, one family prefix (SSE, AVX or AVX512) and
; one prefix naming the exact feature level selected with -mattr.
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; The module pins the data layout and triple to match the -mtriple used above.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Signed division with both operands undef (no constant divisor), covering
; i64/i32/i16/i8 scalars and their 128-, 256- and 512-bit vector forms.
; Every scalar is expected to cost 1. Vector expectations are given only under
; the shared SSE and AVX family prefixes, and each one doubles as the element
; count doubles. No expectation in this function uses an AVX512 prefix, so the
; two AVX-512 runs only verify the scalar costs here.
; CHECK-LABEL: 'sdiv'
define i32 @sdiv() {
; CHECK: cost of 1 {{.*}} %I64 = sdiv
%I64 = sdiv i64 undef, undef
; SSE: cost of 40 {{.*}} %V2i64 = sdiv
; AVX: cost of 40 {{.*}} %V2i64 = sdiv
%V2i64 = sdiv <2 x i64> undef, undef
; SSE: cost of 80 {{.*}} %V4i64 = sdiv
; AVX: cost of 80 {{.*}} %V4i64 = sdiv
%V4i64 = sdiv <4 x i64> undef, undef
; SSE: cost of 160 {{.*}} %V8i64 = sdiv
; AVX: cost of 160 {{.*}} %V8i64 = sdiv
%V8i64 = sdiv <8 x i64> undef, undef
; CHECK: cost of 1 {{.*}} %I32 = sdiv
%I32 = sdiv i32 undef, undef
; SSE: cost of 80 {{.*}} %V4i32 = sdiv
; AVX: cost of 80 {{.*}} %V4i32 = sdiv
%V4i32 = sdiv <4 x i32> undef, undef
; SSE: cost of 160 {{.*}} %V8i32 = sdiv
; AVX: cost of 160 {{.*}} %V8i32 = sdiv
%V8i32 = sdiv <8 x i32> undef, undef
; SSE: cost of 320 {{.*}} %V16i32 = sdiv
; AVX: cost of 320 {{.*}} %V16i32 = sdiv
%V16i32 = sdiv <16 x i32> undef, undef
; CHECK: cost of 1 {{.*}} %I16 = sdiv
%I16 = sdiv i16 undef, undef
; SSE: cost of 160 {{.*}} %V8i16 = sdiv
; AVX: cost of 160 {{.*}} %V8i16 = sdiv
%V8i16 = sdiv <8 x i16> undef, undef
; SSE: cost of 320 {{.*}} %V16i16 = sdiv
; AVX: cost of 320 {{.*}} %V16i16 = sdiv
%V16i16 = sdiv <16 x i16> undef, undef
; SSE: cost of 640 {{.*}} %V32i16 = sdiv
; AVX: cost of 640 {{.*}} %V32i16 = sdiv
%V32i16 = sdiv <32 x i16> undef, undef
; CHECK: cost of 1 {{.*}} %I8 = sdiv
%I8 = sdiv i8 undef, undef
; SSE: cost of 320 {{.*}} %V16i8 = sdiv
; AVX: cost of 320 {{.*}} %V16i8 = sdiv
%V16i8 = sdiv <16 x i8> undef, undef
; SSE: cost of 640 {{.*}} %V32i8 = sdiv
; AVX: cost of 640 {{.*}} %V32i8 = sdiv
%V32i8 = sdiv <32 x i8> undef, undef
; SSE: cost of 1280 {{.*}} %V64i8 = sdiv
; AVX: cost of 1280 {{.*}} %V64i8 = sdiv
%V64i8 = sdiv <64 x i8> undef, undef
ret i32 undef
}
; Unsigned division with both operands undef (no constant divisor), covering
; i64/i32/i16/i8 scalars and their 128-, 256- and 512-bit vector forms.
; Every scalar is expected to cost 1. Vector expectations are given only under
; the shared SSE and AVX family prefixes and use the same numbers as the signed
; variant of this test. No expectation here uses an AVX512 prefix.
; CHECK-LABEL: 'udiv'
define i32 @udiv() {
; CHECK: cost of 1 {{.*}} %I64 = udiv
%I64 = udiv i64 undef, undef
; SSE: cost of 40 {{.*}} %V2i64 = udiv
; AVX: cost of 40 {{.*}} %V2i64 = udiv
%V2i64 = udiv <2 x i64> undef, undef
; SSE: cost of 80 {{.*}} %V4i64 = udiv
; AVX: cost of 80 {{.*}} %V4i64 = udiv
%V4i64 = udiv <4 x i64> undef, undef
; SSE: cost of 160 {{.*}} %V8i64 = udiv
; AVX: cost of 160 {{.*}} %V8i64 = udiv
%V8i64 = udiv <8 x i64> undef, undef
; CHECK: cost of 1 {{.*}} %I32 = udiv
%I32 = udiv i32 undef, undef
; SSE: cost of 80 {{.*}} %V4i32 = udiv
; AVX: cost of 80 {{.*}} %V4i32 = udiv
%V4i32 = udiv <4 x i32> undef, undef
; SSE: cost of 160 {{.*}} %V8i32 = udiv
; AVX: cost of 160 {{.*}} %V8i32 = udiv
%V8i32 = udiv <8 x i32> undef, undef
; SSE: cost of 320 {{.*}} %V16i32 = udiv
; AVX: cost of 320 {{.*}} %V16i32 = udiv
%V16i32 = udiv <16 x i32> undef, undef
; CHECK: cost of 1 {{.*}} %I16 = udiv
%I16 = udiv i16 undef, undef
; SSE: cost of 160 {{.*}} %V8i16 = udiv
; AVX: cost of 160 {{.*}} %V8i16 = udiv
%V8i16 = udiv <8 x i16> undef, undef
; SSE: cost of 320 {{.*}} %V16i16 = udiv
; AVX: cost of 320 {{.*}} %V16i16 = udiv
%V16i16 = udiv <16 x i16> undef, undef
; SSE: cost of 640 {{.*}} %V32i16 = udiv
; AVX: cost of 640 {{.*}} %V32i16 = udiv
%V32i16 = udiv <32 x i16> undef, undef
; CHECK: cost of 1 {{.*}} %I8 = udiv
%I8 = udiv i8 undef, undef
; SSE: cost of 320 {{.*}} %V16i8 = udiv
; AVX: cost of 320 {{.*}} %V16i8 = udiv
%V16i8 = udiv <16 x i8> undef, undef
; SSE: cost of 640 {{.*}} %V32i8 = udiv
; AVX: cost of 640 {{.*}} %V32i8 = udiv
%V32i8 = udiv <32 x i8> undef, undef
; SSE: cost of 1280 {{.*}} %V64i8 = udiv
; AVX: cost of 1280 {{.*}} %V64i8 = udiv
%V64i8 = udiv <64 x i8> undef, undef
ret i32 undef
}
; CHECK-LABEL: 'sdiv_uniformconst'
define i32 @sdiv_uniformconst() {
; CHECK: cost of 1 {{.*}} %I64 = sdiv
%I64 = sdiv i64 undef, 7
; SSE: cost of 40 {{.*}} %V2i64 = sdiv
; AVX: cost of 40 {{.*}} %V2i64 = sdiv
%V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
; SSE: cost of 80 {{.*}} %V4i64 = sdiv
; AVX: cost of 80 {{.*}} %V4i64 = sdiv
%V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
; SSE: cost of 160 {{.*}} %V8i64 = sdiv
; AVX: cost of 160 {{.*}} %V8i64 = sdiv
%V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
; CHECK: cost of 1 {{.*}} %I32 = sdiv
%I32 = sdiv i32 undef, 7
; SSE2: cost of 19 {{.*}} %V4i32 = sdiv
; SSSE3: cost of 19 {{.*}} %V4i32 = sdiv
; SSE42: cost of 15 {{.*}} %V4i32 = sdiv
; AVX: cost of 15 {{.*}} %V4i32 = sdiv
%V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
; SSE2: cost of 38 {{.*}} %V8i32 = sdiv
; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv
; SSE42: cost of 30 {{.*}} %V8i32 = sdiv
; AVX1: cost of 32 {{.*}} %V8i32 = sdiv
; AVX2: cost of 15 {{.*}} %V8i32 = sdiv
; AVX512: cost of 15 {{.*}} %V8i32 = sdiv
%V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
; SSE2: cost of 76 {{.*}} %V16i32 = sdiv
; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv
; SSE42: cost of 60 {{.*}} %V16i32 = sdiv
; AVX1: cost of 64 {{.*}} %V16i32 = sdiv
; AVX2: cost of 30 {{.*}} %V16i32 = sdiv
; AVX512: cost of 15 {{.*}} %V16i32 = sdiv
%V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
; CHECK: cost of 1 {{.*}} %I16 = sdiv
%I16 = sdiv i16 undef, 7
; SSE: cost of 6 {{.*}} %V8i16 = sdiv
; AVX: cost of 6 {{.*}} %V8i16 = sdiv
%V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; SSE: cost of 12 {{.*}} %V16i16 = sdiv
; AVX1: cost of 14 {{.*}} %V16i16 = sdiv
; AVX2: cost of 6 {{.*}} %V16i16 = sdiv
; AVX512: cost of 6 {{.*}} %V16i16 = sdiv
%V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; SSE: cost of 24 {{.*}} %V32i16 = sdiv
; AVX1: cost of 28 {{.*}} %V32i16 = sdiv
; AVX2: cost of 12 {{.*}} %V32i16 = sdiv
; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv
; AVX512BW: cost of 6 {{.*}} %V32i16 = sdiv
%V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; CHECK: cost of 1 {{.*}} %I8 = sdiv
%I8 = sdiv i8 undef, 7
; SSE: cost of 320 {{.*}} %V16i8 = sdiv
; AVX: cost of 320 {{.*}} %V16i8 = sdiv
%V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
; SSE: cost of 640 {{.*}} %V32i8 = sdiv
; AVX: cost of 640 {{.*}} %V32i8 = sdiv
%V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
; SSE: cost of 1280 {{.*}} %V64i8 = sdiv
; AVX: cost of 1280 {{.*}} %V64i8 = sdiv
%V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret i32 undef
}
; CHECK-LABEL: 'udiv_uniformconst'
define i32 @udiv_uniformconst() {
; CHECK: cost of 1 {{.*}} %I64 = udiv
%I64 = udiv i64 undef, 7
; SSE: cost of 40 {{.*}} %V2i64 = udiv
; AVX: cost of 40 {{.*}} %V2i64 = udiv
%V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
; SSE: cost of 80 {{.*}} %V4i64 = udiv
; AVX: cost of 80 {{.*}} %V4i64 = udiv
%V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
; SSE: cost of 160 {{.*}} %V8i64 = udiv
; AVX: cost of 160 {{.*}} %V8i64 = udiv
%V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
; CHECK: cost of 1 {{.*}} %I32 = udiv
%I32 = udiv i32 undef, 7
; SSE: cost of 15 {{.*}} %V4i32 = udiv
; AVX: cost of 15 {{.*}} %V4i32 = udiv
%V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
; SSE: cost of 30 {{.*}} %V8i32 = udiv
; AVX1: cost of 32 {{.*}} %V8i32 = udiv
; AVX2: cost of 15 {{.*}} %V8i32 = udiv
; AVX512: cost of 15 {{.*}} %V8i32 = udiv
%V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
; SSE: cost of 60 {{.*}} %V16i32 = udiv
; AVX1: cost of 64 {{.*}} %V16i32 = udiv
; AVX2: cost of 30 {{.*}} %V16i32 = udiv
; AVX512: cost of 15 {{.*}} %V16i32 = udiv
%V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
; CHECK: cost of 1 {{.*}} %I16 = udiv
%I16 = udiv i16 undef, 7
; SSE: cost of 6 {{.*}} %V8i16 = udiv
; AVX: cost of 6 {{.*}} %V8i16 = udiv
%V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; SSE: cost of 12 {{.*}} %V16i16 = udiv
; AVX1: cost of 14 {{.*}} %V16i16 = udiv
; AVX2: cost of 6 {{.*}} %V16i16 = udiv
; AVX512: cost of 6 {{.*}} %V16i16 = udiv
%V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; SSE: cost of 24 {{.*}} %V32i16 = udiv
; AVX1: cost of 28 {{.*}} %V32i16 = udiv
; AVX2: cost of 12 {{.*}} %V32i16 = udiv
; AVX512F: cost of 12 {{.*}} %V32i16 = udiv
; AVX512BW: cost of 6 {{.*}} %V32i16 = udiv
%V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; CHECK: cost of 1 {{.*}} %I8 = udiv
%I8 = udiv i8 undef, 7
; SSE: cost of 320 {{.*}} %V16i8 = udiv
; AVX: cost of 320 {{.*}} %V16i8 = udiv
%V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
; SSE: cost of 640 {{.*}} %V32i8 = udiv
; AVX: cost of 640 {{.*}} %V32i8 = udiv
%V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
; SSE: cost of 1280 {{.*}} %V64i8 = udiv
; AVX: cost of 1280 {{.*}} %V64i8 = udiv
%V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret i32 undef
}
; CHECK-LABEL: 'sdiv_uniformconstpow2'
define i32 @sdiv_uniformconstpow2() {
; CHECK: cost of 1 {{.*}} %I64 = sdiv
%I64 = sdiv i64 undef, 16
; SSE: cost of 40 {{.*}} %V2i64 = sdiv
; AVX: cost of 40 {{.*}} %V2i64 = sdiv
%V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
; SSE: cost of 80 {{.*}} %V4i64 = sdiv
; AVX: cost of 80 {{.*}} %V4i64 = sdiv
%V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
; SSE: cost of 160 {{.*}} %V8i64 = sdiv
; AVX: cost of 160 {{.*}} %V8i64 = sdiv
%V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
; CHECK: cost of 1 {{.*}} %I32 = sdiv
%I32 = sdiv i32 undef, 16
; SSE2: cost of 19 {{.*}} %V4i32 = sdiv
; SSSE3: cost of 19 {{.*}} %V4i32 = sdiv
; SSE42: cost of 15 {{.*}} %V4i32 = sdiv
; AVX: cost of 15 {{.*}} %V4i32 = sdiv
%V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
; SSE2: cost of 38 {{.*}} %V8i32 = sdiv
; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv
; SSE42: cost of 30 {{.*}} %V8i32 = sdiv
; AVX1: cost of 32 {{.*}} %V8i32 = sdiv
; AVX2: cost of 15 {{.*}} %V8i32 = sdiv
; AVX512: cost of 15 {{.*}} %V8i32 = sdiv
%V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; SSE2: cost of 76 {{.*}} %V16i32 = sdiv
; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv
; SSE42: cost of 60 {{.*}} %V16i32 = sdiv
; AVX1: cost of 64 {{.*}} %V16i32 = sdiv
; AVX2: cost of 30 {{.*}} %V16i32 = sdiv
; AVX512: cost of 15 {{.*}} %V16i32 = sdiv
%V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK: cost of 1 {{.*}} %I16 = sdiv
%I16 = sdiv i16 undef, 16
; SSE: cost of 6 {{.*}} %V8i16 = sdiv
; AVX: cost of 6 {{.*}} %V8i16 = sdiv
%V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
; SSE: cost of 12 {{.*}} %V16i16 = sdiv
; AVX1: cost of 14 {{.*}} %V16i16 = sdiv
; AVX2: cost of 6 {{.*}} %V16i16 = sdiv
; AVX512: cost of 6 {{.*}} %V16i16 = sdiv
%V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
; SSE: cost of 24 {{.*}} %V32i16 = sdiv
; AVX1: cost of 28 {{.*}} %V32i16 = sdiv
; AVX2: cost of 12 {{.*}} %V32i16 = sdiv
; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv
; AVX512BW: cost of 6 {{.*}} %V32i16 = sdiv
%V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
; CHECK: cost of 1 {{.*}} %I8 = sdiv
%I8 = sdiv i8 undef, 16
; SSE: cost of 320 {{.*}} %V16i8 = sdiv
; AVX: cost of 320 {{.*}} %V16i8 = sdiv
%V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
; SSE: cost of 640 {{.*}} %V32i8 = sdiv
; AVX: cost of 640 {{.*}} %V32i8 = sdiv
%V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
; SSE: cost of 1280 {{.*}} %V64i8 = sdiv
; AVX: cost of 1280 {{.*}} %V64i8 = sdiv
%V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
ret i32 undef
}
; CHECK-LABEL: 'udiv_uniformconstpow2'
define i32 @udiv_uniformconstpow2() {
; CHECK: cost of 1 {{.*}} %I64 = udiv
%I64 = udiv i64 undef, 16
; SSE: cost of 40 {{.*}} %V2i64 = udiv
; AVX: cost of 40 {{.*}} %V2i64 = udiv
%V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
; SSE: cost of 80 {{.*}} %V4i64 = udiv
; AVX: cost of 80 {{.*}} %V4i64 = udiv
%V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
; SSE: cost of 160 {{.*}} %V8i64 = udiv
; AVX: cost of 160 {{.*}} %V8i64 = udiv
%V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
; CHECK: cost of 1 {{.*}} %I32 = udiv
%I32 = udiv i32 undef, 16
; SSE: cost of 15 {{.*}} %V4i32 = udiv
; AVX: cost of 15 {{.*}} %V4i32 = udiv
%V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
; SSE: cost of 30 {{.*}} %V8i32 = udiv
; AVX1: cost of 32 {{.*}} %V8i32 = udiv
; AVX2: cost of 15 {{.*}} %V8i32 = udiv
; AVX512: cost of 15 {{.*}} %V8i32 = udiv
%V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; SSE: cost of 60 {{.*}} %V16i32 = udiv
; AVX1: cost of 64 {{.*}} %V16i32 = udiv
; AVX2: cost of 30 {{.*}} %V16i32 = udiv
; AVX512: cost of 15 {{.*}} %V16i32 = udiv
%V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK: cost of 1 {{.*}} %I16 = udiv
%I16 = udiv i16 undef, 16
; SSE: cost of 6 {{.*}} %V8i16 = udiv
; AVX: cost of 6 {{.*}} %V8i16 = udiv
%V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
; SSE: cost of 12 {{.*}} %V16i16 = udiv
; AVX1: cost of 14 {{.*}} %V16i16 = udiv
; AVX2: cost of 6 {{.*}} %V16i16 = udiv
; AVX512: cost of 6 {{.*}} %V16i16 = udiv
%V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
; SSE: cost of 24 {{.*}} %V32i16 = udiv
; AVX1: cost of 28 {{.*}} %V32i16 = udiv
; AVX2: cost of 12 {{.*}} %V32i16 = udiv
; AVX512F: cost of 12 {{.*}} %V32i16 = udiv
; AVX512BW: cost of 6 {{.*}} %V32i16 = udiv
%V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
; CHECK: cost of 1 {{.*}} %I8 = udiv
%I8 = udiv i8 undef, 16
; SSE: cost of 320 {{.*}} %V16i8 = udiv
; AVX: cost of 320 {{.*}} %V16i8 = udiv
%V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
; SSE: cost of 640 {{.*}} %V32i8 = udiv
; AVX: cost of 640 {{.*}} %V32i8 = udiv
%V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
; SSE: cost of 1280 {{.*}} %V64i8 = udiv
; AVX: cost of 1280 {{.*}} %V64i8 = udiv
%V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
ret i32 undef
}

View File

@@ -1,261 +0,0 @@
; Cost-model tests for fptosi on x86-64 (darwin triple) across SSE2, SSE4.2,
; AVX, AVX2, AVX-512F and AVX-512F+DQ. Every FileCheck invocation also enables
; the common CHECK prefix. Without it the CHECK-LABEL directives that precede
; each test function are silently ignored, so the per-function expectations
; are not confined to the matching section of the analysis output.
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -cost-model -analyze < %s | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx -cost-model -analyze < %s | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx2 -cost-model -analyze < %s | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f -cost-model -analyze < %s | FileCheck --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512dq -cost-model -analyze < %s | FileCheck --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512DQ %s
; fptosi from double to i64, for a scalar and for 2-, 4- and 8-lane vectors.
; The scalar conversion is expected to cost 1 in every configuration. For the
; vector forms only the AVX512DQ prefix expects a cost of 1; all other
; configurations expect a cost that grows with the lane count.
; CHECK-LABEL: 'fptosi_double_i64'
define i32 @fptosi_double_i64(i32 %arg) {
; SSE2: cost of 1 {{.*}} %I64 = fptosi
; SSE42: cost of 1 {{.*}} %I64 = fptosi
; AVX1: cost of 1 {{.*}} %I64 = fptosi
; AVX2: cost of 1 {{.*}} %I64 = fptosi
; AVX512: cost of 1 {{.*}} %I64 = fptosi
%I64 = fptosi double undef to i64
; SSE2: cost of 6 {{.*}} %V2I64 = fptosi
; SSE42: cost of 6 {{.*}} %V2I64 = fptosi
; AVX1: cost of 6 {{.*}} %V2I64 = fptosi
; AVX2: cost of 6 {{.*}} %V2I64 = fptosi
; AVX512F: cost of 6 {{.*}} %V2I64 = fptosi
; AVX512DQ: cost of 1 {{.*}} %V2I64 = fptosi
%V2I64 = fptosi <2 x double> undef to <2 x i64>
; SSE2: cost of 13 {{.*}} %V4I64 = fptosi
; SSE42: cost of 13 {{.*}} %V4I64 = fptosi
; AVX1: cost of 12 {{.*}} %V4I64 = fptosi
; AVX2: cost of 12 {{.*}} %V4I64 = fptosi
; AVX512F: cost of 12 {{.*}} %V4I64 = fptosi
; AVX512DQ: cost of 1 {{.*}} %V4I64 = fptosi
%V4I64 = fptosi <4 x double> undef to <4 x i64>
; SSE2: cost of 27 {{.*}} %V8I64 = fptosi
; SSE42: cost of 27 {{.*}} %V8I64 = fptosi
; AVX1: cost of 25 {{.*}} %V8I64 = fptosi
; AVX2: cost of 25 {{.*}} %V8I64 = fptosi
; AVX512F: cost of 24 {{.*}} %V8I64 = fptosi
; AVX512DQ: cost of 1 {{.*}} %V8I64 = fptosi
%V8I64 = fptosi <8 x double> undef to <8 x i64>
ret i32 undef
}
; fptosi from double to i32, for a scalar and for 2-, 4- and 8-lane vectors.
; The scalar conversion is expected to cost 1 and the 2-lane form 3 in every
; configuration. For 4 and 8 lanes the SSE levels expect 7 and 15, while the
; AVX levels expect 1 and 3 (AVX512 expects 1 for the 8-lane form as well).
; CHECK-LABEL: 'fptosi_double_i32'
define i32 @fptosi_double_i32(i32 %arg) {
; SSE2: cost of 1 {{.*}} %I32 = fptosi
; SSE42: cost of 1 {{.*}} %I32 = fptosi
; AVX1: cost of 1 {{.*}} %I32 = fptosi
; AVX2: cost of 1 {{.*}} %I32 = fptosi
; AVX512: cost of 1 {{.*}} %I32 = fptosi
%I32 = fptosi double undef to i32
; SSE2: cost of 3 {{.*}} %V2I32 = fptosi
; SSE42: cost of 3 {{.*}} %V2I32 = fptosi
; AVX1: cost of 3 {{.*}} %V2I32 = fptosi
; AVX2: cost of 3 {{.*}} %V2I32 = fptosi
; AVX512: cost of 3 {{.*}} %V2I32 = fptosi
%V2I32 = fptosi <2 x double> undef to <2 x i32>
; SSE2: cost of 7 {{.*}} %V4I32 = fptosi
; SSE42: cost of 7 {{.*}} %V4I32 = fptosi
; AVX1: cost of 1 {{.*}} %V4I32 = fptosi
; AVX2: cost of 1 {{.*}} %V4I32 = fptosi
; AVX512: cost of 1 {{.*}} %V4I32 = fptosi
%V4I32 = fptosi <4 x double> undef to <4 x i32>
; SSE2: cost of 15 {{.*}} %V8I32 = fptosi
; SSE42: cost of 15 {{.*}} %V8I32 = fptosi
; AVX1: cost of 3 {{.*}} %V8I32 = fptosi
; AVX2: cost of 3 {{.*}} %V8I32 = fptosi
; AVX512: cost of 1 {{.*}} %V8I32 = fptosi
%V8I32 = fptosi <8 x double> undef to <8 x i32>
ret i32 undef
}
; fptosi from double to i16, for a scalar and for 2-, 4- and 8-lane vectors.
; The scalar conversion is expected to cost 1 in every configuration. The
; 2-lane form is the only one here with separate AVX512F (6) and AVX512DQ (1)
; expectations; the 4- and 8-lane forms use the shared AVX512 prefix.
; CHECK-LABEL: 'fptosi_double_i16'
define i32 @fptosi_double_i16(i32 %arg) {
; SSE2: cost of 1 {{.*}} %I16 = fptosi
; SSE42: cost of 1 {{.*}} %I16 = fptosi
; AVX1: cost of 1 {{.*}} %I16 = fptosi
; AVX2: cost of 1 {{.*}} %I16 = fptosi
; AVX512: cost of 1 {{.*}} %I16 = fptosi
%I16 = fptosi double undef to i16
; SSE2: cost of 6 {{.*}} %V2I16 = fptosi
; SSE42: cost of 6 {{.*}} %V2I16 = fptosi
; AVX1: cost of 6 {{.*}} %V2I16 = fptosi
; AVX2: cost of 6 {{.*}} %V2I16 = fptosi
; AVX512F: cost of 6 {{.*}} %V2I16 = fptosi
; AVX512DQ: cost of 1 {{.*}} %V2I16 = fptosi
%V2I16 = fptosi <2 x double> undef to <2 x i16>
; SSE2: cost of 13 {{.*}} %V4I16 = fptosi
; SSE42: cost of 13 {{.*}} %V4I16 = fptosi
; AVX1: cost of 1 {{.*}} %V4I16 = fptosi
; AVX2: cost of 1 {{.*}} %V4I16 = fptosi
; AVX512: cost of 1 {{.*}} %V4I16 = fptosi
%V4I16 = fptosi <4 x double> undef to <4 x i16>
; SSE2: cost of 27 {{.*}} %V8I16 = fptosi
; SSE42: cost of 27 {{.*}} %V8I16 = fptosi
; AVX1: cost of 3 {{.*}} %V8I16 = fptosi
; AVX2: cost of 3 {{.*}} %V8I16 = fptosi
; AVX512: cost of 1 {{.*}} %V8I16 = fptosi
%V8I16 = fptosi <8 x double> undef to <8 x i16>
ret i32 undef
}
; fptosi from double to i8, for a scalar and for 2-, 4- and 8-lane vectors.
; The expected costs are the same numbers as for the double-to-i16 test, with
; the 2-lane form again split into AVX512F (6) and AVX512DQ (1) expectations.
; CHECK-LABEL: 'fptosi_double_i8'
define i32 @fptosi_double_i8(i32 %arg) {
; SSE2: cost of 1 {{.*}} %I8 = fptosi
; SSE42: cost of 1 {{.*}} %I8 = fptosi
; AVX1: cost of 1 {{.*}} %I8 = fptosi
; AVX2: cost of 1 {{.*}} %I8 = fptosi
; AVX512: cost of 1 {{.*}} %I8 = fptosi
%I8 = fptosi double undef to i8
; SSE2: cost of 6 {{.*}} %V2I8 = fptosi
; SSE42: cost of 6 {{.*}} %V2I8 = fptosi
; AVX1: cost of 6 {{.*}} %V2I8 = fptosi
; AVX2: cost of 6 {{.*}} %V2I8 = fptosi
; AVX512F: cost of 6 {{.*}} %V2I8 = fptosi
; AVX512DQ: cost of 1 {{.*}} %V2I8 = fptosi
%V2I8 = fptosi <2 x double> undef to <2 x i8>
; SSE2: cost of 13 {{.*}} %V4I8 = fptosi
; SSE42: cost of 13 {{.*}} %V4I8 = fptosi
; AVX1: cost of 1 {{.*}} %V4I8 = fptosi
; AVX2: cost of 1 {{.*}} %V4I8 = fptosi
; AVX512: cost of 1 {{.*}} %V4I8 = fptosi
%V4I8 = fptosi <4 x double> undef to <4 x i8>
; SSE2: cost of 27 {{.*}} %V8I8 = fptosi
; SSE42: cost of 27 {{.*}} %V8I8 = fptosi
; AVX1: cost of 3 {{.*}} %V8I8 = fptosi
; AVX2: cost of 3 {{.*}} %V8I8 = fptosi
; AVX512: cost of 1 {{.*}} %V8I8 = fptosi
%V8I8 = fptosi <8 x double> undef to <8 x i8>
ret i32 undef
}
; fptosi from float to i64, for a scalar and for 2-, 4-, 8- and 16-lane
; vectors. The scalar conversion is expected to cost 1 in every configuration.
; With the AVX512DQ prefix the 2- to 8-lane forms are expected to cost 1 and
; the 16-lane form 3; all other configurations expect a cost that grows with
; the lane count.
; CHECK-LABEL: 'fptosi_float_i64'
define i32 @fptosi_float_i64(i32 %arg) {
; SSE2: cost of 1 {{.*}} %I64 = fptosi
; SSE42: cost of 1 {{.*}} %I64 = fptosi
; AVX1: cost of 1 {{.*}} %I64 = fptosi
; AVX2: cost of 1 {{.*}} %I64 = fptosi
; AVX512: cost of 1 {{.*}} %I64 = fptosi
%I64 = fptosi float undef to i64
; SSE2: cost of 6 {{.*}} %V2I64 = fptosi
; SSE42: cost of 6 {{.*}} %V2I64 = fptosi
; AVX1: cost of 6 {{.*}} %V2I64 = fptosi
; AVX2: cost of 6 {{.*}} %V2I64 = fptosi
; AVX512F: cost of 6 {{.*}} %V2I64 = fptosi
; AVX512DQ: cost of 1 {{.*}} %V2I64 = fptosi
%V2I64 = fptosi <2 x float> undef to <2 x i64>
; SSE2: cost of 13 {{.*}} %V4I64 = fptosi
; SSE42: cost of 13 {{.*}} %V4I64 = fptosi
; AVX1: cost of 12 {{.*}} %V4I64 = fptosi
; AVX2: cost of 12 {{.*}} %V4I64 = fptosi
; AVX512F: cost of 12 {{.*}} %V4I64 = fptosi
; AVX512DQ: cost of 1 {{.*}} %V4I64 = fptosi
%V4I64 = fptosi <4 x float> undef to <4 x i64>
; SSE2: cost of 27 {{.*}} %V8I64 = fptosi
; SSE42: cost of 27 {{.*}} %V8I64 = fptosi
; AVX1: cost of 25 {{.*}} %V8I64 = fptosi
; AVX2: cost of 25 {{.*}} %V8I64 = fptosi
; AVX512F: cost of 24 {{.*}} %V8I64 = fptosi
; AVX512DQ: cost of 1 {{.*}} %V8I64 = fptosi
%V8I64 = fptosi <8 x float> undef to <8 x i64>
; SSE2: cost of 55 {{.*}} %V16I64 = fptosi
; SSE42: cost of 55 {{.*}} %V16I64 = fptosi
; AVX1: cost of 51 {{.*}} %V16I64 = fptosi
; AVX2: cost of 51 {{.*}} %V16I64 = fptosi
; AVX512F: cost of 49 {{.*}} %V16I64 = fptosi
; AVX512DQ: cost of 3 {{.*}} %V16I64 = fptosi
%V16I64 = fptosi <16 x float> undef to <16 x i64>
ret i32 undef
}
; fptosi from float to i32, for a scalar and for 4-, 8- and 16-lane vectors.
; Every conversion in this function is expected to cost 1 in every
; configuration, including the 8- and 16-lane forms on the SSE levels.
; CHECK-LABEL: 'fptosi_float_i32'
define i32 @fptosi_float_i32(i32 %arg) {
; SSE2: cost of 1 {{.*}} %I32 = fptosi
; SSE42: cost of 1 {{.*}} %I32 = fptosi
; AVX1: cost of 1 {{.*}} %I32 = fptosi
; AVX2: cost of 1 {{.*}} %I32 = fptosi
; AVX512: cost of 1 {{.*}} %I32 = fptosi
%I32 = fptosi float undef to i32
; SSE2: cost of 1 {{.*}} %V4I32 = fptosi
; SSE42: cost of 1 {{.*}} %V4I32 = fptosi
; AVX1: cost of 1 {{.*}} %V4I32 = fptosi
; AVX2: cost of 1 {{.*}} %V4I32 = fptosi
; AVX512: cost of 1 {{.*}} %V4I32 = fptosi
%V4I32 = fptosi <4 x float> undef to <4 x i32>
; SSE2: cost of 1 {{.*}} %V8I32 = fptosi
; SSE42: cost of 1 {{.*}} %V8I32 = fptosi
; AVX1: cost of 1 {{.*}} %V8I32 = fptosi
; AVX2: cost of 1 {{.*}} %V8I32 = fptosi
; AVX512: cost of 1 {{.*}} %V8I32 = fptosi
%V8I32 = fptosi <8 x float> undef to <8 x i32>
; SSE2: cost of 1 {{.*}} %V16I32 = fptosi
; SSE42: cost of 1 {{.*}} %V16I32 = fptosi
; AVX1: cost of 1 {{.*}} %V16I32 = fptosi
; AVX2: cost of 1 {{.*}} %V16I32 = fptosi
; AVX512: cost of 1 {{.*}} %V16I32 = fptosi
%V16I32 = fptosi <16 x float> undef to <16 x i32>
ret i32 undef
}
; fptosi from float to i16, for a scalar and for 4-, 8- and 16-lane vectors.
; The scalar and 4-lane conversions are expected to cost 1 everywhere. For
; 8 and 16 lanes the SSE levels expect 3 and 7, AVX1/AVX2 expect 1 and 3, and
; the AVX512 prefix expects 1 for both.
; CHECK-LABEL: 'fptosi_float_i16'
define i32 @fptosi_float_i16(i32 %arg) {
; SSE2: cost of 1 {{.*}} %I16 = fptosi
; SSE42: cost of 1 {{.*}} %I16 = fptosi
; AVX1: cost of 1 {{.*}} %I16 = fptosi
; AVX2: cost of 1 {{.*}} %I16 = fptosi
; AVX512: cost of 1 {{.*}} %I16 = fptosi
%I16 = fptosi float undef to i16
; SSE2: cost of 1 {{.*}} %V4I16 = fptosi
; SSE42: cost of 1 {{.*}} %V4I16 = fptosi
; AVX1: cost of 1 {{.*}} %V4I16 = fptosi
; AVX2: cost of 1 {{.*}} %V4I16 = fptosi
; AVX512: cost of 1 {{.*}} %V4I16 = fptosi
%V4I16 = fptosi <4 x float> undef to <4 x i16>
; SSE2: cost of 3 {{.*}} %V8I16 = fptosi
; SSE42: cost of 3 {{.*}} %V8I16 = fptosi
; AVX1: cost of 1 {{.*}} %V8I16 = fptosi
; AVX2: cost of 1 {{.*}} %V8I16 = fptosi
; AVX512: cost of 1 {{.*}} %V8I16 = fptosi
%V8I16 = fptosi <8 x float> undef to <8 x i16>
; SSE2: cost of 7 {{.*}} %V16I16 = fptosi
; SSE42: cost of 7 {{.*}} %V16I16 = fptosi
; AVX1: cost of 3 {{.*}} %V16I16 = fptosi
; AVX2: cost of 3 {{.*}} %V16I16 = fptosi
; AVX512: cost of 1 {{.*}} %V16I16 = fptosi
%V16I16 = fptosi <16 x float> undef to <16 x i16>
ret i32 undef
}
; fptosi from float to i8, for a scalar and for 4-, 8- and 16-lane vectors.
; The scalar and 4-lane conversions are expected to cost 1 everywhere.
; NOTE(review): for the 8- and 16-lane forms the AVX1/AVX2 expectations (7 and
; 15) are higher than the SSE ones (3 and 7), and the AVX512 prefix expects 7
; for 8 lanes but 1 for 16 lanes. These values are recorded as-is from the
; expectations below; confirm against the cost tables before relying on them.
; CHECK-LABEL: 'fptosi_float_i8'
define i32 @fptosi_float_i8(i32 %arg) {
; SSE2: cost of 1 {{.*}} %I8 = fptosi
; SSE42: cost of 1 {{.*}} %I8 = fptosi
; AVX1: cost of 1 {{.*}} %I8 = fptosi
; AVX2: cost of 1 {{.*}} %I8 = fptosi
; AVX512: cost of 1 {{.*}} %I8 = fptosi
%I8 = fptosi float undef to i8
; SSE2: cost of 1 {{.*}} %V4I8 = fptosi
; SSE42: cost of 1 {{.*}} %V4I8 = fptosi
; AVX1: cost of 1 {{.*}} %V4I8 = fptosi
; AVX2: cost of 1 {{.*}} %V4I8 = fptosi
; AVX512: cost of 1 {{.*}} %V4I8 = fptosi
%V4I8 = fptosi <4 x float> undef to <4 x i8>
; SSE2: cost of 3 {{.*}} %V8I8 = fptosi
; SSE42: cost of 3 {{.*}} %V8I8 = fptosi
; AVX1: cost of 7 {{.*}} %V8I8 = fptosi
; AVX2: cost of 7 {{.*}} %V8I8 = fptosi
; AVX512: cost of 7 {{.*}} %V8I8 = fptosi
%V8I8 = fptosi <8 x float> undef to <8 x i8>
; SSE2: cost of 7 {{.*}} %V16I8 = fptosi
; SSE42: cost of 7 {{.*}} %V16I8 = fptosi
; AVX1: cost of 15 {{.*}} %V16I8 = fptosi
; AVX2: cost of 15 {{.*}} %V16I8 = fptosi
; AVX512: cost of 1 {{.*}} %V16I8 = fptosi
%V16I8 = fptosi <16 x float> undef to <16 x i8>
ret i32 undef
}

View File

@@ -1,262 +0,0 @@
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE --check-prefix=SSE42 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX1 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx2 -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX2 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f -cost-model -analyze < %s | FileCheck --check-prefix=AVX512 --check-prefix=AVX512F %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512dq -cost-model -analyze < %s | FileCheck --check-prefix=AVX512 --check-prefix=AVX512DQ %s
; CHECK-LABEL: 'fptoui_double_i64'
; Cost-model expectations for fptoui from double to i64.
; Covers one scalar conversion and the <2 x>, <4 x> and <8 x> vector forms,
; all on undef operands. Each group of "cost of N" lines states the expected
; cost, per FileCheck prefix, of the conversion that follows the group.
; NOTE(review): the RUN lines of this file enable only SSE*/AVX* prefixes, so
; the CHECK-LABEL line preceding this function appears not to be evaluated --
; confirm, and consider enabling a prefix common to every RUN line.
define i32 @fptoui_double_i64(i32 %arg) {
; Scalar double -> i64.
; SSE2: cost of 4 {{.*}} %I64 = fptoui
; SSE42: cost of 4 {{.*}} %I64 = fptoui
; AVX1: cost of 4 {{.*}} %I64 = fptoui
; AVX2: cost of 4 {{.*}} %I64 = fptoui
; AVX512: cost of 1 {{.*}} %I64 = fptoui
%I64 = fptoui double undef to i64
; <2 x double> -> <2 x i64>; the two AVX-512 configurations differ here.
; SSE2: cost of 12 {{.*}} %V2I64 = fptoui
; SSE42: cost of 12 {{.*}} %V2I64 = fptoui
; AVX1: cost of 12 {{.*}} %V2I64 = fptoui
; AVX2: cost of 12 {{.*}} %V2I64 = fptoui
; AVX512F: cost of 6 {{.*}} %V2I64 = fptoui
; AVX512DQ: cost of 1 {{.*}} %V2I64 = fptoui
%V2I64 = fptoui <2 x double> undef to <2 x i64>
; <4 x double> -> <4 x i64>.
; SSE2: cost of 25 {{.*}} %V4I64 = fptoui
; SSE42: cost of 25 {{.*}} %V4I64 = fptoui
; AVX1: cost of 24 {{.*}} %V4I64 = fptoui
; AVX2: cost of 24 {{.*}} %V4I64 = fptoui
; AVX512F: cost of 12 {{.*}} %V4I64 = fptoui
; AVX512DQ: cost of 1 {{.*}} %V4I64 = fptoui
%V4I64 = fptoui <4 x double> undef to <4 x i64>
; <8 x double> -> <8 x i64>.
; SSE2: cost of 51 {{.*}} %V8I64 = fptoui
; SSE42: cost of 51 {{.*}} %V8I64 = fptoui
; AVX1: cost of 49 {{.*}} %V8I64 = fptoui
; AVX2: cost of 49 {{.*}} %V8I64 = fptoui
; AVX512F: cost of 24 {{.*}} %V8I64 = fptoui
; AVX512DQ: cost of 1 {{.*}} %V8I64 = fptoui
%V8I64 = fptoui <8 x double> undef to <8 x i64>
; The return value is irrelevant; only the per-instruction costs are checked.
ret i32 undef
}
; CHECK-LABEL: 'fptoui_double_i32'
; Cost-model expectations for fptoui from double to i32.
; Covers one scalar conversion and the <2 x>, <4 x> and <8 x> vector forms,
; all on undef operands. Each group of "cost of N" lines states the expected
; cost, per FileCheck prefix, of the conversion that follows the group.
define i32 @fptoui_double_i32(i32 %arg) {
; Scalar double -> i32.
; SSE2: cost of 1 {{.*}} %I32 = fptoui
; SSE42: cost of 1 {{.*}} %I32 = fptoui
; AVX1: cost of 1 {{.*}} %I32 = fptoui
; AVX2: cost of 1 {{.*}} %I32 = fptoui
; AVX512: cost of 1 {{.*}} %I32 = fptoui
%I32 = fptoui double undef to i32
; <2 x double> -> <2 x i32>; the two AVX-512 configurations differ here.
; SSE2: cost of 6 {{.*}} %V2I32 = fptoui
; SSE42: cost of 6 {{.*}} %V2I32 = fptoui
; AVX1: cost of 6 {{.*}} %V2I32 = fptoui
; AVX2: cost of 6 {{.*}} %V2I32 = fptoui
; AVX512F: cost of 6 {{.*}} %V2I32 = fptoui
; AVX512DQ: cost of 1 {{.*}} %V2I32 = fptoui
%V2I32 = fptoui <2 x double> undef to <2 x i32>
; <4 x double> -> <4 x i32>.
; SSE2: cost of 13 {{.*}} %V4I32 = fptoui
; SSE42: cost of 13 {{.*}} %V4I32 = fptoui
; AVX1: cost of 16 {{.*}} %V4I32 = fptoui
; AVX2: cost of 16 {{.*}} %V4I32 = fptoui
; AVX512: cost of 16 {{.*}} %V4I32 = fptoui
%V4I32 = fptoui <4 x double> undef to <4 x i32>
; <8 x double> -> <8 x i32>.
; SSE2: cost of 27 {{.*}} %V8I32 = fptoui
; SSE42: cost of 27 {{.*}} %V8I32 = fptoui
; AVX1: cost of 33 {{.*}} %V8I32 = fptoui
; AVX2: cost of 33 {{.*}} %V8I32 = fptoui
; AVX512: cost of 1 {{.*}} %V8I32 = fptoui
%V8I32 = fptoui <8 x double> undef to <8 x i32>
; The return value is irrelevant; only the per-instruction costs are checked.
ret i32 undef
}
; CHECK-LABEL: 'fptoui_double_i16'
; Cost-model expectations for fptoui from double to i16.
; Covers one scalar conversion and the <2 x>, <4 x> and <8 x> vector forms,
; all on undef operands. Each group of "cost of N" lines states the expected
; cost, per FileCheck prefix, of the conversion that follows the group.
define i32 @fptoui_double_i16(i32 %arg) {
; Scalar double -> i16.
; SSE2: cost of 1 {{.*}} %I16 = fptoui
; SSE42: cost of 1 {{.*}} %I16 = fptoui
; AVX1: cost of 1 {{.*}} %I16 = fptoui
; AVX2: cost of 1 {{.*}} %I16 = fptoui
; AVX512: cost of 1 {{.*}} %I16 = fptoui
%I16 = fptoui double undef to i16
; <2 x double> -> <2 x i16>; the two AVX-512 configurations differ here.
; SSE2: cost of 6 {{.*}} %V2I16 = fptoui
; SSE42: cost of 6 {{.*}} %V2I16 = fptoui
; AVX1: cost of 6 {{.*}} %V2I16 = fptoui
; AVX2: cost of 6 {{.*}} %V2I16 = fptoui
; AVX512F: cost of 6 {{.*}} %V2I16 = fptoui
; AVX512DQ: cost of 1 {{.*}} %V2I16 = fptoui
%V2I16 = fptoui <2 x double> undef to <2 x i16>
; <4 x double> -> <4 x i16>.
; SSE2: cost of 13 {{.*}} %V4I16 = fptoui
; SSE42: cost of 13 {{.*}} %V4I16 = fptoui
; AVX1: cost of 12 {{.*}} %V4I16 = fptoui
; AVX2: cost of 12 {{.*}} %V4I16 = fptoui
; AVX512: cost of 1 {{.*}} %V4I16 = fptoui
%V4I16 = fptoui <4 x double> undef to <4 x i16>
; <8 x double> -> <8 x i16>.
; SSE2: cost of 27 {{.*}} %V8I16 = fptoui
; SSE42: cost of 27 {{.*}} %V8I16 = fptoui
; AVX1: cost of 25 {{.*}} %V8I16 = fptoui
; AVX2: cost of 25 {{.*}} %V8I16 = fptoui
; AVX512: cost of 2 {{.*}} %V8I16 = fptoui
%V8I16 = fptoui <8 x double> undef to <8 x i16>
; The return value is irrelevant; only the per-instruction costs are checked.
ret i32 undef
}
; CHECK-LABEL: 'fptoui_double_i8'
; Cost-model expectations for fptoui from double to i8.
; Covers one scalar conversion and the <2 x>, <4 x> and <8 x> vector forms,
; all on undef operands. Each group of "cost of N" lines states the expected
; cost, per FileCheck prefix, of the conversion that follows the group.
define i32 @fptoui_double_i8(i32 %arg) {
; Scalar double -> i8.
; SSE2: cost of 1 {{.*}} %I8 = fptoui
; SSE42: cost of 1 {{.*}} %I8 = fptoui
; AVX1: cost of 1 {{.*}} %I8 = fptoui
; AVX2: cost of 1 {{.*}} %I8 = fptoui
; AVX512: cost of 1 {{.*}} %I8 = fptoui
%I8 = fptoui double undef to i8
; <2 x double> -> <2 x i8>; the two AVX-512 configurations differ here.
; SSE2: cost of 6 {{.*}} %V2I8 = fptoui
; SSE42: cost of 6 {{.*}} %V2I8 = fptoui
; AVX1: cost of 6 {{.*}} %V2I8 = fptoui
; AVX2: cost of 6 {{.*}} %V2I8 = fptoui
; AVX512F: cost of 6 {{.*}} %V2I8 = fptoui
; AVX512DQ: cost of 1 {{.*}} %V2I8 = fptoui
%V2I8 = fptoui <2 x double> undef to <2 x i8>
; <4 x double> -> <4 x i8>.
; SSE2: cost of 13 {{.*}} %V4I8 = fptoui
; SSE42: cost of 13 {{.*}} %V4I8 = fptoui
; AVX1: cost of 12 {{.*}} %V4I8 = fptoui
; AVX2: cost of 12 {{.*}} %V4I8 = fptoui
; AVX512: cost of 1 {{.*}} %V4I8 = fptoui
%V4I8 = fptoui <4 x double> undef to <4 x i8>
; <8 x double> -> <8 x i8>.
; SSE2: cost of 27 {{.*}} %V8I8 = fptoui
; SSE42: cost of 27 {{.*}} %V8I8 = fptoui
; AVX1: cost of 25 {{.*}} %V8I8 = fptoui
; AVX2: cost of 25 {{.*}} %V8I8 = fptoui
; AVX512: cost of 2 {{.*}} %V8I8 = fptoui
%V8I8 = fptoui <8 x double> undef to <8 x i8>
; The return value is irrelevant; only the per-instruction costs are checked.
ret i32 undef
}
; CHECK-LABEL: 'fptoui_float_i64'
; Cost-model expectations for fptoui from float to i64.
; Covers one scalar conversion and the <2 x>, <4 x>, <8 x> and <16 x> vector
; forms, all on undef operands. Each group of "cost of N" lines states the
; expected cost, per FileCheck prefix, of the conversion that follows the group.
define i32 @fptoui_float_i64(i32 %arg) {
; Scalar float -> i64.
; SSE2: cost of 4 {{.*}} %I64 = fptoui
; SSE42: cost of 4 {{.*}} %I64 = fptoui
; AVX1: cost of 4 {{.*}} %I64 = fptoui
; AVX2: cost of 4 {{.*}} %I64 = fptoui
; AVX512: cost of 1 {{.*}} %I64 = fptoui
%I64 = fptoui float undef to i64
; <2 x float> -> <2 x i64>; the two AVX-512 configurations differ from here on.
; SSE2: cost of 12 {{.*}} %V2I64 = fptoui
; SSE42: cost of 12 {{.*}} %V2I64 = fptoui
; AVX1: cost of 12 {{.*}} %V2I64 = fptoui
; AVX2: cost of 12 {{.*}} %V2I64 = fptoui
; AVX512F: cost of 6 {{.*}} %V2I64 = fptoui
; AVX512DQ: cost of 1 {{.*}} %V2I64 = fptoui
%V2I64 = fptoui <2 x float> undef to <2 x i64>
; <4 x float> -> <4 x i64>.
; SSE2: cost of 25 {{.*}} %V4I64 = fptoui
; SSE42: cost of 25 {{.*}} %V4I64 = fptoui
; AVX1: cost of 24 {{.*}} %V4I64 = fptoui
; AVX2: cost of 24 {{.*}} %V4I64 = fptoui
; AVX512F: cost of 12 {{.*}} %V4I64 = fptoui
; AVX512DQ: cost of 1 {{.*}} %V4I64 = fptoui
%V4I64 = fptoui <4 x float> undef to <4 x i64>
; <8 x float> -> <8 x i64>.
; SSE2: cost of 51 {{.*}} %V8I64 = fptoui
; SSE42: cost of 51 {{.*}} %V8I64 = fptoui
; AVX1: cost of 49 {{.*}} %V8I64 = fptoui
; AVX2: cost of 49 {{.*}} %V8I64 = fptoui
; AVX512F: cost of 24 {{.*}} %V8I64 = fptoui
; AVX512DQ: cost of 1 {{.*}} %V8I64 = fptoui
%V8I64 = fptoui <8 x float> undef to <8 x i64>
; <16 x float> -> <16 x i64>.
; SSE2: cost of 103 {{.*}} %V16I64 = fptoui
; SSE42: cost of 103 {{.*}} %V16I64 = fptoui
; AVX1: cost of 99 {{.*}} %V16I64 = fptoui
; AVX2: cost of 99 {{.*}} %V16I64 = fptoui
; AVX512F: cost of 49 {{.*}} %V16I64 = fptoui
; AVX512DQ: cost of 3 {{.*}} %V16I64 = fptoui
%V16I64 = fptoui <16 x float> undef to <16 x i64>
; The return value is irrelevant; only the per-instruction costs are checked.
ret i32 undef
}
; CHECK-LABEL: 'fptoui_float_i32'
; Cost-model expectations for fptoui from float to i32.
; Covers one scalar conversion and the <4 x>, <8 x> and <16 x> vector forms,
; all on undef operands. Each group of "cost of N" lines states the expected
; cost, per FileCheck prefix, of the conversion that follows the group.
define i32 @fptoui_float_i32(i32 %arg) {
; Scalar float -> i32.
; SSE2: cost of 1 {{.*}} %I32 = fptoui
; SSE42: cost of 1 {{.*}} %I32 = fptoui
; AVX1: cost of 1 {{.*}} %I32 = fptoui
; AVX2: cost of 1 {{.*}} %I32 = fptoui
; AVX512: cost of 1 {{.*}} %I32 = fptoui
%I32 = fptoui float undef to i32
; <4 x float> -> <4 x i32>.
; SSE2: cost of 12 {{.*}} %V4I32 = fptoui
; SSE42: cost of 12 {{.*}} %V4I32 = fptoui
; AVX1: cost of 12 {{.*}} %V4I32 = fptoui
; AVX2: cost of 12 {{.*}} %V4I32 = fptoui
; AVX512: cost of 1 {{.*}} %V4I32 = fptoui
%V4I32 = fptoui <4 x float> undef to <4 x i32>
; <8 x float> -> <8 x i32>.
; SSE2: cost of 25 {{.*}} %V8I32 = fptoui
; SSE42: cost of 25 {{.*}} %V8I32 = fptoui
; AVX1: cost of 32 {{.*}} %V8I32 = fptoui
; AVX2: cost of 32 {{.*}} %V8I32 = fptoui
; AVX512: cost of 1 {{.*}} %V8I32 = fptoui
%V8I32 = fptoui <8 x float> undef to <8 x i32>
; <16 x float> -> <16 x i32>.
; SSE2: cost of 51 {{.*}} %V16I32 = fptoui
; SSE42: cost of 51 {{.*}} %V16I32 = fptoui
; AVX1: cost of 65 {{.*}} %V16I32 = fptoui
; AVX2: cost of 65 {{.*}} %V16I32 = fptoui
; AVX512: cost of 1 {{.*}} %V16I32 = fptoui
%V16I32 = fptoui <16 x float> undef to <16 x i32>
; The return value is irrelevant; only the per-instruction costs are checked.
ret i32 undef
}
; CHECK-LABEL: 'fptoui_float_i16'
; Cost-model expectations for fptoui from float to i16.
; Covers one scalar conversion and the <4 x>, <8 x> and <16 x> vector forms,
; all on undef operands. Each group of "cost of N" lines states the expected
; cost, per FileCheck prefix, of the conversion that follows the group.
define i32 @fptoui_float_i16(i32 %arg) {
; Scalar float -> i16.
; SSE2: cost of 1 {{.*}} %I16 = fptoui
; SSE42: cost of 1 {{.*}} %I16 = fptoui
; AVX1: cost of 1 {{.*}} %I16 = fptoui
; AVX2: cost of 1 {{.*}} %I16 = fptoui
; AVX512: cost of 1 {{.*}} %I16 = fptoui
%I16 = fptoui float undef to i16
; <4 x float> -> <4 x i16>.
; SSE2: cost of 12 {{.*}} %V4I16 = fptoui
; SSE42: cost of 12 {{.*}} %V4I16 = fptoui
; AVX1: cost of 12 {{.*}} %V4I16 = fptoui
; AVX2: cost of 12 {{.*}} %V4I16 = fptoui
; AVX512: cost of 1 {{.*}} %V4I16 = fptoui
%V4I16 = fptoui <4 x float> undef to <4 x i16>
; <8 x float> -> <8 x i16>.
; SSE2: cost of 25 {{.*}} %V8I16 = fptoui
; SSE42: cost of 25 {{.*}} %V8I16 = fptoui
; AVX1: cost of 1 {{.*}} %V8I16 = fptoui
; AVX2: cost of 1 {{.*}} %V8I16 = fptoui
; AVX512: cost of 1 {{.*}} %V8I16 = fptoui
%V8I16 = fptoui <8 x float> undef to <8 x i16>
; <16 x float> -> <16 x i16>.
; SSE2: cost of 51 {{.*}} %V16I16 = fptoui
; SSE42: cost of 51 {{.*}} %V16I16 = fptoui
; AVX1: cost of 3 {{.*}} %V16I16 = fptoui
; AVX2: cost of 3 {{.*}} %V16I16 = fptoui
; AVX512: cost of 2 {{.*}} %V16I16 = fptoui
%V16I16 = fptoui <16 x float> undef to <16 x i16>
; The return value is irrelevant; only the per-instruction costs are checked.
ret i32 undef
}
; CHECK-LABEL: 'fptoui_float_i8'
; Cost-model expectations for fptoui from float to i8.
; Covers one scalar conversion and the <4 x>, <8 x> and <16 x> vector forms,
; all on undef operands. Each group of "cost of N" lines states the expected
; cost, per FileCheck prefix, of the conversion that follows the group.
define i32 @fptoui_float_i8(i32 %arg) {
; Scalar float -> i8.
; SSE2: cost of 1 {{.*}} %I8 = fptoui
; SSE42: cost of 1 {{.*}} %I8 = fptoui
; AVX1: cost of 1 {{.*}} %I8 = fptoui
; AVX2: cost of 1 {{.*}} %I8 = fptoui
; AVX512: cost of 1 {{.*}} %I8 = fptoui
%I8 = fptoui float undef to i8
; <4 x float> -> <4 x i8>.
; SSE2: cost of 12 {{.*}} %V4I8 = fptoui
; SSE42: cost of 12 {{.*}} %V4I8 = fptoui
; AVX1: cost of 12 {{.*}} %V4I8 = fptoui
; AVX2: cost of 12 {{.*}} %V4I8 = fptoui
; AVX512: cost of 1 {{.*}} %V4I8 = fptoui
%V4I8 = fptoui <4 x float> undef to <4 x i8>
; <8 x float> -> <8 x i8>.
; SSE2: cost of 25 {{.*}} %V8I8 = fptoui
; SSE42: cost of 25 {{.*}} %V8I8 = fptoui
; AVX1: cost of 1 {{.*}} %V8I8 = fptoui
; AVX2: cost of 1 {{.*}} %V8I8 = fptoui
; AVX512: cost of 1 {{.*}} %V8I8 = fptoui
%V8I8 = fptoui <8 x float> undef to <8 x i8>
; <16 x float> -> <16 x i8>.
; SSE2: cost of 51 {{.*}} %V16I8 = fptoui
; SSE42: cost of 51 {{.*}} %V16I8 = fptoui
; AVX1: cost of 3 {{.*}} %V16I8 = fptoui
; AVX2: cost of 3 {{.*}} %V16I8 = fptoui
; AVX512: cost of 2 {{.*}} %V16I8 = fptoui
%V16I8 = fptoui <16 x float> undef to <16 x i8>
; The return value is irrelevant; only the per-instruction costs are checked.
ret i32 undef
}

View File

@@ -1,51 +0,0 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Cost-model expectations for getelementptr instructions.
; The function computes GEPs with a zero index over scalar and <4 x T> element
; types, then two GEPs with very large constant indices. Every check line
; states the expected cost of the GEP on the line that follows it.
define void @test_geps() {
; Cost should be zero. We expect it to be folded into
; the instruction addressing mode.
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a0 = getelementptr inbounds i8, i8* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a1 = getelementptr inbounds i16, i16* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a2 = getelementptr inbounds i32, i32* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a3 = getelementptr inbounds i64, i64* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
%a4 = getelementptr inbounds float, float* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
%a5 = getelementptr inbounds double, double* undef, i32 0
; Vector geps should also have zero cost.
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
%a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
%a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
%a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
%a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
%a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
%a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
; Check that we handle outlandishly large GEPs properly. This is unlikely to
; be a valid pointer, but LLVM still generates GEPs like this sometimes in
; dead code.
;
; This GEP has index INT64_MAX, which is cost 1.
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
%giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807
; This GEP index wraps around to -1, which is cost 0.
; (The i128 constant below is 2^68 - 1, whose low 64 bits are all ones.)
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855
ret void
}

View File

@@ -1,7 +0,0 @@
; RUN: opt < %s -cost-model -analyze -mtriple=i386 -mcpu=corei7-avx | FileCheck %s
;CHECK: cost of 0 {{.*}} ret
; Minimal function holding an i64 add on undef operands followed by a ret.
; The check line placed above this definition expects a cost of 0 for the ret.
define i32 @no_info(i32 %arg) {
; The result is unused; no check line in this file refers to this instruction.
%e = add i64 undef, undef
ret i32 undef
}

View File

@@ -1,40 +0,0 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Cost-model expectations for extractelement and insertelement at various
; element indices. Each check line states the expected cost of the instruction
; on the line that follows it. %arg supplies a non-constant index and an i32
; element; %fl supplies a float element.
define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {
; Extracting float element 0 is expected to be free.
;CHECK: cost of 0 {{.*}} extract
%A = extractelement <4 x float> undef, i32 0
; Extracting i32 element 0 is expected to cost 1.
;CHECK: cost of 1 {{.*}} extract
%B = extractelement <4 x i32> undef, i32 0
; A non-zero float index is expected to cost 1.
;CHECK: cost of 1 {{.*}} extract
%C = extractelement <4 x float> undef, i32 1
; Same pattern on an <8 x float> source.
;CHECK: cost of 0 {{.*}} extract
%D = extractelement <8 x float> undef, i32 0
;CHECK: cost of 1 {{.*}} extract
%E = extractelement <8 x float> undef, i32 1
; Variable index.
;CHECK: cost of 1 {{.*}} extract
%F = extractelement <8 x float> undef, i32 %arg
; Inserting a float at index 0 is expected to be free, at index 1 to cost 1.
;CHECK: cost of 0 {{.*}} insert
%G = insertelement <4 x float> undef, float %fl, i32 0
;CHECK: cost of 1 {{.*}} insert
%H = insertelement <4 x float> undef, float %fl, i32 1
; Inserting an i32 at index 0 is expected to cost 1.
;CHECK: cost of 1 {{.*}} insert
%I = insertelement <4 x i32> undef, i32 %arg, i32 0
; Double inserts into progressively wider vectors.
; NOTE(review): indices 4 and 8 presumably land on element 0 of a later
; legal-width part of the split vector, hence the cost of 0 -- TODO confirm.
;CHECK: cost of 0 {{.*}} insert
%J = insertelement <4 x double> undef, double undef, i32 0
;CHECK: cost of 0 {{.*}} insert
%K = insertelement <8 x double> undef, double undef, i32 4
;CHECK: cost of 0 {{.*}} insert
%L = insertelement <16 x double> undef, double undef, i32 8
;CHECK: cost of 1 {{.*}} insert
%M = insertelement <16 x double> undef, double undef, i32 9
ret i32 0
}

View File

@@ -1,85 +0,0 @@
; REQUIRES: asserts
; RUN: opt -loop-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@A = global [10240 x i32] zeroinitializer, align 16
@B = global [10240 x i32] zeroinitializer, align 16
; Function Attrs: nounwind uwtable
; Loop that reads four consecutive i32 elements of @A per iteration, sums them
; and stores the sum to @B at the induction index; the induction variable
; advances by 4 and the loop exits once the next index reaches 1024.
; The check lines expect the loop vectorizer's estimated cost for the first
; load (%0) at vectorization factors 1, 2, 4, 8 and 16.
define void @load_i32_interleave4() {
;CHECK-LABEL: load_i32_interleave4
;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %0 = load
;CHECK: Found an estimated cost of 5 for VF 2 For instruction: %0 = load
;CHECK: Found an estimated cost of 5 for VF 4 For instruction: %0 = load
;CHECK: Found an estimated cost of 8 for VF 8 For instruction: %0 = load
;CHECK: Found an estimated cost of 22 for VF 16 For instruction: %0 = load
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; Member 0: A[iv].
%arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 16
; Member 1: A[iv | 1], accumulated into the running sum.
%1 = or i64 %indvars.iv, 1
%arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %1
%2 = load i32, i32* %arrayidx2, align 4
%add3 = add nsw i32 %2, %0
; Member 2: A[iv | 2].
%3 = or i64 %indvars.iv, 2
%arrayidx6 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %3
%4 = load i32, i32* %arrayidx6, align 8
%add7 = add nsw i32 %add3, %4
; Member 3: A[iv | 3].
%5 = or i64 %indvars.iv, 3
%arrayidx10 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %5
%6 = load i32, i32* %arrayidx10, align 4
%add11 = add nsw i32 %add7, %6
; B[iv] = sum of the four loaded values.
%arrayidx13 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
store i32 %add11, i32* %arrayidx13, align 16
; Step by 4 and loop while the next index is below 1024.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
%cmp = icmp slt i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; Loop that reads five consecutive i32 elements of @A per iteration, sums them
; and stores the sum to @B at the induction index; the induction variable
; advances by 5 and the loop exits once the next index reaches 1024.
; The check lines expect the loop vectorizer's estimated cost for the first
; load (%0) at vectorization factors 1, 2, 4, 8 and 16.
define void @load_i32_interleave5() {
;CHECK-LABEL: load_i32_interleave5
;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %0 = load
;CHECK: Found an estimated cost of 6 for VF 2 For instruction: %0 = load
;CHECK: Found an estimated cost of 9 for VF 4 For instruction: %0 = load
;CHECK: Found an estimated cost of 18 for VF 8 For instruction: %0 = load
;CHECK: Found an estimated cost of 35 for VF 16 For instruction: %0 = load
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; Member 0: A[iv].
%arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
; Member 1: A[iv + 1], accumulated into the running sum.
%1 = add nuw nsw i64 %indvars.iv, 1
%arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %1
%2 = load i32, i32* %arrayidx2, align 4
%add3 = add nsw i32 %2, %0
; Member 2: A[iv + 2].
%3 = add nuw nsw i64 %indvars.iv, 2
%arrayidx6 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %3
%4 = load i32, i32* %arrayidx6, align 4
%add7 = add nsw i32 %add3, %4
; Member 3: A[iv + 3].
%5 = add nuw nsw i64 %indvars.iv, 3
%arrayidx10 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %5
%6 = load i32, i32* %arrayidx10, align 4
%add11 = add nsw i32 %add7, %6
; Member 4: A[iv + 4].
%7 = add nuw nsw i64 %indvars.iv, 4
%arrayidx14 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %7
%8 = load i32, i32* %arrayidx14, align 4
%add15 = add nsw i32 %add11, %8
; B[iv] = sum of the five loaded values.
%arrayidx17 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
store i32 %add15, i32* %arrayidx17, align 4
; Step by 5 and loop while the next index is below 1024.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
%cmp = icmp slt i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}

View File

@@ -1,85 +0,0 @@
; REQUIRES: asserts
; RUN: opt -loop-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1| FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@A = global [10240 x i32] zeroinitializer, align 16
@B = global [10240 x i32] zeroinitializer, align 16
; Function Attrs: nounwind uwtable
; Loop that loads A[iv] once per iteration and stores that value plus 0, 1, 2
; and 3 to four consecutive i32 elements of @B; the induction variable advances
; by 4 and the loop exits once the next index reaches 1024.
; The check lines expect the loop vectorizer's estimated cost for the last
; store (of %add16) at vectorization factors 1, 2, 4, 8 and 16.
define void @store_i32_interleave4() {
;CHECK-LABEL: store_i32_interleave4
;CHECK: Found an estimated cost of 1 for VF 1 For instruction: store i32 %add16
;CHECK: Found an estimated cost of 5 for VF 2 For instruction: store i32 %add16
;CHECK: Found an estimated cost of 5 for VF 4 For instruction: store i32 %add16
;CHECK: Found an estimated cost of 11 for VF 8 For instruction: store i32 %add16
;CHECK: Found an estimated cost of 22 for VF 16 For instruction: store i32 %add16
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; Source value: A[iv].
%arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 16
; Member 0: B[iv] = A[iv].
%arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
store i32 %0, i32* %arrayidx2, align 16
; Member 1: B[iv | 1] = A[iv] + 1.
%add = add nsw i32 %0, 1
%1 = or i64 %indvars.iv, 1
%arrayidx7 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %1
store i32 %add, i32* %arrayidx7, align 4
; Member 2: B[iv | 2] = A[iv] + 2.
%add10 = add nsw i32 %0, 2
%2 = or i64 %indvars.iv, 2
%arrayidx13 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %2
store i32 %add10, i32* %arrayidx13, align 8
; Member 3: B[iv | 3] = A[iv] + 3 (the store named by the check lines).
%add16 = add nsw i32 %0, 3
%3 = or i64 %indvars.iv, 3
%arrayidx19 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %3
store i32 %add16, i32* %arrayidx19, align 4
; Step by 4 and loop while the next index is below 1024.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
%cmp = icmp slt i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; Loop that loads A[iv] once per iteration and stores that value plus 0..4 to
; five consecutive i32 elements of @B; the induction variable advances by 5 and
; the loop exits once the next index reaches 1024.
; The check lines expect the loop vectorizer's estimated cost for the last
; store (of %add22) at vectorization factors 1, 2, 4, 8 and 16.
define void @store_i32_interleave5() {
;CHECK-LABEL: store_i32_interleave5
;CHECK: Found an estimated cost of 1 for VF 1 For instruction: store i32 %add22
;CHECK: Found an estimated cost of 7 for VF 2 For instruction: store i32 %add22
;CHECK: Found an estimated cost of 14 for VF 4 For instruction: store i32 %add22
;CHECK: Found an estimated cost of 21 for VF 8 For instruction: store i32 %add22
;CHECK: Found an estimated cost of 35 for VF 16 For instruction: store i32 %add22
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; Source value: A[iv].
%arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
; Member 0: B[iv] = A[iv].
%arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
store i32 %0, i32* %arrayidx2, align 4
; Member 1: B[iv + 1] = A[iv] + 1.
%add = add nsw i32 %0, 1
%1 = add nuw nsw i64 %indvars.iv, 1
%arrayidx7 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %1
store i32 %add, i32* %arrayidx7, align 4
; Member 2: B[iv + 2] = A[iv] + 2.
%add10 = add nsw i32 %0, 2
%2 = add nuw nsw i64 %indvars.iv, 2
%arrayidx13 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %2
store i32 %add10, i32* %arrayidx13, align 4
; Member 3: B[iv + 3] = A[iv] + 3.
%add16 = add nsw i32 %0, 3
%3 = add nuw nsw i64 %indvars.iv, 3
%arrayidx19 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %3
store i32 %add16, i32* %arrayidx19, align 4
; Member 4: B[iv + 4] = A[iv] + 4 (the store named by the check lines).
%add22 = add nsw i32 %0, 4
%4 = add nuw nsw i64 %indvars.iv, 4
%arrayidx25 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %4
store i32 %add22, i32* %arrayidx25, align 4
; Step by 5 and loop while the next index is below 1024.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
%cmp = icmp slt i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}

View File

@@ -1,141 +0,0 @@
; REQUIRES: asserts
; RUN: opt -S -loop-vectorize -debug-only=loop-vectorize -mcpu=skylake %s 2>&1 | FileCheck %s
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-gnu"
@src = common local_unnamed_addr global [120 x float] zeroinitializer, align 4
@dst = common local_unnamed_addr global [120 x float] zeroinitializer, align 4
; Function Attrs: norecurse nounwind
; Loop with eight members per iteration: for each j in 0..7 it computes
; dst[i | j] = dst[i | j] + src[i | j] * k, then advances i by 8 and repeats
; while i < %width_. The loop is skipped entirely when %width_ <= 0.
; The check line in the entry block expects the loop vectorizer's estimated
; cost for the first load (%0) at vectorization factor 8.
define void @stride8(float %k, i32 %width_) {
entry:
; CHECK: Found an estimated cost of 48 for VF 8 For instruction: %0 = load float
; Guard: enter the loop only for a positive width.
%cmp72 = icmp sgt i32 %width_, 0
br i1 %cmp72, label %for.body.lr.ph, label %for.cond.cleanup
for.body.lr.ph: ; preds = %entry
br label %for.body
for.cond.cleanup.loopexit: ; preds = %for.body
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void
for.body: ; preds = %for.body.lr.ph, %for.body
%i.073 = phi i32 [ 0, %for.body.lr.ph ], [ %add46, %for.body ]
; Member 0: dst[i] += src[i] * k.
%arrayidx = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %i.073
%0 = load float, float* %arrayidx, align 4
%mul = fmul fast float %0, %k
%arrayidx2 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %i.073
%1 = load float, float* %arrayidx2, align 4
%add3 = fadd fast float %1, %mul
store float %add3, float* %arrayidx2, align 4
; Member 1: index i | 1.
%add4 = or i32 %i.073, 1
%arrayidx5 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add4
%2 = load float, float* %arrayidx5, align 4
%mul6 = fmul fast float %2, %k
%arrayidx8 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add4
%3 = load float, float* %arrayidx8, align 4
%add9 = fadd fast float %3, %mul6
store float %add9, float* %arrayidx8, align 4
; Member 2: index i | 2.
%add10 = or i32 %i.073, 2
%arrayidx11 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add10
%4 = load float, float* %arrayidx11, align 4
%mul12 = fmul fast float %4, %k
%arrayidx14 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add10
%5 = load float, float* %arrayidx14, align 4
%add15 = fadd fast float %5, %mul12
store float %add15, float* %arrayidx14, align 4
; Member 3: index i | 3.
%add16 = or i32 %i.073, 3
%arrayidx17 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add16
%6 = load float, float* %arrayidx17, align 4
%mul18 = fmul fast float %6, %k
%arrayidx20 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add16
%7 = load float, float* %arrayidx20, align 4
%add21 = fadd fast float %7, %mul18
store float %add21, float* %arrayidx20, align 4
; Member 4: index i | 4.
%add22 = or i32 %i.073, 4
%arrayidx23 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add22
%8 = load float, float* %arrayidx23, align 4
%mul24 = fmul fast float %8, %k
%arrayidx26 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add22
%9 = load float, float* %arrayidx26, align 4
%add27 = fadd fast float %9, %mul24
store float %add27, float* %arrayidx26, align 4
; Member 5: index i | 5.
%add28 = or i32 %i.073, 5
%arrayidx29 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add28
%10 = load float, float* %arrayidx29, align 4
%mul30 = fmul fast float %10, %k
%arrayidx32 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add28
%11 = load float, float* %arrayidx32, align 4
%add33 = fadd fast float %11, %mul30
store float %add33, float* %arrayidx32, align 4
; Member 6: index i | 6.
%add34 = or i32 %i.073, 6
%arrayidx35 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add34
%12 = load float, float* %arrayidx35, align 4
%mul36 = fmul fast float %12, %k
%arrayidx38 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add34
%13 = load float, float* %arrayidx38, align 4
%add39 = fadd fast float %13, %mul36
store float %add39, float* %arrayidx38, align 4
; Member 7: index i | 7.
%add40 = or i32 %i.073, 7
%arrayidx41 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add40
%14 = load float, float* %arrayidx41, align 4
%mul42 = fmul fast float %14, %k
%arrayidx44 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add40
%15 = load float, float* %arrayidx44, align 4
%add45 = fadd fast float %15, %mul42
store float %add45, float* %arrayidx44, align 4
; Step by 8 and loop while the next index is below %width_.
%add46 = add nuw nsw i32 %i.073, 8
%cmp = icmp slt i32 %add46, %width_
br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
}
; Function Attrs: norecurse nounwind
; Loop with three members per iteration: for each j in 0..2 it computes
; dst[i + j] = dst[i + j] + src[i + j] * k, then advances i by 3 and repeats
; while i < %width_. The loop is skipped entirely when %width_ <= 0.
; The check line in the entry block expects the loop vectorizer's estimated
; cost for the first load (%0) at vectorization factor 8.
define void @stride3(float %k, i32 %width_) {
entry:
; CHECK: Found an estimated cost of 20 for VF 8 For instruction: %0 = load float
; Guard: enter the loop only for a positive width.
%cmp27 = icmp sgt i32 %width_, 0
br i1 %cmp27, label %for.body.lr.ph, label %for.cond.cleanup
for.body.lr.ph: ; preds = %entry
br label %for.body
for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %for.body.lr.ph, %for.body
%i.028 = phi i32 [ 0, %for.body.lr.ph ], [ %add16, %for.body ]
; Member 0: dst[i] += src[i] * k.
%arrayidx = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %i.028
%0 = load float, float* %arrayidx, align 4
%mul = fmul fast float %0, %k
%arrayidx2 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %i.028
%1 = load float, float* %arrayidx2, align 4
%add3 = fadd fast float %1, %mul
store float %add3, float* %arrayidx2, align 4
; Member 1: index i + 1.
%add4 = add nuw nsw i32 %i.028, 1
%arrayidx5 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add4
%2 = load float, float* %arrayidx5, align 4
%mul6 = fmul fast float %2, %k
%arrayidx8 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add4
%3 = load float, float* %arrayidx8, align 4
%add9 = fadd fast float %3, %mul6
store float %add9, float* %arrayidx8, align 4
; Member 2: index i + 2.
%add10 = add nuw nsw i32 %i.028, 2
%arrayidx11 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add10
%4 = load float, float* %arrayidx11, align 4
%mul12 = fmul fast float %4, %k
%arrayidx14 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add10
%5 = load float, float* %arrayidx14, align 4
%add15 = fadd fast float %5, %mul12
store float %add15, float* %arrayidx14, align 4
; Step by 3 and loop while the next index is below %width_.
%add16 = add nuw nsw i32 %i.028, 3
%cmp = icmp slt i32 %add16, %width_
br i1 %cmp, label %for.body, label %for.cond.cleanup
}

Some files were not shown because too many files have changed in this diff Show More