; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+xop | FileCheck %s -check-prefix=CHECK -check-prefix=XOP
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VBMI

;
; Verify the cost model for 2 src shuffles
;
; Layout: one function per element type. Each function takes a pair of input
; vectors per vector width and shuffles each pair once; the results are unused
; and the function returns void. The directives placed above each shuffle give
; the cost expected for every subtarget configured by the RUN lines.
;
; NOTE(review): the constant mask operand of every shufflevector was absent
; from the copy of this file that was reviewed (only the mask type survived).
; The masks below are arbitrary permutes supplied so that the IR parses again.
; Each one reads elements from both inputs and is neither lane-preserving,
; a full reversal, nor a splat. Confirm them against the upstream test, and
; re-run the test, before relying on the expected costs.
;

; Two-source shuffles of double elements (2, 4, 8 and 16 elements wide).
; CHECK-LABEL: 'test_vXf64'
define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) {
  ; SSE2: cost of 1 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
  ; XOP: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0>

  ; SSE2: cost of 6 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 6 {{.*}} %V256 = shufflevector
  ; XOP: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>

  ; SSE2: cost of 28 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 28 {{.*}} %V512 = shufflevector
  ; XOP: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>

  ; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
  ; XOP: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
  ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}

; Two-source shuffles of i64 elements (2, 4, 8 and 16 elements wide).
; CHECK-LABEL: 'test_vXi64'
define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) {
  ; SSE2: cost of 1 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
  ; XOP: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0>

  ; SSE2: cost of 6 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 6 {{.*}} %V256 = shufflevector
  ; XOP: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>

  ; SSE2: cost of 28 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 28 {{.*}} %V512 = shufflevector
  ; XOP: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>

  ; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
  ; XOP: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
  ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}

; Two-source shuffles of float elements (4, 8, 16 and 32 elements wide).
; CHECK-LABEL: 'test_vXf32'
define void @test_vXf32(<4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) {
  ; SSE2: cost of 2 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 2 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 2 {{.*}} %V128 = shufflevector
  ; XOP: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>

  ; SSE2: cost of 12 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
  ; XOP: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 15>

  ; SSE2: cost of 56 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 56 {{.*}} %V512 = shufflevector
  ; XOP: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 20, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
  ; XOP: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
  ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 29, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}

; Two-source shuffles of i32 elements (4, 8, 16 and 32 elements wide).
; CHECK-LABEL: 'test_vXi32'
define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) {
  ; SSE2: cost of 2 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 2 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 2 {{.*}} %V128 = shufflevector
  ; XOP: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>

  ; SSE2: cost of 12 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
  ; XOP: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 15>

  ; SSE2: cost of 56 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 56 {{.*}} %V512 = shufflevector
  ; XOP: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 20, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
  ; XOP: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
  ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 29, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}

; Two-source shuffles of i16 elements (8, 16, 32 and 64 elements wide).
; The AVX-512 configurations are checked individually in this function.
; CHECK-LABEL: 'test_vXi16'
define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024, <8 x i16> %src128_1, <16 x i16> %src256_1, <32 x i16> %src512_1, <64 x i16> %src1024_1) {
  ; SSE2: cost of 8 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 3 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 3 {{.*}} %V128 = shufflevector
  ; XOP: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX512F: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX512BW: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX512VBMI: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 48 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 18 {{.*}} %V256 = shufflevector
  ; XOP: cost of 9 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 15 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 7 {{.*}} %V256 = shufflevector
  ; AVX512F: cost of 7 {{.*}} %V256 = shufflevector
  ; AVX512BW: cost of 1 {{.*}} %V256 = shufflevector
  ; AVX512VBMI: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 224 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 84 {{.*}} %V512 = shufflevector
  ; XOP: cost of 54 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 90 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 42 {{.*}} %V512 = shufflevector
  ; AVX512F: cost of 42 {{.*}} %V512 = shufflevector
  ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
  ; AVX512VBMI: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 960 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 360 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 360 {{.*}} %V1024 = shufflevector
  ; XOP: cost of 252 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 420 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 196 {{.*}} %V1024 = shufflevector
  ; AVX512F: cost of 196 {{.*}} %V1024 = shufflevector
  ; AVX512BW: cost of 6 {{.*}} %V1024 = shufflevector
  ; AVX512VBMI: cost of 6 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 66>

  ret void
}

; Two-source shuffles of i8 elements (16, 32 and 64 elements wide). Unlike the
; other functions, this one has no 1024-bit operands.
; CHECK-LABEL: 'test_vXi8'
define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512, <16 x i8> %src128_1, <32 x i8> %src256_1, <64 x i8> %src512_1) {
  ; SSE2: cost of 13 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 3 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 3 {{.*}} %V128 = shufflevector
  ; XOP: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX512F: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX512BW: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX512VBMI: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 78 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 18 {{.*}} %V256 = shufflevector
  ; XOP: cost of 9 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 15 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 7 {{.*}} %V256 = shufflevector
  ; AVX512F: cost of 7 {{.*}} %V256 = shufflevector
  ; AVX512BW: cost of 3 {{.*}} %V256 = shufflevector
  ; AVX512VBMI: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>

  ; SSE2: cost of 364 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 84 {{.*}} %V512 = shufflevector
  ; XOP: cost of 54 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 90 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 42 {{.*}} %V512 = shufflevector
  ; AVX512F: cost of 42 {{.*}} %V512 = shufflevector
  ; AVX512BW: cost of 19 {{.*}} %V512 = shufflevector
  ; AVX512VBMI: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 88>

  ret void
}