Imported Upstream version 5.18.0.207

Former-commit-id: 3b152f462918d427ce18620a2cbe4f8b79650449
This commit is contained in:
Xamarin Public Jenkins (auto-signing)
2018-11-17 08:23:10 +00:00
parent 8e12397d70
commit eb85e2fc17
28480 changed files with 72 additions and 3866936 deletions

View File

@@ -1,194 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; The test checks the folding of cmp(sub(a,b),0) into cmp(a,b).
define i8 @sub_compare_foldingPD128_safe(<2 x double> %a, <2 x double> %b){
; CHECK-LABEL: @sub_compare_foldingPD128_safe(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SUB_SAFE:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[SUB_SAFE]], <2 x double> zeroinitializer, i32 5, i8 -1)
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
%sub.safe = fsub <2 x double> %a, %b
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.safe , <2 x double> zeroinitializer, i32 5, i8 -1)
ret i8 %0
}
define i8 @sub_compare_foldingPD128(<2 x double> %a, <2 x double> %b){
; CHECK-LABEL: @sub_compare_foldingPD128(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5, i8 -1)
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
%sub.i = fsub ninf <2 x double> %a, %b
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.i , <2 x double> zeroinitializer, i32 5, i8 -1)
ret i8 %0
}
define i8 @sub_compare_foldingPD256(<4 x double> %a, <4 x double> %b){
; CHECK-LABEL: @sub_compare_foldingPD256(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> [[A:%.*]], <4 x double> [[B:%.*]], i32 5, i8 -1)
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
%sub.i1 = fsub ninf <4 x double> %a, %b
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5, i8 -1)
ret i8 %0
}
define i8 @sub_compare_foldingPD512(<8 x double> %a, <8 x double> %b){
; CHECK-LABEL: @sub_compare_foldingPD512(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 11, i8 -1, i32 4)
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
%sub.i2 = fsub ninf <8 x double> %a, %b
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %sub.i2, <8 x double> zeroinitializer, i32 11, i8 -1, i32 4)
ret i8 %0
}
define i8 @sub_compare_foldingPS128(<4 x float> %a, <4 x float> %b){
; CHECK-LABEL: @sub_compare_foldingPS128(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 12, i8 -1)
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
%sub.i3 = fsub ninf <4 x float> %a, %b
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %sub.i3, <4 x float> zeroinitializer, i32 12, i8 -1)
ret i8 %0
}
define i8 @sub_compare_foldingPS256(<8 x float> %a, <8 x float> %b){
; CHECK-LABEL: @sub_compare_foldingPS256(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], i32 5, i8 -1)
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
%sub.i4 = fsub ninf <8 x float> %a, %b
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %sub.i4, <8 x float> zeroinitializer, i32 5, i8 -1)
ret i8 %0
}
define i16 @sub_compare_foldingPS512(<16 x float> %a, <16 x float> %b){
; CHECK-LABEL: @sub_compare_foldingPS512(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 11, i16 -1, i32 4)
; CHECK-NEXT: ret i16 [[TMP0]]
;
entry:
%sub.i5 = fsub ninf <16 x float> %a, %b
%0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %sub.i5, <16 x float> zeroinitializer, i32 11, i16 -1, i32 4)
ret i16 %0
}
define i8 @sub_compare_folding_swapPD128(<2 x double> %a, <2 x double> %b){
; CHECK-LABEL: @sub_compare_folding_swapPD128(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[B:%.*]], <2 x double> [[A:%.*]], i32 5, i8 -1)
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
%sub.i = fsub ninf <2 x double> %a, %b
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> zeroinitializer, <2 x double> %sub.i, i32 5, i8 -1)
ret i8 %0
}
define i8 @sub_compare_folding_swapPD256(<4 x double> %a, <4 x double> %b){
; CHECK-LABEL: @sub_compare_folding_swapPD256(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> [[B:%.*]], <4 x double> [[A:%.*]], i32 5, i8 -1)
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
%sub.i = fsub ninf <4 x double> %a, %b
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> %sub.i, i32 5, i8 -1)
ret i8 %0
}
define i8 @sub_compare_folding_swapPD256_undef(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @sub_compare_folding_swapPD256_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> fsub (<4 x double> undef, <4 x double> undef), <4 x double> zeroinitializer, i32 5, i8 -1)
; CHECK-NEXT: ret i8 [[TMP]]
;
entry:
%sub.i1 = fsub ninf <4 x double> undef, undef
%tmp = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5, i8 -1)
ret i8 %tmp
}
define i8 @sub_compare_folding_swapPD512(<8 x double> %a, <8 x double> %b){
; CHECK-LABEL: @sub_compare_folding_swapPD512(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[B:%.*]], <8 x double> [[A:%.*]], i32 11, i8 -1, i32 4)
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
%sub.i = fsub ninf <8 x double> %a, %b
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> zeroinitializer, <8 x double> %sub.i, i32 11, i8 -1, i32 4)
ret i8 %0
}
define i8 @sub_compare_folding_swapPS128(<4 x float> %a, <4 x float> %b){
; CHECK-LABEL: @sub_compare_folding_swapPS128(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> [[B:%.*]], <4 x float> [[A:%.*]], i32 12, i8 -1)
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
%sub.i = fsub ninf <4 x float> %a, %b
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> zeroinitializer, <4 x float> %sub.i, i32 12, i8 -1)
ret i8 %0
}
define i8 @sub_compare_folding_swapPS256(<8 x float> %a, <8 x float> %b){
; CHECK-LABEL: @sub_compare_folding_swapPS256(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> [[B:%.*]], <8 x float> [[A:%.*]], i32 5, i8 -1)
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
%sub.i = fsub ninf <8 x float> %a, %b
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> %sub.i, i32 5, i8 -1)
ret i8 %0
}
define i16 @sub_compare_folding_swapPS512(<16 x float> %a, <16 x float> %b){
; CHECK-LABEL: @sub_compare_folding_swapPS512(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[B:%.*]], <16 x float> [[A:%.*]], i32 11, i16 -1, i32 4)
; CHECK-NEXT: ret i16 [[TMP0]]
;
entry:
%sub.i = fsub ninf <16 x float> %a, %b
%0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> zeroinitializer, <16 x float> %sub.i, i32 11, i16 -1, i32 4)
ret i16 %0
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, i8)
declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32, i8)
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i8, i32)
declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, i8)
declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8)
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32)

View File

@@ -1,151 +0,0 @@
; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx -mcpu=core-avx2 -S | FileCheck %s
define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> %ab, <2 x double> %xy, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00>)
ret <2 x double> %1
}
define <2 x double> @constant_blendvpd_zero(<2 x double> %xy, <2 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd_zero
; CHECK-NEXT: ret <2 x double> %xy
%1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> zeroinitializer)
ret <2 x double> %1
}
define <2 x double> @constant_blendvpd_dup(<2 x double> %xy, <2 x double> %sel) {
; CHECK-LABEL: @constant_blendvpd_dup
; CHECK-NEXT: ret <2 x double> %xy
%1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %xy, <2 x double> %sel)
ret <2 x double> %1
}
define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %abcd, <4 x float> %xyzw, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
ret <4 x float> %1
}
define <4 x float> @constant_blendvps_zero(<4 x float> %xyzw, <4 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps_zero
; CHECK-NEXT: ret <4 x float> %xyzw
%1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> zeroinitializer)
ret <4 x float> %1
}
define <4 x float> @constant_blendvps_dup(<4 x float> %xyzw, <4 x float> %sel) {
; CHECK-LABEL: @constant_blendvps_dup
; CHECK-NEXT: ret <4 x float> %xyzw
%1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %xyzw, <4 x float> %sel)
ret <4 x float> %1
}
define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %abcd, <16 x i8> %xyzw, <16 x i32> <i32 16, i32 17, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 24, i32 25, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
%1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
ret <16 x i8> %1
}
define <16 x i8> @constant_pblendvb_zero(<16 x i8> %xyzw, <16 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb_zero
; CHECK-NEXT: ret <16 x i8> %xyzw
%1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> zeroinitializer)
ret <16 x i8> %1
}
define <16 x i8> @constant_pblendvb_dup(<16 x i8> %xyzw, <16 x i8> %sel) {
; CHECK-LABEL: @constant_pblendvb_dup
; CHECK-NEXT: ret <16 x i8> %xyzw
%1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %xyzw, <16 x i8> %sel)
ret <16 x i8> %1
}
define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd_avx(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %ab, <4 x double> %xy, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: ret <4 x double> [[TMP1]]
;
%1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00, double 0xFFFFFFFFE0000000, double 0.000000e+00>)
ret <4 x double> %1
}
define <4 x double> @constant_blendvpd_avx_zero(<4 x double> %xy, <4 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd_avx_zero
; CHECK-NEXT: ret <4 x double> %xy
%1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> zeroinitializer)
ret <4 x double> %1
}
define <4 x double> @constant_blendvpd_avx_dup(<4 x double> %xy, <4 x double> %sel) {
; CHECK-LABEL: @constant_blendvpd_avx_dup
; CHECK-NEXT: ret <4 x double> %xy
%1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %xy, <4 x double> %sel)
ret <4 x double> %1
}
define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps_avx(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %abcd, <8 x float> %xyzw, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 12, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[TMP1]]
;
%1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
ret <8 x float> %1
}
define <8 x float> @constant_blendvps_avx_zero(<8 x float> %xyzw, <8 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps_avx_zero
; CHECK-NEXT: ret <8 x float> %xyzw
%1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> zeroinitializer)
ret <8 x float> %1
}
define <8 x float> @constant_blendvps_avx_dup(<8 x float> %xyzw, <8 x float> %sel) {
; CHECK-LABEL: @constant_blendvps_avx_dup
; CHECK-NEXT: ret <8 x float> %xyzw
%1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %xyzw, <8 x float> %sel)
ret <8 x float> %1
}
define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %abcd, <32 x i8> %xyzw, <32 x i32> <i32 32, i32 33, i32 2, i32 35, i32 4, i32 5, i32 6, i32 39, i32 40, i32 41, i32 10, i32 43, i32 12, i32 13, i32 14, i32 47, i32 48, i32 49, i32 18, i32 51, i32 20, i32 21, i32 22, i32 55, i32 56, i32 57, i32 26, i32 59, i32 28, i32 29, i32 30, i32 63>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
%1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd,
<32 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
ret <32 x i8> %1
}
define <32 x i8> @constant_pblendvb_avx2_zero(<32 x i8> %xyzw, <32 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb_avx2_zero
; CHECK-NEXT: ret <32 x i8> %xyzw
%1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> zeroinitializer)
ret <32 x i8> %1
}
define <32 x i8> @constant_pblendvb_avx2_dup(<32 x i8> %xyzw, <32 x i8> %sel) {
; CHECK-LABEL: @constant_pblendvb_avx2_dup
; CHECK-NEXT: ret <32 x i8> %xyzw
%1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %xyzw, <32 x i8> %sel)
ret <32 x i8> %1
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)

View File

@@ -1,2 +0,0 @@
if not 'X86' in config.root.targets:
config.unsupported = True

View File

@@ -1,39 +0,0 @@
; RUN: opt < %s -instcombine -S | grep shufflevector
; PR2645
; instcombine shouldn't delete the shufflevector.
define internal void @""(i8*, i32, i8*) {
; <label>:3
br label %4
; <label>:4 ; preds = %6, %3
%.0 = phi i32 [ 0, %3 ], [ %19, %6 ] ; <i32> [#uses=4]
%5 = icmp slt i32 %.0, %1 ; <i1> [#uses=1]
br i1 %5, label %6, label %20
; <label>:6 ; preds = %4
%7 = getelementptr i8, i8* %2, i32 %.0 ; <i8*> [#uses=1]
%8 = bitcast i8* %7 to <4 x i16>* ; <<4 x i16>*> [#uses=1]
%9 = load <4 x i16>, <4 x i16>* %8, align 1 ; <<4 x i16>> [#uses=1]
%10 = bitcast <4 x i16> %9 to <1 x i64> ; <<1 x i64>> [#uses=1]
%11 = call <2 x i64> @foo(<1 x i64> %10)
; <<2 x i64>> [#uses=1]
%12 = bitcast <2 x i64> %11 to <4 x i32> ; <<4 x i32>> [#uses=1]
%13 = bitcast <4 x i32> %12 to <8 x i16> ; <<8 x i16>> [#uses=2]
%14 = shufflevector <8 x i16> %13, <8 x i16> %13, <8 x i32> < i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3 > ; <<8 x i16>> [#uses=1]
%15 = bitcast <8 x i16> %14 to <4 x i32> ; <<4 x i32>> [#uses=1]
%16 = sitofp <4 x i32> %15 to <4 x float> ; <<4 x float>> [#uses=1]
%17 = getelementptr i8, i8* %0, i32 %.0 ; <i8*> [#uses=1]
%18 = bitcast i8* %17 to <4 x float>* ; <<4 x float>*> [#uses=1]
store <4 x float> %16, <4 x float>* %18, align 1
%19 = add i32 %.0, 1 ; <i32> [#uses=1]
br label %4
; <label>:20 ; preds = %4
call void @llvm.x86.mmx.emms( )
ret void
}
declare <2 x i64> @foo(<1 x i64>)
declare void @llvm.x86.mmx.emms( )

View File

@@ -1,110 +0,0 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
; CHECK-NOT: shufflevector{{.*}}i32 8"
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
%struct.ActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
%struct.AlphaTest = type { float, i16, i8, i8 }
%struct.ArrayRange = type { i8, i8, i8, i8 }
%struct.BlendMode = type { i16, i16, i16, i16, %struct.IColor4, i16, i16, i8, i8, i8, i8 }
%struct.ClearColor = type { double, %struct.IColor4, %struct.IColor4, float, i32 }
%struct.ClipPlane = type { i32, [6 x %struct.IColor4] }
%struct.ColorBuffer = type { i16, i8, i8, [8 x i16], [0 x i32] }
%struct.ColorMatrix = type { [16 x float]*, %struct.ImagingColorScale }
%struct.Convolution = type { %struct.IColor4, %struct.ImagingColorScale, i16, i16, [0 x i32], float*, i32, i32 }
%struct.DepthTest = type { i16, i16, i8, i8, i8, i8, double, double }
%struct.FixedFunction = type { %struct.PPStreamToken* }
%struct.FogMode = type { %struct.IColor4, float, float, float, float, float, i16, i16, i16, i8, i8 }
%struct.HintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 }
%struct.Histogram = type { %struct.ProgramLimits*, i32, i16, i8, i8 }
%struct.ImagingColorScale = type { %struct.TCoord2, %struct.TCoord2, %struct.TCoord2, %struct.TCoord2 }
%struct.ImagingSubset = type { %struct.Convolution, %struct.Convolution, %struct.Convolution, %struct.ColorMatrix, %struct.Minmax, %struct.Histogram, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, i32, [0 x i32] }
%struct.Light = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.PointLineLimits, float, float, float, float, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, float, float, float, float }
%struct.LightModel = type { %struct.IColor4, [8 x %struct.Light], [2 x %struct.Material], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 }
%struct.LightProduct = type { %struct.IColor4, %struct.IColor4, %struct.IColor4 }
%struct.LineMode = type { float, i32, i16, i16, i8, i8, i8, i8 }
%struct.LogicOp = type { i16, i8, i8 }
%struct.MaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.Material = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, float, float, float, float, [8 x %struct.LightProduct], %struct.IColor4, [8 x i32] }
%struct.Minmax = type { %struct.MinmaxTable*, i16, i8, i8, [0 x i32] }
%struct.MinmaxTable = type { %struct.IColor4, %struct.IColor4 }
%struct.Mipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] }
%struct.Multisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.PipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.IColor4* }
%struct.PixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.PixelMode = type { float, float, %struct.PixelStore, %struct.PixelTransfer, %struct.PixelMap, %struct.ImagingSubset, i32, i32 }
%struct.PixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 }
%struct.PixelStore = type { %struct.PixelPack, %struct.PixelPack }
%struct.PixelTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }
%struct.PluginBufferData = type { i32 }
%struct.PointLineLimits = type { float, float, float }
%struct.PointMode = type { float, float, float, float, %struct.PointLineLimits, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 }
%struct.PolygonMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.ProgramLimits = type { i32, i32, i32, i32 }
%struct.RegisterCombiners = type { i8, i8, i8, i8, i32, [2 x %struct.IColor4], [8 x %struct.RegisterCombinersPerStageState], %struct.RegisterCombinersFinalStageState }
%struct.RegisterCombinersFinalStageState = type { i8, i8, i8, i8, [7 x %struct.RegisterCombinersPerVariableState] }
%struct.RegisterCombinersPerPortionState = type { [4 x %struct.RegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 }
%struct.RegisterCombinersPerStageState = type { [2 x %struct.RegisterCombinersPerPortionState], [2 x %struct.IColor4] }
%struct.RegisterCombinersPerVariableState = type { i16, i16, i16, i16 }
%struct.SWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 }
%struct.ScissorTest = type { %struct.ProgramLimits, i8, i8, i8, i8 }
%struct.State = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.IColor4], [128 x %struct.IColor4], %struct.Viewport, %struct.Transform, %struct.LightModel, %struct.ActiveTextureTargets, %struct.AlphaTest, %struct.BlendMode, %struct.ClearColor, %struct.ColorBuffer, %struct.DepthTest, %struct.ArrayRange, %struct.FogMode, %struct.HintMode, %struct.LineMode, %struct.LogicOp, %struct.MaskMode, %struct.PixelMode, %struct.PointMode, %struct.PolygonMode, %struct.ScissorTest, i32, %struct.StencilTest, [8 x %struct.TextureMode], [16 x %struct.TextureImageMode], %struct.ArrayRange, [8 x %struct.TextureCoordGen], %struct.ClipPlane, %struct.Multisample, %struct.RegisterCombiners, %struct.ArrayRange, %struct.ArrayRange, [3 x %struct.PipelineProgramState], %struct.ArrayRange, %struct.TransformFeedback, i32*, %struct.FixedFunction, [3 x i32], [3 x i32] }>
%struct.StencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] }
%struct.TextureCoordGen = type { { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, i8, i8, i8, i8 }
%struct.TextureGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] }
%struct.TextureImageMode = type { float }
%struct.TextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* }
%struct.TextureMode = type { %struct.IColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float }
%struct.TextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.IColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* }
%struct.TextureRec = type { [4 x float], %struct.TextureState*, %struct.Mipmaplevel*, %struct.Mipmaplevel*, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] }
%struct.TextureState = type { i16, i8, i8, i16, i16, float, i32, %struct.SWRSurfaceRec*, %struct.TextureParamState, %struct.TextureGeomState, [0 x i32], i8*, i32, %struct.TextureLevel, [1 x [15 x %struct.TextureLevel]] }
%struct.Transform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }>
%struct.TransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] }
%struct.Viewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float }
%struct.IColor4 = type { float, float, float, float }
%struct.TCoord2 = type { float, float }
%struct.VMGPStack = type { [6 x <4 x float>*], <4 x float>*, i32, i32, <4 x float>*, <4 x float>**, i32, i32, i32, i32, i32, i32 }
%struct.VMTextures = type { [16 x %struct.TextureRec*] }
%struct.PPStreamToken = type { { i16, i16, i32 } }
%struct._VMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } }
define i32 @foo(%struct.State* %dst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._VMConstants* %cnstn, %struct.PPStreamToken* %pstrm, %struct.PluginBufferData* %gpctx, %struct.VMTextures* %txtrs, %struct.VMGPStack* %gpstk) nounwind {
bb266.i:
getelementptr <4 x float>, <4 x float>* null, i32 11 ; <<4 x float>*>:0 [#uses=1]
load <4 x float>, <4 x float>* %0, align 16 ; <<4 x float>>:1 [#uses=1]
shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 1, i32 1 > ; <<4 x float>>:2 [#uses=1]
shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:3 [#uses=1]
shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:4 [#uses=1]
shufflevector <4 x float> %4, <4 x float> %3, <4 x i32> < i32 6, i32 7, i32 2, i32 3 > ; <<4 x float>>:5 [#uses=1]
fmul <4 x float> %5, zeroinitializer ; <<4 x float>>:6 [#uses=2]
fmul <4 x float> %6, %6 ; <<4 x float>>:7 [#uses=1]
fadd <4 x float> zeroinitializer, %7 ; <<4 x float>>:8 [#uses=1]
call <4 x float> @llvm.x86.sse.max.ps( <4 x float> zeroinitializer, <4 x float> %8 ) nounwind readnone ; <<4 x float>>:9 [#uses=1]
%phitmp40 = bitcast <4 x float> %9 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp4109.i = and <4 x i32> %phitmp40, < i32 8388607, i32 8388607, i32 8388607, i32 8388607 > ; <<4 x i32>> [#uses=1]
%tmp4116.i = or <4 x i32> %tmp4109.i, < i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216 > ; <<4 x i32>> [#uses=1]
%tmp4117.i = bitcast <4 x i32> %tmp4116.i to <4 x float> ; <<4 x float>> [#uses=1]
fadd <4 x float> %tmp4117.i, zeroinitializer ; <<4 x float>>:10 [#uses=1]
fmul <4 x float> %10, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 > ; <<4 x float>>:11 [#uses=1]
call <4 x float> @llvm.x86.sse.max.ps( <4 x float> %11, <4 x float> zeroinitializer ) nounwind readnone ; <<4 x float>>:12 [#uses=1]
call <4 x float> @llvm.x86.sse.min.ps( <4 x float> %12, <4 x float> zeroinitializer ) nounwind readnone ; <<4 x float>>:13 [#uses=1]
%tmp4170.i = call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %13, <4 x float> zeroinitializer, i8 2 ) nounwind ; <<4 x float>> [#uses=1]
bitcast <4 x float> %tmp4170.i to <16 x i8> ; <<16 x i8>>:14 [#uses=1]
call i32 @llvm.x86.sse2.pmovmskb.128( <16 x i8> %14 ) nounwind readnone ; <i32>:15 [#uses=1]
icmp eq i32 %15, 0 ; <i1>:16 [#uses=1]
br i1 %16, label %bb5574.i, label %bb4521.i
bb4521.i: ; preds = %bb266.i
unreachable
bb5574.i: ; preds = %bb266.i
unreachable
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone

View File

@@ -1,109 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Verify that instcombine is able to fold identity shuffles.
define <8 x i32> @identity_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @identity_test_vpermd(
; CHECK-NEXT: ret <8 x i32> %a0
;
%a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
ret <8 x i32> %a
}
define <8 x float> @identity_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @identity_test_vpermps(
; CHECK-NEXT: ret <8 x float> %a0
;
%a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
ret <8 x float> %a
}
; Instcombine should be able to fold the following shuffle to a builtin shufflevector
; with a shuffle mask of all zeroes.
define <8 x i32> @zero_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @zero_test_vpermd(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
%a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
ret <8 x i32> %a
}
define <8 x float> @zero_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @zero_test_vpermps(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x float> [[TMP1]]
;
%a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
ret <8 x float> %a
}
; Verify that instcombine is able to fold constant shuffles.
define <8 x i32> @shuffle_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @shuffle_test_vpermd(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
%a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
ret <8 x i32> %a
}
define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @shuffle_test_vpermps(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x float> [[TMP1]]
;
%a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
ret <8 x float> %a
}
; Verify that instcombine is able to fold constant shuffles with undef mask elements.
define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @undef_test_vpermd(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
%a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
ret <8 x i32> %a
}
define <8 x float> @undef_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @undef_test_vpermps(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x float> [[TMP1]]
;
%a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
ret <8 x float> %a
}
; Verify simplify demanded elts.
define <8 x i32> @elts_test_vpermd(<8 x i32> %a0, i32 %a1) {
; CHECK-LABEL: @elts_test_vpermd(
; CHECK-NEXT: ret <8 x i32> %a0
;
%1 = insertelement <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %a1, i32 0
%2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %1)
%3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i32> %3
}
define <8 x float> @elts_test_vpermps(<8 x float> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @elts_test_vpermps(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x float> [[TMP2]]
;
%1 = insertelement <8 x i32> %a1, i32 0, i32 7
%2 = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %1)
%3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> zeroinitializer
ret <8 x float> %3
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)

View File

@@ -1 +0,0 @@
2a24d93ce76a090bf51479d912f4f6c087ef57f2

View File

@@ -1,271 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
declare i32 @llvm.x86.tbm.bextri.u32(i32, i32) nounwind readnone
declare i64 @llvm.x86.tbm.bextri.u64(i64, i64) nounwind readnone
declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone
declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone
declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone
define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u32(
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.tbm.bextri.u32(i32 [[A:%.*]], i32 1296)
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 1296)
ret i32 %1
}
define i32 @test_x86_tbm_bextri_u32_zero_length(i32 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u32_zero_length(
; CHECK-NEXT: ret i32 0
;
%1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 1)
ret i32 %1
}
define i32 @test_x86_tbm_bextri_u32_large_shift(i32 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u32_large_shift(
; CHECK-NEXT: ret i32 0
;
%1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 288)
ret i32 %1
}
define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u64(
; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.tbm.bextri.u64(i64 [[A:%.*]], i64 1312)
; CHECK-NEXT: ret i64 [[TMP1]]
;
%1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 1312)
ret i64 %1
}
define i64 @test_x86_tbm_bextri_u64_zero_length(i64 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u64_zero_length(
; CHECK-NEXT: ret i64 0
;
%1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 1)
ret i64 %1
}
define i64 @test_x86_tbm_bextri_u64_large_shift(i64 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u64_large_shift(
; CHECK-NEXT: ret i64 0
;
%1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 320)
ret i64 %1
}
define i32 @test_x86_tbm_bextri_u32_constfold() nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u32_constfold(
; CHECK-NEXT: ret i32 57005
;
%1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 3735928559, i32 4112) ; extract bits 31:16 from 0xDEADBEEF
ret i32 %1
}
define i32 @test_x86_tbm_bextri_u32_constfold2() nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u32_constfold2(
; CHECK-NEXT: ret i32 233495534
;
%1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 3735928559, i32 8196) ; extract bits 35:4 from 0xDEADBEEF
ret i32 %1
}
define i32 @test_x86_tbm_bextri_u32_constfold3() nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u32_constfold3(
; CHECK-NEXT: ret i32 233495534
;
%1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 3735928559, i32 16388) ; extract bits 67:4 from 0xDEADBEEF
ret i32 %1
}
define i64 @test_x86_tbm_bextri_u64_constfold() nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u64_constfold(
; CHECK-NEXT: ret i64 57005
;
%1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 3735928559, i64 4112) ; extract bits 31:16 from 0xDEADBEEF
ret i64 %1
}
define i64 @test_x86_tbm_bextri_u64_constfold2() nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u64_constfold2(
; CHECK-NEXT: ret i64 233495534
;
%1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 3735928559, i64 16388) ; extract bits 67:4 from 0xDEADBEEF
ret i64 %1
}
define i64 @test_x86_tbm_bextri_u64_constfold3() nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u64_constfold3(
; CHECK-NEXT: ret i64 233495534
;
%1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 3735928559, i64 32772) ; extract bits 131:4 from 0xDEADBEEF
ret i64 %1
}
define i32 @test_x86_bmi_bextri_32(i32 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bextri_32(
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.bmi.bextr.32(i32 [[A:%.*]], i32 1296)
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a, i32 1296)
ret i32 %1
}
define i32 @test_x86_bmi_bextri_32_zero_length(i32 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bextri_32_zero_length(
; CHECK-NEXT: ret i32 0
;
%1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a, i32 1)
ret i32 %1
}
define i32 @test_x86_bmi_bextri_32_large_shift(i32 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bextri_32_large_shift(
; CHECK-NEXT: ret i32 0
;
%1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a, i32 288)
ret i32 %1
}
define i64 @test_x86_bmi_bextri_64(i64 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bextri_64(
; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.bmi.bextr.64(i64 [[A:%.*]], i64 1312)
; CHECK-NEXT: ret i64 [[TMP1]]
;
%1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a, i64 1312)
ret i64 %1
}
define i64 @test_x86_bmi_bextri_64_zero_length(i64 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bextri_64_zero_length(
; CHECK-NEXT: ret i64 0
;
%1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a, i64 1)
ret i64 %1
}
define i64 @test_x86_bmi_bextri_64_large_shift(i64 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bextri_64_large_shift(
; CHECK-NEXT: ret i64 0
;
%1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a, i64 320)
ret i64 %1
}
define i32 @test_x86_bmi_bextri_32_constfold() nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bextri_32_constfold(
; CHECK-NEXT: ret i32 57005
;
%1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 3735928559, i32 4112) ; extract bits 31:16 from 0xDEADBEEF
ret i32 %1
}
define i32 @test_x86_bmi_bextri_32_constfold2() nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bextri_32_constfold2(
; CHECK-NEXT: ret i32 233495534
;
%1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 3735928559, i32 8196) ; extract bits 35:4 from 0xDEADBEEF
ret i32 %1
}
define i32 @test_x86_bmi_bextri_32_constfold3() nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bextri_32_constfold3(
; CHECK-NEXT: ret i32 233495534
;
%1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 3735928559, i32 16388) ; extract bits 67:4 from 0xDEADBEEF
ret i32 %1
}
define i64 @test_x86_bmi_bextri_64_constfold() nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bextri_64_constfold(
; CHECK-NEXT: ret i64 57005
;
%1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 3735928559, i64 4112) ; extract bits 31:16 from 0xDEADBEEF
ret i64 %1
}
define i64 @test_x86_bmi_bextri_64_constfold2() nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bextri_64_constfold2(
; CHECK-NEXT: ret i64 233495534
;
%1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 3735928559, i64 16388) ; extract bits 67:4 from 0xDEADBEEF
ret i64 %1
}
define i64 @test_x86_bmi_bextri_64_constfold3() nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bextri_64_constfold3(
; CHECK-NEXT: ret i64 233495534
;
%1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 3735928559, i64 32772) ; extract bits 131:4 from 0xDEADBEEF
ret i64 %1
}
define i32 @test_x86_bmi_bzhi_32(i32 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bzhi_32(
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.bmi.bzhi.32(i32 [[A:%.*]], i32 31)
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %a, i32 31)
ret i32 %1
}
define i32 @test_x86_bmi_bzhi_32_zero(i32 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bzhi_32_zero(
; CHECK-NEXT: ret i32 0
;
%1 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %a, i32 0)
ret i32 %1
}
define i32 @test_x86_bmi_bzhi_32_max(i32 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bzhi_32_max(
; CHECK-NEXT: ret i32 [[A:%.*]]
;
%1 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %a, i32 32)
ret i32 %1
}
define i32 @test_x86_bmi_bzhi_32_constfold() nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bzhi_32_constfold(
; CHECK-NEXT: ret i32 1
;
%1 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 5, i32 1)
ret i32 %1
}
define i64 @test_x86_bmi_bzhi_64(i64 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bzhi_64(
; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.bmi.bzhi.64(i64 [[A:%.*]], i64 63)
; CHECK-NEXT: ret i64 [[TMP1]]
;
%1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %a, i64 63)
ret i64 %1
}
define i64 @test_x86_bmi_bzhi_64_zero(i64 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bzhi_64_zero(
; CHECK-NEXT: ret i64 0
;
%1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %a, i64 0)
ret i64 %1
}
define i64 @test_x86_bmi_bzhi_64_max(i64 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bzhi_64_max(
; CHECK-NEXT: ret i64 [[A:%.*]]
;
%1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %a, i64 64)
ret i64 %1
}
define i64 @test_x86_bmi_bzhi_64_constfold() nounwind readnone {
; CHECK-LABEL: @test_x86_bmi_bzhi_64_constfold(
; CHECK-NEXT: ret i64 1
;
%1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 5, i64 1)
ret i64 %1
}

View File

@@ -1,17 +0,0 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
; crc32 with 64-bit destination zeros high 32-bit.
; rdar://9467055
define i64 @test() nounwind {
entry:
; CHECK: test
; CHECK: tail call i64 @llvm.x86.sse42.crc32.64.64
; CHECK-NOT: and
; CHECK: ret
%0 = tail call i64 @llvm.x86.sse42.crc32.64.64(i64 0, i64 4) nounwind
%1 = and i64 %0, 4294967295
ret i64 %1
}
declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind readnone

View File

@@ -1,68 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
;
; Vector Demanded Bits
;
; Only bottom 4 elements required.
define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
; CHECK-LABEL: @demand_vcvtph2ps_128(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%2 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
ret <4 x float> %2
}
; All 8 elements required.
define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) {
; CHECK-LABEL: @demand_vcvtph2ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> [[TMP1]])
; CHECK-NEXT: ret <8 x float> [[TMP2]]
;
%1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
ret <8 x float> %2
}
;
; Constant Folding
;
define <4 x float> @fold_vcvtph2ps_128() {
; CHECK-LABEL: @fold_vcvtph2ps_128(
; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00>
;
%1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
ret <4 x float> %1
}
define <8 x float> @fold_vcvtph2ps_256() {
; CHECK-LABEL: @fold_vcvtph2ps_256(
; CHECK-NEXT: ret <8 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00, float 2.000000e+00, float 6.550400e+04, float -1.000000e+00, float -2.000000e+00>
;
%1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
ret <8 x float> %1
}
define <4 x float> @fold_vcvtph2ps_128_zero() {
; CHECK-LABEL: @fold_vcvtph2ps_128_zero(
; CHECK-NEXT: ret <4 x float> zeroinitializer
;
%1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
ret <4 x float> %1
}
define <8 x float> @fold_vcvtph2ps_256_zero() {
; CHECK-LABEL: @fold_vcvtph2ps_256_zero(
; CHECK-NEXT: ret <8 x float> zeroinitializer
;
%1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
ret <8 x float> %1
}

View File

@@ -1,315 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
define <4 x float> @test_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfmadd_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
%5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
%6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
%res = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6)
ret <4 x float> %res
}
define float @test_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfmadd_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: ret float [[TMP2]]
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
%5 = extractelement <4 x float> %4, i32 0
ret float %5
}
define float @test_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfmadd_ss_1(
; CHECK-NEXT: ret float 1.000000e+00
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
%5 = extractelement <4 x float> %4, i32 1
ret float %5
}
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
define <2 x double> @test_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: @test_vfmadd_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
%res = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2)
ret <2 x double> %res
}
define double @test_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: @test_vfmadd_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT: ret double [[TMP2]]
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}
define double @test_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: @test_vfmadd_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
%3 = extractelement <2 x double> %2, i32 1
ret double %3
}
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
define <4 x float> @test_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfmsub_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
%5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
%6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
%res = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6)
ret <4 x float> %res
}
define float @test_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfmsub_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: ret float [[TMP2]]
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
%5 = extractelement <4 x float> %4, i32 0
ret float %5
}
define float @test_vfmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfmsub_ss_1(
; CHECK-NEXT: ret float 1.000000e+00
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
%5 = extractelement <4 x float> %4, i32 1
ret float %5
}
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
define <2 x double> @test_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: @test_vfmsub_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
%res = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2)
ret <2 x double> %res
}
define double @test_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: @test_vfmsub_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT: ret double [[TMP2]]
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}
define double @test_vfmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: @test_vfmsub_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
%3 = extractelement <2 x double> %2, i32 1
ret double %3
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
define <4 x float> @test_vfnmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfnmadd_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
%5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
%6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
%res = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6)
ret <4 x float> %res
}
define float @test_vfnmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfnmadd_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: ret float [[TMP2]]
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
%5 = extractelement <4 x float> %4, i32 0
ret float %5
}
define float @test_vfnmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfnmadd_ss_1(
; CHECK-NEXT: ret float 1.000000e+00
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
%5 = extractelement <4 x float> %4, i32 1
ret float %5
}
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
define <2 x double> @test_vfnmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: @test_vfnmadd_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
%res = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2)
ret <2 x double> %res
}
define double @test_vfnmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: @test_vfnmadd_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT: ret double [[TMP2]]
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}
define double @test_vfnmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: @test_vfnmadd_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
%3 = extractelement <2 x double> %2, i32 1
ret double %3
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
define <4 x float> @test_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfnmsub_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
%5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
%6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
%res = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6)
ret <4 x float> %res
}
define float @test_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfnmsub_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: ret float [[TMP2]]
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
%5 = extractelement <4 x float> %4, i32 0
ret float %5
}
define float @test_vfnmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfnmsub_ss_1(
; CHECK-NEXT: ret float 1.000000e+00
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
%5 = extractelement <4 x float> %4, i32 1
ret float %5
}
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
define <2 x double> @test_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: @test_vfnmsub_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
%res = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2)
ret <2 x double> %res
}
define double @test_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: @test_vfnmsub_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT: ret double [[TMP2]]
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}
define double @test_vfnmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: @test_vfnmsub_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
%3 = extractelement <2 x double> %2, i32 1
ret double %3
}

View File

@@ -1,166 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
define <4 x float> @insertps_non_const_imm(<4 x float> %v1, <4 x float> %v2, i8 %c) {
; CHECK-LABEL: @insertps_non_const_imm(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
; CHECK-NEXT: ret <4 x float> [[RES]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
ret <4 x float> %res
}
; If all zero mask bits are set, return a zero regardless of the other control bits.
define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x0f(
; CHECK-NEXT: ret <4 x float> zeroinitializer
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
ret <4 x float> %res
}
define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xff(
; CHECK-NEXT: ret <4 x float> zeroinitializer
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
ret <4 x float> %res
}
; If some zero mask bits are set that do not override the insertion, we do not change anything.
define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x0c(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
; CHECK-NEXT: ret <4 x float> [[RES]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
ret <4 x float> %res
}
; ...unless both input vectors are the same operand.
define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
; CHECK-LABEL: @insertps_0x15_single_input(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
ret <4 x float> %res
}
; The zero mask overrides the insertion lane.
define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
; CHECK-LABEL: @insertps_0x1a_single_input(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
ret <4 x float> %res
}
; The zero mask overrides the insertion lane, so the second input vector is not used.
define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xc1(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %v1, float 0.000000e+00, i32 0
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
ret <4 x float> %res
}
; If no zero mask bits are set, convert to a shuffle.
define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x00(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
ret <4 x float> %res
}
define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x10(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
ret <4 x float> %res
}
define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x20(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
ret <4 x float> %res
}
define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x30(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
ret <4 x float> %res
}
define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xc0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
ret <4 x float> %res
}
define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xd0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
ret <4 x float> %res
}
define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xe0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
ret <4 x float> %res
}
define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xf0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
ret <4 x float> %res
}

View File

@@ -1,328 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
;; MASKED LOADS
; If the mask isn't constant, do nothing.
define <4 x float> @mload(i8* %f, <4 x i32> %mask) {
; CHECK-LABEL: @mload(
; CHECK-NEXT: [[LD:%.*]] = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> %mask)
; CHECK-NEXT: ret <4 x float> [[LD]]
;
%ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> %mask)
ret <4 x float> %ld
}
; Zero mask returns a zero vector.
define <4 x float> @mload_zeros(i8* %f) {
; CHECK-LABEL: @mload_zeros(
; CHECK-NEXT: ret <4 x float> zeroinitializer
;
%ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> zeroinitializer)
ret <4 x float> %ld
}
; Only the sign bit matters.
define <4 x float> @mload_fake_ones(i8* %f) {
; CHECK-LABEL: @mload_fake_ones(
; CHECK-NEXT: ret <4 x float> zeroinitializer
;
%ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> <i32 1, i32 2, i32 3, i32 2147483647>)
ret <4 x float> %ld
}
; All mask bits are set, so this is just a vector load.
define <4 x float> @mload_real_ones(i8* %f) {
; CHECK-LABEL: @mload_real_ones(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <4 x float>*
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <4 x float>, <4 x float>* [[CASTVEC]], align 1
; CHECK-NEXT: ret <4 x float> [[UNMASKEDLOAD]]
;
%ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> <i32 -1, i32 -2, i32 -3, i32 2147483648>)
ret <4 x float> %ld
}
; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further.
define <4 x float> @mload_one_one(i8* %f) {
; CHECK-LABEL: @mload_one_one(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[CASTVEC]], i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> zeroinitializer)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>)
ret <4 x float> %ld
}
; Try doubles.
define <2 x double> @mload_one_one_double(i8* %f) {
; CHECK-LABEL: @mload_one_one_double(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[CASTVEC]], i32 1, <2 x i1> <i1 true, i1 false>, <2 x double> zeroinitializer)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%ld = tail call <2 x double> @llvm.x86.avx.maskload.pd(i8* %f, <2 x i64> <i64 -1, i64 0>)
ret <2 x double> %ld
}
; Try 256-bit FP ops.
define <8 x float> @mload_v8f32(i8* %f) {
; CHECK-LABEL: @mload_v8f32(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <8 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[CASTVEC]], i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x float> zeroinitializer)
; CHECK-NEXT: ret <8 x float> [[TMP1]]
;
%ld = tail call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %f, <8 x i32> <i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0>)
ret <8 x float> %ld
}
define <4 x double> @mload_v4f64(i8* %f) {
; CHECK-LABEL: @mload_v4f64(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <4 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[CASTVEC]], i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> zeroinitializer)
; CHECK-NEXT: ret <4 x double> [[TMP1]]
;
%ld = tail call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 0, i64 0>)
ret <4 x double> %ld
}
; Try the AVX2 variants.
define <4 x i32> @mload_v4i32(i8* %f) {
; CHECK-LABEL: @mload_v4i32(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[CASTVEC]], i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%ld = tail call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>)
ret <4 x i32> %ld
}
define <2 x i64> @mload_v2i64(i8* %f) {
; CHECK-LABEL: @mload_v2i64(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <2 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* [[CASTVEC]], i32 1, <2 x i1> <i1 true, i1 false>, <2 x i64> zeroinitializer)
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
;
%ld = tail call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %f, <2 x i64> <i64 -1, i64 0>)
ret <2 x i64> %ld
}
define <8 x i32> @mload_v8i32(i8* %f) {
; CHECK-LABEL: @mload_v8i32(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <8 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[CASTVEC]], i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> zeroinitializer)
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
%ld = tail call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %f, <8 x i32> <i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0>)
ret <8 x i32> %ld
}
define <4 x i64> @mload_v4i64(i8* %f) {
; CHECK-LABEL: @mload_v4i64(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <4 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* [[CASTVEC]], i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> zeroinitializer)
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
;
%ld = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 0, i64 0>)
ret <4 x i64> %ld
}
;; MASKED STORES
; If the mask isn't constant, do nothing.
define void @mstore(i8* %f, <4 x i32> %mask, <4 x float> %v) {
; CHECK-LABEL: @mstore(
; CHECK-NEXT: tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> %mask, <4 x float> %v)
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> %mask, <4 x float> %v)
ret void
}
; Zero mask is a nop.
define void @mstore_zeros(i8* %f, <4 x float> %v) {
; CHECK-LABEL: @mstore_zeros(
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> zeroinitializer, <4 x float> %v)
ret void
}
; Only the sign bit matters.
define void @mstore_fake_ones(i8* %f, <4 x float> %v) {
; CHECK-LABEL: @mstore_fake_ones(
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> <i32 1, i32 2, i32 3, i32 2147483647>, <4 x float> %v)
ret void
}
; All mask bits are set, so this is just a vector store.
define void @mstore_real_ones(i8* %f, <4 x float> %v) {
; CHECK-LABEL: @mstore_real_ones(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <4 x float>*
; CHECK-NEXT: store <4 x float> %v, <4 x float>* [[CASTVEC]], align 1
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> <i32 -1, i32 -2, i32 -3, i32 -2147483648>, <4 x float> %v)
ret void
}
; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further.
define void @mstore_one_one(i8* %f, <4 x float> %v) {
; CHECK-LABEL: @mstore_one_one(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <4 x float>*
; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v, <4 x float>* [[CASTVEC]], i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>, <4 x float> %v)
ret void
}
; Try doubles.
define void @mstore_one_one_double(i8* %f, <2 x double> %v) {
; CHECK-LABEL: @mstore_one_one_double(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <2 x double>*
; CHECK-NEXT: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %v, <2 x double>* [[CASTVEC]], i32 1, <2 x i1> <i1 true, i1 false>)
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.avx.maskstore.pd(i8* %f, <2 x i64> <i64 -1, i64 0>, <2 x double> %v)
ret void
}
; Try 256-bit FP ops.
define void @mstore_v8f32(i8* %f, <8 x float> %v) {
; CHECK-LABEL: @mstore_v8f32(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <8 x float>*
; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %v, <8 x float>* [[CASTVEC]], i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.avx.maskstore.ps.256(i8* %f, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 -1, i32 -2, i32 -3, i32 -4>, <8 x float> %v)
ret void
}
define void @mstore_v4f64(i8* %f, <4 x double> %v) {
; CHECK-LABEL: @mstore_v4f64(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <4 x double>*
; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %v, <4 x double>* [[CASTVEC]], i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.avx.maskstore.pd.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 1, i64 2>, <4 x double> %v)
ret void
}
; Try the AVX2 variants.
define void @mstore_v4i32(i8* %f, <4 x i32> %v) {
; CHECK-LABEL: @mstore_v4i32(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <4 x i32>*
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v, <4 x i32>* [[CASTVEC]], i32 1, <4 x i1> <i1 false, i1 false, i1 true, i1 true>)
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.avx2.maskstore.d(i8* %f, <4 x i32> <i32 0, i32 1, i32 -1, i32 -2>, <4 x i32> %v)
ret void
}
define void @mstore_v2i64(i8* %f, <2 x i64> %v) {
; CHECK-LABEL: @mstore_v2i64(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <2 x i64>*
; CHECK-NEXT: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %v, <2 x i64>* [[CASTVEC]], i32 1, <2 x i1> <i1 true, i1 false>)
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.avx2.maskstore.q(i8* %f, <2 x i64> <i64 -1, i64 0>, <2 x i64> %v)
ret void
}
define void @mstore_v8i32(i8* %f, <8 x i32> %v) {
; CHECK-LABEL: @mstore_v8i32(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <8 x i32>*
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %v, <8 x i32>* [[CASTVEC]], i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.avx2.maskstore.d.256(i8* %f, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 -1, i32 -2, i32 -3, i32 -4>, <8 x i32> %v)
ret void
}
define void @mstore_v4i64(i8* %f, <4 x i64> %v) {
; CHECK-LABEL: @mstore_v4i64(
; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* %f to <4 x i64>*
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %v, <4 x i64>* [[CASTVEC]], i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.avx2.maskstore.q.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 1, i64 2>, <4 x i64> %v)
ret void
}
; The original SSE2 masked store variant.
define void @mstore_v16i8_sse2_zeros(<16 x i8> %d, i8* %p) {
; CHECK-LABEL: @mstore_v16i8_sse2_zeros(
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %d, <16 x i8> zeroinitializer, i8* %p)
ret void
}
declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>)
declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>)
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>)
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>)
declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>)
declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>)
declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>)
declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>)
declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>)
declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>)
declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>)
declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>)
declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>)
declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>)
declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>)
declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>)
declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*)

View File

@@ -1,324 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
;
; DemandedBits - MOVMSK zeros the upper bits of the result.
;
define i32 @test_upper_x86_mmx_pmovmskb(x86_mmx %a0) {
; CHECK-LABEL: @test_upper_x86_mmx_pmovmskb(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
%2 = and i32 %1, 255
ret i32 %2
}
define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
%2 = and i32 %1, 15
ret i32 %2
}
define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
%2 = and i32 %1, 3
ret i32 %2
}
define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
%2 = and i32 %1, 65535
ret i32 %2
}
define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
%2 = and i32 %1, 255
ret i32 %2
}
define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
%2 = and i32 %1, 15
ret i32 %2
}
; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.
;
; DemandedBits - If we don't use the lower bits then we just return zero.
;
define i32 @test_lower_x86_mmx_pmovmskb(x86_mmx %a0) {
; CHECK-LABEL: @test_lower_x86_mmx_pmovmskb(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
%2 = and i32 %1, -256
ret i32 %2
}
define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
%2 = and i32 %1, -16
ret i32 %2
}
define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
%2 = and i32 %1, -4
ret i32 %2
}
define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
%2 = and i32 %1, -65536
ret i32 %2
}
define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
%2 = and i32 %1, -256
ret i32 %2
}
define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
%2 = and i32 %1, -16
ret i32 %2
}
; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.
;
; Constant Folding (UNDEF -> ZERO)
;
define i32 @undef_x86_mmx_pmovmskb() {
; CHECK-LABEL: @undef_x86_mmx_pmovmskb(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx undef)
ret i32 %1
}
define i32 @undef_x86_sse_movmsk_ps() {
; CHECK-LABEL: @undef_x86_sse_movmsk_ps(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> undef)
ret i32 %1
}
define i32 @undef_x86_sse2_movmsk_pd() {
; CHECK-LABEL: @undef_x86_sse2_movmsk_pd(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> undef)
ret i32 %1
}
define i32 @undef_x86_sse2_pmovmskb_128() {
; CHECK-LABEL: @undef_x86_sse2_pmovmskb_128(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> undef)
ret i32 %1
}
define i32 @undef_x86_avx_movmsk_ps_256() {
; CHECK-LABEL: @undef_x86_avx_movmsk_ps_256(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> undef)
ret i32 %1
}
define i32 @undef_x86_avx_movmsk_pd_256() {
; CHECK-LABEL: @undef_x86_avx_movmsk_pd_256(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> undef)
ret i32 %1
}
define i32 @undef_x86_avx2_pmovmskb() {
; CHECK-LABEL: @undef_x86_avx2_pmovmskb(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> undef)
ret i32 %1
}
;
; Constant Folding (ZERO -> ZERO)
;
define i32 @zero_x86_mmx_pmovmskb() {
; CHECK-LABEL: @zero_x86_mmx_pmovmskb(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<1 x i64> zeroinitializer to x86_mmx))
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = bitcast <1 x i64> zeroinitializer to x86_mmx
%2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1)
ret i32 %2
}
define i32 @zero_x86_sse_movmsk_ps() {
; CHECK-LABEL: @zero_x86_sse_movmsk_ps(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> zeroinitializer)
ret i32 %1
}
define i32 @zero_x86_sse2_movmsk_pd() {
; CHECK-LABEL: @zero_x86_sse2_movmsk_pd(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> zeroinitializer)
ret i32 %1
}
define i32 @zero_x86_sse2_pmovmskb_128() {
; CHECK-LABEL: @zero_x86_sse2_pmovmskb_128(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> zeroinitializer)
ret i32 %1
}
define i32 @zero_x86_avx_movmsk_ps_256() {
; CHECK-LABEL: @zero_x86_avx_movmsk_ps_256(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> zeroinitializer)
ret i32 %1
}
define i32 @zero_x86_avx_movmsk_pd_256() {
; CHECK-LABEL: @zero_x86_avx_movmsk_pd_256(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> zeroinitializer)
ret i32 %1
}
define i32 @zero_x86_avx2_pmovmskb() {
; CHECK-LABEL: @zero_x86_avx2_pmovmskb(
; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> zeroinitializer)
ret i32 %1
}
;
; Constant Folding
;
define i32 @fold_x86_mmx_pmovmskb() {
; CHECK-LABEL: @fold_x86_mmx_pmovmskb(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<8 x i8> <i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0> to x86_mmx))
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = bitcast <8 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256> to x86_mmx
%2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1)
ret i32 %2
}
define i32 @fold_x86_sse_movmsk_ps() {
; CHECK-LABEL: @fold_x86_sse_movmsk_ps(
; CHECK-NEXT: ret i32 10
;
%1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> <float 1.0, float -1.0, float 100.0, float -200.0>)
ret i32 %1
}
define i32 @fold_x86_sse2_movmsk_pd() {
; CHECK-LABEL: @fold_x86_sse2_movmsk_pd(
; CHECK-NEXT: ret i32 2
;
%1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> <double 1.0, double -1.0>)
ret i32 %1
}
define i32 @fold_x86_sse2_pmovmskb_128() {
; CHECK-LABEL: @fold_x86_sse2_pmovmskb_128(
; CHECK-NEXT: ret i32 5654
;
%1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
ret i32 %1
}
define i32 @fold_x86_avx_movmsk_ps_256() {
; CHECK-LABEL: @fold_x86_avx_movmsk_ps_256(
; CHECK-NEXT: ret i32 170
;
%1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> <float 1.0, float -1.0, float 100.0, float -200.0, float +0.0, float -0.0, float 100000.0, float -5000000.0>)
ret i32 %1
}
define i32 @fold_x86_avx_movmsk_pd_256() {
; CHECK-LABEL: @fold_x86_avx_movmsk_pd_256(
; CHECK-NEXT: ret i32 10
;
%1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> <double 1.0, double -1.0, double 100.0, double -200.0>)
ret i32 %1
}
define i32 @fold_x86_avx2_pmovmskb() {
; CHECK-LABEL: @fold_x86_avx2_pmovmskb(
; CHECK-NEXT: ret i32 370546176
;
%1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
ret i32 %1
}
declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx)
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>)

View File

@@ -1,245 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
;
; UNDEF Elts
;
define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @undef_pmuludq_128(
; CHECK-NEXT: ret <2 x i64> zeroinitializer
;
%1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
ret <2 x i64> %1
}
define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @undef_pmuludq_256(
; CHECK-NEXT: ret <4 x i64> zeroinitializer
;
%1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
ret <4 x i64> %1
}
define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_pmuludq_512(
; CHECK-NEXT: ret <8 x i64> zeroinitializer
;
%1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
ret <8 x i64> %1
}
define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @undef_pmuldq_128(
; CHECK-NEXT: ret <2 x i64> zeroinitializer
;
%1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
ret <2 x i64> %1
}
define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @undef_pmuldq_256(
; CHECK-NEXT: ret <4 x i64> zeroinitializer
;
%1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
ret <4 x i64> %1
}
define <8 x i64> @undef_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_pmuldq_512(
; CHECK-NEXT: ret <8 x i64> zeroinitializer
;
%1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
ret <8 x i64> %1
}
define <2 x i64> @undef_zero_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuludq_128(
; CHECK-NEXT: ret <2 x i64> zeroinitializer
;
%1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> zeroinitializer)
ret <2 x i64> %1
}
define <4 x i64> @undef_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuludq_256(
; CHECK-NEXT: ret <4 x i64> zeroinitializer
;
%1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> undef)
ret <4 x i64> %1
}
define <8 x i64> @undef_zero_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuludq_512(
; CHECK-NEXT: ret <8 x i64> zeroinitializer
;
%1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> zeroinitializer)
ret <8 x i64> %1
}
define <2 x i64> @undef_zero_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuldq_128(
; CHECK-NEXT: ret <2 x i64> zeroinitializer
;
%1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> zeroinitializer, <4 x i32> undef)
ret <2 x i64> %1
}
define <4 x i64> @undef_zero_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuldq_256(
; CHECK-NEXT: ret <4 x i64> zeroinitializer
;
%1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> zeroinitializer)
ret <4 x i64> %1
}
define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuldq_512(
; CHECK-NEXT: ret <8 x i64> zeroinitializer
;
%1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> undef)
ret <8 x i64> %1
}
;
; Constant Folding
;
define <2 x i64> @fold_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @fold_pmuludq_128(
; CHECK-NEXT: ret <2 x i64> <i64 9223372030412324865, i64 4294967295>
;
%1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2147483647, i32 1, i32 1, i32 3>)
ret <2 x i64> %1
}
define <4 x i64> @fold_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @fold_pmuludq_256(
; CHECK-NEXT: ret <4 x i64> zeroinitializer
;
%1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer)
ret <4 x i64> %1
}
define <8 x i64> @fold_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @fold_pmuludq_512(
; CHECK-NEXT: ret <8 x i64> <i64 0, i64 0, i64 255, i64 131070, i64 0, i64 -281474976645121, i64 140737488289792, i64 281470681743360>
;
%1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> <i32 0, i32 0, i32 undef, i32 0, i32 1, i32 1, i32 2, i32 2, i32 undef, i32 undef, i32 -1, i32 -1, i32 65536, i32 -1, i32 -65536, i32 undef>, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 255, i32 -256, i32 65535, i32 -65536, i32 0, i32 -1, i32 -65535, i32 -65535, i32 2147483647, i32 2147483648, i32 65536, i32 -65535>)
ret <8 x i64> %1
}
define <2 x i64> @fold_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @fold_pmuldq_128(
; CHECK-NEXT: ret <2 x i64> <i64 0, i64 2>
;
%1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> <i32 undef, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 undef, i32 1, i32 -2, i32 3>)
ret <2 x i64> %1
}
define <4 x i64> @fold_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @fold_pmuldq_256(
; CHECK-NEXT: ret <4 x i64> <i64 0, i64 4294836225, i64 140737488289792, i64 -140737488355328>
;
%1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> <i32 undef, i32 1, i32 -65535, i32 128, i32 65536, i32 2147483647, i32 -2147483648, i32 65536>, <8 x i32> <i32 0, i32 -1, i32 -65535, i32 -65535, i32 2147483647, i32 2147483648, i32 65536, i32 -65535>)
ret <4 x i64> %1
}
define <8 x i64> @fold_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @fold_pmuldq_512(
; CHECK-NEXT: ret <8 x i64> zeroinitializer
;
%1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> <i32 undef, i32 -1, i32 -3, i32 -1, i32 8, i32 10, i32 -256, i32 65536, i32 undef, i32 1, i32 -65535, i32 128, i32 65536, i32 2147483647, i32 -2147483648, i32 65536>)
ret <8 x i64> %1
}
;
; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required.
;
define <2 x i64> @test_demanded_elts_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuludq_128(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
%2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
%3 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %1, <4 x i32> %2)
%4 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %4
}
define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuludq_256(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> [[TMP1]])
; CHECK-NEXT: ret <4 x i64> [[TMP2]]
;
%1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
%2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
%3 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %1, <8 x i32> %2)
ret <4 x i64> %3
}
define <8 x i64> @test_demanded_elts_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuludq_512(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 9, i32 undef, i32 11, i32 undef, i32 13, i32 undef, i32 15, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a0, <16 x i32> [[TMP1]])
; CHECK-NEXT: ret <8 x i64> [[TMP2]]
;
%1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
%2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
%3 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %1, <16 x i32> %2)
ret <8 x i64> %3
}
define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuldq_128(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 3, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> [[TMP1]])
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
%2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
%3 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %1, <4 x i32> %2)
ret <2 x i64> %3
}
define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuldq_256(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
; CHECK-NEXT: ret <4 x i64> [[TMP3]]
;
%1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
%2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
%3 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %1, <8 x i32> %2)
%4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
ret <4 x i64> %4
}
define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuldq_512(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 15, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a0, <16 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
; CHECK-NEXT: ret <8 x i64> [[TMP3]]
;
%1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
%2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
%3 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %1, <16 x i32> %2)
%4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
ret <8 x i64> %4
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>) nounwind readnone
declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>) nounwind readnone

View File

@@ -1,366 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
;
; UNDEF Elts
;
define <8 x i16> @undef_packssdw_128() {
; CHECK-LABEL: @undef_packssdw_128(
; CHECK-NEXT: ret <8 x i16> undef
;
%1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> undef, <4 x i32> undef)
ret <8 x i16> %1
}
define <8 x i16> @undef_packusdw_128() {
; CHECK-LABEL: @undef_packusdw_128(
; CHECK-NEXT: ret <8 x i16> undef
;
%1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> undef)
ret <8 x i16> %1
}
define <16 x i8> @undef_packsswb_128() {
; CHECK-LABEL: @undef_packsswb_128(
; CHECK-NEXT: ret <16 x i8> undef
;
%1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> undef, <8 x i16> undef)
ret <16 x i8> %1
}
define <16 x i8> @undef_packuswb_128() {
; CHECK-LABEL: @undef_packuswb_128(
; CHECK-NEXT: ret <16 x i8> undef
;
%1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> undef, <8 x i16> undef)
ret <16 x i8> %1
}
define <16 x i16> @undef_packssdw_256() {
; CHECK-LABEL: @undef_packssdw_256(
; CHECK-NEXT: ret <16 x i16> undef
;
%1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> undef)
ret <16 x i16> %1
}
define <16 x i16> @undef_packusdw_256() {
; CHECK-LABEL: @undef_packusdw_256(
; CHECK-NEXT: ret <16 x i16> undef
;
%1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> undef)
ret <16 x i16> %1
}
define <32 x i8> @undef_packsswb_256() {
; CHECK-LABEL: @undef_packsswb_256(
; CHECK-NEXT: ret <32 x i8> undef
;
%1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> undef)
ret <32 x i8> %1
}
define <32 x i8> @undef_packuswb_256() {
; CHECK-LABEL: @undef_packuswb_256(
; CHECK-NEXT: ret <32 x i8> undef
;
%1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> undef, <16 x i16> undef)
ret <32 x i8> %1
}
define <32 x i16> @undef_packssdw_512() {
; CHECK-LABEL: @undef_packssdw_512(
; CHECK-NEXT: ret <32 x i16> undef
;
%1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> undef, <16 x i32> undef)
ret <32 x i16> %1
}
define <32 x i16> @undef_packusdw_512() {
; CHECK-LABEL: @undef_packusdw_512(
; CHECK-NEXT: ret <32 x i16> undef
;
%1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> undef)
ret <32 x i16> %1
}
define <64 x i8> @undef_packsswb_512() {
; CHECK-LABEL: @undef_packsswb_512(
; CHECK-NEXT: ret <64 x i8> undef
;
%1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> undef)
ret <64 x i8> %1
}
define <64 x i8> @undef_packuswb_512() {
; CHECK-LABEL: @undef_packuswb_512(
; CHECK-NEXT: ret <64 x i8> undef
;
%1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> undef, <32 x i16> undef)
ret <64 x i8> %1
}
;
; Constant Folding
;
define <8 x i16> @fold_packssdw_128() {
; CHECK-LABEL: @fold_packssdw_128(
; CHECK-NEXT: ret <8 x i16> <i16 0, i16 -1, i16 32767, i16 -32768, i16 0, i16 0, i16 0, i16 0>
;
%1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> <i32 0, i32 -1, i32 65536, i32 -131072>, <4 x i32> zeroinitializer)
ret <8 x i16> %1
}
define <8 x i16> @fold_packusdw_128() {
; CHECK-LABEL: @fold_packusdw_128(
; CHECK-NEXT: ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 -32768, i16 -1>
;
%1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> <i32 0, i32 -1, i32 32768, i32 65537>)
ret <8 x i16> %1
}
define <16 x i8> @fold_packsswb_128() {
; CHECK-LABEL: @fold_packsswb_128(
; CHECK-NEXT: ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
;
%1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> undef)
ret <16 x i8> %1
}
define <16 x i8> @fold_packuswb_128() {
; CHECK-LABEL: @fold_packuswb_128(
; CHECK-NEXT: ret <16 x i8> <i8 0, i8 1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 15, i8 0, i8 127, i8 0, i8 1, i8 0, i8 1, i8 0, i8 0>
;
%1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 1, i16 -1, i16 255, i16 65535, i16 -32768, i16 -127, i16 15>, <8 x i16> <i16 -15, i16 127, i16 32768, i16 -65535, i16 -255, i16 1, i16 -1, i16 0>)
ret <16 x i8> %1
}
define <16 x i16> @fold_packssdw_256() {
; CHECK-LABEL: @fold_packssdw_256(
; CHECK-NEXT: ret <16 x i16> <i16 0, i16 256, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
;
%1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <8 x i32> undef)
ret <16 x i16> %1
}
define <16 x i16> @fold_packusdw_256() {
; CHECK-LABEL: @fold_packusdw_256(
; CHECK-NEXT: ret <16 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 256, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
;
%1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> <i32 0, i32 -256, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
ret <16 x i16> %1
}
define <32 x i8> @fold_packsswb_256() {
; CHECK-LABEL: @fold_packsswb_256(
; CHECK-NEXT: ret <32 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
;
%1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> zeroinitializer)
ret <32 x i8> %1
}
define <32 x i8> @fold_packuswb_256() {
; CHECK-LABEL: @fold_packuswb_256(
; CHECK-NEXT: ret <32 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
;
%1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> zeroinitializer, <16 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 256, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
ret <32 x i8> %1
}
define <32 x i16> @fold_packssdw_512() {
; CHECK-LABEL: @fold_packssdw_512(
; CHECK-NEXT: ret <32 x i16> <i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
;
%1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <16 x i32> undef)
ret <32 x i16> %1
}
define <32 x i16> @fold_packusdw_512() {
; CHECK-LABEL: @fold_packusdw_512(
; CHECK-NEXT: ret <32 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767, i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
;
%1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> <i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767, i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
ret <32 x i16> %1
}
define <64 x i8> @fold_packsswb_512() {
; CHECK-LABEL: @fold_packsswb_512(
; CHECK-NEXT: ret <64 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
;
%1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> zeroinitializer)
ret <64 x i8> %1
}
define <64 x i8> @fold_packuswb_512() {
; CHECK-LABEL: @fold_packuswb_512(
; CHECK-NEXT: ret <64 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
;
%1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> zeroinitializer, <32 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
ret <64 x i8> %1
}
;
; Demanded Elts
;
define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> undef)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 undef, i32 undef>
%2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
%3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
%4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 7, i32 7, i32 7, i32 7>
ret <8 x i16> %4
}
define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
;
%1 = insertelement <4 x i32> %a0, i32 0, i32 0
%2 = insertelement <4 x i32> %a1, i32 0, i32 3
%3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
%4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
ret <8 x i16> %4
}
define <16 x i8> @elts_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_packsswb_128(
; CHECK-NEXT: ret <16 x i8> zeroinitializer
;
%1 = insertelement <8 x i16> %a0, i16 0, i32 0
%2 = insertelement <8 x i16> %a1, i16 0, i32 0
%3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
%4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <16 x i8> %4
}
define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_packuswb_128(
; CHECK-NEXT: ret <16 x i8> undef
;
%1 = insertelement <8 x i16> undef, i16 0, i32 0
%2 = insertelement <8 x i16> undef, i16 0, i32 0
%3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
%4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
ret <16 x i8> %4
}
define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef)
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
;
%1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef>
%3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
%4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15>
ret <16 x i16> %4
}
define <16 x i16> @elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <16 x i16> [[TMP3]]
;
%1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
%3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
%4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i16> %4
}
define <32 x i8> @elts_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_packsswb_256(
; CHECK-NEXT: ret <32 x i8> zeroinitializer
;
%1 = insertelement <16 x i16> %a0, i16 0, i32 0
%2 = insertelement <16 x i16> %a1, i16 0, i32 8
%3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
%4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
ret <32 x i8> %4
}
define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_packuswb_256(
; CHECK-NEXT: ret <32 x i8> undef
;
%1 = insertelement <16 x i16> undef, i16 0, i32 1
%2 = insertelement <16 x i16> undef, i16 0, i32 0
%3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
%4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> zeroinitializer
ret <32 x i8> %4
}
define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef)
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
;
%1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef, i32 undef, i32 10, i32 9, i32 undef, i32 undef, i32 14, i32 13, i32 undef>
%3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
%4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 18, i32 19, i32 20, i32 undef, i32 undef, i32 23, i32 24, i32 undef, i32 undef, i32 27, i32 28, i32 undef, i32 undef, i32 31>
ret <32 x i16> %4
}
define <32 x i16> @elts_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <32 x i16> [[TMP3]]
;
%1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
%3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2)
%4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef>
ret <32 x i16> %4
}
define <64 x i8> @elts_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_packsswb_512(
; CHECK-NEXT: ret <64 x i8> zeroinitializer
;
%1 = insertelement <32 x i16> %a0, i16 0, i32 0
%2 = insertelement <32 x i16> %a1, i16 0, i32 8
%3 = insertelement <32 x i16> %1, i16 0, i32 16
%4 = insertelement <32 x i16> %2, i16 0, i32 24
%5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4)
%6 = shufflevector <64 x i8> %5, <64 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
ret <64 x i8> %6
}
define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_packuswb_512(
; CHECK-NEXT: ret <64 x i8> undef
;
%1 = insertelement <32 x i16> undef, i16 0, i32 1
%2 = insertelement <32 x i16> undef, i16 0, i32 0
%3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
%4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer
ret <64 x i8> %4
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) nounwind readnone
declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) nounwind readnone
declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) nounwind readnone
declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) nounwind readnone

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,460 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define double @test_sqrt_sd_0(double %a) {
; CHECK-LABEL: @test_sqrt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
%4 = extractelement <2 x double> %3, i32 0
ret double %4
}
define double @test_sqrt_sd_1(double %a) {
; CHECK-LABEL: @test_sqrt_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
%4 = extractelement <2 x double> %3, i32 1
ret double %4
}
define double @test_add_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_add_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = fadd double %a, %b
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
define double @test_add_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_add_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
define double @test_sub_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_sub_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = fsub double %a, %b
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
define double @test_sub_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_sub_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
define double @test_mul_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_mul_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = fmul double %a, %b
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
define double @test_mul_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_mul_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
define double @test_div_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_div_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv double %a, %b
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
define double @test_div_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_div_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_min_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %b)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %1)
ret <2 x double> %2
}
define double @test_min_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_min_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: ret double [[TMP4]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
define double @test_min_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_min_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_max_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %b)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %1)
ret <2 x double> %2
}
define double @test_max_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_max_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: ret double [[TMP4]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
define double @test_max_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_max_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
define <2 x double> @test_cmp_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_cmp_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %b, i8 0)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %1, i8 0)
ret <2 x double> %2
}
define double @test_cmp_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_cmp_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i8 0)
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: ret double [[TMP4]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
define double @test_cmp_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_cmp_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
define i32 @test_comieq_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comieq_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
define i32 @test_comige_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comige_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
define i32 @test_comigt_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comigt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
define i32 @test_comile_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comile_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
define i32 @test_comilt_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comilt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
define i32 @test_comineq_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comineq_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
define i32 @test_ucomieq_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomieq_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
define i32 @test_ucomige_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomige_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
define i32 @test_ucomigt_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomigt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
define i32 @test_ucomile_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomile_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
define i32 @test_ucomilt_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomilt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
define i32 @test_ucomineq_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomineq_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8)
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>)

View File

@@ -1,98 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_round_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a, <2 x double> %b, i32 10)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0
%2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
%3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10)
ret <2 x double> %3
}
define double @test_round_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_round_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> undef, <2 x double> [[TMP1]], i32 10)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
define double @test_round_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_round_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> undef, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_round_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x float> %b, i32 10)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%5 = insertelement <4 x float> %4, float 2.000000e+00, i32 2
%6 = insertelement <4 x float> %5, float 3.000000e+00, i32 3
%7 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %3, <4 x float> %6, i32 10)
ret <4 x float> %7
}
define float @test_round_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_round_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> undef, <4 x float> [[TMP1]], i32 10)
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[R]]
;
%1 = insertelement <4 x float> undef, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> undef, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
%r = extractelement <4 x float> %9, i32 0
ret float %r
}
define float @test_round_ss_2(float %a, float %b) {
; CHECK-LABEL: @test_round_ss_2(
; CHECK-NEXT: ret float 2.000000e+00
;
%1 = insertelement <4 x float> undef, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> undef, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
%r = extractelement <4 x float> %9, i32 2
ret float %r
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

Some files were not shown because too many files have changed in this diff Show More