246 lines
12 KiB
LLVM
246 lines
12 KiB
LLVM
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||
|
|
||
|
;
; UNDEF Elts
;
|
||
|
|
||
|
; 128-bit PMULUDQ with both operands undef.
; The expected output returns an all-zero <2 x i64> directly, with the
; intrinsic call removed. The arguments %a0/%a1 are intentionally unused.
define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @undef_pmuludq_128(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
  ret <2 x i64> %1
}
|
||
|
|
||
|
; 256-bit PMULUDQ with both operands undef.
; The expected output returns an all-zero <4 x i64> directly, with the
; intrinsic call removed. The arguments %a0/%a1 are intentionally unused.
define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @undef_pmuludq_256(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
  ret <4 x i64> %1
}
|
||
|
|
||
|
; 512-bit PMULUDQ with both operands undef.
; The expected output returns an all-zero <8 x i64> directly, with the
; intrinsic call removed. The arguments %a0/%a1 are intentionally unused.
define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_pmuludq_512(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
  ret <8 x i64> %1
}
|
||
|
|
||
|
; 128-bit PMULDQ with both operands undef.
; The expected output returns an all-zero <2 x i64> directly, with the
; intrinsic call removed. The arguments %a0/%a1 are intentionally unused.
define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @undef_pmuldq_128(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
  ret <2 x i64> %1
}
|
||
|
|
||
|
; 256-bit PMULDQ with both operands undef.
; The expected output returns an all-zero <4 x i64> directly, with the
; intrinsic call removed. The arguments %a0/%a1 are intentionally unused.
define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @undef_pmuldq_256(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
  ret <4 x i64> %1
}
|
||
|
|
||
|
; 512-bit PMULDQ with both operands undef.
; The expected output returns an all-zero <8 x i64> directly, with the
; intrinsic call removed. The arguments %a0/%a1 are intentionally unused.
define <8 x i64> @undef_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_pmuldq_512(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
  ret <8 x i64> %1
}
|
||
|
|
||
|
; 128-bit PMULUDQ of an undef first operand and an all-zero second operand.
; The expected output is an all-zero <2 x i64> with no intrinsic call left.
; The arguments %a0/%a1 are intentionally unused.
define <2 x i64> @undef_zero_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuludq_128(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> zeroinitializer)
  ret <2 x i64> %1
}
|
||
|
|
||
|
; 256-bit PMULUDQ of an all-zero first operand and an undef second operand
; (operand order swapped relative to the 128-bit variant).
; The expected output is an all-zero <4 x i64> with no intrinsic call left.
; The arguments %a0/%a1 are intentionally unused.
define <4 x i64> @undef_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuludq_256(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> undef)
  ret <4 x i64> %1
}
|
||
|
|
||
|
; 512-bit PMULUDQ of an undef first operand and an all-zero second operand.
; The expected output is an all-zero <8 x i64> with no intrinsic call left.
; The arguments %a0/%a1 are intentionally unused.
define <8 x i64> @undef_zero_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuludq_512(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> zeroinitializer)
  ret <8 x i64> %1
}
|
||
|
|
||
|
; 128-bit PMULDQ of an all-zero first operand and an undef second operand.
; The expected output is an all-zero <2 x i64> with no intrinsic call left.
; The arguments %a0/%a1 are intentionally unused.
define <2 x i64> @undef_zero_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuldq_128(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> zeroinitializer, <4 x i32> undef)
  ret <2 x i64> %1
}
|
||
|
|
||
|
; 256-bit PMULDQ of an undef first operand and an all-zero second operand
; (operand order swapped relative to the 128-bit variant).
; The expected output is an all-zero <4 x i64> with no intrinsic call left.
; The arguments %a0/%a1 are intentionally unused.
define <4 x i64> @undef_zero_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuldq_256(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> zeroinitializer)
  ret <4 x i64> %1
}
|
||
|
|
||
|
; 512-bit PMULDQ of an all-zero first operand and an undef second operand.
; The expected output is an all-zero <8 x i64> with no intrinsic call left.
; The arguments %a0/%a1 are intentionally unused.
define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuldq_512(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> undef)
  ret <8 x i64> %1
}
|
||
|
|
||
|
;
; Constant Folding
;
|
||
|
|
||
|
; 128-bit PMULUDQ on two fully constant operands; the call is expected to be
; replaced by a constant vector. Only i32 lanes 0 and 2 feed the result:
;   result[0] = 0xFFFFFFFF * 2147483647 = 9223372030412324865 (unsigned 32x32)
;   result[1] = 0xFFFFFFFF * 1          = 4294967295
; The arguments %a0/%a1 are intentionally unused.
define <2 x i64> @fold_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @fold_pmuludq_128(
; CHECK-NEXT:    ret <2 x i64> <i64 9223372030412324865, i64 4294967295>
;
  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2147483647, i32 1, i32 1, i32 3>)
  ret <2 x i64> %1
}
|
||
|
|
||
|
; 256-bit PMULUDQ of two all-zero constant operands.
; The expected output is an all-zero <4 x i64> with no intrinsic call left.
; The arguments %a0/%a1 are intentionally unused.
define <4 x i64> @fold_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @fold_pmuludq_256(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer)
  ret <4 x i64> %1
}
|
||
|
|
||
|
; 512-bit PMULUDQ on constant operands that mix undef and defined lanes.
; Each i64 result lane N is the unsigned product of i32 lane 2*N of each
; operand; the expected output shows lanes with an undef input folded to 0:
;   result[0] = 0     * undef      -> 0
;   result[1] = undef * undef      -> 0
;   result[2] = 1     * 255        =  255
;   result[3] = 2     * 65535      =  131070
;   result[4] = undef * 0          -> 0
;   result[5] = 0xFFFFFFFF * 0xFFFF0001 (printed as signed i64 -281474976645121)
;   result[6] = 65536 * 2147483647 =  140737488289792
;   result[7] = 0xFFFF0000 * 65536 =  281470681743360
; The arguments %a0/%a1 are intentionally unused.
define <8 x i64> @fold_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @fold_pmuludq_512(
; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 0, i64 255, i64 131070, i64 0, i64 -281474976645121, i64 140737488289792, i64 281470681743360>
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> <i32 0, i32 0, i32 undef, i32 0, i32 1, i32 1, i32 2, i32 2, i32 undef, i32 undef, i32 -1, i32 -1, i32 65536, i32 -1, i32 -65536, i32 undef>, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 255, i32 -256, i32 65535, i32 -65536, i32 0, i32 -1, i32 -65535, i32 -65535, i32 2147483647, i32 2147483648, i32 65536, i32 -65535>)
  ret <8 x i64> %1
}
|
||
|
|
||
|
; 128-bit PMULDQ on constant operands whose lane 0 is undef on both sides.
; Only i32 lanes 0 and 2 feed the result:
;   result[0] = undef * undef -> 0
;   result[1] = (-1) * (-2)   =  2 (signed 32x32)
; The arguments %a0/%a1 are intentionally unused.
define <2 x i64> @fold_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @fold_pmuldq_128(
; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 2>
;
  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> <i32 undef, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 undef, i32 1, i32 -2, i32 3>)
  ret <2 x i64> %1
}
|
||
|
|
||
|
; 256-bit PMULDQ on constant operands. Each i64 result lane N is the signed
; product of i32 lane 2*N of each operand:
;   result[0] = undef       * 0          -> 0
;   result[1] = -65535      * -65535     =  4294836225
;   result[2] = 65536       * 2147483647 =  140737488289792
;   result[3] = -2147483648 * 65536      = -140737488355328
; The arguments %a0/%a1 are intentionally unused.
define <4 x i64> @fold_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @fold_pmuldq_256(
; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 4294836225, i64 140737488289792, i64 -140737488355328>
;
  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> <i32 undef, i32 1, i32 -65535, i32 128, i32 65536, i32 2147483647, i32 -2147483648, i32 65536>, <8 x i32> <i32 0, i32 -1, i32 -65535, i32 -65535, i32 2147483647, i32 2147483648, i32 65536, i32 -65535>)
  ret <4 x i64> %1
}
|
||
|
|
||
|
; 512-bit PMULDQ of an all-zero first operand and a constant second operand
; containing both undef and non-zero lanes.
; The expected output is an all-zero <8 x i64> with no intrinsic call left.
; The arguments %a0/%a1 are intentionally unused.
define <8 x i64> @fold_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @fold_pmuldq_512(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> <i32 undef, i32 -1, i32 -3, i32 -1, i32 8, i32 10, i32 -256, i32 65536, i32 undef, i32 1, i32 -65535, i32 128, i32 65536, i32 2147483647, i32 -2147483648, i32 65536>)
  ret <8 x i64> %1
}
|
||
|
|
||
|
;
; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required.
;
|
||
|
|
||
|
; Demanded-elements test for 128-bit PMULUDQ.
; Both operands are produced by shuffles that duplicate lanes into the odd
; positions, and the result is then splatted from its lane 0. In the expected
; output:
;   - the %a0 shuffle disappears and %a0 is passed directly (its mask
;     <0,0,2,2> leaves the even lanes where they already were);
;   - the %a1 shuffle keeps only mask element 0; every other element becomes
;     undef because only result lane 0 is used by the final splat.
define <2 x i64> @test_demanded_elts_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuludq_128(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %3 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %4
}
|
||
|
|
||
|
; Demanded-elements test for 256-bit PMULUDQ.
; Both operands are produced by shuffles that duplicate lanes into the odd
; positions; the full result is returned. In the expected output:
;   - the %a0 shuffle disappears and %a0 is passed directly (its mask leaves
;     the even lanes where they already were);
;   - the %a1 shuffle keeps its even mask elements and the odd ones become
;     undef.
define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuludq_256(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> [[TMP1]])
; CHECK-NEXT:    ret <4 x i64> [[TMP2]]
;
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  %3 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %1, <8 x i32> %2)
  ret <4 x i64> %3
}
|
||
|
|
||
|
; Demanded-elements test for 512-bit PMULUDQ.
; Both operands are produced by shuffles that duplicate lanes into the odd
; positions; the full result is returned. In the expected output:
;   - the %a0 shuffle disappears and %a0 is passed directly (its mask leaves
;     the even lanes where they already were);
;   - the %a1 shuffle keeps its even mask elements and the odd ones become
;     undef.
define <8 x i64> @test_demanded_elts_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuludq_512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 9, i32 undef, i32 11, i32 undef, i32 13, i32 undef, i32 15, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a0, <16 x i32> [[TMP1]])
; CHECK-NEXT:    ret <8 x i64> [[TMP2]]
;
  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  %3 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %1, <16 x i32> %2)
  ret <8 x i64> %3
}
|
||
|
|
||
|
; Demanded-elements test for 128-bit PMULDQ.
; Both operands are produced by shuffles that duplicate lanes into the odd
; positions; the full result is returned. In the expected output:
;   - the %a0 shuffle disappears and %a0 is passed directly (its mask
;     <0,0,2,2> leaves the even lanes where they already were);
;   - the %a1 shuffle keeps its even mask elements and the odd ones become
;     undef.
define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuldq_128(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 3, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> [[TMP1]])
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %3 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %1, <4 x i32> %2)
  ret <2 x i64> %3
}
|
||
|
|
||
|
; Demanded-elements test for 256-bit PMULDQ with a partial use of the result.
; The final shuffle reads only result lanes 0 and 3, which correspond to i32
; input lanes 0 and 6. In the expected output:
;   - the %a0 shuffle disappears and %a0 is passed directly;
;   - the %a1 shuffle keeps only mask elements 0 and 6; all others become
;     undef;
;   - the trailing <0,0,3,3> shuffle of the result is kept unchanged.
define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuldq_256(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
;
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  %3 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %1, <8 x i32> %2)
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  ret <4 x i64> %4
}
|
||
|
|
||
|
; Demanded-elements test for 512-bit PMULDQ with a partial use of the result.
; The final shuffle reads only result lanes 0, 3, 4 and 7, which correspond
; to i32 input lanes 0, 6, 8 and 14. In the expected output:
;   - the %a0 shuffle disappears and %a0 is passed directly;
;   - the %a1 shuffle keeps only mask elements 0, 6, 8 and 14; all others
;     become undef;
;   - the trailing shuffle of the result is kept unchanged.
define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuldq_512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 15, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a0, <16 x i32> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
;
  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  %3 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %1, <16 x i32> %2)
  %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
  ret <8 x i64> %4
}
|
||
|
|
||
|
; Declarations of the x86 multiply intrinsics exercised above, grouped by
; vector width (128-bit, 256-bit, 512-bit). Each takes two vXi32 vectors and
; returns a vector of half as many i64 lanes.
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone

declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone

declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>) nounwind readnone
declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>) nounwind readnone
|