You've already forked linux-packaging-mono
Imported Upstream version 5.18.0.167
Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
parent
e19d552987
commit
b084638f15
@ -1,52 +0,0 @@
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O0 < %s | FileCheck %s
|
||||
|
||||
; Check that at -O0, the backend doesn't attempt to canonicalize a vector load
|
||||
; used by an INSERTPS into a scalar load plus scalar_to_vector.
|
||||
;
|
||||
; In order to fold a load into the memory operand of an INSERTPSrm, the backend
|
||||
; tries to canonicalize a vector load in input to an INSERTPS node into a
|
||||
; scalar load plus scalar_to_vector. This would allow ISel to match the
|
||||
; INSERTPSrm variant rather than a load plus INSERTPSrr.
|
||||
;
|
||||
; However, ISel can only select an INSERTPSrm if folding a load into the operand
|
||||
; of an insertps is considered to be profitable.
|
||||
;
|
||||
; In the example below:
|
||||
;
|
||||
; __m128 test(__m128 a, __m128 *b) {
|
||||
; __m128 c = _mm_insert_ps(a, *b, 1 << 6);
|
||||
; return c;
|
||||
; }
|
||||
;
|
||||
; At -O0, the backend would attempt to canonicalize the load to 'b' into
|
||||
; a scalar load in the hope of matching an INSERTPSrm.
|
||||
; However, ISel would fail to recognize an INSERTPSrm since load folding is
|
||||
; always considered unprofitable at -O0. This would leave the insertps mask
|
||||
; in an invalid state.
|
||||
;
|
||||
; The problem with the canonicalization rule performed by the backend is that
|
||||
; it assumes ISel to always be able to match an INSERTPSrm. This assumption is
|
||||
; not always correct at -O0. In this example, FastISel fails to lower the
|
||||
; arguments needed by the entry block. This is enough to enable the DAGCombiner
|
||||
; and eventually trigger the canonicalization on the INSERTPS node.
|
||||
;
|
||||
; This test checks that the vector load in input to the insertps is not
|
||||
; canonicalized into a scalar load plus scalar_to_vector (a movss).
|
||||
|
||||
define <4 x float> @test(<4 x float> %a, <4 x float>* %b) {
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: movaps (%rdi), [[REG:%[a-z0-9]+]]
|
||||
; CHECK-NOT: movss
|
||||
; CHECK: insertps $64, [[REG]],
|
||||
; CHECK: ret
|
||||
entry:
|
||||
%0 = load <4 x float>, <4 x float>* %b, align 16
|
||||
%1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %0, i32 64)
|
||||
%2 = alloca <4 x float>, align 16
|
||||
store <4 x float> %1, <4 x float>* %2, align 16
|
||||
%3 = load <4 x float>, <4 x float>* %2, align 16
|
||||
ret <4 x float> %3
|
||||
}
|
||||
|
||||
|
||||
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
|
Reference in New Issue
Block a user