Bug 1096684 - IonMonkey: Optimize with SSE3 movsldup and movshdup r=bbouvier

This commit is contained in:
Dan Gohman 2014-11-12 12:38:32 -08:00
parent 7b34f3cf97
commit f635cbd2c0
3 changed files with 77 additions and 0 deletions

View File

@ -2194,6 +2194,40 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_CRASH("unexpected operand kind");
}
}
// Emits the register-to-register form of SSE3 MOVSLDUP. The SIMD swizzle
// code generator selects this instruction for the lane pattern (0, 0, 2, 2).
// Callers must have verified SSE3 support; this is only asserted here.
void movsldup(FloatRegister src, FloatRegister dest)
{
    MOZ_ASSERT(HasSSE3());
    masm.movsldup_rr(src.code(),
                     dest.code());
}
// Emits SSE3 MOVSLDUP with a generic source operand.
//
// Supported source kinds:
//   - Operand::FPREG:        forwarded to the register/register encoder.
//   - Operand::MEM_REG_DISP: forwarded to the base+displacement encoder.
// Any other operand kind is a caller error and crashes.
void movsldup(const Operand &src, FloatRegister dest)
{
    MOZ_ASSERT(HasSSE3());

    if (src.kind() == Operand::FPREG) {
        masm.movsldup_rr(src.fpu(), dest.code());
        return;
    }

    if (src.kind() == Operand::MEM_REG_DISP) {
        masm.movsldup_mr(src.disp(), src.base(), dest.code());
        return;
    }

    MOZ_CRASH("unexpected operand kind");
}
// Emits the register-to-register form of SSE3 MOVSHDUP. The SIMD swizzle
// code generator selects this instruction for the lane pattern (1, 1, 3, 3).
// Callers must have verified SSE3 support; this is only asserted here.
void movshdup(FloatRegister src, FloatRegister dest)
{
    MOZ_ASSERT(HasSSE3());
    masm.movshdup_rr(src.code(),
                     dest.code());
}
// Emits SSE3 MOVSHDUP with a generic source operand.
//
// Supported source kinds:
//   - Operand::FPREG:        forwarded to the register/register encoder.
//   - Operand::MEM_REG_DISP: forwarded to the base+displacement encoder.
// Any other operand kind is a caller error and crashes.
void movshdup(const Operand &src, FloatRegister dest)
{
    MOZ_ASSERT(HasSSE3());

    if (src.kind() == Operand::FPREG) {
        masm.movshdup_rr(src.fpu(), dest.code());
        return;
    }

    if (src.kind() == Operand::MEM_REG_DISP) {
        masm.movshdup_mr(src.disp(), src.base(), dest.code());
        return;
    }

    MOZ_CRASH("unexpected operand kind");
}
void minsd(FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.minsd_rr(src.code(), dest.code());

View File

@ -300,9 +300,11 @@ private:
OP2_MOVSD_WsdVsd = 0x11,
OP2_MOVPS_WpsVps = 0x11,
OP2_MOVHLPS_VqUq = 0x12,
// Same opcode byte (0x12) as OP2_MOVHLPS_VqUq; the movsldup emitters in this
// file issue a PRE_SSE_F3 prefix before this opcode.
OP2_MOVSLDUP_VpsWps = 0x12,
OP2_UNPCKLPS_VsdWsd = 0x14,
OP2_UNPCKHPS_VsdWsd = 0x15,
OP2_MOVLHPS_VqUq = 0x16,
// Same opcode byte (0x16) as OP2_MOVLHPS_VqUq; the movshdup emitters in this
// file issue a PRE_SSE_F3 prefix before this opcode.
OP2_MOVSHDUP_VpsWps = 0x16,
OP2_MOVAPD_VsdWsd = 0x28,
OP2_MOVAPS_VsdWsd = 0x28,
OP2_MOVAPS_WsdVsd = 0x29,
@ -3764,6 +3766,36 @@ public:
m_formatter.immediate8(uint8_t(imm));
}
// Encodes MOVSLDUP with an XMM register source: a PRE_SSE_F3 prefix followed
// by the two-byte opcode OP2_MOVSLDUP_VpsWps, with the destination passed
// ahead of the source to the formatter.
void movsldup_rr(XMMRegisterID src, XMMRegisterID dst)
{
    spew("movsldup %s, %s", nameFPReg(src), nameFPReg(dst));

    const RegisterID dstReg = static_cast<RegisterID>(dst);
    const RegisterID srcReg = static_cast<RegisterID>(src);

    m_formatter.prefix(PRE_SSE_F3);
    m_formatter.twoByteOp(OP2_MOVSLDUP_VpsWps, dstReg, srcReg);
}
void movsldup_mr(int offset, RegisterID base, XMMRegisterID dst)
{
spew("movsldup %s0x%x(%s), %s", PRETTY_PRINT_OFFSET(offset), nameIReg(base),
nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_MOVSLDUP_VpsWps, (RegisterID)dst, base, offset);
}
// Encodes MOVSHDUP with an XMM register source: a PRE_SSE_F3 prefix followed
// by the two-byte opcode OP2_MOVSHDUP_VpsWps, with the destination passed
// ahead of the source to the formatter.
void movshdup_rr(XMMRegisterID src, XMMRegisterID dst)
{
    spew("movshdup %s, %s", nameFPReg(src), nameFPReg(dst));

    const RegisterID dstReg = static_cast<RegisterID>(dst);
    const RegisterID srcReg = static_cast<RegisterID>(src);

    m_formatter.prefix(PRE_SSE_F3);
    m_formatter.twoByteOp(OP2_MOVSHDUP_VpsWps, dstReg, srcReg);
}
void movshdup_mr(int offset, RegisterID base, XMMRegisterID dst)
{
spew("movshdup %s0x%x(%s), %s", PRETTY_PRINT_OFFSET(offset), nameIReg(base),
nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_MOVSHDUP_VpsWps, (RegisterID)dst, base, offset);
}
void minsd_rr(XMMRegisterID src, XMMRegisterID dst)
{
spew("minsd %s, %s", nameFPReg(src), nameFPReg(dst));

View File

@ -2414,6 +2414,17 @@ CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF *ins)
uint32_t z = ins->laneZ();
uint32_t w = ins->laneW();
if (AssemblerX86Shared::HasSSE3()) {
if (ins->lanesMatch(0, 0, 2, 2)) {
masm.movsldup(input, output);
return true;
}
if (ins->lanesMatch(1, 1, 3, 3)) {
masm.movshdup(input, output);
return true;
}
}
// TODO Here and below, arch specific lowering could identify this pattern
// and use defineReuseInput to avoid this move (bug 1084404)
if (ins->lanesMatch(2, 3, 2, 3)) {