Bug 1111241 - SpiderMonkey: Use VEX encodings for several more instructions r=jandem

This commit is contained in:
Dan Gohman 2014-12-15 20:54:00 -08:00
parent 89cd21c65e
commit d12d779e3b
7 changed files with 173 additions and 184 deletions

View File

@@ -248,7 +248,7 @@ class AssemblerX86Shared : public AssemblerShared
NoParity = X86Assembler::ConditionNP
};
// If this bit is set, the ucomisd operands have to be inverted.
// If this bit is set, the vucomisd operands have to be inverted.
static const int DoubleConditionBitInvert = 0x10;
// Bit set when a DoubleCondition does not map to a single x86 condition.
@@ -1623,25 +1623,25 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_ASSERT(HasSSE2());
masm.vcvtdq2ps_rr(src.code(), dest.code());
}
void movmskpd(FloatRegister src, Register dest) {
void vmovmskpd(FloatRegister src, Register dest) {
MOZ_ASSERT(HasSSE2());
masm.movmskpd_rr(src.code(), dest.code());
masm.vmovmskpd_rr(src.code(), dest.code());
}
void movmskps(FloatRegister src, Register dest) {
void vmovmskps(FloatRegister src, Register dest) {
MOZ_ASSERT(HasSSE2());
masm.movmskps_rr(src.code(), dest.code());
masm.vmovmskps_rr(src.code(), dest.code());
}
void ptest(FloatRegister rhs, FloatRegister lhs) {
void vptest(FloatRegister rhs, FloatRegister lhs) {
MOZ_ASSERT(HasSSE41());
masm.ptest_rr(rhs.code(), lhs.code());
masm.vptest_rr(rhs.code(), lhs.code());
}
void ucomisd(FloatRegister rhs, FloatRegister lhs) {
void vucomisd(FloatRegister rhs, FloatRegister lhs) {
MOZ_ASSERT(HasSSE2());
masm.ucomisd_rr(rhs.code(), lhs.code());
masm.vucomisd_rr(rhs.code(), lhs.code());
}
void ucomiss(FloatRegister rhs, FloatRegister lhs) {
void vucomiss(FloatRegister rhs, FloatRegister lhs) {
MOZ_ASSERT(HasSSE2());
masm.ucomiss_rr(rhs.code(), lhs.code());
masm.vucomiss_rr(rhs.code(), lhs.code());
}
void vpcmpeqw(FloatRegister rhs, FloatRegister lhs, FloatRegister dst) {
MOZ_ASSERT(HasSSE2());
@@ -2080,13 +2080,13 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_CRASH("unexpected operand kind");
}
}
void movhlps(FloatRegister src, FloatRegister dest) {
void vmovhlps(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.movhlps_rr(src.code(), dest.code());
masm.vmovhlps_rr(src1.code(), src0.code(), dest.code());
}
void movlhps(FloatRegister src, FloatRegister dest) {
void vmovlhps(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.movlhps_rr(src.code(), dest.code());
masm.vmovlhps_rr(src1.code(), src0.code(), dest.code());
}
void vunpcklps(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
@@ -2282,13 +2282,13 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_ASSERT(HasSSE2());
masm.vandps_rr(src1.code(), src0.code(), dest.code());
}
void sqrtsd(FloatRegister src, FloatRegister dest) {
void vsqrtsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.sqrtsd_rr(src.code(), dest.code());
masm.vsqrtsd_rr(src1.code(), src0.code(), dest.code());
}
void sqrtss(FloatRegister src, FloatRegister dest) {
void vsqrtss(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.sqrtss_rr(src.code(), dest.code());
masm.vsqrtss_rr(src1.code(), src0.code(), dest.code());
}
void roundsd(X86Assembler::RoundingMode mode, FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE41());
@@ -2350,81 +2350,81 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_CRASH("unexpected operand kind");
}
}
void movsldup(FloatRegister src, FloatRegister dest) {
void vmovsldup(FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE3());
masm.movsldup_rr(src.code(), dest.code());
masm.vmovsldup_rr(src.code(), dest.code());
}
void movsldup(const Operand &src, FloatRegister dest) {
void vmovsldup(const Operand &src, FloatRegister dest) {
MOZ_ASSERT(HasSSE3());
switch (src.kind()) {
case Operand::FPREG:
masm.movsldup_rr(src.fpu(), dest.code());
masm.vmovsldup_rr(src.fpu(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.movsldup_mr(src.disp(), src.base(), dest.code());
masm.vmovsldup_mr(src.disp(), src.base(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void movshdup(FloatRegister src, FloatRegister dest) {
void vmovshdup(FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE3());
masm.movshdup_rr(src.code(), dest.code());
masm.vmovshdup_rr(src.code(), dest.code());
}
void movshdup(const Operand &src, FloatRegister dest) {
void vmovshdup(const Operand &src, FloatRegister dest) {
MOZ_ASSERT(HasSSE3());
switch (src.kind()) {
case Operand::FPREG:
masm.movshdup_rr(src.fpu(), dest.code());
masm.vmovshdup_rr(src.fpu(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.movshdup_mr(src.disp(), src.base(), dest.code());
masm.vmovshdup_mr(src.disp(), src.base(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void minsd(FloatRegister src, FloatRegister dest) {
void vminsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.minsd_rr(src.code(), dest.code());
masm.vminsd_rr(src1.code(), src0.code(), dest.code());
}
void minsd(const Operand &src, FloatRegister dest) {
void vminsd(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src.kind()) {
switch (src1.kind()) {
case Operand::FPREG:
masm.minsd_rr(src.fpu(), dest.code());
masm.vminsd_rr(src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.minsd_mr(src.disp(), src.base(), dest.code());
masm.vminsd_mr(src1.disp(), src1.base(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void minss(FloatRegister src, FloatRegister dest) {
void vminss(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.minss_rr(src.code(), dest.code());
masm.vminss_rr(src1.code(), src0.code(), dest.code());
}
void maxsd(FloatRegister src, FloatRegister dest) {
void vmaxsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.maxsd_rr(src.code(), dest.code());
masm.vmaxsd_rr(src1.code(), src0.code(), dest.code());
}
void maxsd(const Operand &src, FloatRegister dest) {
void vmaxsd(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src.kind()) {
switch (src1.kind()) {
case Operand::FPREG:
masm.maxsd_rr(src.fpu(), dest.code());
masm.vmaxsd_rr(src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.maxsd_mr(src.disp(), src.base(), dest.code());
masm.vmaxsd_mr(src1.disp(), src1.base(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void maxss(FloatRegister src, FloatRegister dest) {
void vmaxss(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.maxss_rr(src.code(), dest.code());
masm.vmaxss_rr(src1.code(), src0.code(), dest.code());
}
void fisttp(const Operand &dest) {
MOZ_ASSERT(HasSSE3());

View File

@@ -3029,16 +3029,14 @@ public:
m_formatter.immediate8(uint8_t(mask));
}
void movhlps_rr(XMMRegisterID src, XMMRegisterID dst)
void vmovhlps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("movhlps %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.twoByteOp(OP2_MOVHLPS_VqUq, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vmovhlps", VEX_PS, OP2_MOVHLPS_VqUq, src1, src0, dst);
}
void movlhps_rr(XMMRegisterID src, XMMRegisterID dst)
void vmovlhps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("movlhps %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.twoByteOp(OP2_MOVLHPS_VqUq, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vmovlhps", VEX_PS, OP2_MOVLHPS_VqUq, src1, src0, dst);
}
void psrldq_ir(int shift, XMMRegisterID dest)
@@ -3110,23 +3108,18 @@ public:
m_formatter.immediate8(int8_t(count));
}
void movmskpd_rr(XMMRegisterID src, RegisterID dst)
void vmovmskpd_rr(XMMRegisterID src, RegisterID dst)
{
spew("movmskpd %s, %s", nameFPReg(src), nameIReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_MOVMSKPD_EdVd, (RegisterID)src, dst);
twoByteOpSimdInt32("vmovmskpd", VEX_PD, OP2_MOVMSKPD_EdVd, src, dst);
}
void movmskps_rr(XMMRegisterID src, RegisterID dst)
void vmovmskps_rr(XMMRegisterID src, RegisterID dst)
{
spew("movmskps %s, %s", nameFPReg(src), nameIReg(dst));
m_formatter.twoByteOp(OP2_MOVMSKPD_EdVd, (RegisterID)src, dst);
twoByteOpSimdInt32("vmovmskps", VEX_PS, OP2_MOVMSKPD_EdVd, src, dst);
}
void ptest_rr(XMMRegisterID rhs, XMMRegisterID lhs) {
spew("ptest %s, %s", nameFPReg(rhs), nameFPReg(lhs));
m_formatter.prefix(PRE_SSE_66);
m_formatter.threeByteOp(OP3_PTEST_VdVd, ESCAPE_PTEST, (RegisterID)rhs, (RegisterID)lhs);
void vptest_rr(XMMRegisterID rhs, XMMRegisterID lhs) {
threeByteOpSimd("vptest", VEX_PD, OP3_PTEST_VdVd, ESCAPE_PTEST, rhs, X86Registers::invalid_xmm, lhs);
}
void vmovd_rr(XMMRegisterID src, RegisterID dst)
@@ -3615,25 +3608,19 @@ public:
twoByteOpSimd("vsubss", VEX_SS, OP2_SUBSD_VsdWsd, offset, base, src0, dst);
}
void ucomiss_rr(XMMRegisterID rhs, XMMRegisterID lhs)
void vucomiss_rr(XMMRegisterID rhs, XMMRegisterID lhs)
{
spew("ucomiss %s, %s", nameFPReg(rhs), nameFPReg(lhs));
m_formatter.twoByteOp(OP2_UCOMISD_VsdWsd, (RegisterID)rhs, (RegisterID)lhs);
twoByteOpSimdFlags("vucomiss", VEX_PS, OP2_UCOMISD_VsdWsd, rhs, lhs);
}
void ucomisd_rr(XMMRegisterID rhs, XMMRegisterID lhs)
void vucomisd_rr(XMMRegisterID rhs, XMMRegisterID lhs)
{
spew("ucomisd %s, %s", nameFPReg(rhs), nameFPReg(lhs));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_UCOMISD_VsdWsd, (RegisterID)rhs, (RegisterID)lhs);
twoByteOpSimdFlags("vucomisd", VEX_PD, OP2_UCOMISD_VsdWsd, rhs, lhs);
}
void ucomisd_mr(int offset, RegisterID base, XMMRegisterID lhs)
void vucomisd_mr(int offset, RegisterID base, XMMRegisterID lhs)
{
spew("ucomisd %s0x%x(%s), %s",
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(lhs));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_UCOMISD_VsdWsd, offset, base, (RegisterID)lhs);
twoByteOpSimdFlags("vucomisd", VEX_PD, OP2_UCOMISD_VsdWsd, offset, base, lhs);
}
void vdivsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
@@ -3731,18 +3718,14 @@ public:
twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, address, src0, dst);
}
void sqrtsd_rr(XMMRegisterID src, XMMRegisterID dst)
void vsqrtsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("sqrtsd %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F2);
m_formatter.twoByteOp(OP2_SQRTSD_VsdWsd, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vsqrtsd", VEX_SD, OP2_SQRTSD_VsdWsd, src1, src0, dst);
}
void sqrtss_rr(XMMRegisterID src, XMMRegisterID dst)
void vsqrtss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("sqrtss %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_SQRTSS_VssWss, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vsqrtss", VEX_SS, OP2_SQRTSS_VssWss, src1, src0, dst);
}
void roundsd_rr(RoundingMode mode, XMMRegisterID src, XMMRegisterID dst)
@@ -3831,78 +3814,50 @@ public:
vblendvOpSimd(mask, offset, base, src0, dst);
}
void movsldup_rr(XMMRegisterID src, XMMRegisterID dst)
void vmovsldup_rr(XMMRegisterID src, XMMRegisterID dst)
{
spew("movsldup %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_MOVSLDUP_VpsWps, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vmovsldup", VEX_SS, OP2_MOVSLDUP_VpsWps, src, X86Registers::invalid_xmm, dst);
}
void vmovsldup_mr(int offset, RegisterID base, XMMRegisterID dst)
{
twoByteOpSimd("vmovsldup", VEX_SS, OP2_MOVSLDUP_VpsWps, offset, base, X86Registers::invalid_xmm, dst);
}
void movsldup_mr(int offset, RegisterID base, XMMRegisterID dst)
void vmovshdup_rr(XMMRegisterID src, XMMRegisterID dst)
{
spew("movsldup %s0x%x(%s), %s", PRETTY_PRINT_OFFSET(offset), nameIReg(base),
nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_MOVSLDUP_VpsWps, offset, base, (RegisterID)dst);
twoByteOpSimd("vmovshdup", VEX_SS, OP2_MOVSHDUP_VpsWps, src, X86Registers::invalid_xmm, dst);
}
void vmovshdup_mr(int offset, RegisterID base, XMMRegisterID dst)
{
twoByteOpSimd("vmovshdup", VEX_SS, OP2_MOVSHDUP_VpsWps, offset, base, X86Registers::invalid_xmm, dst);
}
void movshdup_rr(XMMRegisterID src, XMMRegisterID dst)
void vminsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("movshdup %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_MOVSHDUP_VpsWps, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vminsd", VEX_SD, OP2_MINSD_VsdWsd, src1, src0, dst);
}
void vminsd_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vminsd", VEX_SD, OP2_MINSD_VsdWsd, offset, base, src0, dst);
}
void movshdup_mr(int offset, RegisterID base, XMMRegisterID dst)
void vminss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("movshdup %s0x%x(%s), %s", PRETTY_PRINT_OFFSET(offset), nameIReg(base),
nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_MOVSHDUP_VpsWps, offset, base, (RegisterID)dst);
twoByteOpSimd("vminss", VEX_SS, OP2_MINSS_VssWss, src1, src0, dst);
}
void minsd_rr(XMMRegisterID src, XMMRegisterID dst)
void vmaxsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("minsd %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F2);
m_formatter.twoByteOp(OP2_MINSD_VsdWsd, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vmaxsd", VEX_SD, OP2_MAXSD_VsdWsd, src1, src0, dst);
}
void vmaxsd_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vmaxsd", VEX_SD, OP2_MAXSD_VsdWsd, offset, base, src0, dst);
}
void minsd_mr(int offset, RegisterID base, XMMRegisterID dst)
void vmaxss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("minsd %s0x%x(%s), %s",
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F2);
m_formatter.twoByteOp(OP2_MINSD_VsdWsd, offset, base, (RegisterID)dst);
}
void minss_rr(XMMRegisterID src, XMMRegisterID dst)
{
spew("minss %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_MINSS_VssWss, (RegisterID)src, (RegisterID)dst);
}
void maxsd_rr(XMMRegisterID src, XMMRegisterID dst)
{
spew("maxsd %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F2);
m_formatter.twoByteOp(OP2_MAXSD_VsdWsd, (RegisterID)src, (RegisterID)dst);
}
void maxsd_mr(int offset, RegisterID base, XMMRegisterID dst)
{
spew("maxsd %s0x%x(%s), %s",
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F2);
m_formatter.twoByteOp(OP2_MAXSD_VsdWsd, offset, base, (RegisterID)dst);
}
void maxss_rr(XMMRegisterID src, XMMRegisterID dst)
{
spew("maxss %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_MAXSS_VssWss, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vmaxss", VEX_SS, OP2_MAXSS_VssWss, src1, src0, dst);
}
// Misc instructions:
@@ -4453,6 +4408,36 @@ private:
}
#endif
void twoByteOpSimdFlags(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
XMMRegisterID rm, XMMRegisterID reg)
{
if (useLegacySSEEncodingForOtherOutput()) {
spew("%-11s%s, %s", legacySSEOpName(name), nameFPReg(rm), nameFPReg(reg));
m_formatter.legacySSEPrefix(ty);
m_formatter.twoByteOp(opcode, (RegisterID)rm, reg);
return;
}
spew("%-11s%s, %s", name, nameFPReg(rm), nameFPReg(reg));
m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, X86Registers::invalid_xmm, (XMMRegisterID)reg);
}
void twoByteOpSimdFlags(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
int offset, RegisterID base, XMMRegisterID reg)
{
if (useLegacySSEEncodingForOtherOutput()) {
spew("%-11s%s0x%x(%s), %s", legacySSEOpName(name),
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(reg));
m_formatter.legacySSEPrefix(ty);
m_formatter.twoByteOp(opcode, offset, base, reg);
return;
}
spew("%-11s%s0x%x(%s), %s", name,
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(reg));
m_formatter.twoByteOpVex(ty, opcode, offset, base, X86Registers::invalid_xmm, (XMMRegisterID)reg);
}
void threeByteOpSimd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
ThreeByteEscape escape,
XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst)

View File

@@ -125,7 +125,7 @@ CodeGeneratorX86Shared::visitTestDAndBranch(LTestDAndBranch *test)
{
const LAllocation *opd = test->input();
// ucomisd flags:
// vucomisd flags:
// Z P C
// ---------
// NaN 1 1 1
@@ -136,7 +136,7 @@ CodeGeneratorX86Shared::visitTestDAndBranch(LTestDAndBranch *test)
// NaN is falsey, so comparing against 0 and then using the Z flag is
// enough to determine which branch to take.
masm.zeroDouble(ScratchDoubleReg);
masm.ucomisd(ScratchDoubleReg, ToFloatRegister(opd));
masm.vucomisd(ScratchDoubleReg, ToFloatRegister(opd));
emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse());
}
@@ -144,9 +144,9 @@ void
CodeGeneratorX86Shared::visitTestFAndBranch(LTestFAndBranch *test)
{
const LAllocation *opd = test->input();
// ucomiss flags are the same as doubles; see comment above
// vucomiss flags are the same as doubles; see comment above
masm.zeroFloat32(ScratchFloat32Reg);
masm.ucomiss(ScratchFloat32Reg, ToFloatRegister(opd));
masm.vucomiss(ScratchFloat32Reg, ToFloatRegister(opd));
emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse());
}
@@ -492,12 +492,12 @@ CodeGeneratorX86Shared::visitMinMaxD(LMinMaxD *ins)
Label done, nan, minMaxInst;
// Do a ucomisd to catch equality and NaNs, which both require special
// Do a vucomisd to catch equality and NaNs, which both require special
// handling. If the operands are ordered and inequal, we branch straight to
// the min/max instruction. If we wanted, we could also branch for less-than
// or greater-than here instead of using min/max, however these conditions
// will sometimes be hard on the branch predictor.
masm.ucomisd(second, first);
masm.vucomisd(second, first);
masm.j(Assembler::NotEqual, &minMaxInst);
if (!ins->mir()->range() || ins->mir()->range()->canBeNaN())
masm.j(Assembler::Parity, &nan);
@@ -516,7 +516,7 @@ CodeGeneratorX86Shared::visitMinMaxD(LMinMaxD *ins)
// NaN, so we explicitly check for a NaN in the read-write operand.
if (!ins->mir()->range() || ins->mir()->range()->canBeNaN()) {
masm.bind(&nan);
masm.ucomisd(first, first);
masm.vucomisd(first, first);
masm.j(Assembler::Parity, &done);
}
@@ -524,9 +524,9 @@ CodeGeneratorX86Shared::visitMinMaxD(LMinMaxD *ins)
// return the value we need.
masm.bind(&minMaxInst);
if (ins->mir()->isMax())
masm.maxsd(second, first);
masm.vmaxsd(second, first, first);
else
masm.minsd(second, first);
masm.vminsd(second, first, first);
masm.bind(&done);
}
@@ -543,12 +543,12 @@ CodeGeneratorX86Shared::visitMinMaxF(LMinMaxF *ins)
Label done, nan, minMaxInst;
// Do a ucomiss to catch equality and NaNs, which both require special
// Do a vucomiss to catch equality and NaNs, which both require special
// handling. If the operands are ordered and inequal, we branch straight to
// the min/max instruction. If we wanted, we could also branch for less-than
// or greater-than here instead of using min/max, however these conditions
// will sometimes be hard on the branch predictor.
masm.ucomiss(second, first);
masm.vucomiss(second, first);
masm.j(Assembler::NotEqual, &minMaxInst);
if (!ins->mir()->range() || ins->mir()->range()->canBeNaN())
masm.j(Assembler::Parity, &nan);
@@ -567,7 +567,7 @@ CodeGeneratorX86Shared::visitMinMaxF(LMinMaxF *ins)
// NaN, so we explicitly check for a NaN in the read-write operand.
if (!ins->mir()->range() || ins->mir()->range()->canBeNaN()) {
masm.bind(&nan);
masm.ucomiss(first, first);
masm.vucomiss(first, first);
masm.j(Assembler::Parity, &done);
}
@@ -575,9 +575,9 @@ CodeGeneratorX86Shared::visitMinMaxF(LMinMaxF *ins)
// return the value we need.
masm.bind(&minMaxInst);
if (ins->mir()->isMax())
masm.maxss(second, first);
masm.vmaxss(second, first, first);
else
masm.minss(second, first);
masm.vminss(second, first, first);
masm.bind(&done);
}
@@ -630,7 +630,7 @@ CodeGeneratorX86Shared::visitSqrtD(LSqrtD *ins)
{
FloatRegister input = ToFloatRegister(ins->input());
FloatRegister output = ToFloatRegister(ins->output());
masm.sqrtsd(input, output);
masm.vsqrtsd(input, output, output);
}
void
@@ -638,14 +638,14 @@ CodeGeneratorX86Shared::visitSqrtF(LSqrtF *ins)
{
FloatRegister input = ToFloatRegister(ins->input());
FloatRegister output = ToFloatRegister(ins->output());
masm.sqrtss(input, output);
masm.vsqrtss(input, output, output);
}
void
CodeGeneratorX86Shared::visitPowHalfD(LPowHalfD *ins)
{
FloatRegister input = ToFloatRegister(ins->input());
MOZ_ASSERT(input == ToFloatRegister(ins->output()));
FloatRegister output = ToFloatRegister(ins->output());
Label done, sqrt;
@@ -672,7 +672,7 @@ CodeGeneratorX86Shared::visitPowHalfD(LPowHalfD *ins)
masm.addDouble(ScratchDoubleReg, input);
}
masm.sqrtsd(input, input);
masm.vsqrtsd(input, output, output);
masm.bind(&done);
}
@@ -1710,7 +1710,7 @@ CodeGeneratorX86Shared::visitCeil(LCeil *lir)
scratch, &lessThanMinusOne);
// Test for remaining values with the sign bit set, i.e. ]-1; -0]
masm.movmskpd(input, output);
masm.vmovmskpd(input, output);
masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout);
bailoutFrom(&bailout, lir->snapshot());
@@ -1762,7 +1762,7 @@ CodeGeneratorX86Shared::visitCeilF(LCeilF *lir)
scratch, &lessThanMinusOne);
// Test for remaining values with the sign bit set, i.e. ]-1; -0]
masm.movmskps(input, output);
masm.vmovmskps(input, output);
masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout);
bailoutFrom(&bailout, lir->snapshot());
@@ -2252,7 +2252,7 @@ CodeGeneratorX86Shared::visitSimdSignMaskX4(LSimdSignMaskX4 *ins)
Register output = ToRegister(ins->output());
// For Float32x4 and Int32x4.
masm.movmskps(input, output);
masm.vmovmskps(input, output);
}
void
@@ -2283,11 +2283,11 @@ CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF *ins)
if (AssemblerX86Shared::HasSSE3()) {
if (ins->lanesMatch(0, 0, 2, 2)) {
masm.movsldup(input, output);
masm.vmovsldup(input, output);
return;
}
if (ins->lanesMatch(1, 1, 3, 3)) {
masm.movshdup(input, output);
masm.vmovshdup(input, output);
return;
}
}
@@ -2295,14 +2295,14 @@ CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF *ins)
// TODO Here and below, arch specific lowering could identify this pattern
// and use defineReuseInput to avoid this move (bug 1084404)
if (ins->lanesMatch(2, 3, 2, 3)) {
masm.movaps(input, output);
masm.movhlps(input, output);
FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output);
masm.vmovhlps(input, inputCopy, output);
return;
}
if (ins->lanesMatch(0, 1, 0, 1)) {
masm.movaps(input, output);
masm.movlhps(input, output);
FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output);
masm.vmovlhps(input, inputCopy, output);
return;
}
@@ -2447,19 +2447,23 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
// TODO Here and below, symmetric case would be more handy to avoid a move,
// but can't be reached because operands would get swapped (bug 1084404).
if (ins->lanesMatch(2, 3, 6, 7)) {
masm.movaps(rhs, ScratchSimdReg);
masm.movhlps(lhs, ScratchSimdReg);
masm.movaps(ScratchSimdReg, out);
if (AssemblerX86Shared::HasAVX()) {
masm.vmovhlps(lhs, rhs, out);
} else {
masm.movaps(rhs, ScratchSimdReg);
masm.vmovhlps(lhs, ScratchSimdReg, ScratchSimdReg);
masm.movaps(ScratchSimdReg, out);
}
return;
}
if (ins->lanesMatch(0, 1, 4, 5)) {
masm.movlhps(rhs, lhs);
masm.vmovlhps(rhs, lhs, out);
return;
}
if (ins->lanesMatch(0, 4, 1, 5)) {
masm.vunpcklps(rhs, lhs, lhs);
masm.vunpcklps(rhs, lhs, out);
return;
}

View File

@@ -60,7 +60,7 @@ LIRGeneratorX86Shared::visitPowHalf(MPowHalf *ins)
MDefinition *input = ins->input();
MOZ_ASSERT(input->type() == MIRType_Double);
LPowHalfD *lir = new(alloc()) LPowHalfD(useRegisterAtStart(input));
defineReuseInput(lir, ins, 0);
define(lir, ins);
}
void

View File

@@ -223,7 +223,7 @@ MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg,
branchDouble(DoubleNotEqual, reg, ScratchDoubleReg, &nonZero);
}
// Input register is either zero or negative zero. Retrieve sign of input.
movmskpd(reg, scratch);
vmovmskpd(reg, scratch);
// If reg is 1 or 3, input is negative zero.
// If reg is 0 or 2, input is a normal zero.

View File

@@ -38,9 +38,9 @@ class MacroAssemblerX86Shared : public Assembler
void compareDouble(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs) {
if (cond & DoubleConditionBitInvert)
ucomisd(lhs, rhs);
vucomisd(lhs, rhs);
else
ucomisd(rhs, lhs);
vucomisd(rhs, lhs);
}
void branchDouble(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs, Label *label)
{
@@ -65,9 +65,9 @@ class MacroAssemblerX86Shared : public Assembler
void compareFloat(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs) {
if (cond & DoubleConditionBitInvert)
ucomiss(lhs, rhs);
vucomiss(lhs, rhs);
else
ucomiss(rhs, lhs);
vucomiss(rhs, lhs);
}
void branchFloat(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs, Label *label)
{
@@ -643,7 +643,7 @@ class MacroAssemblerX86Shared : public Assembler
}
Condition testDoubleTruthy(bool truthy, FloatRegister reg) {
zeroDouble(ScratchDoubleReg);
ucomisd(reg, ScratchDoubleReg);
vucomisd(reg, ScratchDoubleReg);
return truthy ? NonZero : Zero;
}
void branchTestDoubleTruthy(bool truthy, FloatRegister reg, Label *label) {
@@ -1003,7 +1003,7 @@ class MacroAssemblerX86Shared : public Assembler
}
void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) {
movhlps(src, dest);
vmovhlps(src, dest, dest);
}
void shuffleFloat32(uint32_t mask, FloatRegister src, FloatRegister dest) {
// The shuffle instruction on x86 is such that it moves 2 words from
@@ -1090,7 +1090,7 @@ class MacroAssemblerX86Shared : public Assembler
vcvttsd2si(src, dest);
convertInt32ToDouble(dest, ScratchDoubleReg);
ucomisd(ScratchDoubleReg, src);
vucomisd(ScratchDoubleReg, src);
j(Assembler::Parity, fail);
j(Assembler::NotEqual, fail);
@@ -1108,7 +1108,7 @@ class MacroAssemblerX86Shared : public Assembler
vcvttss2si(src, dest);
convertInt32ToFloat32(dest, ScratchFloat32Reg);
ucomiss(ScratchFloat32Reg, src);
vucomiss(ScratchFloat32Reg, src);
j(Assembler::Parity, fail);
j(Assembler::NotEqual, fail);
}

View File

@@ -846,7 +846,7 @@ CodeGeneratorX86::visitOutOfLineTruncate(OutOfLineTruncate *ool)
// This has to be an exact conversion, as otherwise the truncation works
// incorrectly on the modified value.
masm.zeroDouble(ScratchDoubleReg);
masm.ucomisd(ScratchDoubleReg, input);
masm.vucomisd(ScratchDoubleReg, input);
masm.j(Assembler::Parity, &fail);
{
@@ -866,7 +866,7 @@ CodeGeneratorX86::visitOutOfLineTruncate(OutOfLineTruncate *ool)
masm.vcvttsd2si(temp, output);
masm.vcvtsi2sd(output, ScratchDoubleReg, ScratchDoubleReg);
masm.ucomisd(ScratchDoubleReg, temp);
masm.vucomisd(ScratchDoubleReg, temp);
masm.j(Assembler::Parity, &fail);
masm.j(Assembler::Equal, ool->rejoin());
}
@@ -935,7 +935,7 @@ CodeGeneratorX86::visitOutOfLineTruncateFloat32(OutOfLineTruncateFloat32 *ool)
// This has to be an exact conversion, as otherwise the truncation works
// incorrectly on the modified value.
masm.zeroFloat32(ScratchFloat32Reg);
masm.ucomiss(ScratchFloat32Reg, input);
masm.vucomiss(ScratchFloat32Reg, input);
masm.j(Assembler::Parity, &fail);
{
@@ -955,7 +955,7 @@ CodeGeneratorX86::visitOutOfLineTruncateFloat32(OutOfLineTruncateFloat32 *ool)
masm.vcvttss2si(temp, output);
masm.vcvtsi2ss(output, ScratchFloat32Reg, ScratchFloat32Reg);
masm.ucomiss(ScratchFloat32Reg, temp);
masm.vucomiss(ScratchFloat32Reg, temp);
masm.j(Assembler::Parity, &fail);
masm.j(Assembler::Equal, ool->rejoin());
}