From 63b1b5bede8fd14a5abc0d34f2d6701906c70e8a Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 10 Feb 2015 08:10:31 -0800 Subject: [PATCH] Bug 1131289 - SpiderMonkey: Use the vmovq encoding for vmovq r=bbouvier --- js/src/jit/shared/Assembler-x86-shared.h | 32 ++++++++++ js/src/jit/shared/BaseAssembler-x86-shared.h | 62 ++++++++++++------- js/src/jit/shared/Disassembler-x86-shared.cpp | 27 ++++++++ js/src/jit/shared/Encoding-x86-shared.h | 4 +- js/src/jit/x64/Assembler-x64.h | 31 +--------- js/src/jit/x64/CodeGenerator-x64.cpp | 4 +- js/src/jit/x86/Assembler-x86.h | 20 ++++++ js/src/jit/x86/CodeGenerator-x86.cpp | 6 +- 8 files changed, 124 insertions(+), 62 deletions(-) diff --git a/js/src/jit/shared/Assembler-x86-shared.h b/js/src/jit/shared/Assembler-x86-shared.h index aa8a08745f5..3707855a89f 100644 --- a/js/src/jit/shared/Assembler-x86-shared.h +++ b/js/src/jit/shared/Assembler-x86-shared.h @@ -1789,6 +1789,38 @@ class AssemblerX86Shared : public AssemblerShared case Operand::MEM_SCALE: masm.vmovd_rm(src.code(), dest.disp(), dest.base(), dest.index(), dest.scale()); break; + case Operand::MEM_ADDRESS32: + masm.vmovq_rm(src.code(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovq(const Operand &src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovq_mr(src.disp(), src.base(), dest.code()); + break; + case Operand::MEM_SCALE: + masm.vmovq_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovq_mr(src.address(), dest.code()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovq(FloatRegister src, const Operand &dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovq_rm(src.code(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovq_rm(src.code(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; default: MOZ_CRASH("unexpected operand kind"); } diff --git a/js/src/jit/shared/BaseAssembler-x86-shared.h b/js/src/jit/shared/BaseAssembler-x86-shared.h index 2ecdb72c04f..6b70251e698 100644 --- a/js/src/jit/shared/BaseAssembler-x86-shared.h +++ b/js/src/jit/shared/BaseAssembler-x86-shared.h @@ -1741,28 +1741,33 @@ public: spew("movq %s, %p", GPReg64Name(src), addr); m_formatter.oneByteOp64(OP_MOV_EvGv, addr, src); } +#endif - void movq_rm(XMMRegisterID src, int32_t offset, RegisterID base) + void vmovq_rm(XMMRegisterID src, int32_t offset, RegisterID base) { - spew("movq %s, " MEM_ob, XMMRegName(src), ADDR_ob(offset, base)); - m_formatter.prefix(PRE_SSE_66); - m_formatter.twoByteOp64(OP2_MOVQ_EdVd, offset, base, src); + // vmovq_rm can be encoded either as a true vmovq or as a vmovd with a + // REX prefix modifying it to be 64-bit. We choose the vmovq encoding + // because it's smaller (when it doesn't need a REX prefix for other + // reasons) and because it works on 32-bit x86 too. + twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, invalid_xmm, src); } - void movq_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + void vmovq_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) { - spew("movq %s, " MEM_obs, XMMRegName(src), ADDR_obs(offset, base, index, scale)); - m_formatter.prefix(PRE_SSE_66); - m_formatter.twoByteOp64(OP2_MOVQ_EdVd, offset, base, index, scale, src); + twoByteOpSimd_disp32("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, invalid_xmm, src); } - void movq_rm(XMMRegisterID src, const void *addr) + void vmovq_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) { - spew("movq %s, %p", XMMRegName(src), addr); - m_formatter.prefix(PRE_SSE_66); - m_formatter.twoByteOp64(OP2_MOVQ_EdVd, addr, src); + twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, index, scale, invalid_xmm, src); } + void vmovq_rm(XMMRegisterID src, const void *addr) + { + twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, addr, invalid_xmm, src); + } + +#ifdef JS_CODEGEN_X64 void movq_mEAX(const void *addr) { if (IsAddressImmediate(addr)) { @@ -1815,28 +1820,33 @@ public: spew("movq %p, %s", addr, GPReg64Name(dst)); m_formatter.oneByteOp64(OP_MOV_GvEv, addr, dst); } +#endif - void movq_mr(int32_t offset, RegisterID base, XMMRegisterID dst) + void vmovq_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { - spew("movq " MEM_ob ", %s", ADDR_ob(offset, base), XMMRegName(dst)); - m_formatter.prefix(PRE_SSE_66); - m_formatter.twoByteOp64(OP2_MOVQ_VdEd, offset, base, (RegisterID) dst); + // vmovq_mr can be encoded either as a true vmovq or as a vmovd with a + // REX prefix modifying it to be 64-bit. We choose the vmovq encoding + // because it's smaller (when it doesn't need a REX prefix for other + // reasons) and because it works on 32-bit x86 too. + twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, invalid_xmm, dst); } - void movq_mr(int32_t offset, RegisterID base, RegisterID index, int32_t scale, XMMRegisterID dst) + void vmovq_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) { - spew("movq " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), XMMRegName(dst)); - m_formatter.prefix(PRE_SSE_66); - m_formatter.twoByteOp64(OP2_MOVQ_VdEd, offset, base, index, scale, (RegisterID) dst); + twoByteOpSimd_disp32("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, invalid_xmm, dst); } - void movq_mr(const void *addr, XMMRegisterID dst) + void vmovq_mr(int32_t offset, RegisterID base, RegisterID index, int32_t scale, XMMRegisterID dst) { - spew("movq %p, %s", addr, XMMRegName(dst)); - m_formatter.prefix(PRE_SSE_66); - m_formatter.twoByteOp64(OP2_MOVQ_VdEd, addr, (RegisterID) dst); + twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, index, scale, invalid_xmm, dst); } + void vmovq_mr(const void *addr, XMMRegisterID dst) + { + twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, addr, invalid_xmm, dst); + } + +#ifdef JS_CODEGEN_X64 void leaq_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst) { spew("leaq " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg64Name(dst)), @@ -2662,11 +2672,15 @@ public: #ifdef JS_CODEGEN_X64 void vmovq_rr(XMMRegisterID src, RegisterID dst) { + // While this is called "vmovq", it actually uses the vmovd encoding + // with a REX prefix modifying it to be 64-bit. twoByteOpSimdInt64("vmovq", VEX_PD, OP2_MOVD_EdVd, (XMMRegisterID)dst, (RegisterID)src); } void vmovq_rr(RegisterID src, XMMRegisterID dst) { + // While this is called "vmovq", it actually uses the vmovd encoding + // with a REX prefix modifying it to be 64-bit. twoByteOpInt64Simd("vmovq", VEX_PD, OP2_MOVD_VdEd, src, invalid_xmm, dst); } #endif diff --git a/js/src/jit/shared/Disassembler-x86-shared.cpp b/js/src/jit/shared/Disassembler-x86-shared.cpp index 53497ec2b97..b6cb7fd5e45 100644 --- a/js/src/jit/shared/Disassembler-x86-shared.cpp +++ b/js/src/jit/shared/Disassembler-x86-shared.cpp @@ -473,6 +473,33 @@ js::jit::Disassembler::DisassembleHeapAccess(uint8_t *ptr, HeapAccess *access) } kind = HeapAccess::Store; break; + case Pack2ByteOpcode(OP2_MOVD_VdEd): + MOZ_ASSERT(!haveImm); + otherOperand = OtherOperand(xmm); + switch (type) { + case VEX_PD: memSize = 4; break; + default: MOZ_CRASH("Unexpected VEX type"); + } + kind = HeapAccess::Load; + break; + case Pack2ByteOpcode(OP2_MOVQ_WdVd): + MOZ_ASSERT(!haveImm); + otherOperand = OtherOperand(xmm); + switch (type) { + case VEX_PD: memSize = 8; break; + default: MOZ_CRASH("Unexpected VEX type"); + } + kind = HeapAccess::Store; + break; + case Pack2ByteOpcode(OP2_MOVD_EdVd): // aka OP2_MOVQ_VdWd + MOZ_ASSERT(!haveImm); + otherOperand = OtherOperand(xmm); + switch (type) { + case VEX_SS: memSize = 8; kind = HeapAccess::Load; break; + case VEX_PD: memSize = 4; kind = HeapAccess::Store; break; + default: MOZ_CRASH("Unexpected VEX type"); + } + break; default: MOZ_CRASH("Unable to disassemble instruction"); } diff --git a/js/src/jit/shared/Encoding-x86-shared.h b/js/src/jit/shared/Encoding-x86-shared.h index a493ec46253..bfc3adb54d8 100644 --- a/js/src/jit/shared/Encoding-x86-shared.h +++ b/js/src/jit/shared/Encoding-x86-shared.h @@ -167,7 +167,6 @@ enum TwoByteOpcodeID { OP2_XORPD_VpdWpd = 0x57, OP2_PCMPGTD_VdqWdq = 0x66, OP2_MOVD_VdEd = 0x6E, - OP2_MOVQ_VdEd = 0x6E, OP2_MOVDQ_VsdWsd = 0x6F, OP2_MOVDQ_VdqWdq = 0x6F, OP2_PSHUFD_VdqWdqIb = 0x70, @@ -178,7 +177,7 @@ enum TwoByteOpcodeID { OP2_PCMPEQW = 0x75, OP2_PCMPEQD_VdqWdq = 0x76, OP2_MOVD_EdVd = 0x7E, - OP2_MOVQ_EdVd = 0x7E, + OP2_MOVQ_VdWd = 0x7E, OP2_MOVDQ_WdqVdq = 0x7F, OP2_JCC_rel32 = 0x80, OP_SETCC = 0x90, @@ -197,6 +196,7 @@ enum TwoByteOpcodeID { OP2_PEXTRW_GdUdIb = 0xC5, OP2_SHUFPS_VpsWpsIb = 0xC6, OP2_PSRLD_VdqWdq = 0xD2, + OP2_MOVQ_WdVd = 0xD6, OP2_PANDDQ_VdqWdq = 0xDB, OP2_PANDNDQ_VdqWdq = 0xDF, OP2_PSRAD_VdqWdq = 0xE2, diff --git a/js/src/jit/x64/Assembler-x64.h b/js/src/jit/x64/Assembler-x64.h index e66330152dc..783ffcccc61 100644 --- a/js/src/jit/x64/Assembler-x64.h +++ b/js/src/jit/x64/Assembler-x64.h @@ -257,6 +257,7 @@ class Assembler : public AssemblerX86Shared using AssemblerX86Shared::jmp; using AssemblerX86Shared::push; using AssemblerX86Shared::pop; + using AssemblerX86Shared::vmovq; static uint8_t *PatchableJumpAddress(JitCode *code, size_t index); static void PatchJumpEntry(uint8_t *entry, uint8_t *target); @@ -361,21 +362,6 @@ class Assembler : public AssemblerX86Shared MOZ_CRASH("unexpected operand kind"); } } - void movq(const Operand &src, FloatRegister dest) { - switch (src.kind()) { - case Operand::MEM_REG_DISP: - masm.movq_mr(src.disp(), src.base(), dest.code()); - break; - case Operand::MEM_SCALE: - masm.movq_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code()); - break; - case Operand::MEM_ADDRESS32: - masm.movq_mr(src.address(), dest.code()); - break; - default: - MOZ_CRASH("unexpected operand kind"); - } - } void movq(Register src, const Operand &dest) { switch (dest.kind()) { case Operand::REG: @@ -394,21 +380,6 @@ class Assembler : public AssemblerX86Shared MOZ_CRASH("unexpected operand kind"); } } - void movq(FloatRegister src, const Operand &dest) { - switch (dest.kind()) { - case Operand::MEM_REG_DISP: - masm.movq_rm(src.code(), dest.disp(), dest.base()); - break; - case Operand::MEM_SCALE: - masm.movq_rm(src.code(), dest.disp(), dest.base(), dest.index(), dest.scale()); - break; - case Operand::MEM_ADDRESS32: - masm.movq_rm(src.code(), dest.address()); - break; - default: - MOZ_CRASH("unexpected operand kind"); - } - } void movq(Imm32 imm32, const Operand &dest) { switch (dest.kind()) { case Operand::REG: diff --git a/js/src/jit/x64/CodeGenerator-x64.cpp b/js/src/jit/x64/CodeGenerator-x64.cpp index e346c9f6932..8511039ba46 100644 --- a/js/src/jit/x64/CodeGenerator-x64.cpp +++ b/js/src/jit/x64/CodeGenerator-x64.cpp @@ -275,7 +275,7 @@ CodeGeneratorX64::loadSimd(Scalar::Type type, unsigned numElems, const Operand & // In memory-to-register mode, movd zeroes out the high lanes. case 1: masm.vmovd(srcAddr, out); break; // See comment above, which also applies to movq. - case 2: masm.movq(srcAddr, out); break; + case 2: masm.vmovq(srcAddr, out); break; case 4: masm.loadUnalignedInt32x4(srcAddr, out); break; default: MOZ_CRASH("unexpected size for partial load"); } @@ -429,7 +429,7 @@ CodeGeneratorX64::storeSimd(Scalar::Type type, unsigned numElems, FloatRegister // In memory-to-register mode, movd zeroes out the high lanes. case 1: masm.vmovd(in, dstAddr); break; // See comment above, which also applies to movq. - case 2: masm.movq(in, dstAddr); break; + case 2: masm.vmovq(in, dstAddr); break; case 4: masm.storeUnalignedInt32x4(in, dstAddr); break; default: MOZ_CRASH("unexpected size for partial load"); } diff --git a/js/src/jit/x86/Assembler-x86.h b/js/src/jit/x86/Assembler-x86.h index 62a99bd3f3c..460256cdcdd 100644 --- a/js/src/jit/x86/Assembler-x86.h +++ b/js/src/jit/x86/Assembler-x86.h @@ -472,6 +472,11 @@ class Assembler : public AssemblerX86Shared masm.vmovd_mr_disp32(src.offset, src.base.code(), dest.code()); return CodeOffsetLabel(masm.currentOffset()); } + CodeOffsetLabel vmovqWithPatch(Address src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovq_mr_disp32(src.offset, src.base.code(), dest.code()); + return CodeOffsetLabel(masm.currentOffset()); + } CodeOffsetLabel vmovsdWithPatch(Address src, FloatRegister dest) { MOZ_ASSERT(HasSSE2()); masm.vmovsd_mr_disp32(src.offset, src.base.code(), dest.code()); @@ -506,6 +511,11 @@ class Assembler : public AssemblerX86Shared masm.vmovd_rm_disp32(src.code(), dest.offset, dest.base.code()); return CodeOffsetLabel(masm.currentOffset()); } + CodeOffsetLabel vmovqWithPatch(FloatRegister src, Address dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovq_rm_disp32(src.code(), dest.offset, dest.base.code()); + return CodeOffsetLabel(masm.currentOffset()); + } CodeOffsetLabel vmovssWithPatch(FloatRegister src, Address dest) { MOZ_ASSERT(HasSSE2()); masm.vmovss_rm_disp32(src.code(), dest.offset, dest.base.code()); @@ -566,6 +576,11 @@ class Assembler : public AssemblerX86Shared masm.vmovd_mr(src.addr, dest.code()); return CodeOffsetLabel(masm.currentOffset()); } + CodeOffsetLabel vmovqWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovq_mr(src.addr, dest.code()); + return CodeOffsetLabel(masm.currentOffset()); + } CodeOffsetLabel vmovsdWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) { MOZ_ASSERT(HasSSE2()); masm.vmovsd_mr(src.addr, dest.code()); @@ -615,6 +630,11 @@ class Assembler : public AssemblerX86Shared masm.vmovd_rm(src.code(), dest.addr); return CodeOffsetLabel(masm.currentOffset()); } + CodeOffsetLabel vmovqWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovq_rm(src.code(), dest.addr); + return CodeOffsetLabel(masm.currentOffset()); + } CodeOffsetLabel vmovsdWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) { MOZ_ASSERT(HasSSE2()); masm.vmovsd_rm(src.code(), dest.addr); diff --git a/js/src/jit/x86/CodeGenerator-x86.cpp b/js/src/jit/x86/CodeGenerator-x86.cpp index 41ab5a3d963..11ccb3893eb 100644 --- a/js/src/jit/x86/CodeGenerator-x86.cpp +++ b/js/src/jit/x86/CodeGenerator-x86.cpp @@ -375,8 +375,7 @@ CodeGeneratorX86::loadSimd(Scalar::Type type, unsigned numElems, T srcAddr, Floa // In memory-to-register mode, movd zeroes out the high lanes. case 1: masm.vmovdWithPatch(srcAddr, out); break; // See comment above, which also applies to movsd. - // TODO memory-to-xmm movq is encodable on x86 as well - case 2: masm.vmovsdWithPatch(srcAddr, out); break; + case 2: masm.vmovqWithPatch(srcAddr, out); break; case 4: masm.vmovdquWithPatch(srcAddr, out); break; default: MOZ_CRASH("unexpected size for partial load"); } @@ -597,8 +596,7 @@ CodeGeneratorX86::storeSimd(Scalar::Type type, unsigned numElems, FloatRegister // In memory-to-register mode, movd zeroes destAddr the high lanes. case 1: masm.vmovdWithPatch(in, destAddr); break; // See comment above, which also applies to movsd. - // Cross-domain penalty here, as movq isn't encodable on x86. - case 2: masm.vmovsdWithPatch(in, destAddr); break; + case 2: masm.vmovqWithPatch(in, destAddr); break; case 4: masm.vmovdquWithPatch(in, destAddr); break; default: MOZ_CRASH("unexpected size for partial load"); }