Bug 876064 - Implement constant pools for double constants on x64. r=sstangl

This commit is contained in:
Dan Gohman 2013-07-01 21:24:08 -07:00
parent a0b2a8279e
commit 82f735fe7c
5 changed files with 95 additions and 36 deletions

View File

@ -386,27 +386,22 @@ class MacroAssemblerX86Shared : public Assembler
bind(&done); bind(&done);
} }
bool maybeInlineDouble(uint64_t u, const FloatRegister &dest) { bool maybeInlineDouble(double d, const FloatRegister &dest) {
// This implements parts of "13.4 Generating constants" of uint64_t u = mozilla::BitwiseCast<uint64_t>(d);
// "2. Optimizing subroutines in assembly language" by Agner Fog,
// generalized to handle any case that can use a pcmpeqw and
// up to two shifts.
// Loading zero with xor is specially optimized in hardware.
if (u == 0) { if (u == 0) {
xorpd(dest, dest); xorpd(dest, dest);
return true; return true;
} }
int tz = js_bitscan_ctz64(u); // It is also possible to load several common constants using pcmpeqw
int lz = js_bitscan_clz64(u); // to get all ones and then psllq and psrlq to get zeros at the ends,
if (u == (~uint64_t(0) << (lz + tz) >> lz)) { // as described in "13.4 Generating constants" of
pcmpeqw(dest, dest); // "2. Optimizing subroutines in assembly language" by Agner Fog, and as
if (tz != 0) // previously implemented here. However, with x86 and x64 both using
psllq(Imm32(lz + tz), dest); // constant pool loads for double constants, this is probably only
if (lz != 0) // worthwhile in cases where a load is likely to be delayed.
psrlq(Imm32(lz), dest);
return true;
}
return false; return false;
} }

View File

@ -8,12 +8,64 @@
#include "ion/BaselineFrame.h" #include "ion/BaselineFrame.h"
#include "ion/MoveEmitter.h" #include "ion/MoveEmitter.h"
#include "ion/IonFrames.h" #include "ion/IonFrames.h"
#include "mozilla/Casting.h"
#include "jsscriptinlines.h" #include "jsscriptinlines.h"
using namespace js; using namespace js;
using namespace js::ion; using namespace js::ion;
void
MacroAssemblerX64::loadConstantDouble(double d, const FloatRegister &dest)
{
    // Load the double constant |d| into |dest|.
    //
    // Constants that maybeInlineDouble() can materialize directly are emitted
    // inline. Everything else is recorded in a per-assembler constant pool
    // (doubles_, deduplicated through doubleMap_) and loaded with a
    // PC-relative movsd that is patched once the pool is emitted by finish().
    //
    // On allocation failure enoughMemory_ is cleared (making oom() report
    // failure) and no load is emitted.
    if (maybeInlineDouble(d, dest))
        return;

    // The map is initialized lazily, on the first pooled constant.
    if (!doubleMap_.initialized()) {
        enoughMemory_ &= doubleMap_.init();
        if (!enoughMemory_)
            return;
    }

    size_t doubleIndex;
    if (DoubleMap::AddPtr p = doubleMap_.lookupForAdd(d)) {
        // This constant is already pooled; reuse its slot.
        doubleIndex = p->value;
    } else {
        doubleIndex = doubles_.length();

        // Stop immediately if the pool entry could not be appended.
        // Registering doubleIndex in the map without a matching doubles_
        // entry would let a later request for the same constant index past
        // the end of doubles_.
        if (!doubles_.append(Double(d))) {
            enoughMemory_ = false;
            return;
        }

        enoughMemory_ &= doubleMap_.add(p, d, doubleIndex);
        if (!enoughMemory_)
            return;
    }

    Double &dbl = doubles_[doubleIndex];
    JS_ASSERT(!dbl.uses.bound());

    // The constants will be stored in a pool appended to the text (see
    // finish()), so they will always be a fixed distance from the
    // instructions which reference them. This allows the instructions to use
    // PC-relative addressing. Use "jump" label support code, because we need
    // the same PC-relative address patching that jumps use.
    JmpSrc j = masm.movsd_ripr(dest.code());
    JmpSrc prev = JmpSrc(dbl.uses.use(j.offset()));
    masm.setNextJump(j, prev);
}
void
MacroAssemblerX64::finish()
{
JS_STATIC_ASSERT(CodeAlignment >= sizeof(double));
if (!doubles_.empty())
masm.align(sizeof(double));
for (size_t i = 0; i < doubles_.length(); i++) {
Double &dbl = doubles_[i];
bind(&dbl.uses);
masm.doubleConstant(dbl.value);
}
MacroAssemblerX86Shared::finish();
}
void void
MacroAssemblerX64::setupABICall(uint32_t args) MacroAssemblerX64::setupABICall(uint32_t args)
{ {

View File

@ -45,6 +45,19 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared
bool dynamicAlignment_; bool dynamicAlignment_;
bool enoughMemory_; bool enoughMemory_;
// Constant pool bookkeeping for double constants.
//
// SystemAllocPolicy is used here because asm.js releases memory after each
// function is compiled, whereas these structures need to live until after
// all functions are compiled.
struct Double {
    // The constant itself.
    double value;
    // Label linking the instructions that load this constant; it is bound
    // when the constant is emitted in finish().
    NonAssertingLabel uses;

    Double(double constant) : value(constant) {}
};

// Pooled constants, in the order they will be emitted.
Vector<Double, 0, SystemAllocPolicy> doubles_;

// Maps a constant to its index in doubles_, so each distinct constant is
// pooled once.
typedef HashMap<double, size_t, DefaultHasher<double>, SystemAllocPolicy> DoubleMap;
DoubleMap doubleMap_;
void setupABICall(uint32_t arg); void setupABICall(uint32_t arg);
protected: protected:
@ -71,6 +84,10 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared
{ {
} }
// The buffer is about to be linked, make sure any constant pools or excess
// bookkeeping has been flushed to the instruction stream.
void finish();
bool oom() const { bool oom() const {
return MacroAssemblerX86Shared::oom() || !enoughMemory_; return MacroAssemblerX86Shared::oom() || !enoughMemory_;
} }
@ -938,17 +955,7 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared
cvtsi2sd(operand.valueReg(), dest); cvtsi2sd(operand.valueReg(), dest);
} }
void loadConstantDouble(double d, const FloatRegister &dest) { void loadConstantDouble(double d, const FloatRegister &dest);
union DoublePun {
uint64_t u;
double d;
} pun;
pun.d = d;
if (!maybeInlineDouble(pun.u, dest)) {
mov(ImmWord(pun.u), ScratchReg);
movqsd(ScratchReg, dest);
}
}
void loadStaticDouble(const double *dp, const FloatRegister &dest) { void loadStaticDouble(const double *dp, const FloatRegister &dest) {
loadConstantDouble(*dp, dest); loadConstantDouble(*dp, dest);
} }

View File

@ -8,6 +8,7 @@
#include "ion/BaselineFrame.h" #include "ion/BaselineFrame.h"
#include "ion/MoveEmitter.h" #include "ion/MoveEmitter.h"
#include "ion/IonFrames.h" #include "ion/IonFrames.h"
#include "mozilla/Casting.h"
#include "jsscriptinlines.h" #include "jsscriptinlines.h"
@ -17,12 +18,7 @@ using namespace js::ion;
void void
MacroAssemblerX86::loadConstantDouble(double d, const FloatRegister &dest) MacroAssemblerX86::loadConstantDouble(double d, const FloatRegister &dest)
{ {
union DoublePun { if (maybeInlineDouble(d, dest))
uint64_t u;
double d;
} dpun;
dpun.d = d;
if (maybeInlineDouble(dpun.u, dest))
return; return;
if (!doubleMap_.initialized()) { if (!doubleMap_.initialized()) {
@ -42,10 +38,21 @@ MacroAssemblerX86::loadConstantDouble(double d, const FloatRegister &dest)
return; return;
} }
Double &dbl = doubles_[doubleIndex]; Double &dbl = doubles_[doubleIndex];
masm.movsd_mr(reinterpret_cast<void *>(dbl.uses.prev()), dest.code()); JS_ASSERT(!dbl.uses.bound());
masm.movsd_mr(reinterpret_cast<const void *>(dbl.uses.prev()), dest.code());
dbl.uses.setPrev(masm.size()); dbl.uses.setPrev(masm.size());
} }
void
MacroAssemblerX86::loadStaticDouble(const double *dp, const FloatRegister &dest) {
    // Load the double stored at |dp| into |dest|. Values that
    // maybeInlineDouble() can materialize directly need no memory load.
    if (!maybeInlineDouble(*dp, dest)) {
        // x86 can just load from any old immediate address.
        movsd(dp, dest);
    }
}
void void
MacroAssemblerX86::finish() MacroAssemblerX86::finish()
{ {

View File

@ -793,9 +793,7 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared
} }
void loadConstantDouble(double d, const FloatRegister &dest); void loadConstantDouble(double d, const FloatRegister &dest);
void loadStaticDouble(const double *dp, const FloatRegister &dest) { void loadStaticDouble(const double *dp, const FloatRegister &dest);
movsd(dp, dest);
}
void branchTruncateDouble(const FloatRegister &src, const Register &dest, Label *fail) { void branchTruncateDouble(const FloatRegister &src, const Register &dest, Label *fail) {
const uint32_t IndefiniteIntegerValue = 0x80000000; const uint32_t IndefiniteIntegerValue = 0x80000000;