Bug 976110 - Part 1: Optimize signed integer division by constants; r=sunfish

This commit is contained in:
Mauricio Collares Neto 2014-04-19 10:37:51 -07:00
parent 395ac257b1
commit e6753fd672
10 changed files with 219 additions and 4 deletions

View File

@ -366,6 +366,7 @@ private:
GROUP3_OP_TEST = 0,
GROUP3_OP_NOT = 2,
GROUP3_OP_NEG = 3,
GROUP3_OP_IMUL = 5,
GROUP3_OP_DIV = 6,
GROUP3_OP_IDIV = 7,
@ -1155,6 +1156,13 @@ public:
m_formatter.twoByteOp(OP2_IMUL_GvEv, dst, src);
}
// Emits the one-operand form of imull (opcode group 3, sub-opcode
// GROUP3_OP_IMUL) with |multiplier| as its register operand. Per the x86
// instruction set, this form implicitly multiplies eax by the operand and
// writes the signed 64-bit product to edx:eax.
void imull_r(RegisterID multiplier)
{
spew("imull %s",
nameIReg(4, multiplier));
m_formatter.oneByteOp(OP_GROUP3_Ev, GROUP3_OP_IMUL, multiplier);
}
void imull_mr(int offset, RegisterID base, RegisterID dst)
{
spew("imull %s0x%x(%s), %s",

View File

@ -1062,12 +1062,18 @@ class AssemblerX86Shared
MOZ_ASSUME_UNREACHABLE("unexpected operand kind");
}
}
// One-operand imull: forwards |multiplier| to masm.imull_r. On x86 this form
// multiplies eax by the operand and leaves the signed 64-bit product in
// edx:eax, so callers must treat both registers as clobbered.
void imull(const Register &multiplier) {
masm.imull_r(multiplier.code());
}
// Multiplies |dest| by the immediate |imm| in place: |dest| is handed to
// masm.imull_i32r as both the source and the destination register.
void imull(Imm32 imm, const Register &dest) {
masm.imull_i32r(dest.code(), imm.value, dest.code());
}
// Register-register imull: forwards |src| and |dest| to masm.imull_rr.
// NOTE(review): presumably computes dest = dest * src -- confirm against
// imull_rr.
void imull(const Register &src, const Register &dest) {
masm.imull_rr(src.code(), dest.code());
}
// Three-operand imull with an immediate: forwards |src|, the immediate value
// and |dest| to masm.imull_i32r.
// NOTE(review): presumably computes dest = src * imm, leaving |src| unmodified
// when it differs from |dest| -- confirm against imull_i32r.
void imull(Imm32 imm, const Register &src, const Register &dest) {
masm.imull_i32r(src.code(), imm.value, dest.code());
}
void imull(const Operand &src, const Register &dest) {
switch (src.kind()) {
case Operand::REG:

View File

@ -1007,6 +1007,74 @@ CodeGeneratorShared::addCacheLocations(const CacheLocationList &locs, size_t *nu
return firstIndex;
}
// Computes the multiplier and shift amount that let a signed 32-bit division
// by the constant |d| be performed with a multiplication and a shift.
//
// |d| must be positive and must not be a power of two (asserted below).
// Returns a ReciprocalMulConstants whose |multiplier| holds the low 32 bits
// of M (as a signed value) and whose |shift_amount| holds s, in the notation
// of the proof below.
ReciprocalMulConstants
CodeGeneratorShared::computeDivisionConstants(int d) {
    // In what follows, d is positive and is not a power of 2.
    JS_ASSERT(d > 0 && (d & (d - 1)) != 0);

    // Speeding up division by non power-of-2 constants is possible by
    // calculating, during compilation, a value M such that high-order
    // bits of M*n correspond to the result of the division. Formally,
    // we compute values 0 <= M < 2^32 and 0 <= s < 31 such that
    //         (M * n) >> (32 + s) = floor(n/d)    if n >= 0
    //         (M * n) >> (32 + s) = ceil(n/d) - 1 if n < 0.
    // The original presentation of this technique appears in Hacker's
    // Delight, a book by Henry S. Warren, Jr.. A proof of correctness
    // for our version follows.
    //
    // Define p = 32 + s, M = ceil(2^p/d), and assume that s satisfies
    //                     M - 2^p/d <= 2^(s+1)/d.                 (1)
    // (Observe that s = FloorLog32(d) satisfies this, because in this
    // case d <= 2^(s+1) and so the RHS of (1) is at least one). Then,
    //
    // a) If s <= FloorLog32(d), then M <= 2^32 - 1.
    //    Proof: Indeed, M is monotone in s and, for s = FloorLog32(d),
    //    the inequalities 2^31 > d >= 2^s + 1 readily imply
    //        2^p / d  = 2^p/(d - 1) * (d - 1)/d
    //                <= 2^32 * (1 - 1/d) < 2 * (2^31 - 1) = 2^32 - 2.
    //    The claim follows by applying the ceiling function.
    //
    // b) For any 0 <= n < 2^31, floor(Mn/2^p) = floor(n/d).
    //    Proof: Put x = floor(Mn/2^p); it's the unique integer for which
    //                        Mn/2^p - 1 < x <= Mn/2^p.            (2)
    //    Using M >= 2^p/d on the LHS and (1) on the RHS, we get
    //            n/d - 1 < x <= n/d + n/(2^31 d) < n/d + 1/d.
    //    Since x is an integer, it's not in the interval (n/d, (n+1)/d),
    //    and so n/d - 1 < x <= n/d, which implies x = floor(n/d).
    //
    // c) For any -2^31 <= n < 0, floor(Mn/2^p) + 1 = ceil(n/d).
    //    Proof: The proof is similar. Equation (2) holds as above. Using
    //    M > 2^p/d (d isn't a power of 2) on the RHS and (1) on the LHS,
    //                  n/d + n/(2^31 d) - 1 < x < n/d.
    //    Using n >= -2^31 and summing 1,
    //                   n/d - 1/d < x + 1 < n/d + 1.
    //    Since x + 1 is an integer, this implies n/d <= x + 1 < n/d + 1.
    //    In other words, x + 1 = ceil(n/d).
    //
    // Condition (1) isn't necessary for the existence of M and s with
    // the properties above. Hacker's Delight provides a slightly less
    // restrictive condition when d >= 196611, at the cost of a 3-page
    // proof of correctness.
    //
    // Note that, since d*M - 2^p = d - (2^p)%d, (1) can be written as
    //                    2^(s+1) >= d - (2^p)%d.
    // We now compute the least s with this property...
    int32_t shift = 0;
    for (;;) {
        const int64_t twoToP = int64_t(1) << (shift + 32);
        const int64_t twoToSPlusOne = int64_t(1) << (shift + 1);
        // Stop at the first s for which 2^(s+1) + (2^p)%d >= d.
        if (twoToSPlusOne + twoToP % d >= d)
            break;
        shift++;
    }

    // ...and the corresponding M. This may not fit in a signed 32-bit
    // integer; we will compute (M - 2^32) * n + (2^32 * n) instead of
    // M * n if this is the case (cf. item (a) above).
    ReciprocalMulConstants rmc;
    rmc.shift_amount = shift;
    rmc.multiplier = int32_t((int64_t(1) << (shift + 32)) / d + 1);
    return rmc;
}
#ifdef JS_TRACE_LOGGING
bool

View File

@ -45,6 +45,11 @@ struct PatchableBackedgeInfo
{}
};
// Constants produced by CodeGeneratorShared::computeDivisionConstants for
// replacing a signed 32-bit division by a fixed divisor with a multiplication
// followed by a shift.
struct ReciprocalMulConstants {
// The multiplier M truncated to a signed 32-bit value. M itself may be as
// large as 2^32 - 1, in which case this field is negative and holds M - 2^32.
int32_t multiplier;
// The shift s applied to the high 32 bits of the product, i.e. the full
// product is shifted right by 32 + s.
int32_t shift_amount;
};
class CodeGeneratorShared : public LInstructionVisitor
{
js::Vector<OutOfLineCode *, 0, SystemAllocPolicy> outOfLineCode_;
@ -402,6 +407,7 @@ class CodeGeneratorShared : public LInstructionVisitor
bool addCache(LInstruction *lir, size_t cacheIndex);
size_t addCacheLocations(const CacheLocationList &locs, size_t *numLocs);
ReciprocalMulConstants computeDivisionConstants(int d);
protected:
bool addOutOfLineCode(OutOfLineCode *code);

View File

@ -22,6 +22,7 @@
using namespace js;
using namespace js::jit;
using mozilla::Abs;
using mozilla::FloatingPoint;
using mozilla::FloorLog2;
using mozilla::NegativeInfinity;
@ -918,6 +919,88 @@ CodeGeneratorX86Shared::visitDivPowTwoI(LDivPowTwoI *ins)
return true;
}
// Generates code for a signed 32-bit division or modulus by a constant whose
// absolute value is not a power of two, using a reciprocal multiplication
// instead of an idiv. The quotient is produced in edx and the remainder in
// eax; which of the two is wanted is determined by the output register.
// Returns false if recording a bailout fails, true otherwise.
bool
CodeGeneratorX86Shared::visitDivOrModConstantI(LDivOrModConstantI *ins) {
    Register lhs = ToRegister(ins->numerator());
    Register output = ToRegister(ins->output());
    const int32_t d = ins->denominator();

    // This emits the division answer into edx or the modulus answer into eax,
    // and both registers are used as scratch, so the dividend must live
    // somewhere else.
    JS_ASSERT(output == eax || output == edx);
    JS_ASSERT(lhs != eax && lhs != edx);
    const bool wantsQuotient = (output == edx);
    const bool wantsRemainder = (output == eax);
    const bool dividendMayBeNegative = ins->canBeNegativeDividend();

    // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI
    // and LModPowTwoI).
    JS_ASSERT((Abs(d) & (Abs(d) - 1)) != 0);

    // We will first divide by Abs(d), and negate the answer if d is negative.
    // If desired, this can be avoided by generalizing computeDivisionConstants.
    ReciprocalMulConstants rmc = computeDivisionConstants(Abs(d));

    // As explained in the comments of computeDivisionConstants, we first compute
    // X >> (32 + shift), where X is either (rmc.multiplier * n) if the multiplier
    // is non-negative or (rmc.multiplier * n) + (2^32 * n) otherwise. This is the
    // desired division result if n is non-negative, and is one less than the result
    // otherwise.
    masm.movl(lhs, eax);
    masm.movl(Imm32(rmc.multiplier), edx);
    masm.imull(edx);
    if (rmc.multiplier < 0) {
        // Adding n to the high word accounts for the missing 2^32 * n term.
        masm.addl(lhs, edx);
    }
    masm.sarl(Imm32(rmc.shift_amount), edx);

    // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
    // computed with just a sign-extending shift of 31 bits.
    if (dividendMayBeNegative) {
        masm.movl(lhs, eax);
        masm.sarl(Imm32(31), eax);
        masm.subl(eax, edx);
    }

    // After this, edx contains the correct division result.
    if (d < 0)
        masm.negl(edx);

    // For a modulus, derive the remainder as n - d * quotient.
    if (wantsRemainder) {
        masm.imull(Imm32(-d), edx, eax);
        masm.addl(lhs, eax);
    }

    // Truncated operations need none of the checks below.
    if (ins->mir()->isTruncated())
        return true;

    if (wantsQuotient) {
        // This is a division op. Multiply the obtained value by d to check if
        // the correct answer is an integer. This cannot overflow, since |d| > 1.
        masm.imull(Imm32(d), edx, eax);
        masm.cmpl(lhs, eax);
        if (!bailoutIf(Assembler::NotEqual, ins->snapshot()))
            return false;

        // If lhs is zero and the divisor is negative, the answer should have
        // been -0.
        if (d < 0) {
            masm.testl(lhs, lhs);
            if (!bailoutIf(Assembler::Zero, ins->snapshot()))
                return false;
        }
        return true;
    }

    if (dividendMayBeNegative) {
        // This is a mod op. If the computed value is zero and lhs
        // is negative, the answer should have been -0.
        Label done;

        masm.cmpl(lhs, Imm32(0));
        masm.j(Assembler::GreaterThanOrEqual, &done);

        masm.testl(eax, eax);
        if (!bailoutIf(Assembler::Zero, ins->snapshot()))
            return false;

        masm.bind(&done);
    }

    return true;
}
bool
CodeGeneratorX86Shared::visitDivI(LDivI *ins)
{

View File

@ -124,6 +124,7 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared
virtual bool visitMulI(LMulI *ins);
virtual bool visitDivI(LDivI *ins);
virtual bool visitDivPowTwoI(LDivPowTwoI *ins);
virtual bool visitDivOrModConstantI(LDivOrModConstantI *ins);
virtual bool visitModI(LModI *ins);
virtual bool visitModPowTwoI(LModPowTwoI *ins);
virtual bool visitBitNotI(LBitNotI *ins);

View File

@ -76,6 +76,37 @@ class LDivPowTwoI : public LBinaryMath<0>
}
};
// LIR instruction for a signed 32-bit division or modulus by a constant
// denominator. It carries a single operand (the numerator) and a single temp;
// the constant denominator is stored in the instruction itself.
class LDivOrModConstantI : public LInstructionHelper<1, 1, 1>
{
    // The constant divisor, fixed at construction time.
    const int32_t denominator_;

  public:
    LIR_HEADER(DivOrModConstantI)

    LDivOrModConstantI(const LAllocation &lhs, int32_t denominator, const LDefinition& temp)
      : denominator_(denominator)
    {
        setOperand(0, lhs);
        setTemp(0, temp);
    }

    // The dividend, i.e. operand 0.
    const LAllocation *numerator() {
        return getOperand(0);
    }

    int32_t denominator() const {
        return denominator_;
    }

    // The underlying MIR node, which must be either a div or a mod.
    MBinaryArithInstruction *mir() const {
        JS_ASSERT(mir_->isDiv() || mir_->isMod());
        return static_cast<MBinaryArithInstruction *>(mir_);
    }

    // Asks the underlying MMod or MDiv whether the dividend can be negative.
    bool canBeNegativeDividend() const {
        return mir_->isMod()
               ? mir_->toMod()->canBeNegativeDividend()
               : mir_->toDiv()->canBeNegativeDividend();
    }
};
class LModI : public LBinaryMath<1>
{
public:

View File

@ -138,10 +138,8 @@ LIRGeneratorX86Shared::lowerDivI(MDiv *div)
if (div->rhs()->isConstant()) {
int32_t rhs = div->rhs()->toConstant()->value().toInt32();
// Check for division by a power of two, which is an easy and
// important case to optimize. Note that other optimizations
// are also possible: division by other constants can be
// optimized by a reciprocal multiplication technique.
// Division by powers of two can be done by shifting, and division by
// other numbers can be done by a reciprocal multiplication technique.
int32_t shift = FloorLog2(Abs(rhs));
if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
LAllocation lhs = useRegisterAtStart(div->lhs());
@ -157,6 +155,12 @@ LIRGeneratorX86Shared::lowerDivI(MDiv *div)
if (div->fallible() && !assignSnapshot(lir, Bailout_BaselineInfo))
return false;
return defineReuseInput(lir, div, 0);
} else if (rhs != 0) {
LDivOrModConstantI *lir;
lir = new(alloc()) LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
if (div->fallible() && !assignSnapshot(lir, Bailout_BaselineInfo))
return false;
return defineFixed(lir, div, LAllocation(AnyRegister(edx)));
}
}
@ -181,6 +185,12 @@ LIRGeneratorX86Shared::lowerModI(MMod *mod)
if (mod->fallible() && !assignSnapshot(lir, Bailout_BaselineInfo))
return false;
return defineReuseInput(lir, mod, 0);
} else if (rhs != 0) {
LDivOrModConstantI *lir;
lir = new(alloc()) LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
if (mod->fallible() && !assignSnapshot(lir, Bailout_BaselineInfo))
return false;
return defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
}
}

View File

@ -13,6 +13,7 @@
_(UnboxFloatingPoint) \
_(DivI) \
_(DivPowTwoI) \
_(DivOrModConstantI) \
_(ModI) \
_(ModPowTwoI) \
_(PowHalfD) \

View File

@ -14,6 +14,7 @@
_(BoxFloatingPoint) \
_(DivI) \
_(DivPowTwoI) \
_(DivOrModConstantI) \
_(ModI) \
_(ModPowTwoI) \
_(PowHalfD) \