mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 976110 - Part 1: Optimize signed integer division by constants; r=sunfish
This commit is contained in:
parent
395ac257b1
commit
e6753fd672
@ -366,6 +366,7 @@ private:
|
||||
GROUP3_OP_TEST = 0,
|
||||
GROUP3_OP_NOT = 2,
|
||||
GROUP3_OP_NEG = 3,
|
||||
GROUP3_OP_IMUL = 5,
|
||||
GROUP3_OP_DIV = 6,
|
||||
GROUP3_OP_IDIV = 7,
|
||||
|
||||
@ -1155,6 +1156,13 @@ public:
|
||||
m_formatter.twoByteOp(OP2_IMUL_GvEv, dst, src);
|
||||
}
|
||||
|
||||
// Emits the one-operand form of IMUL (GROUP3 opcode extension GROUP3_OP_IMUL)
// with |multiplier| as the register operand. Callers in this change load the
// other factor into eax beforehand and read the high half of the product from
// edx afterwards.
void imull_r(RegisterID multiplier)
{
    spew("imull %s", nameIReg(4, multiplier));
    m_formatter.oneByteOp(OP_GROUP3_Ev, GROUP3_OP_IMUL, multiplier);
}
|
||||
|
||||
void imull_mr(int offset, RegisterID base, RegisterID dst)
|
||||
{
|
||||
spew("imull %s0x%x(%s), %s",
|
||||
|
@ -1062,12 +1062,18 @@ class AssemblerX86Shared
|
||||
MOZ_ASSUME_UNREACHABLE("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
// One-operand multiply: forwards to the base assembler's imull_r, which
// encodes the single-register IMUL form with |multiplier| as its operand.
void imull(const Register &multiplier)
{
    masm.imull_r(multiplier.code());
}
|
||||
// Multiply-by-immediate with |dest| used as both the source and the
// destination register of the underlying imull_i32r encoding.
void imull(Imm32 imm, const Register &dest)
{
    masm.imull_i32r(dest.code(), imm.value, dest.code());
}
|
||||
// Two-register multiply: forwards |src| and |dest| to imull_rr.
void imull(const Register &src, const Register &dest)
{
    masm.imull_rr(src.code(), dest.code());
}
|
||||
// Three-operand multiply-by-immediate: forwards |src|, the immediate value
// and |dest| to imull_i32r, so the source register may differ from the
// destination.
void imull(Imm32 imm, const Register &src, const Register &dest)
{
    masm.imull_i32r(src.code(), imm.value, dest.code());
}
|
||||
void imull(const Operand &src, const Register &dest) {
|
||||
switch (src.kind()) {
|
||||
case Operand::REG:
|
||||
|
@ -1007,6 +1007,74 @@ CodeGeneratorShared::addCacheLocations(const CacheLocationList &locs, size_t *nu
|
||||
return firstIndex;
|
||||
}
|
||||
|
||||
// Computes the "magic number" pair used to replace a signed 32-bit division
// by the constant |d| with a multiplication and a shift.
//
// |d| must be positive and must not be a power of two (asserted below).
// Returns the low 32 bits of the multiplier M, reinterpreted as a signed
// value, together with the extra shift s; see the derivation below.
ReciprocalMulConstants
CodeGeneratorShared::computeDivisionConstants(int d) {
    // In what follows, d is positive and is not a power of 2.
    JS_ASSERT(d > 0 && (d & (d - 1)) != 0);

    // Speeding up division by non power-of-2 constants is possible by
    // calculating, during compilation, a value M such that high-order
    // bits of M*n correspond to the result of the division. Formally,
    // we compute values 0 <= M < 2^32 and 0 <= s < 31 such that
    //         (M * n) >> (32 + s) = floor(n/d)    if n >= 0
    //         (M * n) >> (32 + s) = ceil(n/d) - 1 if n < 0.
    // The original presentation of this technique appears in Hacker's
    // Delight, a book by Henry S. Warren, Jr. A proof of correctness
    // for our version follows.

    // Define p = 32 + s, M = ceil(2^p/d), and assume that s satisfies
    //                     M - 2^p/d <= 2^(s+1)/d.                 (1)
    // (Observe that s = floor(log2(d)) satisfies this, because in this
    // case d <= 2^(s+1) and so the RHS of (1) is at least one). Then,
    //
    // a) If s <= floor(log2(d)), then M <= 2^32 - 1.
    // Proof: Indeed, M is monotone in s and, for s = floor(log2(d)),
    // the inequalities 2^31 > d >= 2^s + 1 readily imply
    //    2^p / d  = 2^p/(d - 1) * (d - 1)/d
    //            <= 2^32 * (1 - 1/d) < 2 * (2^31 - 1) = 2^32 - 2.
    // The claim follows by applying the ceiling function.
    //
    // b) For any 0 <= n < 2^31, floor(Mn/2^p) = floor(n/d).
    // Proof: Put x = floor(Mn/2^p); it's the unique integer for which
    //                    Mn/2^p - 1 < x <= Mn/2^p.                (2)
    // Using M >= 2^p/d on the LHS and (1) on the RHS, we get
    //           n/d - 1 < x <= n/d + n/(2^31 d) < n/d + 1/d.
    // Since x is an integer, it's not in the interval (n/d, (n+1)/d),
    // and so n/d - 1 < x <= n/d, which implies x = floor(n/d).
    //
    // c) For any -2^31 <= n < 0, floor(Mn/2^p) + 1 = ceil(n/d).
    // Proof: The proof is similar. Equation (2) holds as above. Using
    // M > 2^p/d (d isn't a power of 2) on the RHS and (1) on the LHS,
    //                 n/d + n/(2^31 d) - 1 < x < n/d.
    // Using n >= -2^31 and summing 1,
    //                  n/d - 1/d < x + 1 < n/d + 1.
    // Since x + 1 is an integer, this implies n/d <= x + 1 < n/d + 1.
    // In other words, x + 1 = ceil(n/d).
    //
    // Condition (1) isn't necessary for the existence of M and s with
    // the properties above. Hacker's Delight provides a slightly less
    // restrictive condition when d >= 196611, at the cost of a 3-page
    // proof of correctness.

    // Note that, since d*M - 2^p = d - (2^p)%d, (1) can be written as
    //                   2^(s+1) >= d - (2^p)%d.
    // We now compute the least s with this property, keeping 2^p
    // (p = 32 + s) up to date alongside s...
    int32_t s = 0;
    int64_t twoToP = int64_t(1) << 32;
    while ((int64_t(1) << (s + 1)) + twoToP % d < d) {
        s++;
        twoToP <<= 1;
    }

    // ...and the corresponding M = floor(2^p/d) + 1 (equal to ceil(2^p/d)
    // because d does not divide 2^p). This may not fit in a signed 32-bit
    // integer; we will compute (M - 2^32) * n + (2^32 * n) instead of
    // M * n if this is the case (cf. item (a) above).
    ReciprocalMulConstants result;
    result.multiplier = int32_t(twoToP / d + 1);
    result.shift_amount = s;

    return result;
}
|
||||
|
||||
|
||||
#ifdef JS_TRACE_LOGGING
|
||||
|
||||
bool
|
||||
|
@ -45,6 +45,11 @@ struct PatchableBackedgeInfo
|
||||
{}
|
||||
};
|
||||
|
||||
// Result of CodeGeneratorShared::computeDivisionConstants: the constants
// needed to turn a division by a fixed integer into a multiply-and-shift.
struct ReciprocalMulConstants {
    // Low 32 bits of the reciprocal multiplier, stored as a signed value.
    // A negative value means the true multiplier did not fit in int32_t and
    // the code generator compensates by adding the dividend to the high half
    // of the product.
    int32_t multiplier;

    // Additional arithmetic right shift applied to the high 32 bits of the
    // product.
    int32_t shift_amount;
};
|
||||
|
||||
class CodeGeneratorShared : public LInstructionVisitor
|
||||
{
|
||||
js::Vector<OutOfLineCode *, 0, SystemAllocPolicy> outOfLineCode_;
|
||||
@ -402,6 +407,7 @@ class CodeGeneratorShared : public LInstructionVisitor
|
||||
|
||||
bool addCache(LInstruction *lir, size_t cacheIndex);
|
||||
size_t addCacheLocations(const CacheLocationList &locs, size_t *numLocs);
|
||||
ReciprocalMulConstants computeDivisionConstants(int d);
|
||||
|
||||
protected:
|
||||
bool addOutOfLineCode(OutOfLineCode *code);
|
||||
|
@ -22,6 +22,7 @@
|
||||
using namespace js;
|
||||
using namespace js::jit;
|
||||
|
||||
using mozilla::Abs;
|
||||
using mozilla::FloatingPoint;
|
||||
using mozilla::FloorLog2;
|
||||
using mozilla::NegativeInfinity;
|
||||
@ -918,6 +919,88 @@ CodeGeneratorX86Shared::visitDivPowTwoI(LDivPowTwoI *ins)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
CodeGeneratorX86Shared::visitDivOrModConstantI(LDivOrModConstantI *ins) {
|
||||
Register lhs = ToRegister(ins->numerator());
|
||||
Register output = ToRegister(ins->output());
|
||||
int32_t d = ins->denominator();
|
||||
|
||||
// This emits the division answer into edx or the modulus answer into eax.
|
||||
JS_ASSERT(output == eax || output == edx);
|
||||
JS_ASSERT(lhs != eax && lhs != edx);
|
||||
|
||||
// The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI
|
||||
// and LModPowTwoI).
|
||||
JS_ASSERT((Abs(d) & (Abs(d) - 1)) != 0);
|
||||
|
||||
// We will first divide by Abs(d), and negate the answer if d is negative.
|
||||
// If desired, this can be avoided by generalizing computeDivisionConstants.
|
||||
ReciprocalMulConstants rmc = computeDivisionConstants(Abs(d));
|
||||
|
||||
// As explained in the comments of computeDivisionConstants, we first compute
|
||||
// X >> (32 + shift), where X is either (rmc.multiplier * n) if the multiplier
|
||||
// is non-negative or (rmc.multiplier * n) + (2^32 * n) otherwise. This is the
|
||||
// desired division result if n is non-negative, and is one less than the result
|
||||
// otherwise.
|
||||
masm.movl(lhs, eax);
|
||||
masm.movl(Imm32(rmc.multiplier), edx);
|
||||
masm.imull(edx);
|
||||
if (rmc.multiplier < 0)
|
||||
masm.addl(lhs, edx);
|
||||
masm.sarl(Imm32(rmc.shift_amount), edx);
|
||||
|
||||
// We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
|
||||
// computed with just a sign-extending shift of 31 bits.
|
||||
if (ins->canBeNegativeDividend()) {
|
||||
masm.movl(lhs, eax);
|
||||
masm.sarl(Imm32(31), eax);
|
||||
masm.subl(eax, edx);
|
||||
}
|
||||
|
||||
// After this, edx contains the correct division result.
|
||||
if (d < 0)
|
||||
masm.negl(edx);
|
||||
|
||||
if (output == eax) {
|
||||
masm.imull(Imm32(-d), edx, eax);
|
||||
masm.addl(lhs, eax);
|
||||
}
|
||||
|
||||
if (!ins->mir()->isTruncated()) {
|
||||
if (output == edx) {
|
||||
// This is a division op. Multiply the obtained value by d to check if
|
||||
// the correct answer is an integer. This cannot overflow, since |d| > 1.
|
||||
masm.imull(Imm32(d), edx, eax);
|
||||
masm.cmpl(lhs, eax);
|
||||
if (!bailoutIf(Assembler::NotEqual, ins->snapshot()))
|
||||
return false;
|
||||
|
||||
// If lhs is zero and the divisor is negative, the answer should have
|
||||
// been -0.
|
||||
if (d < 0) {
|
||||
masm.testl(lhs, lhs);
|
||||
if (!bailoutIf(Assembler::Zero, ins->snapshot()))
|
||||
return false;
|
||||
}
|
||||
} else if (ins->canBeNegativeDividend()) {
|
||||
// This is a mod op. If the computed value is zero and lhs
|
||||
// is negative, the answer should have been -0.
|
||||
Label done;
|
||||
|
||||
masm.cmpl(lhs, Imm32(0));
|
||||
masm.j(Assembler::GreaterThanOrEqual, &done);
|
||||
|
||||
masm.testl(eax, eax);
|
||||
if (!bailoutIf(Assembler::Zero, ins->snapshot()))
|
||||
return false;
|
||||
|
||||
masm.bind(&done);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
CodeGeneratorX86Shared::visitDivI(LDivI *ins)
|
||||
{
|
||||
|
@ -124,6 +124,7 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared
|
||||
virtual bool visitMulI(LMulI *ins);
|
||||
virtual bool visitDivI(LDivI *ins);
|
||||
virtual bool visitDivPowTwoI(LDivPowTwoI *ins);
|
||||
virtual bool visitDivOrModConstantI(LDivOrModConstantI *ins);
|
||||
virtual bool visitModI(LModI *ins);
|
||||
virtual bool visitModPowTwoI(LModPowTwoI *ins);
|
||||
virtual bool visitBitNotI(LBitNotI *ins);
|
||||
|
@ -76,6 +76,37 @@ class LDivPowTwoI : public LBinaryMath<0>
|
||||
}
|
||||
};
|
||||
|
||||
// LIR instruction for an int32 division or modulus by a compile-time
// constant. The dividend is operand 0, the constant divisor is stored in the
// node itself, and one temp definition is attached as temp 0.
class LDivOrModConstantI : public LInstructionHelper<1, 1, 1>
{
    // The constant right-hand side of the division/modulus.
    const int32_t denominator_;

  public:
    LIR_HEADER(DivOrModConstantI)

    LDivOrModConstantI(const LAllocation &lhs, int32_t denominator, const LDefinition& temp)
      : denominator_(denominator)
    {
        setOperand(0, lhs);
        setTemp(0, temp);
    }

    // Allocation holding the dividend.
    const LAllocation *numerator() { return getOperand(0); }

    // The constant divisor.
    int32_t denominator() const { return denominator_; }

    // The originating MIR node, which must be either an MDiv or an MMod.
    MBinaryArithInstruction *mir() const {
        JS_ASSERT(mir_->isDiv() || mir_->isMod());
        return static_cast<MBinaryArithInstruction *>(mir_);
    }

    // Whether the dividend may be negative, as reported by the underlying
    // MMod or MDiv node.
    bool canBeNegativeDividend() const {
        return mir_->isMod()
               ? mir_->toMod()->canBeNegativeDividend()
               : mir_->toDiv()->canBeNegativeDividend();
    }
};
|
||||
|
||||
class LModI : public LBinaryMath<1>
|
||||
{
|
||||
public:
|
||||
|
@ -138,10 +138,8 @@ LIRGeneratorX86Shared::lowerDivI(MDiv *div)
|
||||
if (div->rhs()->isConstant()) {
|
||||
int32_t rhs = div->rhs()->toConstant()->value().toInt32();
|
||||
|
||||
// Check for division by a power of two, which is an easy and
|
||||
// important case to optimize. Note that other optimizations
|
||||
// are also possible: division by other constants can be
|
||||
// optimized by a reciprocal multiplication technique.
|
||||
// Division by powers of two can be done by shifting, and division by
|
||||
// other numbers can be done by a reciprocal multiplication technique.
|
||||
int32_t shift = FloorLog2(Abs(rhs));
|
||||
if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
|
||||
LAllocation lhs = useRegisterAtStart(div->lhs());
|
||||
@ -157,6 +155,12 @@ LIRGeneratorX86Shared::lowerDivI(MDiv *div)
|
||||
if (div->fallible() && !assignSnapshot(lir, Bailout_BaselineInfo))
|
||||
return false;
|
||||
return defineReuseInput(lir, div, 0);
|
||||
} else if (rhs != 0) {
|
||||
LDivOrModConstantI *lir;
|
||||
lir = new(alloc()) LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
|
||||
if (div->fallible() && !assignSnapshot(lir, Bailout_BaselineInfo))
|
||||
return false;
|
||||
return defineFixed(lir, div, LAllocation(AnyRegister(edx)));
|
||||
}
|
||||
}
|
||||
|
||||
@ -181,6 +185,12 @@ LIRGeneratorX86Shared::lowerModI(MMod *mod)
|
||||
if (mod->fallible() && !assignSnapshot(lir, Bailout_BaselineInfo))
|
||||
return false;
|
||||
return defineReuseInput(lir, mod, 0);
|
||||
} else if (rhs != 0) {
|
||||
LDivOrModConstantI *lir;
|
||||
lir = new(alloc()) LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
|
||||
if (mod->fallible() && !assignSnapshot(lir, Bailout_BaselineInfo))
|
||||
return false;
|
||||
return defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
_(UnboxFloatingPoint) \
|
||||
_(DivI) \
|
||||
_(DivPowTwoI) \
|
||||
_(DivOrModConstantI) \
|
||||
_(ModI) \
|
||||
_(ModPowTwoI) \
|
||||
_(PowHalfD) \
|
||||
|
@ -14,6 +14,7 @@
|
||||
_(BoxFloatingPoint) \
|
||||
_(DivI) \
|
||||
_(DivPowTwoI) \
|
||||
_(DivOrModConstantI) \
|
||||
_(ModI) \
|
||||
_(ModPowTwoI) \
|
||||
_(PowHalfD) \
|
||||
|
Loading…
Reference in New Issue
Block a user