From dd3591d95b2c18fdc04dace2d813bbcdeb8b3d97 Mon Sep 17 00:00:00 2001
From: Douglas Crosher
Date: Sat, 4 May 2013 23:08:36 +1000
Subject: [PATCH] Bug 868708 - ARM optimize signed integer divisions by constant powers of two. r=nbp

---
 js/src/ion/arm/CodeGenerator-arm.cpp          | 39 +++++++++++++++++++
 js/src/ion/arm/CodeGenerator-arm.h            |  1 +
 js/src/ion/arm/LIR-arm.h                      | 26 +++++++++++++
 js/src/ion/arm/LOpcodes-arm.h                 |  1 +
 js/src/ion/arm/Lowering-arm.cpp               | 19 ++++++++++
 .../jit-test/tests/asm.js/testExpressions.js  | 38 +++++++++++++++++++
 6 files changed, 124 insertions(+)

diff --git a/js/src/ion/arm/CodeGenerator-arm.cpp b/js/src/ion/arm/CodeGenerator-arm.cpp
index 69f4c83c0ea..670fbec5eba 100644
--- a/js/src/ion/arm/CodeGenerator-arm.cpp
+++ b/js/src/ion/arm/CodeGenerator-arm.cpp
@@ -570,6 +570,45 @@ CodeGeneratorARM::visitDivI(LDivI *ins)
     return true;
 }
 
+// Emit code for a signed 32-bit division by the constant 2^shift, where shift
+// comes from the LIR node. Returns false only if bailoutIf() fails.
+bool
+CodeGeneratorARM::visitDivPowTwoI(LDivPowTwoI *ins)
+{
+    Register lhs = ToRegister(ins->numerator());
+    Register output = ToRegister(ins->output());
+    int32_t shift = ins->shift();
+
+    if (shift != 0) {
+        if (!ins->mir()->isTruncated()) {
+            // If the remainder is != 0, bailout since this must be a double.
+            masm.as_mov(ScratchRegister, lsl(lhs, 32 - shift), SetCond);
+            if (!bailoutIf(Assembler::NonZero, ins->snapshot()))
+                return false;
+        }
+
+        // Adjust the value so that shifting produces a correctly rounded result
+        // when the numerator is negative. See 10-1 "Signed Division by a Known
+        // Power of 2" in Henry S. Warren, Jr.'s Hacker's Delight.
+        // Note that we wouldn't need to do this adjustment if we could use
+        // Range Analysis to find cases when the value is never negative.
+        if (shift > 1) {
+            masm.as_mov(ScratchRegister, asr(lhs, 31));
+            masm.as_add(ScratchRegister, lhs, lsr(ScratchRegister, 32 - shift));
+        } else
+            masm.as_add(ScratchRegister, lhs, lsr(lhs, 32 - shift));
+
+        // Do the shift.
+        masm.as_mov(output, asr(ScratchRegister, shift));
+    } else {
+        // Dividing by one needs no arithmetic, but the result must still be
+        // copied because the output register may differ from the input.
+        masm.ma_mov(lhs, output);
+    }
+
+    return true;
+}
+
 bool
 CodeGeneratorARM::visitModI(LModI *ins)
 {
diff --git a/js/src/ion/arm/CodeGenerator-arm.h b/js/src/ion/arm/CodeGenerator-arm.h
index 20623bf454e..5abe1062faf 100644
--- a/js/src/ion/arm/CodeGenerator-arm.h
+++ b/js/src/ion/arm/CodeGenerator-arm.h
@@ -75,6 +75,7 @@ class CodeGeneratorARM : public CodeGeneratorShared
     virtual bool visitMulI(LMulI *ins);
 
     virtual bool visitDivI(LDivI *ins);
+    virtual bool visitDivPowTwoI(LDivPowTwoI *ins);
     virtual bool visitModI(LModI *ins);
     virtual bool visitModPowTwoI(LModPowTwoI *ins);
     virtual bool visitModMaskI(LModMaskI *ins);
diff --git a/js/src/ion/arm/LIR-arm.h b/js/src/ion/arm/LIR-arm.h
index 83db1da05f6..342dbb8170c 100644
--- a/js/src/ion/arm/LIR-arm.h
+++ b/js/src/ion/arm/LIR-arm.h
@@ -121,6 +121,32 @@ class LDivI : public LBinaryMath<2>
     }
 };
 
+class LDivPowTwoI : public LInstructionHelper<1, 1, 0>
+{
+    const int32_t shift_;
+
+  public:
+    LIR_HEADER(DivPowTwoI)
+
+    LDivPowTwoI(const LAllocation &lhs, int32_t shift)
+      : shift_(shift)
+    {
+        setOperand(0, lhs);
+    }
+
+    const LAllocation *numerator() {
+        return getOperand(0);
+    }
+
+    int32_t shift() {
+        return shift_;
+    }
+
+    MDiv *mir() const {
+        return mir_->toDiv();
+    }
+};
+
 class LModI : public LBinaryMath<3>
 {
   public:
diff --git a/js/src/ion/arm/LOpcodes-arm.h b/js/src/ion/arm/LOpcodes-arm.h
index 1f53b9baabc..72f52b8ae78 100644
--- a/js/src/ion/arm/LOpcodes-arm.h
+++ b/js/src/ion/arm/LOpcodes-arm.h
@@ -13,6 +13,7 @@
     _(Box)                      \
     _(BoxDouble)                \
     _(DivI)                     \
+    _(DivPowTwoI)               \
     _(ModI)                     \
     _(ModPowTwoI)               \
     _(ModMaskI)                 \
diff --git a/js/src/ion/arm/Lowering-arm.cpp b/js/src/ion/arm/Lowering-arm.cpp
index 1bdafc89f99..1ef2f1ad2c3 100644
--- a/js/src/ion/arm/Lowering-arm.cpp
+++ b/js/src/ion/arm/Lowering-arm.cpp
@@ -230,6 +230,25 @@ LIRGeneratorARM::lowerForShift(LInstructionHelper<1, 2, 0> *ins, MDefinition *mi
 bool
 LIRGeneratorARM::lowerDivI(MDiv *div)
 {
+    // Division instructions are slow. Division by constant denominators can be
+    // rewritten to use other instructions.
+    if (div->rhs()->isConstant()) {
+        int32_t rhs = div->rhs()->toConstant()->value().toInt32();
+        // Check for division by a positive power of two, which is an easy and
+        // important case to optimize. Note that other optimizations are also
+        // possible; division by negative powers of two can be optimized in a
+        // similar manner as positive powers of two, and division by other
+        // constants can be optimized by a reciprocal multiplication technique.
+        int32_t shift;
+        JS_FLOOR_LOG2(shift, rhs);
+        if (rhs > 0 && 1 << shift == rhs) {
+            LDivPowTwoI *lir = new LDivPowTwoI(useRegisterAtStart(div->lhs()), shift);
+            if (div->fallible() && !assignSnapshot(lir))
+                return false;
+            return define(lir, div);
+        }
+    }
+
     LDivI *lir = new LDivI(useFixed(div->lhs(), r0), use(div->rhs(), r1),
                            tempFixed(r2), tempFixed(r3));
     if (div->fallible() && !assignSnapshot(lir))
diff --git a/js/src/jit-test/tests/asm.js/testExpressions.js b/js/src/jit-test/tests/asm.js/testExpressions.js
index 7c5d56ac25e..eaf5847ddcf 100644
--- a/js/src/jit-test/tests/asm.js/testExpressions.js
+++ b/js/src/jit-test/tests/asm.js/testExpressions.js
@@ -277,3 +277,41 @@ asmLink(asmCompile('glob','imp','buf', USE_ASM + "var i32=new glob.Int32Array(bu
 assertEq(new Int32Array(buf)[0], 42);
 
 assertEq(asmLink(asmCompile(USE_ASM + "function f() { var a=0,i=0; for (; ~~i!=4; i=(i+1)|0) { a = (a*5)|0; if (+(a>>>0) != 0.0) return 1; } return 0; } return f"))(), 0)
+
+// Signed integer division by a power of two.
+var f = asmLink(asmCompile(USE_ASM + "function f(i) { i=i|0; return ((i|0)/1)|0; } return f;"));
+for (let i = 0; i < 31; i++) {
+    assertEq(f(Math.pow(2,i)), Math.pow(2,i));
+    assertEq(f(Math.pow(2,i)-1), Math.pow(2,i)-1);
+    assertEq(f(-Math.pow(2,i)), -Math.pow(2,i));
+    assertEq(f(-Math.pow(2,i)-1), -Math.pow(2,i)-1);
+}
+assertEq(f(INT32_MIN), INT32_MIN);
+assertEq(f(INT32_MAX), INT32_MAX);
+var f = asmLink(asmCompile(USE_ASM + "function f(i) { i=i|0; return ((i|0)/2)|0; } return f;"));
+for (let i = 0; i < 31; i++) {
+    assertEq(f(Math.pow(2,i)), (Math.pow(2,i)/2)|0);
+    assertEq(f(Math.pow(2,i)-1), ((Math.pow(2,i)-1)/2)|0);
+    assertEq(f(-Math.pow(2,i)), (-Math.pow(2,i)/2)|0);
+    assertEq(f(-Math.pow(2,i)-1), ((-Math.pow(2,i)-1)/2)|0);
+}
+assertEq(f(INT32_MIN), (INT32_MIN/2)|0);
+assertEq(f(INT32_MAX), (INT32_MAX/2)|0);
+var f = asmLink(asmCompile(USE_ASM + "function f(i) { i=i|0; return ((i|0)/4)|0; } return f;"));
+for (let i = 0; i < 31; i++) {
+    assertEq(f(Math.pow(2,i)), (Math.pow(2,i)/4)|0);
+    assertEq(f(Math.pow(2,i)-1), ((Math.pow(2,i)-1)/4)|0);
+    assertEq(f(-Math.pow(2,i)), (-Math.pow(2,i)/4)|0);
+    assertEq(f(-Math.pow(2,i)-1), ((-Math.pow(2,i)-1)/4)|0);
+}
+assertEq(f(INT32_MIN), (INT32_MIN/4)|0);
+assertEq(f(INT32_MAX), (INT32_MAX/4)|0);
+var f = asmLink(asmCompile(USE_ASM + "function f(i) { i=i|0; return ((i|0)/1073741824)|0; } return f;"));
+for (let i = 0; i < 31; i++) {
+    assertEq(f(Math.pow(2,i)), (Math.pow(2,i)/Math.pow(2,30))|0);
+    assertEq(f(Math.pow(2,i)-1), ((Math.pow(2,i)-1)/Math.pow(2,30))|0);
+    assertEq(f(-Math.pow(2,i)), (-Math.pow(2,i)/Math.pow(2,30))|0);
+    assertEq(f(-Math.pow(2,i)-1), ((-Math.pow(2,i)-1)/Math.pow(2,30))|0);
+}
+assertEq(f(INT32_MIN), (INT32_MIN/Math.pow(2,30))|0);
+assertEq(f(INT32_MAX), (INT32_MAX/Math.pow(2,30))|0);