From dd3591d95b2c18fdc04dace2d813bbcdeb8b3d97 Mon Sep 17 00:00:00 2001
From: Douglas Crosher <dtc-moz@scieneer.com>
Date: Sat, 4 May 2013 23:08:36 +1000
Subject: [PATCH] Bug 868708 - ARM optimize signed integer divisions by
 constant powers of two. r=nbp

---
 js/src/ion/arm/CodeGenerator-arm.cpp          | 33 ++++++++++++++++
 js/src/ion/arm/CodeGenerator-arm.h            |  1 +
 js/src/ion/arm/LIR-arm.h                      | 26 +++++++++++++
 js/src/ion/arm/LOpcodes-arm.h                 |  1 +
 js/src/ion/arm/Lowering-arm.cpp               | 19 ++++++++++
 .../jit-test/tests/asm.js/testExpressions.js  | 38 +++++++++++++++++++
 6 files changed, 118 insertions(+)

diff --git a/js/src/ion/arm/CodeGenerator-arm.cpp b/js/src/ion/arm/CodeGenerator-arm.cpp
index 69f4c83c0ea..670fbec5eba 100644
--- a/js/src/ion/arm/CodeGenerator-arm.cpp
+++ b/js/src/ion/arm/CodeGenerator-arm.cpp
@@ -570,6 +570,49 @@ CodeGeneratorARM::visitDivI(LDivI *ins)
     return true;
 }
 
+// Emit code for a signed int32 division of the numerator by the constant
+// power of two (1 << ins->shift()), writing the quotient to ins->output().
+// Returns false if bailoutIf fails, true otherwise.
+bool
+CodeGeneratorARM::visitDivPowTwoI(LDivPowTwoI *ins)
+{
+    Register lhs = ToRegister(ins->numerator());
+    Register output = ToRegister(ins->output());
+    int32_t shift = ins->shift();
+
+    if (shift != 0) {
+        if (!ins->mir()->isTruncated()) {
+            // If the remainder is != 0, bailout since this must be a double.
+            // The remainder is the low |shift| bits of the numerator; moving
+            // them to the top of the scratch register sets the flags on them.
+            masm.as_mov(ScratchRegister, lsl(lhs, 32 - shift), SetCond);
+            if (!bailoutIf(Assembler::NonZero, ins->snapshot()))
+                return false;
+        }
+
+        // Adjust the value so that shifting produces a correctly rounded result
+        // when the numerator is negative. See 10-1 "Signed Division by a Known
+        // Power of 2" in Henry S. Warren, Jr.'s Hacker's Delight.
+        // Note that we wouldn't need to do this adjustment if we could use
+        // Range Analysis to find cases when the value is never negative.
+        if (shift > 1) {
+            masm.as_mov(ScratchRegister, asr(lhs, 31));
+            masm.as_add(ScratchRegister, lhs, lsr(ScratchRegister, 32 - shift));
+        } else
+            masm.as_add(ScratchRegister, lhs, lsr(lhs, 32 - shift));
+
+        // Do the shift.
+        masm.as_mov(output, asr(ScratchRegister, shift));
+    } else {
+        // Division by one: there is nothing to compute, but the lowering does
+        // not tie the output register to the numerator's, so the value must
+        // still be copied or the output would be left unwritten.
+        masm.ma_mov(lhs, output);
+    }
+
+    return true;
+}
+
 bool
 CodeGeneratorARM::visitModI(LModI *ins)
 {
diff --git a/js/src/ion/arm/CodeGenerator-arm.h b/js/src/ion/arm/CodeGenerator-arm.h
index 20623bf454e..5abe1062faf 100644
--- a/js/src/ion/arm/CodeGenerator-arm.h
+++ b/js/src/ion/arm/CodeGenerator-arm.h
@@ -75,6 +75,7 @@ class CodeGeneratorARM : public CodeGeneratorShared
     virtual bool visitMulI(LMulI *ins);
 
     virtual bool visitDivI(LDivI *ins);
+    virtual bool visitDivPowTwoI(LDivPowTwoI *ins);
     virtual bool visitModI(LModI *ins);
     virtual bool visitModPowTwoI(LModPowTwoI *ins);
     virtual bool visitModMaskI(LModMaskI *ins);
diff --git a/js/src/ion/arm/LIR-arm.h b/js/src/ion/arm/LIR-arm.h
index 83db1da05f6..342dbb8170c 100644
--- a/js/src/ion/arm/LIR-arm.h
+++ b/js/src/ion/arm/LIR-arm.h
@@ -121,6 +121,32 @@ class LDivI : public LBinaryMath<2>
     }
 };
 
+// LIR node for a signed int32 division whose divisor is a constant power of
+// two; the divisor is carried as its base-2 exponent rather than an operand.
+class LDivPowTwoI : public LInstructionHelper<1, 1, 0>
+{
+    // Exponent of the divisor, i.e. the divisor is 1 << shift_.
+    const int32_t shift_;
+
+  public:
+    LIR_HEADER(DivPowTwoI)
+
+    LDivPowTwoI(const LAllocation &lhs, int32_t shift)
+      : shift_(shift)
+    {
+        setOperand(0, lhs);
+    }
+
+    // The dividend, stored in operand slot 0.
+    const LAllocation *numerator() { return getOperand(0); }
+
+    // The exponent supplied at construction.
+    int32_t shift() { return shift_; }
+
+    // The MIR node attached to this instruction (mir_), viewed as an MDiv.
+    MDiv *mir() const { return mir_->toDiv(); }
+};
+
 class LModI : public LBinaryMath<3>
 {
   public:
diff --git a/js/src/ion/arm/LOpcodes-arm.h b/js/src/ion/arm/LOpcodes-arm.h
index 1f53b9baabc..72f52b8ae78 100644
--- a/js/src/ion/arm/LOpcodes-arm.h
+++ b/js/src/ion/arm/LOpcodes-arm.h
@@ -13,6 +13,7 @@
     _(Box)                      \
     _(BoxDouble)                \
     _(DivI)                     \
+    _(DivPowTwoI)               \
     _(ModI)                     \
     _(ModPowTwoI)               \
     _(ModMaskI)                 \
diff --git a/js/src/ion/arm/Lowering-arm.cpp b/js/src/ion/arm/Lowering-arm.cpp
index 1bdafc89f99..1ef2f1ad2c3 100644
--- a/js/src/ion/arm/Lowering-arm.cpp
+++ b/js/src/ion/arm/Lowering-arm.cpp
@@ -230,6 +230,25 @@ LIRGeneratorARM::lowerForShift(LInstructionHelper<1, 2, 0> *ins, MDefinition *mi
 bool
 LIRGeneratorARM::lowerDivI(MDiv *div)
 {
+    // Division instructions are slow. Division by constant denominators can be
+    // rewritten to use other instructions.
+    if (div->rhs()->isConstant()) {
+        int32_t rhs = div->rhs()->toConstant()->value().toInt32();
+        // Check for division by a positive power of two, which is an easy and
+        // important case to optimize. Note that other optimizations are also
+        // possible; division by negative powers of two can be optimized in a
+        // similar manner as positive powers of two, and division by other
+        // constants can be optimized by a reciprocal multiplication technique.
+        int32_t shift;
+        JS_FLOOR_LOG2(shift, rhs);
+        if (rhs > 0 && 1 << shift == rhs) {
+            LDivPowTwoI *lir = new LDivPowTwoI(useRegisterAtStart(div->lhs()), shift);
+            if (div->fallible() && !assignSnapshot(lir))
+                return false;
+            return define(lir, div);
+        }
+    }
+
     LDivI *lir = new LDivI(useFixed(div->lhs(), r0), use(div->rhs(), r1),
                            tempFixed(r2), tempFixed(r3));
     if (div->fallible() && !assignSnapshot(lir))
diff --git a/js/src/jit-test/tests/asm.js/testExpressions.js b/js/src/jit-test/tests/asm.js/testExpressions.js
index 7c5d56ac25e..eaf5847ddcf 100644
--- a/js/src/jit-test/tests/asm.js/testExpressions.js
+++ b/js/src/jit-test/tests/asm.js/testExpressions.js
@@ -277,3 +277,17 @@ asmLink(asmCompile('glob','imp','buf', USE_ASM + "var i32=new glob.Int32Array(bu
 assertEq(new Int32Array(buf)[0], 42);
 
 assertEq(asmLink(asmCompile(USE_ASM + "function f() { var a=0,i=0; for (; ~~i!=4; i=(i+1)|0) { a = (a*5)|0; if (+(a>>>0) != 0.0) return 1; } return 0; } return f"))(), 0)
+
+// Signed integer division by a power of two. For each divisor the compiled
+// quotient is compared against the truncated quotient computed in plain JS,
+// for inputs on either side of every power of two plus the int32 extremes.
+for (let divisor of [1, 2, 4, 1073741824]) {
+    var f = asmLink(asmCompile(USE_ASM + "function f(i) { i=i|0; return ((i|0)/" + divisor + ")|0; } return f;"));
+    for (let exp = 0; exp < 31; exp++) {
+        let pow = Math.pow(2, exp);
+        for (let x of [pow, pow - 1, -pow, -pow - 1])
+            assertEq(f(x), (x / divisor) | 0);
+    }
+    assertEq(f(INT32_MIN), (INT32_MIN / divisor) | 0);
+    assertEq(f(INT32_MAX), (INT32_MAX / divisor) | 0);
+}