Bug 1138348 - byte ops on x86_64. r=h4writer

This commit is contained in:
Lars T Hansen 2015-03-10 08:29:01 +01:00
parent be4ff4cebe
commit 0686e32aac
6 changed files with 181 additions and 148 deletions

View File

@@ -364,7 +364,8 @@ LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32 *ins)
}
void
LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins,
bool useI386ByteRegisters)
{
MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
@@ -385,12 +386,11 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
//
// oldval must be in a register.
//
// newval will need to be in a register. If the source is a byte
// array then the newval must be a register that has a byte size:
// ebx, ecx, or edx, since eax is taken for the output in this
// case.
// newval must be in a register. If the source is a byte array
// then newval must be a register that has a byte size: on x86
// this must be ebx, ecx, or edx (eax is taken for the output).
//
// Bug #1077036 describes some optimization opportunities.
// Bug #1077036 describes some further optimization opportunities.
bool fixedOutput = false;
LDefinition tempDef = LDefinition::BogusTemp();
@@ -400,13 +400,12 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
newval = useRegister(ins->newval());
} else {
fixedOutput = true;
if (ins->isByteArray())
if (useI386ByteRegisters && ins->isByteArray())
newval = useFixed(ins->newval(), ebx);
else
newval = useRegister(ins->newval());
}
// A register allocator limitation precludes 'useRegisterAtStart()' here.
const LAllocation oldval = useRegister(ins->oldval());
LCompareExchangeTypedArrayElement *lir =
@@ -419,7 +418,8 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
}
void
LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins,
bool useI386ByteRegisters)
{
MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
@@ -452,7 +452,7 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
//
// Note the placement of L, cmpxchg will update eax with *mem if
// *mem does not have the expected value, so reloading it at the
// top of the loop is redundant.
// top of the loop would be redundant.
//
// If the array is not a uint32 array then:
// - eax should be the output (one result of the cmpxchg)
@@ -488,12 +488,11 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
} else {
tempDef1 = temp();
}
} else if (ins->isByteArray()) {
} else if (useI386ByteRegisters && ins->isByteArray()) {
value = useFixed(ins->value(), ebx);
if (bitOp)
tempDef1 = tempFixed(ecx);
}
else {
} else {
value = useRegister(ins->value());
if (bitOp)
tempDef1 = temp();
@@ -508,133 +507,6 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
define(lir, ins);
}
void
LIRGeneratorX86Shared::lowerAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins,
const LDefinition& addrTemp)
{
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
bool byteArray = false;
switch (ins->accessType()) {
case Scalar::Int8:
case Scalar::Uint8:
byteArray = true;
break;
case Scalar::Int16:
case Scalar::Uint16:
case Scalar::Int32:
case Scalar::Uint32:
break;
default:
MOZ_CRASH("Unexpected array type");
}
// Register allocation:
//
// The output must be eax.
//
// oldval must be in a register (it'll eventually end up in eax so
// ideally it's there to begin with).
//
// newval will need to be in a register. If the source is a byte
// array then the newval must be a register that has a byte size:
// ebx, ecx, or edx, since eax is taken for the output in this
// case. We pick ebx but it would be more flexible to pick any of
// the three that wasn't being used.
//
// Bug #1077036 describes some optimization opportunities.
const LAllocation newval = byteArray ? useFixed(ins->newValue(), ebx) : useRegister(ins->newValue());
const LAllocation oldval = useRegister(ins->oldValue());
LAsmJSCompareExchangeHeap *lir =
new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
lir->setAddrTemp(addrTemp);
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void
LIRGeneratorX86Shared::lowerAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins,
const LDefinition& addrTemp)
{
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
bool byteArray = false;
switch (ins->accessType()) {
case Scalar::Int8:
case Scalar::Uint8:
byteArray = true;
break;
case Scalar::Int16:
case Scalar::Uint16:
case Scalar::Int32:
case Scalar::Uint32:
break;
default:
MOZ_CRASH("Unexpected array type");
}
// Register allocation:
//
// For ADD and SUB we'll use XADD:
//
// movl value, output
// lock xaddl output, mem
//
// For the 8-bit variants XADD needs a byte register for the
// output only, we can still set up with movl; just pin the output
// to eax (or ebx / ecx / edx).
//
// For AND/OR/XOR we need to use a CMPXCHG loop:
//
// movl *mem, eax
// L: mov eax, temp
// andl value, temp
// lock cmpxchg temp, mem ; reads eax also
// jnz L
// ; result in eax
//
// Note the placement of L, cmpxchg will update eax with *mem if
// *mem does not have the expected value, so reloading it at the
// top of the loop is redundant.
//
// We want to fix eax as the output. We also need a temp for
// the intermediate value.
//
// For the 8-bit variants the temp must have a byte register.
//
// There are optimization opportunities:
// - when the result is unused, Bug #1077014.
// - better register allocation and instruction selection, Bug #1077036.
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
LDefinition tempDef = LDefinition::BogusTemp();
LAllocation value;
// Optimization opportunity: "value" need not be pinned to something that
// has a byte register unless the back-end insists on using a byte move
// for the setup or the payload computation, which really it need not do.
if (byteArray) {
value = useFixed(ins->value(), ebx);
if (bitOp)
tempDef = tempFixed(ecx);
} else {
value = useRegister(ins->value());
if (bitOp)
tempDef = temp();
}
LAsmJSAtomicBinopHeap *lir =
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
lir->setAddrTemp(addrTemp);
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void
LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith *ins)
{

View File

@@ -56,10 +56,10 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared
void visitSimdSelect(MSimdSelect *ins);
void visitSimdSplatX4(MSimdSplatX4 *ins);
void visitSimdValueX4(MSimdValueX4 *ins);
void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
void lowerAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins, const LDefinition& addrTemp);
void lowerAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins, const LDefinition& addrTemp);
void lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins,
bool useI386ByteRegisters);
void lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins,
bool useI386ByteRegisters);
};
} // namespace jit

View File

@@ -131,6 +131,18 @@ LIRGeneratorX64::lowerUntypedPhiInput(MPhi *phi, uint32_t inputPosition, LBlock
lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
}
void
LIRGeneratorX64::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
{
lowerCompareExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ false);
}
void
LIRGeneratorX64::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
{
lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ false);
}
void
LIRGeneratorX64::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins)
{
@@ -200,13 +212,58 @@ LIRGeneratorX64::visitAsmJSStoreHeap(MAsmJSStoreHeap *ins)
void
LIRGeneratorX64::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins)
{
lowerAsmJSCompareExchangeHeap(ins, LDefinition::BogusTemp());
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
const LAllocation oldval = useRegister(ins->oldValue());
const LAllocation newval = useRegister(ins->newValue());
LAsmJSCompareExchangeHeap *lir =
new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void
LIRGeneratorX64::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
{
lowerAsmJSAtomicBinopHeap(ins, LDefinition::BogusTemp());
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
// Register allocation:
//
// For ADD and SUB we'll use XADD (with word and byte ops as appropriate):
//
// movl value, output
// lock xaddl output, mem
//
// For AND/OR/XOR we need to use a CMPXCHG loop:
//
// movl *mem, eax
// L: mov eax, temp
// andl value, temp
// lock cmpxchg temp, mem ; reads eax also
// jnz L
// ; result in eax
//
// Note the placement of L, cmpxchg will update eax with *mem if
// *mem does not have the expected value, so reloading it at the
// top of the loop would be redundant.
//
// We want to fix eax as the output. We also need a temp for
// the intermediate value.
//
// There are optimization opportunities:
// - when the result is unused, Bug #1077014.
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
LAllocation value = useRegister(ins->value());
LDefinition tempDef = bitOp ? temp() : LDefinition::BogusTemp();
LAsmJSAtomicBinopHeap *lir =
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void

View File

@@ -42,6 +42,8 @@ class LIRGeneratorX64 : public LIRGeneratorX86Shared
void visitBox(MBox *box);
void visitUnbox(MUnbox *unbox);
void visitReturn(MReturn *ret);
void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);

View File

@@ -182,6 +182,18 @@ LIRGeneratorX86::lowerUntypedPhiInput(MPhi *phi, uint32_t inputPosition, LBlock
payload->setOperand(inputPosition, LUse(VirtualRegisterOfPayload(operand), LUse::ANY));
}
void
LIRGeneratorX86::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
{
lowerCompareExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ true);
}
void
LIRGeneratorX86::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
{
lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ true);
}
void
LIRGeneratorX86::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins)
{
@@ -273,13 +285,101 @@ LIRGeneratorX86::visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic
void
LIRGeneratorX86::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins)
{
lowerAsmJSCompareExchangeHeap(ins, temp());
MOZ_ASSERT(ins->accessType() < Scalar::Float32);
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
bool byteArray = byteSize(ins->accessType()) == 1;
// Register allocation:
//
// The output must be eax.
//
// oldval must be in a register.
//
// newval must be in a register. If the source is a byte array
// then newval must be a register that has a byte size: on x86
// this must be ebx, ecx, or edx (eax is taken for the output).
//
// Bug #1077036 describes some optimization opportunities.
const LAllocation oldval = useRegister(ins->oldValue());
const LAllocation newval = byteArray ? useFixed(ins->newValue(), ebx) : useRegister(ins->newValue());
LAsmJSCompareExchangeHeap *lir =
new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
lir->setAddrTemp(temp());
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void
LIRGeneratorX86::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
{
lowerAsmJSAtomicBinopHeap(ins, temp());
MOZ_ASSERT(ins->accessType() < Scalar::Float32);
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
bool byteArray = byteSize(ins->accessType()) == 1;
// Register allocation:
//
// For ADD and SUB we'll use XADD:
//
// movl value, output
// lock xaddl output, mem
//
// For the 8-bit variants XADD needs a byte register for the
// output only, we can still set up with movl; just pin the output
// to eax (or ebx / ecx / edx).
//
// For AND/OR/XOR we need to use a CMPXCHG loop:
//
// movl *mem, eax
// L: mov eax, temp
// andl value, temp
// lock cmpxchg temp, mem ; reads eax also
// jnz L
// ; result in eax
//
// Note the placement of L, cmpxchg will update eax with *mem if
// *mem does not have the expected value, so reloading it at the
// top of the loop would be redundant.
//
// We want to fix eax as the output. We also need a temp for
// the intermediate value.
//
// For the 8-bit variants the temp must have a byte register.
//
// There are optimization opportunities:
// - when the result is unused, Bug #1077014.
// - better register allocation and instruction selection, Bug #1077036.
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
LDefinition tempDef = LDefinition::BogusTemp();
LAllocation value;
// Optimization opportunity: "value" need not be pinned to something that
// has a byte register unless the back-end insists on using a byte move
// for the setup or the payload computation, which really it need not do.
if (byteArray) {
value = useFixed(ins->value(), ebx);
if (bitOp)
tempDef = tempFixed(ecx);
} else {
value = useRegister(ins->value());
if (bitOp)
tempDef = temp();
}
LAsmJSAtomicBinopHeap *lir =
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
lir->setAddrTemp(temp());
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void

View File

@@ -48,6 +48,8 @@ class LIRGeneratorX86 : public LIRGeneratorX86Shared
void visitBox(MBox *box);
void visitUnbox(MUnbox *unbox);
void visitReturn(MReturn *ret);
void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);