Bug 1138348 - byte ops on x86_64. r=h4writer

2024-09-13 09:24:08 -07:00 · 2015-03-10 08:29:01 +01:00 · 2015-03-10 08:29:01 +01:00 · 0686e32aac
commit 0686e32aac
parent be4ff4cebe
6 changed files with 181 additions and 148 deletions
--- a/js/src/jit/shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/shared/Lowering-x86-shared.cpp
@ -364,7 +364,8 @@ LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32 *ins)
 }
 void
-LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
+LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins,
                                                             bool useI386ByteRegisters)
 {
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
@ -385,12 +386,11 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
    //
    // oldval must be in a register.
    //
-    // newval will need to be in a register.  If the source is a byte
+    // newval must be in a register.  If the source is a byte array
-    // array then the newval must be a register that has a byte size:
+    // then newval must be a register that has a byte size: on x86
-    // ebx, ecx, or edx, since eax is taken for the output in this
+    // this must be ebx, ecx, or edx (eax is taken for the output).
    // case.
    //
-    // Bug #1077036 describes some optimization opportunities.
+    // Bug #1077036 describes some further optimization opportunities.
    bool fixedOutput = false;
    LDefinition tempDef = LDefinition::BogusTemp();
@ -400,13 +400,12 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
        newval = useRegister(ins->newval());
    } else {
        fixedOutput = true;
-        if (ins->isByteArray())
+        if (useI386ByteRegisters && ins->isByteArray())
            newval = useFixed(ins->newval(), ebx);
        else
            newval = useRegister(ins->newval());
    }
    // A register allocator limitation precludes 'useRegisterAtStart()' here.
    const LAllocation oldval = useRegister(ins->oldval());
    LCompareExchangeTypedArrayElement *lir =
@ -419,7 +418,8 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
 }
 void
-LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
+LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins,
                                                         bool useI386ByteRegisters)
 {
    MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
@ -452,7 +452,7 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
    //
    // Note the placement of L, cmpxchg will update eax with *mem if
    // *mem does not have the expected value, so reloading it at the
-    // top of the loop is redundant.
+    // top of the loop would be redundant.
    //
    // If the array is not a uint32 array then:
    //  - eax should be the output (one result of the cmpxchg)
@ -488,12 +488,11 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
        } else {
            tempDef1 = temp();
        }
-    } else if (ins->isByteArray()) {
+    } else if (useI386ByteRegisters && ins->isByteArray()) {
        value = useFixed(ins->value(), ebx);
        if (bitOp)
            tempDef1 = tempFixed(ecx);
-    }
+    } else {
    else {
        value = useRegister(ins->value());
        if (bitOp)
            tempDef1 = temp();
@ -508,133 +507,6 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
        define(lir, ins);
 }
 void
 LIRGeneratorX86Shared::lowerAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins,
                                                      const LDefinition& addrTemp)
 {
    // Lowers an asm.js heap compare-exchange.  |addrTemp| is supplied by
    // the caller and is attached to the LIR node unchanged.
    MDefinition *base = ins->ptr();
    MOZ_ASSERT(base->type() == MIRType_Int32);

    // Classify the access: only 8-bit accesses need a byte-capable
    // register for the new value.  Non-integer element types are rejected.
    bool isByteAccess = false;
    switch (ins->accessType()) {
      case Scalar::Int16:
      case Scalar::Uint16:
      case Scalar::Int32:
      case Scalar::Uint32:
        break;
      case Scalar::Int8:
      case Scalar::Uint8:
        isByteAccess = true;
        break;
      default:
        MOZ_CRASH("Unexpected array type");
    }

    // Register plan:
    //
    //  - The result is always produced in eax.
    //
    //  - oldval lives in some register; it ends up in eax eventually, so
    //    ideally it would start out there.
    //
    //  - newval lives in a register too.  For byte accesses that register
    //    must have a byte-sized form, which leaves ebx, ecx or edx because
    //    eax is reserved for the output.  ebx is hard-wired here, although
    //    choosing whichever of the three is free would be more flexible.
    //
    // See Bug #1077036 for optimization opportunities.

    // The uses are requested in this order: newval, oldval, then the pointer.
    LAllocation newval;
    if (isByteAccess)
        newval = useFixed(ins->newValue(), ebx);
    else
        newval = useRegister(ins->newValue());
    const LAllocation oldval = useRegister(ins->oldValue());

    LAsmJSCompareExchangeHeap *cmpxchg =
        new(alloc()) LAsmJSCompareExchangeHeap(useRegister(base), oldval, newval);
    cmpxchg->setAddrTemp(addrTemp);
    defineFixed(cmpxchg, ins, LAllocation(AnyRegister(eax)));
 }
 void
 LIRGeneratorX86Shared::lowerAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins,
                                                  const LDefinition& addrTemp)
 {
    // Lowers an asm.js atomic read-modify-write on the heap.  |addrTemp| is
    // supplied by the caller and is attached to the LIR node unchanged.
    MDefinition *base = ins->ptr();
    MOZ_ASSERT(base->type() == MIRType_Int32);

    // Classify the access: 8-bit accesses constrain register choice below.
    // Non-integer element types are rejected.
    bool isByteAccess = false;
    switch (ins->accessType()) {
      case Scalar::Int16:
      case Scalar::Uint16:
      case Scalar::Int32:
      case Scalar::Uint32:
        break;
      case Scalar::Int8:
      case Scalar::Uint8:
        isByteAccess = true;
        break;
      default:
        MOZ_CRASH("Unexpected array type");
    }

    // Register plan:
    //
    // ADD and SUB are implemented with XADD:
    //
    //    movl       value, output
    //    lock xaddl output, mem
    //
    // In the 8-bit case XADD only requires the output to be a byte
    // register; the setup can still be a movl, so it suffices to pin the
    // output to eax (ebx / ecx / edx would also do).
    //
    // AND/OR/XOR are implemented with a CMPXCHG loop:
    //
    //    movl          *mem, eax
    // L: mov           eax, temp
    //    andl          value, temp
    //    lock cmpxchg  temp, mem  ; reads eax also
    //    jnz           L
    //    ; result in eax
    //
    // L is deliberately placed after the initial load: on failure cmpxchg
    // writes the current *mem into eax, so a reload at the loop head would
    // be redundant.
    //
    // The output is therefore fixed to eax, and the loop needs a temp for
    // the intermediate value; for 8-bit accesses that temp must have a
    // byte register.
    //
    // Optimization opportunities:
    //  - when the result is unused, Bug #1077014.
    //  - better register allocation and instruction selection, Bug #1077036.

    // Anything other than fetch-add / fetch-sub takes the CMPXCHG loop.
    bool needsCmpxchgLoop =
        ins->operation() != AtomicFetchAddOp && ins->operation() != AtomicFetchSubOp;

    LDefinition scratch = LDefinition::BogusTemp();
    LAllocation operand;

    // Optimization opportunity: the operand only has to be pinned to a
    // byte-capable register if the back-end insists on a byte move for the
    // setup or the payload computation, which it really need not do.
    if (!isByteAccess) {
        operand = useRegister(ins->value());
        if (needsCmpxchgLoop)
            scratch = temp();
    } else {
        operand = useFixed(ins->value(), ebx);
        if (needsCmpxchgLoop)
            scratch = tempFixed(ecx);
    }

    LAsmJSAtomicBinopHeap *binop =
        new(alloc()) LAsmJSAtomicBinopHeap(useRegister(base), operand, scratch);
    binop->setAddrTemp(addrTemp);
    defineFixed(binop, ins, LAllocation(AnyRegister(eax)));
 }
 void
 LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith *ins)
 {
--- a/js/src/jit/shared/Lowering-x86-shared.h
+++ b/js/src/jit/shared/Lowering-x86-shared.h
@ -56,10 +56,10 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared
    void visitSimdSelect(MSimdSelect *ins);
    void visitSimdSplatX4(MSimdSplatX4 *ins);
    void visitSimdValueX4(MSimdValueX4 *ins);
-    void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
+    void lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins,
-    void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
+                                               bool useI386ByteRegisters);
-    void lowerAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins, const LDefinition& addrTemp);
+    void lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins,
-    void lowerAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins, const LDefinition& addrTemp);
+                                           bool useI386ByteRegisters);
 };
 } // namespace jit
--- a/js/src/jit/x64/Lowering-x64.cpp
+++ b/js/src/jit/x64/Lowering-x64.cpp
@ -131,6 +131,18 @@ LIRGeneratorX64::lowerUntypedPhiInput(MPhi *phi, uint32_t inputPosition, LBlock
    lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
 }
 void
 LIRGeneratorX64::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
 {
    // Forward to the shared x86 lowering with i386 byte-register pinning
    // switched off for x64.
    const bool useI386ByteRegisters = false;
    lowerCompareExchangeTypedArrayElement(ins, useI386ByteRegisters);
 }
 void
 LIRGeneratorX64::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
 {
    // Forward to the shared x86 lowering with i386 byte-register pinning
    // switched off for x64.
    const bool useI386ByteRegisters = false;
    lowerAtomicTypedArrayElementBinop(ins, useI386ByteRegisters);
 }
 void
 LIRGeneratorX64::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins)
 {
@ -200,13 +212,58 @@ LIRGeneratorX64::visitAsmJSStoreHeap(MAsmJSStoreHeap *ins)
 // Lowers an asm.js heap compare-exchange on x64.
 //
 // In this patch view the '-' line is the previous body (a call into the
 // shared helper) and the '+' line starts the inlined replacement.  The
 // replacement requests oldval, newval and the pointer each in an
 // arbitrary register (no fixed byte register is requested) and defines
 // the result in eax.  No address temp is set on the LIR node here.
 void
 LIRGeneratorX64::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins)
 {
-    lowerAsmJSCompareExchangeHeap(ins, LDefinition::BogusTemp());
+    MDefinition *ptr = ins->ptr();
    MOZ_ASSERT(ptr->type() == MIRType_Int32);
    // Uses are requested in this order: oldval, newval, then the pointer.
    const LAllocation oldval = useRegister(ins->oldValue());
    const LAllocation newval = useRegister(ins->newValue());
    LAsmJSCompareExchangeHeap *lir =
        new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
    // The output is pinned to eax.
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
 }
 // Lowers an asm.js atomic read-modify-write on the heap for x64.
 //
 // In this patch view the '-' line is the previous body (a call into the
 // shared helper) and the '+' line starts the inlined replacement.  The
 // replacement requests the operand and the pointer in arbitrary
 // registers, allocates a temp only for the non-add/sub operations, and
 // defines the result in eax.  No address temp is set on the LIR node here.
 void
 LIRGeneratorX64::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
 {
-    lowerAsmJSAtomicBinopHeap(ins, LDefinition::BogusTemp());
+    MDefinition *ptr = ins->ptr();
    MOZ_ASSERT(ptr->type() == MIRType_Int32);
    // Register allocation:
    //
    // For ADD and SUB we'll use XADD (with word and byte ops as appropriate):
    //
    //    movl       value, output
    //    lock xaddl output, mem
    //
    // For AND/OR/XOR we need to use a CMPXCHG loop:
    //
    //    movl          *mem, eax
    // L: mov           eax, temp
    //    andl          value, temp
    //    lock cmpxchg  temp, mem  ; reads eax also
    //    jnz           L
    //    ; result in eax
    //
    // Note the placement of L, cmpxchg will update eax with *mem if
    // *mem does not have the expected value, so reloading it at the
    // top of the loop would be redundant.
    //
    // We want to fix eax as the output.  We also need a temp for
    // the intermediate value.
    //
    // There are optimization opportunities:
    //  - when the result is unused, Bug #1077014.

    // bitOp is true for every operation other than fetch-add / fetch-sub,
    // i.e. the ones that take the CMPXCHG loop described above.
    bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
    LAllocation value = useRegister(ins->value());
    // Only the CMPXCHG loop needs a temp; otherwise pass a bogus temp.
    LDefinition tempDef = bitOp ? temp() : LDefinition::BogusTemp();
    LAsmJSAtomicBinopHeap *lir =
        new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
    // The output is pinned to eax.
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
 }
 void
--- a/js/src/jit/x64/Lowering-x64.h
+++ b/js/src/jit/x64/Lowering-x64.h
@ -42,6 +42,8 @@ class LIRGeneratorX64 : public LIRGeneratorX86Shared
    void visitBox(MBox *box);
    void visitUnbox(MUnbox *unbox);
    void visitReturn(MReturn *ret);
    void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
    void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
    void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
    void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
    void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);
--- a/js/src/jit/x86/Lowering-x86.cpp
+++ b/js/src/jit/x86/Lowering-x86.cpp
@ -182,6 +182,18 @@ LIRGeneratorX86::lowerUntypedPhiInput(MPhi *phi, uint32_t inputPosition, LBlock
    payload->setOperand(inputPosition, LUse(VirtualRegisterOfPayload(operand), LUse::ANY));
 }
 void
 LIRGeneratorX86::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
 {
    // Forward to the shared x86 lowering with i386 byte-register pinning
    // switched on for 32-bit x86.
    const bool useI386ByteRegisters = true;
    lowerCompareExchangeTypedArrayElement(ins, useI386ByteRegisters);
 }
 void
 LIRGeneratorX86::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
 {
    // Forward to the shared x86 lowering with i386 byte-register pinning
    // switched on for 32-bit x86.
    const bool useI386ByteRegisters = true;
    lowerAtomicTypedArrayElementBinop(ins, useI386ByteRegisters);
 }
 void
 LIRGeneratorX86::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins)
 {
@ -273,13 +285,101 @@ LIRGeneratorX86::visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic
 // Lowers an asm.js heap compare-exchange on 32-bit x86.
 //
 // In this patch view the '-' line is the previous body (a call into the
 // shared helper) and the '+' line starts the inlined replacement.  The
 // replacement pins newval to ebx for one-byte accesses, attaches a fresh
 // address temp to the LIR node, and defines the result in eax.
 void
 LIRGeneratorX86::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins)
 {
-    lowerAsmJSCompareExchangeHeap(ins, temp());
+    MOZ_ASSERT(ins->accessType() < Scalar::Float32);
    // NOTE(review): the assertion above presumably relies on every integer
    // element type ordering before Float32 in the Scalar enum -- confirm.
    MDefinition *ptr = ins->ptr();
    MOZ_ASSERT(ptr->type() == MIRType_Int32);
    // A one-byte element size is what triggers the byte-register constraint.
    bool byteArray = byteSize(ins->accessType()) == 1;
    // Register allocation:
    //
    // The output must be eax.
    //
    // oldval must be in a register.
    //
    // newval must be in a register.  If the source is a byte array
    // then newval must be a register that has a byte size: on x86
    // this must be ebx, ecx, or edx (eax is taken for the output).
    //
    // Bug #1077036 describes some optimization opportunities.

    // Uses are requested in this order: oldval, newval, then the pointer.
    const LAllocation oldval = useRegister(ins->oldValue());
    const LAllocation newval = byteArray ? useFixed(ins->newValue(), ebx) : useRegister(ins->newValue());
    LAsmJSCompareExchangeHeap *lir =
        new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
    // A fresh temp is attached as the address temp.
    lir->setAddrTemp(temp());
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
 }
 // Lowers an asm.js atomic read-modify-write on the heap for 32-bit x86.
 //
 // In this patch view the '-' line is the previous body (a call into the
 // shared helper) and the '+' line starts the inlined replacement.  For
 // one-byte accesses the replacement pins the operand to ebx and, when a
 // temp is needed, pins that temp to ecx.  It attaches a fresh address
 // temp to the LIR node and defines the result in eax.
 void
 LIRGeneratorX86::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
 {
-    lowerAsmJSAtomicBinopHeap(ins, temp());
+    MOZ_ASSERT(ins->accessType() < Scalar::Float32);
    // NOTE(review): the assertion above presumably relies on every integer
    // element type ordering before Float32 in the Scalar enum -- confirm.
    MDefinition *ptr = ins->ptr();
    MOZ_ASSERT(ptr->type() == MIRType_Int32);
    // A one-byte element size is what triggers the byte-register constraints.
    bool byteArray = byteSize(ins->accessType()) == 1;
    // Register allocation:
    //
    // For ADD and SUB we'll use XADD:
    //
    //    movl       value, output
    //    lock xaddl output, mem
    //
    // For the 8-bit variants XADD needs a byte register for the
    // output only, we can still set up with movl; just pin the output
    // to eax (or ebx / ecx / edx).
    //
    // For AND/OR/XOR we need to use a CMPXCHG loop:
    //
    //    movl          *mem, eax
    // L: mov           eax, temp
    //    andl          value, temp
    //    lock cmpxchg  temp, mem  ; reads eax also
    //    jnz           L
    //    ; result in eax
    //
    // Note the placement of L, cmpxchg will update eax with *mem if
    // *mem does not have the expected value, so reloading it at the
    // top of the loop would be redundant.
    //
    // We want to fix eax as the output.  We also need a temp for
    // the intermediate value.
    //
    // For the 8-bit variants the temp must have a byte register.
    //
    // There are optimization opportunities:
    //  - when the result is unused, Bug #1077014.
    //  - better register allocation and instruction selection, Bug #1077036.

    // bitOp is true for every operation other than fetch-add / fetch-sub,
    // i.e. the ones that take the CMPXCHG loop described above.
    bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
    // Stays a bogus temp unless the CMPXCHG loop is needed.
    LDefinition tempDef = LDefinition::BogusTemp();
    LAllocation value;
    // Optimization opportunity: "value" need not be pinned to something that
    // has a byte register unless the back-end insists on using a byte move
    // for the setup or the payload computation, which really it need not do.
    if (byteArray) {
        value = useFixed(ins->value(), ebx);
        if (bitOp)
            tempDef = tempFixed(ecx);
    } else {
        value = useRegister(ins->value());
        if (bitOp)
            tempDef = temp();
    }
    LAsmJSAtomicBinopHeap *lir =
        new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
    // A fresh temp is attached as the address temp.
    lir->setAddrTemp(temp());
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
 }
 void
--- a/js/src/jit/x86/Lowering-x86.h
+++ b/js/src/jit/x86/Lowering-x86.h
@ -48,6 +48,8 @@ class LIRGeneratorX86 : public LIRGeneratorX86Shared
    void visitBox(MBox *box);
    void visitUnbox(MUnbox *unbox);
    void visitReturn(MReturn *ret);
    void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
    void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
    void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
    void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
    void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);