From 0686e32aac73778853845ca844e2ffb3a55f0b8b Mon Sep 17 00:00:00 2001 From: Lars T Hansen Date: Tue, 10 Mar 2015 08:29:01 +0100 Subject: [PATCH] Bug 1138348 - byte ops on x86_64. r=h4writer --- js/src/jit/shared/Lowering-x86-shared.cpp | 152 ++-------------------- js/src/jit/shared/Lowering-x86-shared.h | 8 +- js/src/jit/x64/Lowering-x64.cpp | 61 ++++++++- js/src/jit/x64/Lowering-x64.h | 2 + js/src/jit/x86/Lowering-x86.cpp | 104 ++++++++++++++- js/src/jit/x86/Lowering-x86.h | 2 + 6 files changed, 181 insertions(+), 148 deletions(-) diff --git a/js/src/jit/shared/Lowering-x86-shared.cpp b/js/src/jit/shared/Lowering-x86-shared.cpp index 6163ee908f8..f86125c0934 100644 --- a/js/src/jit/shared/Lowering-x86-shared.cpp +++ b/js/src/jit/shared/Lowering-x86-shared.cpp @@ -364,7 +364,8 @@ LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32 *ins) } void -LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins) +LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins, + bool useI386ByteRegisters) { MOZ_ASSERT(ins->arrayType() != Scalar::Float32); MOZ_ASSERT(ins->arrayType() != Scalar::Float64); @@ -385,12 +386,11 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp // // oldval must be in a register. // - // newval will need to be in a register. If the source is a byte - // array then the newval must be a register that has a byte size: - // ebx, ecx, or edx, since eax is taken for the output in this - // case. + // newval must be in a register. If the source is a byte array + // then newval must be a register that has a byte size: on x86 + // this must be ebx, ecx, or edx (eax is taken for the output). // - // Bug #1077036 describes some optimization opportunities. + // Bug #1077036 describes some further optimization opportunities. bool fixedOutput = false; LDefinition tempDef = LDefinition::BogusTemp(); @@ -400,13 +400,12 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp newval = useRegister(ins->newval()); } else { fixedOutput = true; - if (ins->isByteArray()) + if (useI386ByteRegisters && ins->isByteArray()) newval = useFixed(ins->newval(), ebx); else newval = useRegister(ins->newval()); } - // A register allocator limitation precludes 'useRegisterAtStart()' here. const LAllocation oldval = useRegister(ins->oldval()); LCompareExchangeTypedArrayElement *lir = @@ -419,7 +418,8 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp } void -LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins) +LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins, + bool useI386ByteRegisters) { MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped); MOZ_ASSERT(ins->arrayType() != Scalar::Float32); @@ -452,7 +452,7 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen // // Note the placement of L, cmpxchg will update eax with *mem if // *mem does not have the expected value, so reloading it at the - // top of the loop is redundant. + // top of the loop would be redundant. // // If the array is not a uint32 array then: // - eax should be the output (one result of the cmpxchg) @@ -488,12 +488,11 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen } else { tempDef1 = temp(); } - } else if (ins->isByteArray()) { + } else if (useI386ByteRegisters && ins->isByteArray()) { value = useFixed(ins->value(), ebx); if (bitOp) tempDef1 = tempFixed(ecx); - } - else { + } else { value = useRegister(ins->value()); if (bitOp) tempDef1 = temp(); @@ -508,133 +507,6 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen define(lir, ins); } -void -LIRGeneratorX86Shared::lowerAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins, - const LDefinition& addrTemp) -{ - MDefinition *ptr = ins->ptr(); - MOZ_ASSERT(ptr->type() == MIRType_Int32); - - bool byteArray = false; - switch (ins->accessType()) { - case Scalar::Int8: - case Scalar::Uint8: - byteArray = true; - break; - case Scalar::Int16: - case Scalar::Uint16: - case Scalar::Int32: - case Scalar::Uint32: - break; - default: - MOZ_CRASH("Unexpected array type"); - } - - // Register allocation: - // - // The output must be eax. - // - // oldval must be in a register (it'll eventually end up in eax so - // ideally it's there to begin with). - // - // newval will need to be in a register. If the source is a byte - // array then the newval must be a register that has a byte size: - // ebx, ecx, or edx, since eax is taken for the output in this - // case. We pick ebx but it would be more flexible to pick any of - // the three that wasn't being used. - // - // Bug #1077036 describes some optimization opportunities. - - const LAllocation newval = byteArray ? useFixed(ins->newValue(), ebx) : useRegister(ins->newValue()); - const LAllocation oldval = useRegister(ins->oldValue()); - - LAsmJSCompareExchangeHeap *lir = - new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval); - - lir->setAddrTemp(addrTemp); - defineFixed(lir, ins, LAllocation(AnyRegister(eax))); -} - -void -LIRGeneratorX86Shared::lowerAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins, - const LDefinition& addrTemp) -{ - MDefinition *ptr = ins->ptr(); - MOZ_ASSERT(ptr->type() == MIRType_Int32); - - bool byteArray = false; - switch (ins->accessType()) { - case Scalar::Int8: - case Scalar::Uint8: - byteArray = true; - break; - case Scalar::Int16: - case Scalar::Uint16: - case Scalar::Int32: - case Scalar::Uint32: - break; - default: - MOZ_CRASH("Unexpected array type"); - } - - // Register allocation: - // - // For ADD and SUB we'll use XADD: - // - // movl value, output - // lock xaddl output, mem - // - // For the 8-bit variants XADD needs a byte register for the - // output only, we can still set up with movl; just pin the output - // to eax (or ebx / ecx / edx). - // - // For AND/OR/XOR we need to use a CMPXCHG loop: - // - // movl *mem, eax - // L: mov eax, temp - // andl value, temp - // lock cmpxchg temp, mem ; reads eax also - // jnz L - // ; result in eax - // - // Note the placement of L, cmpxchg will update eax with *mem if - // *mem does not have the expected value, so reloading it at the - // top of the loop is redundant. - // - // We want to fix eax as the output. We also need a temp for - // the intermediate value. - // - // For the 8-bit variants the temp must have a byte register. - // - // There are optimization opportunities: - // - when the result is unused, Bug #1077014. - // - better register allocation and instruction selection, Bug #1077036. - - bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp); - LDefinition tempDef = LDefinition::BogusTemp(); - LAllocation value; - - // Optimization opportunity: "value" need not be pinned to something that - // has a byte register unless the back-end insists on using a byte move - // for the setup or the payload computation, which really it need not do. - - if (byteArray) { - value = useFixed(ins->value(), ebx); - if (bitOp) - tempDef = tempFixed(ecx); - } else { - value = useRegister(ins->value()); - if (bitOp) - tempDef = temp(); - } - - LAsmJSAtomicBinopHeap *lir = - new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef); - - lir->setAddrTemp(addrTemp); - defineFixed(lir, ins, LAllocation(AnyRegister(eax))); -} - void LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith *ins) { diff --git a/js/src/jit/shared/Lowering-x86-shared.h b/js/src/jit/shared/Lowering-x86-shared.h index ab9e491843e..7bd1e3031e5 100644 --- a/js/src/jit/shared/Lowering-x86-shared.h +++ b/js/src/jit/shared/Lowering-x86-shared.h @@ -56,10 +56,10 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared void visitSimdSelect(MSimdSelect *ins); void visitSimdSplatX4(MSimdSplatX4 *ins); void visitSimdValueX4(MSimdValueX4 *ins); - void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins); - void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins); - void lowerAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins, const LDefinition& addrTemp); - void lowerAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins, const LDefinition& addrTemp); + void lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins, + bool useI386ByteRegisters); + void lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins, + bool useI386ByteRegisters); }; } // namespace jit diff --git a/js/src/jit/x64/Lowering-x64.cpp b/js/src/jit/x64/Lowering-x64.cpp index 7b5c5be1c16..ec5c5c51152 100644 --- a/js/src/jit/x64/Lowering-x64.cpp +++ b/js/src/jit/x64/Lowering-x64.cpp @@ -131,6 +131,18 @@ LIRGeneratorX64::lowerUntypedPhiInput(MPhi *phi, uint32_t inputPosition, LBlock lowerTypedPhiInput(phi, inputPosition, block, lirIndex); } +void +LIRGeneratorX64::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins) +{ + lowerCompareExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ false); +} + +void +LIRGeneratorX64::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins) +{ + lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ false); +} + void LIRGeneratorX64::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins) { @@ -200,13 +212,58 @@ LIRGeneratorX64::visitAsmJSStoreHeap(MAsmJSStoreHeap *ins) void LIRGeneratorX64::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins) { - lowerAsmJSCompareExchangeHeap(ins, LDefinition::BogusTemp()); + MDefinition *ptr = ins->ptr(); + MOZ_ASSERT(ptr->type() == MIRType_Int32); + + const LAllocation oldval = useRegister(ins->oldValue()); + const LAllocation newval = useRegister(ins->newValue()); + + LAsmJSCompareExchangeHeap *lir = + new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval); + + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); } void LIRGeneratorX64::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins) { - lowerAsmJSAtomicBinopHeap(ins, LDefinition::BogusTemp()); + MDefinition *ptr = ins->ptr(); + MOZ_ASSERT(ptr->type() == MIRType_Int32); + + // Register allocation: + // + // For ADD and SUB we'll use XADD (with word and byte ops as appropriate): + // + // movl value, output + // lock xaddl output, mem + // + // For AND/OR/XOR we need to use a CMPXCHG loop: + // + // movl *mem, eax + // L: mov eax, temp + // andl value, temp + // lock cmpxchg temp, mem ; reads eax also + // jnz L + // ; result in eax + // + // Note the placement of L, cmpxchg will update eax with *mem if + // *mem does not have the expected value, so reloading it at the + // top of the loop would be redundant. + // + // We want to fix eax as the output. We also need a temp for + // the intermediate value. + // + // There are optimization opportunities: + // - when the result is unused, Bug #1077014. + + bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp); + LAllocation value = useRegister(ins->value()); + LDefinition tempDef = bitOp ? temp() : LDefinition::BogusTemp(); + + LAsmJSAtomicBinopHeap *lir = + new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef); + + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); } void diff --git a/js/src/jit/x64/Lowering-x64.h b/js/src/jit/x64/Lowering-x64.h index 708695e9710..aa1b87688d0 100644 --- a/js/src/jit/x64/Lowering-x64.h +++ b/js/src/jit/x64/Lowering-x64.h @@ -42,6 +42,8 @@ class LIRGeneratorX64 : public LIRGeneratorX86Shared void visitBox(MBox *box); void visitUnbox(MUnbox *unbox); void visitReturn(MReturn *ret); + void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins); + void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins); void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins); void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins); void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins); diff --git a/js/src/jit/x86/Lowering-x86.cpp b/js/src/jit/x86/Lowering-x86.cpp index 206df7fdd30..8292dffb1ab 100644 --- a/js/src/jit/x86/Lowering-x86.cpp +++ b/js/src/jit/x86/Lowering-x86.cpp @@ -182,6 +182,18 @@ LIRGeneratorX86::lowerUntypedPhiInput(MPhi *phi, uint32_t inputPosition, LBlock payload->setOperand(inputPosition, LUse(VirtualRegisterOfPayload(operand), LUse::ANY)); } +void +LIRGeneratorX86::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins) +{ + lowerCompareExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ true); +} + +void +LIRGeneratorX86::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins) +{ + lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ true); +} + void LIRGeneratorX86::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins) { @@ -273,13 +285,101 @@ LIRGeneratorX86::visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic void LIRGeneratorX86::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins) { - lowerAsmJSCompareExchangeHeap(ins, temp()); + MOZ_ASSERT(ins->accessType() < Scalar::Float32); + + MDefinition *ptr = ins->ptr(); + MOZ_ASSERT(ptr->type() == MIRType_Int32); + + bool byteArray = byteSize(ins->accessType()) == 1; + + // Register allocation: + // + // The output must be eax. + // + // oldval must be in a register. + // + // newval must be in a register. If the source is a byte array + // then newval must be a register that has a byte size: on x86 + // this must be ebx, ecx, or edx (eax is taken for the output). + // + // Bug #1077036 describes some optimization opportunities. + + const LAllocation oldval = useRegister(ins->oldValue()); + const LAllocation newval = byteArray ? useFixed(ins->newValue(), ebx) : useRegister(ins->newValue()); + + LAsmJSCompareExchangeHeap *lir = + new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval); + + lir->setAddrTemp(temp()); + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); } void LIRGeneratorX86::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins) { - lowerAsmJSAtomicBinopHeap(ins, temp()); + MOZ_ASSERT(ins->accessType() < Scalar::Float32); + + MDefinition *ptr = ins->ptr(); + MOZ_ASSERT(ptr->type() == MIRType_Int32); + + bool byteArray = byteSize(ins->accessType()) == 1; + + // Register allocation: + // + // For ADD and SUB we'll use XADD: + // + // movl value, output + // lock xaddl output, mem + // + // For the 8-bit variants XADD needs a byte register for the + // output only, we can still set up with movl; just pin the output + // to eax (or ebx / ecx / edx). + // + // For AND/OR/XOR we need to use a CMPXCHG loop: + // + // movl *mem, eax + // L: mov eax, temp + // andl value, temp + // lock cmpxchg temp, mem ; reads eax also + // jnz L + // ; result in eax + // + // Note the placement of L, cmpxchg will update eax with *mem if + // *mem does not have the expected value, so reloading it at the + // top of the loop would be redundant. + // + // We want to fix eax as the output. We also need a temp for + // the intermediate value. + // + // For the 8-bit variants the temp must have a byte register. + // + // There are optimization opportunities: + // - when the result is unused, Bug #1077014. + // - better register allocation and instruction selection, Bug #1077036. + + bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp); + LDefinition tempDef = LDefinition::BogusTemp(); + LAllocation value; + + // Optimization opportunity: "value" need not be pinned to something that + // has a byte register unless the back-end insists on using a byte move + // for the setup or the payload computation, which really it need not do. + + if (byteArray) { + value = useFixed(ins->value(), ebx); + if (bitOp) + tempDef = tempFixed(ecx); + } else { + value = useRegister(ins->value()); + if (bitOp) + tempDef = temp(); + } + + LAsmJSAtomicBinopHeap *lir = + new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef); + + lir->setAddrTemp(temp()); + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); } void diff --git a/js/src/jit/x86/Lowering-x86.h b/js/src/jit/x86/Lowering-x86.h index 7c62309b119..2ea268982fb 100644 --- a/js/src/jit/x86/Lowering-x86.h +++ b/js/src/jit/x86/Lowering-x86.h @@ -48,6 +48,8 @@ class LIRGeneratorX86 : public LIRGeneratorX86Shared void visitBox(MBox *box); void visitUnbox(MUnbox *unbox); void visitReturn(MReturn *ret); + void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins); + void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins); void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins); void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins); void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);