Bug 1138348 - byte ops on x86_64. r=h4writer

This commit is contained in:
Lars T Hansen 2015-03-10 08:29:01 +01:00
parent be4ff4cebe
commit 0686e32aac
6 changed files with 181 additions and 148 deletions

View File

@ -364,7 +364,8 @@ LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32 *ins)
} }
void void
LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins) LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins,
bool useI386ByteRegisters)
{ {
MOZ_ASSERT(ins->arrayType() != Scalar::Float32); MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
MOZ_ASSERT(ins->arrayType() != Scalar::Float64); MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
@ -385,12 +386,11 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
// //
// oldval must be in a register. // oldval must be in a register.
// //
// newval will need to be in a register. If the source is a byte // newval must be in a register. If the source is a byte array
// array then the newval must be a register that has a byte size: // then newval must be a register that has a byte size: on x86
// ebx, ecx, or edx, since eax is taken for the output in this // this must be ebx, ecx, or edx (eax is taken for the output).
// case.
// //
// Bug #1077036 describes some optimization opportunities. // Bug #1077036 describes some further optimization opportunities.
bool fixedOutput = false; bool fixedOutput = false;
LDefinition tempDef = LDefinition::BogusTemp(); LDefinition tempDef = LDefinition::BogusTemp();
@ -400,13 +400,12 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
newval = useRegister(ins->newval()); newval = useRegister(ins->newval());
} else { } else {
fixedOutput = true; fixedOutput = true;
if (ins->isByteArray()) if (useI386ByteRegisters && ins->isByteArray())
newval = useFixed(ins->newval(), ebx); newval = useFixed(ins->newval(), ebx);
else else
newval = useRegister(ins->newval()); newval = useRegister(ins->newval());
} }
// A register allocator limitation precludes 'useRegisterAtStart()' here.
const LAllocation oldval = useRegister(ins->oldval()); const LAllocation oldval = useRegister(ins->oldval());
LCompareExchangeTypedArrayElement *lir = LCompareExchangeTypedArrayElement *lir =
@ -419,7 +418,8 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
} }
void void
LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins) LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins,
bool useI386ByteRegisters)
{ {
MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped); MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
MOZ_ASSERT(ins->arrayType() != Scalar::Float32); MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
@ -452,7 +452,7 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
// //
// Note the placement of L, cmpxchg will update eax with *mem if // Note the placement of L, cmpxchg will update eax with *mem if
// *mem does not have the expected value, so reloading it at the // *mem does not have the expected value, so reloading it at the
// top of the loop is redundant. // top of the loop would be redundant.
// //
// If the array is not a uint32 array then: // If the array is not a uint32 array then:
// - eax should be the output (one result of the cmpxchg) // - eax should be the output (one result of the cmpxchg)
@ -488,12 +488,11 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
} else { } else {
tempDef1 = temp(); tempDef1 = temp();
} }
} else if (ins->isByteArray()) { } else if (useI386ByteRegisters && ins->isByteArray()) {
value = useFixed(ins->value(), ebx); value = useFixed(ins->value(), ebx);
if (bitOp) if (bitOp)
tempDef1 = tempFixed(ecx); tempDef1 = tempFixed(ecx);
} } else {
else {
value = useRegister(ins->value()); value = useRegister(ins->value());
if (bitOp) if (bitOp)
tempDef1 = temp(); tempDef1 = temp();
@ -508,133 +507,6 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
define(lir, ins); define(lir, ins);
} }
void
LIRGeneratorX86Shared::lowerAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins,
const LDefinition& addrTemp)
{
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
bool byteArray = false;
switch (ins->accessType()) {
case Scalar::Int8:
case Scalar::Uint8:
byteArray = true;
break;
case Scalar::Int16:
case Scalar::Uint16:
case Scalar::Int32:
case Scalar::Uint32:
break;
default:
MOZ_CRASH("Unexpected array type");
}
// Register allocation:
//
// The output must be eax.
//
// oldval must be in a register (it'll eventually end up in eax so
// ideally it's there to begin with).
//
// newval will need to be in a register. If the source is a byte
// array then the newval must be a register that has a byte size:
// ebx, ecx, or edx, since eax is taken for the output in this
// case. We pick ebx but it would be more flexible to pick any of
// the three that wasn't being used.
//
// Bug #1077036 describes some optimization opportunities.
const LAllocation newval = byteArray ? useFixed(ins->newValue(), ebx) : useRegister(ins->newValue());
const LAllocation oldval = useRegister(ins->oldValue());
LAsmJSCompareExchangeHeap *lir =
new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
lir->setAddrTemp(addrTemp);
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void
LIRGeneratorX86Shared::lowerAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins,
const LDefinition& addrTemp)
{
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
bool byteArray = false;
switch (ins->accessType()) {
case Scalar::Int8:
case Scalar::Uint8:
byteArray = true;
break;
case Scalar::Int16:
case Scalar::Uint16:
case Scalar::Int32:
case Scalar::Uint32:
break;
default:
MOZ_CRASH("Unexpected array type");
}
// Register allocation:
//
// For ADD and SUB we'll use XADD:
//
// movl value, output
// lock xaddl output, mem
//
// For the 8-bit variants XADD needs a byte register for the
// output only, we can still set up with movl; just pin the output
// to eax (or ebx / ecx / edx).
//
// For AND/OR/XOR we need to use a CMPXCHG loop:
//
// movl *mem, eax
// L: mov eax, temp
// andl value, temp
// lock cmpxchg temp, mem ; reads eax also
// jnz L
// ; result in eax
//
// Note the placement of L, cmpxchg will update eax with *mem if
// *mem does not have the expected value, so reloading it at the
// top of the loop is redundant.
//
// We want to fix eax as the output. We also need a temp for
// the intermediate value.
//
// For the 8-bit variants the temp must have a byte register.
//
// There are optimization opportunities:
// - when the result is unused, Bug #1077014.
// - better register allocation and instruction selection, Bug #1077036.
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
LDefinition tempDef = LDefinition::BogusTemp();
LAllocation value;
// Optimization opportunity: "value" need not be pinned to something that
// has a byte register unless the back-end insists on using a byte move
// for the setup or the payload computation, which really it need not do.
if (byteArray) {
value = useFixed(ins->value(), ebx);
if (bitOp)
tempDef = tempFixed(ecx);
} else {
value = useRegister(ins->value());
if (bitOp)
tempDef = temp();
}
LAsmJSAtomicBinopHeap *lir =
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
lir->setAddrTemp(addrTemp);
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void void
LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith *ins) LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith *ins)
{ {

View File

@ -56,10 +56,10 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared
void visitSimdSelect(MSimdSelect *ins); void visitSimdSelect(MSimdSelect *ins);
void visitSimdSplatX4(MSimdSplatX4 *ins); void visitSimdSplatX4(MSimdSplatX4 *ins);
void visitSimdValueX4(MSimdValueX4 *ins); void visitSimdValueX4(MSimdValueX4 *ins);
void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins); void lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins,
void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins); bool useI386ByteRegisters);
void lowerAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins, const LDefinition& addrTemp); void lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins,
void lowerAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins, const LDefinition& addrTemp); bool useI386ByteRegisters);
}; };
} // namespace jit } // namespace jit

View File

@ -131,6 +131,18 @@ LIRGeneratorX64::lowerUntypedPhiInput(MPhi *phi, uint32_t inputPosition, LBlock
lowerTypedPhiInput(phi, inputPosition, block, lirIndex); lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
} }
void
LIRGeneratorX64::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
{
lowerCompareExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ false);
}
void
LIRGeneratorX64::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
{
lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ false);
}
void void
LIRGeneratorX64::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins) LIRGeneratorX64::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins)
{ {
@ -200,13 +212,58 @@ LIRGeneratorX64::visitAsmJSStoreHeap(MAsmJSStoreHeap *ins)
void void
LIRGeneratorX64::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins) LIRGeneratorX64::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins)
{ {
lowerAsmJSCompareExchangeHeap(ins, LDefinition::BogusTemp()); MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
const LAllocation oldval = useRegister(ins->oldValue());
const LAllocation newval = useRegister(ins->newValue());
LAsmJSCompareExchangeHeap *lir =
new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
} }
void void
LIRGeneratorX64::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins) LIRGeneratorX64::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
{ {
lowerAsmJSAtomicBinopHeap(ins, LDefinition::BogusTemp()); MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
// Register allocation:
//
// For ADD and SUB we'll use XADD (with word and byte ops as appropriate):
//
// movl value, output
// lock xaddl output, mem
//
// For AND/OR/XOR we need to use a CMPXCHG loop:
//
// movl *mem, eax
// L: mov eax, temp
// andl value, temp
// lock cmpxchg temp, mem ; reads eax also
// jnz L
// ; result in eax
//
// Note the placement of L, cmpxchg will update eax with *mem if
// *mem does not have the expected value, so reloading it at the
// top of the loop would be redundant.
//
// We want to fix eax as the output. We also need a temp for
// the intermediate value.
//
// There are optimization opportunities:
// - when the result is unused, Bug #1077014.
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
LAllocation value = useRegister(ins->value());
LDefinition tempDef = bitOp ? temp() : LDefinition::BogusTemp();
LAsmJSAtomicBinopHeap *lir =
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
} }
void void

View File

@ -42,6 +42,8 @@ class LIRGeneratorX64 : public LIRGeneratorX86Shared
void visitBox(MBox *box); void visitBox(MBox *box);
void visitUnbox(MUnbox *unbox); void visitUnbox(MUnbox *unbox);
void visitReturn(MReturn *ret); void visitReturn(MReturn *ret);
void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins); void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins); void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins); void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);

View File

@ -182,6 +182,18 @@ LIRGeneratorX86::lowerUntypedPhiInput(MPhi *phi, uint32_t inputPosition, LBlock
payload->setOperand(inputPosition, LUse(VirtualRegisterOfPayload(operand), LUse::ANY)); payload->setOperand(inputPosition, LUse(VirtualRegisterOfPayload(operand), LUse::ANY));
} }
void
LIRGeneratorX86::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
{
lowerCompareExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ true);
}
void
LIRGeneratorX86::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
{
lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ true);
}
void void
LIRGeneratorX86::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins) LIRGeneratorX86::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins)
{ {
@ -273,13 +285,101 @@ LIRGeneratorX86::visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic
void void
LIRGeneratorX86::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins) LIRGeneratorX86::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins)
{ {
lowerAsmJSCompareExchangeHeap(ins, temp()); MOZ_ASSERT(ins->accessType() < Scalar::Float32);
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
bool byteArray = byteSize(ins->accessType()) == 1;
// Register allocation:
//
// The output must be eax.
//
// oldval must be in a register.
//
// newval must be in a register. If the source is a byte array
// then newval must be a register that has a byte size: on x86
// this must be ebx, ecx, or edx (eax is taken for the output).
//
// Bug #1077036 describes some optimization opportunities.
const LAllocation oldval = useRegister(ins->oldValue());
const LAllocation newval = byteArray ? useFixed(ins->newValue(), ebx) : useRegister(ins->newValue());
LAsmJSCompareExchangeHeap *lir =
new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
lir->setAddrTemp(temp());
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
} }
void void
LIRGeneratorX86::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins) LIRGeneratorX86::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
{ {
lowerAsmJSAtomicBinopHeap(ins, temp()); MOZ_ASSERT(ins->accessType() < Scalar::Float32);
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
bool byteArray = byteSize(ins->accessType()) == 1;
// Register allocation:
//
// For ADD and SUB we'll use XADD:
//
// movl value, output
// lock xaddl output, mem
//
// For the 8-bit variants XADD needs a byte register for the
// output only, we can still set up with movl; just pin the output
// to eax (or ebx / ecx / edx).
//
// For AND/OR/XOR we need to use a CMPXCHG loop:
//
// movl *mem, eax
// L: mov eax, temp
// andl value, temp
// lock cmpxchg temp, mem ; reads eax also
// jnz L
// ; result in eax
//
// Note the placement of L, cmpxchg will update eax with *mem if
// *mem does not have the expected value, so reloading it at the
// top of the loop would be redundant.
//
// We want to fix eax as the output. We also need a temp for
// the intermediate value.
//
// For the 8-bit variants the temp must have a byte register.
//
// There are optimization opportunities:
// - when the result is unused, Bug #1077014.
// - better register allocation and instruction selection, Bug #1077036.
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
LDefinition tempDef = LDefinition::BogusTemp();
LAllocation value;
// Optimization opportunity: "value" need not be pinned to something that
// has a byte register unless the back-end insists on using a byte move
// for the setup or the payload computation, which really it need not do.
if (byteArray) {
value = useFixed(ins->value(), ebx);
if (bitOp)
tempDef = tempFixed(ecx);
} else {
value = useRegister(ins->value());
if (bitOp)
tempDef = temp();
}
LAsmJSAtomicBinopHeap *lir =
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
lir->setAddrTemp(temp());
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
} }
void void

View File

@ -48,6 +48,8 @@ class LIRGeneratorX86 : public LIRGeneratorX86Shared
void visitBox(MBox *box); void visitBox(MBox *box);
void visitUnbox(MUnbox *unbox); void visitUnbox(MUnbox *unbox);
void visitReturn(MReturn *ret); void visitReturn(MReturn *ret);
void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins); void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins); void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins); void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);