Bug 1138348 - byte ops on x86_64. r=h4writer

This commit is contained in:
Lars T Hansen 2015-03-10 08:29:01 +01:00
parent be4ff4cebe
commit 0686e32aac
6 changed files with 181 additions and 148 deletions

View File

@@ -364,7 +364,8 @@ LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32 *ins)
}
void
LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins,
bool useI386ByteRegisters)
{
MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
@@ -385,12 +386,11 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
//
// oldval must be in a register.
//
// newval will need to be in a register. If the source is a byte
// array then the newval must be a register that has a byte size:
// ebx, ecx, or edx, since eax is taken for the output in this
// case.
// newval must be in a register. If the source is a byte array
// then newval must be a register that has a byte size: on x86
// this must be ebx, ecx, or edx (eax is taken for the output).
//
// Bug #1077036 describes some optimization opportunities.
// Bug #1077036 describes some further optimization opportunities.
bool fixedOutput = false;
LDefinition tempDef = LDefinition::BogusTemp();
@@ -400,13 +400,12 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
newval = useRegister(ins->newval());
} else {
fixedOutput = true;
if (ins->isByteArray())
if (useI386ByteRegisters && ins->isByteArray())
newval = useFixed(ins->newval(), ebx);
else
newval = useRegister(ins->newval());
}
// A register allocator limitation precludes 'useRegisterAtStart()' here.
const LAllocation oldval = useRegister(ins->oldval());
LCompareExchangeTypedArrayElement *lir =
@@ -419,7 +418,8 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
}
void
LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins,
bool useI386ByteRegisters)
{
MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
@@ -452,7 +452,7 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
//
// Note the placement of L, cmpxchg will update eax with *mem if
// *mem does not have the expected value, so reloading it at the
// top of the loop is redundant.
// top of the loop would be redundant.
//
// If the array is not a uint32 array then:
// - eax should be the output (one result of the cmpxchg)
@@ -488,12 +488,11 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
} else {
tempDef1 = temp();
}
} else if (ins->isByteArray()) {
} else if (useI386ByteRegisters && ins->isByteArray()) {
value = useFixed(ins->value(), ebx);
if (bitOp)
tempDef1 = tempFixed(ecx);
}
else {
} else {
value = useRegister(ins->value());
if (bitOp)
tempDef1 = temp();
@@ -508,133 +507,6 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
define(lir, ins);
}
void
LIRGeneratorX86Shared::lowerAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins,
const LDefinition& addrTemp)
{
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
bool byteArray = false;
switch (ins->accessType()) {
case Scalar::Int8:
case Scalar::Uint8:
byteArray = true;
break;
case Scalar::Int16:
case Scalar::Uint16:
case Scalar::Int32:
case Scalar::Uint32:
break;
default:
MOZ_CRASH("Unexpected array type");
}
// Register allocation:
//
// The output must be eax.
//
// oldval must be in a register (it'll eventually end up in eax so
// ideally it's there to begin with).
//
// newval will need to be in a register. If the source is a byte
// array then the newval must be a register that has a byte size:
// ebx, ecx, or edx, since eax is taken for the output in this
// case. We pick ebx but it would be more flexible to pick any of
// the three that wasn't being used.
//
// Bug #1077036 describes some optimization opportunities.
const LAllocation newval = byteArray ? useFixed(ins->newValue(), ebx) : useRegister(ins->newValue());
const LAllocation oldval = useRegister(ins->oldValue());
LAsmJSCompareExchangeHeap *lir =
new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
lir->setAddrTemp(addrTemp);
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void
LIRGeneratorX86Shared::lowerAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins,
const LDefinition& addrTemp)
{
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
bool byteArray = false;
switch (ins->accessType()) {
case Scalar::Int8:
case Scalar::Uint8:
byteArray = true;
break;
case Scalar::Int16:
case Scalar::Uint16:
case Scalar::Int32:
case Scalar::Uint32:
break;
default:
MOZ_CRASH("Unexpected array type");
}
// Register allocation:
//
// For ADD and SUB we'll use XADD:
//
// movl value, output
// lock xaddl output, mem
//
// For the 8-bit variants XADD needs a byte register for the
// output only, we can still set up with movl; just pin the output
// to eax (or ebx / ecx / edx).
//
// For AND/OR/XOR we need to use a CMPXCHG loop:
//
// movl *mem, eax
// L: mov eax, temp
// andl value, temp
// lock cmpxchg temp, mem ; reads eax also
// jnz L
// ; result in eax
//
// Note the placement of L, cmpxchg will update eax with *mem if
// *mem does not have the expected value, so reloading it at the
// top of the loop is redundant.
//
// We want to fix eax as the output. We also need a temp for
// the intermediate value.
//
// For the 8-bit variants the temp must have a byte register.
//
// There are optimization opportunities:
// - when the result is unused, Bug #1077014.
// - better register allocation and instruction selection, Bug #1077036.
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
LDefinition tempDef = LDefinition::BogusTemp();
LAllocation value;
// Optimization opportunity: "value" need not be pinned to something that
// has a byte register unless the back-end insists on using a byte move
// for the setup or the payload computation, which really it need not do.
if (byteArray) {
value = useFixed(ins->value(), ebx);
if (bitOp)
tempDef = tempFixed(ecx);
} else {
value = useRegister(ins->value());
if (bitOp)
tempDef = temp();
}
LAsmJSAtomicBinopHeap *lir =
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
lir->setAddrTemp(addrTemp);
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void
LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith *ins)
{

View File

@@ -56,10 +56,10 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared
void visitSimdSelect(MSimdSelect *ins);
void visitSimdSplatX4(MSimdSplatX4 *ins);
void visitSimdValueX4(MSimdValueX4 *ins);
void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
void lowerAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins, const LDefinition& addrTemp);
void lowerAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins, const LDefinition& addrTemp);
void lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins,
bool useI386ByteRegisters);
void lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins,
bool useI386ByteRegisters);
};
} // namespace jit

View File

@@ -131,6 +131,18 @@ LIRGeneratorX64::lowerUntypedPhiInput(MPhi *phi, uint32_t inputPosition, LBlock
lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
}
void
LIRGeneratorX64::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
{
lowerCompareExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ false);
}
void
LIRGeneratorX64::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
{
lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ false);
}
void
LIRGeneratorX64::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins)
{
@@ -200,13 +212,58 @@ LIRGeneratorX64::visitAsmJSStoreHeap(MAsmJSStoreHeap *ins)
void
LIRGeneratorX64::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins)
{
lowerAsmJSCompareExchangeHeap(ins, LDefinition::BogusTemp());
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
const LAllocation oldval = useRegister(ins->oldValue());
const LAllocation newval = useRegister(ins->newValue());
LAsmJSCompareExchangeHeap *lir =
new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void
LIRGeneratorX64::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
{
lowerAsmJSAtomicBinopHeap(ins, LDefinition::BogusTemp());
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
// Register allocation:
//
// For ADD and SUB we'll use XADD (with word and byte ops as appropriate):
//
// movl value, output
// lock xaddl output, mem
//
// For AND/OR/XOR we need to use a CMPXCHG loop:
//
// movl *mem, eax
// L: mov eax, temp
// andl value, temp
// lock cmpxchg temp, mem ; reads eax also
// jnz L
// ; result in eax
//
// Note the placement of L, cmpxchg will update eax with *mem if
// *mem does not have the expected value, so reloading it at the
// top of the loop would be redundant.
//
// We want to fix eax as the output. We also need a temp for
// the intermediate value.
//
// There are optimization opportunities:
// - when the result is unused, Bug #1077014.
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
LAllocation value = useRegister(ins->value());
LDefinition tempDef = bitOp ? temp() : LDefinition::BogusTemp();
LAsmJSAtomicBinopHeap *lir =
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void

View File

@@ -42,6 +42,8 @@ class LIRGeneratorX64 : public LIRGeneratorX86Shared
void visitBox(MBox *box);
void visitUnbox(MUnbox *unbox);
void visitReturn(MReturn *ret);
void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);

View File

@@ -182,6 +182,18 @@ LIRGeneratorX86::lowerUntypedPhiInput(MPhi *phi, uint32_t inputPosition, LBlock
payload->setOperand(inputPosition, LUse(VirtualRegisterOfPayload(operand), LUse::ANY));
}
void
LIRGeneratorX86::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
{
lowerCompareExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ true);
}
void
LIRGeneratorX86::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
{
lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ true);
}
void
LIRGeneratorX86::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins)
{
@@ -273,13 +285,101 @@ LIRGeneratorX86::visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic
void
LIRGeneratorX86::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins)
{
lowerAsmJSCompareExchangeHeap(ins, temp());
MOZ_ASSERT(ins->accessType() < Scalar::Float32);
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
bool byteArray = byteSize(ins->accessType()) == 1;
// Register allocation:
//
// The output must be eax.
//
// oldval must be in a register.
//
// newval must be in a register. If the source is a byte array
// then newval must be a register that has a byte size: on x86
// this must be ebx, ecx, or edx (eax is taken for the output).
//
// Bug #1077036 describes some optimization opportunities.
const LAllocation oldval = useRegister(ins->oldValue());
const LAllocation newval = byteArray ? useFixed(ins->newValue(), ebx) : useRegister(ins->newValue());
LAsmJSCompareExchangeHeap *lir =
new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
lir->setAddrTemp(temp());
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void
LIRGeneratorX86::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
{
lowerAsmJSAtomicBinopHeap(ins, temp());
MOZ_ASSERT(ins->accessType() < Scalar::Float32);
MDefinition *ptr = ins->ptr();
MOZ_ASSERT(ptr->type() == MIRType_Int32);
bool byteArray = byteSize(ins->accessType()) == 1;
// Register allocation:
//
// For ADD and SUB we'll use XADD:
//
// movl value, output
// lock xaddl output, mem
//
// For the 8-bit variants XADD needs a byte register for the
// output only, we can still set up with movl; just pin the output
// to eax (or ebx / ecx / edx).
//
// For AND/OR/XOR we need to use a CMPXCHG loop:
//
// movl *mem, eax
// L: mov eax, temp
// andl value, temp
// lock cmpxchg temp, mem ; reads eax also
// jnz L
// ; result in eax
//
// Note the placement of L, cmpxchg will update eax with *mem if
// *mem does not have the expected value, so reloading it at the
// top of the loop would be redundant.
//
// We want to fix eax as the output. We also need a temp for
// the intermediate value.
//
// For the 8-bit variants the temp must have a byte register.
//
// There are optimization opportunities:
// - when the result is unused, Bug #1077014.
// - better register allocation and instruction selection, Bug #1077036.
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
LDefinition tempDef = LDefinition::BogusTemp();
LAllocation value;
// Optimization opportunity: "value" need not be pinned to something that
// has a byte register unless the back-end insists on using a byte move
// for the setup or the payload computation, which really it need not do.
if (byteArray) {
value = useFixed(ins->value(), ebx);
if (bitOp)
tempDef = tempFixed(ecx);
} else {
value = useRegister(ins->value());
if (bitOp)
tempDef = temp();
}
LAsmJSAtomicBinopHeap *lir =
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
lir->setAddrTemp(temp());
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
void

View File

@@ -48,6 +48,8 @@ class LIRGeneratorX86 : public LIRGeneratorX86Shared
void visitBox(MBox *box);
void visitUnbox(MUnbox *unbox);
void visitReturn(MReturn *ret);
void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);