mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 1138348 - byte ops on x86_64. r=h4writer
This commit is contained in:
parent
be4ff4cebe
commit
0686e32aac
@ -364,7 +364,8 @@ LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32 *ins)
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
|
||||
LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins,
|
||||
bool useI386ByteRegisters)
|
||||
{
|
||||
MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
|
||||
MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
|
||||
@ -385,12 +386,11 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
|
||||
//
|
||||
// oldval must be in a register.
|
||||
//
|
||||
// newval will need to be in a register. If the source is a byte
|
||||
// array then the newval must be a register that has a byte size:
|
||||
// ebx, ecx, or edx, since eax is taken for the output in this
|
||||
// case.
|
||||
// newval must be in a register. If the source is a byte array
|
||||
// then newval must be a register that has a byte size: on x86
|
||||
// this must be ebx, ecx, or edx (eax is taken for the output).
|
||||
//
|
||||
// Bug #1077036 describes some optimization opportunities.
|
||||
// Bug #1077036 describes some further optimization opportunities.
|
||||
|
||||
bool fixedOutput = false;
|
||||
LDefinition tempDef = LDefinition::BogusTemp();
|
||||
@ -400,13 +400,12 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
|
||||
newval = useRegister(ins->newval());
|
||||
} else {
|
||||
fixedOutput = true;
|
||||
if (ins->isByteArray())
|
||||
if (useI386ByteRegisters && ins->isByteArray())
|
||||
newval = useFixed(ins->newval(), ebx);
|
||||
else
|
||||
newval = useRegister(ins->newval());
|
||||
}
|
||||
|
||||
// A register allocator limitation precludes 'useRegisterAtStart()' here.
|
||||
const LAllocation oldval = useRegister(ins->oldval());
|
||||
|
||||
LCompareExchangeTypedArrayElement *lir =
|
||||
@ -419,7 +418,8 @@ LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTyp
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
|
||||
LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins,
|
||||
bool useI386ByteRegisters)
|
||||
{
|
||||
MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
|
||||
MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
|
||||
@ -452,7 +452,7 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
|
||||
//
|
||||
// Note the placement of L, cmpxchg will update eax with *mem if
|
||||
// *mem does not have the expected value, so reloading it at the
|
||||
// top of the loop is redundant.
|
||||
// top of the loop would be redundant.
|
||||
//
|
||||
// If the array is not a uint32 array then:
|
||||
// - eax should be the output (one result of the cmpxchg)
|
||||
@ -488,12 +488,11 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
|
||||
} else {
|
||||
tempDef1 = temp();
|
||||
}
|
||||
} else if (ins->isByteArray()) {
|
||||
} else if (useI386ByteRegisters && ins->isByteArray()) {
|
||||
value = useFixed(ins->value(), ebx);
|
||||
if (bitOp)
|
||||
tempDef1 = tempFixed(ecx);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
value = useRegister(ins->value());
|
||||
if (bitOp)
|
||||
tempDef1 = temp();
|
||||
@ -508,133 +507,6 @@ LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
|
||||
define(lir, ins);
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86Shared::lowerAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins,
|
||||
const LDefinition& addrTemp)
|
||||
{
|
||||
MDefinition *ptr = ins->ptr();
|
||||
MOZ_ASSERT(ptr->type() == MIRType_Int32);
|
||||
|
||||
bool byteArray = false;
|
||||
switch (ins->accessType()) {
|
||||
case Scalar::Int8:
|
||||
case Scalar::Uint8:
|
||||
byteArray = true;
|
||||
break;
|
||||
case Scalar::Int16:
|
||||
case Scalar::Uint16:
|
||||
case Scalar::Int32:
|
||||
case Scalar::Uint32:
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("Unexpected array type");
|
||||
}
|
||||
|
||||
// Register allocation:
|
||||
//
|
||||
// The output must be eax.
|
||||
//
|
||||
// oldval must be in a register (it'll eventually end up in eax so
|
||||
// ideally it's there to begin with).
|
||||
//
|
||||
// newval will need to be in a register. If the source is a byte
|
||||
// array then the newval must be a register that has a byte size:
|
||||
// ebx, ecx, or edx, since eax is taken for the output in this
|
||||
// case. We pick ebx but it would be more flexible to pick any of
|
||||
// the three that wasn't being used.
|
||||
//
|
||||
// Bug #1077036 describes some optimization opportunities.
|
||||
|
||||
const LAllocation newval = byteArray ? useFixed(ins->newValue(), ebx) : useRegister(ins->newValue());
|
||||
const LAllocation oldval = useRegister(ins->oldValue());
|
||||
|
||||
LAsmJSCompareExchangeHeap *lir =
|
||||
new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
|
||||
|
||||
lir->setAddrTemp(addrTemp);
|
||||
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86Shared::lowerAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins,
|
||||
const LDefinition& addrTemp)
|
||||
{
|
||||
MDefinition *ptr = ins->ptr();
|
||||
MOZ_ASSERT(ptr->type() == MIRType_Int32);
|
||||
|
||||
bool byteArray = false;
|
||||
switch (ins->accessType()) {
|
||||
case Scalar::Int8:
|
||||
case Scalar::Uint8:
|
||||
byteArray = true;
|
||||
break;
|
||||
case Scalar::Int16:
|
||||
case Scalar::Uint16:
|
||||
case Scalar::Int32:
|
||||
case Scalar::Uint32:
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("Unexpected array type");
|
||||
}
|
||||
|
||||
// Register allocation:
|
||||
//
|
||||
// For ADD and SUB we'll use XADD:
|
||||
//
|
||||
// movl value, output
|
||||
// lock xaddl output, mem
|
||||
//
|
||||
// For the 8-bit variants XADD needs a byte register for the
|
||||
// output only, we can still set up with movl; just pin the output
|
||||
// to eax (or ebx / ecx / edx).
|
||||
//
|
||||
// For AND/OR/XOR we need to use a CMPXCHG loop:
|
||||
//
|
||||
// movl *mem, eax
|
||||
// L: mov eax, temp
|
||||
// andl value, temp
|
||||
// lock cmpxchg temp, mem ; reads eax also
|
||||
// jnz L
|
||||
// ; result in eax
|
||||
//
|
||||
// Note the placement of L, cmpxchg will update eax with *mem if
|
||||
// *mem does not have the expected value, so reloading it at the
|
||||
// top of the loop is redundant.
|
||||
//
|
||||
// We want to fix eax as the output. We also need a temp for
|
||||
// the intermediate value.
|
||||
//
|
||||
// For the 8-bit variants the temp must have a byte register.
|
||||
//
|
||||
// There are optimization opportunities:
|
||||
// - when the result is unused, Bug #1077014.
|
||||
// - better register allocation and instruction selection, Bug #1077036.
|
||||
|
||||
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
|
||||
LDefinition tempDef = LDefinition::BogusTemp();
|
||||
LAllocation value;
|
||||
|
||||
// Optimization opportunity: "value" need not be pinned to something that
|
||||
// has a byte register unless the back-end insists on using a byte move
|
||||
// for the setup or the payload computation, which really it need not do.
|
||||
|
||||
if (byteArray) {
|
||||
value = useFixed(ins->value(), ebx);
|
||||
if (bitOp)
|
||||
tempDef = tempFixed(ecx);
|
||||
} else {
|
||||
value = useRegister(ins->value());
|
||||
if (bitOp)
|
||||
tempDef = temp();
|
||||
}
|
||||
|
||||
LAsmJSAtomicBinopHeap *lir =
|
||||
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
|
||||
|
||||
lir->setAddrTemp(addrTemp);
|
||||
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith *ins)
|
||||
{
|
||||
|
@ -56,10 +56,10 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared
|
||||
void visitSimdSelect(MSimdSelect *ins);
|
||||
void visitSimdSplatX4(MSimdSplatX4 *ins);
|
||||
void visitSimdValueX4(MSimdValueX4 *ins);
|
||||
void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
|
||||
void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
|
||||
void lowerAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins, const LDefinition& addrTemp);
|
||||
void lowerAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins, const LDefinition& addrTemp);
|
||||
void lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins,
|
||||
bool useI386ByteRegisters);
|
||||
void lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins,
|
||||
bool useI386ByteRegisters);
|
||||
};
|
||||
|
||||
} // namespace jit
|
||||
|
@ -131,6 +131,18 @@ LIRGeneratorX64::lowerUntypedPhiInput(MPhi *phi, uint32_t inputPosition, LBlock
|
||||
lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX64::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
|
||||
{
|
||||
lowerCompareExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ false);
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX64::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
|
||||
{
|
||||
lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ false);
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX64::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins)
|
||||
{
|
||||
@ -200,13 +212,58 @@ LIRGeneratorX64::visitAsmJSStoreHeap(MAsmJSStoreHeap *ins)
|
||||
void
|
||||
LIRGeneratorX64::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins)
|
||||
{
|
||||
lowerAsmJSCompareExchangeHeap(ins, LDefinition::BogusTemp());
|
||||
MDefinition *ptr = ins->ptr();
|
||||
MOZ_ASSERT(ptr->type() == MIRType_Int32);
|
||||
|
||||
const LAllocation oldval = useRegister(ins->oldValue());
|
||||
const LAllocation newval = useRegister(ins->newValue());
|
||||
|
||||
LAsmJSCompareExchangeHeap *lir =
|
||||
new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
|
||||
|
||||
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX64::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
|
||||
{
|
||||
lowerAsmJSAtomicBinopHeap(ins, LDefinition::BogusTemp());
|
||||
MDefinition *ptr = ins->ptr();
|
||||
MOZ_ASSERT(ptr->type() == MIRType_Int32);
|
||||
|
||||
// Register allocation:
|
||||
//
|
||||
// For ADD and SUB we'll use XADD (with word and byte ops as appropriate):
|
||||
//
|
||||
// movl value, output
|
||||
// lock xaddl output, mem
|
||||
//
|
||||
// For AND/OR/XOR we need to use a CMPXCHG loop:
|
||||
//
|
||||
// movl *mem, eax
|
||||
// L: mov eax, temp
|
||||
// andl value, temp
|
||||
// lock cmpxchg temp, mem ; reads eax also
|
||||
// jnz L
|
||||
// ; result in eax
|
||||
//
|
||||
// Note the placement of L, cmpxchg will update eax with *mem if
|
||||
// *mem does not have the expected value, so reloading it at the
|
||||
// top of the loop would be redundant.
|
||||
//
|
||||
// We want to fix eax as the output. We also need a temp for
|
||||
// the intermediate value.
|
||||
//
|
||||
// There are optimization opportunities:
|
||||
// - when the result is unused, Bug #1077014.
|
||||
|
||||
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
|
||||
LAllocation value = useRegister(ins->value());
|
||||
LDefinition tempDef = bitOp ? temp() : LDefinition::BogusTemp();
|
||||
|
||||
LAsmJSAtomicBinopHeap *lir =
|
||||
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
|
||||
|
||||
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -42,6 +42,8 @@ class LIRGeneratorX64 : public LIRGeneratorX86Shared
|
||||
void visitBox(MBox *box);
|
||||
void visitUnbox(MUnbox *unbox);
|
||||
void visitReturn(MReturn *ret);
|
||||
void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
|
||||
void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
|
||||
void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
|
||||
void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
|
||||
void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);
|
||||
|
@ -182,6 +182,18 @@ LIRGeneratorX86::lowerUntypedPhiInput(MPhi *phi, uint32_t inputPosition, LBlock
|
||||
payload->setOperand(inputPosition, LUse(VirtualRegisterOfPayload(operand), LUse::ANY));
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
|
||||
{
|
||||
lowerCompareExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ true);
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
|
||||
{
|
||||
lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ true);
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins)
|
||||
{
|
||||
@ -273,13 +285,101 @@ LIRGeneratorX86::visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic
|
||||
void
|
||||
LIRGeneratorX86::visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins)
|
||||
{
|
||||
lowerAsmJSCompareExchangeHeap(ins, temp());
|
||||
MOZ_ASSERT(ins->accessType() < Scalar::Float32);
|
||||
|
||||
MDefinition *ptr = ins->ptr();
|
||||
MOZ_ASSERT(ptr->type() == MIRType_Int32);
|
||||
|
||||
bool byteArray = byteSize(ins->accessType()) == 1;
|
||||
|
||||
// Register allocation:
|
||||
//
|
||||
// The output must be eax.
|
||||
//
|
||||
// oldval must be in a register.
|
||||
//
|
||||
// newval must be in a register. If the source is a byte array
|
||||
// then newval must be a register that has a byte size: on x86
|
||||
// this must be ebx, ecx, or edx (eax is taken for the output).
|
||||
//
|
||||
// Bug #1077036 describes some optimization opportunities.
|
||||
|
||||
const LAllocation oldval = useRegister(ins->oldValue());
|
||||
const LAllocation newval = byteArray ? useFixed(ins->newValue(), ebx) : useRegister(ins->newValue());
|
||||
|
||||
LAsmJSCompareExchangeHeap *lir =
|
||||
new(alloc()) LAsmJSCompareExchangeHeap(useRegister(ptr), oldval, newval);
|
||||
|
||||
lir->setAddrTemp(temp());
|
||||
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
|
||||
{
|
||||
lowerAsmJSAtomicBinopHeap(ins, temp());
|
||||
MOZ_ASSERT(ins->accessType() < Scalar::Float32);
|
||||
|
||||
MDefinition *ptr = ins->ptr();
|
||||
MOZ_ASSERT(ptr->type() == MIRType_Int32);
|
||||
|
||||
bool byteArray = byteSize(ins->accessType()) == 1;
|
||||
|
||||
// Register allocation:
|
||||
//
|
||||
// For ADD and SUB we'll use XADD:
|
||||
//
|
||||
// movl value, output
|
||||
// lock xaddl output, mem
|
||||
//
|
||||
// For the 8-bit variants XADD needs a byte register for the
|
||||
// output only, we can still set up with movl; just pin the output
|
||||
// to eax (or ebx / ecx / edx).
|
||||
//
|
||||
// For AND/OR/XOR we need to use a CMPXCHG loop:
|
||||
//
|
||||
// movl *mem, eax
|
||||
// L: mov eax, temp
|
||||
// andl value, temp
|
||||
// lock cmpxchg temp, mem ; reads eax also
|
||||
// jnz L
|
||||
// ; result in eax
|
||||
//
|
||||
// Note the placement of L, cmpxchg will update eax with *mem if
|
||||
// *mem does not have the expected value, so reloading it at the
|
||||
// top of the loop would be redundant.
|
||||
//
|
||||
// We want to fix eax as the output. We also need a temp for
|
||||
// the intermediate value.
|
||||
//
|
||||
// For the 8-bit variants the temp must have a byte register.
|
||||
//
|
||||
// There are optimization opportunities:
|
||||
// - when the result is unused, Bug #1077014.
|
||||
// - better register allocation and instruction selection, Bug #1077036.
|
||||
|
||||
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
|
||||
LDefinition tempDef = LDefinition::BogusTemp();
|
||||
LAllocation value;
|
||||
|
||||
// Optimization opportunity: "value" need not be pinned to something that
|
||||
// has a byte register unless the back-end insists on using a byte move
|
||||
// for the setup or the payload computation, which really it need not do.
|
||||
|
||||
if (byteArray) {
|
||||
value = useFixed(ins->value(), ebx);
|
||||
if (bitOp)
|
||||
tempDef = tempFixed(ecx);
|
||||
} else {
|
||||
value = useRegister(ins->value());
|
||||
if (bitOp)
|
||||
tempDef = temp();
|
||||
}
|
||||
|
||||
LAsmJSAtomicBinopHeap *lir =
|
||||
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
|
||||
|
||||
lir->setAddrTemp(temp());
|
||||
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -48,6 +48,8 @@ class LIRGeneratorX86 : public LIRGeneratorX86Shared
|
||||
void visitBox(MBox *box);
|
||||
void visitUnbox(MUnbox *unbox);
|
||||
void visitReturn(MReturn *ret);
|
||||
void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
|
||||
void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
|
||||
void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
|
||||
void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
|
||||
void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);
|
||||
|
Loading…
Reference in New Issue
Block a user