Bug 885186 - Optimize x86/x64 register moves using xchg, xor swap, and push/pop. r=jandem
parent 1973753238
commit b2f0e66ef3
@@ -99,6 +99,9 @@ class MoveResolver
            return disp_ == other.disp_;
        return true;
    }
    bool operator !=(const MoveOperand &other) const {
        return !operator==(other);
    }
};

class Move

@@ -287,6 +287,10 @@ class AssemblerX86Shared
        }
    }

    void xchgl(const Register &src, const Register &dest) {
        masm.xchgl_rr(src.code(), dest.code());
    }

    void movsd(const FloatRegister &src, const FloatRegister &dest) {
        JS_ASSERT(HasSSE2());
        masm.movsd_rr(src.code(), dest.code());
@@ -14,24 +14,127 @@ using namespace js::ion;
MoveEmitterX86::MoveEmitterX86(MacroAssemblerSpecific &masm)
  : inCycle_(false),
    masm(masm),
    pushedAtCycle_(-1),
    pushedAtSpill_(-1),
    spilledReg_(InvalidReg)
    pushedAtCycle_(-1)
{
    pushedAtStart_ = masm.framePushed();
}

// Examine the cycle in moves starting at position i. Determine if it's a
// simple cycle consisting of all register-to-register moves in a single class,
// and whether it can be implemented entirely by swaps.
size_t
MoveEmitterX86::characterizeCycle(const MoveResolver &moves, size_t i,
                                  bool *allGeneralRegs, bool *allFloatRegs)
{
    size_t swapCount = 0;

    for (size_t j = i; ; j++) {
        const Move &move = moves.getMove(j);

        // If it isn't a cycle of registers of the same kind, we won't be able
        // to optimize it.
        if (!move.to().isGeneralReg())
            *allGeneralRegs = false;
        if (!move.to().isFloatReg())
            *allFloatRegs = false;
        if (!*allGeneralRegs && !*allFloatRegs)
            return -1;

        // The first and last move of the cycle are marked with inCycle(). Stop
        // iterating when we see the last one.
        if (j != i && move.inCycle())
            break;

        // Check that this move is actually part of the cycle. This is
        // over-conservative when there are multiple reads from the same source,
        // but that's expected to be rare.
        if (move.from() != moves.getMove(j + 1).to()) {
            *allGeneralRegs = false;
            *allFloatRegs = false;
            return -1;
        }

        swapCount++;
    }

    // Check that the last move cycles back to the first move.
    const Move &move = moves.getMove(i + swapCount);
    if (move.from() != moves.getMove(i).to()) {
        *allGeneralRegs = false;
        *allFloatRegs = false;
        return -1;
    }

    return swapCount;
}
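For intuition, a register cycle of length n can always be resolved with n - 1 swaps, which is what the swapCount returned above counts. A minimal standalone sketch (ordinary C++, not part of the patch, with std::swap standing in for xchg):

#include <cassert>
#include <utility>

int main() {
    // Three "registers" and the parallel-move cycle r0 <- r1, r1 <- r2, r2 <- r0
    // (three moves, swapCount == 2).
    int r0 = 10, r1 = 20, r2 = 30;
    std::swap(r0, r1);   // xchg r0, r1
    std::swap(r1, r2);   // xchg r1, r2
    // Each register now holds its cycle predecessor's old value.
    assert(r0 == 20 && r1 == 30 && r2 == 10);
    return 0;
}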

// If we can emit optimized code for the cycle in moves starting at position i,
// do so, and return true.
bool
MoveEmitterX86::maybeEmitOptimizedCycle(const MoveResolver &moves, size_t i,
                                        bool allGeneralRegs, bool allFloatRegs, size_t swapCount)
{
    if (allGeneralRegs && swapCount <= 2) {
        // Use x86's swap-integer-registers instruction if we only have a few
        // swaps. (x86 also has a swap between registers and memory but it's
        // slow.)
        for (size_t k = 0; k < swapCount; k++)
            masm.xchg(moves.getMove(i + k).to().reg(), moves.getMove(i + k + 1).to().reg());
        return true;
    }

    if (allFloatRegs && swapCount == 1) {
        // There's no xchg for xmm registers, but if we only need a single swap,
        // it's cheap to do an XOR swap.
        FloatRegister a = moves.getMove(i).to().floatReg();
        FloatRegister b = moves.getMove(i + 1).to().floatReg();
        masm.xorpd(a, b);
        masm.xorpd(b, a);
        masm.xorpd(a, b);
        return true;
    }

    return false;
}
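The xorpd triple above is the classic XOR swap: it exchanges the raw bit patterns of the two xmm registers, so it is exact for doubles. The same identity on 64-bit integers, as a small self-contained sketch (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
    uint64_t a = 0x3ff0000000000000ull;   // bit pattern of 1.0
    uint64_t b = 0x4000000000000000ull;   // bit pattern of 2.0
    a ^= b;   // a == old_a ^ old_b
    b ^= a;   // b == old_b ^ (old_a ^ old_b) == old_a
    a ^= b;   // a == (old_a ^ old_b) ^ old_a == old_b
    assert(a == 0x4000000000000000ull && b == 0x3ff0000000000000ull);
    return 0;
}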

void
MoveEmitterX86::emit(const MoveResolver &moves)
{
    if (moves.hasCycles()) {
        // Reserve stack for cycle resolution
        masm.reserveStack(sizeof(double));
        pushedAtCycle_ = masm.framePushed();
    for (size_t i = 0; i < moves.numMoves(); i++) {
        const Move &move = moves.getMove(i);
        const MoveOperand &from = move.from();
        const MoveOperand &to = move.to();

        if (move.inCycle()) {
            // If this is the end of a cycle for which we're using the stack,
            // handle the end.
            if (inCycle_) {
                completeCycle(to, move.kind());
                inCycle_ = false;
                continue;
            }

    for (size_t i = 0; i < moves.numMoves(); i++)
        emit(moves.getMove(i));
            // Characterize the cycle.
            bool allGeneralRegs = true, allFloatRegs = true;
            size_t swapCount = characterizeCycle(moves, i, &allGeneralRegs, &allFloatRegs);

            // Attempt to optimize it to avoid using the stack.
            if (maybeEmitOptimizedCycle(moves, i, allGeneralRegs, allFloatRegs, swapCount)) {
                i += swapCount;
                continue;
            }

            // Otherwise use the stack.
            breakCycle(to, move.kind());
            inCycle_ = true;
        }

        // A normal move which is not part of a cycle.
        if (move.kind() == Move::DOUBLE)
            emitDoubleMove(from, to);
        else
            emitGeneralMove(from, to);
    }
}

MoveEmitterX86::~MoveEmitterX86()
@@ -40,17 +143,20 @@ MoveEmitterX86::~MoveEmitterX86()
}

Operand
MoveEmitterX86::cycleSlot() const
MoveEmitterX86::cycleSlot()
{
    if (pushedAtCycle_ == -1) {
        // Reserve stack for cycle resolution
        masm.reserveStack(sizeof(double));
        pushedAtCycle_ = masm.framePushed();
    }

    return Operand(StackPointer, masm.framePushed() - pushedAtCycle_);
}
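cycleSlot() now reserves its stack word lazily, so cycles resolved purely with swaps never allocate it. A rough sketch of the pattern (hypothetical LazyCycleSlot type, with plain integers standing in for masm.framePushed() and reserveStack; not part of the patch):

#include <cassert>

struct LazyCycleSlot {
    int framePushed = 0;        // stands in for masm.framePushed()
    int pushedAtCycle = -1;     // -1 until the slot is actually needed

    int slotOffset() {
        if (pushedAtCycle == -1) {
            framePushed += 8;             // like masm.reserveStack(sizeof(double))
            pushedAtCycle = framePushed;
        }
        return framePushed - pushedAtCycle;   // offset from the current stack pointer
    }
};

int main() {
    LazyCycleSlot s;
    assert(s.slotOffset() == 0);   // first use reserves the space
    assert(s.slotOffset() == 0);   // later uses reuse it without reserving again
    return 0;
}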

Operand
MoveEmitterX86::spillSlot() const
{
    return Operand(StackPointer, masm.framePushed() - pushedAtSpill_);
}

// Warning, do not use the resulting operand with pop instructions, since they
// compute the effective destination address after altering the stack pointer.
// Use toPopOperand if an Operand is needed for a pop.
Operand
MoveEmitterX86::toOperand(const MoveOperand &operand) const
{
@@ -70,31 +176,32 @@ MoveEmitterX86::toOperand(const MoveOperand &operand) const
    return Operand(operand.floatReg());
}

Register
MoveEmitterX86::tempReg()
// This is the same as toOperand except that it computes an Operand suitable for
// use in a pop.
Operand
MoveEmitterX86::toPopOperand(const MoveOperand &operand) const
{
    if (spilledReg_ != InvalidReg)
        return spilledReg_;
    if (operand.isMemory()) {
        if (operand.base() != StackPointer)
            return Operand(operand.base(), operand.disp());

    // For now, just pick edx/rdx as the eviction point. This is totally
    // random, and if it ends up being bad, we can use actual heuristics later.
    spilledReg_ = edx;
        JS_ASSERT(operand.disp() >= 0);

#ifdef JS_CPU_X64
    JS_ASSERT(edx == rdx);
#endif

    if (pushedAtSpill_ == -1) {
        masm.Push(spilledReg_);
        pushedAtSpill_ = masm.framePushed();
    } else {
        masm.mov(spilledReg_, spillSlot());
        // Otherwise, the stack offset may need to be adjusted.
        // Note the adjustment by the stack slot here, to offset for the fact that pop
        // computes its effective address after incrementing the stack pointer.
        return Operand(StackPointer,
                       operand.disp() + (masm.framePushed() - sizeof(void *) - pushedAtStart_));
    }
    return spilledReg_;
    if (operand.isGeneralReg())
        return Operand(operand.reg());

    JS_ASSERT(operand.isFloatReg());
    return Operand(operand.floatReg());
}
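The sizeof(void *) term compensates for pop computing a memory destination's effective address only after the stack pointer has been incremented. A worked sketch of that arithmetic (plain C++ standing in for the machine stack, not emitter code; assumes 64-bit words):

#include <cassert>
#include <cstdint>

int main() {
    uint64_t stack[8] = {};
    uint64_t *rsp = &stack[4];          // stack pointer at pushedAtStart_
    const int32_t disp = 16;            // destination recorded as [rsp + 16], i.e. &stack[6]

    *--rsp = 42;                        // push: one word goes on the stack
    size_t pushedSinceStart = sizeof(uint64_t);

    // pop [dest]: the value is read and rsp is incremented *before* the
    // destination address is formed, hence the extra "- sizeof(void *)".
    uint64_t v = *rsp++;
    *(uint64_t *)((char *)rsp + disp + pushedSinceStart - sizeof(void *)) = v;

    assert(stack[6] == 42);             // still the slot originally named [rsp + 16]
    return 0;
}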

void
MoveEmitterX86::breakCycle(const MoveOperand &from, const MoveOperand &to, Move::Kind kind)
MoveEmitterX86::breakCycle(const MoveOperand &to, Move::Kind kind)
{
    // There is some pattern:
    // (A -> B)
@@ -110,23 +217,15 @@ MoveEmitterX86::breakCycle(const MoveOperand &from, const MoveOperand &to, Move:
            masm.movsd(to.floatReg(), cycleSlot());
        }
    } else {
        if (to.isMemory()) {
            Register temp = tempReg();
            masm.mov(toOperand(to), temp);
            masm.mov(temp, cycleSlot());
        } else {
            if (to.reg() == spilledReg_) {
                // If the destination was spilled, restore it first.
                masm.mov(spillSlot(), spilledReg_);
                spilledReg_ = InvalidReg;
            }
            masm.mov(to.reg(), cycleSlot());
        }
        if (to.isMemory())
            masm.Push(toOperand(to));
        else
            masm.Push(to.reg());
    }
}

void
MoveEmitterX86::completeCycle(const MoveOperand &from, const MoveOperand &to, Move::Kind kind)
MoveEmitterX86::completeCycle(const MoveOperand &to, Move::Kind kind)
{
    // There is some pattern:
    // (A -> B)
@@ -143,35 +242,17 @@ MoveEmitterX86::completeCycle(const MoveOperand &from, const MoveOperand &to, Mo
        }
    } else {
        if (to.isMemory()) {
            Register temp = tempReg();
            masm.mov(cycleSlot(), temp);
            masm.mov(temp, toOperand(to));
            masm.Pop(toPopOperand(to));
        } else {
            if (to.reg() == spilledReg_) {
                // Make sure we don't re-clobber the spilled register later.
                spilledReg_ = InvalidReg;
            }
            masm.mov(cycleSlot(), to.reg());
            masm.Pop(to.reg());
        }
    }
}
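breakCycle and completeCycle bracket a cycle that cannot be done with swaps: the value about to be clobbered is parked in the cycle slot (or pushed), the ordinary moves run, and the parked value is written last. The shape of that fallback for the two-move cycle (A -> B), (B -> A), as a plain C++ sketch (not part of the patch):

#include <cassert>

int main() {
    int A = 1, B = 2;        // want B = old A and A = old B
    int cycleSlot = B;       // breakCycle: save B before it is clobbered (mov / Push)
    B = A;                   // the ordinary move (A -> B) proceeds as usual
    A = cycleSlot;           // completeCycle: write the saved value into A (mov / Pop)
    assert(A == 2 && B == 1);
    return 0;
}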

void
MoveEmitterX86::emitMove(const MoveOperand &from, const MoveOperand &to)
MoveEmitterX86::emitGeneralMove(const MoveOperand &from, const MoveOperand &to)
{
    if (to.isGeneralReg() && to.reg() == spilledReg_) {
        // If the destination is the spilled register, make sure we
        // don't re-clobber its value.
        spilledReg_ = InvalidReg;
    }

    if (from.isGeneralReg()) {
        if (from.reg() == spilledReg_) {
            // If the source is a register that has been spilled, make sure
            // to load the source back into that register.
            masm.mov(spillSlot(), spilledReg_);
            spilledReg_ = InvalidReg;
        }
        masm.mov(from.reg(), toOperand(to));
    } else if (to.isGeneralReg()) {
        JS_ASSERT(from.isMemory() || from.isEffectiveAddress());
@@ -179,20 +260,31 @@ MoveEmitterX86::emitMove(const MoveOperand &from, const MoveOperand &to)
            masm.mov(toOperand(from), to.reg());
        else
            masm.lea(toOperand(from), to.reg());
    } else {
    } else if (from.isMemory()) {
        // Memory to memory gpr move.
        Register reg = tempReg();
        // Reload its previous value from the stack.
        if (reg == from.base())
            masm.mov(spillSlot(), from.base());

        JS_ASSERT(from.isMemory() || from.isEffectiveAddress());
        if (from.isMemory())
            masm.mov(toOperand(from), reg);
        else
            masm.lea(toOperand(from), reg);
        JS_ASSERT(to.base() != reg);
        masm.mov(reg, toOperand(to));
#ifdef JS_CPU_X64
        // x64 has a ScratchReg. Use it.
        masm.mov(toOperand(from), ScratchReg);
        masm.mov(ScratchReg, toOperand(to));
#else
        // No ScratchReg; bounce it off the stack.
        masm.Push(toOperand(from));
        masm.Pop(toPopOperand(to));
#endif
    } else {
        // Effective address to memory move.
        JS_ASSERT(from.isEffectiveAddress());
#ifdef JS_CPU_X64
        // x64 has a ScratchReg. Use it.
        masm.lea(toOperand(from), ScratchReg);
        masm.mov(ScratchReg, toOperand(to));
#else
        // This is tricky without a ScratchReg. We can't do an lea. Bounce the
        // base register off the stack, then add the offset in place.
        masm.Push(from.base());
        masm.Pop(toPopOperand(to));
        masm.addPtr(Imm32(from.disp()), toOperand(to));
#endif
    }
}
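Without a ScratchReg, the 32-bit effective-address case above pushes the base register, pops it straight into the destination, and adds the displacement in place. The equivalent arithmetic as a standalone sketch (illustrative values, not emitter code):

#include <cassert>
#include <cstdint>

int main() {
    uintptr_t base = 0x1000;    // from.base()
    int32_t   disp = 0x20;      // from.disp()
    uintptr_t dest = 0;         // some memory destination, [to]

    dest = base;                // masm.Push(from.base()); masm.Pop(toPopOperand(to));
    dest += disp;               // masm.addPtr(Imm32(from.disp()), toOperand(to));

    assert(dest == 0x1020);     // [to] now holds the effective address base + disp
    return 0;
}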
@@ -211,29 +303,6 @@ MoveEmitterX86::emitDoubleMove(const MoveOperand &from, const MoveOperand &to)
    }
}

void
MoveEmitterX86::emit(const Move &move)
{
    const MoveOperand &from = move.from();
    const MoveOperand &to = move.to();

    if (move.inCycle()) {
        if (inCycle_) {
            completeCycle(from, to, move.kind());
            inCycle_ = false;
            return;
        }

        breakCycle(from, to, move.kind());
        inCycle_ = true;
    }

    if (move.kind() == Move::DOUBLE)
        emitDoubleMove(from, to);
    else
        emitMove(from, to);
}

void
MoveEmitterX86::assertDone()
{
@@ -245,9 +314,6 @@ MoveEmitterX86::finish()
{
    assertDone();

    if (pushedAtSpill_ != -1 && spilledReg_ != InvalidReg)
        masm.mov(spillSlot(), spilledReg_);

    masm.freeStack(masm.framePushed() - pushedAtStart_);
}

@@ -26,28 +26,23 @@ class MoveEmitterX86
    // Original stack push value.
    uint32_t pushedAtStart_;

    // These store stack offsets to spill locations, snapshotting
    // codegen->framePushed_ at the time they were allocated. They are -1 if no
    // stack space has been allocated for that particular spill.
    // This is a store stack offset for the cycle-break spill slot, snapshotting
    // codegen->framePushed_ at the time it is allocated. -1 if not allocated.
    int32_t pushedAtCycle_;
    int32_t pushedAtSpill_;

    // Register that is available for temporary use. It may be assigned
    // InvalidReg. If no corresponding spill space has been assigned,
    // then this register do not need to be spilled.
    Register spilledReg_;

    void assertDone();
    Register tempReg();
    Operand cycleSlot() const;
    Operand spillSlot() const;
    Operand cycleSlot();
    Operand toOperand(const MoveOperand &operand) const;
    Operand toPopOperand(const MoveOperand &operand) const;

    void emitMove(const MoveOperand &from, const MoveOperand &to);
    size_t characterizeCycle(const MoveResolver &moves, size_t i,
                             bool *allGeneralRegs, bool *allFloatRegs);
    bool maybeEmitOptimizedCycle(const MoveResolver &moves, size_t i,
                                 bool allGeneralRegs, bool allFloatRegs, size_t swapCount);
    void emitGeneralMove(const MoveOperand &from, const MoveOperand &to);
    void emitDoubleMove(const MoveOperand &from, const MoveOperand &to);
    void breakCycle(const MoveOperand &from, const MoveOperand &to, Move::Kind kind);
    void completeCycle(const MoveOperand &from, const MoveOperand &to, Move::Kind kind);
    void emit(const Move &move);
    void breakCycle(const MoveOperand &to, Move::Kind kind);
    void completeCycle(const MoveOperand &to, Move::Kind kind);

  public:
    MoveEmitterX86(MacroAssemblerSpecific &masm);

@@ -428,6 +428,10 @@ class Assembler : public AssemblerX86Shared
        masm.movq_rr(src.code(), dest.code());
    }

    void xchgq(const Register &src, const Register &dest) {
        masm.xchgq_rr(src.code(), dest.code());
    }

    void andq(const Register &src, const Register &dest) {
        masm.andq_rr(src.code(), dest.code());
    }
@@ -552,6 +556,9 @@ class Assembler : public AssemblerX86Shared
        masm.movq_i64r(label->prev(), dest.code());
        label->setPrev(masm.size());
    }
    void xchg(const Register &src, const Register &dest) {
        xchgq(src, dest);
    }
    void lea(const Operand &src, const Register &dest) {
        switch (src.kind()) {
          case Operand::REG_DISP:
@@ -450,6 +450,9 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared
    void addPtr(Imm32 imm, const Address &dest) {
        addq(imm, Operand(dest));
    }
    void addPtr(Imm32 imm, const Operand &dest) {
        addq(imm, dest);
    }
    void addPtr(ImmWord imm, const Register &dest) {
        JS_ASSERT(dest != ScratchReg);
        if ((intptr_t)imm.value <= INT32_MAX && (intptr_t)imm.value >= INT32_MIN) {
@@ -343,6 +343,9 @@ class Assembler : public AssemblerX86Shared
    void mov(const Register &src, const Register &dest) {
        movl(src, dest);
    }
    void xchg(const Register &src, const Register &dest) {
        xchgl(src, dest);
    }
    void lea(const Operand &src, const Register &dest) {
        return leal(src, dest);
    }
@@ -488,6 +488,9 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared
    void addPtr(Imm32 imm, const Address &dest) {
        addl(imm, Operand(dest));
    }
    void addPtr(Imm32 imm, const Operand &dest) {
        addl(imm, dest);
    }
    void addPtr(const Address &src, const Register &dest) {
        addl(Operand(src), dest);
    }