Bug 535706 - nanojit: fix regstate updates for X64. r=edwsmith.
--HG-- extra : convert_revision : 6fb5f71fba861a333e17de644fe49de60bb7a1bd

parent 62a7855349, commit 23edcdcbce
@@ -2088,7 +2088,7 @@ namespace nanojit
     /**
      * move regs around so the SavedRegs contains the highest priority regs.
      */
-    void Assembler::evictScratchRegs()
+    void Assembler::evictScratchRegsExcept(RegisterMask ignore)
     {
         // find the top GpRegs that are candidates to put in SavedRegs

@@ -2099,7 +2099,7 @@ namespace nanojit
         int len=0;
         RegAlloc *regs = &_allocator;
         for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
-            if (rmask(r) & GpRegs) {
+            if (rmask(r) & GpRegs & ~ignore) {
                 LIns *ins = regs->getActive(r);
                 if (ins) {
                     if (canRemat(ins)) {

@@ -2156,7 +2156,7 @@ namespace nanojit
        }

        // now evict everything else.
-       evictSomeActiveRegs(~SavedRegs);
+       evictSomeActiveRegs(~(SavedRegs | ignore));
    }

    void Assembler::evictAllActiveRegs()
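Taken together, the three hunks above turn evictScratchRegs() into evictScratchRegsExcept(RegisterMask ignore): the priority loop now only considers GpRegs & ~ignore, and the final sweep evicts ~(SavedRegs | ignore), so a caller can keep chosen registers (such as a call's result register) live across the eviction. A minimal, self-contained sketch of that mask logic; the uint32_t masks and toy values below are invented stand-ins for nanojit's RegisterMask/RegAlloc machinery, not the real implementation:

    #include <cstdint>
    #include <cstdio>

    typedef uint32_t RegisterMask;

    static const RegisterMask SavedRegs = 0x0000F000;  // toy callee-saved subset
    static RegisterMask active;                        // toy "currently allocated" set

    static void evictSomeActiveRegs(RegisterMask regs) {
        active &= ~regs;                   // spill everything whose bit is in 'regs'
    }

    static void evictScratchRegsExcept(RegisterMask ignore) {
        // (the real code first moves high-priority values into SavedRegs,
        //  considering only GpRegs & ~ignore)
        evictSomeActiveRegs(~(SavedRegs | ignore));    // never touches 'ignore'
    }

    int main() {
        active = 0x0000000F;
        evictScratchRegsExcept(0);         // behaves like the old evictScratchRegs()
        printf("except(0):  %#x\n", active);           // 0: all scratch regs gone

        active = 0x0000000F;
        evictScratchRegsExcept(1u << 0);   // keep r0, e.g. a call's result register
        printf("except(r0): %#x\n", active);           // 0x1: r0 survived
        return 0;
    }

Note the two calling patterns visible in the rest of the patch: back ends still on the deprecated result-register model pass 0 (equivalent to the old behaviour), while the converted X64 asm_call() passes rmask(rr) to protect its result register.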
@@ -337,7 +337,7 @@ namespace nanojit
            void registerResetAll();
            void evictAllActiveRegs();
            void evictSomeActiveRegs(RegisterMask regs);
-           void evictScratchRegs();
+           void evictScratchRegsExcept(RegisterMask ignore);
            void intersectRegisterState(RegAlloc& saved);
            void unionRegisterState(RegAlloc& saved);
            void assignSaved(RegAlloc &saved, RegisterMask skip);
@@ -826,14 +826,14 @@ Assembler::asm_call(LInsp ins)
             * sequence we'd get would be something like:
             *     MOV {R0-R3},params        [from below]
             *     BL function               [from below]
-            *     MOV {R0-R3},spilled data  [from evictScratchRegs()]
+            *     MOV {R0-R3},spilled data  [from evictScratchRegsExcept()]
             *     MOV Dx,{R0,R1}            [from here]
             * which is clearly broken.
             *
             * This is not a problem for non-floating point calls, because the
             * restoring of spilled data into R0 is done via a call to
             * deprecated_prepResultReg(R0) in the other branch of this if-then-else,
-            * meaning that evictScratchRegs() will not modify R0. However,
+            * meaning that evictScratchRegsExcept() will not modify R0. However,
             * deprecated_prepResultReg is not aware of the concept of using a register pair
             * (R0,R1) for the result of a single operation, so it can only be
             * used here with the ultimate VFP register, and not R0/R1, which

@@ -846,7 +846,7 @@ Assembler::asm_call(LInsp ins)
        // Do this after we've handled the call result, so we don't
        // force the call result to be spilled unnecessarily.

-       evictScratchRegs();
+       evictScratchRegsExcept(0);

        const CallInfo* call = ins->callInfo();
        ArgSize sizes[MAXARGS];
@@ -1568,7 +1568,7 @@ namespace nanojit
        // Do this after we've handled the call result, so we don't
        // force the call result to be spilled unnecessarily.

-       evictScratchRegs();
+       evictScratchRegsExcept(0);

        const CallInfo* call = ins->callInfo();
        ArgSize sizes[MAXARGS];
@@ -688,7 +688,7 @@ namespace nanojit
        // Do this after we've handled the call result, so we don't
        // force the call result to be spilled unnecessarily.

-       evictScratchRegs();
+       evictScratchRegsExcept(0);

        const CallInfo* call = ins->callInfo();
        ArgSize sizes[MAXARGS];
@@ -159,7 +159,7 @@ namespace nanojit
        // Do this after we've handled the call result, so we don't
        // force the call result to be spilled unnecessarily.

-       evictScratchRegs();
+       evictScratchRegsExcept(0);

        const CallInfo* call = ins->callInfo();

@@ -620,63 +620,35 @@ namespace nanojit
            }
        }

-   // register allocation for 2-address style ops of the form R = R (op) B
-   void Assembler::regalloc_binary(LIns *ins, RegisterMask allow, Register &rr, Register &ra, Register &rb) {
-   #ifdef _DEBUG
-       RegisterMask originalAllow = allow;
-   #endif
-       LIns *a = ins->oprnd1();
-       LIns *b = ins->oprnd2();
-       if (a != b) {
-           rb = findRegFor(b, allow);
-           allow &= ~rmask(rb);
-       }
-       rr = deprecated_prepResultReg(ins, allow);
-       // if this is last use of a in reg, we can re-use result reg
-       if (!a->isInReg()) {
-           ra = findSpecificRegForUnallocated(a, rr);
-       } else if (!(allow & rmask(a->getReg()))) {
-           // 'a' already has a register assigned, but it's not valid.
-           // To make sure floating point operations stay in FPU registers
-           // as much as possible, make sure that only a few opcodes are
-           // reserving GPRs.
-           NanoAssert(a->isop(LIR_quad) || a->isop(LIR_float) ||
-                      a->isop(LIR_ldf) || a->isop(LIR_ldfc) ||
-                      a->isop(LIR_ldq) || a->isop(LIR_ldqc) ||
-                      a->isop(LIR_ld32f) || a->isop(LIR_ldc32f) ||
-                      a->isop(LIR_u2f) || a->isop(LIR_fcall));
-           allow &= ~rmask(rr);
-           ra = findRegFor(a, allow);
-       } else {
-           ra = a->getReg();
-       }
-       if (a == b) {
-           rb = ra;
-       }
-       NanoAssert(originalAllow & rmask(rr));
-       NanoAssert(originalAllow & rmask(ra));
-       NanoAssert(originalAllow & rmask(rb));
-   }
-
    void Assembler::asm_qbinop(LIns *ins) {
        asm_arith(ins);
    }

    void Assembler::asm_shift(LIns *ins) {
-       // shift require rcx for shift count
+       // Shift requires rcx for shift count.
+       LIns *a = ins->oprnd1();
        LIns *b = ins->oprnd2();
        if (b->isconst()) {
            asm_shift_imm(ins);
            return;
        }

        Register rr, ra;
-       if (b != ins->oprnd1()) {
+       if (a != b) {
            findSpecificRegFor(b, RCX);
-           regalloc_unary(ins, GpRegs & ~rmask(RCX), rr, ra);
+           beginOp1Regs(ins, GpRegs & ~rmask(RCX), rr, ra);
        } else {
-           // a == b means both must be in RCX
-           regalloc_unary(ins, rmask(RCX), rr, ra);
+           // Nb: this is just like beginOp1Regs() except that it asserts
+           // that ra is in GpRegs instead of rmask(RCX)) -- this is
+           // necessary for the a==b case because 'a' might not be in RCX
+           // (which is ok, the MR(rr, ra) below will move it into RCX).
+           rr = prepareResultReg(ins, rmask(RCX));
+
+           // If 'a' isn't in a register, it can be clobbered by 'ins'.
+           ra = a->isInReg() ? a->getReg() : rr;
+           NanoAssert(rmask(ra) & GpRegs);
        }

        switch (ins->opcode()) {
        default:
            TODO(asm_shift);
@@ -689,11 +661,14 @@ namespace nanojit
        }
        if (rr != ra)
            MR(rr, ra);
+
+       endOpRegs(ins, rr, ra);
    }

    void Assembler::asm_shift_imm(LIns *ins) {
        Register rr, ra;
-       regalloc_unary(ins, GpRegs, rr, ra);
+       beginOp1Regs(ins, GpRegs, rr, ra);
+
        int shift = ins->oprnd2()->imm32() & 63;
        switch (ins->opcode()) {
        default: TODO(shiftimm);

@@ -706,6 +681,8 @@ namespace nanojit
        }
        if (rr != ra)
            MR(rr, ra);
+
+       endOpRegs(ins, rr, ra);
    }

    static bool isImm32(LIns *ins) {
@@ -715,21 +692,22 @@ namespace nanojit
        return ins->isconst() ? ins->imm32() : int32_t(ins->imm64());
    }

-   // binary op, integer regs, rhs is int32 const
+   // Binary op, integer regs, rhs is int32 constant.
    void Assembler::asm_arith_imm(LIns *ins) {
        LIns *b = ins->oprnd2();
        int32_t imm = getImm32(b);
        LOpcode op = ins->opcode();
        Register rr, ra;
        if (op == LIR_mul || op == LIR_mulxov) {
-           // imul has true 3-addr form, it doesn't clobber ra
-           rr = deprecated_prepResultReg(ins, GpRegs);
-           LIns *a = ins->oprnd1();
-           ra = findRegFor(a, GpRegs);
+           // Special case: imul-by-imm has true 3-addr form.  So we don't
+           // need the MR(rr, ra) after the IMULI.
+           beginOp1Regs(ins, GpRegs, rr, ra);
            IMULI(rr, ra, imm);
+           endOpRegs(ins, rr, ra);
            return;
        }
-       regalloc_unary(ins, GpRegs, rr, ra);
+
+       beginOp1Regs(ins, GpRegs, rr, ra);
        if (isS8(imm)) {
            switch (ins->opcode()) {
            default: TODO(arith_imm8);
@@ -765,35 +743,63 @@ namespace nanojit
        }
        if (rr != ra)
            MR(rr, ra);
+
+       endOpRegs(ins, rr, ra);
    }

-   void Assembler::asm_div_mod(LIns *ins) {
-       LIns *div;
-       if (ins->opcode() == LIR_mod) {
-           // LIR_mod expects the LIR_div to be near
-           div = ins->oprnd1();
-           deprecated_prepResultReg(ins, rmask(RDX));
-       } else {
-           div = ins;
-           evictIfActive(RDX);
-       }
-
+   // Generates code for a LIR_div that doesn't have a subsequent LIR_mod.
+   void Assembler::asm_div(LIns *div) {
        NanoAssert(div->isop(LIR_div));
-       LIns *a = div->oprnd1();
-       LIns *b = div->oprnd2();
-
-       deprecated_prepResultReg(div, rmask(RAX));
-       Register rb = findRegFor(b, GpRegs & ~(rmask(RAX)|rmask(RDX)));
-       Register ra = a->isInReg() ? a->getReg() : RAX;
-
-       IDIV(rb);
+       LIns *lhs = div->oprnd1();
+       LIns *rhs = div->oprnd2();
+
+       evictIfActive(RDX);
+       prepareResultReg(div, rmask(RAX));
+
+       Register rhsReg = findRegFor(rhs, GpRegs & ~(rmask(RAX)|rmask(RDX)));
+       Register lhsReg = !lhs->isInReg()
+                       ? findSpecificRegForUnallocated(lhs, RAX)
+                       : lhs->getReg();
+       IDIV(rhsReg);
        SARI(RDX, 31);
        MR(RDX, RAX);
-       if (RAX != ra)
-           MR(RAX, ra);
-       if (!a->isInReg()) {
-           NanoAssert(ra == RAX);
-           findSpecificRegForUnallocated(a, RAX);
-       }
+       if (RAX != lhsReg)
+           MR(RAX, lhsReg);
+
+       freeResourcesOf(div);
+   }

+   // Generates code for a LIR_mod(LIR_div(divL, divR)) sequence.
+   void Assembler::asm_div_mod(LIns *mod) {
+       LIns *div = mod->oprnd1();
+
+       NanoAssert(mod->isop(LIR_mod));
+       NanoAssert(div->isop(LIR_div));
+
+       LIns *divL = div->oprnd1();
+       LIns *divR = div->oprnd2();
+
+       prepareResultReg(mod, rmask(RDX));
+       prepareResultReg(div, rmask(RAX));
+
+       Register rDivR = findRegFor(divR, GpRegs & ~(rmask(RAX)|rmask(RDX)));
+       Register rDivL = divL->isInReg() ? divL->getReg() : RAX;
+
+       IDIV(rDivR);
+       SARI(RDX, 31);
+       MR(RDX, RAX);
+       if (RAX != rDivL)
+           MR(RAX, rDivL);
+
+       freeResourcesOf(mod);
+       freeResourcesOf(div);
+       if (!divL->isInReg()) {
+           NanoAssert(rDivL == RAX);
+           findSpecificRegForUnallocated(divL, RAX);
+       }
    }

    // binary op with integer registers
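A note on reading the new asm_div()/asm_div_mod() bodies: nanojit emits machine code backwards (last instruction first), so the run-time order is MR(RDX,RAX); SARI(RDX,31); IDIV(...), i.e. sign-extend RAX into RDX:RAX and then divide, which is what the i386 back end expresses with CDQ. A small C++ model of what the emitted sequence computes, assuming standard x86-64 idiv semantics (variable names are illustrative, not nanojit code):

    #include <cstdint>
    #include <cstdio>

    int main() {
        int32_t rax = -7, rcx = 2;          // dividend in RAX, divisor in another GPR

        // MR(RDX, RAX); SARI(RDX, 31): replicate the sign bit of RAX into RDX.
        int32_t rdx = rax < 0 ? -1 : 0;
        int64_t dividend = ((int64_t)rdx << 32) | (uint32_t)rax;

        // IDIV: quotient to RAX, remainder to RDX; truncates toward zero.
        int32_t quotient  = (int32_t)(dividend / rcx);   // -3
        int32_t remainder = (int32_t)(dividend % rcx);   // -1, sign follows dividend

        printf("quot=%d rem=%d\n", quotient, remainder);
        return 0;
    }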
@@ -807,9 +813,13 @@ namespace nanojit
            asm_shift(ins);
            return;
        case LIR_mod:
-       case LIR_div:
            asm_div_mod(ins);
            return;
+       case LIR_div:
+           // Nb: if the div feeds into a mod it will be handled by
+           // asm_div_mod() rather than here.
+           asm_div(ins);
+           return;
        default:
            break;
        }
@@ -819,7 +829,7 @@ namespace nanojit
            asm_arith_imm(ins);
            return;
        }
-       regalloc_binary(ins, GpRegs, rr, ra, rb);
+       beginOp2Regs(ins, GpRegs, rr, ra, rb);
        switch (ins->opcode()) {
        default:      TODO(asm_arith);
        case LIR_or:  ORLRR(rr, rb); break;
@@ -838,16 +848,15 @@ namespace nanojit
        case LIR_qaddp: ADDQRR(rr, rb); break;
        }
        if (rr != ra)
-           MR(rr,ra);
+           MR(rr, ra);
+
+       endOpRegs(ins, rr, ra);
    }

-   // binary op with fp registers
+   // Binary op with fp registers.
    void Assembler::asm_fop(LIns *ins) {
-       // NB, rb is always filled in by regalloc_binary,
-       // but compilers can't always tell that: init to UnspecifiedReg
-       // to avoid a warning.
-       Register rr, ra, rb = UnspecifiedReg;
-       regalloc_binary(ins, FpRegs, rr, ra, rb);
+       Register rr, ra, rb = UnspecifiedReg;   // init to shut GCC up
+       beginOp2Regs(ins, FpRegs, rr, ra, rb);
        switch (ins->opcode()) {
        default:       TODO(asm_fop);
        case LIR_fdiv: DIVSD(rr, rb); break;
@@ -858,28 +867,29 @@ namespace nanojit
        if (rr != ra) {
            asm_nongp_copy(rr, ra);
        }
+
+       endOpRegs(ins, rr, ra);
    }

    void Assembler::asm_neg_not(LIns *ins) {
        Register rr, ra;
-       regalloc_unary(ins, GpRegs, rr, ra);
-       NanoAssert(IsGpReg(ra));
+       beginOp1Regs(ins, GpRegs, rr, ra);

        if (ins->isop(LIR_not))
            NOT(rr);
        else
            NEG(rr);
        if (rr != ra)
            MR(rr, ra);
+
+       endOpRegs(ins, rr, ra);
    }

    void Assembler::asm_call(LIns *ins) {
-       Register retReg = ( ins->isop(LIR_fcall) ? XMM0 : retRegs[0] );
-       deprecated_prepResultReg(ins, rmask(retReg));
+       Register rr = ( ins->isop(LIR_fcall) ? XMM0 : retRegs[0] );
+       prepareResultReg(ins, rmask(rr));

        // Do this after we've handled the call result, so we don't
        // force the call result to be spilled unnecessarily.

-       evictScratchRegs();
+       evictScratchRegsExcept(rmask(rr));

        const CallInfo *call = ins->callInfo();
        ArgSize sizes[MAXARGS];
@@ -906,6 +916,9 @@ namespace nanojit
            CALLRAX();
        }

+       // Call this now so that the arg setup can involve 'rr'.
+       freeResourcesOf(ins);
+
    #ifdef _WIN64
        int stk_used = 32; // always reserve 32byte shadow area
    #else
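The two comments above ("Do this after we've handled the call result ..." and "Call this now so that the arg setup can involve 'rr'") only make sense given nanojit's backwards code generation: instructions are written from the end of the buffer toward the start, so whatever is emitted first in C++ source order executes last. A toy model of that discipline (all names invented for illustration):

    #include <cstdio>
    #include <string>
    #include <vector>

    struct BackwardsEmitter {
        std::vector<std::string> code;            // front = first to execute
        void emit(const std::string& insn) {      // like *(--_nIns) = ... in nanojit
            code.insert(code.begin(), insn);
        }
    };

    int main() {
        BackwardsEmitter a;
        a.emit("mov rr, rax");      // emitted first in asm_call() -> runs last
        a.emit("call f");
        a.emit("mov rdi, arg0");    // arg setup, emitted last -> runs first
        for (const std::string& s : a.code)
            printf("%s\n", s.c_str());
        return 0;
    }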
@@ -994,14 +1007,15 @@ namespace nanojit

    void Assembler::asm_q2i(LIns *ins) {
        Register rr, ra;
-       regalloc_unary(ins, GpRegs, rr, ra);
+       beginOp1Regs(ins, GpRegs, rr, ra);
        NanoAssert(IsGpReg(ra));
        MOVLR(rr, ra);  // 32bit mov zeros the upper 32bits of the target
+       endOpRegs(ins, rr, ra);
    }

    void Assembler::asm_promote(LIns *ins) {
        Register rr, ra;
-       regalloc_unary(ins, GpRegs, rr, ra);
+       beginOp1Regs(ins, GpRegs, rr, ra);
        NanoAssert(IsGpReg(ra));
        if (ins->isop(LIR_u2q)) {
            MOVLR(rr, ra);  // 32bit mov zeros the upper 32bits of the target
@@ -1009,38 +1023,44 @@ namespace nanojit
            NanoAssert(ins->isop(LIR_i2q));
            MOVSXDR(rr, ra);    // sign extend 32->64
        }
+       endOpRegs(ins, rr, ra);
    }

-   // the CVTSI2SD instruction only writes to the low 64bits of the target
+   // The CVTSI2SD instruction only writes to the low 64bits of the target
    // XMM register, which hinders register renaming and makes dependence
    // chains longer.  So we precede with XORPS to clear the target register.

    void Assembler::asm_i2f(LIns *ins) {
-       Register r = deprecated_prepResultReg(ins, FpRegs);
-       Register b = findRegFor(ins->oprnd1(), GpRegs);
-       CVTSI2SD(r, b);     // cvtsi2sd xmmr, b  only writes xmm:0:64
-       XORPS(r);           // xorps xmmr,xmmr to break dependency chains
+       LIns *a = ins->oprnd1();
+       NanoAssert(ins->isF64() && a->isI32());
+
+       Register rr = prepareResultReg(ins, FpRegs);
+       Register ra = findRegFor(a, GpRegs);
+       CVTSI2SD(rr, ra);   // cvtsi2sd xmmr, b  only writes xmm:0:64
+       XORPS(rr);          // xorps xmmr,xmmr to break dependency chains
+       freeResourcesOf(ins);
    }

    void Assembler::asm_u2f(LIns *ins) {
-       Register r = deprecated_prepResultReg(ins, FpRegs);
-       Register b = findRegFor(ins->oprnd1(), GpRegs);
-       NanoAssert(ins->oprnd1()->isI32());
-       // since oprnd1 value is 32bit, its okay to zero-extend the value without worrying about clobbering.
-       CVTSQ2SD(r, b);     // convert int64 to double
-       XORPS(r);           // xorps xmmr,xmmr to break dependency chains
-       MOVLR(b, b);        // zero extend u32 to int64
+       LIns *a = ins->oprnd1();
+       NanoAssert(ins->isF64() && a->isI32());
+
+       Register rr = prepareResultReg(ins, FpRegs);
+       Register ra = findRegFor(a, GpRegs);
+       // Because oprnd1 is 32bit, it's ok to zero-extend it without worrying about clobbering.
+       CVTSQ2SD(rr, ra);   // convert int64 to double
+       XORPS(rr);          // xorps xmmr,xmmr to break dependency chains
+       MOVLR(ra, ra);      // zero extend u32 to int64
+       freeResourcesOf(ins);
    }

    void Assembler::asm_f2i(LIns *ins) {
-       LIns *lhs = ins->oprnd1();
-
-       NanoAssert(ins->isI32() && lhs->isF64());
-       Register r = prepareResultReg(ins, GpRegs);
-       Register b = findRegFor(lhs, FpRegs);
-
-       CVTSD2SI(r, b);
+       LIns *a = ins->oprnd1();
+       NanoAssert(ins->isI32() && a->isF64());
+
+       Register rr = prepareResultReg(ins, GpRegs);
+       Register rb = findRegFor(a, FpRegs);
+       CVTSD2SI(rr, rb);
        freeResourcesOf(ins);
    }
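Why the asm_u2f() sequence is sound (reading backwards again, the run-time order is MOVLR, XORPS, CVTSQ2SD): zero-extending the unsigned 32-bit value yields a non-negative int64, and the signed int64-to-double conversion is exact for any value below 2^53. The same reasoning restated in plain C++, purely illustrative:

    #include <cstdint>
    #include <cstdio>

    int main() {
        uint32_t u = 0xFFFFFFFFu;                // largest u32
        int64_t widened = (int64_t)(uint64_t)u;  // MOVLR(ra, ra): zero-extend, never negative
        double d = (double)widened;              // CVTSQ2SD: signed conversion, exact here
        printf("%.1f\n", d);                     // 4294967295.0
        return 0;
    }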
@@ -1052,11 +1072,16 @@ namespace nanojit
        NanoAssert((ins->isop(LIR_cmov)  && iftrue->isI32() && iffalse->isI32()) ||
                   (ins->isop(LIR_qcmov) && iftrue->isI64() && iffalse->isI64()));

-       // this code assumes that neither LD nor MR nor MRcc set any of the condition flags.
-       // (This is true on Intel, is it true on all architectures?)
-       const Register rr = deprecated_prepResultReg(ins, GpRegs);
-       const Register rf = findRegFor(iffalse, GpRegs & ~rmask(rr));
+       Register rr = prepareResultReg(ins, GpRegs);
+
+       Register rf = findRegFor(iffalse, GpRegs & ~rmask(rr));
+
+       // If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
+       Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
+
+       // WARNING: We cannot generate any code that affects the condition
+       // codes between the MRcc generation here and the asm_cmp() call
+       // below.  See asm_cmp() for more details.
        LOpcode condop = cond->opcode();
        if (ins->opcode() == LIR_cmov) {
            switch (condop) {
@@ -1085,7 +1110,15 @@ namespace nanojit
            default: NanoAssert(0); break;
            }
        }
-       /*const Register rt =*/ findSpecificRegFor(iftrue, rr);
+       if (rr != rt)
+           MR(rr, rt);
+
+       freeResourcesOf(ins);
+       if (!iftrue->isInReg()) {
+           NanoAssert(rt == rr);
+           findSpecificRegForUnallocated(iftrue, rr);
+       }
+
        asm_cmp(cond);
    }
@@ -1099,7 +1132,7 @@ namespace nanojit
        if (condop >= LIR_feq && condop <= LIR_fge)
            return asm_fbranch(onFalse, cond, target);

-       // We must ensure there's room for the instr before calculating
+       // We must ensure there's room for the instruction before calculating
        // the offset.  And the offset determines the opcode (8bit or 32bit).
        if (target && isTargetWithinS8(target)) {
            if (onFalse) {
@@ -1158,7 +1191,7 @@ namespace nanojit
            }
        }
        }
-       NIns *patch = _nIns;    // addr of instr to patch
+       NIns *patch = _nIns;    // address of instruction to patch
        asm_cmp(cond);
        return patch;
    }
@@ -1176,6 +1209,9 @@ namespace nanojit
        JO( 8, target);
    }

+   // WARNING: this function cannot generate code that will affect the
+   // condition codes prior to the generation of the test/cmp.  See
+   // Nativei386.cpp:asm_cmp() for details.
    void Assembler::asm_cmp(LIns *cond) {
        LIns *b = cond->oprnd2();
        if (isImm32(b)) {
@@ -1220,8 +1256,8 @@ namespace nanojit
        }
    }

-   // compiling floating point branches
-   // discussion in https://bugzilla.mozilla.org/show_bug.cgi?id=443886
+   // Compiling floating point branches.
+   // Discussion in https://bugzilla.mozilla.org/show_bug.cgi?id=443886.
    //
    // fucom/p/pp: c3 c2 c0   jae ja jbe jb je jne
    // ucomisd:    Z  P  C    !C  !C&!Z  C|Z  C  Z  !Z
@@ -1231,7 +1267,7 @@ namespace nanojit
    // less    <   0  0  1    T  T       T
    // equal   =   1  0  0       T  T    T
    //
-   // here's the cases, using conditionals:
+   // Here are the cases, using conditionals:
    //
    //   branch  >=  >   <=   <    =
    //   ------  --- --- ---  ---  ---
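The table above encodes why LIR_feq needs the SETNP/AND dance seen below in asm_fcond(): ucomisd sets ZF for both 'equal' and 'unordered' (NaN), so equality must be computed as ZF & !PF. A sketch of that flag logic in C++, assuming IEEE-754 comparison semantics as tabulated above:

    #include <cmath>
    #include <cstdio>

    struct Flags { bool zf, pf, cf; };

    static Flags ucomisd(double a, double b) {
        if (std::isnan(a) || std::isnan(b)) return {true,  true,  true };  // unordered
        if (a > b)                          return {false, false, false};  // greater
        if (a < b)                          return {false, false, true };  // less
        return                                     {true,  false, false};  // equal
    }

    int main() {
        Flags eq  = ucomisd(1.0, 1.0);
        Flags nan = ucomisd(std::nan(""), std::nan(""));
        printf("1==1:     %d\n", eq.zf  && !eq.pf);   // 1
        printf("nan==nan: %d\n", nan.zf && !nan.pf);  // 0: ZF is set, but so is PF
        return 0;
    }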
@@ -1281,7 +1317,7 @@ namespace nanojit
            }
            patch = _nIns;
        }
-       fcmp(a, b);
+       asm_fcmp(a, b);
        return patch;
    }

@@ -1292,7 +1328,7 @@ namespace nanojit
        if (op == LIR_feq) {
            // result = ZF & !PF, must do logic on flags
            // r = al|bl|cl|dl, can only use rh without rex prefix
-           Register r = deprecated_prepResultReg(ins, 1<<RAX|1<<RCX|1<<RDX|1<<RBX);
+           Register r = prepareResultReg(ins, 1<<RAX|1<<RCX|1<<RDX|1<<RBX);
            MOVZX8(r, r);   // movzx8 r,rl    r[8:63] = 0
            X86_AND8R(r);   // and rl,rh      rl &= rh
            X86_SETNP(r);   // setnp rh       rh = !PF
@@ -1305,22 +1341,30 @@ namespace nanojit
            op = LIR_fge;
            LIns *t = a; a = b; b = t;
        }
-       Register r = deprecated_prepResultReg(ins, GpRegs); // x64 can use any GPR as setcc target
+       Register r = prepareResultReg(ins, GpRegs); // x64 can use any GPR as setcc target
        MOVZX8(r, r);
        if (op == LIR_fgt)
            SETA(r);
        else
            SETAE(r);
        }
-       fcmp(a, b);
+
+       freeResourcesOf(ins);
+
+       asm_fcmp(a, b);
    }

-   void Assembler::fcmp(LIns *a, LIns *b) {
+   // WARNING: This function cannot generate any code that will affect the
+   // condition codes prior to the generation of the ucomisd.  See asm_cmp()
+   // for more details.
+   void Assembler::asm_fcmp(LIns *a, LIns *b) {
        Register ra, rb;
        findRegFor2(FpRegs, a, ra, FpRegs, b, rb);
        UCOMISD(ra, rb);
    }

+   // WARNING: the code generated by this function must not affect the
+   // condition codes.  See asm_cmp() for details.
    void Assembler::asm_restore(LIns *ins, Register r) {
        if (ins->isop(LIR_alloc)) {
            int d = arDisp(ins);
@@ -1336,11 +1380,10 @@ namespace nanojit
        }
        else {
            int d = findMemFor(ins);
-           if (IsFpReg(r)) {
-               NanoAssert(ins->isN64());
-               // load 64bits into XMM.  don't know if double or int64, assume double.
+           if (ins->isF64()) {
+               NanoAssert(IsFpReg(r));
                MOVSDRM(r, d, FP);
-           } else if (ins->isN64()) {
+           } else if (ins->isI64()) {
                NanoAssert(IsGpReg(r));
                MOVQRM(r, d, FP);
            } else {
@@ -1353,8 +1396,10 @@ namespace nanojit

    void Assembler::asm_cond(LIns *ins) {
        LOpcode op = ins->opcode();
+
        // unlike x86-32, with a rex prefix we can use any GP register as an 8bit target
-       Register r = deprecated_prepResultReg(ins, GpRegs);
+       Register r = prepareResultReg(ins, GpRegs);
+
        // SETcc only sets low 8 bits, so extend
        MOVZX8(r, r);
        switch (op) {
@@ -1379,6 +1424,8 @@ namespace nanojit
        case LIR_quge:
        case LIR_uge:  SETAE(r);   break;
        }
+       freeResourcesOf(ins);
+
        asm_cmp(ins);
    }

@@ -1409,18 +1456,17 @@ namespace nanojit
        }
    }

-   void Assembler::regalloc_load(LIns *ins, RegisterMask allow, Register &rr, int32_t &dr, Register &rb) {
+   // Register setup for load ops.  Pairs with endLoadRegs().
+   void Assembler::beginLoadRegs(LIns *ins, RegisterMask allow, Register &rr, int32_t &dr, Register &rb) {
        dr = ins->disp();
        LIns *base = ins->oprnd1();
        rb = getBaseReg(base, dr, BaseRegs);
-       if (!ins->isInRegMask(allow)) {
-           rr = deprecated_prepResultReg(ins, allow & ~rmask(rb));
-       } else {
-           // keep already assigned register
-           rr = ins->getReg();
-           NanoAssert(allow & rmask(rr));
-           deprecated_freeRsrcOf(ins, false);
-       }
+       rr = prepareResultReg(ins, allow & ~rmask(rb));
+   }
+
+   // Register clean-up for load ops.  Pairs with beginLoadRegs().
+   void Assembler::endLoadRegs(LIns* ins) {
+       freeResourcesOf(ins);
    }

    void Assembler::asm_load64(LIns *ins) {
@@ -1429,19 +1475,19 @@ namespace nanojit
        switch (ins->opcode()) {
            case LIR_ldq:
            case LIR_ldqc:
-               regalloc_load(ins, GpRegs, rr, dr, rb);
+               beginLoadRegs(ins, GpRegs, rr, dr, rb);
                NanoAssert(IsGpReg(rr));
                MOVQRM(rr, dr, rb);     // general 64bit load, 32bit const displacement
                break;
            case LIR_ldf:
            case LIR_ldfc:
-               regalloc_load(ins, FpRegs, rr, dr, rb);
+               beginLoadRegs(ins, FpRegs, rr, dr, rb);
                NanoAssert(IsFpReg(rr));
                MOVSDRM(rr, dr, rb);    // load 64bits into XMM
                break;
            case LIR_ld32f:
            case LIR_ldc32f:
-               regalloc_load(ins, FpRegs, rr, dr, rb);
+               beginLoadRegs(ins, FpRegs, rr, dr, rb);
                NanoAssert(IsFpReg(rr));
                CVTSS2SD(rr, rr);
                MOVSSRM(rr, dr, rb);
@@ -1450,14 +1496,14 @@ namespace nanojit
                NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
                break;
        }
-
+       endLoadRegs(ins);
    }

    void Assembler::asm_load32(LIns *ins) {
        NanoAssert(ins->isI32());
        Register r, b;
        int32_t d;
-       regalloc_load(ins, GpRegs, r, d, b);
+       beginLoadRegs(ins, GpRegs, r, d, b);
        LOpcode op = ins->opcode();
        switch (op) {
            case LIR_ldzb:
@@ -1484,6 +1530,7 @@ namespace nanojit
                NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
                break;
        }
+       endLoadRegs(ins);
    }

    void Assembler::asm_store64(LOpcode op, LIns *value, int d, LIns *base) {
@@ -1542,8 +1589,6 @@ namespace nanojit
                NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
                break;
        }
-
-
    }

    void Assembler::asm_int(LIns *ins) {
@@ -1597,11 +1642,11 @@ namespace nanojit
        uint32_t a = ins->paramArg();
        uint32_t kind = ins->paramKind();
        if (kind == 0) {
-           // ordinary param
-           // first four or six args always in registers for x86_64 ABI
+           // Ordinary param.  First four or six args always in registers for x86_64 ABI.
            if (a < (uint32_t)NumArgRegs) {
                // incoming arg in register
-               deprecated_prepResultReg(ins, rmask(argRegs[a]));
+               prepareResultReg(ins, rmask(argRegs[a]));
+               // No code to generate.
            } else {
                // todo: support stack based args, arg 0 is at [FP+off] where off
                // is the # of regs to be pushed in genProlog()
@@ -1609,24 +1654,60 @@ namespace nanojit
            }
        }
        else {
-           // saved param
-           deprecated_prepResultReg(ins, rmask(savedRegs[a]));
+           // Saved param.
+           prepareResultReg(ins, rmask(savedRegs[a]));
+           // No code to generate.
        }
+       freeResourcesOf(ins);
    }

+   // Register setup for 2-address style unary ops of the form R = (op) R.
+   // Pairs with endOpRegs().
+   void Assembler::beginOp1Regs(LIns* ins, RegisterMask allow, Register &rr, Register &ra) {
+       LIns* a = ins->oprnd1();
+
+       rr = prepareResultReg(ins, allow);
+
+       // If 'a' isn't in a register, it can be clobbered by 'ins'.
+       ra = a->isInReg() ? a->getReg() : rr;
+       NanoAssert(rmask(ra) & allow);
+   }
+
+   // Register setup for 2-address style binary ops of the form R = R (op) B.
+   // Pairs with endOpRegs().
+   void Assembler::beginOp2Regs(LIns *ins, RegisterMask allow, Register &rr, Register &ra,
+                                Register &rb) {
+       LIns *a = ins->oprnd1();
+       LIns *b = ins->oprnd2();
+       if (a != b) {
+           rb = findRegFor(b, allow);
+           allow &= ~rmask(rb);
+       }
+       rr = prepareResultReg(ins, allow);
+
+       // If 'a' isn't in a register, it can be clobbered by 'ins'.
+       ra = a->isInReg() ? a->getReg() : rr;
+       NanoAssert(rmask(ra) & allow);
+
+       if (a == b) {
+           rb = ra;
+       }
+   }

-   // register allocation for 2-address style unary ops of the form R = (op) R
-   void Assembler::regalloc_unary(LIns *ins, RegisterMask allow, Register &rr, Register &ra) {
-       LIns *a = ins->oprnd1();
-       rr = deprecated_prepResultReg(ins, allow);
-       // if this is last use of a in reg, we can re-use result reg
-       if (!a->isInReg()) {
-           ra = findSpecificRegForUnallocated(a, rr);
-       } else {
-           // 'a' already has a register assigned.  Caller must emit a copy
-           // to rr once instr code is generated.  (ie  mov rr,ra ; op rr)
-           ra = a->getReg();
-       }
-       NanoAssert(allow & rmask(rr));
+   // Register clean-up for 2-address style unary ops of the form R = (op) R.
+   // Pairs with beginOp1Regs() and beginOp2Regs().
+   void Assembler::endOpRegs(LIns* ins, Register rr, Register ra) {
+       LIns* a = ins->oprnd1();
+
+       // We're finished with 'ins'.
+       NanoAssert(ins->getReg() == rr);
+       freeResourcesOf(ins);
+
+       // If 'a' isn't in a register yet, that means it's clobbered by 'ins'.
+       if (!a->isInReg()) {
+           NanoAssert(ra == rr);
+           findSpecificRegForUnallocated(a, ra);
+       }
    }

    static const AVMPLUS_ALIGN16(int64_t) negateMask[] = {0x8000000000000000LL,0};
@@ -1634,7 +1715,7 @@ namespace nanojit
    void Assembler::asm_fneg(LIns *ins) {
        Register rr, ra;
        if (isS32((uintptr_t)negateMask) || isTargetWithinS32((NIns*)negateMask)) {
-           regalloc_unary(ins, FpRegs, rr, ra);
+           beginOp1Regs(ins, FpRegs, rr, ra);
            if (isS32((uintptr_t)negateMask)) {
                // builtin code is in bottom or top 2GB addr space, use absolute addressing
                XORPSA(rr, (int32_t)(uintptr_t)negateMask);

@@ -1644,14 +1725,17 @@ namespace nanojit
            }
            if (ra != rr)
                asm_nongp_copy(rr,ra);
+           endOpRegs(ins, rr, ra);
+
        } else {
-           // this is just hideous - can't use RIP-relative load, can't use
+           // This is just hideous - can't use RIP-relative load, can't use
            // absolute-address load, and cant move imm64 const to XMM.
            // so do it all in a GPR.  hrmph.
-           rr = deprecated_prepResultReg(ins, GpRegs);
+           rr = prepareResultReg(ins, GpRegs);
            ra = findRegFor(ins->oprnd1(), GpRegs & ~rmask(rr));
            XORQRR(rr, ra);     // xor rr, ra
            asm_quad(rr, negateMask[0], /*canClobberCCs*/true);  // mov rr, 0x8000000000000000
+           freeResourcesOf(ins);
        }
    }
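The negateMask trick in asm_fneg(), XORPSA against the mask on the fast path and XORQRR in a GPR otherwise, is just flipping the IEEE-754 sign bit. The same operation in portable C++, for illustration only:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
        double x = 3.5;
        uint64_t bits;
        std::memcpy(&bits, &x, sizeof bits);
        bits ^= 0x8000000000000000ULL;      // negateMask[0]: flip the sign bit
        std::memcpy(&x, &bits, sizeof x);
        printf("%.1f\n", x);                // -3.5
        return 0;
    }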
|
@ -399,15 +399,18 @@ namespace nanojit
|
||||
void asm_shift(LIns*);\
|
||||
void asm_shift_imm(LIns*);\
|
||||
void asm_arith_imm(LIns*);\
|
||||
void regalloc_unary(LIns *ins, RegisterMask allow, Register &rr, Register &ra);\
|
||||
void regalloc_binary(LIns *ins, RegisterMask allow, Register &rr, Register &ra, Register &rb);\
|
||||
void regalloc_load(LIns *ins, RegisterMask allow, Register &rr, int32_t &d, Register &rb);\
|
||||
void beginOp1Regs(LIns *ins, RegisterMask allow, Register &rr, Register &ra);\
|
||||
void beginOp2Regs(LIns *ins, RegisterMask allow, Register &rr, Register &ra, Register &rb);\
|
||||
void endOpRegs(LIns *ins, Register rr, Register ra);\
|
||||
void beginLoadRegs(LIns *ins, RegisterMask allow, Register &rr, int32_t &d, Register &rb);\
|
||||
void endLoadRegs(LIns *ins);\
|
||||
void dis(NIns *p, int bytes);\
|
||||
void asm_cmp(LIns*);\
|
||||
void asm_cmp_imm(LIns*);\
|
||||
void fcmp(LIns*, LIns*);\
|
||||
void asm_fcmp(LIns*, LIns*);\
|
||||
NIns* asm_fbranch(bool, LIns*, NIns*);\
|
||||
void asm_div_mod(LIns *i);\
|
||||
void asm_div(LIns *ins);\
|
||||
void asm_div_mod(LIns *ins);\
|
||||
int max_stk_used;\
|
||||
void PUSHR(Register r);\
|
||||
void POPR(Register r);\
|
||||
|
@@ -167,7 +167,7 @@ namespace nanojit
        // Do this after we've handled the call result, so we don't
        // force the call result to be spilled unnecessarily.

-       evictScratchRegs();
+       evictScratchRegsExcept(0);

        const CallInfo* call = ins->callInfo();
        // must be signed, not unsigned
@@ -393,13 +393,17 @@ namespace nanojit

        } else {
            int d = findMemFor(ins);
-           if (rmask(r) & GpRegs) {
+           if (ins->isI32()) {
+               NanoAssert(rmask(r) & GpRegs);
                LD(r, d, FP);
-           } else if (rmask(r) & XmmRegs) {
-               SSE_LDQ(r, d, FP);
            } else {
-               NanoAssert(rmask(r) & x87Regs);
-               FLDQ(d, FP);
+               NanoAssert(ins->isF64());
+               if (rmask(r) & XmmRegs) {
+                   SSE_LDQ(r, d, FP);
+               } else {
+                   NanoAssert(rmask(r) & x87Regs);
+                   FLDQ(d, FP);
+               }
            }
        }
    }
@@ -976,7 +980,7 @@ namespace nanojit
        }
    }

-   // This is called when we have a mod(div(divL, divR)) sequence.
+   // Generates code for a LIR_mod(LIR_div(divL, divR)) sequence.
    void Assembler::asm_div_mod(LInsp mod)
    {
        LInsp div = mod->oprnd1();
@@ -992,12 +996,10 @@ namespace nanojit
        prepareResultReg(div, rmask(EAX));

        Register rDivR = findRegFor(divR, (GpRegs & ~(rmask(EAX)|rmask(EDX))));
-
        Register rDivL = !divL->isInReg() ? EAX : divL->getReg();
-
        DIV(rDivR);
        CDQ();  // sign-extend EAX into EDX:EAX

        if (EAX != rDivL)
            MR(EAX, rDivL);

@@ -1028,7 +1030,6 @@ namespace nanojit
    //
    void Assembler::asm_neg_not(LInsp ins)
    {
-       LOpcode op = ins->opcode();
        LIns* lhs = ins->oprnd1();

        Register rr = prepareResultReg(ins, GpRegs);
@@ -1036,11 +1037,12 @@ namespace nanojit
        // If 'lhs' isn't in a register, it can be clobbered by 'ins'.
        Register ra = !lhs->isInReg() ? rr : lhs->getReg();

-       if (op == LIR_not)
+       if (ins->isop(LIR_not)) {
            NOT(rr);
-       else
+       } else {
+           NanoAssert(ins->isop(LIR_neg));
            NEG(rr);
-
+       }
        if (rr != ra)
            MR(rr, ra);

@@ -121,7 +121,7 @@ extern void* _tprof_before_id;
 #ifndef DOPROF
 #ifndef VMCFG_SYMBIAN
 #define _vprof(v,...)
-#define _nvprof(e,v,...)
+#define _nvprof(e,v)
 #define _hprof(h,n,...)
 #define _nhprof(e,v,n,...)
 #define _ntprof(e)
@@ -139,7 +139,7 @@ extern void* _tprof_before_id;
     ;\
 }

-#define _nvprof(e,v,...) \
+#define _nvprof(e,v) \
 { \
     static void* id = 0; \
     (id != 0) ? \