Bug 513863 - nanojit: refactor registerAlloc(). r=rreitmai.

--HG--
extra : convert_revision : 6b7cffb8984f821980d38d4c9ccd31f8bdd3e363
This commit is contained in:
Nicholas Nethercote 2009-11-18 12:27:57 +11:00
parent 391f339dc4
commit e26daf15b9
5 changed files with 59 additions and 49 deletions

View File

@ -112,36 +112,62 @@ namespace nanojit
#endif
}
Register Assembler::registerAlloc(RegisterMask allow)
// Finds a register in 'allow' to store the result of 'ins', evicting one
// if necessary. Doesn't consider the prior state of 'ins' (except that
// ins->isUsed() must be true).
Register Assembler::registerAlloc(LIns* ins, RegisterMask allow)
{
RegAlloc &regs = _allocator;
RegisterMask allowedAndFree = allow & regs.free;
RegisterMask allowedAndFree = allow & _allocator.free;
Register r;
NanoAssert(ins->isUsed());
if (allowedAndFree)
{
if (allowedAndFree) {
// At least one usable register is free -- no need to steal.
// Pick a preferred one if possible.
RegisterMask preferredAndFree = allowedAndFree & SavedRegs;
RegisterMask set = ( preferredAndFree ? preferredAndFree : allowedAndFree );
Register r = nRegisterAllocFromSet(set);
return r;
r = nRegisterAllocFromSet(set);
_allocator.addActive(r, ins);
ins->setReg(r);
} else {
counter_increment(steals);
// Nothing free, steal one.
// LSRA says pick the one with the furthest use.
LIns* vicIns = findVictim(allow);
NanoAssert(vicIns->isUsed());
r = vicIns->getReg();
_allocator.removeActive(r);
vicIns->setReg(UnknownReg);
// Restore vicIns.
asm_restore(vicIns, vicIns->resv(), r);
// r ends up staying active, but the LIns defining it changes.
_allocator.addActive(r, ins);
ins->setReg(r);
}
counter_increment(steals);
// nothing free, steal one
// LSRA says pick the one with the furthest use
LIns* vic = findVictim(allow);
NanoAssert(vic);
// restore vic
Register r = vic->getReg();
regs.removeActive(r);
vic->setReg(UnknownReg);
asm_restore(vic, vic->resv(), r);
return r;
}
// Allocates a scratch register from 'allow' for a short-lived value that
// is not tied to any particular LIns, evicting a current occupant if no
// allowed register is free. The register is handed back already marked
// as free, so it is only safe to use for code generation purposes until
// the register state is next inspected or updated.
Register Assembler::registerAllocTmp(RegisterMask allow)
{
    // registerAlloc() requires an in-use LIns to bind the register to, so
    // hand it a throwaway one.
    LIns placeholder;
    placeholder.markAsUsed();
    Register tmp = registerAlloc(&placeholder, allow);

    // Undo the binding: take the register out of the active set and put
    // it back in the free set, ready for use as a temporary.
    _allocator.removeActive(tmp);
    _allocator.addFree(tmp);
    return tmp;
}
/**
* these instructions don't have to be saved & reloaded to spill,
* they can just be recalculated w/out any inputs.
@ -352,17 +378,13 @@ namespace nanojit
// No reservation. Create one, and do a fresh allocation.
ins->markAsUsed();
RegisterMask prefer = hint(ins, allow);
r = registerAlloc(prefer);
ins->setReg(r);
_allocator.addActive(r, ins);
r = registerAlloc(ins, prefer);
} else if (!ins->hasKnownReg()) {
// Existing reservation with an unknown register. Do a fresh
// allocation.
RegisterMask prefer = hint(ins, allow);
r = registerAlloc(prefer);
ins->setReg(r);
_allocator.addActive(r, ins);
r = registerAlloc(ins, prefer);
} else if (rmask(r = ins->getReg()) & allow) {
// Existing reservation with a known register allocated, and
@ -380,9 +402,7 @@ namespace nanojit
// x87 <-> xmm copy required
//_nvprof("fpu-evict",1);
evict(r, ins);
r = registerAlloc(prefer);
ins->setReg(r);
_allocator.addActive(r, ins);
r = registerAlloc(ins, prefer);
} else
#elif defined(NANOJIT_PPC)
if (((rmask(r)&GpRegs) && !(allow&GpRegs)) ||
@ -405,9 +425,7 @@ namespace nanojit
//
_allocator.retire(r);
Register s = r;
r = registerAlloc(prefer);
ins->setReg(r);
_allocator.addActive(r, ins);
r = registerAlloc(ins, prefer);
if ((rmask(s) & GpRegs) && (rmask(r) & GpRegs)) {
#ifdef NANOJIT_ARM
MOV(s, r); // ie. move 'ins' from its pre-state reg to its post-state reg

View File

@ -250,7 +250,8 @@ namespace nanojit
void arFree(uint32_t idx);
void arReset();
Register registerAlloc(RegisterMask allow);
Register registerAlloc(LIns* ins, RegisterMask allow);
Register registerAllocTmp(RegisterMask allow);
void registerResetAll();
void evictAllActiveRegs();
void evictSomeActiveRegs(RegisterMask regs);

View File

@ -442,8 +442,7 @@ namespace nanojit
// value is either a 64bit struct or maybe a float
// that isn't live in an FPU reg. Either way, don't
// put it in an FPU reg just to load & store it.
Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
_allocator.addFree(t);
Register t = registerAllocTmp(GpRegs & ~(rmask(rd)|rmask(rs)));
STW32(t, dd+4, rd);
LDSW32(rs, ds+4, t);
STW32(t, dd, rd);
@ -905,8 +904,7 @@ namespace nanojit
underrunProtect(72);
// where our result goes
Register rr = prepResultReg(ins, FpRegs);
Register rt = registerAlloc(FpRegs & ~(rmask(rr)));
_allocator.addFree(rt);
Register rt = registerAllocTmp(FpRegs & ~(rmask(rr)));
Register gr = findRegFor(ins->oprnd1(), GpRegs);
int disp = -8;

View File

@ -1758,8 +1758,7 @@ namespace nanojit
emitrxb_imm(X64_jmpx, (Register)0, indexreg, (Register)5, (int32_t)(uintptr_t)table);
} else {
// don't use R13 for base because we want to use mod=00, i.e. [index*8+base + 0]
Register tablereg = registerAlloc(GpRegs & ~(rmask(indexreg)|rmask(R13)));
_allocator.addFree(tablereg);
Register tablereg = registerAllocTmp(GpRegs & ~(rmask(indexreg)|rmask(R13)));
// jmp [indexreg*8 + tablereg]
emitxb(X64_jmpxb, indexreg, tablereg);
// tablereg <- #table

View File

@ -616,16 +616,14 @@ namespace nanojit
if (config.sse2)
{
// use SSE to load+store 64bits
Register t = registerAlloc(XmmRegs);
_allocator.addFree(t);
Register t = registerAllocTmp(XmmRegs);
SSE_STQ(dd, rd, t);
SSE_LDQ(t, ds, rs);
}
else
{
// get a scratch reg
Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
_allocator.addFree(t);
Register t = registerAllocTmp(GpRegs & ~(rmask(rd)|rmask(rs)));
ST(rd, dd+4, t);
LD(t, ds+4, rs);
ST(rd, dd, t);
@ -1187,11 +1185,10 @@ namespace nanojit
LDSDm(rr, &k_ONE);
} else if (d && d == (int)d) {
// can fit in 32bits? then use cvt which is faster
Register gr = registerAlloc(GpRegs);
Register gr = registerAllocTmp(GpRegs);
SSE_CVTSI2SD(rr, gr);
SSE_XORPDr(rr,rr); // zero rr to ensure no dependency stalls
LDi(gr, (int)d);
_allocator.addFree(gr);
} else {
findMemFor(ins);
const int d = disp(ins);
@ -1559,7 +1556,7 @@ namespace nanojit
{
// don't call findRegFor, we want a reg we can stomp on for a very short time,
// not a reg that will continue to be associated with the LIns
Register gr = registerAlloc(GpRegs);
Register gr = registerAllocTmp(GpRegs);
// technique inspired by gcc disassembly
// Edwin explains it:
@ -1597,9 +1594,6 @@ namespace nanojit
SUBi(gr, 0x80000000);
LD(gr, d, FP);
}
// ok, we're done with it
_allocator.addFree(gr);
}
else
{