gecko/js/src/nanojit/Assembler.cpp
/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is [Open Source Virtual Machine].
*
* The Initial Developer of the Original Code is
* Adobe System Incorporated.
* Portions created by the Initial Developer are Copyright (C) 2004-2007
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Adobe AS3 Team
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nanojit.h"
#ifdef FEATURE_NANOJIT
#ifdef AVMPLUS_PORTING_API
#include "portapi_nanojit.h"
#endif
#if defined(AVMPLUS_UNIX) && defined(AVMPLUS_ARM)
#include <asm/unistd.h>
extern "C" void __clear_cache(char *BEG, char *END);
#endif
namespace nanojit
{
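// Filters out LIR instructions whose results have no reservation (i.e. no
// compiled use of them exists), while always passing through operations
// with side effects: stores, guards, branches, non-CSEable calls, labels,
// LIR_live, LIR_loop and returns.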
class DeadCodeFilter: public LirFilter
{
const CallInfo *functions;
bool ignoreInstruction(LInsp ins)
{
LOpcode op = ins->opcode();
if (ins->isStore() ||
op == LIR_loop ||
op == LIR_label ||
op == LIR_live ||
isRet(op)) {
return false;
}
return ins->resv() == 0;
}
public:
DeadCodeFilter(LirFilter *in, const CallInfo *f) : LirFilter(in), functions(f) {}
LInsp read() {
for (;;) {
LInsp i = in->read();
if (!i || i->isGuard() || i->isBranch()
|| i->isCall() && !i->isCse(functions)
|| !ignoreInstruction(i))
return i;
}
}
};
#ifdef NJ_VERBOSE
class VerboseBlockReader: public LirFilter
{
Assembler *assm;
LirNameMap *names;
avmplus::List<LInsp, avmplus::LIST_NonGCObjects> block;
bool flushnext;
public:
VerboseBlockReader(LirFilter *in, Assembler *a, LirNameMap *n)
: LirFilter(in), assm(a), names(n), block(a->_gc), flushnext(false)
{}
void flush() {
flushnext = false;
if (!block.isEmpty()) {
for (int j=0,n=block.size(); j < n; j++) {
LIns *i = block[j];
assm->outputf(" %s", names->formatIns(block[j]));
if (i->isop(LIR_label)) {
assm->outputf(" %p:", assm->_nIns);
assm->output("");
}
}
block.clear();
}
}
void flush_add(LInsp i) {
flush();
block.add(i);
}
LInsp read() {
LInsp i = in->read();
if (!i) {
flush();
return i;
}
if (i->isGuard()) {
flush_add(i);
if (i->oprnd1())
block.add(i->oprnd1());
}
else if (isRet(i->opcode()) || i->isBranch()) {
flush_add(i);
}
else {
if (flushnext)
flush();
flush_add(i);//block.add(i);
if (i->isop(LIR_label))
flushnext = true;
}
return i;
}
};
#endif
/**
* Need the following:
*
* - merging paths ( build a graph? ), possibly use external rep to drive codegen
*/
Assembler::Assembler(Fragmento* frago)
: hasLoop(0)
, _frago(frago)
, _gc(frago->core()->gc)
, _labels(_gc)
, _patches(_gc)
, pending_lives(_gc)
{
AvmCore *core = frago->core();
nInit(core);
verbose_only( _verbose = !core->quiet_opt() && core->verbose() );
verbose_only( _outputCache = 0);
internalReset();
pageReset();
}
void Assembler::arReset()
{
_activation.highwatermark = 0;
_activation.lowwatermark = 0;
_activation.tos = 0;
for(uint32_t i=0; i<NJ_MAX_STACK_ENTRY; i++)
_activation.entry[i] = 0;
}
void Assembler::registerResetAll()
{
nRegisterResetAll(_allocator);
// keep a tally of the registers to check that our allocator works correctly
debug_only(_allocator.count = _allocator.countFree(); )
debug_only(_allocator.checkCount(); )
debug_only(_fpuStkDepth = 0; )
}
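// Pick a register from 'allow' for the current value. Among the free
// registers we first try the callee-saved set (so scratch registers stay
// available across calls); if nothing in 'allow' is free we steal the
// active register whose value has the furthest next use (the linear-scan
// heuristic) and let asm_restore() emit code to rematerialize or reload
// the victim. Since code is generated backwards, that restore executes
// before the victim's later uses.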
Register Assembler::registerAlloc(RegisterMask allow)
{
RegAlloc &regs = _allocator;
// RegisterMask prefer = livePastCall(_ins) ? saved : scratch;
RegisterMask prefer = SavedRegs & allow;
RegisterMask free = regs.free & allow;
RegisterMask set = prefer;
if (set == 0) set = allow;
if (free)
{
// at least one is free
set &= free;
// ok we have at least 1 free register so let's try to pick
// the best one given the profile of the instruction
if (!set)
{
// desired register class is not free so pick first of any class
set = free;
}
NanoAssert((set & allow) != 0);
Register r = nRegisterAllocFromSet(set);
regs.used |= rmask(r);
return r;
}
counter_increment(steals);
// nothing free, steal one
// LSRA says pick the one with the furthest use
LIns* vic = findVictim(regs, allow);
NanoAssert(vic != NULL);
Reservation* resv = getresv(vic);
// restore vic
Register r = resv->reg;
regs.removeActive(r);
resv->reg = UnknownReg;
asm_restore(vic, resv, r);
return r;
}
void Assembler::reserveReset()
{
_resvTable[0].arIndex = 0;
int i;
for(i=1; i<NJ_MAX_STACK_ENTRY; i++) {
_resvTable[i].arIndex = i-1;
_resvTable[i].used = 0;
}
_resvFree= i-1;
}
/**
* these instructions don't have to be saved & reloaded to spill,
* they can just be recalculated w/out any inputs.
*/
bool Assembler::canRemat(LIns *i) {
return i->isconst() || i->isconstq() || i->isop(LIR_alloc);
}
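// Reservations live in _resvTable and are referenced from a LIns by a small
// index (resv()/setresv()). Free entries form a singly linked list threaded
// through the arIndex field, with entry 0 acting as the "no reservation"
// sentinel; e.g. right after reserveReset() the list is roughly
//     _resvFree = N-1 -> N-2 -> ... -> 1 -> 0 (end)
// where N = NJ_MAX_STACK_ENTRY. Hitting index 0 on allocation means the
// table is exhausted (ResvFull).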
Reservation* Assembler::reserveAlloc(LInsp i)
{
uint32_t item = _resvFree;
Reservation *r = &_resvTable[item];
_resvFree = r->arIndex;
r->reg = UnknownReg;
r->arIndex = 0;
r->used = 1;
if (!item)
setError(ResvFull);
i->setresv(item);
return r;
}
void Assembler::reserveFree(LInsp i)
{
Reservation *rs = getresv(i);
NanoAssert(rs == &_resvTable[i->resv()]);
rs->arIndex = _resvFree;
rs->used = 0;
_resvFree = i->resv();
i->setresv(0);
}
void Assembler::internalReset()
{
// readies the assembler for a fresh code generation pass.
registerResetAll();
reserveReset();
arReset();
pending_lives.clear();
}
NIns* Assembler::pageAlloc(bool exitPage)
{
Page*& list = (exitPage) ? _nativeExitPages : _nativePages;
Page* page = _frago->pageAlloc();
if (page)
{
page->next = list;
list = page;
nMarkExecute(page);
_stats.pages++;
}
else
{
// return prior page (to allow overwrites) and mark out of mem
page = list;
setError(OutOMem);
}
return &page->code[sizeof(page->code)/sizeof(NIns)]; // just past the end
}
void Assembler::pageReset()
{
pagesFree(_nativePages);
pagesFree(_nativeExitPages);
_nIns = 0;
_nExitIns = 0;
_stats.pages = 0;
nativePageReset();
}
void Assembler::pagesFree(Page*& page)
{
while(page)
{
Page *next = page->next; // pull next ptr prior to free
_frago->pageFree(page);
page = next;
}
}
#define bytesFromTop(x) ( (size_t)(x) - (size_t)pageTop(x) )
#define bytesToBottom(x) ( (size_t)pageBottom(x) - (size_t)(x) )
#define bytesBetween(x,y) ( (size_t)(x) - (size_t)(y) )
int32_t Assembler::codeBytes()
{
// start and end on same page?
size_t exit = 0;
int32_t pages = _stats.pages;
if (_nExitIns-1 == _stats.codeExitStart)
;
else if (samepage(_nExitIns,_stats.codeExitStart))
exit = bytesBetween(_stats.codeExitStart, _nExitIns);
else
{
pages--;
exit = ((intptr_t)_stats.codeExitStart & (NJ_PAGE_SIZE-1)) ? bytesFromTop(_stats.codeExitStart)+1 : 0;
exit += bytesToBottom(_nExitIns)+1;
}
size_t main = 0;
if (_nIns-1 == _stats.codeStart)
;
else if (samepage(_nIns,_stats.codeStart))
main = bytesBetween(_stats.codeStart, _nIns);
else
{
pages--;
main = ((intptr_t)_stats.codeStart & (NJ_PAGE_SIZE-1)) ? bytesFromTop(_stats.codeStart)+1 : 0;
main += bytesToBottom(_nIns)+1;
}
//fprintf(stderr,"size %d, exit is %d, main is %d, page count %d, sizeof %d\n", (int)((pages) * NJ_PAGE_SIZE + main + exit),(int)exit, (int)main, (int)_stats.pages, (int)sizeof(Page));
return (pages) * NJ_PAGE_SIZE + main + exit;
}
#undef bytesFromTop
#undef bytesToBottom
#undef bytesBetween
Page* Assembler::handoverPages(bool exitPages)
{
Page*& list = (exitPages) ? _nativeExitPages : _nativePages;
NIns*& ins = (exitPages) ? _nExitIns : _nIns;
Page* start = list;
list = 0;
ins = 0;
return start;
}
#ifdef _DEBUG
bool Assembler::onPage(NIns* where, bool exitPages)
{
Page* page = (exitPages) ? _nativeExitPages : _nativePages;
bool on = false;
while(page)
{
if (samepage(where-1,page))
on = true;
page = page->next;
}
return on;
}
void Assembler::pageValidate()
{
if (error()) return;
// _nIns and _nExitIns need to be at least on
// one of these pages
NanoAssertMsg( onPage(_nIns)&& onPage(_nExitIns,true), "Native instruction pointer overstep paging bounds; check overrideProtect for last instruction");
}
#endif
#ifdef _DEBUG
void Assembler::resourceConsistencyCheck()
{
if (error()) return;
#ifdef NANOJIT_IA32
NanoAssert(_allocator.active[FST0] && _fpuStkDepth == -1 ||
!_allocator.active[FST0] && _fpuStkDepth == 0);
#endif
AR &ar = _activation;
// check AR entries
NanoAssert(ar.highwatermark < NJ_MAX_STACK_ENTRY);
LIns* ins = 0;
RegAlloc* regs = &_allocator;
for(uint32_t i = ar.lowwatermark; i < ar.tos; i++)
{
ins = ar.entry[i];
if ( !ins )
continue;
Reservation *r = getresv(ins);
NanoAssert(r != 0);
int32_t idx = r - _resvTable;
NanoAssertMsg(idx, "MUST have a resource for the instruction for it to have a stack location assigned to it");
if (r->arIndex) {
if (ins->isop(LIR_alloc)) {
int j=i+1;
for (int n = i + (ins->size()>>2); j < n; j++) {
NanoAssert(ar.entry[j]==ins);
}
NanoAssert(r->arIndex == (uint32_t)j-1);
i = j-1;
}
else if (ins->isQuad()) {
NanoAssert(ar.entry[i - stack_direction(1)]==ins);
i += 1; // skip high word
}
else {
NanoAssertMsg(r->arIndex == i, "Stack record index mismatch");
}
}
NanoAssertMsg( r->reg==UnknownReg || regs->isConsistent(r->reg,ins), "Register record mismatch");
}
registerConsistencyCheck();
// check resv table
int32_t inuseCount = 0;
int32_t notInuseCount = 0;
for(uint32_t i=1; i < sizeof(_resvTable)/sizeof(_resvTable[0]); i++) {
_resvTable[i].used ? inuseCount++ : notInuseCount++;
}
int32_t freeCount = 0;
uint32_t free = _resvFree;
while(free) {
free = _resvTable[free].arIndex;
freeCount++;
}
NanoAssert( ( freeCount==notInuseCount && inuseCount+notInuseCount==(NJ_MAX_STACK_ENTRY-1) ) );
}
void Assembler::registerConsistencyCheck()
{
// check registers
RegAlloc *regs = &_allocator;
uint32_t managed = regs->managed;
Register r = FirstReg;
while(managed)
{
if (managed&1)
{
if (regs->isFree(r))
{
NanoAssert(regs->getActive(r)==0);
}
else
{
LIns* ins = regs->getActive(r);
// @todo we should be able to check across RegAlloc's somehow (to include savedGP...)
Reservation *v = getresv(ins);
NanoAssert(v != 0);
int32_t idx = v - _resvTable;
NanoAssert(idx >= 0 && idx < NJ_MAX_STACK_ENTRY);
NanoAssertMsg(idx, "MUST have a resource for the instruction for it to have a register assigned to it");
NanoAssertMsg( regs->getActive(v->reg)==ins, "Register record mismatch");
}
}
// next register in bitfield
r = nextreg(r);
managed >>= 1;
}
}
#endif /* _DEBUG */
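// Allocate registers for two operands at once, ensuring they end up in
// distinct registers drawn from 'allow' (unless ia == ib, in which case
// both reservations refer to the same register).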
void Assembler::findRegFor2(RegisterMask allow, LIns* ia, Reservation* &resva, LIns* ib, Reservation* &resvb)
{
if (ia == ib)
{
findRegFor(ia, allow);
resva = resvb = getresv(ia);
}
else
{
Register rb = UnknownReg;
resvb = getresv(ib);
if (resvb && (rb = resvb->reg) != UnknownReg) {
if (allow & rmask(rb)) {
// ib already assigned to an allowable reg, keep that one
allow &= ~rmask(rb);
} else {
// ib assigned to unusable reg, pick a different one below.
rb = UnknownReg;
}
}
Register ra = findRegFor(ia, allow);
resva = getresv(ia);
NanoAssert(error() || (resva != 0 && ra != UnknownReg));
if (rb == UnknownReg)
{
allow &= ~rmask(ra);
findRegFor(ib, allow);
resvb = getresv(ib);
}
}
}
Register Assembler::findSpecificRegFor(LIns* i, Register w)
{
return findRegFor(i, rmask(w));
}
Register Assembler::getBaseReg(LIns *i, int &d, RegisterMask allow)
{
if (i->isop(LIR_alloc)) {
d += findMemFor(i);
return FP;
} else {
return findRegFor(i, allow);
}
}
Register Assembler::findRegFor(LIns* i, RegisterMask allow)
{
if (i->isop(LIR_alloc)) {
// never allocate a reg for this w/out stack space too
findMemFor(i);
}
Reservation* resv = getresv(i);
Register r;
// if we have an existing reservation and it has a non-unknown
// register allocated, and that register is in our allowed mask,
// return it.
if (resv && (r=resv->reg) != UnknownReg && (rmask(r) & allow)) {
_allocator.useActive(r);
return r;
}
// figure out what registers are preferred for this instruction
RegisterMask prefer = hint(i, allow);
// if we didn't have a reservation, allocate one now
if (!resv)
resv = reserveAlloc(i);
r = resv->reg;
#ifdef AVMPLUS_IA32
if (r != UnknownReg &&
((rmask(r)&XmmRegs) && !(allow&XmmRegs) ||
(rmask(r)&x87Regs) && !(allow&x87Regs)))
{
// x87 <-> xmm copy required
//_nvprof("fpu-evict",1);
evict(r);
r = UnknownReg;
}
#endif
if (r == UnknownReg)
{
r = resv->reg = registerAlloc(prefer);
_allocator.addActive(r, i);
return r;
}
else
{
// the already-allocated register isn't in the allowed mask;
// we need to grab a new one and then copy over the old
// contents to the new.
resv->reg = UnknownReg;
_allocator.retire(r);
Register s = resv->reg = registerAlloc(prefer);
_allocator.addActive(s, i);
if ((rmask(r) & GpRegs) && (rmask(s) & GpRegs)) {
MR(r, s);
}
else {
asm_nongp_copy(r, s);
}
return s;
}
}
int Assembler::findMemFor(LIns *i)
{
Reservation* resv = getresv(i);
if (!resv)
resv = reserveAlloc(i);
if (!resv->arIndex) {
resv->arIndex = arReserve(i);
NanoAssert(resv->arIndex <= _activation.highwatermark);
}
return disp(resv);
}
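// Pick the register that will hold this instruction's result, then release
// the instruction's resources. Because code is generated backwards we are
// at the point where the value is defined: freeRsrcOf() emits a spill to
// the value's stack slot here if one was assigned, which in execution
// order lands right after the defining instruction.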
Register Assembler::prepResultReg(LIns *i, RegisterMask allow)
{
Reservation* resv = getresv(i);
const bool pop = !resv || resv->reg == UnknownReg;
Register rr = findRegFor(i, allow);
freeRsrcOf(i, pop);
return rr;
}
void Assembler::asm_spilli(LInsp i, Reservation *resv, bool pop)
{
int d = disp(resv);
Register rr = resv->reg;
bool quad = i->opcode() == LIR_param || i->isQuad();
asm_spill(rr, d, pop, quad);
if (d)
{
verbose_only(if (_verbose) {
outputf(" spill %s",_thisfrag->lirbuf->names->formatRef(i));
})
}
}
void Assembler::freeRsrcOf(LIns *i, bool pop)
{
Reservation* resv = getresv(i);
int index = resv->arIndex;
Register rr = resv->reg;
if (rr != UnknownReg)
{
asm_spilli(i, resv, pop);
_allocator.retire(rr); // free any register associated with entry
}
if (index)
arFree(index); // free any stack space associated with entry
reserveFree(i); // clear fields of entry and add it to free list
}
void Assembler::evict(Register r)
{
registerAlloc(rmask(r));
_allocator.addFree(r);
}
void Assembler::asm_cmp(LIns *cond)
{
LOpcode condop = cond->opcode();
// LIR_ov and LIR_cs recycle the flags set by arithmetic ops
if ((condop == LIR_ov) || (condop == LIR_cs))
return;
LInsp lhs = cond->oprnd1();
LInsp rhs = cond->oprnd2();
Reservation *rA, *rB;
NanoAssert((!lhs->isQuad() && !rhs->isQuad()) || (lhs->isQuad() && rhs->isQuad()));
// Not supported yet.
#if !defined NANOJIT_64BIT
NanoAssert(!lhs->isQuad() && !rhs->isQuad());
#endif
// ready to issue the compare
if (rhs->isconst())
{
int c = rhs->constval();
if (c == 0 && cond->isop(LIR_eq)) {
Register r = findRegFor(lhs, GpRegs);
if (rhs->isQuad()) {
#if defined NANOJIT_64BIT
TESTQ(r, r);
#endif
} else {
TEST(r,r);
}
// No 64-bit immediates so fall-back to below
}
else if (!rhs->isQuad()) {
Register r = getBaseReg(lhs, c, GpRegs);
CMPi(r, c);
}
}
else
{
findRegFor2(GpRegs, lhs, rA, rhs, rB);
Register ra = rA->reg;
Register rb = rB->reg;
if (rhs->isQuad()) {
#if defined NANOJIT_64BIT
CMPQ(ra, rb);
#endif
} else {
CMP(ra, rb);
}
}
}
void Assembler::patch(GuardRecord *lr)
{
Fragment *frag = lr->target;
NanoAssert(frag->fragEntry != 0);
NIns* was = asm_adjustBranch((NIns*)lr->jmp, frag->fragEntry);
if (!lr->origTarget) lr->origTarget = was;
verbose_only(verbose_outputf("patching jump at %p to target %p (was %p)\n",
lr->jmp, frag->fragEntry, was);)
}
void Assembler::unpatch(GuardRecord *lr)
{
NIns* was = asm_adjustBranch((NIns*)lr->jmp, (NIns*)lr->origTarget);
(void)was;
verbose_only(verbose_outputf("unpatching jump at %p to original target %p (was %p)\n",
lr->jmp, lr->origTarget, was);)
}
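// Produce the branch target for a guard's side exit. If no register state
// was captured for this exit we emit a fresh exit stub via
// asm_leave_trace(); otherwise the exit leads into an already-compiled
// fragment, so we merge our register state with the captured one and
// branch straight to that fragment's entry.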
NIns* Assembler::asm_exit(LInsp guard)
{
SideExit *exit = guard->exit();
NIns* at = 0;
if (!_branchStateMap->get(exit))
{
at = asm_leave_trace(guard);
}
else
{
RegAlloc* captured = _branchStateMap->get(exit);
intersectRegisterState(*captured);
verbose_only(
verbose_outputf(" merging trunk with %s",
_frago->labels->format(exit->target));
verbose_outputf(" %p:",_nIns);
)
at = exit->target->fragEntry;
NanoAssert(at != 0);
_branchStateMap->remove(exit);
}
return at;
}
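// Emit the out-of-line exit block for a guard. swapptrs() exchanges _nIns
// and _nExitIns, so everything generated between the two swaps lands in
// the exit pages rather than the main code stream; the returned address is
// where the mainline conditional branch should jump to.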
NIns* Assembler::asm_leave_trace(LInsp guard)
{
verbose_only(bool priorVerbose = _verbose; )
verbose_only( _verbose = verbose_enabled() && _frago->core()->config.verbose_exits; )
verbose_only( int32_t nativeSave = _stats.native );
verbose_only(verbose_outputf("--------------------------------------- end exit block SID %d", guard->exit()->sid);)
RegAlloc capture = _allocator;
// this point is unreachable. so free all the registers.
// if an instruction has a stack entry we will leave it alone,
// otherwise we free it entirely. intersectRegisterState will restore.
releaseRegisters();
swapptrs();
_inExit = true;
//verbose_only( verbose_outputf(" LIR_xend swapptrs, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )
nFragExit(guard);
// restore the callee-saved registers (aka saved params)
assignSavedParams();
// if/when we patch this exit to jump over to another fragment,
// that fragment will need its parameters set up just like ours.
LInsp stateins = _thisfrag->lirbuf->state;
Register state = findSpecificRegFor(stateins, argRegs[stateins->imm8()]);
asm_bailout(guard, state);
intersectRegisterState(capture);
// this can be useful for breaking whenever an exit is taken
//INT3();
//NOP();
// we are done producing the exit logic for the guard so demark where our exit block code begins
NIns* jmpTarget = _nIns; // target in exit path for our mainline conditional jump
// swap back pointers, effectively storing the last location used in the exit path
swapptrs();
_inExit = false;
//verbose_only( verbose_outputf(" LIR_xt/xf swapptrs, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
verbose_only( verbose_outputf(" %p:",jmpTarget);)
verbose_only( verbose_outputf("--------------------------------------- exit block (LIR_xt|LIR_xf)") );
#ifdef NANOJIT_IA32
NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, "LIR_xtf, _fpuStkDepth=%d, expect %d\n",_fpuStkDepth, _sv_fpuStkDepth);
debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; )
#endif
verbose_only( _verbose = priorVerbose; )
verbose_only(_stats.exitnative += (_stats.native-nativeSave));
return jmpTarget;
}
void Assembler::beginAssembly(Fragment *frag, RegAllocMap* branchStateMap)
{
_thisfrag = frag;
_activation.lowwatermark = 1;
_activation.tos = _activation.lowwatermark;
_activation.highwatermark = _activation.tos;
counter_reset(native);
counter_reset(exitnative);
counter_reset(steals);
counter_reset(spills);
counter_reset(remats);
setError(None);
// native code gen buffer setup
nativePageSetup();
#ifdef AVMPLUS_PORTING_API
_endJit1Addr = _nIns;
_endJit2Addr = _nExitIns;
#endif
// make sure we got at least one page of memory
if (error()) return;
#ifdef PERFM
_stats.pages = 0;
_stats.codeStart = _nIns-1;
_stats.codeExitStart = _nExitIns-1;
//fprintf(stderr,"pageReset %d start %x exit start %x\n", _stats.pages, (int)_stats.codeStart, (int)_stats.codeExitStart);
#endif /* PERFM */
_epilogue = genEpilogue();
_branchStateMap = branchStateMap;
_labels.clear();
_patches.clear();
verbose_only( verbose_outputf(" %p:",_nIns) );
verbose_only( verbose_output(" epilogue:") );
}
void Assembler::assemble(Fragment* frag, NInsList& loopJumps)
{
if (error()) return;
AvmCore *core = _frago->core();
_thisfrag = frag;
// set up backwards pipeline: assembler -> StackFilter -> LirReader
LirReader bufreader(frag->lastIns);
GC *gc = core->gc;
StackFilter storefilter1(&bufreader, gc, frag->lirbuf, frag->lirbuf->sp);
StackFilter storefilter2(&storefilter1, gc, frag->lirbuf, frag->lirbuf->rp);
DeadCodeFilter deadfilter(&storefilter2, frag->lirbuf->_functions);
LirFilter* rdr = &deadfilter;
verbose_only(
VerboseBlockReader vbr(rdr, this, frag->lirbuf->names);
if (verbose_enabled())
rdr = &vbr;
)
verbose_only(_thisfrag->compileNbr++; )
verbose_only(_frago->_stats.compiles++; )
verbose_only(_frago->_stats.totalCompiles++; )
_latestGuard = 0;
_inExit = false;
gen(rdr, loopJumps);
frag->fragEntry = _nIns;
frag->outbound = core->config.tree_opt? _latestGuard : 0;
//fprintf(stderr, "assemble frag %X entry %X\n", (int)frag, (int)frag->fragEntry);
if (!error()) {
// patch all branches
while(!_patches.isEmpty())
{
NIns* where = _patches.lastKey();
LInsp targ = _patches.removeLast();
LabelState *label = _labels.get(targ);
NIns* ntarg = label->addr;
if (ntarg) {
nPatchBranch(where,ntarg);
}
else {
_err = UnknownBranch;
break;
}
}
}
}
void Assembler::endAssembly(Fragment* frag, NInsList& loopJumps)
{
while(!loopJumps.isEmpty())
{
NIns* loopJump = (NIns*)loopJumps.removeLast();
nPatchBranch(loopJump, _nIns);
}
NIns* patchEntry = 0;
if (!error())
{
patchEntry = genPrologue();
verbose_only( verbose_outputf(" %p:",_nIns); )
verbose_only( verbose_output(" prologue"); )
}
// something bad happened?
if (!error())
{
// check for resource leaks
debug_only(
for(uint32_t i=_activation.lowwatermark;i<_activation.highwatermark; i++) {
NanoAssertMsgf(_activation.entry[i] == 0, "frame entry %d wasn't freed\n",-4*i);
}
)
frag->fragEntry = patchEntry;
NIns* code = _nIns;
#ifdef PERFM
_nvprof("code", codeBytes()); // requires that all pages are released between begin/endAssembly()otherwise we double count
#endif
// let the fragment manage the pages if we're using trees and there are branches
Page* manage = (_frago->core()->config.tree_opt) ? handoverPages() : 0;
frag->setCode(code, manage); // root of tree should manage all pages
//fprintf(stderr, "endAssembly frag %X entry %X\n", (int)frag, (int)frag->fragEntry);
}
NanoAssertMsgf(error() || _fpuStkDepth == 0,"_fpuStkDepth %d\n",_fpuStkDepth);
internalReset(); // clear the reservation tables and regalloc
NanoAssert(_branchStateMap->isEmpty());
_branchStateMap = 0;
#ifdef AVMPLUS_ARM
// If we've modified the code, we need to flush the instruction cache so we
// don't end up executing stale code
# if defined(UNDER_CE)
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
# elif defined(AVMPLUS_UNIX)
for (int i = 0; i < 2; i++) {
Page *p = (i == 0) ? _nativePages : _nativeExitPages;
Page *first = p;
while (p) {
if (!p->next || p->next != p+1) {
__clear_cache((char*)first, (char*)(p+1));
first = p->next;
}
p = p->next;
}
}
# endif
#endif
# ifdef AVMPLUS_PORTING_API
NanoJIT_PortAPI_FlushInstructionCache(_nIns, _endJit1Addr);
NanoJIT_PortAPI_FlushInstructionCache(_nExitIns, _endJit2Addr);
# endif
}
void Assembler::copyRegisters(RegAlloc* copyTo)
{
*copyTo = _allocator;
}
void Assembler::releaseRegisters()
{
for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
{
LIns *i = _allocator.getActive(r);
if (i)
{
// clear reg allocation, preserve stack allocation.
Reservation* resv = getresv(i);
NanoAssert(resv != 0);
_allocator.retire(r);
if (r == resv->reg)
resv->reg = UnknownReg;
if (!resv->arIndex && resv->reg == UnknownReg)
{
reserveFree(i);
}
}
}
}
#ifdef PERFM
#define countlir_live() _nvprof("lir-live",1)
#define countlir_ret() _nvprof("lir-ret",1)
#define countlir_alloc() _nvprof("lir-alloc",1)
#define countlir_var() _nvprof("lir-var",1)
#define countlir_use() _nvprof("lir-use",1)
#define countlir_def() _nvprof("lir-def",1)
#define countlir_imm() _nvprof("lir-imm",1)
#define countlir_param() _nvprof("lir-param",1)
#define countlir_cmov() _nvprof("lir-cmov",1)
#define countlir_ld() _nvprof("lir-ld",1)
#define countlir_ldq() _nvprof("lir-ldq",1)
#define countlir_alu() _nvprof("lir-alu",1)
#define countlir_qjoin() _nvprof("lir-qjoin",1)
#define countlir_qlo() _nvprof("lir-qlo",1)
#define countlir_qhi() _nvprof("lir-qhi",1)
#define countlir_fpu() _nvprof("lir-fpu",1)
#define countlir_st() _nvprof("lir-st",1)
#define countlir_stq() _nvprof("lir-stq",1)
#define countlir_jmp() _nvprof("lir-jmp",1)
#define countlir_jcc() _nvprof("lir-jcc",1)
#define countlir_label() _nvprof("lir-label",1)
#define countlir_xcc() _nvprof("lir-xcc",1)
#define countlir_x() _nvprof("lir-x",1)
#define countlir_loop() _nvprof("lir-loop",1)
#define countlir_call() _nvprof("lir-call",1)
#else
#define countlir_live()
#define countlir_ret()
#define countlir_alloc()
#define countlir_var()
#define countlir_use()
#define countlir_def()
#define countlir_imm()
#define countlir_param()
#define countlir_cmov()
#define countlir_ld()
#define countlir_ldq()
#define countlir_alu()
#define countlir_qjoin()
#define countlir_qlo()
#define countlir_qhi()
#define countlir_fpu()
#define countlir_st()
#define countlir_stq()
#define countlir_jmp()
#define countlir_jcc()
#define countlir_label()
#define countlir_xcc()
#define countlir_x()
#define countlir_loop()
#define countlir_call()
#endif
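// Main code generation loop. The LIR is read in reverse (last instruction
// first) and native code is emitted backwards, with _nIns walking down
// from the end of the current page; this is why the epilogue is generated
// in beginAssembly() and the prologue in endAssembly().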
void Assembler::gen(LirFilter* reader, NInsList& loopJumps)
{
// trace must start with LIR_x or LIR_loop
NanoAssert(reader->pos()->isop(LIR_x) || reader->pos()->isop(LIR_loop));
for (LInsp ins = reader->read(); ins != 0 && !error(); ins = reader->read())
{
LOpcode op = ins->opcode();
switch(op)
{
default:
NanoAssertMsgf(false, "unsupported LIR instruction: %d (~0x40: %d)\n", op, op&~LIR64);
break;
case LIR_live: {
countlir_live();
pending_lives.add(ins->oprnd1());
break;
}
case LIR_ret: {
countlir_ret();
if (_nIns != _epilogue) {
JMP(_epilogue);
}
assignSavedParams();
findSpecificRegFor(ins->oprnd1(), retRegs[0]);
break;
}
case LIR_fret: {
countlir_ret();
if (_nIns != _epilogue) {
JMP(_epilogue);
}
assignSavedParams();
#ifdef NANOJIT_IA32
findSpecificRegFor(ins->oprnd1(), FST0);
#else
NanoAssert(false);
#endif
fpu_pop();
break;
}
// allocate some stack space. the value of this instruction
// is the address of the stack space.
case LIR_alloc: {
countlir_alloc();
Reservation *resv = getresv(ins);
NanoAssert(resv->arIndex != 0);
Register r = resv->reg;
if (r != UnknownReg) {
_allocator.retire(r);
resv->reg = UnknownReg;
asm_restore(ins, resv, r);
}
freeRsrcOf(ins, 0);
break;
}
case LIR_short:
case LIR_int:
{
countlir_imm();
Register rr = prepResultReg(ins, GpRegs);
int32_t val;
if (op == LIR_int)
val = ins->imm32();
else
val = ins->imm16();
if (val == 0)
XOR(rr,rr);
else
LDi(rr, val);
break;
}
case LIR_quad:
{
countlir_imm();
asm_quad(ins);
break;
}
#if !defined NANOJIT_64BIT
case LIR_callh:
{
// return result of quad-call in register
prepResultReg(ins, rmask(retRegs[1]));
// if hi half was used, we must use the call to ensure it happens
findRegFor(ins->oprnd1(), rmask(retRegs[0]));
break;
}
#endif
case LIR_param:
{
countlir_param();
uint32_t a = ins->imm8();
uint32_t kind = ins->imm8b();
if (kind == 0) {
// ordinary param
AbiKind abi = _thisfrag->lirbuf->abi;
uint32_t abi_regcount = abi == ABI_FASTCALL ? 2 : abi == ABI_THISCALL ? 1 : 0;
if (a < abi_regcount) {
// incoming arg in register
prepResultReg(ins, rmask(argRegs[a]));
} else {
// incoming arg is on stack, and EBP points nearby (see genPrologue)
Register r = prepResultReg(ins, GpRegs);
int d = (a - abi_regcount) * sizeof(intptr_t) + 8;
LD(r, d, FP);
}
}
else {
// saved param
prepResultReg(ins, rmask(savedRegs[a]));
}
break;
}
case LIR_qlo:
{
countlir_qlo();
LIns *q = ins->oprnd1();
if (!asm_qlo(ins, q))
{
Register rr = prepResultReg(ins, GpRegs);
int d = findMemFor(q);
LD(rr, d, FP);
}
break;
}
case LIR_qhi:
{
countlir_qhi();
Register rr = prepResultReg(ins, GpRegs);
LIns *q = ins->oprnd1();
int d = findMemFor(q);
LD(rr, d+4, FP);
break;
}
case LIR_qcmov:
case LIR_cmov:
{
countlir_cmov();
LIns* condval = ins->oprnd1();
NanoAssert(condval->isCmp());
LIns* values = ins->oprnd2();
NanoAssert(values->opcode() == LIR_2);
LIns* iftrue = values->oprnd1();
LIns* iffalse = values->oprnd2();
NanoAssert(op == LIR_qcmov || (!iftrue->isQuad() && !iffalse->isQuad()));
const Register rr = prepResultReg(ins, GpRegs);
// this code assumes that neither LD nor MR nor MRcc set any of the condition flags.
// (This is true on Intel, is it true on all architectures?)
const Register iffalsereg = findRegFor(iffalse, GpRegs & ~rmask(rr));
if (op == LIR_cmov) {
switch (condval->opcode())
{
// note that these are all opposites...
case LIR_eq: MRNE(rr, iffalsereg); break;
case LIR_ov: MRNO(rr, iffalsereg); break;
case LIR_cs: MRNC(rr, iffalsereg); break;
case LIR_lt: MRGE(rr, iffalsereg); break;
case LIR_le: MRG(rr, iffalsereg); break;
case LIR_gt: MRLE(rr, iffalsereg); break;
case LIR_ge: MRL(rr, iffalsereg); break;
case LIR_ult: MRAE(rr, iffalsereg); break;
case LIR_ule: MRA(rr, iffalsereg); break;
case LIR_ugt: MRBE(rr, iffalsereg); break;
case LIR_uge: MRB(rr, iffalsereg); break;
debug_only( default: NanoAssert(0); break; )
}
} else if (op == LIR_qcmov) {
#if !defined NANOJIT_64BIT
NanoAssert(0);
#else
switch (condval->opcode())
{
// note that these are all opposites...
case LIR_eq: MRQNE(rr, iffalsereg); break;
case LIR_ov: MRQNO(rr, iffalsereg); break;
case LIR_cs: MRQNC(rr, iffalsereg); break;
case LIR_lt: MRQGE(rr, iffalsereg); break;
case LIR_le: MRQG(rr, iffalsereg); break;
case LIR_gt: MRQLE(rr, iffalsereg); break;
case LIR_ge: MRQL(rr, iffalsereg); break;
case LIR_ult: MRQAE(rr, iffalsereg); break;
case LIR_ule: MRQA(rr, iffalsereg); break;
case LIR_ugt: MRQBE(rr, iffalsereg); break;
case LIR_uge: MRQB(rr, iffalsereg); break;
debug_only( default: NanoAssert(0); break; )
}
#endif
}
/*const Register iftruereg =*/ findSpecificRegFor(iftrue, rr);
asm_cmp(condval);
break;
}
case LIR_ld:
case LIR_ldc:
case LIR_ldcb:
{
countlir_ld();
LIns* base = ins->oprnd1();
LIns* disp = ins->oprnd2();
Register rr = prepResultReg(ins, GpRegs);
int d = disp->constval();
Register ra = getBaseReg(base, d, GpRegs);
if (op == LIR_ldcb)
LD8Z(rr, d, ra);
else
LD(rr, d, ra);
break;
}
case LIR_ldq:
case LIR_ldqc:
{
countlir_ldq();
asm_load64(ins);
break;
}
case LIR_neg:
case LIR_not:
{
countlir_alu();
Register rr = prepResultReg(ins, GpRegs);
LIns* lhs = ins->oprnd1();
Reservation *rA = getresv(lhs);
// if this is last use of lhs in reg, we can re-use result reg
Register ra;
if (rA == 0 || (ra=rA->reg) == UnknownReg)
ra = findSpecificRegFor(lhs, rr);
// else, rA already has a register assigned.
if (op == LIR_not)
NOT(rr);
else
NEG(rr);
if ( rr != ra )
MR(rr,ra);
break;
}
case LIR_qjoin:
{
countlir_qjoin();
asm_qjoin(ins);
break;
}
#if defined NANOJIT_64BIT
case LIR_qiadd:
case LIR_qiand:
case LIR_qilsh:
case LIR_qior:
{
asm_qbinop(ins);
break;
}
#endif
case LIR_add:
case LIR_addp:
case LIR_sub:
case LIR_mul:
case LIR_and:
case LIR_or:
case LIR_xor:
case LIR_lsh:
case LIR_rsh:
case LIR_ush:
{
countlir_alu();
LInsp lhs = ins->oprnd1();
LInsp rhs = ins->oprnd2();
Register rb = UnknownReg;
RegisterMask allow = GpRegs;
bool forceReg = (op == LIR_mul || !rhs->isconst());
#ifdef NANOJIT_ARM
// ARM can't encode an immediate operand outside of +/-255 (for AND)
// or outside of 0..255 for the others.
if (!forceReg)
{
if (rhs->isconst() && !isU8(rhs->constval()))
forceReg = true;
}
#endif
if (lhs != rhs && forceReg)
{
if ((rb = asm_binop_rhs_reg(ins)) == UnknownReg) {
rb = findRegFor(rhs, allow);
}
allow &= ~rmask(rb);
}
else if ((op == LIR_add||op == LIR_addp) && lhs->isop(LIR_alloc) && rhs->isconst()) {
// add alloc+const, use lea
Register rr = prepResultReg(ins, allow);
int d = findMemFor(lhs) + rhs->constval();
LEA(rr, d, FP);
break;
}
Register rr = prepResultReg(ins, allow);
Reservation* rA = getresv(lhs);
Register ra;
// if this is last use of lhs in reg, we can re-use result reg
if (rA == 0 || (ra = rA->reg) == UnknownReg)
ra = findSpecificRegFor(lhs, rr);
// else, rA already has a register assigned.
if (forceReg)
{
if (lhs == rhs)
rb = ra;
if (op == LIR_add || op == LIR_addp)
ADD(rr, rb);
else if (op == LIR_sub)
SUB(rr, rb);
else if (op == LIR_mul)
MUL(rr, rb);
else if (op == LIR_and)
AND(rr, rb);
else if (op == LIR_or)
OR(rr, rb);
else if (op == LIR_xor)
XOR(rr, rb);
else if (op == LIR_lsh)
SHL(rr, rb);
else if (op == LIR_rsh)
SAR(rr, rb);
else if (op == LIR_ush)
SHR(rr, rb);
else
NanoAssertMsg(0, "Unsupported");
}
else
{
int c = rhs->constval();
if (op == LIR_add || op == LIR_addp) {
#ifdef NANOJIT_IA32_TODO
if (ra != rr) {
// this doesn't set cc's, only use it when cc's not required.
LEA(rr, c, ra);
ra = rr; // suppress mov
} else
#endif
{
ADDi(rr, c);
}
} else if (op == LIR_sub) {
#ifdef NANOJIT_IA32
if (ra != rr) {
LEA(rr, -c, ra);
ra = rr;
} else
#endif
{
SUBi(rr, c);
}
} else if (op == LIR_and)
ANDi(rr, c);
else if (op == LIR_or)
ORi(rr, c);
else if (op == LIR_xor)
XORi(rr, c);
else if (op == LIR_lsh)
SHLi(rr, c);
else if (op == LIR_rsh)
SARi(rr, c);
else if (op == LIR_ush)
SHRi(rr, c);
else
NanoAssertMsg(0, "Unsupported");
}
if ( rr != ra )
MR(rr,ra);
break;
}
#ifndef NJ_SOFTFLOAT
case LIR_fneg:
{
countlir_fpu();
asm_fneg(ins);
break;
}
case LIR_fadd:
case LIR_fsub:
case LIR_fmul:
case LIR_fdiv:
{
countlir_fpu();
asm_fop(ins);
break;
}
case LIR_i2f:
{
countlir_fpu();
asm_i2f(ins);
break;
}
case LIR_u2f:
{
countlir_fpu();
asm_u2f(ins);
break;
}
#endif // NJ_SOFTFLOAT
case LIR_st:
case LIR_sti:
{
countlir_st();
asm_store32(ins->oprnd1(), ins->immdisp(), ins->oprnd2());
break;
}
case LIR_stq:
case LIR_stqi:
{
countlir_stq();
LIns* value = ins->oprnd1();
LIns* base = ins->oprnd2();
int dr = ins->immdisp();
if (value->isop(LIR_qjoin))
{
// this is correct for little-endian only
asm_store32(value->oprnd1(), dr, base);
asm_store32(value->oprnd2(), dr+4, base);
}
else
{
asm_store64(value, dr, base);
}
break;
}
case LIR_j:
{
countlir_jmp();
LInsp to = ins->getTarget();
LabelState *label = _labels.get(to);
// the jump is always taken, so whatever register state we have from
// downstream code is irrelevant to code before this jump. clear it out;
// we will pick up register state from the jump target, if we have seen
// that label.
releaseRegisters();
if (label && label->addr) {
// forward jump - pick up register state from target.
unionRegisterState(label->regs);
JMP(label->addr);
}
else {
// backwards jump
hasLoop = true;
handleLoopCarriedExprs();
if (!label) {
// save empty register state at loop header
_labels.add(to, 0, _allocator);
}
else {
intersectRegisterState(label->regs);
}
JMP(0);
_patches.put(_nIns, to);
verbose_only(
verbose_outputf(" Loop %s -> %s",
lirNames[ins->opcode()],
_thisfrag->lirbuf->names->formatRef(to));
)
}
break;
}
case LIR_jt:
case LIR_jf:
{
countlir_jcc();
LInsp to = ins->getTarget();
LIns* cond = ins->oprnd1();
LabelState *label = _labels.get(to);
if (label && label->addr) {
// forward jump to known label. need to merge with label's register state.
unionRegisterState(label->regs);
asm_branch(op == LIR_jf, cond, label->addr);
}
else {
// back edge.
hasLoop = true;
handleLoopCarriedExprs();
if (!label) {
// evict all registers, most conservative approach.
evictRegs(~_allocator.free);
_labels.add(to, 0, _allocator);
}
else {
// merge with the register state captured at the label.
intersectRegisterState(label->regs);
}
NIns *branch = asm_branch(op == LIR_jf, cond, 0);
_patches.put(branch,to);
verbose_only(
verbose_outputf("Loop %s -> %s",
lirNames[ins->opcode()],
_thisfrag->lirbuf->names->formatRef(to));
)
}
break;
}
case LIR_label:
{
countlir_label();
LabelState *label = _labels.get(ins);
if (!label) {
// label seen first, normal target of forward jump, save addr & allocator
_labels.add(ins, _nIns, _allocator);
}
else {
// we're at the top of a loop
hasLoop = true;
NanoAssert(label->addr == 0 && label->regs.isValid());
//evictRegs(~_allocator.free);
intersectRegisterState(label->regs);
//asm_align_code();
label->addr = _nIns;
verbose_only(
verbose_outputf("Loop %s", _thisfrag->lirbuf->names->formatRef(ins));
)
}
break;
}
case LIR_xt:
case LIR_xf:
{
countlir_xcc();
// we only support cmp with guard right now; also assume the target is 'close' so we only emit the branch
NIns* exit = asm_exit(ins); // does intersectRegisterState()
LIns* cond = ins->oprnd1();
asm_branch(op == LIR_xf, cond, exit);
break;
}
case LIR_x:
{
countlir_x();
verbose_only(verbose_output(""));
// generate the side exit branch on the main trace.
NIns *exit = asm_exit(ins);
JMP( exit );
break;
}
case LIR_loop:
{
countlir_loop();
JMP_long_placeholder(); // jump to SOT
verbose_only( if (_verbose && _outputCache) { _outputCache->removeLast(); outputf(" jmp SOT"); } );
loopJumps.add(_nIns);
#ifdef NJ_VERBOSE
// branching from this frag to itself.
if (_frago->core()->config.show_stats)
#if defined NANOJIT_AMD64
LDQi(argRegs[1], intptr_t((Fragment*)_thisfrag));
#else
LDi(argRegs[1], int((Fragment*)_thisfrag));
#endif
#endif
assignSavedParams();
// restore first parameter, the only one we use
LInsp state = _thisfrag->lirbuf->state;
findSpecificRegFor(state, argRegs[state->imm8()]);
break;
}
#ifndef NJ_SOFTFLOAT
case LIR_feq:
case LIR_fle:
case LIR_flt:
case LIR_fgt:
case LIR_fge:
{
countlir_fpu();
// only want certain regs
Register r = prepResultReg(ins, AllowableFlagRegs);
asm_setcc(r, ins);
#ifdef NJ_ARM_VFP
SETE(r);
#else
// SETcc only sets low 8 bits, so extend
MOVZX8(r,r);
SETNP(r);
#endif
asm_fcmp(ins);
break;
}
#endif
case LIR_eq:
case LIR_ov:
case LIR_cs:
case LIR_le:
case LIR_lt:
case LIR_gt:
case LIR_ge:
case LIR_ult:
case LIR_ule:
case LIR_ugt:
case LIR_uge:
{
countlir_alu();
// only want certain regs
Register r = prepResultReg(ins, AllowableFlagRegs);
// SETcc only sets low 8 bits, so extend
MOVZX8(r,r);
if (op == LIR_eq)
SETE(r);
else if (op == LIR_ov)
SETO(r);
else if (op == LIR_cs)
SETC(r);
else if (op == LIR_lt)
SETL(r);
else if (op == LIR_le)
SETLE(r);
else if (op == LIR_gt)
SETG(r);
else if (op == LIR_ge)
SETGE(r);
else if (op == LIR_ult)
SETB(r);
else if (op == LIR_ule)
SETBE(r);
else if (op == LIR_ugt)
SETA(r);
else // if (op == LIR_uge)
SETAE(r);
asm_cmp(ins);
break;
}
#ifndef NJ_SOFTFLOAT
case LIR_fcall:
case LIR_fcalli:
#endif
#if defined NANOJIT_64BIT
case LIR_callh:
#endif
case LIR_call:
case LIR_calli:
{
countlir_call();
Register rr = UnknownReg;
#ifndef NJ_SOFTFLOAT
if ((op&LIR64))
{
// fcall or fcalli
Reservation* rR = getresv(ins);
rr = asm_prep_fcall(rR, ins);
}
else
#endif
{
rr = retRegs[0];
prepResultReg(ins, rmask(rr));
}
// do this after we've handled the call result, so we don't
// force the call result to be spilled unnecessarily.
evictScratchRegs();
asm_call(ins);
}
}
// check that all is well (don't check in exit paths since it's more complicated)
debug_only( pageValidate(); )
debug_only( resourceConsistencyCheck(); )
}
}
NIns* Assembler::asm_branch(bool branchOnFalse, LInsp cond, NIns* targ)
{
NIns* at = 0;
LOpcode condop = cond->opcode();
NanoAssert(cond->isCond());
#ifndef NJ_SOFTFLOAT
if (condop >= LIR_feq && condop <= LIR_fge)
{
return asm_jmpcc(branchOnFalse, cond, targ);
}
#endif
// produce the branch
if (branchOnFalse)
{
if (condop == LIR_eq)
JNE(targ);
else if (condop == LIR_ov)
JNO(targ);
else if (condop == LIR_cs)
JNC(targ);
else if (condop == LIR_lt)
JNL(targ);
else if (condop == LIR_le)
JNLE(targ);
else if (condop == LIR_gt)
JNG(targ);
else if (condop == LIR_ge)
JNGE(targ);
else if (condop == LIR_ult)
JNB(targ);
else if (condop == LIR_ule)
JNBE(targ);
else if (condop == LIR_ugt)
JNA(targ);
else //if (condop == LIR_uge)
JNAE(targ);
}
else // op == LIR_xt
{
if (condop == LIR_eq)
JE(targ);
else if (condop == LIR_ov)
JO(targ);
else if (condop == LIR_cs)
JC(targ);
else if (condop == LIR_lt)
JL(targ);
else if (condop == LIR_le)
JLE(targ);
else if (condop == LIR_gt)
JG(targ);
else if (condop == LIR_ge)
JGE(targ);
else if (condop == LIR_ult)
JB(targ);
else if (condop == LIR_ule)
JBE(targ);
else if (condop == LIR_ugt)
JA(targ);
else //if (condop == LIR_uge)
JAE(targ);
}
at = _nIns;
asm_cmp(cond);
return at;
}
void Assembler::assignSavedParams()
{
// restore saved regs
releaseRegisters();
LirBuffer *b = _thisfrag->lirbuf;
for (int i=0, n = NumSavedRegs; i < n; i++) {
LIns *p = b->savedParams[i];
if (p)
findSpecificRegFor(p, savedRegs[p->imm8()]);
}
}
void Assembler::reserveSavedParams()
{
LirBuffer *b = _thisfrag->lirbuf;
for (int i=0, n = NumSavedRegs; i < n; i++) {
LIns *p = b->savedParams[i];
if (p)
findMemFor(p);
}
}
void Assembler::handleLoopCarriedExprs()
{
// ensure that exprs spanning the loop are marked live at the end of the loop
reserveSavedParams();
for (int i=0, n=pending_lives.size(); i < n; i++) {
findMemFor(pending_lives[i]);
}
}
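// Free an activation-record slot. Multi-slot values (quads and LIR_alloc
// blocks) occupy consecutive entries that all point at the same LIns, so
// walk downwards clearing every entry that belongs to it.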
void Assembler::arFree(uint32_t idx)
{
AR &ar = _activation;
LIns *i = ar.entry[idx];
NanoAssert(i != 0);
do {
ar.entry[idx] = 0;
idx--;
} while (ar.entry[idx] == i);
}
#ifdef NJ_VERBOSE
void Assembler::printActivationState()
{
bool verbose_activation = false;
if (!verbose_activation)
return;
#ifdef NANOJIT_ARM
verbose_only(
if (_verbose) {
char* s = &outline[0];
memset(s, ' ', 51); s[51] = '\0';
s += strlen(s);
sprintf(s, " SP ");
s += strlen(s);
for(uint32_t i=_activation.lowwatermark; i<_activation.tos;i++) {
LInsp ins = _activation.entry[i];
if (ins && ins !=_activation.entry[i+1]) {
sprintf(s, "%d(%s) ", 4*i, _thisfrag->lirbuf->names->formatRef(ins));
s += strlen(s);
}
}
output(&outline[0]);
}
)
#else
verbose_only(
char* s = &outline[0];
if (_verbose) {
memset(s, ' ', 51); s[51] = '\0';
s += strlen(s);
sprintf(s, " ebp ");
s += strlen(s);
for(uint32_t i=_activation.lowwatermark; i<_activation.tos;i++) {
LInsp ins = _activation.entry[i];
if (ins /* && _activation.entry[i]!=_activation.entry[i+1]*/) {
sprintf(s, "%d(%s) ", -4*i,_thisfrag->lirbuf->names->formatRef(ins));
s += strlen(s);
}
}
output(&outline[0]);
}
)
#endif
}
#endif
bool canfit(int32_t size, int32_t loc, AR &ar) {
for (int i=0; i < size; i++) {
if (ar.entry[loc+stack_direction(i)])
return false;
}
return true;
}
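// Reserve activation-record slots for l. Slots are 4 bytes each: an
// ordinary value takes sizeof(intptr_t)>>2 slots, a quad takes two
// adjacent slots on an 8-byte boundary, and a LIR_alloc block takes
// size>>2 slots, also 8-byte aligned. Returns the slot index that is
// recorded in the reservation's arIndex.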
uint32_t Assembler::arReserve(LIns* l)
{
NanoAssert(!l->isTramp());
//verbose_only(printActivationState());
int32_t size = l->isop(LIR_alloc) ? (l->size()>>2) : l->isQuad() ? 2 : sizeof(intptr_t)>>2;
AR &ar = _activation;
const int32_t tos = ar.tos;
int32_t start = ar.lowwatermark;
int32_t i = 0;
NanoAssert(start>0);
if (size == 1) {
// easy most common case -- find a hole, or make the frame bigger
for (i=start; i < NJ_MAX_STACK_ENTRY; i++) {
if (ar.entry[i] == 0) {
// found a hole
ar.entry[i] = l;
break;
}
}
}
else if (size == 2) {
if ( (start&1)==1 ) start++; // round up to an even slot index (8-byte boundary)
for (i=start; i < NJ_MAX_STACK_ENTRY; i+=2) {
if ( (ar.entry[i+stack_direction(1)] == 0) && (i==tos || (ar.entry[i] == 0)) ) {
// found 2 adjacent aligned slots
NanoAssert(_activation.entry[i] == 0);
NanoAssert(_activation.entry[i+stack_direction(1)] == 0);
ar.entry[i] = l;
ar.entry[i+stack_direction(1)] = l;
break;
}
}
}
else {
// alloc larger block on an 8-byte boundary.
if (start < size) start = size;
if ((start&1)==1) start++;
for (i=start; i < NJ_MAX_STACK_ENTRY; i+=2) {
if (canfit(size, i, ar)) {
// place the entry in the table and mark the instruction with it
for (int32_t j=0; j < size; j++) {
NanoAssert(_activation.entry[i+stack_direction(j)] == 0);
_activation.entry[i+stack_direction(j)] = l;
}
break;
}
}
}
if (i >= (int32_t)ar.tos) {
ar.tos = ar.highwatermark = i+1;
}
if (tos+size >= NJ_MAX_STACK_ENTRY) {
setError(StackFull);
}
return i;
}
/**
* move regs around so the SavedRegs contains the highest priority regs.
*/
void Assembler::evictScratchRegs()
{
// find the top GpRegs that are candidates to put in SavedRegs
// tosave is a binary max-heap stored in an array: the root is tosave[0],
// and the children of tosave[i] are tosave[2*i+1] and tosave[2*i+2].
Register tosave[LastReg-FirstReg+1];
int len=0;
RegAlloc *regs = &_allocator;
for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
if (rmask(r) & GpRegs) {
LIns *i = regs->getActive(r);
if (i) {
if (canRemat(i)) {
evict(r);
}
else {
int32_t pri = regs->getPriority(r);
// add to heap by adding to end and bubbling up
int j = len++;
while (j > 0 && pri > regs->getPriority(tosave[(j-1)/2])) {
tosave[j] = tosave[(j-1)/2];
j = (j-1)/2;
}
NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
tosave[j] = r;
}
}
}
}
// tosave is now a max-heap of the live GP exprs, keyed by spill priority.
// allocate each of the top-priority exprs to a SavedReg
RegisterMask allow = SavedRegs;
while (allow && len > 0) {
// get the highest priority var
Register hi = tosave[0];
LIns *i = regs->getActive(hi);
Register r = findRegFor(i, allow);
allow &= ~rmask(r);
// remove from heap by replacing root with end element and bubbling down.
if (allow && --len > 0) {
Register last = tosave[len];
int j = 0;
while (2*j+1 < len) {
int child = 2*j+1;
if (child+1 < len && regs->getPriority(tosave[child+1]) > regs->getPriority(tosave[child]))
child++;
if (regs->getPriority(last) > regs->getPriority(tosave[child]))
break;
tosave[j] = tosave[child];
j = child;
}
tosave[j] = last;
}
}
// now evict everything else.
evictRegs(~SavedRegs);
}
void Assembler::evictRegs(RegisterMask regs)
{
// evict every active register in the given set, emitting code to restore its value
// @todo speed this up
for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
if ((rmask(r) & regs) && _allocator.getActive(r)) {
evict(r);
}
}
}
/**
* Merge the current state of the registers with a previously stored version
* current == saved skip
* current & saved evict current, keep saved
* current & !saved evict current (unionRegisterState would keep)
* !current & saved keep saved
*/
void Assembler::intersectRegisterState(RegAlloc& saved)
{
// evictions and pops first
RegisterMask skip = 0;
for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
{
LIns * curins = _allocator.getActive(r);
LIns * savedins = saved.getActive(r);
if (curins == savedins)
{
verbose_only( if (curins) verbose_outputf(" skip %s", regNames[r]); )
skip |= rmask(r);
}
else
{
if (curins) {
//_nvprof("intersect-evict",1);
evict(r);
}
#ifdef NANOJIT_IA32
if (savedins && (rmask(r) & x87Regs))
FSTP(r);
#endif
}
}
assignSaved(saved, skip);
}
/**
* Merge the current state of the registers with a previously stored version.
*
* current == saved skip
* current & saved evict current, keep saved
* current & !saved keep current (intersectRegisterState would evict)
* !current & saved keep saved
*/
void Assembler::unionRegisterState(RegAlloc& saved)
{
// evictions and pops first
RegisterMask skip = 0;
for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
{
LIns * curins = _allocator.getActive(r);
LIns * savedins = saved.getActive(r);
if (curins == savedins)
{
verbose_only( if (curins) verbose_outputf(" skip %s", regNames[r]); )
skip |= rmask(r);
}
else
{
if (curins && savedins) {
//_nvprof("union-evict",1);
evict(r);
}
#ifdef NANOJIT_IA32
if (rmask(r) & x87Regs) {
if (savedins) {
FSTP(r);
}
else {
// saved state did not have fpu reg allocated,
// so we must evict here to keep x87 stack balanced.
evict(r);
}
}
#endif
}
}
assignSaved(saved, skip);
}
void Assembler::assignSaved(RegAlloc &saved, RegisterMask skip)
{
// now reassign mainline registers
for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
{
LIns *i = saved.getActive(r);
if (i && !(skip&rmask(r)))
findSpecificRegFor(i, r);
}
debug_only(saved.used = 0); // marker that we are no longer in exit path
}
/**
* Guard records are laid out in the exit block buffer (_nExitIns),
* interspersed with the code. Preceding the record are the native
* instructions associated with the record (i.e. the exit code).
*
* The layout is as follows:
*
* [ native code ] [ GuardRecord1 ]
* ...
* [ native code ] [ GuardRecordN ]
*
* The guard record 'code' field should be used to locate
* the start of the native code associated with the
* exit block. N.B. the code may lie in a different page
* than the guard record
*
* The last guard record is used for the unconditional jump
* at the end of the trace.
*
* NOTE: It is also not guaranteed that the native code
* is contained on a single page.
*/
GuardRecord* Assembler::placeGuardRecord(LInsp guard)
{
// we align the guard records to a 4-byte boundary
size_t size = GuardRecordSize(guard);
SideExit *exit = guard->exit();
NIns* ptr = (NIns*)alignTo(_nIns-size, 4);
underrunProtect( (intptr_t)_nIns-(intptr_t)ptr ); // either got us a new page or there is enough space for us
GuardRecord* rec = (GuardRecord*) alignTo(_nIns-size,4);
rec->outgoing = _latestGuard;
_latestGuard = rec;
_nIns = (NIns*)rec;
rec->next = 0;
rec->origTarget = 0;
rec->target = exit->target;
rec->from = _thisfrag;
initGuardRecord(guard,rec);
if (exit->target)
exit->target->addLink(rec);
return rec;
}
void Assembler::setCallTable(const CallInfo* functions)
{
_functions = functions;
}
#ifdef NJ_VERBOSE
char Assembler::outline[8192];
void Assembler::outputf(const char* format, ...)
{
va_list args;
va_start(args, format);
outline[0] = '\0';
vsprintf(outline, format, args);
va_end(args);
output(outline);
}
void Assembler::output(const char* s)
{
if (_outputCache)
{
char* str = (char*)_gc->Alloc(strlen(s)+1);
strcpy(str, s);
_outputCache->add(str);
}
else
{
_frago->core()->console << s << "\n";
}
}
void Assembler::output_asm(const char* s)
{
if (!verbose_enabled())
return;
if (*s != '^')
output(s);
}
char* Assembler::outputAlign(char *s, int col)
{
int len = strlen(s);
int add = ((col-len)>0) ? col-len : 1;
memset(&s[len], ' ', add);
s[col] = '\0';
return &s[col];
}
#endif // verbose
#endif /* FEATURE_NANOJIT */
#if defined(FEATURE_NANOJIT) || defined(NJ_VERBOSE)
uint32_t CallInfo::_count_args(uint32_t mask) const
{
uint32_t argc = 0;
uint32_t argt = _argtypes;
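// argt is shifted before each test, so the low 2 bits (which hold the
// call's return type) are never counted as an argument.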
for (uint32_t i = 0; i < MAXARGS; ++i) {
argt >>= 2;
argc += (argt & mask) != 0;
}
return argc;
}
uint32_t CallInfo::get_sizes(ArgSize* sizes) const
{
uint32_t argt = _argtypes;
uint32_t argc = 0;
for (uint32_t i = 0; i < MAXARGS; i++) {
argt >>= 2;
ArgSize a = ArgSize(argt&3);
#ifdef NJ_SOFTFLOAT
if (a == ARGSIZE_F) {
sizes[argc++] = ARGSIZE_LO;
sizes[argc++] = ARGSIZE_LO;
continue;
}
#endif
if (a != ARGSIZE_NONE) {
sizes[argc++] = a;
}
}
if (isIndirect()) {
// add one more arg for indirect call address
argc++;
}
return argc;
}
void LabelStateMap::add(LIns *label, NIns *addr, RegAlloc &regs) {
LabelState *st = new (gc) LabelState(addr, regs);
labels.put(label, st);
}
LabelState* LabelStateMap::get(LIns *label) {
return labels.get(label);
}
}
#endif // FEATURE_NANOJIT