/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Adobe AS3 Team
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include "nanojit.h"

namespace nanojit
{
    #ifdef FEATURE_NANOJIT

    #ifdef AVMPLUS_WIN32
        #define AVMPLUS_ALIGN16(type) __declspec(align(16)) type
    #else
        #define AVMPLUS_ALIGN16(type) type __attribute__ ((aligned (16)))
    #endif

    class DeadCodeFilter: public LirFilter
    {
        Assembler *assm;
    public:
        DeadCodeFilter(LirFilter *in, Assembler *a) : LirFilter(in), assm(a) {}
        LInsp read()
        {
            for (;;)
            {
                LInsp i = in->read();
                if (!i || i->isGuard()
                    || (i->isCall() && !assm->_functions[i->imm8()]._cse)
                    || !assm->ignoreInstruction(i))
                    return i;
                if (i->isCall())
                {
                    // skip args
                    while (in->pos()->isArg())
                        in->read();
                }
            }
        }
    };

    #ifdef NJ_VERBOSE
    class VerboseBlockReader: public LirFilter
    {
        Assembler *assm;
        LirNameMap *names;
        avmplus::List<LInsp, avmplus::LIST_NonGCObjects> block;
    public:
        VerboseBlockReader(LirFilter *in, Assembler *a, LirNameMap *n)
            : LirFilter(in), assm(a), names(n), block(a->_gc) {}

        void flush() {
            assm->outputf("        %p:", assm->_nIns);
            assm->output("");
            for (int j = 0, n = block.size(); j < n; j++)
                assm->outputf("    %s", names->formatIns(block[j]));
            assm->output("");
            block.clear();
        }

        LInsp read() {
            LInsp i = in->read();
            if (!i) {
                flush();
                return i;
            }
            if (i->isGuard()) {
                flush();
                block.add(i);
                if (i->oprnd1())
                    block.add(i->oprnd1());
            }
            else if (!i->isArg()) {
                block.add(i);
            }
            return i;
        }
    };
    #endif
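    // Note on pipeline direction: the assembler consumes LIR back-to-front
    // (LirReader starts at frag->lastIns), and native code is emitted
    // backwards in memory as _nIns counts down. An instruction whose result
    // has not been referenced by any previously visited (i.e. later-executing)
    // instruction has no Reservation, which is exactly the condition
    // ignoreInstruction() tests; DeadCodeFilter uses it to drop such
    // instructions, and for a skipped CSE-able call it also consumes the
    // call's LIR_arg instructions so no argument setup is emitted.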
    /**
     * Need the following:
     *
     *    - merging paths ( build a graph? ), possibly use external rep to drive codegen
     */
    Assembler::Assembler(Fragmento* frago)
        : _frago(frago)
        , _gc(frago->core()->gc)
    {
        AvmCore *core = frago->core();
        nInit(core);
        verbose_only( _verbose = !core->quiet_opt() && core->verbose() );
        verbose_only( _outputCache = 0);

        internalReset();
        pageReset();
    }

    void Assembler::arReset()
    {
        _activation.highwatermark = 0;
        _activation.lowwatermark = 0;
        _activation.tos = 0;

        for (uint32_t i = 0; i < NJ_MAX_STACK_ENTRY; i++)
            _activation.entry[i] = 0;
    }

    void Assembler::registerResetAll()
    {
        nRegisterResetAll(_allocator);

        // keep a tally of the registers to check that the allocator works correctly
        debug_only(_allocator.count = _allocator.countFree(); )
        debug_only(_allocator.checkCount(); )
        debug_only(_fpuStkDepth = 0; )
    }

    Register Assembler::registerAlloc(RegisterMask allow)
    {
        RegAlloc &regs = _allocator;
        RegisterMask prefer = SavedRegs & allow;
        RegisterMask free = regs.free & allow;

        RegisterMask set = prefer;
        if (set == 0) set = allow;

        if (free)
        {
            // at least one register in the allowed set is free
            set &= free;

            // if the preferred class is not free, pick from any allowed class
            if (!set)
                set = free;

            NanoAssert((set & allow) != 0);
            Register r = nRegisterAllocFromSet(set);
            regs.used |= rmask(r);
            return r;
        }
        counter_increment(steals);

        // nothing free; steal one.
        // LSRA says pick the one with the furthest use.
        LIns* vic = findVictim(regs, allow, prefer);
        NanoAssert(vic != NULL);

        Reservation* resv = getresv(vic);

        // restore vic
        Register r = resv->reg;
        regs.removeActive(r);
        resv->reg = UnknownReg;

        asm_restore(vic, resv, r);
        return r;
    }

    void Assembler::reserveReset()
    {
        _resvTable[0].arIndex = 0;
        int i;
        for (i = 1; i < NJ_MAX_STACK_ENTRY; i++)
            _resvTable[i].arIndex = i-1;
        _resvFree = i-1;
    }

    Reservation* Assembler::reserveAlloc(LInsp i)
    {
        uint32_t item = _resvFree;
        Reservation *r = &_resvTable[item];
        _resvFree = r->arIndex;
        r->reg = UnknownReg;
        r->arIndex = 0;
        if (!item)
            setError(ResvFull);

        if (i->isconst() || i->isconstq())
            r->cost = 0;
        else if (i->isop(LIR_ld) && i->oprnd1() == _thisfrag->param0 &&
                 (i->oprnd2()->isconstval(offsetof(avmplus::InterpState,sp)) ||
                  i->oprnd2()->isconstval(offsetof(avmplus::InterpState,rp))))
            r->cost = 2;
        else
            r->cost = 1;

        i->setresv(item);
        return r;
    }

    void Assembler::reserveFree(LInsp i)
    {
        Reservation *rs = getresv(i);
        NanoAssert(rs == &_resvTable[i->resv()]);
        rs->arIndex = _resvFree;
        _resvFree = i->resv();
        i->setresv(0);
    }
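    // The Reservation table doubles as an intrusive free list: entry 0 is a
    // sentinel meaning "no reservation", and for entries on the free list
    // the arIndex field is reused as the next-free link. reserveReset()
    // threads every entry onto that list, reserveAlloc() pops the head
    // (running out hands back the sentinel and raises ResvFull), and
    // reserveFree() pushes an entry back. An instruction's resv() index
    // therefore means "unreserved" when zero.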
    void Assembler::internalReset()
    {
        // readies for a brand spanking new code generation pass.
        registerResetAll();
        reserveReset();
        arReset();
    }

    NIns* Assembler::pageAlloc(bool exitPage)
    {
        Page*& list = (exitPage) ? _nativeExitPages : _nativePages;
        Page* page = _frago->pageAlloc();
        if (page)
        {
            page->next = list;
            list = page;
            nMarkExecute(page);
        }
        else
        {
            // return prior page (to allow overwrites) and mark out of mem
            page = list;
            setError(OutOMem);
        }
        return &page->code[sizeof(page->code)/sizeof(NIns)]; // just past the end
    }

    void Assembler::pageReset()
    {
        pagesFree(_nativePages);
        pagesFree(_nativeExitPages);

        _nIns = 0;
        _nExitIns = 0;

        nativePageReset();
    }

    void Assembler::pagesFree(Page*& page)
    {
        while (page)
        {
            Page *next = page->next;  // pull next ptr prior to free
            _frago->pageFree(page);
            page = next;
        }
    }

    Page* Assembler::handoverPages(bool exitPages)
    {
        Page*& list = (exitPages) ? _nativeExitPages : _nativePages;
        NIns*& ins = (exitPages) ? _nExitIns : _nIns;
        Page* start = list;
        list = 0;
        ins = 0;
        return start;
    }

    #ifdef _DEBUG
    bool Assembler::onPage(NIns* where, bool exitPages)
    {
        Page* page = (exitPages) ? _nativeExitPages : _nativePages;
        bool on = false;
        while (page)
        {
            if (samepage(where-1, page))
                on = true;
            page = page->next;
        }
        return on;
    }

    void Assembler::pageValidate()
    {
        if (error()) return;
        // _nIns and _nExitIns need to be at least on one of these pages
        NanoAssertMsg(onPage(_nIns) && onPage(_nExitIns, true),
            "Native instruction pointer overstep paging bounds; check overrideProtect for last instruction");
    }
    #endif

    const CallInfo* Assembler::callInfoFor(int32_t fid)
    {
        NanoAssert(fid < CI_Max);
        return &_functions[fid];
    }

    #ifdef _DEBUG
    void Assembler::resourceConsistencyCheck()
    {
        if (error()) return;

        #ifdef NANOJIT_IA32
        NanoAssert((_allocator.active[FST0] && _fpuStkDepth == -1) ||
                   (!_allocator.active[FST0] && _fpuStkDepth == 0));
        #endif

        // for tracking resv usage
        LIns* resv[NJ_MAX_STACK_ENTRY];
        for (int i = 0; i < NJ_MAX_STACK_ENTRY; i++)
            resv[i] = 0;

        // check AR entries
        NanoAssert(_activation.highwatermark < NJ_MAX_STACK_ENTRY);
        LIns* ins = 0;
        RegAlloc* regs = &_allocator;
        for (uint32_t i = _activation.lowwatermark; i < _activation.tos; i++)
        {
            ins = _activation.entry[i];
            if (!ins)
                continue;
            Reservation *r = getresv(ins);
            NanoAssert(r != 0);
            int32_t idx = r - _resvTable;
            NanoAssert(idx >= 0 && idx < NJ_MAX_STACK_ENTRY);
            resv[idx] = ins;
            NanoAssertMsg(idx, "MUST have a resource for the instruction for it to have a stack location assigned to it");
            NanoAssertMsg(r->arIndex==0 || r->arIndex==i || (ins->isQuad() && r->arIndex==i-(stack_direction(1))), "Stack record index mismatch");
            NanoAssertMsg(r->reg==UnknownReg || regs->isConsistent(r->reg,ins), "Register record mismatch");
        }

        registerConsistencyCheck(resv);

        // check resv table: entries not seen in the AR or register scans
        // above should all be on the free list
        int32_t inuseCount = 0;
        int32_t notInuseCount = 0;
        for (uint32_t i = 1; i < sizeof(_resvTable)/sizeof(_resvTable[0]); i++)
        {
            if (resv[i])
                inuseCount++;
            else
                notInuseCount++;
        }

        uint32_t freeCount = 0;
        uint32_t free = _resvFree;
        while (free)
        {
            NanoAssert(resv[free] == 0);  // free entries must not be in use
            free = _resvTable[free].arIndex;
            freeCount++;
        }
        NanoAssert(freeCount == (uint32_t)notInuseCount);
    }

    void Assembler::registerConsistencyCheck(LIns** resv)
    {
        // check registers
        RegAlloc* regs = &_allocator;
        uint32_t managed = regs->managed;
        Register r = FirstReg;
        while (managed)
        {
            if (managed & 1)
            {
                if (regs->isFree(r))
                {
                    NanoAssert(regs->getActive(r) == 0);
                }
                else
                {
                    LIns* ins = regs->getActive(r);
                    // @todo we should be able to check across RegAlloc's somehow (to include savedGP...)
                    Reservation *v = getresv(ins);
                    NanoAssert(v);
                    int32_t idx = v - _resvTable;
                    NanoAssert(idx >= 0 && idx < NJ_MAX_STACK_ENTRY);
                    resv[idx] = ins;
                    NanoAssertMsg(idx, "MUST have a resource for the instruction for it to have a register assigned to it");
                    NanoAssertMsg(v->arIndex==0 || ins==_activation.entry[v->arIndex], "Stack record index mismatch");
                    NanoAssertMsg(regs->getActive(v->reg)==ins, "Register record mismatch");
                }
            }

            // next register in bitfield
            r = nextreg(r);
            managed >>= 1;
        }
    }
    #endif /* _DEBUG */

    void Assembler::findRegFor2(RegisterMask allow, LIns* ia, Reservation* &resva, LIns* ib, Reservation* &resvb)
    {
        if (ia == ib)
        {
            findRegFor(ia, allow);
            resva = resvb = getresv(ia);
        }
        else
        {
            Register rb = UnknownReg;
            resvb = getresv(ib);
            if (resvb && (rb = resvb->reg) != UnknownReg)
                allow &= ~rmask(rb);
            Register ra = findRegFor(ia, allow);
            resva = getresv(ia);
            NanoAssert(error() || (resva != 0 && ra != UnknownReg));
            if (rb == UnknownReg)
            {
                allow &= ~rmask(ra);
                findRegFor(ib, allow);
                resvb = getresv(ib);
            }
        }
    }

    Register Assembler::findSpecificRegFor(LIns* i, Register w)
    {
        return findRegFor(i, rmask(w));
    }

    Register Assembler::findRegFor(LIns* i, RegisterMask allow)
    {
        Reservation* resv = getresv(i);
        Register r;

        // if the value is already in an allowed register, use it
        if (resv && (r = resv->reg) != UnknownReg && (rmask(r) & allow)) {
            return r;
        }

        RegisterMask prefer = hint(i, allow);
        if (!resv)
            resv = reserveAlloc(i);

        if ((r = resv->reg) == UnknownReg)
        {
            if (resv->cost == 2 && (allow & SavedRegs))
                prefer = allow & SavedRegs;
            r = resv->reg = registerAlloc(prefer);
            _allocator.addActive(r, i);
            return r;
        }
        else
        {
            // the value is in r, but r is not allowed
            resv->reg = UnknownReg;
            _allocator.retire(r);
            if (resv->cost == 2 && (allow & SavedRegs))
                prefer = allow & SavedRegs;
            Register s = resv->reg = registerAlloc(prefer);
            _allocator.addActive(s, i);
            if (rmask(r) & GpRegs) {
                MR(r, s);
            }
            #ifdef NANOJIT_IA32
            else if ((rmask(r) & XmmRegs) && (rmask(s) & XmmRegs)) {
                MOVSD(r, s);
            }
            else {
                if (rmask(r) & XmmRegs) {
                    // x87 -> xmm
                    NanoAssert(false);
                } else {
                    // xmm -> x87
                    NanoAssert(false);
                }
            }
            #endif
            return s;
        }
    }
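    // When findRegFor() has to move a value out of a disallowed register r
    // into a fresh register s, remember that emission runs backwards: the
    // MR(r, s)/MOVSD(r, s) written above is placed *before* (in execution
    // order) all of the already-emitted code that expects the value in r,
    // so at run time the value is computed into s and then copied into r
    // for the downstream consumers.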
    int Assembler::findMemFor(LIns *i)
    {
        Reservation* resv = getresv(i);
        if (!resv)
            resv = reserveAlloc(i);
        if (!resv->arIndex)
            resv->arIndex = arReserve(i);
        return disp(resv);
    }

    Register Assembler::prepResultReg(LIns *i, RegisterMask allow)
    {
        Reservation* resv = getresv(i);
        const bool pop = !resv || resv->reg == UnknownReg;
        Register rr = findRegFor(i, allow);
        freeRsrcOf(i, pop);
        return rr;
    }

    void Assembler::freeRsrcOf(LIns *i, bool pop)
    {
        Reservation* resv = getresv(i);
        int index = resv->arIndex;
        Register rr = resv->reg;

        if (rr != UnknownReg)
        {
            asm_spill(i, resv, pop);
            _allocator.retire(rr);   // free any register associated with entry
        }
        arFree(index);               // free any stack space associated with entry
        reserveFree(i);              // clear fields of entry and add it to free list
    }

    void Assembler::evict(Register r)
    {
        registerAlloc(rmask(r));
        _allocator.addFree(r);
    }

    void Assembler::asm_cmp(LIns *cond)
    {
        LOpcode condop = cond->opcode();

        // LIR_ov and LIR_cs recycle the flags set by arithmetic ops
        if ((condop == LIR_ov) || (condop == LIR_cs))
            return;

        LInsp lhs = cond->oprnd1();
        LInsp rhs = cond->oprnd2();
        NanoAssert(!lhs->isQuad() && !rhs->isQuad());
        Reservation *rA, *rB;

        // ready to issue the compare
        if (rhs->isconst())
        {
            int c = rhs->constval();
            Register r = findRegFor(lhs, GpRegs);
            if (c == 0 && cond->isop(LIR_eq))
                TEST(r, r);
            else
                CMPi(r, c);
        }
        else
        {
            findRegFor2(GpRegs, lhs, rA, rhs, rB);
            Register ra = rA->reg;
            Register rb = rB->reg;
            CMP(ra, rb);
        }
    }
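    // How the float-compare masks in asm_fcmp() below work (a worked
    // example, derived from the flag encodings in its comments): after
    // UCOMISD (or FNSTSW_AX for x87) the relevant bits land in AH as
    // ZF/PF/CF, and TEST_AH(mask) sets the parity flag PF from the popcount
    // of AH & mask. For LIR_feq, mask 0x44 selects ZF and PF:
    //     equal     -> ZF,PF,CF = 100 -> AH & 0x44 = 0x40, one bit,  PF = 0
    //     unordered -> ZF,PF,CF = 111 -> AH & 0x44 = 0x44, two bits, PF = 1
    // so "PF clear" means the predicate is true, which is exactly what the
    // SETNP/JNP/JP users of asm_fcmp rely on; NaN operands come out false.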
    #ifndef NJ_SOFTFLOAT
    void Assembler::asm_fcmp(LIns *cond)
    {
        LOpcode condop = cond->opcode();
        NanoAssert(condop >= LIR_feq && condop <= LIR_fge);
        LIns* lhs = cond->oprnd1();
        LIns* rhs = cond->oprnd2();

        int mask;
        if (condop == LIR_feq)
            mask = 0x44;
        else if (condop == LIR_fle)
            mask = 0x41;
        else if (condop == LIR_flt)
            mask = 0x05;
        else if (condop == LIR_fge) {
            // swap, use le
            LIns* t = lhs; lhs = rhs; rhs = t;
            mask = 0x41;
        } else { // if (condop == LIR_fgt)
            // swap, use lt
            LIns* t = lhs; lhs = rhs; rhs = t;
            mask = 0x05;
        }

        if (sse2)
        {
            // UNORDERED:    ZF,PF,CF <- 111;
            // GREATER_THAN: ZF,PF,CF <- 000;
            // LESS_THAN:    ZF,PF,CF <- 001;
            // EQUAL:        ZF,PF,CF <- 100;

            if (condop == LIR_feq && lhs == rhs) {
                // nan check
                Register r = findRegFor(lhs, XmmRegs);
                UCOMISD(r, r);
            }
            else {
                evict(EAX);
                TEST_AH(mask);
                LAHF();
                Reservation *rA, *rB;
                findRegFor2(XmmRegs, lhs, rA, rhs, rB);
                UCOMISD(rA->reg, rB->reg);
            }
        }
        else
        {
            evict(EAX);
            TEST_AH(mask);
            FNSTSW_AX();

            NanoAssert(lhs->isQuad() && rhs->isQuad());
            Reservation *rA;
            if (lhs != rhs)
            {
                // compare two different numbers
                int d = findMemFor(rhs);
                rA = getresv(lhs);
                int pop = !rA || rA->reg == UnknownReg;
                findSpecificRegFor(lhs, FST0);
                // lhs is in ST(0) and rhs is on stack
                FCOM(pop, d, FP);
            }
            else
            {
                // compare n to itself, this is a NaN test.
                rA = getresv(lhs);
                int pop = !rA || rA->reg == UnknownReg;
                findSpecificRegFor(lhs, FST0);
                // value in ST(0)
                if (pop)
                    FCOMPP();
                else
                    FCOMP();
                FLDr(FST0); // DUP
            }
        }
    }
    #endif //NJ_SOFTFLOAT

    void Assembler::patch(GuardRecord *lr)
    {
        Fragment *frag = lr->target;
        NanoAssert(frag->fragEntry);
        NIns* was = asm_adjustBranch(lr->jmp, frag->fragEntry);
        if (!lr->origTarget)
            lr->origTarget = was;
        verbose_only(verbose_outputf("patching jump at %p to target %p (was %p)\n",
            lr->jmp, frag->fragEntry, was);)
    }

    void Assembler::unpatch(GuardRecord *lr)
    {
        NIns* was = asm_adjustBranch(lr->jmp, lr->origTarget);
        (void)was;
        verbose_only(verbose_outputf("unpatching jump at %p to original target %p (was %p)\n",
            lr->jmp, lr->origTarget, was);)
    }

    NIns* Assembler::asm_exit(LInsp guard)
    {
        SideExit *exit = guard->exit();
        NIns* at = 0;
        if (!_branchStateMap->get(exit))
        {
            at = asm_leave_trace(guard);
        }
        else
        {
            RegAlloc* captured = _branchStateMap->get(exit);
            mergeRegisterState(*captured);
            verbose_only(
                verbose_outputf("        merging trunk with %s",
                    _frago->labels->format(exit->target));
                verbose_outputf("        %p:", _nIns);
            )
            at = exit->target->fragEntry;
            NanoAssert(at);
            _branchStateMap->remove(exit);
        }
        return at;
    }

    NIns* Assembler::asm_leave_trace(LInsp guard)
    {
        verbose_only(bool priorVerbose = _verbose; )
        verbose_only( _verbose = verbose_enabled() && _frago->core()->config.verbose_exits; )
        verbose_only( int32_t nativeSave = _stats.native );
        verbose_only(verbose_outputf("--------------------------------------- end exit block SID %d", guard->exit()->sid);)

        RegAlloc capture = _allocator;

        // this point is unreachable, so free all the registers.
        // if an instruction has a stack entry we will leave it alone,
        // otherwise we free it entirely. mergeRegisterState will restore.
        releaseRegisters();

        swapptrs();
        _inExit = true;

        //verbose_only( verbose_outputf("        LIR_xend swapptrs, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
        debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )

        GuardRecord *lr = nFragExit(guard); (void)lr;

        mergeRegisterState(capture);

        // this can be useful for breaking whenever an exit is taken
        //INT3();
        //NOP();

        // we are done producing the exit logic for the guard so demark where our exit block code begins
        NIns* jmpTarget = _nIns;    // target in exit path for our mainline conditional jump

        // swap back pointers, effectively storing the last location used in the exit path
        swapptrs();
        _inExit = false;

        //verbose_only( verbose_outputf("        LIR_xt/xf swapptrs, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
        verbose_only( verbose_outputf("        %p:", jmpTarget);)
        verbose_only( verbose_outputf("--------------------------------------- exit block (LIR_xt|LIR_xf)") );

        #ifdef NANOJIT_IA32
        NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, ("LIR_xtf, _fpuStkDepth=%d, expect %d\n",_fpuStkDepth, _sv_fpuStkDepth));
        debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; )
        #endif

        verbose_only( _verbose = priorVerbose; )
        verbose_only(_stats.exitnative += (_stats.native-nativeSave));

        return jmpTarget;
    }

    bool Assembler::ignoreInstruction(LInsp ins)
    {
        LOpcode op = ins->opcode();
        if (ins->isStore() || op == LIR_loop || ins->isArg())
            return false;
        return getresv(ins) == 0;
    }
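    // Exit-block generation uses a second, independent stream of pages:
    // swapptrs() exchanges _nIns/_nExitIns (and their page lists), so
    // asm_leave_trace() above temporarily redirects all emission into the
    // exit-page buffer, lays down the register-merge code and the guard
    // record, then swaps back and hands the mainline the address to jump to.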
    void Assembler::beginAssembly(RegAllocMap* branchStateMap)
    {
        _activation.lowwatermark = 1;
        _activation.tos = _activation.lowwatermark;
        _activation.highwatermark = _activation.tos;

        counter_reset(native);
        counter_reset(exitnative);
        counter_reset(steals);
        counter_reset(spills);
        counter_reset(remats);

        setError(None);

        // native code gen buffer setup
        nativePageSetup();

        // make sure we got memory at least one page
        if (error()) return;

        _epilogue = genEpilogue(SavedRegs);
        _branchStateMap = branchStateMap;

        verbose_only( verbose_outputf("        %p:", _nIns) );
        verbose_only( verbose_output("        epilogue:") );
    }

    void Assembler::assemble(Fragment* frag, NInsList& loopJumps)
    {
        if (error()) return;
        AvmCore *core = _frago->core();
        GC *gc = core->gc;
        _thisfrag = frag;

        // set up backwards pipeline: assembler -> StoreFilter -> LirReader
        LirReader bufreader(frag->lastIns);
        StoreFilter storefilter(&bufreader, gc, frag->param0, frag->sp, frag->rp);
        DeadCodeFilter deadfilter(&storefilter, this);
        LirFilter* rdr = &deadfilter;
        verbose_only(
            VerboseBlockReader vbr(rdr, this, frag->lirbuf->names);
            if (verbose_enabled())
                rdr = &vbr;
        )

        verbose_only(_thisfrag->compileNbr++; )
        verbose_only(_frago->_stats.compiles++; )
        verbose_only(_frago->_stats.totalCompiles++; )
        _latestGuard = 0;
        _inExit = false;
        gen(rdr, loopJumps);
        frag->fragEntry = _nIns;
        frag->outbound = core->config.tree_opt ? _latestGuard : 0;
        //fprintf(stderr, "assemble frag %X entry %X\n", (int)frag, (int)frag->fragEntry);
    }

    void Assembler::endAssembly(Fragment* frag, NInsList& loopJumps)
    {
        while (!loopJumps.isEmpty())
        {
            NIns* loopJump = (NIns*)loopJumps.removeLast();
            nPatchBranch(loopJump, _nIns);
        }

        NIns* patchEntry = 0;
        if (!error())
        {
            patchEntry = genPrologue(SavedRegs);
            verbose_only( verbose_outputf("        %p:", _nIns); )
            verbose_only( verbose_output("        prologue"); )
        }

        // something bad happened?
        if (!error())
        {
            // check for resource leaks
            debug_only(
                for (uint32_t i = _activation.lowwatermark; i < _activation.highwatermark; i++) {
                    NanoAssertMsgf(_activation.entry[i] == 0, ("frame entry %d wasn't freed\n", -4*i));
                }
            )

            frag->fragEntry = patchEntry;
            NIns* code = _nIns;

            // let the fragment manage the pages if we're using trees and there are branches
            Page* manage = (_frago->core()->config.tree_opt) ? handoverPages() : 0;
            frag->setCode(code, manage); // root of tree should manage all pages
            NanoAssert(!_frago->core()->config.tree_opt || frag == frag->anchor || frag->kind == MergeTrace);
            //fprintf(stderr, "endAssembly frag %X entry %X\n", (int)frag, (int)frag->fragEntry);
        }

        AvmAssertMsg(error() || _fpuStkDepth == 0, ("_fpuStkDepth %d\n", _fpuStkDepth));

        internalReset();  // clear the reservation tables and regalloc
        NanoAssert(_branchStateMap->isEmpty());
        _branchStateMap = 0;

        #ifdef UNDER_CE
        // If we've modified the code, we need to flush so we don't end up trying
        // to execute junk
        FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
        #endif
    }

    void Assembler::copyRegisters(RegAlloc* copyTo)
    {
        *copyTo = _allocator;
    }
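    // Because emission is bottom-up, the prologue is generated *last*:
    // assemble() leaves _nIns pointing at the first instruction of the trace
    // body, endAssembly() patches the accumulated loop-back jumps to that
    // address, and only then prepends the prologue, whose address becomes
    // the fragment's entry point.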
    void Assembler::releaseRegisters()
    {
        for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
        {
            LIns *i = _allocator.getActive(r);
            if (i)
            {
                // clear reg allocation, preserve stack allocation.
                Reservation* resv = getresv(i);
                NanoAssert(resv != 0);
                _allocator.retire(r);
                if (r == resv->reg)
                    resv->reg = UnknownReg;

                if (!resv->arIndex && resv->reg == UnknownReg)
                {
                    reserveFree(i);
                }
            }
        }
    }

    void Assembler::gen(LirFilter* reader, NInsList& loopJumps)
    {
        const CallInfo* call = 0;  // current call being emitted, if any
        uint32_t iargs = 0;
        uint32_t fargs = 0;
        int32_t stackUsed = 0;     // stack space used for call

        // trace must start with LIR_x or LIR_loop
        NanoAssert(reader->pos()->isop(LIR_x) || reader->pos()->isop(LIR_loop));

        for (LInsp ins = reader->read(); ins != 0 && !error(); ins = reader->read())
        {
            Reservation *rR = getresv(ins);
            LOpcode op = ins->opcode();
            switch (op)
            {
                default:
                    NanoAssertMsg(false, "unsupported LIR instruction");
                    break;

                case LIR_short:
                case LIR_int:
                {
                    Register rr = prepResultReg(ins, GpRegs);
                    int32_t val;
                    if (op == LIR_int)
                        val = ins->imm32();
                    else
                        val = ins->imm16();
                    if (val == 0)
                        XOR(rr, rr);
                    else
                        LDi(rr, val);
                    break;
                }

                case LIR_quad:
                {
                    #ifdef NANOJIT_IA32
                    Register rr = rR->reg;
                    if (rr != UnknownReg)
                    {
                        // @todo -- add special-cases for 0 and 1
                        _allocator.retire(rr);
                        rR->reg = UnknownReg;
                        NanoAssert((rmask(rr) & FpRegs) != 0);

                        const double d = ins->constvalf();
                        if (rmask(rr) & XmmRegs) {
                            if (d == 0.0) {
                                XORPDr(rr, rr);
                            } else if (d == 1.0) {
                                // 1.0 is extremely frequent and worth special-casing!
                                static const double k_ONE = 1.0;
                                LDSDm(rr, &k_ONE);
                            } else {
                                findMemFor(ins);
                                const int d = disp(rR);
                                LDQ(rr, d, FP);
                            }
                        } else {
                            if (d == 0.0) {
                                FLDZ();
                            } else if (d == 1.0) {
                                FLD1();
                            } else {
                                findMemFor(ins);
                                int d = disp(rR);
                                FLDQ(d, FP);
                            }
                        }
                    }
                    #endif

                    // @todo, if we used xor, ldsd, fldz, etc above, we don't need mem here
                    int d = disp(rR);
                    freeRsrcOf(ins, false);
                    if (d)
                    {
                        const int32_t* p = (const int32_t*) (ins-2);
                        STi(FP, d+4, p[1]);
                        STi(FP, d, p[0]);
                    }
                    break;
                }

                case LIR_callh:
                {
                    // return result of quad-call in register
                    prepResultReg(ins, rmask(retRegs[1]));
                    // if hi half was used, we must use the call to ensure it happens
                    findRegFor(ins->oprnd1(), rmask(retRegs[0]));
                    break;
                }

                case LIR_param:
                {
                    Register w = Register(ins->imm8());
                    NanoAssert(w != UnknownReg);
                    // incoming arg in register
                    prepResultReg(ins, rmask(w));
                    break;
                }

                case LIR_qlo:
                {
                    LIns *q = ins->oprnd1();

                    #ifdef NANOJIT_IA32
                    if (sse2)
                    {
                        Reservation *resv = getresv(ins);
                        Register rr = resv->reg;
                        if (rr == UnknownReg) {
                            // store quad in spill loc
                            int d = disp(resv);
                            freeRsrcOf(ins, false);
                            Register qr = findRegFor(q, XmmRegs);
                            STD(d, FP, qr);
                        } else {
                            freeRsrcOf(ins, false);
                            Register qr = findRegFor(q, XmmRegs);
                            MOVD(rr, qr);
                        }
                    }
                    else
                    #endif
                    {
                        Register rr = prepResultReg(ins, GpRegs);
                        int d = findMemFor(q);
                        LD(rr, d, FP);
                    }
                    break;
                }

                case LIR_qhi:
                {
                    Register rr = prepResultReg(ins, GpRegs);
                    LIns *q = ins->oprnd1();
                    int d = findMemFor(q);
                    LD(rr, d+4, FP);
                    break;
                }
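                // The LIR_cmov case below leans on backwards emission:
                // findSpecificRegFor(iftrue, rr) is emitted after the MRcc,
                // which places the load of 'iftrue' *before* the conditional
                // move in execution order; the MRcc then only has to replace
                // rr with 'iffalse' when the condition does NOT hold, which
                // is why each arm uses the opposite of the LIR condition.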
                case LIR_cmov:
                {
                    LIns* condval = ins->oprnd1();
                    NanoAssert(condval->isCmp());

                    LIns* values = ins->oprnd2();
                    NanoAssert(values->opcode() == LIR_2);
                    LIns* iftrue = values->oprnd1();
                    LIns* iffalse = values->oprnd2();
                    NanoAssert(!iftrue->isQuad() && !iffalse->isQuad());

                    const Register rr = prepResultReg(ins, GpRegs);

                    // this code assumes that neither LD nor MR nor MRcc set any of the condition flags.
                    // (This is true on Intel, is it true on all architectures?)
                    const Register iffalsereg = findRegFor(iffalse, GpRegs & ~rmask(rr));
                    switch (condval->opcode())
                    {
                        // note that these are all opposites...
                        case LIR_eq:  MRNE(rr, iffalsereg); break;
                        case LIR_ov:  MRNO(rr, iffalsereg); break;
                        case LIR_cs:  MRNC(rr, iffalsereg); break;
                        case LIR_lt:  MRGE(rr, iffalsereg); break;
                        case LIR_le:  MRG(rr, iffalsereg);  break;
                        case LIR_gt:  MRLE(rr, iffalsereg); break;
                        case LIR_ge:  MRL(rr, iffalsereg);  break;
                        case LIR_ult: MRAE(rr, iffalsereg); break;
                        case LIR_ule: MRA(rr, iffalsereg);  break;
                        case LIR_ugt: MRBE(rr, iffalsereg); break;
                        case LIR_uge: MRB(rr, iffalsereg);  break;
                        debug_only( default: NanoAssert(0); break; )
                    }
                    /*const Register iftruereg =*/ findSpecificRegFor(iftrue, rr);
                    asm_cmp(condval);
                    break;
                }

                case LIR_ld:
                case LIR_ldc:
                case LIR_ldcb:
                {
                    LIns* base = ins->oprnd1();
                    LIns* disp = ins->oprnd2();
                    Register rr = prepResultReg(ins, GpRegs);
                    Register ra = findRegFor(base, GpRegs);
                    int d = disp->constval();
                    if (op == LIR_ldcb)
                        LD8Z(rr, d, ra);
                    else
                        LD(rr, d, ra);
                    break;
                }

                case LIR_ldq:
                {
                    asm_load64(ins);
                    break;
                }

                case LIR_neg:
                case LIR_not:
                {
                    Register rr = prepResultReg(ins, GpRegs);

                    LIns* lhs = ins->oprnd1();
                    Reservation *rA = getresv(lhs);
                    // if this is last use of lhs in reg, we can re-use result reg
                    Register ra;
                    if (rA == 0 || (ra = rA->reg) == UnknownReg)
                        ra = findSpecificRegFor(lhs, rr);
                    // else, rA already has a register assigned.

                    if (op == LIR_not)
                        NOT(rr);
                    else
                        NEG(rr);

                    if (rr != ra)
                        MR(rr, ra);
                    break;
                }

                case LIR_qjoin:
                {
                    asm_qjoin(ins);
                    break;
                }

                case LIR_add:
                case LIR_sub:
                case LIR_mul:
                case LIR_and:
                case LIR_or:
                case LIR_xor:
                case LIR_lsh:
                case LIR_rsh:
                case LIR_ush:
                {
                    LInsp lhs = ins->oprnd1();
                    LInsp rhs = ins->oprnd2();

                    Register rb = UnknownReg;
                    RegisterMask allow = GpRegs;
                    if (lhs != rhs && (op == LIR_mul || !rhs->isconst()))
                    {
                        #ifdef NANOJIT_IA32
                        if (op == LIR_lsh || op == LIR_rsh || op == LIR_ush)
                            rb = findSpecificRegFor(rhs, ECX);
                        else
                        #endif
                            rb = findRegFor(rhs, allow);
                        allow &= ~rmask(rb);
                    }

                    Register rr = prepResultReg(ins, allow);
                    Reservation* rA = getresv(lhs);
                    Register ra;
                    // if this is last use of lhs in reg, we can re-use result reg
                    if (rA == 0 || (ra = rA->reg) == UnknownReg)
                        ra = findSpecificRegFor(lhs, rr);
                    // else, rA already has a register assigned.

                    if (!rhs->isconst() || op == LIR_mul)
                    {
                        if (lhs == rhs)
                            rb = ra;

                        if (op == LIR_add)
                            ADD(rr, rb);
                        else if (op == LIR_sub)
                            SUB(rr, rb);
                        else if (op == LIR_mul)
                            MUL(rr, rb);
                        else if (op == LIR_and)
                            AND(rr, rb);
                        else if (op == LIR_or)
                            OR(rr, rb);
                        else if (op == LIR_xor)
                            XOR(rr, rb);
                        else if (op == LIR_lsh)
                            SHL(rr, rb);
                        else if (op == LIR_rsh)
                            SAR(rr, rb);
                        else if (op == LIR_ush)
                            SHR(rr, rb);
                        else
                            NanoAssertMsg(0, "Unsupported");
                    }
                    else
                    {
                        int c = rhs->constval();
                        if (op == LIR_add) {
                            #ifdef NANOJIT_IA32
                            if (ra != rr) {
                                LEA(rr, c, ra);
                                ra = rr; // suppress mov
                            } else
                            #endif
                            {
                                ADDi(rr, c);
                            }
                        } else if (op == LIR_sub) {
                            #ifdef NANOJIT_IA32
                            if (ra != rr) {
                                LEA(rr, -c, ra);
                                ra = rr;
                            } else
                            #endif
                            {
                                SUBi(rr, c);
                            }
                        } else if (op == LIR_and)
                            ANDi(rr, c);
                        else if (op == LIR_or)
                            ORi(rr, c);
                        else if (op == LIR_xor)
                            XORi(rr, c);
                        else if (op == LIR_lsh)
                            SHLi(rr, c);
                        else if (op == LIR_rsh)
                            SARi(rr, c);
                        else if (op == LIR_ush)
                            SHRi(rr, c);
                        else
                            NanoAssertMsg(0, "Unsupported");
                    }

                    if (rr != ra)
                        MR(rr, ra);
                    break;
                }
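                // The float cases below negate by XOR-ing the sign bit:
                // negateMask is the 16-byte-aligned constant
                // {0, 0x80000000, 0, 0}, whose low qword is
                // 0x8000000000000000, i.e. IEEE-754 -0.0, so XORPD against
                // it flips the sign of the double in the low lane.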
                #ifndef NJ_SOFTFLOAT
                case LIR_fneg:
                {
                    if (sse2)
                    {
                        LIns *lhs = ins->oprnd1();

                        Register rr = prepResultReg(ins, XmmRegs);
                        Reservation *rA = getresv(lhs);
                        Register ra;

                        // if this is last use of lhs in reg, we can re-use result reg
                        if (rA == 0 || (ra = rA->reg) == UnknownReg)
                            ra = findSpecificRegFor(lhs, rr);
                        // else, rA already has a register assigned.

                        static const AVMPLUS_ALIGN16(uint32_t) negateMask[] = {0,0x80000000,0,0};
                        XORPD(rr, negateMask);

                        if (rr != ra)
                            MOVSD(rr, ra);
                    }
                    else
                    {
                        Register rr = prepResultReg(ins, FpRegs);

                        LIns* lhs = ins->oprnd1();

                        // lhs into reg, prefer same reg as result
                        Reservation* rA = getresv(lhs);
                        // if this is last use of lhs in reg, we can re-use result reg
                        if (rA == 0 || rA->reg == UnknownReg)
                            findSpecificRegFor(lhs, rr);
                        // else, rA already has a different reg assigned

                        NanoAssert(getresv(lhs) != 0 && getresv(lhs)->reg == FST0);
                        // assume that the lhs is in ST(0) and rhs is on stack
                        FCHS();

                        // if we had more than one fpu reg, this is where
                        // we would move ra into rr if rr != ra.
                    }
                    break;
                }

                case LIR_fadd:
                case LIR_fsub:
                case LIR_fmul:
                case LIR_fdiv:
                {
                    if (sse2)
                    {
                        LIns *lhs = ins->oprnd1();
                        LIns *rhs = ins->oprnd2();

                        RegisterMask allow = XmmRegs;
                        Register rb = UnknownReg;
                        if (lhs != rhs) {
                            rb = findRegFor(rhs, allow);
                            allow &= ~rmask(rb);
                        }

                        Register rr = prepResultReg(ins, allow);
                        Reservation *rA = getresv(lhs);
                        Register ra;

                        // if this is last use of lhs in reg, we can re-use result reg
                        if (rA == 0 || (ra = rA->reg) == UnknownReg)
                            ra = findSpecificRegFor(lhs, rr);
                        // else, rA already has a register assigned.

                        if (lhs == rhs)
                            rb = ra;

                        if (op == LIR_fadd)
                            ADDSD(rr, rb);
                        else if (op == LIR_fsub)
                            SUBSD(rr, rb);
                        else if (op == LIR_fmul)
                            MULSD(rr, rb);
                        else //if (op == LIR_fdiv)
                            DIVSD(rr, rb);

                        if (rr != ra)
                            MOVSD(rr, ra);
                    }
                    else
                    {
                        // we swap lhs/rhs on purpose here, it works out better
                        // if you only have one fpu reg. use divr/subr.
                        LIns* rhs = ins->oprnd1();
                        LIns* lhs = ins->oprnd2();
                        Register rr = prepResultReg(ins, rmask(FST0));

                        // make sure rhs is in memory
                        int db = findMemFor(rhs);

                        // lhs into reg, prefer same reg as result
                        Reservation* rA = getresv(lhs);
                        // last use of lhs in reg, can reuse rr
                        if (rA == 0 || rA->reg == UnknownReg)
                            findSpecificRegFor(lhs, rr);
                        // else, rA already has a different reg assigned

                        NanoAssert(getresv(lhs) != 0 && getresv(lhs)->reg == FST0);
                        // assume that the lhs is in ST(0) and rhs is on stack
                        if (op == LIR_fadd)
                            { FADD(db, FP); }
                        else if (op == LIR_fsub)
                            { FSUBR(db, FP); }
                        else if (op == LIR_fmul)
                            { FMUL(db, FP); }
                        else if (op == LIR_fdiv)
                            { FDIVR(db, FP); }
                    }
                    break;
                }

                case LIR_i2f:
                {
                    // where our result goes
                    Register rr = prepResultReg(ins, FpRegs);
                    if (rmask(rr) & XmmRegs)
                    {
                        // todo support int value in memory
                        Register gr = findRegFor(ins->oprnd1(), GpRegs);
                        CVTSI2SD(rr, gr);
                    }
                    else
                    {
                        int d = findMemFor(ins->oprnd1());
                        FILD(d, FP);
                    }
                    break;
                }
                case LIR_u2f:
                {
                    // where our result goes
                    Register rr = prepResultReg(ins, FpRegs);
                    const int disp = -8;
                    const Register base = ESP;
                    if (rmask(rr) & XmmRegs)
                    {
                        // don't call findRegFor, we want a reg we can stomp on for a very short time,
                        // not a reg that will continue to be associated with the LIns
                        Register gr = registerAlloc(GpRegs);

                        // technique inspired by gcc disassembly.
                        // Edwin explains it:
                        //
                        // gr is 0..2^32-1
                        //
                        //     sub gr,0x80000000
                        //
                        // now gr is -2^31..2^31-1, i.e. the range of int, but not the same value
                        // as before
                        //
                        //     cvtsi2sd rr,gr
                        //
                        // rr is now a double with the int value range
                        //
                        //     addsd rr, 2147483648.0
                        //
                        // adding back double(0x80000000) makes the range 0..2^32-1.
                        static const double k_NEGONE = 2147483648.0;
                        ADDSDm(rr, &k_NEGONE);

                        CVTSI2SD(rr, gr);

                        Reservation* resv = getresv(ins->oprnd1());
                        Register xr;
                        if (resv && (xr = resv->reg) != UnknownReg && (rmask(xr) & GpRegs))
                        {
                            #ifdef NANOJIT_IA32
                            LEA(gr, 0x80000000, xr);
                            #else
                            SUBi(gr, 0x80000000);
                            MR(gr, xr);
                            #endif
                        }
                        else
                        {
                            const int d = findMemFor(ins->oprnd1());
                            SUBi(gr, 0x80000000);
                            LD(gr, d, FP);
                        }

                        // ok, we're done with it
                        _allocator.addFree(gr);
                    }
                    else
                    {
                        Register gr = findRegFor(ins->oprnd1(), GpRegs);
                        NanoAssert(rr == FST0);
                        FILDQ(disp, base);
                        STi(base, disp+4, 0);  // high 32 bits = 0
                        ST(base, disp, gr);    // low 32 bits = unsigned value
                    }
                    break;
                }
                #endif // NJ_SOFTFLOAT

                case LIR_st:
                case LIR_sti:
                {
                    asm_store32(ins->oprnd1(), ins->immdisp(), ins->oprnd2());
                    break;
                }

                case LIR_stq:
                case LIR_stqi:
                {
                    LIns* value = ins->oprnd1();
                    LIns* base = ins->oprnd2();
                    int dr = ins->immdisp();
                    if (value->isop(LIR_qjoin))
                    {
                        // this is correct for little-endian only
                        asm_store32(value->oprnd1(), dr, base);
                        asm_store32(value->oprnd2(), dr+4, base);
                    }
                    else
                    {
                        asm_store64(value, dr, base);
                    }
                    break;
                }

                case LIR_xt:
                case LIR_xf:
                {
                    NIns* exit = asm_exit(ins);

                    // we only support cmp with guard right now, also assume it is 'close' and only emit the branch
                    LIns* cond = ins->oprnd1();
                    LOpcode condop = cond->opcode();
                    NanoAssert(cond->isCmp());
                    #ifndef NJ_SOFTFLOAT
                    bool fp = cond->oprnd1()->isQuad();
                    if (fp)
                    {
                        if (op == LIR_xf)
                            JP(exit);
                        else
                            JNP(exit);
                        asm_fcmp(cond);
                        break;
                    }
                    #endif

                    // produce the branch
                    if (op == LIR_xf)
                    {
                        if (condop == LIR_eq)
                            JNE(exit);
                        else if (condop == LIR_ov)
                            JNO(exit);
                        else if (condop == LIR_cs)
                            JNC(exit);
                        else if (condop == LIR_lt)
                            JNL(exit);
                        else if (condop == LIR_le)
                            JNLE(exit);
                        else if (condop == LIR_gt)
                            JNG(exit);
                        else if (condop == LIR_ge)
                            JNGE(exit);
                        else if (condop == LIR_ult)
                            JNB(exit);
                        else if (condop == LIR_ule)
                            JNBE(exit);
                        else if (condop == LIR_ugt)
                            JNA(exit);
                        else //if (condop == LIR_uge)
                            JNAE(exit);
                    }
                    else // op == LIR_xt
                    {
                        if (condop == LIR_eq)
                            JE(exit);
                        else if (condop == LIR_ov)
                            JO(exit);
                        else if (condop == LIR_cs)
                            JC(exit);
                        else if (condop == LIR_lt)
                            JL(exit);
                        else if (condop == LIR_le)
                            JLE(exit);
                        else if (condop == LIR_gt)
                            JG(exit);
                        else if (condop == LIR_ge)
                            JGE(exit);
                        else if (condop == LIR_ult)
                            JB(exit);
                        else if (condop == LIR_ule)
                            JBE(exit);
                        else if (condop == LIR_ugt)
                            JA(exit);
                        else //if (condop == LIR_uge)
                            JAE(exit);
                    }
                    asm_cmp(cond);
                    break;
                }

                case LIR_x:
                {
                    verbose_only(verbose_output(""));
                    // generate the side exit branch on the main trace.
                    NIns *exit = asm_exit(ins);
                    JMP(exit);
                    break;
                }
                case LIR_loop:
                {
                    JMP_long_placeholder(); // jump to SOT
                    verbose_only( if (_verbose && _outputCache) { _outputCache->removeLast(); outputf("        jmp SOT"); } );

                    loopJumps.add(_nIns);

                    #ifdef NJ_VERBOSE
                    // branching from this frag to ourself.
                    if (_frago->core()->config.show_stats)
                        LDi(argRegs[1], int((Fragment*)_thisfrag));
                    #endif

                    // restore first parameter, the only one we use
                    LInsp param0 = _thisfrag->param0;
                    Register a0 = Register(param0->imm8());
                    findSpecificRegFor(param0, a0);
                    break;
                }

                #ifndef NJ_SOFTFLOAT
                case LIR_feq:
                case LIR_fle:
                case LIR_flt:
                case LIR_fgt:
                case LIR_fge:
                {
                    // only want certain regs
                    Register r = prepResultReg(ins, AllowableFlagRegs);
                    // SETcc only sets low 8 bits, so extend
                    MOVZX8(r, r);
                    SETNP(r);
                    asm_fcmp(ins);
                    break;
                }
                #endif

                case LIR_eq:
                case LIR_ov:
                case LIR_cs:
                case LIR_le:
                case LIR_lt:
                case LIR_gt:
                case LIR_ge:
                case LIR_ult:
                case LIR_ule:
                case LIR_ugt:
                case LIR_uge:
                {
                    // only want certain regs
                    Register r = prepResultReg(ins, AllowableFlagRegs);
                    // SETcc only sets low 8 bits, so extend
                    MOVZX8(r, r);
                    if (op == LIR_eq)
                        SETE(r);
                    else if (op == LIR_ov)
                        SETO(r);
                    else if (op == LIR_cs)
                        SETC(r);
                    else if (op == LIR_lt)
                        SETL(r);
                    else if (op == LIR_le)
                        SETLE(r);
                    else if (op == LIR_gt)
                        SETG(r);
                    else if (op == LIR_ge)
                        SETGE(r);
                    else if (op == LIR_ult)
                        SETB(r);
                    else if (op == LIR_ule)
                        SETBE(r);
                    else if (op == LIR_ugt)
                        SETA(r);
                    else // if (op == LIR_uge)
                        SETAE(r);
                    asm_cmp(ins);
                    break;
                }

                case LIR_ref:
                {
                    // ref arg - use lea
                    LIns *p = ins->oprnd1();
                    if (ins->resv())
                    {
                        // arg in specific reg
                        Register r = imm2register(ins->resv());
                        int da = findMemFor(p);
                        LEA(r, da, FP);
                    }
                    else
                    {
                        NanoAssert(0); // not supported
                    }
                    ++iargs;
                    nArgEmitted(call, 0, iargs, fargs);
                    break;
                }

                case LIR_arg:
                {
                    LIns* p = ins->oprnd1();
                    if (ins->resv())
                    {
                        // arg goes in specific register
                        Register r = imm2register(ins->resv());
                        if (p->isconst())
                            LDi(r, p->constval());
                        else
                            findSpecificRegFor(p, r);
                    }
                    else
                    {
                        asm_pusharg(p);
                        stackUsed += 1;
                    }
                    ++iargs;
                    nArgEmitted(call, stackUsed, iargs, fargs);
                    break;
                }

                #ifdef NANOJIT_IA32
                case LIR_farg:
                {
                    LIns* p = ins->oprnd1();
                    Register r = findRegFor(p, FpRegs);
                    if (rmask(r) & XmmRegs) {
                        STQ(0, SP, r);
                    } else {
                        FSTPQ(0, SP);
                    }
                    PUSHr(ECX); // 2*pushr is smaller than sub
                    PUSHr(ECX);
                    stackUsed += 2;
                    ++fargs;
                    nArgEmitted(call, stackUsed, iargs, fargs);
                    break;
                }
                #endif
                #ifndef NJ_SOFTFLOAT
                case LIR_fcall:
                #endif
                case LIR_call:
                {
                    const FunctionID fid = (FunctionID) ins->imm8();
                    // bogus assertion: zero is a legal value right now, with fmod() in that slot
                    // NanoAssertMsg(fid!=0, "Function does not exist in the call table");
                    call = &_functions[ fid ];

                    iargs = 0;
                    fargs = 0;

                    Register rr = UnknownReg;
                    #ifndef NJ_SOFTFLOAT
                    if (op == LIR_fcall)
                    {
                        if (rR) {
                            if ((rr = rR->reg) != UnknownReg && (rmask(rr) & XmmRegs))
                                evict(rr);
                        }
                        rr = FST0;
                        prepResultReg(ins, rmask(rr));
                    }
                    else
                    #endif
                    {
                        rr = retRegs[0];
                        prepResultReg(ins, rmask(rr));
                    }

                    // do this after we've handled the call result, so we don't
                    // force the call result to be spilled unnecessarily.
                    restoreCallerSaved();

                    nPostCallCleanup(call);
                    #ifdef NJ_VERBOSE
                    CALL(call->_address, call->_name);
                    #else
                    CALL(call->_address, "");
                    #endif

                    stackUsed = 0;
                    LirReader argReader(reader->pos());

                    #ifdef NANOJIT_ARM
                    // pre-assign registers R0-R3 for arguments (if they fit)
                    int regsUsed = 0;
                    for (LInsp a = argReader.read(); a->isArg(); a = argReader.read())
                    {
                        if (a->isop(LIR_arg) || a->isop(LIR_ref))
                        {
                            a->setresv((int)R0 + 1 + regsUsed);
                            regsUsed++;
                        }
                        if (regsUsed >= 4)
                            break;
                    }
                    #endif

                    #ifdef NANOJIT_IA32
                    debug_only( if (rr == FST0) fpu_push(); )
                    // make sure fpu stack is empty before call (restoreCallerSaved)
                    NanoAssert(_allocator.isFree(FST0));
                    // note: this code requires that LIR_ref arguments be one of the first two arguments
                    // pre-assign registers to the first 2 4B args
                    const uint32_t iargs = call->count_iargs();
                    const int max_regs = (iargs < 2) ? iargs : 2;
                    int n = 0;
                    for (LIns* a = argReader.read(); a->isArg() && n < max_regs; a = argReader.read())
                    {
                        if (a->isop(LIR_arg) || a->isop(LIR_ref))
                        {
                            a->setresv(argRegs[n++]); // tell LIR_arg what reg to use
                        }
                    }
                    #endif
                }
            }

            // check that all is well (don't check in exit paths since it's more complicated)
            debug_only( pageValidate(); )
            debug_only( resourceConsistencyCheck(); )
        }
    }

    uint32_t Assembler::arFree(uint32_t idx)
    {
        if (idx > 0 && _activation.entry[idx] == _activation.entry[idx+stack_direction(1)])
            _activation.entry[idx+stack_direction(1)] = 0;  // clear 2 slots for doubles
        _activation.entry[idx] = 0;
        return 0;
    }

    #ifdef NJ_VERBOSE
    void Assembler::printActivationState()
    {
        bool verbose_activation = false;
        if (!verbose_activation)
            return;

        #ifdef NANOJIT_ARM
        verbose_only(
            if (_verbose) {
                char* s = &outline[0];
                memset(s, ' ', 51);  s[51] = '\0';
                s += strlen(s);
                sprintf(s, " SP ");
                s += strlen(s);
                for (uint32_t i = _activation.lowwatermark; i < _activation.tos; i++) {
                    LInsp ins = _activation.entry[i];
                    if (ins && ins != _activation.entry[i+1]) {
                        sprintf(s, "%d(%s) ", 4*i, _thisfrag->lirbuf->names->formatRef(ins));
                        s += strlen(s);
                    }
                }
                output(&outline[0]);
            }
        )
        #else
        verbose_only(
            char* s = &outline[0];
            if (_verbose) {
                memset(s, ' ', 51);  s[51] = '\0';
                s += strlen(s);
                sprintf(s, " ebp ");
                s += strlen(s);
                for (uint32_t i = _activation.lowwatermark; i < _activation.tos; i++) {
                    LInsp ins = _activation.entry[i];
                    if (ins /* && _activation.entry[i]!=_activation.entry[i+1]*/) {
                        sprintf(s, "%d(%s) ", -4*i, _thisfrag->lirbuf->names->formatRef(ins));
                        s += strlen(s);
                    }
                }
                output(&outline[0]);
            }
        )
        #endif
    }
    #endif
    uint32_t Assembler::arReserve(LIns* l)
    {
        NanoAssert(!l->isop(LIR_tramp));

        //verbose_only(printActivationState());
        const bool quad = l->isQuad();
        const int32_t n = _activation.tos;
        int32_t start = _activation.lowwatermark;
        int32_t i = 0;
        NanoAssert(start > 0);

        if (n >= NJ_MAX_STACK_ENTRY-2)
        {
            setError(StackFull);
            return start;
        }
        else if (quad)
        {
            if ((start & 1) == 1) start++;  // even
            for (i = start; i <= n; i += 2)
            {
                if ((_activation.entry[i+stack_direction(1)] == 0) && (i == n || (_activation.entry[i] == 0)))
                    break;  // for fp we need 2 adjacent aligned slots
            }
        }
        else
        {
            for (i = start; i < n; i++)
            {
                if (_activation.entry[i] == 0)
                    break;  // not being used
            }
        }

        int32_t inc = ((i-n+1) < 0) ? 0 : (i-n+1);
        if (quad && stack_direction(1) > 0)
            inc++;
        _activation.tos += inc;
        _activation.highwatermark += inc;

        // place the entry in the table and mark the instruction with it
        _activation.entry[i] = l;
        if (quad)
            _activation.entry[i+stack_direction(1)] = l;
        return i;
    }

    void Assembler::restoreCallerSaved()
    {
        // generate code to restore caller-saved (scratch) registers
        // @todo speed this up
        RegisterMask scratch = ~SavedRegs;
        for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
        {
            if ((rmask(r) & scratch) && _allocator.getActive(r))
            {
                evict(r);
            }
        }
    }

    /**
     * Merge the current state of the registers with a previously stored version.
     */
    void Assembler::mergeRegisterState(RegAlloc& saved)
    {
        // evictions and pops first
        RegisterMask skip = 0;
        for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
        {
            LIns* curins = _allocator.getActive(r);
            LIns* savedins = saved.getActive(r);
            if (curins == savedins)
            {
                verbose_only( if (curins) verbose_outputf("        skip %s", regNames[r]); )
                skip |= rmask(r);
            }
            else
            {
                if (curins)
                    evict(r);

                #ifdef NANOJIT_IA32
                if (savedins && (rmask(r) & x87Regs))
                    FSTP(r);
                #endif
            }
        }

        // now reassign mainline registers
        for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
        {
            LIns *i = saved.getActive(r);
            if (i && !(skip & rmask(r)))
                findSpecificRegFor(i, r);
        }
        debug_only(saved.used = 0); // marker that we are no longer in exit path
    }

    /**
     * Guard records are laid out in the exit block buffer (_nExitIns),
     * interspersed with the code.  Preceding the record are the native
     * instructions associated with the record (i.e. the exit code).
     *
     * The layout is as follows:
     *
     *   [ native code ] [ GuardRecord1 ]
     *   ...
     *   [ native code ] [ GuardRecordN ]
     *
     * The guard record 'code' field should be used to locate
     * the start of the native code associated with the
     * exit block. N.B. the code may lie in a different page
     * than the guard record.
     *
     * The last guard record is used for the unconditional jump
     * at the end of the trace.
     *
     * NOTE: It is also not guaranteed that the native code
     *       is contained on a single page.
     */
    GuardRecord* Assembler::placeGuardRecord(LInsp guard)
    {
        SideExit *exit = guard->exit();
        // we align the guards to a 4-byte boundary
        NIns* ptr = (NIns*)alignTo(_nIns-sizeof(GuardRecord), 4);
        underrunProtect((int)_nIns - (int)ptr);  // either got us a new page or there is enough space for us

        GuardRecord* rec = (GuardRecord*) alignTo(_nIns-sizeof(GuardRecord), 4);
        rec->outgoing = _latestGuard;
        _latestGuard = rec;
        _nIns = (NIns*)rec;
        rec->next = 0;
        rec->origTarget = 0;
        rec->target = exit->target;
        rec->from = _thisfrag;
        rec->guard = guard;
        rec->calldepth = exit->calldepth;
        verbose_only( rec->sid = exit->sid; )
        if (exit->target)
            exit->target->addLink(rec);
        verbose_only( rec->compileNbr = _thisfrag->compileNbr; )

        return rec;
    }

    void Assembler::setCallTable(const CallInfo* functions)
    {
        _functions = functions;
    }
    #ifdef NJ_VERBOSE
    char Assembler::outline[8192];

    void Assembler::outputf(const char* format, ...)
    {
        va_list args;
        va_start(args, format);
        outline[0] = '\0';
        vsprintf(outline, format, args);
        output(outline);
    }

    void Assembler::output(const char* s)
    {
        if (_outputCache)
        {
            char* str = (char*)_gc->Alloc(strlen(s)+1);
            strcpy(str, s);
            _outputCache->add(str);
        }
        else
        {
            _frago->core()->console << s << "\n";
        }
    }

    void Assembler::output_asm(const char* s)
    {
        if (!verbose_enabled())
            return;
        if (*s != '^')
            output(s);
    }

    char* Assembler::outputAlign(char *s, int col)
    {
        int len = strlen(s);
        int add = ((col-len) > 0) ? col-len : 1;
        memset(&s[len], ' ', add);
        s[col] = '\0';
        return &s[col];
    }
    #endif // verbose

    #endif /* FEATURE_NANOJIT */

    #if defined(FEATURE_NANOJIT) || defined(NJ_VERBOSE)
    uint32_t CallInfo::_count_args(uint32_t mask) const
    {
        uint32_t argc = 0;
        uint32_t argt = _argtypes;
        for (int i = 0; i < 5; ++i) {
            argt >>= 2;
            argc += (argt & mask) != 0;
        }
        return argc;
    }
    #endif
}