From 5a17de3bcaa132b10858a7c40ac9bd8ad2c126e9 Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Thu, 19 Jun 2008 10:47:58 -0700 Subject: [PATCH] Landed nanojit in TraceMonkey. This is untested and DEBUG must be off for now since we don't support AVM's String class. --- js/src/Makefile.ref | 21 +- js/src/nanojit/Assembler.cpp | 1951 +++++++++++++++++++++++++++++++ js/src/nanojit/Fragmento.cpp | 617 ++++++++++ js/src/nanojit/LIR.cpp | 2058 +++++++++++++++++++++++++++++++++ js/src/nanojit/Nativei386.cpp | 580 ++++++++++ js/src/nanojit/RegAlloc.cpp | 182 +++ js/src/nanojit/avmplus.cpp | 41 + js/src/nanojit/avmplus.h | 115 +- js/src/nanojit/vm_fops.h | 2 + 9 files changed, 5558 insertions(+), 9 deletions(-) create mode 100755 js/src/nanojit/Assembler.cpp create mode 100644 js/src/nanojit/Fragmento.cpp create mode 100755 js/src/nanojit/LIR.cpp create mode 100644 js/src/nanojit/Nativei386.cpp create mode 100644 js/src/nanojit/RegAlloc.cpp create mode 100644 js/src/nanojit/avmplus.cpp diff --git a/js/src/Makefile.ref b/js/src/Makefile.ref index 82c38b439c8..eeb09b87f78 100644 --- a/js/src/Makefile.ref +++ b/js/src/Makefile.ref @@ -61,7 +61,7 @@ DEFINES += -DNARCISSUS endif # Look in OBJDIR to find jsautocfg.h and jsautokw.h -INCLUDES += -I$(OBJDIR) +INCLUDES += -I. -Inanojit -I$(OBJDIR) ifdef JS_THREADSAFE DEFINES += -DJS_THREADSAFE @@ -81,6 +81,8 @@ ifdef JS_HAS_FILE_OBJECT DEFINES += -DJS_HAS_FILE_OBJECT endif +DEFINES += -DFEATURE_NANOJIT -DAVMPLUS_IA32 -DTRACEMONKEY + # # XCFLAGS may be set in the environment or on the gmake command line # @@ -180,6 +182,17 @@ JS_HFILES = \ jsstr.h \ jsxdrapi.h \ jsxml.h \ + nanojit/Assembler.h \ + nanojit/LIR.h \ + nanojit/NativeARM.h \ + nanojit/Nativei386.h \ + nanojit/avmplus.h \ + nanojit/vm_fops.h \ + nanojit/Fragmento.h \ + nanojit/Native.h \ + nanojit/NativeThumb.h \ + nanojit/RegAlloc.h \ + nanojit/nanojit.h \ $(NULL) API_HFILES = \ @@ -247,6 +260,12 @@ JS_CPPFILES = \ jsxdrapi.cpp \ jsxml.cpp \ prmjtime.cpp \ + nanojit/Assembler.cpp \ + nanojit/Fragmento.cpp \ + nanojit/LIR.cpp \ + nanojit/Nativei386.cpp \ + nanojit/RegAlloc.cpp \ + nanojit/avmplus.cpp \ $(NULL) ifdef JS_LIVECONNECT diff --git a/js/src/nanojit/Assembler.cpp b/js/src/nanojit/Assembler.cpp new file mode 100755 index 00000000000..590829f0ba4 --- /dev/null +++ b/js/src/nanojit/Assembler.cpp @@ -0,0 +1,1951 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is [Open Source Virtual Machine]. + * + * The Initial Developer of the Original Code is + * Adobe System Incorporated. + * Portions created by the Initial Developer are Copyright (C) 2004-2007 + * the Initial Developer. All Rights Reserved. 
+ * + * Contributor(s): + * Adobe AS3 Team + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "nanojit.h" + +namespace nanojit +{ + #ifdef FEATURE_NANOJIT + + + #ifdef AVMPLUS_WIN32 + #define AVMPLUS_ALIGN16(type) __declspec(align(16)) type + #else + #define AVMPLUS_ALIGN16(type) type __attribute__ ((aligned (16))) + #endif + + class DeadCodeFilter: public LirFilter + { + Assembler *assm; + public: + DeadCodeFilter(LirFilter *in, Assembler *a) : LirFilter(in), assm(a) {} + LInsp read() { + for (;;) { + LInsp i = in->read(); + if (!i || i->isGuard() + || i->isCall() && !assm->_functions[i->imm8()]._cse + || !assm->ignoreInstruction(i)) + return i; + if (i->isCall()) { + // skip args + while (in->pos()->isArg()) + in->read(); + } + } + } + }; + +#ifdef NJ_VERBOSE + class VerboseBlockReader: public LirFilter + { + Assembler *assm; + LirNameMap *names; + avmplus::List block; + public: + VerboseBlockReader(LirFilter *in, Assembler *a, LirNameMap *n) + : LirFilter(in), assm(a), names(n), block(a->_gc) {} + + void flush() { + assm->outputf(" %p:", assm->_nIns); + assm->output(""); + for (int j=0,n=block.size(); j < n; j++) + assm->outputf(" %s", names->formatIns(block[j])); + assm->output(""); + block.clear(); + } + + LInsp read() { + LInsp i = in->read(); + if (!i) { + flush(); + return i; + } + if (i->isGuard()) { + flush(); + block.add(i); + if (i->oprnd1()) + block.add(i->oprnd1()); + } + else if (!i->isArg()) { + block.add(i); + } + return i; + } + }; +#endif + + /** + * Need the following: + * + * - merging paths ( build a graph? 
), possibly use external rep to drive codegen + */ + Assembler::Assembler(Fragmento* frago) + : _frago(frago) + , _gc(frago->core()->gc) + { + AvmCore *core = frago->core(); + nInit(core); + verbose_only( _verbose = !core->quiet_opt() && core->verbose() ); + verbose_only( _outputCache = 0); + + verbose_only(Lir::initEngine();) + internalReset(); + pageReset(); + } + + void Assembler::arReset() + { + _activation.highwatermark = 0; + _activation.lowwatermark = 0; + _activation.tos = 0; + + for(uint32_t i=0; ireg; + regs.removeActive(r); + resv->reg = UnknownReg; + + asm_restore(vic, resv, r); + return r; + } + + void Assembler::reserveReset() + { + _resvTable[0].arIndex = 0; + int i; + for(i=1; iarIndex; + r->reg = UnknownReg; + r->arIndex = 0; + if (!item) + setError(ResvFull); + + if (i->isconst() || i->isconstq()) + r->cost = 0; + else if (i->isop(LIR_ld) && + i->oprnd1() == _thisfrag->param0 && + (i->oprnd2()->isconstval(offsetof(avmplus::InterpState,sp)) || + i->oprnd2()->isconstval(offsetof(avmplus::InterpState,rp)))) + r->cost = 2; + else + r->cost = 1; + + i->setresv(item); + return r; + } + + void Assembler::reserveFree(LInsp i) + { + Reservation *rs = getresv(i); + NanoAssert(rs == &_resvTable[i->resv()]); + rs->arIndex = _resvFree; + _resvFree = i->resv(); + i->setresv(0); + } + + void Assembler::internalReset() + { + // readies for a brand spanking new code generation pass. + registerResetAll(); + reserveReset(); + arReset(); + } + + NIns* Assembler::pageAlloc(bool exitPage) + { + Page*& list = (exitPage) ? _nativeExitPages : _nativePages; + Page* page = _frago->pageAlloc(); + if (page) + { + page->next = list; + list = page; + nMarkExecute(page); + } + else + { + // return prior page (to allow overwrites) and mark out of mem + page = list; + setError(OutOMem); + } + return &page->code[sizeof(page->code)/sizeof(NIns)]; // just past the end + } + + void Assembler::pageReset() + { + pagesFree(_nativePages); + pagesFree(_nativeExitPages); + + _nIns = 0; + _nExitIns = 0; + + nativePageReset(); + } + + void Assembler::pagesFree(Page*& page) + { + while(page) + { + Page *next = page->next; // pull next ptr prior to free + _frago->pageFree(page); + page = next; + } + } + + Page* Assembler::handoverPages(bool exitPages) + { + Page*& list = (exitPages) ? _nativeExitPages : _nativePages; + NIns*& ins = (exitPages) ? _nExitIns : _nIns; + Page* start = list; + list = 0; + ins = 0; + return start; + } + + #ifdef _DEBUG + bool Assembler::onPage(NIns* where, bool exitPages) + { + Page* page = (exitPages) ? 
_nativeExitPages : _nativePages; + bool on = false; + while(page) + { + if (samepage(where-1,page)) + on = true; + page = page->next; + } + return on; + } + + void Assembler::pageValidate() + { + if (error()) return; + // _nIns and _nExitIns need to be at least on + // one of these pages + NanoAssertMsg( onPage(_nIns)&& onPage(_nExitIns,true), "Native instruction pointer overstep paging bounds; check overrideProtect for last instruction"); + } + #endif + + const CallInfo* Assembler::callInfoFor(int32_t fid) + { + NanoAssert(fid < CI_Max); + return &_functions[fid]; + } + + #ifdef _DEBUG + + void Assembler::resourceConsistencyCheck() + { + if (error()) return; + +#ifdef NANOJIT_IA32 + NanoAssert(_allocator.active[FST0] && _fpuStkDepth == -1 || + !_allocator.active[FST0] && _fpuStkDepth == 0); +#endif + + // for tracking resv usage + LIns* resv[NJ_MAX_STACK_ENTRY]; + for(int i=0; iarIndex==0 || r->arIndex==i || (ins->isQuad()&&r->arIndex==i-(stack_direction(1))), "Stack record index mismatch"); + NanoAssertMsg( r->reg==UnknownReg || regs->isConsistent(r->reg,ins), "Register record mismatch"); + } + + registerConsistencyCheck(resv); + + // check resv table + int32_t inuseCount = 0; + int32_t notInuseCount = 0; + for(uint32_t i=1; imanaged; + Register r = FirstReg; + while(managed) + { + if (managed&1) + { + if (regs->isFree(r)) + { + NanoAssert(regs->getActive(r)==0); + } + else + { + LIns* ins = regs->getActive(r); + // @todo we should be able to check across RegAlloc's somehow (to include savedGP...) + Reservation *v = getresv(ins); + NanoAssert(v); + int32_t idx = v - _resvTable; + NanoAssert(idx >= 0 && idx < NJ_MAX_STACK_ENTRY); + resv[idx]=ins; + NanoAssertMsg(idx, "MUST have a resource for the instruction for it to have a register assigned to it"); + NanoAssertMsg( v->arIndex==0 || ins==_activation.entry[v->arIndex], "Stack record index mismatch"); + NanoAssertMsg( regs->getActive(v->reg)==ins, "Register record mismatch"); + } + } + + // next register in bitfield + r = nextreg(r); + managed >>= 1; + } + } + #endif /* _DEBUG */ + + void Assembler::findRegFor2(RegisterMask allow, LIns* ia, Reservation* &resva, LIns* ib, Reservation* &resvb) + { + if (ia == ib) + { + findRegFor(ia, allow); + resva = resvb = getresv(ia); + } + else + { + Register rb = UnknownReg; + resvb = getresv(ib); + if (resvb && (rb = resvb->reg) != UnknownReg) + allow &= ~rmask(rb); + Register ra = findRegFor(ia, allow); + resva = getresv(ia); + NanoAssert(error() || (resva != 0 && ra != UnknownReg)); + if (rb == UnknownReg) + { + allow &= ~rmask(ra); + findRegFor(ib, allow); + resvb = getresv(ib); + } + } + } + + Register Assembler::findSpecificRegFor(LIns* i, Register w) + { + return findRegFor(i, rmask(w)); + } + + Register Assembler::findRegFor(LIns* i, RegisterMask allow) + { + Reservation* resv = getresv(i); + Register r; + + if (resv && (r=resv->reg) != UnknownReg && (rmask(r) & allow)) { + return r; + } + + RegisterMask prefer = hint(i, allow); + if (!resv) + resv = reserveAlloc(i); + + if ((r=resv->reg) == UnknownReg) + { + if (resv->cost == 2 && (allow&SavedRegs)) + prefer = allow&SavedRegs; + r = resv->reg = registerAlloc(prefer); + _allocator.addActive(r, i); + return r; + } + else + { + // r not allowed + resv->reg = UnknownReg; + _allocator.retire(r); + if (resv->cost == 2 && (allow&SavedRegs)) + prefer = allow&SavedRegs; + Register s = resv->reg = registerAlloc(prefer); + _allocator.addActive(s, i); + if (rmask(r) & GpRegs) { + MR(r, s); + } +#ifdef NANOJIT_IA32 + else if ((rmask(r) & XmmRegs) && 
(rmask(s) & XmmRegs)) { + MOVSD(r, s); + } else { + if (rmask(r) & XmmRegs) { + // x87 -> xmm + NanoAssert(false); + } else { + // xmm -> x87 + NanoAssert(false); + } + } +#endif + return s; + } + } + + int Assembler::findMemFor(LIns *i) + { + Reservation* resv = getresv(i); + if (!resv) + resv = reserveAlloc(i); + if (!resv->arIndex) + resv->arIndex = arReserve(i); + return disp(resv); + } + + Register Assembler::prepResultReg(LIns *i, RegisterMask allow) + { + Reservation* resv = getresv(i); + const bool pop = !resv || resv->reg == UnknownReg; + Register rr = findRegFor(i, allow); + freeRsrcOf(i, pop); + return rr; + } + + void Assembler::freeRsrcOf(LIns *i, bool pop) + { + Reservation* resv = getresv(i); + int index = resv->arIndex; + Register rr = resv->reg; + + if (rr != UnknownReg) + { + asm_spill(i, resv, pop); + _allocator.retire(rr); // free any register associated with entry + } + arFree(index); // free any stack stack space associated with entry + reserveFree(i); // clear fields of entry and add it to free list + } + + void Assembler::evict(Register r) + { + registerAlloc(rmask(r)); + _allocator.addFree(r); + } + + void Assembler::asm_cmp(LIns *cond) + { + LInsp lhs = cond->oprnd1(); + LInsp rhs = cond->oprnd2(); + NanoAssert(!lhs->isQuad() && !rhs->isQuad()); + Reservation *rA, *rB; + + // ready to issue the compare + if (rhs->isconst()) + { + int c = rhs->constval(); + Register r = findRegFor(lhs, GpRegs); + if (c == 0 && cond->isop(LIR_eq)) + TEST(r,r); + else + CMPi(r, c); + } + else + { + findRegFor2(GpRegs, lhs, rA, rhs, rB); + Register ra = rA->reg; + Register rb = rB->reg; + CMP(ra, rb); + } + } + +#ifndef NJ_SOFTFLOAT + void Assembler::asm_fcmp(LIns *cond) + { + LOpcode condop = cond->opcode(); + NanoAssert(condop == LIR_eq || condop == LIR_le || condop == LIR_lt || condop == LIR_gt || condop == LIR_ge); + LIns* lhs = cond->oprnd1(); + LIns* rhs = cond->oprnd2(); + + int mask; + if (condop == LIR_eq) + mask = 0x44; + else if (condop == LIR_le) + mask = 0x41; + else if (condop == LIR_lt) + mask = 0x05; + else if (condop == LIR_ge) { + // swap, use le + LIns* t = lhs; lhs = rhs; rhs = t; + mask = 0x41; + } else { // if (condop == LIR_gt) + // swap, use lt + LIns* t = lhs; lhs = rhs; rhs = t; + mask = 0x05; + } + + if (sse2) + { + // UNORDERED: ZF,PF,CF <- 111; + // GREATER_THAN: ZF,PF,CF <- 000; + // LESS_THAN: ZF,PF,CF <- 001; + // EQUAL: ZF,PF,CF <- 100; + + if (condop == LIR_eq && lhs == rhs) { + // nan check + Register r = findRegFor(lhs, XmmRegs); + UCOMISD(r, r); + } else { + evict(EAX); + TEST_AH(mask); + LAHF(); + Reservation *rA, *rB; + findRegFor2(XmmRegs, lhs, rA, rhs, rB); + UCOMISD(rA->reg, rB->reg); + } + } + else + { + evict(EAX); + TEST_AH(mask); + FNSTSW_AX(); + + NanoAssert(lhs->isQuad() && rhs->isQuad()); + Reservation *rA; + if (lhs != rhs) + { + // compare two different numbers + int d = findMemFor(rhs); + rA = getresv(lhs); + int pop = !rA || rA->reg == UnknownReg; + findSpecificRegFor(lhs, FST0); + // lhs is in ST(0) and rhs is on stack + FCOM(pop, d, FP); + } + else + { + // compare n to itself, this is a NaN test. 
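+                    // (an x87 compare of a value against itself is 'unordered' exactly
+                    // when the value is a NaN; the TEST_AH(mask) above reduces the x87
+                    // condition codes to the parity flag, which the caller's
+                    // JP/JNP/SETNP then tests.)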
+ rA = getresv(lhs); + int pop = !rA || rA->reg == UnknownReg; + findSpecificRegFor(lhs, FST0); + // value in ST(0) + if (pop) + FCOMPP(); + else + FCOMP(); + FLDr(FST0); // DUP + } + } + } +#endif //NJ_SOFTFLOAT + + void Assembler::patch(GuardRecord *lr) + { + Fragment *frag = lr->target; + NanoAssert(frag->fragEntry); + NIns* was = asm_adjustBranch(lr->jmp, frag->fragEntry); + if (!lr->origTarget) lr->origTarget = was; + verbose_only(verbose_outputf("patching jump at %X to target %X (was %X)\n",(int)lr->jmp,(int)frag->fragEntry,was);) + } + + void Assembler::unpatch(GuardRecord *lr) + { + NIns* was = asm_adjustBranch(lr->jmp, lr->origTarget); + (void)was; + verbose_only(verbose_outputf("unpatching jump at %X to original target %X (was %X)\n",(int)lr->jmp,(int)lr->origTarget,(int)was);) + } + + NIns* Assembler::asm_exit(SideExit *exit) + { + NIns* at = 0; + if (!_branchStateMap->get(exit)) + { + at = asm_leave_trace(exit); + } + else + { + RegAlloc* captured = _branchStateMap->get(exit); + verbose_only(verbose_outputf("merged trunk with branch for SID %d",exit->sid);) + mergeRegisterState(*captured); + verbose_only(verbose_outputf("merging trunk with branch for SID %d",exit->sid);) + at = exit->target->fragEntry; + NanoAssert(at); + } + return at; + } + + NIns* Assembler::asm_leave_trace(SideExit* exit) + { + verbose_only(bool priorVerbose = _verbose; ) + verbose_only( _verbose = verbose_enabled() && _frago->core()->config.verbose_exits; ) + verbose_only( int32_t nativeSave = _stats.native ); + verbose_only(verbose_outputf("--------------------------------------- end exit block SID %d",exit->sid);) + + RegAlloc capture = _allocator; + + // this point is unreachable. so free all the registers. + // if an instruction has a stack entry we will leave it alone, + // otherwise we free it entirely. mergeRegisterState will restore. 
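+        // (the RegAlloc snapshot in 'capture' above is replayed by
+        // mergeRegisterState(capture) below, once the exit stub has been generated.)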
+ releaseRegisters(); + + swapptrs(); + _inExit = true; + + //verbose_only( verbose_outputf(" LIR_xend swapptrs, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) ); + debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; ) + + GuardRecord *lr = nFragExit(exit); (void)lr; + verbose_only( if (lr) lr->gid = ++gid; ) + + mergeRegisterState(capture); + + // this can be useful for breaking whenever an exit is taken + //INT3(); + //NOP(); + + // we are done producing the exit logic for the guard so demark where our exit block code begins + NIns* jmpTarget = _nIns; // target in exit path for our mainline conditional jump + + // swap back pointers, effectively storing the last location used in the exit path + swapptrs(); + _inExit = false; + + //verbose_only( verbose_outputf(" LIR_xt/xf swapptrs, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) ); + verbose_only( verbose_outputf(" %p:",jmpTarget);) + verbose_only( verbose_outputf("--------------------------------------- exit block (LIR_xt|LIR_xf)") ); + +#ifdef NANOJIT_IA32 + NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, ("LIR_xtf, _fpuStkDepth=%d, expect %d\n",_fpuStkDepth, _sv_fpuStkDepth)); + debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; ) +#endif + + verbose_only( _verbose = priorVerbose; ) + verbose_only(_stats.exitnative += (_stats.native-nativeSave)); + + return jmpTarget; + } + + bool Assembler::ignoreInstruction(LInsp ins) + { + LOpcode op = ins->opcode(); + if (ins->isStore() || op == LIR_loop || ins->isArg()) + return false; + return getresv(ins) == 0; + } + + NIns* Assembler::beginAssembly(RegAllocMap* branchStateMap) + { + _activation.lowwatermark = 1; + _activation.tos = _activation.lowwatermark; + _activation.highwatermark = _activation.tos; + + counter_reset(native); + counter_reset(exitnative); + counter_reset(steals); + counter_reset(spills); + counter_reset(remats); + + setError(None); + + // native code gen buffer setup + nativePageSetup(); + + // make sure we got memory at least one page + if (error()) return 0; + + _epilogue = genEpilogue(SavedRegs); + _branchStateMap = branchStateMap; + + verbose_only( verbose_outputf(" %p:",_nIns) ); + verbose_only( verbose_output(" epilogue:") ); + return _epilogue; + } + + NIns* Assembler::assemble(Fragment* frag) + { + if (error()) return 0; + AvmCore *core = _frago->core(); + GC *gc = core->gc; + _thisfrag = frag; + + // set up backwards pipeline: assembler -> StoreFilter -> LirReader + LirReader bufreader(frag->lastIns); + StoreFilter storefilter(&bufreader, gc, this, + frag->param0, frag->sp, frag->rp); + DeadCodeFilter deadfilter(&storefilter, this); + LirFilter* rdr = &deadfilter; + verbose_only( + VerboseBlockReader vbr(rdr, this, frag->lirbuf->names); + if (verbose_enabled()) + rdr = &vbr; + ) + + verbose_only(_thisfrag->compileNbr++; ) + verbose_only(_frago->_stats.compiles++; ) + _latestGuard = 0; + _inExit = false; + NIns* loopJump = gen(rdr); + frag->fragEntry = _nIns; + frag->outbound = core->config.tree_opt? 
_latestGuard : 0; + //fprintf(stderr, "assemble frag %X entry %X\n", (int)frag, (int)frag->fragEntry); + return loopJump; + } + + NIns* Assembler::endAssembly(Fragment* frag, NInsList& loopJumps) + { + while(!loopJumps.isEmpty()) + { + NIns* loopJump = (NIns*)loopJumps.removeLast(); + nPatchBranch(loopJump, _nIns); + } + + NIns* patchEntry = genPrologue(SavedRegs); + verbose_only( verbose_outputf(" %p:",_nIns); ) + verbose_only( verbose_output(" prologue"); ) + + // something bad happened? + if (!error()) + { + // check for resource leaks + debug_only( + for(uint32_t i=_activation.lowwatermark;i<_activation.highwatermark; i++) { + NanoAssertMsgf(_activation.entry[i] == 0, ("frame entry %d wasn't freed\n",-4*i)); + } + ) + + frag->fragEntry = patchEntry; + frag->setCode(_nIns); + //fprintf(stderr, "endAssembly frag %X entry %X\n", (int)frag, (int)frag->fragEntry); + + } + + AvmAssertMsg(error() || _fpuStkDepth == 0, ("_fpuStkDepth %d\n",_fpuStkDepth)); + + internalReset(); // clear the reservation tables and regalloc + _branchStateMap = 0; + + #ifdef UNDER_CE + // If we've modified the code, we need to flush so we don't end up trying + // to execute junk + FlushInstructionCache(GetCurrentProcess(), NULL, NULL); + #endif + + return patchEntry; + } + + void Assembler::copyRegisters(RegAlloc* copyTo) + { + *copyTo = _allocator; + } + + void Assembler::releaseRegisters() + { + for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) + { + LIns *i = _allocator.getActive(r); + if (i) + { + // clear reg allocation, preserve stack allocation. + Reservation* resv = getresv(i); + NanoAssert(resv != 0); + _allocator.retire(r); + if (r == resv->reg) + resv->reg = UnknownReg; + + if (!resv->arIndex && resv->reg == UnknownReg) + { + reserveFree(i); + } + } + } + } + + NIns* Assembler::gen(LirFilter* reader) + { + NIns *loopJump = 0; + const CallInfo* call = 0; // current call being emitted; if any + uint32_t iargs = 0; + uint32_t fargs = 0; + int32_t stackUsed = 0; // stack space used for call + + // trace must start with LIR_x or LIR_loop + NanoAssert(reader->pos()->isop(LIR_x) || reader->pos()->isop(LIR_loop)); + + for (LInsp ins = reader->read(); ins != 0 && !error(); ins = reader->read()) + { + Reservation *rR = getresv(ins); + LOpcode op = ins->opcode(); + switch(op) + { + default: + NanoAssertMsg(false, "unsupported LIR instruction"); + break; + + case LIR_short: + case LIR_int: + { + Register rr = prepResultReg(ins, GpRegs); + int32_t val; + if (op == LIR_int) + val = ins->imm32(); + else + val = ins->imm16(); + if (val == 0) + XOR(rr,rr); + else + LDi(rr, val); + break; + } + case LIR_quad: + { +#ifdef NANOJIT_IA32 + Register rr = rR->reg; + if (rr != UnknownReg) + { + // @todo -- add special-cases for 0 and 1 + _allocator.retire(rr); + rR->reg = UnknownReg; + NanoAssert((rmask(rr) & FpRegs) != 0); + + const double d = ins->constvalf(); + if (rmask(rr) & XmmRegs) { + if (d == 0.0) { + XORPDr(rr, rr); + } else if (d == 1.0) { + // 1.0 is extremely frequent and worth special-casing! 
+ static const double k_ONE = 1.0; + LDSDm(rr, &k_ONE); + } else { + findMemFor(ins); + const int d = disp(rR); + LDQ(rr, d, FP); + } + } else { + if (d == 0.0) { + FLDZ(); + } else if (d == 1.0) { + FLD1(); + } else { + findMemFor(ins); + int d = disp(rR); + FLDQ(d,FP); + } + } + } +#endif + // @todo, if we used xor, ldsd, fldz, etc above, we don't need mem here + int d = disp(rR); + freeRsrcOf(ins, false); + if (d) + { + const int32_t* p = (const int32_t*) (ins-2); + STi(FP,d+4,p[1]); + STi(FP,d,p[0]); + } + break; + } + case LIR_callh: + { + // return result of quad-call in register + prepResultReg(ins, rmask(retRegs[1])); + // if hi half was used, we must use the call to ensure it happens + findRegFor(ins->oprnd1(), rmask(retRegs[0])); + break; + } + case LIR_param: + { + Register w = Register(ins->imm8()); + NanoAssert(w != UnknownReg); + // incoming arg in register + prepResultReg(ins, rmask(w)); + break; + } + case LIR_qlo: + { + LIns *q = ins->oprnd1(); +#ifdef NANOJIT_IA32 + if (sse2) { + Reservation *resv = getresv(ins); + Register rr = resv->reg; + if (rr == UnknownReg) { + // store quad in spill loc + int d = disp(resv); + freeRsrcOf(ins, false); + Register qr = findRegFor(q, XmmRegs); + STD(d, FP, qr); + } else { + freeRsrcOf(ins, false); + Register qr = findRegFor(q, XmmRegs); + MOVD(rr,qr); + } + } + else +#endif + { + Register rr = prepResultReg(ins, GpRegs); + int d = findMemFor(q); + LD(rr, d, FP); + } + break; + } + case LIR_qhi: + { + Register rr = prepResultReg(ins, GpRegs); + LIns *q = ins->oprnd1(); + int d = findMemFor(q); + LD(rr, d+4, FP); + break; + } + + case LIR_cmov: + { + LIns* condval = ins->oprnd1(); + NanoAssert(condval->isCmp()); + + LIns* values = ins->oprnd2(); + + // note that 'LIR_eq' is just a placeholder to hold two values... + // can't use the 123 form because we need space for reservation + NanoAssert(values->opcode() == LIR_2); + LIns* iftrue = values->oprnd1(); + LIns* iffalse = values->oprnd2(); + NanoAssert(!iftrue->isQuad() && !iffalse->isQuad()); + + const Register rr = prepResultReg(ins, GpRegs); + + // this code assumes that neither LD nor MR nor MRcc set any of the condition flags. + // (This is true on Intel, is it true on all architectures?) + const Register iffalsereg = findRegFor(iffalse, GpRegs & ~rmask(rr)); + switch (condval->opcode()) + { + // note that these are all opposites... 
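+                    // code is emitted backwards, so in execution order this becomes:
+                    //   cmp (asm_cmp below); rr = iftrue (findSpecificRegFor below);
+                    //   then MRcc with the negated condition overwrites rr with iffalse
+                    //   only when the original condition is false.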
+ case LIR_eq: MRNE(rr, iffalsereg); break; + case LIR_lt: MRGE(rr, iffalsereg); break; + case LIR_le: MRG(rr, iffalsereg); break; + case LIR_gt: MRLE(rr, iffalsereg); break; + case LIR_ge: MRL(rr, iffalsereg); break; + case LIR_ult: MRAE(rr, iffalsereg); break; + case LIR_ule: MRA(rr, iffalsereg); break; + case LIR_ugt: MRBE(rr, iffalsereg); break; + case LIR_uge: MRB(rr, iffalsereg); break; + debug_only( default: NanoAssert(0); break; ) + } + /*const Register iftruereg =*/ findSpecificRegFor(iftrue, rr); + asm_cmp(condval); + break; + } + + case LIR_ld: + case LIR_ldc: + case LIR_ldcb: + { + LIns* base = ins->oprnd1(); + LIns* disp = ins->oprnd2(); + Register rr = prepResultReg(ins, GpRegs); + Register ra = findRegFor(base, GpRegs); + int d = disp->constval(); + if (op == LIR_ldcb) + LD8Z(rr, d, ra); + else + LD(rr, d, ra); + break; + } + + case LIR_ldq: + { + asm_load64(ins); + break; + } + + case LIR_neg: + case LIR_not: + { + Register rr = prepResultReg(ins, GpRegs); + + LIns* lhs = ins->oprnd1(); + Reservation *rA = getresv(lhs); + // if this is last use of lhs in reg, we can re-use result reg + Register ra; + if (rA == 0 || (ra=rA->reg) == UnknownReg) + ra = findSpecificRegFor(lhs, rr); + // else, rA already has a register assigned. + + if (op == LIR_not) + NOT(rr); + else + NEG(rr); + + if ( rr != ra ) + MR(rr,ra); + break; + } + + case LIR_qjoin: + { + asm_qjoin(ins); + break; + } + + case LIR_add: + case LIR_sub: + case LIR_mul: + case LIR_and: + case LIR_or: + case LIR_xor: + case LIR_lsh: + case LIR_rsh: + case LIR_ush: + { + LInsp lhs = ins->oprnd1(); + LInsp rhs = ins->oprnd2(); + + Register rb = UnknownReg; + RegisterMask allow = GpRegs; + if (lhs != rhs && (op == LIR_mul || !rhs->isconst())) + { +#ifdef NANOJIT_IA32 + if (op == LIR_lsh || op == LIR_rsh || op == LIR_ush) + rb = findSpecificRegFor(rhs, ECX); + else +#endif + rb = findRegFor(rhs, allow); + allow &= ~rmask(rb); + } + + Register rr = prepResultReg(ins, allow); + Reservation* rA = getresv(lhs); + Register ra; + // if this is last use of lhs in reg, we can re-use result reg + if (rA == 0 || (ra = rA->reg) == UnknownReg) + ra = findSpecificRegFor(lhs, rr); + // else, rA already has a register assigned. 
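+                // x86 ALU ops are two-address (rr op= operand); because code is
+                // emitted backwards, the MR(rr,ra) at the bottom of this case
+                // executes first and copies lhs into rr before the destructive op.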
+ + if (!rhs->isconst() || op == LIR_mul) + { + if (lhs == rhs) + rb = ra; + + if (op == LIR_add) + ADD(rr, rb); + else if (op == LIR_sub) + SUB(rr, rb); + else if (op == LIR_mul) + MUL(rr, rb); + else if (op == LIR_and) + AND(rr, rb); + else if (op == LIR_or) + OR(rr, rb); + else if (op == LIR_xor) + XOR(rr, rb); + else if (op == LIR_lsh) + SHL(rr, rb); + else if (op == LIR_rsh) + SAR(rr, rb); + else if (op == LIR_ush) + SHR(rr, rb); + else + NanoAssertMsg(0, "Unsupported"); + } + else + { + int c = rhs->constval(); + if (op == LIR_add) { +#ifdef NANOJIT_IA32 + if (ra != rr) { + LEA(rr, c, ra); + ra = rr; // suppress mov + } else +#endif + { + ADDi(rr, c); + } + } else if (op == LIR_sub) { +#ifdef NANOJIT_IA32 + if (ra != rr) { + LEA(rr, -c, ra); + ra = rr; + } else +#endif + { + SUBi(rr, c); + } + } else if (op == LIR_and) + ANDi(rr, c); + else if (op == LIR_or) + ORi(rr, c); + else if (op == LIR_xor) + XORi(rr, c); + else if (op == LIR_lsh) + SHLi(rr, c); + else if (op == LIR_rsh) + SARi(rr, c); + else if (op == LIR_ush) + SHRi(rr, c); + else + NanoAssertMsg(0, "Unsupported"); + } + + if ( rr != ra ) + MR(rr,ra); + break; + } +#ifndef NJ_SOFTFLOAT + case LIR_fneg: + { + if (sse2) + { + LIns *lhs = ins->oprnd1(); + + Register rr = prepResultReg(ins, XmmRegs); + Reservation *rA = getresv(lhs); + Register ra; + + // if this is last use of lhs in reg, we can re-use result reg + if (rA == 0 || (ra = rA->reg) == UnknownReg) + ra = findSpecificRegFor(lhs, rr); + // else, rA already has a register assigned. + + static const AVMPLUS_ALIGN16(uint32_t) negateMask[] = {0,0x80000000,0,0}; + XORPD(rr, negateMask); + + if (rr != ra) + MOVSD(rr, ra); + } + else + { + Register rr = prepResultReg(ins, FpRegs); + + LIns* lhs = ins->oprnd1(); + + // lhs into reg, prefer same reg as result + Reservation* rA = getresv(lhs); + // if this is last use of lhs in reg, we can re-use result reg + if (rA == 0 || rA->reg == UnknownReg) + findSpecificRegFor(lhs, rr); + // else, rA already has a different reg assigned + + NanoAssert(getresv(lhs)!=0 && getresv(lhs)->reg==FST0); + // assume that the lhs is in ST(0) and rhs is on stack + FCHS(); + + // if we had more than one fpu reg, this is where + // we would move ra into rr if rr != ra. + } + break; + } + case LIR_fadd: + case LIR_fsub: + case LIR_fmul: + case LIR_fdiv: + { + if (sse2) + { + LIns *lhs = ins->oprnd1(); + LIns *rhs = ins->oprnd2(); + + RegisterMask allow = XmmRegs; + Register rb = UnknownReg; + if (lhs != rhs) { + rb = findRegFor(rhs,allow); + allow &= ~rmask(rb); + } + + Register rr = prepResultReg(ins, allow); + Reservation *rA = getresv(lhs); + Register ra; + + // if this is last use of lhs in reg, we can re-use result reg + if (rA == 0 || (ra = rA->reg) == UnknownReg) + ra = findSpecificRegFor(lhs, rr); + // else, rA already has a register assigned. + + if (lhs == rhs) + rb = ra; + + if (op == LIR_fadd) + ADDSD(rr, rb); + else if (op == LIR_fsub) + SUBSD(rr, rb); + else if (op == LIR_fmul) + MULSD(rr, rb); + else //if (op == LIR_fdiv) + DIVSD(rr, rb); + + if (rr != ra) + MOVSD(rr, ra); + } + else + { + // we swap lhs/rhs on purpose here, works out better + // if you only have one fpu reg. use divr/subr. 
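+                    // (FSUBR/FDIVR compute mem - ST(0) and mem / ST(0), so despite the
+                    // operand swap below the result is still oprnd1 - oprnd2 and
+                    // oprnd1 / oprnd2; FADD/FMUL are commutative and unaffected.)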
+ LIns* rhs = ins->oprnd1(); + LIns* lhs = ins->oprnd2(); + Register rr = prepResultReg(ins, rmask(FST0)); + + // make sure rhs is in memory + int db = findMemFor(rhs); + + // lhs into reg, prefer same reg as result + Reservation* rA = getresv(lhs); + // last use of lhs in reg, can reuse rr + if (rA == 0 || rA->reg == UnknownReg) + findSpecificRegFor(lhs, rr); + // else, rA already has a different reg assigned + + NanoAssert(getresv(lhs)!=0 && getresv(lhs)->reg==FST0); + // assume that the lhs is in ST(0) and rhs is on stack + if (op == LIR_fadd) + { FADD(db, FP); } + else if (op == LIR_fsub) + { FSUBR(db, FP); } + else if (op == LIR_fmul) + { FMUL(db, FP); } + else if (op == LIR_fdiv) + { FDIVR(db, FP); } + } + break; + } + case LIR_i2f: + { + // where our result goes + Register rr = prepResultReg(ins, FpRegs); + if (rmask(rr) & XmmRegs) + { + // todo support int value in memory + Register gr = findRegFor(ins->oprnd1(), GpRegs); + CVTSI2SD(rr, gr); + } + else + { + int d = findMemFor(ins->oprnd1()); + FILD(d, FP); + } + break; + } + case LIR_u2f: + { + // where our result goes + Register rr = prepResultReg(ins, FpRegs); + const int disp = -8; + const Register base = ESP; + if (rmask(rr) & XmmRegs) + { + // don't call findRegFor, we want a reg we can stomp on for a very short time, + // not a reg that will continue to be associated with the LIns + Register gr = registerAlloc(GpRegs); + + // technique inspired by gcc disassembly + // Edwin explains it: + // + // gr is 0..2^32-1 + // + // sub gr,0x80000000 + // + // now gr is -2^31..2^31-1, i.e. the range of int, but not the same value + // as before + // + // cvtsi2sd rr,gr + // + // rr is now a double with the int value range + // + // addsd rr, 2147483648.0 + // + // adding back double(0x80000000) makes the range 0..2^32-1. 
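+                        // worked example: gr = 0xffffffff
+                        //   after the subtract: 0x7fffffff = +2147483647 (signed)
+                        //   cvtsi2sd:           2147483647.0
+                        //   + 2147483648.0:     4294967295.0, the original unsigned value
+                        // (despite its name, k_NEGONE below holds +2^31 = 2147483648.0)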
+ + static const double k_NEGONE = 2147483648.0; + ADDSDm(rr, &k_NEGONE); + CVTSI2SD(rr, gr); + + Reservation* resv = getresv(ins->oprnd1()); + Register xr; + if (resv && (xr = resv->reg) != UnknownReg && (rmask(xr) & GpRegs)) + { +#ifdef NANOJIT_IA32 + LEA(gr, 0x80000000, xr); +#else + SUBi(gr, 0x80000000); + MR(gr, xr); +#endif + } + else + { + const int d = findMemFor(ins->oprnd1()); + SUBi(gr, 0x80000000); + LD(gr, d, FP); + } + + // ok, we're done with it + _allocator.addFree(gr); + } + else + { + Register gr = findRegFor(ins->oprnd1(), GpRegs); + NanoAssert(rr == FST0); + FILDQ(disp, base); + STi(base, disp+4, 0); // high 32 bits = 0 + ST(base, disp, gr); // low 32 bits = unsigned value + } + break; + } +#endif // NJ_SOFTFLOAT + case LIR_st: + case LIR_sti: + { + LIns* value = ins->oprnd1(); + LIns* base = ins->oprnd2(); + int dr = ins->immdisp(); + if (!value->isQuad()) + asm_store32(value, dr, base); + else + asm_store64(value, dr, base); + break; + } + case LIR_xt: + case LIR_xf: + { + NIns* exit = asm_exit(ins->exit()); + + // we only support cmp with guard right now, also assume it is 'close' and only emit the branch + LIns* cond = ins->oprnd1(); + LOpcode condop = cond->opcode(); + NanoAssert(cond->isCmp()); +#ifndef NJ_SOFTFLOAT + bool fp = cond->oprnd1()->isQuad(); + + if (fp) + { + if (op == LIR_xf) + JP(exit); + else + JNP(exit); + asm_fcmp(cond); + break; + } +#endif + // produce the branch + if (op == LIR_xf) + { + if (condop == LIR_eq) + JNE(exit); + else if (condop == LIR_lt) + JNL(exit); + else if (condop == LIR_le) + JNLE(exit); + else if (condop == LIR_gt) + JNG(exit); + else if (condop == LIR_ge) + JNGE(exit); + else if (condop == LIR_ult) + JNB(exit); + else if (condop == LIR_ule) + JNBE(exit); + else if (condop == LIR_ugt) + JNA(exit); + else //if (condop == LIR_uge) + JNAE(exit); + } + else // op == LIR_xt + { + if (condop == LIR_eq) + JE(exit); + else if (condop == LIR_lt) + JL(exit); + else if (condop == LIR_le) + JLE(exit); + else if (condop == LIR_gt) + JG(exit); + else if (condop == LIR_ge) + JGE(exit); + else if (condop == LIR_ult) + JB(exit); + else if (condop == LIR_ule) + JBE(exit); + else if (condop == LIR_ugt) + JA(exit); + else //if (condop == LIR_uge) + JAE(exit); + } + asm_cmp(cond); + break; + } + case LIR_x: + { + verbose_only(verbose_output("")); + // generate the side exit branch on the main trace. + NIns *exit = asm_exit(ins->exit()); + JMP( exit ); + break; + } + case LIR_loop: + { + JMP_long_placeholder(); // jump to SOT + verbose_only( if (_verbose && _outputCache) { _outputCache->removeLast(); outputf(" jmp SOT"); } ); + + loopJump = _nIns; + + #ifdef NJ_VERBOSE + // branching from this frag to ourself. 
+ if (_frago->core()->config.show_stats) + LDi(argRegs[1], int((Fragment*)_thisfrag)); + #endif + + // restore parameter 1, the only one we use + LInsp param0 = _thisfrag->param0; + Register a0 = Register(param0->imm8()); + findSpecificRegFor(param0, a0); + break; + } + case LIR_eq: + case LIR_le: + case LIR_lt: + case LIR_gt: + case LIR_ge: + case LIR_ult: + case LIR_ule: + case LIR_ugt: + case LIR_uge: + { + // only want certain regs + uint32_t allow = AllowableFlagRegs; + + Register r = prepResultReg(ins, allow); + // SETcc only sets low 8 bits, so extend + MOVZX8(r,r); +#ifndef NJ_SOFTFLOAT + if (ins->oprnd1()->isQuad()) + { + SETNP(r); + asm_fcmp(ins); + break; + } +#endif + if (op == LIR_eq) + SETE(r); + else if (op == LIR_lt) + SETL(r); + else if (op == LIR_le) + SETLE(r); + else if (op == LIR_gt) + SETG(r); + else if (op == LIR_ge) + SETGE(r); + else if (op == LIR_ult) + SETB(r); + else if (op == LIR_ule) + SETBE(r); + else if (op == LIR_ugt) + SETA(r); + else // if (op == LIR_uge) + SETAE(r); + asm_cmp(ins); + break; + } + case LIR_ref: + { + // ref arg - use lea + LIns *p = ins->oprnd1(); + if (ins->resv()) + { + // arg in specific reg + Register r = imm2register(ins->resv()); + int da = findMemFor(p); + LEA(r, da, FP); + } + else + { + NanoAssert(0); // not supported + } + ++iargs; + nArgEmitted(call, 0, iargs, fargs); + break; + } + case LIR_arg: + { + LIns* p = ins->oprnd1(); + if (ins->resv()) + { + // arg goes in specific register + Register r = imm2register(ins->resv()); + if (p->isconst()) + LDi(r, p->constval()); + else + findSpecificRegFor(p, r); + } + else + { + asm_pusharg(p); + stackUsed += 1; + } + ++iargs; + nArgEmitted(call, stackUsed, iargs, fargs); + break; + } +#ifdef NANOJIT_IA32 + case LIR_farg: + { + LIns* p = ins->oprnd1(); + Register r = findRegFor(p, FpRegs); + if (rmask(r) & XmmRegs) { + STQ(0, SP, r); + } else { + FSTPQ(0, SP); + } + PUSHr(ECX); // 2*pushr is smaller than sub + PUSHr(ECX); + stackUsed += 2; + ++fargs; + nArgEmitted(call, stackUsed, iargs, fargs); + break; + } +#endif + +#ifndef NJ_SOFTFLOAT + case LIR_fcall: +#endif + case LIR_call: + { + const FunctionID fid = (FunctionID) ins->imm8(); + // bogus assertion: zero is a legal value right now, with fmod() in that slot + // NanoAssertMsg(fid!=0, "Function does not exist in the call table"); + call = &_functions[ fid ]; + iargs = 0; + fargs = 0; + + Register rr = UnknownReg; +#ifndef NJ_SOFTFLOAT + if (op == LIR_fcall) + { + if (rR) { + if ((rr=rR->reg) != UnknownReg && (rmask(rr) & XmmRegs)) + evict(rr); + } + rr = FST0; + prepResultReg(ins, rmask(rr)); + } + else +#endif + { + rr = retRegs[0]; + prepResultReg(ins, rmask(rr)); + } + + // do this after we've handled the call result, so we dont + // force the call result to be spilled unnecessarily. 
+ restoreCallerSaved(); + + nPostCallCleanup(call); + #ifdef NJ_VERBOSE + CALL(call->_address, call->_name); + #else + CALL(call->_address, ""); + #endif + + stackUsed = 0; + LirReader argReader(reader->pos()); + +#ifdef NANOJIT_ARM + // pre-assign registers R0-R3 for arguments (if they fit) + int regsUsed = 0; + for (LInsp a = argReader.read(); a->isArg(); a = argReader.read()) + { + if (a->isop(LIR_arg) || a->isop(LIR_ref)) + { + a->setresv((int)R0 + 1 + regsUsed); + regsUsed++; + } + if (regsUsed>=4) + break; + } +#endif +#ifdef NANOJIT_IA32 + debug_only( if (rr == FST0) fpu_push(); ) + // make sure fpu stack is empty before call (restoreCallerSaved) + NanoAssert(_allocator.isFree(FST0)); + // note: this code requires that LIR_ref arguments be one of the first two arguments + // pre-assign registers to the first 2 4B args + const uint32_t iargs = call->count_iargs(); + const int max_regs = (iargs < 2) ? iargs : 2; + int n = 0; + for(LIns* a = argReader.read(); a->isArg() && nisop(LIR_arg)||a->isop(LIR_ref)) + { + a->setresv(argRegs[n++]); // tell LIR_arg what reg to use + } + } +#endif + } + } + + // check that all is well (don't check in exit paths since its more complicated) + debug_only( pageValidate(); ) + debug_only( resourceConsistencyCheck(); ) + } + return loopJump; + } + + uint32_t Assembler::arFree(uint32_t idx) + { + if (idx > 0 && _activation.entry[idx] == _activation.entry[idx+stack_direction(1)]) + _activation.entry[idx+stack_direction(1)] = 0; // clear 2 slots for doubles + _activation.entry[idx] = 0; + return 0; + } + +#ifdef NJ_VERBOSE + void Assembler::printActivationState() + { + bool verbose_activation = false; + if (!verbose_activation) + return; + +#ifdef NANOJIT_ARM + verbose_only( + if (_verbose) { + char* s = &outline[0]; + memset(s, ' ', 51); s[51] = '\0'; + s += strlen(s); + sprintf(s, " SP "); + s += strlen(s); + for(uint32_t i=_activation.lowwatermark; i<_activation.tos;i++) { + LInsp ins = _activation.entry[i]; + if (ins && ins !=_activation.entry[i+1]) { + sprintf(s, "%d(%s) ", 4*i, _thisfrag->lirbuf->names->formatRef(ins)); + s += strlen(s); + } + } + output(&outline[0]); + } + ) +#else + verbose_only( + char* s = &outline[0]; + if (_verbose) { + memset(s, ' ', 51); s[51] = '\0'; + s += strlen(s); + sprintf(s, " ebp "); + s += strlen(s); + + for(uint32_t i=_activation.lowwatermark; i<_activation.tos;i++) { + LInsp ins = _activation.entry[i]; + if (ins /* && _activation.entry[i]!=_activation.entry[i+1]*/) { + sprintf(s, "%d(%s) ", -4*i,_thisfrag->lirbuf->names->formatRef(ins)); + s += strlen(s); + } + } + output(&outline[0]); + } + ) +#endif + } +#endif + + uint32_t Assembler::arReserve(LIns* l) + { + NanoAssert(!l->isop(LIR_tramp)); + + //verbose_only(printActivationState()); + const bool quad = l->isQuad(); + const int32_t n = _activation.tos; + int32_t start = _activation.lowwatermark; + int32_t i = 0; + NanoAssert(start>0); + if (n >= NJ_MAX_STACK_ENTRY-2) + { + setError(StackFull); + return start; + } + else if (quad) + { + if ( (start&1)==1 ) start++; // even + for(i=start; i <= n; i+=2) + { + if ( (_activation.entry[i+stack_direction(1)] == 0) && (i==n || (_activation.entry[i] == 0)) ) + break; // for fp we need 2 adjacent aligned slots + } + } + else + { + for(i=start; i < n; i++) + { + if (_activation.entry[i] == 0) + break; // not being used + } + } + + int32_t inc = ((i-n+1) < 0) ? 
0 : (i-n+1); + if (quad && stack_direction(1)>0) inc++; + _activation.tos += inc; + _activation.highwatermark += inc; + + // place the entry in the table and mark the instruction with it + _activation.entry[i] = l; + if (quad) _activation.entry[i+stack_direction(1)] = l; + return i; + } + + void Assembler::restoreCallerSaved() + { + // generate code to restore callee saved registers + // @todo speed this up + RegisterMask scratch = ~SavedRegs; + for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) + { + if ((rmask(r) & scratch) && _allocator.getActive(r)) + { + evict(r); + } + } + } + + /** + * Merge the current state of the registers with a previously stored version + */ + void Assembler::mergeRegisterState(RegAlloc& saved) + { + // evictions and pops first + RegisterMask skip = 0; + for (Register r=FirstReg; r <= LastReg; r = nextreg(r)) + { + LIns * curins = _allocator.getActive(r); + LIns * savedins = saved.getActive(r); + if (curins == savedins) + { + verbose_only( if (curins) verbose_outputf("skip %s", regNames[r]); ) + skip |= rmask(r); + } + else + { + if (curins) + evict(r); + + #ifdef NANOJIT_IA32 + if (savedins && (rmask(r) & x87Regs)) + FSTP(r); + #endif + } + } + + // now reassign mainline registers + for (Register r=FirstReg; r <= LastReg; r = nextreg(r)) + { + LIns *i = saved.getActive(r); + if (i && !(skip&rmask(r))) + findSpecificRegFor(i, r); + } + debug_only(saved.used = 0); // marker that we are no longer in exit path + } + + /** + * Guard records are laid out in the exit block buffer (_nInsExit), + * intersperced with the code. Preceding the record are the native + * instructions associated with the record (i.e. the exit code). + * + * The layout is as follows: + * + * [ native code ] [ GuardRecord1 ] + * ... + * [ native code ] [ GuardRecordN ] + * + * The guard record 'code' field should be used to locate + * the start of the native code associated with the + * exit block. N.B the code may lie in a different page + * than the guard record + * + * The last guard record is used for the unconditional jump + * at the end of the trace. + * + * NOTE: It is also not guaranteed that the native code + * is contained on a single page. + */ + GuardRecord* Assembler::placeGuardRecord(SideExit *exit) + { + // we align the guards to 4Byte boundary + NIns* ptr = (NIns*)alignTo(_nIns-sizeof(GuardRecord), 4); + underrunProtect( (int)_nIns-(int)ptr ); // either got us a new page or there is enough space for us + GuardRecord* rec = (GuardRecord*) alignTo(_nIns-sizeof(GuardRecord),4); + rec->outgoing = _latestGuard; + _latestGuard = rec; + _nIns = (NIns*)rec; + rec->next = 0; + rec->origTarget = 0; + rec->target = exit->target; + rec->calldepth = exit->calldepth; + rec->from = _thisfrag; + rec->exit = exit; + if (exit->target) + exit->target->addLink(rec); + verbose_only( rec->compileNbr = _thisfrag->compileNbr; ) + return rec; + } + + void Assembler::setCallTable(const CallInfo* functions) + { + _functions = functions; + } + + #ifdef NJ_VERBOSE + char Assembler::outline[8192]; + + void Assembler::outputf(const char* format, ...) 
+ { + va_list args; + va_start(args, format); + outline[0] = '\0'; + vsprintf(outline, format, args); + output(outline); + } + + void Assembler::output(const char* s) + { + if (_outputCache) + { + char* str = (char*)_gc->Alloc(strlen(s)+1); + strcpy(str, s); + _outputCache->add(str); + } + else + { + _frago->core()->console << s << "\n"; + } + } + + void Assembler::output_asm(const char* s) + { + if (!verbose_enabled()) + return; + if (*s != '^') + output(s); + } + + char* Assembler::outputAlign(char *s, int col) + { + int len = strlen(s); + int add = ((col-len)>0) ? col-len : 1; + memset(&s[len], ' ', add); + s[col] = '\0'; + return &s[col]; + } + #endif // verbose + + #endif /* FEATURE_NANOJIT */ + +#if defined(FEATURE_NANOJIT) || defined(NJ_VERBOSE) + uint32_t CallInfo::_count_args(uint32_t mask) const + { + uint32_t argc = 0; + uint32_t argt = _argtypes; + for (int i = 0; i < 5; ++i) + { + argt >>= 2; + argc += (argt & mask) != 0; + } + return argc; + } +#endif + +} diff --git a/js/src/nanojit/Fragmento.cpp b/js/src/nanojit/Fragmento.cpp new file mode 100644 index 00000000000..e22fbb84810 --- /dev/null +++ b/js/src/nanojit/Fragmento.cpp @@ -0,0 +1,617 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is [Open Source Virtual Machine]. + * + * The Initial Developer of the Original Code is + * Adobe System Incorporated. + * Portions created by the Initial Developer are Copyright (C) 2004-2007 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Adobe AS3 Team + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "nanojit.h" + +namespace nanojit +{ + #ifdef FEATURE_NANOJIT + + using namespace avmplus; + + /** + * This is the main control center for creating and managing fragments. 
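+     * It owns the pool of native code pages (pageAlloc/pageFree), the map
+     * from trace entry ip to its anchor fragment (_frags), and the Assembler
+     * used to compile fragments into native code.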
+ */ + Fragmento::Fragmento(AvmCore* core) : _allocList(core->GetGC()) + { + _core = core; + GC *gc = core->GetGC(); + _frags = new (gc) FragmentMap(gc, 128); + _assm = new (gc) nanojit::Assembler(this); + verbose_only( enterCounts = new (gc) BlockHist(gc); ) + verbose_only( mergeCounts = new (gc) BlockHist(gc); ) + } + + Fragmento::~Fragmento() + { + debug_only( clearFrags() ); + NanoAssert(_stats.freePages == _stats.pages); + + _frags->clear(); + while( _allocList.size() > 0 ) + { + //fprintf(stderr,"dealloc %x\n", (intptr_t)_allocList.get(_allocList.size()-1)); + _gcHeap->Free( _allocList.removeLast() ); + } + } + + Page* Fragmento::pageAlloc() + { + NanoAssert(sizeof(Page) == NJ_PAGE_SIZE); + if (!_pageList) + pagesGrow(NJ_PAGES); // try to get more mem + Page *page = _pageList; + if (page) + { + _pageList = page->next; + debug_only(_stats.freePages--;) + } + //fprintf(stderr, "Fragmento::pageAlloc %X, %d free pages of %d\n", (int)page, _stats.freePages, _stats.pages); + debug_only( NanoAssert(pageCount()==_stats.freePages); ) + return page; + } + + void Fragmento::pageFree(Page* page) + { + //fprintf(stderr, "Fragmento::pageFree %X, %d free pages of %d\n", (int)page, _stats.freePages+1, _stats.pages); + + // link in the page + page->next = _pageList; + _pageList = page; + debug_only(_stats.freePages++;) + debug_only( NanoAssert(pageCount()==_stats.freePages); ) + } + + void Fragmento::pagesGrow(int32_t count) + { + NanoAssert(!_pageList); + MMGC_MEM_TYPE("NanojitFragmentoMem"); + Page* memory = 0; + if (NJ_UNLIMITED_GROWTH || _stats.pages < (uint32_t)NJ_PAGES) + { + // @todo nastiness that needs a fix'n + _gcHeap = _core->GetGC()->GetGCHeap(); + NanoAssert(NJ_PAGE_SIZE<=_gcHeap->kNativePageSize); + + // convert NJ_PAGES to gc page count + int32_t gcpages = (count*NJ_PAGE_SIZE) / _gcHeap->kNativePageSize; + MMGC_MEM_TYPE("NanojitMem"); + memory = (Page*)_gcHeap->Alloc(gcpages); + NanoAssert((int*)memory == pageTop(memory)); + //fprintf(stderr,"head alloc of %d at %x of %d pages using nj page size of %d\n", gcpages, (intptr_t)memory, (intptr_t)_gcHeap->kNativePageSize, NJ_PAGE_SIZE); + + // can't add memory if its not addressable from all locations + for(uint32_t i=0; i<_allocList.size(); i++) + { + Page* a = _allocList.get(i); + int32_t delta = (a < memory) ? (intptr_t)memory+(NJ_PAGE_SIZE*(count+1))-(intptr_t)a : (intptr_t)a+(NJ_PAGE_SIZE*(count+1))-(intptr_t)memory; + if ( delta > 16777215 ) + { + // can't use this memory + _gcHeap->Free(memory); + return; + } + } + _allocList.add(memory); + + Page* page = memory; + _pageList = page; + _stats.pages += count; + debug_only(_stats.freePages += count;) + while(--count > 0) + { + Page *next = page + 1; + //fprintf(stderr,"Fragmento::pageGrow adding page %x ; %d\n", (intptr_t)page, count); + page->next = next; + page = next; + } + page->next = 0; + debug_only( NanoAssert(pageCount()==_stats.freePages); ) + //fprintf(stderr,"Fragmento::pageGrow adding page %x ; %d\n", (intptr_t)page, count); + } + } + + void Fragmento::clearFrags() + { + //fprintf(stderr, "Fragmento::clearFrags()\n"); + + while (!_frags->isEmpty()) { + Fragment *f = _frags->removeLast(); + f->clear(); + } + + // reclaim native pages @todo this is to be moved into tree code. 
+ _assm->pageReset(); + + verbose_only( enterCounts->clear();) + verbose_only( mergeCounts->clear();) + verbose_only( _flushes++ ); + } + + Assembler* Fragmento::assm() + { + return _assm; + } + + AvmCore* Fragmento::core() + { + return _core; + } + + Fragment* Fragmento::getLoop(const avmplus::InterpState &is) + { + Fragment* f = _frags->get(is.ip); + if (!f) { + f = newFrag(is); + _frags->put(is.ip, f); + f->anchor = f; + f->kind = LoopTrace; + f->mergeCounts = new (_core->gc) BlockHist(_core->gc); + verbose_only( addLabel(f, "T", _frags->size()); ) + } + return f; + } + +#ifdef NJ_VERBOSE + void Fragmento::addLabel(Fragment *f, const char *prefix, int id) + { + char fragname[20]; + sprintf(fragname,"%s%d", prefix, id); + labels->add(f, sizeof(Fragment), 0, fragname); + } +#endif + + Fragment *Fragmento::getMerge(GuardRecord *lr, const avmplus::InterpState &is) + { + Fragment *anchor = lr->from->anchor; + for (Fragment *f = anchor->branches; f != 0; f = f->nextbranch) { + if (f->kind == MergeTrace && f->frid == is.ip && f->calldepth == lr->calldepth) { + // found existing shared branch on anchor + return f; + } + } + + Fragment *f = newBranch(anchor, is); + f->kind = MergeTrace; + f->calldepth = lr->calldepth; + verbose_only(addLabel(f, "M", ++anchor->mergeid); ) + return f; + } + + Fragment *Fragmento::createBranch(GuardRecord *lr, const avmplus::InterpState &is) + { + Fragment *from = lr->from; + Fragment *f = newBranch(from, is); + f->kind = BranchTrace; + f->calldepth = lr->calldepth; + f->treeBranches = f->anchor->treeBranches; + f->anchor->treeBranches = f; + verbose_only( labels->add(f, sizeof(Fragment), 0, "-"); ); + return f; + } + +#ifdef NJ_VERBOSE + uint32_t Fragmento::pageCount() + { + uint32_t n = 0; + for(Page* page=_pageList; page; page = page->next) + n++; + return n; + } + + void Fragmento::dumpFragStats(Fragment *f, int level, int& size, + uint64_t &traceDur, uint64_t &interpDur) + { + avmplus::String *filep = f->file; + if (!filep) + filep = _core->k_str[avmplus::kstrconst_emptyString]; + avmplus::StringNullTerminatedUTF8 file(_core->gc, filep); + const char *s = file.c_str(); + const char *t = strrchr(s,'\\'); + if (!t) t = strrchr(s,'/'); + if (t) s = t+1; + + char buf[500]; + int namewidth = 35; + sprintf(buf, "%*c%s %.*s:%d", 1+level, ' ', labels->format(f), namewidth, s, f->line); + + int called = f->hits(); + if (called >= 0) + called += f->_called; + else + called = -(1<blacklistLevel) - called - 1; + + uint32_t main = f->_native - f->_exitNative; + + char cause[200]; + if (f->_token && strcmp(f->_token,"loop")==0) + sprintf(cause,"%s %d", f->_token, f->xjumpCount); + else if (f->_token) { + if (f->eot_target) { + sprintf(cause,"%s %s", f->_token, labels->format(f->eot_target)); + } else { + strcpy(cause, f->_token); + } + } + else + cause[0] = 0; + + FOpcodep ip = f->frid; + _assm->outputf("%-*s %7d %6d %6d %6d %4d %9llu %9llu %-12s %s", namewidth, buf, + called, f->guardCount, main, f->_native, f->compileNbr, f->traceTicks/1000, f->interpTicks/1000, + cause, core()->interp.labels->format(ip)); + + size += main; + traceDur += f->traceTicks; + interpDur += f->interpTicks; + + for (Fragment *x = f->branches; x != 0; x = x->nextbranch) + if (x->kind != MergeTrace) + dumpFragStats(x,level+1,size,traceDur,interpDur); + for (Fragment *x = f->branches; x != 0; x = x->nextbranch) + if (x->kind == MergeTrace) + dumpFragStats(x,level+1,size,traceDur,interpDur); + + if (f->anchor == f && f->branches != 0) { + //_assm->outputf("tree size %d ticks %llu",size,dur); + 
_assm->output(""); + } + } + + class DurData { public: + DurData(): frag(0), traceDur(0), interpDur(0), size(0) {} + DurData(int): frag(0), traceDur(0), interpDur(0), size(0) {} + DurData(Fragment* f, uint64_t td, uint64_t id, int32_t s) + : frag(f), traceDur(td), interpDur(id), size(s) {} + Fragment* frag; + uint64_t traceDur; + uint64_t interpDur; + int32_t size; + }; + + void Fragmento::dumpRatio(const char *label, BlockHist *hist) + { + int total=0, unique=0; + for (int i = 0, n=hist->size(); i < n; i++) { + const void * id = hist->keyAt(i); + int c = hist->get(id); + if (c > 1) { + //_assm->outputf("%d %X", c, id); + unique += 1; + } + else if (c == 1) { + unique += 1; + } + total += c; + } + _assm->outputf("%s total %d unique %d ratio %.1f%", label, total, unique, double(total)/unique); + } + + void Fragmento::dumpStats() + { + bool vsave = _assm->_verbose; + _assm->_verbose = true; + + _assm->output(""); + dumpRatio("inline", enterCounts); + dumpRatio("merges", mergeCounts); + _assm->outputf("abc %d il %d (%.1fx) abc+il %d (%.1fx)", + _stats.abcsize, _stats.ilsize, (double)_stats.ilsize/_stats.abcsize, + _stats.abcsize + _stats.ilsize, + double(_stats.abcsize+_stats.ilsize)/_stats.abcsize); + + int32_t count = _frags->size(); + int32_t pages = _stats.pages; + int32_t free = _stats.freePages; + if (!count) + { + _assm->outputf("No fragments in cache, %d flushes", _flushes); + _assm->_verbose = vsave; + return; + } + + _assm->outputf("\nFragment statistics for %d entries after %d cache flushes of %d pages (%dKB) where %d used and %d free", + count, _flushes, pages, pages<>10, pages-free,free); + _assm->outputf("h=loop header, x=exit trace, L=loop"); + _assm->output(" location calls guards main native gen T-trace T-interp"); + + avmplus::SortedMap durs(_core->gc); + uint64_t totaldur=0; + uint64_t totaltrace=0; + int totalsize=0; + for (int32_t i=0; iat(i); + int size = 0; + uint64_t traceDur=0, interpDur=0; + dumpFragStats(f, 0, size, traceDur, interpDur); + uint64_t bothDur = traceDur + interpDur; + if (bothDur) { + totaltrace += traceDur; + totaldur += bothDur; + totalsize += size; + while (durs.containsKey(bothDur)) bothDur++; + DurData d(f, traceDur, interpDur, size); + durs.put(bothDur, d); + } + } + _assm->outputf(""); + _assm->outputf(" trace interp"); + _assm->outputf("%9lld (%2d%%) %9lld (%2d%%)", + totaltrace/1000, int(100.0*totaltrace/totaldur), + (totaldur-totaltrace)/1000, int(100.0*(totaldur-totaltrace)/totaldur)); + _assm->outputf(""); + _assm->outputf("trace ticks trace interp size"); + for (int32_t i=durs.size()-1; i >= 0; i--) { + uint64_t bothDur = durs.keyAt(i); + DurData d = durs.get(bothDur); + int size = d.size; + _assm->outputf("%-4s %9lld (%2d%%) %9lld (%2d%%) %9lld (%2d%%) %6d (%2d%%)", + labels->format(d.frag), + bothDur/1000, int(100.0*bothDur/totaldur), + d.traceDur/1000, int(100.0*d.traceDur/totaldur), + d.interpDur/1000, int(100.0*d.interpDur/totaldur), + size, int(100.0*size/totalsize)); + } + + _assm->_verbose = vsave; + + } + + void Fragmento::countBlock(BlockHist *hist, FOpcodep ip) + { + int c = hist->count(ip); + if (_assm->_verbose) + _assm->outputf("++ %s %d", core()->interp.labels->format(ip), c); + } + + void Fragmento::countIL(uint32_t il, uint32_t abc) + { + _stats.ilsize += il; + _stats.abcsize += abc; + } +#endif // NJ_VERBOSE + + // + // Fragment + // + Fragment::Fragment(FragID id) : frid(id) + { + // Fragment is a gc object which is zero'd by the GC, no need to clear fields + } + + void Fragment::addLink(GuardRecord* lnk) + { + 
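+        // push lnk onto the head of _links, the list of guard records that
+        // jump into this fragment; link()/unlink() walk this list to patch
+        // or unpatch those jumps.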
//fprintf(stderr,"addLink %x from %X target %X\n",(int)lnk,(int)lnk->from,(int)lnk->target); + lnk->next = _links; + _links = lnk; + } + + void Fragment::removeLink(GuardRecord* lnk) + { + GuardRecord* lr = _links; + GuardRecord** lrp = &_links; + while(lr) + { + if (lr == lnk) + { + *lrp = lr->next; + lnk->next = 0; + break; + } + lrp = &(lr->next); + lr = lr->next; + } + } + + void Fragment::link(Assembler* assm) + { + // patch all jumps into this fragment + GuardRecord* lr = _links; + while (lr) + { + GuardRecord* next = lr->next; + Fragment* from = lr->target; + if (from && from->fragEntry) assm->patch(lr); + lr = next; + } + + // and then patch all jumps leading out + lr = outbound; + while(lr) + { + GuardRecord* next = lr->outgoing; + Fragment* targ = lr->target; + if (targ && targ->fragEntry) assm->patch(lr); + lr = next; + } + } + + void Fragment::unlink(Assembler* assm) + { + // remove our guards from others' in-bound list, so they don't patch to us + GuardRecord* lr = outbound; + while (lr) + { + GuardRecord* next = lr->outgoing; + Fragment* targ = lr->target; + if (targ) targ->removeLink(lr); + verbose_only( lr->gid = 0; ) + lr = next; + } + + // then unpatch all jumps into this fragment + lr = _links; + while (lr) + { + GuardRecord* next = lr->next; + Fragment* from = lr->target; + if (from && from->fragEntry) assm->unpatch(lr); + verbose_only( lr->gid = 0; ) + lr = next; + } + } + + bool Fragment::hasOnlyTreeLinks() + { + // check that all incoming links are on the same tree + bool isIt = true; + GuardRecord *lr = _links; + while (lr) + { + GuardRecord *next = lr->next; + NanoAssert(lr->target == this); // def'n of GuardRecord + if (lr->from->anchor != anchor) + { + isIt = false; + break; + } + lr = next; + } + return isIt; + } + + void Fragment::removeIntraLinks() + { + // should only be called on root of tree + NanoAssert(this == anchor); + GuardRecord *lr = _links; + while (lr) + { + GuardRecord *next = lr->next; + NanoAssert(lr->target == this); // def'n of GuardRecord + if (lr->from->anchor == anchor && lr->from->kind != MergeTrace) + removeLink(lr); + lr = next; + } + } + + void Fragment::unlinkBranches(Assembler* /*assm*/) + { + // should only be called on root of tree + NanoAssert(this == anchor); + Fragment* frag = treeBranches; + while(frag) + { + NanoAssert(frag->kind == BranchTrace && frag->hasOnlyTreeLinks()); + frag->_links = 0; + frag->fragEntry = 0; + frag = frag->treeBranches; + } + } + + void Fragment::linkBranches(Assembler* assm) + { + // should only be called on root of tree + NanoAssert(this == anchor); + Fragment* frag = treeBranches; + while(frag) + { + if (frag->fragEntry) frag->link(assm); + frag = frag->treeBranches; + } + } + + void Fragment::blacklist() + { + blacklistLevel++; + _hits = -(1<gc; + Fragment *f = new (gc) Fragment(frid); + f->blacklistLevel = 5; +#ifdef AVMPLUS_VERBOSE + if (interp.f->filename) { + f->line = interp.f->linenum; + f->file = interp.f->filename; + } +#endif + return f; + } + + Fragment *Fragmento::newBranch(Fragment *from, const avmplus::InterpState &interp) + { + Fragment *f = newFrag(interp); + f->anchor = from->anchor; + f->mergeCounts = from->anchor->mergeCounts; + f->xjumpCount = from->xjumpCount; + /*// prepend + f->nextbranch = from->branches; + from->branches = f;*/ + // append + if (!from->branches) { + from->branches = f; + } else { + Fragment *p = from->branches; + while (p->nextbranch != 0) + p = p->nextbranch; + p->nextbranch = f; + } + return f; + } + + void Fragment::clear() + { + if (lirbuf) { + 
lirbuf->clear(); + lirbuf = 0; + } + lastIns = 0; + } + + void Fragment::removeExit(Fragment *target) + { + if (target && target == branches) { + branches = branches->nextbranch; + // @todo this doesn't seem right : target->clear(); + } else { + for (Fragment *x = branches; x && x->nextbranch; x = x->nextbranch) { + if (target == x->nextbranch) { + x->nextbranch = x->nextbranch->nextbranch; + // @todo this doesn't seem righ : target->clear(); + return; + } + } + } + } + + #endif /* FEATURE_NANOJIT */ +} diff --git a/js/src/nanojit/LIR.cpp b/js/src/nanojit/LIR.cpp new file mode 100755 index 00000000000..aadaa613e65 --- /dev/null +++ b/js/src/nanojit/LIR.cpp @@ -0,0 +1,2058 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is [Open Source Virtual Machine]. + * + * The Initial Developer of the Original Code is + * Adobe System Incorporated. + * Portions created by the Initial Developer are Copyright (C) 2004-2007 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Adobe AS3 Team + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + + +#include "nanojit.h" +#include + +namespace nanojit +{ + using namespace avmplus; + #ifdef FEATURE_NANOJIT + + // @todo -- a lookup table would be better here + uint32_t FASTCALL operandCount(LOpcode op) + { + switch(op) + { + case LIR_trace: + case LIR_skip: + case LIR_tramp: + case LIR_loop: + case LIR_x: + case LIR_short: + case LIR_int: + case LIR_quad: + case LIR_call: + case LIR_fcall: + case LIR_param: + return 0; + + case LIR_callh: + case LIR_arg: + case LIR_ref: + case LIR_farg: + case LIR_not: + case LIR_xt: + case LIR_xf: + case LIR_qlo: + case LIR_qhi: + case LIR_neg: + case LIR_fneg: + case LIR_i2f: + case LIR_u2f: + return 1; + + default: + return 2; + } + } + + // LIR verbose specific + #ifdef NJ_VERBOSE + + void Lir::initEngine() + { + debug_only( { LIns l; l.initOpcode(LIR_last); NanoAssert(l.opcode()>0); } ); + NanoAssert( LIR_last < (1<<8)-1 ); // only 8 bits or the opcode + verbose_only( initVerboseStructures() ); + } + + const char* Lir::_lirNames[LIR_last]; + + void Lir::initVerboseStructures() + { + memset(_lirNames, 0, sizeof(_lirNames)); + + _lirNames[LIR_short] = "short"; + _lirNames[LIR_int] = "int"; + _lirNames[LIR_quad] = "quad"; + _lirNames[LIR_trace] = "trace"; + _lirNames[LIR_skip] = "skip"; + _lirNames[LIR_tramp] = "tramp"; + _lirNames[LIR_loop] = "loop"; + _lirNames[LIR_x] = "x"; + _lirNames[LIR_xt] = "xt"; + _lirNames[LIR_xf] = "xf"; + _lirNames[LIR_eq] = "eq"; + _lirNames[LIR_lt] = "lt"; + _lirNames[LIR_le] = "le"; + _lirNames[LIR_gt] = "gt"; + _lirNames[LIR_ge] = "ge"; + _lirNames[LIR_ult] = "ult"; + _lirNames[LIR_ule] = "ule"; + _lirNames[LIR_ugt] = "ugt"; + _lirNames[LIR_uge] = "uge"; + _lirNames[LIR_neg] = "neg"; + _lirNames[LIR_add] = "add"; + _lirNames[LIR_sub] = "sub"; + _lirNames[LIR_mul] = "mul"; + _lirNames[LIR_and] = "and"; + _lirNames[LIR_or] = "or"; + _lirNames[LIR_xor] = "xor"; + _lirNames[LIR_not] = "not"; + _lirNames[LIR_lsh] = "lsh"; + _lirNames[LIR_rsh] = "rsh"; + _lirNames[LIR_ush] = "ush"; + _lirNames[LIR_fneg] = "fneg"; + _lirNames[LIR_fadd] = "fadd"; + _lirNames[LIR_fsub] = "fsub"; + _lirNames[LIR_fmul] = "fmul"; + _lirNames[LIR_fdiv] = "fdiv"; + _lirNames[LIR_i2f] = "i2f"; + _lirNames[LIR_u2f] = "u2f"; + _lirNames[LIR_ld] = "ld"; + _lirNames[LIR_ldc] = "ldc"; + _lirNames[LIR_ldcb] = "ldcb"; + _lirNames[LIR_cmov] = "cmov"; + _lirNames[LIR_2] = ""; + _lirNames[LIR_ldq] = "ldq"; + _lirNames[LIR_st] = "st"; + _lirNames[LIR_sti] = "sti"; + _lirNames[LIR_arg] = "arg"; + _lirNames[LIR_param] = "param"; + _lirNames[LIR_call] = "call"; + _lirNames[LIR_callh] = "callh"; + _lirNames[LIR_qjoin] = "qjoin"; + _lirNames[LIR_qlo] = "qlo"; + _lirNames[LIR_qhi] = "qhi"; + _lirNames[LIR_ref] = "ref"; + _lirNames[LIR_last]= "???"; + _lirNames[LIR_farg] = "farg"; + _lirNames[LIR_fcall] = "fcall"; + } + #endif /* NANOJIT_VEBROSE */ + + // implementation + +#ifdef NJ_PROFILE + // @todo fixup move to nanojit.h + #undef counter_value + #define counter_value(x) x +#endif /* NJ_PROFILE */ + + //static int32_t buffer_count = 0; + + // LCompressedBuffer + LirBuffer::LirBuffer(Fragmento* frago, const CallInfo* functions) + : _frago(frago), _functions(functions) + { + _start = 0; + clear(); + _start = pageAlloc(); + if (_start) + { + verbose_only(_start->seq = 0;) + _unused = &_start->lir[0]; + } + //buffer_count++; + //fprintf(stderr, "LirBuffer %x start %x count %d\n", (int)this, (int)_start, buffer_count); + } + + LirBuffer::~LirBuffer() + { + //buffer_count--; + //fprintf(stderr, "~LirBuffer %x count %d\n", 
(int)this, buffer_count); + clear(); + _frago = 0; + } + + void LirBuffer::clear() + { + // free all the memory and clear the stats + debug_only( if (_start) validate();) + while( _start ) + { + Page *next = _start->next; + _frago->pageFree( _start ); + _start = next; + _stats.pages--; + } + NanoAssert(_stats.pages == 0); + _unused = 0; + _stats.lir = 0; + _noMem = 0; + } + + #ifdef _DEBUG + void LirBuffer::validate() const + { + uint32_t count = 0; + Page *last = 0; + Page *page = _start; + while(page) + { + last = page; + page = page->next; + count++; + } + NanoAssert(count == _stats.pages); + NanoAssert(_noMem || _unused->page()->next == 0); + NanoAssert(_noMem || samepage(last,_unused)); + } + #endif + + Page* LirBuffer::pageAlloc() + { + Page* page = _frago->pageAlloc(); + if (page) + { + page->next = 0; // end of list marker for new page + _stats.pages++; + } + else + { + _noMem = 1; + } + return page; + } + + uint32_t LirBuffer::size() + { + debug_only( validate(); ) + return _stats.lir; + } + + LInsp LirBuffer::next() + { + debug_only( validate(); ) + return _unused; + } + + bool LirBuffer::addPage() + { + LInsp last = _unused; + // we need to pull in a new page and stamp the old one with a link to it + Page *lastPage = last->page(); + Page *page = pageAlloc(); + if (page) + { + lastPage->next = page; // forward link to next page + _unused = &page->lir[0]; + verbose_only(page->seq = lastPage->seq+1;) + //fprintf(stderr, "Fragmento::ensureRoom stamping %x with %x; start %x unused %x\n", (int)pageBottom(last), (int)page, (int)_start, (int)_unused); + debug_only( validate(); ) + return true; + } + else { + // mem failure, rewind pointer to top of page so that subsequent instruction works + verbose_only(if (_frago->assm()->_verbose) _frago->assm()->outputf("page alloc failed");) + _unused = &lastPage->lir[0]; + } + return false; + } + + bool LirBufWriter::ensureRoom(uint32_t count) + { + LInsp last = _buf->next(); + if (!samepage(last,last+count) + && _buf->addPage()) + { + // link LIR stream back to prior instruction (careful insFar relies on _unused...) + LInsp next = _buf->next(); + insFar(LIR_skip, last-1-next); + } + return !_buf->outOmem(); + } + + LInsp LirBuffer::commit(uint32_t count) + { + debug_only(validate();) + NanoAssertMsg( samepage(_unused, _unused+count), "You need to call ensureRoom first!" 
); + return _unused += count; + } + + uint32_t LIns::reference(LIns *r) + { + int delta = this-r-1; + NanoAssert(isU8(delta)); + return delta; + } + + LInsp LirBufWriter::ensureReferenceable(LInsp i, int32_t addedDistance) + { + if (!i) return 0; + NanoAssert(!i->isop(LIR_tramp)); + LInsp next = _buf->next(); + LInsp from = next + addedDistance; + if ( canReference(from,i) ) + return i; + + // need a trampoline to get to i + LInsp tramp = insFar(LIR_tramp, i-next); + NanoAssert( tramp+tramp->imm24() == i ); + return tramp; + } + + LInsp LirBufWriter::insStore(LInsp o1, LInsp o2, LInsp o3) + { + LOpcode op = LIR_st; + NanoAssert(o1 && o2 && o3); + ensureRoom(4); + LInsp r1 = ensureReferenceable(o1,3); + LInsp r2 = ensureReferenceable(o2,2); + LInsp r3 = ensureReferenceable(o3,1); + + LInsp l = _buf->next(); + l->initOpcode(op); + l->setOprnd1(r1); + l->setOprnd2(r2); + l->setOprnd3(r3); + + _buf->commit(1); + _buf->_stats.lir++; + return l; + } + + LInsp LirBufWriter::insStorei(LInsp o1, LInsp o2, int32_t d) + { + LOpcode op = LIR_sti; + NanoAssert(o1 && o2 && isS8(d)); + ensureRoom(3); + LInsp r1 = ensureReferenceable(o1,2); + LInsp r2 = ensureReferenceable(o2,1); + + LInsp l = _buf->next(); + l->initOpcode(op); + l->setOprnd1(r1); + l->setOprnd2(r2); + l->setDisp(int8_t(d)); + + _buf->commit(1); + _buf->_stats.lir++; + return l; + } + + LInsp LirBufWriter::ins0(LOpcode op) + { + if (!ensureRoom(1)) return 0; + LInsp l = _buf->next(); + l->initOpcode(op); + _buf->commit(1); + _buf->_stats.lir++; + return l; + } + + LInsp LirBufWriter::ins1(LOpcode op, LInsp o1) + { + ensureRoom(2); + LInsp r1 = ensureReferenceable(o1,1); + + LInsp l = _buf->next(); + l->initOpcode(op); + if (r1) + l->setOprnd1(r1); + + _buf->commit(1); + _buf->_stats.lir++; + return l; + } + + LInsp LirBufWriter::ins2(LOpcode op, LInsp o1, LInsp o2) + { + ensureRoom(3); + LInsp r1 = ensureReferenceable(o1,2); + LInsp r2 = ensureReferenceable(o2,1); + + LInsp l = _buf->next(); + l->initOpcode(op); + if (r1) + l->setOprnd1(r1); + if (r2) + l->setOprnd2(r2); + + _buf->commit(1); + _buf->_stats.lir++; + return l; + } + + LInsp LirBufWriter::insLoad(LOpcode op, LInsp base, LInsp d) + { + return ins2(op,base,d); + } + + LInsp LirBufWriter::insGuard(LOpcode op, LInsp c, SideExit *x) + { + LInsp data = skip(sizeof(SideExit)); + *((SideExit*)data->payload()) = *x; + return ins2(op, c, data); + } + + LInsp LirBufWriter::insImm8(LOpcode op, int32_t a, int32_t b) + { + ensureRoom(1); + LInsp l = _buf->next(); + l->initOpcode(op); + l->setimm8(a,b); + + _buf->commit(1); + _buf->_stats.lir++; + return l; + } + + LInsp LirBufWriter::insFar(LOpcode op, int32_t imm) + { + ensureRoom(1); + + LInsp l = _buf->next(); + l->initOpcode(op); + l->setimm24(imm); + + _buf->commit(1); + return l; + } + + LInsp LirBufWriter::insImm(int32_t imm) + { + if (isS16(imm)) { + ensureRoom(1); + LInsp l = _buf->next(); + l->initOpcode(LIR_short); + l->setimm16(imm); + _buf->commit(1); + _buf->_stats.lir++; + return l; + } else { + ensureRoom(2); + int32_t* l = (int32_t*)_buf->next(); + *l = imm; + _buf->commit(1); + return ins0(LIR_int); + } + } + + LInsp LirBufWriter::insImmq(uint64_t imm) + { + ensureRoom(3); + int32_t* l = (int32_t*)_buf->next(); + l[0] = int32_t(imm); + l[1] = int32_t(imm>>32); + _buf->commit(2); + return ins0(LIR_quad); + } + + LInsp LirBufWriter::skip(size_t size) + { + const uint32_t n = (size+sizeof(LIns)-1)/sizeof(LIns); + ensureRoom(n+1); + LInsp i = _buf->next(); + _buf->commit(n); + return insFar(LIR_skip, i-1-_buf->next()); + } + 
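+	// LirReader::read() below returns instructions in reverse order: the default
+	// case steps back one LIns at a time, while LIR_skip, LIR_int and LIR_quad
+	// carry extra payload words that are stepped over explicitly; reading stops
+	// at the LIR_trace marker at the start of the buffer.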
+ LInsp LirReader::read() + { + LInsp cur = _i; + if (!cur) + return 0; + LIns* i = cur; + LOpcode iop = i->opcode(); + do + { + switch (iop) + { + default: + i--; + break; + + case LIR_skip: + NanoAssert(i->imm24() != 0); + i += i->imm24(); + break; + + case LIR_int: + NanoAssert(samepage(i, i-2)); + i -= 2; + break; + + case LIR_quad: + NanoAssert(samepage(i,i-3)); + i -= 3; + break; + + case LIR_trace: + _i = 0; // start of trace + return cur; + } + iop = i->opcode(); + } + while (is_trace_skip_tramp(iop)||iop==LIR_2); + _i = i; + return cur; + } + + bool FASTCALL isCmp(LOpcode c) { + return c >= LIR_eq && c <= LIR_uge; + } + + bool LIns::isCmp() const { + return nanojit::isCmp(u.code); + } + + bool LIns::isCall() const + { + return (u.code&~LIR64) == LIR_call; + } + + bool LIns::isGuard() const + { + return u.code==LIR_x || u.code==LIR_xf || u.code==LIR_xt; + } + + bool LIns::isStore() const + { + return u.code == LIR_st || u.code == LIR_sti; + } + + bool LIns::isLoad() const + { + return u.code == LIR_ldq || u.code == LIR_ld || u.code == LIR_ldc; + } + + bool LIns::isconst() const + { + return (opcode()&~1) == LIR_short; + } + + bool LIns::isconstval(int32_t val) const + { + return isconst() && constval()==val; + } + + bool LIns::isconstq() const + { + return isop(LIR_quad); + } + + bool FASTCALL isCse(LOpcode op) { + op = LOpcode(op & ~LIR64); + return op >= LIR_cmov && op <= LIR_uge; + } + + bool LIns::isCse(const CallInfo *functions) const + { + return nanojit::isCse(u.code) || isCall() && functions[imm8()]._cse; + } + + void LIns::setimm8(int32_t a, int32_t b) + { + NanoAssert(isS8(a) && isS8(b)); + c.imm8a = int8_t(a); + c.imm8b = int8_t(b); + } + + void LIns::setimm16(int32_t x) + { + NanoAssert(isS16(x)); + i.imm16 = int16_t(x); + } + + void LIns::setimm24(int32_t x) + { + t.imm24 = x; + } + + void LIns::setresv(uint32_t resv) + { + NanoAssert(isU8(resv)); + g.resv = resv; + } + + void LIns::initOpcode(LOpcode op) + { + t.code = op; + t.imm24 = 0; + } + + void LIns::setOprnd1(LInsp r) + { + u.oprnd_1 = reference(r); + } + + void LIns::setOprnd2(LInsp r) + { + u.oprnd_2 = reference(r); + } + + void LIns::setOprnd3(LInsp r) + { + u.oprnd_3 = reference(r); + } + + void LIns::setDisp(int8_t d) + { + sti.disp = d; + } + + LInsp LIns::oprnd1() const + { + LInsp i = (LInsp) this - u.oprnd_1 - 1; + if (i->isop(LIR_tramp)) + { + i += i->imm24(); + if (i->isop(LIR_tramp)) + i += i->imm24(); + } + return i; + } + + LInsp LIns::oprnd2() const + { + LInsp i = (LInsp) this - u.oprnd_2 - 1; + if (i->isop(LIR_tramp)) + { + i += i->imm24(); + if (i->isop(LIR_tramp)) + i += i->imm24(); + } + return i; + } + + LInsp LIns::oprnd3() const + { + LInsp i = (LInsp) this - u.oprnd_3 - 1; + if (i->isop(LIR_tramp)) + { + i += i->imm24(); + if (i->isop(LIR_tramp)) + i += i->imm24(); + } + return i; + } + + void *LIns::payload() const + { + NanoAssert(opcode() == LIR_skip); + return (void*) (this+imm24()+1); + } + + LIns* LirWriter::ins2i(LOpcode v, LIns* oprnd1, int32_t imm) + { + return ins2(v, oprnd1, insImm(imm)); + } + + bool insIsS16(LInsp i) + { + if (i->isconst()) { + int c = i->constval(); + return isS16(c); + } + if (i->isop(LIR_cmov)) { + LInsp vals = i->oprnd2(); + return insIsS16(vals->oprnd1()) && insIsS16(vals->oprnd2()); + } + if (i->isCmp()) + return true; + // many other possibilities too. 
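+		// (this check is deliberately conservative: it only has to prove a value
+		// fits in 16 bits so that ExprFilter::ins2 can fold rsh(lsh(x,16),16) back to x)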
+ return false; + } + + LIns* ExprFilter::ins1(LOpcode v, LIns* i) + { + if (v == LIR_qlo) { + if (i->isconstq()) + return insImm(int32_t(i->constvalq())); + if (i->isop(LIR_qjoin)) + return i->oprnd1(); + } + else if (v == LIR_qhi) { + if (i->isconstq()) + return insImm(int32_t(i->constvalq()>>32)); + if (i->isop(LIR_qjoin)) + return i->oprnd2(); + } + else if (v == i->opcode() && (v == LIR_not || v == LIR_neg || v == LIR_fneg)) { + return i->oprnd1(); + } + + // todo + // -(a-b) = b-a + + return out->ins1(v, i); + } + + LIns* ExprFilter::ins2(LOpcode v, LIns* oprnd1, LIns* oprnd2) + { + NanoAssert(oprnd1 && oprnd2); + if (v == LIR_cmov) { + if (oprnd2->oprnd1() == oprnd2->oprnd2()) { + // c ? a : a => a + return oprnd2->oprnd1(); + } + } + if (oprnd1 == oprnd2) + { + if (v == LIR_xor || v == LIR_sub || + !oprnd1->isQuad() && (v == LIR_ult || v == LIR_ugt || v == LIR_gt || v == LIR_lt)) + return insImm(0); + if (v == LIR_or || v == LIR_and) + return oprnd1; + if (!oprnd1->isQuad() && (v == LIR_le || v == LIR_ule || v == LIR_ge || v == LIR_uge)) { + // x <= x == 1; x >= x == 1 + return insImm(1); + } + } + if (oprnd1->isconst() && oprnd2->isconst()) + { + int c1 = oprnd1->constval(); + int c2 = oprnd2->constval(); + if (v == LIR_qjoin) { + uint64_t q = c1 | uint64_t(c2)<<32; + return insImmq(q); + } + if (v == LIR_eq) + return insImm(c1 == c2); + if (v == LIR_lt) + return insImm(c1 < c2); + if (v == LIR_gt) + return insImm(c1 > c2); + if (v == LIR_le) + return insImm(c1 <= c2); + if (v == LIR_ge) + return insImm(c1 >= c2); + if (v == LIR_ult) + return insImm(uint32_t(c1) < uint32_t(c2)); + if (v == LIR_ugt) + return insImm(uint32_t(c1) > uint32_t(c2)); + if (v == LIR_ule) + return insImm(uint32_t(c1) <= uint32_t(c2)); + if (v == LIR_uge) + return insImm(uint32_t(c1) >= uint32_t(c2)); + if (v == LIR_rsh) + return insImm(int32_t(c1) >> int32_t(c2)); + if (v == LIR_lsh) + return insImm(int32_t(c1) << int32_t(c2)); + if (v == LIR_ush) + return insImm(uint32_t(c1) >> int32_t(c2)); + } + else if (oprnd1->isconstq() && oprnd2->isconstq()) + { + double c1 = oprnd1->constvalf(); + double c2 = oprnd1->constvalf(); + if (v == LIR_eq) + return insImm(c1 == c2); + if (v == LIR_lt) + return insImm(c1 < c2); + if (v == LIR_gt) + return insImm(c1 > c2); + if (v == LIR_le) + return insImm(c1 <= c2); + if (v == LIR_ge) + return insImm(c1 >= c2); + } + else if (oprnd1->isconst() && !oprnd2->isconst()) + { + if (v == LIR_add || v == LIR_mul || + v == LIR_fadd || v == LIR_fmul || + v == LIR_xor || v == LIR_or || v == LIR_and || + v == LIR_eq) { + // move const to rhs + LIns* t = oprnd2; + oprnd2 = oprnd1; + oprnd1 = t; + } + else if (v >= LIR_lt && v <= LIR_uge && !oprnd2->isQuad()) { + // move const to rhs, swap the operator + LIns *t = oprnd2; + oprnd2 = oprnd1; + oprnd1 = t; + v = LOpcode(v^1); + } + else if (v == LIR_cmov) { + // const ? x : y => return x or y depending on const + return oprnd1->constval() ? 
oprnd2->oprnd1() : oprnd2->oprnd2(); + } + } + + if (oprnd2->isconst()) + { + int c = oprnd2->constval(); + if (v == LIR_add && oprnd1->isop(LIR_add) && oprnd1->oprnd2()->isconst()) { + // add(add(x,c1),c2) => add(x,c1+c2) + c += oprnd1->oprnd2()->constval(); + oprnd2 = insImm(c); + oprnd1 = oprnd1->oprnd1(); + } + else if (v == LIR_sub && oprnd1->isop(LIR_add) && oprnd1->oprnd2()->isconst()) { + // sub(add(x,c1),c2) => add(x,c1-c2) + c = oprnd1->oprnd2()->constval() - c; + oprnd2 = insImm(c); + oprnd1 = oprnd1->oprnd1(); + v = LIR_add; + } + else if (v == LIR_rsh && c == 16 && oprnd1->isop(LIR_lsh) && + oprnd1->oprnd2()->isconstval(16)) { + if (insIsS16(oprnd1->oprnd1())) { + // rsh(lhs(x,16),16) == x, if x is S16 + return oprnd1->oprnd1(); + } + } + else if (v == LIR_ult) { + if (oprnd1->isop(LIR_cmov)) { + LInsp a = oprnd1->oprnd2()->oprnd1(); + LInsp b = oprnd1->oprnd2()->oprnd2(); + if (a->isconst() && b->isconst()) { + bool a_lt = uint32_t(a->constval()) < uint32_t(oprnd2->constval()); + bool b_lt = uint32_t(b->constval()) < uint32_t(oprnd2->constval()); + if (a_lt == b_lt) + return insImm(a_lt); + } + } + } + + if (c == 0) + { + if (v == LIR_add || v == LIR_or || v == LIR_xor || + v == LIR_sub || v == LIR_lsh || v == LIR_rsh || v == LIR_ush) + return oprnd1; + else if (v == LIR_and || v == LIR_mul) + return oprnd2; + else if (v == LIR_eq && oprnd1->isop(LIR_or) && + oprnd1->oprnd2()->isconst() && + oprnd1->oprnd2()->constval() != 0) { + // (x or c) != 0 if c != 0 + return insImm(0); + } + } + else if (c == -1 || c == 1 && oprnd1->isCmp()) { + if (v == LIR_or) { + // x | -1 = -1, cmp | 1 = 1 + return oprnd2; + } + else if (v == LIR_and) { + // x & -1 = x, cmp & 1 = cmp + return oprnd1; + } + } + } + + LInsp i; + if (v == LIR_qjoin && oprnd1->isop(LIR_qlo) && oprnd2->isop(LIR_qhi) + && (i = oprnd1->oprnd1()) == oprnd1->oprnd1()) { + // qjoin(qlo(x),qhi(x)) == x + return i; + } + + return out->ins2(v, oprnd1, oprnd2); + } + + LIns* ExprFilter::insGuard(LOpcode v, LInsp c, SideExit *x) + { + if (v != LIR_x) { + if (c->isconst()) { + if (v == LIR_xt && !c->constval() || v == LIR_xf && c->constval()) { + return 0; // no guard needed + } + else { + // need a way to EOT now, since this is trace end. + return out->insGuard(LIR_x, 0, x); + } + } + else { + while (c->isop(LIR_eq) && c->oprnd1()->isCmp() && + c->oprnd2()->isconstval(0)) { + // xt(eq(cmp,0)) => xf(cmp) or xf(eq(cmp,0)) => xt(cmp) + v = LOpcode(v^1); + c = c->oprnd1(); + } + } + } + return out->insGuard(v, c, x); + } + + LIns* LirWriter::insLoadi(LIns *base, int disp) + { + return insLoad(LIR_ld,base,disp); + } + + LIns* LirWriter::insLoad(LOpcode op, LIns *base, int disp) + { + return insLoad(op, base, insImm(disp)); + } + + LIns* LirWriter::ins_eq0(LIns* oprnd1) + { + return ins2i(LIR_eq, oprnd1, 0); + } + + LIns* LirWriter::qjoin(LInsp lo, LInsp hi) + { + return ins2(LIR_qjoin, lo, hi); + } + + LIns* LirWriter::ins_choose(LIns* cond, LIns* iftrue, LIns* iffalse, bool hasConditionalMove) + { + // if not a conditional, make it implicitly an ==0 test (then flop results) + if (!cond->isCmp()) + { + cond = ins_eq0(cond); + LInsp tmp = iftrue; + iftrue = iffalse; + iffalse = tmp; + } + + if (hasConditionalMove) + { + return ins2(LIR_cmov, cond, ins2(LIR_2, iftrue, iffalse)); + } + + // @todo -- it might be better to use a short conditional branch rather than + // the bit-twiddling on systems that don't provide a conditional move instruction. + LInsp ncond = ins1(LIR_neg, cond); // cond ? 
-1 : 0 + return ins2(LIR_or, + ins2(LIR_and, iftrue, ncond), + ins2(LIR_and, iffalse, ins1(LIR_not, ncond))); + } + + LIns* LirBufWriter::insCall(int32_t fid, LInsp args[]) + { + static const LOpcode k_argmap[] = { LIR_farg, LIR_arg, LIR_ref }; + static const LOpcode k_callmap[] = { LIR_call, LIR_fcall, LIR_call, LIR_callh }; + + const CallInfo& ci = _functions[fid]; + uint32_t argt = ci._argtypes; + int32_t argc = ci.count_args(); + const uint32_t ret = argt & 3; + LOpcode op = k_callmap[ret]; + //printf(" ret is type %d %s\n", ret, Lir::_lirNames[op]); + +#ifdef NJ_SOFTFLOAT + if (op == LIR_fcall) + op = LIR_callh; + LInsp args2[5*2]; // arm could require 2 args per double + int32_t j = 0; + uint32_t argt2 = argt&3; // copy of return type + for (int32_t i = 0; i < argc; i++) { + argt >>= 2; + uint32_t a = argt&3; + if (a == ARGSIZE_F) { + LInsp q = args[i]; + args2[j++] = ins1(LIR_qhi, q); + argt2 |= ARGSIZE_LO << (j*2); + args2[j++] = ins1(LIR_qlo, q); + argt2 |= ARGSIZE_LO << (j*2); + } else { + args2[j++] = args[i]; + argt2 |= a << (j*2); + } + } + args = args2; + argt = argt2; + argc = j; +#endif + + for (int32_t i = 0; i < argc; i++) { + argt >>= 2; + AvmAssert((argt&3)!=0); + ins1(k_argmap[(argt&3)-1], args[i]); + } + + return insImm8(op==LIR_callh ? LIR_call : op, fid, argc); + } + + /* +#ifdef AVMPLUS_VERBOSE + void printTracker(const char* s, RegionTracker& trk, Assembler* assm) + { + assm->outputf("%s tracker width %d starting %X zeroth %X indexOf(starting) %d", s, trk.width, trk.starting, trk.zeroth, trk.indexOf(trk.starting)); + assm->output_ins(" location ", trk.location); + for(int k=0;koutputf(" [%d]", k+1); + assm->output_ins(" val ", trk.element[k]); + } + } + } +#endif + + LInsp adjustTracker(RegionTracker& trk, int32_t adj, LInsp i) + { + int32_t d = i->immdisp(); + LInsp unaligned = 0; + if ( d&((1<oprnd1(); + trk.set(at, v); + } + } + return unaligned; + } + + void trackersAtExit(SideExit* exit, RegionTracker& rtrk, RegionTracker& strk, Assembler *assm) + { + (void)assm; + int32_t s_adj=(int32_t)strk.starting, r_adj=(int32_t)rtrk.starting; + Fragment* frag = exit->from; + LInsp param0=frag->param0, sp=frag->sp, rp=frag->rp; + LirReader *r = frag->lirbuf->reader(); + AvmCore *core = frag->lirbuf->_frago->core(); + InsList live(core->gc); + + rtrk.location->setresv(0); + strk.location->setresv(0); + + verbose_only(if (assm->_verbose) assm->output_ins("Reconstituting region trackers, starting from ", exit->ins);) + + LInsp i = 0; + bool checkLive = true; +#if 0 + // @todo needed for partial tree compile + bool checkLive = true; + + // build a list of known live-valued instructions at the exit + verbose_only(if (assm->_verbose) assm->output(" compile-time live values at exit");) + LInsp i = r->setPos(exit->arAtExit); + while(i) + { + if (i->isop(LIR_2)) + { + LInsp t = i->oprnd1(); + if (live.indexOf(t)<0) + { + verbose_only(if (assm->_verbose) assm->output_ins(" ", t);) + live.add(t); + } + } + i = r->previous(); + } +#endif + + // traverse backward starting from the exit instruction + i = r->setPos(exit->ins); + while(i) + { + if (i->isStore()) + { + LInsp base = i->oprnd2(); + if (base == param0) + { + // update stop/rstop + int32_t d = i->immdisp(); + if (d == offsetof(InterpState,sp)) + { + s_adj += i->oprnd1()->oprnd2()->constval(); + } + else if (d == offsetof(InterpState,rp)) + { + r_adj += i->oprnd1()->oprnd2()->constval(); + } + } + else if (base == sp) + { + LInsp what = i->oprnd1(); + bool imm = what->isconst() || what->isconstq(); + if (!checkLive || (imm || 
live.indexOf(what)>=0)) + { + verbose_only(if (assm->_verbose) assm->output_ins(" strk-adding ", i);) + adjustTracker(strk, s_adj, i); + } + else + { + verbose_only(if (assm->_verbose) assm->output_ins(" strk-ignoring ", i);) + } + } + else if (base == rp) + { + LInsp what = i->oprnd1(); + bool imm = what->isconst() || what->isconstq(); + if (!checkLive || imm || live.indexOf(what)) + { + verbose_only(if (assm->_verbose) assm->output_ins(" rtrk-adding ", i);) + adjustTracker(rtrk, r_adj, i); + } + else + { + verbose_only(if (assm->_verbose) assm->output_ins(" rtrk-adding ", i);) + } + } + } + i = r->previous(); + } + + verbose_only(if (assm->_verbose) { printTracker("rtrk", rtrk,assm); } ) + verbose_only(if (assm->_verbose) { printTracker("strk", strk,assm); } ) + } + */ + + using namespace avmplus; + + StoreFilter::StoreFilter(LirFilter *in, GC *gc, Assembler *assm, LInsp p0, LInsp sp, LInsp rp) + : LirFilter(in), gc(gc), assm(assm), param0(p0), sp(sp), rp(rp), stop(0), rtop(0) + {} + + LInsp StoreFilter::read() + { + for (;;) + { + LInsp i = in->read(); + if (!i) + return i; + bool remove = false; + if (i->isStore()) + { + LInsp base = i->oprnd2(); + if (base == param0) + { + // update stop/rstop + int d = i->immdisp(); + if (d == offsetof(InterpState,sp)) { + stop = i->oprnd1()->oprnd2()->constval() >> 2; + NanoAssert(!(stop&1)); + } + else if (d == offsetof(InterpState,rp)) + rtop = i->oprnd1()->oprnd2()->constval() >> 2; + } + else if (base == sp) + { + LInsp v = i->oprnd1(); + int d = i->immdisp() >> 2; + int top = stop+2; + if (d >= top) { + remove = true; + } else { + d = top - d; + if (v->isQuad()) { + // storing 8 bytes + if (stk.get(d) && stk.get(d-1)) { + remove = true; + } else { + stk.set(gc, d); + stk.set(gc, d-1); + } + } + else { + // storing 4 bytes + if (stk.get(d)) + remove = true; + else + stk.set(gc, d); + } + } + } + else if (base == rp) + { + int d = i->immdisp() >> 2; + if (d >= rtop) { + remove = true; + } else { + d = rtop - d; + if (rstk.get(d)) + remove = true; + else + rstk.set(gc, d); + } + } + } + else if (i->isGuard()) + { + rstk.reset(); + stk.reset(); + SideExit *exit = i->exit(); + stop = exit->sp_adj >> 2; + rtop = exit->rp_adj >> 2; + NanoAssert(!(stop&1)); + } + if (!remove) + return i; + } + } + + // + // inlined/separated version of SuperFastHash + // This content is copyrighted by Paul Hsieh, For reference see : http://www.azillionmonkeys.com/qed/hash.html + // + inline uint32_t _hash8(uint32_t hash, const uint8_t data) + { + hash += data; + hash ^= hash << 10; + hash += hash >> 1; + return hash; + } + + inline uint32_t _hash32(uint32_t hash, const uint32_t data) + { + const uint32_t dlo = data & 0xffff; + const uint32_t dhi = data >> 16; + hash += dlo; + const uint32_t tmp = (dhi << 11) ^ hash; + hash = (hash << 16) ^ tmp; + hash += hash >> 11; + return hash; + } + + inline uint32_t _hashptr(uint32_t hash, const void* data) + { +#ifdef AVMPLUS_64BIT + hash = _hash32(hash, uint32_t(uintptr_t(data) >> 32)); + hash = _hash32(hash, uint32_t(uintptr_t(data))); + return hash; +#else + return _hash32(hash, uint32_t(data)); +#endif + } + + inline uint32_t _hashfinish(uint32_t hash) + { + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + return hash; + } + + LInsHashSet::LInsHashSet(GC* gc) : + m_list(gc, kInitialCap), m_used(0), m_gc(gc) + { + m_list.set(kInitialCap-1, 0); + } + + /*static*/ uint32_t FASTCALL 
LInsHashSet::hashcode(LInsp i) + { + const LOpcode op = i->opcode(); + switch (op) + { + case LIR_short: + return hashimm(i->imm16()); + case LIR_int: + return hashimm(i->imm32()); + case LIR_quad: + return hashimmq(i->constvalq()); + case LIR_call: + case LIR_fcall: + { + LInsp args[10]; + int32_t argc = i->imm8b(); + NanoAssert(argc < 10); + LirReader ri(i); + for (int32_t j=argc; j > 0; ) + args[--j] = ri.previous()->oprnd1(); + return hashcall(i->imm8(), argc, args); + } + default: + if (operandCount(op) == 2) + return hash2(op, i->oprnd1(), i->oprnd2()); + else + return hash1(op, i->oprnd1()); + } + } + + /*static*/ bool FASTCALL LInsHashSet::equals(LInsp a, LInsp b) + { + if (a==b) + return true; + AvmAssert(a->opcode() == b->opcode()); + const LOpcode op = a->opcode(); + switch (op) + { + case LIR_short: + { + return a->imm16() == b->imm16(); + } + case LIR_int: + { + return a->imm32() == b->imm32(); + } + case LIR_quad: + { + return a->constvalq() == b->constvalq(); + } + case LIR_call: + case LIR_fcall: + { + uint32_t argc; + if (a->imm8() != b->imm8()) return false; + if ((argc=a->imm8b()) != b->imm8b()) return false; + LirReader ra(a), rb(b); + while (argc-- > 0) + if (ra.previous()->oprnd1() != rb.previous()->oprnd1()) + return false; + return true; + } + default: + { + const uint32_t count = operandCount(op); + if ((count >= 1 && a->oprnd1() != b->oprnd1()) || + (count >= 2 && a->oprnd2() != b->oprnd2())) + return false; + return true; + } + } + } + + void FASTCALL LInsHashSet::grow() + { + const uint32_t newcap = m_list.size() << 1; + InsList newlist(m_gc, newcap); + newlist.set(newcap-1, 0); + for (uint32_t i=0, n=m_list.size(); i < n; i++) + { + LInsp name = m_list.get(i); + if (!name) continue; + uint32_t j = find(name, hashcode(name), newlist, newcap); + newlist.set(j, name); + } + m_list.become(newlist); + } + + uint32_t FASTCALL LInsHashSet::find(LInsp name, uint32_t hash, const InsList& list, uint32_t cap) + { + const uint32_t bitmask = (cap - 1) & ~0x1; + + uint32_t n = 7 << 1; + hash &= bitmask; + LInsp k; + while ((k = list.get(hash)) != NULL && + (!LIns::sameop(k,name) || !equals(k, name))) + { + hash = (hash + (n += 2)) & bitmask; // quadratic probe + } + return hash; + } + + LInsp LInsHashSet::add(LInsp name, uint32_t k) + { + // this is relatively short-lived so let's try a more aggressive load factor + // in the interest of improving performance + if (((m_used+1)<<1) >= m_list.size()) // 0.50 + { + grow(); + k = find(name, hashcode(name), m_list, m_list.size()); + } + NanoAssert(!m_list.get(k)); + m_used++; + m_list.set(k, name); + return name; + } + + uint32_t LInsHashSet::hashimm(int32_t a) { + return _hashfinish(_hash32(0,a)); + } + + uint32_t LInsHashSet::hashimmq(uint64_t a) { + uint32_t hash = _hash32(0, uint32_t(a >> 32)); + return _hashfinish(_hash32(hash, uint32_t(a))); + } + + uint32_t LInsHashSet::hash1(LOpcode op, LInsp a) { + uint32_t hash = _hash8(0,uint8_t(op)); + return _hashfinish(_hashptr(hash, a)); + } + + uint32_t LInsHashSet::hash2(LOpcode op, LInsp a, LInsp b) { + uint32_t hash = _hash8(0,uint8_t(op)); + hash = _hashptr(hash, a); + return _hashfinish(_hashptr(hash, b)); + } + + uint32_t LInsHashSet::hashcall(int32_t fid, uint32_t argc, LInsp args[]) { + uint32_t hash = _hash32(0,fid); + for (int32_t j=argc-1; j >= 0; j--) + hash = _hashptr(hash,args[j]); + return _hashfinish(hash); + } + + LInsp LInsHashSet::find32(int32_t a, uint32_t &i) + { + uint32_t cap = m_list.size(); + const InsList& list = m_list; + const uint32_t bitmask = (cap - 
1) & ~0x1; + uint32_t hash = hashimm(a) & bitmask; + uint32_t n = 7 << 1; + LInsp k; + while ((k = list.get(hash)) != NULL && + (!k->isconst() || k->constval() != a)) + { + hash = (hash + (n += 2)) & bitmask; // quadratic probe + } + i = hash; + return k; + } + + LInsp LInsHashSet::find64(uint64_t a, uint32_t &i) + { + uint32_t cap = m_list.size(); + const InsList& list = m_list; + const uint32_t bitmask = (cap - 1) & ~0x1; + uint32_t hash = hashimmq(a) & bitmask; + uint32_t n = 7 << 1; + LInsp k; + while ((k = list.get(hash)) != NULL && + (!k->isconstq() || k->constvalq() != a)) + { + hash = (hash + (n += 2)) & bitmask; // quadratic probe + } + i = hash; + return k; + } + + LInsp LInsHashSet::find1(LOpcode op, LInsp a, uint32_t &i) + { + uint32_t cap = m_list.size(); + const InsList& list = m_list; + const uint32_t bitmask = (cap - 1) & ~0x1; + uint32_t hash = hash1(op,a) & bitmask; + uint32_t n = 7 << 1; + LInsp k; + while ((k = list.get(hash)) != NULL && + (k->opcode() != op || k->oprnd1() != a)) + { + hash = (hash + (n += 2)) & bitmask; // quadratic probe + } + i = hash; + return k; + } + + LInsp LInsHashSet::find2(LOpcode op, LInsp a, LInsp b, uint32_t &i) + { + uint32_t cap = m_list.size(); + const InsList& list = m_list; + const uint32_t bitmask = (cap - 1) & ~0x1; + uint32_t hash = hash2(op,a,b) & bitmask; + uint32_t n = 7 << 1; + LInsp k; + while ((k = list.get(hash)) != NULL && + (k->opcode() != op || k->oprnd1() != a || k->oprnd2() != b)) + { + hash = (hash + (n += 2)) & bitmask; // quadratic probe + } + i = hash; + return k; + } + + bool argsmatch(LInsp i, uint32_t argc, LInsp args[]) + { + // we don't have callinfo here so we cannot use argiterator + LirReader r(i); + for (LInsp a = r.previous(); a->isArg(); a=r.previous()) + if (a->oprnd1() != args[--argc]) + return false; + return true; + } + + LInsp LInsHashSet::findcall(int32_t fid, uint32_t argc, LInsp args[], uint32_t &i) + { + uint32_t cap = m_list.size(); + const InsList& list = m_list; + const uint32_t bitmask = (cap - 1) & ~0x1; + uint32_t hash = hashcall(fid, argc, args) & bitmask; + uint32_t n = 7 << 1; + LInsp k; + while ((k = list.get(hash)) != NULL && + (!k->isCall() || k->imm8() != fid || !argsmatch(k, argc, args))) + { + hash = (hash + (n += 2)) & bitmask; // quadratic probe + } + i = hash; + return k; + } + + SideExit *LIns::exit() + { + NanoAssert(isGuard()); + return (SideExit*)oprnd2()->payload(); + } + +#ifdef NJ_VERBOSE + class RetiredEntry: public GCObject + { + public: + List live; + LInsp i; + RetiredEntry(GC *gc): live(gc) {} + }; + class LiveTable + { + public: + SortedMap live; + List retired; + int maxlive; + LiveTable(GC *gc) : live(gc), retired(gc), maxlive(0) {} + void add(LInsp i, LInsp use) { + if (!i->isconst() && !i->isconstq() && !live.containsKey(i)) { + live.put(i,use); + } + } + void retire(LInsp i, GC *gc) { + RetiredEntry *e = new (gc) RetiredEntry(gc); + e->i = i; + for (int j=0, n=live.size(); j < n; j++) { + LInsp l = live.keyAt(j); + if (!l->isStore() && !l->isGuard() && !l->isArg() && !l->isop(LIR_loop)) + e->live.add(l); + } + int size=0; + if ((size = e->live.size()) > maxlive) + maxlive = size; + + live.remove(i); + retired.add(e); + } + bool contains(LInsp i) { + return live.containsKey(i); + } + }; + + void live(GC *gc, Assembler *assm, Fragment *frag) + { + // traverse backwards to find live exprs and a few other stats. 
+ + LInsp sp = frag->sp; + LInsp rp = frag->rp; + LiveTable live(gc); + uint32_t exits = 0; + LirBuffer *lirbuf = frag->lirbuf; + LirReader br(lirbuf); + StoreFilter r(&br, gc, 0, frag->param0, sp, rp); + bool skipargs = false; + int total = 0; + live.add(frag->param0, r.pos()); + for (LInsp i = r.read(); i != 0; i = r.read()) + { + total++; + + if (i->isArg()) { + if (!skipargs) + live.add(i->oprnd1(),0); + } else { + skipargs = false; + } + + // first handle side-effect instructions + if (i->isStore() || i->isGuard() || i->isop(LIR_loop) || + i->isCall() && !assm->callInfoFor(i->imm8())->_cse) + { + live.add(i,0); + if (i->isGuard()) + exits++; + } + + // now propagate liveness + if (live.contains(i)) + { + live.retire(i,gc); + if (i->isStore()) { + live.add(i->oprnd2(),i); // base + live.add(i->oprnd1(),i); // val + } + else if (i->isop(LIR_cmov)) { + live.add(i->oprnd1(),i); + live.add(i->oprnd2()->oprnd1(),i); + live.add(i->oprnd2()->oprnd2(),i); + } + else if (operandCount(i->opcode()) == 1) { + live.add(i->oprnd1(),i); + } + else if (operandCount(i->opcode()) == 2) { + live.add(i->oprnd1(),i); + live.add(i->oprnd2(),i); + } + } + else + { + skipargs = i->isCall(); + } + } + + assm->outputf("live instruction count %ld, total %ld, max pressure %d", + live.retired.size(), total, live.maxlive); + assm->outputf("side exits %ld", exits); + + // print live exprs, going forwards + LirNameMap *names = frag->lirbuf->names; + for (int j=live.retired.size()-1; j >= 0; j--) + { + RetiredEntry *e = live.retired[j]; + char livebuf[1000], *s=livebuf; + *s = 0; + for (int k=0,n=e->live.size(); k < n; k++) { + strcpy(s, names->formatRef(e->live[k])); + s += strlen(s); + *s++ = ' '; *s = 0; + NanoAssert(s < livebuf+sizeof(livebuf)); + } + printf("%-60s %s\n", livebuf, names->formatIns(e->i)); + if (e->i->isGuard()) + printf("\n"); + } + } + + void LirNameMap::addName(LInsp i, Stringp name) { + Entry *e = new (labels->core->gc) Entry(name); + names.put(i, e); + } + void LirNameMap::addName(LInsp i, const char *name) { + addName(i, labels->core->newString(name)); + } + + void LirNameMap::copyName(LInsp i, const char *s, int suffix) { + char s2[200]; + sprintf(s2,"%s%d", s,suffix); + addName(i, labels->core->newString(s2)); + } + + void LirNameMap::formatImm(int32_t c, char *buf) { + if (c >= 10000 || c <= -10000) + sprintf(buf,"#%s",labels->format((void*)c)); + else + sprintf(buf,"%d", c); + } + + const char* LirNameMap::formatRef(LIns *ref) + { + char buffer[200], *buf=buffer; + buf[0]=0; + GC *gc = labels->core->gc; + if (names.containsKey(ref)) { + StringNullTerminatedUTF8 cname(gc, names.get(ref)->name); + strcat(buf, cname.c_str()); + } + else if (ref->isconstq()) { + formatImm(uint32_t(ref->constvalq()>>32), buf); + buf += strlen(buf); + *buf++ = ':'; + formatImm(uint32_t(ref->constvalq()), buf); + } + else if (ref->isconst()) { + formatImm(ref->constval(), buf); + } + else { + if (ref->isCall()) { + copyName(ref, _functions[ref->imm8()]._name, funccounts.add(ref->imm8())); + } else { + copyName(ref, nameof(ref), lircounts.add(ref->opcode())); + } + StringNullTerminatedUTF8 cname(gc, names.get(ref)->name); + strcat(buf, cname.c_str()); + } + return labels->dup(buffer); + } + + const char* LirNameMap::formatIns(LIns* i) + { + char sbuf[200]; + char *s = sbuf; + if (!i->isStore() && !i->isGuard() && !i->isop(LIR_trace) && !i->isop(LIR_loop)) { + sprintf(s, "%s = ", formatRef(i)); + s += strlen(s); + } + + switch(i->opcode()) + { + case LIR_short: + case LIR_int: + { + sprintf(s, "%s", formatRef(i)); 
+ break; + } + + case LIR_quad: + { + int32_t *p = (int32_t*) (i-2); + sprintf(s, "#%X:%X", p[1], p[0]); + break; + } + + case LIR_loop: + case LIR_trace: + sprintf(s, "%s", nameof(i)); + break; + + case LIR_fcall: + case LIR_call: { + sprintf(s, "%s ( ", _functions[i->imm8()]._name); + LirReader r(i); + for (LInsp a = r.previous(); a->isArg(); a = r.previous()) { + s += strlen(s); + sprintf(s, "%s ",formatRef(a->oprnd1())); + } + s += strlen(s); + sprintf(s, ")"); + break; + } + + case LIR_param: + sprintf(s, "%s %s", nameof(i), gpn(i->imm8())); + break; + + case LIR_x: { + SideExit *x = (SideExit*) i->oprnd2()->payload(); + uint32_t ip = uint32_t(x->from->frid) + x->ip_adj; + sprintf(s, "%s -> %s sp%+d rp%+d f%+d", nameof(i), + labels->format((void*)ip), + x->sp_adj, x->rp_adj, x->f_adj); + break; + } + + case LIR_callh: + case LIR_neg: + case LIR_fneg: + case LIR_arg: + case LIR_farg: + case LIR_i2f: + case LIR_u2f: + case LIR_qlo: + case LIR_qhi: + case LIR_ref: + sprintf(s, "%s %s", nameof(i), formatRef(i->oprnd1())); + break; + + case LIR_xt: + case LIR_xf: { + SideExit *x = (SideExit*) i->oprnd2()->payload(); + uint32_t ip = int32_t(x->from->frid) + x->ip_adj; + sprintf(s, "%s %s -> %s sp%+d rp%+d f%+d", nameof(i), + formatRef(i->oprnd1()), + labels->format((void*)ip), + x->sp_adj, x->rp_adj, x->f_adj); + break; + } + case LIR_add: + case LIR_sub: + case LIR_mul: + case LIR_fadd: + case LIR_fsub: + case LIR_fmul: + case LIR_fdiv: + case LIR_and: + case LIR_or: + case LIR_not: + case LIR_xor: + case LIR_lsh: + case LIR_rsh: + case LIR_ush: + case LIR_eq: + case LIR_lt: + case LIR_le: + case LIR_gt: + case LIR_ge: + case LIR_ult: + case LIR_ule: + case LIR_ugt: + case LIR_uge: + case LIR_qjoin: + sprintf(s, "%s %s, %s", nameof(i), + formatRef(i->oprnd1()), + formatRef(i->oprnd2())); + break; + + case LIR_cmov: + sprintf(s, "%s ? 
%s : %s", + formatRef(i->oprnd1()), + formatRef(i->oprnd2()->oprnd1()), + formatRef(i->oprnd2()->oprnd2())); + break; + + case LIR_ld: + case LIR_ldc: + case LIR_ldq: + case LIR_ldcb: + sprintf(s, "%s %s[%s]", nameof(i), + formatRef(i->oprnd1()), + formatRef(i->oprnd2())); + break; + + case LIR_st: + case LIR_sti: + sprintf(s, "%s[%d] = %s", + formatRef(i->oprnd2()), + i->immdisp(), + formatRef(i->oprnd1())); + break; + + default: + sprintf(s, "?"); + break; + } + return labels->dup(sbuf); + } + + +#endif + + CseFilter::CseFilter(LirWriter *out, GC *gc) + : LirWriter(out), exprs(gc) {} + + LIns* CseFilter::insImm(int32_t imm) + { + uint32_t k; + LInsp found = exprs.find32(imm, k); + if (found) + return found; + return exprs.add(out->insImm(imm), k); + } + + LIns* CseFilter::insImmq(uint64_t q) + { + uint32_t k; + LInsp found = exprs.find64(q, k); + if (found) + return found; + return exprs.add(out->insImmq(q), k); + } + + LIns* CseFilter::ins1(LOpcode v, LInsp a) + { + if (isCse(v)) { + NanoAssert(operandCount(v)==1); + uint32_t k; + LInsp found = exprs.find1(v, a, k); + if (found) + return found; + return exprs.add(out->ins1(v,a), k); + } + return out->ins1(v,a); + } + + LIns* CseFilter::ins2(LOpcode v, LInsp a, LInsp b) + { + if (isCse(v)) { + NanoAssert(operandCount(v)==2); + uint32_t k; + LInsp found = exprs.find2(v, a, b, k); + if (found) + return found; + return exprs.add(out->ins2(v,a,b), k); + } + return out->ins2(v,a,b); + } + + LIns* CseFilter::insLoad(LOpcode v, LInsp base, LInsp disp) + { + if (isCse(v)) { + NanoAssert(operandCount(v)==2); + uint32_t k; + LInsp found = exprs.find2(v, base, disp, k); + if (found) + return found; + return exprs.add(out->insLoad(v,base,disp), k); + } + return out->insLoad(v,base,disp); + } + + LInsp CseFilter::insGuard(LOpcode v, LInsp c, SideExit *x) + { + if (isCse(v)) { + // conditional guard + NanoAssert(operandCount(v)==1); + uint32_t k; + LInsp found = exprs.find1(v, c, k); + if (found) + return 0; + return exprs.add(out->insGuard(v,c,x), k); + } + return out->insGuard(v, c, x); + } + + LInsp CseFilter::insCall(int32_t fid, LInsp args[]) + { + const CallInfo *c = &_functions[fid]; + if (c->_cse) { + uint32_t k; + LInsp found = exprs.findcall(fid, c->count_args(), args, k); + if (found) + return found; + return exprs.add(out->insCall(fid, args), k); + } + return out->insCall(fid, args); + } + + LIns* FASTCALL callArgN(LIns* i, uint32_t n) + { + // @todo clean up; shouldn't have to create a reader + LirReader rdr(i); + do + i = rdr.read(); + while (n-- > 0); + return i; + } + + void compile(Assembler* assm, Fragment* triggerFrag) + { + AvmCore *core = triggerFrag->lirbuf->_frago->core(); + GC *gc = core->gc; + + verbose_only( StringList asmOutput(gc); ) + verbose_only( assm->_outputCache = &asmOutput; ) + + verbose_only(if (assm->_verbose && core->config.verbose_live) + live(gc, assm, triggerFrag);) + + bool treeCompile = core->config.tree_opt && (triggerFrag->kind == BranchTrace); + RegAllocMap regMap(gc); + NInsList loopJumps(gc); + assm->beginAssembly(®Map); + + //fprintf(stderr, "recompile trigger %X kind %d\n", (int)triggerFrag, triggerFrag->kind); + Fragment* root = triggerFrag; + if (treeCompile) + { + // recompile the entire tree + root = triggerFrag->anchor; + root->removeIntraLinks(); + root->unlink(assm); // unlink all incoming jumps ; since the compile() can fail + root->unlinkBranches(assm); // no one jumps into a branch (except from within the tree) so safe to clear the links table + root->fragEntry = 0; + + // do the tree 
branches
+			Fragment* frag = root->treeBranches;
+			while(frag)
+			{
+				// compile til no more frags
+				if (frag->lastIns)
+				{
+					NIns* loopJump = assm->assemble(frag);
+					verbose_only(if (assm->_verbose) assm->outputf("compiling branch %X that exits from SID %d",frag->frid,frag->spawnedFrom->sid);)
+					if (loopJump) loopJumps.add((intptr_t)loopJump);
+
+					NanoAssert(frag->kind == BranchTrace);
+					RegAlloc* regs = new (gc) RegAlloc();
+					assm->copyRegisters(regs);
+					assm->releaseRegisters();
+					SideExit* exit = frag->spawnedFrom;
+					regMap.put(exit, regs);
+				}
+				frag = frag->treeBranches;
+			}
+		}
+
+		// now the main trunk
+
+		NIns* loopJump = assm->assemble(root);
+		verbose_only(if (assm->_verbose) assm->output("compiling trunk");)
+		if (loopJump) loopJumps.add((intptr_t)loopJump);
+		assm->endAssembly(root, loopJumps);
+
+		// remove the map entries
+		while(!regMap.isEmpty())
+			gc->Free(regMap.removeLast());
+
+		// reverse output so that assembly is displayed low-to-high
+		verbose_only( assm->_outputCache = 0; )
+		verbose_only(for(int i=asmOutput.size()-1; i>=0; --i) { assm->outputf("%s",asmOutput.get(i)); } );
+
+		if (assm->error())
+		{
+			root->fragEntry = 0;
+		}
+		else
+		{
+			root->link(assm);
+			if (treeCompile) root->linkBranches(assm);
+		}
+	}
+
+	#endif /* FEATURE_NANOJIT */
+
+#if defined(NJ_VERBOSE)
+	LabelMap::LabelMap(AvmCore *core, LabelMap* parent)
+		: parent(parent), names(core->gc), addrs(core->config.verbose_addrs), end(buf), core(core)
+	{}
+
+	void LabelMap::add(const void *p, size_t size, size_t align, const char *name)
+	{
+		if (!this) return;
+		add(p, size, align, core->newString(name));
+	}
+
+	void LabelMap::add(const void *p, size_t size, size_t align, Stringp name)
+	{
+		if (!this) return;
+		Entry *e = new (core->gc) Entry(name, size<<align, align);
+		names.put(p, e);
+	}
+
+	const char *LabelMap::format(const void *p)
+	{
+		char b[200];
+		int i = names.findNear(p);
+		if (i >= 0) {
+			const void *start = names.keyAt(i);
+			Entry *e = names.at(i);
+			const void *end = (const char*)start + e->size;
+			avmplus::StringNullTerminatedUTF8 cname(core->gc, e->name);
+			const char *name = cname.c_str();
+			if (p == start) {
+				if (addrs)
+					sprintf(b,"%p %s",p,name);
+				else
+					strcpy(b, name);
+				return dup(b);
+			}
+			else if (p > start && p < end) {
+				int d = (int(p)-int(start)) >> e->align;
+				if (addrs)
+					sprintf(b, "%p %s+%d", p, name, d);
+				else
+					sprintf(b,"%s+%d", name, d);
+				return dup(b);
+			}
+		}
+		if (parent)
+			return parent->format(p);
+
+		sprintf(b, "%p", p);
+		return dup(b);
+	}
+
+	const char *LabelMap::dup(const char *b)
+	{
+		int need = strlen(b)+1;
+		char *s = end;
+		end += need;
+		if (end > buf+sizeof(buf)) {
+			s = buf;
+			end = s+need;
+		}
+		strcpy(s, b);
+		return s;
+	}
+#endif // NJ_VERBOSE
+}
+
diff --git a/js/src/nanojit/Nativei386.cpp b/js/src/nanojit/Nativei386.cpp
new file mode 100644
index 00000000000..90b6375cce5
--- /dev/null
+++ b/js/src/nanojit/Nativei386.cpp
@@ -0,0 +1,580 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Adobe AS3 Team + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifdef _MAC +// for MakeDataExecutable +#include +#endif + +#include "nanojit.h" + +namespace nanojit +{ + #ifdef FEATURE_NANOJIT + + #ifdef NJ_VERBOSE + const char *regNames[] = { + "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", + "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7", + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7" + }; + #endif + + const Register Assembler::argRegs[] = { ECX, EDX }; + const Register Assembler::retRegs[] = { EAX, EDX }; + + void Assembler::nInit(AvmCore* core) + { + sse2 = core->use_sse2(); + // CMOVcc is actually available on most PPro+ chips (except for a few + // oddballs like Via C3) but for now tie to SSE2 detection + has_cmov = sse2; + OSDep::getDate(); + } + + NIns* Assembler::genPrologue(RegisterMask needSaving) + { + /** + * Prologue + */ + uint32_t stackNeeded = 4 * _activation.highwatermark; + uint32_t savingCount = 0; + + for(Register i=FirstReg; i <= LastReg; i = nextreg(i)) + if (needSaving&rmask(i)) + savingCount++; + + // so for alignment purposes we've pushed return addr, fp, and savingCount registers + uint32_t stackPushed = 4 * (3+savingCount); + uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK); + uint32_t amt = aligned - stackPushed; + + if (amt) + SUBi(SP, amt); + + verbose_only( verbose_outputf(" %p:",_nIns); ) + verbose_only( verbose_output(" patch entry:"); ) + NIns *patchEntry = _nIns; + MR(FP, SP); + PUSHr(FP); // push ebp twice to align frame on 8bytes + PUSHr(FP); + + for(Register i=FirstReg; i <= LastReg; i = nextreg(i)) + if (needSaving&rmask(i)) + PUSHr(i); + + #ifndef DARWIN + // dynamically align the stack + PUSHr(FP);//fake returnaddr. + ANDi(SP, -NJ_ALIGN_STACK); + MR(FP,SP); + PUSHr(FP); + #endif + + return patchEntry; + } + + GuardRecord * Assembler::nFragExit(SideExit *exit) + { + bool trees = _frago->core()->config.tree_opt; + Fragment *frag = exit->target; + GuardRecord *lr = 0; + bool destKnown = (frag && frag->fragEntry); + if (destKnown && !trees) + { + // already exists, emit jump now. no patching required. + JMP(frag->fragEntry); + lr = 0; + } + else + { + // target doesn't exit yet. emit jump to epilog, and set up to patch later. + lr = placeGuardRecord(exit); + JMP_long(_epilogue); + lr->jmp = _nIns; +#if 0 + // @todo optimization ; is it worth it? 
It means we can remove the loop over outbound in Fragment.link() + // for trees we need the patch entry on the incoming fragment so we can unhook it later if needed + if (tress && destKnown) + patch(lr); +#endif + } + // first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue + MR(SP,FP); + + + #ifdef NJ_VERBOSE + if (_frago->core()->config.show_stats) { + // load EDX (arg1) with Fragment *fromFrag, target fragment + // will make use of this when calling fragenter(). + int fromfrag = int((Fragment*)_thisfrag); + LDi(argRegs[1], fromfrag); + } + #endif + + // return value is GuardRecord* + LDi(EAX, int(lr)); + + // if/when we patch this exit to jump over to another fragment, + // that fragment will need its parameters set up just like ours. + LInsp param0 = _thisfrag->param0; + Register state = findSpecificRegFor(param0, Register(param0->imm8())); + + // update InterpState + + if (exit->rp_adj) + ADDmi((int32_t)offsetof(avmplus::InterpState, rp), state, exit->rp_adj); + + if (exit->sp_adj) + ADDmi((int32_t)offsetof(avmplus::InterpState, sp), state, exit->sp_adj); + + if (exit->ip_adj) + ADDmi((int32_t)offsetof(avmplus::InterpState, ip), state, exit->ip_adj); + + if (exit->f_adj) + ADDmi((int32_t)offsetof(avmplus::InterpState, f), state, exit->f_adj); + + return lr; + } + + NIns *Assembler::genEpilogue(RegisterMask restore) + { + RET(); + + #ifndef DARWIN + // undo dynamic alignment + POP(FP); + MR(SP,FP); + #endif + + for (Register i=UnknownReg; i >= FirstReg; i = prevreg(i)) + if (restore&rmask(i)) { POP(i); } + + POP(FP); + POP(FP); + return _nIns; + } + + void Assembler::nArgEmitted(const CallInfo* call, uint32_t stackSlotCount, uint32_t iargs, uint32_t fargs) + { + // see if we have finished emitting all args. If so then make sure the + // new stack pointer is NJ_ALIGN_STACK aligned + const uint32_t istack = call->count_iargs(); + const uint32_t fstack = call->count_args() - istack; + //printf("call %s iargs %d fargs %d istack %d fstack %d\n",call->_name,iargs,fargs,istack,fstack); + AvmAssert(iargs <= istack); + AvmAssert(fargs <= fstack); + if (iargs == istack && fargs == fstack) + { + const int32_t size = 4*stackSlotCount; + const int32_t extra = alignUp(size, NJ_ALIGN_STACK) - size; + if (extra > 0) + SUBi(SP, extra); + } + } + + void Assembler::nPostCallCleanup(const CallInfo* call) + { + // must be signed, not unsigned + int32_t istack = call->count_iargs(); + int32_t fstack = call->count_args() - istack; + + istack -= 2; // first 2 4B args are in registers + if (istack <= 0) + { + istack = 0; + if (fstack == 0) + return; // only using ECX/EDX nothing passed on the stack so no cleanup needed + } + + const int32_t size = 4*istack + 8*fstack; // actual stack space used + NanoAssert( size > 0 ); + + const int32_t extra = alignUp(size, NJ_ALIGN_STACK) - (size); + + // stack re-alignment + // only pop our adjustment amount since callee pops args in FASTCALL mode + if (extra > 0) + { ADDi(SP, extra); } + } + + void Assembler::nMarkExecute(Page* page, int32_t count, bool enable) + { + #ifdef _MAC + MakeDataExecutable(page, count*NJ_PAGE_SIZE); + #else + (void)page; + (void)count; + #endif + (void)enable; + } + + Register Assembler::nRegisterAllocFromSet(int set) + { + Register r; + RegAlloc ®s = _allocator; + #ifdef WIN32 + _asm + { + mov ecx, regs + bsf eax, set // i = first bit set + btr RegAlloc::free[ecx], eax // free &= ~rmask(i) + mov r, eax + } + #else + asm( + "bsf %1, %%eax\n\t" + "btr %%eax, %2\n\t" + "movl %%eax, %0\n\t" + : "=m"(r) : "m"(set), "m"(regs.free) : 
"%eax", "memory" ); + #endif /* WIN32 */ + return r; + } + + void Assembler::nRegisterResetAll(RegAlloc& a) + { + // add scratch registers to our free list for the allocator + a.clear(); + a.used = 0; + a.free = SavedRegs | ScratchRegs; + if (!sse2) + a.free &= ~XmmRegs; + debug_only( a.managed = a.free; ) + } + + void Assembler::nPatchBranch(NIns* branch, NIns* location) + { + uint32_t offset = location - branch; + if (branch[0] == JMPc) + *(uint32_t*)&branch[1] = offset - 5; + else + *(uint32_t*)&branch[2] = offset - 6; + } + + RegisterMask Assembler::hint(LIns* i, RegisterMask allow) + { + uint32_t op = i->opcode(); + int prefer = allow; + if (op == LIR_call) + prefer &= rmask(EAX); + else if (op == LIR_param) + prefer &= rmask(Register(i->imm8())); + else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode()==LIR_callh) + prefer &= rmask(EDX); + else if (i->isCmp()) + prefer &= AllowableFlagRegs; + else if (i->isconst()) + prefer &= ScratchRegs; + return (_allocator.free & prefer) ? prefer : allow; + } + + void Assembler::asm_qjoin(LIns *ins) + { + int d = findMemFor(ins); + AvmAssert(d); + LIns* lo = ins->oprnd1(); + LIns* hi = ins->oprnd2(); + + Reservation *resv = getresv(ins); + Register rr = resv->reg; + + if (rr != UnknownReg && (rmask(rr) & FpRegs)) + evict(rr); + + if (hi->isconst()) + { + STi(FP, d+4, hi->constval()); + } + else + { + Register r = findRegFor(hi, GpRegs); + ST(FP, d+4, r); + } + + if (lo->isconst()) + { + STi(FP, d, lo->constval()); + } + else + { + // okay if r gets recycled. + Register r = findRegFor(lo, GpRegs); + ST(FP, d, r); + } + + freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem + } + + void Assembler::asm_restore(LInsp i, Reservation *resv, Register r) + { + if (i->isconst()) + { + if (!resv->arIndex) { + reserveFree(i); + } + LDi(r, i->constval()); + } + else + { + int d = findMemFor(i); + if (rmask(r) & FpRegs) + { + if (rmask(r) & XmmRegs) { + LDQ(r, d, FP); + } else { + FLDQ(d, FP); + } + } + else + { + LD(r, d, FP); + } + verbose_only(if (_verbose) { + outputf(" restore %s", _thisfrag->lirbuf->names->formatRef(i)); + }) + } + } + + void Assembler::asm_store32(LIns *value, int dr, LIns *base) + { + if (value->isconst()) + { + Register rb = findRegFor(base, GpRegs); + int c = value->constval(); + STi(rb, dr, c); + } + else + { + // make sure what is in a register + Reservation *rA, *rB; + findRegFor2(GpRegs, value, rA, base, rB); + Register ra = rA->reg; + Register rb = rB->reg; + ST(rb, dr, ra); + } + } + + void Assembler::asm_spill(LInsp i, Reservation *resv, bool pop) + { + (void)i; + int d = disp(resv); + Register rr = resv->reg; + if (d) + { + // save to spill location + if (rmask(rr) & FpRegs) + { + if (rmask(rr) & XmmRegs) { + STQ(d, FP, rr); + } else { + FSTQ((pop?1:0), d, FP); + } + } + else + { + ST(FP, d, rr); + } + verbose_only(if (_verbose) { + outputf(" spill %s",_thisfrag->lirbuf->names->formatRef(i)); + }) + } + else if (pop && (rmask(rr) & x87Regs)) + { + // pop the fpu result since it isn't used + FSTP(FST0); + } + } + + void Assembler::asm_load64(LInsp ins) + { + LIns* base = ins->oprnd1(); + int db = ins->oprnd2()->constval(); + Reservation *resv = getresv(ins); + int dr = disp(resv); + Register rr = resv->reg; + + if (rr != UnknownReg && rmask(rr) & XmmRegs) + { + freeRsrcOf(ins, false); + Register rb = findRegFor(base, GpRegs); + LDQ(rr, db, rb); + } + else + { + Register rb = findRegFor(base, GpRegs); + resv->reg = UnknownReg; + + // don't use an fpu reg to simply load & store the value. 
+ if (dr) + asm_mmq(FP, dr, rb, db); + + freeRsrcOf(ins, false); + + if (rr != UnknownReg) + { + NanoAssert(rmask(rr)&FpRegs); + _allocator.retire(rr); + FLDQ(db, rb); + } + } + } + + void Assembler::asm_store64(LInsp value, int dr, LInsp base) + { + if (value->isconstq()) + { + // if a constant 64-bit value just store it now rather than + // generating a pointless store/load/store sequence + Register rb = findRegFor(base, GpRegs); + const int32_t* p = (const int32_t*) (value-2); + STi(rb, dr+4, p[1]); + STi(rb, dr, p[0]); + return; + } + + if (value->isop(LIR_ldq) || value->isop(LIR_qjoin)) + { + // value is 64bit struct or int64_t, or maybe a double. + // it may be live in an FPU reg. Either way, don't + // put it in an FPU reg just to load & store it. + + // a) if we know it's not a double, this is right. + // b) if we guarded that its a double, this store could be on + // the side exit, copying a non-double. + // c) maybe its a double just being stored. oh well. + + if (sse2) { + Register rv = findRegFor(value, XmmRegs); + Register rb = findRegFor(base, GpRegs); + STQ(dr, rb, rv); + return; + } + + int da = findMemFor(value); + Register rb = findRegFor(base, GpRegs); + asm_mmq(rb, dr, FP, da); + return; + } + + Reservation* rA = getresv(value); + int pop = !rA || rA->reg==UnknownReg; + Register rv = findRegFor(value, FpRegs); + Register rb = findRegFor(base, GpRegs); + + if (rmask(rv) & XmmRegs) { + STQ(dr, rb, rv); + } else { + FSTQ(pop, dr, rb); + } + } + + /** + * copy 64 bits: (rd+dd) <- (rs+ds) + */ + void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds) + { + // value is either a 64bit struct or maybe a float + // that isn't live in an FPU reg. Either way, don't + // put it in an FPU reg just to load & store it. + if (sse2) + { + // use SSE to load+store 64bits + Register t = registerAlloc(XmmRegs); + _allocator.addFree(t); + STQ(dd, rd, t); + LDQ(t, ds, rs); + } + else + { + // get a scratch reg + Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs))); + _allocator.addFree(t); + ST(rd, dd+4, t); + LD(t, ds+4, rs); + ST(rd, dd, t); + LD(t, ds, rs); + } + } + + void Assembler::asm_pusharg(LInsp p) + { + // arg goes on stack + Reservation* rA = getresv(p); + if (rA == 0) + { + if (p->isconst()) + { + // small const we push directly + PUSHi(p->constval()); + } + else + { + Register ra = findRegFor(p, GpRegs); + PUSHr(ra); + } + } + else if (rA->reg == UnknownReg) + { + PUSHm(disp(rA), FP); + } + else + { + PUSHr(rA->reg); + } + } + + NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target) + { + NIns* save = _nIns; + NIns* was = (NIns*)( (intptr_t)*(int32_t*)(at+1)+(intptr_t)(at+5) ); + _nIns = at +5; // +5 is size of JMP + intptr_t tt = (intptr_t)target - (intptr_t)_nIns; + IMM32(tt); + *(--_nIns) = JMPc; + _nIns = save; + return was; + } + + void Assembler::nativePageReset() {} + + void Assembler::nativePageSetup() + { + if (!_nIns) _nIns = pageAlloc(); + if (!_nExitIns) _nExitIns = pageAlloc(true); + } + #endif /* FEATURE_NANOJIT */ +} diff --git a/js/src/nanojit/RegAlloc.cpp b/js/src/nanojit/RegAlloc.cpp new file mode 100644 index 00000000000..77aab5a3538 --- /dev/null +++ b/js/src/nanojit/RegAlloc.cpp @@ -0,0 +1,182 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. 
 * You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Adobe AS3 Team
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "nanojit.h"
+
+namespace nanojit
+{
+    #ifdef FEATURE_NANOJIT
+
+    /**
+     * Generic register allocation routines.
+     */
+    void RegAlloc::clear()
+    {
+        free = 0;
+        used = 0;
+        memset(active, 0, NJ_MAX_REGISTERS * sizeof(LIns*));
+    }
+
+    bool RegAlloc::isFree(Register r)
+    {
+        NanoAssert(r != UnknownReg);
+        return (free & rmask(r)) != 0;
+    }
+
+    void RegAlloc::addFree(Register r)
+    {
+        NanoAssert(!isFree(r));
+        free |= rmask(r);
+    }
+
+    void RegAlloc::removeFree(Register r)
+    {
+        NanoAssert(isFree(r));
+        free &= ~rmask(r);
+    }
+
+    void RegAlloc::addActive(Register r, LIns* v)
+    {
+        //addActiveCount++;
+        NanoAssert(v && r != UnknownReg && active[r] == NULL );
+        active[r] = v;
+    }
+
+    void RegAlloc::removeActive(Register r)
+    {
+        //registerReleaseCount++;
+        NanoAssert(r != UnknownReg);
+        NanoAssert(active[r] != NULL);
+
+        // remove the given register from the active list
+        active[r] = NULL;
+    }
+
+    LIns* RegAlloc::getActive(Register r)
+    {
+        NanoAssert(r != UnknownReg);
+        return active[r];
+    }
+
+    void RegAlloc::retire(Register r)
+    {
+        NanoAssert(r != UnknownReg);
+        NanoAssert(active[r] != NULL);
+        active[r] = NULL;
+        free |= rmask(r);
+    }
+
+    // scan the active table for a victim to spill: pick the lowest-cost live
+    // instruction (ties broken by lowest instruction number), taking one from
+    // 'prefer' unless a strictly cheaper candidate exists outside it
+    LIns* Assembler::findVictim(RegAlloc &regs, RegisterMask allow, RegisterMask prefer)
+    {
+        NanoAssert(allow != 0 && (allow&prefer)==prefer);
+        LIns *i, *a=0, *p = 0;
+        int acost=10, pcost=10;
+        for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
+        {
+            if ((allow & rmask(r)) && (i = regs.getActive(r)) != 0)
+            {
+                int cost = getresv(i)->cost;
+                if (!a || cost < acost || cost == acost && nbr(i) < nbr(a)) {
+                    a = i;
+                    acost = cost;
+                }
+                if (prefer & rmask(r)) {
+                    if (!p || cost < pcost || cost == pcost && nbr(i) < nbr(p)) {
+                        p = i;
+                        pcost = cost;
+                    }
+                }
+            }
+        }
+        return acost < pcost ? a : p;
+    }
+
+    #ifdef NJ_VERBOSE
+    /* static */ void RegAlloc::formatRegisters(RegAlloc& regs, char* s, LirNameMap *names)
+    {
+        for(int i=0; i < NJ_MAX_REGISTERS; i++)
+        {
+            LIns* ins = regs.active[i];
+            if (!ins)
+                continue;
+            Register r = (Register)i;
+            s += strlen(s);
+            const char* rname = ins->isQuad() ?
fpn(r) : gpn(r); + sprintf(s, " %s(%s)", rname, names->formatRef(ins)); + } + } + #endif /* NJ_VERBOSE */ + + #ifdef _DEBUG + + uint32_t RegAlloc::countFree() + { + int cnt = 0; + for(Register i=FirstReg; i <= LastReg; i = nextreg(i)) + cnt += isFree(i) ? 1 : 0; + return cnt; + } + + uint32_t RegAlloc::countActive() + { + int cnt = 0; + for(Register i=FirstReg; i <= LastReg; i = nextreg(i)) + cnt += active[i] ? 1 : 0; + return cnt; + } + + void RegAlloc::checkCount() + { + NanoAssert(count == (countActive() + countFree())); + } + + bool RegAlloc::isConsistent(Register r, LIns* i) + { + NanoAssert(r != UnknownReg); + return (isFree(r) && !getActive(r) && !i) || + (!isFree(r) && getActive(r)== i && i ); + } + + #endif /*DEBUG*/ + #endif /* FEATURE_NANOJIT */ +} diff --git a/js/src/nanojit/avmplus.cpp b/js/src/nanojit/avmplus.cpp new file mode 100644 index 00000000000..67e6f1ded93 --- /dev/null +++ b/js/src/nanojit/avmplus.cpp @@ -0,0 +1,41 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version 1.1 (the + * "License"); you may not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, WITHOUT + * WARRANTY OF ANY KIND, either express or implied. See the License for the specific + * language governing rights and limitations under the License. + * + * The Original Code is [Open Source Virtual Machine.] + * + * The Initial Developer of the Original Code is Adobe System Incorporated. Portions created + * by the Initial Developer are Copyright (C)[ 2004-2006 ] Adobe Systems Incorporated. All Rights + * Reserved. + * + * Contributor(s): Adobe AS3 Team + * Andreas Gal + * + * Alternatively, the contents of this file may be used under the terms of either the GNU + * General Public License Version 2 or later (the "GPL"), or the GNU Lesser General Public + * License Version 2.1 or later (the "LGPL"), in which case the provisions of the GPL or the + * LGPL are applicable instead of those above. If you wish to allow use of your version of this + * file only under the terms of either the GPL or the LGPL, and not to allow others to use your + * version of this file under the terms of the MPL, indicate your decision by deleting provisions + * above and replace them with the notice and other provisions required by the GPL or the + * LGPL. If you do not delete the provisions above, a recipient may use your version of this file + * under the terms of any one of the MPL, the GPL or the LGPL. 
+ * + ***** END LICENSE BLOCK ***** */ + +#include "avmplus.h" + +using namespace avmplus; + +AvmConfiguration AvmCore::config; +static GC _gc; +GC* AvmCore::gc = &_gc; +GCHeap GC::heap; + diff --git a/js/src/nanojit/avmplus.h b/js/src/nanojit/avmplus.h index 69ed644d2cf..a65637f637a 100644 --- a/js/src/nanojit/avmplus.h +++ b/js/src/nanojit/avmplus.h @@ -35,6 +35,7 @@ #include #include +#include #include "jstypes.h" #ifdef _MSC_VER @@ -45,21 +46,20 @@ #define FASTCALL +//#ifdef DEBUG +//#define _DEBUG +//#define NJ_VERBOSE +//#endif + #define AvmAssert(x) assert(x) +#define AvmAssertMsg(x, y) +#define AvmDebugLog(x) printf x typedef JSUint8 uint8_t; typedef JSUint16 uint16_t; typedef JSUint32 uint32_t; typedef JSUint64 uint64_t; -class GC -{ -}; - -class GCHeap -{ -}; - class GCObject { }; @@ -68,20 +68,108 @@ class GCFinalizedObject { }; +class GCHeap +{ +public: + uint32_t kNativePageSize; + + GCHeap() + { + kNativePageSize = 4096; // @todo: what is this? + } + + inline void* + Alloc(uint32_t pages) + { + void* p = malloc((pages + 1) * kNativePageSize); + p = (void*)(((int)(((char*)p) + kNativePageSize)) & (~0xfff)); + return p; + } + + inline void + Free(void* p) + { + // @todo: don't know how to free + } + +}; + +class GC +{ + static GCHeap heap; + +public: + static inline void + Free(void* p) + { + } + + static inline GCHeap* + GetGCHeap() + { + return &heap; + } +}; + +inline void* +operator new(size_t size, GC* gc) +{ + return (void*)new char[size]; +} + #define DWB(x) x +#define MMGC_MEM_TYPE(x) + +typedef int FunctionID; + namespace avmplus { class InterpState { + public: + void* f; + const uint16_t* ip; + void* rp; + void* sp; }; + class AvmConfiguration + { + public: + AvmConfiguration() { + memset(this, 0, sizeof(AvmConfiguration)); + } + + uint32_t tree_opt:1; + }; + class AvmCore { + public: + static AvmConfiguration config; + static GC* gc; + + static inline bool + use_sse2() + { + return true; + } + + static inline GC* + GetGC() + { + return gc; + } }; class OSDep { + public: + static inline void + getDate() + { + } }; /** @@ -127,6 +215,17 @@ namespace avmplus delete data; } + // 'this' steals the guts of 'that' and 'that' gets reset. + void FASTCALL become(List& that) + { + this->destroy(); + + this->data = that.data; + this->len = that.len; + + that.data = 0; + that.len = 0; + } uint32_t FASTCALL add(T value) { if (len >= capacity) { diff --git a/js/src/nanojit/vm_fops.h b/js/src/nanojit/vm_fops.h index 7250a043e70..b5f65279703 100644 --- a/js/src/nanojit/vm_fops.h +++ b/js/src/nanojit/vm_fops.h @@ -35,3 +35,5 @@ * * ***** END LICENSE BLOCK ***** */ +INTERP_FOPCODE_LIST_BEGIN +INTERP_FOPCODE_LIST_END
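GCHeap::Alloc in the avmplus.h glue above hands nanojit page-aligned memory by over-allocating one extra page with malloc and masking off the low bits of the resulting pointer. Below is a minimal standalone sketch of that arithmetic, assuming a 4096-byte page as in kNativePageSize; alloc_pages and kPageSize are illustrative names, not part of the patch.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    static const uintptr_t kPageSize = 4096;  // same role as kNativePageSize

    // Over-allocate by one page, then slide the pointer forward to a page
    // boundary by masking off the low 12 bits. The original malloc'd pointer
    // is discarded, which is why the patch's GCHeap::Free is left as a no-op
    // ("@todo: don't know how to free").
    static void* alloc_pages(uint32_t pages)
    {
        void* raw = malloc((pages + 1) * kPageSize);
        if (!raw)
            return NULL;
        uintptr_t aligned = ((uintptr_t)raw + kPageSize) & ~(kPageSize - 1);
        return (void*)aligned;
    }

    int main(void)
    {
        void* p = alloc_pages(2);
        printf("2 pages at %p (page-aligned: %s)\n",
               p, ((uintptr_t)p & (kPageSize - 1)) ? "no" : "yes");
        return 0;
    }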