/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Adobe AS3 Team
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include "nanojit.h"

#ifdef AVMPLUS_PORTING_API
#include "portapi_nanojit.h"
#endif

#ifdef UNDER_CE
#include <cmnintrin.h>   // header name lost in extraction; <cmnintrin.h> assumed
#endif

#if defined(AVMPLUS_LINUX)
#include <asm/unistd.h>  // header name lost in extraction; <asm/unistd.h> assumed
#endif

namespace nanojit
{
#ifdef FEATURE_NANOJIT

#ifdef NJ_VERBOSE
    const char* regNames[] = {"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","r11","IP","SP","LR","PC"};
#endif

    const Register Assembler::argRegs[] = { R0, R1, R2, R3 };
    const Register Assembler::retRegs[] = { R0, R1 };
    const Register Assembler::savedRegs[] = { R4, R5, R6, R7 };

    void Assembler::nInit(AvmCore*)
    {
        // Thumb mode does not have conditional move, alas
        has_cmov = false;
    }

    NIns* Assembler::genPrologue(RegisterMask needSaving)
    {
        /**
         * Prologue
         */

        // NJ_STACK_OFFSET is space at the top of the stack for us
        // to use for parameter passing (8 bytes at the moment)
        uint32_t stackNeeded = 4 * _activation.highwatermark + NJ_STACK_OFFSET;
        uint32_t savingCount = 0;

        uint32_t savingMask = 0;
        savingCount = 5; // R4-R7, LR
        savingMask = 0xF0;
        (void)needSaving;

        // so for alignment purposes we've pushed return addr, fp, and savingCount registers
        uint32_t stackPushed = 4 * (2+savingCount);
        uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
        int32_t amt = aligned - stackPushed;

        // Make room on stack for what we are doing
        if (amt)
        {
            // largest value is 508 (7-bits << 2)
            if (amt>508)
            {
                int size = 508;
                while (size>0)
                {
                    SUBi(SP, size);
                    amt -= size;
                    size = amt;
                    if (size>508)
                        size=508;
                }
            }
            else
                SUBi(SP, amt);
        }

        verbose_only( verbose_outputf(" %p:",_nIns); )
        verbose_only( verbose_output(" patch entry"); )
        NIns *patchEntry = _nIns;

        MR(FRAME_PTR, SP);
        PUSH_mask(savingMask|rmask(LR));
        return patchEntry;
    }

    void Assembler::nFragExit(LInsp guard)
    {
        SideExit* exit = guard->exit();
        Fragment *frag = exit->target;
        GuardRecord *lr;
        if (frag &&
            frag->fragEntry)
        {
            JMP(frag->fragEntry);
            lr = 0;
        }
        else
        {
            // target doesn't exist yet.  emit jump to epilog, and set up to patch later.
            lr = placeGuardRecord(guard);
            BL(_epilogue);
            lr->jmp = _nIns;
        }

        // pop the stack frame first
        MR(SP, FRAME_PTR);

#ifdef NJ_VERBOSE
        if (_frago->core()->config.show_stats)
        {
            // load R1 with Fragment *fromFrag, target fragment
            // will make use of this when calling fragenter().
            int fromfrag = int((Fragment*)_thisfrag);
            LDi(argRegs[1], fromfrag);
        }
#endif

        // return value is GuardRecord*
        LDi(R2, int(lr));
    }

    NIns* Assembler::genEpilogue(RegisterMask restore)
    {
        (void)restore;
        if (false) {
            // interworking
            BX(R3); // return
            POPr(R3); // POP LR into R3
            POP_mask(0xF0); // {R4-R7}
        } else {
            // return to Thumb caller
            POP_mask(0xF0|rmask(PC));
        }
        MR(R0,R2); // return LinkRecord*
        return _nIns;
    }

    void Assembler::asm_call(LInsp ins)
    {
        const CallInfo* call = ins->callInfo();
        uint32_t atypes = call->_argtypes;
        uint32_t roffset = 0;

        // we need to detect if we have arg0 as LO followed by arg1 as F;
        // in that case, we need to skip using r1 -- the F needs to be
        // loaded in r2/r3, at least according to the ARM EABI and gcc 4.2's
        // generated code.
        bool arg0IsInt32FollowedByFloat = false;
        while ((atypes & 3) != ARGSIZE_NONE) {
            if (((atypes >> 4) & 3) == ARGSIZE_LO &&
                ((atypes >> 2) & 3) == ARGSIZE_F &&
                ((atypes >> 6) & 3) == ARGSIZE_NONE)
            {
                arg0IsInt32FollowedByFloat = true;
                break;
            }
            atypes >>= 2;
        }

        CALL(call);
        ArgSize sizes[10];
        uint32_t argc = call->get_sizes(sizes);
        for(uint32_t i=0; i < argc; i++)
        {
            uint32_t j = argc - i - 1;
            ArgSize sz = sizes[j];
            NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q);
            // pre-assign registers R0-R3 for arguments (if they fit)
            Register r = (i+roffset) < 4 ? argRegs[i+roffset] : UnknownReg;
            asm_arg(sz, ins->arg(j), r);

            if (i == 0 && arg0IsInt32FollowedByFloat)
                roffset = 1;
        }
    }

    void Assembler::nMarkExecute(Page* page, int flags)
    {
        NanoAssert(sizeof(Page) == NJ_PAGE_SIZE);
#ifdef UNDER_CE
        static const DWORD kProtFlags[4] =
        {
            PAGE_READONLY,          // 0
            PAGE_READWRITE,         // PAGE_WRITE
            PAGE_EXECUTE_READ,      // PAGE_EXEC
            PAGE_EXECUTE_READWRITE  // PAGE_EXEC|PAGE_WRITE
        };
        DWORD prot = kProtFlags[flags & (PAGE_WRITE|PAGE_EXEC)];
        DWORD dwOld;
        BOOL res = VirtualProtect(page, NJ_PAGE_SIZE, prot, &dwOld);
        if (!res)
        {
            // todo: we can't abort or assert here, we have to fail gracefully.
            NanoAssertMsg(false, "FATAL ERROR: VirtualProtect() failed\n");
        }
#endif
#ifdef AVMPLUS_PORTING_API
        NanoJIT_PortAPI_MarkExecutable(page, (void*)((char*)page+NJ_PAGE_SIZE), flags);
        // todo, must add error-handling to the portapi
#endif
    }

    Register Assembler::nRegisterAllocFromSet(int set)
    {
        // need to implement faster way
        int i=0;
        while (!(set & rmask((Register)i)))
            i ++;
        _allocator.free &= ~rmask((Register)i);
        return (Register) i;
    }

    void Assembler::nRegisterResetAll(RegAlloc& a)
    {
        // add scratch registers to our free list for the allocator
        a.clear();
        a.used = 0;
        a.free = rmask(R0) | rmask(R1) | rmask(R2) | rmask(R3) | rmask(R4) | rmask(R5);
        debug_only(a.managed = a.free);
    }

    void Assembler::nPatchBranch(NIns* branch, NIns* target)
    {
        // Patch the jump in a loop

        // This is ALWAYS going to be a long branch (using the BL instruction)
        // Which is really 2 instructions, so we need to modify both
        // XXX -- this is B, not BL, at least on non-Thumb..
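        // Note on the encoding: a Thumb-1 long branch is a pair of 16-bit
        // halfwords -- the first (0xF000 | imm11) carries bits [22:12] of the
        // signed halfword offset, the second (0xF800 | imm11) carries bits
        // [11:1] -- which is why both halfwords are rewritten here.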
        // branch+2 because PC is always 2 instructions ahead on ARM/Thumb
        int32_t offset = int(target) - int(branch+2);

        //printf("---patching branch at 0x%08x to location 0x%08x (%d-0x%08x)\n", branch, target, offset, offset);

        NanoAssert(-(1<<21) <= offset && offset < (1<<21));
        *branch++ = (NIns)(0xF000 | (offset>>12)&0x7FF);
        *branch   = (NIns)(0xF800 | (offset>>1)&0x7FF);
    }

    RegisterMask Assembler::hint(LIns* i, RegisterMask allow /* = ~0 */)
    {
        uint32_t op = i->opcode();
        int prefer = ~0;
        if (op==LIR_call || op==LIR_fcall)
            prefer = rmask(R0);
        else if (op == LIR_callh)
            prefer = rmask(R1);
        else if (op == LIR_param)
            prefer = rmask(imm2register(argRegs[i->imm8()]));

        if (_allocator.free & allow & prefer)
            allow &= prefer;
        return allow;
    }

    void Assembler::asm_qjoin(LIns *ins)
    {
        int d = findMemFor(ins);
        AvmAssert(d);
        LIns* lo = ins->oprnd1();
        LIns* hi = ins->oprnd2();

        Register r = findRegFor(hi, GpRegs);
        ST(FP, d+4, r);

        // okay if r gets recycled.
        r = findRegFor(lo, GpRegs);
        ST(FP, d, r);
        freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
    }

    void Assembler::asm_store32(LIns *value, int dr, LIns *base)
    {
        // make sure both the value and the base are in registers
        Reservation *rA, *rB;
        findRegFor2(GpRegs, value, rA, base, rB);
        Register ra = rA->reg;
        Register rb = rB->reg;
        ST(rb, dr, ra);
    }

    void Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
    {
        (void)resv;
        int d = findMemFor(i);
        LD(r, d, FP);
        verbose_only(if (_verbose) {
            outputf(" restore %s",_thisfrag->lirbuf->names->formatRef(i));
        })
    }

    void Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
    {
        (void)i;
        (void)pop;
        if (resv->arIndex)
        {
            int d = disp(resv);
            // save to spill location
            Register rr = resv->reg;
            ST(FP, d, rr);
            verbose_only(if (_verbose){
                outputf(" spill %s",_thisfrag->lirbuf->names->formatRef(i));
            })
        }
    }

    void Assembler::asm_load64(LInsp ins)
    {
        LIns* base = ins->oprnd1();
        int db = ins->oprnd2()->constval();
        Reservation *resv = getresv(ins);
        int dr = disp(resv);
        NanoAssert(resv->reg == UnknownReg && dr != 0);

        Register rb = findRegFor(base, GpRegs);
        resv->reg = UnknownReg;
        asm_mmq(FP, dr, rb, db);
        freeRsrcOf(ins, false);
    }

    void Assembler::asm_store64(LInsp value, int dr, LInsp base)
    {
        int da = findMemFor(value);
        Register rb = findRegFor(base, GpRegs);
        asm_mmq(rb, dr, FP, da);
    }

    void Assembler::asm_quad(LInsp ins)
    {
        Reservation *rR = getresv(ins);
        int d = disp(rR);
        freeRsrcOf(ins, false);
        if (d)
        {
            const int32_t* p = (const int32_t*) (ins-2);
            STi(FP,d+4,p[1]);
            STi(FP,d,p[0]);
        }
    }

    NIns* Assembler::asm_branch(bool branchOnFalse, LInsp cond, NIns* targ, bool isfar)
    {
        NIns* at = 0;
        LOpcode condop = cond->opcode();
        NanoAssert(cond->isCond());
#ifndef NJ_SOFTFLOAT
        if (condop >= LIR_feq && condop <= LIR_fge)
        {
            return asm_jmpcc(branchOnFalse, cond, targ);
        }
#endif
        // produce the branch
        if (branchOnFalse)
        {
            if (condop == LIR_eq)
                JNE(targ);
            else if (condop == LIR_ov)
                JNO(targ);
            else if (condop == LIR_cs)
                JNC(targ);
            else if (condop == LIR_lt)
                JNL(targ);
            else if (condop == LIR_le)
                JNLE(targ);
            else if (condop == LIR_gt)
                JNG(targ);
            else if (condop == LIR_ge)
                JNGE(targ);
            else if (condop == LIR_ult)
                JNB(targ);
            else if (condop == LIR_ule)
                JNBE(targ);
            else if (condop == LIR_ugt)
                JNA(targ);
            else //if (condop == LIR_uge)
                JNAE(targ);
        }
        else // op == LIR_xt
        {
            if (condop == LIR_eq)
                JE(targ);
            else if (condop == LIR_ov)
                JO(targ);
            else if (condop == LIR_cs)
                JC(targ);
            else if (condop == LIR_lt)
                JL(targ);
            else if (condop == LIR_le)
                JLE(targ);
            else if (condop == LIR_gt)
                JG(targ);
            else if (condop == LIR_ge)
                JGE(targ);
            else if (condop == LIR_ult)
                JB(targ);
            else
            if (condop == LIR_ule)
                JBE(targ);
            else if (condop == LIR_ugt)
                JA(targ);
            else //if (condop == LIR_uge)
                JAE(targ);
        }
        at = _nIns;
        asm_cmp(cond);
        return at;
    }

    void Assembler::asm_cmp(LIns *cond)
    {
        LOpcode condop = cond->opcode();

        // LIR_ov and LIR_cs recycle the flags set by arithmetic ops
        if ((condop == LIR_ov) || (condop == LIR_cs))
            return;

        LInsp lhs = cond->oprnd1();
        LInsp rhs = cond->oprnd2();
        Reservation *rA, *rB;

        // Not supported yet.
        NanoAssert(!lhs->isQuad() && !rhs->isQuad());

        // ready to issue the compare
        if (rhs->isconst())
        {
            int c = rhs->constval();
            if (c == 0 && cond->isop(LIR_eq)) {
                Register r = findRegFor(lhs, GpRegs);
                TEST(r,r);
                // No 64-bit immediates so fall-back to below
            }
            else if (!rhs->isQuad()) {
                Register r = getBaseReg(lhs, c, GpRegs);
                CMPi(r, c);
            }
        }
        else
        {
            findRegFor2(GpRegs, lhs, rA, rhs, rB);
            Register ra = rA->reg;
            Register rb = rB->reg;
            CMP(ra, rb);
        }
    }

    void Assembler::asm_loop(LInsp ins, NInsList& loopJumps)
    {
        (void)ins;
        JMP_long_placeholder(); // jump to SOT
        verbose_only( if (_verbose && _outputCache) { _outputCache->removeLast(); outputf(" jmp SOT"); } );

        loopJumps.add(_nIns);

#ifdef NJ_VERBOSE
        // branching from this frag to ourself.
        if (_frago->core()->config.show_stats)
            LDi(argRegs[1], int((Fragment*)_thisfrag));
#endif

        assignSavedParams();

        // restore first parameter, the only one we use
        LInsp state = _thisfrag->lirbuf->state;
        findSpecificRegFor(state, argRegs[state->imm8()]);
    }

    void Assembler::asm_fcond(LInsp ins)
    {
        // only want certain regs
        Register r = prepResultReg(ins, AllowableFlagRegs);
        asm_setcc(r, ins);
#ifdef NJ_ARM_VFP
        SETE(r);
#else
        // SETcc only sets low 8 bits, so extend
        MOVZX8(r,r);
        SETNP(r);
#endif
        asm_fcmp(ins);
    }

    void Assembler::asm_cond(LInsp ins)
    {
        // only want certain regs
        LOpcode op = ins->opcode();
        Register r = prepResultReg(ins, AllowableFlagRegs);
        // SETcc only sets low 8 bits, so extend
        MOVZX8(r,r);
        if (op == LIR_eq)
            SETE(r);
        else if (op == LIR_ov)
            SETO(r);
        else if (op == LIR_cs)
            SETC(r);
        else if (op == LIR_lt)
            SETL(r);
        else if (op == LIR_le)
            SETLE(r);
        else if (op == LIR_gt)
            SETG(r);
        else if (op == LIR_ge)
            SETGE(r);
        else if (op == LIR_ult)
            SETB(r);
        else if (op == LIR_ule)
            SETBE(r);
        else if (op == LIR_ugt)
            SETA(r);
        else // if (op == LIR_uge)
            SETAE(r);
        asm_cmp(ins);
    }

    void Assembler::asm_arith(LInsp ins)
    {
        LOpcode op = ins->opcode();
        LInsp lhs = ins->oprnd1();
        LInsp rhs = ins->oprnd2();

        Register rb = UnknownReg;
        RegisterMask allow = GpRegs;
        bool forceReg = (op == LIR_mul || !rhs->isconst());

#ifdef NANOJIT_ARM
        // Arm can't do an immediate op with immediates
        // outside of +/-255 (for AND) or outside of
        // 0..255 for others.
        if (!forceReg)
        {
            if (rhs->isconst() && !isU8(rhs->constval()))
                forceReg = true;
        }
#endif

        if (lhs != rhs && forceReg)
        {
            if ((rb = asm_binop_rhs_reg(ins)) == UnknownReg) {
                rb = findRegFor(rhs, allow);
            }
            allow &= ~rmask(rb);
        }
        else if ((op == LIR_add||op == LIR_addp) && lhs->isop(LIR_alloc) && rhs->isconst()) {
            // add alloc+const, use lea
            Register rr = prepResultReg(ins, allow);
            int d = findMemFor(lhs) + rhs->constval();
            LEA(rr, d, FP);
        }

        Register rr = prepResultReg(ins, allow);
        Reservation* rA = getresv(lhs);
        Register ra;
        // if this is last use of lhs in reg, we can re-use result reg
        if (rA == 0 || (ra = rA->reg) == UnknownReg)
            ra = findSpecificRegFor(lhs, rr);
        // else, rA already has a register assigned.
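        // Note: the ALU forms used here are two-operand (the destination is
        // also the first source).  Because instructions are emitted backwards
        // (_nIns is decremented), the trailing MR(rr,ra) at the end of this
        // function actually executes *before* the operation below, so lhs is
        // moved into the result register first when rr != ra.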
        if (forceReg)
        {
            if (lhs == rhs)
                rb = ra;

            if (op == LIR_add || op == LIR_addp)
                ADD(rr, rb);
            else if (op == LIR_sub)
                SUB(rr, rb);
            else if (op == LIR_mul)
                MUL(rr, rb);
            else if (op == LIR_and)
                AND(rr, rb);
            else if (op == LIR_or)
                OR(rr, rb);
            else if (op == LIR_xor)
                XOR(rr, rb);
            else if (op == LIR_lsh)
                SHL(rr, rb);
            else if (op == LIR_rsh)
                SAR(rr, rb);
            else if (op == LIR_ush)
                SHR(rr, rb);
            else
                NanoAssertMsg(0, "Unsupported");
        }
        else
        {
            int c = rhs->constval();
            if (op == LIR_add || op == LIR_addp)
                ADDi(rr, c);
            else if (op == LIR_sub)
                SUBi(rr, c);
            else if (op == LIR_and)
                ANDi(rr, c);
            else if (op == LIR_or)
                ORi(rr, c);
            else if (op == LIR_xor)
                XORi(rr, c);
            else if (op == LIR_lsh)
                SHLi(rr, c);
            else if (op == LIR_rsh)
                SARi(rr, c);
            else if (op == LIR_ush)
                SHRi(rr, c);
            else
                NanoAssertMsg(0, "Unsupported");
        }

        if ( rr != ra )
            MR(rr,ra);
    }

    void Assembler::asm_neg_not(LInsp ins)
    {
        LOpcode op = ins->opcode();
        Register rr = prepResultReg(ins, GpRegs);

        LIns* lhs = ins->oprnd1();
        Reservation *rA = getresv(lhs);
        // if this is last use of lhs in reg, we can re-use result reg
        Register ra;
        if (rA == 0 || (ra=rA->reg) == UnknownReg)
            ra = findSpecificRegFor(lhs, rr);
        // else, rA already has a register assigned.

        if (op == LIR_not)
            NOT(rr);
        else
            NEG(rr);

        if ( rr != ra )
            MR(rr,ra);
    }

    void Assembler::asm_ld(LInsp ins)
    {
        LOpcode op = ins->opcode();
        LIns* base = ins->oprnd1();
        LIns* disp = ins->oprnd2();
        Register rr = prepResultReg(ins, GpRegs);
        int d = disp->constval();
        Register ra = getBaseReg(base, d, GpRegs);
        if (op == LIR_ldcb)
            LD8Z(rr, d, ra);
        else
            LD(rr, d, ra);
    }

    void Assembler::asm_cmov(LInsp ins)
    {
        LOpcode op = ins->opcode();
        LIns* condval = ins->oprnd1();
        NanoAssert(condval->isCmp());

        LIns* values = ins->oprnd2();

        NanoAssert(values->opcode() == LIR_2);
        LIns* iftrue = values->oprnd1();
        LIns* iffalse = values->oprnd2();

        NanoAssert(op == LIR_qcmov || (!iftrue->isQuad() && !iffalse->isQuad()));

        const Register rr = prepResultReg(ins, GpRegs);

        // this code assumes that neither LD nor MR nor MRcc set any of the condition flags.
        // (This is true on Intel, is it true on all architectures?)
        const Register iffalsereg = findRegFor(iffalse, GpRegs & ~rmask(rr));
        if (op == LIR_cmov) {
            switch (condval->opcode())
            {
                // note that these are all opposites...
                case LIR_eq:    MRNE(rr, iffalsereg);   break;
                case LIR_ov:    MRNO(rr, iffalsereg);   break;
                case LIR_cs:    MRNC(rr, iffalsereg);   break;
                case LIR_lt:    MRGE(rr, iffalsereg);   break;
                case LIR_le:    MRG(rr, iffalsereg);    break;
                case LIR_gt:    MRLE(rr, iffalsereg);   break;
                case LIR_ge:    MRL(rr, iffalsereg);    break;
                case LIR_ult:   MRAE(rr, iffalsereg);   break;
                case LIR_ule:   MRA(rr, iffalsereg);    break;
                case LIR_ugt:   MRBE(rr, iffalsereg);   break;
                case LIR_uge:   MRB(rr, iffalsereg);    break;
                debug_only( default: NanoAssert(0); break; )
            }
        } else if (op == LIR_qcmov) {
            NanoAssert(0);
        }
        /*const Register iftruereg =*/ findSpecificRegFor(iftrue, rr);
        asm_cmp(condval);
    }

    void Assembler::asm_qhi(LInsp ins)
    {
        Register rr = prepResultReg(ins, GpRegs);
        LIns *q = ins->oprnd1();
        int d = findMemFor(q);
        LD(rr, d+4, FP);
    }

    void Assembler::asm_param(LInsp ins)
    {
        uint32_t a = ins->imm8();
        uint32_t kind = ins->imm8b();
        if (kind == 0) {
            // ordinary param
            AbiKind abi = _thisfrag->lirbuf->abi;
            uint32_t abi_regcount = abi == ABI_FASTCALL ? 2 :
                                    abi == ABI_THISCALL ?
                                    1 : 0;
            if (a < abi_regcount) {
                // incoming arg in register
                prepResultReg(ins, rmask(argRegs[a]));
            } else {
                // incoming arg is on stack, and EBP points nearby (see genPrologue)
                Register r = prepResultReg(ins, GpRegs);
                int d = (a - abi_regcount) * sizeof(intptr_t) + 8;
                LD(r, d, FP);
            }
        }
        else {
            // saved param
            prepResultReg(ins, rmask(savedRegs[a]));
        }
    }

    void Assembler::asm_short(LInsp ins)
    {
        Register rr = prepResultReg(ins, GpRegs);
        int32_t val = ins->imm16();
        if (val == 0)
            XOR(rr,rr);
        else
            LDi(rr, val);
    }

    void Assembler::asm_int(LInsp ins)
    {
        Register rr = prepResultReg(ins, GpRegs);
        int32_t val = ins->imm32();
        if (val == 0)
            XOR(rr,rr);
        else
            LDi(rr, val);
    }

    void Assembler::asm_quad(LInsp ins)
    {
        Reservation *rR = getresv(ins);
        Register rr = rR->reg;
        if (rr != UnknownReg)
        {
            // @todo -- add special-cases for 0 and 1
            _allocator.retire(rr);
            rR->reg = UnknownReg;
            NanoAssert((rmask(rr) & FpRegs) != 0);

            const double d = ins->constvalf();
            const uint64_t q = ins->constvalq();
            if (rmask(rr) & XmmRegs) {
                if (q == 0.0) {
                    // test (int64)0 since -0.0 == 0.0
                    SSE_XORPDr(rr, rr);
                } else if (d == 1.0) {
                    // 1.0 is extremely frequent and worth special-casing!
                    static const double k_ONE = 1.0;
                    LDSDm(rr, &k_ONE);
                } else {
                    findMemFor(ins);
                    const int d = disp(rR);
                    SSE_LDQ(rr, d, FP);
                }
            } else {
                if (q == 0.0) {
                    // test (int64)0 since -0.0 == 0.0
                    FLDZ();
                } else if (d == 1.0) {
                    FLD1();
                } else {
                    findMemFor(ins);
                    int d = disp(rR);
                    FLDQ(d,FP);
                }
            }
        }

        // @todo, if we used xor, ldsd, fldz, etc above, we don't need mem here
        int d = disp(rR);
        freeRsrcOf(ins, false);
        if (d)
        {
            const int32_t* p = (const int32_t*) (ins-2);
            STi(FP,d+4,p[1]);
            STi(FP,d,p[0]);
        }
    }

    void Assembler::asm_qlo(LInsp ins)
    {
        LIns *q = ins->oprnd1();
        Reservation *resv = getresv(ins);
        Register rr = resv->reg;
        if (rr == UnknownReg) {
            // store quad in spill loc
            int d = disp(resv);
            freeRsrcOf(ins, false);
            Register qr = findRegFor(q, XmmRegs);
            SSE_MOVDm(d, FP, qr);
        } else {
            freeRsrcOf(ins, false);
            Register qr = findRegFor(q, XmmRegs);
            SSE_MOVD(rr,qr);
        }
    }

    void Assembler::asm_nongp_copy(Register r, Register s)
    {
        // we will need this for VFP support
        (void)r; (void)s;
        NanoAssert(false);
    }

    Register Assembler::asm_binop_rhs_reg(LInsp ins)
    {
        return UnknownReg;
    }

    /**
     * copy 64 bits: (rd+dd) <- (rs+ds)
     */
    void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
    {
        // value is either a 64bit struct or maybe a float
        // that isn't live in an FPU reg.  Either way, don't
        // put it in an FPU reg just to load & store it.

        // get a scratch reg
        Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
        _allocator.addFree(t);
        ST(rd, dd+4, t);
        LD(t, ds+4, rs);
        ST(rd, dd, t);
        LD(t, ds, rs);
    }

    void Assembler::asm_pusharg(LInsp p)
    {
        // arg goes on stack
        Reservation* rA = getresv(p);
        if (rA == 0)
        {
            Register ra = findRegFor(p, GpRegs);
            ST(SP,0,ra);
        }
        else if (rA->reg == UnknownReg)
        {
            ST(SP,0,Scratch);
            LD(Scratch,disp(rA),FP);
        }
        else
        {
            ST(SP,0,rA->reg);
        }
    }

    void Assembler::nativePageReset()
    {
        _nPool = 0;
        _nSlot = 0;
        _nExitPool = 0;
        _nExitSlot = 0;
    }

    void Assembler::nativePageSetup()
    {
        if (!_nIns)      _nIns     = pageAlloc();
        if (!_nExitIns)  _nExitIns = pageAlloc(true);
        //fprintf(stderr, "assemble onto %x exits into %x\n", (int)_nIns, (int)_nExitIns);

        if (!_nPool) {
            _nSlot = _nPool = (int*)_nIns;

            // Make original pool at end of page.
            // Currently we are pointing off the end of the original page,
            // so back up 1+NJ_CPOOL_SIZE
            _nPool = (int*)((int)_nIns - (sizeof(int32_t)*NJ_CPOOL_SIZE));

            // _nSlot points at last slot in pool (fill upwards)
            _nSlot = _nPool + (NJ_CPOOL_SIZE-1);

            // Move _nIns to the top of the pool
            _nIns = (NIns*)_nPool;

            // no branch needed since this follows the epilogue
        }
    }

    void Assembler::flushCache(NIns* n1, NIns* n2) {
#if defined(UNDER_CE)
        // we changed the code, so we need to do this (sadly)
        FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX)
        // Just need to clear this one page (not even the whole page really)
        //Page *page = (Page*)pageTop(_nIns);
        register unsigned long _beg __asm("a1") = (unsigned long)(n1);
        register unsigned long _end __asm("a2") = (unsigned long)(n2);
        register unsigned long _flg __asm("a3") = 0;
        register unsigned long _swi __asm("r7") = 0xF0002;
        __asm __volatile ("swi 0    @ sys_cacheflush"
            : "=r" (_beg)
            : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
#endif
    }

    NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
    {
        NIns* save = _nIns;
        NIns* was = (NIns*) (((((*(at+2))&0x7ff)<<12) | (((*(at+1))&0x7ff)<<1)) + (at-2+2));

        _nIns = at + 2;
        BL(target);

        flushCache(_nIns, _nIns+2);

#ifdef AVMPLUS_PORTING_API
        // XXX save.._nIns+2? really?
        NanoJIT_PortAPI_FlushInstructionCache(save, _nIns+2);
#endif

        _nIns = save;
        return was;
    }

    void Assembler::STi(Register b, int32_t d, int32_t v)
    {
        ST(b, d, Scratch);
        LDi(Scratch, v);
    }

    bool isB11(NIns *target, NIns *cur)
    {
        NIns *br_base = (cur-1)+2;
        int br_off = int(target) - int(br_base);
        return (-(1<<11) <= br_off && br_off < (1<<11));
    }

    void Assembler::underrunProtect(int bytes)
    {
        NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
        // perhaps bytes + sizeof(PageHeader)/sizeof(NIns) + 4 ?
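        // Note: code is emitted backwards (_nIns is decremented), so this
        // checks that the next `bytes` of code still fit on the current page.
        // If not, a fresh page is taken and a B/BL is emitted on it that jumps
        // to `target`, the start of the code already generated, so control
        // flows from the newly emitted code into the existing stream.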
        intptr_t u = bytes + 4;
        if (!samepage(_nIns-u, _nIns-1)) {
            NIns* target = _nIns;
            _nIns = pageAlloc(_inExit);
            // might be able to do a B instead of BL (save an instruction)
            if (isB11(target, _nIns))
            {
                NIns *br_base = (_nIns-1)+2;
                int br_off = int(target) - int(br_base);
                *(--_nIns) = (NIns)(0xE000 | ((br_off>>1)&0x7FF));
            }
            else
            {
                int offset = int(target)-int(_nIns-2+2);
                *(--_nIns) = (NIns)(0xF800 | ((offset>>1)&0x7FF) );
                *(--_nIns) = (NIns)(0xF000 | ((offset>>12)&0x7FF) );
            }
        }
    }

    bool isB22(NIns *target, NIns *cur)
    {
        int offset = int(target)-int(cur-2+2);
        return (-(1<<22) <= offset && offset < (1<<22));
    }

    void Assembler::BL(NIns* target)
    {
        underrunProtect(4);
        NanoAssert(isB22(target,_nIns));
        int offset = int(target)-int(_nIns-2+2);
        *(--_nIns) = (NIns)(0xF800 | ((offset>>1)&0x7FF) );
        *(--_nIns) = (NIns)(0xF000 | ((offset>>12)&0x7FF) );
        asm_output("bl %X offset=%d",(int)target, offset);
    }

    void Assembler::B(NIns *target)
    {
        underrunProtect(2);
        NanoAssert(isB11(target,_nIns));
        NIns *br_base = (_nIns-1)+2;
        int br_off = int(target) - int(br_base);
        NanoAssert(-(1<<11) <= br_off && br_off < (1<<11));
        *(--_nIns) = (NIns)(0xE000 | ((br_off>>1)&0x7FF));
        asm_output("b %X offset=%d", (int)target, br_off);
    }

    void Assembler::JMP(NIns *target)
    {
        underrunProtect(4);
        if (isB11(target,_nIns))
            B(target);
        else
            BL(target);
    }

    void Assembler::PUSH_mask(RegisterMask mask)
    {
        NanoAssert((mask&(0xff|rmask(LR)))==mask);
        underrunProtect(2);
        if (mask & rmask(LR)) {
            mask &= ~rmask(LR);
            mask |= rmask(R8);
        }
        *(--_nIns) = (NIns)(0xB400 | mask);
        asm_output("push {%x}", mask);
    }

    void Assembler::POPr(Register r)
    {
        underrunProtect(2);
        NanoAssert(((unsigned)r)<8 || r == PC);
        if (r == PC)
            r = R8;
        *(--_nIns) = (NIns)(0xBC00 | (1<<(r)));
        asm_output("pop {%s}",gpn(r));
    }

    void Assembler::POP_mask(RegisterMask mask)
    {
        NanoAssert((mask&(0xff|rmask(PC)))==mask);
        underrunProtect(2);
        if (mask & rmask(PC)) {
            mask &= ~rmask(PC);
            mask |= rmask(R8);
        }
        *(--_nIns) = (NIns)(0xBC00 | mask);
        asm_output("pop {%x}", mask);
    }

    void Assembler::MOVi(Register r, int32_t v)
    {
        NanoAssert(isU8(v));
        underrunProtect(2);
        *(--_nIns) = (NIns)(0x2000 | r<<8 | v);
        asm_output("mov %s,#%d",gpn(r),v);
    }

    void Assembler::LDi(Register r, int32_t v)
    {
        if (isU8(v)) {
            MOVi(r,v);
        } else if (isU8(-v)) {
            NEG(r);
            MOVi(r,-v);
        } else {
            underrunProtect(2);
            LD32_nochk(r, v);
        }
    }

    void Assembler::B_cond(int c, NIns *target)
    {
#ifdef NJ_VERBOSE
        static const char *ccname[] = { "eq","ne","hs","lo","mi","pl","vs","vc","hi","ls","ge","lt","gt","le","al","nv" };
#endif

        underrunProtect(6);
        int tt = int(target) - int(_nIns-1+2);
        if (tt < (1<<8) && tt >= -(1<<8)) {
            *(--_nIns) = (NIns)(0xD000 | ((c)<<8) | (tt>>1)&0xFF );
            asm_output("b%s %X offset=%d", ccname[c], target, tt);
        } else {
            NIns *skip = _nIns;
            BL(target);
            c ^= 1;
            *(--_nIns) = (NIns)(0xD000 | c<<8 | 1 );
            asm_output("b%s %X", ccname[c], skip);
        }
    }

    void Assembler::STR_sp(int32_t offset, Register reg)
    {
        NanoAssert((offset&3)==0);// require natural alignment
        int32_t off = offset>>2;
        NanoAssert(isU8(off));
        underrunProtect(2);
        *(--_nIns) = (NIns)(0x9000 | ((reg)<<8) | off );
        asm_output("str %s, %d(%s)", gpn(reg), offset, gpn(SP));
    }

    void Assembler::STR_index(Register base, Register off, Register reg)
    {
        underrunProtect(2);
        *(--_nIns) = (NIns)(0x5000 | (off<<6) | (base<<3) | (reg));
        asm_output("str %s,(%s+%s)",gpn(reg),gpn(base),gpn(off));
    }

    void Assembler::STR_m(Register base, int32_t offset, Register reg)
    {
        NanoAssert(offset >= 0 && offset < 128 && (offset&3)==0);
        underrunProtect(2);
        int32_t off = offset>>2;
        *(--_nIns) = (NIns)(0x6000 | off<<6
                            | base<<3 | reg);
        asm_output("str %s,%d(%s)", gpn(reg), offset, gpn(base));
    }

    void Assembler::LDMIA(Register base, RegisterMask regs)
    {
        underrunProtect(2);
        NanoAssert((regs&rmask(base))==0 && isU8(regs));
        *(--_nIns) = (NIns)(0xC800 | base<<8 | regs);
        asm_output("ldmia %s!,{%x}", gpn(base), regs);
    }

    void Assembler::STMIA(Register base, RegisterMask regs)
    {
        underrunProtect(2);
        NanoAssert((regs&rmask(base))==0 && isU8(regs));
        *(--_nIns) = (NIns)(0xC000 | base<<8 | regs);
        asm_output("stmia %s!,{%x}", gpn(base), regs);
    }

    void Assembler::ST(Register base, int32_t offset, Register reg)
    {
        NanoAssert((offset&3)==0);// require natural alignment
        int off = offset>>2;
        if (base==SP) {
            STR_sp(offset, reg);
        } else if ((offset)<0) {
            STR_index(base, Scratch, reg);
            NEG(Scratch);
            if (offset < -255) {
                NanoAssert(offset >= -1020);
                SHLi(Scratch, 2);
                MOVi(Scratch, -off);
            } else {
                MOVi(Scratch, -offset);
            }
        } else {
            underrunProtect(6);
            if (off<32) {
                STR_m(base, offset, reg);
            } else {
                STR_index(base, Scratch, reg);
                if (offset > 255) {
                    SHLi(Scratch, 2);
                    MOVi(Scratch, off);
                } else {
                    MOVi(Scratch, offset);
                }
            }
        }
    }

    void Assembler::ADDi8(Register r, int32_t i)
    {
        underrunProtect(2);
        NanoAssert(isU8(i));
        *(--_nIns) = (NIns)(0x3000 | r<<8 | i);
        asm_output("add %s,#%d", gpn(r), i);
    }

    void Assembler::ADDi(Register r, int32_t i)
    {
        if (i < 0 && i != 0x80000000) {
            SUBi(r, -i);
        } else if (r == SP) {
            NanoAssert((i&3)==0 && i >= 0 && i < (1<<9));
            underrunProtect(2);
            *(--_nIns) = (NIns)(0xB000 | i>>2);
            asm_output("add %s,#%d", gpn(SP), i);
        } else if (isU8(i)) {
            ADDi8(r,i);
        } else if (i >= 0 && i <= (255+255)) {
            ADDi8(r,i-255);
            ADDi8(r,255);
        } else {
            ADD(r, Scratch);
            LDi(Scratch, i);
        }
    }

    void Assembler::SUBi8(Register r, int32_t i)
    {
        underrunProtect(2);
        NanoAssert(isU8(i));
        *(--_nIns) = (NIns)(0x3800 | r<<8 | i);
        asm_output("sub %s,#%d", gpn(r), i);
    }

    void Assembler::SUBi(Register r, int32_t i)
    {
        if (i < 0 && i != 0x80000000) {
            ADDi(r, -i);
        } else if (r == SP) {
            NanoAssert((i&3)==0 && i >= 0 && i < (1<<9));
            underrunProtect(2);
            *(--_nIns) = (NIns)(0xB080 | i>>2);
            asm_output("sub %s,#%d", gpn(SP), i);
        } else if (isU8(i)) {
            SUBi8(r,i);
        } else if (i >= 0 && i <= (255+255)) {
            SUBi8(r,i-255);
            SUBi8(r,255);
        } else {
            SUB(r, Scratch);
            LDi(Scratch, i);
        }
    }

    void Assembler::CALL(const CallInfo *ci)
    {
        intptr_t addr = ci->_address;
        if (isB22((NIns*)addr, _nIns))
        {
            int offset = int(addr)-int(_nIns-2+2);
            *(--_nIns) = (NIns)(0xF800 | ((offset>>1)&0x7FF) );
            *(--_nIns) = (NIns)(0xF000 | ((offset>>12)&0x7FF) );
            asm_output("call %08X:%s", addr, ci->_name);
        }
        else
        {
            underrunProtect(2*(10));

            if ( (((int(_nIns))&0xFFFF)%4) != 0)
                *(--_nIns) = (NIns)0;

            *(--_nIns) = (NIns)(0xF800 | (((-14)&0xFFF)>>1) );
            *(--_nIns) = (NIns)(0xF000 | (((-14)>>12)&0x7FF) );

            *(--_nIns) = (NIns)(0x4600 | (1<<7) | (Scratch<<3) | (IP&7));
            *(--_nIns) = (NIns)0;
            *(--_nIns) = (short)((addr) >> 16);
            *(--_nIns) = (short)((addr) & 0xFFFF);
            *(--_nIns) = (NIns)(0x4700 | (IP<<3));
            *(--_nIns) = (NIns)(0xE000 | (4>>1));
            *(--_nIns) = (NIns)(0x4800 | (Scratch<<8) | (1));
            asm_output("call %08X:%s", addr, ci->_name);
        }
    }

    void Assembler::LD32_nochk(Register r, int32_t imm)
    {
        // Can we reach the current slot/pool?
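        // Note: the constant is materialized with a PC-relative Thumb LDR
        // (0x4800 | Rd<<8 | imm8), which loads from Align(PC,4) + imm8*4.
        // The constant itself lives in a small literal pool on the same page;
        // data_off below is therefore computed against (int(_nIns+1) & ~3).
        // If the current pool is out of range or full, a new pool is carved
        // out of the page and a B skips over it.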
        int offset = (int)(_nSlot) - (int)(_nIns);
        if ((offset>=NJ_MAX_CPOOL_OFFSET || offset<0) ||
            (_nSlot < _nPool))
        {
            // can't reach, or no room
            // need a new pool

            // Make sure we have space for a pool and the LDR
            underrunProtect(sizeof(int32_t)*NJ_CPOOL_SIZE+1);

            NIns* skip = _nIns;

            _nPool = (int*)(((int)_nIns - (sizeof(int32_t)*NJ_CPOOL_SIZE)) &~3);
            _nSlot = _nPool + (NJ_CPOOL_SIZE-1);
            _nIns = (NIns*)_nPool;

            // jump over the pool
            B(skip);
            //*(--_nIns) = (NIns)( COND_AL | (0x5<<25) | (NJ_CPOOL_SIZE-1) );
        }

        *(_nSlot--) = (int)imm;

        NIns *data = (NIns*)(_nSlot+1);

        int data_off = int(data) - (int(_nIns+1)&~3);
        *(--_nIns) = (NIns)(0x4800 | r<<8 | data_off>>2);
        asm_output("ldr %s,%d(PC) [%X]",gpn(r),data_off,(int)data);
    }

#endif /* FEATURE_NANOJIT */
}