/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2008
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Adobe AS3 Team
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include "nanojit.h"

#if defined FEATURE_NANOJIT && defined NANOJIT_PPC

namespace nanojit
{
    const Register Assembler::retRegs[] = { R3, R4 }; // high=R3, low=R4
    const Register Assembler::argRegs[] = { R3, R4, R5, R6, R7, R8, R9, R10 };

    const Register Assembler::savedRegs[] = {
    #if !defined NANOJIT_64BIT
        R13,
    #endif
        R14, R15, R16, R17, R18, R19, R20, R21, R22,
        R23, R24, R25, R26, R27, R28, R29, R30
    };

    const char *regNames[] = {
        "r0",  "sp",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
        "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
        "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
        "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
        "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
        "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
        "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
        "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
    };

    const char *bitNames[] = { "lt", "gt", "eq", "so" };

    #define TODO(x) do{ avmplus::AvmLog(#x); NanoAssertMsgf(false, "%s", #x); } while(0)

    /*
     * see http://developer.apple.com/documentation/developertools/Conceptual/LowLevelABI/index.html
     * stack layout (higher address going down)
     * sp ->    out linkage area
     *          out parameter area
     *          local variables
     *          saved registers
     * sp' ->   in linkage area
     *          in parameter area
     *
     * linkage area layout:
     *
     *   PPC32    PPC64
     *   sp+0     sp+0     saved sp
     *   sp+4     sp+8     saved cr
     *   sp+8     sp+16    saved lr
     *   sp+12    sp+24    reserved
     */

    const int linkage_size = 6*sizeof(void*);
    const int lr_offset = 2*sizeof(void*); // linkage.lr
    const int cr_offset = 1*sizeof(void*); // linkage.cr

    NIns* Assembler::genPrologue() {
        // mflr r0
        // stw r0, lr_offset(sp)
        // stwu sp, -framesize(sp)

        // activation frame is 4 bytes per entry even on 64bit machines
        uint32_t stackNeeded = max_param_size + linkage_size + _activation.tos * 4;
        uint32_t aligned = alignUp(stackNeeded, NJ_ALIGN_STACK);

        UNLESS_PEDANTIC( if (isS16(aligned)) {
            STPU(SP, -aligned, SP); // *(sp-aligned) = sp; sp -= aligned
        } else ) {
            STPUX(SP, SP, R0);
            asm_li(R0, -aligned);
        }

        NIns *patchEntry = _nIns;
        MR(FP,SP);              // save SP to use as a FP
        STP(FP, cr_offset, SP); // cheat and save our FP in linkage.cr
        STP(R0, lr_offset, SP); // save LR in linkage.lr
        MFLR(R0);

        return patchEntry;
    }

    NIns* Assembler::genEpilogue() {
        BLR();
        MTLR(R0);
        LP(R0, lr_offset, SP);
        LP(FP, cr_offset, SP); // restore FP from linkage.cr
        MR(SP,FP);
        return _nIns;
    }

    void Assembler::asm_qjoin(LIns *ins) {
        int d = findMemFor(ins);
        NanoAssert(d && isS16(d));
        LIns* lo = ins->oprnd1();
        LIns* hi = ins->oprnd2();

        Register r = findRegFor(hi, GpRegs);
        STW(r, d+4, FP);

        // okay if r gets recycled.
        r = findRegFor(lo, GpRegs);
        STW(r, d, FP);
        freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
    }

    void Assembler::asm_ld(LIns *ins) {
        LIns* base = ins->oprnd1();
        int d = ins->disp();
        Register rr = prepResultReg(ins, GpRegs);
        Register ra = getBaseReg(ins->opcode(), base, d, GpRegs);

    #if !PEDANTIC
        if (isS16(d)) {
            if (ins->isop(LIR_ldcb)) {
                LBZ(rr, d, ra);
            } else {
                LWZ(rr, d, ra);
            }
            return;
        }
    #endif

        // general case
        underrunProtect(12);
        LWZX(rr, ra, R0); // rr = [ra+R0]
        asm_li(R0,d);
    }
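
    // Worked example (informal sketch): when the displacement does not fit in a
    // signed 16-bit immediate, the general case in asm_ld above costs three
    // instructions (shown in execution order; the source lists them in reverse
    // because nanojit emits code backwards):
    //
    //     lis   r0, (d >> 16)         ; asm_li/asm_li32: materialize the offset
    //     ori   r0, r0, (d & 0xffff)
    //     lwzx  rr, ra, r0            ; rr = *(ra + r0)
    //
    // whereas an in-range displacement collapses to a single "lwz rr, d(ra)".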
    void Assembler::asm_store32(LIns *value, int32_t dr, LIns *base) {
        Register rs = findRegFor(value, GpRegs);
        Register ra = value == base ? rs : getBaseReg(LIR_sti, base, dr, GpRegs & ~rmask(rs));

    #if !PEDANTIC
        if (isS16(dr)) {
            STW(rs, dr, ra);
            return;
        }
    #endif

        // general case store, any offset size
        STWX(rs, ra, R0);
        asm_li(R0, dr);
    }

    void Assembler::asm_load64(LIns *ins) {
        LIns* base = ins->oprnd1();
    #ifdef NANOJIT_64BIT
        Register rr = ins->getReg();
        if (isKnownReg(rr) && (rmask(rr) & FpRegs)) {
            // FPR already assigned, fine, use it
            freeRsrcOf(ins, false);
        } else {
            // use a GPR register; it's okay to copy doubles with GPR's
            // but *not* okay to copy non-doubles with FPR's
            rr = prepResultReg(ins, GpRegs);
        }
    #else
        Register rr = prepResultReg(ins, FpRegs);
    #endif

        int dr = ins->disp();
        Register ra = getBaseReg(ins->opcode(), base, dr, GpRegs);

    #ifdef NANOJIT_64BIT
        if (rmask(rr) & GpRegs) {
        #if !PEDANTIC
            if (isS16(dr)) {
                LD(rr, dr, ra);
                return;
            }
        #endif
            // general case 64bit GPR load
            LDX(rr, ra, R0);
            asm_li(R0, dr);
            return;
        }
    #endif

        // FPR
    #if !PEDANTIC
        if (isS16(dr)) {
            LFD(rr, dr, ra);
            return;
        }
    #endif

        // general case FPR load
        LFDX(rr, ra, R0);
        asm_li(R0, dr);
    }

    void Assembler::asm_li(Register r, int32_t imm) {
    #if !PEDANTIC
        if (isS16(imm)) {
            LI(r, imm);
            return;
        }
        if ((imm & 0xffff) == 0) {
            imm = uint32_t(imm) >> 16;
            LIS(r, imm);
            return;
        }
    #endif
        asm_li32(r, imm);
    }

    void Assembler::asm_li32(Register r, int32_t imm) {
        // general case
        // TODO use ADDI instead of ORI if r != r0, impl might have 3way adder
        ORI(r, r, imm);
        LIS(r, imm>>16); // on ppc64, this sign extends
    }

    void Assembler::asm_li64(Register r, uint64_t imm) {
        underrunProtect(5*sizeof(NIns)); // must be contiguous to be patchable
        ORI(r,r,uint16_t(imm));         // r[0:15] = imm[0:15]
        ORIS(r,r,uint16_t(imm>>16));    // r[16:31] = imm[16:31]
        SLDI(r,r,32);                   // r[32:63] = r[0:31], r[0:31] = 0
        asm_li32(r, int32_t(imm>>32));  // r[0:31] = imm[32:63]
    }

    void Assembler::asm_store64(LIns *value, int32_t dr, LIns *base) {
        NanoAssert(value->isQuad());
        Register ra = getBaseReg(LIR_stqi, base, dr, GpRegs);

    #if !PEDANTIC && !defined NANOJIT_64BIT
        if (value->isop(LIR_quad) && isS16(dr) && isS16(dr+4)) {
            // quad constant and short offset
            uint64_t q = value->imm64();
            STW(R0, dr, ra);            // hi
            asm_li(R0, int32_t(q>>32)); // hi
            STW(R0, dr+4, ra);          // lo
            asm_li(R0, int32_t(q));     // lo
            return;
        }
        if (value->isop(LIR_qjoin) && isS16(dr) && isS16(dr+4)) {
            // short offset and qjoin(lo,hi) - store lo & hi separately
            RegisterMask allow = GpRegs & ~rmask(ra);
            LIns *lo = value->oprnd1();
            Register rlo = findRegFor(lo, allow);
            LIns *hi = value->oprnd2();
            Register rhi = hi == lo ? rlo : findRegFor(hi, allow & ~rmask(rlo));
            STW(rhi, dr, ra);   // hi
            STW(rlo, dr+4, ra); // lo
            return;
        }
    #endif // !PEDANTIC

        // general case for any value
    #if !defined NANOJIT_64BIT
        // on 32bit cpu's, we only use store64 for doubles
        Register rs = findRegFor(value, FpRegs);
    #else
        // if we have to choose a register, use a GPR
        Register rs = ( value->isUnusedOrHasUnknownReg()
                      ? findRegFor(value, GpRegs & ~rmask(ra))
                      : value->getReg() );

        if (rmask(rs) & GpRegs) {
        #if !PEDANTIC
            if (isS16(dr)) {
                // short offset
                STD(rs, dr, ra);
                return;
            }
        #endif
            // general case store 64bit GPR
            STDX(rs, ra, R0);
            asm_li(R0, dr);
            return;
        }
    #endif // NANOJIT_64BIT

    #if !PEDANTIC
        if (isS16(dr)) {
            // short offset
            STFD(rs, dr, ra);
            return;
        }
    #endif

        // general case for any offset
        STFDX(rs, ra, R0);
        asm_li(R0, dr);
    }
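
    // Worked example (informal): for imm = 0x1122334455667788, asm_li64 above
    // produces its fixed, patchable five-instruction sequence (execution order;
    // the assembler emits backwards, so the calls appear reversed in the source):
    //
    //     lis   r, 0x1122      ; r = 0x11223344'00000000 >> 32, sign-extended
    //     ori   r, r, 0x3344   ; r = 0x11223344
    //     sldi  r, r, 32       ; r = 0x1122334400000000
    //     oris  r, r, 0x5566   ; r = 0x1122334455660000
    //     ori   r, r, 0x7788   ; r = 0x1122334455667788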
    void Assembler::asm_cond(LIns *ins) {
        LOpcode op = ins->opcode();
        LIns *a = ins->oprnd1();
        LIns *b = ins->oprnd2();
        ConditionRegister cr = CR7;
        Register r = prepResultReg(ins, GpRegs);
        switch (op) {
        case LIR_eq: case LIR_feq:
        case LIR_qeq:
            EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
            MFCR(r);
            break;
        case LIR_lt: case LIR_ult:
        case LIR_flt: case LIR_fle:
        case LIR_qlt: case LIR_qult:
            EXTRWI(r, r, 1, 4*cr+COND_lt); // extract CR7.lt
            MFCR(r);
            break;
        case LIR_gt: case LIR_ugt:
        case LIR_fgt: case LIR_fge:
        case LIR_qgt: case LIR_qugt:
            EXTRWI(r, r, 1, 4*cr+COND_gt); // extract CR7.gt
            MFCR(r);
            break;
        case LIR_le: case LIR_ule:
        case LIR_qle: case LIR_qule:
            EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
            MFCR(r);
            CROR(CR7, eq, lt, eq);
            break;
        case LIR_ge: case LIR_uge:
        case LIR_qge: case LIR_quge:
            EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
            MFCR(r);
            CROR(CR7, eq, gt, eq);
            break;
        default:
            debug_only(outputf("%s",lirNames[ins->opcode()]);)
            TODO(asm_cond);
            break;
        }
        asm_cmp(op, a, b, cr);
    }

    void Assembler::asm_fcond(LIns *ins) {
        asm_cond(ins);
    }

    // cause 32bit sign extension to test bits
    #define isS14(i) ((int32_t((i)<<18)>>18) == (i))

    NIns* Assembler::asm_branch(bool onfalse, LIns *cond, NIns * const targ) {
        LOpcode condop = cond->opcode();
        NanoAssert(cond->isCond());

        // powerpc offsets are based on the address of the branch instruction
        NIns *patch;
    #if !PEDANTIC
        ptrdiff_t bd = targ - (_nIns-1);
        if (targ && isS24(bd))
            patch = asm_branch_near(onfalse, cond, targ);
        else
    #endif
            patch = asm_branch_far(onfalse, cond, targ);
        asm_cmp(condop, cond->oprnd1(), cond->oprnd2(), CR7);
        return patch;
    }

    NIns* Assembler::asm_branch_near(bool onfalse, LIns *cond, NIns * const targ) {
        NanoAssert(targ != 0);
        underrunProtect(4);
        ptrdiff_t bd = targ - (_nIns-1);
        NIns *patch = 0;
        if (!isS14(bd)) {
            underrunProtect(8);
            bd = targ - (_nIns-1);
            if (isS24(bd)) {
                // can't fit conditional branch offset into 14 bits, but
                // we can fit in 24, so invert the condition and branch
                // around an unconditional jump
                verbose_only(verbose_outputf("%p:", _nIns);)
                NIns *skip = _nIns;
                B(bd);
                patch = _nIns; // this is the patchable branch to the given target
                onfalse = !onfalse;
                bd = skip - (_nIns-1);
                NanoAssert(isS14(bd));
                verbose_only(verbose_outputf("branch24");)
            }
            else {
                // known far target
                return asm_branch_far(onfalse, cond, targ);
            }
        }
        ConditionRegister cr = CR7;
        switch (cond->opcode()) {
        case LIR_eq: case LIR_feq:
        case LIR_qeq:
            if (onfalse) BNE(cr,bd); else BEQ(cr,bd);
            break;
        case LIR_lt: case LIR_ult:
        case LIR_flt: case LIR_fle:
        case LIR_qlt: case LIR_qult:
            if (onfalse) BNL(cr,bd); else BLT(cr,bd);
            break;
        case LIR_le: case LIR_ule:
        case LIR_qle: case LIR_qule:
            if (onfalse) BGT(cr,bd); else BLE(cr,bd);
            break;
        case LIR_gt: case LIR_ugt:
        case LIR_fgt: case LIR_fge:
        case LIR_qgt: case LIR_qugt:
            if (onfalse) BNG(cr,bd); else BGT(cr,bd);
            break;
        case LIR_ge: case LIR_uge:
        case LIR_qge: case LIR_quge:
            if (onfalse) BLT(cr,bd); else BGE(cr,bd);
            break;
        default:
            debug_only(outputf("%s",lirNames[cond->opcode()]);)
            TODO(unknown_cond);
        }
        if (!patch)
            patch = _nIns;
        return patch;
    }
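
    // Worked example (informal): when the conditional displacement no longer fits
    // in 14 bits but still fits in 24, asm_branch_near above inverts the condition
    // and hops over an unconditional branch.  For a "beq cr7, targ" that became
    // too far, the emitted shape is roughly:
    //
    //     bne  cr7, skip   ; inverted condition, tiny forward hop
    //     b    targ        ; 24-bit displacement; this is the patchable branch
    //   skip:
    //
    // Targets unreachable even with 24 bits fall through to the CTR-based far
    // branch below.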
    // general case branch to any address (using CTR)
    NIns* Assembler::asm_branch_far(bool onfalse, LIns *cond, NIns * const targ) {
        LOpcode condop = cond->opcode();
        ConditionRegister cr = CR7;
        underrunProtect(16);
        switch (condop) {
        case LIR_eq: case LIR_feq:
        case LIR_qeq:
            if (onfalse) BNECTR(cr); else BEQCTR(cr);
            break;
        case LIR_lt: case LIR_ult:
        case LIR_qlt: case LIR_qult:
        case LIR_flt: case LIR_fle:
            if (onfalse) BNLCTR(cr); else BLTCTR(cr);
            break;
        case LIR_le: case LIR_ule:
        case LIR_qle: case LIR_qule:
            if (onfalse) BGTCTR(cr); else BLECTR(cr);
            break;
        case LIR_gt: case LIR_ugt:
        case LIR_qgt: case LIR_qugt:
        case LIR_fgt: case LIR_fge:
            if (onfalse) BNGCTR(cr); else BGTCTR(cr);
            break;
        case LIR_ge: case LIR_uge:
        case LIR_qge: case LIR_quge:
            if (onfalse) BLTCTR(cr); else BGECTR(cr);
            break;
        default:
            debug_only(outputf("%s",lirNames[condop]);)
            TODO(unknown_cond);
        }

    #if !defined NANOJIT_64BIT
        MTCTR(R0);
        asm_li32(R0, (int)targ);
    #else
        MTCTR(R0);
        if (!targ || !isU32(uintptr_t(targ))) {
            asm_li64(R0, uint64_t(targ));
        } else {
            asm_li32(R0, uint32_t(uintptr_t(targ)));
        }
    #endif
        return _nIns;
    }

    void Assembler::asm_cmp(LOpcode condop, LIns *a, LIns *b, ConditionRegister cr) {
        RegisterMask allow = condop >= LIR_feq && condop <= LIR_fge ? FpRegs : GpRegs;
        Register ra = findRegFor(a, allow);

    #if !PEDANTIC
        if (b->isconst()) {
            int32_t d = b->imm32();
            if (isS16(d)) {
                if (condop >= LIR_eq && condop <= LIR_ge) {
                    CMPWI(cr, ra, d);
                    return;
                }
                if (condop >= LIR_qeq && condop <= LIR_qge) {
                    CMPDI(cr, ra, d);
                    TODO(cmpdi);
                    return;
                }
            }
            if (isU16(d)) {
                if (condop == LIR_eq || (condop >= LIR_ult && condop <= LIR_uge)) {
                    CMPLWI(cr, ra, d);
                    return;
                }
                if (condop == LIR_qeq || (condop >= LIR_qult && condop <= LIR_quge)) {
                    CMPLDI(cr, ra, d);
                    TODO(cmpldi);
                    return;
                }
            }
        }
    #endif

        // general case
        Register rb = b==a ? ra : findRegFor(b, allow & ~rmask(ra));
        if (condop >= LIR_eq && condop <= LIR_ge) {
            CMPW(cr, ra, rb);
        }
        else if (condop >= LIR_ult && condop <= LIR_uge) {
            CMPLW(cr, ra, rb);
        }
        else if (condop >= LIR_qeq && condop <= LIR_qge) {
            CMPD(cr, ra, rb);
        }
        else if (condop >= LIR_qult && condop <= LIR_quge) {
            CMPLD(cr, ra, rb);
        }
        else if (condop >= LIR_feq && condop <= LIR_fge) {
            // set the lt/gt bit for fle/fge.  We don't do this for
            // int/uint because in those cases we can invert the branch condition.
            // for float, we can't because of unordered comparisons
            if (condop == LIR_fle)
                CROR(cr, lt, lt, eq); // lt = lt|eq
            else if (condop == LIR_fge)
                CROR(cr, gt, gt, eq); // gt = gt|eq
            FCMPU(cr, ra, rb);
        }
        else {
            TODO(asm_cmp);
        }
    }
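
    // Worked example (informal): for LIR_fle, asm_cmp above emits, in execution
    // order,
    //
    //     fcmpu cr7, fa, fb            ; sets cr7.lt / cr7.gt / cr7.eq / cr7.un
    //     cror  cr7.lt, cr7.lt, cr7.eq ; lt := lt|eq, so "lt" now means "le"
    //
    // after which asm_cond/asm_branch only need to test the single cr7.lt bit.
    // An unordered (NaN) compare leaves both lt and eq clear, so "le" correctly
    // comes out false.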
    void Assembler::asm_ret(LIns *ins) {
        genEpilogue();
        assignSavedRegs();
        LIns *value = ins->oprnd1();
        Register r = ins->isop(LIR_ret) ? R3 : F1;
        findSpecificRegFor(value, r);
    }

    void Assembler::asm_nongp_copy(Register r, Register s) {
        // PPC doesn't support any GPR<->FPR moves
        NanoAssert((rmask(r) & FpRegs) && (rmask(s) & FpRegs));
        FMR(r, s);
    }

    void Assembler::asm_restore(LIns *i, Reservation *, Register r) {
        int d;
        if (i->isop(LIR_alloc)) {
            d = disp(i);
            ADDI(r, FP, d);
        }
        else if (i->isconst()) {
            if (!i->getArIndex()) {
                i->markAsClear();
            }
            asm_li(r, i->imm32());
        }
        else {
            d = findMemFor(i);
            if (IsFpReg(r)) {
                NanoAssert(i->isQuad());
                LFD(r, d, FP);
            } else if (i->isQuad()) {
                LD(r, d, FP);
            } else {
                LWZ(r, d, FP);
            }
        }
    }

    Register Assembler::asm_prep_fcall(Reservation*, LIns *ins) {
        return prepResultReg(ins, rmask(F1));
    }

    void Assembler::asm_int(LIns *ins) {
        Register rr = prepResultReg(ins, GpRegs);
        asm_li(rr, ins->imm32());
    }

    void Assembler::asm_fneg(LIns *ins) {
        Register rr = prepResultReg(ins, FpRegs);
        Register ra = findRegFor(ins->oprnd1(), FpRegs);
        FNEG(rr,ra);
    }

    void Assembler::asm_param(LIns *ins) {
        uint32_t a = ins->paramArg();
        uint32_t kind = ins->paramKind();
        if (kind == 0) {
            // ordinary param
            // first eight args always in R3..R10 for PPC
            if (a < 8) {
                // incoming arg in register
                prepResultReg(ins, rmask(argRegs[a]));
            } else {
                // todo: support stack based args, arg 0 is at [FP+off] where off
                // is the # of regs to be pushed in genPrologue()
                TODO(asm_param_stk);
            }
        }
        else {
            // saved param
            prepResultReg(ins, rmask(savedRegs[a]));
        }
    }

    void Assembler::asm_call(LIns *ins) {
        const CallInfo* call = ins->callInfo();
        ArgSize sizes[MAXARGS];
        uint32_t argc = call->get_sizes(sizes);

        bool indirect;
        if (!(indirect = call->isIndirect())) {
            verbose_only(if (_logc->lcbits & LC_Assembly)
                outputf(" %p:", _nIns);
            )
            br((NIns*)call->_address, 1);
        } else {
            // Indirect call: we assign the address arg to R11 since it's not
            // used for regular arguments, and is otherwise scratch since it's
            // clobbered by the call.
            underrunProtect(8); // underrunProtect might clobber CTR
            BCTRL();
            MTCTR(R11);
            asm_regarg(ARGSIZE_P, ins->arg(--argc), R11);
        }

        int param_size = 0;

        Register r = R3;
        Register fr = F1;
        for(uint32_t i = 0; i < argc; i++) {
            uint32_t j = argc - i - 1;
            ArgSize sz = sizes[j];
            LInsp arg = ins->arg(j);
            if (sz & ARGSIZE_MASK_INT) {
                // GP arg
                if (r <= R10) {
                    asm_regarg(sz, arg, r);
                    r = nextreg(r);
                    param_size += sizeof(void*);
                } else {
                    // put arg on stack
                    TODO(stack_int32);
                }
            } else if (sz == ARGSIZE_F) {
                // double
                if (fr <= F13) {
                    asm_regarg(sz, arg, fr);
                    fr = nextreg(fr);
                #ifdef NANOJIT_64BIT
                    r = nextreg(r);
                #else
                    r = nextreg(nextreg(r)); // skip 2 gpr's
                #endif
                    param_size += sizeof(double);
                } else {
                    // put arg on stack
                    TODO(stack_double);
                }
            } else {
                TODO(ARGSIZE_UNK);
            }
        }
        if (param_size > max_param_size)
            max_param_size = param_size;
    }
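
    // Worked example (informal, assuming the 32-bit Darwin/AIX-style convention
    // referenced in the header comment): for a call shaped like f(int a, double d,
    // int b), the loop above assigns a -> R3, d -> F1 (also skipping R4/R5, the
    // two GPR slots the double occupies in the parameter area), and b -> R6;
    // param_size ends up 4 + 8 + 4 = 16 bytes, which feeds max_param_size and
    // hence the frame size computed in genPrologue().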
    void Assembler::asm_regarg(ArgSize sz, LInsp p, Register r) {
        NanoAssert(r != UnknownReg);
        if (sz & ARGSIZE_MASK_INT)
        {
    #ifdef NANOJIT_64BIT
            if (sz == ARGSIZE_I) {
                // sign extend 32->64
                EXTSW(r, r);
            } else if (sz == ARGSIZE_U) {
                // zero extend 32->64
                CLRLDI(r, r, 32);
            }
    #endif
            // arg goes in specific register
            if (p->isconst()) {
                asm_li(r, p->imm32());
            } else {
                if (p->isUsed()) {
                    if (!p->hasKnownReg()) {
                        // load it into the arg reg
                        int d = findMemFor(p);
                        if (p->isop(LIR_alloc)) {
                            NanoAssert(isS16(d));
                            ADDI(r, FP, d);
                        } else if (p->isQuad()) {
                            LD(r, d, FP);
                        } else {
                            LWZ(r, d, FP);
                        }
                    } else {
                        // it must be in a saved reg
                        MR(r, p->getReg());
                    }
                }
                else {
                    // this is the last use, so fine to assign it
                    // to the scratch reg, it's dead after this point.
                    findSpecificRegFor(p, r);
                }
            }
        }
        else if (sz == ARGSIZE_F) {
            if (p->isUsed()) {
                Register rp = p->getReg();
                if (!isKnownReg(rp) || !IsFpReg(rp)) {
                    // load it into the arg reg
                    int d = findMemFor(p);
                    LFD(r, d, FP);
                } else {
                    // it must be in a saved reg
                    NanoAssert(IsFpReg(r) && IsFpReg(rp));
                    FMR(r, rp);
                }
            }
            else {
                // this is the last use, so fine to assign it
                // to the scratch reg, it's dead after this point.
                findSpecificRegFor(p, r);
            }
        }
        else {
            TODO(ARGSIZE_UNK);
        }
    }

    void Assembler::asm_spill(Register rr, int d, bool /* pop */, bool quad) {
        (void)quad;
        if (d) {
            if (IsFpReg(rr)) {
                NanoAssert(quad);
                STFD(rr, d, FP);
            }
    #ifdef NANOJIT_64BIT
            else if (quad) {
                STD(rr, d, FP);
            }
    #endif
            else {
                NanoAssert(!quad);
                STW(rr, d, FP);
            }
        }
    }
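
    // Note (informal): the immediate forms chosen in asm_arith below differ in
    // how they treat the 16-bit constant.  The arithmetic immediates (addi, subi,
    // mulli) sign-extend it, so rhsc = -1 can be encoded directly as
    // "addi rr, ra, -1"; the logical immediates (ori, andi., xori) zero-extend
    // it, so a bitwise op with -1 instead falls through to the register-register
    // general case, with the constant materialized into a scratch register first.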
    void Assembler::asm_arith(LIns *ins) {
        LOpcode op = ins->opcode();
        LInsp lhs = ins->oprnd1();
        LInsp rhs = ins->oprnd2();
        RegisterMask allow = GpRegs;
        Register rr = prepResultReg(ins, allow);
        Register ra = findRegFor(lhs, GpRegs);

        if (rhs->isconst()) {
            int32_t rhsc = rhs->imm32();
            if (isS16(rhsc)) {
                // ppc arith immediate ops sign-extend the imm16 value
                switch (op) {
                case LIR_add:
                case LIR_iaddp:
                IF_64BIT(case LIR_qiadd:)
                IF_64BIT(case LIR_qaddp:)
                    ADDI(rr, ra, rhsc);
                    return;
                case LIR_sub:
                    SUBI(rr, ra, rhsc);
                    return;
                case LIR_mul:
                    MULLI(rr, ra, rhsc);
                    return;
                }
            }
            if (isU16(rhsc)) {
                // ppc logical immediate ops zero-extend the imm16 value
                switch (op) {
                IF_64BIT(case LIR_qior:)
                case LIR_or:
                    ORI(rr, ra, rhsc);
                    return;
                IF_64BIT(case LIR_qiand:)
                case LIR_and:
                    ANDI(rr, ra, rhsc);
                    return;
                IF_64BIT(case LIR_qxor:)
                case LIR_xor:
                    XORI(rr, ra, rhsc);
                    return;
                }
            }

            // LIR shift ops only use the last 5 bits of the shift const
            switch (op) {
            case LIR_lsh:
                SLWI(rr, ra, rhsc&31);
                return;
            case LIR_ush:
                SRWI(rr, ra, rhsc&31);
                return;
            case LIR_rsh:
                SRAWI(rr, ra, rhsc&31);
                return;
            }
        }

        // general case, put rhs in a register
        Register rb = rhs==lhs ? ra : findRegFor(rhs, GpRegs&~rmask(ra));
        switch (op) {
        IF_64BIT(case LIR_qiadd:)
        IF_64BIT(case LIR_qaddp:)
        case LIR_add:
        case LIR_iaddp:
            ADD(rr, ra, rb);
            break;
        IF_64BIT(case LIR_qiand:)
        case LIR_and:
            AND(rr, ra, rb);
            break;
        IF_64BIT(case LIR_qior:)
        case LIR_or:
            OR(rr, ra, rb);
            break;
        IF_64BIT(case LIR_qxor:)
        case LIR_xor:
            XOR(rr, ra, rb);
            break;
        case LIR_sub:
            SUBF(rr, rb, ra);
            break;
        case LIR_lsh:
            SLW(rr, ra, R0);
            ANDI(R0, rb, 31);
            break;
        case LIR_rsh:
            SRAW(rr, ra, R0);
            ANDI(R0, rb, 31);
            break;
        case LIR_ush:
            SRW(rr, ra, R0);
            ANDI(R0, rb, 31);
            break;
        case LIR_mul:
            MULLW(rr, ra, rb);
            break;
    #ifdef NANOJIT_64BIT
        case LIR_qilsh:
            SLD(rr, ra, R0);
            ANDI(R0, rb, 63);
            break;
        case LIR_qursh:
            SRD(rr, ra, R0);
            ANDI(R0, rb, 63);
            break;
        case LIR_qirsh:
            SRAD(rr, ra, R0);
            ANDI(R0, rb, 63);
            TODO(qirsh);
            break;
    #endif
        default:
            debug_only(outputf("%s",lirNames[op]);)
            TODO(asm_arith);
        }
    }

    void Assembler::asm_fop(LIns *ins) {
        LOpcode op = ins->opcode();
        LInsp lhs = ins->oprnd1();
        LInsp rhs = ins->oprnd2();
        RegisterMask allow = FpRegs;
        Register rr = prepResultReg(ins, allow);
        Register ra, rb;
        findRegFor2b(allow, lhs, ra, rhs, rb);
        switch (op) {
        case LIR_fadd: FADD(rr, ra, rb); break;
        case LIR_fsub: FSUB(rr, ra, rb); break;
        case LIR_fmul: FMUL(rr, ra, rb); break;
        case LIR_fdiv: FDIV(rr, ra, rb); break;
        default:
            debug_only(outputf("%s",lirNames[op]);)
            TODO(asm_fop);
        }
    }

    void Assembler::asm_i2f(LIns *ins) {
        Register r = prepResultReg(ins, FpRegs);
        Register v = findRegFor(ins->oprnd1(), GpRegs);
        const int d = 16; // naturally aligned

    #if defined NANOJIT_64BIT && !PEDANTIC
        FCFID(r, r);    // convert to double
        LFD(r, d, SP);  // load into fpu register
        STD(v, d, SP);  // save int64
        EXTSW(v, v);    // extend sign destructively, ok since oprnd1 only is 32bit
    #else
        FSUB(r, r, F0);
        LFD(r, d, SP); // scratch area in outgoing linkage area
        STW(R0, d+4, SP);
        XORIS(R0, v, 0x8000);
        LFD(F0, d, SP);
        STW(R0, d+4, SP);
        LIS(R0, 0x8000);
        STW(R0, d, SP);
        LIS(R0, 0x4330);
    #endif
    }

    void Assembler::asm_u2f(LIns *ins) {
        Register r = prepResultReg(ins, FpRegs);
        Register v = findRegFor(ins->oprnd1(), GpRegs);
        const int d = 16;

    #if defined NANOJIT_64BIT && !PEDANTIC
        FCFID(r, r);      // convert to double
        LFD(r, d, SP);    // load into fpu register
        STD(v, d, SP);    // save int64
        CLRLDI(v, v, 32); // zero-extend destructively
    #else
        FSUB(r, r, F0);
        LFD(F0, d, SP);
        STW(R0, d+4, SP);
        LI(R0, 0);
        LFD(r, d, SP);
        STW(v, d+4, SP);
        STW(R0, d, SP);
        LIS(R0, 0x4330);
    #endif
    }

    void Assembler::asm_promote(LIns *ins) {
        LOpcode op = ins->opcode();
        Register r = prepResultReg(ins, GpRegs);
        Register v = findRegFor(ins->oprnd1(), GpRegs);

        switch (op) {
        default:
            debug_only(outputf("%s",lirNames[op]));
            TODO(asm_promote);
        case LIR_u2q:
            CLRLDI(r, v, 32); // clears the top 32 bits
            break;
        case LIR_i2q:
            EXTSW(r, v);
            break;
        }
    }
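
    // Worked example (informal) for the 32-bit asm_i2f path above: the scratch
    // slot at SP+16 is first filled with the bit pattern 0x43300000_80000000
    // (the double 2^52 + 2^31) and loaded into F0; the low word is then replaced
    // with (v XOR 0x80000000) and the slot reloaded, which yields the double
    // 2^52 + 2^31 + v for any signed 32-bit v.  Subtracting F0 leaves exactly
    // double(v), e.g. v = -1 gives (2^52 + 2^31 - 1) - (2^52 + 2^31) = -1.0.
    // asm_u2f uses the same trick with bias 2^52 and no sign-bit flip.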
    void Assembler::asm_quad(LIns *ins) {
    #ifdef NANOJIT_64BIT
        Register r = ins->getReg();
        if (isKnownReg(r) && (rmask(r) & FpRegs)) {
            // FPR already assigned, fine, use it
            freeRsrcOf(ins, false);
        } else {
            // use a GPR register; it's okay to copy doubles with GPR's
            // but *not* okay to copy non-doubles with FPR's
            r = prepResultReg(ins, GpRegs);
        }
    #else
        Register r = prepResultReg(ins, FpRegs);
    #endif

        if (rmask(r) & FpRegs) {
            union {
                double d;
                struct {
                    int32_t hi, lo;
                } w;
            };
            d = ins->imm64f();
            LFD(r, 12, SP);
            STW(R0, 12, SP);
            asm_li(R0, w.hi);
            STW(R0, 16, SP);
            asm_li(R0, w.lo);
        }
        else {
            int64_t q = ins->imm64();
            if (isS32(q)) {
                asm_li(r, int32_t(q));
                return;
            }
            RLDIMI(r,R0,32,0); // or 32,32?
            asm_li(R0, int32_t(q>>32)); // hi bits into R0
            asm_li(r, int32_t(q));      // lo bits into dest reg
        }
    }

    void Assembler::br(NIns* addr, int link) {
        // destination unknown, then use maximum branch possible
        if (!addr) {
            br_far(addr,link);
            return;
        }

        // powerpc offsets are based on the address of the branch instruction
        underrunProtect(4);                  // ensure _nIns is addr of Bx
        ptrdiff_t offset = addr - (_nIns-1); // we want ptr diff's implicit >>2 here

    #if !PEDANTIC
        if (isS24(offset)) {
            Bx(offset, 0, link); // b addr or bl addr
            return;
        }
        ptrdiff_t absaddr = addr - (NIns*)0; // ptr diff implies >>2
        if (isS24(absaddr)) {
            Bx(absaddr, 1, link); // ba addr or bla addr
            return;
        }
    #endif // !PEDANTIC

        br_far(addr,link);
    }

    void Assembler::br_far(NIns* addr, int link) {
        // far jump.
        // can't have a page break in this sequence, because the break
        // would also clobber ctr and r2.  We use R2 here because it's not available
        // to the register allocator, and we use R0 everywhere else as scratch, so using
        // R2 here avoids clobbering anything else besides CTR.
    #ifdef NANOJIT_64BIT
        if (addr==0 || !isU32(uintptr_t(addr))) {
            // really far jump to 64bit abs addr
            underrunProtect(28); // 7 instructions
            BCTR(link);
            MTCTR(R2);
            asm_li64(R2, uintptr_t(addr)); // 5 instructions
            return;
        }
    #endif
        underrunProtect(16);
        BCTR(link);
        MTCTR(R2);
        asm_li32(R2, uint32_t(uintptr_t(addr))); // 2 instructions
    }
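
    // Worked example (informal): on PPC32 a far (or not-yet-known) target costs
    // the fixed 16-byte sequence, in execution order,
    //
    //     lis   r2, (target >> 16)
    //     ori   r2, r2, (target & 0xffff)
    //     mtctr r2
    //     bctr                 ; or bctrl when link != 0
    //
    // which is also the shape nPatchBranch() later rewrites once the real target
    // is known; the 64-bit variant uses the five-instruction asm_li64 load
    // instead, hence underrunProtect(28).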
    void Assembler::underrunProtect(int bytes) {
        NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
        int instr = (bytes + sizeof(NIns) - 1) / sizeof(NIns);
        NIns *pc = _nIns;
        NIns *top = codeStart; // this may be in a normal code chunk or an exit code chunk

    #if PEDANTIC
        // pedanticTop is based on the last call to underrunProtect; any time we call
        // underrunProtect and would use more than what's already protected, then insert
        // a page break jump.  Sometimes, this will be to a new page, usually it's just
        // the next instruction and the only effect is to clobber R2 & CTR

        NanoAssert(pedanticTop >= top);
        if (pc - instr < pedanticTop) {
            // no page break required, but insert a far branch anyway just to be difficult
    #ifdef NANOJIT_64BIT
            const int br_size = 7;
    #else
            const int br_size = 4;
    #endif
            if (pc - instr - br_size < top) {
                // really do need a page break
                verbose_only(if (_logc->lcbits & LC_Assembly) outputf("newpage %p:", pc);)
                codeAlloc();
            }
            // now emit the jump, but make sure we won't need another page break.
            // we're pedantic, but not *that* pedantic.
            pedanticTop = _nIns - br_size;
            br(pc, 0);
            pedanticTop = _nIns - instr;
        }
    #else
        if (pc - instr < top) {
            verbose_only(if (_logc->lcbits & LC_Assembly) outputf("newpage %p:", pc);)
            // This may be in a normal code chunk or an exit code chunk.
            codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
            // This jump will call underrunProtect again, but since we're on a new
            // page, nothing will happen.
            br(pc, 0);
        }
    #endif
    }

    void Assembler::asm_cmov(LIns *ins) {
        NanoAssert(ins->isop(LIR_cmov) || ins->isop(LIR_qcmov));
        LIns* cond = ins->oprnd1();
        LIns* iftrue = ins->oprnd2();
        LIns* iffalse = ins->oprnd3();

        NanoAssert(cond->isCmp());
        NanoAssert(iftrue->isQuad() == iffalse->isQuad());

        // fixme: we could handle fpu registers here, too, since we're just branching
        Register rr = prepResultReg(ins, GpRegs);
        findSpecificRegFor(iftrue, rr);
        Register rf = findRegFor(iffalse, GpRegs & ~rmask(rr));
        NIns *after = _nIns;
        verbose_only(if (_logc->lcbits & LC_Assembly) outputf("%p:",after);)
        MR(rr, rf);
        asm_branch(false, cond, after);
    }

    RegisterMask Assembler::hint(LIns *i, RegisterMask allow) {
        LOpcode op = i->opcode();
        RegisterMask prefer = ~0LL;
        if (op == LIR_icall || op == LIR_qcall)
            prefer = rmask(R3);
        else if (op == LIR_fcall)
            prefer = rmask(F1);
        else if (op == LIR_param) {
            if (i->paramArg() < 8) {
                prefer = rmask(argRegs[i->paramArg()]);
            }
        }
        // narrow the allow set to whatever is preferred and also free
        if (_allocator.free & allow & prefer)
            allow &= prefer;
        return allow;
    }

    void Assembler::asm_neg_not(LIns *ins) {
        Register rr = prepResultReg(ins, GpRegs);
        Register ra = findRegFor(ins->oprnd1(), GpRegs);
        if (ins->isop(LIR_neg)) {
            NEG(rr, ra);
        } else {
            NOT(rr, ra);
        }
    }

    void Assembler::asm_qlo(LIns *ins) {
        Register rr = prepResultReg(ins, GpRegs);
        int d = findMemFor(ins->oprnd1());
        LWZ(rr, d+4, FP);
    }

    void Assembler::asm_qhi(LIns *ins) {
        Register rr = prepResultReg(ins, GpRegs);
        int d = findMemFor(ins->oprnd1());
        LWZ(rr, d, FP);
        TODO(asm_qhi);
    }

    void Assembler::nInit(AvmCore*) {
    }

    void Assembler::nBeginAssembly() {
        max_param_size = 0;
    }

    void Assembler::nativePageSetup() {
        NanoAssert(!_inExit);
        if (!_nIns) {
            codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
            IF_PEDANTIC( pedanticTop = _nIns; )
        }
        if (!_nExitIns) {
            codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes));
        }
    }

    void Assembler::nativePageReset() {
    }

    // Increment the 32-bit profiling counter at pCtr, without
    // changing any registers.
    verbose_only(
    void Assembler::asm_inc_m32(uint32_t* /*pCtr*/) {
    }
    )
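
    // Worked example (informal) for nPatchBranch() below: a 32-bit far branch is
    // laid down as a lis/ori pair that loads the (initially unknown) target, so
    // patching it to, say, 0x12345678 just rewrites the two immediates:
    //
    //     lis   rd, 0x1234
    //     ori   rd, rd, 0x5678
    //
    // The short 24-bit (b) and 14-bit (bc) forms instead get the new displacement
    // OR'd into the offset field of the single branch instruction.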
    void Assembler::nPatchBranch(NIns *branch, NIns *target) {
        // ppc relative offsets are based on the addr of the branch instruction
        ptrdiff_t bd = target - branch;
        if (branch[0] == PPC_b) {
            // unconditional, 24bit offset.  Whoever generated the unpatched jump
            // must have known the final size would fit in 24bits!  otherwise the
            // jump would be (lis,ori,mtctr,bctr) and we'd be patching the lis,ori.
            NanoAssert(isS24(bd));
            branch[0] |= (bd & 0xffffff) << 2;
        }
        else if ((branch[0] & PPC_bc) == PPC_bc) {
            // conditional, 14bit offset.  Whoever generated the unpatched jump
            // must have known the final size would fit in 14bits!  otherwise the
            // jump would be (lis,ori,mtctr,bcctr) and we'd be patching the lis,ori below.
            NanoAssert(isS14(bd));
            NanoAssert(((branch[0] & 0x3fff)<<2) == 0);
            branch[0] |= (bd & 0x3fff) << 2;
            TODO(patch_bc);
        }
    #ifdef NANOJIT_64BIT
        // patch 64bit branch
        else if ((branch[0] & ~(31<<21)) == PPC_addis) {
            // general branch, using lis,ori,sldi,oris,ori to load the const 64bit addr.
            Register rd = Register((branch[0] >> 21) & 31);
            NanoAssert(branch[1] == (PPC_ori  | GPR(rd)<<21 | GPR(rd)<<16));
            NanoAssert(branch[3] == (PPC_oris | GPR(rd)<<21 | GPR(rd)<<16));
            NanoAssert(branch[4] == (PPC_ori  | GPR(rd)<<21 | GPR(rd)<<16));
            uint64_t imm = uintptr_t(target);
            uint32_t lo = uint32_t(imm);
            uint32_t hi = uint32_t(imm>>32);
            branch[0] = PPC_addis | GPR(rd)<<21 | uint16_t(hi>>16);
            branch[1] = PPC_ori   | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(hi);
            branch[3] = PPC_oris  | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(lo>>16);
            branch[4] = PPC_ori   | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(lo);
        }
    #else // NANOJIT_64BIT
        // patch 32bit branch
        else if ((branch[0] & ~(31<<21)) == PPC_addis) {
            // general branch, using lis,ori to load the const addr.
            // patch a lis,ori sequence with a 32bit value
            Register rd = Register((branch[0] >> 21) & 31);
            NanoAssert(branch[1] == (PPC_ori | GPR(rd)<<21 | GPR(rd)<<16));
            uint32_t imm = uint32_t(target);
            branch[0] = PPC_addis | GPR(rd)<<21 | uint16_t(imm >> 16);         // lis rd, imm >> 16
            branch[1] = PPC_ori   | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(imm); // ori rd, rd, imm & 0xffff
        }
    #endif // !NANOJIT_64BIT
        else {
            TODO(unknown_patch);
        }
    }

    static int cntzlw(int set) {
        // On PowerPC, prefer higher registers, to minimize
        // size of nonvolatile area that must be saved.
        register Register i;
    #ifdef __GNUC__
        asm ("cntlzw %0,%1" : "=r" (i) : "r" (set));
    #else // __GNUC__
    # error("unsupported compiler")
    #endif // __GNUC__
        return 31-i;
    }

    Register Assembler::nRegisterAllocFromSet(RegisterMask set) {
        Register i;
        // note, deliberate truncation of 64->32 bits
        if (set & 0xffffffff) {
            i = Register(cntzlw(int(set)));        // gp reg
        } else {
            i = Register(32+cntzlw(int(set>>32))); // fp reg
        }
        _allocator.free &= ~rmask(i);
        return i;
    }

    void Assembler::nRegisterResetAll(RegAlloc &regs) {
        regs.clear();
        regs.free = SavedRegs | 0x1ff8 /* R3-12 */ | 0x3ffe00000000LL /* F1-13 */;
        debug_only(regs.managed = regs.free);
    }

    #ifdef NANOJIT_64BIT
    void Assembler::asm_qbinop(LIns *ins) {
        LOpcode op = ins->opcode();
        switch (op) {
        case LIR_qaddp:
        case LIR_qior:
        case LIR_qiand:
        case LIR_qursh:
        case LIR_qirsh:
        case LIR_qilsh:
        case LIR_qxor:
        case LIR_qiadd:
            asm_arith(ins);
            break;
        default:
            debug_only(outputf("%s",lirNames[op]));
            TODO(asm_qbinop);
        }
    }
    #endif // NANOJIT_64BIT

    void Assembler::nFragExit(LIns*) {
        TODO(nFragExit);
    }

    void Assembler::asm_jtbl(LIns* ins, NIns** native_table) {
        // R0 = index*4, R2 = table, CTR = computed address to jump to.
        // must ensure no page breaks in here because R2 & CTR can get clobbered.
        Register indexreg = findRegFor(ins->oprnd1(), GpRegs);
    #ifdef NANOJIT_64BIT
        underrunProtect(9*4);
        BCTR(0);                              // jump to address in CTR
        MTCTR(R2);                            // CTR = R2
        LDX(R2, R2, R0);                      // R2 = [table + index*8]
        SLDI(R0, indexreg, 3);                // R0 = index*8
        asm_li64(R2, uint64_t(native_table)); // R2 = table (5 instr)
    #else // 64bit
        underrunProtect(6*4);
        BCTR(0);                              // jump to address in CTR
        MTCTR(R2);                            // CTR = R2
        LWZX(R2, R2, R0);                     // R2 = [table + index*4]
        SLWI(R0, indexreg, 2);                // R0 = index*4
        asm_li(R2, int32_t(native_table));    // R2 = table (up to 2 instructions)
    #endif // 64bit
    }

    void Assembler::swapCodeChunks() {
        SWAP(NIns*, _nIns, _nExitIns);
        SWAP(NIns*, codeStart, exitStart);
        SWAP(NIns*, codeEnd, exitEnd);
        verbose_only( SWAP(size_t, codeBytes, exitBytes); )
    }

} // namespace nanojit

#endif // FEATURE_NANOJIT && NANOJIT_PPC