gecko/js/src/nanojit/NativePPC.cpp
Edwin Smith d7a89e77b5 Add LIR_jtbl instruction, and patch up lirasm to remove LIR_ji (bug 465582 r=nnethercote+, rreitmai+)
New opcode: LIR_jtbl.  jtbl takes a uint32_t index and a table of label
references (LIns**), representing a jump to one of the labels.

The index must be in range (range checking is not included in the opcode).

The table is allocated in memory that lives at least as long as the LIR; this
is accomplished by allocating it from the same Allocator used by LirBuffer.

In the x86 backend, this is implemented with a simple jmp [R*4+ADDR], where
ADDR is the address of the table.  I added a new dataAllocator (Allocator&)
parameter to Assembler, which is used for allocating data alongside code
(data and code have the same lifetime).  The x86 backend allocates the final
table of addresses from this allocator and embeds the pointer to the table
in the code.

In other backends more than one instruction is needed, due either to the
limited range of the constant part of the addressing mode (ppc, arm) or to
the lack of full-range pc-relative indexing (x64, ppc64).

Anyone generating LIR code that uses LIR_jtbl must also generate a
LIR_regfence instruction after each label reachable by a forward edge
from the LIR_jtbl.  This works around the register allocator's inability
to merge register states between two or more pre-existing targets.
LIR_regfence is not required for backward edges; see the sketch below.
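
For illustration, the expected generation pattern looks roughly like this
(a sketch only -- insJtbl/setTarget are stand-ins for the actual
LirWriter/LIns entry points, whose names and signatures may differ):

    LIns* jtbl = lir->insJtbl(index, ncases);   // LIR_jtbl, ncases-entry table
    for (uint32_t i = 0; i < ncases; i++) {
        LIns* label = lir->ins0(LIR_label);     // target of a forward edge
        jtbl->setTarget(i, label);              // hypothetical table hookup
        lir->ins0(LIR_regfence);                // required after each target
        // ... code for case i ...
    }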

--HG--
extra : convert_revision : ee709eaaa30f720f77ab863ba4c9e6d10d69982b
2009-11-11 14:38:12 -05:00

/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is [Open Source Virtual Machine].
*
* The Initial Developer of the Original Code is
* Adobe System Incorporated.
* Portions created by the Initial Developer are Copyright (C) 2008
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Adobe AS3 Team
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nanojit.h"
#if defined FEATURE_NANOJIT && defined NANOJIT_PPC
namespace nanojit
{
const Register Assembler::retRegs[] = { R3, R4 }; // high=R3, low=R4
const Register Assembler::argRegs[] = { R3, R4, R5, R6, R7, R8, R9, R10 };
const Register Assembler::savedRegs[] = {
#if !defined NANOJIT_64BIT
R13,
#endif
R14, R15, R16, R17, R18, R19, R20, R21, R22,
R23, R24, R25, R26, R27, R28, R29, R30
};
const char *regNames[] = {
"r0", "sp", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
"f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
};
const char *bitNames[] = { "lt", "gt", "eq", "so" };
#define TODO(x) do{ avmplus::AvmLog(#x); NanoAssertMsgf(false, "%s", #x); } while(0)
/*
* see http://developer.apple.com/documentation/developertools/Conceptual/LowLevelABI/index.html
* stack layout (addresses increase going down)
* sp -> out linkage area
* out parameter area
* local variables
* saved registers
* sp' -> in linkage area
* in parameter area
*
* linkage area layout:
* PPC32 PPC64
* sp+0 sp+0 saved sp
* sp+4 sp+8 saved cr
* sp+8 sp+16 saved lr
* sp+12 sp+24 reserved
*/
const int linkage_size = 6*sizeof(void*);
const int lr_offset = 2*sizeof(void*); // linkage.lr
const int cr_offset = 1*sizeof(void*); // linkage.cr
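// Concretely (derived from the sizes above):
//   PPC32: linkage_size = 24, lr_offset = 8,  cr_offset = 4
//   PPC64: linkage_size = 48, lr_offset = 16, cr_offset = 8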
NIns* Assembler::genPrologue() {
// mflr r0
// stw r0, lr_offset(sp)
// stwu sp, -framesize(sp)
// activation frame is 4 bytes per entry even on 64bit machines
uint32_t stackNeeded = max_param_size + linkage_size + _activation.tos * 4;
uint32_t aligned = alignUp(stackNeeded, NJ_ALIGN_STACK);
UNLESS_PEDANTIC( if (isS16(aligned)) {
STPU(SP, -aligned, SP); // *(sp-aligned) = sp; sp -= aligned
} else ) {
STPUX(SP, SP, R0);
asm_li(R0, -aligned);
}
NIns *patchEntry = _nIns;
MR(FP,SP); // save SP to use as a FP
STP(FP, cr_offset, SP); // cheat and save our FP in linkage.cr
STP(R0, lr_offset, SP); // save LR in linkage.lr
MFLR(R0);
return patchEntry;
}
NIns* Assembler::genEpilogue() {
BLR();
MTLR(R0);
LP(R0, lr_offset, SP);
LP(FP, cr_offset, SP); // restore FP from linkage.cr
MR(SP,FP);
return _nIns;
}
void Assembler::asm_qjoin(LIns *ins) {
int d = findMemFor(ins);
NanoAssert(d && isS16(d));
LIns* lo = ins->oprnd1();
LIns* hi = ins->oprnd2();
Register r = findRegFor(hi, GpRegs);
STW(r, d+4, FP);
// okay if r gets recycled.
r = findRegFor(lo, GpRegs);
STW(r, d, FP);
freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
}
void Assembler::asm_ld(LIns *ins) {
LIns* base = ins->oprnd1();
int d = ins->disp();
Register rr = prepResultReg(ins, GpRegs);
Register ra = getBaseReg(ins->opcode(), base, d, GpRegs);
#if !PEDANTIC
if (isS16(d)) {
if (ins->isop(LIR_ldcb)) {
LBZ(rr, d, ra);
} else {
LWZ(rr, d, ra);
}
return;
}
#endif
// general case
underrunProtect(12);
LWZX(rr, ra, R0); // rr = [ra+R0]
asm_li(R0,d);
}
void Assembler::asm_store32(LIns *value, int32_t dr, LIns *base) {
Register rs = findRegFor(value, GpRegs);
Register ra = value == base ? rs : getBaseReg(LIR_sti, base, dr, GpRegs & ~rmask(rs));
#if !PEDANTIC
if (isS16(dr)) {
STW(rs, dr, ra);
return;
}
#endif
// general case store, any offset size
STWX(rs, ra, R0);
asm_li(R0, dr);
}
void Assembler::asm_load64(LIns *ins) {
LIns* base = ins->oprnd1();
#ifdef NANOJIT_64BIT
Reservation *resv = getresv(ins);
Register rr;
if (resv && (rr = resv->reg) != UnknownReg && (rmask(rr) & FpRegs)) {
// FPR already assigned, fine, use it
freeRsrcOf(ins, false);
} else {
// use a GPR register; it's okay to copy doubles with GPRs
// but *not* okay to copy non-doubles with FPRs
rr = prepResultReg(ins, GpRegs);
}
#else
Register rr = prepResultReg(ins, FpRegs);
#endif
int dr = ins->disp();
Register ra = getBaseReg(ins->opcode(), base, dr, GpRegs);
#ifdef NANOJIT_64BIT
if (rmask(rr) & GpRegs) {
#if !PEDANTIC
if (isS16(dr)) {
LD(rr, dr, ra);
return;
}
#endif
// general case 64bit GPR load
LDX(rr, ra, R0);
asm_li(R0, dr);
return;
}
#endif
// FPR
#if !PEDANTIC
if (isS16(dr)) {
LFD(rr, dr, ra);
return;
}
#endif
// general case FPR load
LFDX(rr, ra, R0);
asm_li(R0, dr);
}
void Assembler::asm_li(Register r, int32_t imm) {
#if !PEDANTIC
if (isS16(imm)) {
LI(r, imm);
return;
}
if ((imm & 0xffff) == 0) {
imm = uint32_t(imm) >> 16;
LIS(r, imm);
return;
}
#endif
asm_li32(r, imm);
}
void Assembler::asm_li32(Register r, int32_t imm) {
// general case
// TODO: use ADDI instead of ORI if r != R0; the impl might have a 3-way adder
ORI(r, r, imm);
LIS(r, imm>>16); // on ppc64, this sign extends
}
void Assembler::asm_li64(Register r, uint64_t imm) {
underrunProtect(5*sizeof(NIns)); // must be contiguous to be patchable
ORI(r,r,uint16_t(imm)); // r[0:15] = imm[0:15]
ORIS(r,r,uint16_t(imm>>16)); // r[16:31] = imm[16:31]
SLDI(r,r,32); // r[32:63] = r[0:31], r[0:31] = 0
asm_li32(r, int32_t(imm>>32)); // r[0:31] = imm[32:63]
}
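// Example encodings in execution order (note: code is emitted in reverse):
//   asm_li(r, 0x1234)      -> li  r, 0x1234
//   asm_li(r, 0x56780000)  -> lis r, 0x5678
//   asm_li(r, 0x12345678)  -> lis r, 0x1234; ori r, r, 0x5678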
void Assembler::asm_store64(LIns *value, int32_t dr, LIns *base) {
NanoAssert(value->isQuad());
Register ra = getBaseReg(LIR_stqi, base, dr, GpRegs);
#if !PEDANTIC && !defined NANOJIT_64BIT
if (value->isop(LIR_quad) && isS16(dr) && isS16(dr+4)) {
// quad constant and short offset
uint64_t q = value->imm64();
STW(R0, dr, ra); // hi
asm_li(R0, int32_t(q>>32)); // hi
STW(R0, dr+4, ra); // lo
asm_li(R0, int32_t(q)); // lo
return;
}
if (value->isop(LIR_qjoin) && isS16(dr) && isS16(dr+4)) {
// short offset and qjoin(lo,hi) - store lo & hi separately
RegisterMask allow = GpRegs & ~rmask(ra);
LIns *lo = value->oprnd1();
Register rlo = findRegFor(lo, allow);
LIns *hi = value->oprnd2();
Register rhi = hi == lo ? rlo : findRegFor(hi, allow & ~rmask(rlo));
STW(rhi, dr, ra); // hi
STW(rlo, dr+4, ra); // lo
return;
}
#endif // !PEDANTIC
// general case for any value
#if !defined NANOJIT_64BIT
// on 32bit cpu's, we only use store64 for doubles
Register rs = findRegFor(value, FpRegs);
#else
// if we have to choose a register, use a GPR
Reservation *resv = getresv(value);
Register rs;
if (!resv || (rs = resv->reg) == UnknownReg) {
rs = findRegFor(value, GpRegs & ~rmask(ra));
}
if (rmask(rs) & GpRegs) {
#if !PEDANTIC
if (isS16(dr)) {
// short offset
STD(rs, dr, ra);
return;
}
#endif
// general case store 64bit GPR
STDX(rs, ra, R0);
asm_li(R0, dr);
return;
}
#endif // NANOJIT_64BIT
#if !PEDANTIC
if (isS16(dr)) {
// short offset
STFD(rs, dr, ra);
return;
}
#endif
// general case for any offset
STFDX(rs, ra, R0);
asm_li(R0, dr);
}
void Assembler::asm_cond(LIns *ins) {
LOpcode op = ins->opcode();
LIns *a = ins->oprnd1();
LIns *b = ins->oprnd2();
ConditionRegister cr = CR7;
Register r = prepResultReg(ins, GpRegs);
switch (op) {
case LIR_eq: case LIR_feq:
case LIR_qeq:
EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
MFCR(r);
break;
case LIR_lt: case LIR_ult:
case LIR_flt: case LIR_fle:
case LIR_qlt: case LIR_qult:
EXTRWI(r, r, 1, 4*cr+COND_lt); // extract CR7.lt
MFCR(r);
break;
case LIR_gt: case LIR_ugt:
case LIR_fgt: case LIR_fge:
case LIR_qgt: case LIR_qugt:
EXTRWI(r, r, 1, 4*cr+COND_gt); // extract CR7.gt
MFCR(r);
break;
case LIR_le: case LIR_ule:
case LIR_qle: case LIR_qule:
EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
MFCR(r);
CROR(CR7, eq, lt, eq);
break;
case LIR_ge: case LIR_uge:
case LIR_qge: case LIR_quge:
EXTRWI(r, r, 1, 4*cr+COND_eq); // select CR7.eq
MFCR(r);
CROR(CR7, eq, gt, eq);
break;
default:
debug_only(outputf("%s",lirNames[ins->opcode()]);)
TODO(asm_cond);
break;
}
asm_cmp(op, a, b, cr);
}
void Assembler::asm_fcond(LIns *ins) {
asm_cond(ins);
}
// force 32-bit sign extension to test whether the value fits in 14 bits
#define isS14(i) ((int32_t((i)<<18)>>18) == (i))
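// e.g. isS14(8191) is true and isS14(8192) is false: the signed 14-bit
// range is [-8192, 8191].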
NIns* Assembler::asm_branch(bool onfalse, LIns *cond, NIns * const targ) {
LOpcode condop = cond->opcode();
NanoAssert(cond->isCond());
// powerpc offsets are based on the address of the branch instruction
NIns *patch;
#if !PEDANTIC
ptrdiff_t bd = targ - (_nIns-1);
if (targ && isS24(bd))
patch = asm_branch_near(onfalse, cond, targ);
else
#endif
patch = asm_branch_far(onfalse, cond, targ);
asm_cmp(condop, cond->oprnd1(), cond->oprnd2(), CR7);
return patch;
}
NIns* Assembler::asm_branch_near(bool onfalse, LIns *cond, NIns * const targ) {
NanoAssert(targ != 0);
underrunProtect(4);
ptrdiff_t bd = targ - (_nIns-1);
NIns *patch = 0;
if (!isS14(bd)) {
underrunProtect(8);
bd = targ - (_nIns-1);
if (isS24(bd)) {
// can't fit the conditional branch offset into 14 bits, but it
// fits in 24, so invert the condition and branch around an
// unconditional jump
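// e.g. a "beq target" whose target is out of 14-bit reach becomes:
//     bne cr7, skip     ; inverted condition, 14-bit reach
//     b   target        ; 24-bit reach, patchable
// skip: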
verbose_only(verbose_outputf("%p:", _nIns);)
NIns *skip = _nIns;
B(bd);
patch = _nIns; // this is the patchable branch to the given target
onfalse = !onfalse;
bd = skip - (_nIns-1);
NanoAssert(isS14(bd));
verbose_only(verbose_outputf("branch24");)
}
else {
// known far target
return asm_branch_far(onfalse, cond, targ);
}
}
ConditionRegister cr = CR7;
switch (cond->opcode()) {
case LIR_eq:
case LIR_feq:
case LIR_qeq:
if (onfalse) BNE(cr,bd); else BEQ(cr,bd);
break;
case LIR_lt: case LIR_ult:
case LIR_flt: case LIR_fle:
case LIR_qlt: case LIR_qult:
if (onfalse) BNL(cr,bd); else BLT(cr,bd);
break;
case LIR_le: case LIR_ule:
case LIR_qle: case LIR_qule:
if (onfalse) BGT(cr,bd); else BLE(cr,bd);
break;
case LIR_gt: case LIR_ugt:
case LIR_fgt: case LIR_fge:
case LIR_qgt: case LIR_qugt:
if (onfalse) BNG(cr,bd); else BGT(cr,bd);
break;
case LIR_ge: case LIR_uge:
case LIR_qge: case LIR_quge:
if (onfalse) BLT(cr,bd); else BGE(cr,bd);
break;
default:
debug_only(outputf("%s",lirNames[cond->opcode()]);)
TODO(unknown_cond);
}
if (!patch)
patch = _nIns;
return patch;
}
// general case branch to any address (using CTR)
NIns *Assembler::asm_branch_far(bool onfalse, LIns *cond, NIns * const targ) {
LOpcode condop = cond->opcode();
ConditionRegister cr = CR7;
underrunProtect(16);
switch (condop) {
case LIR_eq:
case LIR_feq:
case LIR_qeq:
if (onfalse) BNECTR(cr); else BEQCTR(cr);
break;
case LIR_lt: case LIR_ult:
case LIR_qlt: case LIR_qult:
case LIR_flt: case LIR_fle:
if (onfalse) BNLCTR(cr); else BLTCTR(cr);
break;
case LIR_le: case LIR_ule:
case LIR_qle: case LIR_qule:
if (onfalse) BGTCTR(cr); else BLECTR(cr);
break;
case LIR_gt: case LIR_ugt:
case LIR_qgt: case LIR_qugt:
case LIR_fgt: case LIR_fge:
if (onfalse) BNGCTR(cr); else BGTCTR(cr);
break;
case LIR_ge: case LIR_uge:
case LIR_qge: case LIR_quge:
if (onfalse) BLTCTR(cr); else BGECTR(cr);
break;
default:
debug_only(outputf("%s",lirNames[condop]);)
TODO(unknown_cond);
}
#if !defined NANOJIT_64BIT
MTCTR(R0);
asm_li32(R0, (int)targ);
#else
MTCTR(R0);
if (!targ || !isU32(uintptr_t(targ))) {
asm_li64(R0, uint64_t(targ));
} else {
asm_li32(R0, uint32_t(uintptr_t(targ)));
}
#endif
return _nIns;
}
void Assembler::asm_cmp(LOpcode condop, LIns *a, LIns *b, ConditionRegister cr) {
RegisterMask allow = condop >= LIR_feq && condop <= LIR_fge ? FpRegs : GpRegs;
Register ra = findRegFor(a, allow);
#if !PEDANTIC
if (b->isconst()) {
int32_t d = b->imm32();
if (isS16(d)) {
if (condop >= LIR_eq && condop <= LIR_ge) {
CMPWI(cr, ra, d);
return;
}
if (condop >= LIR_qeq && condop <= LIR_qge) {
CMPDI(cr, ra, d);
TODO(cmpdi);
return;
}
}
if (isU16(d)) {
if (condop == LIR_eq || (condop >= LIR_ult && condop <= LIR_uge)) {
CMPLWI(cr, ra, d);
return;
}
if (condop == LIR_qeq || (condop >= LIR_qult && condop <= LIR_quge)) {
CMPLDI(cr, ra, d);
TODO(cmpldi);
return;
}
}
}
#endif
// general case
Register rb = b==a ? ra : findRegFor(b, allow & ~rmask(ra));
if (condop >= LIR_eq && condop <= LIR_ge) {
CMPW(cr, ra, rb);
} else if (condop >= LIR_ult && condop <= LIR_uge) {
CMPLW(cr, ra, rb);
} else if (condop >= LIR_qeq && condop <= LIR_qge) {
CMPD(cr, ra, rb);
}
else if (condop >= LIR_qult && condop <= LIR_quge) {
CMPLD(cr, ra, rb);
}
else if (condop >= LIR_feq && condop <= LIR_fge) {
// Set the lt/gt bit for fle/fge. We don't do this for int/uint
// comparisons because there we can simply invert the branch condition;
// for floats we can't, because of unordered comparisons.
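// e.g. if either operand is NaN, fcmpu sets only the "unordered" bit,
// so branching on the inverse of fgt would wrongly treat fle as true.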
if (condop == LIR_fle)
CROR(cr, lt, lt, eq); // lt = lt|eq
else if (condop == LIR_fge)
CROR(cr, gt, gt, eq); // gt = gt|eq
FCMPU(cr, ra, rb);
}
else {
TODO(asm_cmp);
}
}
void Assembler::asm_ret(LIns *ins) {
genEpilogue();
assignSavedRegs();
LIns *value = ins->oprnd1();
Register r = ins->isop(LIR_ret) ? R3 : F1;
findSpecificRegFor(value, r);
}
void Assembler::asm_nongp_copy(Register r, Register s) {
// PPC doesn't support any GPR<->FPR moves
NanoAssert((rmask(r) & FpRegs) && (rmask(s) & FpRegs));
FMR(r, s);
}
void Assembler::asm_restore(LIns *i, Reservation *resv, Register r) {
int d;
if (i->isop(LIR_alloc)) {
d = disp(resv);
ADDI(r, FP, d);
}
else if (i->isconst()) {
if (!resv->arIndex) {
i->resv()->clear();
}
asm_li(r, i->imm32());
}
else {
d = findMemFor(i);
if (IsFpReg(r)) {
NanoAssert(i->isQuad());
LFD(r, d, FP);
} else if (i->isQuad()) {
LD(r, d, FP);
} else {
LWZ(r, d, FP);
}
verbose_only( if (_logc->lcbits & LC_RegAlloc) {
outputForEOL(" <= restore %s",
_thisfrag->lirbuf->names->formatRef(i)); } )
}
}
Register Assembler::asm_prep_fcall(Reservation*, LIns *ins) {
return prepResultReg(ins, rmask(F1));
}
void Assembler::asm_int(LIns *ins) {
Register rr = prepResultReg(ins, GpRegs);
asm_li(rr, ins->imm32());
}
void Assembler::asm_fneg(LIns *ins) {
Register rr = prepResultReg(ins, FpRegs);
Register ra = findRegFor(ins->oprnd1(), FpRegs);
FNEG(rr,ra);
}
void Assembler::asm_param(LIns *ins) {
uint32_t a = ins->paramArg();
uint32_t kind = ins->paramKind();
if (kind == 0) {
// ordinary param
// first eight args always in R3..R10 for PPC
if (a < 8) {
// incoming arg in register
prepResultReg(ins, rmask(argRegs[a]));
} else {
// todo: support stack based args, arg 0 is at [FP+off] where off
// is the # of regs to be pushed in genProlog()
TODO(asm_param_stk);
}
}
else {
// saved param
prepResultReg(ins, rmask(savedRegs[a]));
}
}
void Assembler::asm_call(LIns *ins) {
const CallInfo* call = ins->callInfo();
ArgSize sizes[MAXARGS];
uint32_t argc = call->get_sizes(sizes);
bool indirect;
if (!(indirect = call->isIndirect())) {
verbose_only(if (_logc->lcbits & LC_Assembly)
outputf(" %p:", _nIns);
)
br((NIns*)call->_address, 1);
} else {
// Indirect call: we assign the address arg to R11 since it's not
// used for regular arguments, and is otherwise scratch since it's
// clobbered by the call.
underrunProtect(8); // underrunProtect might clobber CTR
BCTRL();
MTCTR(R11);
asm_regarg(ARGSIZE_P, ins->arg(--argc), R11);
}
int param_size = 0;
Register r = R3;
Register fr = F1;
for(uint32_t i = 0; i < argc; i++) {
uint32_t j = argc - i - 1;
ArgSize sz = sizes[j];
LInsp arg = ins->arg(j);
if (sz & ARGSIZE_MASK_INT) {
// GP arg
if (r <= R10) {
asm_regarg(sz, arg, r);
r = nextreg(r);
param_size += sizeof(void*);
} else {
// put arg on stack
TODO(stack_int32);
}
} else if (sz == ARGSIZE_F) {
// double
if (fr <= F13) {
asm_regarg(sz, arg, fr);
fr = nextreg(fr);
#ifdef NANOJIT_64BIT
r = nextreg(r);
#else
r = nextreg(nextreg(r)); // skip 2 gpr's
#endif
param_size += sizeof(double);
} else {
// put arg on stack
TODO(stack_double);
}
} else {
TODO(ARGSIZE_UNK);
}
}
if (param_size > max_param_size)
max_param_size = param_size;
}
void Assembler::asm_regarg(ArgSize sz, LInsp p, Register r)
{
NanoAssert(r != UnknownReg);
if (sz & ARGSIZE_MASK_INT)
{
#ifdef NANOJIT_64BIT
if (sz == ARGSIZE_I) {
// sign extend 32->64
EXTSW(r, r);
} else if (sz == ARGSIZE_U) {
// zero extend 32->64
CLRLDI(r, r, 32);
}
#endif
// arg goes in specific register
if (p->isconst()) {
asm_li(r, p->imm32());
} else {
Reservation* rA = getresv(p);
if (rA) {
if (rA->reg == UnknownReg) {
// load it into the arg reg
int d = findMemFor(p);
if (p->isop(LIR_alloc)) {
NanoAssert(isS16(d));
ADDI(r, FP, d);
} else if (p->isQuad()) {
LD(r, d, FP);
} else {
LWZ(r, d, FP);
}
} else {
// it must be in a saved reg
MR(r, rA->reg);
}
}
else {
// this is the last use, so fine to assign it
// to the scratch reg, it's dead after this point.
findSpecificRegFor(p, r);
}
}
}
else if (sz == ARGSIZE_F) {
Reservation* rA = getresv(p);
if (rA) {
if (rA->reg == UnknownReg || !IsFpReg(rA->reg)) {
// load it into the arg reg
int d = findMemFor(p);
LFD(r, d, FP);
} else {
// it must be in a saved reg
NanoAssert(IsFpReg(r) && IsFpReg(rA->reg));
FMR(r, rA->reg);
}
}
else {
// this is the last use, so fine to assign it
// to the scratch reg, it's dead after this point.
findSpecificRegFor(p, r);
}
}
else {
TODO(ARGSIZE_UNK);
}
}
void Assembler::asm_spill(Register rr, int d, bool /* pop */, bool quad) {
(void)quad;
if (d) {
if (IsFpReg(rr)) {
NanoAssert(quad);
STFD(rr, d, FP);
}
#ifdef NANOJIT_64BIT
else if (quad) {
STD(rr, d, FP);
}
#endif
else {
NanoAssert(!quad);
STW(rr, d, FP);
}
}
}
void Assembler::asm_arith(LIns *ins) {
LOpcode op = ins->opcode();
LInsp lhs = ins->oprnd1();
LInsp rhs = ins->oprnd2();
RegisterMask allow = GpRegs;
Register rr = prepResultReg(ins, allow);
Register ra = findRegFor(lhs, GpRegs);
if (rhs->isconst()) {
int32_t rhsc = rhs->imm32();
if (isS16(rhsc)) {
// PPC arithmetic immediate ops sign-extend the imm16 value
switch (op) {
case LIR_add:
case LIR_iaddp:
IF_64BIT(case LIR_qiadd:)
IF_64BIT(case LIR_qaddp:)
ADDI(rr, ra, rhsc);
return;
case LIR_sub:
SUBI(rr, ra, rhsc);
return;
case LIR_mul:
MULLI(rr, ra, rhsc);
return;
}
}
if (isU16(rhsc)) {
// PPC logical immediate ops zero-extend the imm16 value
switch (op) {
IF_64BIT(case LIR_qior:)
case LIR_or:
ORI(rr, ra, rhsc);
return;
IF_64BIT(case LIR_qiand:)
case LIR_and:
ANDI(rr, ra, rhsc);
return;
IF_64BIT(case LIR_qxor:)
case LIR_xor:
XORI(rr, ra, rhsc);
return;
}
}
// LIR shift ops only use the low 5 bits of the shift constant
switch (op) {
case LIR_lsh:
SLWI(rr, ra, rhsc&31);
return;
case LIR_ush:
SRWI(rr, ra, rhsc&31);
return;
case LIR_rsh:
SRAWI(rr, ra, rhsc&31);
return;
}
}
// general case, put rhs in register
Register rb = rhs==lhs ? ra : findRegFor(rhs, GpRegs&~rmask(ra));
switch (op) {
IF_64BIT(case LIR_qiadd:)
IF_64BIT(case LIR_qaddp:)
case LIR_add:
case LIR_iaddp:
ADD(rr, ra, rb);
break;
IF_64BIT(case LIR_qiand:)
case LIR_and:
AND(rr, ra, rb);
break;
IF_64BIT(case LIR_qior:)
case LIR_or:
OR(rr, ra, rb);
break;
IF_64BIT(case LIR_qxor:)
case LIR_xor:
XOR(rr, ra, rb);
break;
case LIR_sub: SUBF(rr, rb, ra); break;
case LIR_lsh: SLW(rr, ra, R0); ANDI(R0, rb, 31); break;
case LIR_rsh: SRAW(rr, ra, R0); ANDI(R0, rb, 31); break;
case LIR_ush: SRW(rr, ra, R0); ANDI(R0, rb, 31); break;
case LIR_mul: MULLW(rr, ra, rb); break;
#ifdef NANOJIT_64BIT
case LIR_qilsh:
SLD(rr, ra, R0);
ANDI(R0, rb, 63);
break;
case LIR_qursh:
SRD(rr, ra, R0);
ANDI(R0, rb, 63);
break;
case LIR_qirsh:
SRAD(rr, ra, R0);
ANDI(R0, rb, 63);
TODO(qirsh);
break;
#endif
default:
debug_only(outputf("%s",lirNames[op]);)
TODO(asm_arith);
}
}
void Assembler::asm_fop(LIns *ins) {
LOpcode op = ins->opcode();
LInsp lhs = ins->oprnd1();
LInsp rhs = ins->oprnd2();
RegisterMask allow = FpRegs;
Register rr = prepResultReg(ins, allow);
Reservation *rA, *rB;
findRegFor2(allow, lhs, rA, rhs, rB);
Register ra = rA->reg;
Register rb = rB->reg;
switch (op) {
case LIR_fadd: FADD(rr, ra, rb); break;
case LIR_fsub: FSUB(rr, ra, rb); break;
case LIR_fmul: FMUL(rr, ra, rb); break;
case LIR_fdiv: FDIV(rr, ra, rb); break;
default:
debug_only(outputf("%s",lirNames[op]);)
TODO(asm_fop);
}
}
void Assembler::asm_i2f(LIns *ins) {
Register r = prepResultReg(ins, FpRegs);
Register v = findRegFor(ins->oprnd1(), GpRegs);
const int d = 16; // naturally aligned
#if defined NANOJIT_64BIT && !PEDANTIC
FCFID(r, r); // convert to double
LFD(r, d, SP); // load into fpu register
STD(v, d, SP); // save int64
EXTSW(v, v); // sign-extend destructively; ok since oprnd1 is only 32 bits
#else
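// 32-bit path: exponent-bias trick, shown here in execution order
// (the code below is emitted in reverse). Build the double
// 2^52 + 2^31 + v by storing 0x43300000 over (v ^ 0x80000000) in the
// scratch area, load it, and subtract 2^52 + 2^31 (held in F0); the
// difference is (double)v.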
FSUB(r, r, F0);
LFD(r, d, SP); // scratch area in outgoing linkage area
STW(R0, d+4, SP);
XORIS(R0, v, 0x8000);
LFD(F0, d, SP);
STW(R0, d+4, SP);
LIS(R0, 0x8000);
STW(R0, d, SP);
LIS(R0, 0x4330);
#endif
}
void Assembler::asm_u2f(LIns *ins) {
Register r = prepResultReg(ins, FpRegs);
Register v = findRegFor(ins->oprnd1(), GpRegs);
const int d = 16;
#if defined NANOJIT_64BIT && !PEDANTIC
FCFID(r, r); // convert to double
LFD(r, d, SP); // load into fpu register
STD(v, d, SP); // save int64
CLRLDI(v, v, 32); // zero-extend destructively
#else
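// 32-bit path: same trick without the sign-bit flip (execution order):
// build 2^52 + v as 0x43300000:v, then subtract 2^52 (0x43300000:0,
// held in F0).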
FSUB(r, r, F0);
LFD(F0, d, SP);
STW(R0, d+4, SP);
LI(R0, 0);
LFD(r, d, SP);
STW(v, d+4, SP);
STW(R0, d, SP);
LIS(R0, 0x4330);
#endif
}
void Assembler::asm_promote(LIns *ins) {
LOpcode op = ins->opcode();
Register r = prepResultReg(ins, GpRegs);
Register v = findRegFor(ins->oprnd1(), GpRegs);
switch (op) {
default:
debug_only(outputf("%s",lirNames[op]));
TODO(asm_promote);
case LIR_u2q:
CLRLDI(r, v, 32); // clears the top 32 bits
break;
case LIR_i2q:
EXTSW(r, v);
break;
}
}
void Assembler::asm_quad(LIns *ins) {
#ifdef NANOJIT_64BIT
Reservation *resv = getresv(ins);
Register r;
if (resv && (r = resv->reg) != UnknownReg && (rmask(r) & FpRegs)) {
// FPR already assigned, fine, use it
freeRsrcOf(ins, false);
} else {
// use a GPR register; it's okay to copy doubles with GPRs
// but *not* okay to copy non-doubles with FPRs
r = prepResultReg(ins, GpRegs);
}
#else
Register r = prepResultReg(ins, FpRegs);
#endif
if (rmask(r) & FpRegs) {
union {
double d;
struct {
int32_t hi, lo;
} w;
};
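// note: hi-before-lo field order matches PPC's big-endian layout of a
// double in memory, which the stores below rely on.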
d = ins->imm64f();
LFD(r, 12, SP);
STW(R0, 12, SP);
asm_li(R0, w.hi);
STW(R0, 16, SP);
asm_li(R0, w.lo);
}
else {
int64_t q = ins->imm64();
if (isS32(q)) {
asm_li(r, int32_t(q));
return;
}
RLDIMI(r,R0,32,0); // or 32,32?
asm_li(R0, int32_t(q>>32)); // hi bits into R0
asm_li(r, int32_t(q)); // lo bits into dest reg
}
}
void Assembler::br(NIns* addr, int link) {
// if the destination is unknown, use the longest branch form
if (!addr) {
br_far(addr,link);
return;
}
// powerpc offsets are based on the address of the branch instruction
underrunProtect(4); // ensure _nIns is addr of Bx
ptrdiff_t offset = addr - (_nIns-1); // we want ptr diff's implicit >>2 here
#if !PEDANTIC
if (isS24(offset)) {
Bx(offset, 0, link); // b addr or bl addr
return;
}
ptrdiff_t absaddr = addr - (NIns*)0; // ptr diff implies >>2
if (isS24(absaddr)) {
Bx(absaddr, 1, link); // ba addr or bla addr
return;
}
#endif // !PEDANTIC
br_far(addr,link);
}
void Assembler::br_far(NIns* addr, int link) {
// Far jump.
// We can't allow a page break inside this sequence, because the
// page-break branch would itself clobber CTR and R2. We use R2 here
// because it's not available to the register allocator, and we use R0
// everywhere else as scratch, so using R2 avoids clobbering anything
// besides CTR.
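// Execution order of the 32-bit sequence (emitted in reverse below):
//     lis   r2, addr@h
//     ori   r2, r2, addr@l
//     mtctr r2
//     bctr / bctrl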
#ifdef NANOJIT_64BIT
if (addr==0 || !isU32(uintptr_t(addr))) {
// really far jump to 64bit abs addr
underrunProtect(28); // 7 instructions
BCTR(link);
MTCTR(R2);
asm_li64(R2, uintptr_t(addr)); // 5 instructions
return;
}
#endif
underrunProtect(16);
BCTR(link);
MTCTR(R2);
asm_li32(R2, uint32_t(uintptr_t(addr))); // 2 instructions
}
void Assembler::underrunProtect(int bytes) {
NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
int instr = (bytes + sizeof(NIns) - 1) / sizeof(NIns);
NIns *top = _inExit ? this->exitStart : this->codeStart;
NIns *pc = _nIns;
#if PEDANTIC
// pedanticTop is based on the last call to underrunProtect; any time we
// call underrunProtect and would use more than what's already protected,
// we insert a page-break jump. Sometimes this branches to a new page;
// usually it just goes to the next instruction, and the only effect is
// to clobber R2 & CTR
NanoAssert(pedanticTop >= top);
if (pc - instr < pedanticTop) {
// no page break required, but insert a far branch anyway just to be difficult
#ifdef NANOJIT_64BIT
const int br_size = 7;
#else
const int br_size = 4;
#endif
if (pc - instr - br_size < top) {
// really do need a page break
verbose_only(if (_logc->lcbits & LC_Assembly) outputf("newpage %p:", pc);)
codeAlloc();
}
// now emit the jump, but make sure we won't need another page break.
// we're pedantic, but not *that* pedantic.
pedanticTop = _nIns - br_size;
br(pc, 0);
pedanticTop = _nIns - instr;
}
#else
if (pc - instr < top) {
verbose_only(if (_logc->lcbits & LC_Assembly) outputf("newpage %p:", pc);)
if (_inExit)
codeAlloc(exitStart, exitEnd, _nIns verbose_only(, exitBytes));
else
codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
// this jump will call underrunProtect again, but since we're on a new
// page, nothing will happen.
br(pc, 0);
}
#endif
}
void Assembler::asm_cmov(LIns *ins) {
NanoAssert(ins->isop(LIR_cmov) || ins->isop(LIR_qcmov));
LIns* cond = ins->oprnd1();
LIns* iftrue = ins->oprnd2();
LIns* iffalse = ins->oprnd3();
NanoAssert(cond->isCmp());
NanoAssert(iftrue->isQuad() == iffalse->isQuad());
// fixme: we could handle fpu registers here, too, since we're just branching
Register rr = prepResultReg(ins, GpRegs);
findSpecificRegFor(iftrue, rr);
Register rf = findRegFor(iffalse, GpRegs & ~rmask(rr));
NIns *after = _nIns;
verbose_only(if (_logc->lcbits & LC_Assembly) outputf("%p:",after);)
MR(rr, rf);
asm_branch(false, cond, after);
}
RegisterMask Assembler::hint(LIns *i, RegisterMask allow) {
LOpcode op = i->opcode();
RegisterMask prefer = ~0LL;
if (op == LIR_icall || op == LIR_qcall)
prefer = rmask(R3);
else if (op == LIR_fcall)
prefer = rmask(F1);
else if (op == LIR_param) {
if (i->paramArg() < 8) {
prefer = rmask(argRegs[i->paramArg()]);
}
}
// narrow the allow set to whatever is preferred and also free
if (_allocator.free & allow & prefer)
allow &= prefer;
return allow;
}
void Assembler::asm_neg_not(LIns *ins) {
Register rr = prepResultReg(ins, GpRegs);
Register ra = findRegFor(ins->oprnd1(), GpRegs);
if (ins->isop(LIR_neg)) {
NEG(rr, ra);
} else {
NOT(rr, ra);
}
}
void Assembler::asm_qlo(LIns *ins) {
Register rr = prepResultReg(ins, GpRegs);
int d = findMemFor(ins->oprnd1());
LWZ(rr, d+4, FP);
}
void Assembler::asm_qhi(LIns *ins) {
Register rr = prepResultReg(ins, GpRegs);
int d = findMemFor(ins->oprnd1());
LWZ(rr, d, FP);
TODO(asm_qhi);
}
void Assembler::nInit(AvmCore*) {
}
void Assembler::nBeginAssembly() {
max_param_size = 0;
}
void Assembler::nativePageSetup() {
if (!_nIns) {
codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
IF_PEDANTIC( pedanticTop = _nIns; )
}
if (!_nExitIns) {
codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes));
}
}
void Assembler::nativePageReset()
{}
// Increment the 32-bit profiling counter at pCtr, without
// changing any registers.
verbose_only(
void Assembler::asm_inc_m32(uint32_t* /*pCtr*/)
{
}
)
void Assembler::nPatchBranch(NIns *branch, NIns *target) {
// ppc relative offsets are based on the addr of the branch instruction
ptrdiff_t bd = target - branch;
if (branch[0] == PPC_b) {
// unconditional, 24bit offset. Whoever generated the unpatched jump
// must have known the final offset would fit in 24 bits; otherwise the
// jump would be (lis,ori,mtctr,bctr) and we'd be patching the lis,ori.
NanoAssert(isS24(bd));
branch[0] |= (bd & 0xffffff) << 2;
}
else if ((branch[0] & PPC_bc) == PPC_bc) {
// conditional, 14bit offset. Whoever generated the unpatched jump
// must have known the final offset would fit in 14 bits; otherwise the
// jump would be (lis,ori,mtctr,bcctr) and we'd be patching the lis,ori below.
NanoAssert(isS14(bd));
NanoAssert(((branch[0] & 0x3fff)<<2) == 0);
branch[0] |= (bd & 0x3fff) << 2;
TODO(patch_bc);
}
#ifdef NANOJIT_64BIT
// patch 64bit branch
else if ((branch[0] & ~(31<<21)) == PPC_addis) {
// general branch, using lis,ori,sldi,oris,ori to load the const 64bit addr.
Register rd = Register((branch[0] >> 21) & 31);
NanoAssert((branch[1] & 0xffff0000) == (PPC_ori | GPR(rd)<<21 | GPR(rd)<<16));
NanoAssert((branch[3] & 0xffff0000) == (PPC_oris | GPR(rd)<<21 | GPR(rd)<<16));
NanoAssert((branch[4] & 0xffff0000) == (PPC_ori | GPR(rd)<<21 | GPR(rd)<<16));
uint64_t imm = uintptr_t(target);
uint32_t lo = uint32_t(imm);
uint32_t hi = uint32_t(imm>>32);
branch[0] = PPC_addis | GPR(rd)<<21 | uint16_t(hi>>16);
branch[1] = PPC_ori | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(hi);
branch[3] = PPC_oris | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(lo>>16);
branch[4] = PPC_ori | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(lo);
}
#else // NANOJIT_64BIT
// patch 32bit branch
else if ((branch[0] & ~(31<<21)) == PPC_addis) {
// general branch, using lis,ori to load the const addr.
// patch a lis,ori sequence with a 32bit value
Register rd = Register((branch[0] >> 21) & 31);
NanoAssert((branch[1] & 0xffff0000) == (PPC_ori | GPR(rd)<<21 | GPR(rd)<<16));
uint32_t imm = uint32_t(target);
branch[0] = PPC_addis | GPR(rd)<<21 | uint16_t(imm >> 16); // lis rd, imm >> 16
branch[1] = PPC_ori | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(imm); // ori rd, rd, imm & 0xffff
}
#endif // !NANOJIT_64BIT
else {
TODO(unknown_patch);
}
}
static int cntzlw(int set) {
// On PowerPC, prefer higher registers, to minimize
// size of nonvolatile area that must be saved.
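// e.g. set = 0x0000F000 (R12..R15 free): cntlzw returns 16, so we
// return 31-16 = 15 and R15 is chosen.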
register Register i;
#ifdef __GNUC__
asm ("cntlzw %0,%1" : "=r" (i) : "r" (set));
#else // __GNUC__
# error("unsupported compiler")
#endif // __GNUC__
return 31-i;
}
Register Assembler::nRegisterAllocFromSet(RegisterMask set) {
Register i;
// note, deliberate truncation of 64->32 bits
if (set & 0xffffffff) {
i = Register(cntzlw(int(set))); // gp reg
} else {
i = Register(32+cntzlw(int(set>>32))); // fp reg
}
_allocator.free &= ~rmask(i);
return i;
}
void Assembler::nRegisterResetAll(RegAlloc &regs) {
regs.clear();
regs.free = SavedRegs | 0x1ff8 /* R3-12 */ | 0x3ffe00000000LL /* F1-13 */;
debug_only(regs.managed = regs.free);
}
#ifdef NANOJIT_64BIT
void Assembler::asm_qbinop(LIns *ins) {
LOpcode op = ins->opcode();
switch (op) {
case LIR_qaddp:
case LIR_qior:
case LIR_qiand:
case LIR_qursh:
case LIR_qirsh:
case LIR_qilsh:
case LIR_qxor:
case LIR_qiadd:
asm_arith(ins);
break;
default:
debug_only(outputf("%s",lirNames[op]));
TODO(asm_qbinop);
}
}
#endif // NANOJIT_64BIT
void Assembler::nFragExit(LIns*) {
TODO(nFragExit);
}
void Assembler::asm_jtbl(LIns* ins, NIns** native_table)
{
// R0 = index*4, R2 = table, CTR = computed address to jump to.
// must ensure no page breaks in here because R2 & CTR can get clobbered.
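// Execution order of the 32-bit sequence (code is emitted in reverse):
//     lis/ori r2, table        ; r2 = &table[0]
//     slwi    r0, indexreg, 2  ; r0 = index*4
//     lwzx    r2, r2, r0       ; r2 = table[index]
//     mtctr   r2
//     bctr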
Register indexreg = findRegFor(ins->oprnd1(), GpRegs);
#ifdef NANOJIT_64BIT
underrunProtect(9*4);
BCTR(0); // jump to address in CTR
MTCTR(R2); // CTR = R2
LDX(R2, R2, R0); // R2 = [table + index*8]
SLDI(R0, indexreg, 3); // R0 = index*8
asm_li64(R2, uint64_t(native_table)); // R2 = table (5 instr)
#else // 64bit
underrunProtect(6*4);
BCTR(0); // jump to address in CTR
MTCTR(R2); // CTR = R2
LWZX(R2, R2, R0); // R2 = [table + index*4]
SLWI(R0, indexreg, 2); // R0 = index*4
asm_li(R2, int32_t(native_table)); // R2 = table (up to 2 instructions)
#endif // 64bit
}
} // namespace nanojit
#endif // FEATURE_NANOJIT && NANOJIT_PPC