Andreas Gal 2008-09-02 23:11:51 -07:00
commit 78b5b7fc05
13 changed files with 2203 additions and 866 deletions


@@ -98,6 +98,20 @@ BUILTIN3(Object_p_propertyIsEnumerable,
BUILTIN2(BooleanToNumber, LO, LO, F, jsdouble, JSContext*, jsint, 1, 1)
BUILTIN2(ObjectToString, LO, LO, P, JSString*, JSContext*, JSObject*, 0, 0)
BUILTIN3(Array_1int, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, jsint, 0, 0)
// soft float
BUILTIN1(fneg, F, F, jsdouble, jsdouble, 1, 1)
BUILTIN1(i2f, LO, F, jsdouble, jsint, 1, 1)
BUILTIN1(u2f, LO, F, jsdouble, jsuint, 1, 1)
BUILTIN2(fcmpeq, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fcmplt, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fcmple, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fcmpgt, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fcmpge, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fmul, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
BUILTIN2(fadd, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
BUILTIN2(fdiv, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
BUILTIN2(fsub, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
BUILTIN3(Array_1str, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, JSString*, 0, 0)
BUILTIN4(Array_2obj, LO, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, JSObject*, JSObject**, 0, 0)
BUILTIN5(Array_3num, LO, LO, F, F, F, P, JSObject*, JSContext*, JSObject*, jsdouble, jsdouble, jsdouble, 0, 0)


@@ -80,7 +80,7 @@ endif
ifeq ($(CPU_ARCH),arm)
OS_CFLAGS += -DAVMPLUS_ARM -DAVMPLUS_LINUX
NANOJIT_ARCH = Thumb
NANOJIT_ARCH = ARM
endif
GFX_ARCH = x


@@ -79,7 +79,7 @@
#include "jsstr.h"
#include "prmjtime.h"
#if !defined JS_THREADSAFE && defined JS_TRACER
#ifdef JS_TRACER
#include "jstracer.h"
#endif


@@ -719,6 +719,79 @@ js_Array_3num(JSContext* cx, JSObject* proto, jsdouble n1, jsdouble n2, jsdouble
return NULL;)
}
/* soft float */
jsdouble FASTCALL
js_fneg(jsdouble x)
{
return -x;
}
jsdouble FASTCALL
js_i2f(jsint i)
{
return i;
}
jsdouble FASTCALL
js_u2f(jsuint u)
{
return u;
}
jsint FASTCALL
js_fcmpeq(jsdouble x, jsdouble y)
{
return x==y;
}
jsint FASTCALL
js_fcmplt(jsdouble x, jsdouble y)
{
return x < y;
}
jsint FASTCALL
js_fcmple(jsdouble x, jsdouble y)
{
return x <= y;
}
jsint FASTCALL
js_fcmpgt(jsdouble x, jsdouble y)
{
return x > y;
}
jsint FASTCALL
js_fcmpge(jsdouble x, jsdouble y)
{
return x >= y;
}
jsdouble FASTCALL
js_fmul(jsdouble x, jsdouble y)
{
return x * y;
}
jsdouble FASTCALL
js_fadd(jsdouble x, jsdouble y)
{
return x + y;
}
jsdouble FASTCALL
js_fdiv(jsdouble x, jsdouble y)
{
return x / y;
}
jsdouble FASTCALL
js_fsub(jsdouble x, jsdouble y)
{
return x - y;
}
#define LO ARGSIZE_LO
#define F ARGSIZE_F
#define Q ARGSIZE_Q
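
These helpers never touch FP hardware from compiled code; under NJ_SOFTFLOAT the trace simply calls back into C for every float operation. A minimal sketch of how they behave (hypothetical driver code, not part of the patch):

// hypothetical C++ driver, assuming the js_f* helpers above are linked in
jsdouble a = 1.5, b = 2.25;
jsdouble sum = js_fadd(a, b);   // 3.75, emitted as a LIR call under NJ_SOFTFLOAT
jsint lt = js_fcmplt(a, b);     // 1; the comparisons return jsint 0/1, which the
                                // SoftFloatFilter below compares against 1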


@@ -1,4 +1,4 @@
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=4 sw=4 et tw=99:
*
* ***** BEGIN LICENSE BLOCK *****
@@ -119,7 +119,7 @@ static bool nesting_enabled = true;
static bool oracle_enabled = true;
static bool did_we_check_sse2 = false;
#ifdef DEBUG
#if defined(DEBUG) || defined(INCLUDE_VERBOSE_OUTPUT)
static bool verbose_debug = getenv("TRACEMONKEY") && strstr(getenv("TRACEMONKEY"), "verbose");
#define debug_only_v(x) if (verbose_debug) { x; }
#else
@@ -277,12 +277,59 @@ Oracle::clear()
_dontDemote.reset();
}
static bool isi2f(LInsp i)
{
if (i->isop(LIR_i2f))
return true;
#if defined(NJ_SOFTFLOAT)
if (i->isop(LIR_qjoin) &&
i->oprnd1()->isop(LIR_call) &&
i->oprnd2()->isop(LIR_callh))
{
if (i->oprnd1()->imm8() == F_i2f)
return true;
}
#endif
return false;
}
static bool isu2f(LInsp i)
{
if (i->isop(LIR_u2f))
return true;
#if defined(NJ_SOFTFLOAT)
if (i->isop(LIR_qjoin) &&
i->oprnd1()->isop(LIR_call) &&
i->oprnd2()->isop(LIR_callh))
{
if (i->oprnd1()->imm8() == F_u2f)
return true;
}
#endif
return false;
}
static LInsp iu2fArg(LInsp i)
{
#if defined(NJ_SOFTFLOAT)
if (i->isop(LIR_qjoin))
return i->oprnd1()->arg(0);
#endif
return i->oprnd1();
}
static LIns* demote(LirWriter *out, LInsp i)
{
if (i->isCall())
return callArgN(i, 0);
if (i->isop(LIR_i2f) || i->isop(LIR_u2f))
return i->oprnd1();
if (isi2f(i) || isu2f(i))
return iu2fArg(i);
if (i->isconst())
return i;
AvmAssert(i->isconstq());
@@ -294,14 +341,14 @@ static LIns* demote(LirWriter *out, LInsp i)
static bool isPromoteInt(LIns* i)
{
jsdouble d;
return i->isop(LIR_i2f) || i->isconst() ||
return isi2f(i) || i->isconst() ||
(i->isconstq() && ((d = i->constvalf()) == (jsdouble)(jsint)d) && !JSDOUBLE_IS_NEGZERO(d));
}
static bool isPromoteUint(LIns* i)
{
jsdouble d;
return i->isop(LIR_u2f) || i->isconst() ||
return isu2f(i) || i->isconst() ||
(i->isconstq() && ((d = i->constvalf()) == (jsdouble)(jsuint)d));
}
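
The constant-double clause here is the subtle part: a double constant may be treated as an integer only if the int32 round trip is exact and the value is not -0.0, which int32 cannot represent. A standalone sketch of that test (illustrative names, assuming IEEE-754 doubles):

#include <cstdint>
#include <cstring>
static bool isNegZero(double d) {
    uint64_t bits;
    std::memcpy(&bits, &d, sizeof bits);    // portable bit inspection
    return bits == 0x8000000000000000ULL;   // sign bit set, everything else zero
}
static bool promotesToInt(double d) {
    return d == (double)(int32_t)d && !isNegZero(d);
}
// promotesToInt(5.0) == true, promotesToInt(5.5) == false,
// promotesToInt(-0.0) == false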
@@ -324,6 +371,79 @@ static bool overflowSafe(LIns* i)
((c->constval() > 0)));
}
#if defined(NJ_SOFTFLOAT)
class SoftFloatFilter: public LirWriter
{
public:
SoftFloatFilter(LirWriter* out):
LirWriter(out)
{
}
LInsp quadCall(uint32_t fid, LInsp args[]) {
LInsp qlo, qhi;
qlo = out->insCall(fid, args);
qhi = out->ins1(LIR_callh, qlo);
return out->qjoin(qlo, qhi);
}
LInsp ins1(LOpcode v, LInsp s0)
{
if (v == LIR_fneg)
return quadCall(F_fneg, &s0);
if (v == LIR_i2f)
return quadCall(F_i2f, &s0);
if (v == LIR_u2f)
return quadCall(F_u2f, &s0);
return out->ins1(v, s0);
}
LInsp ins2(LOpcode v, LInsp s0, LInsp s1)
{
LInsp args[2];
LInsp bv;
// change the numeric value and order of these LIR opcodes and die
if (LIR_fadd <= v && v <= LIR_fdiv) {
static uint32_t fmap[] = { F_fadd, F_fsub, F_fmul, F_fdiv };
args[0] = s1;
args[1] = s0;
return quadCall(fmap[v - LIR_fadd], args);
}
if (LIR_feq <= v && v <= LIR_fge) {
static uint32_t fmap[] = { F_fcmpeq, F_fcmplt, F_fcmpgt, F_fcmple, F_fcmpge };
args[0] = s1;
args[1] = s0;
bv = out->insCall(fmap[v - LIR_feq], args);
return out->ins2(LIR_eq, bv, out->insImm(1));
}
return out->ins2(v, s0, s1);
}
LInsp insCall(uint32_t fid, LInsp args[])
{
// if the return type is ARGSIZE_F, we have
// to do a quadCall ( qjoin(call,callh) )
if ((builtins[fid]._argtypes & 3) == ARGSIZE_F)
return quadCall(fid, args);
return out->insCall(fid, args);
}
};
#endif // NJ_SOFTFLOAT
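
Each F_* helper returns a 64-bit double in two 32-bit registers, so quadCall() pairs the LIR_call (low half) with a LIR_callh (high half) and reassembles them with qjoin. A self-contained sketch of the split/join that pattern models (names are illustrative, not nanojit API):

#include <cstdint>
#include <cstring>
struct Halves { uint32_t lo, hi; };
static Halves splitDouble(double d) {
    uint64_t bits;
    std::memcpy(&bits, &d, sizeof bits);
    return { (uint32_t)bits, (uint32_t)(bits >> 32) };
}
static double joinDouble(Halves h) {            // the qjoin analogue
    uint64_t bits = ((uint64_t)h.hi << 32) | h.lo;
    double d;
    std::memcpy(&d, &bits, sizeof d);
    return d;
}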
class FuncFilter: public LirWriter
{
TraceRecorder& recorder;
@@ -417,6 +537,20 @@ public:
return out->ins2(LIR_add, x, y);
}
}
#ifdef NANOJIT_ARM
else if (v == LIR_lsh ||
v == LIR_rsh ||
v == LIR_ush)
{
// needed on ARM -- arm doesn't mask shifts to 31 like x86 does
if (s1->isconst())
s1->setimm16(s1->constval() & 31);
else
s1 = out->ins2(LIR_and, s1, out->insImm(31));
return out->ins2(v, s0, s1);
}
#endif
return out->ins2(v, s0, s1);
}
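
Concretely: x86 shift instructions use only the low five bits of the count, which happens to match JavaScript's shift semantics, while ARM uses the low eight bits, so an unmasked count of 35 would shift the value out entirely. A sketch of the semantics the masking above restores:

static uint32_t jsShl(uint32_t x, uint32_t n) {
    return x << (n & 31);   // what LIR_lsh must compute on every target;
                            // on ARM the "and 31" has to be emitted explicitly
}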
@@ -427,9 +561,8 @@ public:
case F_DoubleToUint32:
if (s0->isconstq())
return out->insImm(js_DoubleToECMAUint32(s0->constvalf()));
if (s0->isop(LIR_i2f) || s0->isop(LIR_u2f)) {
return s0->oprnd1();
}
if (isi2f(s0) || isu2f(s0))
return iu2fArg(s0);
break;
case F_DoubleToInt32:
if (s0->isconstq())
@@ -442,9 +575,9 @@ public:
return out->ins2(op, demote(out, lhs), demote(out, rhs));
}
}
if (s0->isop(LIR_i2f) || s0->isop(LIR_u2f)) {
return s0->oprnd1();
}
if (isi2f(s0) || isu2f(s0))
return iu2fArg(s0);
// XXX ARM -- check for qjoin(call(F_UnboxDouble),call(F_UnboxDouble))
if (s0->isCall() && s0->fid() == F_UnboxDouble) {
LIns* args2[] = { callArgN(s0, 0) };
return out->insCall(F_UnboxInt32, args2);
@@ -472,7 +605,7 @@ public:
/* In debug mode vpname contains a textual description of the type of the
slot during the forall iteration over all slots. */
#ifdef DEBUG
#if defined(DEBUG) || defined(INCLUDE_VERBOSE_OUTPUT)
#define DEF_VPNAME const char* vpname; unsigned vpnum
#define SET_VPNAME(name) do { vpname = name; vpnum = 0; } while(0)
#define INC_VPNUM() do { ++vpnum; } while(0)
@@ -688,6 +821,9 @@ TraceRecorder::TraceRecorder(JSContext* cx, GuardRecord* _anchor, Fragment* _fra
#ifdef DEBUG
if (verbose_debug)
lir = verbose_filter = new (&gc) VerboseWriter(&gc, lir, lirbuf->names);
#endif
#ifdef NJ_SOFTFLOAT
lir = float_filter = new (&gc) SoftFloatFilter(lir);
#endif
lir = cse_filter = new (&gc) CseFilter(lir, &gc);
lir = expr_filter = new (&gc) ExprFilter(lir);
@@ -732,6 +868,9 @@ TraceRecorder::~TraceRecorder()
delete cse_filter;
delete expr_filter;
delete func_filter;
#ifdef NJ_SOFTFLOAT
delete float_filter;
#endif
delete lir_buf_writer;
}
@@ -1263,22 +1402,39 @@ js_IsLoopExit(JSContext* cx, JSScript* script, jsbytecode* header, jsbytecode* p
case JSOP_GE:
case JSOP_NE:
case JSOP_EQ:
/* These ops try to dispatch a JSOP_IFEQ or JSOP_IFNE that follows. */
JS_ASSERT(js_CodeSpec[*pc].length == 1);
pc++;
/* FALL THROUGH */
break;
default:
for (;;) {
if (*pc == JSOP_AND || *pc == JSOP_OR)
pc += GET_JUMP_OFFSET(pc);
else if (*pc == JSOP_ANDX || *pc == JSOP_ORX)
pc += GET_JUMPX_OFFSET(pc);
else
break;
}
}
switch (*pc) {
case JSOP_IFEQ:
case JSOP_IFEQX:
case JSOP_IFNE:
case JSOP_IFNEX:
/*
* Forward jumps are usually intra-branch, but for-in loops jump to the trailing enditer to
* clean up, so check for that case here.
* Forward jumps are usually intra-branch, but for-in loops jump to the
* trailing enditer to clean up, so check for that case here.
*/
if (pc[GET_JUMP_OFFSET(pc)] == JSOP_ENDITER)
return true;
return pc + GET_JUMP_OFFSET(pc) == header;
case JSOP_IFEQX:
case JSOP_IFNEX:
if (pc[GET_JUMPX_OFFSET(pc)] == JSOP_ENDITER)
return true;
return pc + GET_JUMPX_OFFSET(pc) == header;
default:;
}
return false;
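
The default case chases JSOP_AND/JSOP_OR short-circuit jumps forward until it reaches the branch opcode that actually consumes the condition. A toy model of that chase (opcodes and layout are illustrative, not real SpiderMonkey bytecode):

#include <cstddef>
enum Op { OP_AND, OP_OR, OP_IFEQ, OP_OTHER };
struct Ins { Op op; int jumpOffset; };
static size_t chaseToBranch(const Ins* code, size_t pc) {
    while (code[pc].op == OP_AND || code[pc].op == OP_OR)
        pc += code[pc].jumpOffset;   // follow the forward jump to the consumer
    return pc;                       // the caller then checks for OP_IFEQ etc.
}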
@@ -1418,7 +1574,7 @@ TraceRecorder::checkType(jsval& v, uint8 t, bool& unstable)
if (!isNumber(v))
return false; /* not a number? type mismatch */
LIns* i = get(&v);
if (!i->isop(LIR_i2f)) {
if (!isi2f(i)) {
debug_only_v(printf("int slot is !isInt32, slot #%d, triggering re-compilation\n",
!isGlobal(&v)
? nativeStackOffset(&v)
@@ -1428,11 +1584,11 @@ TraceRecorder::checkType(jsval& v, uint8 t, bool& unstable)
return true; /* keep checking types, but request re-compilation */
}
/* Looks good, slot is an int32, the last instruction should be i2f. */
JS_ASSERT(isInt32(v) && i->isop(LIR_i2f));
JS_ASSERT(isInt32(v) && (i->isop(LIR_i2f) || i->isop(LIR_qjoin)));
/* We got the final LIR_i2f as we expected. Overwrite the value in that
slot with the argument of i2f since we want the integer store to flow along
the loop edge, not the casted value. */
set(&v, i->oprnd1());
set(&v, iu2fArg(i));
return true;
}
if (t == JSVAL_DOUBLE) {
@@ -2063,10 +2219,11 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
/* execute previously recorded trace */
TreeInfo* ti = (TreeInfo*)f->vmprivate;
debug_only_v(printf("entering trace at %s:%u@%u, native stack slots: %u\n",
debug_only_v(printf("entering trace at %s:%u@%u, native stack slots: %u code: %p\n",
cx->fp->script->filename,
js_PCToLineNumber(cx, cx->fp->script, cx->fp->regs->pc),
cx->fp->regs->pc - cx->fp->script->code, ti->maxNativeStackSlots););
cx->fp->regs->pc - cx->fp->script->code, ti->maxNativeStackSlots,
f->code()););
JSTraceMonitor* tm = &JS_TRACE_MONITOR(cx);
unsigned ngslots = tm->globalSlots->length();
@@ -2121,8 +2278,10 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
union { NIns *code; GuardRecord* (FASTCALL *func)(InterpState*, Fragment*); } u;
u.code = f->code();
#if defined(DEBUG) && defined(NANOJIT_IA32)
#ifdef DEBUG
#if defined(NANOJIT_IA32)
uint64 start = rdtsc();
#endif
#endif
/*
@@ -2203,22 +2362,24 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
fp->regs->pc = (jsbytecode*)lr->from->root->ip + e->ip_adj;
fp->regs->sp = StackBase(fp) + (e->sp_adj / sizeof(double)) - calldepth_slots;
JS_ASSERT(fp->slots + fp->script->nfixed +
js_ReconstructStackDepth(cx, cx->fp->script, fp->regs->pc) == fp->regs->sp);
js_ReconstructStackDepth(cx, fp->script, fp->regs->pc) == fp->regs->sp);
#if defined(DEBUG) && defined(NANOJIT_IA32)
if (verbose_debug) {
printf("leaving trace at %s:%u@%u, op=%s, lr=%p, exitType=%d, sp=%d, ip=%p, "
"cycles=%llu\n",
fp->script->filename, js_PCToLineNumber(cx, fp->script, fp->regs->pc),
fp->regs->pc - fp->script->code,
js_CodeName[*fp->regs->pc],
lr,
lr->exit->exitType,
fp->regs->sp - StackBase(fp), lr->jmp,
(rdtsc() - start));
}
uint64 cycles = rdtsc() - start;
#else
debug_only_v(uint64 cycles = 0;)
#endif
debug_only_v(printf("leaving trace at %s:%u@%u, op=%s, lr=%p, exitType=%d, sp=%d, ip=%p, "
"cycles=%llu\n",
fp->script->filename, js_PCToLineNumber(cx, fp->script, fp->regs->pc),
fp->regs->pc - fp->script->code,
js_CodeName[*fp->regs->pc],
lr,
lr->exit->exitType,
fp->regs->sp - StackBase(fp), lr->jmp,
cycles));
/* If this trace is part of a tree, later branches might have added additional globals for
which we don't have any type information available in the side exit. We merge in this
information from the entry type-map. See also the comment in the constructor of TraceRecorder
@@ -2399,7 +2560,7 @@ js_InitJIT(JSTraceMonitor *tm)
did_we_check_sse2 = true;
}
#endif
if (!oracle) // TODO: currently oracle does not get deallocated on shutdown
if (!oracle)
oracle = new (&gc) Oracle();
if (!tm->fragmento) {
JS_ASSERT(!tm->globalSlots && !tm->globalTypeMap);
@@ -2476,7 +2637,7 @@ js_FlushJITCache(JSContext* cx)
}
}
extern void
void
js_ShutDownJIT()
{
if (oracle) {


@@ -221,6 +221,9 @@ class TraceRecorder {
nanojit::LirWriter* cse_filter;
nanojit::LirWriter* expr_filter;
nanojit::LirWriter* func_filter;
#ifdef NJ_SOFTFLOAT
nanojit::LirWriter* float_filter;
#endif
nanojit::LIns* cx_ins;
nanojit::LIns* gp_ins;
nanojit::LIns* eos_ins;


@@ -44,6 +44,7 @@
#if defined(AVMPLUS_LINUX) && defined(AVMPLUS_ARM)
#include <asm/unistd.h>
extern "C" void __clear_cache(char *BEG, char *END);
#endif
namespace nanojit
@@ -178,6 +179,8 @@ namespace nanojit
// nothing free, steal one
// LSRA says pick the one with the furthest use
LIns* vic = findVictim(regs,allow,prefer);
NanoAssert(vic != NULL);
Reservation* resv = getresv(vic);
// restore vic
@@ -446,25 +449,37 @@ namespace nanojit
Reservation* resv = getresv(i);
Register r;
// if we have an existing reservation and it has a non-unknown
// register allocated, and that register is in our allowed mask,
// return it.
if (resv && (r=resv->reg) != UnknownReg && (rmask(r) & allow)) {
return r;
}
// figure out what registers are preferred for this instruction
RegisterMask prefer = hint(i, allow);
// if we didn't have a reservation, allocate one now
if (!resv)
resv = reserveAlloc(i);
// if the reservation doesn't have a register assigned to it...
if ((r=resv->reg) == UnknownReg)
{
// .. if the cost is 2 and the allowed mask includes
// the saved regs, then prefer just those.
if (resv->cost == 2 && (allow&SavedRegs))
prefer = allow&SavedRegs;
// grab one.
r = resv->reg = registerAlloc(prefer);
_allocator.addActive(r, i);
return r;
}
else
{
// r not allowed
// the already-allocated register isn't in the allowed mask;
// we need to grab a new one and then copy over the old
// contents to the new.
resv->reg = UnknownReg;
_allocator.retire(r);
if (resv->cost == 2 && (allow&SavedRegs))
@@ -789,37 +804,31 @@ namespace nanojit
NanoAssert(_branchStateMap->isEmpty());
_branchStateMap = 0;
#if defined(UNDER_CE)
#ifdef AVMPLUS_ARM
// If we've modified the code, we need to flush so we don't end up trying
// to execute junk
# if defined(UNDER_CE)
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX) && defined(AVMPLUS_ARM)
// N A S T Y - obviously have to fix this
// determine our page range
# elif defined(AVMPLUS_LINUX)
for (int i = 0; i < 2; i++) {
Page *p = (i == 0) ? _nativePages : _nativeExitPages;
Page *page=0, *first=0, *last=0;
for (int i=2;i!=0;i--) {
page = first = last = (i==2 ? _nativePages : _nativeExitPages);
while (page)
{
if (page<first)
first = page;
if (page>last)
last = page;
page = page->next;
Page *first = p;
while (p) {
if (!p->next || p->next != p+1) {
__clear_cache((char*)first, (char*)(p+1));
first = p->next;
}
p = p->next;
}
register unsigned long _beg __asm("a1") = (unsigned long)(first);
register unsigned long _end __asm("a2") = (unsigned long)(last+NJ_PAGE_SIZE);
register unsigned long _flg __asm("a3") = 0;
register unsigned long _swi __asm("r7") = 0xF0002;
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
}
#endif
#ifdef AVMPLUS_PORTING_API
# endif
#endif
# ifdef AVMPLUS_PORTING_API
NanoJIT_PortAPI_FlushInstructionCache(_nIns, _endJit1Addr);
NanoJIT_PortAPI_FlushInstructionCache(_nExitIns, _endJit2Addr);
#endif
# endif
}
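
The new Linux/ARM path walks the page list and flushes each physically contiguous run with __clear_cache, rather than computing one bounding range over all pages as the deleted code did. A standalone sketch of the run detection (it assumes, as the code above does, that sizeof(Page) equals the page size so p+1 is the next physical page):

enum { PAGE_SIZE = 4096 };                        // stand-in for NJ_PAGE_SIZE
struct Page {
    Page* next;
    char  pad[PAGE_SIZE - sizeof(Page*)];         // so sizeof(Page) == PAGE_SIZE
};
extern "C" void __clear_cache(char* beg, char* end);  // GCC/Linux builtin
static void flushPageList(Page* p) {
    Page* first = p;
    while (p) {
        if (!p->next || p->next != p + 1) {       // the physical run ends here
            __clear_cache((char*)first, (char*)(p + 1));
            first = p->next;                      // next run starts at p->next
        }
        p = p->next;
    }
}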
void Assembler::copyRegisters(RegAlloc* copyTo)
@@ -861,7 +870,7 @@ namespace nanojit
switch(op)
{
default:
NanoAssertMsg(false, "unsupported LIR instruction");
NanoAssertMsgf(false, "unsupported LIR instruction: %d (~0x40: %d)\n", op, op&~LIR64);
break;
case LIR_short:
@@ -1063,7 +1072,20 @@ namespace nanojit
Register rb = UnknownReg;
RegisterMask allow = GpRegs;
if (lhs != rhs && (op == LIR_mul || !rhs->isconst()))
bool forceReg = (op == LIR_mul || !rhs->isconst());
#ifdef NANOJIT_ARM
// ARM can't do an immediate op with immediates
// outside of +/-255 (for AND) or outside of
// 0..255 for others.
if (!forceReg)
{
if (rhs->isconst() && !isU8(rhs->constval()))
forceReg = true;
}
#endif
if (lhs != rhs && forceReg)
{
if ((rb = asm_binop_rhs_reg(ins)) == UnknownReg) {
rb = findRegFor(rhs, allow);
@@ -1079,7 +1101,7 @@ namespace nanojit
ra = findSpecificRegFor(lhs, rr);
// else, rA already has a register assigned.
if (!rhs->isconst() || op == LIR_mul)
if (forceReg)
{
if (lhs == rhs)
rb = ra;
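
The forceReg change works around ARM's limited immediate encodings: data-processing instructions only accept small immediates, so anything outside the range noted in the comment must be loaded into a register first. The conservative test used above, as a standalone sketch:

#include <cstdint>
static bool fitsArmImm8(int32_t v) {
    return v == (int32_t)(uint8_t)v;   // the isU8() check from the patch
}
// fitsArmImm8(200) == true; fitsArmImm8(300) and fitsArmImm8(-1) force a register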
@@ -1204,13 +1226,20 @@ namespace nanojit
LIns* cond = ins->oprnd1();
LOpcode condop = cond->opcode();
NanoAssert(cond->isCond());
#ifndef NJ_SOFTFLOAT
#if !defined(NJ_SOFTFLOAT)
if (condop >= LIR_feq && condop <= LIR_fge)
{
#if defined(NJ_ARM_VFP)
if (op == LIR_xf)
JNE(exit);
else
JE(exit);
#else
if (op == LIR_xf)
JP(exit);
else
JNP(exit);
#endif
asm_fcmp(cond);
break;
}
@@ -1309,9 +1338,13 @@ namespace nanojit
{
// only want certain regs
Register r = prepResultReg(ins, AllowableFlagRegs);
#ifdef NJ_ARM_VFP
SETE(r);
#else
// SETcc only sets low 8 bits, so extend
MOVZX8(r,r);
SETNP(r);
#endif
asm_fcmp(ins);
break;
}
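
Both VFP hunks above deal with the same wrinkle: on x86 a float compare involving NaN is "unordered" and sets the parity flag, so the non-VFP path tests JP/JNP (and SETNP for equality) to keep NaN from comparing equal, while ARM VFP can use the ordinary condition flags. The C-level semantics being modeled, as a sketch:

#include <cmath>
static bool feq(double a, double b) {
    // an unordered comparison (either side NaN) must yield false
    return !(std::isnan(a) || std::isnan(b)) && a == b;
}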
@@ -1433,8 +1466,13 @@ namespace nanojit
uint32_t Assembler::arFree(uint32_t idx)
{
// nothing to free
if (idx == 0)
return 0;
if (idx > 0 && _activation.entry[idx] == _activation.entry[idx+stack_direction(1)])
_activation.entry[idx+stack_direction(1)] = 0; // clear 2 slots for doubles
_activation.entry[idx] = 0;
return 0;
}


@@ -376,8 +376,6 @@ namespace nanojit
return l;
}
#define isS24(x) (((int32_t(x)<<8)>>8) == (x))
LInsp LirBufWriter::insFar(LOpcode op, LInsp target)
{
NanoAssert(op == LIR_skip || op == LIR_tramp);
@@ -1546,7 +1544,12 @@ namespace nanojit
}
else {
if (ref->isCall()) {
copyName(ref, _functions[ref->fid()]._name, funccounts.add(ref->fid()));
if (ref->isop(LIR_callh)) {
// we've presumably seen the other half already
ref = ref->oprnd1();
} else {
copyName(ref, _functions[ref->fid()]._name, funccounts.add(ref->fid()));
}
} else {
NanoAssert(ref->opcode() < sizeof(lirNames) / sizeof(lirNames[0]));
copyName(ref, lirNames[ref->opcode()], lircounts.add(ref->opcode()));
@@ -1652,7 +1655,6 @@ namespace nanojit
case LIR_fle:
case LIR_fgt:
case LIR_fge:
case LIR_qjoin:
case LIR_qiadd:
case LIR_qiand:
case LIR_qilsh:
@@ -1662,6 +1664,12 @@ namespace nanojit
formatRef(i->oprnd2()));
break;
case LIR_qjoin:
sprintf(s, "%s (%s), %s", lirNames[op],
formatIns(i->oprnd1()),
formatRef(i->oprnd2()));
break;
case LIR_qcmov:
case LIR_cmov:
sprintf(s, "%s ? %s : %s",

js/src/nanojit/NativeARM.cpp (new file, 1043 lines)

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -63,13 +63,8 @@ namespace nanojit
void Assembler::nInit(AvmCore*)
{
#ifdef NJ_THUMB_JIT
// Thumb mode does not have conditional move, alas
has_cmov = false;
#else
// all ARMs have conditional move
has_cmov = true;
#endif
}
NIns* Assembler::genPrologue(RegisterMask needSaving)
@@ -84,15 +79,9 @@ namespace nanojit
uint32_t savingCount = 0;
uint32_t savingMask = 0;
#if defined(NJ_THUMB_JIT)
savingCount = 5; // R4-R7, LR
savingMask = 0xF0;
(void)needSaving;
#else
savingCount = 9; //R4-R10,R11,LR
savingMask = SavedRegs | rmask(FRAME_PTR);
(void)needSaving;
#endif
// so for alignment purposes we've pushed return addr, fp, and savingCount registers
uint32_t stackPushed = 4 * (2+savingCount);
@@ -101,7 +90,6 @@ namespace nanojit
// Make room on stack for what we are doing
if (amt)
#ifdef NJ_THUMB_JIT
{
// largest value is 508 (7-bits << 2)
if (amt>508)
@@ -120,11 +108,6 @@ namespace nanojit
SUBi(SP, amt);
}
#else
{
SUBi(SP, amt);
}
#endif
verbose_only( verbose_outputf(" %p:",_nIns); )
verbose_only( verbose_output(" patch entry"); )
NIns *patchEntry = _nIns;
@@ -148,7 +131,8 @@ namespace nanojit
{
// target doesn't exist yet. emit jump to epilogue, and set up to patch later.
lr = placeGuardRecord(guard);
BL(_epilogue);
BL(_epilogue);
lr->jmp = _nIns;
}
@@ -171,7 +155,6 @@ namespace nanojit
NIns* Assembler::genEpilogue(RegisterMask restore)
{
#ifdef NJ_THUMB_JIT
(void)restore;
if (false) {
// interworking
@@ -184,18 +167,30 @@ namespace nanojit
}
MR(R0,R2); // return LinkRecord*
return _nIns;
#else
BX(LR); // return
MR(R0,R2); // return LinkRecord*
RegisterMask savingMask = restore | rmask(FRAME_PTR) | rmask(LR);
POP_mask(savingMask); // regs
return _nIns;
#endif
}
void Assembler::asm_call(LInsp ins)
{
const CallInfo* call = callInfoFor(ins->fid());
uint32_t atypes = call->_argtypes;
uint32_t roffset = 0;
// we need to detect if we have arg0 as LO followed by arg1 as F;
// in that case, we need to skip using r1 -- the F needs to be
// loaded in r2/r3, at least according to the ARM EABI and gcc 4.2's
// generated code.
bool arg0IsInt32FollowedByFloat = false;
while ((atypes & 3) != ARGSIZE_NONE) {
if (((atypes >> 4) & 3) == ARGSIZE_LO &&
((atypes >> 2) & 3) == ARGSIZE_F &&
((atypes >> 6) & 3) == ARGSIZE_NONE)
{
arg0IsInt32FollowedByFloat = true;
break;
}
atypes >>= 2;
}
CALL(call);
ArgSize sizes[10];
uint32_t argc = call->get_sizes(sizes);
@@ -205,8 +200,11 @@ namespace nanojit
ArgSize sz = sizes[j];
NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q);
// pre-assign registers R0-R3 for arguments (if they fit)
Register r = i < 4 ? argRegs[i] : UnknownReg;
Register r = (i+roffset) < 4 ? argRegs[i+roffset] : UnknownReg;
asm_arg(sz, ins->arg(j), r);
if (i == 0 && arg0IsInt32FollowedByFloat)
roffset = 1;
}
}
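
The register skip mirrors the ARM EABI rule that a 64-bit argument must start in an even-numbered core register: for f(int32_t, double) the int lands in r0, r1 is left unused, and the double occupies r2/r3. A toy assignment routine illustrating the rule (a sketch of the convention, not the nanojit code path; stack spill beyond r3 is ignored):

#include <cstdio>
enum ArgKind { I32, F64 };
static void assignRegs(const ArgKind* args, int n) {
    int r = 0;
    for (int i = 0; i < n; i++) {
        if (args[i] == F64) {
            if (r & 1) r++;                          // doubles start at an even reg
            std::printf("arg%d -> r%d/r%d\n", i, r, r + 1);
            r += 2;
        } else {
            std::printf("arg%d -> r%d\n", i, r++);
        }
    }
}
// ArgKind a[] = { I32, F64 }; assignRegs(a, 2);
// prints arg0 -> r0 and arg1 -> r2/r3 -- r1 is skipped, as asm_call does above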
@@ -226,40 +224,12 @@ namespace nanojit
Register Assembler::nRegisterAllocFromSet(int set)
{
#ifdef NJ_THUMB_JIT
// need to implement faster way
int i=0;
while (!(set & rmask((Register)i)))
i ++;
_allocator.free &= ~rmask((Register)i);
return (Register) i;
#else
// Note: The clz instruction only works on armv5 and up.
#ifndef UNDER_CE
#ifdef __ARMCC__
register int i;
__asm { clz i,set }
Register r = Register(31-i);
_allocator.free &= ~rmask(r);
return r;
#else
// need to implement faster way
int i=0;
while (!(set & rmask((Register)i)))
i ++;
_allocator.free &= ~rmask((Register)i);
return (Register) i;
#endif
#else
Register r;
r = (Register)_CountLeadingZeros(set);
r = (Register)(31-r);
_allocator.free &= ~rmask(r);
return r;
#endif
#endif
}
void Assembler::nRegisterResetAll(RegAlloc& a)
@@ -277,20 +247,16 @@ namespace nanojit
// This is ALWAYS going to be a long branch (using the BL instruction)
// Which is really 2 instructions, so we need to modify both
// XXX -- this is B, not BL, at least on non-Thumb..
// branch+2 because PC is always 2 instructions ahead on ARM/Thumb
int32_t offset = int(target) - int(branch+2);
//printf("---patching branch at %X to location %X (%d)\n", branch, target, offset);
//printf("---patching branch at 0x%08x to location 0x%08x (%d-0x%08x)\n", branch, target, offset, offset);
#ifdef NJ_THUMB_JIT
NanoAssert(-(1<<21) <= offset && offset < (1<<21));
*branch++ = (NIns)(0xF000 | (offset>>12)&0x7FF);
*branch = (NIns)(0xF800 | (offset>>1)&0x7FF);
#else
// ARM goodness, using unconditional B
*branch = (NIns)( COND_AL | (0xA<<24) | ((offset >>2)& 0xFFFFFF) );
#endif
}
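
For reference, the ARM arm of this patch writes an unconditional B whose 24-bit field is a signed word offset relative to PC+8 (the "2 instructions ahead" noted above). A standalone sketch of that encoding:

#include <cstdint>
static uint32_t encodeB(uint32_t branchAddr, uint32_t targetAddr) {
    int32_t offset = (int32_t)(targetAddr - (branchAddr + 8));      // PC reads 8 ahead
    return 0xEA000000u | (((uint32_t)offset >> 2) & 0x00FFFFFFu);   // COND_AL | B
}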
RegisterMask Assembler::hint(LIns* i, RegisterMask allow /* = ~0 */)
@@ -451,48 +417,12 @@ namespace nanojit
}
}
NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
{
NIns* save = _nIns;
#ifdef NJ_THUMB_JIT
NIns* was = (NIns*) (((((*(at+2))&0x7ff)<<12) | (((*(at+1))&0x7ff)<<1)) + (at-2+2));
_nIns = at + 2;
#else
NIns* was = (NIns*) (((*at&0xFFFFFF)<<2));
_nIns = at + 1;
#endif
BL(target);
#ifdef AVMPLUS_PORTING_API
NanoJIT_PortAPI_FlushInstructionCache(save, _nIns);
#endif
#if defined(UNDER_CE)
// we changed the code, so we need to do this (sadly)
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX)
// Just need to clear this one page (not even the whole page really)
//Page *page = (Page*)pageTop(_nIns);
register unsigned long _beg __asm("a1") = (unsigned long)(_nIns);
register unsigned long _end __asm("a2") = (unsigned long)(_nIns+2);
register unsigned long _flg __asm("a3") = 0;
register unsigned long _swi __asm("r7") = 0xF0002;
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
#endif
_nIns = save;
return was;
}
void Assembler::nativePageReset()
{
#ifdef NJ_THUMB_JIT
_nPool = 0;
_nSlot = 0;
_nExitPool = 0;
_nExitSlot = 0;
#else
_nSlot = 0;
_nExitSlot = 0;
#endif
}
void Assembler::nativePageSetup()
@@ -501,7 +431,6 @@ namespace nanojit
if (!_nExitIns) _nExitIns = pageAlloc(true);
//fprintf(stderr, "assemble onto %x exits into %x\n", (int)_nIns, (int)_nExitIns);
#ifdef NJ_THUMB_JIT
if (!_nPool) {
_nSlot = _nPool = (int*)_nIns;
@@ -518,23 +447,42 @@ namespace nanojit
// no branch needed since this follows the epilogue
}
#else
if (!_nSlot)
{
// This needs to be done or the samepage macro gets confused
_nIns--;
_nExitIns--;
// constpool starts at top of page and goes down,
// code starts at bottom of page and moves up
_nSlot = (int*)(pageTop(_nIns)+1);
}
#endif
}
void Assembler::flushCache(NIns* n1, NIns* n2) {
#if defined(UNDER_CE)
// we changed the code, so we need to do this (sadly)
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX)
// Just need to clear this one page (not even the whole page really)
//Page *page = (Page*)pageTop(_nIns);
register unsigned long _beg __asm("a1") = (unsigned long)(n1);
register unsigned long _end __asm("a2") = (unsigned long)(n2);
register unsigned long _flg __asm("a3") = 0;
register unsigned long _swi __asm("r7") = 0xF0002;
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
#endif
}
#ifdef NJ_THUMB_JIT
NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
{
NIns* save = _nIns;
NIns* was = (NIns*) (((((*(at+2))&0x7ff)<<12) | (((*(at+1))&0x7ff)<<1)) + (at-2+2));
_nIns = at + 2;
BL(target);
flushCache(_nIns, _nIns+2);
#ifdef AVMPLUS_PORTING_API
// XXX save.._nIns+2? really?
NanoJIT_PortAPI_FlushInstructionCache(save, _nIns+2);
#endif
_nIns = save;
return was;
}
void Assembler::STi(Register b, int32_t d, int32_t v)
{
@@ -551,6 +499,7 @@ namespace nanojit
void Assembler::underrunProtect(int bytes)
{
// perhaps bytes + sizeof(PageHeader)/sizeof(NIns) + 4 ?
intptr_t u = bytes + 4;
if (!samepage(_nIns-u, _nIns-1)) {
NIns* target = _nIns;
@@ -854,55 +803,8 @@ namespace nanojit
}
}
#else // ARM_JIT
void Assembler::underrunProtect(int bytes)
{
intptr_t u = (bytes) + 4;
if ( (samepage(_nIns,_nSlot) && (((intptr_t)_nIns-u) <= intptr_t(_nSlot+1))) ||
(!samepage((intptr_t)_nIns-u,_nIns)) )
{
NIns* target = _nIns;
_nIns = pageAlloc(_inExit);
JMP_nochk(target);
_nSlot = pageTop(_nIns);
}
}
bool isB24(NIns *target, NIns *cur)
{
int offset = int(target)-int(cur-2+2);
return (-(1<<24) <= offset && offset < (1<<24));
}
void Assembler::CALL(const CallInfo *ci)
{
intptr_t addr = ci->_address;
if (isB24((NIns*)addr, _nIns))
{
// we can do this with a single BL call
underrunProtect(4);
BL(addr);
asm_output2("call %08X:%s", addr, ci->_name);
}
else
{
underrunProtect(16);
*(--_nIns) = (NIns)((addr));
*(--_nIns) = (NIns)( COND_AL | (0x9<<21) | (0xFFF<<8) | (1<<4) | (IP) );
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) );
*(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (PC<<16) | (IP<<12) | (4));
asm_output2("call %08X:%s", addr, ci->_name);
}
}
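
Since _nIns grows downward, the four stores in the far-call path are emitted in reverse. Read in execution order, a best-effort decode of the constants above:

//   LDR IP, [PC, #4]   ; load the inline literal (the call target) into IP
//   ADD LR, PC, #4     ; return address = first instruction past the literal
//   BX  IP             ; jump to the target
//   .word addr         ; the literal itself, skipped over via LR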
#endif // NJ_THUMB_JIT
void Assembler::LD32_nochk(Register r, int32_t imm)
{
#ifdef NJ_THUMB_JIT
// Can we reach the current slot/pool?
int offset = (int)(_nSlot) - (int)(_nIns);
if ((offset>=NJ_MAX_CPOOL_OFFSET || offset<0) ||
@@ -932,38 +834,6 @@ namespace nanojit
int data_off = int(data) - (int(_nIns+1)&~3);
*(--_nIns) = (NIns)(0x4800 | r<<8 | data_off>>2);
asm_output3("ldr %s,%d(PC) [%X]",gpn(r),data_off,(int)data);
#else
// We can always reach the const pool, since it's on the same page (<4096)
if (!_nSlot)
_nSlot = pageTop(_nIns);
if ( (_nSlot+1) >= (_nIns-1) )
{
// This would overrun the code, so we need a new page
// and a jump to that page
NIns* target = _nIns;
_nIns = pageAlloc(_inExit);
JMP_nochk(target);
// reset the slot
_nSlot = pageTop(_nIns);
}
*(++_nSlot) = (int)imm;
int offset = (int)(_nSlot) - (int)(_nIns+1);
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | ((r)<<12) | -(offset));
asm_output2("ld %s,%d",gpn(r),imm);
#endif
}
#endif /* FEATURE_NANOJIT */
}


@@ -68,7 +68,9 @@ namespace nanojit
debug_only( uint32_t count; )
debug_only( RegisterMask managed; ) // bitfield of 0..NJ_MAX_REGISTERS denoting which are under our management
LIns* active[NJ_MAX_REGISTERS]; // active[r] = OP that defines r
// RegisterMask is a 32-bit value, so we can never have more than 32 active.
// hardcode 32 here in case we have non-contiguous register numbers
LIns* active[32]; // active[r] = OP that defines r
RegisterMask free;
RegisterMask used;


@@ -151,11 +151,13 @@ namespace nanojit
#define isU8(i) ( int32_t(i) == uint8_t(i) )
#define isS16(i) ( int32_t(i) == int16_t(i) )
#define isU16(i) ( int32_t(i) == uint16_t(i) )
#define isS24(i) ( ((int32_t(i)<<8)>>8) == (i) )
#define alignTo(x,s) ((((uintptr_t)(x)))&~(((uintptr_t)s)-1))
#define alignUp(x,s) ((((uintptr_t)(x))+(((uintptr_t)s)-1))&~(((uintptr_t)s)-1))
#define pageTop(x) ( (int*)alignTo(x,NJ_PAGE_SIZE) )
#define pageDataStart(x) ( (int*)(alignTo(x,NJ_PAGE_SIZE) + sizeof(PageHeader)) )
#define pageBottom(x) ( (int*)(alignTo(x,NJ_PAGE_SIZE)+NJ_PAGE_SIZE)-1 )
#define samepage(x,y) (pageTop(x) == pageTop(y))
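
The new isS24 here replaces the copy deleted from LIR.cpp above. Shifting left by 8 and arithmetic-shifting back sign-extends bit 23, so the round trip is lossless exactly for signed 24-bit values. As a sketch (mirroring the macro, and assuming an arithmetic right shift as the macro does):

#include <cstdint>
static bool fitsS24(int32_t x) {
    return ((x << 8) >> 8) == x;   // lossless iff x fits in 24 signed bits
}
// fitsS24(0x7FFFFF) == true, fitsS24(0x800000) == false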