b=449526, TM: fix up ARM code generation / softfloat

This commit is contained in:
Vladimir Vukicevic 2008-09-02 22:29:23 -07:00
parent e3916b316d
commit 420e72ed85
9 changed files with 519 additions and 175 deletions

View File

@ -98,6 +98,20 @@ BUILTIN3(Object_p_propertyIsEnumerable,
BUILTIN2(BooleanToNumber, LO, LO, F, jsdouble, JSContext*, jsint, 1, 1)
BUILTIN2(ObjectToString, LO, LO, P, JSString*, JSContext*, JSObject*, 0, 0)
BUILTIN3(Array_1int, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, jsint, 0, 0)
// soft float
BUILTIN1(fneg, F, F, jsdouble, jsdouble, 1, 1)
BUILTIN1(i2f, LO, F, jsdouble, jsint, 1, 1)
BUILTIN1(u2f, LO, F, jsdouble, jsuint, 1, 1)
BUILTIN2(fcmpeq, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fcmplt, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fcmple, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fcmpgt, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fcmpge, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fmul, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
BUILTIN2(fadd, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
BUILTIN2(fdiv, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
BUILTIN2(fsub, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
BUILTIN3(Array_1str, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, JSString*, 0, 0)
BUILTIN4(Array_2obj, LO, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, JSObject*, JSObject**, 0, 0)
BUILTIN5(Array_3num, LO, LO, F, F, F, P, JSObject*, JSContext*, JSObject*, jsdouble, jsdouble, jsdouble, 0, 0)

View File

@ -719,6 +719,79 @@ js_Array_3num(JSContext* cx, JSObject* proto, jsdouble n1, jsdouble n2, jsdouble
return NULL;)
}
/* soft float */
/*
 * Soft-float helper builtins.  On builds without hardware FP (ARM
 * softfloat), the tracer's SoftFloatFilter rewrites LIR floating-point
 * operations into calls to these helpers, which perform the
 * double-precision work in C.  The comparison helpers return a jsint
 * (0 or 1) rather than a double so the result can be consumed directly
 * as an integer condition.
 */
/* Negate a double. */
jsdouble FASTCALL
js_fneg(jsdouble x)
{
return -x;
}
/* Convert a signed 32-bit integer to a double. */
jsdouble FASTCALL
js_i2f(jsint i)
{
return i;
}
/* Convert an unsigned 32-bit integer to a double. */
jsdouble FASTCALL
js_u2f(jsuint u)
{
return u;
}
/* Double comparisons: each returns 1 if the relation holds, else 0. */
jsint FASTCALL
js_fcmpeq(jsdouble x, jsdouble y)
{
return x==y;
}
jsint FASTCALL
js_fcmplt(jsdouble x, jsdouble y)
{
return x < y;
}
jsint FASTCALL
js_fcmple(jsdouble x, jsdouble y)
{
return x <= y;
}
jsint FASTCALL
js_fcmpgt(jsdouble x, jsdouble y)
{
return x > y;
}
jsint FASTCALL
js_fcmpge(jsdouble x, jsdouble y)
{
return x >= y;
}
/* Double arithmetic: multiply, add, divide, subtract. */
jsdouble FASTCALL
js_fmul(jsdouble x, jsdouble y)
{
return x * y;
}
jsdouble FASTCALL
js_fadd(jsdouble x, jsdouble y)
{
return x + y;
}
jsdouble FASTCALL
js_fdiv(jsdouble x, jsdouble y)
{
return x / y;
}
jsdouble FASTCALL
js_fsub(jsdouble x, jsdouble y)
{
return x - y;
}
#define LO ARGSIZE_LO
#define F ARGSIZE_F
#define Q ARGSIZE_Q

View File

@ -1,4 +1,4 @@
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=4 sw=4 et tw=99:
*
* ***** BEGIN LICENSE BLOCK *****
@ -277,12 +277,59 @@ Oracle::clear()
_dontDemote.reset();
}
// Return true if instruction i converts an int to a double: either the
// direct LIR_i2f opcode, or (on ARM softfloat) the lowered form produced
// by SoftFloatFilter -- a qjoin of a call to the F_i2f builtin (low word)
// and its matching callh (high word).
static bool isi2f(LInsp i)
{
if (i->isop(LIR_i2f))
return true;
#ifdef NANOJIT_ARM
// Lowered softfloat shape: qjoin(call, callh).  Check the call is
// actually the i2f builtin before claiming a match.
if (i->isop(LIR_qjoin) &&
i->oprnd1()->isop(LIR_call) &&
i->oprnd2()->isop(LIR_callh))
{
if (i->oprnd1()->imm8() == F_i2f)
return true;
}
#endif
return false;
}
// Return true if instruction i converts an unsigned int to a double:
// either the direct LIR_u2f opcode, or (on ARM softfloat) the lowered
// qjoin(call F_u2f, callh) form emitted by SoftFloatFilter.
static bool isu2f(LInsp i)
{
if (i->isop(LIR_u2f))
return true;
#ifdef NANOJIT_ARM
// Lowered softfloat shape: qjoin(call, callh) where the call is the
// u2f builtin.
if (i->isop(LIR_qjoin) &&
i->oprnd1()->isop(LIR_call) &&
i->oprnd2()->isop(LIR_callh))
{
if (i->oprnd1()->imm8() == F_u2f)
return true;
}
#endif
return false;
}
// Given an instruction that satisfied isi2f()/isu2f(), return the
// original integer operand.  For the ARM softfloat qjoin form, the
// integer is argument 0 of the low-word call; otherwise it is the
// direct operand of LIR_i2f/LIR_u2f.
// NOTE(review): assumes callers only pass instructions for which
// isi2f()/isu2f() returned true -- a bare qjoin of something else
// would be misinterpreted here.
static LInsp iu2fArg(LInsp i)
{
#ifdef NANOJIT_ARM
if (i->isop(LIR_qjoin))
return i->oprnd1()->arg(0);
#endif
return i->oprnd1();
}
static LIns* demote(LirWriter *out, LInsp i)
{
if (i->isCall())
return callArgN(i, 0);
if (i->isop(LIR_i2f) || i->isop(LIR_u2f))
return i->oprnd1();
if (isi2f(i) || isu2f(i))
return iu2fArg(i);
if (i->isconst())
return i;
AvmAssert(i->isconstq());
@ -294,14 +341,14 @@ static LIns* demote(LirWriter *out, LInsp i)
static bool isPromoteInt(LIns* i)
{
jsdouble d;
return i->isop(LIR_i2f) || i->isconst() ||
return isi2f(i) || i->isconst() ||
(i->isconstq() && ((d = i->constvalf()) == (jsdouble)(jsint)d) && !JSDOUBLE_IS_NEGZERO(d));
}
static bool isPromoteUint(LIns* i)
{
jsdouble d;
return i->isop(LIR_u2f) || i->isconst() ||
return isu2f(i) || i->isconst() ||
(i->isconstq() && ((d = i->constvalf()) == (jsdouble)(jsuint)d));
}
@ -324,6 +371,92 @@ static bool overflowSafe(LIns* i)
((c->constval() > 0)));
}
#ifdef NANOJIT_ARM
// LIR writer filter that lowers floating-point LIR operations into calls
// to the js_f* softfloat builtins, for ARM targets without hardware FP.
// A 64-bit double result is modeled as qjoin(call, callh): the call
// yields the low 32 bits and LIR_callh retrieves the high 32 bits.
// Also masks shift counts to 0..31, since ARM does not truncate shift
// amounts to 5 bits the way x86 does.
class SoftFloatFilter: public LirWriter
{
public:
SoftFloatFilter(LirWriter* out):
LirWriter(out)
{
}
// Emit a call returning a 64-bit (double) value as the pair
// qjoin(call, callh).
LInsp quadCall(uint32_t fid, LInsp args[]) {
LInsp qlo, qhi;
qlo = out->insCall(fid, args);
qhi = out->ins1(LIR_callh, qlo);
return out->qjoin(qlo, qhi);
}
// Lower unary float ops (negate, int->double, uint->double) to
// builtin calls; pass everything else through.
LInsp ins1(LOpcode v, LInsp s0)
{
if (v == LIR_fneg)
return quadCall(F_fneg, &s0);
if (v == LIR_i2f)
return quadCall(F_i2f, &s0);
if (v == LIR_u2f)
return quadCall(F_u2f, &s0);
return out->ins1(v, s0);
}
LInsp ins2(LOpcode v, LInsp s0, LInsp s1)
{
LInsp args[2];
LInsp bv;
// NB: the fmap tables below index by (v - first opcode), so they
// depend on LIR_fadd..LIR_fdiv and LIR_feq..LIR_fge being
// consecutive opcodes in exactly this order -- renumbering or
// reordering those opcodes breaks this silently.
if (LIR_fadd <= v && v <= LIR_fdiv) {
static uint32_t fmap[] = { F_fadd, F_fsub, F_fmul, F_fdiv };
// args are filled in reverse (args[0] = second operand),
// matching insCall's argument ordering convention.
args[0] = s1;
args[1] = s0;
return quadCall(fmap[v - LIR_fadd], args);
}
if (LIR_feq <= v && v <= LIR_fge) {
static uint32_t fmap[] = { F_fcmpeq, F_fcmplt, F_fcmpgt, F_fcmple, F_fcmpge };
args[0] = s1;
args[1] = s0;
// The compare builtins return a jsint 0/1; normalize to a LIR
// condition by comparing against 1.
bv = out->insCall(fmap[v - LIR_feq], args);
return out->ins2(LIR_eq, bv, out->insImm(1));
}
// not really a softfloat filter, but needed on ARM --
// arm doesn't mask shifts to 31 like x86 does
if (v == LIR_lsh ||
v == LIR_rsh ||
v == LIR_ush)
{
// Constant shift counts are masked in place; variable counts
// get an explicit AND with 31.
if (s1->isconst())
s1->setimm16(s1->constval() & 31);
else
s1 = out->ins2(LIR_and, s1, out->insImm(31));
return out->ins2(v, s0, s1);
}
return out->ins2(v, s0, s1);
}
LInsp insCall(uint32_t fid, LInsp args[])
{
// if the return type is ARGSIZE_F, we have
// to do a quadCall ( qjoin(call,callh) )
if ((builtins[fid]._argtypes & 3) == ARGSIZE_F)
return quadCall(fid, args);
return out->insCall(fid, args);
}
};
#endif
class FuncFilter: public LirWriter
{
TraceRecorder& recorder;
@ -427,9 +560,8 @@ public:
case F_DoubleToUint32:
if (s0->isconstq())
return out->insImm(js_DoubleToECMAUint32(s0->constvalf()));
if (s0->isop(LIR_i2f) || s0->isop(LIR_u2f)) {
return s0->oprnd1();
}
if (isi2f(s0) || isu2f(s0))
return iu2fArg(s0);
break;
case F_DoubleToInt32:
if (s0->isconstq())
@ -442,9 +574,9 @@ public:
return out->ins2(op, demote(out, lhs), demote(out, rhs));
}
}
if (s0->isop(LIR_i2f) || s0->isop(LIR_u2f)) {
return s0->oprnd1();
}
if (isi2f(s0) || isu2f(s0))
return iu2fArg(s0);
// XXX ARM -- check for qjoin(call(F_UnboxDouble),call(F_UnboxDouble))
if (s0->isCall() && s0->fid() == F_UnboxDouble) {
LIns* args2[] = { callArgN(s0, 0) };
return out->insCall(F_UnboxInt32, args2);
@ -688,6 +820,9 @@ TraceRecorder::TraceRecorder(JSContext* cx, GuardRecord* _anchor, Fragment* _fra
#ifdef DEBUG
if (verbose_debug)
lir = verbose_filter = new (&gc) VerboseWriter(&gc, lir, lirbuf->names);
#endif
#ifdef NANOJIT_ARM
lir = float_filter = new (&gc) SoftFloatFilter(lir);
#endif
lir = cse_filter = new (&gc) CseFilter(lir, &gc);
lir = expr_filter = new (&gc) ExprFilter(lir);
@ -732,6 +867,9 @@ TraceRecorder::~TraceRecorder()
delete cse_filter;
delete expr_filter;
delete func_filter;
#ifdef NANOJIT_ARM
delete float_filter;
#endif
delete lir_buf_writer;
}
@ -1435,7 +1573,7 @@ TraceRecorder::checkType(jsval& v, uint8 t, bool& unstable)
if (!isNumber(v))
return false; /* not a number? type mismatch */
LIns* i = get(&v);
if (!i->isop(LIR_i2f)) {
if (!isi2f(i)) {
debug_only_v(printf("int slot is !isInt32, slot #%d, triggering re-compilation\n",
!isGlobal(&v)
? nativeStackOffset(&v)
@ -1445,11 +1583,11 @@ TraceRecorder::checkType(jsval& v, uint8 t, bool& unstable)
return true; /* keep checking types, but request re-compilation */
}
/* Looks good, slot is an int32, the last instruction should be i2f. */
JS_ASSERT(isInt32(v) && i->isop(LIR_i2f));
JS_ASSERT(isInt32(v) && (i->isop(LIR_i2f) || i->isop(LIR_qjoin)));
/* We got the final LIR_i2f as we expected. Overwrite the value in that
slot with the argument of i2f since we want the integer store to flow along
the loop edge, not the casted value. */
set(&v, i->oprnd1());
set(&v, iu2fArg(i));
return true;
}
if (t == JSVAL_DOUBLE) {
@ -2080,10 +2218,11 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
/* execute previously recorded trace */
TreeInfo* ti = (TreeInfo*)f->vmprivate;
debug_only_v(printf("entering trace at %s:%u@%u, native stack slots: %u\n",
debug_only_v(printf("entering trace at %s:%u@%u, native stack slots: %u code: %p\n",
cx->fp->script->filename,
js_PCToLineNumber(cx, cx->fp->script, cx->fp->regs->pc),
cx->fp->regs->pc - cx->fp->script->code, ti->maxNativeStackSlots););
cx->fp->regs->pc - cx->fp->script->code, ti->maxNativeStackSlots,
f->code()););
JSTraceMonitor* tm = &JS_TRACE_MONITOR(cx);
unsigned ngslots = tm->globalSlots->length();

View File

@ -221,6 +221,9 @@ class TraceRecorder {
nanojit::LirWriter* cse_filter;
nanojit::LirWriter* expr_filter;
nanojit::LirWriter* func_filter;
#ifdef NANOJIT_ARM
nanojit::LirWriter* float_filter;
#endif
nanojit::LIns* cx_ins;
nanojit::LIns* gp_ins;
nanojit::LIns* eos_ins;

View File

@ -789,33 +789,24 @@ namespace nanojit
NanoAssert(_branchStateMap->isEmpty());
_branchStateMap = 0;
#if defined(UNDER_CE)
#ifdef AVMPLUS_ARM
// If we've modified the code, we need to flush so we don't end up trying
// to execute junk
# if defined(UNDER_CE)
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX) && defined(AVMPLUS_ARM)
// N A S T Y - obviously have to fix this
// determine our page range
# elif defined(AVMPLUS_LINUX)
// XXX fixme flush adjacent pages together
for (int i = 0; i < 2; i++) {
Page *p = (i == 0) ? _nativePages : _nativeExitPages;
Page *page=0, *first=0, *last=0;
for (int i=2;i!=0;i--) {
page = first = last = (i==2 ? _nativePages : _nativeExitPages);
while (page)
{
if (page<first)
first = page;
if (page>last)
last = page;
page = page->next;
while (p) {
flushCache((NIns*)p, (NIns*)((intptr_t)(p) + NJ_PAGE_SIZE));
p = p->next;
}
register unsigned long _beg __asm("a1") = (unsigned long)(first);
register unsigned long _end __asm("a2") = (unsigned long)(last+NJ_PAGE_SIZE);
register unsigned long _flg __asm("a3") = 0;
register unsigned long _swi __asm("r7") = 0xF0002;
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
}
# endif
#endif
# ifdef AVMPLUS_PORTING_API
NanoJIT_PortAPI_FlushInstructionCache(_nIns, _endJit1Addr);
NanoJIT_PortAPI_FlushInstructionCache(_nExitIns, _endJit2Addr);
@ -861,7 +852,7 @@ namespace nanojit
switch(op)
{
default:
NanoAssertMsg(false, "unsupported LIR instruction");
NanoAssertMsgf(false, ("unsupported LIR instruction: %d (~0x40: %d)\n",op, op&~LIR64));
break;
case LIR_short:
@ -1063,7 +1054,20 @@ namespace nanojit
Register rb = UnknownReg;
RegisterMask allow = GpRegs;
if (lhs != rhs && (op == LIR_mul || !rhs->isconst()))
bool forceReg = (op == LIR_mul || !rhs->isconst());
#ifdef NANOJIT_ARM
// Arm can't do an immediate op with immediates
// outside of +/-255 (for AND) or outside of
// 0..255 for others.
if (!forceReg)
{
if (rhs->isconst() && !isU8(rhs->constval()))
forceReg = true;
}
#endif
if (lhs != rhs && forceReg)
{
if ((rb = asm_binop_rhs_reg(ins)) == UnknownReg) {
rb = findRegFor(rhs, allow);
@ -1079,7 +1083,7 @@ namespace nanojit
ra = findSpecificRegFor(lhs, rr);
// else, rA already has a register assigned.
if (!rhs->isconst() || op == LIR_mul)
if (forceReg)
{
if (lhs == rhs)
rb = ra;

View File

@ -1546,7 +1546,12 @@ namespace nanojit
}
else {
if (ref->isCall()) {
if (ref->isop(LIR_callh)) {
// we've presumably seen the other half already
ref = ref->oprnd1();
} else {
copyName(ref, _functions[ref->fid()]._name, funccounts.add(ref->fid()));
}
} else {
NanoAssert(ref->opcode() < sizeof(lirNames) / sizeof(lirNames[0]));
copyName(ref, lirNames[ref->opcode()], lircounts.add(ref->opcode()));
@ -1652,7 +1657,6 @@ namespace nanojit
case LIR_fle:
case LIR_fgt:
case LIR_fge:
case LIR_qjoin:
case LIR_qiadd:
case LIR_qiand:
case LIR_qilsh:
@ -1662,6 +1666,12 @@ namespace nanojit
formatRef(i->oprnd2()));
break;
case LIR_qjoin:
sprintf(s, "%s (%s), %s", lirNames[op],
formatIns(i->oprnd1()),
formatRef(i->oprnd2()));
break;
case LIR_qcmov:
case LIR_cmov:
sprintf(s, "%s ? %s : %s",

View File

@ -149,11 +149,14 @@ namespace nanojit
#define DECLARE_PLATFORM_ASSEMBLER()\
const static Register argRegs[4], retRegs[2];\
void LD32_nochk(Register r, int32_t imm);\
void BL(NIns*);\
void BL_far(NIns*);\
void CALL(const CallInfo*);\
void underrunProtect(int bytes);\
bool has_cmov;\
void nativePageReset();\
void nativePageSetup();\
void flushCache(NIns*,NIns*);\
int* _nSlot;\
int* _nExitSlot;
@ -232,6 +235,7 @@ ShiftOperator;
*(--_nIns) = (NIns)( COND_AL | OP_IMM | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) );\
asm_output2("and %s,%d",gpn(_r),(_imm));}\
else if ((_imm)<0 && (_imm)>-256) {\
underrunProtect(8);\
*(--_nIns) = (NIns)( COND_AL | ((_r)<<16) | ((_r)<<12) | (Scratch) );\
asm_output2("and %s,%s",gpn(_r),gpn(Scratch));\
*(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | ((Scratch)<<12) | (((_imm)^0xFFFFFFFF)&0xFF) );\
@ -532,6 +536,7 @@ ShiftOperator;
//#define INT3() underrunProtect(1); *(--_nIns) = 0xcc; asm_output("int3")
//#define RET() INT3()
#define BKPT_nochk() do { *(--_nIns) = (NIns)( (0xE<<24) | (0x12<<20) | (0x7<<4) ); } while (0);
// this is pushing a reg
#define PUSHr(_r) do {\
@ -564,49 +569,66 @@ ShiftOperator;
*(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | (_mask) );\
asm_output1("pop %x", (_mask));} while (0)
// takes an offset (right?)
#define JMP_long_nochk_offset(_off) do {\
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((_off)>>2) & 0xFFFFFF) ); \
asm_output1("jmp_l_n 0x%08x",(_off));} while (0)
// take an address, not an offset
#define JMP(t) do {\
underrunProtect(4);\
intptr_t tt = (intptr_t)(t) - ((intptr_t)_nIns + 4);\
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((tt)>>2) & 0xFFFFFF) ); \
asm_output1("JMP 0x%08x\n",(unsigned int)(t)); } while (0)
#define JMP_nochk(t) do {\
intptr_t tt = (intptr_t)(t) - ((intptr_t)_nIns + 4);\
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((tt)>>2) & 0xFFFFFF) ); \
asm_output1("JMP 0x%08x\n",(unsigned int)(t)); } while (0)
#define JMP_long_placeholder() do {JMP_long(0xffffffff); } while(0)
#define JMP_long(_t) do {\
underrunProtect(4);\
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((_t)>>2) & 0xFFFFFF) ); \
asm_output1("JMP_long 0x%08x\n", (unsigned int)(_t) ); } while (0)
#define BL(_t) do {\
underrunProtect(4);\
intptr_t _tt = (intptr_t)(_t) - ((intptr_t)_nIns + 4);\
*(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((_tt)>>2) & 0xFFFFFF) ); \
asm_output2("BL 0x%08x offset=%d",(intptr_t)(_nIns) + (_tt),(_tt)) } while (0)
#define JMP_long_nochk(_t) do {\
intptr_t tt = (intptr_t)(_t) - ((intptr_t)_nIns + 4);\
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((tt)>>2) & 0xFFFFFF) ); \
asm_output1("JMP_l_n 0x%08x\n", (unsigned int)(_t)) } while (0)
#define PC_OFFSET_FROM(target,frompc) ((intptr_t)(target) - ((intptr_t)(frompc) + 8))
#define JMP_S24_OFFSET_OK(offs) ((-(1<<24)) <= (offs) && (offs) < (1<<24))
// (XXX This ought to be a function instead of a macro)
//
// Branch to target address _t with condition _c, doing underrun
// checks (_chk == 1) or skipping them (_chk == 0).
//
// If the jump fits in a relative jump (+/-32MB), emit that.
// If the jump is unconditional, emit the dest address inline in
// the instruction stream and load it into pc.
// If the jump has a condition, but no one has mucked with _nIns and our _nSlot
// pointer is valid, stick the constant in the slot and emit a conditional
// load into pc.
// Otherwise, emit the conditional load into pc from a nearby constant,
// and emit a jump to jump over it in case the condition fails.
//
// NB: JMP_nochk depends on this not calling samepage() when _c == AL
#define B_cond_chk(_c,_t,_chk) do { \
int32 offs = PC_OFFSET_FROM(_t,(intptr_t)(_nIns)-4); \
if (JMP_S24_OFFSET_OK(offs)) { \
if(_chk) underrunProtect(4); \
*(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | (((offs)>>2) & 0xFFFFFF) ); \
} else if (_c == AL) { \
if(_chk) underrunProtect(8); \
*(--_nIns) = (NIns)(_t); \
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | 0x4 ); \
} else if (samepage(_nIns,_nSlot)) { \
if(_chk) underrunProtect(8); \
*(++_nSlot) = (NIns)(_t); \
offs = PC_OFFSET_FROM(_nSlot,(intptr_t)(_nIns)-4); \
NanoAssert(offs < 0); \
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | ((-offs) & 0xFFFFFF) ); \
} else { \
if(_chk) underrunProtect(24); \
*(--_nIns) = (NIns)(_t); \
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((-4)>>2) & 0xFFFFFF ); \
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | 0x0 ); \
} \
asm_output2("%s %p\n", _c == AL ? "jmp" : "b(cnd)", (void*)(_t)); \
} while(0)
#define B_cond(_c,_t) \
underrunProtect(4);\
intptr_t tt = (intptr_t)(_t) - ((intptr_t)_nIns + 4);\
*(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | ((tt >>2)& 0xFFFFFF) ); \
asm_output2("b(cond) 0x%08x (%tX)",(unsigned int)(_t), tt);
B_cond_chk(_c,_t,1)
// NB: don't use COND_AL here, we shift the condition into place!
#define JMP(_t) \
B_cond_chk(AL,_t,1)
#define JMP_nochk(_t) \
B_cond_chk(AL,_t,0)
// emit a placeholder that will be filled in later by nPatchBranch;
// emit two breakpoint instructions in case something goes wrong with
// the patching.
#define JMP_long_placeholder() do { \
underrunProtect(8); \
BKPT_nochk(); \
BKPT_nochk(); \
} while(0)
#define JA(t) do {B_cond(HI,t); asm_output1("ja 0x%08x",(unsigned int)t); } while(0)
#define JNA(t) do {B_cond(LS,t); asm_output1("jna 0x%08x",(unsigned int)t); } while(0)

View File

@ -148,7 +148,14 @@ namespace nanojit
{
// target doesn't exist yet. emit jump to epilog, and set up to patch later.
lr = placeGuardRecord(guard);
#ifdef NJ_THUMB_JIT
BL(_epilogue);
#else
// we need to know that there's an extra immediate value available
// for us; always force a far jump here.
BL_far(_epilogue);
#endif
lr->jmp = _nIns;
}
@ -196,6 +203,25 @@ namespace nanojit
void Assembler::asm_call(LInsp ins)
{
const CallInfo* call = callInfoFor(ins->fid());
uint32_t atypes = call->_argtypes;
uint32_t roffset = 0;
// we need to detect if we have arg0 as LO followed by arg1 as F;
// in that case, we need to skip using r1 -- the F needs to be
// loaded in r2/r3, at least according to the ARM EABI and gcc 4.2's
// generated code.
bool arg0IsInt32FollowedByFloat = false;
while ((atypes & 3) != ARGSIZE_NONE) {
if (((atypes >> 4) & 3) == ARGSIZE_LO &&
((atypes >> 2) & 3) == ARGSIZE_F &&
((atypes >> 6) & 3) == ARGSIZE_NONE)
{
arg0IsInt32FollowedByFloat = true;
break;
}
atypes >>= 2;
}
CALL(call);
ArgSize sizes[10];
uint32_t argc = call->get_sizes(sizes);
@ -205,8 +231,11 @@ namespace nanojit
ArgSize sz = sizes[j];
NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q);
// pre-assign registers R0-R3 for arguments (if they fit)
Register r = i < 4 ? argRegs[i] : UnknownReg;
Register r = (i+roffset) < 4 ? argRegs[i+roffset] : UnknownReg;
asm_arg(sz, ins->arg(j), r);
if (i == 0 && arg0IsInt32FollowedByFloat)
roffset = 1;
}
}
@ -277,19 +306,28 @@ namespace nanojit
// This is ALWAYS going to be a long branch (using the BL instruction)
// Which is really 2 instructions, so we need to modify both
// XXX -- this is B, not BL, at least on non-Thumb..
// branch+2 because PC is always 2 instructions ahead on ARM/Thumb
int32_t offset = int(target) - int(branch+2);
//printf("---patching branch at %X to location %X (%d)\n", branch, target, offset);
//printf("---patching branch at 0x%08x to location 0x%08x (%d-0x%08x)\n", branch, target, offset, offset);
#ifdef NJ_THUMB_JIT
NanoAssert(-(1<<21) <= offset && offset < (1<<21));
*branch++ = (NIns)(0xF000 | (offset>>12)&0x7FF);
*branch = (NIns)(0xF800 | (offset>>1)&0x7FF);
#else
// We have 2 words to work with here -- if offset is in range of a 24-bit
// relative jump, emit that; otherwise, we do a pc-relative load into pc.
if (-(1<<24) <= offset & offset < (1<<24)) {
// ARM goodness, using unconditional B
*branch = (NIns)( COND_AL | (0xA<<24) | ((offset >>2) & 0xFFFFFF) );
} else {
// LDR pc,[pc]
*branch++ = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | ( 0x004 ) );
*branch = (NIns)target;
}
#endif
}
@ -451,37 +489,6 @@ namespace nanojit
}
}
NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
{
NIns* save = _nIns;
#ifdef NJ_THUMB_JIT
NIns* was = (NIns*) (((((*(at+2))&0x7ff)<<12) | (((*(at+1))&0x7ff)<<1)) + (at-2+2));
_nIns = at + 2;
#else
NIns* was = (NIns*) (((*at&0xFFFFFF)<<2));
_nIns = at + 1;
#endif
BL(target);
#ifdef AVMPLUS_PORTING_API
NanoJIT_PortAPI_FlushInstructionCache(save, _nIns);
#endif
#if defined(UNDER_CE)
// we changed the code, so we need to do this (sadly)
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX)
// Just need to clear this one page (not even the whole page really)
//Page *page = (Page*)pageTop(_nIns);
register unsigned long _beg __asm("a1") = (unsigned long)(_nIns);
register unsigned long _end __asm("a2") = (unsigned long)(_nIns+2);
register unsigned long _flg __asm("a3") = 0;
register unsigned long _swi __asm("r7") = 0xF0002;
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
#endif
_nIns = save;
return was;
}
void Assembler::nativePageReset()
{
#ifdef NJ_THUMB_JIT
@ -521,21 +528,55 @@ namespace nanojit
#else
if (!_nSlot)
{
// This needs to be done or the samepage macro gets confused
// This needs to be done or the samepage macro gets confused; pageAlloc
// gives us a pointer to just past the end of the page.
_nIns--;
_nExitIns--;
// constpool starts at top of page and goes down,
// code starts at bottom of page and moves up
_nSlot = (int*)(pageTop(_nIns)+1);
_nSlot = pageDataStart(_nIns); //(int*)(&((Page*)pageTop(_nIns))->lir[0]);
}
#endif
}
// Flush the CPU instruction cache for the code range [n1, n2), so freshly
// generated or patched code is actually executed rather than stale icache
// contents.
void Assembler::flushCache(NIns* n1, NIns* n2) {
#if defined(UNDER_CE)
// WinCE: FlushInstructionCache with NULL/NULL flushes the entire
// process icache -- coarse, but the only documented portable option.
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX)
// Linux/ARM: invoke the ARM-private cacheflush syscall (r7 = 0xF0002,
// i.e. __ARM_NR_cacheflush) on just the [n1, n2) range.
//Page *page = (Page*)pageTop(_nIns);
register unsigned long _beg __asm("a1") = (unsigned long)(n1);
register unsigned long _end __asm("a2") = (unsigned long)(n2);
register unsigned long _flg __asm("a3") = 0;
register unsigned long _swi __asm("r7") = 0xF0002;
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
#endif
}
#ifdef NJ_THUMB_JIT
// Thumb variant: re-point the two-halfword BL pair at 'at' to 'target',
// returning the branch's previous destination.  The old destination is
// reconstructed by recombining the 11-bit high/low offset fields of the
// Thumb BL instruction pair, then the pair is re-emitted via BL() with
// _nIns temporarily positioned just past it.
NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
{
NIns* save = _nIns;
// Decode old target: high half at at[2] supplies bits 12..22, low half
// at at[1] supplies bits 1..11; base is the pipeline-adjusted PC.
NIns* was = (NIns*) (((((*(at+2))&0x7ff)<<12) | (((*(at+1))&0x7ff)<<1)) + (at-2+2));
_nIns = at + 2;
BL(target);
flushCache(_nIns, _nIns+2);
#ifdef AVMPLUS_PORTING_API
// XXX save.._nIns+2? really?
NanoJIT_PortAPI_FlushInstructionCache(save, _nIns+2);
#endif
_nIns = save;
return was;
}
void Assembler::STi(Register b, int32_t d, int32_t v)
{
ST(b, d, Scratch);
@ -551,6 +592,7 @@ namespace nanojit
void Assembler::underrunProtect(int bytes)
{
// perhaps bytes + sizeof(PageHeader)/sizeof(NIns) + 4 ?
intptr_t u = bytes + 4;
if (!samepage(_nIns-u, _nIns-1)) {
NIns* target = _nIns;
@ -855,45 +897,94 @@ namespace nanojit
}
#else // ARM_JIT
// ARM variant: re-point a previously emitted BL_far sequence to 'target',
// returning the old destination.  Because guard exits are always emitted
// with BL_far (see asm_exit), the destination lives as a literal word at
// at[3]; patching is just overwriting that word and flushing the icache.
NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
{
// This always got emitted as a BL_far sequence; at points
// to the first of 4 instructions. Ensure that we're where
// we think we were..
NanoAssert(at[1] == (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) ));
NanoAssert(at[2] == (NIns)( COND_AL | (0x9<<21) | (0xFFF<<8) | (1<<4) | (IP) ));
NIns* was = (NIns*) at[3];
at[3] = (NIns)target;
flushCache(at, at+4);
#ifdef AVMPLUS_PORTING_API
NanoJIT_PortAPI_FlushInstructionCache(at, at+4);
#endif
return was;
}
void Assembler::underrunProtect(int bytes)
{
intptr_t u = (bytes) + 4;
intptr_t u = bytes + sizeof(PageHeader)/sizeof(NIns) + 8;
if ( (samepage(_nIns,_nSlot) && (((intptr_t)_nIns-u) <= intptr_t(_nSlot+1))) ||
(!samepage((intptr_t)_nIns-u,_nIns)) )
{
NIns* target = _nIns;
_nIns = pageAlloc(_inExit);
// XXX _nIns at this point points to one past the end of
// the page, intended to be written into using *(--_nIns).
// However, (guess) something seems to be storing the value
// of _nIns as is, and then later generating a jump to a bogus
// address. So pre-decrement to ensure that it's always
// valid; we end up skipping using the last instruction this
// way.
_nIns--;
// Update slot, either to _nIns (if decremented above), or
// _nIns-1 once the above bug is fixed/found.
_nSlot = pageDataStart(_nIns);
// If samepage() is used on _nIns and _nSlot, it'll fail, since _nIns
// points to one past the end of the page right now. Assume that
// JMP_nochk won't ever try to write to _nSlot, and so won't ever
// check samepage(). See B_cond_chk macro.
JMP_nochk(target);
_nSlot = pageTop(_nIns);
} else if (!_nSlot) {
// make sure that there's always a slot pointer
_nSlot = pageDataStart(_nIns);
}
}
bool isB24(NIns *target, NIns *cur)
{
int offset = int(target)-int(cur-2+2);
return (-(1<<24) <= offset && offset < (1<<24));
// Emit a "far" call to addr that works for any 32-bit destination, by
// embedding the address as a literal word in the instruction stream.
// The sequence, in execution order (instructions are emitted backwards
// via *(--_nIns)), is:
//     ldr ip, [pc, #4]   ; load the literal word into ip
//     add lr, pc, #4     ; return address = just past the literal
//     bx  ip             ; branch to addr
//     .word addr         ; the literal destination
// asm_adjustBranch relies on this exact 4-word layout to patch the
// destination in place.
void Assembler::BL_far(NIns* addr) {
// we have to stick an immediate into the stream and make lr
// point to the right spot before branching
underrunProtect(16);
// the address
*(--_nIns) = (NIns)((addr));
// bx ip // branch to the address we loaded earlier
*(--_nIns) = (NIns)( COND_AL | (0x9<<21) | (0xFFF<<8) | (1<<4) | (IP) );
// add lr, pc, #4 // set lr to be past the address word that we wrote
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) );
// ldr ip, [pc, #4] // load the address into ip, reading it from [pc+4]
*(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (PC<<16) | (IP<<12) | (4));
asm_output1("bl %p (32-bit)", addr);
}
// Emit a call to addr: a single BL instruction when addr is within the
// 24-bit (+/-32MB) PC-relative range, otherwise fall back to the 4-word
// BL_far sequence.
//
// Fixes relative to the previous version:
//  - underrunProtect(4) is called *before* computing the offset.  The
//    protect call may move _nIns to a fresh page, which would invalidate
//    an offset computed from the old _nIns and emit a BL to the wrong
//    place.  Protecting first means the offset is computed against the
//    final emission point.  (The extra protect before a BL_far fallback
//    is harmless; BL_far does its own underrunProtect(16).)
//  - removed a stray macro-style line-continuation '\' left at the end
//    of the emit statement.
void Assembler::BL(NIns* addr) {
// Reserve space first so _nIns can no longer move underneath us.
underrunProtect(4);
intptr_t offs = PC_OFFSET_FROM(addr,(intptr_t)_nIns-4);
if (JMP_S24_OFFSET_OK(offs)) {
// we can do this with a single BL call
*(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((offs)>>2) & 0xFFFFFF) );
asm_output1("bl %p", addr);
} else {
BL_far(addr);
}
}
void Assembler::CALL(const CallInfo *ci)
{
intptr_t addr = ci->_address;
if (isB24((NIns*)addr, _nIns))
{
// we can do this with a single BL call
underrunProtect(4);
BL(addr);
asm_output2("call %08X:%s", addr, ci->_name);
}
else
{
underrunProtect(16);
*(--_nIns) = (NIns)((addr));
*(--_nIns) = (NIns)( COND_AL | (0x9<<21) | (0xFFF<<8) | (1<<4) | (IP) );
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) );
*(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (PC<<16) | (IP<<12) | (4));
asm_output2("call %08X:%s", addr, ci->_name);
}
BL((NIns*)addr);
asm_output1(" (call %s)", ci->_name);
}
#endif // NJ_THUMB_JIT
@ -937,31 +1028,18 @@ namespace nanojit
#else
// We can always reach the const pool, since it's on the same page (<4096)
if (!_nSlot)
_nSlot = pageTop(_nIns);
if ( (_nSlot+1) >= (_nIns-1) )
{
// This would overrun the code, so we need a new page
// and a jump to that page
NIns* target = _nIns;
_nIns = pageAlloc(_inExit);
JMP_nochk(target);
// reset the slot
_nSlot = pageTop(_nIns);
}
underrunProtect(8);
*(++_nSlot) = (int)imm;
int offset = (int)(_nSlot) - (int)(_nIns+1);
//fprintf (stderr, "wrote slot(2) %p with %08x, jmp @ %p\n", _nSlot, (intptr_t)imm, _nIns-1);
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | ((r)<<12) | -(offset));
int offset = PC_OFFSET_FROM(_nSlot,(intptr_t)(_nIns)-4);
NanoAssert(JMP_S24_OFFSET_OK(offset) && (offset < 0));
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | ((r)<<12) | ((-offset) & 0xFFFFFF) );
asm_output2("ld %s,%d",gpn(r),imm);
#endif
}

View File

@ -156,6 +156,7 @@ namespace nanojit
#define alignUp(x,s) ((((uintptr_t)(x))+(((uintptr_t)s)-1))&~(((uintptr_t)s)-1))
#define pageTop(x) ( (int*)alignTo(x,NJ_PAGE_SIZE) )
#define pageDataStart(x) ( (int*)(alignTo(x,NJ_PAGE_SIZE) + sizeof(PageHeader)) )
#define pageBottom(x) ( (int*)(alignTo(x,NJ_PAGE_SIZE)+NJ_PAGE_SIZE)-1 )
#define samepage(x,y) (pageTop(x) == pageTop(y))