b=449526, TM: fix up ARM code generation / softfloat

This commit is contained in:
Vladimir Vukicevic 2008-09-02 22:29:23 -07:00
parent e3916b316d
commit 420e72ed85
9 changed files with 519 additions and 175 deletions

View File

@ -98,6 +98,20 @@ BUILTIN3(Object_p_propertyIsEnumerable,
BUILTIN2(BooleanToNumber, LO, LO, F, jsdouble, JSContext*, jsint, 1, 1) BUILTIN2(BooleanToNumber, LO, LO, F, jsdouble, JSContext*, jsint, 1, 1)
BUILTIN2(ObjectToString, LO, LO, P, JSString*, JSContext*, JSObject*, 0, 0) BUILTIN2(ObjectToString, LO, LO, P, JSString*, JSContext*, JSObject*, 0, 0)
BUILTIN3(Array_1int, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, jsint, 0, 0) BUILTIN3(Array_1int, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, jsint, 0, 0)
// soft float
// ARM softfloat builtins: one entry per helper (implemented as js_fneg,
// js_i2f, etc. in the corresponding .cpp).  The LO and F codes are the
// ARGSIZE_LO / ARGSIZE_F argument-size classes (see the #defines below).
BUILTIN1(fneg, F, F, jsdouble, jsdouble, 1, 1)
BUILTIN1(i2f, LO, F, jsdouble, jsint, 1, 1)
BUILTIN1(u2f, LO, F, jsdouble, jsuint, 1, 1)
BUILTIN2(fcmpeq, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fcmplt, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fcmple, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fcmpgt, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fcmpge, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
BUILTIN2(fmul, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
BUILTIN2(fadd, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
BUILTIN2(fdiv, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
BUILTIN2(fsub, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
BUILTIN3(Array_1str, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, JSString*, 0, 0) BUILTIN3(Array_1str, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, JSString*, 0, 0)
BUILTIN4(Array_2obj, LO, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, JSObject*, JSObject**, 0, 0) BUILTIN4(Array_2obj, LO, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, JSObject*, JSObject**, 0, 0)
BUILTIN5(Array_3num, LO, LO, F, F, F, P, JSObject*, JSContext*, JSObject*, jsdouble, jsdouble, jsdouble, 0, 0) BUILTIN5(Array_3num, LO, LO, F, F, F, P, JSObject*, JSContext*, JSObject*, jsdouble, jsdouble, jsdouble, 0, 0)

View File

@ -719,6 +719,79 @@ js_Array_3num(JSContext* cx, JSObject* proto, jsdouble n1, jsdouble n2, jsdouble
return NULL;) return NULL;)
} }
/* soft float -- trivial C helpers the JIT calls on ARM in place of
   hardware floating-point instructions. */

// Negation; backs the fneg builtin.
jsdouble FASTCALL
js_fneg(jsdouble x)
{
    return -x;
}

// Signed int -> double conversion.
jsdouble FASTCALL
js_i2f(jsint i)
{
    return i;
}

// Unsigned int -> double conversion.
jsdouble FASTCALL
js_u2f(jsuint u)
{
    return u;
}

// Double comparisons.  Each returns 0 or 1; per the C operators' IEEE
// semantics, every one of these is false when either operand is NaN.
jsint FASTCALL
js_fcmpeq(jsdouble x, jsdouble y)
{
    return x==y;
}

jsint FASTCALL
js_fcmplt(jsdouble x, jsdouble y)
{
    return x < y;
}

jsint FASTCALL
js_fcmple(jsdouble x, jsdouble y)
{
    return x <= y;
}

jsint FASTCALL
js_fcmpgt(jsdouble x, jsdouble y)
{
    return x > y;
}

jsint FASTCALL
js_fcmpge(jsdouble x, jsdouble y)
{
    return x >= y;
}

// Double arithmetic.
jsdouble FASTCALL
js_fmul(jsdouble x, jsdouble y)
{
    return x * y;
}

jsdouble FASTCALL
js_fadd(jsdouble x, jsdouble y)
{
    return x + y;
}

jsdouble FASTCALL
js_fdiv(jsdouble x, jsdouble y)
{
    return x / y;
}

jsdouble FASTCALL
js_fsub(jsdouble x, jsdouble y)
{
    return x - y;
}
#define LO ARGSIZE_LO #define LO ARGSIZE_LO
#define F ARGSIZE_F #define F ARGSIZE_F
#define Q ARGSIZE_Q #define Q ARGSIZE_Q

View File

@ -1,4 +1,4 @@
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=4 sw=4 et tw=99: * vim: set ts=4 sw=4 et tw=99:
* *
* ***** BEGIN LICENSE BLOCK ***** * ***** BEGIN LICENSE BLOCK *****
@ -277,12 +277,59 @@ Oracle::clear()
_dontDemote.reset(); _dontDemote.reset();
} }
// True if instruction i converts a signed int to a double: either the
// native LIR_i2f opcode, or (ARM softfloat) a qjoin of the F_i2f call
// with its LIR_callh high-half companion.
static bool isi2f(LInsp i)
{
    if (i->isop(LIR_i2f))
        return true;
#ifdef NANOJIT_ARM
    if (i->isop(LIR_qjoin)
        && i->oprnd1()->isop(LIR_call)
        && i->oprnd2()->isop(LIR_callh)
        && i->oprnd1()->imm8() == F_i2f)
        return true;
#endif
    return false;
}
// True if instruction i converts an unsigned int to a double: either the
// native LIR_u2f opcode, or (ARM softfloat) a qjoin of the F_u2f call
// with its LIR_callh high-half companion.
static bool isu2f(LInsp i)
{
    if (i->isop(LIR_u2f))
        return true;
#ifdef NANOJIT_ARM
    if (i->isop(LIR_qjoin)
        && i->oprnd1()->isop(LIR_call)
        && i->oprnd2()->isop(LIR_callh)
        && i->oprnd1()->imm8() == F_u2f)
        return true;
#endif
    return false;
}
// Return the integer operand feeding an i2f/u2f conversion, for either
// form recognized by isi2f()/isu2f().
static LInsp iu2fArg(LInsp i)
{
#ifdef NANOJIT_ARM
    // Softfloat form: the original integer is argument 0 of the
    // low-half conversion call inside the qjoin.
    if (i->isop(LIR_qjoin))
        return i->oprnd1()->arg(0);
#endif
    return i->oprnd1();
}
static LIns* demote(LirWriter *out, LInsp i) static LIns* demote(LirWriter *out, LInsp i)
{ {
if (i->isCall()) if (i->isCall())
return callArgN(i, 0); return callArgN(i, 0);
if (i->isop(LIR_i2f) || i->isop(LIR_u2f)) if (isi2f(i) || isu2f(i))
return i->oprnd1(); return iu2fArg(i);
if (i->isconst()) if (i->isconst())
return i; return i;
AvmAssert(i->isconstq()); AvmAssert(i->isconstq());
@ -294,14 +341,14 @@ static LIns* demote(LirWriter *out, LInsp i)
static bool isPromoteInt(LIns* i) static bool isPromoteInt(LIns* i)
{ {
jsdouble d; jsdouble d;
return i->isop(LIR_i2f) || i->isconst() || return isi2f(i) || i->isconst() ||
(i->isconstq() && ((d = i->constvalf()) == (jsdouble)(jsint)d) && !JSDOUBLE_IS_NEGZERO(d)); (i->isconstq() && ((d = i->constvalf()) == (jsdouble)(jsint)d) && !JSDOUBLE_IS_NEGZERO(d));
} }
static bool isPromoteUint(LIns* i) static bool isPromoteUint(LIns* i)
{ {
jsdouble d; jsdouble d;
return i->isop(LIR_u2f) || i->isconst() || return isu2f(i) || i->isconst() ||
(i->isconstq() && ((d = i->constvalf()) == (jsdouble)(jsuint)d)); (i->isconstq() && ((d = i->constvalf()) == (jsdouble)(jsuint)d));
} }
@ -324,6 +371,92 @@ static bool overflowSafe(LIns* i)
((c->constval() > 0))); ((c->constval() > 0)));
} }
#ifdef NANOJIT_ARM
// LIR writer filter for ARM softfloat: rewrites floating-point LIR
// operations into calls to the softfloat builtins (F_fadd, F_fcmpeq, ...),
// with each 64-bit double result represented as qjoin(call, callh).
// Also masks shift amounts to 0..31 (see ins2), which is not a softfloat
// concern but is needed on ARM.
class SoftFloatFilter: public LirWriter
{
public:
    SoftFloatFilter(LirWriter* out):
        LirWriter(out)
    {
    }

    // Emit a call returning a 64-bit (double) value: the call produces the
    // low half, LIR_callh names the high half, and qjoin pairs them.
    LInsp quadCall(uint32_t fid, LInsp args[]) {
        LInsp qlo, qhi;

        qlo = out->insCall(fid, args);
        qhi = out->ins1(LIR_callh, qlo);
        return out->qjoin(qlo, qhi);
    }

    // Unary ops: negate and int->double conversions become builtin calls.
    LInsp ins1(LOpcode v, LInsp s0)
    {
        if (v == LIR_fneg)
            return quadCall(F_fneg, &s0);

        if (v == LIR_i2f)
            return quadCall(F_i2f, &s0);

        if (v == LIR_u2f)
            return quadCall(F_u2f, &s0);

        return out->ins1(v, s0);
    }

    LInsp ins2(LOpcode v, LInsp s0, LInsp s1)
    {
        LInsp args[2];
        LInsp bv;

        // The fmap tables below index by (v - first opcode):
        // change the numeric value and order of these LIR opcodes and die
        if (LIR_fadd <= v && v <= LIR_fdiv) {
            static uint32_t fmap[] = { F_fadd, F_fsub, F_fmul, F_fdiv };

            // args are pushed in reverse order: args[0] is the last argument
            args[0] = s1;
            args[1] = s0;

            return quadCall(fmap[v - LIR_fadd], args);
        }

        if (LIR_feq <= v && v <= LIR_fge) {
            static uint32_t fmap[] = { F_fcmpeq, F_fcmplt, F_fcmpgt, F_fcmple, F_fcmpge };

            args[0] = s1;
            args[1] = s0;

            // Compare builtins return 0/1; normalize to a LIR condition by
            // comparing the result against 1.
            bv = out->insCall(fmap[v - LIR_feq], args);
            return out->ins2(LIR_eq, bv, out->insImm(1));
        }

        // not really a softfloat filter, but needed on ARM --
        // arm doesn't mask shifts to 31 like x86 does
        if (v == LIR_lsh ||
            v == LIR_rsh ||
            v == LIR_ush)
        {
            // NOTE(review): setimm16 mutates the existing constant
            // instruction in place -- if that LIns is shared (CSE'd) with
            // another use, this changes it there too; confirm intended.
            if (s1->isconst())
                s1->setimm16(s1->constval() & 31);
            else
                s1 = out->ins2(LIR_and, s1, out->insImm(31));
            return out->ins2(v, s0, s1);
        }

        return out->ins2(v, s0, s1);
    }

    LInsp insCall(uint32_t fid, LInsp args[])
    {
        // if the return type is ARGSIZE_F, we have
        // to do a quadCall ( qjoin(call,callh) )
        if ((builtins[fid]._argtypes & 3) == ARGSIZE_F)
            return quadCall(fid, args);

        return out->insCall(fid, args);
    }
};
#endif
class FuncFilter: public LirWriter class FuncFilter: public LirWriter
{ {
TraceRecorder& recorder; TraceRecorder& recorder;
@ -427,9 +560,8 @@ public:
case F_DoubleToUint32: case F_DoubleToUint32:
if (s0->isconstq()) if (s0->isconstq())
return out->insImm(js_DoubleToECMAUint32(s0->constvalf())); return out->insImm(js_DoubleToECMAUint32(s0->constvalf()));
if (s0->isop(LIR_i2f) || s0->isop(LIR_u2f)) { if (isi2f(s0) || isu2f(s0))
return s0->oprnd1(); return iu2fArg(s0);
}
break; break;
case F_DoubleToInt32: case F_DoubleToInt32:
if (s0->isconstq()) if (s0->isconstq())
@ -442,9 +574,9 @@ public:
return out->ins2(op, demote(out, lhs), demote(out, rhs)); return out->ins2(op, demote(out, lhs), demote(out, rhs));
} }
} }
if (s0->isop(LIR_i2f) || s0->isop(LIR_u2f)) { if (isi2f(s0) || isu2f(s0))
return s0->oprnd1(); return iu2fArg(s0);
} // XXX ARM -- check for qjoin(call(F_UnboxDouble),call(F_UnboxDouble))
if (s0->isCall() && s0->fid() == F_UnboxDouble) { if (s0->isCall() && s0->fid() == F_UnboxDouble) {
LIns* args2[] = { callArgN(s0, 0) }; LIns* args2[] = { callArgN(s0, 0) };
return out->insCall(F_UnboxInt32, args2); return out->insCall(F_UnboxInt32, args2);
@ -688,6 +820,9 @@ TraceRecorder::TraceRecorder(JSContext* cx, GuardRecord* _anchor, Fragment* _fra
#ifdef DEBUG #ifdef DEBUG
if (verbose_debug) if (verbose_debug)
lir = verbose_filter = new (&gc) VerboseWriter(&gc, lir, lirbuf->names); lir = verbose_filter = new (&gc) VerboseWriter(&gc, lir, lirbuf->names);
#endif
#ifdef NANOJIT_ARM
lir = float_filter = new (&gc) SoftFloatFilter(lir);
#endif #endif
lir = cse_filter = new (&gc) CseFilter(lir, &gc); lir = cse_filter = new (&gc) CseFilter(lir, &gc);
lir = expr_filter = new (&gc) ExprFilter(lir); lir = expr_filter = new (&gc) ExprFilter(lir);
@ -732,6 +867,9 @@ TraceRecorder::~TraceRecorder()
delete cse_filter; delete cse_filter;
delete expr_filter; delete expr_filter;
delete func_filter; delete func_filter;
#ifdef NANOJIT_ARM
delete float_filter;
#endif
delete lir_buf_writer; delete lir_buf_writer;
} }
@ -1435,7 +1573,7 @@ TraceRecorder::checkType(jsval& v, uint8 t, bool& unstable)
if (!isNumber(v)) if (!isNumber(v))
return false; /* not a number? type mismatch */ return false; /* not a number? type mismatch */
LIns* i = get(&v); LIns* i = get(&v);
if (!i->isop(LIR_i2f)) { if (!isi2f(i)) {
debug_only_v(printf("int slot is !isInt32, slot #%d, triggering re-compilation\n", debug_only_v(printf("int slot is !isInt32, slot #%d, triggering re-compilation\n",
!isGlobal(&v) !isGlobal(&v)
? nativeStackOffset(&v) ? nativeStackOffset(&v)
@ -1445,11 +1583,11 @@ TraceRecorder::checkType(jsval& v, uint8 t, bool& unstable)
return true; /* keep checking types, but request re-compilation */ return true; /* keep checking types, but request re-compilation */
} }
/* Looks good, slot is an int32, the last instruction should be i2f. */ /* Looks good, slot is an int32, the last instruction should be i2f. */
JS_ASSERT(isInt32(v) && i->isop(LIR_i2f)); JS_ASSERT(isInt32(v) && (i->isop(LIR_i2f) || i->isop(LIR_qjoin)));
/* We got the final LIR_i2f as we expected. Overwrite the value in that /* We got the final LIR_i2f as we expected. Overwrite the value in that
slot with the argument of i2f since we want the integer store to flow along slot with the argument of i2f since we want the integer store to flow along
the loop edge, not the casted value. */ the loop edge, not the casted value. */
set(&v, i->oprnd1()); set(&v, iu2fArg(i));
return true; return true;
} }
if (t == JSVAL_DOUBLE) { if (t == JSVAL_DOUBLE) {
@ -2080,10 +2218,11 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
/* execute previously recorded trace */ /* execute previously recorded trace */
TreeInfo* ti = (TreeInfo*)f->vmprivate; TreeInfo* ti = (TreeInfo*)f->vmprivate;
debug_only_v(printf("entering trace at %s:%u@%u, native stack slots: %u\n", debug_only_v(printf("entering trace at %s:%u@%u, native stack slots: %u code: %p\n",
cx->fp->script->filename, cx->fp->script->filename,
js_PCToLineNumber(cx, cx->fp->script, cx->fp->regs->pc), js_PCToLineNumber(cx, cx->fp->script, cx->fp->regs->pc),
cx->fp->regs->pc - cx->fp->script->code, ti->maxNativeStackSlots);); cx->fp->regs->pc - cx->fp->script->code, ti->maxNativeStackSlots,
f->code()););
JSTraceMonitor* tm = &JS_TRACE_MONITOR(cx); JSTraceMonitor* tm = &JS_TRACE_MONITOR(cx);
unsigned ngslots = tm->globalSlots->length(); unsigned ngslots = tm->globalSlots->length();

View File

@ -221,6 +221,9 @@ class TraceRecorder {
nanojit::LirWriter* cse_filter; nanojit::LirWriter* cse_filter;
nanojit::LirWriter* expr_filter; nanojit::LirWriter* expr_filter;
nanojit::LirWriter* func_filter; nanojit::LirWriter* func_filter;
#ifdef NANOJIT_ARM
nanojit::LirWriter* float_filter;
#endif
nanojit::LIns* cx_ins; nanojit::LIns* cx_ins;
nanojit::LIns* gp_ins; nanojit::LIns* gp_ins;
nanojit::LIns* eos_ins; nanojit::LIns* eos_ins;

View File

@ -788,38 +788,29 @@ namespace nanojit
internalReset(); // clear the reservation tables and regalloc internalReset(); // clear the reservation tables and regalloc
NanoAssert(_branchStateMap->isEmpty()); NanoAssert(_branchStateMap->isEmpty());
_branchStateMap = 0; _branchStateMap = 0;
#if defined(UNDER_CE) #ifdef AVMPLUS_ARM
// If we've modified the code, we need to flush so we don't end up trying // If we've modified the code, we need to flush so we don't end up trying
// to execute junk // to execute junk
# if defined(UNDER_CE)
FlushInstructionCache(GetCurrentProcess(), NULL, NULL); FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX) && defined(AVMPLUS_ARM) # elif defined(AVMPLUS_LINUX)
// N A S T Y - obviously have to fix this // XXX fixme flush adjacent pages together
// determine our page range for (int i = 0; i < 2; i++) {
Page *p = (i == 0) ? _nativePages : _nativeExitPages;
Page *page=0, *first=0, *last=0; while (p) {
for (int i=2;i!=0;i--) { flushCache((NIns*)p, (NIns*)((intptr_t)(p) + NJ_PAGE_SIZE));
page = first = last = (i==2 ? _nativePages : _nativeExitPages); p = p->next;
while (page)
{
if (page<first)
first = page;
if (page>last)
last = page;
page = page->next;
} }
register unsigned long _beg __asm("a1") = (unsigned long)(first);
register unsigned long _end __asm("a2") = (unsigned long)(last+NJ_PAGE_SIZE);
register unsigned long _flg __asm("a3") = 0;
register unsigned long _swi __asm("r7") = 0xF0002;
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
} }
#endif # endif
#ifdef AVMPLUS_PORTING_API #endif
# ifdef AVMPLUS_PORTING_API
NanoJIT_PortAPI_FlushInstructionCache(_nIns, _endJit1Addr); NanoJIT_PortAPI_FlushInstructionCache(_nIns, _endJit1Addr);
NanoJIT_PortAPI_FlushInstructionCache(_nExitIns, _endJit2Addr); NanoJIT_PortAPI_FlushInstructionCache(_nExitIns, _endJit2Addr);
#endif # endif
} }
void Assembler::copyRegisters(RegAlloc* copyTo) void Assembler::copyRegisters(RegAlloc* copyTo)
@ -861,7 +852,7 @@ namespace nanojit
switch(op) switch(op)
{ {
default: default:
NanoAssertMsg(false, "unsupported LIR instruction"); NanoAssertMsgf(false, ("unsupported LIR instruction: %d (~0x40: %d)\n",op, op&~LIR64));
break; break;
case LIR_short: case LIR_short:
@ -1063,7 +1054,20 @@ namespace nanojit
Register rb = UnknownReg; Register rb = UnknownReg;
RegisterMask allow = GpRegs; RegisterMask allow = GpRegs;
if (lhs != rhs && (op == LIR_mul || !rhs->isconst())) bool forceReg = (op == LIR_mul || !rhs->isconst());
#ifdef NANOJIT_ARM
// ARM can't do an immediate op with immediates
// outside of +/-255 (for AND) or outside of
// 0..255 for others.
if (!forceReg)
{
if (rhs->isconst() && !isU8(rhs->constval()))
forceReg = true;
}
#endif
if (lhs != rhs && forceReg)
{ {
if ((rb = asm_binop_rhs_reg(ins)) == UnknownReg) { if ((rb = asm_binop_rhs_reg(ins)) == UnknownReg) {
rb = findRegFor(rhs, allow); rb = findRegFor(rhs, allow);
@ -1079,7 +1083,7 @@ namespace nanojit
ra = findSpecificRegFor(lhs, rr); ra = findSpecificRegFor(lhs, rr);
// else, rA already has a register assigned. // else, rA already has a register assigned.
if (!rhs->isconst() || op == LIR_mul) if (forceReg)
{ {
if (lhs == rhs) if (lhs == rhs)
rb = ra; rb = ra;

View File

@ -1546,7 +1546,12 @@ namespace nanojit
} }
else { else {
if (ref->isCall()) { if (ref->isCall()) {
copyName(ref, _functions[ref->fid()]._name, funccounts.add(ref->fid())); if (ref->isop(LIR_callh)) {
// we've presumably seen the other half already
ref = ref->oprnd1();
} else {
copyName(ref, _functions[ref->fid()]._name, funccounts.add(ref->fid()));
}
} else { } else {
NanoAssert(ref->opcode() < sizeof(lirNames) / sizeof(lirNames[0])); NanoAssert(ref->opcode() < sizeof(lirNames) / sizeof(lirNames[0]));
copyName(ref, lirNames[ref->opcode()], lircounts.add(ref->opcode())); copyName(ref, lirNames[ref->opcode()], lircounts.add(ref->opcode()));
@ -1652,7 +1657,6 @@ namespace nanojit
case LIR_fle: case LIR_fle:
case LIR_fgt: case LIR_fgt:
case LIR_fge: case LIR_fge:
case LIR_qjoin:
case LIR_qiadd: case LIR_qiadd:
case LIR_qiand: case LIR_qiand:
case LIR_qilsh: case LIR_qilsh:
@ -1662,6 +1666,12 @@ namespace nanojit
formatRef(i->oprnd2())); formatRef(i->oprnd2()));
break; break;
case LIR_qjoin:
sprintf(s, "%s (%s), %s", lirNames[op],
formatIns(i->oprnd1()),
formatRef(i->oprnd2()));
break;
case LIR_qcmov: case LIR_qcmov:
case LIR_cmov: case LIR_cmov:
sprintf(s, "%s ? %s : %s", sprintf(s, "%s ? %s : %s",

View File

@ -149,11 +149,14 @@ namespace nanojit
#define DECLARE_PLATFORM_ASSEMBLER()\ #define DECLARE_PLATFORM_ASSEMBLER()\
const static Register argRegs[4], retRegs[2];\ const static Register argRegs[4], retRegs[2];\
void LD32_nochk(Register r, int32_t imm);\ void LD32_nochk(Register r, int32_t imm);\
void BL(NIns*);\
void BL_far(NIns*);\
void CALL(const CallInfo*);\ void CALL(const CallInfo*);\
void underrunProtect(int bytes);\ void underrunProtect(int bytes);\
bool has_cmov;\ bool has_cmov;\
void nativePageReset();\ void nativePageReset();\
void nativePageSetup();\ void nativePageSetup();\
void flushCache(NIns*,NIns*);\
int* _nSlot;\ int* _nSlot;\
int* _nExitSlot; int* _nExitSlot;
@ -232,6 +235,7 @@ ShiftOperator;
*(--_nIns) = (NIns)( COND_AL | OP_IMM | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) );\ *(--_nIns) = (NIns)( COND_AL | OP_IMM | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) );\
asm_output2("and %s,%d",gpn(_r),(_imm));}\ asm_output2("and %s,%d",gpn(_r),(_imm));}\
else if ((_imm)<0 && (_imm)>-256) {\ else if ((_imm)<0 && (_imm)>-256) {\
underrunProtect(8);\
*(--_nIns) = (NIns)( COND_AL | ((_r)<<16) | ((_r)<<12) | (Scratch) );\ *(--_nIns) = (NIns)( COND_AL | ((_r)<<16) | ((_r)<<12) | (Scratch) );\
asm_output2("and %s,%s",gpn(_r),gpn(Scratch));\ asm_output2("and %s,%s",gpn(_r),gpn(Scratch));\
*(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | ((Scratch)<<12) | (((_imm)^0xFFFFFFFF)&0xFF) );\ *(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | ((Scratch)<<12) | (((_imm)^0xFFFFFFFF)&0xFF) );\
@ -532,6 +536,7 @@ ShiftOperator;
//#define INT3() underrunProtect(1); *(--_nIns) = 0xcc; asm_output("int3") //#define INT3() underrunProtect(1); *(--_nIns) = 0xcc; asm_output("int3")
//#define RET() INT3() //#define RET() INT3()
#define BKPT_nochk() do { *(--_nIns) = (NIns)( (0xE<<24) | (0x12<<20) | (0x7<<4) ); } while (0);
// this is pushing a reg // this is pushing a reg
#define PUSHr(_r) do {\ #define PUSHr(_r) do {\
@ -564,49 +569,66 @@ ShiftOperator;
*(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | (_mask) );\ *(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | (_mask) );\
asm_output1("pop %x", (_mask));} while (0) asm_output1("pop %x", (_mask));} while (0)
// takes an offset (right?) #define PC_OFFSET_FROM(target,frompc) ((intptr_t)(target) - ((intptr_t)(frompc) + 8))
#define JMP_long_nochk_offset(_off) do {\ #define JMP_S24_OFFSET_OK(offs) ((-(1<<24)) <= (offs) && (offs) < (1<<24))
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((_off)>>2) & 0xFFFFFF) ); \
asm_output1("jmp_l_n 0x%08x",(_off));} while (0)
// take an address, not an offset // (XXX This ought to be a function instead of a macro)
#define JMP(t) do {\ //
underrunProtect(4);\ // Branch to target address _t with condition _c, doing underrun
intptr_t tt = (intptr_t)(t) - ((intptr_t)_nIns + 4);\ // checks (_chk == 1) or skipping them (_chk == 0).
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((tt)>>2) & 0xFFFFFF) ); \ //
asm_output1("JMP 0x%08x\n",(unsigned int)(t)); } while (0) // If the jump fits in a relative jump (+/-32MB), emit that.
// If the jump is unconditional, emit the dest address inline in
// the instruction stream and load it into pc.
// If the jump has a condition, but no one's mucked with _nIns and our _nSlot
// pointer is valid, stick the constant in the slot and emit a conditional
// load into pc.
// Otherwise, emit the conditional load into pc from a nearby constant,
// and emit a jump to jump over it in case the condition fails.
//
// NB: JMP_nochk depends on this not calling samepage() when _c == AL
#define B_cond_chk(_c,_t,_chk) do { \
int32 offs = PC_OFFSET_FROM(_t,(intptr_t)(_nIns)-4); \
if (JMP_S24_OFFSET_OK(offs)) { \
if(_chk) underrunProtect(4); \
*(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | (((offs)>>2) & 0xFFFFFF) ); \
} else if (_c == AL) { \
if(_chk) underrunProtect(8); \
*(--_nIns) = (NIns)(_t); \
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | 0x4 ); \
} else if (samepage(_nIns,_nSlot)) { \
if(_chk) underrunProtect(8); \
*(++_nSlot) = (NIns)(_t); \
offs = PC_OFFSET_FROM(_nSlot,(intptr_t)(_nIns)-4); \
NanoAssert(offs < 0); \
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | ((-offs) & 0xFFFFFF) ); \
} else { \
if(_chk) underrunProtect(24); \
*(--_nIns) = (NIns)(_t); \
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((-4)>>2) & 0xFFFFFF ); \
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | 0x0 ); \
} \
asm_output2("%s %p\n", _c == AL ? "jmp" : "b(cnd)", (void*)(_t)); \
} while(0)
#define JMP_nochk(t) do {\ #define B_cond(_c,_t) \
intptr_t tt = (intptr_t)(t) - ((intptr_t)_nIns + 4);\ B_cond_chk(_c,_t,1)
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((tt)>>2) & 0xFFFFFF) ); \
asm_output1("JMP 0x%08x\n",(unsigned int)(t)); } while (0)
#define JMP_long_placeholder() do {JMP_long(0xffffffff); } while(0) // NB: don't use COND_AL here, we shift the condition into place!
#define JMP(_t) \
B_cond_chk(AL,_t,1)
#define JMP_long(_t) do {\ #define JMP_nochk(_t) \
underrunProtect(4);\ B_cond_chk(AL,_t,0)
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((_t)>>2) & 0xFFFFFF) ); \
asm_output1("JMP_long 0x%08x\n", (unsigned int)(_t) ); } while (0)
#define BL(_t) do {\
underrunProtect(4);\
intptr_t _tt = (intptr_t)(_t) - ((intptr_t)_nIns + 4);\
*(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((_tt)>>2) & 0xFFFFFF) ); \
asm_output2("BL 0x%08x offset=%d",(intptr_t)(_nIns) + (_tt),(_tt)) } while (0)
#define JMP_long_nochk(_t) do {\
intptr_t tt = (intptr_t)(_t) - ((intptr_t)_nIns + 4);\
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((tt)>>2) & 0xFFFFFF) ); \
asm_output1("JMP_l_n 0x%08x\n", (unsigned int)(_t)) } while (0)
#define B_cond(_c,_t)\
underrunProtect(4);\
intptr_t tt = (intptr_t)(_t) - ((intptr_t)_nIns + 4);\
*(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | ((tt >>2)& 0xFFFFFF) ); \
asm_output2("b(cond) 0x%08x (%tX)",(unsigned int)(_t), tt);
// emit a placeholder that will be filled in later by nPatchBranch;
// emit two breakpoint instructions in case something goes wrong with
// the patching.
#define JMP_long_placeholder() do { \
underrunProtect(8); \
BKPT_nochk(); \
BKPT_nochk(); \
} while(0)
#define JA(t) do {B_cond(HI,t); asm_output1("ja 0x%08x",(unsigned int)t); } while(0) #define JA(t) do {B_cond(HI,t); asm_output1("ja 0x%08x",(unsigned int)t); } while(0)
#define JNA(t) do {B_cond(LS,t); asm_output1("jna 0x%08x",(unsigned int)t); } while(0) #define JNA(t) do {B_cond(LS,t); asm_output1("jna 0x%08x",(unsigned int)t); } while(0)

View File

@ -148,7 +148,14 @@ namespace nanojit
{ {
// target doesn't exit yet. emit jump to epilog, and set up to patch later. // target doesn't exit yet. emit jump to epilog, and set up to patch later.
lr = placeGuardRecord(guard); lr = placeGuardRecord(guard);
BL(_epilogue);
#ifdef NJ_THUMB_JIT
BL(_epilogue);
#else
// we need to know that there's an extra immediate value available
// for us; always force a far jump here.
BL_far(_epilogue);
#endif
lr->jmp = _nIns; lr->jmp = _nIns;
} }
@ -196,6 +203,25 @@ namespace nanojit
void Assembler::asm_call(LInsp ins) void Assembler::asm_call(LInsp ins)
{ {
const CallInfo* call = callInfoFor(ins->fid()); const CallInfo* call = callInfoFor(ins->fid());
uint32_t atypes = call->_argtypes;
uint32_t roffset = 0;
// we need to detect if we have arg0 as LO followed by arg1 as F;
// in that case, we need to skip using r1 -- the F needs to be
// loaded in r2/r3, at least according to the ARM EABI and gcc 4.2's
// generated code.
bool arg0IsInt32FollowedByFloat = false;
while ((atypes & 3) != ARGSIZE_NONE) {
if (((atypes >> 4) & 3) == ARGSIZE_LO &&
((atypes >> 2) & 3) == ARGSIZE_F &&
((atypes >> 6) & 3) == ARGSIZE_NONE)
{
arg0IsInt32FollowedByFloat = true;
break;
}
atypes >>= 2;
}
CALL(call); CALL(call);
ArgSize sizes[10]; ArgSize sizes[10];
uint32_t argc = call->get_sizes(sizes); uint32_t argc = call->get_sizes(sizes);
@ -205,8 +231,11 @@ namespace nanojit
ArgSize sz = sizes[j]; ArgSize sz = sizes[j];
NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q); NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q);
// pre-assign registers R0-R3 for arguments (if they fit) // pre-assign registers R0-R3 for arguments (if they fit)
Register r = i < 4 ? argRegs[i] : UnknownReg; Register r = (i+roffset) < 4 ? argRegs[i+roffset] : UnknownReg;
asm_arg(sz, ins->arg(j), r); asm_arg(sz, ins->arg(j), r);
if (i == 0 && arg0IsInt32FollowedByFloat)
roffset = 1;
} }
} }
@ -277,19 +306,28 @@ namespace nanojit
// This is ALWAYS going to be a long branch (using the BL instruction) // This is ALWAYS going to be a long branch (using the BL instruction)
// Which is really 2 instructions, so we need to modify both // Which is really 2 instructions, so we need to modify both
// XXX -- this is B, not BL, at least on non-Thumb..
// branch+2 because PC is always 2 instructions ahead on ARM/Thumb // branch+2 because PC is always 2 instructions ahead on ARM/Thumb
int32_t offset = int(target) - int(branch+2); int32_t offset = int(target) - int(branch+2);
//printf("---patching branch at %X to location %X (%d)\n", branch, target, offset); //printf("---patching branch at 0x%08x to location 0x%08x (%d-0x%08x)\n", branch, target, offset, offset);
#ifdef NJ_THUMB_JIT #ifdef NJ_THUMB_JIT
NanoAssert(-(1<<21) <= offset && offset < (1<<21)); NanoAssert(-(1<<21) <= offset && offset < (1<<21));
*branch++ = (NIns)(0xF000 | (offset>>12)&0x7FF); *branch++ = (NIns)(0xF000 | (offset>>12)&0x7FF);
*branch = (NIns)(0xF800 | (offset>>1)&0x7FF); *branch = (NIns)(0xF800 | (offset>>1)&0x7FF);
#else #else
// ARM goodness, using unconditional B // We have 2 words to work with here -- if offset is in range of a 24-bit
*branch = (NIns)( COND_AL | (0xA<<24) | ((offset >>2)& 0xFFFFFF) ); // relative jump, emit that; otherwise, we do a pc-relative load into pc.
if (-(1<<24) <= offset & offset < (1<<24)) {
// ARM goodness, using unconditional B
*branch = (NIns)( COND_AL | (0xA<<24) | ((offset >>2) & 0xFFFFFF) );
} else {
// LDR pc,[pc]
*branch++ = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | ( 0x004 ) );
*branch = (NIns)target;
}
#endif #endif
} }
@ -451,37 +489,6 @@ namespace nanojit
} }
} }
NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
{
NIns* save = _nIns;
#ifdef NJ_THUMB_JIT
NIns* was = (NIns*) (((((*(at+2))&0x7ff)<<12) | (((*(at+1))&0x7ff)<<1)) + (at-2+2));
_nIns = at + 2;
#else
NIns* was = (NIns*) (((*at&0xFFFFFF)<<2));
_nIns = at + 1;
#endif
BL(target);
#ifdef AVMPLUS_PORTING_API
NanoJIT_PortAPI_FlushInstructionCache(save, _nIns);
#endif
#if defined(UNDER_CE)
// we changed the code, so we need to do this (sadly)
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX)
// Just need to clear this one page (not even the whole page really)
//Page *page = (Page*)pageTop(_nIns);
register unsigned long _beg __asm("a1") = (unsigned long)(_nIns);
register unsigned long _end __asm("a2") = (unsigned long)(_nIns+2);
register unsigned long _flg __asm("a3") = 0;
register unsigned long _swi __asm("r7") = 0xF0002;
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
#endif
_nIns = save;
return was;
}
void Assembler::nativePageReset() void Assembler::nativePageReset()
{ {
#ifdef NJ_THUMB_JIT #ifdef NJ_THUMB_JIT
@ -521,21 +528,55 @@ namespace nanojit
#else #else
if (!_nSlot) if (!_nSlot)
{ {
// This needs to be done or the samepage macro gets confused // This needs to be done or the samepage macro gets confused; pageAlloc
// gives us a pointer to just past the end of the page.
_nIns--; _nIns--;
_nExitIns--; _nExitIns--;
// constpool starts at top of page and goes down, // constpool starts at top of page and goes down,
// code starts at bottom of page and moves up // code starts at bottom of page and moves up
_nSlot = (int*)(pageTop(_nIns)+1); _nSlot = pageDataStart(_nIns); //(int*)(&((Page*)pageTop(_nIns))->lir[0]);
} }
#endif #endif
} }
// Flush the CPU instruction cache for the generated-code range [n1, n2).
// Must be called after writing or patching code so the CPU doesn't
// execute stale cached instructions.
void Assembler::flushCache(NIns* n1, NIns* n2) {
#if defined(UNDER_CE)
    // we changed the code, so we need to do this (sadly)
    // (NULL,NULL asks WinCE to flush the entire instruction cache;
    // the n1/n2 range is ignored here.)
    FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX)
    // Just need to clear this one page (not even the whole page really)
    //Page *page = (Page*)pageTop(_nIns);
    // Invoke the ARM Linux cacheflush syscall directly: args in a1-a3,
    // syscall number 0xF0002 in r7.
    register unsigned long _beg __asm("a1") = (unsigned long)(n1);
    register unsigned long _end __asm("a2") = (unsigned long)(n2);
    register unsigned long _flg __asm("a3") = 0;
    register unsigned long _swi __asm("r7") = 0xF0002;
    __asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
#endif
}
#ifdef NJ_THUMB_JIT #ifdef NJ_THUMB_JIT
// Retarget the two-halfword Thumb BL pair at 'at' to branch to 'target',
// returning the address it previously branched to.
NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
{
    NIns* save = _nIns;
    // Reconstruct the previous destination from the offset bits packed
    // into the existing BL instruction pair at at[1]/at[2].
    NIns* was = (NIns*) (((((*(at+2))&0x7ff)<<12) | (((*(at+1))&0x7ff)<<1)) + (at-2+2));
    // Temporarily point the emission cursor just past the pair and
    // re-emit the BL over it (the assembler emits backwards).
    _nIns = at + 2;
    BL(target);
    flushCache(_nIns, _nIns+2);
#ifdef AVMPLUS_PORTING_API
    // XXX save.._nIns+2? really?
    NanoJIT_PortAPI_FlushInstructionCache(save, _nIns+2);
#endif
    // Restore the real emission cursor.
    _nIns = save;
    return was;
}
void Assembler::STi(Register b, int32_t d, int32_t v) void Assembler::STi(Register b, int32_t d, int32_t v)
{ {
ST(b, d, Scratch); ST(b, d, Scratch);
@ -551,6 +592,7 @@ namespace nanojit
void Assembler::underrunProtect(int bytes) void Assembler::underrunProtect(int bytes)
{ {
// perhaps bytes + sizeof(PageHeader)/sizeof(NIns) + 4 ?
intptr_t u = bytes + 4; intptr_t u = bytes + 4;
if (!samepage(_nIns-u, _nIns-1)) { if (!samepage(_nIns-u, _nIns-1)) {
NIns* target = _nIns; NIns* target = _nIns;
@ -855,45 +897,94 @@ namespace nanojit
} }
#else // ARM_JIT #else // ARM_JIT
void Assembler::underrunProtect(int bytes) NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
{
intptr_t u = (bytes) + 4;
if ( (samepage(_nIns,_nSlot) && (((intptr_t)_nIns-u) <= intptr_t(_nSlot+1))) ||
(!samepage((intptr_t)_nIns-u,_nIns)) )
{
NIns* target = _nIns;
_nIns = pageAlloc(_inExit);
JMP_nochk(target);
_nSlot = pageTop(_nIns);
}
}
bool isB24(NIns *target, NIns *cur)
{ {
int offset = int(target)-int(cur-2+2); // This always got emitted as a BL_far sequence; at points
return (-(1<<24) <= offset && offset < (1<<24)); // to the first of 4 instructions. Ensure that we're where
// we think we were..
NanoAssert(at[1] == (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) ));
NanoAssert(at[2] == (NIns)( COND_AL | (0x9<<21) | (0xFFF<<8) | (1<<4) | (IP) ));
NIns* was = (NIns*) at[3];
at[3] = (NIns)target;
flushCache(at, at+4);
#ifdef AVMPLUS_PORTING_API
NanoJIT_PortAPI_FlushInstructionCache(at, at+4);
#endif
return was;
}
// Ensure at least 'bytes' of emission room remain on the current native
// page (with slack for the page header and constant-pool growth).  If the
// cursor would collide with the constant pool or run off the page, grab a
// fresh page and emit a jump from it to the code already generated.
void Assembler::underrunProtect(int bytes)
{
    intptr_t u = bytes + sizeof(PageHeader)/sizeof(NIns) + 8;
    // Out of room if emitting u bytes would reach the constant pool on
    // this page, or would cross onto a different page.
    if ( (samepage(_nIns,_nSlot) && (((intptr_t)_nIns-u) <= intptr_t(_nSlot+1))) ||
         (!samepage((intptr_t)_nIns-u,_nIns)) )
    {
        NIns* target = _nIns;
        _nIns = pageAlloc(_inExit);
        // XXX _nIns at this point points to one past the end of
        // the page, intended to be written into using *(--_nIns).
        // However, (guess) something seems to be storing the value
        // of _nIns as is, and then later generating a jump to a bogus
        // address. So pre-decrement to ensure that it's always
        // valid; we end up skipping using the last instruction this
        // way.
        _nIns--;
        // Update slot, either to _nIns (if decremented above), or
        // _nIns-1 once the above bug is fixed/found.
        _nSlot = pageDataStart(_nIns);
        // If samepage() is used on _nIns and _nSlot, it'll fail, since _nIns
        // points to one past the end of the page right now. Assume that
        // JMP_nochk won't ever try to write to _nSlot, and so won't ever
        // check samepage(). See B_cond_chk macro.
        JMP_nochk(target);
    } else if (!_nSlot) {
        // make sure that there's always a slot pointer
        _nSlot = pageDataStart(_nIns);
    }
}
// Emit a call to an arbitrary 32-bit address using a fixed 4-word
// sequence (emitted backwards, so listed here in execution order):
//   ldr ip, [pc+4]   ; load the target address from the stream
//   add lr, pc, #4   ; point lr past the inline address word
//   bx  ip           ; branch to the target
//   .word addr       ; the target address itself
void Assembler::BL_far(NIns* addr) {
    // we have to stick an immediate into the stream and make lr
    // point to the right spot before branching
    underrunProtect(16);

    // the address
    *(--_nIns) = (NIns)((addr));
    // bx ip // branch to the address we loaded earlier
    *(--_nIns) = (NIns)( COND_AL | (0x9<<21) | (0xFFF<<8) | (1<<4) | (IP) );
    // add lr, [pc + #4] // set lr to be past the address that we wrote
    *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) );
    // ldr ip, [pc + #4] // load the address into ip, reading it from [pc+4]
    *(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (PC<<16) | (IP<<12) | (4));
    asm_output1("bl %p (32-bit)", addr);
}
// Emit a branch-with-link to addr.  If addr is reachable by the 24-bit
// PC-relative BL encoding, emit the single instruction; otherwise fall
// back to the 4-word BL_far sequence.
// (Fix: removed a stray trailing '\' line-continuation left over from the
// old macro form of this code; it spliced the debug-output line into the
// emit statement.)
void Assembler::BL(NIns* addr) {
    intptr_t offs = PC_OFFSET_FROM(addr,(intptr_t)_nIns-4);
    if (JMP_S24_OFFSET_OK(offs)) {
        // we can do this with a single BL call
        underrunProtect(4);
        *(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((offs)>>2) & 0xFFFFFF) );
        asm_output1("bl %p", addr);
    } else {
        BL_far(addr);
    }
}
void Assembler::CALL(const CallInfo *ci) void Assembler::CALL(const CallInfo *ci)
{ {
intptr_t addr = ci->_address; intptr_t addr = ci->_address;
if (isB24((NIns*)addr, _nIns)) BL((NIns*)addr);
{ asm_output1(" (call %s)", ci->_name);
// we can do this with a single BL call
underrunProtect(4);
BL(addr);
asm_output2("call %08X:%s", addr, ci->_name);
}
else
{
underrunProtect(16);
*(--_nIns) = (NIns)((addr));
*(--_nIns) = (NIns)( COND_AL | (0x9<<21) | (0xFFF<<8) | (1<<4) | (IP) );
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) );
*(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (PC<<16) | (IP<<12) | (4));
asm_output2("call %08X:%s", addr, ci->_name);
}
} }
#endif // NJ_THUMB_JIT #endif // NJ_THUMB_JIT
@ -937,31 +1028,18 @@ namespace nanojit
#else #else
// We can always reach the const pool, since it's on the same page (<4096) // We can always reach the const pool, since it's on the same page (<4096)
underrunProtect(8);
if (!_nSlot)
_nSlot = pageTop(_nIns);
if ( (_nSlot+1) >= (_nIns-1) )
{
// This would overrun the code, so we need a new page
// and a jump to that page
NIns* target = _nIns;
_nIns = pageAlloc(_inExit);
JMP_nochk(target);
// reset the slot
_nSlot = pageTop(_nIns);
}
*(++_nSlot) = (int)imm; *(++_nSlot) = (int)imm;
int offset = (int)(_nSlot) - (int)(_nIns+1); //fprintf (stderr, "wrote slot(2) %p with %08x, jmp @ %p\n", _nSlot, (intptr_t)imm, _nIns-1);
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | ((r)<<12) | -(offset)); int offset = PC_OFFSET_FROM(_nSlot,(intptr_t)(_nIns)-4);
NanoAssert(JMP_S24_OFFSET_OK(offset) && (offset < 0));
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | ((r)<<12) | ((-offset) & 0xFFFFFF) );
asm_output2("ld %s,%d",gpn(r),imm); asm_output2("ld %s,%d",gpn(r),imm);
#endif #endif
} }

View File

@ -156,6 +156,7 @@ namespace nanojit
#define alignUp(x,s) ((((uintptr_t)(x))+(((uintptr_t)s)-1))&~(((uintptr_t)s)-1)) #define alignUp(x,s) ((((uintptr_t)(x))+(((uintptr_t)s)-1))&~(((uintptr_t)s)-1))
#define pageTop(x) ( (int*)alignTo(x,NJ_PAGE_SIZE) ) #define pageTop(x) ( (int*)alignTo(x,NJ_PAGE_SIZE) )
#define pageDataStart(x) ( (int*)(alignTo(x,NJ_PAGE_SIZE) + sizeof(PageHeader)) )
#define pageBottom(x) ( (int*)(alignTo(x,NJ_PAGE_SIZE)+NJ_PAGE_SIZE)-1 ) #define pageBottom(x) ( (int*)(alignTo(x,NJ_PAGE_SIZE)+NJ_PAGE_SIZE)-1 )
#define samepage(x,y) (pageTop(x) == pageTop(y)) #define samepage(x,y) (pageTop(x) == pageTop(y))