mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
b=449526, TM: fix up ARM code generation / softfloat
This commit is contained in:
parent
e3916b316d
commit
420e72ed85
@ -98,6 +98,20 @@ BUILTIN3(Object_p_propertyIsEnumerable,
|
|||||||
BUILTIN2(BooleanToNumber, LO, LO, F, jsdouble, JSContext*, jsint, 1, 1)
|
BUILTIN2(BooleanToNumber, LO, LO, F, jsdouble, JSContext*, jsint, 1, 1)
|
||||||
BUILTIN2(ObjectToString, LO, LO, P, JSString*, JSContext*, JSObject*, 0, 0)
|
BUILTIN2(ObjectToString, LO, LO, P, JSString*, JSContext*, JSObject*, 0, 0)
|
||||||
BUILTIN3(Array_1int, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, jsint, 0, 0)
|
BUILTIN3(Array_1int, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, jsint, 0, 0)
|
||||||
|
|
||||||
|
// soft float
|
||||||
|
BUILTIN1(fneg, F, F, jsdouble, jsdouble, 1, 1)
|
||||||
|
BUILTIN1(i2f, LO, F, jsdouble, jsint, 1, 1)
|
||||||
|
BUILTIN1(u2f, LO, F, jsdouble, jsuint, 1, 1)
|
||||||
|
BUILTIN2(fcmpeq, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
|
||||||
|
BUILTIN2(fcmplt, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
|
||||||
|
BUILTIN2(fcmple, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
|
||||||
|
BUILTIN2(fcmpgt, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
|
||||||
|
BUILTIN2(fcmpge, F, F, LO, jsint, jsdouble, jsdouble, 1, 1)
|
||||||
|
BUILTIN2(fmul, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
|
||||||
|
BUILTIN2(fadd, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
|
||||||
|
BUILTIN2(fdiv, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
|
||||||
|
BUILTIN2(fsub, F, F, F, jsdouble, jsdouble, jsdouble, 1, 1)
|
||||||
BUILTIN3(Array_1str, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, JSString*, 0, 0)
|
BUILTIN3(Array_1str, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, JSString*, 0, 0)
|
||||||
BUILTIN4(Array_2obj, LO, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, JSObject*, JSObject**, 0, 0)
|
BUILTIN4(Array_2obj, LO, LO, LO, LO, P, JSObject*, JSContext*, JSObject*, JSObject*, JSObject**, 0, 0)
|
||||||
BUILTIN5(Array_3num, LO, LO, F, F, F, P, JSObject*, JSContext*, JSObject*, jsdouble, jsdouble, jsdouble, 0, 0)
|
BUILTIN5(Array_3num, LO, LO, F, F, F, P, JSObject*, JSContext*, JSObject*, jsdouble, jsdouble, jsdouble, 0, 0)
|
||||||
|
@ -719,6 +719,79 @@ js_Array_3num(JSContext* cx, JSObject* proto, jsdouble n1, jsdouble n2, jsdouble
|
|||||||
return NULL;)
|
return NULL;)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* soft float */
|
||||||
|
|
||||||
|
jsdouble FASTCALL
|
||||||
|
js_fneg(jsdouble x)
|
||||||
|
{
|
||||||
|
return -x;
|
||||||
|
}
|
||||||
|
|
||||||
|
jsdouble FASTCALL
|
||||||
|
js_i2f(jsint i)
|
||||||
|
{
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
jsdouble FASTCALL
|
||||||
|
js_u2f(jsuint u)
|
||||||
|
{
|
||||||
|
return u;
|
||||||
|
}
|
||||||
|
|
||||||
|
jsint FASTCALL
|
||||||
|
js_fcmpeq(jsdouble x, jsdouble y)
|
||||||
|
{
|
||||||
|
return x==y;
|
||||||
|
}
|
||||||
|
|
||||||
|
jsint FASTCALL
|
||||||
|
js_fcmplt(jsdouble x, jsdouble y)
|
||||||
|
{
|
||||||
|
return x < y;
|
||||||
|
}
|
||||||
|
|
||||||
|
jsint FASTCALL
|
||||||
|
js_fcmple(jsdouble x, jsdouble y)
|
||||||
|
{
|
||||||
|
return x <= y;
|
||||||
|
}
|
||||||
|
|
||||||
|
jsint FASTCALL
|
||||||
|
js_fcmpgt(jsdouble x, jsdouble y)
|
||||||
|
{
|
||||||
|
return x > y;
|
||||||
|
}
|
||||||
|
|
||||||
|
jsint FASTCALL
|
||||||
|
js_fcmpge(jsdouble x, jsdouble y)
|
||||||
|
{
|
||||||
|
return x >= y;
|
||||||
|
}
|
||||||
|
|
||||||
|
jsdouble FASTCALL
|
||||||
|
js_fmul(jsdouble x, jsdouble y)
|
||||||
|
{
|
||||||
|
return x * y;
|
||||||
|
}
|
||||||
|
jsdouble FASTCALL
|
||||||
|
js_fadd(jsdouble x, jsdouble y)
|
||||||
|
{
|
||||||
|
return x + y;
|
||||||
|
}
|
||||||
|
|
||||||
|
jsdouble FASTCALL
|
||||||
|
js_fdiv(jsdouble x, jsdouble y)
|
||||||
|
{
|
||||||
|
return x / y;
|
||||||
|
}
|
||||||
|
|
||||||
|
jsdouble FASTCALL
|
||||||
|
js_fsub(jsdouble x, jsdouble y)
|
||||||
|
{
|
||||||
|
return x - y;
|
||||||
|
}
|
||||||
|
|
||||||
#define LO ARGSIZE_LO
|
#define LO ARGSIZE_LO
|
||||||
#define F ARGSIZE_F
|
#define F ARGSIZE_F
|
||||||
#define Q ARGSIZE_Q
|
#define Q ARGSIZE_Q
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||||
* vim: set ts=4 sw=4 et tw=99:
|
* vim: set ts=4 sw=4 et tw=99:
|
||||||
*
|
*
|
||||||
* ***** BEGIN LICENSE BLOCK *****
|
* ***** BEGIN LICENSE BLOCK *****
|
||||||
@ -277,12 +277,59 @@ Oracle::clear()
|
|||||||
_dontDemote.reset();
|
_dontDemote.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool isi2f(LInsp i)
|
||||||
|
{
|
||||||
|
if (i->isop(LIR_i2f))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
#ifdef NANOJIT_ARM
|
||||||
|
if (i->isop(LIR_qjoin) &&
|
||||||
|
i->oprnd1()->isop(LIR_call) &&
|
||||||
|
i->oprnd2()->isop(LIR_callh))
|
||||||
|
{
|
||||||
|
if (i->oprnd1()->imm8() == F_i2f)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool isu2f(LInsp i)
|
||||||
|
{
|
||||||
|
if (i->isop(LIR_u2f))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
#ifdef NANOJIT_ARM
|
||||||
|
if (i->isop(LIR_qjoin) &&
|
||||||
|
i->oprnd1()->isop(LIR_call) &&
|
||||||
|
i->oprnd2()->isop(LIR_callh))
|
||||||
|
{
|
||||||
|
if (i->oprnd1()->imm8() == F_u2f)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static LInsp iu2fArg(LInsp i)
|
||||||
|
{
|
||||||
|
#ifdef NANOJIT_ARM
|
||||||
|
if (i->isop(LIR_qjoin))
|
||||||
|
return i->oprnd1()->arg(0);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return i->oprnd1();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static LIns* demote(LirWriter *out, LInsp i)
|
static LIns* demote(LirWriter *out, LInsp i)
|
||||||
{
|
{
|
||||||
if (i->isCall())
|
if (i->isCall())
|
||||||
return callArgN(i, 0);
|
return callArgN(i, 0);
|
||||||
if (i->isop(LIR_i2f) || i->isop(LIR_u2f))
|
if (isi2f(i) || isu2f(i))
|
||||||
return i->oprnd1();
|
return iu2fArg(i);
|
||||||
if (i->isconst())
|
if (i->isconst())
|
||||||
return i;
|
return i;
|
||||||
AvmAssert(i->isconstq());
|
AvmAssert(i->isconstq());
|
||||||
@ -294,14 +341,14 @@ static LIns* demote(LirWriter *out, LInsp i)
|
|||||||
static bool isPromoteInt(LIns* i)
|
static bool isPromoteInt(LIns* i)
|
||||||
{
|
{
|
||||||
jsdouble d;
|
jsdouble d;
|
||||||
return i->isop(LIR_i2f) || i->isconst() ||
|
return isi2f(i) || i->isconst() ||
|
||||||
(i->isconstq() && ((d = i->constvalf()) == (jsdouble)(jsint)d) && !JSDOUBLE_IS_NEGZERO(d));
|
(i->isconstq() && ((d = i->constvalf()) == (jsdouble)(jsint)d) && !JSDOUBLE_IS_NEGZERO(d));
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool isPromoteUint(LIns* i)
|
static bool isPromoteUint(LIns* i)
|
||||||
{
|
{
|
||||||
jsdouble d;
|
jsdouble d;
|
||||||
return i->isop(LIR_u2f) || i->isconst() ||
|
return isu2f(i) || i->isconst() ||
|
||||||
(i->isconstq() && ((d = i->constvalf()) == (jsdouble)(jsuint)d));
|
(i->isconstq() && ((d = i->constvalf()) == (jsdouble)(jsuint)d));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -324,6 +371,92 @@ static bool overflowSafe(LIns* i)
|
|||||||
((c->constval() > 0)));
|
((c->constval() > 0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef NANOJIT_ARM
|
||||||
|
|
||||||
|
class SoftFloatFilter: public LirWriter
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SoftFloatFilter(LirWriter* out):
|
||||||
|
LirWriter(out)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
LInsp quadCall(uint32_t fid, LInsp args[]) {
|
||||||
|
LInsp qlo, qhi;
|
||||||
|
|
||||||
|
qlo = out->insCall(fid, args);
|
||||||
|
qhi = out->ins1(LIR_callh, qlo);
|
||||||
|
return out->qjoin(qlo, qhi);
|
||||||
|
}
|
||||||
|
|
||||||
|
LInsp ins1(LOpcode v, LInsp s0)
|
||||||
|
{
|
||||||
|
if (v == LIR_fneg)
|
||||||
|
return quadCall(F_fneg, &s0);
|
||||||
|
|
||||||
|
if (v == LIR_i2f)
|
||||||
|
return quadCall(F_i2f, &s0);
|
||||||
|
|
||||||
|
if (v == LIR_u2f)
|
||||||
|
return quadCall(F_u2f, &s0);
|
||||||
|
|
||||||
|
return out->ins1(v, s0);
|
||||||
|
}
|
||||||
|
|
||||||
|
LInsp ins2(LOpcode v, LInsp s0, LInsp s1)
|
||||||
|
{
|
||||||
|
LInsp args[2];
|
||||||
|
LInsp bv;
|
||||||
|
|
||||||
|
// change the numeric value and order of these LIR opcodes and die
|
||||||
|
if (LIR_fadd <= v && v <= LIR_fdiv) {
|
||||||
|
static uint32_t fmap[] = { F_fadd, F_fsub, F_fmul, F_fdiv };
|
||||||
|
|
||||||
|
args[0] = s1;
|
||||||
|
args[1] = s0;
|
||||||
|
|
||||||
|
return quadCall(fmap[v - LIR_fadd], args);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (LIR_feq <= v && v <= LIR_fge) {
|
||||||
|
static uint32_t fmap[] = { F_fcmpeq, F_fcmplt, F_fcmpgt, F_fcmple, F_fcmpge };
|
||||||
|
|
||||||
|
args[0] = s1;
|
||||||
|
args[1] = s0;
|
||||||
|
|
||||||
|
bv = out->insCall(fmap[v - LIR_feq], args);
|
||||||
|
return out->ins2(LIR_eq, bv, out->insImm(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// not really a softfloat filter, but needed on ARM --
|
||||||
|
// arm doesn't mask shifts to 31 like x86 does
|
||||||
|
if (v == LIR_lsh ||
|
||||||
|
v == LIR_rsh ||
|
||||||
|
v == LIR_ush)
|
||||||
|
{
|
||||||
|
if (s1->isconst())
|
||||||
|
s1->setimm16(s1->constval() & 31);
|
||||||
|
else
|
||||||
|
s1 = out->ins2(LIR_and, s1, out->insImm(31));
|
||||||
|
return out->ins2(v, s0, s1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return out->ins2(v, s0, s1);
|
||||||
|
}
|
||||||
|
|
||||||
|
LInsp insCall(uint32_t fid, LInsp args[])
|
||||||
|
{
|
||||||
|
// if the return type is ARGSIZE_F, we have
|
||||||
|
// to do a quadCall ( qjoin(call,callh) )
|
||||||
|
if ((builtins[fid]._argtypes & 3) == ARGSIZE_F)
|
||||||
|
return quadCall(fid, args);
|
||||||
|
|
||||||
|
return out->insCall(fid, args);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
class FuncFilter: public LirWriter
|
class FuncFilter: public LirWriter
|
||||||
{
|
{
|
||||||
TraceRecorder& recorder;
|
TraceRecorder& recorder;
|
||||||
@ -427,9 +560,8 @@ public:
|
|||||||
case F_DoubleToUint32:
|
case F_DoubleToUint32:
|
||||||
if (s0->isconstq())
|
if (s0->isconstq())
|
||||||
return out->insImm(js_DoubleToECMAUint32(s0->constvalf()));
|
return out->insImm(js_DoubleToECMAUint32(s0->constvalf()));
|
||||||
if (s0->isop(LIR_i2f) || s0->isop(LIR_u2f)) {
|
if (isi2f(s0) || isu2f(s0))
|
||||||
return s0->oprnd1();
|
return iu2fArg(s0);
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case F_DoubleToInt32:
|
case F_DoubleToInt32:
|
||||||
if (s0->isconstq())
|
if (s0->isconstq())
|
||||||
@ -442,9 +574,9 @@ public:
|
|||||||
return out->ins2(op, demote(out, lhs), demote(out, rhs));
|
return out->ins2(op, demote(out, lhs), demote(out, rhs));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (s0->isop(LIR_i2f) || s0->isop(LIR_u2f)) {
|
if (isi2f(s0) || isu2f(s0))
|
||||||
return s0->oprnd1();
|
return iu2fArg(s0);
|
||||||
}
|
// XXX ARM -- check for qjoin(call(F_UnboxDouble),call(F_UnboxDouble))
|
||||||
if (s0->isCall() && s0->fid() == F_UnboxDouble) {
|
if (s0->isCall() && s0->fid() == F_UnboxDouble) {
|
||||||
LIns* args2[] = { callArgN(s0, 0) };
|
LIns* args2[] = { callArgN(s0, 0) };
|
||||||
return out->insCall(F_UnboxInt32, args2);
|
return out->insCall(F_UnboxInt32, args2);
|
||||||
@ -688,6 +820,9 @@ TraceRecorder::TraceRecorder(JSContext* cx, GuardRecord* _anchor, Fragment* _fra
|
|||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (verbose_debug)
|
if (verbose_debug)
|
||||||
lir = verbose_filter = new (&gc) VerboseWriter(&gc, lir, lirbuf->names);
|
lir = verbose_filter = new (&gc) VerboseWriter(&gc, lir, lirbuf->names);
|
||||||
|
#endif
|
||||||
|
#ifdef NANOJIT_ARM
|
||||||
|
lir = float_filter = new (&gc) SoftFloatFilter(lir);
|
||||||
#endif
|
#endif
|
||||||
lir = cse_filter = new (&gc) CseFilter(lir, &gc);
|
lir = cse_filter = new (&gc) CseFilter(lir, &gc);
|
||||||
lir = expr_filter = new (&gc) ExprFilter(lir);
|
lir = expr_filter = new (&gc) ExprFilter(lir);
|
||||||
@ -732,6 +867,9 @@ TraceRecorder::~TraceRecorder()
|
|||||||
delete cse_filter;
|
delete cse_filter;
|
||||||
delete expr_filter;
|
delete expr_filter;
|
||||||
delete func_filter;
|
delete func_filter;
|
||||||
|
#ifdef NANOJIT_ARM
|
||||||
|
delete float_filter;
|
||||||
|
#endif
|
||||||
delete lir_buf_writer;
|
delete lir_buf_writer;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1435,7 +1573,7 @@ TraceRecorder::checkType(jsval& v, uint8 t, bool& unstable)
|
|||||||
if (!isNumber(v))
|
if (!isNumber(v))
|
||||||
return false; /* not a number? type mismatch */
|
return false; /* not a number? type mismatch */
|
||||||
LIns* i = get(&v);
|
LIns* i = get(&v);
|
||||||
if (!i->isop(LIR_i2f)) {
|
if (!isi2f(i)) {
|
||||||
debug_only_v(printf("int slot is !isInt32, slot #%d, triggering re-compilation\n",
|
debug_only_v(printf("int slot is !isInt32, slot #%d, triggering re-compilation\n",
|
||||||
!isGlobal(&v)
|
!isGlobal(&v)
|
||||||
? nativeStackOffset(&v)
|
? nativeStackOffset(&v)
|
||||||
@ -1445,11 +1583,11 @@ TraceRecorder::checkType(jsval& v, uint8 t, bool& unstable)
|
|||||||
return true; /* keep checking types, but request re-compilation */
|
return true; /* keep checking types, but request re-compilation */
|
||||||
}
|
}
|
||||||
/* Looks good, slot is an int32, the last instruction should be i2f. */
|
/* Looks good, slot is an int32, the last instruction should be i2f. */
|
||||||
JS_ASSERT(isInt32(v) && i->isop(LIR_i2f));
|
JS_ASSERT(isInt32(v) && (i->isop(LIR_i2f) || i->isop(LIR_qjoin)));
|
||||||
/* We got the final LIR_i2f as we expected. Overwrite the value in that
|
/* We got the final LIR_i2f as we expected. Overwrite the value in that
|
||||||
slot with the argument of i2f since we want the integer store to flow along
|
slot with the argument of i2f since we want the integer store to flow along
|
||||||
the loop edge, not the casted value. */
|
the loop edge, not the casted value. */
|
||||||
set(&v, i->oprnd1());
|
set(&v, iu2fArg(i));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (t == JSVAL_DOUBLE) {
|
if (t == JSVAL_DOUBLE) {
|
||||||
@ -2080,10 +2218,11 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
|
|||||||
/* execute previously recorded trace */
|
/* execute previously recorded trace */
|
||||||
TreeInfo* ti = (TreeInfo*)f->vmprivate;
|
TreeInfo* ti = (TreeInfo*)f->vmprivate;
|
||||||
|
|
||||||
debug_only_v(printf("entering trace at %s:%u@%u, native stack slots: %u\n",
|
debug_only_v(printf("entering trace at %s:%u@%u, native stack slots: %u code: %p\n",
|
||||||
cx->fp->script->filename,
|
cx->fp->script->filename,
|
||||||
js_PCToLineNumber(cx, cx->fp->script, cx->fp->regs->pc),
|
js_PCToLineNumber(cx, cx->fp->script, cx->fp->regs->pc),
|
||||||
cx->fp->regs->pc - cx->fp->script->code, ti->maxNativeStackSlots););
|
cx->fp->regs->pc - cx->fp->script->code, ti->maxNativeStackSlots,
|
||||||
|
f->code()););
|
||||||
|
|
||||||
JSTraceMonitor* tm = &JS_TRACE_MONITOR(cx);
|
JSTraceMonitor* tm = &JS_TRACE_MONITOR(cx);
|
||||||
unsigned ngslots = tm->globalSlots->length();
|
unsigned ngslots = tm->globalSlots->length();
|
||||||
|
@ -221,6 +221,9 @@ class TraceRecorder {
|
|||||||
nanojit::LirWriter* cse_filter;
|
nanojit::LirWriter* cse_filter;
|
||||||
nanojit::LirWriter* expr_filter;
|
nanojit::LirWriter* expr_filter;
|
||||||
nanojit::LirWriter* func_filter;
|
nanojit::LirWriter* func_filter;
|
||||||
|
#ifdef NANOJIT_ARM
|
||||||
|
nanojit::LirWriter* float_filter;
|
||||||
|
#endif
|
||||||
nanojit::LIns* cx_ins;
|
nanojit::LIns* cx_ins;
|
||||||
nanojit::LIns* gp_ins;
|
nanojit::LIns* gp_ins;
|
||||||
nanojit::LIns* eos_ins;
|
nanojit::LIns* eos_ins;
|
||||||
|
@ -788,38 +788,29 @@ namespace nanojit
|
|||||||
internalReset(); // clear the reservation tables and regalloc
|
internalReset(); // clear the reservation tables and regalloc
|
||||||
NanoAssert(_branchStateMap->isEmpty());
|
NanoAssert(_branchStateMap->isEmpty());
|
||||||
_branchStateMap = 0;
|
_branchStateMap = 0;
|
||||||
|
|
||||||
#if defined(UNDER_CE)
|
#ifdef AVMPLUS_ARM
|
||||||
// If we've modified the code, we need to flush so we don't end up trying
|
// If we've modified the code, we need to flush so we don't end up trying
|
||||||
// to execute junk
|
// to execute junk
|
||||||
|
# if defined(UNDER_CE)
|
||||||
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
|
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
|
||||||
#elif defined(AVMPLUS_LINUX) && defined(AVMPLUS_ARM)
|
# elif defined(AVMPLUS_LINUX)
|
||||||
// N A S T Y - obviously have to fix this
|
// XXX fixme flush adjacent pages together
|
||||||
// determine our page range
|
for (int i = 0; i < 2; i++) {
|
||||||
|
Page *p = (i == 0) ? _nativePages : _nativeExitPages;
|
||||||
|
|
||||||
Page *page=0, *first=0, *last=0;
|
while (p) {
|
||||||
for (int i=2;i!=0;i--) {
|
flushCache((NIns*)p, (NIns*)((intptr_t)(p) + NJ_PAGE_SIZE));
|
||||||
page = first = last = (i==2 ? _nativePages : _nativeExitPages);
|
p = p->next;
|
||||||
while (page)
|
|
||||||
{
|
|
||||||
if (page<first)
|
|
||||||
first = page;
|
|
||||||
if (page>last)
|
|
||||||
last = page;
|
|
||||||
page = page->next;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
register unsigned long _beg __asm("a1") = (unsigned long)(first);
|
|
||||||
register unsigned long _end __asm("a2") = (unsigned long)(last+NJ_PAGE_SIZE);
|
|
||||||
register unsigned long _flg __asm("a3") = 0;
|
|
||||||
register unsigned long _swi __asm("r7") = 0xF0002;
|
|
||||||
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
|
|
||||||
}
|
}
|
||||||
#endif
|
# endif
|
||||||
#ifdef AVMPLUS_PORTING_API
|
#endif
|
||||||
|
|
||||||
|
# ifdef AVMPLUS_PORTING_API
|
||||||
NanoJIT_PortAPI_FlushInstructionCache(_nIns, _endJit1Addr);
|
NanoJIT_PortAPI_FlushInstructionCache(_nIns, _endJit1Addr);
|
||||||
NanoJIT_PortAPI_FlushInstructionCache(_nExitIns, _endJit2Addr);
|
NanoJIT_PortAPI_FlushInstructionCache(_nExitIns, _endJit2Addr);
|
||||||
#endif
|
# endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::copyRegisters(RegAlloc* copyTo)
|
void Assembler::copyRegisters(RegAlloc* copyTo)
|
||||||
@ -861,7 +852,7 @@ namespace nanojit
|
|||||||
switch(op)
|
switch(op)
|
||||||
{
|
{
|
||||||
default:
|
default:
|
||||||
NanoAssertMsg(false, "unsupported LIR instruction");
|
NanoAssertMsgf(false, ("unsupported LIR instruction: %d (~0x40: %d)\n",op, op&~LIR64));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case LIR_short:
|
case LIR_short:
|
||||||
@ -1063,7 +1054,20 @@ namespace nanojit
|
|||||||
|
|
||||||
Register rb = UnknownReg;
|
Register rb = UnknownReg;
|
||||||
RegisterMask allow = GpRegs;
|
RegisterMask allow = GpRegs;
|
||||||
if (lhs != rhs && (op == LIR_mul || !rhs->isconst()))
|
bool forceReg = (op == LIR_mul || !rhs->isconst());
|
||||||
|
|
||||||
|
#ifdef NANOJIT_ARM
|
||||||
|
// Arm can't do an immediate op with immediates
|
||||||
|
// outside of +/-255 (for AND) or outside of
|
||||||
|
// 0..255 for others.
|
||||||
|
if (!forceReg)
|
||||||
|
{
|
||||||
|
if (rhs->isconst() && !isU8(rhs->constval()))
|
||||||
|
forceReg = true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (lhs != rhs && forceReg)
|
||||||
{
|
{
|
||||||
if ((rb = asm_binop_rhs_reg(ins)) == UnknownReg) {
|
if ((rb = asm_binop_rhs_reg(ins)) == UnknownReg) {
|
||||||
rb = findRegFor(rhs, allow);
|
rb = findRegFor(rhs, allow);
|
||||||
@ -1079,7 +1083,7 @@ namespace nanojit
|
|||||||
ra = findSpecificRegFor(lhs, rr);
|
ra = findSpecificRegFor(lhs, rr);
|
||||||
// else, rA already has a register assigned.
|
// else, rA already has a register assigned.
|
||||||
|
|
||||||
if (!rhs->isconst() || op == LIR_mul)
|
if (forceReg)
|
||||||
{
|
{
|
||||||
if (lhs == rhs)
|
if (lhs == rhs)
|
||||||
rb = ra;
|
rb = ra;
|
||||||
|
@ -1546,7 +1546,12 @@ namespace nanojit
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (ref->isCall()) {
|
if (ref->isCall()) {
|
||||||
copyName(ref, _functions[ref->fid()]._name, funccounts.add(ref->fid()));
|
if (ref->isop(LIR_callh)) {
|
||||||
|
// we've presumably seen the other half already
|
||||||
|
ref = ref->oprnd1();
|
||||||
|
} else {
|
||||||
|
copyName(ref, _functions[ref->fid()]._name, funccounts.add(ref->fid()));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
NanoAssert(ref->opcode() < sizeof(lirNames) / sizeof(lirNames[0]));
|
NanoAssert(ref->opcode() < sizeof(lirNames) / sizeof(lirNames[0]));
|
||||||
copyName(ref, lirNames[ref->opcode()], lircounts.add(ref->opcode()));
|
copyName(ref, lirNames[ref->opcode()], lircounts.add(ref->opcode()));
|
||||||
@ -1652,7 +1657,6 @@ namespace nanojit
|
|||||||
case LIR_fle:
|
case LIR_fle:
|
||||||
case LIR_fgt:
|
case LIR_fgt:
|
||||||
case LIR_fge:
|
case LIR_fge:
|
||||||
case LIR_qjoin:
|
|
||||||
case LIR_qiadd:
|
case LIR_qiadd:
|
||||||
case LIR_qiand:
|
case LIR_qiand:
|
||||||
case LIR_qilsh:
|
case LIR_qilsh:
|
||||||
@ -1662,6 +1666,12 @@ namespace nanojit
|
|||||||
formatRef(i->oprnd2()));
|
formatRef(i->oprnd2()));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case LIR_qjoin:
|
||||||
|
sprintf(s, "%s (%s), %s", lirNames[op],
|
||||||
|
formatIns(i->oprnd1()),
|
||||||
|
formatRef(i->oprnd2()));
|
||||||
|
break;
|
||||||
|
|
||||||
case LIR_qcmov:
|
case LIR_qcmov:
|
||||||
case LIR_cmov:
|
case LIR_cmov:
|
||||||
sprintf(s, "%s ? %s : %s",
|
sprintf(s, "%s ? %s : %s",
|
||||||
|
@ -149,11 +149,14 @@ namespace nanojit
|
|||||||
#define DECLARE_PLATFORM_ASSEMBLER()\
|
#define DECLARE_PLATFORM_ASSEMBLER()\
|
||||||
const static Register argRegs[4], retRegs[2];\
|
const static Register argRegs[4], retRegs[2];\
|
||||||
void LD32_nochk(Register r, int32_t imm);\
|
void LD32_nochk(Register r, int32_t imm);\
|
||||||
|
void BL(NIns*);\
|
||||||
|
void BL_far(NIns*);\
|
||||||
void CALL(const CallInfo*);\
|
void CALL(const CallInfo*);\
|
||||||
void underrunProtect(int bytes);\
|
void underrunProtect(int bytes);\
|
||||||
bool has_cmov;\
|
bool has_cmov;\
|
||||||
void nativePageReset();\
|
void nativePageReset();\
|
||||||
void nativePageSetup();\
|
void nativePageSetup();\
|
||||||
|
void flushCache(NIns*,NIns*);\
|
||||||
int* _nSlot;\
|
int* _nSlot;\
|
||||||
int* _nExitSlot;
|
int* _nExitSlot;
|
||||||
|
|
||||||
@ -232,6 +235,7 @@ ShiftOperator;
|
|||||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) );\
|
*(--_nIns) = (NIns)( COND_AL | OP_IMM | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) );\
|
||||||
asm_output2("and %s,%d",gpn(_r),(_imm));}\
|
asm_output2("and %s,%d",gpn(_r),(_imm));}\
|
||||||
else if ((_imm)<0 && (_imm)>-256) {\
|
else if ((_imm)<0 && (_imm)>-256) {\
|
||||||
|
underrunProtect(8);\
|
||||||
*(--_nIns) = (NIns)( COND_AL | ((_r)<<16) | ((_r)<<12) | (Scratch) );\
|
*(--_nIns) = (NIns)( COND_AL | ((_r)<<16) | ((_r)<<12) | (Scratch) );\
|
||||||
asm_output2("and %s,%s",gpn(_r),gpn(Scratch));\
|
asm_output2("and %s,%s",gpn(_r),gpn(Scratch));\
|
||||||
*(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | ((Scratch)<<12) | (((_imm)^0xFFFFFFFF)&0xFF) );\
|
*(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | ((Scratch)<<12) | (((_imm)^0xFFFFFFFF)&0xFF) );\
|
||||||
@ -532,6 +536,7 @@ ShiftOperator;
|
|||||||
//#define INT3() underrunProtect(1); *(--_nIns) = 0xcc; asm_output("int3")
|
//#define INT3() underrunProtect(1); *(--_nIns) = 0xcc; asm_output("int3")
|
||||||
//#define RET() INT3()
|
//#define RET() INT3()
|
||||||
|
|
||||||
|
#define BKPT_nochk() do { *(--_nIns) = (NIns)( (0xE<<24) | (0x12<<20) | (0x7<<4) ); } while (0);
|
||||||
|
|
||||||
// this is pushing a reg
|
// this is pushing a reg
|
||||||
#define PUSHr(_r) do {\
|
#define PUSHr(_r) do {\
|
||||||
@ -564,49 +569,66 @@ ShiftOperator;
|
|||||||
*(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | (_mask) );\
|
*(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | (_mask) );\
|
||||||
asm_output1("pop %x", (_mask));} while (0)
|
asm_output1("pop %x", (_mask));} while (0)
|
||||||
|
|
||||||
// takes an offset (right?)
|
#define PC_OFFSET_FROM(target,frompc) ((intptr_t)(target) - ((intptr_t)(frompc) + 8))
|
||||||
#define JMP_long_nochk_offset(_off) do {\
|
#define JMP_S24_OFFSET_OK(offs) ((-(1<<24)) <= (offs) && (offs) < (1<<24))
|
||||||
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((_off)>>2) & 0xFFFFFF) ); \
|
|
||||||
asm_output1("jmp_l_n 0x%08x",(_off));} while (0)
|
|
||||||
|
|
||||||
// take an address, not an offset
|
// (XXX This ought to be a function instead of a macro)
|
||||||
#define JMP(t) do {\
|
//
|
||||||
underrunProtect(4);\
|
// Branch to target address _t with condition _c, doing underrun
|
||||||
intptr_t tt = (intptr_t)(t) - ((intptr_t)_nIns + 4);\
|
// checks (_chk == 1) or skipping them (_chk == 0).
|
||||||
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((tt)>>2) & 0xFFFFFF) ); \
|
//
|
||||||
asm_output1("JMP 0x%08x\n",(unsigned int)(t)); } while (0)
|
// If the jump fits in a relative jump (+/-32MB), emit that.
|
||||||
|
// If the jump is unconditional, emit the dest address inline in
|
||||||
|
// the instruction stream and load it into pc.
|
||||||
|
// If the jump has a condition, but no one's mucked with _nIns and our _nSlot
|
||||||
|
// pointer is valid, stick the constant in the slot and emit a conditional
|
||||||
|
// load into pc.
|
||||||
|
// Otherwise, emit the conditional load into pc from a nearby constant,
|
||||||
|
// and emit a jump to jump over it in case the condition fails.
|
||||||
|
//
|
||||||
|
// NB: JMP_nochk depends on this not calling samepage() when _c == AL
|
||||||
|
#define B_cond_chk(_c,_t,_chk) do { \
|
||||||
|
int32 offs = PC_OFFSET_FROM(_t,(intptr_t)(_nIns)-4); \
|
||||||
|
if (JMP_S24_OFFSET_OK(offs)) { \
|
||||||
|
if(_chk) underrunProtect(4); \
|
||||||
|
*(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | (((offs)>>2) & 0xFFFFFF) ); \
|
||||||
|
} else if (_c == AL) { \
|
||||||
|
if(_chk) underrunProtect(8); \
|
||||||
|
*(--_nIns) = (NIns)(_t); \
|
||||||
|
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | 0x4 ); \
|
||||||
|
} else if (samepage(_nIns,_nSlot)) { \
|
||||||
|
if(_chk) underrunProtect(8); \
|
||||||
|
*(++_nSlot) = (NIns)(_t); \
|
||||||
|
offs = PC_OFFSET_FROM(_nSlot,(intptr_t)(_nIns)-4); \
|
||||||
|
NanoAssert(offs < 0); \
|
||||||
|
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | ((-offs) & 0xFFFFFF) ); \
|
||||||
|
} else { \
|
||||||
|
if(_chk) underrunProtect(24); \
|
||||||
|
*(--_nIns) = (NIns)(_t); \
|
||||||
|
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((-4)>>2) & 0xFFFFFF ); \
|
||||||
|
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | 0x0 ); \
|
||||||
|
} \
|
||||||
|
asm_output2("%s %p\n", _c == AL ? "jmp" : "b(cnd)", (void*)(_t)); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
#define JMP_nochk(t) do {\
|
#define B_cond(_c,_t) \
|
||||||
intptr_t tt = (intptr_t)(t) - ((intptr_t)_nIns + 4);\
|
B_cond_chk(_c,_t,1)
|
||||||
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((tt)>>2) & 0xFFFFFF) ); \
|
|
||||||
asm_output1("JMP 0x%08x\n",(unsigned int)(t)); } while (0)
|
|
||||||
|
|
||||||
#define JMP_long_placeholder() do {JMP_long(0xffffffff); } while(0)
|
// NB: don't use COND_AL here, we shift the condition into place!
|
||||||
|
#define JMP(_t) \
|
||||||
|
B_cond_chk(AL,_t,1)
|
||||||
|
|
||||||
#define JMP_long(_t) do {\
|
#define JMP_nochk(_t) \
|
||||||
underrunProtect(4);\
|
B_cond_chk(AL,_t,0)
|
||||||
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((_t)>>2) & 0xFFFFFF) ); \
|
|
||||||
asm_output1("JMP_long 0x%08x\n", (unsigned int)(_t) ); } while (0)
|
|
||||||
|
|
||||||
#define BL(_t) do {\
|
|
||||||
underrunProtect(4);\
|
|
||||||
intptr_t _tt = (intptr_t)(_t) - ((intptr_t)_nIns + 4);\
|
|
||||||
*(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((_tt)>>2) & 0xFFFFFF) ); \
|
|
||||||
asm_output2("BL 0x%08x offset=%d",(intptr_t)(_nIns) + (_tt),(_tt)) } while (0)
|
|
||||||
|
|
||||||
|
|
||||||
#define JMP_long_nochk(_t) do {\
|
|
||||||
intptr_t tt = (intptr_t)(_t) - ((intptr_t)_nIns + 4);\
|
|
||||||
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((tt)>>2) & 0xFFFFFF) ); \
|
|
||||||
asm_output1("JMP_l_n 0x%08x\n", (unsigned int)(_t)) } while (0)
|
|
||||||
|
|
||||||
|
|
||||||
#define B_cond(_c,_t)\
|
|
||||||
underrunProtect(4);\
|
|
||||||
intptr_t tt = (intptr_t)(_t) - ((intptr_t)_nIns + 4);\
|
|
||||||
*(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | ((tt >>2)& 0xFFFFFF) ); \
|
|
||||||
asm_output2("b(cond) 0x%08x (%tX)",(unsigned int)(_t), tt);
|
|
||||||
|
|
||||||
|
// emit a placeholder that will be filled in later by nPatchBranch;
|
||||||
|
// emit two breakpoint instructions in case something goes wrong with
|
||||||
|
// the patching.
|
||||||
|
#define JMP_long_placeholder() do { \
|
||||||
|
underrunProtect(8); \
|
||||||
|
BKPT_nochk(); \
|
||||||
|
BKPT_nochk(); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
#define JA(t) do {B_cond(HI,t); asm_output1("ja 0x%08x",(unsigned int)t); } while(0)
|
#define JA(t) do {B_cond(HI,t); asm_output1("ja 0x%08x",(unsigned int)t); } while(0)
|
||||||
#define JNA(t) do {B_cond(LS,t); asm_output1("jna 0x%08x",(unsigned int)t); } while(0)
|
#define JNA(t) do {B_cond(LS,t); asm_output1("jna 0x%08x",(unsigned int)t); } while(0)
|
||||||
|
@ -148,7 +148,14 @@ namespace nanojit
|
|||||||
{
|
{
|
||||||
// target doesn't exist yet. emit jump to epilog, and set up to patch later.
|
// target doesn't exist yet. emit jump to epilog, and set up to patch later.
|
||||||
lr = placeGuardRecord(guard);
|
lr = placeGuardRecord(guard);
|
||||||
BL(_epilogue);
|
|
||||||
|
#ifdef NJ_THUMB_JIT
|
||||||
|
BL(_epilogue);
|
||||||
|
#else
|
||||||
|
// we need to know that there's an extra immediate value available
|
||||||
|
// for us; always force a far jump here.
|
||||||
|
BL_far(_epilogue);
|
||||||
|
#endif
|
||||||
|
|
||||||
lr->jmp = _nIns;
|
lr->jmp = _nIns;
|
||||||
}
|
}
|
||||||
@ -196,6 +203,25 @@ namespace nanojit
|
|||||||
void Assembler::asm_call(LInsp ins)
|
void Assembler::asm_call(LInsp ins)
|
||||||
{
|
{
|
||||||
const CallInfo* call = callInfoFor(ins->fid());
|
const CallInfo* call = callInfoFor(ins->fid());
|
||||||
|
uint32_t atypes = call->_argtypes;
|
||||||
|
uint32_t roffset = 0;
|
||||||
|
|
||||||
|
// we need to detect if we have arg0 as LO followed by arg1 as F;
|
||||||
|
// in that case, we need to skip using r1 -- the F needs to be
|
||||||
|
// loaded in r2/r3, at least according to the ARM EABI and gcc 4.2's
|
||||||
|
// generated code.
|
||||||
|
bool arg0IsInt32FollowedByFloat = false;
|
||||||
|
while ((atypes & 3) != ARGSIZE_NONE) {
|
||||||
|
if (((atypes >> 4) & 3) == ARGSIZE_LO &&
|
||||||
|
((atypes >> 2) & 3) == ARGSIZE_F &&
|
||||||
|
((atypes >> 6) & 3) == ARGSIZE_NONE)
|
||||||
|
{
|
||||||
|
arg0IsInt32FollowedByFloat = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
atypes >>= 2;
|
||||||
|
}
|
||||||
|
|
||||||
CALL(call);
|
CALL(call);
|
||||||
ArgSize sizes[10];
|
ArgSize sizes[10];
|
||||||
uint32_t argc = call->get_sizes(sizes);
|
uint32_t argc = call->get_sizes(sizes);
|
||||||
@ -205,8 +231,11 @@ namespace nanojit
|
|||||||
ArgSize sz = sizes[j];
|
ArgSize sz = sizes[j];
|
||||||
NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q);
|
NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q);
|
||||||
// pre-assign registers R0-R3 for arguments (if they fit)
|
// pre-assign registers R0-R3 for arguments (if they fit)
|
||||||
Register r = i < 4 ? argRegs[i] : UnknownReg;
|
Register r = (i+roffset) < 4 ? argRegs[i+roffset] : UnknownReg;
|
||||||
asm_arg(sz, ins->arg(j), r);
|
asm_arg(sz, ins->arg(j), r);
|
||||||
|
|
||||||
|
if (i == 0 && arg0IsInt32FollowedByFloat)
|
||||||
|
roffset = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -277,19 +306,28 @@ namespace nanojit
|
|||||||
|
|
||||||
// This is ALWAYS going to be a long branch (using the BL instruction)
|
// This is ALWAYS going to be a long branch (using the BL instruction)
|
||||||
// Which is really 2 instructions, so we need to modify both
|
// Which is really 2 instructions, so we need to modify both
|
||||||
|
// XXX -- this is B, not BL, at least on non-Thumb..
|
||||||
|
|
||||||
// branch+2 because PC is always 2 instructions ahead on ARM/Thumb
|
// branch+2 because PC is always 2 instructions ahead on ARM/Thumb
|
||||||
int32_t offset = int(target) - int(branch+2);
|
int32_t offset = int(target) - int(branch+2);
|
||||||
|
|
||||||
//printf("---patching branch at %X to location %X (%d)\n", branch, target, offset);
|
//printf("---patching branch at 0x%08x to location 0x%08x (%d-0x%08x)\n", branch, target, offset, offset);
|
||||||
|
|
||||||
#ifdef NJ_THUMB_JIT
|
#ifdef NJ_THUMB_JIT
|
||||||
NanoAssert(-(1<<21) <= offset && offset < (1<<21));
|
NanoAssert(-(1<<21) <= offset && offset < (1<<21));
|
||||||
*branch++ = (NIns)(0xF000 | (offset>>12)&0x7FF);
|
*branch++ = (NIns)(0xF000 | (offset>>12)&0x7FF);
|
||||||
*branch = (NIns)(0xF800 | (offset>>1)&0x7FF);
|
*branch = (NIns)(0xF800 | (offset>>1)&0x7FF);
|
||||||
#else
|
#else
|
||||||
// ARM goodness, using unconditional B
|
// We have 2 words to work with here -- if offset is in range of a 24-bit
|
||||||
*branch = (NIns)( COND_AL | (0xA<<24) | ((offset >>2)& 0xFFFFFF) );
|
// relative jump, emit that; otherwise, we do a pc-relative load into pc.
|
||||||
|
if (-(1<<24) <= offset & offset < (1<<24)) {
|
||||||
|
// ARM goodness, using unconditional B
|
||||||
|
*branch = (NIns)( COND_AL | (0xA<<24) | ((offset >>2) & 0xFFFFFF) );
|
||||||
|
} else {
|
||||||
|
// LDR pc,[pc]
|
||||||
|
*branch++ = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | ( 0x004 ) );
|
||||||
|
*branch = (NIns)target;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -451,37 +489,6 @@ namespace nanojit
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
|
|
||||||
{
|
|
||||||
NIns* save = _nIns;
|
|
||||||
#ifdef NJ_THUMB_JIT
|
|
||||||
NIns* was = (NIns*) (((((*(at+2))&0x7ff)<<12) | (((*(at+1))&0x7ff)<<1)) + (at-2+2));
|
|
||||||
_nIns = at + 2;
|
|
||||||
#else
|
|
||||||
NIns* was = (NIns*) (((*at&0xFFFFFF)<<2));
|
|
||||||
_nIns = at + 1;
|
|
||||||
#endif
|
|
||||||
BL(target);
|
|
||||||
#ifdef AVMPLUS_PORTING_API
|
|
||||||
NanoJIT_PortAPI_FlushInstructionCache(save, _nIns);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(UNDER_CE)
|
|
||||||
// we changed the code, so we need to do this (sadly)
|
|
||||||
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
|
|
||||||
#elif defined(AVMPLUS_LINUX)
|
|
||||||
// Just need to clear this one page (not even the whole page really)
|
|
||||||
//Page *page = (Page*)pageTop(_nIns);
|
|
||||||
register unsigned long _beg __asm("a1") = (unsigned long)(_nIns);
|
|
||||||
register unsigned long _end __asm("a2") = (unsigned long)(_nIns+2);
|
|
||||||
register unsigned long _flg __asm("a3") = 0;
|
|
||||||
register unsigned long _swi __asm("r7") = 0xF0002;
|
|
||||||
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
|
|
||||||
#endif
|
|
||||||
_nIns = save;
|
|
||||||
return was;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Assembler::nativePageReset()
|
void Assembler::nativePageReset()
|
||||||
{
|
{
|
||||||
#ifdef NJ_THUMB_JIT
|
#ifdef NJ_THUMB_JIT
|
||||||
@ -521,21 +528,55 @@ namespace nanojit
|
|||||||
#else
|
#else
|
||||||
if (!_nSlot)
|
if (!_nSlot)
|
||||||
{
|
{
|
||||||
// This needs to be done or the samepage macro gets confused
|
// This needs to be done or the samepage macro gets confused; pageAlloc
|
||||||
|
// gives us a pointer to just past the end of the page.
|
||||||
_nIns--;
|
_nIns--;
|
||||||
_nExitIns--;
|
_nExitIns--;
|
||||||
|
|
||||||
// constpool starts at top of page and goes down,
|
// constpool starts at top of page and goes down,
|
||||||
// code starts at bottom of page and moves up
|
// code starts at bottom of page and moves up
|
||||||
_nSlot = (int*)(pageTop(_nIns)+1);
|
_nSlot = pageDataStart(_nIns); //(int*)(&((Page*)pageTop(_nIns))->lir[0]);
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Assembler::flushCache(NIns* n1, NIns* n2) {
|
||||||
|
#if defined(UNDER_CE)
|
||||||
|
// we changed the code, so we need to do this (sadly)
|
||||||
|
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
|
||||||
|
#elif defined(AVMPLUS_LINUX)
|
||||||
|
// Just need to clear this one page (not even the whole page really)
|
||||||
|
//Page *page = (Page*)pageTop(_nIns);
|
||||||
|
register unsigned long _beg __asm("a1") = (unsigned long)(n1);
|
||||||
|
register unsigned long _end __asm("a2") = (unsigned long)(n2);
|
||||||
|
register unsigned long _flg __asm("a3") = 0;
|
||||||
|
register unsigned long _swi __asm("r7") = 0xF0002;
|
||||||
|
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef NJ_THUMB_JIT
|
#ifdef NJ_THUMB_JIT
|
||||||
|
|
||||||
|
NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
|
||||||
|
{
|
||||||
|
NIns* save = _nIns;
|
||||||
|
NIns* was = (NIns*) (((((*(at+2))&0x7ff)<<12) | (((*(at+1))&0x7ff)<<1)) + (at-2+2));
|
||||||
|
|
||||||
|
_nIns = at + 2;
|
||||||
|
BL(target);
|
||||||
|
|
||||||
|
flushCache(_nIns, _nIns+2);
|
||||||
|
|
||||||
|
#ifdef AVMPLUS_PORTING_API
|
||||||
|
// XXX save.._nIns+2? really?
|
||||||
|
NanoJIT_PortAPI_FlushInstructionCache(save, _nIns+2);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
_nIns = save;
|
||||||
|
|
||||||
|
return was;
|
||||||
|
}
|
||||||
|
|
||||||
void Assembler::STi(Register b, int32_t d, int32_t v)
|
void Assembler::STi(Register b, int32_t d, int32_t v)
|
||||||
{
|
{
|
||||||
ST(b, d, Scratch);
|
ST(b, d, Scratch);
|
||||||
@ -551,6 +592,7 @@ namespace nanojit
|
|||||||
|
|
||||||
void Assembler::underrunProtect(int bytes)
|
void Assembler::underrunProtect(int bytes)
|
||||||
{
|
{
|
||||||
|
// perhaps bytes + sizeof(PageHeader)/sizeof(NIns) + 4 ?
|
||||||
intptr_t u = bytes + 4;
|
intptr_t u = bytes + 4;
|
||||||
if (!samepage(_nIns-u, _nIns-1)) {
|
if (!samepage(_nIns-u, _nIns-1)) {
|
||||||
NIns* target = _nIns;
|
NIns* target = _nIns;
|
||||||
@ -855,45 +897,94 @@ namespace nanojit
|
|||||||
}
|
}
|
||||||
|
|
||||||
#else // ARM_JIT
|
#else // ARM_JIT
|
||||||
void Assembler::underrunProtect(int bytes)
|
NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
|
||||||
{
|
|
||||||
intptr_t u = (bytes) + 4;
|
|
||||||
if ( (samepage(_nIns,_nSlot) && (((intptr_t)_nIns-u) <= intptr_t(_nSlot+1))) ||
|
|
||||||
(!samepage((intptr_t)_nIns-u,_nIns)) )
|
|
||||||
{
|
|
||||||
NIns* target = _nIns;
|
|
||||||
_nIns = pageAlloc(_inExit);
|
|
||||||
JMP_nochk(target);
|
|
||||||
_nSlot = pageTop(_nIns);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool isB24(NIns *target, NIns *cur)
|
|
||||||
{
|
{
|
||||||
int offset = int(target)-int(cur-2+2);
|
// This always got emitted as a BL_far sequence; at points
|
||||||
return (-(1<<24) <= offset && offset < (1<<24));
|
// to the first of 4 instructions. Ensure that we're where
|
||||||
|
// we think we were..
|
||||||
|
NanoAssert(at[1] == (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) ));
|
||||||
|
NanoAssert(at[2] == (NIns)( COND_AL | (0x9<<21) | (0xFFF<<8) | (1<<4) | (IP) ));
|
||||||
|
|
||||||
|
NIns* was = (NIns*) at[3];
|
||||||
|
|
||||||
|
at[3] = (NIns)target;
|
||||||
|
|
||||||
|
flushCache(at, at+4);
|
||||||
|
|
||||||
|
#ifdef AVMPLUS_PORTING_API
|
||||||
|
NanoJIT_PortAPI_FlushInstructionCache(at, at+4);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return was;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Assembler::underrunProtect(int bytes)
|
||||||
|
{
|
||||||
|
intptr_t u = bytes + sizeof(PageHeader)/sizeof(NIns) + 8;
|
||||||
|
if ( (samepage(_nIns,_nSlot) && (((intptr_t)_nIns-u) <= intptr_t(_nSlot+1))) ||
|
||||||
|
(!samepage((intptr_t)_nIns-u,_nIns)) )
|
||||||
|
{
|
||||||
|
NIns* target = _nIns;
|
||||||
|
|
||||||
|
_nIns = pageAlloc(_inExit);
|
||||||
|
|
||||||
|
// XXX _nIns at this point points to one past the end of
|
||||||
|
// the page, intended to be written into using *(--_nIns).
|
||||||
|
// However, (guess) something seems to be storing the value
|
||||||
|
// of _nIns as is, and then later generating a jump to a bogus
|
||||||
|
// address. So pre-decrement to ensure that it's always
|
||||||
|
// valid; we end up skipping using the last instruction this
|
||||||
|
// way.
|
||||||
|
_nIns--;
|
||||||
|
|
||||||
|
// Update slot, either to _nIns (if decremented above), or
|
||||||
|
// _nIns-1 once the above bug is fixed/found.
|
||||||
|
_nSlot = pageDataStart(_nIns);
|
||||||
|
|
||||||
|
// If samepage() is used on _nIns and _nSlot, it'll fail, since _nIns
|
||||||
|
// points to one past the end of the page right now. Assume that
|
||||||
|
// JMP_nochk won't ever try to write to _nSlot, and so won't ever
|
||||||
|
// check samepage(). See B_cond_chk macro.
|
||||||
|
JMP_nochk(target);
|
||||||
|
} else if (!_nSlot) {
|
||||||
|
// make sure that there's always a slot pointer
|
||||||
|
_nSlot = pageDataStart(_nIns);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Assembler::BL_far(NIns* addr) {
|
||||||
|
// we have to stick an immediate into the stream and make lr
|
||||||
|
// point to the right spot before branching
|
||||||
|
underrunProtect(16);
|
||||||
|
|
||||||
|
// the address
|
||||||
|
*(--_nIns) = (NIns)((addr));
|
||||||
|
// bx ip // branch to the address we loaded earlier
|
||||||
|
*(--_nIns) = (NIns)( COND_AL | (0x9<<21) | (0xFFF<<8) | (1<<4) | (IP) );
|
||||||
|
// add lr, [pc + #4] // set lr to be past the address that we wrote
|
||||||
|
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) );
|
||||||
|
// ldr ip, [pc + #4] // load the address into ip, reading it from [pc+4]
|
||||||
|
*(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (PC<<16) | (IP<<12) | (4));
|
||||||
|
asm_output1("bl %p (32-bit)", addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Assembler::BL(NIns* addr) {
|
||||||
|
intptr_t offs = PC_OFFSET_FROM(addr,(intptr_t)_nIns-4);
|
||||||
|
if (JMP_S24_OFFSET_OK(offs)) {
|
||||||
|
// we can do this with a single BL call
|
||||||
|
underrunProtect(4);
|
||||||
|
*(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((offs)>>2) & 0xFFFFFF) ); \
|
||||||
|
asm_output1("bl %p", addr);
|
||||||
|
} else {
|
||||||
|
BL_far(addr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::CALL(const CallInfo *ci)
|
void Assembler::CALL(const CallInfo *ci)
|
||||||
{
|
{
|
||||||
intptr_t addr = ci->_address;
|
intptr_t addr = ci->_address;
|
||||||
if (isB24((NIns*)addr, _nIns))
|
BL((NIns*)addr);
|
||||||
{
|
asm_output1(" (call %s)", ci->_name);
|
||||||
// we can do this with a single BL call
|
|
||||||
underrunProtect(4);
|
|
||||||
|
|
||||||
BL(addr);
|
|
||||||
asm_output2("call %08X:%s", addr, ci->_name);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
underrunProtect(16);
|
|
||||||
*(--_nIns) = (NIns)((addr));
|
|
||||||
*(--_nIns) = (NIns)( COND_AL | (0x9<<21) | (0xFFF<<8) | (1<<4) | (IP) );
|
|
||||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) );
|
|
||||||
*(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (PC<<16) | (IP<<12) | (4));
|
|
||||||
asm_output2("call %08X:%s", addr, ci->_name);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // NJ_THUMB_JIT
|
#endif // NJ_THUMB_JIT
|
||||||
@ -937,31 +1028,18 @@ namespace nanojit
|
|||||||
#else
|
#else
|
||||||
|
|
||||||
// We can always reach the const pool, since it's on the same page (<4096)
|
// We can always reach the const pool, since it's on the same page (<4096)
|
||||||
|
underrunProtect(8);
|
||||||
if (!_nSlot)
|
|
||||||
_nSlot = pageTop(_nIns);
|
|
||||||
|
|
||||||
if ( (_nSlot+1) >= (_nIns-1) )
|
|
||||||
{
|
|
||||||
// This would overrun the code, so we need a new page
|
|
||||||
// and a jump to that page
|
|
||||||
|
|
||||||
NIns* target = _nIns;
|
|
||||||
_nIns = pageAlloc(_inExit);
|
|
||||||
JMP_nochk(target);
|
|
||||||
|
|
||||||
// reset the slot
|
|
||||||
_nSlot = pageTop(_nIns);
|
|
||||||
}
|
|
||||||
|
|
||||||
*(++_nSlot) = (int)imm;
|
*(++_nSlot) = (int)imm;
|
||||||
|
|
||||||
int offset = (int)(_nSlot) - (int)(_nIns+1);
|
//fprintf (stderr, "wrote slot(2) %p with %08x, jmp @ %p\n", _nSlot, (intptr_t)imm, _nIns-1);
|
||||||
|
|
||||||
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | ((r)<<12) | -(offset));
|
int offset = PC_OFFSET_FROM(_nSlot,(intptr_t)(_nIns)-4);
|
||||||
|
|
||||||
|
NanoAssert(JMP_S24_OFFSET_OK(offset) && (offset < 0));
|
||||||
|
|
||||||
|
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | ((r)<<12) | ((-offset) & 0xFFFFFF) );
|
||||||
asm_output2("ld %s,%d",gpn(r),imm);
|
asm_output2("ld %s,%d",gpn(r),imm);
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -156,6 +156,7 @@ namespace nanojit
|
|||||||
#define alignUp(x,s) ((((uintptr_t)(x))+(((uintptr_t)s)-1))&~(((uintptr_t)s)-1))
|
#define alignUp(x,s) ((((uintptr_t)(x))+(((uintptr_t)s)-1))&~(((uintptr_t)s)-1))
|
||||||
|
|
||||||
#define pageTop(x) ( (int*)alignTo(x,NJ_PAGE_SIZE) )
|
#define pageTop(x) ( (int*)alignTo(x,NJ_PAGE_SIZE) )
|
||||||
|
#define pageDataStart(x) ( (int*)(alignTo(x,NJ_PAGE_SIZE) + sizeof(PageHeader)) )
|
||||||
#define pageBottom(x) ( (int*)(alignTo(x,NJ_PAGE_SIZE)+NJ_PAGE_SIZE)-1 )
|
#define pageBottom(x) ( (int*)(alignTo(x,NJ_PAGE_SIZE)+NJ_PAGE_SIZE)-1 )
|
||||||
#define samepage(x,y) (pageTop(x) == pageTop(y))
|
#define samepage(x,y) (pageTop(x) == pageTop(y))
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user