Merge tracemonkey to mozilla-central.

Robert Sayre 2009-12-17 23:18:40 -05:00
commit cc4ca115a3
55 changed files with 2723 additions and 1776 deletions


@ -86,7 +86,7 @@ MSG_DEF(JSMSG_MORE_ARGS_NEEDED, 3, 3, JSEXN_TYPEERR, "{0} requires more
MSG_DEF(JSMSG_BAD_CHAR, 4, 1, JSEXN_INTERNALERR, "invalid format character {0}")
MSG_DEF(JSMSG_BAD_TYPE, 5, 1, JSEXN_TYPEERR, "unknown type {0}")
MSG_DEF(JSMSG_ALLOC_OVERFLOW, 6, 0, JSEXN_INTERNALERR, "allocation size overflow")
MSG_DEF(JSMSG_CANT_UNLOCK, 7, 0, JSEXN_INTERNALERR, "can't unlock memory")
MSG_DEF(JSMSG_UNUSED7, 7, 0, JSEXN_NONE, "")
MSG_DEF(JSMSG_INCOMPATIBLE_PROTO, 8, 3, JSEXN_TYPEERR, "{0}.prototype.{1} called on incompatible {2}")
MSG_DEF(JSMSG_NO_CONSTRUCTOR, 9, 1, JSEXN_TYPEERR, "{0} has no constructor")
MSG_DEF(JSMSG_CANT_ALIAS, 10, 3, JSEXN_TYPEERR, "can't alias {0} to {1} in class {2}")


@ -119,6 +119,7 @@ BEGIN_TEST(testXDR_bug525481)
JSXDRState *r = JS_XDRNewMem(cx, JSXDR_DECODE);
JS_XDRMemSetData(r, frozen, nbytes);
CHECK(JS_XDRScript(r, &script));
JS_DestroyScript(cx, script);
JS_XDRDestroy(r); // this frees `frozen`
return true;
}
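For context, the decode path above consumes a byte image produced by an earlier encode pass. Below is a sketch of that encode side, assuming the JSXDR memory API of this era (JS_XDRNewMem with JSXDR_ENCODE, JS_XDRMemGetData, declared in jsxdrapi.h); the helper name EncodeScript is hypothetical and not taken from the test file.

#include "jsapi.h"
#include "jsxdrapi.h"
#include <string.h>

/* Sketch only: produce the `frozen` byte image that the decode test consumes. */
static void *
EncodeScript(JSContext *cx, JSScript *script, uint32 *nbytesp)
{
    JSXDRState *w = JS_XDRNewMem(cx, JSXDR_ENCODE);
    if (!w)
        return NULL;
    void *frozen = NULL;
    if (JS_XDRScript(w, &script)) {
        uint32 nbytes;
        void *data = JS_XDRMemGetData(w, &nbytes);
        frozen = JS_malloc(cx, nbytes);
        if (frozen) {
            memcpy(frozen, data, nbytes);
            *nbytesp = nbytes;
        }
    }
    JS_XDRDestroy(w);   /* frees the encoder's internal buffer, not `frozen` */
    return frozen;
}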


@ -2008,19 +2008,16 @@ JS_LockGCThingRT(JSRuntime *rt, void *thing)
JS_PUBLIC_API(JSBool)
JS_UnlockGCThing(JSContext *cx, void *thing)
{
JSBool ok;
CHECK_REQUEST(cx);
ok = js_UnlockGCThingRT(cx->runtime, thing);
if (!ok)
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_CANT_UNLOCK);
return ok;
js_UnlockGCThingRT(cx->runtime, thing);
return true;
}
JS_PUBLIC_API(JSBool)
JS_UnlockGCThingRT(JSRuntime *rt, void *thing)
{
return js_UnlockGCThingRT(rt, thing);
js_UnlockGCThingRT(rt, thing);
return true;
}
JS_PUBLIC_API(void)
@ -2543,7 +2540,7 @@ JS_PUBLIC_API(JSBool)
JS_IsAboutToBeFinalized(JSContext *cx, void *thing)
{
JS_ASSERT(thing);
return js_IsAboutToBeFinalized(cx, thing);
return js_IsAboutToBeFinalized(thing);
}
JS_PUBLIC_API(void)


@ -822,22 +822,6 @@ slowarray_addProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
return JS_TRUE;
}
static void
slowarray_trace(JSTracer *trc, JSObject *obj)
{
uint32 length = obj->fslots[JSSLOT_ARRAY_LENGTH];
JS_ASSERT(STOBJ_GET_CLASS(obj) == &js_SlowArrayClass);
/*
* Move JSSLOT_ARRAY_LENGTH aside to prevent the GC from treating
* untagged integer values as objects or strings.
*/
obj->fslots[JSSLOT_ARRAY_LENGTH] = JSVAL_VOID;
js_TraceObject(trc, obj);
obj->fslots[JSSLOT_ARRAY_LENGTH] = length;
}
static JSObjectOps js_SlowArrayObjectOps;
static JSObjectOps *
@ -1271,7 +1255,7 @@ JSClass js_ArrayClass = {
JSClass js_SlowArrayClass = {
"Array",
JSCLASS_HAS_RESERVED_SLOTS(1) |
JSCLASS_HAS_PRIVATE |
JSCLASS_HAS_CACHED_PROTO(JSProto_Array),
slowarray_addProperty, JS_PropertyStub, JS_PropertyStub, JS_PropertyStub,
JS_EnumerateStub, JS_ResolveStub, js_TryValueOf, NULL,
@ -1338,9 +1322,14 @@ js_MakeArraySlow(JSContext *cx, JSObject *obj)
* a jsval, set our slow/sparse COUNT to the current length as a jsval, so
* we can tell when only named properties have been added to a dense array
* to make it slow-but-not-sparse.
*
* We do not need to make the length slot GC-safe, as this is the private
* slot, in which the implementation can store an arbitrary value.
*/
{
uint32 length = obj->fslots[JSSLOT_ARRAY_LENGTH];
JS_STATIC_ASSERT(JSSLOT_ARRAY_LENGTH == JSSLOT_PRIVATE);
JS_ASSERT(js_SlowArrayClass.flags & JSCLASS_HAS_PRIVATE);
uint32 length = uint32(obj->fslots[JSSLOT_ARRAY_LENGTH]);
obj->fslots[JSSLOT_ARRAY_COUNT] = INT_FITS_IN_JSVAL(length)
? INT_TO_JSVAL(length)
: JSVAL_VOID;
@ -3464,7 +3453,6 @@ js_InitArrayClass(JSContext *cx, JSObject *obj)
/* Initialize the ops structure used by slow arrays */
memcpy(&js_SlowArrayObjectOps, &js_ObjectOps, sizeof(JSObjectOps));
js_SlowArrayObjectOps.trace = slowarray_trace;
js_SlowArrayObjectOps.enumerate = slowarray_enumerate;
js_SlowArrayObjectOps.call = NULL;


@ -581,7 +581,6 @@ js_atom_sweeper(JSDHashTable *table, JSDHashEntryHdr *hdr,
uint32 number, void *arg)
{
JSAtomHashEntry *entry = TO_ATOM_ENTRY(hdr);
JSContext *cx = (JSContext *)arg;
/* Remove uninitialized entries. */
if (entry->keyAndFlags == 0)
@ -589,8 +588,8 @@ js_atom_sweeper(JSDHashTable *table, JSDHashEntryHdr *hdr,
if (ATOM_ENTRY_FLAGS(entry) & (ATOM_PINNED | ATOM_INTERNED)) {
/* Pinned or interned key cannot be finalized. */
JS_ASSERT(!js_IsAboutToBeFinalized(cx, ATOM_ENTRY_KEY(entry)));
} else if (js_IsAboutToBeFinalized(cx, ATOM_ENTRY_KEY(entry))) {
JS_ASSERT(!js_IsAboutToBeFinalized(ATOM_ENTRY_KEY(entry)));
} else if (js_IsAboutToBeFinalized(ATOM_ENTRY_KEY(entry))) {
/* Remove entries with things about to be GC'ed. */
return JS_DHASH_REMOVE;
}
@ -602,8 +601,8 @@ js_SweepAtomState(JSContext *cx)
{
JSAtomState *state = &cx->runtime->atomState;
JS_DHashTableEnumerate(&state->doubleAtoms, js_atom_sweeper, cx);
JS_DHashTableEnumerate(&state->stringAtoms, js_atom_sweeper, cx);
JS_DHashTableEnumerate(&state->doubleAtoms, js_atom_sweeper, NULL);
JS_DHashTableEnumerate(&state->stringAtoms, js_atom_sweeper, NULL);
/*
* Optimize for simplicity and mutate table generation numbers even if the


@ -235,7 +235,8 @@ js_AddProperty(JSContext* cx, JSObject* obj, JSScopeProperty* sprop)
uint32 slot = sprop->slot;
JSScope* scope = OBJ_SCOPE(obj);
JS_ASSERT(slot == scope->freeslot);
if (slot != scope->freeslot)
goto exit_trace;
JS_ASSERT(sprop->parent == scope->lastProperty());
if (scope->owned()) {


@ -95,10 +95,6 @@ namespace nanojit
class Assembler;
class CodeAlloc;
class Fragment;
class LirBuffer;
#ifdef DEBUG
class LabelMap;
#endif
template<typename K> struct DefaultHash;
template<typename K, typename V, typename H> class HashMap;
template<typename T> class Seq;
@ -113,7 +109,6 @@ static const size_t MAX_GLOBAL_SLOTS = 4096;
static const size_t GLOBAL_SLOTS_BUFFER_SIZE = MAX_GLOBAL_SLOTS + 1;
/* Forward declarations of tracer types. */
class TreeInfo;
class VMAllocator;
class TraceRecorder;
class FrameInfoCache;
@ -149,7 +144,7 @@ struct InterpState
// call exit guard mismatched
void* rpAtLastTreeCall; // value of rp at innermost tree call guard
VMSideExit* outermostTreeExitGuard; // the last side exit returned by js_CallTree
TreeInfo* outermostTree; // the outermost tree we initially invoked
TreeFragment* outermostTree; // the outermost tree we initially invoked
uintN* inlineCallCountp; // inline call count counter
VMSideExit** innermostNestedGuardp;
VMSideExit* innermost;
@ -168,7 +163,7 @@ struct InterpState
uintN nativeVpLen;
jsval* nativeVp;
InterpState(JSContext *cx, JSTraceMonitor *tm, TreeInfo *ti,
InterpState(JSContext *cx, JSTraceMonitor *tm, TreeFragment *ti,
uintN &inlineCallCountp, VMSideExit** innermostNestedGuardp);
~InterpState();
};
@ -222,32 +217,40 @@ struct JSTraceMonitor {
TraceNativeStorage storage;
/*
* There are 3 allocators here. This might seem like overkill, but they
* There are 5 allocators here. This might seem like overkill, but they
* have different lifecycles, and by keeping them separate we keep the
* amount of retained memory down significantly.
* amount of retained memory down significantly. They are flushed (ie.
* all the allocated memory is freed) periodically.
*
* The dataAlloc has the lifecycle of the monitor. It's flushed only
* when the monitor is flushed.
* - dataAlloc has the lifecycle of the monitor. It's flushed only when
* the monitor is flushed. It's used for fragments.
*
* The traceAlloc has the same flush lifecycle as the dataAlloc, but
* it is also *marked* when a recording starts and rewinds to the mark
* point if recording aborts. So you can put things in it that are only
* reachable on a successful record/compile cycle.
* - traceAlloc has the same flush lifecycle as the dataAlloc, but it is
* also *marked* when a recording starts and rewinds to the mark point
* if recording aborts. So you can put things in it that are only
* reachable on a successful record/compile cycle like GuardRecords and
* SideExits.
*
* The tempAlloc is flushed after each recording, successful or not.
* - tempAlloc is flushed after each recording, successful or not. It's
* used to store LIR code and for all other elements in the LIR
* pipeline.
*
* - reTempAlloc is just like tempAlloc, but is used for regexp
* compilation in RegExpNativeCompiler rather than normal compilation in
* TraceRecorder.
*
* - codeAlloc has the same lifetime as dataAlloc, but its API is
* different (CodeAlloc vs. VMAllocator). It's used for native code.
* It's also a good idea to keep code and data separate to avoid I-cache
* vs. D-cache issues.
*/
VMAllocator* dataAlloc; /* A chunk allocator for fragments. */
VMAllocator* traceAlloc; /* An allocator for trace metadata. */
VMAllocator* tempAlloc; /* A temporary chunk allocator. */
nanojit::CodeAlloc* codeAlloc; /* An allocator for native code. */
VMAllocator* dataAlloc;
VMAllocator* traceAlloc;
VMAllocator* tempAlloc;
VMAllocator* reTempAlloc;
nanojit::CodeAlloc* codeAlloc;
nanojit::Assembler* assembler;
nanojit::LirBuffer* lirbuf;
nanojit::LirBuffer* reLirBuf;
FrameInfoCache* frameCache;
#ifdef DEBUG
nanojit::LabelMap* labels;
#endif
TraceRecorder* recorder;
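The comment above distinguishes the allocators mainly by when their memory is released; traceAlloc additionally supports a mark taken when a recording starts and a rewind if the recording aborts. A minimal sketch of that mark/rewind idea using a plain bump allocator; the class and method names are illustrative, not the real VMAllocator interface:

#include <cstddef>
#include <cstdlib>

class BumpAlloc {
    char *base, *cur, *end;
  public:
    explicit BumpAlloc(size_t cap)
      : base((char *) malloc(cap)), cur(base), end(base + cap) {}
    ~BumpAlloc() { free(base); }

    void *alloc(size_t n) {             /* bump-allocate; no per-object free */
        if (size_t(end - cur) < n)
            return NULL;
        void *p = cur;
        cur += n;
        return p;
    }
    typedef char *Mark;
    Mark mark() const { return cur; }   /* taken when a recording starts */
    void rewind(Mark m) { cur = m; }    /* undo everything since the mark */
    void flush() { cur = base; }        /* periodic wholesale release */
};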
@ -280,11 +283,6 @@ struct JSTraceMonitor {
*/
REHashMap* reFragments;
/*
* A temporary allocator for RE recording.
*/
VMAllocator* reTempAlloc;
#ifdef DEBUG
/* Fields needed for fragment/guard profiling. */
nanojit::Seq<nanojit::Fragment*>* branches;
@ -319,7 +317,7 @@ typedef struct InterpStruct InterpStruct;
# define JS_ON_TRACE(cx) JS_FALSE
#endif
#ifdef DEBUG
#ifdef DEBUG_brendan
# define JS_EVAL_CACHE_METERING 1
# define JS_FUNCTION_METERING 1
#endif
@ -608,14 +606,10 @@ struct JSRuntime {
*/
ptrdiff_t gcMallocBytes;
/*
* Stack of GC arenas containing things that the GC marked, where children
* reached from those things have not yet been marked. This helps avoid
* using too much native stack during recursive GC marking.
*/
JSGCArenaInfo *gcUntracedArenaStackTop;
/* See comments before DelayMarkingChildren in jsgc.cpp. */
JSGCArenaInfo *gcUnmarkedArenaStackTop;
#ifdef DEBUG
size_t gcTraceLaterCount;
size_t gcMarkLaterCount;
#endif
/*


@ -526,7 +526,7 @@ js_SweepWatchPoints(JSContext *cx)
&wp->links != &rt->watchPointList;
wp = next) {
next = (JSWatchPoint *)wp->links.next;
if (js_IsAboutToBeFinalized(cx, wp->object)) {
if (js_IsAboutToBeFinalized(wp->object)) {
sample = rt->debuggerMutations;
/* Ignore failures. */


@ -4384,7 +4384,7 @@ js_EmitTree(JSContext *cx, JSCodeGenerator *cg, JSParseNode *pn)
cg2->staticLevel = cg->staticLevel + 1;
/* We measured the max scope depth when we parsed the function. */
JS_SCOPE_DEPTH_METERING(cg2->maxScopeDepth = (uintN) -1);
JS_SCOPE_DEPTH_METERING(cg2->maxScopeDepth = uint16(-1));
if (!js_EmitFunctionScript(cx, cg2, pn->pn_body))
pn = NULL;


@ -157,8 +157,10 @@ struct JSStmtInfo {
#ifdef JS_SCOPE_DEPTH_METER
# define JS_SCOPE_DEPTH_METERING(code) ((void) (code))
# define JS_SCOPE_DEPTH_METERING_IF(cond, code) ((cond) ? (void) (code) : (void) 0)
#else
# define JS_SCOPE_DEPTH_METERING(code) ((void) 0)
# define JS_SCOPE_DEPTH_METERING_IF(code, x) ((void) 0)
#endif
struct JSTreeContext { /* tree context for semantic checks */
@ -208,15 +210,15 @@ struct JSTreeContext { /* tree context for semantic checks */
/*
* For functions the tree context is constructed and destructed a second
* time during code generation. To avoid a redundant stats update in such
* cases, we store (uintN) -1 in maxScopeDepth.
* cases, we store uint16(-1) in maxScopeDepth.
*/
~JSTreeContext() {
JS_SCOPE_DEPTH_METERING(maxScopeDepth == (uintN) -1 ||
JS_BASIC_STATS_ACCUM(&compiler
->context
->runtime
->lexicalScopeDepthStats,
maxScopeDepth));
JS_SCOPE_DEPTH_METERING_IF((maxScopeDepth != uint16(-1)),
JS_BASIC_STATS_ACCUM(&compiler
->context
->runtime
->lexicalScopeDepthStats,
maxScopeDepth));
}
uintN blockid() { return topStmt ? topStmt->blockid : bodyid; }


@ -209,11 +209,13 @@ struct JSGCArenaInfo {
JSGCArenaInfo *prev;
/*
* A link field for the list of arenas with marked but not yet traced
* things. The field is encoded as arena's page to share the space with
* firstArena and arenaIndex fields.
* A link field for the list of arenas with marked things that haven't yet
* been scanned for live children. The field is encoded as arena's page to
* hold only the high-order arena-counting bits to share the space with
* firstArena and arenaIndex fields. For details see comments before
* DelayMarkingChildren.
*/
jsuword prevUntracedPage : JS_BITS_PER_WORD - GC_ARENA_SHIFT;
jsuword prevUnmarkedPage : JS_BITS_PER_WORD - GC_ARENA_SHIFT;
/*
* When firstArena is false, the index of arena in the chunk. When
@ -228,22 +230,20 @@ struct JSGCArenaInfo {
/* Flag indicating if the arena is the first in the chunk. */
jsuword firstArena : 1;
union {
struct {
JSGCThing *freeList;
jsuword untracedThings; /* bitset for fast search of marked
but not yet traced things */
} finalizable;
JSGCThing *freeList;
bool hasMarkedDoubles; /* the arena has marked doubles */
union {
/* See comments before DelayMarkingChildren. */
jsuword unmarkedChildren;
/* The arena has marked doubles. */
bool hasMarkedDoubles;
};
};
/* GC flag definitions, must fit in 8 bits. */
const uint8 GCF_MARK = JS_BIT(0);
const uint8 GCF_LOCK = JS_BIT(1); /* lock request bit in API */
const uint8 GCF_CHILDREN = JS_BIT(2); /* GC things with children to be
marked later. */
/*
* The private JSGCThing struct, which describes a JSRuntime.gcFreeList element.
@ -693,7 +693,7 @@ NewGCArena(JSContext *cx)
}
rt->gcBytes += GC_ARENA_SIZE;
a->prevUntracedPage = 0;
a->prevUnmarkedPage = 0;
return a;
}
@ -894,8 +894,8 @@ js_GetGCStringRuntime(JSString *str)
offsetof(JSRuntime, gcArenaList));
}
JSBool
js_IsAboutToBeFinalized(JSContext *cx, void *thing)
bool
js_IsAboutToBeFinalized(void *thing)
{
JSGCArenaInfo *a;
uint32 index, flags;
@ -1095,9 +1095,9 @@ js_DumpGCStats(JSRuntime *rt, FILE *fp)
fprintf(fp, " maximum mark recursion: %lu\n", ULSTAT(maxdepth));
fprintf(fp, " mark C recursion depth: %lu\n", ULSTAT(cdepth));
fprintf(fp, " maximum mark C recursion: %lu\n", ULSTAT(maxcdepth));
fprintf(fp, " delayed tracing calls: %lu\n", ULSTAT(untraced));
fprintf(fp, " delayed tracing calls: %lu\n", ULSTAT(unmarked));
#ifdef DEBUG
fprintf(fp, " max trace later count: %lu\n", ULSTAT(maxuntraced));
fprintf(fp, " max trace later count: %lu\n", ULSTAT(maxunmarked));
#endif
fprintf(fp, " maximum GC nesting level: %lu\n", ULSTAT(maxlevel));
fprintf(fp, "potentially useful GC calls: %lu\n", ULSTAT(poke));
@ -1346,7 +1346,7 @@ CloseNativeIterators(JSContext *cx)
size_t newLength = 0;
for (size_t i = 0; i < length; ++i) {
JSObject *obj = array[i];
if (js_IsAboutToBeFinalized(cx, obj))
if (js_IsAboutToBeFinalized(obj))
js_CloseNativeIterator(cx, obj);
else
array[newLength++] = obj;
@ -1384,8 +1384,8 @@ JSGCFreeLists::purge()
JSGCThing *freeListHead = *p;
if (freeListHead) {
JSGCArenaInfo *a = THING_TO_ARENA(freeListHead);
JS_ASSERT(!a->finalizable.freeList);
a->finalizable.freeList = freeListHead;
JS_ASSERT(!a->freeList);
a->freeList = freeListHead;
*p = NULL;
}
}
@ -1473,9 +1473,9 @@ RefillFinalizableFreeList(JSContext *cx, unsigned thingKind)
while ((a = arenaList->cursor) != NULL) {
arenaList->cursor = a->prev;
JSGCThing *freeList = a->finalizable.freeList;
JSGCThing *freeList = a->freeList;
if (freeList) {
a->finalizable.freeList = NULL;
a->freeList = NULL;
JS_UNLOCK_GC(rt);
return freeList;
}
@ -1499,9 +1499,9 @@ RefillFinalizableFreeList(JSContext *cx, unsigned thingKind)
*/
a->list = arenaList;
a->prev = arenaList->head;
a->prevUntracedPage = 0;
a->finalizable.untracedThings = 0;
a->finalizable.freeList = NULL;
a->prevUnmarkedPage = 0;
a->freeList = NULL;
a->unmarkedChildren = 0;
arenaList->head = a;
JS_UNLOCK_GC(rt);
@ -1720,6 +1720,7 @@ RefillDoubleFreeList(JSContext *cx)
}
a->list = NULL;
a->freeList = NULL;
a->hasMarkedDoubles = false;
a->prev = rt->gcDoubleArenaList.head;
rt->gcDoubleArenaList.head = a;
@ -1876,11 +1877,11 @@ js_LockGCThingRT(JSRuntime *rt, void *thing)
return ok;
}
JSBool
void
js_UnlockGCThingRT(JSRuntime *rt, void *thing)
{
if (!thing)
return JS_TRUE;
return;
JS_LOCK_GC(rt);
@ -1908,7 +1909,6 @@ js_UnlockGCThingRT(JSRuntime *rt, void *thing)
METER(rt->gcStats.unlock++);
out:
JS_UNLOCK_GC(rt);
return JS_TRUE;
}
JS_PUBLIC_API(void)
@ -1940,110 +1940,146 @@ JS_TraceChildren(JSTracer *trc, void *thing, uint32 kind)
}
/*
* Number of things covered by a single bit of JSGCArenaInfo.untracedThings.
* When the native stack is low, the GC does not call JS_TraceChildren to mark
* the reachable "children" of the thing. Rather the thing is put aside and
* JS_TraceChildren is called later with more space on the C stack.
*
* To implement such delayed marking of the children with minimal overhead for
* the normal case of sufficient native stack, the code adds two fields to
* JSGCArenaInfo. The first field, JSGCArenaInfo::prevUnmarkedPage, links all
* arenas with delayed things into a stack list with the pointer to stack top
* in JSRuntime::gcUnmarkedArenaStackTop. DelayMarkingChildren adds arenas to
* the stack as necessary while MarkDelayedChildren pops the arenas from the
* stack until it empties.
*
* The second field, JSGCArenaInfo::unmarkedChildren, is a bitmap that tells
* for which things the GC should call JS_TraceChildren later. The bitmap is
* a single word. As such it does not pinpoint the delayed things in the arena
* but rather tells the intervals containing ThingsPerUnmarkedBit(thingSize)
* things. Later the code in MarkDelayedChildren discovers such intervals
* and calls JS_TraceChildren on any marked thing in the interval. This
* implies that JS_TraceChildren can be called many times for a single thing
* if the thing shares the same interval with some delayed things. This should
* be fine as any GC graph marking/traversing hooks must allow repeated calls
* during the same GC cycle. In particular, xpcom cycle collector relies on
* this.
*
* Note that such repeated scanning may slow down the GC. In particular, it is
* possible to construct an object graph where the GC calls JS_TraceChildren
* ThingsPerUnmarkedBit(thingSize) times for almost all things in the graph.
* We tolerate this as the max value for ThingsPerUnmarkedBit(thingSize) is 4.
* This is achieved for JSObject on 32-bit systems as it is exactly JSObject
* that has the smallest size among the GC things that can be delayed. On a
* 32-bit CPU we have less than 128 objects per 4K GC arena, so each bit in
* unmarkedChildren covers 4 objects.
*/
#define THINGS_PER_UNTRACED_BIT(thingSize) \
JS_HOWMANY(THINGS_PER_ARENA(thingSize), JS_BITS_PER_WORD)
static void
DelayTracingChildren(JSRuntime *rt, uint8 *flagp)
inline unsigned
ThingsPerUnmarkedBit(unsigned thingSize)
{
JSGCArenaInfo *a;
uint32 untracedBitIndex;
jsuword bit;
JS_ASSERT(!(*flagp & GCF_CHILDREN));
*flagp |= GCF_CHILDREN;
METER(rt->gcStats.untraced++);
#ifdef DEBUG
++rt->gcTraceLaterCount;
METER_UPDATE_MAX(rt->gcStats.maxuntraced, rt->gcTraceLaterCount);
#endif
a = FLAGP_TO_ARENA(flagp);
untracedBitIndex = FLAGP_TO_INDEX(flagp) /
THINGS_PER_UNTRACED_BIT(a->list->thingSize);
JS_ASSERT(untracedBitIndex < JS_BITS_PER_WORD);
bit = (jsuword)1 << untracedBitIndex;
if (a->finalizable.untracedThings != 0) {
JS_ASSERT(rt->gcUntracedArenaStackTop);
if (a->finalizable.untracedThings & bit) {
/* bit already covers things with children to trace later. */
return;
}
a->finalizable.untracedThings |= bit;
} else {
/*
* The thing is the first thing with not yet traced children in the
* whole arena, so push the arena on the stack of arenas with things
* to be traced later unless the arena has already been pushed. We
* detect that through checking prevUntracedPage as the field is 0
* only for not yet pushed arenas. To ensure that
* prevUntracedPage != 0
* even when the stack contains one element, we make prevUntracedPage
* for the arena at the bottom to point to itself.
*
* See comments in TraceDelayedChildren.
*/
a->finalizable.untracedThings = bit;
if (a->prevUntracedPage == 0) {
if (!rt->gcUntracedArenaStackTop) {
/* Stack was empty, mark the arena as the bottom element. */
a->prevUntracedPage = ARENA_INFO_TO_PAGE(a);
} else {
JS_ASSERT(rt->gcUntracedArenaStackTop->prevUntracedPage != 0);
a->prevUntracedPage =
ARENA_INFO_TO_PAGE(rt->gcUntracedArenaStackTop);
}
rt->gcUntracedArenaStackTop = a;
}
}
JS_ASSERT(rt->gcUntracedArenaStackTop);
return JS_HOWMANY(THINGS_PER_ARENA(thingSize), JS_BITS_PER_WORD);
}
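The comment before DelayMarkingChildren explains how a single word of unmarkedChildren covers a whole arena by letting each bit stand for an interval of things. A standalone sketch of that arithmetic follows; the arena payload and thing size are illustrative assumptions, chosen to reproduce the "each bit covers 4 objects" case mentioned above.

#include <cstdio>

int main()
{
    const unsigned bitsPerWord    = 32;                        /* JS_BITS_PER_WORD on 32-bit */
    const unsigned arenaPayload   = 4096 - 64;                 /* assumed usable bytes per arena */
    const unsigned thingSize      = 32;                        /* assumed per-thing size (JSObject-ish) */
    const unsigned thingsPerArena = arenaPayload / thingSize;  /* 126 things */

    /* ThingsPerUnmarkedBit: how many things one bit of unmarkedChildren covers. */
    const unsigned perBit = (thingsPerArena + bitsPerWord - 1) / bitsPerWord;   /* == 4 */

    const unsigned thingIndex = 57;                    /* some thing whose marking was delayed */
    const unsigned bitIndex   = thingIndex / perBit;   /* which bit DelayMarkingChildren sets */
    const unsigned lo = bitIndex * perBit, hi = lo + perBit;

    printf("bit %u covers things [%u, %u); MarkDelayedChildren rescans every "
           "marked thing in that interval\n", bitIndex, lo, hi);
    return 0;
}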
static void
TraceDelayedChildren(JSTracer *trc)
DelayMarkingChildren(JSRuntime *rt, uint8 *flagp)
{
JSGCArenaInfo *a;
uint32 unmarkedBitIndex;
jsuword bit;
JS_ASSERT(*flagp & GCF_MARK);
METER(rt->gcStats.unmarked++);
a = FLAGP_TO_ARENA(flagp);
unmarkedBitIndex = FLAGP_TO_INDEX(flagp) /
ThingsPerUnmarkedBit(a->list->thingSize);
JS_ASSERT(unmarkedBitIndex < JS_BITS_PER_WORD);
bit = (jsuword)1 << unmarkedBitIndex;
if (a->unmarkedChildren != 0) {
JS_ASSERT(rt->gcUnmarkedArenaStackTop);
if (a->unmarkedChildren & bit) {
/* bit already covers things with children to mark later. */
return;
}
a->unmarkedChildren |= bit;
} else {
/*
* The thing is the first thing with not yet marked children in the
* whole arena, so push the arena on the stack of arenas with things
* to be marked later unless the arena has already been pushed. We
* detect that through checking prevUnmarkedPage as the field is 0
* only for not yet pushed arenas. To ensure that
* prevUnmarkedPage != 0
* even when the stack contains one element, we make prevUnmarkedPage
* for the arena at the bottom to point to itself.
*
* See comments in MarkDelayedChildren.
*/
a->unmarkedChildren = bit;
if (a->prevUnmarkedPage == 0) {
if (!rt->gcUnmarkedArenaStackTop) {
/* Stack was empty, mark the arena as the bottom element. */
a->prevUnmarkedPage = ARENA_INFO_TO_PAGE(a);
} else {
JS_ASSERT(rt->gcUnmarkedArenaStackTop->prevUnmarkedPage != 0);
a->prevUnmarkedPage =
ARENA_INFO_TO_PAGE(rt->gcUnmarkedArenaStackTop);
}
rt->gcUnmarkedArenaStackTop = a;
}
JS_ASSERT(rt->gcUnmarkedArenaStackTop);
}
#ifdef DEBUG
rt->gcMarkLaterCount += ThingsPerUnmarkedBit(a->list->thingSize);
METER_UPDATE_MAX(rt->gcStats.maxunmarked, rt->gcMarkLaterCount);
#endif
}
static void
MarkDelayedChildren(JSTracer *trc)
{
JSRuntime *rt;
JSGCArenaInfo *a, *aprev;
uint32 thingSize, traceKind;
uint32 thingsPerUntracedBit;
uint32 untracedBitIndex, thingIndex, indexLimit, endIndex;
uint32 thingsPerUnmarkedBit;
uint32 unmarkedBitIndex, thingIndex, indexLimit, endIndex;
JSGCThing *thing;
uint8 *flagp;
rt = trc->context->runtime;
a = rt->gcUntracedArenaStackTop;
a = rt->gcUnmarkedArenaStackTop;
if (!a) {
JS_ASSERT(rt->gcTraceLaterCount == 0);
JS_ASSERT(rt->gcMarkLaterCount == 0);
return;
}
for (;;) {
/*
* The following assert verifies that the current arena belongs to the
* untraced stack, since DelayTracingChildren ensures that even for
* stack's bottom prevUntracedPage != 0 but rather points to itself.
* unmarked stack, since DelayMarkingChildren ensures that even for
* the stack's bottom, prevUnmarkedPage != 0 but rather points to
* itself.
*/
JS_ASSERT(a->prevUntracedPage != 0);
JS_ASSERT(rt->gcUntracedArenaStackTop->prevUntracedPage != 0);
JS_ASSERT(a->prevUnmarkedPage != 0);
JS_ASSERT(rt->gcUnmarkedArenaStackTop->prevUnmarkedPage != 0);
thingSize = a->list->thingSize;
traceKind = GetFinalizableArenaTraceKind(a);
indexLimit = THINGS_PER_ARENA(thingSize);
thingsPerUntracedBit = THINGS_PER_UNTRACED_BIT(thingSize);
thingsPerUnmarkedBit = ThingsPerUnmarkedBit(thingSize);
/*
* We cannot use do-while loop here as a->untracedThings can be zero
* We cannot use do-while loop here as a->unmarkedChildren can be zero
* before the loop as a leftover from the previous iterations. See
* comments after the loop.
*/
while (a->finalizable.untracedThings != 0) {
untracedBitIndex = JS_FLOOR_LOG2W(a->finalizable.untracedThings);
a->finalizable.untracedThings &=
~((jsuword)1 << untracedBitIndex);
thingIndex = untracedBitIndex * thingsPerUntracedBit;
endIndex = thingIndex + thingsPerUntracedBit;
while (a->unmarkedChildren != 0) {
unmarkedBitIndex = JS_FLOOR_LOG2W(a->unmarkedChildren);
a->unmarkedChildren &= ~((jsuword)1 << unmarkedBitIndex);
#ifdef DEBUG
JS_ASSERT(rt->gcMarkLaterCount >= thingsPerUnmarkedBit);
rt->gcMarkLaterCount -= thingsPerUnmarkedBit;
#endif
thingIndex = unmarkedBitIndex * thingsPerUnmarkedBit;
endIndex = thingIndex + thingsPerUnmarkedBit;
/*
* endIndex can go beyond the last allocated thing as the real
@ -2052,22 +2088,12 @@ TraceDelayedChildren(JSTracer *trc)
if (endIndex > indexLimit)
endIndex = indexLimit;
JS_ASSERT(thingIndex < indexLimit);
do {
/*
* Skip free or already traced things that share the bit
* with untraced ones.
*/
flagp = THING_FLAGP(a, thingIndex);
if (!(*flagp & GCF_CHILDREN))
continue;
*flagp &= ~GCF_CHILDREN;
#ifdef DEBUG
JS_ASSERT(rt->gcTraceLaterCount != 0);
--rt->gcTraceLaterCount;
#endif
thing = FLAGP_TO_THING(flagp, thingSize);
JS_TraceChildren(trc, thing, traceKind);
if (*flagp & GCF_MARK) {
thing = FLAGP_TO_THING(flagp, thingSize);
JS_TraceChildren(trc, thing, traceKind);
}
} while (++thingIndex != endIndex);
}
@ -2076,29 +2102,29 @@ TraceDelayedChildren(JSTracer *trc)
* pop it from the stack if the arena is the stack's top.
*
* When JS_TraceChildren from the above calls JS_CallTracer that in
* turn on low C stack calls DelayTracingChildren and the latter
* pushes new arenas to the untraced stack, we have to skip popping
* turn on low C stack calls DelayMarkingChildren and the latter
* pushes new arenas to the unmarked stack, we have to skip popping
* of this arena until it becomes the top of the stack again.
*/
if (a == rt->gcUntracedArenaStackTop) {
aprev = ARENA_PAGE_TO_INFO(a->prevUntracedPage);
a->prevUntracedPage = 0;
if (a == rt->gcUnmarkedArenaStackTop) {
aprev = ARENA_PAGE_TO_INFO(a->prevUnmarkedPage);
a->prevUnmarkedPage = 0;
if (a == aprev) {
/*
* prevUntracedPage points to itself and we reached the
* prevUnmarkedPage points to itself and we reached the
* bottom of the stack.
*/
break;
}
rt->gcUntracedArenaStackTop = a = aprev;
rt->gcUnmarkedArenaStackTop = a = aprev;
} else {
a = rt->gcUntracedArenaStackTop;
a = rt->gcUnmarkedArenaStackTop;
}
}
JS_ASSERT(rt->gcUntracedArenaStackTop);
JS_ASSERT(rt->gcUntracedArenaStackTop->prevUntracedPage == 0);
rt->gcUntracedArenaStackTop = NULL;
JS_ASSERT(rt->gcTraceLaterCount == 0);
JS_ASSERT(rt->gcUnmarkedArenaStackTop);
JS_ASSERT(rt->gcUnmarkedArenaStackTop->prevUnmarkedPage == 0);
rt->gcUnmarkedArenaStackTop = NULL;
JS_ASSERT(rt->gcMarkLaterCount == 0);
}
JS_PUBLIC_API(void)
@ -2178,7 +2204,7 @@ JS_CallTracer(JSTracer *trc, void *thing, uint32 kind)
# define RECURSION_TOO_DEEP() (!JS_CHECK_STACK_SIZE(cx, stackDummy))
#endif
if (RECURSION_TOO_DEEP())
DelayTracingChildren(rt, flagp);
DelayMarkingChildren(rt, flagp);
else
JS_TraceChildren(trc, thing, kind);
} else {
@ -2190,16 +2216,16 @@ JS_CallTracer(JSTracer *trc, void *thing, uint32 kind)
*
* Since we do not know which call from inside the callback is the
* last, we ensure that children of all marked things are traced and
* call TraceDelayedChildren(trc) after tracing the thing.
* call MarkDelayedChildren(trc) after tracing the thing.
*
* As TraceDelayedChildren unconditionally invokes JS_TraceChildren
* for the things with untraced children, calling DelayTracingChildren
* As MarkDelayedChildren unconditionally invokes JS_TraceChildren
* for the things with unmarked children, calling DelayMarkingChildren
* is useless here. Hence we always trace thing's children even with a
* low native stack.
*/
cx->insideGCMarkCallback = JS_FALSE;
JS_TraceChildren(trc, thing, kind);
TraceDelayedChildren(trc);
MarkDelayedChildren(trc);
cx->insideGCMarkCallback = JS_TRUE;
}
@ -2220,7 +2246,7 @@ js_CallValueTracerIfGCThing(JSTracer *trc, jsval v)
if (JSVAL_IS_DOUBLE(v) || JSVAL_IS_STRING(v)) {
thing = JSVAL_TO_TRACEABLE(v);
kind = JSVAL_TRACE_KIND(v);
JS_ASSERT(kind == js_GetGCThingTraceKind(JSVAL_TO_GCTHING(v)));
JS_ASSERT(kind == js_GetGCThingTraceKind(thing));
} else if (JSVAL_IS_OBJECT(v) && v != JSVAL_NULL) {
/* v can be an arbitrary GC thing reinterpreted as an object. */
thing = JSVAL_TO_OBJECT(v);
@ -2241,41 +2267,43 @@ gc_root_traversal(JSDHashTable *table, JSDHashEntryHdr *hdr, uint32 num,
jsval v = *rp;
/* Ignore null reference, scalar values, and static strings. */
if (!JSVAL_IS_NULL(v) &&
JSVAL_IS_GCTHING(v) &&
!JSString::isStatic(JSVAL_TO_GCTHING(v))) {
if (JSVAL_IS_TRACEABLE(v)) {
#ifdef DEBUG
bool root_points_to_gcArenaList = false;
jsuword thing = (jsuword) JSVAL_TO_GCTHING(v);
JSRuntime *rt = trc->context->runtime;
for (unsigned i = 0; i != FINALIZE_LIMIT; i++) {
JSGCArenaList *arenaList = &rt->gcArenaList[i];
size_t thingSize = arenaList->thingSize;
size_t limit = THINGS_PER_ARENA(thingSize) * thingSize;
for (JSGCArenaInfo *a = arenaList->head; a; a = a->prev) {
if (thing - ARENA_INFO_TO_START(a) < limit) {
root_points_to_gcArenaList = true;
break;
if (!JSString::isStatic(JSVAL_TO_GCTHING(v))) {
bool root_points_to_gcArenaList = false;
jsuword thing = (jsuword) JSVAL_TO_GCTHING(v);
JSRuntime *rt = trc->context->runtime;
for (unsigned i = 0; i != FINALIZE_LIMIT; i++) {
JSGCArenaList *arenaList = &rt->gcArenaList[i];
size_t thingSize = arenaList->thingSize;
size_t limit = THINGS_PER_ARENA(thingSize) * thingSize;
for (JSGCArenaInfo *a = arenaList->head; a; a = a->prev) {
if (thing - ARENA_INFO_TO_START(a) < limit) {
root_points_to_gcArenaList = true;
break;
}
}
}
}
if (!root_points_to_gcArenaList) {
for (JSGCArenaInfo *a = rt->gcDoubleArenaList.head; a; a = a->prev) {
if (thing - ARENA_INFO_TO_START(a) <
DOUBLES_PER_ARENA * sizeof(jsdouble)) {
root_points_to_gcArenaList = true;
break;
if (!root_points_to_gcArenaList) {
for (JSGCArenaInfo *a = rt->gcDoubleArenaList.head;
a;
a = a->prev) {
if (thing - ARENA_INFO_TO_START(a) <
DOUBLES_PER_ARENA * sizeof(jsdouble)) {
root_points_to_gcArenaList = true;
break;
}
}
}
}
if (!root_points_to_gcArenaList && rhe->name) {
fprintf(stderr,
if (!root_points_to_gcArenaList && rhe->name) {
fprintf(stderr,
"JS API usage error: the address passed to JS_AddNamedRoot currently holds an\n"
"invalid jsval. This is usually caused by a missing call to JS_RemoveRoot.\n"
"The root's name is \"%s\".\n",
rhe->name);
rhe->name);
}
JS_ASSERT(root_points_to_gcArenaList);
}
JS_ASSERT(root_points_to_gcArenaList);
#endif
JS_SET_TRACING_NAME(trc, rhe->name ? rhe->name : "root");
js_CallValueTracerIfGCThing(trc, v);
@ -2765,8 +2793,8 @@ FinalizeArenaList(JSContext *cx, unsigned thingKind,
#endif
for (;;) {
JS_ASSERT(a->list == arenaList);
JS_ASSERT(a->prevUntracedPage == 0);
JS_ASSERT(a->finalizable.untracedThings == 0);
JS_ASSERT(a->prevUnmarkedPage == 0);
JS_ASSERT(a->unmarkedChildren == 0);
JSGCThing *freeList = NULL;
JSGCThing **tailp = &freeList;
@ -2778,9 +2806,7 @@ FinalizeArenaList(JSContext *cx, unsigned thingKind,
reinterpret_cast<JSGCThing *>(ARENA_INFO_TO_START(a) +
THINGS_PER_ARENA(sizeof(T)) *
sizeof(T));
JSGCThing* nextFree = a->finalizable.freeList
? a->finalizable.freeList
: thingsEnd;
JSGCThing* nextFree = a->freeList ? a->freeList : thingsEnd;
for (;; thing = NextThing(thing, sizeof(T)), --flagp) {
if (thing == nextFree) {
if (thing == thingsEnd)
@ -2844,7 +2870,7 @@ FinalizeArenaList(JSContext *cx, unsigned thingKind,
} else {
JS_ASSERT(nfree < THINGS_PER_ARENA(sizeof(T)));
*tailp = NULL;
a->finalizable.freeList = freeList;
a->freeList = freeList;
ap = &a->prev;
METER(nlivearenas++);
}
@ -3072,8 +3098,8 @@ js_GC(JSContext *cx, JSGCInvocationKind gckind)
restart:
rt->gcNumber++;
JS_ASSERT(!rt->gcUntracedArenaStackTop);
JS_ASSERT(rt->gcTraceLaterCount == 0);
JS_ASSERT(!rt->gcUnmarkedArenaStackTop);
JS_ASSERT(rt->gcMarkLaterCount == 0);
/*
* Reset the property cache's type id generator so we can compress ids.
@ -3118,7 +3144,7 @@ js_GC(JSContext *cx, JSGCInvocationKind gckind)
* Mark children of things that caused too deep recursion during the above
* tracing.
*/
TraceDelayedChildren(&trc);
MarkDelayedChildren(&trc);
JS_ASSERT(!cx->insideGCMarkCallback);
if (rt->gcCallback) {
@ -3127,7 +3153,7 @@ js_GC(JSContext *cx, JSGCInvocationKind gckind)
JS_ASSERT(cx->insideGCMarkCallback);
cx->insideGCMarkCallback = JS_FALSE;
}
JS_ASSERT(rt->gcTraceLaterCount == 0);
JS_ASSERT(rt->gcMarkLaterCount == 0);
rt->gcMarkingTracer = NULL;
@ -3198,7 +3224,7 @@ js_GC(JSContext *cx, JSGCInvocationKind gckind)
ap = &a->prev;
#ifdef JS_GCMETER
for (size_t i = 0; i != DOUBLES_PER_ARENA; ++i) {
if (IsMarkedDouble(a, index))
if (IsMarkedDouble(a, i))
METER(nthings++);
}
METER(nlivearenas++);


@ -148,11 +148,11 @@ js_ReserveObjects(JSContext *cx, size_t nobjects);
extern JSBool
js_LockGCThingRT(JSRuntime *rt, void *thing);
extern JSBool
extern void
js_UnlockGCThingRT(JSRuntime *rt, void *thing);
extern JSBool
js_IsAboutToBeFinalized(JSContext *cx, void *thing);
extern bool
js_IsAboutToBeFinalized(void *thing);
/*
* Macro to test if a traversal is the marking phase of GC to avoid exposing
@ -414,10 +414,10 @@ typedef struct JSGCStats {
uint32 maxdepth; /* maximum mark tail recursion depth */
uint32 cdepth; /* mark recursion depth of C functions */
uint32 maxcdepth; /* maximum mark recursion depth of C functions */
uint32 untraced; /* number of times tracing of GC thing's children were
uint32 unmarked; /* number of times marking of GC thing's children were
delayed due to a low C stack */
#ifdef DEBUG
uint32 maxuntraced;/* maximum number of things with children to trace
uint32 maxunmarked;/* maximum number of things with children to mark
later */
#endif
uint32 maxlevel; /* maximum GC nesting (indirect recursion) level */


@ -535,7 +535,6 @@ js_PurgePropertyCache(JSContext *cx, JSPropertyCache *cache)
P(addpchits);
P(setpchits);
P(setpcmisses);
P(slotchanges);
P(setmisses);
P(idmisses);
P(komisses);


@ -313,8 +313,6 @@ typedef struct JSPropertyCache {
uint32 addpchits; /* adding next property pchit case */
uint32 setpchits; /* setting existing property pchit */
uint32 setpcmisses; /* setting/adding property pc misses */
uint32 slotchanges; /* clasp->reserveSlots result variance-
induced slot changes */
uint32 setmisses; /* JSOP_SET{NAME,PROP} total misses */
uint32 idmisses; /* slow-path key id == atom misses */
uint32 komisses; /* slow-path key object misses */


@ -2206,13 +2206,24 @@ InitScopeForObject(JSContext* cx, JSObject* obj, JSObject* proto, JSObjectOps* o
/* Share proto's emptyScope only if obj is similar to proto. */
JSClass *clasp = OBJ_GET_CLASS(cx, obj);
JSScope *scope;
if (proto && OBJ_IS_NATIVE(proto) &&
(scope = OBJ_SCOPE(proto))->canProvideEmptyScope(ops, clasp)) {
scope = scope->getEmptyScope(cx, clasp);
if (!scope)
goto bad;
} else {
JSScope *scope = NULL;
if (proto && OBJ_IS_NATIVE(proto)) {
JS_LOCK_OBJ(cx, proto);
scope = OBJ_SCOPE(proto);
if (scope->canProvideEmptyScope(ops, clasp)) {
JSScope *emptyScope = scope->getEmptyScope(cx, clasp);
JS_UNLOCK_SCOPE(cx, scope);
if (!emptyScope)
goto bad;
scope = emptyScope;
} else {
JS_UNLOCK_SCOPE(cx, scope);
scope = NULL;
}
}
if (!scope) {
scope = JSScope::create(cx, ops, clasp, obj, js_GenerateShape(cx, false));
if (!scope)
goto bad;
@ -2225,6 +2236,7 @@ InitScopeForObject(JSContext* cx, JSObject* obj, JSObject* proto, JSObjectOps* o
goto bad;
}
}
obj->map = scope;
return true;
@ -3549,14 +3561,16 @@ js_ConstructObject(JSContext *cx, JSClass *clasp, JSObject *proto,
return obj;
}
/* XXXbe if one adds props, deletes earlier props, adds more, the last added
won't recycle the deleted props' slots. */
/*
* FIXME bug 535629: If one adds props, deletes earlier props, adds more, the
* last added won't recycle the deleted props' slots.
*/
JSBool
js_AllocSlot(JSContext *cx, JSObject *obj, uint32 *slotp)
{
JS_ASSERT(OBJ_IS_NATIVE(obj));
JSScope *scope = OBJ_SCOPE(obj);
JS_ASSERT(scope->object == obj);
JSClass *clasp = obj->getClass();
if (scope->freeslot == JSSLOT_FREE(clasp) && clasp->reserveSlots) {
/* Adjust scope->freeslot to include computed reserved slots, if any. */
@ -3577,9 +3591,8 @@ js_AllocSlot(JSContext *cx, JSObject *obj, uint32 *slotp)
void
js_FreeSlot(JSContext *cx, JSObject *obj, uint32 slot)
{
JS_ASSERT(OBJ_IS_NATIVE(obj));
JSScope *scope = OBJ_SCOPE(obj);
JS_ASSERT(scope->object == obj);
LOCKED_OBJ_SET_SLOT(obj, slot, JSVAL_VOID);
if (scope->freeslot == slot + 1)
scope->freeslot = slot;
@ -4175,7 +4188,7 @@ js_FindPropertyHelper(JSContext *cx, jsid id, JSBool cacheResult,
scopeIndex, protoIndex, pobj,
(JSScopeProperty *) prop, false);
}
SCOPE_DEPTH_ACCUM(&rt->scopeSearchDepthStats, scopeIndex);
SCOPE_DEPTH_ACCUM(&cx->runtime->scopeSearchDepthStats, scopeIndex);
goto out;
}


@ -1815,10 +1815,10 @@ BEGIN_CASE(JSOP_SETMETHOD)
* if something created a hash table for scope, we must
* pay the price of JSScope::putProperty.
*
* If slot does not match the cached sprop's slot,
* update the cache entry in the hope that obj and
* other instances with the same number of reserved
* slots are now "hot".
* (A reserveSlots hook can cause scopes of the same
* shape to have different freeslot values. This is
* what causes the slot != sprop->slot case. See
* js_GetMutableScope.)
*/
if (slot != sprop->slot || scope->table) {
JSScopeProperty *sprop2 =
@ -1831,13 +1831,6 @@ BEGIN_CASE(JSOP_SETMETHOD)
JS_UNLOCK_SCOPE(cx, scope);
goto error;
}
if (sprop2 != sprop) {
PCMETER(cache->slotchanges++);
JS_ASSERT(slot != sprop->slot &&
slot == sprop2->slot &&
sprop2->id == sprop->id);
entry->vword = SPROP_TO_PCVAL(sprop2);
}
sprop = sprop2;
} else {
scope->extend(cx, sprop);
@ -3566,7 +3559,8 @@ BEGIN_CASE(JSOP_INITMETHOD)
scope->shape == scope->lastProperty()->shape);
if (scope->table) {
JSScopeProperty *sprop2 =
scope->addDataProperty(cx, sprop->id, slot, sprop->attrs);
scope->addProperty(cx, sprop->id, sprop->getter, sprop->setter, slot,
sprop->attrs, sprop->flags, sprop->shortid);
if (!sprop2) {
js_FreeSlot(cx, obj, slot);
JS_UNLOCK_SCOPE(cx, scope);


@ -70,7 +70,7 @@ class RecursiveSlotMap : public SlotMap
* Store at exit->sp_adj - sizeof(double)
*/
ptrdiff_t retOffset = downPostSlots * sizeof(double) -
mRecorder.treeInfo->nativeStackBase;
mRecorder.tree->nativeStackBase;
mRecorder.lir->insStorei(mRecorder.addName(rval_ins, "rval_ins"),
mRecorder.lirbuf->sp, retOffset);
}
@ -93,7 +93,7 @@ class UpRecursiveSlotMap : public RecursiveSlotMap
/*
* The native stack offset of the return value once this frame has
* returned, is:
* -treeInfo->nativeStackBase + downPostSlots * sizeof(double)
* -tree->nativeStackBase + downPostSlots * sizeof(double)
*
* Note, not +1, since the offset is 0-based.
*
@ -101,15 +101,15 @@ class UpRecursiveSlotMap : public RecursiveSlotMap
* be the amount down recursion added, which was just guarded as
* |downPostSlots|. So the offset is:
*
* -treeInfo->nativeStackBase + downPostSlots * sizeof(double) -
* -tree->nativeStackBase + downPostSlots * sizeof(double) -
* downPostSlots * sizeof(double)
* Or:
* -treeInfo->nativeStackBase
* -tree->nativeStackBase
*
* This makes sense because this slot is just above the highest sp for
* the down frame.
*/
lir->insStorei(rval_ins, lirbuf->sp, -mRecorder.treeInfo->nativeStackBase);
lir->insStorei(rval_ins, lirbuf->sp, -mRecorder.tree->nativeStackBase);
lirbuf->sp = lir->ins2(LIR_piadd, lirbuf->sp,
lir->insImmWord(-int(downPostSlots) * sizeof(double)));
@ -152,7 +152,7 @@ TraceRecorder::downSnapshot(FrameInfo* downFrame)
/* Build the typemap the exit will have. Note extra stack slot for return value. */
unsigned downPostSlots = downFrame->callerHeight;
unsigned ngslots = treeInfo->globalSlots->length();
unsigned ngslots = tree->globalSlots->length();
unsigned exitTypeMapLen = downPostSlots + 1 + ngslots;
JSTraceType* exitTypeMap = (JSTraceType*)alloca(sizeof(JSTraceType) * exitTypeMapLen);
JSTraceType* typeMap = downFrame->get_typemap();
@ -176,7 +176,7 @@ TraceRecorder::downSnapshot(FrameInfo* downFrame)
exit->block = cx->fp->down->blockChain;
exit->pc = downFrame->pc + JSOP_CALL_LENGTH;
exit->imacpc = NULL;
exit->sp_adj = ((downPostSlots + 1) * sizeof(double)) - treeInfo->nativeStackBase;
exit->sp_adj = ((downPostSlots + 1) * sizeof(double)) - tree->nativeStackBase;
exit->rp_adj = exit->calldepth * sizeof(FrameInfo*);
exit->nativeCalleeWord = 0;
exit->lookupFlags = js_InferFlags(cx, 0);
@ -257,11 +257,11 @@ TraceRecorder::upRecursion()
*/
js_CaptureStackTypes(cx, 1, fi->get_typemap());
} else {
/* Case 2: Guess that up-recursion is backing out, infer types from our TreeInfo. */
JS_ASSERT(treeInfo->nStackTypes == downPostSlots + 1);
/* Case 2: Guess that up-recursion is backing out, infer types from our Tree. */
JS_ASSERT(tree->nStackTypes == downPostSlots + 1);
JSTraceType* typeMap = fi->get_typemap();
for (unsigned i = 0; i < downPostSlots; i++)
typeMap[i] = treeInfo->typeMap[i];
typeMap[i] = tree->typeMap[i];
}
fi = traceMonitor->frameCache->memoize(fi);
@ -311,7 +311,7 @@ TraceRecorder::upRecursion()
for (unsigned i = 0; i < downPostSlots; i++)
slotMap.addSlot(exit->stackType(i));
slotMap.addSlot(&stackval(-1));
VisitGlobalSlots(slotMap, cx, *treeInfo->globalSlots);
VisitGlobalSlots(slotMap, cx, *tree->globalSlots);
if (recursive_pc == (jsbytecode*)fragment->root->ip) {
debug_only_print0(LC_TMTracer, "Compiling up-recursive loop...\n");
} else {
@ -319,9 +319,9 @@ TraceRecorder::upRecursion()
exit->exitType = RECURSIVE_UNLINKED_EXIT;
exit->recursive_pc = recursive_pc;
}
JS_ASSERT(treeInfo->recursion != Recursion_Disallowed);
if (treeInfo->recursion != Recursion_Detected)
treeInfo->recursion = Recursion_Unwinds;
JS_ASSERT(tree->recursion != Recursion_Disallowed);
if (tree->recursion != Recursion_Detected)
tree->recursion = Recursion_Unwinds;
return closeLoop(slotMap, exit);
}
@ -424,7 +424,7 @@ TraceRecorder::slurpDownFrames(jsbytecode* return_pc)
* value. The slurpSlot variable keeps track of the last slot that has been
* unboxed, as to avoid re-unboxing when taking a SLURP_FAIL exit.
*/
unsigned numGlobalSlots = treeInfo->globalSlots->length();
unsigned numGlobalSlots = tree->globalSlots->length();
unsigned safeSlots = NativeStackSlots(cx, frameDepth) + 1 + numGlobalSlots;
jsbytecode* recursive_pc = return_pc + JSOP_CALL_LENGTH;
VMSideExit* exit = (VMSideExit*)
@ -435,7 +435,7 @@ TraceRecorder::slurpDownFrames(jsbytecode* return_pc)
exit->exitType = RECURSIVE_SLURP_FAIL_EXIT;
exit->numStackSlots = downPostSlots + 1;
exit->numGlobalSlots = numGlobalSlots;
exit->sp_adj = ((downPostSlots + 1) * sizeof(double)) - treeInfo->nativeStackBase;
exit->sp_adj = ((downPostSlots + 1) * sizeof(double)) - tree->nativeStackBase;
exit->recursive_pc = recursive_pc;
/*
@ -557,7 +557,7 @@ TraceRecorder::slurpDownFrames(jsbytecode* return_pc)
for (unsigned i = 0; i < downPostSlots; i++)
slotMap.addSlot(typeMap[i]);
slotMap.addSlot(&stackval(-1), typeMap[downPostSlots]);
VisitGlobalSlots(slotMap, cx, *treeInfo->globalSlots);
VisitGlobalSlots(slotMap, cx, *tree->globalSlots);
debug_only_print0(LC_TMTracer, "Compiling up-recursive slurp...\n");
exit = copy(exit);
if (exit->recursive_pc == fragment->root->ip)
@ -566,10 +566,25 @@ TraceRecorder::slurpDownFrames(jsbytecode* return_pc)
exit->exitType = RECURSIVE_UNLINKED_EXIT;
debug_only_printf(LC_TMTreeVis, "TREEVIS CHANGEEXIT EXIT=%p TYPE=%s\n", (void*)exit,
getExitName(exit->exitType));
JS_ASSERT(treeInfo->recursion >= Recursion_Unwinds);
JS_ASSERT(tree->recursion >= Recursion_Unwinds);
return closeLoop(slotMap, exit);
}
class ImportFrameSlotsVisitor : public SlotVisitorBase
{
TraceRecorder &mRecorder;
public:
ImportFrameSlotsVisitor(TraceRecorder &recorder) : mRecorder(recorder)
{}
JS_REQUIRES_STACK JS_ALWAYS_INLINE bool
visitStackSlots(jsval *vp, size_t count, JSStackFrame* fp) {
for (size_t i = 0; i < count; ++i)
mRecorder.get(vp++);
return true;
}
};
JS_REQUIRES_STACK AbortableRecordingStatus
TraceRecorder::downRecursion()
{
@ -584,9 +599,9 @@ TraceRecorder::downRecursion()
JS_ASSERT(unsigned(slots) == NativeStackSlots(cx, 1) - fp->argc - 2 - fp->script->nfixed - 1);
/* Guard that there is enough stack space. */
JS_ASSERT(treeInfo->maxNativeStackSlots >= treeInfo->nativeStackBase / sizeof(double));
int guardSlots = slots + treeInfo->maxNativeStackSlots -
treeInfo->nativeStackBase / sizeof(double);
JS_ASSERT(tree->maxNativeStackSlots >= tree->nativeStackBase / sizeof(double));
int guardSlots = slots + tree->maxNativeStackSlots -
tree->nativeStackBase / sizeof(double);
LIns* sp_top = lir->ins2(LIR_piadd, lirbuf->sp, lir->insImmWord(guardSlots * sizeof(double)));
guard(true, lir->ins2(LIR_plt, sp_top, eos_ins), OOM_EXIT);
@ -594,13 +609,23 @@ TraceRecorder::downRecursion()
LIns* rp_top = lir->ins2(LIR_piadd, lirbuf->rp, lir->insImmWord(sizeof(FrameInfo*)));
guard(true, lir->ins2(LIR_plt, rp_top, eor_ins), OOM_EXIT);
/*
* For every slot in the new frame that is not in the tracker, create a load
* in the tracker. This is necessary because otherwise snapshot() will see
* missing imports and use the down frame, rather than the new frame.
* This won't affect performance because the loads will be killed if not
* used.
*/
ImportFrameSlotsVisitor visitor(*this);
VisitStackSlots(visitor, cx, callDepth);
/* Add space for a new JIT frame. */
lirbuf->sp = lir->ins2(LIR_piadd, lirbuf->sp, lir->insImmWord(slots * sizeof(double)));
lir->insStorei(lirbuf->sp, lirbuf->state, offsetof(InterpState, sp));
lirbuf->rp = lir->ins2(LIR_piadd, lirbuf->rp, lir->insImmWord(sizeof(FrameInfo*)));
lir->insStorei(lirbuf->rp, lirbuf->state, offsetof(InterpState, rp));
--callDepth;
clearFrameSlotsFromCache();
clearFrameSlotsFromTracker(nativeFrameTracker);
/*
* If the callee and caller have identical call sites, this is a down-
@ -618,8 +643,8 @@ TraceRecorder::downRecursion()
exit = snapshot(RECURSIVE_UNLINKED_EXIT);
exit->recursive_pc = fp->script->code;
debug_only_print0(LC_TMTracer, "Compiling down-recursive function call.\n");
JS_ASSERT(treeInfo->recursion != Recursion_Disallowed);
treeInfo->recursion = Recursion_Detected;
JS_ASSERT(tree->recursion != Recursion_Disallowed);
tree->recursion = Recursion_Detected;
return closeLoop(exit);
}
@ -783,7 +808,7 @@ TraceRecorder::slurpSlot(LIns* val_ins, jsval* vp, SlurpInfo* info)
LIns* val = slurpSlot(val_ins, vp, exit);
lir->insStorei(val,
lirbuf->sp,
-treeInfo->nativeStackBase + ptrdiff_t(info->curSlot) * sizeof(double));
-tree->nativeStackBase + ptrdiff_t(info->curSlot) * sizeof(double));
info->curSlot++;
}
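Several hunks in this file compute stack-pointer adjustments from tree->nativeStackBase, downPostSlots, and the current slot index. A tiny worked example of those two formulas with made-up numbers (the values are illustrative, not taken from a real trace):

#include <cstdio>
#include <cstddef>

int main()
{
    const ptrdiff_t slotSize        = sizeof(double);   /* one native stack slot */
    const ptrdiff_t nativeStackBase = 6 * slotSize;     /* assumed, not from a real tree */
    const ptrdiff_t downPostSlots   = 3;                /* slots owned by the down frame */
    const ptrdiff_t curSlot         = 2;                /* slot currently being slurped */

    /* exit->sp_adj as computed in downSnapshot() and slurpDownFrames(): */
    ptrdiff_t sp_adj  = (downPostSlots + 1) * slotSize - nativeStackBase;
    /* store offset for a slurped slot, as in slurpSlot(): */
    ptrdiff_t slotOff = -nativeStackBase + curSlot * slotSize;

    printf("sp_adj = %td bytes, slot %td stored at sp%+td bytes\n",
           sp_adj, curSlot, slotOff);
    return 0;
}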


@ -2026,7 +2026,6 @@ LookupNativeRegExp(JSContext* cx, uint16 re_flags,
? (++(tm->lastFragID)) : 0;
)
frag = new (alloc) REFragment(0 verbose_only(, profFragID));
frag->lirbuf = tm->reLirBuf;
/*
* Copy the re_chars portion of the hash key into the Allocator, so
* its lifecycle is disconnected from the lifecycle of the
@ -2304,6 +2303,8 @@ class RegExpNativeCompiler {
LIns* start;
LIns* cpend;
LirBuffer* const lirbuf;
bool outOfMemory() {
return tempAlloc.outOfMemory() || JS_TRACE_MONITOR(cx).dataAlloc->outOfMemory();
}
@ -3134,12 +3135,19 @@ class RegExpNativeCompiler {
public:
RegExpNativeCompiler(JSContext* cx, JSRegExp* re, CompilerState* cs, Fragment* fragment)
: tempAlloc(*JS_TRACE_MONITOR(cx).reTempAlloc), cx(cx),
re(re), cs(cs), fragment(fragment), lir(NULL), lirBufWriter(NULL) { }
re(re), cs(cs), fragment(fragment), lir(NULL), lirBufWriter(NULL),
lirbuf(new (tempAlloc) LirBuffer(tempAlloc))
{
fragment->lirbuf = lirbuf;
#ifdef DEBUG
LabelMap* labels = new (tempAlloc) LabelMap(tempAlloc, &js_LogController);
lirbuf->names = new (tempAlloc) LirNameMap(tempAlloc, labels);
#endif
}
~RegExpNativeCompiler() {
/* Purge the tempAlloc used during recording. */
tempAlloc.reset();
JS_TRACE_MONITOR(cx).reLirBuf->clear();
}
JSBool compile()
@ -3235,7 +3243,7 @@ class RegExpNativeCompiler {
*/
JS_ASSERT(!lirbuf->sp && !lirbuf->rp);
::compile(assm, fragment, tempAlloc verbose_only(, tm->labels));
::compile(assm, fragment, tempAlloc verbose_only(, lirbuf->names->labels));
if (assm->error() != nanojit::None)
goto fail;


@ -146,7 +146,7 @@ js_CheckKeyword(const jschar *str, size_t length)
}
JS_FRIEND_API(void)
js_MapKeywords(void (*mapfun)(const char *))
js_MapKeywords(JSMapKeywordFun mapfun)
{
size_t i;


@ -369,8 +369,10 @@ js_CheckKeyword(const jschar *chars, size_t length);
* Friend-exported API entry point to call a mapping function on each reserved
* identifier in the scanner's keyword table.
*/
typedef void (*JSMapKeywordFun)(const char *);
extern JS_FRIEND_API(void)
js_MapKeywords(void (*mapfun)(const char *));
js_MapKeywords(JSMapKeywordFun mapfun);
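A minimal caller for the friend API above, just to show the new typedef in use; PrintKeyword and DumpKeywords are hypothetical names, and the header is assumed to be jsscan.h.

#include "jsscan.h"   /* declares JSMapKeywordFun and js_MapKeywords */
#include <stdio.h>

static void
PrintKeyword(const char *name)      /* matches the JSMapKeywordFun signature */
{
    printf("keyword: %s\n", name);
}

static void
DumpKeywords(void)
{
    js_MapKeywords(PrintKeyword);   /* invoked once per reserved identifier */
}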
/*
* Check that str forms a valid JS identifier name. The function does not


@ -112,6 +112,12 @@ js_GetMutableScope(JSContext *cx, JSObject *obj)
JS_ASSERT(newscope->freeslot == JSSLOT_FREE(STOBJ_GET_CLASS(obj)));
clasp = STOBJ_GET_CLASS(obj);
if (clasp->reserveSlots) {
/*
* FIXME: Here we change OBJ_SCOPE(obj)->freeslot without changing
* OBJ_SHAPE(obj). If we strengthen the shape guarantees to cover
* freeslot, we can eliminate a check in JSOP_SETPROP and in
* js_AddProperty. See bug 535416.
*/
freeslot = JSSLOT_FREE(clasp) + clasp->reserveSlots(cx, obj);
if (freeslot > STOBJ_NSLOTS(obj))
freeslot = STOBJ_NSLOTS(obj);
@ -1594,7 +1600,13 @@ JSScope::removeProperty(JSContext *cx, jsid id)
if (table) {
*spp = NULL;
#ifdef DEBUG
for (JSScopeProperty *aprop = lastProp; aprop; aprop = aprop->parent)
/*
* Check the consistency of the table, but limit the number of checks
* so as not to significantly alter the complexity of the delete in
* debug builds; see bug 534493.
*/
JSScopeProperty *aprop = lastProp;
for (unsigned n = 50; aprop && n != 0; aprop = aprop->parent, --n)
JS_ASSERT_IF(aprop != sprop, hasProperty(aprop));
#endif
}


@ -67,34 +67,26 @@
#include "jsscriptinlines.h"
const uint32 JSSLOT_EXEC_DEPTH = JSSLOT_PRIVATE + 1;
const uint32 JSSCRIPT_RESERVED_SLOTS = 1;
#if JS_HAS_SCRIPT_OBJECT
static const char js_script_exec_str[] = "Script.prototype.exec";
static const char js_script_compile_str[] = "Script.prototype.compile";
/*
* This routine requires that obj has been locked previously.
*/
static jsint
GetScriptExecDepth(JSContext *cx, JSObject *obj)
GetScriptExecDepth(JSObject *obj)
{
jsval v;
JS_ASSERT(JS_IS_OBJ_LOCKED(cx, obj));
v = LOCKED_OBJ_GET_SLOT(obj, JSSLOT_START(&js_ScriptClass));
jsval v = obj->fslots[JSSLOT_EXEC_DEPTH];
return JSVAL_IS_VOID(v) ? 0 : JSVAL_TO_INT(v);
}
static void
AdjustScriptExecDepth(JSContext *cx, JSObject *obj, jsint delta)
AdjustScriptExecDepth(JSObject *obj, jsint delta)
{
jsint execDepth;
JS_LOCK_OBJ(cx, obj);
execDepth = GetScriptExecDepth(cx, obj);
LOCKED_OBJ_SET_SLOT(obj, JSSLOT_START(&js_ScriptClass),
INT_TO_JSVAL(execDepth + delta));
JS_UNLOCK_OBJ(cx, obj);
jsint execDepth = GetScriptExecDepth(obj);
obj->fslots[JSSLOT_EXEC_DEPTH] = INT_TO_JSVAL(execDepth + delta);
}
#if JS_HAS_TOSOURCE
@ -272,7 +264,7 @@ script_compile_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
return JS_FALSE;
JS_LOCK_OBJ(cx, obj);
execDepth = GetScriptExecDepth(cx, obj);
execDepth = GetScriptExecDepth(obj);
/*
* execDepth must be 0 to allow compilation here, otherwise the JSScript
@ -378,7 +370,7 @@ script_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
return JS_FALSE;
/* Keep track of nesting depth for the script. */
AdjustScriptExecDepth(cx, obj, 1);
AdjustScriptExecDepth(obj, 1);
/* Must get to out label after this */
script = (JSScript *) obj->getPrivate();
@ -397,7 +389,7 @@ script_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
ok = js_Execute(cx, scopeobj, script, caller, JSFRAME_EVAL, rval);
out:
AdjustScriptExecDepth(cx, obj, -1);
AdjustScriptExecDepth(obj, -1);
return ok;
}
@ -855,7 +847,7 @@ script_thaw(JSContext *cx, uintN argc, jsval *vp)
}
JS_LOCK_OBJ(cx, obj);
execDepth = GetScriptExecDepth(cx, obj);
execDepth = GetScriptExecDepth(obj);
/*
* execDepth must be 0 to allow compilation here, otherwise the JSScript
@ -948,7 +940,7 @@ script_trace(JSTracer *trc, JSObject *obj)
JS_FRIEND_DATA(JSClass) js_ScriptClass = {
js_Script_str,
JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(1) |
JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(JSSCRIPT_RESERVED_SLOTS) |
JSCLASS_MARK_IS_TRACE | JSCLASS_HAS_CACHED_PROTO(JSProto_Script),
JS_PropertyStub, JS_PropertyStub, JS_PropertyStub, JS_PropertyStub,
JS_EnumerateStub, JS_ResolveStub, JS_ConvertStub, script_finalize,


@ -128,61 +128,64 @@ struct JSString {
static const size_t ATOMIZED = JSSTRING_BIT(3);
static const size_t DEFLATED = JSSTRING_BIT(4);
bool hasFlag(size_t flag) const {
inline bool hasFlag(size_t flag) const {
return (mFlags & flag) != 0;
}
public:
/* Generous but sane length bound. */
static const size_t MAX_LENGTH = (1 << 28);
/*
* Generous but sane length bound; the "-1" is there for compatibility with
* OOM tests.
*/
static const size_t MAX_LENGTH = (1 << 28) - 1;
bool isDependent() const {
inline bool isDependent() const {
return hasFlag(DEPENDENT);
}
bool isFlat() const {
inline bool isFlat() const {
return !isDependent();
}
bool isDeflated() const {
inline bool isDeflated() const {
return hasFlag(DEFLATED);
}
void setDeflated() {
inline void setDeflated() {
JS_ATOMIC_SET_MASK(&mFlags, DEFLATED);
}
bool isMutable() const {
inline bool isMutable() const {
return !isDependent() && hasFlag(MUTABLE);
}
bool isAtomized() const {
inline bool isAtomized() const {
return !isDependent() && hasFlag(ATOMIZED);
}
JS_ALWAYS_INLINE jschar *chars() {
inline jschar *chars() {
return isDependent() ? dependentChars() : flatChars();
}
JS_ALWAYS_INLINE size_t length() const {
inline size_t length() const {
return mLength;
}
JS_ALWAYS_INLINE bool empty() const {
inline bool empty() const {
return length() == 0;
}
JS_ALWAYS_INLINE void getCharsAndLength(const jschar *&chars, size_t &length) {
inline void getCharsAndLength(const jschar *&chars, size_t &length) {
chars = this->chars();
length = this->length();
}
JS_ALWAYS_INLINE void getCharsAndEnd(const jschar *&chars, const jschar *&end) {
inline void getCharsAndEnd(const jschar *&chars, const jschar *&end) {
end = length() + (chars = this->chars());
}
/* Specific flat string initializer and accessor methods. */
void initFlat(jschar *chars, size_t length) {
inline void initFlat(jschar *chars, size_t length) {
JS_ASSERT(length <= MAX_LENGTH);
mLength = length;
mOffset = 0;
@ -190,12 +193,12 @@ struct JSString {
mChars = chars;
}
jschar *flatChars() const {
inline jschar *flatChars() const {
JS_ASSERT(isFlat());
return mChars;
}
JS_ALWAYS_INLINE size_t flatLength() const {
inline size_t flatLength() const {
JS_ASSERT(isFlat());
return length();
}
@ -239,23 +242,23 @@ struct JSString {
* js_AtomizeString. This function would find that the string was already
* hashed and return it with the atomized bit set.
*/
void flatSetAtomized() {
inline void flatSetAtomized() {
JS_ASSERT(isFlat() && !isMutable());
JS_ATOMIC_SET_MASK(&mFlags, ATOMIZED);
}
void flatSetMutable() {
inline void flatSetMutable() {
JS_ASSERT(isFlat() && !isAtomized());
mFlags |= MUTABLE;
}
void flatClearMutable() {
inline void flatClearMutable() {
JS_ASSERT(isFlat());
if (hasFlag(MUTABLE))
mFlags &= ~MUTABLE;
}
void initDependent(JSString *bstr, size_t off, size_t len) {
inline void initDependent(JSString *bstr, size_t off, size_t len) {
JS_ASSERT(len <= MAX_LENGTH);
mLength = len;
mOffset = off;
@ -264,7 +267,7 @@ struct JSString {
}
/* See JSString::reinitFlat. */
void reinitDependent(JSString *bstr, size_t off, size_t len) {
inline void reinitDependent(JSString *bstr, size_t off, size_t len) {
JS_ASSERT(len <= MAX_LENGTH);
mLength = len;
mOffset = off;
@ -272,22 +275,22 @@ struct JSString {
mBase = bstr;
}
JSString *dependentBase() const {
inline JSString *dependentBase() const {
JS_ASSERT(isDependent());
return mBase;
}
JS_ALWAYS_INLINE jschar *dependentChars() {
inline jschar *dependentChars() {
return dependentBase()->isDependent()
? js_GetDependentStringChars(this)
: dependentBase()->flatChars() + dependentStart();
}
JS_ALWAYS_INLINE size_t dependentStart() const {
inline size_t dependentStart() const {
return mOffset;
}
JS_ALWAYS_INLINE size_t dependentLength() const {
inline size_t dependentLength() const {
JS_ASSERT(isDependent());
return length();
}
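The accessors above treat a dependent string as an (offset, length) view onto a base string's characters, falling back to js_GetDependentStringChars when the base is itself dependent. A toy standalone illustration of that layout (not the real JSString, which packs these fields and its flags far more tightly):

#include <stddef.h>
#include <stdint.h>

typedef uint16_t Char16;    /* stands in for jschar */

struct ToyString {
    bool        dependent;
    size_t      length;
    Char16     *ownChars;   /* flat strings own their characters */
    ToyString  *base;       /* dependent strings borrow a slice... */
    size_t      offset;     /* ...starting at this offset in base */

    Char16 *chars() {
        /* Recursing through base handles a dependent-on-dependent chain. */
        return dependent ? base->chars() + offset : ownChars;
    }
};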

File diff suppressed because it is too large.


@ -66,10 +66,11 @@ class Queue {
public:
void ensure(unsigned size) {
if (_max > size)
return;
if (!_max)
_max = 16;
while (_max < size)
_max <<= 1;
_max = 8;
_max = JS_MAX(_max * 2, size);
if (alloc) {
T* tmp = new (*alloc) T[_max];
memcpy(tmp, _data, _len * sizeof(T));
@ -155,6 +156,16 @@ public:
T* data() const {
return _data;
}
int offsetOf(T slot) {
T* p = _data;
unsigned n = 0;
for (n = 0; n < _len; ++n)
if (*p++ == slot)
return n;
return -1;
}
};
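The ensure() change above replaces pure doubling (seeded at 16) with growth to at least the requested size (seeded at 8). The new policy in isolation, as a small free function; the name GrowCapacity is illustrative:

#include <algorithm>
#include <cstddef>

static size_t
GrowCapacity(size_t max, size_t size)
{
    if (max > size)
        return max;                  /* already large enough */
    if (max == 0)
        max = 8;                     /* new starting capacity */
    return std::max(max * 2, size);  /* at least double, and at least `size` */
}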
/*
@ -217,54 +228,6 @@ public:
TreeFragment* toTreeFragment();
};
struct LinkableFragment : public VMFragment
{
LinkableFragment(const void* _ip verbose_only(, uint32_t profFragID))
: VMFragment(_ip verbose_only(, profFragID))
{ }
uint32 branchCount;
};
/*
* argc is cx->fp->argc at the trace loop header, i.e., the number of arguments
* pushed for the innermost JS frame. This is required as part of the fragment
* key because the fragment will write those arguments back to the interpreter
* stack when it exits, using its typemap, which implicitly incorporates a
* given value of argc. Without this feature, a fragment could be called as an
* inner tree with two different values of argc, and entry type checking or
* exit frame synthesis could crash.
*/
struct TreeFragment : public LinkableFragment
{
TreeFragment(const void* _ip, JSObject* _globalObj, uint32 _globalShape, uint32 _argc
verbose_only(, uint32_t profFragID)) :
LinkableFragment(_ip verbose_only(, profFragID)),
treeInfo(NULL),
first(NULL),
next(NULL),
peer(NULL),
globalObj(_globalObj),
globalShape(_globalShape),
argc(_argc)
{ }
TreeInfo *treeInfo;
TreeFragment* first;
TreeFragment* next;
TreeFragment* peer;
JSObject* globalObj;
uint32 globalShape;
uint32 argc;
};
inline TreeFragment*
VMFragment::toTreeFragment()
{
JS_ASSERT(root == this);
return static_cast<TreeFragment*>(this);
}
#if defined(JS_JIT_SPEW) || defined(NJ_NO_VARIADIC_MACROS)
enum LC_TMBits {
@ -384,7 +347,8 @@ enum JSTraceType_
TT_STRING = 4, /* pointer to JSString */
TT_NULL = 5, /* null */
TT_PSEUDOBOOLEAN = 6, /* true, false, or undefined (0, 1, or 2) */
TT_FUNCTION = 7 /* pointer to JSObject whose class is js_FunctionClass */
TT_FUNCTION = 7, /* pointer to JSObject whose class is js_FunctionClass */
TT_IGNORE = 8
}
#if defined(__GNUC__) && defined(USE_TRACE_TYPE_ENUM)
__attribute__((packed))
@ -409,6 +373,8 @@ typedef Queue<uint16> SlotList;
class TypeMap : public Queue<JSTraceType> {
public:
TypeMap(nanojit::Allocator* alloc) : Queue<JSTraceType>(alloc) {}
void set(unsigned stackSlots, unsigned ngslots,
const JSTraceType* stackTypeMap, const JSTraceType* globalTypeMap);
JS_REQUIRES_STACK void captureTypes(JSContext* cx, JSObject* globalObj, SlotList& slots, unsigned callDepth);
JS_REQUIRES_STACK void captureMissingGlobalTypes(JSContext* cx, JSObject* globalObj, SlotList& slots,
unsigned stackSlots);
@ -633,8 +599,6 @@ struct REHashFn {
}
};
class TreeInfo;
struct FrameInfo {
JSObject* block; // caller block chain head
jsbytecode* pc; // caller fp->regs->pc
@ -695,51 +659,71 @@ enum RecursionStatus
Recursion_Detected /* Tree has down recursion and maybe up recursion. */
};
class TreeInfo {
public:
TreeFragment* const rootFragment;
JSScript* script;
unsigned maxNativeStackSlots;
ptrdiff_t nativeStackBase;
unsigned maxCallDepth;
struct LinkableFragment : public VMFragment
{
LinkableFragment(const void* _ip, nanojit::Allocator* alloc
verbose_only(, uint32_t profFragID))
: VMFragment(_ip verbose_only(, profFragID)), typeMap(alloc), nStackTypes(0)
{ }
uint32 branchCount;
TypeMap typeMap;
unsigned nStackTypes;
SlotList* globalSlots;
};
/*
* argc is cx->fp->argc at the trace loop header, i.e., the number of arguments
* pushed for the innermost JS frame. This is required as part of the fragment
* key because the fragment will write those arguments back to the interpreter
* stack when it exits, using its typemap, which implicitly incorporates a
* given value of argc. Without this feature, a fragment could be called as an
* inner tree with two different values of argc, and entry type checking or
* exit frame synthesis could crash.
*/
struct TreeFragment : public LinkableFragment
{
TreeFragment(const void* _ip, nanojit::Allocator* alloc, JSObject* _globalObj,
uint32 _globalShape, uint32 _argc verbose_only(, uint32_t profFragID)):
LinkableFragment(_ip, alloc verbose_only(, profFragID)),
first(NULL),
next(NULL),
peer(NULL),
globalObj(_globalObj),
globalShape(_globalShape),
argc(_argc),
dependentTrees(alloc),
linkedTrees(alloc),
sideExits(alloc),
gcthings(alloc),
sprops(alloc)
{ }
TreeFragment* first;
TreeFragment* next;
TreeFragment* peer;
JSObject* globalObj;
uint32 globalShape;
uint32 argc;
/* Dependent trees must be trashed if this tree dies, and updated on missing global types */
Queue<TreeFragment*> dependentTrees;
Queue<TreeFragment*> dependentTrees;
/* Linked trees must be updated on missing global types, but are not dependent */
Queue<TreeFragment*> linkedTrees;
Queue<VMSideExit*> sideExits;
UnstableExit* unstableExits;
/* All embedded GC things are registered here so the GC can scan them. */
Queue<jsval> gcthings;
Queue<JSScopeProperty*> sprops;
Queue<TreeFragment*> linkedTrees;
#ifdef DEBUG
const char* treeFileName;
uintN treeLineNumber;
uintN treePCOffset;
#endif
JSScript* script;
RecursionStatus recursion;
TreeInfo(nanojit::Allocator* alloc,
TreeFragment* fragment,
SlotList* globalSlots)
: rootFragment(fragment),
script(NULL),
maxNativeStackSlots(0),
nativeStackBase(0),
maxCallDepth(0),
typeMap(alloc),
nStackTypes(0),
globalSlots(globalSlots),
dependentTrees(alloc),
linkedTrees(alloc),
sideExits(alloc),
unstableExits(NULL),
gcthings(alloc),
sprops(alloc),
recursion(Recursion_None)
{}
UnstableExit* unstableExits;
Queue<VMSideExit*> sideExits;
ptrdiff_t nativeStackBase;
unsigned maxCallDepth;
/* All embedded GC things are registered here so the GC can scan them. */
Queue<jsval> gcthings;
Queue<JSScopeProperty*> sprops;
unsigned maxNativeStackSlots;
inline unsigned nGlobalTypes() {
return typeMap.length() - nStackTypes;
@ -750,13 +734,18 @@ public:
inline JSTraceType* stackTypeMap() {
return typeMap.data();
}
inline JSObject* globalObj() {
return rootFragment->globalObj;
}
JS_REQUIRES_STACK void initialize(JSContext* cx, SlotList *globalSlots);
UnstableExit* removeUnstableExit(VMSideExit* exit);
};
inline TreeFragment*
VMFragment::toTreeFragment()
{
JS_ASSERT(root == this);
return static_cast<TreeFragment*>(this);
}
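To make the role of the TreeFragment key fields concrete, here is a hypothetical peer-lookup sketch (editorial, not part of the patch; the real lookup lives in jstracer.cpp and its exact shape may differ):

    // Sketch: argc is part of the key alongside globalObj/globalShape, so a
    // loop header recorded with one argc is never reused for another
    // (see the comment above TreeFragment).
    static TreeFragment*
    lookupPeer(TreeFragment* first, JSObject* globalObj, uint32 globalShape, uint32 argc)
    {
        for (TreeFragment* f = first; f; f = f->peer) {
            if (f->globalObj == globalObj &&
                f->globalShape == globalShape &&
                f->argc == argc) {
                return f;
            }
        }
        return NULL;
    }
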
typedef enum JSBuiltinStatus {
JSBUILTIN_BAILED = 1,
JSBUILTIN_ERROR = 2
@ -935,8 +924,8 @@ class TraceRecorder
/* The Fragment being recorded by this recording session. */
VMFragment* const fragment;
/* The tree to which this |fragment| will belong when finished. */
TreeInfo* const treeInfo;
/* The root fragment representing the tree. */
TreeFragment* const tree;
/* The reason we started recording. */
RecordReason const recordReason;
@ -965,6 +954,11 @@ class TraceRecorder
nanojit::LIns* const eor_ins;
nanojit::LIns* const loopLabel;
/* Lazy slot import state. */
unsigned importStackSlots;
unsigned importGlobalSlots;
TypeMap importTypeMap;
/*
* The LirBuffer used to supply memory to our LirWriter pipeline. Also contains the most recent
* instruction for {sp, rp, state}. Also contains names for debug JIT spew. Should be split.
@ -1064,17 +1058,20 @@ class TraceRecorder
JS_REQUIRES_STACK nanojit::GuardRecord* createGuardRecord(VMSideExit* exit);
bool isGlobal(jsval* p) const;
ptrdiff_t nativeGlobalSlot(jsval *p) const;
ptrdiff_t nativeGlobalOffset(jsval* p) const;
JS_REQUIRES_STACK ptrdiff_t nativeStackOffset(jsval* p) const;
JS_REQUIRES_STACK ptrdiff_t nativeStackSlot(jsval* p) const;
JS_REQUIRES_STACK ptrdiff_t nativespOffset(jsval* p) const;
JS_REQUIRES_STACK void import(nanojit::LIns* base, ptrdiff_t offset, jsval* p, JSTraceType t,
const char *prefix, uintN index, JSStackFrame *fp);
JS_REQUIRES_STACK void import(TreeInfo* treeInfo, nanojit::LIns* sp, unsigned stackSlots,
JS_REQUIRES_STACK void import(TreeFragment* tree, nanojit::LIns* sp, unsigned stackSlots,
unsigned callDepth, unsigned ngslots, JSTraceType* typeMap);
void trackNativeStackUse(unsigned slots);
JS_REQUIRES_STACK bool isValidSlot(JSScope* scope, JSScopeProperty* sprop);
JS_REQUIRES_STACK bool lazilyImportGlobalSlot(unsigned slot);
JS_REQUIRES_STACK void importGlobalSlot(unsigned slot);
JS_REQUIRES_STACK void guard(bool expected, nanojit::LIns* cond, ExitType exitType);
JS_REQUIRES_STACK void guard(bool expected, nanojit::LIns* cond, VMSideExit* exit);
@ -1148,10 +1145,11 @@ class TraceRecorder
JS_REQUIRES_STACK nanojit::LIns* alu(nanojit::LOpcode op, jsdouble v0, jsdouble v1,
nanojit::LIns* s0, nanojit::LIns* s1);
nanojit::LIns* f2i(nanojit::LIns* f);
nanojit::LIns* f2u(nanojit::LIns* f);
JS_REQUIRES_STACK nanojit::LIns* makeNumberInt32(nanojit::LIns* f);
JS_REQUIRES_STACK nanojit::LIns* stringify(jsval& v);
JS_REQUIRES_STACK nanojit::LIns* newArguments();
JS_REQUIRES_STACK nanojit::LIns* newArguments(nanojit::LIns* callee_ins);
JS_REQUIRES_STACK RecordingStatus call_imacro(jsbytecode* imacro);
@ -1290,7 +1288,7 @@ class TraceRecorder
ExitType exitType);
JS_REQUIRES_STACK RecordingStatus guardNotGlobalObject(JSObject* obj,
nanojit::LIns* obj_ins);
void clearFrameSlotsFromCache();
void clearFrameSlotsFromTracker(Tracker& which);
JS_REQUIRES_STACK void putArguments();
JS_REQUIRES_STACK RecordingStatus guardCallee(jsval& callee);
JS_REQUIRES_STACK JSStackFrame *guardArguments(JSObject *obj, nanojit::LIns* obj_ins,
@ -1365,7 +1363,7 @@ class TraceRecorder
inline void operator delete(void *p) { free(p); }
JS_REQUIRES_STACK
TraceRecorder(JSContext* cx, VMSideExit*, VMFragment*, TreeInfo*,
TraceRecorder(JSContext* cx, VMSideExit*, VMFragment*,
unsigned stackSlots, unsigned ngslots, JSTraceType* typeMap,
VMSideExit* expectedInnerExit, jsbytecode* outerTree,
uint32 outerArgc, RecordReason reason);
@ -1381,6 +1379,7 @@ class TraceRecorder
friend class AdjustCallerGlobalTypesVisitor;
friend class AdjustCallerStackTypesVisitor;
friend class TypeCompatibilityVisitor;
friend class ImportFrameSlotsVisitor;
friend class SlotMap;
friend class DefaultSlotMap;
friend class DetermineTypesVisitor;
@ -1392,14 +1391,14 @@ class TraceRecorder
public:
static bool JS_REQUIRES_STACK
startRecorder(JSContext*, VMSideExit*, VMFragment*, TreeInfo*,
startRecorder(JSContext*, VMSideExit*, VMFragment*,
unsigned stackSlots, unsigned ngslots, JSTraceType* typeMap,
VMSideExit* expectedInnerExit, jsbytecode* outerTree,
uint32 outerArgc, RecordReason reason);
/* Accessors. */
VMFragment* getFragment() const { return fragment; }
TreeInfo* getTreeInfo() const { return treeInfo; }
TreeFragment* getTree() const { return tree; }
bool outOfMemory() const { return traceMonitor->outOfMemory(); }
/* Entry points / callbacks from the interpreter. */

View File

@ -108,13 +108,12 @@ JS_Assert(const char *s, const char *file, JSIntn ln);
*/
extern JS_PUBLIC_API(void) JS_Abort(void);
#if 0
#ifdef DEBUG
# define JS_BASIC_STATS 1
# define JS_SCOPE_DEPTH_METER 1
#endif
#if defined DEBUG && !defined JS_BASIC_STATS
# define JS_BASIC_STATS 1
#ifdef DEBUG_brendan
# define JS_SCOPE_DEPTH_METER 1
#endif
#ifdef JS_BASIC_STATS

View File

@ -380,7 +380,7 @@ imm(const string &s)
}
uint64_t
quad(const string &s)
lquad(const string &s)
{
stringstream tmp(s);
uint64_t ret;
@ -571,7 +571,11 @@ FragmentAssembler::assemble_jump(bool isCond)
return mLir->insBranch(mOpcode, condition, target);
} else {
LIns *ins = mLir->insBranch(mOpcode, condition, NULL);
#ifdef __SUNPRO_CC
mFwdJumps.insert(make_pair<const string, LIns *>(name, ins));
#else
mFwdJumps.insert(make_pair(name, ins));
#endif
return ins;
}
}
@ -842,7 +846,11 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
if (!lab.empty()) {
ins = mLir->ins0(LIR_label);
typedef multimap<string, LIns *> mulmap;
#ifdef __SUNPRO_CC
typedef mulmap::iterator ci;
#else
typedef mulmap::const_iterator ci;
#endif
pair<ci, ci> range = mFwdJumps.equal_range(lab);
for (ci i = range.first; i != range.second; ++i) {
i->second->setTarget(ins);
@ -968,7 +976,7 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
case LIR_quad:
need(1);
ins = mLir->insImmq(quad(mTokens[0]));
ins = mLir->insImmq(lquad(mTokens[0]));
break;
case LIR_float:
@ -976,14 +984,29 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
ins = mLir->insImmf(immf(mTokens[0]));
break;
#if NJ_EXPANDED_LOADSTORE_SUPPORTED
case LIR_stb:
case LIR_sts:
case LIR_st32f:
#endif
case LIR_sti:
case LIR_stqi:
need(3);
ins = mLir->insStorei(ref(mTokens[0]),
ins = mLir->insStore(mOpcode, ref(mTokens[0]),
ref(mTokens[1]),
imm(mTokens[2]));
break;
#if NJ_EXPANDED_LOADSTORE_SUPPORTED
case LIR_ldzb:
case LIR_ldzs:
case LIR_ldsb:
case LIR_ldss:
case LIR_ldcsb:
case LIR_ldcss:
case LIR_ld32f:
case LIR_ldc32f:
#endif
case LIR_ld:
case LIR_ldc:
case LIR_ldq:
@ -1340,11 +1363,24 @@ FragmentAssembler::assembleRandomFragment(int nIns)
I_loads.push_back(LIR_ldc);
I_loads.push_back(LIR_ldcb);
I_loads.push_back(LIR_ldcs);
#if NJ_EXPANDED_LOADSTORE_SUPPORTED
I_loads.push_back(LIR_ldzb);
I_loads.push_back(LIR_ldzs);
I_loads.push_back(LIR_ldsb);
I_loads.push_back(LIR_ldss);
I_loads.push_back(LIR_ldcsb);
I_loads.push_back(LIR_ldcss);
#endif
vector<LOpcode> QorF_loads;
QorF_loads.push_back(LIR_ldq); // weight LIR_ldq the heaviest
QorF_loads.push_back(LIR_ldq);
QorF_loads.push_back(LIR_ldqc);
#if NJ_EXPANDED_LOADSTORE_SUPPORTED
// this loads a 32-bit float and expands to 64-bit float
QorF_loads.push_back(LIR_ld32f);
QorF_loads.push_back(LIR_ldc32f);
#endif
enum LInsClass {
#define CLASS(name, only64bit, relFreq) name,
@ -1748,13 +1784,10 @@ Lirasm::Lirasm(bool verbose) :
#endif
// Populate the mOpMap table.
#define OPDEF(op, number, repkind) \
mOpMap[#op] = LIR_##op;
#define OPD64(op, number, repkind) \
#define OPDEF(op, number, repKind, retType) \
mOpMap[#op] = LIR_##op;
#include "nanojit/LIRopcode.tbl"
#undef OPDEF
#undef OPD64
// TODO - These should alias to the appropriate platform-specific LIR opcode.
mOpMap["alloc"] = mOpMap["ialloc"];

View File

@ -1 +1 @@
23ed78f42df2b7b1a590fc7e986e6d446ef4d3d4
a6a96927117a1e462a04784e1b621a3d85f61099

View File

@ -83,7 +83,6 @@ namespace nanojit
verbose_only( _outputCache = 0; )
verbose_only( outline[0] = '\0'; )
verbose_only( outlineEOL[0] = '\0'; )
verbose_only( outputAddr = false; )
reset();
}
@ -777,7 +776,6 @@ namespace nanojit
}
NIns* fragEntry = genPrologue();
verbose_only( outputAddr=true; )
verbose_only( asm_output("[prologue]"); )
// check for resource leaks
@ -968,7 +966,7 @@ namespace nanojit
switch(op)
{
default:
NanoAssertMsgf(false, "unsupported LIR instruction: %d (~0x40: %d)\n", op, op&~LIR64);
NanoAssertMsgf(false, "unsupported LIR instruction: %d\n", op);
break;
case LIR_regfence:
@ -1063,15 +1061,24 @@ namespace nanojit
asm_cmov(ins);
break;
}
case LIR_ldzb:
case LIR_ldzs:
case LIR_ldsb:
case LIR_ldss:
case LIR_ldcsb:
case LIR_ldcss:
case LIR_ld:
case LIR_ldc:
case LIR_ldcb:
case LIR_ldcs:
{
countlir_ld();
asm_ld(ins);
asm_load32(ins);
break;
}
case LIR_ld32f:
case LIR_ldc32f:
case LIR_ldq:
case LIR_ldqc:
{
@ -1159,27 +1166,30 @@ namespace nanojit
asm_promote(ins);
break;
}
case LIR_stb:
case LIR_sts:
case LIR_sti:
{
countlir_st();
asm_store32(ins->oprnd1(), ins->disp(), ins->oprnd2());
asm_store32(op, ins->oprnd1(), ins->disp(), ins->oprnd2());
break;
}
case LIR_st32f:
case LIR_stqi:
{
countlir_stq();
LIns* value = ins->oprnd1();
LIns* base = ins->oprnd2();
int dr = ins->disp();
if (value->isop(LIR_qjoin))
if (value->isop(LIR_qjoin) && op != LIR_st32f)
{
// this is correct for little-endian only
asm_store32(value->oprnd1(), dr, base);
asm_store32(value->oprnd2(), dr+4, base);
asm_store32(LIR_sti, value->oprnd1(), dr, base);
asm_store32(LIR_sti, value->oprnd2(), dr+4, base);
}
else
{
asm_store64(value, dr, base);
asm_store64(op, value, dr, base);
}
break;
}
@ -1323,8 +1333,7 @@ namespace nanojit
label->addr = _nIns;
}
verbose_only( if (_logc->lcbits & LC_Assembly) {
outputAddr=true; asm_output("[%s]",
_thisfrag->lirbuf->names->formatRef(ins));
asm_output("[%s]", _thisfrag->lirbuf->names->formatRef(ins));
})
break;
}
@ -1407,23 +1416,6 @@ namespace nanojit
case LIR_icall:
{
countlir_call();
Register rr = UnknownReg;
if (ARM_VFP && op == LIR_fcall)
{
// fcall
rr = asm_prep_fcall(ins);
}
else
{
rr = retRegs[0];
prepResultReg(ins, rmask(rr));
}
// do this after we've handled the call result, so we dont
// force the call result to be spilled unnecessarily.
evictScratchRegs();
asm_call(ins);
break;
}
@ -1805,7 +1797,7 @@ namespace nanojit
}
}
}
/**
* Merge the current state of the registers with a previously stored version
* current == saved skip
@ -1825,9 +1817,13 @@ namespace nanojit
// of load/store multiple instructions. Hence iterate the loop the
// other way. The "r <= LastReg" guards against wraparound in
// the case where Register is treated as unsigned and FirstReg is zero.
for (Register r=LastReg; r >= FirstReg && r <= LastReg;
r = prevreg(r))
//
// Note, the loop var is deliberately typed as int (*not* Register)
// to outsmart compilers that will otherwise report
// "error: comparison is always true due to limited range of data type".
for (int ri=LastReg; ri >= FirstReg && ri <= LastReg; ri = int(prevreg(Register(ri))))
{
Register const r = Register(ri);
LIns * curins = _allocator.getActive(r);
LIns * savedins = saved.getActive(r);
if (curins == savedins)

View File

@ -207,9 +207,6 @@ namespace nanojit
// Buffer used to hold extra text to be printed at the end of some
// lines.
static char outlineEOL[512];
// If outputAddr=true the next asm instruction output will
// be prepended with its address.
bool outputAddr, vpad[3];
// Outputs 'outline' and 'outlineEOL', and resets them both.
// Output goes to '_outputCache' if it's non-NULL, or is printed
@ -341,10 +338,9 @@ namespace nanojit
NIns* asm_exit(LInsp guard);
NIns* asm_leave_trace(LInsp guard);
void asm_qjoin(LIns *ins);
void asm_store32(LIns *val, int d, LIns *base);
void asm_store64(LIns *val, int d, LIns *base);
void asm_store32(LOpcode op, LIns *val, int d, LIns *base);
void asm_store64(LOpcode op, LIns *val, int d, LIns *base);
void asm_restore(LInsp, Register);
void asm_load(int d, Register r);
void asm_spilli(LInsp i, bool pop);
void asm_spill(Register rr, int d, bool pop, bool quad);
void asm_load64(LInsp i);
@ -354,7 +350,7 @@ namespace nanojit
void asm_cond(LInsp i);
void asm_arith(LInsp i);
void asm_neg_not(LInsp i);
void asm_ld(LInsp i);
void asm_load32(LInsp i);
void asm_cmov(LInsp i);
void asm_param(LInsp i);
void asm_int(LInsp i);
@ -365,7 +361,6 @@ namespace nanojit
void asm_i2f(LInsp ins);
void asm_u2f(LInsp ins);
void asm_promote(LIns *ins);
Register asm_prep_fcall(LInsp ins);
void asm_nongp_copy(Register r, Register s);
void asm_call(LInsp);
Register asm_binop_rhs_reg(LInsp ins);

View File

@ -45,27 +45,29 @@ namespace nanojit
#ifdef FEATURE_NANOJIT
const uint8_t repKinds[] = {
#define OPDEF(op, number, repkind) \
LRK_##repkind,
#define OPD64(op, number, repkind) \
LRK_##repkind,
#define OPDEF(op, number, repKind, retType) \
LRK_##repKind,
#include "LIRopcode.tbl"
#undef OPDEF
#undef OPD64
0
};
const LTy retTypes[] = {
#define OPDEF(op, number, repKind, retType) \
LTy_##retType,
#include "LIRopcode.tbl"
#undef OPDEF
LTy_Void
};
// LIR verbose specific
#ifdef NJ_VERBOSE
const char* lirNames[] = {
#define OPDEF(op, number, repkind) \
#op,
#define OPD64(op, number, repkind) \
#define OPDEF(op, number, repKind, retType) \
#op,
#include "LIRopcode.tbl"
#undef OPDEF
#undef OPD64
NULL
};
@ -223,9 +225,8 @@ namespace nanojit
return startOfRoom;
}
LInsp LirBufWriter::insStorei(LInsp val, LInsp base, int32_t d)
LInsp LirBufWriter::insStore(LOpcode op, LInsp val, LInsp base, int32_t d)
{
LOpcode op = val->isQuad() ? LIR_stqi : LIR_sti;
base = insDisp(op, base, d);
LInsSti* insSti = (LInsSti*)_buf->makeRoom(sizeof(LInsSti));
LIns* ins = insSti->getLIns();
@ -328,22 +329,22 @@ namespace nanojit
LInsp LirBufWriter::insImmq(uint64_t imm)
{
LInsI64* insI64 = (LInsI64*)_buf->makeRoom(sizeof(LInsI64));
LIns* ins = insI64->getLIns();
ins->initLInsI64(LIR_quad, imm);
LInsN64* insN64 = (LInsN64*)_buf->makeRoom(sizeof(LInsN64));
LIns* ins = insN64->getLIns();
ins->initLInsN64(LIR_quad, imm);
return ins;
}
LInsp LirBufWriter::insImmf(double d)
{
LInsI64* insI64 = (LInsI64*)_buf->makeRoom(sizeof(LInsI64));
LIns* ins = insI64->getLIns();
LInsN64* insN64 = (LInsN64*)_buf->makeRoom(sizeof(LInsN64));
LIns* ins = insN64->getLIns();
union {
double d;
uint64_t q;
} u;
u.d = d;
ins->initLInsI64(LIR_float, u.q);
ins->initLInsN64(LIR_float, u.q);
return ins;
}
@ -352,13 +353,10 @@ namespace nanojit
{
static const uint8_t insSizes[] = {
// LIR_start is treated specially -- see below.
#define OPDEF(op, number, repkind) \
((number) == LIR_start ? 0 : sizeof(LIns##repkind)),
#define OPD64(op, number, repkind) \
OPDEF(op, number, repkind)
#define OPDEF(op, number, repKind, retType) \
((number) == LIR_start ? 0 : sizeof(LIns##repKind)),
#include "LIRopcode.tbl"
#undef OPDEF
#undef OPD64
0
};
@ -381,6 +379,33 @@ namespace nanojit
return ret;
}
LOpcode f64arith_to_i32arith(LOpcode op)
{
switch (op) {
case LIR_fneg: return LIR_neg;
case LIR_fadd: return LIR_add;
case LIR_fsub: return LIR_sub;
case LIR_fmul: return LIR_mul;
default: NanoAssert(0); return LIR_skip;
}
}
LOpcode i32cmp_to_i64cmp(LOpcode op)
{
switch (op) {
case LIR_eq: return LIR_qeq;
case LIR_lt: return LIR_qlt;
case LIR_gt: return LIR_qgt;
case LIR_le: return LIR_qle;
case LIR_ge: return LIR_qge;
case LIR_ult: return LIR_qult;
case LIR_ugt: return LIR_qugt;
case LIR_ule: return LIR_qule;
case LIR_uge: return LIR_quge;
default: NanoAssert(0); return LIR_skip;
}
}
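A brief usage sketch for the two helpers above (hypothetical caller, not from the patch):

    // Sketch: widen a 32-bit compare to its 64-bit counterpart when the
    // operands are pointer-sized on a 64-bit build.
    LOpcode cmp = LIR_lt;
    #ifdef NANOJIT_64BIT
    cmp = i32cmp_to_i64cmp(cmp);        // yields LIR_qlt
    #endif
    // f64arith_to_i32arith goes the other way for arithmetic, e.g.
    // f64arith_to_i32arith(LIR_fadd) == LIR_add; both helpers assert on
    // opcodes outside their expected ranges.
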
// This is never called, but that's ok because it contains only static
// assertions.
void LIns::staticSanityCheck()
@ -400,9 +425,9 @@ namespace nanojit
NanoStaticAssert(sizeof(LInsP) == 2*sizeof(void*));
NanoStaticAssert(sizeof(LInsI) == 2*sizeof(void*));
#if defined NANOJIT_64BIT
NanoStaticAssert(sizeof(LInsI64) == 2*sizeof(void*));
NanoStaticAssert(sizeof(LInsN64) == 2*sizeof(void*));
#else
NanoStaticAssert(sizeof(LInsI64) == 3*sizeof(void*));
NanoStaticAssert(sizeof(LInsN64) == 3*sizeof(void*));
#endif
// oprnd_1 must be in the same position in LIns{Op1,Op2,Op3,Ld,Sti}
@ -859,6 +884,12 @@ namespace nanojit
#endif
}
LIns* LirWriter::insStorei(LIns* value, LIns* base, int32_t d)
{
LOpcode op = value->isQuad() ? LIR_stqi : LIR_sti;
return insStore(op, value, base, d);
}
LIns* LirWriter::qjoin(LInsp lo, LInsp hi)
{
return ins2(LIR_qjoin, lo, hi);
@ -1483,8 +1514,16 @@ namespace nanojit
case LIR_ldc:
case LIR_ldq:
case LIR_ldqc:
case LIR_ldzb:
case LIR_ldzs:
case LIR_ldcb:
case LIR_ldcs:
case LIR_ldsb:
case LIR_ldss:
case LIR_ldcsb:
case LIR_ldcss:
case LIR_ld32f:
case LIR_ldc32f:
case LIR_ret:
case LIR_fret:
case LIR_live:
@ -1510,6 +1549,8 @@ namespace nanojit
case LIR_sti:
case LIR_stqi:
case LIR_stb:
case LIR_sts:
case LIR_eq:
case LIR_lt:
case LIR_gt:
@ -1884,8 +1925,16 @@ namespace nanojit
case LIR_ldc:
case LIR_ldq:
case LIR_ldqc:
case LIR_ldzb:
case LIR_ldzs:
case LIR_ldcb:
case LIR_ldcs:
case LIR_ldsb:
case LIR_ldss:
case LIR_ldcsb:
case LIR_ldcss:
case LIR_ld32f:
case LIR_ldc32f:
VMPI_sprintf(s, "%s = %s %s[%d]", formatRef(i), lirNames[op],
formatRef(i->oprnd1()),
i->disp());
@ -1893,6 +1942,9 @@ namespace nanojit
case LIR_sti:
case LIR_stqi:
case LIR_stb:
case LIR_sts:
case LIR_st32f:
VMPI_sprintf(s, "%s %s[%d] = %s", lirNames[op],
formatRef(i->oprnd2()),
i->disp(),
@ -2187,12 +2239,28 @@ namespace nanojit
LInsp LoadFilter::insLoad(LOpcode v, LInsp base, int32_t disp)
{
if (base != sp && base != rp && (v == LIR_ld || v == LIR_ldq)) {
uint32_t k;
LInsp ins = exprs->findLoad(v, base, disp, k);
if (ins)
return ins;
return exprs->add(LInsLoad, out->insLoad(v,base,disp), k);
if (base != sp && base != rp)
{
switch (v)
{
case LIR_ld:
case LIR_ldq:
case LIR_ld32f:
case LIR_ldsb:
case LIR_ldss:
case LIR_ldzb:
case LIR_ldzs:
{
uint32_t k;
LInsp ins = exprs->findLoad(v, base, disp, k);
if (ins)
return ins;
return exprs->add(LInsLoad, out->insLoad(v,base,disp), k);
}
default:
// fall thru
break;
}
}
return out->insLoad(v, base, disp);
}
@ -2203,10 +2271,10 @@ namespace nanojit
exprs->clear();
}
LInsp LoadFilter::insStorei(LInsp v, LInsp b, int32_t d)
LInsp LoadFilter::insStore(LOpcode op, LInsp v, LInsp b, int32_t d)
{
clear(b);
return out->insStorei(v, b, d);
return out->insStore(op, v, b, d);
}
LInsp LoadFilter::insCall(const CallInfo *ci, LInsp args[])
@ -2250,18 +2318,12 @@ namespace nanojit
const void *end = (const char*)start + e->size;
const char *name = e->name;
if (p == start) {
if (!(logc->lcbits & LC_NoCodeAddrs))
VMPI_sprintf(b,"%p %s",p,name);
else
VMPI_strcpy(b, name);
VMPI_sprintf(b,"%p %s",p,name);
return dup(b);
}
else if (p > start && p < end) {
int32_t d = int32_t(intptr_t(p)-intptr_t(start)) >> e->align;
if (!(logc->lcbits & LC_NoCodeAddrs))
VMPI_sprintf(b, "%p %s+%d", p, name, d);
else
VMPI_sprintf(b,"%s+%d", name, d);
VMPI_sprintf(b, "%p %s+%d", p, name, d);
return dup(b);
}
else {

View File

@ -120,16 +120,28 @@ namespace nanojit
};
inline bool isCseOpcode(LOpcode op) {
op = LOpcode(op & ~LIR64);
return op >= LIR_int && op <= LIR_uge;
return (op >= LIR_int && op <= LIR_uge) ||
(op >= LIR_quad && op <= LIR_quge);
}
inline bool isRetOpcode(LOpcode op) {
return (op & ~LIR64) == LIR_ret;
return op == LIR_ret || op == LIR_fret;
}
LOpcode f64arith_to_i32arith(LOpcode op);
LOpcode i32cmp_to_i64cmp(LOpcode op);
// Array holding the 'repkind' field from LIRopcode.tbl.
// Array holding the 'repKind' field from LIRopcode.tbl.
extern const uint8_t repKinds[];
enum LTy {
LTy_Void, // no value/no type
LTy_I32, // 32-bit integer
LTy_I64, // 64-bit integer
LTy_F64 // 64-bit float
};
// Array holding the 'retType' field from LIRopcode.tbl.
extern const LTy retTypes[];
//-----------------------------------------------------------------------
// Low-level instructions. This is a bit complicated, because we have a
// variable-width representation to minimise space usage.
@ -213,7 +225,7 @@ namespace nanojit
LRK_C,
LRK_P,
LRK_I,
LRK_I64,
LRK_N64,
LRK_Jtbl,
LRK_None // this one is used for unused opcode numbers
};
@ -228,7 +240,7 @@ namespace nanojit
class LInsC;
class LInsP;
class LInsI;
class LInsI64;
class LInsN64;
class LInsJtbl;
class LIns
@ -265,7 +277,7 @@ namespace nanojit
inline LInsC* toLInsC() const;
inline LInsP* toLInsP() const;
inline LInsI* toLInsI() const;
inline LInsI64* toLInsI64() const;
inline LInsN64* toLInsN64() const;
inline LInsJtbl*toLInsJtbl()const;
void staticSanityCheck();
@ -284,7 +296,7 @@ namespace nanojit
inline void initLInsC(LOpcode opcode, LIns** args, const CallInfo* ci);
inline void initLInsP(int32_t arg, int32_t kind);
inline void initLInsI(LOpcode opcode, int32_t imm32);
inline void initLInsI64(LOpcode opcode, int64_t imm64);
inline void initLInsN64(LOpcode opcode, int64_t imm64);
inline void initLInsJtbl(LIns* index, uint32_t size, LIns** table);
LOpcode opcode() const { return lastWord.opcode; }
@ -349,7 +361,7 @@ namespace nanojit
// For LInsI.
inline int32_t imm32() const;
// For LInsI64.
// For LInsN64.
inline int32_t imm64_0() const;
inline int32_t imm64_1() const;
inline uint64_t imm64() const;
@ -416,9 +428,9 @@ namespace nanojit
NanoAssert(LRK_None != repKinds[opcode()]);
return LRK_I == repKinds[opcode()];
}
bool isLInsI64() const {
bool isLInsN64() const {
NanoAssert(LRK_None != repKinds[opcode()]);
return LRK_I64 == repKinds[opcode()];
return LRK_N64 == repKinds[opcode()];
}
bool isLInsJtbl() const {
NanoAssert(LRK_None != repKinds[opcode()]);
@ -436,48 +448,35 @@ namespace nanojit
return opcode() == o;
}
bool isQuad() const {
LOpcode op = opcode();
#ifdef NANOJIT_64BIT
// callh in 64bit cpu's means a call that returns an int64 in a single register
return (!(op >= LIR_qeq && op <= LIR_quge) && (op & LIR64) != 0) ||
op == LIR_callh;
#else
// callh in 32bit cpu's means the 32bit MSW of an int64 result in 2 registers
return (op & LIR64) != 0;
#endif
LTy ty = retTypes[opcode()];
return ty == LTy_I64 || ty == LTy_F64;
}
bool isCond() const {
LOpcode op = opcode();
return (op == LIR_ov) || isCmp();
return (isop(LIR_ov)) || isCmp();
}
bool isFloat() const; // not inlined because it contains a switch
bool isCmp() const {
LOpcode op = opcode();
return (op >= LIR_eq && op <= LIR_uge) ||
return (op >= LIR_eq && op <= LIR_uge) ||
(op >= LIR_qeq && op <= LIR_quge) ||
(op >= LIR_feq && op <= LIR_fge);
}
bool isCall() const {
LOpcode op = opcode();
return (op & ~LIR64) == LIR_icall || op == LIR_qcall;
return isop(LIR_icall) || isop(LIR_fcall) || isop(LIR_qcall);
}
bool isStore() const {
LOpcode op = LOpcode(opcode() & ~LIR64);
return op == LIR_sti;
return isLInsSti();
}
bool isLoad() const {
LOpcode op = opcode();
return op == LIR_ldq || op == LIR_ld || op == LIR_ldc ||
op == LIR_ldqc || op == LIR_ldcs || op == LIR_ldcb;
return isLInsLd();
}
bool isGuard() const {
LOpcode op = opcode();
return op == LIR_x || op == LIR_xf || op == LIR_xt ||
op == LIR_xbarrier || op == LIR_xtbl;
return isop(LIR_x) || isop(LIR_xf) || isop(LIR_xt) ||
isop(LIR_xbarrier) || isop(LIR_xtbl);
}
// True if the instruction is a 32-bit or smaller constant integer.
bool isconst() const {
return opcode() == LIR_int;
return isop(LIR_int);
}
// True if the instruction is a 32-bit or smaller constant integer and
// has the value val when treated as a 32-bit signed integer.
@ -486,7 +485,7 @@ namespace nanojit
}
// True if the instruction is a constant quad value.
bool isconstq() const {
return opcode() == LIR_quad || opcode() == LIR_float;
return isop(LIR_quad) || isop(LIR_float);
}
// True if the instruction is a constant pointer value.
bool isconstp() const
@ -499,7 +498,7 @@ namespace nanojit
}
// True if the instruction is a constant float value.
bool isconstf() const {
return opcode() == LIR_float;
return isop(LIR_float);
}
bool isBranch() const {
@ -508,16 +507,16 @@ namespace nanojit
bool isPtr() {
#ifdef NANOJIT_64BIT
return isQuad();
return retTypes[opcode()] == LTy_I64;
#else
return !isQuad();
return retTypes[opcode()] == LTy_I32;
#endif
}
// Return true if removal of 'ins' from a LIR fragment could
// possibly change the behaviour of that fragment, even if any
// value computed by 'ins' is not used later in the fragment.
// In other words, can 'ins' possible alter control flow or memory?
// In other words, can 'ins' possibly alter control flow or memory?
// Note, this assumes that loads will never fault and hence cannot
// affect the control flow.
bool isStmt() {
@ -701,8 +700,8 @@ namespace nanojit
LIns* getLIns() { return &ins; };
};
// Used for LIR_quad.
class LInsI64
// Used for LIR_quad and LIR_float.
class LInsN64
{
private:
friend class LIns;
@ -750,7 +749,7 @@ namespace nanojit
LInsC* LIns::toLInsC() const { return (LInsC* )( uintptr_t(this+1) - sizeof(LInsC ) ); }
LInsP* LIns::toLInsP() const { return (LInsP* )( uintptr_t(this+1) - sizeof(LInsP ) ); }
LInsI* LIns::toLInsI() const { return (LInsI* )( uintptr_t(this+1) - sizeof(LInsI ) ); }
LInsI64* LIns::toLInsI64() const { return (LInsI64*)( uintptr_t(this+1) - sizeof(LInsI64) ); }
LInsN64* LIns::toLInsN64() const { return (LInsN64*)( uintptr_t(this+1) - sizeof(LInsN64) ); }
LInsJtbl*LIns::toLInsJtbl()const { return (LInsJtbl*)(uintptr_t(this+1) - sizeof(LInsJtbl)); }
void LIns::initLInsOp0(LOpcode opcode) {
@ -821,12 +820,12 @@ namespace nanojit
toLInsI()->imm32 = imm32;
NanoAssert(isLInsI());
}
void LIns::initLInsI64(LOpcode opcode, int64_t imm64) {
void LIns::initLInsN64(LOpcode opcode, int64_t imm64) {
markAsClear();
lastWord.opcode = opcode;
toLInsI64()->imm64_0 = int32_t(imm64);
toLInsI64()->imm64_1 = int32_t(imm64 >> 32);
NanoAssert(isLInsI64());
toLInsN64()->imm64_0 = int32_t(imm64);
toLInsN64()->imm64_1 = int32_t(imm64 >> 32);
NanoAssert(isLInsN64());
}
void LIns::initLInsJtbl(LIns* index, uint32_t size, LIns** table) {
markAsClear();
@ -898,11 +897,11 @@ namespace nanojit
inline int32_t LIns::imm32() const { NanoAssert(isconst()); return toLInsI()->imm32; }
inline int32_t LIns::imm64_0() const { NanoAssert(isconstq()); return toLInsI64()->imm64_0; }
inline int32_t LIns::imm64_1() const { NanoAssert(isconstq()); return toLInsI64()->imm64_1; }
inline int32_t LIns::imm64_0() const { NanoAssert(isconstq()); return toLInsN64()->imm64_0; }
inline int32_t LIns::imm64_1() const { NanoAssert(isconstq()); return toLInsN64()->imm64_1; }
uint64_t LIns::imm64() const {
NanoAssert(isconstq());
return (uint64_t(toLInsI64()->imm64_1) << 32) | uint32_t(toLInsI64()->imm64_0);
return (uint64_t(toLInsN64()->imm64_1) << 32) | uint32_t(toLInsN64()->imm64_0);
}
double LIns::imm64f() const {
union {
@ -1006,8 +1005,8 @@ namespace nanojit
virtual LInsp insLoad(LOpcode op, LIns* base, int32_t d) {
return out->insLoad(op, base, d);
}
virtual LInsp insStorei(LIns* value, LIns* base, int32_t d) {
return out->insStorei(value, base, d);
virtual LInsp insStore(LOpcode op, LIns* value, LIns* base, int32_t d) {
return out->insStore(op, value, base, d);
}
// args[] is in reverse order, ie. args[0] holds the rightmost arg.
virtual LInsp insCall(const CallInfo *call, LInsp args[]) {
@ -1039,6 +1038,8 @@ namespace nanojit
// Sign or zero extend integers to native integers. On 32-bit this is a no-op.
LIns* ins_i2p(LIns* intIns);
LIns* ins_u2p(LIns* uintIns);
// choose LIR_sti or LIR_stqi based on size of value
LIns* insStorei(LIns* value, LIns* base, int32_t d);
};
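A small hedged example of the reworked store interface (the writer and operand variables here are hypothetical):

    // Sketch: callers may now name the store width explicitly via insStore(),
    // or keep using insStorei(), which picks LIR_sti or LIR_stqi from the
    // operand width as before.
    lir->insStore(LIR_sts, val32, base, 0);   // store the low 16 bits of a 32-bit value
    lir->insStorei(valq, base, 8);            // quad operand -> LIR_stqi
    lir->insStorei(vali, base, 16);           // 32-bit operand -> LIR_sti
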
@ -1098,10 +1099,10 @@ namespace nanojit
char* name;
};
HashMap<LInsp, Entry*> names;
LabelMap *labels;
void formatImm(int32_t c, char *buf);
public:
public:
LabelMap *labels;
LirNameMap(Allocator& alloc, LabelMap *lm)
: alloc(alloc),
lircounts(alloc),
@ -1192,8 +1193,8 @@ namespace nanojit
LIns* insLoad(LOpcode v, LInsp base, int32_t disp) {
return add(out->insLoad(v, base, disp));
}
LIns* insStorei(LInsp v, LInsp b, int32_t d) {
return add(out->insStorei(v, b, d));
LIns* insStore(LOpcode op, LInsp v, LInsp b, int32_t d) {
return add(out->insStore(op, v, b, d));
}
LIns* insAlloc(int32_t size) {
return add(out->insAlloc(size));
@ -1374,7 +1375,7 @@ namespace nanojit
// LirWriter interface
LInsp insLoad(LOpcode op, LInsp base, int32_t disp);
LInsp insStorei(LInsp o1, LInsp o2, int32_t disp);
LInsp insStore(LOpcode op, LInsp o1, LInsp o2, int32_t disp);
LInsp ins0(LOpcode op);
LInsp ins1(LOpcode op, LInsp o1);
LInsp ins2(LOpcode op, LInsp o1, LInsp o2);
@ -1483,7 +1484,7 @@ namespace nanojit
LInsp ins0(LOpcode);
LInsp insLoad(LOpcode, LInsp base, int32_t disp);
LInsp insStorei(LInsp v, LInsp b, int32_t d);
LInsp insStore(LOpcode op, LInsp v, LInsp b, int32_t d);
LInsp insCall(const CallInfo *call, LInsp args[]);
};

View File

@ -42,16 +42,16 @@
* Definitions of LIR opcodes. If you need to allocate an opcode, look
* for a name beginning with "__" and claim it.
*
* Includers must define OPDEF and OPD64 macros of the following forms:
* Includers must define an OPDEF macro of the following form:
*
* #define OPDEF(op,val,repkind) ...
* #define OPD64(op,val,repkind) ...
* #define OPDEF(op, val, repKind, retType) ...
*
* Selected arguments can then be used within the macro expansions.
* - op Bytecode name, token-pasted after "LIR_" to form an LOpcode.
* - val Bytecode value, which is the LOpcode enumerator value.
* - repkind Indicates how the instruction is represented in memory; XYZ
* - repKind Indicates how the instruction is represented in memory; XYZ
* corresponds to LInsXYZ and LRK_XYZ.
* - retType Type (LTy) of the value returned by the instruction.
*
* This file is best viewed with 128 columns:
12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
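For illustration (an editorial sketch mirroring how LIR.cpp builds its lirNames/repKinds tables, with a made-up table name), an includer expands the table by defining the four-argument OPDEF around the #include:

    // Sketch: build a name table from the opcode table.
    #define OPDEF(op, number, repKind, retType) #op,
    static const char* opNames[] = {
    #include "LIRopcode.tbl"
        NULL
    };
    #undef OPDEF
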
@ -63,36 +63,36 @@
/* op val name operands */
/* special operations (must be 0..N) */
OPDEF(start, 0, Op0) // start of a fragment
OPDEF(regfence, 1, Op0) // register fence, no register allocation is allowed across this meta instruction
OPDEF(skip, 2, Sk) // holds blobs ("payloads") of data; also links pages
OPDEF(__3, 3, None)
OPDEF(__4, 4, None)
OPDEF(__5, 5, None)
OPDEF(__6, 6, None)
OPDEF(start, 0, Op0, Void) // start of a fragment
OPDEF(regfence, 1, Op0, Void) // register fence, no register allocation is allowed across this meta instruction
OPDEF(skip, 2, Sk, Void) // used to link code chunks
/* non-pure operations */
OPDEF(iaddp, 7, Op2) // integer addition for temporary pointer calculations (32bit only)
OPDEF(iparam, 8, P) // load a parameter (32bit register or stk location)
OPDEF(__9, 9, None)
OPDEF(ld, 10, Ld) // 32-bit load
OPDEF(ialloc, 11, I) // alloc some stack space (value is 32bit address)
OPDEF(sti, 12, Sti) // 32-bit store
OPDEF(ret, 13, Op1) // return a word-sized value
OPDEF(live, 14, Op1) // extend live range of reference
OPDEF(flive, 15, Op1) // extend live range of a floating point value reference
OPDEF(icall, 16, C) // subroutine call returning a 32-bit value
OPDEF(__17, 17, None)
OPDEF(ldsb, 3, Ld, I32) // 8-bit integer load, sign-extend to 32-bit
OPDEF(ldss, 4, Ld, I32) // 16-bit integer load, sign-extend to 32-bit
OPDEF(ldzb, 5, Ld, I32) // 8-bit integer load, zero extend to 32-bit
OPDEF(ldzs, 6, Ld, I32) // 16-bit integer load, zero extend to 32-bit
OPDEF(iaddp, 7, Op2, I32) // integer addition for temporary pointer calculations (32bit only)
OPDEF(iparam, 8, P, I32) // load a parameter (32bit register or stk location)
OPDEF(stb, 9, Sti, Void) // 8-bit integer store
OPDEF(ld, 10, Ld, I32) // 32-bit integer load
OPDEF(ialloc, 11, I, I32) // alloc some stack space (value is 32bit address)
OPDEF(sti, 12, Sti, Void) // 32-bit integer store
OPDEF(ret, 13, Op1, Void) // return a word-sized value
OPDEF(live, 14, Op1, Void) // extend live range of reference
OPDEF(flive, 15, Op1, Void) // extend live range of a floating point value reference
OPDEF(icall, 16, C, I32) // subroutine call returning a 32-bit value
OPDEF(sts, 17, Sti, Void) // 16-bit integer store
/* guards */
OPDEF(x, 18, Op2) // exit always
OPDEF(x, 18, Op2, Void) // exit always
/* branches */
OPDEF(j, 19, Op2) // jump always
OPDEF(jt, 20, Op2) // jump if true
OPDEF(jf, 21, Op2) // jump if false
OPDEF(label, 22, Op0) // a jump target (no machine code is emitted for this)
OPDEF(jtbl, 23, Jtbl) // jump to address in table
OPDEF(j, 19, Op2, Void) // jump always
OPDEF(jt, 20, Op2, Void) // jump if true
OPDEF(jf, 21, Op2, Void) // jump if false
OPDEF(label, 22, Op0, Void) // a jump target (no machine code is emitted for this)
OPDEF(jtbl, 23, Jtbl, Void) // jump to address in table
/* operators */
@ -101,158 +101,154 @@ OPDEF(jtbl, 23, Jtbl) // jump to address in table
* common-subexpression-elimination detection code.
*/
OPDEF(int, 24, I) // constant 32-bit integer
OPDEF(cmov, 25, Op3) // conditional move
OPDEF(callh, 26, Op1) // get the high 32 bits of a call returning a 64-bit value in two 32bit registers
OPDEF(int, 24, I, I32) // constant 32-bit integer
OPDEF(cmov, 25, Op3, I32) // conditional move
OPDEF(callh, 26, Op1, I32) // get the high 32 bits of a call returning a 64-bit value in two 32bit registers
/*
* feq though fge must only be used on float arguments. They return integers.
* For all except feq, (op ^ 1) is the op which flips the
* left and right sides of the comparison, so (lt ^ 1) == gt, or the operator
* "<" is xored with 1 to get ">". Similarly, (op ^ 3) is the complement of
* op, so (lt ^ 1) == ge, or the complement of the operator "<" is ">=" xored
* with 3. NB: These opcodes must remain continuous so that comparison-opcode
* detection works correctly.
*/
OPDEF(feq, 27, Op2) // floating-point equality
OPDEF(flt, 28, Op2) // floating-point less-than
OPDEF(fgt, 29, Op2) // floating-point greater-than
OPDEF(fle, 30, Op2) // floating-point less-than-or-equal
OPDEF(fge, 31, Op2) // floating-point greater-than-or-equal
// feq through fge must only be used on float arguments. They return integers.
// For all except feq, (op ^ 1) is the op which flips the
// left and right sides of the comparison, so (lt ^ 1) == gt, or the operator
// "<" is xored with 1 to get ">". Similarly, (op ^ 3) is the complement of
// op, so (lt ^ 3) == ge, or the complement of the operator "<" is ">=" xored
// with 3. NB: These opcodes must remain continuous so that comparison-opcode
// detection works correctly.
OPDEF(feq, 27, Op2, I32) // floating-point equality
OPDEF(flt, 28, Op2, I32) // floating-point less-than
OPDEF(fgt, 29, Op2, I32) // floating-point greater-than
OPDEF(fle, 30, Op2, I32) // floating-point less-than-or-equal
OPDEF(fge, 31, Op2, I32) // floating-point greater-than-or-equal
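As an editorial aside, the flip/complement identities the comment above relies on can be spot-checked with the existing NanoStaticAssert macro; these assertions are illustrative only and are not part of the table:

    // Sketch: op ^ 1 swaps operand order, op ^ 3 negates the comparison.
    NanoStaticAssert((LIR_flt ^ 1) == LIR_fgt);   // flt(a,b) == fgt(b,a)
    NanoStaticAssert((LIR_fle ^ 1) == LIR_fge);   // fle(a,b) == fge(b,a)
    NanoStaticAssert((LIR_flt ^ 3) == LIR_fge);   // !(a < b)  ==  a >= b
    NanoStaticAssert((LIR_fgt ^ 3) == LIR_fle);   // !(a > b)  ==  a <= b
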
OPDEF(ldcb, 32, Ld) // non-volatile 8-bit load
OPDEF(ldcs, 33, Ld) // non-volatile 16-bit load
OPDEF(ldc, 34, Ld) // non-volatile 32-bit load
OPDEF(ldcb, 32, Ld, I32) // non-volatile 8-bit integer load, zero-extended to 32-bit
OPDEF(ldcs, 33, Ld, I32) // non-volatile 16-bit integer load, zero-extended to 32-bit
OPDEF(ldc, 34, Ld, I32) // non-volatile 32-bit integer load, zero-extended to 32-bit
OPDEF(neg, 35, Op1) // integer negation
OPDEF(add, 36, Op2) // integer addition
OPDEF(sub, 37, Op2) // integer subtraction
OPDEF(mul, 38, Op2) // integer multiplication
OPDEF(div, 39, Op2) // integer division
OPDEF(mod, 40, Op1) // hack: get the modulus from a LIR_div result, for x86 only
OPDEF(neg, 35, Op1, I32) // integer negation
OPDEF(add, 36, Op2, I32) // integer addition
OPDEF(sub, 37, Op2, I32) // integer subtraction
OPDEF(mul, 38, Op2, I32) // integer multiplication
OPDEF(div, 39, Op2, I32) // integer division
OPDEF(mod, 40, Op1, I32) // hack: get the modulus from a LIR_div result, for x86 only
OPDEF(and, 41, Op2) // 32-bit bitwise AND
OPDEF(or, 42, Op2) // 32-bit bitwise OR
OPDEF(xor, 43, Op2) // 32-bit bitwise XOR
OPDEF(not, 44, Op1) // 32-bit bitwise NOT
OPDEF(lsh, 45, Op2) // 32-bit left shift
OPDEF(rsh, 46, Op2) // 32-bit right shift with sign-extend (>>)
OPDEF(ush, 47, Op2) // 32-bit unsigned right shift (>>>)
OPDEF(and, 41, Op2, I32) // 32-bit bitwise AND
OPDEF(or, 42, Op2, I32) // 32-bit bitwise OR
OPDEF(xor, 43, Op2, I32) // 32-bit bitwise XOR
OPDEF(not, 44, Op1, I32) // 32-bit bitwise NOT
OPDEF(lsh, 45, Op2, I32) // 32-bit left shift
OPDEF(rsh, 46, Op2, I32) // 32-bit right shift with sign-extend (>>)
OPDEF(ush, 47, Op2, I32) // 32-bit unsigned right shift (>>>)
// conditional guards, op^1 to complement. Only things that are
// Conditional guards, op^1 to complement. Only things that are
// isCond() can be passed to these.
OPDEF(xt, 48, Op2) // exit if true (0x30 0011 0000)
OPDEF(xf, 49, Op2) // exit if false (0x31 0011 0001)
OPDEF(xt, 48, Op2, Void) // exit if true (0x30 0011 0000)
OPDEF(xf, 49, Op2, Void) // exit if false (0x31 0011 0001)
OPDEF(qlo, 50, Op1) // get the low 32 bits of a 64-bit value
OPDEF(qhi, 51, Op1) // get the high 32 bits of a 64-bit value
OPDEF(qlo, 50, Op1, I32) // get the low 32 bits of a 64-bit value
OPDEF(qhi, 51, Op1, I32) // get the high 32 bits of a 64-bit value
OPDEF(__52, 52, None)
OPDEF(__53, 53, None)
OPDEF(ldcsb, 52, Ld, Void) // non-volatile 8-bit integer load, sign-extended to 32-bit
OPDEF(ldcss, 53, Ld, Void) // non-volatile 16-bit integer load, sign-extended to 32-bit
// This must be right before LIR_eq, so (op&~LIR64 - LIR_ov) can be indexed
// into a convenient table.
OPDEF(ov, 54, Op1) // test for overflow; value must have just been computed
OPDEF(ov, 54, Op1, I32) // test for overflow; value must have just been computed
// Integer (32 bit) relational operators. (op ^ 1) is the op which flips the
// Integer (32-bit) relational operators. (op ^ 1) is the op which flips the
// left and right sides of the comparison, so (lt ^ 1) == gt, or the operator
// "<" is xored with 1 to get ">". Similarly, (op ^ 3) is the complement of
// op, so (lt ^ 3) == ge, or the complement of the operator "<" is ">=" xored
// with 3. 'u' prefix indicates the unsigned integer variant.
// NB: These opcodes must remain continuous so that comparison-opcode detection
// works correctly.
OPDEF(eq, 55, Op2) // integer equality
OPDEF(lt, 56, Op2) // signed integer less-than (0x38 0011 1000)
OPDEF(gt, 57, Op2) // signed integer greater-than (0x39 0011 1001)
OPDEF(le, 58, Op2) // signed integer less-than-or-equal (0x3A 0011 1010)
OPDEF(ge, 59, Op2) // signed integer greater-than-or-equal (0x3B 0011 1011)
OPDEF(ult, 60, Op2) // unsigned integer less-than (0x3C 0011 1100)
OPDEF(ugt, 61, Op2) // unsigned integer greater-than (0x3D 0011 1101)
OPDEF(ule, 62, Op2) // unsigned integer less-than-or-equal (0x3E 0011 1110)
OPDEF(uge, 63, Op2) // unsigned integer greater-than-or-equal (0x3F 0011 1111)
OPDEF(eq, 55, Op2, I32) // integer equality
OPDEF(lt, 56, Op2, I32) // signed integer less-than (0x38 0011 1000)
OPDEF(gt, 57, Op2, I32) // signed integer greater-than (0x39 0011 1001)
OPDEF(le, 58, Op2, I32) // signed integer less-than-or-equal (0x3A 0011 1010)
OPDEF(ge, 59, Op2, I32) // signed integer greater-than-or-equal (0x3B 0011 1011)
OPDEF(ult, 60, Op2, I32) // unsigned integer less-than (0x3C 0011 1100)
OPDEF(ugt, 61, Op2, I32) // unsigned integer greater-than (0x3D 0011 1101)
OPDEF(ule, 62, Op2, I32) // unsigned integer less-than-or-equal (0x3E 0011 1110)
OPDEF(uge, 63, Op2, I32) // unsigned integer greater-than-or-equal (0x3F 0011 1111)
OPD64(__0_64, 0, None)
OPDEF(__64, 64, None, Void)
OPD64(file, 1, Op1) // source filename for debug symbols
OPD64(line, 2, Op1) // source line number for debug symbols
OPD64(xbarrier, 3, Op2) // memory barrier; doesn't exit, but flushes all values to the stack
OPD64(xtbl, 4, Op2) // exit via indirect jump
OPDEF(file, 65, Op1, Void) // source filename for debug symbols
OPDEF(line, 66, Op1, Void) // source line number for debug symbols
OPDEF(xbarrier, 67, Op2, Void) // memory barrier; doesn't exit, but flushes all values to the stack
OPDEF(xtbl, 68, Op2, Void) // exit via indirect jump
OPD64(__5_64, 5, None)
OPD64(__6_64, 6, None)
OPD64(qaddp, LIR_iaddp, Op2) // integer addition for temp pointer calculations (64bit only)
OPD64(qparam, LIR_iparam, P) // load a parameter (64bit register or stk location)
OPD64(__9_64, 9, None)
OPDEF(__69, 69, None, Void)
OPDEF(__70, 70, None, Void)
OPDEF(qaddp, 71, Op2, I64) // integer addition for temp pointer calculations (64bit only)
OPDEF(qparam, 72, P, I64) // load a parameter (64bit register or stk location)
OPDEF(__73, 73, None, Void)
OPD64(ldq, LIR_ld, Ld) // 64-bit (quad) load
OPDEF(ldq, 74, Ld, I64) // 64-bit (quad) load
OPD64(qalloc, LIR_ialloc, I) // allocate some stack space (value is 64bit address)
OPDEF(qalloc, 75, I, I64) // allocate some stack space (value is 64bit address)
OPD64(stqi, LIR_sti, Sti) // 64-bit (quad) store
OPD64(fret, LIR_ret, Op1)
OPDEF(stqi, 76, Sti, Void) // 64-bit (quad) store
OPDEF(fret, 77, Op1, Void)
OPD64(__14_64, 14, None)
OPD64(__15_64, 15, None)
OPDEF(st32f, 78, Sti, Void) // store 64-bit float as a 32-bit float (dropping precision)
OPDEF(ld32f, 79, Ld, F64) // load 32-bit float and widen to 64-bit float
OPD64(fcall, LIR_icall, C) // subroutine call returning 64-bit (quad) double value
OPD64(qcall, 17, C) // subroutine call returning 64-bit (quad) integer value
OPDEF(fcall, 80, C, F64) // subroutine call returning 64-bit (quad) double value
OPDEF(qcall, 81, C, I64) // subroutine call returning 64-bit (quad) integer value
OPD64(__18_64, 18, None)
OPD64(__19_64, 19, None)
OPD64(__20_64, 20, None)
OPD64(__21_64, 21, None)
OPD64(__22_64, 22, None)
OPD64(__23_64, 23, None)
OPDEF(__82, 82, None, Void)
OPDEF(__83, 83, None, Void)
OPDEF(__84, 84, None, Void)
OPDEF(__85, 85, None, Void)
OPDEF(__86, 86, None, Void)
OPDEF(__87, 87, None, Void)
// We strip off the 64 bit flag and compare that the opcode is between LIR_int
// and LIR_uge to decide whether we can CSE the opcode. All opcodes below
// this marker are subject to CSE.
// All opcodes below this marker are subject to CSE.
OPD64(quad, LIR_int, I64) // 64-bit (quad) constant value
OPD64(qcmov, LIR_cmov, Op3) // 64-bit conditional move
OPDEF(quad, 88, N64, I64) // 64-bit (quad) constant value
OPDEF(qcmov, 89, Op3, I64) // 64-bit conditional move
OPD64(i2q, 26, Op1) // sign-extend i32 to i64
OPD64(u2q, 27, Op1) // zero-extend u32 to u64
OPD64(i2f, 28, Op1) // convert a signed 32-bit integer to a float
OPD64(u2f, 29, Op1) // convert an unsigned 32-bit integer to a float
OPDEF(i2q, 90, Op1, I64) // sign-extend i32 to i64
OPDEF(u2q, 91, Op1, I64) // zero-extend u32 to u64
OPDEF(i2f, 92, Op1, F64) // convert a signed 32-bit integer to a float
OPDEF(u2f, 93, Op1, F64) // convert an unsigned 32-bit integer to a float
OPD64(__30_64, 30, None)
OPD64(__31_64, 31, None)
OPD64(__32_64, 32, None)
OPD64(__33_64, 33, None)
OPDEF(__94, 94, None, Void)
OPDEF(__95, 95, None, Void)
OPDEF(__96, 96, None, Void)
OPDEF(__97, 97, None, Void)
OPD64(ldqc, LIR_ldc, Ld) // non-volatile 64-bit load
OPDEF(ldqc, 98, Ld, I64) // non-volatile 64-bit load
OPD64(fneg, LIR_neg, Op1) // floating-point negation
OPD64(fadd, LIR_add, Op2) // floating-point addition
OPD64(fsub, LIR_sub, Op2) // floating-point subtraction
OPD64(fmul, LIR_mul, Op2) // floating-point multiplication
OPD64(fdiv, LIR_div, Op2) // floating-point division
OPD64(fmod, LIR_mod, Op2) // floating-point modulus(?)
OPDEF(fneg, 99, Op1, F64) // floating-point negation
OPDEF(fadd, 100, Op2, F64) // floating-point addition
OPDEF(fsub, 101, Op2, F64) // floating-point subtraction
OPDEF(fmul, 102, Op2, F64) // floating-point multiplication
OPDEF(fdiv, 103, Op2, F64) // floating-point division
OPDEF(fmod, 104, Op2, F64) // floating-point modulus(?)
OPD64(qiand, 41, Op2) // 64-bit bitwise AND
OPD64(qior, 42, Op2) // 64-bit bitwise OR
OPD64(qxor, 43, Op2) // 64-bit bitwise XOR
OPD64(__44_64, 44, None)
OPD64(qilsh, 45, Op2) // 64-bit left shift
OPD64(qirsh, 46, Op2) // 64-bit signed right shift
OPD64(qursh, 47, Op2) // 64-bit unsigned right shift
OPD64(qiadd, 48, Op2) // 64-bit bitwise ADD
OPDEF(qiand, 105, Op2, I64) // 64-bit bitwise AND
OPDEF(qior, 106, Op2, I64) // 64-bit bitwise OR
OPDEF(qxor, 107, Op2, I64) // 64-bit bitwise XOR
OPDEF(__108, 108, None, Void)
OPDEF(qilsh, 109, Op2, I64) // 64-bit left shift
OPDEF(qirsh, 110, Op2, I64) // 64-bit signed right shift
OPDEF(qursh, 111, Op2, I64) // 64-bit unsigned right shift
OPDEF(qiadd, 112, Op2, I64) // 64-bit bitwise ADD
OPD64(__49_64, 49, None)
OPD64(qjoin, 50, Op2) // join two 32-bit values (1st arg is low bits, 2nd is high)
OPD64(__51_64, 51, None)
OPD64(__52_64, 52, None)
OPD64(__53_64, 53, None)
OPD64(float, 54, I64)
OPDEF(ldc32f, 113, Ld, F64) // non-volatile load 32-bit float and widen to 64-bit float
OPDEF(qjoin, 114, Op2, F64) // join two 32-bit values (1st arg is low bits, 2nd is high)
OPDEF(__115, 115, None, Void)
OPDEF(__116, 116, None, Void)
OPDEF(__117, 117, None, Void)
OPDEF(float, 118, N64, F64)
// 64bit equivalents for integer comparisons
OPD64(qeq, LIR_eq, Op2) // integer equality
OPD64(qlt, LIR_lt, Op2) // signed integer less-than (0x78 0111 1000)
OPD64(qgt, LIR_gt, Op2) // signed integer greater-than (0x79 0111 1001)
OPD64(qle, LIR_le, Op2) // signed integer less-than-or-equal (0x7A 0111 1010)
OPD64(qge, LIR_ge, Op2) // signed integer greater-than-or-equal (0x7B 0111 1011)
OPD64(qult, LIR_ult, Op2) // unsigned integer less-than (0x7C 0111 1100)
OPD64(qugt, LIR_ugt, Op2) // unsigned integer greater-than (0x7D 0111 1101)
OPD64(qule, LIR_ule, Op2) // unsigned integer less-than-or-equal (0x7E 0111 1110)
OPD64(quge, LIR_uge, Op2) // unsigned integer greater-than-or-equal (0x7F 0111 1111)
// Integer (64-bit) relational operators.
// NB: These opcodes must remain continuous so that comparison-opcode detection
// works correctly.
OPDEF(qeq, 119, Op2, I32) // integer equality
OPDEF(qlt, 120, Op2, I32) // signed integer less-than (0x78 0111 1000)
OPDEF(qgt, 121, Op2, I32) // signed integer greater-than (0x79 0111 1001)
OPDEF(qle, 122, Op2, I32) // signed integer less-than-or-equal (0x7A 0111 1010)
OPDEF(qge, 123, Op2, I32) // signed integer greater-than-or-equal (0x7B 0111 1011)
OPDEF(qult, 124, Op2, I32) // unsigned integer less-than (0x7C 0111 1100)
OPDEF(qugt, 125, Op2, I32) // unsigned integer greater-than (0x7D 0111 1101)
OPDEF(qule, 126, Op2, I32) // unsigned integer less-than-or-equal (0x7E 0111 1110)
OPDEF(quge, 127, Op2, I32) // unsigned integer greater-than-or-equal (0x7F 0111 1111)

View File

@ -61,17 +61,11 @@ namespace nanojit {
: unsigned
#endif
{
// flags; upper bits reserved
LIR64 = 0x40, // result is double or quad
#define OPDEF(op, number, repkind) \
#define OPDEF(op, number, repKind, retType) \
LIR_##op = (number),
#define OPD64(op, number, repkind) \
LIR_##op = ((number) | LIR64),
#include "LIRopcode.tbl"
LIR_sentinel,
#undef OPDEF
#undef OPD64
#ifdef NANOJIT_64BIT
# define PTR_SIZE(a,b) b
@ -125,6 +119,10 @@ namespace nanojit {
# define NJ_JTBL_SUPPORTED 0
#endif
#ifndef NJ_EXPANDED_LOADSTORE_SUPPORTED
# define NJ_EXPANDED_LOADSTORE_SUPPORTED 0
#endif
namespace nanojit {
inline Register nextreg(Register r) {
@ -183,19 +181,14 @@ namespace nanojit {
#elif defined(NJ_VERBOSE)
// Used for printing native instructions. Like Assembler::outputf(),
// but only outputs if LC_Assembly is set. Also prepends the output
// with the address of the current native instruction if
// LC_NoCodeAddrs is not set.
// with the address of the current native instruction.
#define asm_output(...) do { \
counter_increment(native); \
if (_logc->lcbits & LC_Assembly) { \
outline[0]='\0'; \
if (outputAddr) \
VMPI_sprintf(outline, "%010lx ", (unsigned long)_nIns); \
else \
VMPI_memset(outline, (int)' ', 10+3); \
VMPI_sprintf(outline, "%010lx ", (unsigned long)_nIns); \
sprintf(&outline[13], ##__VA_ARGS__); \
output(); \
outputAddr=(_logc->lcbits & LC_NoCodeAddrs) ? false : true; \
} \
} while (0) /* no semi */
#define gpn(r) regNames[(r)]

View File

@ -824,6 +824,37 @@ Assembler::asm_stkarg(LInsp arg, int stkd)
void
Assembler::asm_call(LInsp ins)
{
if (ARM_VFP && ins->isop(LIR_fcall)) {
/* Because ARM actually returns the result in (R0,R1), and not in a
* floating point register, the code to move the result into a correct
* register is below. We do nothing here.
*
* The reason being that if we did something here, the final code
* sequence we'd get would be something like:
* MOV {R0-R3},params [from below]
* BL function [from below]
* MOV {R0-R3},spilled data [from evictScratchRegs()]
* MOV Dx,{R0,R1} [from here]
* which is clearly broken.
*
* This is not a problem for non-floating point calls, because the
* restoring of spilled data into R0 is done via a call to
* prepResultReg(R0) in the other branch of this if-then-else,
* meaning that evictScratchRegs() will not modify R0. However,
* prepResultReg is not aware of the concept of using a register pair
* (R0,R1) for the result of a single operation, so it can only be
* used here with the ultimate VFP register, and not R0/R1, which
* potentially allows for R0/R1 to get corrupted as described.
*/
} else {
prepResultReg(ins, rmask(retRegs[0]));
}
// Do this after we've handled the call result, so we don't
// force the call result to be spilled unnecessarily.
evictScratchRegs();
const CallInfo* call = ins->callInfo();
ArgSize sizes[MAXARGS];
uint32_t argc = call->get_sizes(sizes);
@ -835,8 +866,8 @@ Assembler::asm_call(LInsp ins)
// If we're using VFP, and the return type is a double, it'll come back in
// R0/R1. We need to either place it in the result fp reg, or store it.
// See comments in asm_prep_fcall() for more details as to why this is
// necessary here for floating point calls, but not for integer calls.
// See comments above for more details as to why this is necessary here
// for floating point calls, but not for integer calls.
if (ARM_VFP && ins->isUsed()) {
// Determine the size (and type) of the instruction result.
ArgSize rsize = (ArgSize)(call->_argtypes & ARGSIZE_MASK_ANY);
@ -1172,8 +1203,21 @@ Assembler::asm_qjoin(LIns *ins)
}
void
Assembler::asm_store32(LIns *value, int dr, LIns *base)
Assembler::asm_store32(LOpcode op, LIns *value, int dr, LIns *base)
{
switch (op) {
case LIR_sti:
// handled by mainline code below for now
break;
case LIR_stb:
case LIR_sts:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
return;
}
Register ra, rb;
if (base->isop(LIR_alloc)) {
rb = FP;
@ -1268,6 +1312,20 @@ Assembler::asm_load64(LInsp ins)
{
//asm_output("<<< load64");
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
// handled by mainline code below for now
break;
case LIR_ld32f:
case LIR_ldc32f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
return;
}
NanoAssert(ins->isQuad());
LIns* base = ins->oprnd1();
@ -1310,10 +1368,22 @@ Assembler::asm_load64(LInsp ins)
}
void
Assembler::asm_store64(LInsp value, int dr, LInsp base)
Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
{
//asm_output("<<< store64 (dr: %d)", dr);
switch (op) {
case LIR_stqi:
// handled by mainline code below for now
break;
case LIR_st32f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
return;
}
if (ARM_VFP) {
Register rb = findRegFor(base, GpRegs);
@ -2028,33 +2098,6 @@ Assembler::asm_fcmp(LInsp ins)
FCMPD(ra, rb, e_bit);
}
Register
Assembler::asm_prep_fcall(LInsp)
{
/* Because ARM actually returns the result in (R0,R1), and not in a
* floating point register, the code to move the result into a correct
* register is at the beginning of asm_call(). This function does
* nothing.
*
* The reason being that if this function did something, the final code
* sequence we'd get would be something like:
* MOV {R0-R3},params [from asm_call()]
* BL function [from asm_call()]
* MOV {R0-R3},spilled data [from evictScratchRegs()]
* MOV Dx,{R0,R1} [from this function]
* which is clearly broken.
*
* This is not a problem for non-floating point calls, because the
* restoring of spilled data into R0 is done via a call to prepResultReg(R0)
* at the same point in the sequence as this function is called, meaning that
* evictScratchRegs() will not modify R0. However, prepResultReg is not aware
* of the concept of using a register pair (R0,R1) for the result of a single
* operation, so it can only be used here with the ultimate VFP register, and
* not R0/R1, which potentially allows for R0/R1 to get corrupted as described.
*/
return UnknownReg;
}
/* Call this with targ set to 0 if the target is not yet known and the branch
* will be patched up later.
*/
@ -2440,7 +2483,7 @@ Assembler::asm_neg_not(LInsp ins)
}
void
Assembler::asm_ld(LInsp ins)
Assembler::asm_load32(LInsp ins)
{
LOpcode op = ins->opcode();
LIns* base = ins->oprnd1();
@ -2449,25 +2492,31 @@ Assembler::asm_ld(LInsp ins)
Register rr = prepResultReg(ins, GpRegs);
Register ra = getBaseReg(op, base, d, GpRegs);
// these will always be 4-byte aligned
if (op == LIR_ld || op == LIR_ldc) {
LDR(rr, ra, d);
return;
switch(op) {
case LIR_ldzb:
case LIR_ldcb:
LDRB(rr, ra, d);
return;
case LIR_ldzs:
case LIR_ldcs:
// these are expected to be 2 or 4-byte aligned
LDRH(rr, ra, d);
return;
case LIR_ld:
case LIR_ldc:
// these are expected to be 4-byte aligned
LDR(rr, ra, d);
return;
case LIR_ldsb:
case LIR_ldss:
case LIR_ldcsb:
case LIR_ldcss:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
return;
}
// these will be 2 or 4-byte aligned
if (op == LIR_ldcs) {
LDRH(rr, ra, d);
return;
}
// aaand this is just any byte.
if (op == LIR_ldcb) {
LDRB(rr, ra, d);
return;
}
NanoAssertMsg(0, "Unsupported instruction in asm_ld");
}
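// Aside (editor's gloss, not part of the patch): the expanded LIR load/store
// forms dispatched above follow a consistent naming scheme that can be read
// off the backends in this patch -- ldzb/ldzs are zero-extending 8/16-bit
// loads, ldsb/ldss their sign-extending counterparts, ld is the plain 32-bit
// load, and ld32f loads a single-precision float and widens it to double;
// the ldc* spellings appear to be the CSE-able ("const") variants of the same
// loads. Likewise stb/sts/sti/stqi are 8/16/32/64-bit stores and st32f
// narrows a double to float32 on store. Backends that define
// NJ_EXPANDED_LOADSTORE_SUPPORTED as 0 (ARM, PPC and SPARC here) stub the new
// sub-word and float32 forms with the NanoAssertMsg guards seen above.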
void

View File

@ -79,6 +79,7 @@ namespace nanojit
#define NJ_MAX_PARAMETERS 16
#define NJ_ALIGN_STACK 8
#define NJ_JTBL_SUPPORTED 1
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 0
#define NJ_CONSTANT_POOLS
const int NJ_MAX_CPOOL_OFFSET = 4096;
@ -799,22 +800,6 @@ enum {
} \
} while(0)
#define STMIA(_b, _mask) do { \
underrunProtect(4); \
NanoAssert(IsGpReg(_b)); \
NanoAssert(((_mask)&rmask(_b))==0 && isU8(_mask)); \
*(--_nIns) = (NIns)(COND_AL | (0x8A<<20) | ((_b)<<16) | (_mask)&0xFF); \
asm_output("stmia %s!,{0x%x}", gpn(_b), _mask); \
} while (0)
#define LDMIA(_b, _mask) do { \
underrunProtect(4); \
NanoAssert(IsGpReg(_b)); \
NanoAssert(((_mask)&rmask(_b))==0 && isU8(_mask)); \
*(--_nIns) = (NIns)(COND_AL | (0x8B<<20) | ((_b)<<16) | (_mask)&0xFF); \
asm_output("ldmia %s!,{0x%x}", gpn(_b), (_mask)); \
} while (0)
/*
* VFP
*/

View File

@ -140,30 +140,69 @@ namespace nanojit
freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
}
void Assembler::asm_ld(LIns *ins) {
void Assembler::asm_load32(LIns *ins) {
LIns* base = ins->oprnd1();
int d = ins->disp();
Register rr = prepResultReg(ins, GpRegs);
Register ra = getBaseReg(ins->opcode(), base, d, GpRegs);
#if !PEDANTIC
if (isS16(d)) {
if (ins->isop(LIR_ldcb)) {
LBZ(rr, d, ra);
} else {
LWZ(rr, d, ra);
}
return;
switch(ins->opcode()) {
case LIR_ldzb:
case LIR_ldcb:
if (isS16(d)) {
LBZ(rr, d, ra);
} else {
LBZX(rr, ra, R0); // rr = [ra+R0]
asm_li(R0,d);
}
return;
case LIR_ldzs:
case LIR_ldcs:
// these are expected to be 2 or 4-byte aligned
if (isS16(d)) {
LHZ(rr, d, ra);
} else {
LHZX(rr, ra, R0); // rr = [ra+R0]
asm_li(R0,d);
}
return;
case LIR_ld:
case LIR_ldc:
// these are expected to be 4-byte aligned
if (isS16(d)) {
LWZ(rr, d, ra);
} else {
LWZX(rr, ra, R0); // rr = [ra+R0]
asm_li(R0,d);
}
return;
case LIR_ldsb:
case LIR_ldss:
case LIR_ldcsb:
case LIR_ldcss:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
return;
}
#endif
// general case
underrunProtect(12);
LWZX(rr, ra, R0); // rr = [ra+R0]
asm_li(R0,d);
}
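// Aside (not in the patch): the assembler emits machine code backwards, from
// the end of the buffer toward the start (note the *(--_nIns) stores in the
// emitter macros), so in the generated code the asm_li(R0,d) above actually
// executes before the indexed load that consumes R0 -- the source order in
// these asm_* routines is the reverse of the runtime order.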
void Assembler::asm_store32(LIns *value, int32_t dr, LIns *base) {
void Assembler::asm_store32(LOpcode op, LIns *value, int32_t dr, LIns *base) {
switch (op) {
case LIR_sti:
// handled by mainline code below for now
break;
case LIR_stb:
case LIR_sts:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
return;
}
Register rs = findRegFor(value, GpRegs);
Register ra = value == base ? rs : getBaseReg(LIR_sti, base, dr, GpRegs & ~rmask(rs));
@ -180,6 +219,21 @@ namespace nanojit
}
void Assembler::asm_load64(LIns *ins) {
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
// handled by mainline code below for now
break;
case LIR_ld32f:
case LIR_ldc32f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
return;
}
LIns* base = ins->oprnd1();
#ifdef NANOJIT_64BIT
Register rr = ins->getReg();
@ -256,8 +310,21 @@ namespace nanojit
asm_li32(r, int32_t(imm>>32)); // r[0:31] = imm[32:63]
}
void Assembler::asm_store64(LIns *value, int32_t dr, LIns *base) {
void Assembler::asm_store64(LOpcode op, LIns *value, int32_t dr, LIns *base) {
NanoAssert(value->isQuad());
switch (op) {
case LIR_stqi:
// handled by mainline code below for now
break;
case LIR_st32f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
return;
}
Register ra = getBaseReg(LIR_stqi, base, dr, GpRegs);
#if !PEDANTIC && !defined NANOJIT_64BIT
@ -595,10 +662,6 @@ namespace nanojit
}
}
Register Assembler::asm_prep_fcall(LIns *ins) {
return prepResultReg(ins, rmask(F1));
}
void Assembler::asm_int(LIns *ins) {
Register rr = prepResultReg(ins, GpRegs);
asm_li(rr, ins->imm32());
@ -632,6 +695,14 @@ namespace nanojit
}
void Assembler::asm_call(LIns *ins) {
Register retReg = ( ins->isop(LIR_fcall) ? F1 : retRegs[0] );
prepResultReg(ins, rmask(retReg));
// Do this after we've handled the call result, so we don't
// force the call result to be spilled unnecessarily.
evictScratchRegs();
const CallInfo* call = ins->callInfo();
ArgSize sizes[MAXARGS];
uint32_t argc = call->get_sizes(sizes);

View File

@ -57,6 +57,7 @@ namespace nanojit
#define NJ_MAX_STACK_ENTRY 256
#define NJ_ALIGN_STACK 16
#define NJ_JTBL_SUPPORTED 1
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 0
enum ConditionRegister {
CR0 = 0,
@ -191,10 +192,13 @@ namespace nanojit
PPC_fneg = 0xFC000050, // floating negate
PPC_fsub = 0xFC000028, // floating subtract (double precision)
PPC_lbz = 0x88000000, // load byte and zero
PPC_lbzx = 0x7C0000AE, // load byte and zero indexed
PPC_ld = 0xE8000000, // load doubleword
PPC_ldx = 0x7C00002A, // load doubleword indexed
PPC_lfd = 0xC8000000, // load floating point double
PPC_lfdx = 0x7C0004AE, // load floating-point double indexed
PPC_lhz = 0xA0000000, // load halfword and zero
PPC_lhzx = 0x7C00022E, // load halfword and zero indexed
PPC_lwz = 0x80000000, // load word and zero
PPC_lwzx = 0x7C00002E, // load word and zero indexed
PPC_mfcr = 0x7C000026, // move from condition register
@ -448,8 +452,11 @@ namespace nanojit
"%s %s,%s,%s", #op, gpn(rs), gpn(ra), gpn(rb))
#define LBZ(r, d, b) MEMd(lbz, r, d, b)
#define LHZ(r, d, b) MEMd(lhz, r, d, b)
#define LWZ(r, d, b) MEMd(lwz, r, d, b)
#define LD(r, d, b) MEMd(ld, r, d, b)
#define LBZX(r, a, b) MEMx(lbzx, r, a, b)
#define LHZX(r, a, b) MEMx(lhzx, r, a, b)
#define LWZX(r, a, b) MEMx(lwzx, r, a, b)
#define LDX(r, a, b) MEMx(ldx, r, a, b)

View File

@ -153,6 +153,14 @@ namespace nanojit
void Assembler::asm_call(LInsp ins)
{
Register retReg = ( ins->isop(LIR_fcall) ? F0 : retRegs[0] );
prepResultReg(ins, rmask(retReg));
// Do this after we've handled the call result, so we don't
// force the call result to be spilled unnecessarily.
evictScratchRegs();
const CallInfo* call = ins->callInfo();
underrunProtect(8);
@ -299,8 +307,21 @@ namespace nanojit
}
}
void Assembler::asm_store32(LIns *value, int dr, LIns *base)
void Assembler::asm_store32(LOpcode op, LIns *value, int dr, LIns *base)
{
switch (op) {
case LIR_sti:
// handled by mainline code below for now
break;
case LIR_stb:
case LIR_sts:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
return;
}
underrunProtect(20);
if (value->isconst())
{
@ -344,6 +365,20 @@ namespace nanojit
void Assembler::asm_load64(LInsp ins)
{
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
// handled by mainline code below for now
break;
case LIR_ld32f:
case LIR_ldc32f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
return;
}
underrunProtect(72);
LIns* base = ins->oprnd1();
int db = ins->disp();
@ -373,8 +408,20 @@ namespace nanojit
}
}
void Assembler::asm_store64(LInsp value, int dr, LInsp base)
void Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
{
switch (op) {
case LIR_stqi:
// handled by mainline code below for now
break;
case LIR_st32f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
return;
}
underrunProtect(48);
if (value->isconstq())
{
@ -716,7 +763,7 @@ namespace nanojit
ORI(ra, 0, rr);
}
void Assembler::asm_ld(LInsp ins)
void Assembler::asm_load32(LInsp ins)
{
underrunProtect(12);
LOpcode op = ins->opcode();
@ -724,12 +771,28 @@ namespace nanojit
int d = ins->disp();
Register rr = prepResultReg(ins, GpRegs);
Register ra = getBaseReg(ins->opcode(), base, d, GpRegs);
if (op == LIR_ldcb) {
LDUB32(ra, d, rr);
} else if (op == LIR_ldcs) {
LDUH32(ra, d, rr);
} else {
LDSW32(ra, d, rr);
switch(op) {
case LIR_ldzb:
case LIR_ldcb:
LDUB32(ra, d, rr);
break;
case LIR_ldzs:
case LIR_ldcs:
LDUH32(ra, d, rr);
break;
case LIR_ld:
case LIR_ldc:
LDSW32(ra, d, rr);
break;
case LIR_ldsb:
case LIR_ldss:
case LIR_ldcsb:
case LIR_ldcss:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
return;
}
}
@ -879,11 +942,6 @@ namespace nanojit
LDDF32(FP, d, rr);
}
Register Assembler::asm_prep_fcall(LInsp ins)
{
return prepResultReg(ins, rmask(F0));
}
void Assembler::asm_u2f(LInsp ins)
{
underrunProtect(72);

View File

@ -71,8 +71,10 @@ namespace nanojit
const int LARGEST_UNDERRUN_PROT = 32; // largest value passed to underrunProtect
#define NJ_MAX_STACK_ENTRY 256
#define NJ_MAX_PARAMETERS 1
#define NJ_MAX_STACK_ENTRY 256
#define NJ_MAX_PARAMETERS 1
#define NJ_JTBL_SUPPORTED 0
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 0
const int NJ_ALIGN_STACK = 16;

View File

@ -99,6 +99,11 @@ namespace nanojit
"ah", "ch", "dh", "bh"
};
const char *gpRegNames16[] = {
"ax", "cx", "dx", "bx", "spx", "bpx", "six", "dix",
"r8x", "r9x", "r10x", "r11x", "r12x", "r13x", "r14x", "r15x"
};
#ifdef _DEBUG
#define TODO(x) todo(#x)
static void todo(const char *s) {
@ -240,6 +245,11 @@ namespace nanojit
emit(rexprb(mod_rr(op, r, b), r, b));
}
// disp32 modrm8 form, when the disp fits in the instruction (opcode is 1-3 bytes)
void Assembler::emitrm8(uint64_t op, Register r, int32_t d, Register b) {
emit(rexrb8(mod_disp32(op, r, b, d), r, b));
}
// disp32 modrm form, when the disp fits in the instruction (opcode is 1-3 bytes)
void Assembler::emitrm(uint64_t op, Register r, int32_t d, Register b) {
emit(rexrb(mod_disp32(op, r, b, d), r, b));
@ -343,6 +353,7 @@ namespace nanojit
}
#define RB(r) gpRegNames8[(r)]
#define RS(r) gpRegNames16[(r)]
#define RBhi(r) gpRegNames8hi[(r)]
#define RL(r) gpRegNames32[(r)]
#define RQ(r) gpn(r)
@ -431,13 +442,16 @@ namespace nanojit
// XORPS is a 4x32f vector operation, we use it instead of the more obvious
// XORPD because it's one byte shorter. This is ok because it's only used for
// zeroing an XMM register; hence the single argument.
void Assembler::XORPS( R r) { emitprr(X64_xorps, r,r); asm_output("xorps %s, %s", RQ(r),RQ(r)); }
// Also note that (unlike most SSE2 instructions) XORPS has no mandatory prefix byte, so emitrr() must be used here rather than emitprr().
void Assembler::XORPS( R r) { emitrr(X64_xorps, r,r); asm_output("xorps %s, %s", RQ(r),RQ(r)); }
void Assembler::DIVSD( R l, R r) { emitprr(X64_divsd, l,r); asm_output("divsd %s, %s", RQ(l),RQ(r)); }
void Assembler::MULSD( R l, R r) { emitprr(X64_mulsd, l,r); asm_output("mulsd %s, %s", RQ(l),RQ(r)); }
void Assembler::ADDSD( R l, R r) { emitprr(X64_addsd, l,r); asm_output("addsd %s, %s", RQ(l),RQ(r)); }
void Assembler::SUBSD( R l, R r) { emitprr(X64_subsd, l,r); asm_output("subsd %s, %s", RQ(l),RQ(r)); }
void Assembler::CVTSQ2SD(R l, R r) { emitprr(X64_cvtsq2sd,l,r); asm_output("cvtsq2sd %s, %s",RQ(l),RQ(r)); }
void Assembler::CVTSI2SD(R l, R r) { emitprr(X64_cvtsi2sd,l,r); asm_output("cvtsi2sd %s, %s",RQ(l),RL(r)); }
void Assembler::CVTSS2SD(R l, R r) { emitprr(X64_cvtss2sd,l,r); asm_output("cvtss2sd %s, %s",RQ(l),RL(r)); }
void Assembler::CVTSD2SS(R l, R r) { emitprr(X64_cvtsd2ss,l,r); asm_output("cvtsd2ss %s, %s",RL(l),RQ(r)); }
void Assembler::UCOMISD( R l, R r) { emitprr(X64_ucomisd, l,r); asm_output("ucomisd %s, %s", RQ(l),RQ(r)); }
void Assembler::MOVQRX( R l, R r) { emitprr(X64_movqrx, r,l); asm_output("movq %s, %s", RQ(l),RQ(r)); } // Nb: r and l are deliberately reversed within the emitprr() call.
void Assembler::MOVQXR( R l, R r) { emitprr(X64_movqxr, l,r); asm_output("movq %s, %s", RQ(l),RQ(r)); }
@ -482,14 +496,21 @@ namespace nanojit
void Assembler::LEAQRM(R r1, I d, R r2) { emitrm(X64_leaqrm,r1,d,r2); asm_output("leaq %s, %d(%s)",RQ(r1),d,RQ(r2)); }
void Assembler::MOVLRM(R r1, I d, R r2) { emitrm(X64_movlrm,r1,d,r2); asm_output("movl %s, %d(%s)",RL(r1),d,RQ(r2)); }
void Assembler::MOVQRM(R r1, I d, R r2) { emitrm(X64_movqrm,r1,d,r2); asm_output("movq %s, %d(%s)",RQ(r1),d,RQ(r2)); }
void Assembler::MOVBMR(R r1, I d, R r2) { emitrm8(X64_movbmr,r1,d,r2); asm_output("movb %d(%s), %s",d,RQ(r1),RB(r2)); }
void Assembler::MOVSMR(R r1, I d, R r2) { emitprm(X64_movsmr,r1,d,r2); asm_output("movs %d(%s), %s",d,RQ(r1),RS(r2)); }
void Assembler::MOVLMR(R r1, I d, R r2) { emitrm(X64_movlmr,r1,d,r2); asm_output("movl %d(%s), %s",d,RQ(r1),RL(r2)); }
void Assembler::MOVQMR(R r1, I d, R r2) { emitrm(X64_movqmr,r1,d,r2); asm_output("movq %d(%s), %s",d,RQ(r1),RQ(r2)); }
void Assembler::MOVZX8M( R r1, I d, R r2) { emitrm_wide(X64_movzx8m, r1,d,r2); asm_output("movzxb %s, %d(%s)",RQ(r1),d,RQ(r2)); }
void Assembler::MOVZX16M(R r1, I d, R r2) { emitrm_wide(X64_movzx16m,r1,d,r2); asm_output("movzxs %s, %d(%s)",RQ(r1),d,RQ(r2)); }
void Assembler::MOVSX8M( R r1, I d, R r2) { emitrm_wide(X64_movsx8m, r1,d,r2); asm_output("movsxb %s, %d(%s)",RQ(r1),d,RQ(r2)); }
void Assembler::MOVSX16M(R r1, I d, R r2) { emitrm_wide(X64_movsx16m,r1,d,r2); asm_output("movsxs %s, %d(%s)",RQ(r1),d,RQ(r2)); }
void Assembler::MOVSDRM(R r1, I d, R r2) { emitprm(X64_movsdrm,r1,d,r2); asm_output("movsd %s, %d(%s)",RQ(r1),d,RQ(r2)); }
void Assembler::MOVSDMR(R r1, I d, R r2) { emitprm(X64_movsdmr,r1,d,r2); asm_output("movsd %d(%s), %s",d,RQ(r1),RQ(r2)); }
void Assembler::MOVSSRM(R r1, I d, R r2) { emitprm(X64_movssrm,r1,d,r2); asm_output("movss %s, %d(%s)",RQ(r1),d,RQ(r2)); }
void Assembler::MOVSSMR(R r1, I d, R r2) { emitprm(X64_movssmr,r1,d,r2); asm_output("movss %d(%s), %s",d,RQ(r1),RQ(r2)); }
void Assembler::JMP8( S n, NIns* t) { emit_target8(n, X64_jmp8,t); asm_output("jmp %p", t); }
@ -764,10 +785,10 @@ namespace nanojit
void Assembler::asm_arith(LIns *ins) {
Register rr, ra, rb;
switch (ins->opcode() & ~LIR64) {
case LIR_lsh:
case LIR_rsh:
case LIR_ush:
switch (ins->opcode()) {
case LIR_lsh: case LIR_qilsh:
case LIR_rsh: case LIR_qirsh:
case LIR_ush: case LIR_qursh:
asm_shift(ins);
return;
case LIR_mod:
@ -832,6 +853,14 @@ namespace nanojit
}
void Assembler::asm_call(LIns *ins) {
Register retReg = ( ins->isop(LIR_fcall) ? XMM0 : retRegs[0] );
prepResultReg(ins, rmask(retReg));
// Do this after we've handled the call result, so we don't
// force the call result to be spilled unnecessarily.
evictScratchRegs();
const CallInfo *call = ins->callInfo();
ArgSize sizes[MAXARGS];
int argc = call->get_sizes(sizes);
@ -991,32 +1020,32 @@ namespace nanojit
LOpcode condop = cond->opcode();
if (ins->opcode() == LIR_cmov) {
switch (condop & ~LIR64) {
case LIR_ov: CMOVNO( rr, rf); break;
case LIR_eq: CMOVNE( rr, rf); break;
case LIR_lt: CMOVNL( rr, rf); break;
case LIR_gt: CMOVNG( rr, rf); break;
case LIR_le: CMOVNLE(rr, rf); break;
case LIR_ge: CMOVNGE(rr, rf); break;
case LIR_ult: CMOVNB( rr, rf); break;
case LIR_ugt: CMOVNA( rr, rf); break;
case LIR_ule: CMOVNBE(rr, rf); break;
case LIR_uge: CMOVNAE(rr, rf); break;
default: NanoAssert(0); break;
switch (condop) {
case LIR_ov: CMOVNO( rr, rf); break;
case LIR_eq: case LIR_qeq: CMOVNE( rr, rf); break;
case LIR_lt: case LIR_qlt: CMOVNL( rr, rf); break;
case LIR_gt: case LIR_qgt: CMOVNG( rr, rf); break;
case LIR_le: case LIR_qle: CMOVNLE(rr, rf); break;
case LIR_ge: case LIR_qge: CMOVNGE(rr, rf); break;
case LIR_ult: case LIR_qult: CMOVNB( rr, rf); break;
case LIR_ugt: case LIR_qugt: CMOVNA( rr, rf); break;
case LIR_ule: case LIR_qule: CMOVNBE(rr, rf); break;
case LIR_uge: case LIR_quge: CMOVNAE(rr, rf); break;
default: NanoAssert(0); break;
}
} else {
switch (condop & ~LIR64) {
case LIR_ov: CMOVQNO( rr, rf); break;
case LIR_eq: CMOVQNE( rr, rf); break;
case LIR_lt: CMOVQNL( rr, rf); break;
case LIR_gt: CMOVQNG( rr, rf); break;
case LIR_le: CMOVQNLE(rr, rf); break;
case LIR_ge: CMOVQNGE(rr, rf); break;
case LIR_ult: CMOVQNB( rr, rf); break;
case LIR_ugt: CMOVQNA( rr, rf); break;
case LIR_ule: CMOVQNBE(rr, rf); break;
case LIR_uge: CMOVQNAE(rr, rf); break;
default: NanoAssert(0); break;
switch (condop) {
case LIR_ov: CMOVQNO( rr, rf); break;
case LIR_eq: case LIR_qeq: CMOVQNE( rr, rf); break;
case LIR_lt: case LIR_qlt: CMOVQNL( rr, rf); break;
case LIR_gt: case LIR_qgt: CMOVQNG( rr, rf); break;
case LIR_le: case LIR_qle: CMOVQNLE(rr, rf); break;
case LIR_ge: case LIR_qge: CMOVQNGE(rr, rf); break;
case LIR_ult: case LIR_qult: CMOVQNB( rr, rf); break;
case LIR_ugt: case LIR_qugt: CMOVQNA( rr, rf); break;
case LIR_ule: case LIR_qule: CMOVQNBE(rr, rf); break;
case LIR_uge: case LIR_quge: CMOVQNAE(rr, rf); break;
default: NanoAssert(0); break;
}
}
/*const Register rt =*/ findSpecificRegFor(iftrue, rr);
@ -1024,72 +1053,71 @@ namespace nanojit
}
NIns* Assembler::asm_branch(bool onFalse, LIns *cond, NIns *target) {
NanoAssert(cond->isCond());
LOpcode condop = cond->opcode();
if (condop >= LIR_feq && condop <= LIR_fge)
return asm_fbranch(onFalse, cond, target);
// we must ensure there's room for the instr before calculating
// the offset. and the offset, determines the opcode (8bit or 32bit)
NanoAssert((condop & ~LIR64) >= LIR_ov);
NanoAssert((condop & ~LIR64) <= LIR_uge);
// We must ensure there's room for the instr before calculating
// the offset. And the offset determines the opcode (8bit or 32bit).
if (target && isTargetWithinS8(target)) {
if (onFalse) {
switch (condop & ~LIR64) {
case LIR_ov: JNO8( 8, target); break;
case LIR_eq: JNE8( 8, target); break;
case LIR_lt: JNL8( 8, target); break;
case LIR_gt: JNG8( 8, target); break;
case LIR_le: JNLE8(8, target); break;
case LIR_ge: JNGE8(8, target); break;
case LIR_ult: JNB8( 8, target); break;
case LIR_ugt: JNA8( 8, target); break;
case LIR_ule: JNBE8(8, target); break;
case LIR_uge: JNAE8(8, target); break;
default: NanoAssert(0); break;
switch (condop) {
case LIR_ov: JNO8( 8, target); break;
case LIR_eq: case LIR_qeq: JNE8( 8, target); break;
case LIR_lt: case LIR_qlt: JNL8( 8, target); break;
case LIR_gt: case LIR_qgt: JNG8( 8, target); break;
case LIR_le: case LIR_qle: JNLE8(8, target); break;
case LIR_ge: case LIR_qge: JNGE8(8, target); break;
case LIR_ult: case LIR_qult: JNB8( 8, target); break;
case LIR_ugt: case LIR_qugt: JNA8( 8, target); break;
case LIR_ule: case LIR_qule: JNBE8(8, target); break;
case LIR_uge: case LIR_quge: JNAE8(8, target); break;
default: NanoAssert(0); break;
}
} else {
switch (condop & ~LIR64) {
case LIR_ov: JO8( 8, target); break;
case LIR_eq: JE8( 8, target); break;
case LIR_lt: JL8( 8, target); break;
case LIR_gt: JG8( 8, target); break;
case LIR_le: JLE8(8, target); break;
case LIR_ge: JGE8(8, target); break;
case LIR_ult: JB8( 8, target); break;
case LIR_ugt: JA8( 8, target); break;
case LIR_ule: JBE8(8, target); break;
case LIR_uge: JAE8(8, target); break;
default: NanoAssert(0); break;
switch (condop) {
case LIR_ov: JO8( 8, target); break;
case LIR_eq: case LIR_qeq: JE8( 8, target); break;
case LIR_lt: case LIR_qlt: JL8( 8, target); break;
case LIR_gt: case LIR_qgt: JG8( 8, target); break;
case LIR_le: case LIR_qle: JLE8(8, target); break;
case LIR_ge: case LIR_qge: JGE8(8, target); break;
case LIR_ult: case LIR_qult: JB8( 8, target); break;
case LIR_ugt: case LIR_qugt: JA8( 8, target); break;
case LIR_ule: case LIR_qule: JBE8(8, target); break;
case LIR_uge: case LIR_quge: JAE8(8, target); break;
default: NanoAssert(0); break;
}
}
} else {
if (onFalse) {
switch (condop & ~LIR64) {
case LIR_ov: JNO( 8, target); break;
case LIR_eq: JNE( 8, target); break;
case LIR_lt: JNL( 8, target); break;
case LIR_gt: JNG( 8, target); break;
case LIR_le: JNLE(8, target); break;
case LIR_ge: JNGE(8, target); break;
case LIR_ult: JNB( 8, target); break;
case LIR_ugt: JNA( 8, target); break;
case LIR_ule: JNBE(8, target); break;
case LIR_uge: JNAE(8, target); break;
default: NanoAssert(0); break;
switch (condop) {
case LIR_ov: JNO( 8, target); break;
case LIR_eq: case LIR_qeq: JNE( 8, target); break;
case LIR_lt: case LIR_qlt: JNL( 8, target); break;
case LIR_gt: case LIR_qgt: JNG( 8, target); break;
case LIR_le: case LIR_qle: JNLE(8, target); break;
case LIR_ge: case LIR_qge: JNGE(8, target); break;
case LIR_ult: case LIR_qult: JNB( 8, target); break;
case LIR_ugt: case LIR_qugt: JNA( 8, target); break;
case LIR_ule: case LIR_qule: JNBE(8, target); break;
case LIR_uge: case LIR_quge: JNAE(8, target); break;
default: NanoAssert(0); break;
}
} else {
switch (condop & ~LIR64) {
case LIR_ov: JO( 8, target); break;
case LIR_eq: JE( 8, target); break;
case LIR_lt: JL( 8, target); break;
case LIR_gt: JG( 8, target); break;
case LIR_le: JLE(8, target); break;
case LIR_ge: JGE(8, target); break;
case LIR_ult: JB( 8, target); break;
case LIR_ugt: JA( 8, target); break;
case LIR_ule: JBE(8, target); break;
case LIR_uge: JAE(8, target); break;
default: NanoAssert(0); break;
switch (condop) {
case LIR_ov: JO( 8, target); break;
case LIR_eq: case LIR_qeq: JE( 8, target); break;
case LIR_lt: case LIR_qlt: JL( 8, target); break;
case LIR_gt: case LIR_qgt: JG( 8, target); break;
case LIR_le: case LIR_qle: JLE(8, target); break;
case LIR_ge: case LIR_qge: JGE(8, target); break;
case LIR_ult: case LIR_qult: JB( 8, target); break;
case LIR_ugt: case LIR_qugt: JA( 8, target); break;
case LIR_ule: case LIR_qule: JBE(8, target); break;
case LIR_uge: case LIR_quge: JAE(8, target); break;
default: NanoAssert(0); break;
}
}
}
@ -1117,25 +1145,29 @@ namespace nanojit
}
LOpcode condop = cond->opcode();
if (condop & LIR64)
if (LIR_qeq <= condop && condop <= LIR_quge) {
CMPQR(ra, rb);
else
} else {
NanoAssert(LIR_eq <= condop && condop <= LIR_uge);
CMPLR(ra, rb);
}
}
void Assembler::asm_cmp_imm(LIns *cond) {
LOpcode condop = cond->opcode();
LIns *a = cond->oprnd1();
LIns *b = cond->oprnd2();
Register ra = findRegFor(a, GpRegs);
int32_t imm = getImm32(b);
if (isS8(imm)) {
if (cond->opcode() & LIR64)
if (LIR_qeq <= condop && condop <= LIR_quge) {
if (isS8(imm))
CMPQR8(ra, imm);
else
CMPLR8(ra, imm);
} else {
if (cond->opcode() & LIR64)
else
CMPQRI(ra, imm);
} else {
NanoAssert(LIR_eq <= condop && condop <= LIR_uge);
if (isS8(imm))
CMPLR8(ra, imm);
else
CMPLRI(ra, imm);
}
@ -1327,61 +1359,103 @@ namespace nanojit
// xmm <- xmm: use movaps. movsd r,r causes partial register stall
MOVAPSR(d, s);
} else {
NanoAssert(IsFpReg(d) && !IsFpReg(s));
// xmm <- gpr: use movq xmm, r/m64 (66 REX.W 0F 6E /r)
MOVQXR(d, s);
}
}
void Assembler::regalloc_load(LIns *ins, Register &rr, int32_t &dr, Register &rb) {
void Assembler::regalloc_load(LIns *ins, RegisterMask allow, Register &rr, int32_t &dr, Register &rb) {
dr = ins->disp();
LIns *base = ins->oprnd1();
rb = getBaseReg(ins->opcode(), base, dr, BaseRegs);
if (ins->isUnusedOrHasUnknownReg()) {
// use a gpr in case we're copying a non-double
rr = prepResultReg(ins, GpRegs & ~rmask(rb));
if (ins->isUnusedOrHasUnknownReg() || !(allow & rmask(ins->getReg()))) {
rr = prepResultReg(ins, allow & ~rmask(rb));
} else {
// keep already assigned register
rr = ins->getReg();
NanoAssert(allow & rmask(rr));
freeRsrcOf(ins, false);
}
}
void Assembler::asm_load64(LIns *ins) {
Register rr, rb;
int32_t dr;
regalloc_load(ins, rr, dr, rb);
if (IsGpReg(rr)) {
// general 64bit load, 32bit const displacement
MOVQRM(rr, dr, rb);
} else {
// load 64bits into XMM. don't know if double or int64, assume double.
MOVSDRM(rr, dr, rb);
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
regalloc_load(ins, GpRegs, rr, dr, rb);
if (IsGpReg(rr)) {
// general 64bit load, 32bit const displacement
MOVQRM(rr, dr, rb);
} else {
NanoAssert(IsFpReg(rr));
// load 64bits into XMM. don't know if double or int64, assume double.
MOVSDRM(rr, dr, rb);
}
break;
case LIR_ld32f:
case LIR_ldc32f:
regalloc_load(ins, FpRegs, rr, dr, rb);
NanoAssert(IsFpReg(rr));
CVTSS2SD(rr, rr);
MOVSSRM(rr, dr, rb);
break;
default:
NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
break;
}
}
void Assembler::asm_ld(LIns *ins) {
void Assembler::asm_load32(LIns *ins) {
NanoAssert(!ins->isQuad());
Register r, b;
int32_t d;
regalloc_load(ins, r, d, b);
regalloc_load(ins, GpRegs, r, d, b);
LOpcode op = ins->opcode();
switch (op) {
case LIR_ldcb: MOVZX8M( r, d, b); break;
case LIR_ldcs: MOVZX16M(r, d, b); break;
default: MOVLRM( r, d, b); break;
switch(op) {
case LIR_ldzb:
case LIR_ldcb:
MOVZX8M( r, d, b);
break;
case LIR_ldzs:
case LIR_ldcs:
MOVZX16M(r, d, b);
break;
case LIR_ld:
case LIR_ldc:
MOVLRM( r, d, b);
break;
case LIR_ldsb:
case LIR_ldcsb:
MOVSX8M( r, d, b);
break;
case LIR_ldss:
case LIR_ldcss:
MOVSX16M( r, d, b);
break;
default:
NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
break;
}
}
void Assembler::asm_store64(LIns *value, int d, LIns *base) {
void Assembler::asm_store64(LOpcode op, LIns *value, int d, LIns *base) {
NanoAssert(value->isQuad());
Register b = getBaseReg(LIR_stqi, base, d, BaseRegs);
Register r;
// if we have to choose a register, use a GPR, but not the base reg
Register r;
if (value->isUnusedOrHasUnknownReg()) {
RegisterMask allow;
// If op is LIR_st32f and we have no reg, prefer FPR over GPR: saves an instruction later,
// and the value is almost certainly going to be operated on as FP later anyway.
// XXX: isFloat doesn't cover float/fmod! see bug 520208.
if (value->isFloat() || value->isop(LIR_float) || value->isop(LIR_fmod)) {
if (op == LIR_st32f || value->isFloat() || value->isop(LIR_float) || value->isop(LIR_fmod)) {
allow = FpRegs;
} else {
allow = GpRegs;
@ -1391,23 +1465,76 @@ namespace nanojit
r = value->getReg();
}
if (IsGpReg(r)) {
// gpr store
MOVQMR(r, d, b);
}
else {
// xmm store
MOVSDMR(r, d, b);
switch (op) {
case LIR_stqi:
{
if (IsGpReg(r)) {
// gpr store
MOVQMR(r, d, b);
}
else {
// xmm store
MOVSDMR(r, d, b);
}
break;
}
case LIR_st32f:
{
// need a scratch FPR reg
Register t = registerAllocTmp(FpRegs & ~rmask(r));
// store
MOVSSMR(t, d, b);
// cvt to single-precision
if (IsGpReg(r))
{
CVTSD2SS(t, t);
MOVQXR(t, r); // xmm <- gpr: use movq xmm, r/m64 (66 REX.W 0F 6E /r)
}
else
{
NanoAssert(IsFpReg(r));
CVTSD2SS(t, r);
}
XORPS(t); // break dependency chains
break;
}
default:
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
break;
}
}
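// Aside (not in the patch): in the st32f path above, CVTSD2SS writes only the
// low 32 bits of its destination and leaves the upper lanes untouched, so the
// scratch register would otherwise carry a false dependency on whatever last
// wrote it. The XORPS(t) -- which, given backwards emission, is the first of
// these instructions to execute at runtime -- zeroes the register to break
// that dependency, matching the "zero dest to ensure no dependency stalls"
// comment in the 32-bit backend later in this patch.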
void Assembler::asm_store32(LIns *value, int d, LIns *base) {
void Assembler::asm_store32(LOpcode op, LIns *value, int d, LIns *base) {
// quirk of x86-64: reg cannot appear to be ah/bh/ch/dh
// for single-byte stores with REX prefix
const RegisterMask SrcRegs =
(op == LIR_stb) ?
(GpRegs & ~(1<<RSP | 1<<RBP | 1<<RSI | 1<<RDI)) :
GpRegs;
NanoAssert(!value->isQuad());
Register b = getBaseReg(LIR_sti, base, d, BaseRegs);
Register r = findRegFor(value, GpRegs & ~rmask(b));
Register r = findRegFor(value, SrcRegs & ~rmask(b));
switch (op) {
case LIR_stb:
MOVBMR(r, d, b);
break;
case LIR_sts:
MOVSMR(r, d, b);
break;
case LIR_sti:
MOVLMR(r, d, b);
break;
default:
NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
break;
}
// store 32bits to 64bit addr. use rex so we can use all 16 regs
MOVLMR(r, d, b);
}
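// Aside (not in the patch): a worked encoding example of why SrcRegs excludes
// rsp/rbp/rsi/rdi for byte stores. In ModRM, byte-register codes 4-7 mean
// ah/ch/dh/bh when no REX prefix is present, but spl/bpl/sil/dil once any REX
// prefix is emitted (and REX is what makes r8-r15 reachable at all):
//     88 23        mov byte ptr [ebx], ah     (no REX: reg field 4 = ah)
//     40 88 23     mov byte ptr [rbx], spl    (any REX: reg field 4 = spl)
//     44 88 03     mov byte ptr [rbx], r8b    (REX.R selects r8-r15)
// Avoiding those four registers sidesteps the ambiguity; the 32-bit backend
// applies the analogous a/b/c/d-only restriction further below.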
// generate a 64bit constant, must not affect condition codes!
@ -1459,10 +1586,6 @@ namespace nanojit
TODO(asm_qjoin);
}
Register Assembler::asm_prep_fcall(LIns *ins) {
return prepResultReg(ins, rmask(XMM0));
}
void Assembler::asm_param(LIns *ins) {
uint32_t a = ins->paramArg();
uint32_t kind = ins->paramKind();
@ -1570,7 +1693,7 @@ namespace nanojit
SUBQRI(RSP, amt);
}
verbose_only( outputAddr=true; asm_output("[patch entry]"); )
verbose_only( asm_output("[patch entry]"); )
NIns *patchEntry = _nIns;
MR(FP, RSP); // Establish our own FP.
PUSHR(FP); // Save caller's FP.

View File

@ -61,6 +61,7 @@ namespace nanojit
#define NJ_MAX_STACK_ENTRY 256
#define NJ_ALIGN_STACK 16
#define NJ_JTBL_SUPPORTED 1
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
enum Register {
RAX = 0, // 1st int return, # of sse varargs
@ -190,6 +191,8 @@ namespace nanojit
X64_cmpqr8 = 0x00F8834800000004LL, // 64bit compare r,int64(imm8)
X64_cvtsi2sd= 0xC02A0F40F2000005LL, // convert int32 to double r = (double) b
X64_cvtsq2sd= 0xC02A0F48F2000005LL, // convert int64 to double r = (double) b
X64_cvtss2sd= 0xC05A0F40F3000005LL, // convert float to double r = (double) b
X64_cvtsd2ss= 0xC05A0F40F2000005LL, // convert double to float r = (float) b
X64_divsd = 0xC05E0F40F2000005LL, // divide scalar double r /= b
X64_mulsd = 0xC0590F40F2000005LL, // multiply scalar double r *= b
X64_addsd = 0xC0580F40F2000005LL, // add scalar double r += b
@ -229,6 +232,8 @@ namespace nanojit
X64_learm = 0x00000000808D4007LL, // 32bit load effective addr reg <- disp32+base
X64_learip = 0x00000000058D4807LL, // 64bit RIP-relative lea. reg <- disp32+rip (modrm = 00rrr101 = 05)
X64_movlr = 0xC08B400000000003LL, // 32bit mov r <- b
X64_movbmr = 0x0000000080884007LL, // 8bit store r -> [b+d32]
X64_movsmr = 0x8089406600000004LL, // 16bit store r -> [b+d32]
X64_movlmr = 0x0000000080894007LL, // 32bit store r -> [b+d32]
X64_movlrm = 0x00000000808B4007LL, // 32bit load r <- [b+d32]
X64_movqmr = 0x0000000080894807LL, // 64bit store gpr -> [b+d32]
@ -244,10 +249,14 @@ namespace nanojit
X64_movsdrr = 0xC0100F40F2000005LL, // 64bit mov xmm-r <- xmm-b (upper 64bits unchanged)
X64_movsdrm = 0x80100F40F2000005LL, // 64bit load xmm-r <- [b+d32] (upper 64 cleared)
X64_movsdmr = 0x80110F40F2000005LL, // 64bit store xmm-r -> [b+d32]
X64_movssrm = 0x80100F40F3000005LL, // 32bit load xmm-r <- [b+d32] (upper 96 cleared)
X64_movssmr = 0x80110F40F3000005LL, // 32bit store xmm-r -> [b+d32]
X64_movsxdr = 0xC063480000000003LL, // sign extend i32 to i64 r = (int64)(int32) b
X64_movzx8 = 0xC0B60F4000000004LL, // zero extend i8 to i64 r = (uint64)(uint8) b
X64_movzx8m = 0x80B60F4000000004LL, // zero extend i8 load to i32 r <- [b+d32]
X64_movzx16m= 0x80B70F4000000004LL, // zero extend i16 load to i32 r <- [b+d32]
X64_movsx8m = 0x80BE0F4000000004LL, // sign extend i8 load to i32 r <- [b+d32]
X64_movsx16m= 0x80BF0F4000000004LL, // sign extend i16 load to i32 r <- [b+d32]
X64_neg = 0xD8F7400000000003LL, // 32bit two's complement b = -b
X64_nop1 = 0x9000000000000001LL, // one byte NOP
X64_nop2 = 0x9066000000000002LL, // two byte NOP
@ -358,6 +367,7 @@ namespace nanojit
void emitr(uint64_t op, Register b) { emitrr(op, (Register)0, b); }\
void emitr8(uint64_t op, Register b) { emitrr8(op, (Register)0, b); }\
void emitprr(uint64_t op, Register r, Register b);\
void emitrm8(uint64_t op, Register r, int32_t d, Register b);\
void emitrm(uint64_t op, Register r, int32_t d, Register b);\
void emitrm_wide(uint64_t op, Register r, int32_t d, Register b);\
uint64_t emit_disp32(uint64_t op, int32_t d);\
@ -379,7 +389,7 @@ namespace nanojit
void asm_arith_imm(LIns*);\
void regalloc_unary(LIns *ins, RegisterMask allow, Register &rr, Register &ra);\
void regalloc_binary(LIns *ins, RegisterMask allow, Register &rr, Register &ra, Register &rb);\
void regalloc_load(LIns *ins, Register &rr, int32_t &d, Register &rb);\
void regalloc_load(LIns *ins, RegisterMask allow, Register &rr, int32_t &d, Register &rb);\
void dis(NIns *p, int bytes);\
void asm_cmp(LIns*);\
void asm_cmp_imm(LIns*);\
@ -459,6 +469,8 @@ namespace nanojit
void SUBSD(Register l, Register r);\
void CVTSQ2SD(Register l, Register r);\
void CVTSI2SD(Register l, Register r);\
void CVTSS2SD(Register l, Register r);\
void CVTSD2SS(Register l, Register r);\
void UCOMISD(Register l, Register r);\
void MOVQRX(Register l, Register r);\
void MOVQXR(Register l, Register r);\
@ -494,12 +506,18 @@ namespace nanojit
void LEAQRM(Register r1, int d, Register r2);\
void MOVLRM(Register r1, int d, Register r2);\
void MOVQRM(Register r1, int d, Register r2);\
void MOVBMR(Register r1, int d, Register r2);\
void MOVSMR(Register r1, int d, Register r2);\
void MOVLMR(Register r1, int d, Register r2);\
void MOVQMR(Register r1, int d, Register r2);\
void MOVZX8M(Register r1, int d, Register r2);\
void MOVZX16M(Register r1, int d, Register r2);\
void MOVSX8M(Register r1, int d, Register r2);\
void MOVSX16M(Register r1, int d, Register r2);\
void MOVSDRM(Register r1, int d, Register r2);\
void MOVSDMR(Register r1, int d, Register r2);\
void MOVSSMR(Register r1, int d, Register r2);\
void MOVSSRM(Register r1, int d, Register r2);\
void JMP8(size_t n, NIns* t);\
void JMP32(size_t n, NIns* t);\
void JMPX(Register indexreg, NIns** table);\

View File

@ -70,11 +70,45 @@ namespace nanojit
0 /* ABI_CDECL */
};
static bool CheckForSSE2()
{
int features = 0;
#if defined _MSC_VER
__asm
{
pushad
mov eax, 1
cpuid
mov features, edx
popad
}
#elif defined __GNUC__
asm("xchg %%esi, %%ebx\n" /* we can't clobber ebx on gcc (PIC register) */
"mov $0x01, %%eax\n"
"cpuid\n"
"mov %%edx, %0\n"
"xchg %%esi, %%ebx\n"
: "=m" (features)
: /* We have no inputs */
: "%eax", "%esi", "%ecx", "%edx"
);
#elif defined __SUNPRO_C || defined __SUNPRO_CC
asm("push %%ebx\n"
"mov $0x01, %%eax\n"
"cpuid\n"
"pop %%ebx\n"
: "=d" (features)
: /* We have no inputs */
: "%eax", "%ecx"
);
#endif
return (features & (1<<26)) != 0;
}
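For reference, the same SSE2 probe can be written without hand-rolled inline
assembly by using the <cpuid.h> helper shipped with GCC/Clang; this is an
editorial sketch, not part of the patch, and it assumes a GCC-compatible
compiler targeting x86:

#include <cpuid.h>   /* GCC/Clang only; MSVC would use __cpuid from <intrin.h> */
#include <stdio.h>

/* CPUID leaf 1, EDX bit 26 reports SSE2 -- the same bit tested above. */
static int has_sse2(void)
{
    unsigned int eax, ebx, ecx, edx;
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return 0;                /* CPUID leaf 1 not available */
    return (edx & (1u << 26)) != 0;
}

int main(void)
{
    printf("SSE2 supported: %s\n", has_sse2() ? "yes" : "no");
    return 0;
}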
void Assembler::nInit(AvmCore* core)
{
(void) core;
VMPI_getDate();
config.sse2 = config.sse2 && CheckForSSE2();
}
void Assembler::nBeginAssembly() {
@ -102,7 +136,7 @@ namespace nanojit
SUBi(SP, amt);
}
verbose_only( outputAddr=true; asm_output("[frag entry]"); )
verbose_only( asm_output("[frag entry]"); )
NIns *fragEntry = _nIns;
MR(FP, SP); // Establish our own FP.
PUSHr(FP); // Save caller's FP.
@ -154,7 +188,7 @@ namespace nanojit
MR(SP,FP);
// return value is GuardRecord*
LDi(EAX, int(lr));
asm_int(EAX, int(lr), /*canClobberCCs*/true);
}
NIns *Assembler::genEpilogue()
@ -167,6 +201,14 @@ namespace nanojit
void Assembler::asm_call(LInsp ins)
{
Register retReg = ( ins->isop(LIR_fcall) ? FST0 : retRegs[0] );
prepResultReg(ins, rmask(retReg));
// Do this after we've handled the call result, so we don't
// force the call result to be spilled unnecessarily.
evictScratchRegs();
const CallInfo* call = ins->callInfo();
// must be signed, not unsigned
uint32_t iargs = call->count_iargs();
@ -407,7 +449,7 @@ namespace nanojit
if (!i->getArIndex()) {
i->markAsClear();
}
LDi(r, i->imm32());
asm_int(r, i->imm32(), /*canClobberCCs*/false);
}
else if (i->isop(LIR_param) && i->paramKind() == 0 &&
(arg = i->paramArg()) >= (abi_regcount = max_abi_regs[_thisfrag->lirbuf->abi])) {
@ -430,31 +472,62 @@ namespace nanojit
}
}
void Assembler::asm_store32(LIns *value, int dr, LIns *base)
void Assembler::asm_store32(LOpcode op, LIns* value, int dr, LIns* base)
{
if (value->isconst())
{
Register rb = getBaseReg(LIR_sti, base, dr, GpRegs);
int c = value->imm32();
STi(rb, dr, c);
switch(op) {
case LIR_stb:
ST8i(rb, dr, c);
break;
case LIR_sts:
ST16i(rb, dr, c);
break;
case LIR_sti:
STi(rb, dr, c);
break;
default:
NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
break;
}
}
else
{
// quirk of x86-32: reg must be a/b/c/d for single-byte stores
const RegisterMask SrcRegs = (op == LIR_stb) ?
(1<<EAX | 1<<ECX | 1<<EDX | 1<<EBX) :
GpRegs;
// make sure the value is in a register
Register ra, rb;
if (base->isop(LIR_alloc)) {
rb = FP;
dr += findMemFor(base);
ra = findRegFor(value, GpRegs);
ra = findRegFor(value, SrcRegs);
} else if (base->isconst()) {
// absolute address
dr += base->imm32();
ra = findRegFor(value, GpRegs);
ra = findRegFor(value, SrcRegs);
rb = UnknownReg;
} else {
findRegFor2(GpRegs, value, ra, base, rb);
findRegFor2(SrcRegs, value, ra, base, rb);
}
switch(op) {
case LIR_stb:
ST8(rb, dr, ra);
break;
case LIR_sts:
ST16(rb, dr, ra);
break;
case LIR_sti:
ST(rb, dr, ra);
break;
default:
NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
break;
}
ST(rb, dr, ra);
}
}
@ -494,10 +567,25 @@ namespace nanojit
{
freeRsrcOf(ins, false);
Register rb = getBaseReg(ins->opcode(), base, db, GpRegs);
SSE_LDQ(rr, db, rb);
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
SSE_LDQ(rr, db, rb);
break;
case LIR_ld32f:
case LIR_ldc32f:
SSE_CVTSS2SD(rr, rr);
SSE_LDSS(rr, db, rb);
SSE_XORPDr(rr,rr);
break;
default:
NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
break;
}
}
else
{
int dr = disp(ins);
Register rb;
if (base->isop(LIR_alloc)) {
@ -508,23 +596,79 @@ namespace nanojit
}
ins->setReg(UnknownReg);
// don't use an fpu reg to simply load & store the value.
if (dr)
asm_mmq(FP, dr, rb, db);
freeRsrcOf(ins, false);
if (isKnownReg(rr))
{
NanoAssert(rmask(rr)&FpRegs);
_allocator.retire(rr);
FLDQ(db, rb);
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
// don't use an fpu reg to simply load & store the value.
if (dr)
asm_mmq(FP, dr, rb, db);
freeRsrcOf(ins, false);
if (isKnownReg(rr))
{
NanoAssert(rmask(rr)&x87Regs);
_allocator.retire(rr);
FLDQ(db, rb);
}
break;
case LIR_ld32f:
case LIR_ldc32f:
freeRsrcOf(ins, false);
if (isKnownReg(rr))
{
NanoAssert(rmask(rr)&x87Regs);
_allocator.retire(rr);
FLD32(db, rb);
}
else
{
// need to use fpu to expand 32->64
NanoAssert(dr != 0);
FSTPQ(dr, FP);
FLD32(db, rb);
}
break;
default:
NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
break;
}
}
}
void Assembler::asm_store64(LInsp value, int dr, LInsp base)
void Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
{
if (op == LIR_st32f)
{
Register rb;
if (base->isop(LIR_alloc)) {
rb = FP;
dr += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
// if the value is already in a reg, use that; otherwise
// try to get it into XMM regs before FPU regs.
bool pop = value->isUnusedOrHasUnknownReg();
Register rv = findRegFor(value, config.sse2 ? XmmRegs : FpRegs);
if (rmask(rv) & XmmRegs)
{
// need a scratch reg
Register t = registerAllocTmp(XmmRegs);
// cvt to single-precision and store
SSE_STSS(dr, rb, t);
SSE_CVTSD2SS(t, rv);
SSE_XORPDr(t,t); // zero dest to ensure no dependency stalls
}
else
{
FST32(pop?1:0, dr, rb);
}
return;
}
NanoAssertMsg(op == LIR_stqi, "asm_store64 should never receive this LIR opcode");
if (value->isconstq())
{
// if a constant 64-bit value just store it now rather than
@ -592,9 +736,12 @@ namespace nanojit
? findRegFor(value, config.sse2 ? XmmRegs : FpRegs)
: value->getReg() );
if (rmask(rv) & XmmRegs) {
if (rmask(rv) & XmmRegs)
{
SSE_STQ(dr, rb, rv);
} else {
}
else
{
FSTQ(pop?1:0, dr, rb);
}
}
@ -1010,7 +1157,7 @@ namespace nanojit
MR(rr,ra);
}
void Assembler::asm_ld(LInsp ins)
void Assembler::asm_load32(LInsp ins)
{
LOpcode op = ins->opcode();
LIns* base = ins->oprnd1();
@ -1020,13 +1167,31 @@ namespace nanojit
if (base->isconst()) {
intptr_t addr = base->imm32();
addr += d;
if (op == LIR_ldcb)
LD8Zdm(rr, addr);
else if (op == LIR_ldcs)
LD16Zdm(rr, addr);
else
LDdm(rr, addr);
return;
switch(op) {
case LIR_ldzb:
case LIR_ldcb:
LD8Zdm(rr, addr);
return;
case LIR_ldsb:
case LIR_ldcsb:
LD8Sdm(rr, addr);
return;
case LIR_ldzs:
case LIR_ldcs:
LD16Zdm(rr, addr);
return;
case LIR_ldss:
case LIR_ldcss:
LD16Sdm(rr, addr);
return;
case LIR_ld:
case LIR_ldc:
LDdm(rr, addr);
return;
default:
NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
return;
}
}
/* Search for add(X,Y) */
@ -1058,23 +1223,59 @@ namespace nanojit
? findSpecificRegForUnallocated(rhs, rr)
: findRegFor(rhs, GpRegs & ~(rmask(rleft))) );
if (op == LIR_ldcb)
LD8Zsib(rr, d, rleft, rright, scale);
else if (op == LIR_ldcs)
LD16Zsib(rr, d, rleft, rright, scale);
else
LDsib(rr, d, rleft, rright, scale);
return;
switch(op) {
case LIR_ldzb:
case LIR_ldcb:
LD8Zsib(rr, d, rleft, rright, scale);
return;
case LIR_ldsb:
case LIR_ldcsb:
LD8Ssib(rr, d, rleft, rright, scale);
return;
case LIR_ldzs:
case LIR_ldcs:
LD16Zsib(rr, d, rleft, rright, scale);
return;
case LIR_ldss:
case LIR_ldcss:
LD16Ssib(rr, d, rleft, rright, scale);
return;
case LIR_ld:
case LIR_ldc:
LDsib(rr, d, rleft, rright, scale);
return;
default:
NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
return;
}
}
Register ra = getBaseReg(op, base, d, GpRegs);
if (op == LIR_ldcb)
LD8Z(rr, d, ra);
else if (op == LIR_ldcs)
LD16Z(rr, d, ra);
else
LD(rr, d, ra);
switch(op) {
case LIR_ldzb:
case LIR_ldcb:
LD8Z(rr, d, ra);
return;
case LIR_ldsb:
case LIR_ldcsb:
LD8S(rr, d, ra);
return;
case LIR_ldzs:
case LIR_ldcs:
LD16Z(rr, d, ra);
return;
case LIR_ldss:
case LIR_ldcss:
LD16S(rr, d, ra);
return;
case LIR_ld:
case LIR_ldc:
LD(rr, d, ra);
return;
default:
NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
return;
}
}
void Assembler::asm_cmov(LInsp ins)
@ -1150,11 +1351,15 @@ namespace nanojit
void Assembler::asm_int(LInsp ins)
{
Register rr = prepResultReg(ins, GpRegs);
int32_t val = ins->imm32();
if (val == 0)
XOR(rr,rr);
asm_int(rr, ins->imm32(), /*canClobberCCs*/true);
}
void Assembler::asm_int(Register r, int32_t val, bool canClobberCCs)
{
if (val == 0 && canClobberCCs)
XOR(r, r);
else
LDi(rr, val);
LDi(r, val);
}
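// Aside (not in the patch): the canClobberCCs flag exists because "xor r,r"
// is the shortest way to load zero but, unlike "mov r,0", it rewrites EFLAGS.
// The zero idiom is therefore only used when the caller guarantees the
// condition codes are dead at this point, e.g. not between a compare and the
// branch that consumes it.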
void Assembler::asm_quad(LInsp ins)
@ -1182,7 +1387,7 @@ namespace nanojit
Register gr = registerAllocTmp(GpRegs);
SSE_CVTSI2SD(rr, gr);
SSE_XORPDr(rr,rr); // zero rr to ensure no dependency stalls
LDi(gr, (int)d);
asm_int(gr, (int)d, /*canClobberCCs*/true);
} else {
findMemFor(ins);
const int d = disp(ins);
@ -1329,7 +1534,7 @@ namespace nanojit
if (isKnownReg(r)) {
// arg goes in specific register
if (p->isconst()) {
LDi(r, p->imm32());
asm_int(r, p->imm32(), /*canClobberCCs*/true);
} else {
if (p->isUsed()) {
if (!p->hasKnownReg()) {
@ -1533,11 +1738,6 @@ namespace nanojit
}
}
Register Assembler::asm_prep_fcall(LInsp ins)
{
return prepResultReg(ins, rmask(FST0));
}
void Assembler::asm_u2f(LInsp ins)
{
// where our result goes
@ -1876,6 +2076,6 @@ namespace nanojit
SWAP(NIns*, codeEnd, exitEnd);
verbose_only( SWAP(size_t, codeBytes, exitBytes); )
}
#endif /* FEATURE_NANOJIT */
}

View File

@ -96,6 +96,7 @@ namespace nanojit
#define NJ_MAX_STACK_ENTRY 256
#define NJ_MAX_PARAMETERS 1
#define NJ_JTBL_SUPPORTED 1
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
// Preserve a 16-byte stack alignment, to support the use of
// SSE instructions like MOVDQA (if not by Tamarin itself,
@ -178,6 +179,7 @@ namespace nanojit
void nativePageReset();\
void nativePageSetup();\
void underrunProtect(int);\
void asm_int(Register r, int32_t val, bool canClobberCCs);\
void asm_stkarg(LInsp p, int32_t& stkd);\
void asm_farg(LInsp, int32_t& stkd);\
void asm_arg(ArgSize sz, LInsp p, Register r, int32_t& stkd);\
@ -185,14 +187,24 @@ namespace nanojit
void asm_fcmp(LIns *cond);\
NIns* asm_fbranch(bool, LIns*, NIns*);\
void asm_cmp(LIns *cond); \
void asm_div_mod(LIns *cond);
void asm_div_mod(LIns *cond); \
void asm_load(int d, Register r);
#define IMM8(i) \
_nIns -= 1; \
*((int8_t*)_nIns) = (int8_t)(i)
#define IMM16(i) \
_nIns -= 2; \
*((int16_t*)_nIns) = (int16_t)(i)
#define IMM32(i) \
_nIns -= 4; \
*((int32_t*)_nIns) = (int32_t)(i)
// XXX rearrange NanoAssert() expression to work around apparent gcc 4.3 bug:
// XXX "error: logical && with non-zero constant will always evaluate as true"
// underrunProtect(6) is necessary for worst-case
#define MODRMs(r,d,b,l,i) \
NanoAssert(unsigned(i)<8 && unsigned(b)<8 && unsigned(r)<8); \
if ((d) == 0 && (b) != EBP) { \
@ -210,6 +222,7 @@ namespace nanojit
*(--_nIns) = (uint8_t) ( 2<<6 | (r)<<3 | 4 ); \
}
// underrunProtect(6) is necessary for worst-case
#define MODRMm(r,d,b) \
NanoAssert(unsigned(r)<8 && ((b)==UnknownReg || unsigned(b)<8)); \
if ((b) == UnknownReg) {\
@ -440,37 +453,62 @@ namespace nanojit
asm_output("mov %s,%d(%s+%s*%c)",gpn(reg),disp,gpn(base),gpn(index),SIBIDX(scale)); \
} while (0)
// note: movzx/movsx are being output with an 8/16 suffix to indicate the size
// being loaded. this doesn't really match standard intel format (though is arguably
// terser and more obvious in this case) and would probably be nice to fix.
// (likewise, the 8/16 bit stores being output as "mov8" and "mov16" respectively.)
// load 16-bit, sign extend
#define LD16S(r,d,b) do { count_ld(); ALU2m(0x0fbf,r,d,b); asm_output("movsx %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
#define LD16S(r,d,b) do { count_ld(); ALU2m(0x0fbf,r,d,b); asm_output("movsx16 %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
#define LD16Sdm(r,addr) do { count_ld(); ALU2dm(0x0fbf,r,addr); asm_output("movsx16 %s,0(%lx)", gpn(r),(unsigned long)addr); } while (0)
#define LD16Ssib(r,disp,base,index,scale) do { \
count_ld(); \
ALU2sib(0x0fbf,r,base,index,scale,disp); \
asm_output("movsx16 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); \
} while (0)
// load 16-bit, zero extend
#define LD16Z(r,d,b) do { count_ld(); ALU2m(0x0fb7,r,d,b); asm_output("movsz %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
#define LD16Z(r,d,b) do { count_ld(); ALU2m(0x0fb7,r,d,b); asm_output("movzx16 %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
#define LD16Zdm(r,addr) do { count_ld(); ALU2dm(0x0fb7,r,addr); asm_output("movsz %s,0(%lx)", gpn(r),(unsigned long)addr); } while (0)
#define LD16Zdm(r,addr) do { count_ld(); ALU2dm(0x0fb7,r,addr); asm_output("movzx16 %s,0(%lx)", gpn(r),(unsigned long)addr); } while (0)
#define LD16Zsib(r,disp,base,index,scale) do { \
count_ld(); \
ALU2sib(0x0fb7,r,base,index,scale,disp); \
asm_output("movsz %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); \
asm_output("movzx16 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); \
} while (0)
// load 8-bit, zero extend
#define LD8Z(r,d,b) do { count_ld(); ALU2m(0x0fb6,r,d,b); asm_output("movzx %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
#define LD8Z(r,d,b) do { count_ld(); ALU2m(0x0fb6,r,d,b); asm_output("movzx8 %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
#define LD8Zdm(r,addr) do { \
count_ld(); \
NanoAssert((d)>=0&&(d)<=31); \
ALU2dm(0x0fb6,r,addr); \
asm_output("movzx %s,0(%lx)", gpn(r),(long unsigned)addr); \
asm_output("movzx8 %s,0(%lx)", gpn(r),(long unsigned)addr); \
} while(0)
#define LD8Zsib(r,disp,base,index,scale) do { \
count_ld(); \
NanoAssert((d)>=0&&(d)<=31); \
ALU2sib(0x0fb6,r,base,index,scale,disp); \
asm_output("movzx %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); \
asm_output("movzx8 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); \
} while(0)
// load 8-bit, sign extend
#define LD8S(r,d,b) do { count_ld(); ALU2m(0x0fbe,r,d,b); asm_output("movsx8 %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
#define LD8Sdm(r,addr) do { \
count_ld(); \
ALU2dm(0x0fbe,r,addr); \
asm_output("movsx8 %s,0(%lx)", gpn(r),(long unsigned)addr); \
} while(0)
#define LD8Ssib(r,disp,base,index,scale) do { \
count_ld(); \
ALU2sib(0x0fbe,r,base,index,scale,disp); \
asm_output("movsx8 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); \
} while(0)
#define LDi(r,i) do { \
count_ld();\
@ -480,14 +518,43 @@ namespace nanojit
*(--_nIns) = (uint8_t) (0xb8 | (r) ); \
asm_output("mov %s,%d",gpn(r),i); } while(0)
// quirk of x86-32: reg must be a/b/c/d for byte stores here
#define ST8(base,disp,reg) do { \
count_st();\
NanoAssert(((unsigned)reg)<4); \
ALUm(0x88,reg,disp,base); \
asm_output("mov8 %d(%s),%s",disp,base==UnknownReg?"0":gpn(base),gpn(reg)); } while(0)
#define ST16(base,disp,reg) do { \
count_st();\
ALUm16(0x89,reg,disp,base); \
asm_output("mov16 %d(%s),%s",disp,base==UnknownReg?"0":gpn(base),gpn(reg)); } while(0)
#define ST(base,disp,reg) do { \
count_st();\
ALUm(0x89,reg,disp,base); \
asm_output("mov %d(%s),%s",disp,base==UnknownReg?"0":gpn(base),gpn(reg)); } while(0)
#define ST8i(base,disp,imm) do { \
count_st();\
underrunProtect(8); \
IMM8(imm); \
MODRMm(0, disp, base); \
*(--_nIns) = 0xc6; \
asm_output("mov8 %d(%s),%d",disp,gpn(base),imm); } while(0)
#define ST16i(base,disp,imm) do { \
count_st();\
underrunProtect(10); \
IMM16(imm); \
MODRMm(0, disp, base); \
*(--_nIns) = 0xc7; \
*(--_nIns) = 0x66; \
asm_output("mov16 %d(%s),%d",disp,gpn(base),imm); } while(0)
#define STi(base,disp,imm) do { \
count_st();\
underrunProtect(12); \
underrunProtect(11); \
IMM32(imm); \
MODRMm(0, disp, base); \
*(--_nIns) = 0xc7; \
@ -680,12 +747,36 @@ namespace nanojit
asm_output("movq %d(%s),%s",(d),gpn(b),gpn(r)); \
} while(0)
#define SSE_LDSS(r,d,b)do { \
count_ld();\
SSEm(0xf30f10, (r)&7, (d), (b)); \
asm_output("movss %s,%d(%s)",gpn(r),d,gpn(b)); \
} while(0)
#define SSE_STSS(d,b,r)do { \
count_st();\
SSEm(0xf30f11, (r)&7, (d), (b)); \
asm_output("movss %d(%s),%s",(d),gpn(b),gpn(r)); \
} while(0)
#define SSE_CVTSI2SD(xr,gr) do{ \
count_fpu();\
SSE(0xf20f2a, (xr)&7, (gr)&7); \
asm_output("cvtsi2sd %s,%s",gpn(xr),gpn(gr)); \
} while(0)
#define SSE_CVTSD2SS(xr,gr) do{ \
count_fpu();\
SSE(0xf20f5a, (xr)&7, (gr)&7); \
asm_output("cvtsd2ss %s,%s",gpn(xr),gpn(gr)); \
} while(0)
#define SSE_CVTSS2SD(xr,gr) do{ \
count_fpu();\
SSE(0xf30f5a, (xr)&7, (gr)&7); \
asm_output("cvtss2sd %s,%s",gpn(xr),gpn(gr)); \
} while(0)
#define CVTDQ2PD(dstr,srcr) do{ \
count_fpu();\
SSE(0xf30fe6, (dstr)&7, (srcr)&7); \
@ -828,9 +919,11 @@ namespace nanojit
#define FLD1() do { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); } while(0)
#define FLDZ() do { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); } while(0)
#define FFREE(r) do { count_fpu(); FPU(0xddc0, r); asm_output("ffree %s",fpn(r)); } while(0)
#define FST32(p,d,b) do { count_stq(); FPUm(0xd902|(p), d, b); asm_output("fst%s32 %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0)
#define FSTQ(p,d,b) do { count_stq(); FPUm(0xdd02|(p), d, b); asm_output("fst%sq %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0)
#define FSTPQ(d,b) FSTQ(1,d,b)
#define FCOM(p,d,b) do { count_fpuld(); FPUm(0xdc02|(p), d, b); asm_output("fcom%s %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0)
#define FLD32(d,b) do { count_ldq(); FPUm(0xd900, d, b); asm_output("fld32 %d(%s)",d,gpn(b)); fpu_push();} while(0)
#define FLDQ(d,b) do { count_ldq(); FPUm(0xdd00, d, b); asm_output("fldq %d(%s)",d,gpn(b)); fpu_push();} while(0)
#define FILDQ(d,b) do { count_fpuld(); FPUm(0xdf05, d, b); asm_output("fildq %d(%s)",d,gpn(b)); fpu_push(); } while(0)
#define FILD(d,b) do { count_fpuld(); FPUm(0xdb00, d, b); asm_output("fild %d(%s)",d,gpn(b)); fpu_push(); } while(0)

View File

@ -35,11 +35,6 @@
#include "nanojit.h"
#ifdef SOLARIS
#include <ucontext.h>
#include <dlfcn.h>
#include <procfs.h>
#include <sys/stat.h>
extern "C" caddr_t _getfp(void);
typedef caddr_t maddr_ptr;
#else
typedef void *maddr_ptr;

View File

@ -35,11 +35,6 @@
#include "nanojit.h"
#ifdef SOLARIS
#include <ucontext.h>
#include <dlfcn.h>
#include <procfs.h>
#include <sys/stat.h>
extern "C" caddr_t _getfp(void);
typedef caddr_t maddr_ptr;
#else
typedef void *maddr_ptr;

View File

@ -249,14 +249,13 @@ namespace nanojit {
and below, so that callers can use bits 16 and above for
themselves. */
// TODO: add entries for the writer pipeline
LC_FragProfile = 1<<7, // collect per-frag usage counts
LC_Activation = 1<<6, // enable printActivationState
LC_Liveness = 1<<5, // (show LIR liveness analysis)
LC_ReadLIR = 1<<4, // As read from LirBuffer
LC_AfterSF = 1<<3, // After StackFilter
LC_RegAlloc = 1<<2, // stuff to do with reg alloc
LC_Assembly = 1<<1, // final assembly
LC_NoCodeAddrs = 1<<0 // (don't show code addresses on asm output)
LC_FragProfile = 1<<6, // collect per-frag usage counts
LC_Activation = 1<<5, // enable printActivationState
LC_Liveness = 1<<4, // (show LIR liveness analysis)
LC_ReadLIR = 1<<3, // As read from LirBuffer
LC_AfterSF = 1<<2, // After StackFilter
LC_RegAlloc = 1<<1, // stuff to do with reg alloc
LC_Assembly = 1<<0 // final assembly
};
class LogControl

View File

@ -52,7 +52,7 @@ script regress-354145-03.js
script regress-354145-04.js
script regress-354145-05.js
script regress-354145-07.js
skip-if(!xulRuntime.shell&&isDebugBuild) script regress-354998.js # very slow; test needs revising
script regress-354998.js
script regress-355474-02.js
script regress-355478.js
script regress-355569.js

View File

@ -5,3 +5,4 @@ script fe-002.js
script regress-518103.js
script regress-524826.js
script regress-528082.js
script regress-533254.js

View File

@ -0,0 +1,29 @@
/*
* Any copyright is dedicated to the Public Domain.
* http://creativecommons.org/licenses/publicdomain/
*/
var gTestfile = 'regress-533254.js';
var BUGNUMBER = 533254;
var summary = 'init-method late in table-big initialiser screwup';
printBugNumber(BUGNUMBER);
printStatus(summary);
function f() {
var proto = {p8:8};
var obj = {
p0:0, p1:1, p2:2, p3:3, p4:4, p5:5, p6:6, p7:7, p8:8, p9:9,
p10:0, p11:1, p12:2, p13:3, p14:4, p15:5, p16:6, p17:7, p18:8, p19:9,
m: function() { return 42; }
};
return obj;
}
var expect = f(),
actual = f();
expect += '';
actual += '';
reportCompare(expect, actual, summary);
printStatus("All tests passed!");

View File

@ -2,3 +2,5 @@ url-prefix ../../jsreftest.html?test=js1_4/Eval/
script eval-001.js
script eval-002.js
script eval-003.js
script regress-531037.js
script regress-531682.js

View File

@ -82,3 +82,4 @@ script regress-479740.js
script regress-481800.js
script regress-483749.js
script regress-499524.js
script regress-532491.js

View File

@ -0,0 +1,26 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
function mk() {
return (function () {});
}
function f() {
var j = 55;
var f = function () {
return j;
};
var g = function() {};
var a = [ mk(), f, g, mk(), mk() ];
for (var i = 0; i < 5; ++i) {
a[i].p = 99;
}
}
f();
for (var i = 0; i < 9; i++)
({__parent__: []} = []);

View File

@ -0,0 +1,27 @@
function f() {
var _76 = {};
for (var i = 0; i < arguments.length; i++) {
var typ = arguments[i];
_76[typ] = typ;
}
return function () {
for (var i = 0; i < arguments.length; i++) {
if (!(typeof (arguments[i]) in _76)) {
return false;
}
}
return true;
}
}
g = f("number", "boolean", "object");
g("a", "b", "c", "d", "e", "f", 2);
g(2, "a", "b", "c", "d", "e", "f", 2);
/*
* Don't assert --
* Assertion failed: frame entry -4 wasn't freed
* : _activation.entry[i] == 0 (../nanojit/Assembler.cpp:786)
*/

View File

@ -0,0 +1,2 @@
for (var i = 0; i < 9; i++)
({__parent__: []} = []);