From a2d4d5a7793257f284a6b24e8a34498e479159e1 Mon Sep 17 00:00:00 2001
From: Graydon Hoare
Date: Fri, 3 Apr 2009 16:30:46 -0700
Subject: [PATCH] Bug 484693 - Remove uses of alloca, r=gal.

---
 js/src/jscntxt.h    |  15 +++++
 js/src/jstracer.cpp | 145 +++++++++++++++++++++++++++++++++++++-------
 js/src/jstracer.h   |   9 +++
 3 files changed, 146 insertions(+), 23 deletions(-)

diff --git a/js/src/jscntxt.h b/js/src/jscntxt.h
index 05ed2aca1fd..4f1735a8891 100644
--- a/js/src/jscntxt.h
+++ b/js/src/jscntxt.h
@@ -172,6 +172,21 @@ struct JSTraceMonitor {
 
     /* Keep a list of recorders we need to abort on cache flush. */
     CLS(TraceRecorder) abortStack;
+
+    /* Arena-pool for storing temps used during compilation. */
+    JSArenaPool tempPool;
+
+    /*
+     * Native data area. We store the interpreter state and a native
+     * representation of all accessed properties of the global object
+     * here during trace execution.
+     */
+    void* nativeData;
+
+    /* Likewise, but for the native stack slots and FrameInfo* records. */
+    void* nativeStack;
+    void* nativeFrames;
+
 };
 
 typedef struct InterpStruct InterpStruct;

diff --git a/js/src/jstracer.cpp b/js/src/jstracer.cpp
index fb92df3faaf..1e962ac9c08 100644
--- a/js/src/jstracer.cpp
+++ b/js/src/jstracer.cpp
@@ -119,10 +119,13 @@ static const char tagChar[] = "OIDISIBI";
 #define MAX_CALLDEPTH 10
 
 /* Max native stack size. */
-#define MAX_NATIVE_STACK_SLOTS 1024
+#define MAX_NATIVE_STACK_SLOTS 512
 
 /* Max call stack size. */
-#define MAX_CALL_STACK_ENTRIES 64
+#define MAX_CALL_STACK_ENTRIES 32
+
+/* Max slots in the global area. */
+#define MAX_GLOBAL_SLOTS 1024
 
 /* Max memory you can allocate in a LIR buffer via a single skip() call. */
 #define MAX_SKIP_BYTES (NJ_PAGE_SIZE - LIR_FAR_SLOTS)
@@ -251,6 +254,60 @@ bool js_verboseStats = getenv("TRACEMONKEY") && strstr(getenv("TRACEMONKEY"), "s
 bool js_verboseAbort = getenv("TRACEMONKEY") && strstr(getenv("TRACEMONKEY"), "abort");
 #endif
 
+template <typename T>
+class TempArray
+{
+    JSContext *_cx;
+    void *_mark;
+    T *_ptr;
+
+    TempArray& operator=(TempArray &other) {}
+    TempArray(TempArray &other) {}
+
+public:
+
+    JS_ALWAYS_INLINE TempArray(JSContext *cx, size_t count=0)
+        : _cx(cx),
+          _mark(NULL),
+          _ptr(NULL)
+    {
+        acquire(count);
+    }
+
+    void JS_ALWAYS_INLINE acquire(size_t c)
+    {
+        if (c) {
+            JSTraceMonitor *tm = &JS_TRACE_MONITOR(_cx);
+            _mark = JS_ARENA_MARK(&tm->tempPool);
+            JS_ARENA_ALLOCATE_CAST(_ptr, T*, &tm->tempPool, c * sizeof(T));
+        } else {
+            release();
+        }
+    }
+
+    void JS_ALWAYS_INLINE release()
+    {
+        if (_ptr) {
+            JSTraceMonitor *tm = &JS_TRACE_MONITOR(_cx);
+            JS_ASSERT(_mark);
+            JS_ARENA_RELEASE(&tm->tempPool, _mark);
+        }
+        _ptr = NULL;
+        _mark = NULL;
+    }
+
+    JS_ALWAYS_INLINE ~TempArray()
+    {
+        release();
+    }
+
+    JS_ALWAYS_INLINE operator T* ()
+    {
+        return _ptr;
+    }
+};
+
 /* The entire VM shares one oracle. Collisions and concurrent updates are tolerated and worst case cause performance regressions.
  */
 static Oracle oracle;
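The TempArray class above is the heart of the patch: every stack-unsafe alloca call becomes an arena-backed RAII array whose lifetime still nests like a stack allocation. As a rough standalone model of the idea (not the SpiderMonkey code itself; ToyArena, ToyTempArray and main are invented stand-ins for JSArenaPool, TempArray and the JS_ARENA_* macros), the sketch below shows why mark/allocate/release over a bump arena gives the same LIFO behavior alloca did:

#include <cassert>
#include <cstddef>
#include <cstdio>

// Hypothetical stand-in for JSArenaPool: a fixed-size bump allocator.
struct ToyArena {
    char buf[4096];
    size_t used;
    ToyArena() : used(0) {}
    void* mark() { return buf + used; }        // like JS_ARENA_MARK
    void* alloc(size_t n) {                    // like JS_ARENA_ALLOCATE
        n = (n + 7) & ~size_t(7);              // keep 8-byte granularity
        assert(used + n <= sizeof(buf));
        void* p = buf + used;
        used += n;
        return p;
    }
    void release(void* m) { used = (char*)m - buf; }  // like JS_ARENA_RELEASE
};

// Same shape as the patch's TempArray: grab a mark, bump-allocate, and
// rewind to the mark on destruction. This works because the lifetimes
// nest (LIFO), exactly as alloca'd buffers did.
template <typename T>
class ToyTempArray {
    ToyArena& _arena;
    void* _mark;
    T* _ptr;
public:
    ToyTempArray(ToyArena& arena, size_t count)
        : _arena(arena), _mark(arena.mark()),
          _ptr((T*)arena.alloc(count * sizeof(T))) {}
    ~ToyTempArray() { _arena.release(_mark); }
    operator T*() { return _ptr; }             // lets it index like a raw array
};

int main() {
    ToyArena pool;
    {
        ToyTempArray<int> outer(pool, 8);      // replaces (int*)alloca(8 * sizeof(int))
        outer[0] = 42;
        {
            ToyTempArray<double> inner(pool, 4);  // nested scope, nested lifetime
            inner[3] = 1.5;
        }                                      // inner rewound to its mark here
        std::printf("%d\n", outer[0]);
    }                                          // outer rewound; pool is empty again
    assert(pool.used == 0);
    return 0;
}

Because each destructor rewinds to the mark taken at construction, nested TempArrays release in reverse order of acquisition, which is the discipline the surrounding code already obeyed when these buffers lived on the C stack.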
@@ -1057,9 +1114,8 @@ public:
         for (n = 0; n < callDepth; ++n) { fp = fp->down; }                   \
         entryFrame = fp;                                                     \
         unsigned frames = callDepth+1;                                       \
-        JSStackFrame** fstack =                                              \
-            (JSStackFrame**) alloca(frames * sizeof (JSStackFrame*));        \
-        JSStackFrame** fspstop = &fstack[frames];                            \
+        TempArray<JSStackFrame*> fstack(cx, frames);                         \
+        JSStackFrame** fspstop = fstack + frames;                            \
         JSStackFrame** fsp = fspstop-1;                                      \
         fp = currentFrame;                                                   \
         for (;; fp = fp->down) { *fsp-- = fp; if (fp == entryFrame) break; } \
@@ -1419,8 +1475,8 @@ done:
     for (unsigned n = 0; n < callDepth; ++n) { fp = fp->down; }
     entryFrame = fp;
     unsigned frames = callDepth+1;
-    JSStackFrame** fstack = (JSStackFrame **)alloca(frames * sizeof (JSStackFrame *));
-    JSStackFrame** fspstop = &fstack[frames];
+    TempArray<JSStackFrame*> fstack(cx, frames);
+    JSStackFrame** fspstop = fstack + frames;
     JSStackFrame** fsp = fspstop-1;
     fp = currentFrame;
     for (;; fp = fp->down) { *fsp-- = fp; if (fp == entryFrame) break; }
@@ -1836,10 +1892,12 @@ TraceRecorder::import(TreeInfo* treeInfo, LIns* sp, unsigned stackSlots, unsigne
 
     /* This is potentially the typemap of the side exit and thus shorter than the tree's
        global type map. */
-    if (ngslots < length)
+    TempArray<uint8> mem(cx);
+    if (ngslots < length) {
+        mem.acquire(length);
         mergeTypeMaps(&globalTypeMap/*out param*/, &ngslots/*out param*/,
-                      treeInfo->globalTypeMap(), length,
-                      (uint8*)alloca(sizeof(uint8) * length));
+                      treeInfo->globalTypeMap(), length, mem);
+    }
     JS_ASSERT(ngslots == treeInfo->nGlobalTypes());
 
     /* the first time we compile a tree this will be empty as we add entries lazily */
@@ -1884,6 +1942,8 @@ TraceRecorder::lazilyImportGlobalSlot(unsigned slot)
 {
     if (slot != uint16(slot)) /* we use a table of 16-bit ints, bail out if that's not enough */
         return false;
+    if (slot >= MAX_GLOBAL_SLOTS) /* we only support a certain number of global slots */
+        return false;
     jsval* vp = &STOBJ_GET_SLOT(globalObj, slot);
     if (known(vp))
         return true; /* we already have it */
@@ -1977,7 +2037,7 @@ TraceRecorder::checkForGlobalObjectReallocation()
         jsval* src = global_dslots;
         jsval* dst = globalObj->dslots;
         jsuint length = globalObj->dslots[-1] - JS_INITIAL_NSLOTS;
-        LIns** map = (LIns**)alloca(sizeof(LIns*) * length);
+        TempArray<LIns*> map(cx, length);
         for (jsuint n = 0; n < length; ++n) {
             map[n] = tracker.get(src);
             tracker.set(src++, NULL);
@@ -2100,7 +2160,7 @@ TraceRecorder::snapshot(ExitType exitType)
     /* Capture the type map into a temporary location. */
     unsigned ngslots = treeInfo->globalSlots->length();
     unsigned typemap_size = (stackSlots + ngslots) * sizeof(uint8);
-    uint8* typemap = (uint8*)alloca(typemap_size);
+    TempArray<uint8> typemap(cx, typemap_size);
     uint8* m = typemap;
 
     /* Determine the type of a store by looking at the current type of the actual value the
@@ -2320,8 +2380,8 @@ TraceRecorder::deduceTypeStability(Fragment* root_peer, Fragment** stable_peer,
      */
     bool success;
     unsigned stage_count;
-    jsval** stage_vals = (jsval**)alloca(sizeof(jsval*) * (treeInfo->typeMap.length()));
-    LIns** stage_ins = (LIns**)alloca(sizeof(LIns*) * (treeInfo->typeMap.length()));
+    TempArray<jsval*> stage_vals(cx, treeInfo->typeMap.length());
+    TempArray<LIns*> stage_ins(cx, treeInfo->typeMap.length());
 
     /* First run through and see if we can close ourselves - best case!
      */
     stage_count = 0;
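Note the two-step pattern in the import() hunk above: the TempArray is declared empty in the enclosing scope and acquire() runs only on the slow path, because mergeTypeMaps() hands back a pointer into the buffer through its out parameter and that pointer is used after the if block closes. A condensed hypothetical model of that hazard (mergeMaps, Temp and importModel are invented names; plain new/delete stands in for the arena pool):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Stand-in for mergeTypeMaps(): fills `scratch` and points *mapOut at it.
static void mergeMaps(uint8_t** mapOut, uint8_t* full, size_t len, uint8_t* scratch)
{
    std::memcpy(scratch, full, len);   // the real merge logic is more involved
    *mapOut = scratch;                 // out param now refers to the temp buffer
}

// Degenerate TempArray stand-in; heap allocation instead of the arena.
struct Temp {
    uint8_t* p;
    Temp() : p(0) {}
    void acquire(size_t n) { p = new uint8_t[n]; }
    ~Temp() { delete[] p; }
    operator uint8_t*() { return p; }
};

void importModel(uint8_t* treeMap, size_t length, size_t ngslots)
{
    uint8_t* globalTypeMap = treeMap;
    Temp mem;                          // empty: costs nothing on the fast path
    if (ngslots < length) {            // slow path: side-exit map is shorter
        mem.acquire(length);
        mergeMaps(&globalTypeMap, treeMap, length, mem);
    }
    // globalTypeMap may point into mem here, so mem must outlive this use;
    // that is why the patch hoists the declaration out of the if block.
    (void)globalTypeMap;
}

Had mem been declared inside the if block, its destructor would release the storage at the closing brace and globalTypeMap would dangle, whereas the old alloca buffer lived until function exit.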
@@ -2648,8 +2708,8 @@ TraceRecorder::joinEdgesToEntry(Fragmento* fragmento, VMFragment* peer_root)
         Fragment* peer;
         uint8* t1, *t2;
         UnstableExit* uexit, **unext;
-        uint32* stackDemotes = (uint32*)alloca(sizeof(uint32) * treeInfo->nStackTypes);
-        uint32* globalDemotes = (uint32*)alloca(sizeof(uint32) * treeInfo->nGlobalTypes());
+        TempArray<uint32> stackDemotes(cx, treeInfo->nStackTypes);
+        TempArray<uint32> globalDemotes(cx, treeInfo->nGlobalTypes());
 
         for (peer = peer_root; peer != NULL; peer = peer->peer) {
             if (!peer->code())
@@ -3879,11 +3939,13 @@ js_ExecuteTree(JSContext* cx, Fragment* f, uintN& inlineCallCount,
     JS_ASSERT(cx->builtinStatus == 0);
 
     JSTraceMonitor* tm = &JS_TRACE_MONITOR(cx);
-    JSObject* globalObj = JS_GetGlobalForObject(cx, cx->fp->scopeChain);
     TreeInfo* ti = (TreeInfo*)f->vmprivate;
     unsigned ngslots = ti->globalSlots->length();
     uint16* gslots = ti->globalSlots->data();
+#ifdef DEBUG
+    JSObject* globalObj = JS_GetGlobalForObject(cx, cx->fp->scopeChain);
     unsigned globalFrameSize = STOBJ_NSLOTS(globalObj);
+#endif
 
     /* Make sure the global object is sane. */
     JS_ASSERT(!ngslots || (OBJ_SHAPE(JS_GetGlobalForObject(cx, cx->fp->scopeChain)) ==
@@ -3895,8 +3957,9 @@ js_ExecuteTree(JSContext* cx, Fragment* f, uintN& inlineCallCount,
     if (!js_ReserveObjects(cx, MAX_CALL_STACK_ENTRIES))
         return NULL;
 
-    /* Setup the interpreter state block, which is followed by the native global frame. */
-    InterpState* state = (InterpState*)alloca(sizeof(InterpState) + (globalFrameSize+1)*sizeof(double));
+    /* Setup the interpreter state block, which is followed by the native global frame. */
+    InterpState* state = (InterpState*)tm->nativeData;
+
     state->cx = cx;
     state->inlineCallCountp = &inlineCallCount;
     state->innermostNestedGuardp = innermostNestedGuardp;
@@ -3909,13 +3972,13 @@ js_ExecuteTree(JSContext* cx, Fragment* f, uintN& inlineCallCount,
     double* global = (double*)(state+1);
 
     /* Setup the native stack frame. */
-    double stack_buffer[MAX_NATIVE_STACK_SLOTS];
+    double *stack_buffer = (double*)tm->nativeStack;
     state->stackBase = stack_buffer;
     state->sp = stack_buffer + (ti->nativeStackBase/sizeof(double));
     state->eos = stack_buffer + MAX_NATIVE_STACK_SLOTS;
 
     /* Setup the native call stack frame. */
-    FrameInfo* callstack_buffer[MAX_CALL_STACK_ENTRIES];
+    FrameInfo** callstack_buffer = (FrameInfo**)tm->nativeFrames;
     state->callstackBase = callstack_buffer;
     state->rp = callstack_buffer;
     state->eor = callstack_buffer + MAX_CALL_STACK_ENTRIES;
@@ -3927,7 +3990,7 @@ js_ExecuteTree(JSContext* cx, Fragment* f, uintN& inlineCallCount,
         return NULL;
 
 #ifdef DEBUG
-    memset(stack_buffer, 0xCD, sizeof(stack_buffer));
+    memset(stack_buffer, 0xCD, MAX_NATIVE_STACK_SLOTS * sizeof(double));
     memset(global, 0xCD, (globalFrameSize+1)*sizeof(double));
 #endif
@@ -4150,6 +4213,7 @@ LeaveTree(InterpState& state, VMSideExit* lr)
     unsigned ngslots = outermostTree->globalSlots->length();
     JS_ASSERT(ngslots == outermostTree->nGlobalTypes());
     uint8* globalTypeMap;
+    TempArray<uint8> mem(cx);
 
     /* Are there enough globals? This is the ideal fast path.
      */
     if (innermost->numGlobalSlots == ngslots) {
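With the buffers hoisted off the C stack, js_ExecuteTree lays the global doubles out immediately behind the InterpState header via (double*)(state+1), which is why the jstracer.h hunk below asserts that sizeof(InterpState) is a multiple of sizeof(double). A hypothetical standalone sketch of that single-block layout (Header and kSlots are invented; the real sizes come from InterpState and MAX_GLOBAL_SLOTS):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Invented header playing the role of InterpState. Its size must be a
// multiple of sizeof(double) so the doubles placed after it stay aligned,
// mirroring the patch's JS_STATIC_ASSERT.
struct Header {
    double* sp;
    void*   cx;
    int64_t flags;
};
static_assert(sizeof(Header) % sizeof(double) == 0,
              "globals after the header must stay 8-byte aligned");

int main()
{
    const size_t kSlots = 1024;   // plays the role of MAX_GLOBAL_SLOTS

    // One malloc for header plus global area, as js_InitJIT does for nativeData.
    void* block = std::malloc(sizeof(Header) + kSlots * sizeof(double));
    assert(block && !((std::uintptr_t)block & 7));  // same 8-byte check as the patch

    Header* state = (Header*)block;
    double* global = (double*)(state + 1);  // globals start right after the header
    global[0] = 3.14;                       // no overlap with *state
    global[kSlots - 1] = 2.71;              // last slot still inside the block

    std::free(block);
    return 0;
}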
@@ -4162,7 +4226,8 @@ LeaveTree(InterpState& state, VMSideExit* lr)
         TreeInfo* ti = (TreeInfo*)innermost->from->root->vmprivate;
         JS_ASSERT(ti->nGlobalTypes() == ngslots);
         JS_ASSERT(ti->nGlobalTypes() > innermost->numGlobalSlots);
-        globalTypeMap = (uint8*)alloca(ngslots * sizeof(uint8));
+        mem.acquire(ngslots);
+        globalTypeMap = mem;
         memcpy(globalTypeMap, getGlobalTypeMap(innermost), innermost->numGlobalSlots);
         memcpy(globalTypeMap + innermost->numGlobalSlots,
                ti->globalTypeMap() + innermost->numGlobalSlots,
@@ -4649,6 +4714,25 @@ js_InitJIT(JSTraceMonitor *tm)
         tm->reFragmento = fragmento;
         tm->reLirBuf = new (&gc) LirBuffer(fragmento, NULL);
     }
+
+    JS_INIT_ARENA_POOL(&tm->tempPool, "temps", 1024, sizeof(double), NULL);
+
+    // Shove the pool forward a touch so it does not thrash on its 0th arena boundary.
+    void *dummy;
+    JS_ARENA_ALLOCATE(dummy, &tm->tempPool, sizeof(double));
+
+    if (!tm->nativeData) {
+        tm->nativeData = malloc(sizeof(InterpState) + sizeof(double) * MAX_GLOBAL_SLOTS);
+        JS_ASSERT(tm->nativeData && !(intptr_t(tm->nativeData) & 7));
+    }
+    if (!tm->nativeStack) {
+        tm->nativeStack = malloc(sizeof(double) * MAX_NATIVE_STACK_SLOTS);
+        JS_ASSERT(tm->nativeStack);
+    }
+    if (!tm->nativeFrames) {
+        tm->nativeFrames = malloc(sizeof(FrameInfo*) * MAX_CALL_STACK_ENTRIES);
+        JS_ASSERT(tm->nativeFrames);
+    }
 #if !defined XP_WIN
     debug_only(memset(&jitstats, 0, sizeof(jitstats)));
 #endif
@@ -4703,6 +4787,21 @@ js_FinishJIT(JSTraceMonitor *tm)
         verbose_only(delete tm->reFragmento->labels;)
         delete tm->reFragmento;
     }
+
+    JS_FinishArenaPool(&tm->tempPool);
+
+    if (tm->nativeData) {
+        free(tm->nativeData);
+        tm->nativeData = NULL;
+    }
+    if (tm->nativeStack) {
+        free(tm->nativeStack);
+        tm->nativeStack = NULL;
+    }
+    if (tm->nativeFrames) {
+        free(tm->nativeFrames);
+        tm->nativeFrames = NULL;
+    }
 }
 
 void

diff --git a/js/src/jstracer.h b/js/src/jstracer.h
index 01d7a0c6810..af4906af947 100644
--- a/js/src/jstracer.h
+++ b/js/src/jstracer.h
@@ -341,6 +341,9 @@ public:
 # define EXECUTE_TREE_TIMER
 #endif
 
+#if defined(_WIN32) && defined(_MSC_VER)
+__declspec(align(8))
+#endif
 struct InterpState
 {
     double *sp;    // native stack pointer, stack[0] is spbase[0]
@@ -365,7 +368,13 @@ struct InterpState
 #ifdef DEBUG
     bool jsframe_pop_blocks_set_on_entry;
 #endif
+#ifdef __GNUC__
+}
+__attribute__ ((aligned (8)));
+#else
 };
+#endif
+JS_STATIC_ASSERT(!(sizeof(InterpState) % sizeof(double)));
 
 enum JSMonitorRecordingStatus {
     JSMRS_CONTINUE,
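The jstracer.h hunk exists because InterpState used to live on the stack, where alloca kept it adequately aligned, and now lives in a malloc'd block shared with an array of doubles; the patch therefore pins its alignment to 8 bytes using each compiler's syntax and statically asserts that its size is double-granular. A minimal standalone illustration of the same dual-syntax idiom (AlignedState is an invented name):

#include <cstdio>

// MSVC spells forced alignment before the struct, GCC after the closing
// brace; the patch uses both, guarded by the respective compiler macros.
#if defined(_MSC_VER)
__declspec(align(8))
#endif
struct AlignedState {
    char tag;          // alone, this would allow 1-byte alignment
}
#if defined(__GNUC__)
__attribute__ ((aligned (8)))
#endif
;

// Equivalent of the patch's JS_STATIC_ASSERT(!(sizeof(InterpState) % sizeof(double))):
static_assert(sizeof(AlignedState) % sizeof(double) == 0,
              "size must be a whole number of doubles");

int main()
{
    // Both the size and the required alignment round up to 8.
    std::printf("sizeof=%zu alignof=%zu\n",
                sizeof(AlignedState), alignof(AlignedState));
    return 0;
}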