Merge tracemonkey to mozilla-central.

2024-09-13 09:24:08 -07:00 · 2009-12-22 12:36:42 -08:00 · 2009-12-22 12:36:42 -08:00 · a82ab6ce10
commit a82ab6ce10
parent b567cced3a 51f333193a
20 changed files with 798 additions and 721 deletions
--- a/js/src/jstracer.cpp
+++ b/js/src/jstracer.cpp
@ -11858,6 +11858,8 @@ TraceRecorder::record_JSOP_GETELEM()
            }
            JS_ASSERT(v_ins);
            set(&lval, v_ins);
+            if (call)
+                set(&idx, obj_ins);
            return ARECORD_CONTINUE;
        }
        RETURN_STOP_A("can't reach arguments object's frame");
--- a/js/src/nanojit-import-rev
+++ b/js/src/nanojit-import-rev
@ -1 +1 @@
-a6a96927117a1e462a04784e1b621a3d85f61099
+b9640e93e1efe3c24e90afef0127e71ecef47ac4
--- a/js/src/nanojit/Assembler.cpp
+++ b/js/src/nanojit/Assembler.cpp
@ -92,7 +92,7 @@ namespace nanojit
        _activation.lowwatermark = 0;
        _activation.tos = 0;

-        for(uint32_t i=0; i<NJ_MAX_STACK_ENTRY; i++)
+        for (uint32_t i = 0; i < NJ_MAX_STACK_ENTRY; i++)
            _activation.entry[i] = 0;

        _branchStateMap.clear();
@ -134,31 +134,26 @@ namespace nanojit

            // Nothing free, steal one.
            // LSRA says pick the one with the furthest use.
-            LIns* vicIns = findVictim(allow);
-            NanoAssert(vicIns->isUsed());
-            r = vicIns->getReg();
+            LIns* vic = findVictim(allow);
+            NanoAssert(vic->isUsed());
+            r = vic->getReg();

-            _allocator.removeActive(r);
-            vicIns->setReg(UnknownReg);
-
-            // Restore vicIns.
-            verbose_only( if (_logc->lcbits & LC_Assembly) {
-                            setOutputForEOL("  <= restore %s",
-                            _thisfrag->lirbuf->names->formatRef(vicIns)); } )
-            asm_restore(vicIns, r);
+            evict(vic);

            // r ends up staying active, but the LIns defining it changes.
+            _allocator.removeFree(r);
            _allocator.addActive(r, ins);
            ins->setReg(r);
        }
+
        return r;
    }

    // Finds a register in 'allow' to store a temporary value (one not
    // associated with a particular LIns), evicting one if necessary.  The
    // returned register is marked as being free and so can only be safely
-    // used for code generation purposes until the register state is next
-    // inspected or updated.
+    // used for code generation purposes until the regstate is next inspected
+    // or updated.
    Register Assembler::registerAllocTmp(RegisterMask allow)
    {
        LIns dummyIns;
@ -241,7 +236,7 @@ namespace nanojit
        NanoAssert(ar.tos < NJ_MAX_STACK_ENTRY);
        LIns* ins = 0;
        RegAlloc* regs = &_allocator;
-        for(uint32_t i = ar.lowwatermark; i < ar.tos; i++)
+        for (uint32_t i = ar.lowwatermark; i < ar.tos; i++)
        {
            ins = ar.entry[i];
            if ( !ins )
@ -335,20 +330,23 @@ namespace nanojit
        return findRegFor(i, rmask(w));
    }

-    // The 'op' argument is the opcode of the instruction containing the
-    // displaced i[d] operand we're finding a register for. It is only used
-    // for differentiating classes of valid displacement in the native
-    // backends; a bit of a hack.
-    Register Assembler::getBaseReg(LOpcode op, LIns *i, int &d, RegisterMask allow)
+    // Like findRegFor(), but called when the LIns is used as a pointer.  It
+    // doesn't have to be called, findRegFor() can still be used, but it can
+    // optimize the LIR_alloc case by indexing off FP, thus saving the use of
+    // a GpReg.
+    //
+    Register Assembler::getBaseReg(LIns *i, int &d, RegisterMask allow)
    {
    #if !PEDANTIC
        if (i->isop(LIR_alloc)) {
-            int d2 = d;
-            d2 += findMemFor(i);
-            if (isValidDisplacement(op, d2)) {
-                d = d2;
-                return FP;
-            }
+            // The value of a LIR_alloc is a pointer to its stack memory,
+            // which is always relative to FP.  So we can just return FP if we
+            // also adjust 'd' (and can do so in a valid manner).  Or, in the
+            // PEDANTIC case, we can just assign a register as normal;
+            // findRegFor() will allocate the stack memory for LIR_alloc if
+            // necessary.
+            d += findMemFor(i);
+            return FP;
        }
    #else
        (void) d;
@ -357,7 +355,7 @@ namespace nanojit
    }

    // Finds a register in 'allow' to hold the result of 'ins'.  Used when we
-    // encounter a use of 'ins'.  The actions depend on the prior state of
+    // encounter a use of 'ins'.  The actions depend on the prior regstate of
    // 'ins':
    // - If the result of 'ins' is not in any register, we find an allowed
    //   one, evicting one if necessary.
@ -368,32 +366,30 @@ namespace nanojit
    Register Assembler::findRegFor(LIns* ins, RegisterMask allow)
    {
        if (ins->isop(LIR_alloc)) {
-            // never allocate a reg for this w/out stack space too
+            // Never allocate a reg for this without stack space too.
            findMemFor(ins);
        }

        Register r;

        if (!ins->isUsed()) {
-            // No reservation.  Create one, and do a fresh allocation.
+            // 'ins' is unused, ie. dead after this point.  Mark it as used
+            // and allocate it a register.
            ins->markAsUsed();
            RegisterMask prefer = hint(ins, allow);
            r = registerAlloc(ins, prefer);

        } else if (!ins->hasKnownReg()) {
-            // Existing reservation with an unknown register.  Do a fresh
-            // allocation.
+            // 'ins' is in a spill slot.  Allocate it a register.
            RegisterMask prefer = hint(ins, allow);
            r = registerAlloc(ins, prefer);

        } else if (rmask(r = ins->getReg()) & allow) {
-            // Existing reservation with a known register allocated, and
-            // that register is allowed.  Use it.
+            // 'ins' is in an allowed register.
            _allocator.useActive(r);

        } else {
-            // Existing reservation with a known register allocated, but
-            // the register is not allowed.
+            // 'ins' is in a register (r) that's not in 'allow'.
            RegisterMask prefer = hint(ins, allow);
 #ifdef NANOJIT_IA32
            if (((rmask(r)&XmmRegs) && !(allow&XmmRegs)) ||
@ -401,14 +397,14 @@ namespace nanojit
            {
                // x87 <-> xmm copy required
                //_nvprof("fpu-evict",1);
-                evict(r, ins);
+                evict(ins);
                r = registerAlloc(ins, prefer);
            } else
 #elif defined(NANOJIT_PPC)
            if (((rmask(r)&GpRegs) && !(allow&GpRegs)) ||
                ((rmask(r)&FpRegs) && !(allow&FpRegs)))
            {
-                evict(r, ins);
+                evict(ins);
                r = registerAlloc(ins, prefer);
            } else
 #endif
@ -421,28 +417,27 @@ namespace nanojit
                // instruction: mov eax, ecx
                // post-state:  eax(ins)
                //
-                _allocator.retire(r);
                Register s = r;
+                _allocator.retire(r);
                r = registerAlloc(ins, prefer);
+
+                // 'ins' is in 'allow', in register r (different to the old r);
+                //  s is the old r.
                if ((rmask(s) & GpRegs) && (rmask(r) & GpRegs)) {
-#ifdef NANOJIT_ARM
-                    MOV(s, r);  // ie. move 'ins' from its pre-state reg to its post-state reg
-#else
-                    MR(s, r);
-#endif
-                }
-                else {
+                    MR(s, r);   // move 'ins' from its pre-state reg (r) to its post-state reg (s)
+                } else {
                    asm_nongp_copy(s, r);
                }
            }
        }
+
        return r;
    }

    // Like findSpecificRegFor(), but only for when 'r' is known to be free
    // and 'ins' is known to not already have a register allocated.  Updates
-    // the register state (maintaining the invariants) but does not generate
-    // any code.  The return value is redundant, always being 'r', but it's
+    // the regstate (maintaining the invariants) but does not generate any
+    // code.  The return value is redundant, always being 'r', but it's
    // sometimes useful to have it there for assignments.
    Register Assembler::findSpecificRegForUnallocated(LIns* ins, Register r)
    {
@ -474,12 +469,11 @@ namespace nanojit
        return disp(ins);
    }

+    // XXX: this function is dangerous and should be phased out;
+    // see bug 513615.  Calls to it should be replaced with a
+    // prepareResultReg() / generate code / freeResourcesOf() sequence.
    Register Assembler::prepResultReg(LIns *ins, RegisterMask allow)
    {
-        // 'pop' is only relevant on i386 and if 'allow' includes FST0, in
-        // which case we have to pop if 'ins' isn't in FST0 in the post-state.
-        // This could be because 'ins' is unused, is in a spill slot, or is in
-        // an XMM register.
 #ifdef NANOJIT_IA32
        const bool pop = (allow & rmask(FST0)) &&
                         (ins->isUnusedOrHasUnknownReg() || ins->getReg() != FST0);
@ -491,6 +485,56 @@ namespace nanojit
        return r;
    }

+    // Finds a register in 'allow' to hold the result of 'ins'.  Also
+    // generates code to spill the result if necessary.  Called just prior to
+    // generating the code for 'ins' (because we generate code backwards).
+    //
+    // An example where no spill is necessary.  Lines marked '*' are those
+    // done by this function.
+    //
+    //   regstate:  R
+    //   asm:       define res into r
+    // * regstate:  R + r(res)
+    //              ...
+    //   asm:       use res in r
+    //
+    // An example where a spill is necessary.
+    //
+    //   regstate:  R
+    //   asm:       define res into r
+    // * regstate:  R + r(res)
+    // * asm:       spill res from r
+    //   regstate:  R
+    //              ...
+    //   asm:       restore res into r2
+    //   regstate:  R + r2(res) + other changes from "..."
+    //   asm:       use res in r2
+    //
+    Register Assembler::prepareResultReg(LIns *ins, RegisterMask allow)
+    {
+        // At this point, we know the result of 'ins' has a use later
+        // in the code.  (Exception: if 'ins' is a call to an impure function
+        // the return value may not be used, but 'ins' will still be present
+        // because it has side-effects.)  It may have had to be evicted, in
+        // which case the restore will have already been generated, so we now
+        // generate the spill (unless the restore was actually a
+        // rematerialize, in which case it's not necessary).
+        //
+        // As for 'pop':  it's only relevant on i386 and if 'allow' includes
+        // FST0, in which case we have to pop if 'ins' isn't in FST0 in the
+        // post-regstate.  This could be because 'ins' is unused, 'ins' is in
+        // a spill slot, or 'ins' is in an XMM register.
+#ifdef NANOJIT_IA32
+        const bool pop = (allow & rmask(FST0)) &&
+                         (ins->isUnusedOrHasUnknownReg() || ins->getReg() != FST0);
+#else
+        const bool pop = false;
+#endif
+        Register r = findRegFor(ins, allow);    // find a register in 'allow' for the result
+        asm_spilli(ins, pop);                   // generate the spill of the result, if necessary
+        return r;
+    }
+
    void Assembler::asm_spilli(LInsp ins, bool pop)
    {
        int d = disp(ins);
@ -501,10 +545,7 @@ namespace nanojit
        asm_spill(r, d, pop, ins->isQuad());
    }

-    // NOTE: Because this function frees slots on the stack, it is not safe to
-    // follow a call to this with a call to anything which might spill a
-    // register, as the stack can be corrupted. Refer to bug 495239 for a more
-    // detailed description.
+    // XXX: This function is error-prone and should be phased out; see bug 513615.
    void Assembler::freeRsrcOf(LIns *ins, bool pop)
    {
        Register r = ins->getReg();
@ -520,44 +561,62 @@ namespace nanojit
        ins->markAsClear();
    }

-    // Frees 'r' in the RegAlloc state, if it's not already free.
+    // Frees all record of registers and spill slots used by 'ins'.
+    void Assembler::freeResourcesOf(LIns *ins)
+    {
+        Register r = ins->getReg();
+        if (isKnownReg(r)) {
+            _allocator.retire(r);   // free any register associated with 'ins'
+        }
+        int arIndex = ins->getArIndex();
+        if (arIndex) {
+            NanoAssert(_activation.entry[arIndex] == ins);
+            arFree(arIndex);        // free any stack space associated with 'ins'
+        }
+        ins->markAsClear();
+    }
+
+    // Frees 'r' in the RegAlloc regstate, if it's not already free.
    void Assembler::evictIfActive(Register r)
    {
        if (LIns* vic = _allocator.getActive(r)) {  // non-null: 'r' is active and 'vic' is its instruction
-            evict(r, vic);
+            NanoAssert(vic->getReg() == r);     // evict() reads the register back from 'vic'
+            evict(vic);
        }
    }

-    // Frees 'r' (which currently holds the result of 'vic') in the RegAlloc
-    // state.  An example:
+    // Frees 'r' (which currently holds the result of 'vic') in the regstate.
+    // An example:
    //
-    //   pre-state:     eax(ld1)
+    //   pre-regstate:  eax(ld1)
    //   instruction:   mov ebx,-4(ebp) <= restore add1   # %ebx is dest
-    //   post-state:    eax(ld1) ebx(add1)
+    //   post-regstate: eax(ld1) ebx(add1)
    //
    // At run-time we are *restoring* 'add1' into %ebx, hence the call to
    // asm_restore().  But at regalloc-time we are moving backwards through
    // the code, so in that sense we are *evicting* 'add1' from %ebx.
    //
-    void Assembler::evict(Register r, LIns* vic)
+    void Assembler::evict(LIns* vic)
    {
        // Not free, need to steal.
        counter_increment(steals);

-        // Get vic's resv, check r matches.
+        Register r = vic->getReg();     // the register that currently holds the result of 'vic'
+
        NanoAssert(!_allocator.isFree(r));
        NanoAssert(vic == _allocator.getActive(r));
-        NanoAssert(r == vic->getReg());

-        // Free r.
-        _allocator.retire(r);
-        vic->setReg(UnknownReg);
-
-        // Restore vic.
        verbose_only( if (_logc->lcbits & LC_Assembly) {
                        setOutputForEOL("  <= restore %s",
                        _thisfrag->lirbuf->names->formatRef(vic)); } )
        asm_restore(vic, r);    // at run-time this restores 'vic' into r
+
+        _allocator.retire(r);   // free r in the regstate
+        if (vic->isUsed())      // only reset the register while 'vic' is still marked as used
+            vic->setReg(UnknownReg);
+
+        // At this point 'vic' is unused (if rematerializable), or in a spill
+        // slot (if not).
    }

    void Assembler::patch(GuardRecord *lr)
@ -932,30 +991,38 @@ namespace nanojit
              we spill it!)  In particular, words like "before" and "after"
              must be used very carefully -- their meaning at regalloc-time is
              opposite to their meaning at run-time.  We use the term
-              "pre-state" to refer to the register allocation state that
-              occurs prior to an instruction's execution, and "post-state" to
-              refer to the state that occurs after an instruction's execution,
-              e.g.:
+              "pre-regstate" to refer to the register allocation state that
+              occurs prior to an instruction's execution, and "post-regstate"
+              to refer to the state that occurs after an instruction's
+              execution, e.g.:

-                pre-state:     ebx(ins)
+                pre-regstate:  ebx(ins)
                instruction:   mov eax, ebx     // mov dst, src
-                post-state:    eax(ins)
+                post-regstate: eax(ins)

-              At run-time, the instruction updates the pre-state into the
-              post-state (and these states are the real machine's states).
-              But when allocating registers, because we go backwards, the
-              pre-state is constructed from the post-state (and these states
-              are those stored in RegAlloc).
+              At run-time, the instruction updates the pre-regstate into the
+              post-regstate (and these states are the real machine's
+              regstates).  But when allocating registers, because we go
+              backwards, the pre-regstate is constructed from the
+              post-regstate (and these regstates are those stored in
+              RegAlloc).
+
+              One consequence of generating code backwards is that we tend to
+              both spill and restore registers as early (at run-time) as
+              possible;  this is good for tolerating memory latency.  If we
+              generated code forwards, we would expect to both spill and
+              restore registers as late (at run-time) as possible;  this might
+              be better for reducing register pressure.
            */
            bool required = ins->isStmt() || ins->isUsed();
            if (!required)
                continue;

 #ifdef NJ_VERBOSE
-            // Output the register post-state and/or activation post-state.
+            // Output the post-regstate (registers and/or activation).
            // Because asm output comes in reverse order, doing it now means
            // it is printed after the LIR and asm, exactly when the
-            // post-state should be shown.
+            // post-regstate should be shown.
            if ((_logc->lcbits & LC_Assembly) && (_logc->lcbits & LC_Activation))
                printActivationState();
            if ((_logc->lcbits & LC_Assembly) && (_logc->lcbits & LC_RegAlloc))
@ -999,18 +1066,18 @@ namespace nanojit
                    break;
                }

-                // allocate some stack space.  the value of this instruction
+                // Allocate some stack space.  The value of this instruction
                // is the address of the stack space.
                case LIR_alloc: {
                    countlir_alloc();
                    NanoAssert(ins->getArIndex() != 0);
                    Register r = ins->getReg();
                    if (isKnownReg(r)) {
+                        asm_restore(ins, r);
                        _allocator.retire(r);
                        ins->setReg(UnknownReg);
-                        asm_restore(ins, r);
                    }
-                    freeRsrcOf(ins, 0);
+                    freeResourcesOf(ins);
                    break;
                }
                case LIR_int:
@ -1268,7 +1335,7 @@ namespace nanojit
                    uint32_t count = ins->getTableSize();
                    bool has_back_edges = false;

-                    // Merge the register states of labels we have already seen.
+                    // Merge the regstates of labels we have already seen.
                    for (uint32_t i = count; i-- > 0;) {
                        LIns* to = ins->getTarget(i);
                        LabelState *lstate = _labels.get(to);
@ -1649,9 +1716,9 @@ namespace nanojit
        return true;
    }

-    uint32_t Assembler::arReserve(LIns* l)
+    uint32_t Assembler::arReserve(LIns* ins)
    {
-        int32_t size = l->isop(LIR_alloc) ? (l->size()>>2) : l->isQuad() ? 2 : 1;
+        int32_t size = ins->isop(LIR_alloc) ? (ins->size()>>2) : ins->isQuad() ? 2 : 1;
        AR &ar = _activation;
        const int32_t tos = ar.tos;
        int32_t start = ar.lowwatermark;
@ -1663,7 +1730,7 @@ namespace nanojit
            for (i=start; i < NJ_MAX_STACK_ENTRY; i++) {
                if (ar.entry[i] == 0) {
                    // found a hole
-                    ar.entry[i] = l;
+                    ar.entry[i] = ins;
                    break;
                }
            }
@ -1675,8 +1742,8 @@ namespace nanojit
                    // found 2 adjacent aligned slots
                    NanoAssert(ar.entry[i] == 0);
                    NanoAssert(ar.entry[i+stack_direction(1)] == 0);
-                    ar.entry[i] = l;
-                    ar.entry[i+stack_direction(1)] = l;
+                    ar.entry[i] = ins;
+                    ar.entry[i+stack_direction(1)] = ins;
                    break;
                }
            }
@ -1690,7 +1757,7 @@ namespace nanojit
                    // place the entry in the table and mark the instruction with it
                    for (int32_t j=0; j < size; j++) {
                        NanoAssert(ar.entry[i+stack_direction(j)] == 0);
-                        ar.entry[i+stack_direction(j)] = l;
+                        ar.entry[i+stack_direction(j)] = ins;
                    }
                    break;
                }
@ -1720,10 +1787,11 @@ namespace nanojit
        RegAlloc *regs = &_allocator;
        for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
            if (rmask(r) & GpRegs) {
-                LIns *i = regs->getActive(r);
-                if (i) {
-                    if (canRemat(i)) {
-                        evict(r, i);
+                LIns *ins = regs->getActive(r);
+                if (ins) {
+                    if (canRemat(ins)) {
+                        NanoAssert(ins->getReg() == r);
+                        evict(ins);
                    }
                    else {
                        int32_t pri = regs->getPriority(r);
@ -1748,8 +1816,8 @@ namespace nanojit
            // get the highest priority var
            Register hi = tosave[0];
            if (!(rmask(hi) & SavedRegs)) {
-                LIns *i = regs->getActive(hi);
-                Register r = findRegFor(i, allow);
+                LIns *ins = regs->getActive(hi);
+                Register r = findRegFor(ins, allow);
                allow &= ~rmask(r);
            }
            else {
@ -1799,7 +1867,7 @@ namespace nanojit
    }
    
    /**
-     * Merge the current state of the registers with a previously stored version
+     * Merge the current regstate with a previously stored version.
     * current == saved    skip
     * current & saved     evict current, keep saved
     * current & !saved    evict current  (unionRegisterState would keep)
@ -1836,7 +1904,8 @@ namespace nanojit
                if (curins) {
                    //_nvprof("intersect-evict",1);
                    verbose_only( shouldMention=true; )
-                    evict(r, curins);
+                    NanoAssert(curins->getReg() == r);
+                    evict(curins);
                }

                #ifdef NANOJIT_IA32
@ -1857,7 +1926,6 @@ namespace nanojit

    /**
     * Merge the current state of the registers with a previously stored version.
-     *
     * current == saved    skip
     * current & saved     evict current, keep saved
     * current & !saved    keep current (intersectRegisterState would evict)
@ -1882,7 +1950,8 @@ namespace nanojit
                if (curins && savedins) {
                    //_nvprof("union-evict",1);
                    verbose_only( shouldMention=true; )
-                    evict(r, curins);
+                    NanoAssert(curins->getReg() == r);
+                    evict(curins);
                }

                #ifdef NANOJIT_IA32
@ -1909,9 +1978,9 @@ namespace nanojit
        // now reassign mainline registers
        for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
        {
-            LIns *i = saved.getActive(r);
-            if (i && !(skip&rmask(r)))
-                findSpecificRegFor(i, r);
+            LIns *ins = saved.getActive(r);
+            if (ins && !(skip & rmask(r)))
+                findSpecificRegFor(ins, r);
        }
    }

@ -1919,22 +1988,22 @@ namespace nanojit
    // furthest in the future.
    LIns* Assembler::findVictim(RegisterMask allow)
    {
-        NanoAssert(allow != 0);
-        LIns *i, *a=0;
+        NanoAssert(allow);      // at least one register must be allowed
+        LIns *ins, *vic = 0;
        int allow_pri = 0x7fffffff;     // priority of the current candidate; starts at the largest 32-bit signed value
-        for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
+        for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
        {
-            if ((allow & rmask(r)) && (i = _allocator.getActive(r)) != 0)
+            if ((allow & rmask(r)) && (ins = _allocator.getActive(r)) != 0)     // only allowed, active registers
            {
-                int pri = canRemat(i) ? 0 : _allocator.getPriority(r);
-                if (!a || pri < allow_pri) {
-                    a = i;
+                int pri = canRemat(ins) ? 0 : _allocator.getPriority(r);    // rematerializable values get priority 0
+                if (!vic || pri < allow_pri) {      // keep the first candidate with the lowest priority
+                    vic = ins;
                    allow_pri = pri;
                }
            }
        }
-        NanoAssert(a != 0);
-        return a;
+        NanoAssert(vic != 0);   // some allowed register must have been active
+        return vic;
    }

 #ifdef NJ_VERBOSE
--- a/js/src/nanojit/Assembler.h
+++ b/js/src/nanojit/Assembler.h
@ -272,16 +272,18 @@ namespace nanojit
            void        assignSaved(RegAlloc &saved, RegisterMask skip);
            LInsp       findVictim(RegisterMask allow);

-            Register    getBaseReg(LOpcode op, LIns *i, int &d, RegisterMask allow);
+            Register    getBaseReg(LIns *i, int &d, RegisterMask allow);
            int         findMemFor(LIns* i);
            Register    findRegFor(LIns* i, RegisterMask allow);
            void        findRegFor2(RegisterMask allow, LIns* ia, Register &ra, LIns *ib, Register &rb);
            Register    findSpecificRegFor(LIns* i, Register r);
            Register    findSpecificRegForUnallocated(LIns* i, Register r);
            Register    prepResultReg(LIns *i, RegisterMask allow);
+            Register    prepareResultReg(LIns *i, RegisterMask allow);
            void        freeRsrcOf(LIns *i, bool pop);
+            void        freeResourcesOf(LIns *ins);
            void        evictIfActive(Register r);
-            void        evict(Register r, LIns* vic);
+            void        evict(LIns* vic);
            RegisterMask hint(LIns*i, RegisterMask allow);

            void        codeAlloc(NIns *&start, NIns *&end, NIns *&eip
--- a/js/src/nanojit/LIR.cpp
+++ b/js/src/nanojit/LIR.cpp
@ -227,7 +227,6 @@ namespace nanojit

    LInsp LirBufWriter::insStore(LOpcode op, LInsp val, LInsp base, int32_t d)
    {
-        base = insDisp(op, base, d);
        LInsSti* insSti = (LInsSti*)_buf->makeRoom(sizeof(LInsSti));
        LIns*    ins    = insSti->getLIns();
        ins->initLInsSti(op, val, base, d);
@ -268,7 +267,6 @@ namespace nanojit

    LInsp LirBufWriter::insLoad(LOpcode op, LInsp base, int32_t d)
    {
-        base = insDisp(op, base, d);
        LInsLd* insLd = (LInsLd*)_buf->makeRoom(sizeof(LInsLd));
        LIns*   ins   = insLd->getLIns();
        ins->initLInsLd(op, base, d);
@ -1119,15 +1117,6 @@ namespace nanojit
        return _hashfinish(_hash32(hash, uint32_t(a)));
    }

-    inline uint32_t LInsHashSet::hashImmf(double d) {
-        union {
-            double d;
-            uint64_t u64;
-        } u;
-        u.d = d;
-        return hashImmq(u.u64);
-    }
-
    inline uint32_t LInsHashSet::hash1(LOpcode op, LInsp a) {
        uint32_t hash = _hash8(0,uint8_t(op));
        return _hashfinish(_hashptr(hash, a));
@ -1236,22 +1225,15 @@ namespace nanojit
        return k;
    }

-    LInsp LInsHashSet::findImmf(double a, uint32_t &k)
+    LInsp LInsHashSet::findImmf(uint64_t a, uint32_t &k)
    {
-        // We must pun 'a' as a uint64_t otherwise 0 and -0 will be treated as
-        // equal, which breaks things (see bug 527288).
-        union {
-            double d;
-            uint64_t u64;
-        } u;
-        u.d = a;
        LInsHashKind kind = LInsImmf;
        const uint32_t bitmask = (m_cap[kind] - 1) & ~0x1;
-        uint32_t hash = hashImmf(a) & bitmask;
+        uint32_t hash = hashImmq(a) & bitmask;
        uint32_t n = 7 << 1;
        LInsp ins;
        while ((ins = m_list[kind][hash]) != NULL &&
-            (ins->imm64() != u.u64))
+            (ins->imm64() != a))
        {
            NanoAssert(ins->isconstf());
            hash = (hash + (n += 2)) & bitmask;        // quadratic probe
@ -1263,7 +1245,7 @@ namespace nanojit
    uint32_t LInsHashSet::findImmf(LInsp ins)
    {
        uint32_t k;
-        findImmf(ins->imm64f(), k);
+        findImmf(ins->imm64(), k);     // look up by the raw 64-bit value; only 'k' is used, the returned LInsp is ignored
        return k;
    }

@ -1693,10 +1675,27 @@ namespace nanojit
    }

    void LirNameMap::formatImm(int32_t c, char *buf) {
-        if (c >= 10000 || c <= -10000)
-            VMPI_sprintf(buf,"#%s",labels->format((void*)c));
-        else
+        if (-10000 < c && c < 10000) {     // small magnitude (both bounds must hold): print as plain decimal
            VMPI_sprintf(buf,"%d", c);
+        } else {                            // large magnitude: the value may be an address
+#if !defined NANOJIT_64BIT
+            VMPI_sprintf(buf, "%s", labels->format((void*)c));     // 32-bit build: format it via the label map
+#else
+            VMPI_sprintf(buf, "0x%x", (unsigned int)c);            // 64-bit build: a 32-bit value is printed as hex
+#endif
+        }
+    }
+
+    void LirNameMap::formatImmq(uint64_t c, char *buf) {
+        if (-10000 < (int64_t)c && (int64_t)c < 10000) {   // compare as signed on both sides so small negatives stay decimal
+            VMPI_sprintf(buf, "%dLL", (int)c);              // value is within (-10000, 10000), so the narrowing is lossless
+        } else {
+#if defined NANOJIT_64BIT
+            VMPI_sprintf(buf, "%s", labels->format((void*)c));         // 64-bit build: format it via the label map
+#else
+            VMPI_sprintf(buf, "0x%llxLL", (long long unsigned int)c);  // cast so the argument matches %llx
+#endif
+        }
+    }

    const char* LirNameMap::formatRef(LIns *ref)
@ -1711,11 +1710,7 @@ namespace nanojit
            VMPI_sprintf(buf, "%g", ref->imm64f());
        }
        else if (ref->isconstq()) {
-            int64_t c = ref->imm64();
-            if (c >= 10000 || c <= -10000)
-                VMPI_sprintf(buf, "#0x%llxLL", (long long unsigned int) c);
-            else
-                VMPI_sprintf(buf, "%dLL", (int)c);
+            formatImmq(ref->imm64(), buf);
        }
        else if (ref->isconst()) {
            formatImm(ref->imm32(), buf);
@ -1747,31 +1742,24 @@ namespace nanojit
        char sbuf[4096];
        char *s = sbuf;
        LOpcode op = i->opcode();
-        switch(op)
+        switch (op)
        {
            case LIR_int:
-            {
                VMPI_sprintf(s, "%s = %s %d", formatRef(i), lirNames[op], i->imm32());
                break;
-            }

-            case LIR_alloc: {
+            case LIR_alloc:
                VMPI_sprintf(s, "%s = %s %d", formatRef(i), lirNames[op], i->size());
                break;
-            }

            case LIR_quad:
-            {
-                VMPI_sprintf(s, "%s = %s #%X:%X /* %g */", formatRef(i), lirNames[op],
-                             i->imm64_1(), i->imm64_0(), i->imm64f());
+                VMPI_sprintf(s, "%s = %s %X:%X", formatRef(i), lirNames[op],
+                             i->imm64_1(), i->imm64_0());
                break;
-            }

            case LIR_float:
-            {
-                VMPI_sprintf(s, "%s = %s #%g", formatRef(i), lirNames[op], i->imm64f());
+                VMPI_sprintf(s, "%s = %s %g", formatRef(i), lirNames[op], i->imm64f());
                break;
-            }

            case LIR_start:
            case LIR_regfence:
@ -1796,7 +1784,7 @@ namespace nanojit
                break;
            }

-            case LIR_jtbl: {
+            case LIR_jtbl:
                VMPI_sprintf(s, "%s %s [ ", lirNames[op], formatRef(i->oprnd1()));
                for (uint32_t j = 0, n = i->getTableSize(); j < n; j++) {
                    if (VMPI_strlen(sbuf) + 50 > sizeof(sbuf)) {
@ -1811,7 +1799,6 @@ namespace nanojit
                s += VMPI_strlen(s);
                VMPI_sprintf(s, "]");
                break;
-            }

            case LIR_param: {
                uint32_t arg = i->paramArg();
@ -1982,7 +1969,11 @@ namespace nanojit
        LInsp ins = exprs->findImm(imm, k);
        if (ins)
            return ins;
-        return exprs->add(LInsImm, out->insImm(imm), k);
+        ins = out->insImm(imm);
+        // We assume that downstream stages do not modify the instruction, so
+        // that we can insert 'ins' into slot 'k'.  Check this.
+        NanoAssert(ins->opcode() == LIR_int && ins->imm32() == imm);
+        return exprs->add(LInsImm, ins, k);
    }

    LIns* CseFilter::insImmq(uint64_t q)
@ -1991,16 +1982,27 @@ namespace nanojit
        LInsp ins = exprs->findImmq(q, k);
        if (ins)
            return ins;
-        return exprs->add(LInsImmq, out->insImmq(q), k);
+        ins = out->insImmq(q);
+        NanoAssert(ins->opcode() == LIR_quad && ins->imm64() == q);
+        return exprs->add(LInsImmq, ins, k);
    }

    LIns* CseFilter::insImmf(double d)
    {
        uint32_t k;
-        LInsp ins = exprs->findImmf(d, k);
+        // We must pun 'd' as a uint64_t otherwise 0 and -0 will be treated as
+        // equal, which breaks things (see bug 527288).
+        union {
+            double d;
+            uint64_t u64;
+        } u;
+        u.d = d;
+        LInsp ins = exprs->findImmf(u.u64, k);
        if (ins)
            return ins;
-        return exprs->add(LInsImmf, out->insImmf(d), k);
+        ins = out->insImmf(d);
+        NanoAssert(ins->opcode() == LIR_float && ins->imm64() == u.u64);
+        return exprs->add(LInsImmf, ins, k);
    }

    LIns* CseFilter::ins0(LOpcode v)
@ -2017,7 +2019,9 @@ namespace nanojit
            LInsp ins = exprs->find1(v, a, k);
            if (ins)
                return ins;
-            return exprs->add(LIns1, out->ins1(v,a), k);
+            ins = out->ins1(v, a);
+            NanoAssert(ins->opcode() == v && ins->oprnd1() == a);
+            return exprs->add(LIns1, ins, k);
        }
        return out->ins1(v,a);
    }
@ -2029,7 +2033,9 @@ namespace nanojit
            LInsp ins = exprs->find2(v, a, b, k);
            if (ins)
                return ins;
-            return exprs->add(LIns2, out->ins2(v,a,b), k);
+            ins = out->ins2(v, a, b);
+            NanoAssert(ins->opcode() == v && ins->oprnd1() == a && ins->oprnd2() == b);
+            return exprs->add(LIns2, ins, k);
        }
        return out->ins2(v,a,b);
    }
@ -2041,7 +2047,10 @@ namespace nanojit
        LInsp ins = exprs->find3(v, a, b, c, k);
        if (ins)
            return ins;
-        return exprs->add(LIns3, out->ins3(v,a,b,c), k);
+        ins = out->ins3(v, a, b, c);
+        NanoAssert(ins->opcode() == v && ins->oprnd1() == a && ins->oprnd2() == b &&
+                                                               ins->oprnd3() == c);
+        return exprs->add(LIns3, ins, k);
    }

    LIns* CseFilter::insLoad(LOpcode v, LInsp base, int32_t disp)
@ -2051,9 +2060,11 @@ namespace nanojit
            LInsp ins = exprs->findLoad(v, base, disp, k);
            if (ins)
                return ins;
-            return exprs->add(LInsLoad, out->insLoad(v,base,disp), k);
+            ins = out->insLoad(v, base, disp);
+            NanoAssert(ins->opcode() == v && ins->oprnd1() == base && ins->disp() == disp);
+            return exprs->add(LInsLoad, ins, k);
        }
-        return out->insLoad(v,base,disp);
+        return out->insLoad(v, base, disp);
    }

    LInsp CseFilter::insGuard(LOpcode v, LInsp c, GuardRecord *gr)
@ -2081,7 +2092,9 @@ namespace nanojit
            LInsp ins = exprs->find1(v, c, k);
            if (ins)
                return 0;
-            return exprs->add(LIns1, out->insGuard(v,c,gr), k);
+            ins = out->insGuard(v, c, gr);
+            NanoAssert(ins->opcode() == v && ins->oprnd1() == c);
+            return exprs->add(LIns1, ins, k);
        }
        return out->insGuard(v, c, gr);
    }
@ -2094,7 +2107,9 @@ namespace nanojit
            LInsp ins = exprs->findCall(ci, argc, args, k);
            if (ins)
                return ins;
-            return exprs->add(LInsCall, out->insCall(ci, args), k);
+            ins = out->insCall(ci, args);
+            NanoAssert(ins->isCall() && ins->callInfo() == ci && argsmatch(ins, argc, args));
+            return exprs->add(LInsCall, ins, k);
        }
        return out->insCall(ci, args);
    }
@ -2255,7 +2270,8 @@ namespace nanojit
                    LInsp ins = exprs->findLoad(v, base, disp, k);
                    if (ins)
                        return ins;
-                    return exprs->add(LInsLoad, out->insLoad(v,base,disp), k);
+                    ins = out->insLoad(v, base, disp);
+                    return exprs->add(LInsLoad, ins, k);
                }
                default:
                    // fall thru
@ -2318,7 +2334,7 @@ namespace nanojit
            const void *end = (const char*)start + e->size;
            const char *name = e->name;
            if (p == start) {
-                VMPI_sprintf(b,"%p %s",p,name);
+                VMPI_sprintf(b, "%p %s", p, name);
                return dup(b);
            }
            else if (p > start && p < end) {
--- a/js/src/nanojit/LIR.h
+++ b/js/src/nanojit/LIR.h
@ -51,6 +51,51 @@
 */
 namespace nanojit
 {
+    enum LOpcode
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+#pragma warning(disable:4480) // nonstandard extension used: specifying underlying type for enum
+          : unsigned
+#endif
+    {
+#define OPDEF(op, number, repKind, retType) \
+        LIR_##op = (number),
+#include "LIRopcode.tbl"
+        LIR_sentinel,
+#undef OPDEF
+
+#ifdef NANOJIT_64BIT
+#  define PTR_SIZE(a,b)  b
+#else
+#  define PTR_SIZE(a,b)  a
+#endif
+
+        // pointer op aliases
+        LIR_ldp     = PTR_SIZE(LIR_ld,     LIR_ldq),
+        LIR_ldcp    = PTR_SIZE(LIR_ldc,    LIR_ldqc),
+        LIR_stpi    = PTR_SIZE(LIR_sti,    LIR_stqi),
+        LIR_piadd   = PTR_SIZE(LIR_add,    LIR_qiadd),
+        LIR_piand   = PTR_SIZE(LIR_and,    LIR_qiand),
+        LIR_pilsh   = PTR_SIZE(LIR_lsh,    LIR_qilsh),
+        LIR_pirsh   = PTR_SIZE(LIR_rsh,    LIR_qirsh),
+        LIR_pursh   = PTR_SIZE(LIR_ush,    LIR_qursh),
+        LIR_pcmov   = PTR_SIZE(LIR_cmov,   LIR_qcmov),
+        LIR_pior    = PTR_SIZE(LIR_or,     LIR_qior),
+        LIR_pxor    = PTR_SIZE(LIR_xor,    LIR_qxor),
+        LIR_addp    = PTR_SIZE(LIR_iaddp,  LIR_qaddp),
+        LIR_peq     = PTR_SIZE(LIR_eq,     LIR_qeq),
+        LIR_plt     = PTR_SIZE(LIR_lt,     LIR_qlt),
+        LIR_pgt     = PTR_SIZE(LIR_gt,     LIR_qgt),
+        LIR_ple     = PTR_SIZE(LIR_le,     LIR_qle),
+        LIR_pge     = PTR_SIZE(LIR_ge,     LIR_qge),
+        LIR_pult    = PTR_SIZE(LIR_ult,    LIR_qult),
+        LIR_pugt    = PTR_SIZE(LIR_ugt,    LIR_qugt),
+        LIR_pule    = PTR_SIZE(LIR_ule,    LIR_qule),
+        LIR_puge    = PTR_SIZE(LIR_uge,    LIR_quge),
+        LIR_alloc   = PTR_SIZE(LIR_ialloc, LIR_qalloc),
+        LIR_pcall   = PTR_SIZE(LIR_icall,  LIR_qcall),
+        LIR_param   = PTR_SIZE(LIR_iparam, LIR_qparam)
+    };
+
    struct GuardRecord;
    struct SideExit;

@ -955,14 +1000,6 @@ namespace nanojit

    class LirWriter
    {
-    protected:
-        LInsp insDisp(LOpcode op, LInsp base, int32_t& d) {
-            if (!isValidDisplacement(op, d)) {
-                base = ins2i(LIR_piadd, base, d);
-                d = 0;
-            }
-            return base;
-        }
    public:
        LirWriter *out;

@ -1100,6 +1137,7 @@ namespace nanojit
        };
        HashMap<LInsp, Entry*> names;
        void formatImm(int32_t c, char *buf);
+        void formatImmq(uint64_t c, char *buf);

    public:
        LabelMap *labels;
@ -1263,7 +1301,6 @@ namespace nanojit

        static uint32_t hashImm(int32_t);
        static uint32_t hashImmq(uint64_t);
-        static uint32_t hashImmf(double);
        static uint32_t hash1(LOpcode v, LInsp);
        static uint32_t hash2(LOpcode v, LInsp, LInsp);
        static uint32_t hash3(LOpcode v, LInsp, LInsp, LInsp);
@ -1290,7 +1327,7 @@ namespace nanojit
        // These public versions are used before an LIns has been created.
        LInsp findImm(int32_t a, uint32_t &k);
        LInsp findImmq(uint64_t a, uint32_t &k);
-        LInsp findImmf(double d, uint32_t &k);
+        LInsp findImmf(uint64_t d, uint32_t &k);
        LInsp find1(LOpcode v, LInsp a, uint32_t &k);
        LInsp find2(LOpcode v, LInsp a, LInsp b, uint32_t &k);
        LInsp find3(LOpcode v, LInsp a, LInsp b, LInsp c, uint32_t &k);
--- a/js/src/nanojit/Native.h
+++ b/js/src/nanojit/Native.h
@ -54,53 +54,6 @@
 #  define IF_PEDANTIC(...)
 #endif

-namespace nanojit {
-    enum LOpcode
-#if defined(_MSC_VER) && _MSC_VER >= 1400
-#pragma warning(disable:4480) // nonstandard extension used: specifying underlying type for enum
-          : unsigned
-#endif
-    {
-#define OPDEF(op, number, repKind, retType) \
-        LIR_##op = (number),
-#include "LIRopcode.tbl"
-        LIR_sentinel,
-#undef OPDEF
-
-#ifdef NANOJIT_64BIT
-#  define PTR_SIZE(a,b)  b
-#else
-#  define PTR_SIZE(a,b)  a
-#endif
-
-        // pointer op aliases
-        LIR_ldp     = PTR_SIZE(LIR_ld,     LIR_ldq),
-        LIR_ldcp    = PTR_SIZE(LIR_ldc,    LIR_ldqc),
-        LIR_stpi    = PTR_SIZE(LIR_sti,    LIR_stqi),
-        LIR_piadd   = PTR_SIZE(LIR_add,    LIR_qiadd),
-        LIR_piand   = PTR_SIZE(LIR_and,    LIR_qiand),
-        LIR_pilsh   = PTR_SIZE(LIR_lsh,    LIR_qilsh),
-        LIR_pirsh   = PTR_SIZE(LIR_rsh,    LIR_qirsh),
-        LIR_pursh   = PTR_SIZE(LIR_ush,    LIR_qursh),
-        LIR_pcmov   = PTR_SIZE(LIR_cmov,   LIR_qcmov),
-        LIR_pior    = PTR_SIZE(LIR_or,     LIR_qior),
-        LIR_pxor    = PTR_SIZE(LIR_xor,    LIR_qxor),
-        LIR_addp    = PTR_SIZE(LIR_iaddp,  LIR_qaddp),
-        LIR_peq     = PTR_SIZE(LIR_eq,     LIR_qeq),
-        LIR_plt     = PTR_SIZE(LIR_lt,     LIR_qlt),
-        LIR_pgt     = PTR_SIZE(LIR_gt,     LIR_qgt),
-        LIR_ple     = PTR_SIZE(LIR_le,     LIR_qle),
-        LIR_pge     = PTR_SIZE(LIR_ge,     LIR_qge),
-        LIR_pult    = PTR_SIZE(LIR_ult,    LIR_qult),
-        LIR_pugt    = PTR_SIZE(LIR_ugt,    LIR_qugt),
-        LIR_pule    = PTR_SIZE(LIR_ule,    LIR_qule),
-        LIR_puge    = PTR_SIZE(LIR_uge,    LIR_quge),
-        LIR_alloc   = PTR_SIZE(LIR_ialloc, LIR_qalloc),
-        LIR_pcall   = PTR_SIZE(LIR_icall,  LIR_qcall),
-        LIR_param   = PTR_SIZE(LIR_iparam, LIR_qparam)
-    };
-}
-
 #ifdef NANOJIT_IA32
 #include "Nativei386.h"
 #elif defined(NANOJIT_ARM)
--- a/js/src/nanojit/NativeARM.cpp
+++ b/js/src/nanojit/NativeARM.cpp
@ -1227,11 +1227,11 @@ Assembler::asm_store32(LOpcode op, LIns *value, int dr, LIns *base)
        findRegFor2(GpRegs, value, ra, base, rb);
    }

-    if (!isS12(dr)) {
+    if (isU12(-dr) || isU12(dr)) {
+        STR(ra, rb, dr);
+    } else {
        STR(ra, IP, 0);
        asm_add_imm(IP, rb, dr);
-    } else {
-        STR(ra, rb, dr);
    }
 }

@ -1912,7 +1912,7 @@ Assembler::asm_ld_imm(Register d, int32_t imm, bool chk /* = true */)
        ++_nSlot;
        offset += sizeof(_nSlot);
    }
-    NanoAssert(isS12(offset) && (offset <= -8));
+    NanoAssert((isU12(-offset) || isU12(offset)) && (offset <= -8));

    // Write the literal.
    *(_nSlot++) = imm;
@ -2194,15 +2194,11 @@ Assembler::asm_cmp(LIns *cond)
    // ready to issue the compare
    if (rhs->isconst()) {
        int c = rhs->imm32();
+        Register r = findRegFor(lhs, GpRegs);
        if (c == 0 && cond->isop(LIR_eq)) {
-            Register r = findRegFor(lhs, GpRegs);
-            TST(r,r);
-            // No 64-bit immediates so fall-back to below
-        } else if (!rhs->isQuad()) {
-            Register r = getBaseReg(condop, lhs, c, GpRegs);
-            asm_cmpi(r, c);
+            TST(r, r);
        } else {
-            NanoAssert(0);
+            asm_cmpi(r, c);
        }
    } else {
        Register ra, rb;
@ -2490,22 +2486,39 @@ Assembler::asm_load32(LInsp ins)
    int d = ins->disp();

    Register rr = prepResultReg(ins, GpRegs);
-    Register ra = getBaseReg(op, base, d, GpRegs);
+    Register ra = getBaseReg(base, d, GpRegs);

-    switch(op) {
+    switch (op) {
        case LIR_ldzb:
        case LIR_ldcb:
-            LDRB(rr, ra, d);
+            if (isU12(-d) || isU12(d)) {
+                LDRB(rr, ra, d);
+            } else {
+                LDRB(rr, IP, 0);
+                asm_add_imm(IP, ra, d);
+            }
            return;
        case LIR_ldzs:
        case LIR_ldcs:
-            // these are expected to be 2 or 4-byte aligned
-            LDRH(rr, ra, d);
+            // These are expected to be 2-byte aligned.  (Not all ARM machines
+            // can handle unaligned accesses.)
+            // Similar to the ldcb/ldzb case, but the max offset is smaller.
+            if (isU8(-d) || isU8(d)) {
+                LDRH(rr, ra, d);
+            } else {
+                LDRH(rr, IP, 0);
+                asm_add_imm(IP, ra, d);
+            }
            return;
        case LIR_ld:
        case LIR_ldc:
-            // these are expected to be 4-byte aligned
-            LDR(rr, ra, d);
+            // These are expected to be 4-byte aligned.
+            if (isU12(-d) || isU12(d)) {
+                LDR(rr, ra, d);
+            } else {
+                LDR(rr, IP, 0);
+                asm_add_imm(IP, ra, d);
+            }
            return;
        case LIR_ldsb:
        case LIR_ldss:
--- a/js/src/nanojit/NativeARM.h
+++ b/js/src/nanojit/NativeARM.h
@ -182,15 +182,8 @@ static const RegisterMask FpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 | 1<<D5
 static const RegisterMask GpRegs = 0xFFFF;
 static const RegisterMask AllowableFlagRegs = 1<<R0 | 1<<R1 | 1<<R2 | 1<<R3 | 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10;

-#define isS12(offs) ((-(1<<12)) <= (offs) && (offs) < (1<<12))
 #define isU12(offs) (((offs) & 0xfff) == (offs))

-static inline bool isValidDisplacement(LOpcode op, int32_t d) {
-    if (op == LIR_ldcs)
-        return (d >= 0) ? isU8(d) : isU8(-d);
-    return isS12(d);
-}
-
 #define IsFpReg(_r)     ((rmask((Register)_r) & (FpRegs)) != 0)
 #define IsGpReg(_r)     ((rmask((Register)_r) & (GpRegs)) != 0)
 #define FpRegNum(_fpr)  ((_fpr) - FirstFloatReg)
@ -664,7 +657,7 @@ enum {

 #define STR(_d,_n,_off) do {                                            \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n));                         \
-        NanoAssert(isS12(_off));                                        \
+        NanoAssert(isU12(_off) || isU12(-_off));                        \
        underrunProtect(4);                                             \
        if ((_off)<0)   *(--_nIns) = (NIns)( COND_AL | (0x50<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
        else            *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
--- a/js/src/nanojit/NativePPC.cpp
+++ b/js/src/nanojit/NativePPC.cpp
@ -144,7 +144,7 @@ namespace nanojit
        LIns* base = ins->oprnd1();
        int d = ins->disp();
        Register rr = prepResultReg(ins, GpRegs);
-        Register ra = getBaseReg(ins->opcode(), base, d, GpRegs);
+        Register ra = getBaseReg(base, d, GpRegs);

        switch(ins->opcode()) {
            case LIR_ldzb:
@ -204,7 +204,7 @@ namespace nanojit
        }

        Register rs = findRegFor(value, GpRegs);
-        Register ra = value == base ? rs : getBaseReg(LIR_sti, base, dr, GpRegs & ~rmask(rs));
+        Register ra = value == base ? rs : getBaseReg(base, dr, GpRegs & ~rmask(rs));

    #if !PEDANTIC
        if (isS16(dr)) {
@ -250,7 +250,7 @@ namespace nanojit
    #endif

        int dr = ins->disp();
-        Register ra = getBaseReg(ins->opcode(), base, dr, GpRegs);
+        Register ra = getBaseReg(base, dr, GpRegs);

    #ifdef NANOJIT_64BIT
        if (rmask(rr) & GpRegs) {
@ -325,7 +325,7 @@ namespace nanojit
                return;
        }

-        Register ra = getBaseReg(LIR_stqi, base, dr, GpRegs);
+        Register ra = getBaseReg(base, dr, GpRegs);

    #if !PEDANTIC && !defined NANOJIT_64BIT
        if (value->isop(LIR_quad) && isS16(dr) && isS16(dr+4)) {
--- a/js/src/nanojit/NativePPC.h
+++ b/js/src/nanojit/NativePPC.h
@ -258,9 +258,6 @@ namespace nanojit
    static const int NumSavedRegs = 18; // R13-R30
 #endif

-    static inline bool isValidDisplacement(LOpcode, int32_t) {
-        return true;
-    }
    static inline bool IsFpReg(Register r) {
        return r >= F0;
    }
--- a/js/src/nanojit/NativeSparc.cpp
+++ b/js/src/nanojit/NativeSparc.cpp
@ -325,7 +325,7 @@ namespace nanojit
        underrunProtect(20);
        if (value->isconst())
            {
-                Register rb = getBaseReg(LIR_sti, base, dr, GpRegs);
+                Register rb = getBaseReg(base, dr, GpRegs);
                int c = value->imm32();
                STW32(L2, dr, rb);
                SET32(c, L2);
@ -584,12 +584,11 @@ namespace nanojit
        if (rhs->isconst())
            {
                int c = rhs->imm32();
+                Register r = findRegFor(lhs, GpRegs);
                if (c == 0 && cond->isop(LIR_eq)) {
-                    Register r = findRegFor(lhs, GpRegs);
                    ANDCC(r, r, G0);
                }
-                else if (!rhs->isQuad()) {
-                    Register r = getBaseReg(condop, lhs, c, GpRegs);
+                else {
                    SUBCC(r, L2, G0);
                    SET32(c, L2);
                }
@ -770,7 +769,7 @@ namespace nanojit
        LIns* base = ins->oprnd1();
        int d = ins->disp();
        Register rr = prepResultReg(ins, GpRegs);
-        Register ra = getBaseReg(ins->opcode(), base, d, GpRegs);
+        Register ra = getBaseReg(base, d, GpRegs);
        switch(op) {
            case LIR_ldzb:
            case LIR_ldcb:
--- a/js/src/nanojit/NativeSparc.h
+++ b/js/src/nanojit/NativeSparc.h
@ -191,10 +191,6 @@ namespace nanojit
    1<<F22;
    static const RegisterMask AllowableFlagRegs = GpRegs;

-    static inline bool isValidDisplacement(LOpcode, int32_t) {
-        return true;
-    }
-
    verbose_only( extern const char* regNames[]; )

 #define DECLARE_PLATFORM_STATS()
--- a/js/src/nanojit/NativeX64.cpp
+++ b/js/src/nanojit/NativeX64.cpp
@ -631,7 +631,7 @@ namespace nanojit
            // To make sure floating point operations stay in FPU registers
            // as much as possible, make sure that only a few opcodes are
            // reserving GPRs.
-            NanoAssert(a->isop(LIR_quad) || a->isop(LIR_ldq) || a->isop(LIR_ldqc)|| a->isop(LIR_u2f) || a->isop(LIR_float));
+            NanoAssert(a->isop(LIR_quad) || a->isop(LIR_ldq) || a->isop(LIR_ldqc)|| a->isop(LIR_ld32f) || a->isop(LIR_ldc32f)|| a->isop(LIR_u2f) || a->isop(LIR_float));
            allow &= ~rmask(rr);
            ra = findRegFor(a, allow);
        } else {
@ -1368,7 +1368,7 @@ namespace nanojit
    void Assembler::regalloc_load(LIns *ins, RegisterMask allow, Register &rr, int32_t &dr, Register &rb) {
        dr = ins->disp();
        LIns *base = ins->oprnd1();
-        rb = getBaseReg(ins->opcode(), base, dr, BaseRegs);
+        rb = getBaseReg(base, dr, BaseRegs);
        if (ins->isUnusedOrHasUnknownReg() || !(allow & rmask(ins->getReg()))) {
            rr = prepResultReg(ins, allow & ~rmask(rb));
        } else {
@ -1446,7 +1446,7 @@ namespace nanojit
    void Assembler::asm_store64(LOpcode op, LIns *value, int d, LIns *base) {
        NanoAssert(value->isQuad());

-        Register b = getBaseReg(LIR_stqi, base, d, BaseRegs);
+        Register b = getBaseReg(base, d, BaseRegs);
        Register r;

        // if we have to choose a register, use a GPR, but not the base reg
@ -1516,7 +1516,7 @@ namespace nanojit
                        GpRegs;

        NanoAssert(!value->isQuad());
-        Register b = getBaseReg(LIR_sti, base, d, BaseRegs);
+        Register b = getBaseReg(base, d, BaseRegs);
        Register r = findRegFor(value, SrcRegs & ~rmask(b));

        switch (op) {
@ -1787,6 +1787,13 @@ namespace nanojit
            }
        }

+        // profiling for the exit
+        verbose_only(
+           if (_logc->lcbits & LC_FragProfile) {
+              asm_inc_m32( &guard->record()->profCount );
+           }
+        )
+
        MR(RSP, RBP);

        // return value is GuardRecord*
@ -1861,9 +1868,17 @@ namespace nanojit
    // Increment the 32-bit profiling counter at pCtr, without
    // changing any registers.
    verbose_only(
-    void Assembler::asm_inc_m32(uint32_t* /*pCtr*/)
+    void Assembler::asm_inc_m32(uint32_t* pCtr)
    {
-        // todo: implement this
+        // Not as simple as on x86.  We need to temporarily free up a
+        // register into which to generate the address, so just push
+        // it on the stack.  This assumes that the scratch area at
+        // -8(%rsp) .. -1(%rsp) isn't being used for anything else
+        // at this point.
+        emitr(X64_popr, RAX);             // popq    %rax
+        emit(X64_inclmRAX);               // incl    (%rax)
+        asm_quad(RAX, (uint64_t)pCtr);    // movabsq $pCtr, %rax
+        emitr(X64_pushr, RAX);            // pushq   %rax
    }
    )

--- a/js/src/nanojit/NativeX64.h
+++ b/js/src/nanojit/NativeX64.h
@ -307,6 +307,7 @@ namespace nanojit
        X64_xorps   = 0xC0570F4000000004LL, // 128bit xor xmm (four packed singles), one byte shorter
        X64_xorpsm  = 0x05570F4000000004LL, // 128bit xor xmm, [rip+disp32]
        X64_xorpsa  = 0x2504570F40000005LL, // 128bit xor xmm, [disp32]
+        X64_inclmRAX= 0x00FF000000000002LL, // incl (%rax)
        X64_jmpx    = 0xC524ff4000000004LL, // jmp [d32+x*8]
        X64_jmpxb   = 0xC024ff4000000004LL, // jmp [b+x*8]

@ -329,9 +330,6 @@ namespace nanojit
    static const int NumArgRegs = 6;
 #endif

-    static inline bool isValidDisplacement(LOpcode, int32_t) {
-        return true;
-    }
    static inline bool IsFpReg(Register r) {
        return ((1<<r) & FpRegs) != 0;
    }
--- a/js/src/nanojit/Nativei386.cpp
+++ b/js/src/nanojit/Nativei386.cpp
--- a/js/src/nanojit/Nativei386.h
+++ b/js/src/nanojit/Nativei386.h
@ -157,10 +157,6 @@ namespace nanojit

    static const RegisterMask AllowableFlagRegs = 1<<EAX |1<<ECX | 1<<EDX | 1<<EBX;

-    static inline bool isValidDisplacement(LOpcode, int32_t) {
-        return true;
-    }
-
    #define _rmask_(r)      (1<<(r))
    #define _is_xmm_reg_(r) ((_rmask_(r)&XmmRegs)!=0)
    #define _is_x87_reg_(r) ((_rmask_(r)&x87Regs)!=0)
--- a/js/src/trace-test/tests/basic/bug535760.js
+++ b/js/src/trace-test/tests/basic/bug535760.js
@ -0,0 +1,13 @@
+/* vim: set ts=4 sw=4 tw=99 et: */
+function foundit(items, n) {
+    for (var i = 0; i < 10; i++)
+        arguments[2](items, this);
+}
+
+function dostuff() {
+    print(this);
+}
+foundit('crab', 'crab', dostuff);
+
+/* Don't crash or assert */
+
--- a/js/src/trace-test/trace-test.py
+++ b/js/src/trace-test/trace-test.py
@ -121,6 +121,7 @@ def run_test(test, lib_dir):
        any([os.path.exists(os.path.join(d, 'valgrind'))
             for d in os.environ['PATH'].split(os.pathsep)])):
        valgrind_prefix = [ 'valgrind',
+                            '-q',
                            '--smc-check=all',
                            '--error-exitcode=1',
                            '--leak-check=full']
--- a/toolkit/xre/nsSigHandlers.cpp
+++ b/toolkit/xre/nsSigHandlers.cpp
@ -361,7 +361,7 @@ void InstallSignalHandlers(const char *ProgramName)
 #define MXCSR(ctx) (ctx)->MxCsr
 #endif

-#if defined(_M_IA32) || define(_M_X64)
+#if defined(_M_IA32) || defined(_M_X64)

 #define X87CW(ctx) (ctx)->FloatSave.ControlWord
 #define X87SW(ctx) (ctx)->FloatSave.StatusWord