diff --git a/js/src/nanojit/Assembler.cpp b/js/src/nanojit/Assembler.cpp index 6b7e30a19d7..b3f62b2a7b6 100755 --- a/js/src/nanojit/Assembler.cpp +++ b/js/src/nanojit/Assembler.cpp @@ -80,6 +80,7 @@ namespace nanojit , _config(config) { VMPI_memset(&_stats, 0, sizeof(_stats)); + VMPI_memset(lookahead, 0, N_LOOKAHEAD * sizeof(LInsp)); nInit(core); (void)logc; verbose_only( _logc = logc; ) @@ -1208,67 +1209,77 @@ namespace nanojit NanoAssert(_thisfrag->nStaticExits == 0); // The trace must end with one of these opcodes. - NanoAssert(reader->pos()->isop(LIR_x) || - reader->pos()->isop(LIR_xtbl) || - reader->pos()->isRet() || - reader->pos()->isLive()); + NanoAssert(reader->finalIns()->isop(LIR_x) || + reader->finalIns()->isop(LIR_xtbl) || + reader->finalIns()->isRet() || + reader->finalIns()->isLive()); InsList pending_lives(alloc); NanoAssert(!error()); - for (LInsp ins = reader->read(); !ins->isop(LIR_start); ins = reader->read()) + + // What's going on here: we're visiting all the LIR instructions in + // the buffer, working strictly backwards in buffer-order, and + // generating machine instructions for them as we go. + // + // For each LIns, we first determine whether it's actually necessary, + // and if not skip it. Otherwise we generate code for it. There are + // two kinds of "necessary" instructions: + // + // - "Statement" instructions, which have side effects. Anything that + // could change control flow or the state of memory. + // + // - "Value" or "expression" instructions, which compute a value based + // only on the operands to the instruction (and, in the case of + // loads, the state of memory). Because we visit instructions in + // reverse order, if some previously visited instruction uses the + // value computed by this instruction, then this instruction will + // already have a register assigned to hold that value. Hence we + // can consult the instruction to detect whether its value is in + // fact used (i.e. not dead). 
+ // + // Note that the backwards code traversal can make register allocation + // confusing. (For example, we restore a value before we spill it!) + // In particular, words like "before" and "after" must be used very + // carefully -- their meaning at regalloc-time is opposite to their + // meaning at run-time. We use the term "pre-regstate" to refer to + // the register allocation state that occurs prior to an instruction's + // execution, and "post-regstate" to refer to the state that occurs + // after an instruction's execution, e.g.: + // + // pre-regstate: ebx(ins) + // instruction: mov eax, ebx // mov dst, src + // post-regstate: eax(ins) + // + // At run-time, the instruction updates the pre-regstate into the + // post-regstate (and these states are the real machine's regstates). + // But when allocating registers, because we go backwards, the + // pre-regstate is constructed from the post-regstate (and these + // regstates are those stored in RegAlloc). + // + // One consequence of generating code backwards is that we tend to + // both spill and restore registers as early (at run-time) as + // possible; this is good for tolerating memory latency. If we + // generated code forwards, we would expect to both spill and restore + // registers as late (at run-time) as possible; this might be better + // for reducing register pressure. + // + // Another thing to note: we provide N_LOOKAHEAD instructions' worth + // of lookahead because it's useful for backends. This is nice and + // easy because once read() gets to the LIR_start at the beginning of + // the buffer it'll just keep returning it. + + for (int32_t i = 0; i < N_LOOKAHEAD; i++) + lookahead[i] = reader->read(); + + while (!lookahead[0]->isop(LIR_start)) { - /* What's going on here: we're visiting all the LIR instructions - in the buffer, working strictly backwards in buffer-order, and - generating machine instructions for them as we go. 
+ LInsp ins = lookahead[0]; // give it a shorter name for local use + LOpcode op = ins->opcode(); - For each LIns, we first determine whether it's actually - necessary, and if not skip it. Otherwise we generate code for - it. There are two kinds of "necessary" instructions: - - - "Statement" instructions, which have side effects. Anything - that could change control flow or the state of memory. - - - "Value" or "expression" instructions, which compute a value - based only on the operands to the instruction (and, in the - case of loads, the state of memory). Because we visit - instructions in reverse order, if some previously visited - instruction uses the value computed by this instruction, then - this instruction will already have a register assigned to - hold that value. Hence we can consult the instruction to - detect whether its value is in fact used (i.e. not dead). - - Note that the backwards code traversal can make register - allocation confusing. (For example, we restore a value before - we spill it!) In particular, words like "before" and "after" - must be used very carefully -- their meaning at regalloc-time is - opposite to their meaning at run-time. We use the term - "pre-regstate" to refer to the register allocation state that - occurs prior to an instruction's execution, and "post-regstate" - to refer to the state that occurs after an instruction's - execution, e.g.: - - pre-regstate: ebx(ins) - instruction: mov eax, ebx // mov dst, src - post-regstate: eax(ins) - - At run-time, the instruction updates the pre-regstate into the - post-regstate (and these states are the real machine's - regstates). But when allocating registers, because we go - backwards, the pre-regstate is constructed from the - post-regstate (and these regstates are those stored in - RegAlloc). - - One consequence of generating code backwards is that we tend to - both spill and restore registers as early (at run-time) as - possible; this is good for tolerating memory latency. 
If we - generated code forwards, we would expect to both spill and - restore registers as late (at run-time) as possible; this might - be better for reducing register pressure. - */ bool required = ins->isStmt() || ins->isUsed(); if (!required) - continue; + goto end_of_loop; #ifdef NJ_VERBOSE // Output the post-regstate (registers and/or activation). @@ -1281,8 +1292,7 @@ namespace nanojit printRegState(); #endif - LOpcode op = ins->opcode(); - switch(op) + switch (op) { default: NanoAssertMsgf(false, "unsupported LIR instruction: %d\n", op); @@ -1851,6 +1861,11 @@ namespace nanojit // check that all is well (don't check in exit paths since its more complicated) debug_only( pageValidate(); ) debug_only( resourceConsistencyCheck(); ) + + end_of_loop: + for (int32_t i = 1; i < N_LOOKAHEAD; i++) + lookahead[i-1] = lookahead[i]; + lookahead[N_LOOKAHEAD-1] = reader->read(); } } diff --git a/js/src/nanojit/Assembler.h b/js/src/nanojit/Assembler.h index cf36b617a4d..e0f47e5060c 100644 --- a/js/src/nanojit/Assembler.h +++ b/js/src/nanojit/Assembler.h @@ -413,6 +413,13 @@ namespace nanojit NIns* pedanticTop; #endif + + // Instruction lookahead in gen(). lookahead[0] is the current + // instruction. Nb: lookahead[1..N_LOOKAHEAD-1] may include dead + // instructions, but we won't know that they're dead yet. + static const int N_LOOKAHEAD = 3; + LInsp lookahead[N_LOOKAHEAD]; + AR _activation; RegAlloc _allocator; diff --git a/js/src/nanojit/LIR.cpp b/js/src/nanojit/LIR.cpp index 2a71fa4f697..9a0caad6f33 100644 --- a/js/src/nanojit/LIR.cpp +++ b/js/src/nanojit/LIR.cpp @@ -136,13 +136,18 @@ namespace nanojit LInsp ReverseLister::read() { - LInsp i = in->read(); + // This check is necessary to avoid printing the LIR_start multiple + // times due to lookahead in Assembler::gen(). 
+ if (_prevIns && _prevIns->isop(LIR_start)) + return _prevIns; + LInsp ins = in->read(); InsBuf b; - const char* str = _printer->formatIns(&b, i); + const char* str = _printer->formatIns(&b, ins); char* cpy = new (_alloc) char[strlen(str)+1]; VMPI_strcpy(cpy, str); _strs.insert(cpy); - return i; + _prevIns = ins; + return ins; } #endif @@ -402,20 +407,20 @@ namespace nanojit 0 }; - // Check the invariant: _i never points to a skip. - NanoAssert(_i && !_i->isop(LIR_skip)); + // Check the invariant: _ins never points to a skip. + NanoAssert(_ins && !_ins->isop(LIR_skip)); // Step back one instruction. Use a table lookup rather than a switch // to avoid branch mispredictions. LIR_start is given a special size // of zero so that we don't step back past the start of the block. // (Callers of this function should stop once they see a LIR_start.) - LInsp ret = _i; - _i = (LInsp)(uintptr_t(_i) - insSizes[_i->opcode()]); + LInsp ret = _ins; + _ins = (LInsp)(uintptr_t(_ins) - insSizes[_ins->opcode()]); - // Ensure _i doesn't end up pointing to a skip. - while (_i->isop(LIR_skip)) { - NanoAssert(_i->prevLIns() != _i); - _i = _i->prevLIns(); + // Ensure _ins doesn't end up pointing to a skip. + while (_ins->isop(LIR_skip)) { + NanoAssert(_ins->prevLIns() != _ins); + _ins = _ins->prevLIns(); } return ret; @@ -1567,7 +1572,7 @@ namespace nanojit uint32_t exits = 0; int total = 0; if (frag->lirbuf->state) - live.add(frag->lirbuf->state, in->pos()); + live.add(frag->lirbuf->state, in->finalIns()); for (LInsp ins = in->read(); !ins->isop(LIR_start); ins = in->read()) { total++; diff --git a/js/src/nanojit/LIR.h b/js/src/nanojit/LIR.h index 7f16a6b7f58..def1b45a8a2 100644 --- a/js/src/nanojit/LIR.h +++ b/js/src/nanojit/LIR.h @@ -891,7 +891,7 @@ namespace nanojit // Note, this assumes that loads will never fault and hence cannot // affect the control flow. 
bool isStmt() { - NanoAssert(!isop(LIR_start) && !isop(LIR_skip)); + NanoAssert(!isop(LIR_skip)); // All instructions with Void retType are statements, as are calls // to impure functions. if (isCall()) @@ -1943,21 +1943,25 @@ namespace nanojit LirFilter(LirFilter *in) : in(in) {} virtual ~LirFilter(){} + // It's crucial that once this reaches the LIR_start at the beginning + // of the buffer, that it just keeps returning that LIR_start LIns on + // any subsequent calls. virtual LInsp read() { return in->read(); } - virtual LInsp pos() { - return in->pos(); + virtual LInsp finalIns() { + return in->finalIns(); } }; // concrete class LirReader : public LirFilter { - LInsp _i; // next instruction to be read; invariant: is never a skip + LInsp _ins; // next instruction to be read; invariant: is never a skip + LInsp _finalIns; // final instruction in the stream; ie. the first one to be read public: - LirReader(LInsp i) : LirFilter(0), _i(i) + LirReader(LInsp ins) : LirFilter(0), _ins(ins), _finalIns(ins) { // The last instruction for a fragment shouldn't be a skip. // (Actually, if the last *inserted* instruction exactly fills up @@ -1966,7 +1970,7 @@ namespace nanojit // cross-chunk link. But the last *inserted* instruction is what // is recorded and used to initialise each LirReader, and that is // what is seen here, and therefore this assertion holds.) - NanoAssert(i && !i->isop(LIR_skip)); + NanoAssert(ins && !ins->isop(LIR_skip)); } virtual ~LirReader() {} @@ -1974,9 +1978,8 @@ namespace nanojit // Invariant: never returns a skip. LInsp read(); - // Returns next instruction. Invariant: never returns a skip. 
- LInsp pos() { - return _i; + LInsp finalIns() { + return _finalIns; } }; @@ -2102,6 +2105,7 @@ namespace nanojit const char* _title; StringList _strs; LogControl* _logc; + LIns* _prevIns; public: ReverseLister(LirFilter* in, Allocator& alloc, LInsPrinter* printer, LogControl* logc, const char* title) @@ -2111,6 +2115,7 @@ namespace nanojit , _title(title) , _strs(alloc) , _logc(logc) + , _prevIns(NULL) { } void finish(); diff --git a/js/src/nanojit/Nativei386.cpp b/js/src/nanojit/Nativei386.cpp index 5fe50506184..8ec0ccdc38e 100644 --- a/js/src/nanojit/Nativei386.cpp +++ b/js/src/nanojit/Nativei386.cpp @@ -742,11 +742,19 @@ namespace nanojit // disturb the CCs! Register r = findRegFor(lhs, GpRegs); if (c == 0 && cond->isop(LIR_eq)) { - TEST(r, r); + NanoAssert(N_LOOKAHEAD >= 3); + if ((lhs->isop(LIR_and) || lhs->isop(LIR_or)) && + cond == lookahead[1] && lhs == lookahead[2]) + { + // Do nothing. At run-time, 'lhs' will have just been computed + // by an i386 instruction that sets ZF for us ('and' or + // 'or'), so we don't have to do it ourselves. + } else { + TEST(r, r); // sets ZF according to the value of 'lhs' + } } else { CMPi(r, c); } - } else { Register ra, rb; findRegFor2(GpRegs, lhs, ra, GpRegs, rhs, rb);