Bug 553518 - nanojit: avoid 'test r,r' where possible on i386. r=edwsmith.

--HG--
extra : convert_revision : ec4d959e1cc9337cf30a08bf75b246516a1728a4
This commit is contained in:
Nicholas Nethercote 2010-03-24 15:34:34 -07:00
parent 36ee148277
commit 70b954205a
5 changed files with 118 additions and 78 deletions

View File

@ -80,6 +80,7 @@ namespace nanojit
, _config(config)
{
VMPI_memset(&_stats, 0, sizeof(_stats));
VMPI_memset(lookahead, 0, N_LOOKAHEAD * sizeof(LInsp));
nInit(core);
(void)logc;
verbose_only( _logc = logc; )
@ -1208,67 +1209,77 @@ namespace nanojit
NanoAssert(_thisfrag->nStaticExits == 0);
// The trace must end with one of these opcodes.
NanoAssert(reader->pos()->isop(LIR_x) ||
reader->pos()->isop(LIR_xtbl) ||
reader->pos()->isRet() ||
reader->pos()->isLive());
NanoAssert(reader->finalIns()->isop(LIR_x) ||
reader->finalIns()->isop(LIR_xtbl) ||
reader->finalIns()->isRet() ||
reader->finalIns()->isLive());
InsList pending_lives(alloc);
NanoAssert(!error());
for (LInsp ins = reader->read(); !ins->isop(LIR_start); ins = reader->read())
// What's going on here: we're visiting all the LIR instructions in
// the buffer, working strictly backwards in buffer-order, and
// generating machine instructions for them as we go.
//
// For each LIns, we first determine whether it's actually necessary,
// and if not skip it. Otherwise we generate code for it. There are
// two kinds of "necessary" instructions:
//
// - "Statement" instructions, which have side effects. Anything that
// could change control flow or the state of memory.
//
// - "Value" or "expression" instructions, which compute a value based
// only on the operands to the instruction (and, in the case of
// loads, the state of memory). Because we visit instructions in
// reverse order, if some previously visited instruction uses the
// value computed by this instruction, then this instruction will
// already have a register assigned to hold that value. Hence we
// can consult the instruction to detect whether its value is in
// fact used (i.e. not dead).
//
// Note that the backwards code traversal can make register allocation
// confusing. (For example, we restore a value before we spill it!)
// In particular, words like "before" and "after" must be used very
// carefully -- their meaning at regalloc-time is opposite to their
// meaning at run-time. We use the term "pre-regstate" to refer to
// the register allocation state that occurs prior to an instruction's
// execution, and "post-regstate" to refer to the state that occurs
// after an instruction's execution, e.g.:
//
// pre-regstate: ebx(ins)
// instruction: mov eax, ebx // mov dst, src
// post-regstate: eax(ins)
//
// At run-time, the instruction updates the pre-regstate into the
// post-regstate (and these states are the real machine's regstates).
// But when allocating registers, because we go backwards, the
// pre-regstate is constructed from the post-regstate (and these
// regstates are those stored in RegAlloc).
//
// One consequence of generating code backwards is that we tend to
// both spill and restore registers as early (at run-time) as
// possible; this is good for tolerating memory latency. If we
// generated code forwards, we would expect to both spill and restore
// registers as late (at run-time) as possible; this might be better
// for reducing register pressure.
//
// Another thing to note: we provide N_LOOKAHEAD instructions' worth
// of lookahead because it's useful for backends. This is nice and
// easy because once read() gets to the LIR_start at the beginning of
// the buffer it'll just keep returning it.
for (int32_t i = 0; i < N_LOOKAHEAD; i++)
lookahead[i] = reader->read();
while (!lookahead[0]->isop(LIR_start))
{
/* What's going on here: we're visiting all the LIR instructions
in the buffer, working strictly backwards in buffer-order, and
generating machine instructions for them as we go.
LInsp ins = lookahead[0]; // give it a shorter name for local use
LOpcode op = ins->opcode();
For each LIns, we first determine whether it's actually
necessary, and if not skip it. Otherwise we generate code for
it. There are two kinds of "necessary" instructions:
- "Statement" instructions, which have side effects. Anything
that could change control flow or the state of memory.
- "Value" or "expression" instructions, which compute a value
based only on the operands to the instruction (and, in the
case of loads, the state of memory). Because we visit
instructions in reverse order, if some previously visited
instruction uses the value computed by this instruction, then
this instruction will already have a register assigned to
hold that value. Hence we can consult the instruction to
detect whether its value is in fact used (i.e. not dead).
Note that the backwards code traversal can make register
allocation confusing. (For example, we restore a value before
we spill it!) In particular, words like "before" and "after"
must be used very carefully -- their meaning at regalloc-time is
opposite to their meaning at run-time. We use the term
"pre-regstate" to refer to the register allocation state that
occurs prior to an instruction's execution, and "post-regstate"
to refer to the state that occurs after an instruction's
execution, e.g.:
pre-regstate: ebx(ins)
instruction: mov eax, ebx // mov dst, src
post-regstate: eax(ins)
At run-time, the instruction updates the pre-regstate into the
post-regstate (and these states are the real machine's
regstates). But when allocating registers, because we go
backwards, the pre-regstate is constructed from the
post-regstate (and these regstates are those stored in
RegAlloc).
One consequence of generating code backwards is that we tend to
both spill and restore registers as early (at run-time) as
possible; this is good for tolerating memory latency. If we
generated code forwards, we would expect to both spill and
restore registers as late (at run-time) as possible; this might
be better for reducing register pressure.
*/
bool required = ins->isStmt() || ins->isUsed();
if (!required)
continue;
goto end_of_loop;
#ifdef NJ_VERBOSE
// Output the post-regstate (registers and/or activation).
@ -1281,8 +1292,7 @@ namespace nanojit
printRegState();
#endif
LOpcode op = ins->opcode();
switch(op)
switch (op)
{
default:
NanoAssertMsgf(false, "unsupported LIR instruction: %d\n", op);
@ -1851,6 +1861,11 @@ namespace nanojit
// check that all is well (don't check in exit paths since it's more complicated)
debug_only( pageValidate(); )
debug_only( resourceConsistencyCheck(); )
end_of_loop:
for (int32_t i = 1; i < N_LOOKAHEAD; i++)
lookahead[i-1] = lookahead[i];
lookahead[N_LOOKAHEAD-1] = reader->read();
}
}

View File

@ -413,6 +413,13 @@ namespace nanojit
NIns* pedanticTop;
#endif
// Instruction lookahead in gen(). lookahead[0] is the current
// instruction. Nb: lookahead[1..N_LOOKAHEAD-1] may include dead
// instructions, but we won't know that they're dead yet.
static const int N_LOOKAHEAD = 3;
LInsp lookahead[N_LOOKAHEAD];
AR _activation;
RegAlloc _allocator;

View File

@ -136,13 +136,18 @@ namespace nanojit
LInsp ReverseLister::read()
{
LInsp i = in->read();
// This check is necessary to avoid printing the LIR_start multiple
// times due to lookahead in Assembler::gen().
if (_prevIns && _prevIns->isop(LIR_start))
return _prevIns;
LInsp ins = in->read();
InsBuf b;
const char* str = _printer->formatIns(&b, i);
const char* str = _printer->formatIns(&b, ins);
char* cpy = new (_alloc) char[strlen(str)+1];
VMPI_strcpy(cpy, str);
_strs.insert(cpy);
return i;
_prevIns = ins;
return ins;
}
#endif
@ -402,20 +407,20 @@ namespace nanojit
0
};
// Check the invariant: _i never points to a skip.
NanoAssert(_i && !_i->isop(LIR_skip));
// Check the invariant: _ins never points to a skip.
NanoAssert(_ins && !_ins->isop(LIR_skip));
// Step back one instruction. Use a table lookup rather than a switch
// to avoid branch mispredictions. LIR_start is given a special size
// of zero so that we don't step back past the start of the block.
// (Callers of this function should stop once they see a LIR_start.)
LInsp ret = _i;
_i = (LInsp)(uintptr_t(_i) - insSizes[_i->opcode()]);
LInsp ret = _ins;
_ins = (LInsp)(uintptr_t(_ins) - insSizes[_ins->opcode()]);
// Ensure _i doesn't end up pointing to a skip.
while (_i->isop(LIR_skip)) {
NanoAssert(_i->prevLIns() != _i);
_i = _i->prevLIns();
// Ensure _ins doesn't end up pointing to a skip.
while (_ins->isop(LIR_skip)) {
NanoAssert(_ins->prevLIns() != _ins);
_ins = _ins->prevLIns();
}
return ret;
@ -1567,7 +1572,7 @@ namespace nanojit
uint32_t exits = 0;
int total = 0;
if (frag->lirbuf->state)
live.add(frag->lirbuf->state, in->pos());
live.add(frag->lirbuf->state, in->finalIns());
for (LInsp ins = in->read(); !ins->isop(LIR_start); ins = in->read())
{
total++;

View File

@ -891,7 +891,7 @@ namespace nanojit
// Note, this assumes that loads will never fault and hence cannot
// affect the control flow.
bool isStmt() {
NanoAssert(!isop(LIR_start) && !isop(LIR_skip));
NanoAssert(!isop(LIR_skip));
// All instructions with Void retType are statements, as are calls
// to impure functions.
if (isCall())
@ -1943,21 +1943,25 @@ namespace nanojit
LirFilter(LirFilter *in) : in(in) {}
virtual ~LirFilter(){}
// It's crucial that once this reaches the LIR_start at the beginning
// of the buffer, it just keeps returning that LIR_start LIns on
// any subsequent calls.
virtual LInsp read() {
return in->read();
}
virtual LInsp pos() {
return in->pos();
virtual LInsp finalIns() {
return in->finalIns();
}
};
// concrete
class LirReader : public LirFilter
{
LInsp _i; // next instruction to be read; invariant: is never a skip
LInsp _ins; // next instruction to be read; invariant: is never a skip
LInsp _finalIns; // final instruction in the stream; ie. the first one to be read
public:
LirReader(LInsp i) : LirFilter(0), _i(i)
LirReader(LInsp ins) : LirFilter(0), _ins(ins), _finalIns(ins)
{
// The last instruction for a fragment shouldn't be a skip.
// (Actually, if the last *inserted* instruction exactly fills up
@ -1966,7 +1970,7 @@ namespace nanojit
// cross-chunk link. But the last *inserted* instruction is what
// is recorded and used to initialise each LirReader, and that is
// what is seen here, and therefore this assertion holds.)
NanoAssert(i && !i->isop(LIR_skip));
NanoAssert(ins && !ins->isop(LIR_skip));
}
virtual ~LirReader() {}
@ -1974,9 +1978,8 @@ namespace nanojit
// Invariant: never returns a skip.
LInsp read();
// Returns next instruction. Invariant: never returns a skip.
LInsp pos() {
return _i;
LInsp finalIns() {
return _finalIns;
}
};
@ -2102,6 +2105,7 @@ namespace nanojit
const char* _title;
StringList _strs;
LogControl* _logc;
LIns* _prevIns;
public:
ReverseLister(LirFilter* in, Allocator& alloc,
LInsPrinter* printer, LogControl* logc, const char* title)
@ -2111,6 +2115,7 @@ namespace nanojit
, _title(title)
, _strs(alloc)
, _logc(logc)
, _prevIns(NULL)
{ }
void finish();

View File

@ -742,11 +742,19 @@ namespace nanojit
// disturb the CCs!
Register r = findRegFor(lhs, GpRegs);
if (c == 0 && cond->isop(LIR_eq)) {
TEST(r, r);
NanoAssert(N_LOOKAHEAD >= 3);
if ((lhs->isop(LIR_and) || lhs->isop(LIR_or)) &&
cond == lookahead[1] && lhs == lookahead[2])
{
// Do nothing. At run-time, 'lhs' will have just been computed
// by an i386 instruction that sets ZF for us ('and' or
// 'or'), so we don't have to do it ourselves.
} else {
TEST(r, r); // sets ZF according to the value of 'lhs'
}
} else {
CMPi(r, c);
}
} else {
Register ra, rb;
findRegFor2(GpRegs, lhs, ra, GpRegs, rhs, rb);