Bug 520714 - nanojit: distinguish 64-bit int and float loads/stores. r=gal,rreitmai.

--HG--
extra : convert_revision : a19809f7ba60b4381b77b84363bebf0ff7cf9629
This commit is contained in:
Nicholas Nethercote 2010-01-05 14:03:49 +11:00
parent c1aa4ffe1b
commit 1022818501
10 changed files with 116 additions and 46 deletions

View File

@@ -99,14 +99,13 @@ CLASS( LOP_F_I, 0, 2) // 67% LIR_i2f, LIR_u2f
CLASS( LOP_I_F, 0, 2) // 69% LIR_qlo, LIR_qhi
CLASS( LOP_F_II, 0, 1) // 70% LIR_qjoin
// XXX: "QorF" because the same opcode is used for both 64-bit int and
// 64-bit float loads. Ditto for stores. That should be fixed, see
// bug 520714.
CLASS( LLD_I, 0, 4) // 74% LIR_ld
CLASS( LLD_QorF, 0, 4) // 78% LIR_ldq
CLASS( LLD_I, 0, 3) // 73% LIR_ld, LIR_ldc, LIR_ld*b, LIR_ld*s
CLASS( LLD_Q, 1, 2) // 75% LIR_ldq, LIR_ldqc
CLASS( LLD_F, 0, 3) // 78% LIR_ldf, LIR_ldfc
CLASS( LST_I, 0, 7) // 85% LIR_sti
CLASS( LST_QorF, 0, 7) // 92% LIR_stqi
CLASS( LST_I, 0, 5) // 83% LIR_sti
CLASS( LST_Q, 1, 4) // 87% LIR_stqi
CLASS( LST_F, 0, 5) // 92% LIR_stfi
CLASS( LCALL_I_I1, 0, 1) // 93% LIR_icall
CLASS( LCALL_I_I6, 0, 1) // 94% LIR_icall

View File

@@ -991,6 +991,7 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
#endif
case LIR_sti:
case LIR_stqi:
case LIR_stfi:
need(3);
ins = mLir->insStore(mOpcode, ref(mTokens[0]),
ref(mTokens[1]),
@@ -1011,6 +1012,8 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
case LIR_ldc:
case LIR_ldq:
case LIR_ldqc:
case LIR_ldf:
case LIR_ldfc:
case LIR_ldcb:
case LIR_ldcs:
ins = assemble_load();
@@ -1357,7 +1360,7 @@ FragmentAssembler::assembleRandomFragment(int nIns)
F_II_ops.push_back(LIR_qjoin);
vector<LOpcode> I_loads;
I_loads.push_back(LIR_ld); // weight LIR_ld the heaviest
I_loads.push_back(LIR_ld); // weight LIR_ld more heavily
I_loads.push_back(LIR_ld);
I_loads.push_back(LIR_ld);
I_loads.push_back(LIR_ldc);
@@ -1372,14 +1375,20 @@ FragmentAssembler::assembleRandomFragment(int nIns)
I_loads.push_back(LIR_ldcss);
#endif
vector<LOpcode> QorF_loads;
QorF_loads.push_back(LIR_ldq); // weight LIR_ldq the heaviest
QorF_loads.push_back(LIR_ldq);
QorF_loads.push_back(LIR_ldqc);
vector<LOpcode> Q_loads;
Q_loads.push_back(LIR_ldq); // weight LIR_ld more heavily
Q_loads.push_back(LIR_ldq);
Q_loads.push_back(LIR_ldqc);
vector<LOpcode> F_loads;
F_loads.push_back(LIR_ldf); // weight LIR_ldf more heavily
F_loads.push_back(LIR_ldf);
F_loads.push_back(LIR_ldfc);
#if NJ_EXPANDED_LOADSTORE_SUPPORTED
// this loads a 32-bit float and expands to 64-bit float
QorF_loads.push_back(LIR_ld32f);
QorF_loads.push_back(LIR_ldc32f);
F_loads.push_back(LIR_ld32f); // weight LIR_ld32f more heavily
F_loads.push_back(LIR_ld32f);
F_loads.push_back(LIR_ldc32f);
#endif
enum LInsClass {
@@ -1700,15 +1709,23 @@ FragmentAssembler::assembleRandomFragment(int nIns)
break;
}
case LLD_QorF: {
case LLD_Q:
if (!M8ps.empty()) {
LIns* base = rndPick(M8ps);
ins = mLir->insLoad(rndPick(QorF_loads), base, rndOffset64(base->size()));
addOrReplace((rnd(2) ? Qs : Fs), ins);
ins = mLir->insLoad(rndPick(Q_loads), base, rndOffset64(base->size()));
addOrReplace(Qs, ins);
n++;
}
break;
case LLD_F:
if (!M8ps.empty()) {
LIns* base = rndPick(M8ps);
ins = mLir->insLoad(rndPick(F_loads), base, rndOffset64(base->size()));
addOrReplace(Fs, ins);
n++;
}
break;
}
case LST_I: {
vector<LIns*> Ms = rnd(2) ? M4s : M8ps;
@@ -1720,14 +1737,21 @@ FragmentAssembler::assembleRandomFragment(int nIns)
break;
}
case LST_QorF: {
case LST_Q:
if (!M8ps.empty() && !Qs.empty()) {
LIns* base = rndPick(M8ps);
mLir->insStorei(rndPick(Qs), base, rndOffset64(base->size()));
n++;
}
break;
case LST_F:
if (!M8ps.empty() && !Fs.empty()) {
LIns* base = rndPick(M8ps);
mLir->insStorei(rndPick(Fs), base, rndOffset64(base->size()));
n++;
}
break;
}
case LCALL_I_I1:
if (!Is.empty()) {

View File

@@ -1205,6 +1205,8 @@ namespace nanojit
case LIR_ldc32f:
case LIR_ldq:
case LIR_ldqc:
case LIR_ldf:
case LIR_ldfc:
{
countlir_ldq();
asm_load64(ins);
@@ -1300,14 +1302,15 @@ namespace nanojit
}
case LIR_st32f:
case LIR_stqi:
case LIR_stfi:
{
countlir_stq();
LIns* value = ins->oprnd1();
LIns* base = ins->oprnd2();
int dr = ins->disp();
if (value->isop(LIR_qjoin) && op != LIR_st32f)
if (value->isop(LIR_qjoin) && op == LIR_stfi)
{
// this is correct for little-endian only
// This is correct for little-endian only.
asm_store32(LIR_sti, value->oprnd1(), dr, base);
asm_store32(LIR_sti, value->oprnd2(), dr+4, base);
}

View File

@@ -884,7 +884,16 @@ namespace nanojit
LIns* LirWriter::insStorei(LIns* value, LIns* base, int32_t d)
{
LOpcode op = value->isQuad() ? LIR_stqi : LIR_sti;
// Determine which kind of store should be used for 'value' based on
// its type.
LOpcode op = LOpcode(0);
switch (retTypes[value->opcode()]) {
case LTy_I32: op = LIR_sti; break;
case LTy_I64: op = LIR_stqi; break;
case LTy_F64: op = LIR_stfi; break;
case LTy_Void: NanoAssert(0); break;
default: NanoAssert(0); break;
}
return insStore(op, value, base, d);
}
@@ -1509,6 +1518,8 @@ namespace nanojit
case LIR_ldc:
case LIR_ldq:
case LIR_ldqc:
case LIR_ldf:
case LIR_ldfc:
case LIR_ldzb:
case LIR_ldzs:
case LIR_ldcb:
@@ -1545,6 +1556,7 @@ namespace nanojit
case LIR_sti:
case LIR_stqi:
case LIR_stfi:
case LIR_stb:
case LIR_sts:
case LIR_eq:
@@ -1925,6 +1937,8 @@ namespace nanojit
case LIR_ldc:
case LIR_ldq:
case LIR_ldqc:
case LIR_ldf:
case LIR_ldfc:
case LIR_ldzb:
case LIR_ldzs:
case LIR_ldcb:
@@ -1942,6 +1956,7 @@ namespace nanojit
case LIR_sti:
case LIR_stqi:
case LIR_stfi:
case LIR_stb:
case LIR_sts:
case LIR_st32f:
@@ -1952,7 +1967,7 @@ namespace nanojit
break;
default:
VMPI_sprintf(s, "?");
NanoAssertMsgf(0, "Can't handle opcode %s\n", lirNames[op]);
break;
}
NanoAssert(VMPI_strlen(sbuf) < sizeof(sbuf)-1);
@@ -2273,6 +2288,7 @@ namespace nanojit
{
case LIR_ld:
case LIR_ldq:
case LIR_ldf:
case LIR_ld32f:
case LIR_ldsb:
case LIR_ldss:

View File

@@ -178,13 +178,13 @@ OPDEF(__69, 69, None, Void)
OPDEF(__70, 70, None, Void)
OPDEF(qaddp, 71, Op2, I64) // integer addition for temp pointer calculations (64bit only)
OPDEF(qparam, 72, P, I64) // load a parameter (64bit register or stk location)
OPDEF(__73, 73, None, Void)
OPDEF(ldq, 74, Ld, I64) // 64-bit (quad) load
OPDEF(ldf, 73, Ld, F64) // 64-bit float load
OPDEF(ldq, 74, Ld, I64) // 64-bit integer load
OPDEF(qalloc, 75, I, I64) // allocate some stack space (value is 64bit address)
OPDEF(stqi, 76, Sti, Void) // 64-bit (quad) store
OPDEF(stqi, 76, Sti, Void) // 64-bit integer store
OPDEF(fret, 77, Op1, Void)
OPDEF(st32f, 78, Sti, Void) // store 64-bit float as a 32-bit float (dropping precision)
@@ -193,7 +193,8 @@ OPDEF(ld32f, 79, Ld, F64) // load 32-bit float and widen to 64-bit float
OPDEF(fcall, 80, C, F64) // subroutine call returning 64-bit (quad) double value
OPDEF(qcall, 81, C, I64) // subroutine call returning 64-bit (quad) integer value
OPDEF(__82, 82, None, Void)
OPDEF(stfi, 82, Sti, Void) // 64-bit float store
OPDEF(__83, 83, None, Void)
OPDEF(__84, 84, None, Void)
OPDEF(__85, 85, None, Void)
@@ -202,7 +203,7 @@ OPDEF(__87, 87, None, Void)
// All opcodes below this marker are subject to CSE.
OPDEF(quad, 88, N64, I64) // 64-bit (quad) constant value
OPDEF(quad, 88, N64, I64) // 64-bit integer constant value
OPDEF(qcmov, 89, Op3, I64) // 64-bit conditional move
OPDEF(i2q, 90, Op1, I64) // sign-extend i32 to i64
@@ -213,9 +214,9 @@ OPDEF(u2f, 93, Op1, F64) // convert an unsigned 32-bit integer to a float
OPDEF(__94, 94, None, Void)
OPDEF(__95, 95, None, Void)
OPDEF(__96, 96, None, Void)
OPDEF(__97, 97, None, Void)
OPDEF(ldqc, 98, Ld, I64) // non-volatile 64-bit load
OPDEF(ldfc, 97, Ld, F64) // non-volatile 64-bit float load
OPDEF(ldqc, 98, Ld, I64) // non-volatile 64-bit integer load
OPDEF(fneg, 99, Op1, F64) // floating-point negation
OPDEF(fadd, 100, Op2, F64) // floating-point addition
@@ -238,7 +239,7 @@ OPDEF(qjoin, 114, Op2, F64) // join two 32-bit values (1st arg is low bits,
OPDEF(__115, 115, None, Void)
OPDEF(__116, 116, None, Void)
OPDEF(__117, 117, None, Void)
OPDEF(float, 118, N64, F64)
OPDEF(float, 118, N64, F64) // 64-bit float constant value
// Integer (64-bit) relational operators.
// NB: These opcodes must remain continuous so that comparison-opcode detection

View File

@@ -1310,13 +1310,16 @@ Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
void
Assembler::asm_load64(LInsp ins)
{
NanoAssert(!ins->isop(LIR_ldq) && !ins->isop(LIR_ldqc));
//asm_output("<<< load64");
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
case LIR_ldf:
case LIR_ldfc:
// handled by mainline code below for now
break;
case LIR_ld32f:
case LIR_ldc32f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
@@ -1370,15 +1373,19 @@ Assembler::asm_load64(LInsp ins)
void
Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
{
NanoAssert(op != LIR_stqi);
//asm_output("<<< store64 (dr: %d)", dr);
switch (op) {
case LIR_stqi:
case LIR_stfi:
// handled by mainline code below for now
break;
case LIR_st32f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
return;

View File

@@ -221,6 +221,8 @@ namespace nanojit
void Assembler::asm_load64(LIns *ins) {
switch (ins->opcode()) {
case LIR_ldf:
case LIR_ldfc:
case LIR_ldq:
case LIR_ldqc:
// handled by mainline code below for now
@@ -314,6 +316,7 @@ namespace nanojit
NanoAssert(value->isQuad());
switch (op) {
case LIR_stfi:
case LIR_stqi:
// handled by mainline code below for now
break;

View File

@@ -365,9 +365,11 @@ namespace nanojit
void Assembler::asm_load64(LInsp ins)
{
NanoAssert(!ins->isop(LIR_ldq) && !ins->isop(LIR_ldqc));
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
case LIR_ldf:
case LIR_ldfc:
// handled by mainline code below for now
break;
case LIR_ld32f:
@@ -410,8 +412,10 @@ namespace nanojit
void Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
{
NanoAssert(op != LIR_stqi);
switch (op) {
case LIR_stqi:
case LIR_stfi:
// handled by mainline code below for now
break;
case LIR_st32f:
@@ -435,7 +439,7 @@ namespace nanojit
return;
}
if (value->isop(LIR_ldq) || value->isop(LIR_ldqc) || value->isop(LIR_qjoin))
if (value->isop(LIR_ldf) || value->isop(LIR_ldfc) || value->isop(LIR_qjoin))
{
// value is 64bit struct or int64_t, or maybe a double.
// it may be live in an FPU reg. Either way, don't
@@ -458,6 +462,7 @@ namespace nanojit
return;
}
NanoAssert(!value->isop(LIR_ldq) || !value->isop(LIR_ldqc));
Register rb;
if (base->isop(LIR_alloc)) {
rb = FP;

View File

@@ -631,7 +631,11 @@ namespace nanojit
// To make sure floating point operations stay in FPU registers
// as much as possible, make sure that only a few opcodes are
// reserving GPRs.
NanoAssert(a->isop(LIR_quad) || a->isop(LIR_ldq) || a->isop(LIR_ldqc)|| a->isop(LIR_ld32f) || a->isop(LIR_ldc32f)|| a->isop(LIR_u2f) || a->isop(LIR_float) || a->isop(LIR_fcall));
NanoAssert(a->isop(LIR_quad) || a->isop(LIR_float) ||
a->isop(LIR_ldf) || a->isop(LIR_ldfc) ||
a->isop(LIR_ldq) || a->isop(LIR_ldqc) ||
a->isop(LIR_ld32f) || a->isop(LIR_ldc32f) ||
a->isop(LIR_u2f) || a->isop(LIR_fcall));
allow &= ~rmask(rr);
ra = findRegFor(a, allow);
} else {
@@ -1387,6 +1391,8 @@ namespace nanojit
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
case LIR_ldf:
case LIR_ldfc:
regalloc_load(ins, GpRegs, rr, dr, rb);
if (IsGpReg(rr)) {
// general 64bit load, 32bit const displacement
@@ -1468,6 +1474,7 @@ namespace nanojit
switch (op) {
case LIR_stqi:
case LIR_stfi:
{
if (IsGpReg(r)) {
// gpr store

View File

@@ -555,6 +555,8 @@ namespace nanojit
void Assembler::asm_load64(LInsp ins)
{
NanoAssert(!ins->isop(LIR_ldq) && !ins->isop(LIR_ldqc));
LIns* base = ins->oprnd1();
int db = ins->disp();
Register rr = ins->getReg();
@@ -564,8 +566,8 @@ namespace nanojit
freeRsrcOf(ins, false);
Register rb = getBaseReg(base, db, GpRegs);
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
case LIR_ldf:
case LIR_ldfc:
SSE_LDQ(rr, db, rb);
break;
case LIR_ld32f:
@@ -593,8 +595,8 @@ namespace nanojit
ins->setReg(UnknownReg);
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
case LIR_ldf:
case LIR_ldfc:
// don't use an fpu reg to simply load & store the value.
if (dr)
asm_mmq(FP, dr, rb, db);
@@ -637,6 +639,8 @@ namespace nanojit
void Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
{
NanoAssert(op != LIR_stqi);
Register rb = getBaseReg(base, dr, GpRegs);
if (op == LIR_st32f) {
@@ -662,7 +666,7 @@ namespace nanojit
STi(rb, dr+4, value->imm64_1());
STi(rb, dr, value->imm64_0());
} else if (value->isop(LIR_ldq) || value->isop(LIR_ldqc) || value->isop(LIR_qjoin)) {
} else if (value->isop(LIR_ldf) || value->isop(LIR_ldfc) || value->isop(LIR_qjoin)) {
// value is 64bit struct or int64_t, or maybe a double.
// It may be live in an FPU reg. Either way, don't put it in an
// FPU reg just to load & store it.
@@ -681,6 +685,7 @@ namespace nanojit
}
} else {
NanoAssert(!value->isop(LIR_ldq) && !value->isop(LIR_ldqc));
bool pop = value->isUnusedOrHasUnknownReg();
Register rv = ( pop
? findRegFor(value, config.sse2 ? XmmRegs : FpRegs)
@@ -900,7 +905,7 @@ namespace nanojit
default: NanoAssert(0); break;
}
freeResourcesOf(ins); // njn: move after asm_cmp?
freeResourcesOf(ins);
asm_cmp(ins);
}