diff --git a/js/src/lirasm/LInsClasses.tbl b/js/src/lirasm/LInsClasses.tbl index 305810461e4..4a7f3e8443e 100644 --- a/js/src/lirasm/LInsClasses.tbl +++ b/js/src/lirasm/LInsClasses.tbl @@ -99,14 +99,13 @@ CLASS( LOP_F_I, 0, 2) // 67% LIR_i2f, LIR_u2f CLASS( LOP_I_F, 0, 2) // 69% LIR_qlo, LIR_qhi CLASS( LOP_F_II, 0, 1) // 70% LIR_qjoin - // XXX: "QorF" because the same opcode is used for both 64-bit int and - // 64-bit float loads. Ditto for stores. That should be fixed, see - // bug 520714. -CLASS( LLD_I, 0, 4) // 74% LIR_ld -CLASS( LLD_QorF, 0, 4) // 78% LIR_ldq +CLASS( LLD_I, 0, 3) // 73% LIR_ld, LIR_ldc, LIR_ld*b, LIR_ld*s +CLASS( LLD_Q, 1, 2) // 75% LIR_ldq, LIR_ldqc +CLASS( LLD_F, 0, 3) // 78% LIR_ldf, LIR_ldfc -CLASS( LST_I, 0, 7) // 85% LIR_sti -CLASS( LST_QorF, 0, 7) // 92% LIR_stqi +CLASS( LST_I, 0, 5) // 83% LIR_sti +CLASS( LST_Q, 1, 4) // 87% LIR_stqi +CLASS( LST_F, 0, 5) // 92% LIR_stfi CLASS( LCALL_I_I1, 0, 1) // 93% LIR_icall CLASS( LCALL_I_I6, 0, 1) // 94% LIR_icall diff --git a/js/src/lirasm/lirasm.cpp b/js/src/lirasm/lirasm.cpp index 14cc51de56f..616021abf55 100644 --- a/js/src/lirasm/lirasm.cpp +++ b/js/src/lirasm/lirasm.cpp @@ -991,6 +991,7 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons #endif case LIR_sti: case LIR_stqi: + case LIR_stfi: need(3); ins = mLir->insStore(mOpcode, ref(mTokens[0]), ref(mTokens[1]), @@ -1011,6 +1012,8 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons case LIR_ldc: case LIR_ldq: case LIR_ldqc: + case LIR_ldf: + case LIR_ldfc: case LIR_ldcb: case LIR_ldcs: ins = assemble_load(); @@ -1357,7 +1360,7 @@ FragmentAssembler::assembleRandomFragment(int nIns) F_II_ops.push_back(LIR_qjoin); vector I_loads; - I_loads.push_back(LIR_ld); // weight LIR_ld the heaviest + I_loads.push_back(LIR_ld); // weight LIR_ld more heavily I_loads.push_back(LIR_ld); I_loads.push_back(LIR_ld); I_loads.push_back(LIR_ldc); @@ -1372,14 +1375,20 @@ 
FragmentAssembler::assembleRandomFragment(int nIns) I_loads.push_back(LIR_ldcss); #endif - vector QorF_loads; - QorF_loads.push_back(LIR_ldq); // weight LIR_ldq the heaviest - QorF_loads.push_back(LIR_ldq); - QorF_loads.push_back(LIR_ldqc); + vector Q_loads; + Q_loads.push_back(LIR_ldq); // weight LIR_ldq more heavily + Q_loads.push_back(LIR_ldq); + Q_loads.push_back(LIR_ldqc); + + vector F_loads; + F_loads.push_back(LIR_ldf); // weight LIR_ldf more heavily + F_loads.push_back(LIR_ldf); + F_loads.push_back(LIR_ldfc); #if NJ_EXPANDED_LOADSTORE_SUPPORTED // this loads a 32-bit float and expands to 64-bit float - QorF_loads.push_back(LIR_ld32f); - QorF_loads.push_back(LIR_ldc32f); + F_loads.push_back(LIR_ld32f); // weight LIR_ld32f more heavily + F_loads.push_back(LIR_ld32f); + F_loads.push_back(LIR_ldc32f); #endif enum LInsClass { @@ -1700,15 +1709,23 @@ FragmentAssembler::assembleRandomFragment(int nIns) break; } - case LLD_QorF: { + case LLD_Q: if (!M8ps.empty()) { LIns* base = rndPick(M8ps); - ins = mLir->insLoad(rndPick(QorF_loads), base, rndOffset64(base->size())); - addOrReplace((rnd(2) ? Qs : Fs), ins); + ins = mLir->insLoad(rndPick(Q_loads), base, rndOffset64(base->size())); + addOrReplace(Qs, ins); + n++; + } + break; + + case LLD_F: + if (!M8ps.empty()) { + LIns* base = rndPick(M8ps); + ins = mLir->insLoad(rndPick(F_loads), base, rndOffset64(base->size())); + addOrReplace(Fs, ins); n++; } break; - } case LST_I: { vector Ms = rnd(2) ?
M4s : M8ps; @@ -1720,14 +1737,21 @@ FragmentAssembler::assembleRandomFragment(int nIns) break; } - case LST_QorF: { + case LST_Q: + if (!M8ps.empty() && !Qs.empty()) { + LIns* base = rndPick(M8ps); + mLir->insStorei(rndPick(Qs), base, rndOffset64(base->size())); + n++; + } + break; + + case LST_F: if (!M8ps.empty() && !Fs.empty()) { LIns* base = rndPick(M8ps); mLir->insStorei(rndPick(Fs), base, rndOffset64(base->size())); n++; } break; - } case LCALL_I_I1: if (!Is.empty()) { diff --git a/js/src/nanojit/Assembler.cpp b/js/src/nanojit/Assembler.cpp index 3d67b0a47f3..a45e6f19584 100755 --- a/js/src/nanojit/Assembler.cpp +++ b/js/src/nanojit/Assembler.cpp @@ -1205,6 +1205,8 @@ namespace nanojit case LIR_ldc32f: case LIR_ldq: case LIR_ldqc: + case LIR_ldf: + case LIR_ldfc: { countlir_ldq(); asm_load64(ins); @@ -1300,14 +1302,15 @@ namespace nanojit } case LIR_st32f: case LIR_stqi: + case LIR_stfi: { countlir_stq(); LIns* value = ins->oprnd1(); LIns* base = ins->oprnd2(); int dr = ins->disp(); - if (value->isop(LIR_qjoin) && op != LIR_st32f) + if (value->isop(LIR_qjoin) && op == LIR_stfi) { - // this is correct for little-endian only + // This is correct for little-endian only. asm_store32(LIR_sti, value->oprnd1(), dr, base); asm_store32(LIR_sti, value->oprnd2(), dr+4, base); } diff --git a/js/src/nanojit/LIR.cpp b/js/src/nanojit/LIR.cpp index d8b3bdd0097..e86aac14d3d 100644 --- a/js/src/nanojit/LIR.cpp +++ b/js/src/nanojit/LIR.cpp @@ -884,7 +884,16 @@ namespace nanojit LIns* LirWriter::insStorei(LIns* value, LIns* base, int32_t d) { - LOpcode op = value->isQuad() ? LIR_stqi : LIR_sti; + // Determine which kind of store should be used for 'value' based on + // its type. 
+ LOpcode op = LOpcode(0); + switch (retTypes[value->opcode()]) { + case LTy_I32: op = LIR_sti; break; + case LTy_I64: op = LIR_stqi; break; + case LTy_F64: op = LIR_stfi; break; + case LTy_Void: NanoAssert(0); break; + default: NanoAssert(0); break; + } return insStore(op, value, base, d); } @@ -1509,6 +1518,8 @@ namespace nanojit case LIR_ldc: case LIR_ldq: case LIR_ldqc: + case LIR_ldf: + case LIR_ldfc: case LIR_ldzb: case LIR_ldzs: case LIR_ldcb: @@ -1545,6 +1556,7 @@ namespace nanojit case LIR_sti: case LIR_stqi: + case LIR_stfi: case LIR_stb: case LIR_sts: case LIR_eq: @@ -1925,6 +1937,8 @@ namespace nanojit case LIR_ldc: case LIR_ldq: case LIR_ldqc: + case LIR_ldf: + case LIR_ldfc: case LIR_ldzb: case LIR_ldzs: case LIR_ldcb: @@ -1942,6 +1956,7 @@ namespace nanojit case LIR_sti: case LIR_stqi: + case LIR_stfi: case LIR_stb: case LIR_sts: case LIR_st32f: @@ -1952,7 +1967,7 @@ namespace nanojit break; default: - VMPI_sprintf(s, "?"); + NanoAssertMsgf(0, "Can't handle opcode %s\n", lirNames[op]); break; } NanoAssert(VMPI_strlen(sbuf) < sizeof(sbuf)-1); @@ -2273,6 +2288,7 @@ namespace nanojit { case LIR_ld: case LIR_ldq: + case LIR_ldf: case LIR_ld32f: case LIR_ldsb: case LIR_ldss: diff --git a/js/src/nanojit/LIRopcode.tbl b/js/src/nanojit/LIRopcode.tbl index e95be658636..42b99e7813d 100644 --- a/js/src/nanojit/LIRopcode.tbl +++ b/js/src/nanojit/LIRopcode.tbl @@ -178,13 +178,13 @@ OPDEF(__69, 69, None, Void) OPDEF(__70, 70, None, Void) OPDEF(qaddp, 71, Op2, I64) // integer addition for temp pointer calculations (64bit only) OPDEF(qparam, 72, P, I64) // load a parameter (64bit register or stk location) -OPDEF(__73, 73, None, Void) -OPDEF(ldq, 74, Ld, I64) // 64-bit (quad) load +OPDEF(ldf, 73, Ld, F64) // 64-bit float load +OPDEF(ldq, 74, Ld, I64) // 64-bit integer load OPDEF(qalloc, 75, I, I64) // allocate some stack space (value is 64bit address) -OPDEF(stqi, 76, Sti, Void) // 64-bit (quad) store +OPDEF(stqi, 76, Sti, Void) // 64-bit integer store OPDEF(fret, 
77, Op1, Void) OPDEF(st32f, 78, Sti, Void) // store 64-bit float as a 32-bit float (dropping precision) @@ -193,7 +193,8 @@ OPDEF(ld32f, 79, Ld, F64) // load 32-bit float and widen to 64-bit float OPDEF(fcall, 80, C, F64) // subroutine call returning 64-bit (quad) double value OPDEF(qcall, 81, C, I64) // subroutine call returning 64-bit (quad) integer value -OPDEF(__82, 82, None, Void) +OPDEF(stfi, 82, Sti, Void) // 64-bit float store + OPDEF(__83, 83, None, Void) OPDEF(__84, 84, None, Void) OPDEF(__85, 85, None, Void) @@ -202,7 +203,7 @@ OPDEF(__87, 87, None, Void) // All opcodes below this marker are subject to CSE. -OPDEF(quad, 88, N64, I64) // 64-bit (quad) constant value +OPDEF(quad, 88, N64, I64) // 64-bit integer constant value OPDEF(qcmov, 89, Op3, I64) // 64-bit conditional move OPDEF(i2q, 90, Op1, I64) // sign-extend i32 to i64 @@ -213,9 +214,9 @@ OPDEF(u2f, 93, Op1, F64) // convert an unsigned 32-bit integer to a float OPDEF(__94, 94, None, Void) OPDEF(__95, 95, None, Void) OPDEF(__96, 96, None, Void) -OPDEF(__97, 97, None, Void) -OPDEF(ldqc, 98, Ld, I64) // non-volatile 64-bit load +OPDEF(ldfc, 97, Ld, F64) // non-volatile 64-bit float load +OPDEF(ldqc, 98, Ld, I64) // non-volatile 64-bit integer load OPDEF(fneg, 99, Op1, F64) // floating-point negation OPDEF(fadd, 100, Op2, F64) // floating-point addition @@ -238,7 +239,7 @@ OPDEF(qjoin, 114, Op2, F64) // join two 32-bit values (1st arg is low bits, OPDEF(__115, 115, None, Void) OPDEF(__116, 116, None, Void) OPDEF(__117, 117, None, Void) -OPDEF(float, 118, N64, F64) +OPDEF(float, 118, N64, F64) // 64-bit float constant value // Integer (64-bit) relational operators. 
// NB: These opcodes must remain continuous so that comparison-opcode detection diff --git a/js/src/nanojit/NativeARM.cpp b/js/src/nanojit/NativeARM.cpp index 8862bb9d75e..6d3ffdae050 100644 --- a/js/src/nanojit/NativeARM.cpp +++ b/js/src/nanojit/NativeARM.cpp @@ -1310,13 +1310,16 @@ Assembler::asm_spill(Register rr, int d, bool pop, bool quad) void Assembler::asm_load64(LInsp ins) { + NanoAssert(!ins->isop(LIR_ldq) && !ins->isop(LIR_ldqc)); + //asm_output("<<< load64"); switch (ins->opcode()) { - case LIR_ldq: - case LIR_ldqc: + case LIR_ldf: + case LIR_ldfc: // handled by mainline code below for now break; + case LIR_ld32f: case LIR_ldc32f: NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture"); @@ -1370,15 +1373,19 @@ Assembler::asm_load64(LInsp ins) void Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base) { + NanoAssert(op != LIR_stqi); + //asm_output("<<< store64 (dr: %d)", dr); switch (op) { - case LIR_stqi: + case LIR_stfi: // handled by mainline code below for now break; + case LIR_st32f: NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture"); return; + default: NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode"); return; diff --git a/js/src/nanojit/NativePPC.cpp b/js/src/nanojit/NativePPC.cpp index f19f185ff96..903b31feae9 100644 --- a/js/src/nanojit/NativePPC.cpp +++ b/js/src/nanojit/NativePPC.cpp @@ -221,6 +221,8 @@ namespace nanojit void Assembler::asm_load64(LIns *ins) { switch (ins->opcode()) { + case LIR_ldf: + case LIR_ldfc: case LIR_ldq: case LIR_ldqc: // handled by mainline code below for now @@ -314,6 +316,7 @@ namespace nanojit NanoAssert(value->isQuad()); switch (op) { + case LIR_stfi: case LIR_stqi: // handled by mainline code below for now break; diff --git a/js/src/nanojit/NativeSparc.cpp b/js/src/nanojit/NativeSparc.cpp index 27ad7a785e5..4c629dc600d 100644 --- a/js/src/nanojit/NativeSparc.cpp +++ b/js/src/nanojit/NativeSparc.cpp @@ 
-365,9 +365,11 @@ namespace nanojit void Assembler::asm_load64(LInsp ins) { + NanoAssert(!ins->isop(LIR_ldq) && !ins->isop(LIR_ldqc)); + switch (ins->opcode()) { - case LIR_ldq: - case LIR_ldqc: + case LIR_ldf: + case LIR_ldfc: // handled by mainline code below for now break; case LIR_ld32f: @@ -410,8 +412,10 @@ namespace nanojit void Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base) { + NanoAssert(op != LIR_stqi); + switch (op) { - case LIR_stqi: + case LIR_stfi: // handled by mainline code below for now break; case LIR_st32f: @@ -435,7 +439,7 @@ namespace nanojit return; } - if (value->isop(LIR_ldq) || value->isop(LIR_ldqc) || value->isop(LIR_qjoin)) + if (value->isop(LIR_ldf) || value->isop(LIR_ldfc) || value->isop(LIR_qjoin)) { // value is 64bit struct or int64_t, or maybe a double. // it may be live in an FPU reg. Either way, don't @@ -458,6 +462,7 @@ namespace nanojit return; } + NanoAssert(!value->isop(LIR_ldq) && !value->isop(LIR_ldqc)); Register rb; if (base->isop(LIR_alloc)) { rb = FP; diff --git a/js/src/nanojit/NativeX64.cpp b/js/src/nanojit/NativeX64.cpp index aaa944dd7d3..732b8a67ddd 100644 --- a/js/src/nanojit/NativeX64.cpp +++ b/js/src/nanojit/NativeX64.cpp @@ -631,7 +631,11 @@ namespace nanojit // To make sure floating point operations stay in FPU registers // as much as possible, make sure that only a few opcodes are // reserving GPRs.
- NanoAssert(a->isop(LIR_quad) || a->isop(LIR_ldq) || a->isop(LIR_ldqc)|| a->isop(LIR_ld32f) || a->isop(LIR_ldc32f)|| a->isop(LIR_u2f) || a->isop(LIR_float) || a->isop(LIR_fcall)); + NanoAssert(a->isop(LIR_quad) || a->isop(LIR_float) || + a->isop(LIR_ldf) || a->isop(LIR_ldfc) || + a->isop(LIR_ldq) || a->isop(LIR_ldqc) || + a->isop(LIR_ld32f) || a->isop(LIR_ldc32f) || + a->isop(LIR_u2f) || a->isop(LIR_fcall)); allow &= ~rmask(rr); ra = findRegFor(a, allow); } else { @@ -1387,6 +1391,8 @@ namespace nanojit switch (ins->opcode()) { case LIR_ldq: case LIR_ldqc: + case LIR_ldf: + case LIR_ldfc: regalloc_load(ins, GpRegs, rr, dr, rb); if (IsGpReg(rr)) { // general 64bit load, 32bit const displacement @@ -1468,6 +1474,7 @@ namespace nanojit switch (op) { case LIR_stqi: + case LIR_stfi: { if (IsGpReg(r)) { // gpr store diff --git a/js/src/nanojit/Nativei386.cpp b/js/src/nanojit/Nativei386.cpp index 181fb41bcd3..09ae8a7e221 100644 --- a/js/src/nanojit/Nativei386.cpp +++ b/js/src/nanojit/Nativei386.cpp @@ -555,6 +555,8 @@ namespace nanojit void Assembler::asm_load64(LInsp ins) { + NanoAssert(!ins->isop(LIR_ldq) && !ins->isop(LIR_ldqc)); + LIns* base = ins->oprnd1(); int db = ins->disp(); Register rr = ins->getReg(); @@ -564,8 +566,8 @@ namespace nanojit freeRsrcOf(ins, false); Register rb = getBaseReg(base, db, GpRegs); switch (ins->opcode()) { - case LIR_ldq: - case LIR_ldqc: + case LIR_ldf: + case LIR_ldfc: SSE_LDQ(rr, db, rb); break; case LIR_ld32f: @@ -593,8 +595,8 @@ namespace nanojit ins->setReg(UnknownReg); switch (ins->opcode()) { - case LIR_ldq: - case LIR_ldqc: + case LIR_ldf: + case LIR_ldfc: // don't use an fpu reg to simply load & store the value. 
if (dr) asm_mmq(FP, dr, rb, db); @@ -637,6 +639,8 @@ namespace nanojit void Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base) { + NanoAssert(op != LIR_stqi); + Register rb = getBaseReg(base, dr, GpRegs); if (op == LIR_st32f) { @@ -662,7 +666,7 @@ namespace nanojit STi(rb, dr+4, value->imm64_1()); STi(rb, dr, value->imm64_0()); - } else if (value->isop(LIR_ldq) || value->isop(LIR_ldqc) || value->isop(LIR_qjoin)) { + } else if (value->isop(LIR_ldf) || value->isop(LIR_ldfc) || value->isop(LIR_qjoin)) { // value is 64bit struct or int64_t, or maybe a double. // It may be live in an FPU reg. Either way, don't put it in an // FPU reg just to load & store it. @@ -681,6 +685,7 @@ namespace nanojit } } else { + NanoAssert(!value->isop(LIR_ldq) && !value->isop(LIR_ldqc)); bool pop = value->isUnusedOrHasUnknownReg(); Register rv = ( pop ? findRegFor(value, config.sse2 ? XmmRegs : FpRegs) @@ -900,7 +905,7 @@ namespace nanojit default: NanoAssert(0); break; } - freeResourcesOf(ins); // njn: move after asm_cmp? + freeResourcesOf(ins); asm_cmp(ins); }