bug 601135. switch x86/x64 to use cvttsd2si. add documentation (+r nick)

--HG--
extra : convert_revision : 6e1ed02c675eafc3195d67439854ba1a7b4f7c13
This commit is contained in:
Werner Sharp (wsharp@adobe.com) 2010-11-03 16:30:00 -04:00
parent 9740d93c46
commit 0be07ddc2a
5 changed files with 25 additions and 4 deletions

View File

@ -309,7 +309,23 @@ OP_64(q2i, 110, Op1, I, 1) // truncate quad to int (removes the high 32
OP___(i2d, 111, Op1, D, 1) // convert int to double
OP___(ui2d, 112, Op1, D, 1) // convert unsigned int to double
OP___(d2i, 113, Op1, I, 1) // convert double to int (no exceptions raised, platform rounding rules)
// The rounding behavior of LIR_d2i is platform specific.
//
// Platform Asm code Behavior
// -------- -------- --------
// x86 w/ x87 fist uses current FP control word (default is round to nearest)
// x86 w/ SSE cvttsd2si performs round to zero (truncate)
// x64 (SSE) cvttsd2si performs round to zero (truncate)
// PowerPC unsupported
// ARM ftosid uses current FPSCR rounding mode (default is round to nearest)
// MIPS trunc.w.d performs round to zero (truncate)
// SH4 ftrc performs round to zero (truncate)
// SPARC fdtoi performs round to zero (truncate)
//
// round to zero examples: 1.9 -> 1, 1.1 -> 1, -1.1 -> -1, -1.9 -> -1
// round to nearest examples: 1.9 -> 2, 1.1 -> 1, -1.1 -> -1, -1.9 -> -2
OP___(d2i, 113, Op1, I, 1) // convert double to int (no exceptions raised)
OP_64(dasq, 114, Op1, Q, 1) // interpret the bits of a double as a quad
OP_64(qasd, 115, Op1, D, 1) // interpret the bits of a quad as a double

View File

@ -495,6 +495,7 @@ namespace nanojit
void Assembler::CVTSS2SD(R l, R r) { emitprr(X64_cvtss2sd,l,r); asm_output("cvtss2sd %s, %s",RQ(l),RL(r)); }
void Assembler::CVTSD2SS(R l, R r) { emitprr(X64_cvtsd2ss,l,r); asm_output("cvtsd2ss %s, %s",RL(l),RQ(r)); }
void Assembler::CVTSD2SI(R l, R r) { emitprr(X64_cvtsd2si,l,r); asm_output("cvtsd2si %s, %s",RL(l),RQ(r)); }
// Emits X64_cvttsd2si (double -> int32, rounding toward zero, i.e. truncating), in contrast to CVTSD2SI, whose opcode is documented as converting "with rounding"; traces it as "cvttsd2si".
void Assembler::CVTTSD2SI(R l, R r) { emitprr(X64_cvttsd2si,l,r);asm_output("cvttsd2si %s, %s",RL(l),RQ(r));}
void Assembler::UCOMISD( R l, R r) { emitprr(X64_ucomisd, l,r); asm_output("ucomisd %s, %s", RQ(l),RQ(r)); }
void Assembler::MOVQRX( R l, R r) { emitprr(X64_movqrx, r,l); asm_output("movq %s, %s", RQ(l),RQ(r)); } // Nb: r and l are deliberately reversed within the emitprr() call.
void Assembler::MOVQXR( R l, R r) { emitprr(X64_movqxr, l,r); asm_output("movq %s, %s", RQ(l),RQ(r)); }
@ -1145,7 +1146,7 @@ namespace nanojit
Register rr = prepareResultReg(ins, GpRegs);
Register rb = findRegFor(a, FpRegs);
CVTSD2SI(rr, rb);
CVTTSD2SI(rr, rb);
freeResourcesOf(ins);
}

View File

@ -201,7 +201,8 @@ namespace nanojit
X64_cvtsq2sd= 0xC02A0F48F2000005LL, // convert int64 to double r = (double) b
X64_cvtss2sd= 0xC05A0F40F3000005LL, // convert float to double r = (double) b
X64_cvtsd2ss= 0xC05A0F40F2000005LL, // convert double to float r = (float) b
X64_cvtsd2si= 0xC02D0F40F2000005LL, // convert double to int32 r = (int32) b
X64_cvtsd2si= 0xC02D0F40F2000005LL, // convert double to int32 with rounding r = (int32) b
X64_cvttsd2si=0xC02C0F40F2000005LL, // convert double to int32 with truncation (round toward zero) r = (int32) b
X64_divsd = 0xC05E0F40F2000005LL, // divide scalar double r /= b
X64_mulsd = 0xC0590F40F2000005LL, // multiply scalar double r *= b
X64_addsd = 0xC0580F40F2000005LL, // add scalar double r += b
@ -504,6 +505,7 @@ namespace nanojit
void CVTSS2SD(Register l, Register r);\
void CVTSD2SS(Register l, Register r);\
void CVTSD2SI(Register l, Register r);\
void CVTTSD2SI(Register l, Register r);\
void UCOMISD(Register l, Register r);\
void MOVQRX(Register l, Register r);\
void MOVQXR(Register l, Register r);\

View File

@ -718,6 +718,7 @@ namespace nanojit
inline void Assembler::SSE_CVTSI2SD(R xr, R gr) { count_fpu(); SSE(0xf20f2a, xr, gr); asm_output("cvtsi2sd %s,%s", gpn(xr), gpn(gr)); }
inline void Assembler::SSE_CVTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2d, gr, xr); asm_output("cvtsd2si %s,%s", gpn(gr), gpn(xr)); }
// Emits SSE opcode 0xf20f2c (cvttsd2si): double in xr -> int in gr, rounding toward zero (truncating); also bumps the FPU counter and traces the instruction.
inline void Assembler::SSE_CVTTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2c, gr, xr); asm_output("cvttsd2si %s,%s",gpn(gr), gpn(xr)); }
inline void Assembler::SSE_CVTSD2SS(R xr, R gr) { count_fpu(); SSE(0xf20f5a, xr, gr); asm_output("cvtsd2ss %s,%s", gpn(xr), gpn(gr)); }
inline void Assembler::SSE_CVTSS2SD(R xr, R gr) { count_fpu(); SSE(0xf30f5a, xr, gr); asm_output("cvtss2sd %s,%s", gpn(xr), gpn(gr)); }
inline void Assembler::SSE_CVTDQ2PD(R d, R r) { count_fpu(); SSE(0xf30fe6, d, r); asm_output("cvtdq2pd %s,%s", gpn(d), gpn(r)); }
@ -2587,7 +2588,7 @@ namespace nanojit
if (_config.i386_sse2) {
Register rr = prepareResultReg(ins, GpRegs);
Register ra = findRegFor(lhs, XmmRegs);
SSE_CVTSD2SI(rr, ra);
SSE_CVTTSD2SI(rr, ra);
} else {
bool pop = !lhs->isInReg();
findSpecificRegFor(lhs, FST0);

View File

@ -401,6 +401,7 @@ namespace nanojit
void SSE_STQsib(int32_t d, Register rb, Register ri, int32_t scale, Register rv); \
void SSE_CVTSI2SD(Register xr, Register gr); \
void SSE_CVTSD2SI(Register gr, Register xr); \
void SSE_CVTTSD2SI(Register gr, Register xr); \
void SSE_CVTSD2SS(Register xr, Register gr); \
void SSE_CVTSS2SD(Register xr, Register gr); \
void SSE_CVTDQ2PD(Register d, Register r); \