bug 601135. switch x86/x64 to use cvttsd2si. add documentation (+r nick)

--HG-- extra : convert_revision : 6e1ed02c675eafc3195d67439854ba1a7b4f7c13
2024-09-13 09:24:08 -07:00 · 2010-11-03 16:30:00 -04:00 · 2010-11-03 16:30:00 -04:00 · 0be07ddc2a
commit 0be07ddc2a
parent 9740d93c46
5 changed files with 25 additions and 4 deletions
--- a/js/src/nanojit/LIRopcode.tbl
+++ b/js/src/nanojit/LIRopcode.tbl
@ -309,7 +309,23 @@ OP_64(q2i,     110, Op1,  I,    1)  // truncate quad to int (removes the high 32

 OP___(i2d,     111, Op1,  D,    1)  // convert int to double
 OP___(ui2d,    112, Op1,  D,    1)  // convert unsigned int to double
-OP___(d2i,     113, Op1,  I,    1)  // convert double to int (no exceptions raised, platform rounding rules)
+
+// The rounding behavior of LIR_d2i is platform specific.
+//
+// Platform     Asm code        Behavior
+// --------     --------        --------
+// x86 w/ x87   fist            uses current FP control word (default is round to nearest)
+// x86 w/ SSE   cvttsd2si       performs round to zero (truncate)
+// x64 (SSE)    cvttsd2si       performs round to zero (truncate) 
+// PowerPC                      unsupported
+// ARM          ftosid          round to nearest
+// MIPS         trunc.w.d       performs round to zero (truncate)
+// SH4          ftrc            performs round to zero (truncate)
+// SPARC        fdtoi           performs round to zero (truncate)
+//
+// round to zero examples:  1.9 -> 1, 1.1 -> 1, -1.1 -> -1, -1.9 -> -1
+// round to nearest examples: 1.9 -> 2, 1.1 -> 1, -1.1 -> -1, -1.9 -> -2
+OP___(d2i,     113, Op1,  I,    1)  // convert double to int (no exceptions raised)

 OP_64(dasq,    114, Op1,  Q,    1)  // interpret the bits of a double as a quad
 OP_64(qasd,    115, Op1,  D,    1)  // interpret the bits of a quad as a double
--- a/js/src/nanojit/NativeX64.cpp
+++ b/js/src/nanojit/NativeX64.cpp
@ -495,6 +495,7 @@ namespace nanojit
    void Assembler::CVTSS2SD(R l, R r)  { emitprr(X64_cvtss2sd,l,r); asm_output("cvtss2sd %s, %s",RQ(l),RL(r)); }
    void Assembler::CVTSD2SS(R l, R r)  { emitprr(X64_cvtsd2ss,l,r); asm_output("cvtsd2ss %s, %s",RL(l),RQ(r)); }
    void Assembler::CVTSD2SI(R l, R r)  { emitprr(X64_cvtsd2si,l,r); asm_output("cvtsd2si %s, %s",RL(l),RQ(r)); }
+    void Assembler::CVTTSD2SI(R l, R r) { emitprr(X64_cvttsd2si,l,r);asm_output("cvttsd2si %s, %s",RL(l),RQ(r));} // Truncating (round-to-zero) double -> int32 conversion; l receives the result, r is the source.
    void Assembler::UCOMISD( R l, R r)  { emitprr(X64_ucomisd, l,r); asm_output("ucomisd %s, %s", RQ(l),RQ(r)); }
    void Assembler::MOVQRX(  R l, R r)  { emitprr(X64_movqrx,  r,l); asm_output("movq %s, %s",    RQ(l),RQ(r)); } // Nb: r and l are deliberately reversed within the emitprr() call.
    void Assembler::MOVQXR(  R l, R r)  { emitprr(X64_movqxr,  l,r); asm_output("movq %s, %s",    RQ(l),RQ(r)); }
@ -1145,7 +1146,7 @@ namespace nanojit

        Register rr = prepareResultReg(ins, GpRegs);
        Register rb = findRegFor(a, FpRegs);
-        CVTSD2SI(rr, rb);
+        CVTTSD2SI(rr, rb); 
        freeResourcesOf(ins);
    }

--- a/js/src/nanojit/NativeX64.h
+++ b/js/src/nanojit/NativeX64.h
@ -201,7 +201,8 @@ namespace nanojit
        X64_cvtsq2sd= 0xC02A0F48F2000005LL, // convert int64 to double r = (double) b
        X64_cvtss2sd= 0xC05A0F40F3000005LL, // convert float to double r = (double) b
        X64_cvtsd2ss= 0xC05A0F40F2000005LL, // convert double to float r = (float) b
-        X64_cvtsd2si= 0xC02D0F40F2000005LL, // convert double to int32 r = (int32) b
+        X64_cvtsd2si= 0xC02D0F40F2000005LL, // convert double to int32 with rounding r = (int32) b
+        X64_cvttsd2si=0xC02C0F40F2000005LL, // convert double to int32 with truncation (round to zero) r = (int32) b
        X64_divsd   = 0xC05E0F40F2000005LL, // divide scalar double r /= b
        X64_mulsd   = 0xC0590F40F2000005LL, // multiply scalar double r *= b
        X64_addsd   = 0xC0580F40F2000005LL, // add scalar double r += b
@ -504,6 +505,7 @@ namespace nanojit
        void CVTSS2SD(Register l, Register r);\
        void CVTSD2SS(Register l, Register r);\
        void CVTSD2SI(Register l, Register r);\
+        void CVTTSD2SI(Register l, Register r);\
        void UCOMISD(Register l, Register r);\
        void MOVQRX(Register l, Register r);\
        void MOVQXR(Register l, Register r);\
--- a/js/src/nanojit/Nativei386.cpp
+++ b/js/src/nanojit/Nativei386.cpp
@ -718,6 +718,7 @@ namespace nanojit

    inline void Assembler::SSE_CVTSI2SD(R xr, R gr)  { count_fpu(); SSE(0xf20f2a, xr, gr); asm_output("cvtsi2sd %s,%s", gpn(xr), gpn(gr)); }
    inline void Assembler::SSE_CVTSD2SI(R gr, R xr)  { count_fpu(); SSE(0xf20f2d, gr, xr); asm_output("cvtsd2si %s,%s", gpn(gr), gpn(xr)); }
+    inline void Assembler::SSE_CVTTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2c, gr, xr); asm_output("cvttsd2si %s,%s",gpn(gr), gpn(xr)); } // Truncating (round-to-zero) double -> int conversion; gr receives the result, xr is the XMM source.
    inline void Assembler::SSE_CVTSD2SS(R xr, R gr)  { count_fpu(); SSE(0xf20f5a, xr, gr); asm_output("cvtsd2ss %s,%s", gpn(xr), gpn(gr)); }
    inline void Assembler::SSE_CVTSS2SD(R xr, R gr)  { count_fpu(); SSE(0xf30f5a, xr, gr); asm_output("cvtss2sd %s,%s", gpn(xr), gpn(gr)); }
    inline void Assembler::SSE_CVTDQ2PD(R d,  R r)   { count_fpu(); SSE(0xf30fe6, d,  r);  asm_output("cvtdq2pd %s,%s", gpn(d), gpn(r)); }
@ -2587,7 +2588,7 @@ namespace nanojit
        if (_config.i386_sse2) {
            Register rr = prepareResultReg(ins, GpRegs);
            Register ra = findRegFor(lhs, XmmRegs);
-            SSE_CVTSD2SI(rr, ra);
+            SSE_CVTTSD2SI(rr, ra); 
        } else {
            bool pop = !lhs->isInReg();
            findSpecificRegFor(lhs, FST0);
--- a/js/src/nanojit/Nativei386.h
+++ b/js/src/nanojit/Nativei386.h
@ -401,6 +401,7 @@ namespace nanojit
        void SSE_STQsib(int32_t d, Register rb, Register ri, int32_t scale, Register rv); \
        void SSE_CVTSI2SD(Register xr, Register gr); \
        void SSE_CVTSD2SI(Register gr, Register xr); \
+        void SSE_CVTTSD2SI(Register gr, Register xr); \
        void SSE_CVTSD2SS(Register xr, Register gr); \
        void SSE_CVTSS2SD(Register xr, Register gr); \
        void SSE_CVTDQ2PD(Register d, Register r); \