599245 - nanojit: clean up formatting in Nativei386.cpp. r=stejohns.

--HG--
extra : convert_revision : f7b8497b9f0ac1d01d0a86ba7e28c812e6ea8140
Nicholas Nethercote 2010-09-26 16:47:10 -07:00
parent 52ea68fee4
commit a9ecc87093
2 changed files with 231 additions and 241 deletions

nanojit/Nativei386.cpp

@@ -51,8 +51,8 @@ namespace nanojit
#ifdef NJ_VERBOSE
const char *regNames[] = {
"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
"f0"
};
#endif
@@ -77,39 +77,39 @@ namespace nanojit
// XXX "error: logical && with non-zero constant will always evaluate as true"
// underrunProtect(6) is necessary for worst-case
inline void Assembler::MODRMs(I32 r, I32 d, R b, I32 l, I32 i) {
NanoAssert(unsigned(i)<8 && unsigned(b)<8 && unsigned(r)<8);
NanoAssert(unsigned(i) < 8 && unsigned(b) < 8 && unsigned(r) < 8);
if (d == 0 && b != EBP) {
_nIns -= 2;
_nIns[0] = (uint8_t) ( 0<<6 | r<<3 | 4);
_nIns[1] = (uint8_t) ( l<<6 | i<<3 | b);
_nIns[0] = uint8_t(0 << 6 | r << 3 | 4);
_nIns[1] = uint8_t(l << 6 | i << 3 | b);
} else if (isS8(d)) {
_nIns -= 3;
_nIns[0] = (uint8_t) ( 1<<6 | r<<3 | 4 );
_nIns[1] = (uint8_t) ( l<<6 | i<<3 | b );
_nIns[2] = (uint8_t) d;
_nIns[0] = uint8_t(1 << 6 | r << 3 | 4);
_nIns[1] = uint8_t(l << 6 | i << 3 | b);
_nIns[2] = uint8_t(d);
} else {
IMM32(d);
*(--_nIns) = (uint8_t) ( l<<6 | i<<3 | b );
*(--_nIns) = (uint8_t) ( 2<<6 | r<<3 | 4 );
*(--_nIns) = uint8_t(l << 6 | i << 3 | b);
*(--_nIns) = uint8_t(2 << 6 | r << 3 | 4);
}
}
// underrunProtect(6) is necessary for worst-case
inline void Assembler::MODRMm(I32 r, I32 d, R b) {
NanoAssert(unsigned(r)<8 && ((b)==UnspecifiedReg || unsigned(b)<8));
if ((b) == UnspecifiedReg) {
NanoAssert(unsigned(r) < 8 && (b == UnspecifiedReg || unsigned(b) < 8));
if (b == UnspecifiedReg) {
IMM32(d);
*(--_nIns) = (uint8_t) (0<<6 | (r)<<3 | 5);
} else if ((b) == ESP) {
*(--_nIns) = uint8_t(0 << 6 | r << 3 | 5);
} else if (b == ESP) {
MODRMs(r, d, b, 0, (Register)4);
} else if ( (d) == 0 && (b) != EBP) {
*(--_nIns) = (uint8_t) ( 0<<6 | r<<3 | b );
} else if (d == 0 && b != EBP) {
*(--_nIns) = uint8_t(0 << 6 | r << 3 | b);
} else if (isS8(d)) {
*(--_nIns) = (uint8_t) (d);
*(--_nIns) = (uint8_t) ( 1<<6 | r<<3 | b );
*(--_nIns) = uint8_t(d);
*(--_nIns) = uint8_t(1 << 6 | r << 3 | b);
} else {
IMM32(d);
*(--_nIns) = (uint8_t) ( 2<<6 | r<<3 | b );
*(--_nIns) = uint8_t(2 << 6 | r << 3 | b);
}
}
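The byte assembled throughout this hunk is the x86 ModRM byte: mod in bits 7-6, reg (or an opcode extension) in bits 5-3, r/m in bits 2-0. A minimal sketch of the layout and of the displacement-size choice that isS8() drives, with hypothetical helper names, not part of the patch:

    #include <stdint.h>

    // ModRM layout: addressing form, register/opcode extension, base register.
    static uint8_t modrm(unsigned mod, unsigned reg, unsigned rm) {
        return uint8_t(mod << 6 | reg << 3 | rm);
    }

    // Mirrors the branches above: mod=0 needs no displacement bytes (EBP is
    // excluded because mod=00, r/m=101 means disp32 with no base register),
    // mod=1 takes a disp8, mod=2 a disp32.
    static unsigned dispMod(int32_t d, unsigned base) {
        if (d == 0 && base != 5 /* EBP */) return 0;
        return (d == int8_t(d)) ? 1 : 2;
    }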
@@ -121,20 +121,20 @@ namespace nanojit
IMM32(disp);
}
}
*(--_nIns) = uint8_t( scale<<6 | index<<3 | base );
*(--_nIns) = uint8_t(scale << 6 | index << 3 | base);
if (disp == 0 && base != EBP) {
*(--_nIns) = uint8_t( (reg<<3) | 4);
*(--_nIns) = uint8_t(reg << 3 | 4);
} else if (isS8(disp)) {
*(--_nIns) = uint8_t( (1<<6) | (reg<<3) | 4 );
*(--_nIns) = uint8_t(1 << 6 | reg << 3 | 4);
} else {
*(--_nIns) = uint8_t( (2<<6) | (reg<<3) | 4 );
*(--_nIns) = uint8_t(2 << 6 | reg << 3 | 4);
}
}
inline void Assembler::MODRMdm(I32 r, I32 addr) {
NanoAssert(unsigned(r)<8);
NanoAssert(unsigned(r) < 8);
IMM32(addr);
*(--_nIns) = (uint8_t)( r<<3 | 5 );
*(--_nIns) = uint8_t(r << 3 | 5);
}
inline void Assembler::ALU0(I32 o) {
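The scale/index/base byte built at the top of this hunk is the x86 SIB byte. Its layout, as a hypothetical helper using the same register numbering as regNames[]:

    #include <stdint.h>

    // SIB byte: scale (log2 of 1/2/4/8) in bits 7-6, index register in 5-3,
    // base register in 2-0. An r/m field of 4 in the preceding ModRM byte is
    // what tells the CPU to expect this byte at all.
    static uint8_t sib(unsigned scale, unsigned index, unsigned base) {
        return uint8_t(scale << 6 | index << 3 | base);
    }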
@@ -190,7 +190,7 @@ namespace nanojit
inline void Assembler::ALUi(I32 c, I32 r, I32 i) {
underrunProtect(6);
NanoAssert(unsigned(r)<8);
NanoAssert(unsigned(r) < 8);
if (isS8(i)) {
*(--_nIns) = uint8_t(i);
MODRM(c>>3, r);
@@ -200,7 +200,7 @@ namespace nanojit
if ( r == EAX) {
*(--_nIns) = uint8_t(c);
} else {
MODRM((c>>3),(r));
MODRM(c >> 3, r);
*(--_nIns) = uint8_t(0x81);
}
}
@@ -208,7 +208,7 @@ namespace nanojit
inline void Assembler::ALUmi(I32 c, I32 d, Register b, I32 i) {
underrunProtect(10);
NanoAssert(((unsigned)b)<8);
NanoAssert(unsigned(b) < 8);
if (isS8(i)) {
*(--_nIns) = uint8_t(i);
MODRMm(c>>3, d, b);
@@ -222,7 +222,7 @@ namespace nanojit
inline void Assembler::ALU2(I32 c, I32 d, I32 s) {
underrunProtect(3);
MODRM((d),(s));
MODRM(d, s);
_nIns -= 2;
_nIns[0] = uint8_t(c>>8);
_nIns[1] = uint8_t(c);
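The _nIns -= 2 / _nIns[0] pattern in ALU2 is nanojit's backwards code emission: _nIns starts past the end of the buffer and walks downward, so multi-byte sequences are written last byte first. A toy model of the idea (names assumed, simplified):

    #include <stdint.h>

    static uint8_t* emitImul(uint8_t* nIns) {   // nIns points past the code
        *(--nIns) = 0xAF;   // second opcode byte is written first...
        *(--nIns) = 0x0F;   // ...then the escape byte lands in front of it
        // memory now reads 0F AF: the two-byte 'imul r32,r/m32' that MUL's
        // ALU2(0x0faf, ...) call below produces (ModRM byte omitted here)
        return nIns;
    }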
@@ -230,70 +230,70 @@ namespace nanojit
inline void Assembler::LAHF() { count_alu(); ALU0(0x9F); asm_output("lahf"); }
inline void Assembler::SAHF() { count_alu(); ALU0(0x9E); asm_output("sahf"); }
inline void Assembler::OR(R l, R r) { count_alu(); ALU(0x0b, (l),(r)); asm_output("or %s,%s",gpn(l),gpn(r)); }
inline void Assembler::AND(R l, R r) { count_alu(); ALU(0x23, (l),(r)); asm_output("and %s,%s",gpn(l),gpn(r)); }
inline void Assembler::XOR(R l, R r) { count_alu(); ALU(0x33, (l),(r)); asm_output("xor %s,%s",gpn(l),gpn(r)); }
inline void Assembler::ADD(R l, R r) { count_alu(); ALU(0x03, (l),(r)); asm_output("add %s,%s",gpn(l),gpn(r)); }
inline void Assembler::SUB(R l, R r) { count_alu(); ALU(0x2b, (l),(r)); asm_output("sub %s,%s",gpn(l),gpn(r)); }
inline void Assembler::MUL(R l, R r) { count_alu(); ALU2(0x0faf,(l),(r)); asm_output("mul %s,%s",gpn(l),gpn(r)); }
inline void Assembler::DIV(R r) { count_alu(); ALU(0xf7, (Register)7,(r)); asm_output("idiv edx:eax, %s",gpn(r)); }
inline void Assembler::NOT(R r) { count_alu(); ALU(0xf7, (Register)2,(r)); asm_output("not %s",gpn(r)); }
inline void Assembler::NEG(R r) { count_alu(); ALU(0xf7, (Register)3,(r)); asm_output("neg %s",gpn(r)); }
inline void Assembler::OR(R l, R r) { count_alu(); ALU(0x0b, l, r); asm_output("or %s,%s", gpn(l), gpn(r)); }
inline void Assembler::AND(R l, R r) { count_alu(); ALU(0x23, l, r); asm_output("and %s,%s", gpn(l), gpn(r)); }
inline void Assembler::XOR(R l, R r) { count_alu(); ALU(0x33, l, r); asm_output("xor %s,%s", gpn(l), gpn(r)); }
inline void Assembler::ADD(R l, R r) { count_alu(); ALU(0x03, l, r); asm_output("add %s,%s", gpn(l), gpn(r)); }
inline void Assembler::SUB(R l, R r) { count_alu(); ALU(0x2b, l, r); asm_output("sub %s,%s", gpn(l), gpn(r)); }
inline void Assembler::MUL(R l, R r) { count_alu(); ALU2(0x0faf, l, r); asm_output("mul %s,%s", gpn(l), gpn(r)); }
inline void Assembler::DIV(R r) { count_alu(); ALU(0xf7, (Register)7, r); asm_output("idiv edx:eax, %s", gpn(r)); }
inline void Assembler::NOT(R r) { count_alu(); ALU(0xf7, (Register)2, r); asm_output("not %s", gpn(r)); }
inline void Assembler::NEG(R r) { count_alu(); ALU(0xf7, (Register)3, r); asm_output("neg %s", gpn(r)); }
inline void Assembler::SHR(R r, R s) {
count_alu();
NanoAssert(s == ECX); (void)s;
ALU(0xd3, (Register)5,(r));
asm_output("shr %s,%s",gpn(r),gpn(s));
ALU(0xd3, (Register)5, r);
asm_output("shr %s,%s", gpn(r), gpn(s));
}
inline void Assembler::SAR(R r, R s) {
count_alu();
NanoAssert(s == ECX); (void)s;
ALU(0xd3, (Register)7,(r));
asm_output("sar %s,%s",gpn(r),gpn(s));
ALU(0xd3, (Register)7, r);
asm_output("sar %s,%s", gpn(r), gpn(s));
}
inline void Assembler::SHL(R r, R s) {
count_alu();
NanoAssert(s == ECX); (void)s;
ALU(0xd3, (Register)4,(r));
asm_output("shl %s,%s",gpn(r),gpn(s));
ALU(0xd3, (Register)4, r);
asm_output("shl %s,%s", gpn(r), gpn(s));
}
inline void Assembler::SHIFT(I32 c, R r, I32 i) {
underrunProtect(3);
*--_nIns = (uint8_t)(i);
MODRM((Register)c,r);
*--_nIns = uint8_t(i);
MODRM((Register)c, r);
*--_nIns = 0xc1;
}
inline void Assembler::SHLi(R r, I32 i) { count_alu(); SHIFT(4,r,i); asm_output("shl %s,%d", gpn(r),i); }
inline void Assembler::SHRi(R r, I32 i) { count_alu(); SHIFT(5,r,i); asm_output("shr %s,%d", gpn(r),i); }
inline void Assembler::SARi(R r, I32 i) { count_alu(); SHIFT(7,r,i); asm_output("sar %s,%d", gpn(r),i); }
inline void Assembler::SHLi(R r, I32 i) { count_alu(); SHIFT(4, r, i); asm_output("shl %s,%d", gpn(r), i); }
inline void Assembler::SHRi(R r, I32 i) { count_alu(); SHIFT(5, r, i); asm_output("shr %s,%d", gpn(r), i); }
inline void Assembler::SARi(R r, I32 i) { count_alu(); SHIFT(7, r, i); asm_output("sar %s,%d", gpn(r), i); }
inline void Assembler::MOVZX8(R d, R s) { count_alu(); ALU2(0x0fb6,d,s); asm_output("movzx %s,%s", gpn(d),gpn(s)); }
inline void Assembler::MOVZX8(R d, R s) { count_alu(); ALU2(0x0fb6, d, s); asm_output("movzx %s,%s", gpn(d), gpn(s)); }
inline void Assembler::SUBi(R r, I32 i) { count_alu(); ALUi(0x2d,r,i); asm_output("sub %s,%d",gpn(r),i); }
inline void Assembler::ADDi(R r, I32 i) { count_alu(); ALUi(0x05,r,i); asm_output("add %s,%d",gpn(r),i); }
inline void Assembler::ANDi(R r, I32 i) { count_alu(); ALUi(0x25,r,i); asm_output("and %s,%d",gpn(r),i); }
inline void Assembler::ORi(R r, I32 i) { count_alu(); ALUi(0x0d,r,i); asm_output("or %s,%d",gpn(r),i); }
inline void Assembler::XORi(R r, I32 i) { count_alu(); ALUi(0x35,r,i); asm_output("xor %s,%d",gpn(r),i); }
inline void Assembler::SUBi(R r, I32 i) { count_alu(); ALUi(0x2d, r, i); asm_output("sub %s,%d", gpn(r), i); }
inline void Assembler::ADDi(R r, I32 i) { count_alu(); ALUi(0x05, r, i); asm_output("add %s,%d", gpn(r), i); }
inline void Assembler::ANDi(R r, I32 i) { count_alu(); ALUi(0x25, r, i); asm_output("and %s,%d", gpn(r), i); }
inline void Assembler::ORi(R r, I32 i) { count_alu(); ALUi(0x0d, r, i); asm_output("or %s,%d", gpn(r), i); }
inline void Assembler::XORi(R r, I32 i) { count_alu(); ALUi(0x35, r, i); asm_output("xor %s,%d", gpn(r), i); }
inline void Assembler::ADDmi(I32 d, R b, I32 i) { count_alust(); ALUmi(0x05, d, b, i); asm_output("add %d(%s), %d", d, gpn(b), i); }
inline void Assembler::TEST(R d, R s) { count_alu(); ALU(0x85,d,s); asm_output("test %s,%s",gpn(d),gpn(s)); }
inline void Assembler::CMP(R l, R r) { count_alu(); ALU(0x3b,l,r); asm_output("cmp %s,%s",gpn(l),gpn(r)); }
inline void Assembler::CMPi(R r, I32 i) { count_alu(); ALUi(0x3d,r,i); asm_output("cmp %s,%d",gpn(r),i); }
inline void Assembler::TEST(R d, R s) { count_alu(); ALU(0x85, d, s); asm_output("test %s,%s", gpn(d), gpn(s)); }
inline void Assembler::CMP(R l, R r) { count_alu(); ALU(0x3b, l, r); asm_output("cmp %s,%s", gpn(l), gpn(r)); }
inline void Assembler::CMPi(R r, I32 i) { count_alu(); ALUi(0x3d, r, i); asm_output("cmp %s,%d", gpn(r), i); }
inline void Assembler::LEA(R r, I32 d, R b) { count_alu(); ALUm(0x8d, r,d,b); asm_output("lea %s,%d(%s)",gpn(r),d,gpn(b)); }
inline void Assembler::LEA(R r, I32 d, R b) { count_alu(); ALUm(0x8d, r, d, b); asm_output("lea %s,%d(%s)", gpn(r), d, gpn(b)); }
// lea %r, d(%i*4)
// This addressing mode is not supported by the MODRMSIB macro.
inline void Assembler::LEAmi4(R r, I32 d, I32 i) {
count_alu();
IMM32(int32_t(d));
*(--_nIns) = (2<<6) | ((uint8_t)i<<3) | 5;
*(--_nIns) = (0<<6) | ((uint8_t)r<<3) | 4;
*(--_nIns) = uint8_t(2 << 6 | i << 3 | 5);
*(--_nIns) = uint8_t(0 << 6 | r << 3 | 4);
*(--_nIns) = 0x8d;
asm_output("lea %s, %p(%s*4)", gpn(r), (void*)d, gpn(i));
}
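LEAmi4's hand-rolled encoding is easiest to check on a concrete call. A worked decoding (mine, assuming the usual numbering EAX=0, EBX=3):

    // LEAmi4(EAX, 0x1000, EBX) emits, read forward:
    //   8D            lea
    //   04            ModRM: mod=00, reg=000 (eax), r/m=100 (SIB follows)
    //   9D            SIB:   scale=10 (*4), index=011 (ebx), base=101
    //                 (mod=00 with base=101 selects disp32 with no base register)
    //   00 10 00 00   disp32 = 0x1000, little-endian
    // i.e. lea eax, 0x1000(,ebx,4)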
@@ -307,29 +307,29 @@ namespace nanojit
asm_output("incl (%p)", (void*)p);
}
inline void Assembler::SETE( R r) { count_alu(); ALU2(0x0f94,(r),(r)); asm_output("sete %s", gpn(r)); }
inline void Assembler::SETNP(R r) { count_alu(); ALU2(0x0f9B,(r),(r)); asm_output("setnp %s",gpn(r)); }
inline void Assembler::SETL( R r) { count_alu(); ALU2(0x0f9C,(r),(r)); asm_output("setl %s", gpn(r)); }
inline void Assembler::SETLE(R r) { count_alu(); ALU2(0x0f9E,(r),(r)); asm_output("setle %s",gpn(r)); }
inline void Assembler::SETG( R r) { count_alu(); ALU2(0x0f9F,(r),(r)); asm_output("setg %s", gpn(r)); }
inline void Assembler::SETGE(R r) { count_alu(); ALU2(0x0f9D,(r),(r)); asm_output("setge %s",gpn(r)); }
inline void Assembler::SETB( R r) { count_alu(); ALU2(0x0f92,(r),(r)); asm_output("setb %s", gpn(r)); }
inline void Assembler::SETBE(R r) { count_alu(); ALU2(0x0f96,(r),(r)); asm_output("setbe %s",gpn(r)); }
inline void Assembler::SETA( R r) { count_alu(); ALU2(0x0f97,(r),(r)); asm_output("seta %s", gpn(r)); }
inline void Assembler::SETAE(R r) { count_alu(); ALU2(0x0f93,(r),(r)); asm_output("setae %s",gpn(r)); }
inline void Assembler::SETO( R r) { count_alu(); ALU2(0x0f92,(r),(r)); asm_output("seto %s", gpn(r)); }
inline void Assembler::SETE( R r) { count_alu(); ALU2(0x0f94, r, r); asm_output("sete %s", gpn(r)); }
inline void Assembler::SETNP(R r) { count_alu(); ALU2(0x0f9B, r, r); asm_output("setnp %s", gpn(r)); }
inline void Assembler::SETL( R r) { count_alu(); ALU2(0x0f9C, r, r); asm_output("setl %s", gpn(r)); }
inline void Assembler::SETLE(R r) { count_alu(); ALU2(0x0f9E, r, r); asm_output("setle %s", gpn(r)); }
inline void Assembler::SETG( R r) { count_alu(); ALU2(0x0f9F, r, r); asm_output("setg %s", gpn(r)); }
inline void Assembler::SETGE(R r) { count_alu(); ALU2(0x0f9D, r, r); asm_output("setge %s", gpn(r)); }
inline void Assembler::SETB( R r) { count_alu(); ALU2(0x0f92, r, r); asm_output("setb %s", gpn(r)); }
inline void Assembler::SETBE(R r) { count_alu(); ALU2(0x0f96, r, r); asm_output("setbe %s", gpn(r)); }
inline void Assembler::SETA( R r) { count_alu(); ALU2(0x0f97, r, r); asm_output("seta %s", gpn(r)); }
inline void Assembler::SETAE(R r) { count_alu(); ALU2(0x0f93, r, r); asm_output("setae %s", gpn(r)); }
inline void Assembler::SETO( R r) { count_alu(); ALU2(0x0f92, r, r); asm_output("seto %s", gpn(r)); }
inline void Assembler::MREQ(R d, R s) { count_alu(); ALU2(0x0f44,d,s); asm_output("cmove %s,%s", gpn(d),gpn(s)); }
inline void Assembler::MRNE(R d, R s) { count_alu(); ALU2(0x0f45,d,s); asm_output("cmovne %s,%s", gpn(d),gpn(s)); }
inline void Assembler::MRL( R d, R s) { count_alu(); ALU2(0x0f4C,d,s); asm_output("cmovl %s,%s", gpn(d),gpn(s)); }
inline void Assembler::MRLE(R d, R s) { count_alu(); ALU2(0x0f4E,d,s); asm_output("cmovle %s,%s", gpn(d),gpn(s)); }
inline void Assembler::MRG( R d, R s) { count_alu(); ALU2(0x0f4F,d,s); asm_output("cmovg %s,%s", gpn(d),gpn(s)); }
inline void Assembler::MRGE(R d, R s) { count_alu(); ALU2(0x0f4D,d,s); asm_output("cmovge %s,%s", gpn(d),gpn(s)); }
inline void Assembler::MRB( R d, R s) { count_alu(); ALU2(0x0f42,d,s); asm_output("cmovb %s,%s", gpn(d),gpn(s)); }
inline void Assembler::MRBE(R d, R s) { count_alu(); ALU2(0x0f46,d,s); asm_output("cmovbe %s,%s", gpn(d),gpn(s)); }
inline void Assembler::MRA( R d, R s) { count_alu(); ALU2(0x0f47,d,s); asm_output("cmova %s,%s", gpn(d),gpn(s)); }
inline void Assembler::MRAE(R d, R s) { count_alu(); ALU2(0x0f43,d,s); asm_output("cmovae %s,%s", gpn(d),gpn(s)); }
inline void Assembler::MRNO(R d, R s) { count_alu(); ALU2(0x0f41,d,s); asm_output("cmovno %s,%s", gpn(d),gpn(s)); }
inline void Assembler::MREQ(R d, R s) { count_alu(); ALU2(0x0f44, d, s); asm_output("cmove %s,%s", gpn(d), gpn(s)); }
inline void Assembler::MRNE(R d, R s) { count_alu(); ALU2(0x0f45, d, s); asm_output("cmovne %s,%s", gpn(d), gpn(s)); }
inline void Assembler::MRL( R d, R s) { count_alu(); ALU2(0x0f4C, d, s); asm_output("cmovl %s,%s", gpn(d), gpn(s)); }
inline void Assembler::MRLE(R d, R s) { count_alu(); ALU2(0x0f4E, d, s); asm_output("cmovle %s,%s", gpn(d), gpn(s)); }
inline void Assembler::MRG( R d, R s) { count_alu(); ALU2(0x0f4F, d, s); asm_output("cmovg %s,%s", gpn(d), gpn(s)); }
inline void Assembler::MRGE(R d, R s) { count_alu(); ALU2(0x0f4D, d, s); asm_output("cmovge %s,%s", gpn(d), gpn(s)); }
inline void Assembler::MRB( R d, R s) { count_alu(); ALU2(0x0f42, d, s); asm_output("cmovb %s,%s", gpn(d), gpn(s)); }
inline void Assembler::MRBE(R d, R s) { count_alu(); ALU2(0x0f46, d, s); asm_output("cmovbe %s,%s", gpn(d), gpn(s)); }
inline void Assembler::MRA( R d, R s) { count_alu(); ALU2(0x0f47, d, s); asm_output("cmova %s,%s", gpn(d), gpn(s)); }
inline void Assembler::MRAE(R d, R s) { count_alu(); ALU2(0x0f43, d, s); asm_output("cmovae %s,%s", gpn(d), gpn(s)); }
inline void Assembler::MRNO(R d, R s) { count_alu(); ALU2(0x0f41, d, s); asm_output("cmovno %s,%s", gpn(d), gpn(s)); }
// these aren't currently used but left in for reference
//#define LDEQ(r,d,b) do { ALU2m(0x0f44,r,d,b); asm_output("cmove %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
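Every SETcc and CMOVcc in this hunk follows one pattern: a condition-code value added into a base opcode, 0x0F90 for setcc and 0x0F40 for cmovcc (the jumps further down use 0x70 and 0x0F80 the same way). For reference, the standard x86 condition codes behind these constants:

    //   0 O    1 NO   2 B/C    3 AE/NC   4 E/Z   5 NE/NZ   6 BE   7 A
    //   8 S    9 NS   A P      B NP      C L     D GE      E LE   F G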
@@ -337,14 +337,14 @@ namespace nanojit
inline void Assembler::LD(R reg, I32 disp, R base) {
count_ld();
ALUm(0x8b,reg,disp,base);
asm_output("mov %s,%d(%s)",gpn(reg),disp,gpn(base));
ALUm(0x8b, reg, disp, base);
asm_output("mov %s,%d(%s)", gpn(reg), disp, gpn(base));
}
inline void Assembler::LDdm(R reg, I32 addr) {
count_ld();
ALUdm(0x8b,reg,addr);
asm_output("mov %s,0(%lx)",gpn(reg),(unsigned long)addr);
asm_output("mov %s,0(%lx)", gpn(reg), (unsigned long)addr);
}
#define SIBIDX(n) "1248"[n]
@@ -352,7 +352,7 @@ namespace nanojit
inline void Assembler::LDsib(R reg, I32 disp, R base, I32 index, I32 scale) {
count_ld();
ALUsib(0x8b, reg, base, index, scale, disp);
asm_output("mov %s,%d(%s+%s*%c)",gpn(reg),disp,gpn(base),gpn(index),SIBIDX(scale));
asm_output("mov %s,%d(%s+%s*%c)", gpn(reg), disp, gpn(base), gpn(index), SIBIDX(scale));
}
// note: movzx/movsx are being output with an 8/16 suffix to indicate the
@@ -365,105 +365,105 @@ namespace nanojit
inline void Assembler::LD16S(R r, I32 d, R b) {
count_ld();
ALU2m(0x0fbf, r, d, b);
asm_output("movsx16 %s,%d(%s)", gpn(r),d,gpn(b));
asm_output("movsx16 %s,%d(%s)", gpn(r), d, gpn(b));
}
inline void Assembler::LD16Sdm(R r, I32 addr) {
count_ld();
ALU2dm(0x0fbf, r, addr);
asm_output("movsx16 %s,0(%lx)", gpn(r),(unsigned long)addr);
asm_output("movsx16 %s,0(%lx)", gpn(r), (unsigned long)addr);
}
inline void Assembler::LD16Ssib(R r, I32 disp, R base, I32 index, I32 scale) {
count_ld();
ALU2sib(0x0fbf, r, base, index, scale, disp);
asm_output("movsx16 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale));
asm_output("movsx16 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale));
}
// Load 16-bit, zero extend.
inline void Assembler::LD16Z(R r, I32 d, R b) {
count_ld();
ALU2m(0x0fb7, r, d, b);
asm_output("movzx16 %s,%d(%s)", gpn(r),d,gpn(b));
asm_output("movzx16 %s,%d(%s)", gpn(r), d, gpn(b));
}
inline void Assembler::LD16Zdm(R r, I32 addr) {
count_ld();
ALU2dm(0x0fb7, r, addr);
asm_output("movzx16 %s,0(%lx)", gpn(r),(unsigned long)addr);
asm_output("movzx16 %s,0(%lx)", gpn(r), (unsigned long)addr);
}
inline void Assembler::LD16Zsib(R r, I32 disp, R base, I32 index, I32 scale) {
count_ld();
ALU2sib(0x0fb7, r, base, index, scale, disp);
asm_output("movzx16 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale));
asm_output("movzx16 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale));
}
// Load 8-bit, zero extend.
inline void Assembler::LD8Z(R r, I32 d, R b) {
count_ld();
ALU2m(0x0fb6, r, d, b);
asm_output("movzx8 %s,%d(%s)", gpn(r),d,gpn(b));
asm_output("movzx8 %s,%d(%s)", gpn(r), d, gpn(b));
}
inline void Assembler::LD8Zdm(R r, I32 addr) {
count_ld();
ALU2dm(0x0fb6, r, addr);
asm_output("movzx8 %s,0(%lx)", gpn(r),(long unsigned)addr);
asm_output("movzx8 %s,0(%lx)", gpn(r), (long unsigned)addr);
}
inline void Assembler::LD8Zsib(R r, I32 disp, R base, I32 index, I32 scale) {
count_ld();
ALU2sib(0x0fb6, r, base, index, scale, disp);
asm_output("movzx8 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale));
asm_output("movzx8 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale));
}
// Load 8-bit, sign extend.
inline void Assembler::LD8S(R r, I32 d, R b) {
count_ld();
ALU2m(0x0fbe, r, d, b);
asm_output("movsx8 %s,%d(%s)", gpn(r),d,gpn(b));
asm_output("movsx8 %s,%d(%s)", gpn(r), d, gpn(b));
}
inline void Assembler::LD8Sdm(R r, I32 addr) {
count_ld();
ALU2dm(0x0fbe, r, addr);
asm_output("movsx8 %s,0(%lx)", gpn(r),(long unsigned)addr);
asm_output("movsx8 %s,0(%lx)", gpn(r), (long unsigned)addr);
}
inline void Assembler::LD8Ssib(R r, I32 disp, R base, I32 index, I32 scale) {
count_ld();
ALU2sib(0x0fbe, r, base, index, scale, disp);
asm_output("movsx8 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale));
asm_output("movsx8 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale));
}
inline void Assembler::LDi(R r, I32 i) {
count_ld();
underrunProtect(5);
IMM32(i);
NanoAssert(((unsigned)r)<8);
*(--_nIns) = (uint8_t) ( 0xb8 | r );
asm_output("mov %s,%d",gpn(r),i);
NanoAssert(unsigned(r) < 8);
*(--_nIns) = uint8_t(0xb8 | r);
asm_output("mov %s,%d", gpn(r), i);
}
// Quirk of x86-32: reg must be a/b/c/d for byte stores here.
inline void Assembler::ST8(R base, I32 disp, R reg) {
count_st();
NanoAssert(((unsigned)reg)<4);
NanoAssert(unsigned(reg) < 4);
ALUm(0x88, reg, disp, base);
asm_output("mov8 %d(%s),%s",disp,base==UnspecifiedReg?"0":gpn(base),gpn(reg));
asm_output("mov8 %d(%s),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), gpn(reg));
}
inline void Assembler::ST16(R base, I32 disp, R reg) {
count_st();
ALUm16(0x89, reg, disp, base);
asm_output("mov16 %d(%s),%s",disp,base==UnspecifiedReg?"0":gpn(base),gpn(reg));
asm_output("mov16 %d(%s),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), gpn(reg));
}
inline void Assembler::ST(R base, I32 disp, R reg) {
count_st();
ALUm(0x89, reg, disp, base);
asm_output("mov %d(%s),%s",disp,base==UnspecifiedReg?"0":gpn(base),gpn(reg));
asm_output("mov %d(%s),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), gpn(reg));
}
inline void Assembler::ST8i(R base, I32 disp, I32 imm) {
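The reg < 4 assertion in ST8 above exists because of how x86-32 numbers its 8-bit registers: r/m8 slots 4-7 select the high-byte registers, not the low bytes of esp/ebp/esi/edi. A small illustrative helper (hypothetical, not part of the patch):

    // Only eax/ecx/edx/ebx (0-3) have low-byte encodings on x86-32.
    static const char* r8name(unsigned rm) {
        static const char* const names[8] =
            { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" };
        return names[rm & 7];
    }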
@@ -472,7 +472,7 @@ namespace nanojit
IMM8(imm);
MODRMm(0, disp, base);
*(--_nIns) = 0xc6;
asm_output("mov8 %d(%s),%d",disp,gpn(base),imm);
asm_output("mov8 %d(%s),%d", disp, gpn(base), imm);
}
inline void Assembler::ST16i(R base, I32 disp, I32 imm) {
@@ -482,7 +482,7 @@ namespace nanojit
MODRMm(0, disp, base);
*(--_nIns) = 0xc7;
*(--_nIns) = 0x66;
asm_output("mov16 %d(%s),%d",disp,gpn(base),imm);
asm_output("mov16 %d(%s),%d", disp, gpn(base), imm);
}
inline void Assembler::STi(R base, I32 disp, I32 imm) {
@@ -491,7 +491,7 @@ namespace nanojit
IMM32(imm);
MODRMm(0, disp, base);
*(--_nIns) = 0xc7;
asm_output("mov %d(%s),%d",disp,gpn(base),imm);
asm_output("mov %d(%s),%d", disp, gpn(base), imm);
}
inline void Assembler::RET() { count_ret(); ALU0(0xc3); asm_output("ret"); }
@@ -502,8 +502,10 @@ namespace nanojit
count_push();
if (isS8(i)) {
underrunProtect(2);
_nIns-=2; _nIns[0] = 0x6a; _nIns[1] = uint8_t(i);
asm_output("push %d",i);
_nIns -= 2;
_nIns[0] = 0x6a;
_nIns[1] = uint8_t(i);
asm_output("push %d", i);
} else {
PUSHi32(i);
}
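PUSHi's split mirrors the two immediate-push encodings: 6A ib sign-extends its byte to a full dword on the stack, so it is safe exactly when the value round-trips through int8_t, which is the test isS8() performs; everything else takes 68 id. The choice in isolation:

    #include <stdint.h>

    static bool fitsPushImm8(int32_t i) { return i == int8_t(i); }
    // fitsPushImm8(-1)  -> true  : 6A FF
    // fitsPushImm8(300) -> false : 68 2C 01 00 00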
@@ -514,29 +516,29 @@ namespace nanojit
underrunProtect(5);
IMM32(i);
*(--_nIns) = 0x68;
asm_output("push %d",i);
asm_output("push %d", i);
}
inline void Assembler::PUSHr(R r) {
count_push();
underrunProtect(1);
NanoAssert(((unsigned)r)<8);
*(--_nIns) = (uint8_t) ( 0x50 | r );
asm_output("push %s",gpn(r));
NanoAssert(unsigned(r) < 8);
*(--_nIns) = uint8_t(0x50 | r);
asm_output("push %s", gpn(r));
}
inline void Assembler::PUSHm(I32 d, R b) {
count_pushld();
ALUm(0xff, 6, d, b);
asm_output("push %d(%s)",d,gpn(b));
asm_output("push %d(%s)", d, gpn(b));
}
inline void Assembler::POPr(R r) {
count_pop();
underrunProtect(1);
NanoAssert(((unsigned)r)<8);
*(--_nIns) = (uint8_t) ( 0x58 | (r) );
asm_output("pop %s",gpn(r));
NanoAssert(unsigned(r) < 8);
*(--_nIns) = uint8_t(0x58 | r);
asm_output("pop %s", gpn(r));
}
inline void Assembler::JCC(I32 o, NIns* t, const char* n) {
@@ -545,15 +547,16 @@ namespace nanojit
intptr_t tt = (intptr_t)t - (intptr_t)_nIns;
if (t && isS8(tt)) {
_nIns -= 2;
_nIns[0] = uint8_t( 0x70 | o );
_nIns[0] = uint8_t(0x70 | o);
_nIns[1] = uint8_t(tt);
} else {
IMM32(tt);
_nIns -= 2;
_nIns[0] = JCC32;
_nIns[1] = (uint8_t) ( 0x80 | o );
_nIns[1] = uint8_t(0x80 | o);
}
asm_output("%-5s %p", n, t); (void) n;
asm_output("%-5s %p", n, t);
(void) n;
}
inline void Assembler::JMP_long(NIns* t) {
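A note on the displacement math in JCC: x86 relative branches are measured from the end of the branch instruction, and because code is emitted backwards, _nIns already holds exactly that address when tt is computed. The short/long decision as a standalone sketch (names assumed):

    #include <stdint.h>

    static bool useShortJcc(intptr_t target, intptr_t endOfBranch) {
        intptr_t rel = target - endOfBranch;
        return rel == int8_t(rel);   // fits rel8 -> 70+cc; else 0F 80+cc rel32
    }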
@@ -561,9 +564,9 @@ namespace nanojit
underrunProtect(5);
NanoAssert(t);
intptr_t tt = (intptr_t)t - (intptr_t)_nIns;
IMM32(tt); \
*(--_nIns) = JMP32; \
asm_output("jmp %p", t); \
IMM32(tt);
*(--_nIns) = JMP32;
asm_output("jmp %p", t);
verbose_only( verbose_outputf("%010lx:", (unsigned long)_nIns); )
}
@@ -578,10 +581,10 @@ namespace nanojit
underrunProtect(7);
IMM32(int32_t(addr));
_nIns -= 3;
_nIns[0] = (NIns) 0xff; /* jmp */
_nIns[1] = (NIns) (0<<6 | 4<<3 | 4); /* modrm: base=sib + disp32 */
_nIns[2] = (NIns) (ss<<6 | (x)<<3 | 5); /* sib: x<<ss + table */
asm_output("jmp *(%s*%d+%p)", gpn(x), 1<<ss, (void*)(addr));
_nIns[0] = uint8_t(0xff); /* jmp */
_nIns[1] = uint8_t(0 << 6 | 4 << 3 | 4); /* modrm: base=sib + disp32 */
_nIns[2] = uint8_t(ss << 6 | x << 3 | 5); /* sib: x<<ss + table */
asm_output("jmp *(%s*%d+%p)", gpn(x), 1 << ss, (void*)addr);
}
inline void Assembler::JE(NIns* t) { JCC(0x04, t, "je"); }
@@ -615,19 +618,19 @@ namespace nanojit
// sse instructions
inline void Assembler::SSE(I32 c, I32 d, I32 s) {
underrunProtect(9);
MODRM((d),(s));
MODRM(d, s);
_nIns -= 3;
_nIns[0] = uint8_t((c>>16) & 0xff);
_nIns[1] = uint8_t((c>>8) & 0xff);
_nIns[2] = uint8_t(c&0xff);
_nIns[0] = uint8_t((c >> 16) & 0xff);
_nIns[1] = uint8_t((c >> 8) & 0xff);
_nIns[2] = uint8_t(c & 0xff);
}
inline void Assembler::SSEm(I32 c, I32 r, I32 d, R b) {
underrunProtect(9);
MODRMm(r, d, b);
_nIns -= 3;
_nIns[0] = uint8_t((c>>16) & 0xff);
_nIns[1] = uint8_t((c>>8) & 0xff);
_nIns[0] = uint8_t((c >> 16) & 0xff);
_nIns[1] = uint8_t((c >> 8) & 0xff);
_nIns[2] = uint8_t(c & 0xff);
}
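The I32 constants fed to SSE()/SSEm() pack three bytes: mandatory prefix, 0F escape, and the opcode proper; the three stores above peel them apart in that order. Decoding a few of the constants used below (decoding mine):

    #include <stdint.h>

    // 0xf20f10 -> F2 0F 10 : movsd xmm, m64  (SSE_LDSD)
    // 0xf30f7e -> F3 0F 7E : movq  xmm, m64  (SSE_LDQ)
    // 0x660f2e -> 66 0F 2E : ucomisd         (SSE_UCOMISD)
    static void unpackSse(uint32_t c, uint8_t out[3]) {
        out[0] = uint8_t(c >> 16);   // prefix: F2, F3 or 66
        out[1] = uint8_t(c >> 8);    // 0F escape
        out[2] = uint8_t(c);         // opcode byte
    }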
@@ -635,143 +638,129 @@ namespace nanojit
count_ldq();
underrunProtect(8);
IMM32(int32_t(addr));
*(--_nIns) = uint8_t(((r)&7)<<3|5);
*(--_nIns) = uint8_t((r & 7) << 3 | 5);
*(--_nIns) = 0x10;
*(--_nIns) = 0x0f;
*(--_nIns) = 0xf2;
asm_output("movsd %s,(%p) // =%f",gpn(r),(void*)addr,*addr);
asm_output("movsd %s,(%p) // =%f", gpn(r), (void*)addr, *addr);
}
inline void Assembler::SSE_LDSD(R r, I32 d, R b) { count_ldq(); SSEm(0xf20f10, r&7, d, b); asm_output("movsd %s,%d(%s)",gpn(r),(d),gpn(b)); }
inline void Assembler::SSE_LDQ( R r, I32 d, R b) { count_ldq(); SSEm(0xf30f7e, r&7, d, b); asm_output("movq %s,%d(%s)",gpn(r),d,gpn(b)); }
inline void Assembler::SSE_LDSS(R r, I32 d, R b) { count_ld(); SSEm(0xf30f10, r&7, d, b); asm_output("movss %s,%d(%s)",gpn(r),d,gpn(b)); }
inline void Assembler::SSE_STSD(I32 d, R b, R r) { count_stq(); SSEm(0xf20f11, r&7, d, b); asm_output("movsd %d(%s),%s",(d),gpn(b),gpn(r)); }
inline void Assembler::SSE_STQ( I32 d, R b, R r) { count_stq(); SSEm(0x660fd6, r&7, d, b); asm_output("movq %d(%s),%s",(d),gpn(b),gpn(r)); }
inline void Assembler::SSE_STSS(I32 d, R b, R r) { count_st(); SSEm(0xf30f11, r&7, d, b); asm_output("movss %d(%s),%s",(d),gpn(b),gpn(r)); }
inline void Assembler::SSE_LDSD(R r, I32 d, R b) { count_ldq(); SSEm(0xf20f10, r&7, d, b); asm_output("movsd %s,%d(%s)", gpn(r), d, gpn(b)); }
inline void Assembler::SSE_LDQ( R r, I32 d, R b) { count_ldq(); SSEm(0xf30f7e, r&7, d, b); asm_output("movq %s,%d(%s)", gpn(r), d, gpn(b)); }
inline void Assembler::SSE_LDSS(R r, I32 d, R b) { count_ld(); SSEm(0xf30f10, r&7, d, b); asm_output("movss %s,%d(%s)", gpn(r), d, gpn(b)); }
inline void Assembler::SSE_STSD(I32 d, R b, R r) { count_stq(); SSEm(0xf20f11, r&7, d, b); asm_output("movsd %d(%s),%s", d, gpn(b), gpn(r)); }
inline void Assembler::SSE_STQ( I32 d, R b, R r) { count_stq(); SSEm(0x660fd6, r&7, d, b); asm_output("movq %d(%s),%s", d, gpn(b), gpn(r)); }
inline void Assembler::SSE_STSS(I32 d, R b, R r) { count_st(); SSEm(0xf30f11, r&7, d, b); asm_output("movss %d(%s),%s", d, gpn(b), gpn(r)); }
inline void Assembler::SSE_CVTSI2SD(R xr, R gr) { count_fpu(); SSE(0xf20f2a, xr&7, gr&7); asm_output("cvtsi2sd %s,%s",gpn(xr),gpn(gr)); }
inline void Assembler::SSE_CVTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2d, gr&7, xr&7); asm_output("cvtsd2si %s,%s",gpn(gr),gpn(xr)); }
inline void Assembler::SSE_CVTSD2SS(R xr, R gr) { count_fpu(); SSE(0xf20f5a, xr&7, gr&7); asm_output("cvtsd2ss %s,%s",gpn(xr),gpn(gr)); }
inline void Assembler::SSE_CVTSS2SD(R xr, R gr) { count_fpu(); SSE(0xf30f5a, xr&7, gr&7); asm_output("cvtss2sd %s,%s",gpn(xr),gpn(gr)); }
inline void Assembler::SSE_CVTDQ2PD(R d, R r) { count_fpu(); SSE(0xf30fe6, d&7, r&7); asm_output("cvtdq2pd %s,%s",gpn(d),gpn(r)); }
inline void Assembler::SSE_CVTSI2SD(R xr, R gr) { count_fpu(); SSE(0xf20f2a, xr&7, gr&7); asm_output("cvtsi2sd %s,%s", gpn(xr), gpn(gr)); }
inline void Assembler::SSE_CVTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2d, gr&7, xr&7); asm_output("cvtsd2si %s,%s", gpn(gr), gpn(xr)); }
inline void Assembler::SSE_CVTSD2SS(R xr, R gr) { count_fpu(); SSE(0xf20f5a, xr&7, gr&7); asm_output("cvtsd2ss %s,%s", gpn(xr), gpn(gr)); }
inline void Assembler::SSE_CVTSS2SD(R xr, R gr) { count_fpu(); SSE(0xf30f5a, xr&7, gr&7); asm_output("cvtss2sd %s,%s", gpn(xr), gpn(gr)); }
inline void Assembler::SSE_CVTDQ2PD(R d, R r) { count_fpu(); SSE(0xf30fe6, d&7, r&7); asm_output("cvtdq2pd %s,%s", gpn(d), gpn(r)); }
// Move and zero-extend GP reg to XMM reg.
inline void Assembler::SSE_MOVD(R d, R s) {
count_mov();
if (_is_xmm_reg_(s)) {
NanoAssert(_is_gp_reg_(d));
if (IsXmmReg(s)) {
NanoAssert(IsGpReg(d));
SSE(0x660f7e, s&7, d&7);
} else {
NanoAssert(_is_gp_reg_(s));
NanoAssert(_is_xmm_reg_(d));
NanoAssert(IsGpReg(s));
NanoAssert(IsXmmReg(d));
SSE(0x660f6e, d&7, s&7);
}
asm_output("movd %s,%s",gpn(d),gpn(s));
asm_output("movd %s,%s", gpn(d), gpn(s));
}
inline void Assembler::SSE_MOVSD(R rd, R rs) {
count_mov();
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));
NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
SSE(0xf20f10, rd&7, rs&7);
asm_output("movsd %s,%s",gpn(rd),gpn(rs));
}
inline void Assembler::SSE_MOVDm(R d, R b, R xrs) {
count_st();
NanoAssert(_is_xmm_reg_(xrs) && (_is_gp_reg_(b) || b==FP));
SSEm(0x660f7e, xrs&7, d, b);
asm_output("movd %d(%s),%s", d, gpn(b), gpn(xrs));
asm_output("movsd %s,%s", gpn(rd), gpn(rs));
}
inline void Assembler::SSE_ADDSD(R rd, R rs) {
count_fpu();
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));
NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
SSE(0xf20f58, rd&7, rs&7);
asm_output("addsd %s,%s",gpn(rd),gpn(rs));
asm_output("addsd %s,%s", gpn(rd), gpn(rs));
}
inline void Assembler::SSE_ADDSDm(R r, const double* addr) {
count_fpuld();
underrunProtect(8);
NanoAssert(_is_xmm_reg_(r));
NanoAssert(IsXmmReg(r));
const double* daddr = addr;
IMM32(int32_t(daddr));
*(--_nIns) = uint8_t((r&7)<<3 | 5);
*(--_nIns) = uint8_t((r & 7) << 3 | 5);
*(--_nIns) = 0x58;
*(--_nIns) = 0x0f;
*(--_nIns) = 0xf2;
asm_output("addsd %s,%p // =%f",gpn(r),(void*)daddr,*daddr);
asm_output("addsd %s,%p // =%f", gpn(r), (void*)daddr, *daddr);
}
inline void Assembler::SSE_SUBSD(R rd, R rs) {
count_fpu();
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));
NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
SSE(0xf20f5c, rd&7, rs&7);
asm_output("subsd %s,%s",gpn(rd),gpn(rs));
asm_output("subsd %s,%s", gpn(rd), gpn(rs));
}
inline void Assembler::SSE_MULSD(R rd, R rs) {
count_fpu();
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));
NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
SSE(0xf20f59, rd&7, rs&7);
asm_output("mulsd %s,%s",gpn(rd),gpn(rs));
asm_output("mulsd %s,%s", gpn(rd), gpn(rs));
}
inline void Assembler::SSE_DIVSD(R rd, R rs) {
count_fpu();
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));
NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
SSE(0xf20f5e, rd&7, rs&7);
asm_output("divsd %s,%s",gpn(rd),gpn(rs));
asm_output("divsd %s,%s", gpn(rd), gpn(rs));
}
inline void Assembler::SSE_UCOMISD(R rl, R rr) {
count_fpu();
NanoAssert(_is_xmm_reg_(rl) && _is_xmm_reg_(rr));
NanoAssert(IsXmmReg(rl) && IsXmmReg(rr));
SSE(0x660f2e, rl&7, rr&7);
asm_output("ucomisd %s,%s",gpn(rl),gpn(rr));
}
inline void Assembler::SSE_CVTSI2SDm(R xr, R d, R b) {
count_fpu();
NanoAssert(_is_xmm_reg_(xr) && _is_gp_reg_(b));
SSEm(0xf20f2a, xr&7, d, b);
asm_output("cvtsi2sd %s,%d(%s)",gpn(xr),d,gpn(b));
asm_output("ucomisd %s,%s", gpn(rl), gpn(rr));
}
inline void Assembler::SSE_XORPD(R r, const uint32_t* maskaddr) {
count_fpuld();
underrunProtect(8);
IMM32(int32_t(maskaddr));
*(--_nIns) = uint8_t((r&7)<<3 | 5);
*(--_nIns) = uint8_t((r & 7) << 3 | 5);
*(--_nIns) = 0x57;
*(--_nIns) = 0x0f;
*(--_nIns) = 0x66;
asm_output("xorpd %s,[%p]",gpn(r),(void*)maskaddr);
asm_output("xorpd %s,[%p]", gpn(r), (void*)maskaddr);
}
inline void Assembler::SSE_XORPDr(R rd, R rs) {
count_fpu();
SSE(0x660f57, rd&7, rs&7);
asm_output("xorpd %s,%s",gpn(rd),gpn(rs));
asm_output("xorpd %s,%s", gpn(rd), gpn(rs));
}
// floating point unit
inline void Assembler::FPUc(I32 o) {
underrunProtect(2);
*(--_nIns) = (uint8_t)(o & 0xff);
*(--_nIns) = (uint8_t)((o>>8) & 0xff);
*(--_nIns) = uint8_t(o & 0xff);
*(--_nIns) = uint8_t((o >> 8) & 0xff);
}
inline void Assembler::FPUm(I32 o, I32 d, R b) {
underrunProtect(7);
MODRMm(uint8_t(o), d, b);
*(--_nIns) = (uint8_t)(o>>8);
*(--_nIns) = uint8_t(o >> 8);
}
inline void Assembler::FPUdm(I32 o, const double* const m) {
underrunProtect(6);
MODRMdm(uint8_t(o), int32_t(m));
*(--_nIns) = uint8_t(o>>8);
*(--_nIns) = uint8_t(o >> 8);
}
inline void Assembler::TEST_AH(I32 i) {
@@ -780,7 +769,7 @@ namespace nanojit
*(--_nIns) = uint8_t(i);
*(--_nIns) = 0xc4;
*(--_nIns) = 0xf6;
asm_output("test ah, %d",i);
asm_output("test ah, %d", i);
}
inline void Assembler::TEST_AX(I32 i) {
@@ -788,10 +777,10 @@ namespace nanojit
underrunProtect(5);
*(--_nIns) = 0;
*(--_nIns) = uint8_t(i);
*(--_nIns) = uint8_t((i)>>8);
*(--_nIns) = uint8_t(i >> 8);
*(--_nIns) = 0;
*(--_nIns) = 0xa9;
asm_output("test ax, %d",i);
asm_output("test ax, %d", i);
}
inline void Assembler::FNSTSW_AX() { count_fpu(); FPUc(0xdfe0); asm_output("fnstsw_ax"); }
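FPUc's pair of stores, worked through for the FNSTSW_AX call just above (opcode word 0xdfe0):

    // *(--_nIns) = uint8_t(o & 0xff);          writes E0
    // *(--_nIns) = uint8_t((o >> 8) & 0xff);   writes DF in front of it
    // memory order: DF E0, the encoding of 'fnstsw ax'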
@@ -801,49 +790,49 @@ namespace nanojit
inline void Assembler::FFREE(R r) { count_fpu(); FPU(0xddc0, r); asm_output("ffree %s",gpn(r)); }
inline void Assembler::FST32(bool p, I32 d, R b){ count_stq(); FPUm(0xd902|(p?1:0), d, b); asm_output("fst%s32 %d(%s)",(p?"p":""),d,gpn(b)); if (p) fpu_pop(); }
inline void Assembler::FSTQ(bool p, I32 d, R b) { count_stq(); FPUm(0xdd02|(p?1:0), d, b); asm_output("fst%sq %d(%s)",(p?"p":""),d,gpn(b)); if (p) fpu_pop(); }
inline void Assembler::FST32(bool p, I32 d, R b){ count_stq(); FPUm(0xd902|(p?1:0), d, b); asm_output("fst%s32 %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
inline void Assembler::FSTQ(bool p, I32 d, R b) { count_stq(); FPUm(0xdd02|(p?1:0), d, b); asm_output("fst%sq %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
inline void Assembler::FSTPQ(I32 d, R b) { FSTQ(1, d, b); }
inline void Assembler::FCOM(bool p, I32 d, R b) { count_fpuld(); FPUm(0xdc02|(p?1:0), d, b); asm_output("fcom%s %d(%s)",(p?"p":""),d,gpn(b)); if (p) fpu_pop(); }
inline void Assembler::FCOM(bool p, I32 d, R b) { count_fpuld(); FPUm(0xdc02|(p?1:0), d, b); asm_output("fcom%s %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
inline void Assembler::FCOMdm(bool p, const double* dm) {
count_fpuld();
FPUdm(0xdc02|(p?1:0), dm);
asm_output("fcom%s (%p)",(p?"p":""),(void*)dm);
asm_output("fcom%s (%p)", (p?"p":""), (void*)dm);
if (p) fpu_pop();
}
inline void Assembler::FLD32(I32 d, R b) { count_ldq(); FPUm(0xd900, d, b); asm_output("fld32 %d(%s)",d,gpn(b)); fpu_push();}
inline void Assembler::FLDQ(I32 d, R b) { count_ldq(); FPUm(0xdd00, d, b); asm_output("fldq %d(%s)",d,gpn(b)); fpu_push();}
inline void Assembler::FLDQdm(const double* dm) { count_ldq(); FPUdm(0xdd00, dm); asm_output("fldq (%p)",(void*)dm); fpu_push();}
inline void Assembler::FILDQ(I32 d, R b) { count_fpuld(); FPUm(0xdf05, d, b); asm_output("fildq %d(%s)",d,gpn(b)); fpu_push(); }
inline void Assembler::FILD(I32 d, R b) { count_fpuld(); FPUm(0xdb00, d, b); asm_output("fild %d(%s)",d,gpn(b)); fpu_push(); }
inline void Assembler::FLD32(I32 d, R b) { count_ldq(); FPUm(0xd900, d, b); asm_output("fld32 %d(%s)", d, gpn(b)); fpu_push();}
inline void Assembler::FLDQ(I32 d, R b) { count_ldq(); FPUm(0xdd00, d, b); asm_output("fldq %d(%s)", d, gpn(b)); fpu_push();}
inline void Assembler::FLDQdm(const double* dm) { count_ldq(); FPUdm(0xdd00, dm); asm_output("fldq (%p)", (void*)dm); fpu_push();}
inline void Assembler::FILDQ(I32 d, R b) { count_fpuld(); FPUm(0xdf05, d, b); asm_output("fildq %d(%s)", d, gpn(b)); fpu_push(); }
inline void Assembler::FILD(I32 d, R b) { count_fpuld(); FPUm(0xdb00, d, b); asm_output("fild %d(%s)", d, gpn(b)); fpu_push(); }
inline void Assembler::FIST(bool p, I32 d, R b) {
count_fpu();
FPUm(0xdb02|(p?1:0), d, b);
asm_output("fist%s %d(%s)",(p?"p":""),d,gpn(b));
FPUm(0xdb02 | (p?1:0), d, b);
asm_output("fist%s %d(%s)", (p?"p":""), d, gpn(b));
if (p) fpu_pop();
}
inline void Assembler::FADD( I32 d, R b) { count_fpu(); FPUm(0xdc00, d, b); asm_output("fadd %d(%s)", d,gpn(b)); }
inline void Assembler::FSUB( I32 d, R b) { count_fpu(); FPUm(0xdc04, d, b); asm_output("fsub %d(%s)", d,gpn(b)); }
inline void Assembler::FSUBR(I32 d, R b) { count_fpu(); FPUm(0xdc05, d, b); asm_output("fsubr %d(%s)",d,gpn(b)); }
inline void Assembler::FMUL( I32 d, R b) { count_fpu(); FPUm(0xdc01, d, b); asm_output("fmul %d(%s)", d,gpn(b)); }
inline void Assembler::FDIV( I32 d, R b) { count_fpu(); FPUm(0xdc06, d, b); asm_output("fdiv %d(%s)", d,gpn(b)); }
inline void Assembler::FDIVR(I32 d, R b) { count_fpu(); FPUm(0xdc07, d, b); asm_output("fdivr %d(%s)",d,gpn(b)); }
inline void Assembler::FADD( I32 d, R b) { count_fpu(); FPUm(0xdc00, d, b); asm_output("fadd %d(%s)", d, gpn(b)); }
inline void Assembler::FSUB( I32 d, R b) { count_fpu(); FPUm(0xdc04, d, b); asm_output("fsub %d(%s)", d, gpn(b)); }
inline void Assembler::FSUBR(I32 d, R b) { count_fpu(); FPUm(0xdc05, d, b); asm_output("fsubr %d(%s)", d, gpn(b)); }
inline void Assembler::FMUL( I32 d, R b) { count_fpu(); FPUm(0xdc01, d, b); asm_output("fmul %d(%s)", d, gpn(b)); }
inline void Assembler::FDIV( I32 d, R b) { count_fpu(); FPUm(0xdc06, d, b); asm_output("fdiv %d(%s)", d, gpn(b)); }
inline void Assembler::FDIVR(I32 d, R b) { count_fpu(); FPUm(0xdc07, d, b); asm_output("fdivr %d(%s)", d, gpn(b)); }
inline void Assembler::FADDdm( const double *dm) { count_ldq(); FPUdm(0xdc00, dm); asm_output("fadd (%p)", (void*)dm); }
inline void Assembler::FSUBRdm(const double* dm) { count_ldq(); FPUdm(0xdc05, dm); asm_output("fsubr (%p)",(void*)dm); }
inline void Assembler::FSUBRdm(const double* dm) { count_ldq(); FPUdm(0xdc05, dm); asm_output("fsubr (%p)", (void*)dm); }
inline void Assembler::FMULdm( const double* dm) { count_ldq(); FPUdm(0xdc01, dm); asm_output("fmul (%p)", (void*)dm); }
inline void Assembler::FDIVRdm(const double* dm) { count_ldq(); FPUdm(0xdc07, dm); asm_output("fdivr (%p)",(void*)dm); }
inline void Assembler::FDIVRdm(const double* dm) { count_ldq(); FPUdm(0xdc07, dm); asm_output("fdivr (%p)", (void*)dm); }
inline void Assembler::FINCSTP() { count_fpu(); FPUc(0xd9f7); asm_output("fincstp"); fpu_pop(); }
inline void Assembler::FCOMP() { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();}
inline void Assembler::FCOMPP() { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();}
inline void Assembler::FLDr(R r) { count_ldq(); FPU(0xd9c0,r); asm_output("fld %s",gpn(r)); fpu_push(); }
inline void Assembler::FLDr(R r) { count_ldq(); FPU(0xd9c0, r); asm_output("fld %s", gpn(r)); fpu_push(); }
inline void Assembler::EMMS() { count_fpu(); FPUc(0x0f77); asm_output("emms"); }
// standard direct call
@@ -851,9 +840,9 @@ namespace nanojit
count_call();
underrunProtect(5);
int offset = (ci->_address) - ((int)_nIns);
IMM32( (uint32_t)offset );
IMM32((uint32_t)offset);
*(--_nIns) = 0xE8;
verbose_only(asm_output("call %s",(ci->_name));)
verbose_only(asm_output("call %s", (ci->_name));)
debug_only(if (ci->returnType()==ARGTYPE_D) fpu_push();)
}
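The offset arithmetic in CALL works for the same backwards-emission reason as in JCC: when offset is computed, _nIns is still the address of the instruction that will follow the call, which is the base E8 rel32 is defined against. (The (int)_nIns cast also quietly assumes a 32-bit address space, true for this i386 backend.) Schematically:

    // offset = callee - next_instruction
    // so at run time the CPU computes next_instruction + offset == callee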
@@ -861,8 +850,8 @@ namespace nanojit
inline void Assembler::CALLr(const CallInfo* ci, Register r) {
count_calli();
underrunProtect(2);
ALU(0xff, 2, (r));
verbose_only(asm_output("call %s",gpn(r));)
ALU(0xff, 2, r);
verbose_only(asm_output("call %s", gpn(r));)
debug_only(if (ci->returnType()==ARGTYPE_D) fpu_push();) (void)ci;
}
@@ -945,7 +934,7 @@ namespace nanojit
// profiling for the exit
verbose_only(
if (_logc->lcbits & LC_FragProfile) {
INCLi( int32_t(&guard->record()->profCount) );
INCLi(int32_t(&guard->record()->profCount));
}
)

nanojit/Nativei386.h

@@ -154,20 +154,22 @@ namespace nanojit
typedef int RegisterMask;
static const int NumSavedRegs = 3;
static const RegisterMask SavedRegs = 1<<EBX | 1<<EDI | 1<<ESI;
static const RegisterMask GpRegs = SavedRegs | 1<<EAX | 1<<ECX | 1<<EDX;
static const RegisterMask XmmRegs = 1<<XMM0|1<<XMM1|1<<XMM2|1<<XMM3|1<<XMM4|1<<XMM5|1<<XMM6|1<<XMM7;
static const RegisterMask x87Regs = 1<<FST0;
static const RegisterMask FpRegs = x87Regs | XmmRegs;
static const RegisterMask SavedRegs = 1<<EBX | 1<<EDI | 1<<ESI;
static const RegisterMask GpRegs = SavedRegs | 1<<EAX | 1<<ECX | 1<<EDX;
static const RegisterMask XmmRegs = 1<<XMM0 | 1<<XMM1 | 1<<XMM2 | 1<<XMM3 |
1<<XMM4 | 1<<XMM5 | 1<<XMM6 | 1<<XMM7;
static const RegisterMask x87Regs = 1<<FST0;
static const RegisterMask FpRegs = x87Regs | XmmRegs;
static const RegisterMask ScratchRegs = 1<<EAX | 1<<ECX | 1<<EDX | FpRegs;
static const RegisterMask AllowableFlagRegs = 1<<EAX |1<<ECX | 1<<EDX | 1<<EBX;
static const RegisterMask AllowableFlagRegs = 1<<EAX | 1<<ECX | 1<<EDX | 1<<EBX;
#define _rmask_(r) (1<<(r))
#define _is_xmm_reg_(r) ((_rmask_(r)&XmmRegs)!=0)
#define _is_x87_reg_(r) ((_rmask_(r)&x87Regs)!=0)
#define _is_fp_reg_(r) ((_rmask_(r)&FpRegs)!=0)
#define _is_gp_reg_(r) ((_rmask_(r)&GpRegs)!=0)
static inline bool IsGpReg(Register r) {
return ((1<<r) & GpRegs) != 0;
}
static inline bool IsXmmReg(Register r) {
return ((1<<r) & XmmRegs) != 0;
}
verbose_only( extern const char* regNames[]; )
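In the header, the patch also trades the _is_*_reg_ macros for the IsGpReg/IsXmmReg inline functions shown. Membership is one bit per register in a RegisterMask, so the sets compose with plain bitwise operations; for example:

    // IsXmmReg(XMM3) -> ((1 << XMM3) & XmmRegs) != 0   (true)
    // IsGpReg(XMM3)  -> ((1 << XMM3) & GpRegs)  != 0   (false)
    // FpRegs = x87Regs | XmmRegs: set union is a single OR.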
@@ -198,23 +200,23 @@ namespace nanojit
void asm_immd(Register r, uint64_t q, double d, bool canClobberCCs); \
void IMM8(int32_t i) { \
_nIns -= 1; \
*((int8_t*)_nIns) = (int8_t)(i); \
*((int8_t*)_nIns) = int8_t(i); \
}; \
void IMM16(int32_t i) { \
_nIns -= 2; \
*((int16_t*)_nIns) = (int16_t)(i); \
*((int16_t*)_nIns) = int16_t(i); \
}; \
void IMM32(int32_t i) { \
_nIns -= 4; \
*((int32_t*)_nIns) = (int32_t)(i); \
*((int32_t*)_nIns) = int32_t(i); \
}; \
void MODRMs(int32_t r, int32_t d, Register b, int32_t l, int32_t i); \
void MODRMm(int32_t r, int32_t d, Register b); \
void MODRMSIB(Register reg, Register base, int32_t index, int32_t scale, int32_t disp); \
void MODRMdm(int32_t r, int32_t addr); \
void MODRM(int32_t d, int32_t s) { \
NanoAssert((unsigned(d))<8 && (unsigned(s))<8); \
*(--_nIns) = (uint8_t) ( 3<<6 | d<<3 | s ); \
NanoAssert(unsigned(d) < 8 && unsigned(s) < 8); \
*(--_nIns) = uint8_t(3 << 6 | d << 3 | s); \
}; \
void ALU0(int32_t o); \
void ALUm(int32_t c, int32_t r, int32_t d, Register b); \
@@ -226,7 +228,7 @@ namespace nanojit
void ALU2sib(int32_t c, Register r, Register base, int32_t index, int32_t scale, int32_t disp); \
void ALU(int32_t c, int32_t d, int32_t s) { \
underrunProtect(2); \
MODRM(d,s); \
MODRM(d, s); \
*(--_nIns) = uint8_t(c); \
}; \
void ALUi(int32_t c, int32_t r, int32_t i); \
@@ -262,8 +264,8 @@ namespace nanojit
void CMPi(Register r, int32_t i); \
void MR(Register d, Register s) { \
count_mov(); \
ALU(0x8b,d,s); \
asm_output("mov %s,%s",gpn(d),gpn(s)); \
ALU(0x8b, d, s); \
asm_output("mov %s,%s", gpn(d), gpn(s)); \
}; \
void LEA(Register r, int32_t d, Register b); \
void LEAmi4(Register r, int32_t d, int32_t i); \
@@ -377,21 +379,19 @@ namespace nanojit
void SSE_CVTDQ2PD(Register d, Register r); \
void SSE_MOVD(Register d, Register s); \
void SSE_MOVSD(Register rd, Register rs); \
void SSE_MOVDm(Register d, Register b, Register xrs); \
void SSE_ADDSD(Register rd, Register rs); \
void SSE_ADDSDm(Register r, const double* addr); \
void SSE_SUBSD(Register rd, Register rs); \
void SSE_MULSD(Register rd, Register rs); \
void SSE_DIVSD(Register rd, Register rs); \
void SSE_UCOMISD(Register rl, Register rr); \
void SSE_CVTSI2SDm(Register xr, Register d, Register b); \
void SSE_XORPD(Register r, const uint32_t* maskaddr); \
void SSE_XORPDr(Register rd, Register rs); \
void FPUc(int32_t o); \
void FPU(int32_t o, Register r) { \
underrunProtect(2); \
*(--_nIns) = uint8_t(((uint8_t)(o)&0xff) | (r&7)); \
*(--_nIns) = (uint8_t)((o>>8)&0xff); \
*(--_nIns) = uint8_t((uint8_t(o) & 0xff) | (r & 7)); \
*(--_nIns) = uint8_t((o >> 8) & 0xff); \
}; \
void FPUm(int32_t o, int32_t d, Register b); \
void FPUdm(int32_t o, const double* const m); \
@@ -427,7 +427,8 @@ namespace nanojit
void FSTP(Register r) { \
count_fpu(); \
FPU(0xddd8, r); \
asm_output("fstp %s",gpn(r)); fpu_pop(); \
asm_output("fstp %s", gpn(r)); \
fpu_pop(); \
}; \
void FCOMP(); \
void FCOMPP(); \