b=484561; [arm] fix EABI calling convention; clean up asm_call; r=graydon

Vladimir Vukicevic 2009-03-27 14:31:53 -07:00
parent 34105fd15e
commit eb82870ba4
4 changed files with 186 additions and 213 deletions


@@ -1943,12 +1943,6 @@ namespace nanojit
for (uint32_t i = 0; i < MAXARGS; i++) {
argt >>= 2;
ArgSize a = ArgSize(argt&3);
if (AvmCore::config.soft_float && a == ARGSIZE_F) {
sizes[argc++] = ARGSIZE_LO;
sizes[argc++] = ARGSIZE_LO;
continue;
}
if (a != ARGSIZE_NONE) {
sizes[argc++] = a;
} else {

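(The block removed above implemented soft-float lowering in get_sizes(): each 64-bit float argument was reported as two 32-bit slots. A minimal standalone sketch of that decoding, assuming argument kinds are packed two bits apiece with the return type in the low bits; the ArgSize values here are an assumption, not taken from the real headers:)

    #include <stdint.h>
    #include <vector>

    enum ArgSize { ARGSIZE_NONE = 0, ARGSIZE_F = 1, ARGSIZE_LO = 2 };

    // Decode a packed argument-type word the way the loop above does:
    // skip the return type, then read one 2-bit field per argument.
    static std::vector<ArgSize> decode_sizes(uint32_t argt, bool soft_float) {
        std::vector<ArgSize> sizes;
        for (argt >>= 2; (argt & 3) != ARGSIZE_NONE; argt >>= 2) {
            ArgSize a = ArgSize(argt & 3);
            if (soft_float && a == ARGSIZE_F) {
                // the removed behavior: one double became two 32-bit slots
                sizes.push_back(ARGSIZE_LO);
                sizes.push_back(ARGSIZE_LO);
            } else {
                sizes.push_back(a);
            }
        }
        return sizes;
    }

(With soft_float set, (int32_t, double) decodes to three ARGSIZE_LO entries, which is why the LIR-level caller in the next hunk had to split its argument list to match.)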

@@ -1100,28 +1100,12 @@ namespace nanojit
LOpcode op = (ci->isIndirect() ? k_callimap : k_callmap)[argt & 3];
NanoAssert(op != LIR_skip); // LIR_skip here is just an error condition
ArgSize sizes[2*MAXARGS];
ArgSize sizes[MAXARGS];
int32_t argc = ci->get_sizes(sizes);
if (AvmCore::config.soft_float) {
if (op == LIR_fcall)
op = LIR_callh;
LInsp args2[MAXARGS*2]; // arm could require 2 args per double
int32_t j = 0;
int32_t i = 0;
while (j < argc) {
argt >>= 2;
ArgSize a = ArgSize(argt&3);
if (a == ARGSIZE_F) {
LInsp q = args[i++];
args2[j++] = ins1(LIR_qhi, q);
args2[j++] = ins1(LIR_qlo, q);
} else {
args2[j++] = args[i++];
}
}
args = args2;
NanoAssert(j == argc);
}
//

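(This hunk removes the matching LIR-level rewrite: under soft-float a double can no longer travel as a single value, so each 64-bit argument was expanded into an explicit high/low pair before the call. A hypothetical sketch of that expansion, with LIns* standing in for LInsp and split() standing in for ins1(LIR_qhi, q) / ins1(LIR_qlo, q):)

    struct LIns;  // opaque stand-in for the LIR instruction type

    // Expand doubles into (high word, low word) pairs, high word first,
    // in the order the removed loop used. Returns the new arg count.
    int expand_soft_float_args(LIns* in[], const bool is_double[], int nin,
                               LIns* out[], LIns* (*split)(LIns*, bool hi)) {
        int j = 0;
        for (int i = 0; i < nin; i++) {
            if (is_double[i]) {
                out[j++] = split(in[i], true);   // LIR_qhi: high 32 bits
                out[j++] = split(in[i], false);  // LIR_qlo: low 32 bits
            } else {
                out[j++] = in[i];
            }
        }
        return j;  // matches argc from get_sizes() after splitting
    }

(The NanoAssert(j == argc) in the removed code checked exactly that correspondence with get_sizes() from the previous hunk.)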

@@ -54,6 +54,13 @@
extern "C" void __clear_cache(char *BEG, char *END);
#endif
// assume EABI, except under CE
#ifdef UNDER_CE
#undef NJ_ARM_EABI
#else
#define NJ_ARM_EABI
#endif
#ifdef FEATURE_NANOJIT
namespace nanojit
@@ -175,19 +182,162 @@ Assembler::genEpilogue()
return _nIns;
}
/* ARM EABI (used by gcc/linux) calling conventions differ from Windows CE; use these
* as the default.
/* gcc/linux use the ARM EABI; Windows CE uses the legacy ABI.
*
* - a double arg following an initial dword arg uses r0 for the int arg
* and r2/r3 for the double; r1 is skipped
* - 3 dword args followed by a double arg cause r3 to be skipped,
* and the double to be stuck on the stack.
* Under EABI:
* - doubles are 64-bit aligned both in registers and on the stack.
* If the next available argument register is R1, it is skipped
* and the double is placed in R2:R3. If R0:R1 or R2:R3 are not
* available, the double is placed on the stack, 64-bit aligned.
* - 32-bit arguments are placed in registers and 32-bit aligned
* on the stack.
*
* Under Windows CE, the arguments are placed in r0-r3 and then the stack,
* one dword at a time, with the high order dword of a quad/double coming
* first. No registers are skipped as they are in the EABI case.
* Under legacy ABI:
* - doubles are placed in subsequent arg registers; if the next
* available register is r3, the low order word goes into r3
* and the high order goes on the stack.
* - 32-bit arguments are placed in the next available arg register.
* - both doubles and 32-bit arguments are placed on the stack with
* 32-bit alignment.
*/
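(A worked example of the two rule sets above, not from the commit: for a call f(int a, double b, double c) with all four argument registers free,

    EABI:    a -> r0; r1 skipped for alignment; b -> r2:r3; c -> stack, 8-byte aligned
    legacy:  a -> r0; b -> r1:r2; c -> low word in r3, high word on the stack

so EABI burns r1 to keep the double aligned, while the legacy ABI will happily split a double across a register and memory.)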
void
Assembler::asm_arg(ArgSize sz, LInsp p, Register r)
{
// should never be called -- the ARM-specific longer form of
// asm_arg is used on ARM.
NanoAssert(0);
}
/*
* asm_arg will update r and stkd to indicate where the next
* argument should go. If r == UnknownReg, then the argument
* is placed on the stack at stkd, and stkd is updated.
*
* Note that this currently doesn't actually use stkd on input,
* except for figuring out alignment; it always pushes to SP.
* See TODO in asm_call.
*/
void
Assembler::asm_arg(ArgSize sz, LInsp arg, Register& r, int& stkd)
{
if (sz == ARGSIZE_F) {
#ifdef NJ_ARM_EABI
NanoAssert(r == UnknownReg || r == R0 || r == R2);
// if we're about to put this on the stack, make sure the
// stack is 64-bit aligned
if (r == UnknownReg && (stkd&7) != 0) {
SUBi(SP, SP, 4);
stkd += 4;
}
#endif
Reservation* argRes = getresv(arg);
// handle qjoin first; won't ever show up if VFP is available
if (arg->isop(LIR_qjoin)) {
asm_arg(ARGSIZE_LO, arg->oprnd1(), r, stkd);
asm_arg(ARGSIZE_LO, arg->oprnd2(), r, stkd);
} else if (!argRes || argRes->reg == UnknownReg || !AvmCore::config.vfp) {
// if we don't have a register allocated,
// or we're not vfp, just read from memory.
if (arg->isop(LIR_quad)) {
const int32_t* p = (const int32_t*) (arg-2);
// XXX use some load-multiple action here from our const pool?
for (int k = 0; k < 2; k++) {
if (r != UnknownReg) {
asm_ld_imm(r, *p++);
r = nextreg(r);
if (r == R4)
r = UnknownReg;
} else {
STR_preindex(IP, SP, -4);
asm_ld_imm(IP, *p++);
stkd += 4;
}
}
} else {
int d = findMemFor(arg);
for (int k = 0; k < 2; k++) {
if (r != UnknownReg) {
LDR(r, FP, d + k*4);
r = nextreg(r);
if (r == R4)
r = UnknownReg;
} else {
STR_preindex(IP, SP, -4);
LDR(IP, FP, d + k*4);
stkd += 4;
}
}
}
} else {
// handle the VFP with-register case
Register sr = argRes->reg;
if (r != UnknownReg && r < R3) {
FMRRD(r, nextreg(r), sr);
// make sure the next register is correct on return
if (r == R0)
r = R2;
else
r = UnknownReg;
} else if (r == R3) {
// legacy ABI only
STR_preindex(IP, SP, -4);
FMRDL(IP, sr);
FMRDH(r, sr);
stkd += 4;
r = UnknownReg;
} else {
FSTD(sr, SP, 0);
SUBi(SP, SP, 8);
stkd += 8;
r = UnknownReg;
}
}
} else if (sz == ARGSIZE_LO) {
if (r != UnknownReg) {
if (arg->isconst()) {
asm_ld_imm(r, arg->constval());
} else {
Reservation* argRes = getresv(arg);
if (argRes) {
if (argRes->reg == UnknownReg) {
// load it into the arg reg
int d = findMemFor(arg);
if (arg->isop(LIR_alloc)) {
asm_add_imm(r, FP, d);
} else {
LDR(r, FP, d);
}
} else {
MOV(r, argRes->reg);
}
} else {
findSpecificRegFor(arg, r);
}
}
if (r < R3)
r = nextreg(r);
else
r = UnknownReg;
} else {
int d = findMemFor(arg);
STR_preindex(IP, SP, -4);
LDR(IP, FP, d);
stkd += 4;
}
} else {
NanoAssert(0);
}
}
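(A reading note for the body above: nanojit emits ARM code backwards, each macro writing at *(--_nIns), as the STR macros at the bottom of this diff show, so the last macro invoked is the first instruction executed. For example, the pair

    STR_preindex(IP, SP, -4);
    LDR(IP, FP, d);

generates code that executes the LDR first and then pushes IP; IP is loaded before it is stored, despite the source order.)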
void
Assembler::asm_call(LInsp ins)
{
@@ -202,30 +352,14 @@ Assembler::asm_call(LInsp ins)
atypes >>= 2;
bool arg0IsInt32FollowedByFloat = false;
#ifndef UNDER_CE
// we need to detect if we have arg0 as LO followed by arg1 as F;
// in that case, we need to skip using r1 -- the F needs to be
// loaded in r2/r3, at least according to the ARM EABI and gcc 4.2's
// generated code.
while ((atypes & 3) != ARGSIZE_NONE) {
if (((atypes >> 2) & 3) == ARGSIZE_LO &&
((atypes >> 0) & 3) == ARGSIZE_F &&
((atypes >> 4) & 3) == ARGSIZE_NONE)
{
arg0IsInt32FollowedByFloat = true;
break;
}
atypes >>= 2;
}
#endif
// if we're using VFP, and the return type is a double,
// it'll come back in R0/R1. We need to either place it
// in the result fp reg, or store it.
if (AvmCore::config.vfp && rsize == ARGSIZE_F) {
NanoAssert(ins->opcode() == LIR_fcall);
NanoAssert(callRes);
//fprintf (stderr, "call ins: %p callRes: %p reg: %d ar: %d\n", ins, callRes, callRes->reg, callRes->arIndex);
Register rr = callRes->reg;
int d = disp(callRes);
freeRsrcOf(ins, rr != UnknownReg);
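(The diff trims the code that does the actual placement; a minimal sketch of what it amounts to, my reconstruction rather than the commit's code. FMDRR moves an integer register pair into a VFP double register, the mirror of the FMRRD used in asm_arg above:

    if (rr != UnknownReg) {
        // result register allocated: move the R0:R1 pair into it
        FMDRR(rr, R0, R1);
    } else {
        // no register: spill the pair to the instruction's stack slot
        STR(R0, FP, d);
        STR(R1, FP, d+4);
    }

Since emission runs backwards, these are emitted before the BL below and therefore execute after the call returns.)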
@@ -240,50 +374,37 @@ Assembler::asm_call(LInsp ins)
}
}
// make the call
BL((NIns*)(call->_address));
ArgSize sizes[10];
ArgSize sizes[MAXARGS];
uint32_t argc = call->get_sizes(sizes);
Register r = R0;
int stkd = 0;
// XXX TODO we should go through the args and figure out how much
// stack space we'll need, allocate it up front, and then do
// SP-relative stores using stkd instead of doing STR_preindex for
// every stack write like we currently do in asm_arg.
for(uint32_t i = 0; i < argc; i++) {
uint32_t j = argc - i - 1;
ArgSize sz = sizes[j];
LInsp arg = ins->arg(j);
// pre-assign registers R0-R3 for arguments (if they fit)
Register r = (i + roffset) < 4 ? argRegs[i+roffset] : UnknownReg;
NanoAssert(r < R4 || r == UnknownReg);
#ifdef NJ_ARM_EABI
if (sz == ARGSIZE_F) {
Register rlo = UnknownReg;
Register rhi = UnknownReg;
#ifdef UNDER_CE
if (r >= R0 && r <= R2) {
rlo = r;
rhi = nextreg(r);
roffset++;
} else if (r == R3) {
rlo = r;
rhi = UnknownReg;
}
#else
if (r == R0 || r == R2) {
rlo = r;
rhi = nextreg(r);
roffset++;
} else if (r == R1) {
rlo = R2;
rhi = nextreg(r);
roffset += 2;
}
if (r == R1)
r = R2;
else if (r == R3)
r = UnknownReg;
}
#endif
asm_arm_farg(arg, rlo, rhi);
} else {
asm_arg(sz, arg, r);
}
// Under CE, arg0IsInt32FollowedByFloat will always be false
if (i == 0 && arg0IsInt32FollowedByFloat)
roffset = 1;
asm_arg(sz, arg, r, stkd);
}
}
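(A rough shape of what the XXX TODO above suggests, hypothetical and ignoring the EABI 8-byte padding: total the argument bytes in a pre-pass, adjust SP once, and let asm_arg store SP-relative at stkd:

    int total = 0;
    for (uint32_t i = 0; i < argc; i++)
        total += (sizes[i] == ARGSIZE_F) ? 8 : 4;
    int onStack = total > 16 ? total - 16 : 0;  // R0-R3 carry 16 bytes
    // emit a single SUBi(SP, SP, onStack) for the call, then in asm_arg:
    //     STR(IP, SP, stkd);  // instead of STR_preindex(IP, SP, -4)

One SP adjustment per call instead of one pre-indexed store per word.)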
@@ -1612,132 +1733,6 @@ Assembler::asm_int(LInsp ins)
LDi(rr, val);
}
void
Assembler::asm_pusharg(LInsp arg)
{
Reservation* argRes = getresv(arg);
bool quad = arg->isQuad();
if (argRes && argRes->reg != UnknownReg) {
if (!quad) {
STR_preindex(argRes->reg, SP, -4);
} else {
FSTD(argRes->reg, SP, 0);
SUBi(SP, SP, 8);
}
} else {
int d = findMemFor(arg);
if (!quad) {
STR_preindex(IP, SP, -4);
LDR(IP, FP, d);
} else {
STR_preindex(IP, SP, -4);
LDR(IP, FP, d+4);
STR_preindex(IP, SP, -4);
LDR(IP, FP, d);
}
}
}
void
Assembler::asm_arg(ArgSize sz, LInsp p, Register r)
{
// this only handles ARGSIZE_LO; we don't support ARGSIZE_Q,
// and ARGSIZE_F is handled by asm_arm_farg
NanoAssert(sz == ARGSIZE_LO);
if (r != UnknownReg) {
// arg goes in specific register
if (p->isconst()) {
LDi(r, p->constval());
} else {
Reservation* rA = getresv(p);
if (rA) {
if (rA->reg == UnknownReg) {
// load it into the arg reg
int d = findMemFor(p);
if (p->isop(LIR_alloc)) {
asm_add_imm(r, FP, d);
} else {
LDR(r, FP, d);
}
} else {
// it must be in a saved reg
MOV(r, rA->reg);
}
} else {
// this is the last use, so fine to assign it
// to the scratch reg, it's dead after this point.
findSpecificRegFor(p, r);
}
}
} else {
asm_pusharg(p);
}
}
void
Assembler::asm_arm_farg(LInsp arg, Register rlo, Register rhi)
{
if (AvmCore::config.vfp) {
Register sr = findRegFor(arg, FpRegs);
if (rlo != UnknownReg && rhi != UnknownReg) {
NanoAssert(sr != UnknownReg);
FMRRD(rlo, rhi, sr);
} else if (rlo != UnknownReg && rhi == UnknownReg) {
NanoAssert(sr != UnknownReg);
STR_preindex(IP, SP, -4);
FMRDL(IP, sr);
FMRDH(rhi, sr);
} else {
asm_pusharg(arg);
}
return;
}
NanoAssert(arg->opcode() == LIR_qjoin || arg->opcode() == LIR_quad);
if (rlo != UnknownReg && rhi != UnknownReg) {
if (arg->opcode() == LIR_qjoin) {
LIns* lo = arg->oprnd1();
LIns* hi = arg->oprnd2();
findSpecificRegFor(lo, rlo);
findSpecificRegFor(hi, rhi);
} else {
// LIR_quad
const int32_t* p = (const int32_t*) (arg-2);
asm_ld_imm(rhi, p[1]);
asm_ld_imm(rlo, p[0]);
}
} else if (rlo != UnknownReg && rhi == UnknownReg) {
if (arg->opcode() == LIR_qjoin) {
LIns* lo = arg->oprnd1();
LIns* hi = arg->oprnd2();
int d = findMemFor(hi);
findSpecificRegFor(lo, rlo);
STR_preindex(IP, SP, -4);
LDR(IP, FP, d);
} else {
// LIR_quad
const int32_t* p = (const int32_t*) (arg-2);
STR_preindex(IP, SP, -4);
asm_ld_imm(IP, p[1]);
asm_ld_imm(rlo, p[0]);
}
} else {
asm_pusharg(arg);
}
}
}
#endif /* FEATURE_NANOJIT */


@@ -192,7 +192,7 @@ verbose_only( extern const char* shiftNames[]; )
void asm_cmpi(Register, int32_t imm); \
void asm_ldr_chk(Register d, Register b, int32_t off, bool chk); \
void asm_ld_imm(Register d, int32_t imm); \
void asm_arm_farg(LInsp arg, Register rlo, Register rhi); \
void asm_arg(ArgSize sz, LInsp arg, Register& r, int& stkd); \
int* _nSlot; \
int* _nExitSlot;
@@ -521,7 +521,7 @@ enum {
underrunProtect(4); \
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x52<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
else *(--_nIns) = (NIns)( COND_AL | (0x5A<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
asm_output("str %s, [%s, #%d]", gpn(_d), gpn(_n), (_off)); \
asm_output("str %s, [%s, #%d]!", gpn(_d), gpn(_n), (_off)); \
} while(0)
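(The asm_output change above is a listing fix: the 0x52/0x5A encoding group has P=1 and W=1, a pre-indexed store with writeback, which ARM assembly spells with a trailing '!':

    str r0, [sp, #-4]!   @ store r0 at sp-4, then sp = sp-4

Without the '!' the listing read as a plain offset store that leaves the base register untouched.)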
// [Rn] = Rd ; Rn += _off
@@ -530,7 +530,7 @@ enum {
underrunProtect(4); \
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x40<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
else *(--_nIns) = (NIns)( COND_AL | (0x48<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
asm_output("str %s, [%s], %d", gpn(_d), gpn(_n), (_off)); \
asm_output("str %s, [%s]!, %d", gpn(_d), gpn(_n), (_off)); \
} while(0)
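(Same idea for this post-indexed form, the 0x40/0x48 group with P=0, which always writes the updated address back. Standard ARM syntax writes it without '!':

    str r0, [sp], #4    @ store r0 at sp, then sp = sp + 4

The trailing '!' added to this verbose string is not architectural syntax; presumably it just flags the writeback in nanojit's listings.)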
//#define RET() underrunProtect(1); *(--_nIns) = 0xc3; asm_output("ret")