b=484561; [arm] fix EABI calling convention; clean up asm_call; r=graydon

Vladimir Vukicevic 2009-03-27 14:31:53 -07:00
parent 34105fd15e
commit eb82870ba4
4 changed files with 186 additions and 213 deletions


@@ -1943,12 +1943,6 @@ namespace nanojit
for (uint32_t i = 0; i < MAXARGS; i++) {
argt >>= 2;
ArgSize a = ArgSize(argt&3);
if (AvmCore::config.soft_float && a == ARGSIZE_F) {
sizes[argc++] = ARGSIZE_LO;
sizes[argc++] = ARGSIZE_LO;
continue;
}
if (a != ARGSIZE_NONE) {
sizes[argc++] = a;
} else {

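(The block removed above implemented soft-float lowering in get_sizes(): each 64-bit float argument was reported as two 32-bit slots. A minimal standalone sketch of that decoding, assuming argument kinds are packed two bits apiece with the return type in the low bits; the ArgSize values here are an assumption, not taken from the real headers:)

    #include <stdint.h>
    #include <vector>

    enum ArgSize { ARGSIZE_NONE = 0, ARGSIZE_F = 1, ARGSIZE_LO = 2 };

    // Decode a packed argument-type word the way the loop above does:
    // skip the return type, then read one 2-bit field per argument.
    static std::vector<ArgSize> decode_sizes(uint32_t argt, bool soft_float) {
        std::vector<ArgSize> sizes;
        for (argt >>= 2; (argt & 3) != ARGSIZE_NONE; argt >>= 2) {
            ArgSize a = ArgSize(argt & 3);
            if (soft_float && a == ARGSIZE_F) {
                // the removed behavior: one double became two 32-bit slots
                sizes.push_back(ARGSIZE_LO);
                sizes.push_back(ARGSIZE_LO);
            } else {
                sizes.push_back(a);
            }
        }
        return sizes;
    }

(With soft_float set, (int32_t, double) decodes to three ARGSIZE_LO entries, which is why the LIR-level caller in the next hunk had to split its argument list to match.)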

@@ -1100,28 +1100,12 @@ namespace nanojit
LOpcode op = (ci->isIndirect() ? k_callimap : k_callmap)[argt & 3];
NanoAssert(op != LIR_skip); // LIR_skip here is just an error condition
ArgSize sizes[2*MAXARGS];
ArgSize sizes[MAXARGS];
int32_t argc = ci->get_sizes(sizes);
if (AvmCore::config.soft_float) {
if (op == LIR_fcall)
op = LIR_callh;
LInsp args2[MAXARGS*2]; // arm could require 2 args per double
int32_t j = 0;
int32_t i = 0;
while (j < argc) {
argt >>= 2;
ArgSize a = ArgSize(argt&3);
if (a == ARGSIZE_F) {
LInsp q = args[i++];
args2[j++] = ins1(LIR_qhi, q);
args2[j++] = ins1(LIR_qlo, q);
} else {
args2[j++] = args[i++];
}
}
args = args2;
NanoAssert(j == argc);
}
//

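(This hunk removes the matching LIR-level rewrite: under soft-float a double can no longer travel as a single value, so each 64-bit argument was expanded into an explicit high/low pair before the call. A hypothetical sketch of that expansion, with LIns* standing in for LInsp and split() standing in for ins1(LIR_qhi, q) / ins1(LIR_qlo, q):)

    struct LIns;  // opaque stand-in for the LIR instruction type

    // Expand doubles into (high word, low word) pairs, high word first,
    // in the order the removed loop used. Returns the new arg count.
    int expand_soft_float_args(LIns* in[], const bool is_double[], int nin,
                               LIns* out[], LIns* (*split)(LIns*, bool hi)) {
        int j = 0;
        for (int i = 0; i < nin; i++) {
            if (is_double[i]) {
                out[j++] = split(in[i], true);   // LIR_qhi: high 32 bits
                out[j++] = split(in[i], false);  // LIR_qlo: low 32 bits
            } else {
                out[j++] = in[i];
            }
        }
        return j;  // matches argc from get_sizes() after splitting
    }

(The NanoAssert(j == argc) in the removed code checked exactly that correspondence with get_sizes() from the previous hunk.)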

@@ -54,6 +54,13 @@
extern "C" void __clear_cache(char *BEG, char *END);
#endif
// assume EABI, except under CE
#ifdef UNDER_CE
#undef NJ_ARM_EABI
#else
#define NJ_ARM_EABI
#endif
#ifdef FEATURE_NANOJIT
namespace nanojit
@@ -175,19 +182,162 @@ Assembler::genEpilogue()
return _nIns;
}
/* ARM EABI (used by gcc/linux) calling conventions differ from Windows CE; use these
* as the default.
/* gcc/linux use the ARM EABI; Windows CE uses the legacy ABI.
*
* - a double arg following an initial dword arg uses r0 for the int arg
* and r2/r3 for the double; r1 is skipped
* - 3 dword args followed by a double arg cause r3 to be skipped,
* and the double to be stuck on the stack.
* Under EABI:
* - doubles are 64-bit aligned both in registers and on the stack.
* If the next available argument register is R1, it is skipped
* and the double is placed in R2:R3. If R0:R1 or R2:R3 are not
* available, the double is placed on the stack, 64-bit aligned.
* - 32-bit arguments are placed in registers and 32-bit aligned
* on the stack.
*
* Under Windows CE, the arguments are placed in r0-r3 and then the stack,
* one dword at a time, with the high order dword of a quad/double coming
* first. No registers are skipped as they are in the EABI case.
* Under legacy ABI:
* - doubles are placed in subsequent arg registers; if the next
* available register is r3, the low order word goes into r3
* and the high order goes on the stack.
* - 32-bit arguments are placed in the next available arg register.
* - both doubles and 32-bit arguments are placed on the stack with
* 32-bit alignment.
*/
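(A worked example of the two rule sets above, not from the commit: for a call f(int a, double b, double c) with all four argument registers free,

    EABI:    a -> r0; r1 skipped for alignment; b -> r2:r3; c -> stack, 8-byte aligned
    legacy:  a -> r0; b -> r1:r2; c -> low word in r3, high word on the stack

so EABI burns r1 to keep the double aligned, while the legacy ABI will happily split a double across a register and memory.)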
void
Assembler::asm_arg(ArgSize sz, LInsp p, Register r)
{
// should never be called -- the ARM-specific longer form of
// asm_arg is used on ARM.
NanoAssert(0);
}
/*
* asm_arg will update r and stkd to indicate where the next
* argument should go. If r == UnknownReg, then the argument
* is placed on the stack at stkd, and stkd is updated.
*
* Note that this currently doesn't actually use stkd on input,
* except for figuring out alignment; it always pushes to SP.
* See TODO in asm_call.
*/
void
Assembler::asm_arg(ArgSize sz, LInsp arg, Register& r, int& stkd)
{
if (sz == ARGSIZE_F) {
#ifdef NJ_ARM_EABI
NanoAssert(r == UnknownReg || r == R0 || r == R2);
// if we're about to put this on the stack, make sure the
// stack is 64-bit aligned
if (r == UnknownReg && (stkd&7) != 0) {
SUBi(SP, SP, 4);
stkd += 4;
}
#endif
Reservation* argRes = getresv(arg);
// handle qjoin first; won't ever show up if VFP is available
if (arg->isop(LIR_qjoin)) {
asm_arg(ARGSIZE_LO, arg->oprnd1(), r, stkd);
asm_arg(ARGSIZE_LO, arg->oprnd2(), r, stkd);
} else if (!argRes || argRes->reg == UnknownReg || !AvmCore::config.vfp) {
// if we don't have a register allocated,
// or we're not vfp, just read from memory.
if (arg->isop(LIR_quad)) {
const int32_t* p = (const int32_t*) (arg-2);
// XXX use some load-multiple action here from our const pool?
for (int k = 0; k < 2; k++) {
if (r != UnknownReg) {
asm_ld_imm(r, *p++);
r = nextreg(r);
if (r == R4)
r = UnknownReg;
} else {
STR_preindex(IP, SP, -4);
asm_ld_imm(IP, *p++);
stkd += 4;
}
}
} else {
int d = findMemFor(arg);
for (int k = 0; k < 2; k++) {
if (r != UnknownReg) {
LDR(r, FP, d + k*4);
r = nextreg(r);
if (r == R4)
r = UnknownReg;
} else {
STR_preindex(IP, SP, -4);
LDR(IP, FP, d + k*4);
stkd += 4;
}
}
}
} else {
// handle the VFP with-register case
Register sr = argRes->reg;
if (r != UnknownReg && r < R3) {
FMRRD(r, nextreg(r), sr);
// make sure the next register is correct on return
if (r == R0)
r = R2;
else
r = UnknownReg;
} else if (r == R3) {
// legacy ABI only
STR_preindex(IP, SP, -4);
FMRDL(IP, sr);
FMRDH(r, sr);
stkd += 4;
r = UnknownReg;
} else {
FSTD(sr, SP, 0);
SUBi(SP, SP, 8);
stkd += 8;
r = UnknownReg;
}
}
} else if (sz == ARGSIZE_LO) {
if (r != UnknownReg) {
if (arg->isconst()) {
asm_ld_imm(r, arg->constval());
} else {
Reservation* argRes = getresv(arg);
if (argRes) {
if (argRes->reg == UnknownReg) {
// load it into the arg reg
int d = findMemFor(arg);
if (arg->isop(LIR_alloc)) {
asm_add_imm(r, FP, d);
} else {
LDR(r, FP, d);
}
} else {
MOV(r, argRes->reg);
}
} else {
findSpecificRegFor(arg, r);
}
}
if (r < R3)
r = nextreg(r);
else
r = UnknownReg;
} else {
int d = findMemFor(arg);
STR_preindex(IP, SP, -4);
LDR(IP, FP, d);
stkd += 4;
}
} else {
NanoAssert(0);
}
}
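(A reading note for the body above: nanojit emits ARM code backwards, each macro writing at *(--_nIns), as the STR macros at the bottom of this diff show, so the last macro invoked is the first instruction executed. For example, the pair

    STR_preindex(IP, SP, -4);
    LDR(IP, FP, d);

generates code that executes the LDR first and then pushes IP; IP is loaded before it is stored, despite the source order.)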
void
Assembler::asm_call(LInsp ins)
{
@@ -202,30 +352,14 @@ Assembler::asm_call(LInsp ins)
atypes >>= 2;
bool arg0IsInt32FollowedByFloat = false;
#ifndef UNDER_CE
// we need to detect if we have arg0 as LO followed by arg1 as F;
// in that case, we need to skip using r1 -- the F needs to be
// loaded in r2/r3, at least according to the ARM EABI and gcc 4.2's
// generated code.
while ((atypes & 3) != ARGSIZE_NONE) {
if (((atypes >> 2) & 3) == ARGSIZE_LO &&
((atypes >> 0) & 3) == ARGSIZE_F &&
((atypes >> 4) & 3) == ARGSIZE_NONE)
{
arg0IsInt32FollowedByFloat = true;
break;
}
atypes >>= 2;
}
#endif
// if we're using VFP, and the return type is a double,
// it'll come back in R0/R1. We need to either place it
// in the result fp reg, or store it.
if (AvmCore::config.vfp && rsize == ARGSIZE_F) {
NanoAssert(ins->opcode() == LIR_fcall);
NanoAssert(callRes);
//fprintf (stderr, "call ins: %p callRes: %p reg: %d ar: %d\n", ins, callRes, callRes->reg, callRes->arIndex);
Register rr = callRes->reg;
int d = disp(callRes);
freeRsrcOf(ins, rr != UnknownReg);
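(The diff trims the code that does the actual placement; a minimal sketch of what it amounts to, my reconstruction rather than the commit's code. FMDRR moves an integer register pair into a VFP double register, the mirror of the FMRRD used in asm_arg above:

    if (rr != UnknownReg) {
        // result register allocated: move the R0:R1 pair into it
        FMDRR(rr, R0, R1);
    } else {
        // no register: spill the pair to the instruction's stack slot
        STR(R0, FP, d);
        STR(R1, FP, d+4);
    }

Since emission runs backwards, these are emitted before the BL below and therefore execute after the call returns.)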
@@ -240,50 +374,37 @@ Assembler::asm_call(LInsp ins)
}
}
// make the call
BL((NIns*)(call->_address));
ArgSize sizes[10];
ArgSize sizes[MAXARGS];
uint32_t argc = call->get_sizes(sizes);
Register r = R0;
int stkd = 0;
// XXX TODO we should go through the args and figure out how much
// stack space we'll need, allocate it up front, and then do
// SP-relative stores using stkd instead of doing STR_preindex for
// every stack write like we currently do in asm_arg.
for(uint32_t i = 0; i < argc; i++) {
uint32_t j = argc - i - 1;
ArgSize sz = sizes[j];
LInsp arg = ins->arg(j);
// pre-assign registers R0-R3 for arguments (if they fit)
Register r = (i + roffset) < 4 ? argRegs[i+roffset] : UnknownReg;
NanoAssert(r < R4 || r == UnknownReg);
#ifdef NJ_ARM_EABI
if (sz == ARGSIZE_F) {
Register rlo = UnknownReg;
Register rhi = UnknownReg;
#ifdef UNDER_CE
if (r >= R0 && r <= R2) {
rlo = r;
rhi = nextreg(r);
roffset++;
} else if (r == R3) {
rlo = r;
rhi = UnknownReg;
}
#else
if (r == R0 || r == R2) {
rlo = r;
rhi = nextreg(r);
roffset++;
} else if (r == R1) {
rlo = R2;
rhi = nextreg(r);
roffset += 2;
}
if (r == R1)
r = R2;
else if (r == R3)
r = UnknownReg;
}
#endif
asm_arm_farg(arg, rlo, rhi);
} else {
asm_arg(sz, arg, r);
}
// Under CE, arg0IsInt32FollowedByFloat will always be false
if (i == 0 && arg0IsInt32FollowedByFloat)
roffset = 1;
asm_arg(sz, arg, r, stkd);
}
}
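(A rough shape of what the XXX TODO above suggests, hypothetical and ignoring the EABI 8-byte padding: total the argument bytes in a pre-pass, adjust SP once, and let asm_arg store SP-relative at stkd:

    int total = 0;
    for (uint32_t i = 0; i < argc; i++)
        total += (sizes[i] == ARGSIZE_F) ? 8 : 4;
    int onStack = total > 16 ? total - 16 : 0;  // R0-R3 carry 16 bytes
    // emit a single SUBi(SP, SP, onStack) for the call, then in asm_arg:
    //     STR(IP, SP, stkd);  // instead of STR_preindex(IP, SP, -4)

One SP adjustment per call instead of one pre-indexed store per word.)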
@@ -1612,132 +1733,6 @@ Assembler::asm_int(LInsp ins)
LDi(rr, val);
}
void
Assembler::asm_pusharg(LInsp arg)
{
Reservation* argRes = getresv(arg);
bool quad = arg->isQuad();
if (argRes && argRes->reg != UnknownReg) {
if (!quad) {
STR_preindex(argRes->reg, SP, -4);
} else {
FSTD(argRes->reg, SP, 0);
SUBi(SP, SP, 8);
}
} else {
int d = findMemFor(arg);
if (!quad) {
STR_preindex(IP, SP, -4);
LDR(IP, FP, d);
} else {
STR_preindex(IP, SP, -4);
LDR(IP, FP, d+4);
STR_preindex(IP, SP, -4);
LDR(IP, FP, d);
}
}
}
void
Assembler::asm_arg(ArgSize sz, LInsp p, Register r)
{
// this only handles ARGSIZE_LO; we don't support ARGSIZE_Q,
// and ARGSIZE_F is handled by asm_arm_farg
NanoAssert(sz == ARGSIZE_LO);
if (r != UnknownReg) {
// arg goes in specific register
if (p->isconst()) {
LDi(r, p->constval());
} else {
Reservation* rA = getresv(p);
if (rA) {
if (rA->reg == UnknownReg) {
// load it into the arg reg
int d = findMemFor(p);
if (p->isop(LIR_alloc)) {
asm_add_imm(r, FP, d);
} else {
LDR(r, FP, d);
}
} else {
// it must be in a saved reg
MOV(r, rA->reg);
}
} else {
// this is the last use, so fine to assign it
// to the scratch reg, it's dead after this point.
findSpecificRegFor(p, r);
}
}
} else {
asm_pusharg(p);
}
}
void
Assembler::asm_arm_farg(LInsp arg, Register rlo, Register rhi)
{
if (AvmCore::config.vfp) {
Register sr = findRegFor(arg, FpRegs);
if (rlo != UnknownReg && rhi != UnknownReg) {
NanoAssert(sr != UnknownReg);
FMRRD(rlo, rhi, sr);
} else if (rlo != UnknownReg && rhi == UnknownReg) {
NanoAssert(sr != UnknownReg);
STR_preindex(IP, SP, -4);
FMRDL(IP, sr);
FMRDH(rhi, sr);
} else {
asm_pusharg(arg);
}
return;
}
NanoAssert(arg->opcode() == LIR_qjoin || arg->opcode() == LIR_quad);
if (rlo != UnknownReg && rhi != UnknownReg) {
if (arg->opcode() == LIR_qjoin) {
LIns* lo = arg->oprnd1();
LIns* hi = arg->oprnd2();
findSpecificRegFor(lo, rlo);
findSpecificRegFor(hi, rhi);
} else {
// LIR_quad
const int32_t* p = (const int32_t*) (arg-2);
asm_ld_imm(rhi, p[1]);
asm_ld_imm(rlo, p[0]);
}
} else if (rlo != UnknownReg && rhi == UnknownReg) {
if (arg->opcode() == LIR_qjoin) {
LIns* lo = arg->oprnd1();
LIns* hi = arg->oprnd2();
int d = findMemFor(hi);
findSpecificRegFor(lo, rlo);
STR_preindex(IP, SP, -4);
LDR(IP, FP, d);
} else {
// LIR_quad
const int32_t* p = (const int32_t*) (arg-2);
STR_preindex(IP, SP, -4);
asm_ld_imm(IP, p[1]);
asm_ld_imm(rlo, p[0]);
}
} else {
asm_pusharg(arg);
}
}
}
#endif /* FEATURE_NANOJIT */


@@ -192,7 +192,7 @@ verbose_only( extern const char* shiftNames[]; )
void asm_cmpi(Register, int32_t imm); \
void asm_ldr_chk(Register d, Register b, int32_t off, bool chk); \
void asm_ld_imm(Register d, int32_t imm); \
void asm_arm_farg(LInsp arg, Register rlo, Register rhi); \
void asm_arg(ArgSize sz, LInsp arg, Register& r, int& stkd); \
int* _nSlot; \
int* _nExitSlot;
@@ -521,7 +521,7 @@ enum {
underrunProtect(4); \
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x52<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
else *(--_nIns) = (NIns)( COND_AL | (0x5A<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
asm_output("str %s, [%s, #%d]", gpn(_d), gpn(_n), (_off)); \
asm_output("str %s, [%s, #%d]!", gpn(_d), gpn(_n), (_off)); \
} while(0)
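(The asm_output change above is a listing fix: the 0x52/0x5A encoding group has P=1 and W=1, a pre-indexed store with writeback, which ARM assembly spells with a trailing '!':

    str r0, [sp, #-4]!   @ store r0 at sp-4, then sp = sp-4

Without the '!' the listing read as a plain offset store that leaves the base register untouched.)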
// [Rn] = Rd ; Rn += _off
@@ -530,7 +530,7 @@ enum {
underrunProtect(4); \
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x40<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
else *(--_nIns) = (NIns)( COND_AL | (0x48<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
asm_output("str %s, [%s], %d", gpn(_d), gpn(_n), (_off)); \
asm_output("str %s, [%s]!, %d", gpn(_d), gpn(_n), (_off)); \
} while(0)
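(Same idea for this post-indexed form, the 0x40/0x48 group with P=0, which always writes the updated address back. Standard ARM syntax writes it without '!':

    str r0, [sp], #4    @ store r0 at sp, then sp = sp + 4

The trailing '!' added to this verbose string is not architectural syntax; presumably it just flags the writeback in nanojit's listings.)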
//#define RET() underrunProtect(1); *(--_nIns) = 0xc3; asm_output("ret")