implement EXPANDED_LOADSTORE for ARM (code=vladimir,stejohns; r=vladimir,rreitmai; bug=534765)

--HG--
extra : convert_revision : 75f0d95c8bea8ceb0d9bb2dfd55aeb0d0d200bd1
Steven Johnson 2010-01-25 11:08:42 -08:00
parent dbeceb07e7
commit e0fe64337d
2 changed files with 357 additions and 142 deletions

nanojit/NativeARM.cpp

@ -1205,28 +1205,39 @@ Assembler::asm_qjoin(LIns *ins)
 void
 Assembler::asm_store32(LOpcode op, LIns *value, int dr, LIns *base)
 {
+    Register ra, rb;
+    getBaseReg2(GpRegs, value, ra, GpRegs, base, rb, dr);
+
     switch (op) {
         case LIR_sti:
-            // handled by mainline code below for now
-            break;
+            if (isU12(-dr) || isU12(dr)) {
+                STR(ra, rb, dr);
+            } else {
+                STR(ra, IP, 0);
+                asm_add_imm(IP, rb, dr);
+            }
+            return;
         case LIR_stb:
+            if (isU12(-dr) || isU12(dr)) {
+                STRB(ra, rb, dr);
+            } else {
+                STRB(ra, IP, 0);
+                asm_add_imm(IP, rb, dr);
+            }
+            return;
         case LIR_sts:
-            NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
+            // Similar to the sti/stb case, but the max offset is smaller.
+            if (isU8(-dr) || isU8(dr)) {
+                STRH(ra, rb, dr);
+            } else {
+                STRH(ra, IP, 0);
+                asm_add_imm(IP, rb, dr);
+            }
             return;
         default:
             NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
             return;
     }
-
-    Register ra, rb;
-    getBaseReg2(GpRegs, value, ra, GpRegs, base, rb, dr);
-
-    if (isU12(-dr) || isU12(dr)) {
-        STR(ra, rb, dr);
-    } else {
-        STR(ra, IP, 0);
-        asm_add_imm(IP, rb, dr);
-    }
 }
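
Note on the pattern above: when the displacement fits the instruction's
immediate field it is encoded directly; otherwise base+offset is computed
into the IP scratch register first (nanojit emits code backwards, so the
add is emitted after the store but executes before it). A sketch of the
range predicates this relies on; illustrative only, the real definitions
live in the ARM backend header:

    // Sketch (assumed definitions): true iff v fits the unsigned 12-bit /
    // 8-bit offset field of STR/STRB and STRH respectively.
    #define isU12(v) (((v) & 0xfff) == (v))
    #define isU8(v)  (((v) & 0xff) == (v))
    // Testing both isU12(-dr) and isU12(dr) accepts displacements of either
    // sign whose magnitude fits, e.g. dr == -4000 passes via isU12(4000).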
void
@ -1308,21 +1319,6 @@ Assembler::asm_load64(LInsp ins)
     //asm_output("<<< load64");

-    switch (ins->opcode()) {
-        case LIR_ldf:
-        case LIR_ldfc:
-            // handled by mainline code below for now
-            break;
-
-        case LIR_ld32f:
-        case LIR_ldc32f:
-            NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
-            return;
-
-        default:
-            NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
-            return;
-    }
-
     NanoAssert(ins->isF64());

     LIns* base = ins->oprnd1();
@ -1337,28 +1333,68 @@ Assembler::asm_load64(LInsp ins)
     //outputf("--- load64: Finished register allocation.");

-    if (ARM_VFP && isKnownReg(rr)) {
-        // VFP is enabled and the result will go into a register.
-        NanoAssert(IsFpReg(rr));
-
-        if (!isS8(offset >> 2) || (offset&3) != 0) {
-            FLDD(rr,IP,0);
-            asm_add_imm(IP, rb, offset);
-        } else {
-            FLDD(rr,rb,offset);
-        }
-    } else {
-        // Either VFP is not available or the result needs to go into memory;
-        // in either case, VFP instructions are not required. Note that the
-        // result will never be loaded into registers if VFP is not available.
-        NanoAssert(!isKnownReg(rr));
-        NanoAssert(d != 0);
-
-        // Check that the offset is 8-byte (64-bit) aligned.
-        NanoAssert((d & 0x7) == 0);
-
-        // *(uint64_t*)(FP+d) = *(uint64_t*)(rb+offset)
-        asm_mmq(FP, d, rb, offset);
-    }
+    switch (ins->opcode()) {
+        case LIR_ldf:
+        case LIR_ldfc:
+            if (ARM_VFP && isKnownReg(rr)) {
+                // VFP is enabled and the result will go into a register.
+                NanoAssert(IsFpReg(rr));
+
+                if (!isS8(offset >> 2) || (offset&3) != 0) {
+                    FLDD(rr,IP,0);
+                    asm_add_imm(IP, rb, offset);
+                } else {
+                    FLDD(rr,rb,offset);
+                }
+            } else {
+                // Either VFP is not available or the result needs to go into memory;
+                // in either case, VFP instructions are not required. Note that the
+                // result will never be loaded into registers if VFP is not available.
+                NanoAssert(!isKnownReg(rr));
+                NanoAssert(d != 0);
+
+                // Check that the offset is 8-byte (64-bit) aligned.
+                NanoAssert((d & 0x7) == 0);
+
+                // *(uint64_t*)(FP+d) = *(uint64_t*)(rb+offset)
+                asm_mmq(FP, d, rb, offset);
+            }
+            return;
+
+        case LIR_ld32f:
+        case LIR_ldc32f:
+            if (ARM_VFP) {
+                if (isKnownReg(rr)) {
+                    NanoAssert(IsFpReg(rr));
+                    FCVTDS(rr, S14);
+                } else {
+                    // Normally D7 isn't allowed to be used as an FP reg.
+                    // In this case we make an explicit exception.
+                    if (isS8(d)) {
+                        FSTD_allowD7(D7, FP, d, true);
+                    } else {
+                        FSTD_allowD7(D7, IP, 0, true);
+                        asm_add_imm(IP, FP, d);
+                    }
+                    FCVTDS_allowD7(D7, S14, true);
+                }
+
+                // always load into a VFP reg to do the conversion, and always use
+                // our S14 scratch reg
+                if (!isS8(offset >> 2) || (offset&3) != 0) {
+                    FLDS(S14, IP, 0);
+                    asm_add_imm(IP, rb, offset);
+                } else {
+                    FLDS(S14, rb, offset);
+                }
+            } else {
+                NanoAssertMsg(0, "ld32f not supported with non-VFP, fix me");
+            }
+            return;
+
+        default:
+            NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
+            return;
+    }

     //asm_output(">>> load64");
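
For LIR_ld32f with a register destination, the pair of VFP emissions above
produces, in execution order (calls emit backwards), roughly the following;
the trace is built from the asm_output strings in the header below, with
made-up register numbers:

    flds   s14, [r4, #8]    ; load the 32-bit float into the S14 scratch
    fcvtds d2, s14          ; widen to a 64-bit double in the result reg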
@ -1373,65 +1409,106 @@ Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
     switch (op) {
         case LIR_stfi:
-            // handled by mainline code below for now
-            break;
+            if (ARM_VFP) {
+                Register rb = findRegFor(base, GpRegs);
+
+                if (value->isconstq()) {
+                    underrunProtect(LD32_size*2 + 8);
+
+                    // XXX use another reg, get rid of dependency
+                    STR(IP, rb, dr);
+                    asm_ld_imm(IP, value->imm64_0(), false);
+                    STR(IP, rb, dr+4);
+                    asm_ld_imm(IP, value->imm64_1(), false);
+
+                    return;
+                }
+
+                Register rv = findRegFor(value, FpRegs);
+
+                NanoAssert(isKnownReg(rb));
+                NanoAssert(isKnownReg(rv));
+
+                Register baseReg = rb;
+                intptr_t baseOffset = dr;
+
+                if (!isS8(dr)) {
+                    baseReg = IP;
+                    baseOffset = 0;
+                }
+
+                FSTD(rv, baseReg, baseOffset);
+
+                if (!isS8(dr)) {
+                    asm_add_imm(IP, rb, dr);
+                }
+
+                // if it's a constant, make sure our baseReg/baseOffset location
+                // has the right value
+                if (value->isconstq()) {
+                    underrunProtect(4*4);
+                    asm_quad_nochk(rv, value->imm64_0(), value->imm64_1());
+                }
+            } else {
+                int da = findMemFor(value);
+                Register rb = findRegFor(base, GpRegs);
+
+                // *(uint64_t*)(rb+dr) = *(uint64_t*)(FP+da)
+                asm_mmq(rb, dr, FP, da);
+            }
+            return;
+
         case LIR_st32f:
-            NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
+            if (ARM_VFP) {
+                Register rb = findRegFor(base, GpRegs);
+
+                if (value->isconstq()) {
+                    underrunProtect(LD32_size*2 + 8);
+
+                    // XXX use another reg, get rid of dependency
+                    STR(IP, rb, dr);
+                    asm_ld_imm(IP, value->imm64_0(), false);
+                    STR(IP, rb, dr+4);
+                    asm_ld_imm(IP, value->imm64_1(), false);
+
+                    return;
+                }
+
+                Register rv = findRegFor(value, FpRegs);
+
+                NanoAssert(isKnownReg(rb));
+                NanoAssert(isKnownReg(rv));
+
+                Register baseReg = rb;
+                intptr_t baseOffset = dr;
+
+                if (!isS8(dr)) {
+                    baseReg = IP;
+                    baseOffset = 0;
+                }
+
+                FSTS(S14, baseReg, baseOffset);
+
+                if (!isS8(dr)) {
+                    asm_add_imm(IP, rb, dr);
+                }
+
+                FCVTSD(S14, rv);
+
+                // if it's a constant, make sure our baseReg/baseOffset location
+                // has the right value
+                if (value->isconstq()) {
+                    underrunProtect(4*4);
+                    asm_quad_nochk(rv, value->imm64_0(), value->imm64_1());
+                }
+            } else {
+                NanoAssertMsg(0, "st32f not supported with non-VFP, fix me");
+            }
             return;

         default:
             NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
             return;
     }
-
-    if (ARM_VFP) {
-        Register rb = findRegFor(base, GpRegs);
-
-        if (value->isconstq()) {
-            underrunProtect(LD32_size*2 + 8);
-
-            // XXX use another reg, get rid of dependency
-            STR(IP, rb, dr);
-            asm_ld_imm(IP, value->imm64_0(), false);
-            STR(IP, rb, dr+4);
-            asm_ld_imm(IP, value->imm64_1(), false);
-
-            return;
-        }
-
-        Register rv = findRegFor(value, FpRegs);
-
-        NanoAssert(isKnownReg(rb));
-        NanoAssert(isKnownReg(rv));
-
-        Register baseReg = rb;
-        intptr_t baseOffset = dr;
-
-        if (!isS8(dr)) {
-            baseReg = IP;
-            baseOffset = 0;
-        }
-
-        FSTD(rv, baseReg, baseOffset);
-
-        if (!isS8(dr)) {
-            asm_add_imm(IP, rb, dr);
-        }
-
-        // if it's a constant, make sure our baseReg/baseOffset location
-        // has the right value
-        if (value->isconstq()) {
-            underrunProtect(4*4);
-            asm_quad_nochk(rv, value->imm64_0(), value->imm64_1());
-        }
-    } else {
-        int da = findMemFor(value);
-        Register rb = findRegFor(base, GpRegs);
-
-        // *(uint64_t*)(rb+dr) = *(uint64_t*)(FP+da)
-        asm_mmq(rb, dr, FP, da);
-    }
//asm_output(">>> store64");
}
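
The st32f path is the mirror image: narrow the double through the S14
scratch register, then store 32 bits. In execution order (illustrative
registers, trace format taken from the header below):

    fcvtsd s14, d2          ; round the double in d2 to single precision
    fsts   s14, [r4, #8]    ; store the 32-bit result

One caveat: the isconstq() fast path under LIR_st32f stores both 32-bit
halves of the unconverted double, i.e. eight bytes for what is nominally a
32-bit store; that path looks suspect and deserves a second look.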
@ -2018,8 +2095,8 @@ Assembler::asm_i2f(LInsp ins)
     // todo: support int value in memory, as per x86
     NanoAssert(isKnownReg(srcr));

-    FSITOD(rr, FpSingleScratch);
-    FMSR(FpSingleScratch, srcr);
+    FSITOD(rr, S14);
+    FMSR(S14, srcr);
 }
void
@ -2031,8 +2108,8 @@ Assembler::asm_u2f(LInsp ins)
     // todo: support int value in memory, as per x86
     NanoAssert(isKnownReg(sr));

-    FUITOD(rr, FpSingleScratch);
-    FMSR(FpSingleScratch, sr);
+    FUITOD(rr, S14);
+    FMSR(S14, sr);
 }
void Assembler::asm_f2i(LInsp ins)
@ -2041,8 +2118,8 @@ void Assembler::asm_f2i(LInsp ins)
     Register rr = deprecated_prepResultReg(ins, GpRegs);
     Register sr = findRegFor(ins->oprnd1(), FpRegs);

-    FMRS(rr, FpSingleScratch);
-    FTOSID(FpSingleScratch, sr);
+    FMRS(rr, S14);
+    FTOSID(S14, sr);
 }
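
These three hunks are a pure rename (FpSingleScratch becomes S14); the
emitted code is unchanged. For reference, the i2f sequence in execution
order is (illustrative registers):

    fmsr   s14, r0          ; move the integer bits into S14
    fsitod d2, s14          ; signed int to double in the result register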
void
@ -2506,8 +2583,7 @@ Assembler::asm_load32(LInsp ins)
return;
case LIR_ldzs:
case LIR_ldcs:
-            // These are expected to be 2-byte aligned.  (Not all ARM machines
-            // can handle unaligned accesses.)
+            // Some ARM machines require 2-byte alignment here.
             // Similar to the ldcb/ldzb case, but the max offset is smaller.
if (isU8(-d) || isU8(d)) {
LDRH(rr, ra, d);
@ -2518,7 +2594,7 @@ Assembler::asm_load32(LInsp ins)
return;
case LIR_ld:
case LIR_ldc:
-            // These are expected to be 4-byte aligned.
+            // Some ARM machines require 4-byte alignment here.
if (isU12(-d) || isU12(d)) {
LDR(rr, ra, d);
} else {
@ -2527,10 +2603,22 @@ Assembler::asm_load32(LInsp ins)
}
return;
         case LIR_ldsb:
-        case LIR_ldss:
         case LIR_ldcsb:
+            if (isU8(-d) || isU8(d)) {
+                LDRSB(rr, ra, d);
+            } else {
+                LDRSB(rr, IP, 0);
+                asm_add_imm(IP, ra, d);
+            }
+            return;
+        case LIR_ldss:
         case LIR_ldcss:
-            NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
+            if (isU8(-d) || isU8(d)) {
+                LDRSH(rr, ra, d);
+            } else {
+                LDRSH(rr, IP, 0);
+                asm_add_imm(IP, ra, d);
+            }
             return;
         default:
             NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
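
Note the asymmetry called out in the header comments: LDRB takes a 12-bit
offset while the sign-extending forms LDRSB/LDRSH (like LDRH) only take
8-bit offsets, which is why these cases test isU8 rather than isU12 before
falling back to the IP-relative form. Illustrative sequences (the add is
shown schematically):

    ldrsb r0, [r2,#200]     ; |d| <= 255: offset fits directly
    add   ip, r2, #300      ; |d| > 255: compute the address first,
    ldrsb r0, [ip,#0]       ; then load with a zero offset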

nanojit/NativeARM.h

@ -79,7 +79,7 @@ namespace nanojit
 #define NJ_MAX_PARAMETERS               16
 #define NJ_ALIGN_STACK                  8
 #define NJ_JTBL_SUPPORTED               1
-#define NJ_EXPANDED_LOADSTORE_SUPPORTED 0
+#define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
 #define NJ_F2I_SUPPORTED                1

 #define NJ_CONSTANT_POOLS
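
NJ_EXPANDED_LOADSTORE_SUPPORTED is the feature gate this patch flips: it
advertises to the platform-independent layer that the backend implements
the sub-word and single-precision opcodes (LIR_stb/LIR_sts, LIR_ldsb/
LIR_ldss, LIR_ld32f/LIR_st32f). A hypothetical sketch of how such a gate
is consumed by a front end:

    #if NJ_EXPANDED_LOADSTORE_SUPPORTED
        // emit LIR_stb and let the backend pick STRB
    #else
        // legalize in LIR: load word, mask/merge, store word
    #endif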
@ -131,8 +131,7 @@ typedef enum {
     LastReg = D6,
     deprecated_UnknownReg = 32,

-    // special value referring to S14
-    FpSingleScratch = 24
+    S14 = 24
 } Register;
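
A VFP banking fact that this rename leans on (implied rather than stated by
the patch): each double register aliases a pair of single registers, so S14
is the low half of D7. That overlap is why D7 is normally kept out of
allocation and why the _allowD7 escape hatches below exist:

    // Architectural aliasing (not from this patch):
    //   D7 == { S14, S15 }
    // Writing S14 clobbers the low half of D7, so code that uses S14 as a
    // scratch must occasionally be allowed to name D7 explicitly.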
/* ARM condition codes */
@ -621,13 +620,26 @@ enum {
asm_output("ldrb %s, [%s,#%d]", gpn(_d),gpn(_n),(_off)); \
} while(0)
// Load and sign-extend a half word (16 bits). The offset range is ±255, and
// must be aligned to two bytes on some architectures, but we never make
// unaligned accesses so a simple assertion is sufficient here.
#define LDRH(_d,_n,_off) do { \
/* TODO: This is actually LDRSH. Is this correct? */ \
// Load a byte (8 bits), sign-extend to 32 bits. The offset range is
// ±255 (different from LDRB, same as LDRH/LDRSH)
#define LDRSB(_d,_n,_off) do { \
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
underrunProtect(4); \
if (_off < 0) { \
NanoAssert(isU8(-_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xD)<<4) | (((-_off)&0xf0)<<4) | ((-_off)&0xf) ); \
} else { \
NanoAssert(isU8(_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xD)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
} \
asm_output("ldrsb %s, [%s,#%d]", gpn(_d),gpn(_n),(_off)); \
} while(0)
// Load and sign-extend a half word (16 bits). The offset range is ±255, and
// must be aligned to two bytes on some architectures (the caller is responsible
// for ensuring appropriate alignment)
#define LDRH(_d,_n,_off) do { \
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
NanoAssert(((_off) & ~1) == (_off)); \
underrunProtect(4); \
if (_off < 0) { \
NanoAssert(isU8(-_off)); \
@ -636,9 +648,26 @@ enum {
             NanoAssert(isU8(_off));                                     \
             *(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xB)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
         }                                                               \
         asm_output("ldrh %s, [%s,#%d]", gpn(_d),gpn(_n),(_off)); \
   } while(0)

+// Load and sign-extend a half word (16 bits). The offset range is ±255, and
+// must be aligned to two bytes on some architectures (the caller is responsible
+// for ensuring appropriate alignment)
+#define LDRSH(_d,_n,_off) do {                                          \
+        NanoAssert(IsGpReg(_d) && IsGpReg(_n));                         \
+        underrunProtect(4);                                             \
+        if (_off < 0) {                                                 \
+            NanoAssert(isU8(-_off));                                    \
+            *(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xF)<<4) | (((-_off)&0xf0)<<4) | ((-_off)&0xf) ); \
+        } else {                                                        \
+            NanoAssert(isU8(_off));                                     \
+            *(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xF)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
+        }                                                               \
+        asm_output("ldrsh %s, [%s,#%d]", gpn(_d),gpn(_n),(_off)); \
+  } while(0)
+
+// Valid offset for STR and STRB is +/- 4095, STRH only has +/- 255
 #define STR(_d,_n,_off) do {                                            \
         NanoAssert(IsGpReg(_d) && IsGpReg(_n));                         \
         NanoAssert(isU12(_off) || isU12(-_off));                        \
@ -648,6 +677,29 @@ enum {
asm_output("str %s, [%s, #%d]", gpn(_d), gpn(_n), (_off)); \
} while(0)
#define STRB(_d,_n,_off) do { \
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
NanoAssert(isU12(_off) || isU12(-_off)); \
underrunProtect(4); \
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x54<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
else *(--_nIns) = (NIns)( COND_AL | (0x5C<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
asm_output("strb %s, [%s, #%d]", gpn(_d), gpn(_n), (_off)); \
} while(0)
// Only +/- 255 range, unlike STRB/STR
#define STRH(_d,_n,_off) do { \
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
underrunProtect(4); \
if ((_off)<0) { \
NanoAssert(isU8(-_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x14<<20) | ((_n)<<16) | ((_d)<<12) | (((-(_off))&0xF0)<<4) | (0xB<<4) | ((-(_off))&0xF) ); \
} else { \
NanoAssert(isU8(_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x1C<<20) | ((_n)<<16) | ((_d)<<12) | (((_off)&0xF0)<<4) | (0xB<<4) | ((_off)&0xF) ); \
} \
asm_output("strh %s, [%s, #%d]", gpn(_d), gpn(_n), (_off)); \
} while(0)
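
LDRH/LDRSH/LDRSB/STRH above all use the ARM "addressing mode 3" encoding,
whose 8-bit immediate is split across instruction bits [11:8] and [3:0];
the nibble-shuffling expressions implement exactly that split. Worked
example for _off == 0xAB:

    (((0xAB) & 0xF0) << 4) | ((0xAB) & 0x0F)
        == (0xA0 << 4) | 0xB
        == 0xA0B    // high nibble lands in bits [11:8], low in [3:0]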
// Encode a breakpoint. The ID is not important and is ignored by the
// processor, but it can be useful as a marker when debugging emitted code.
#define BKPT_insn ((NIns)( COND_AL | (0x12<<20) | (0x7<<4) ))
@ -813,11 +865,11 @@ enum {
asm_output("fmrdh %s,%s", gpn(_Rd), gpn(_Dn)); \
} while (0)
#define FSTD(_Dd,_Rn,_offs) do { \
#define FSTD_allowD7(_Dd,_Rn,_offs,_allowD7) do { \
underrunProtect(4); \
NanoAssert(ARM_VFP); \
NanoAssert((((_offs) & 3) == 0) && isS8((_offs) >> 2)); \
NanoAssert(IsFpReg(_Dd) && !IsFpReg(_Rn)); \
NanoAssert((IsFpReg(_Dd) || ((_allowD7) && (_Dd) == D7)) && !IsFpReg(_Rn)); \
int negflag = 1<<23; \
intptr_t offs = (_offs); \
if (_offs < 0) { \
@ -828,6 +880,9 @@ enum {
asm_output("fstd %s,%s(%d)", gpn(_Dd), gpn(_Rn), _offs); \
} while (0)
#define FSTD(_Dd,_Rn,_offs) \
FSTD_allowD7(_Dd,_Rn,_offs,0)
#define FLDD_chk(_Dd,_Rn,_offs,_chk) do { \
if(_chk) underrunProtect(4); \
NanoAssert(ARM_VFP); \
@ -844,31 +899,14 @@ enum {
     } while (0)

 #define FLDD(_Dd,_Rn,_offs) FLDD_chk(_Dd,_Rn,_offs,1)

-#define FSITOD(_Dd,_Sm) do {                                    \
-        underrunProtect(4);                                     \
-        NanoAssert(ARM_VFP);                                    \
-        NanoAssert(IsFpReg(_Dd) && ((_Sm) == FpSingleScratch)); \
-        *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2F<<6) | (0<<5) | (0x7) ); \
-        asm_output("fsitod %s,%s", gpn(_Dd), gpn(_Sm)); \
-    } while (0)
-
 #define FUITOD(_Dd,_Sm) do {                                    \
         underrunProtect(4);                                     \
         NanoAssert(ARM_VFP);                                    \
-        NanoAssert(IsFpReg(_Dd) && ((_Sm) == FpSingleScratch)); \
+        NanoAssert(IsFpReg(_Dd) && ((_Sm) == S14));             \
         *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2D<<6) | (0<<5) | (0x7) ); \
         asm_output("fuitod %s,%s", gpn(_Dd), gpn(_Sm)); \
     } while (0)

-#define FMSR(_Sn,_Rd) do {                                      \
-        underrunProtect(4);                                     \
-        NanoAssert(ARM_VFP);                                    \
-        NanoAssert(((_Sn) == FpSingleScratch) && IsGpReg(_Rd)); \
-        *(--_nIns) = (NIns)( COND_AL | (0xE0<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
-        asm_output("fmsr %s,%s", gpn(_Sn), gpn(_Rd)); \
-    } while (0)
-
#define FNEGD(_Dd,_Dm) do { \
underrunProtect(4); \
NanoAssert(ARM_VFP); \
@ -936,17 +974,106 @@ enum {
 #define FMRS(_Rd,_Sn) do {                                      \
         underrunProtect(4);                                     \
         NanoAssert(ARM_VFP);                                    \
-        NanoAssert(((_Sn) == FpSingleScratch) && IsGpReg(_Rd)); \
+        NanoAssert(((_Sn) == S14) && IsGpReg(_Rd));             \
         *(--_nIns) = (NIns)( COND_AL | (0xE1<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
         asm_output("fmrs %s,%s", gpn(_Rd), gpn(_Sn)); \
     } while (0)

-#define FTOSID(_Sd,_Dm) do {                                    \
-        underrunProtect(4);                                     \
-        NanoAssert(ARM_VFP);                                    \
-        NanoAssert(((_Sd) == FpSingleScratch) && IsFpReg(_Dm)); \
-        *(--_nIns) = (NIns)( COND_AL | (0xEBD<<16) | (0x7<<12) | (0xB4<<4) | FpRegNum(_Dm) ); \
-        asm_output("ftosid %s,%s", gpn(_Sd), gpn(_Dm)); \
-    } while (0)
+/*
+ * The following instructions can only be used with S14 as the
+ * single-precision register; that limitation can be removed if
+ * needed, but we'd have to teach NJ about all the single precision
+ * regs, and their encoding is strange (top 4 bits usually in a block,
+ * low bit elsewhere).
+ */
+
+#define FSITOD(_Dd,_Sm) do {                                    \
+        underrunProtect(4);                                     \
+        NanoAssert(ARM_VFP);                                    \
+        NanoAssert(IsFpReg(_Dd) && ((_Sm) == S14));             \
+        *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2F<<6) | (0<<5) | (0x7) ); \
+        asm_output("fsitod %s,%s", gpn(_Dd), gpn(_Sm)); \
+    } while (0)
+
+#define FMSR(_Sn,_Rd) do {                                      \
+        underrunProtect(4);                                     \
+        NanoAssert(ARM_VFP);                                    \
+        NanoAssert(((_Sn) == S14) && IsGpReg(_Rd));             \
+        *(--_nIns) = (NIns)( COND_AL | (0xE0<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
+        asm_output("fmsr %s,%s", gpn(_Sn), gpn(_Rd)); \
+    } while (0)
+
+#define FMRS(_Rd,_Sn) do {                                      \
+        underrunProtect(4);                                     \
+        NanoAssert(ARM_VFP);                                    \
+        NanoAssert(((_Sn) == S14) && IsGpReg(_Rd));             \
+        *(--_nIns) = (NIns)( COND_AL | (0xE1<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
+        asm_output("fmrs %s,%s", gpn(_Rd), gpn(_Sn)); \
+    } while (0)
+
+#define FMSR(_Sn,_Rd) do {                                      \
+        underrunProtect(4);                                     \
+        NanoAssert(ARM_VFP);                                    \
+        NanoAssert(((_Sn) == S14) && IsGpReg(_Rd));             \
+        *(--_nIns) = (NIns)( COND_AL | (0xE0<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
+        asm_output("fmsr %s,%s", gpn(_Sn), gpn(_Rd)); \
+    } while (0)
+
+#define FCVTSD(_Sd,_Dm) do {                                    \
+        underrunProtect(4);                                     \
+        NanoAssert(ARM_VFP);                                    \
+        NanoAssert(((_Sd) == S14) && IsFpReg(_Dm));             \
+        *(--_nIns) = (NIns)( COND_AL | (0xEB7<<16) | (0x7<<12) | (0xBC<<4) | (FpRegNum(_Dm)) ); \
+        asm_output("[0x%08x] fcvtsd s14,%s", *_nIns, gpn(_Dm)); \
+    } while (0)
+
+#define FCVTDS_allowD7(_Dd,_Sm,_allowD7) do {                   \
+        underrunProtect(4);                                     \
+        NanoAssert(ARM_VFP);                                    \
+        NanoAssert(((_Sm) == S14) && (IsFpReg(_Dd) || ((_allowD7) && (_Dd) == D7))); \
+        *(--_nIns) = (NIns)( COND_AL | (0xEB7<<16) | (FpRegNum(_Dd)<<12) | (0xAC<<4) | (0x7) ); \
+        asm_output("[0x%08x] fcvtds %s,s14", *_nIns, gpn(_Dd)); \
+    } while(0)
+
+#define FCVTDS(_Dd,_Sm) \
+    FCVTDS_allowD7(_Dd,_Sm,0)
+
+#define FLDS(_Sd,_Rn,_offs) do {                                \
+        underrunProtect(4);                                     \
+        NanoAssert(ARM_VFP);                                    \
+        NanoAssert(((_Sd) == S14) && !IsFpReg(_Rn));            \
+        NanoAssert((((_offs) & 3) == 0) && isS8((_offs) >> 2)); \
+        int addflag = 1<<23;                                    \
+        intptr_t offs = (_offs);                                \
+        if (offs < 0) {                                         \
+            addflag = 0;                                        \
+            offs = -offs;                                       \
+        }                                                       \
+        *(--_nIns) = (NIns)( COND_AL | (0xD1<<20) | ((_Rn)<<16) | (0x7<<12) | (0xA << 8) | addflag | ((offs>>2)&0xff) ); \
+        asm_output("[0x%08x] flds s14, [%s, #%d]", *_nIns, gpn(_Rn), (_offs)); \
+    } while (0)
+
+#define FSTS(_Sd,_Rn,_offs) do {                                \
+        underrunProtect(4);                                     \
+        NanoAssert(ARM_VFP);                                    \
+        NanoAssert(((_Sd) == S14) && !IsFpReg(_Rn));            \
+        NanoAssert((((_offs) & 3) == 0) && isS8((_offs) >> 2)); \
+        int addflag = 1<<23;                                    \
+        intptr_t offs = (_offs);                                \
+        if (offs < 0) {                                         \
+            addflag = 0;                                        \
+            offs = -offs;                                       \
+        }                                                       \
+        *(--_nIns) = (NIns)( COND_AL | (0xD0<<20) | ((_Rn)<<16) | (0x7<<12) | (0xA << 8) | addflag | ((offs>>2)&0xff) ); \
+        asm_output("[0x%08x] fsts s14, [%s, #%d]", *_nIns, gpn(_Rn), (_offs)); \
+    } while (0)
+
+#define FTOSID(_Sd,_Dm) do {                                    \
+        underrunProtect(4);                                     \
+        NanoAssert(ARM_VFP);                                    \
+        NanoAssert(((_Sd) == S14) && IsFpReg(_Dm));             \
+        *(--_nIns) = (NIns)( COND_AL | (0xEBD<<16) | (0x7<<12) | (0xB4<<4) | FpRegNum(_Dm) ); \
+        asm_output("ftosid s14, %s", gpn(_Dm)); \
+    } while (0)
} // namespace nanojit
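
A closing note on the FLDS/FSTS (and FSTD/FLDD) offset checks: the VFP
encoding holds an unsigned 8-bit word count plus an add/subtract bit, so
the hardware can reach ±1020 bytes, while the isS8((_offs) >> 2) assertions
conservatively accept only about ±508. Callers therefore route larger or
unaligned displacements through IP, as asm_load64 does:

    // Pattern from the callers (copied from the ld32f path above):
    if (!isS8(offset >> 2) || (offset&3) != 0) {
        FLDS(S14, IP, 0);             // emitted first, executes second
        asm_add_imm(IP, rb, offset);  // emitted second, executes first
    } else {
        FLDS(S14, rb, offset);
    }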