Bug 849489. Optimise use of vstm/vldm. r=mjrosenb

--HG--
extra : rebase_source : e5bbcc5fe569c0f81114587330ec80212fc9b560
This commit is contained in:
Nicholas Cameron 2013-03-22 15:37:10 +13:00
parent 2508f07c2a
commit f636036c33
4 changed files with 59 additions and 41 deletions

View File

@ -150,10 +150,11 @@ MacroAssembler::PushRegsInMask(RegisterSet set)
}
JS_ASSERT(diffG == 0);
reserveStack(diffF);
#ifdef JS_CPU_ARM
diffF -= transferMultipleByRuns(set.fpus(), IsStore, StackPointer, IA);
adjustFrame(diffF);
diffF += transferMultipleByRuns(set.fpus(), IsStore, StackPointer, DB);
#else
reserveStack(diffF);
for (FloatRegisterIterator iter(set.fpus()); iter.more(); iter++) {
diffF -= sizeof(double);
storeDouble(*iter, Address(StackPointer, diffF));
@ -175,6 +176,7 @@ MacroAssembler::PopRegsInMaskIgnore(RegisterSet set, RegisterSet ignore)
// the registers we previously saved to the stack.
if (ignore.empty(true)) {
diffF -= transferMultipleByRuns(set.fpus(), IsLoad, StackPointer, IA);
adjustFrame(-reservedF);
} else
#endif
{
@ -183,8 +185,8 @@ MacroAssembler::PopRegsInMaskIgnore(RegisterSet set, RegisterSet ignore)
if (!ignore.has(*iter))
loadDouble(Address(StackPointer, diffF), *iter);
}
freeStack(reservedF);
}
freeStack(reservedF);
JS_ASSERT(diffF == 0);
#ifdef JS_CPU_ARM

View File

@ -1622,14 +1622,19 @@ class Assembler
dtmCond = c;
dtmLastReg = -1;
dtmMode = mode;
dtmDelta = 0;
}
// Adds rn to the float multi-register transfer that is currently being built.
// The first register is remembered as the start of the run; every later
// register must be adjacent to the previously added one.
// NOTE(review): the paired near-duplicate lines below (the two vdtmFirstReg
// assignments and the two trailing asserts) look like the before/after sides
// of a diff rendered without +/- markers -- confirm which side is live.
void transferFloatReg(VFPRegister rn)
{
if (dtmLastReg == -1) {
// dtmLastReg is reset to -1 when a transfer is started, so this is the
// first register of the run.
vdtmFirstReg = rn;
vdtmFirstReg = rn.code();
} else {
if (dtmDelta == 0) {
// dtmDelta is still at its reset value: this second register fixes the
// direction of the run (ascending or descending register codes).
dtmDelta = rn.code() - dtmLastReg;
JS_ASSERT(dtmDelta == 1 || dtmDelta == -1);
}
JS_ASSERT(dtmLastReg >= 0);
JS_ASSERT(rn.code() == unsigned(dtmLastReg) + 1);
// Every register must continue the run in the direction chosen above.
JS_ASSERT(rn.code() == unsigned(dtmLastReg) + dtmDelta);
}
// Remember the most recently added register for the next adjacency check.
dtmLastReg = rn.code();
}
@ -1637,16 +1642,20 @@ class Assembler
JS_ASSERT(dtmActive);
dtmActive = false;
JS_ASSERT(dtmLastReg != -1);
dtmDelta = dtmDelta ? dtmDelta : 1;
// fencepost problem.
int len = dtmLastReg - vdtmFirstReg.code() + 1;
as_vdtm(dtmLoadStore, dtmBase, vdtmFirstReg, len, dtmCond);
int len = dtmDelta * (dtmLastReg - vdtmFirstReg) + 1;
as_vdtm(dtmLoadStore, dtmBase,
VFPRegister(FloatRegister::FromCode(Min(vdtmFirstReg, dtmLastReg))),
len, dtmCond);
}
private:
int dtmRegBitField;
int vdtmFirstReg;
int dtmLastReg;
int dtmDelta;
Register dtmBase;
VFPRegister vdtmFirstReg;
DTMWriteBack dtmUpdate;
DTMMode dtmMode;
LoadStore dtmLoadStore;

View File

@ -1386,37 +1386,6 @@ MacroAssemblerARM::ma_vstr(VFPRegister src, Register base, Register index, int32
ma_vstr(src, Operand(ScratchRegister, 0), cc);
}
// Loads or stores (per ls) every register in set at [rm], emitting one float
// multi-register transfer per run of consecutive register codes.
// mode must be IA or DB. The returned offset is the sum of one
// sizeof(double) step per register: positive for IA, negative for DB.
int32_t
MacroAssemblerARM::transferMultipleByRuns(FloatRegisterSet set, LoadStore ls,
Register rm, DTMMode mode)
{
int32_t delta;
if (mode == IA) {
delta = sizeof(double);
} else if (mode == DB) {
delta = -sizeof(double);
} else {
// NOTE(review): delta stays unassigned on this path; that is only safe
// if JS_NOT_REACHED never returns -- confirm for release builds.
JS_NOT_REACHED("Invalid data transfer addressing mode");
}
int32_t offset = 0;
FloatRegisterForwardIterator iter(set);
while (iter.more()) {
// Each pass of the outer loop emits one transfer covering one run.
startFloatTransferM(ls, rm, mode, WriteBack);
int32_t reg = (*iter).code_;
do {
offset += delta;
transferFloatReg(*iter);
// The run continues only while another register exists and its code is
// exactly one above the previous one; ++reg runs only if more() is true.
} while ((++iter).more() && (*iter).code_ == ++reg);
finishFloatTransfer();
}
JS_ASSERT(offset == set.size() * sizeof(double) * (mode == DB ? -1 : 1));
// Presumably undoes the write-back adjustments so that rm regains its
// initial value -- confirm against ma_sub's operand order.
ma_sub(Imm32(offset), rm);
return offset;
}
bool
MacroAssemblerARMCompat::buildFakeExitFrame(const Register &scratch, uint32_t *offset)
{

View File

@ -334,10 +334,48 @@ class MacroAssemblerARM : public Assembler
// Float registers can only be loaded/stored in continuous runs
// when using vstm/vldm.
// This function breaks set into continuous runs and loads/stores
// them at [rm]. rm will be modified, but returned to its initial value.
// them at [rm]. rm will be modified and left in a state logically
// suitable for the next load/store.
// Returns the offset from [rm] for the logical next load/store.
// Dispatches on the addressing mode to transferMultipleByRunsImpl, choosing
// the register iterator type and the matching sign. Only IA and DB are
// handled; any other mode reaches JS_NOT_REACHED.
// NOTE(review): the two "Register rm, DTMMode mode" lines below look like the
// old declaration tail and the new definition head of a diff rendered
// without +/- markers -- confirm which one is live.
int32_t transferMultipleByRuns(FloatRegisterSet set, LoadStore ls,
Register rm, DTMMode mode);
Register rm, DTMMode mode)
{
if (mode == IA) {
// IA: use the forward iterator with sign = +1.
return transferMultipleByRunsImpl
<FloatRegisterForwardIterator>(set, ls, rm, mode, 1);
}
if (mode == DB) {
// DB: use FloatRegisterIterator with sign = -1 (the sign used for a
// backward traversal).
return transferMultipleByRunsImpl
<FloatRegisterIterator>(set, ls, rm, mode, -1);
}
JS_NOT_REACHED("Invalid data transfer addressing mode");
}
private:
// Implementation for transferMultipleByRuns so we can use different
// iterators for forward/backward traversals.
// The sign argument should be 1 if we traverse forwards, -1 if we
// traverse backwards.
// Emits one float multi-register transfer per run of adjacent register codes
// and returns the accumulated offset: sign * sizeof(double) per register.
template<typename RegisterIterator> int32_t
transferMultipleByRunsImpl(FloatRegisterSet set, LoadStore ls,
Register rm, DTMMode mode, int32_t sign)
{
// Per-register step: one double, in the direction given by sign.
int32_t delta = sign * sizeof(double);
int32_t offset = 0;
RegisterIterator iter(set);
while (iter.more()) {
// Each pass of the outer loop emits one transfer covering one run.
startFloatTransferM(ls, rm, mode, WriteBack);
int32_t reg = (*iter).code_;
do {
offset += delta;
transferFloatReg(*iter);
// The run continues only while another register exists and its code is
// the previous code plus sign; reg is advanced only if more() is true.
} while ((++iter).more() && (*iter).code_ == (reg += sign));
finishFloatTransfer();
}
// Every register in the set contributes exactly one step to offset.
JS_ASSERT(offset == set.size() * sizeof(double) * sign);
return offset;
}
};
class MacroAssemblerARMCompat : public MacroAssemblerARM