2013-01-25 23:09:11 +01:00
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
2014-03-29 20:34:17 -07:00
# include <cstring>
2014-12-06 12:26:58 +01:00
2013-06-09 23:15:59 +10:00
# include "Common/CPUDetect.h"
2020-08-15 15:22:44 +02:00
# include "Common/Log.h"
2014-12-06 12:26:58 +01:00
# include "Core/MIPS/MIPS.h"
2013-01-25 23:09:11 +01:00
# include "Core/MIPS/ARM/ArmRegCacheFPU.h"
2014-12-06 12:26:58 +01:00
# include "Core/MIPS/ARM/ArmJit.h"
2014-03-15 10:45:39 +01:00
# include "Core/MIPS/MIPSTables.h"
2013-01-25 23:09:11 +01:00
2013-02-09 18:18:32 +01:00
using namespace ArmGen ;
2014-12-13 21:11:36 +01:00
using namespace ArmJitConstants ;
2013-02-09 18:18:32 +01:00
2022-04-13 10:36:37 +02:00
// Constructor. Note: vr is a view into mr offset by 32, so vr[i] aliases mr[32 + i]
// (mr[0..31] are the scalar FPU regs, mr[32..] the VFPU regs).
ArmRegCacheFPU::ArmRegCacheFPU(MIPSState *mipsState, MIPSComp::JitState *js, MIPSComp::JitOptions *jo) : mips_(mipsState), js_(js), jo_(jo), vr(mr + 32) {}
2013-02-09 18:18:32 +01:00
// Called at the start of each JIT block: reset the cache to the pristine state.
void ArmRegCacheFPU::Start(MIPSAnalyst::AnalysisResults &stats) {
	// Lazily build the pristine template tables the first time through,
	// then every block start is just two memcpys.
	if (!initialReady) {
		SetupInitialRegs();
		initialReady = true;
	}
	memcpy(ar, arInitial, sizeof(ar));
	memcpy(mr, mrInitial, sizeof(mr));
	pendingFlush = false;
}
void ArmRegCacheFPU : : SetupInitialRegs ( ) {
2022-04-13 10:36:37 +02:00
for ( int i = 0 ; i < NUM_ARMFPUREG ; i + + ) {
2014-03-29 20:34:17 -07:00
arInitial [ i ] . mipsReg = - 1 ;
arInitial [ i ] . isDirty = false ;
2013-02-09 18:18:32 +01:00
}
for ( int i = 0 ; i < NUM_MIPSFPUREG ; i + + ) {
2014-03-29 20:34:17 -07:00
mrInitial [ i ] . loc = ML_MEM ;
mrInitial [ i ] . reg = INVALID_REG ;
mrInitial [ i ] . spillLock = false ;
mrInitial [ i ] . tempLock = false ;
2013-02-09 18:18:32 +01:00
}
2022-04-13 10:36:37 +02:00
for ( int i = 0 ; i < NUM_ARMQUADS ; i + + ) {
2014-12-06 12:26:58 +01:00
qr [ i ] . isDirty = false ;
qr [ i ] . mipsVec = - 1 ;
qr [ i ] . sz = V_Invalid ;
qr [ i ] . spillLock = false ;
qr [ i ] . isTemp = false ;
memset ( qr [ i ] . vregs , 0xff , 4 ) ;
}
2013-02-09 18:18:32 +01:00
}
2014-12-06 12:26:58 +01:00
// Returns the preferred allocation order for ARM VFP S registers, writing its
// length to &count. There are two orders: one for NEON-VFPU mode (low quads
// only, upper quads reserved for VFPU data) and one for plain NEON.
const ARMReg *ArmRegCacheFPU::GetMIPSAllocationOrder(int &count) {
	// VFP mapping
	// VFPU registers and regular FP registers are mapped interchangably on top of the standard
	// 16 FPU registers.

	// NEON mapping
	// We map FPU and VFPU registers entirely separately. FPU is mapped to 12 of the bottom 16 S registers.
	// VFPU is mapped to the upper 48 regs, 32 of which can only be reached through NEON
	// (or D16-D31 as doubles, but not relevant).
	// Might consider shifting the split in the future, giving more regs to NEON allowing it to map more quads.

	// We should attempt to map scalars to low Q registers and wider things to high registers,
	// as the NEON instructions are all 2-vector or 4-vector, they don't do scalar, we want to be
	// able to use regular VFP instructions too.
	static const ARMReg allocationOrderNEON[] = {
		// Reserve four temp registers. Useful when building quads until we really figure out
		// how to do that best.
		S4,  S5,  S6,  S7,   // Q1
		S8,  S9,  S10, S11,  // Q2
		S12, S13, S14, S15,  // Q3
		S16, S17, S18, S19,  // Q4
		S20, S21, S22, S23,  // Q5
		S24, S25, S26, S27,  // Q6
		S28, S29, S30, S31,  // Q7
		// Q8-Q15 free for NEON tricks
	};

	static const ARMReg allocationOrderNEONVFPU[] = {
		// Reserve four temp registers. Useful when building quads until we really figure out
		// how to do that best.
		S4,  S5,  S6,  S7,   // Q1
		S8,  S9,  S10, S11,  // Q2
		S12, S13, S14, S15,  // Q3
		// Q4-Q15 free for VFPU
	};

	// NOTE: It's important that S2/S3 are not allocated with bNEON, even if !useNEONVFPU.
	// They are used by a few instructions, like vh2f.
	if (jo_->useNEONVFPU) {
		count = sizeof(allocationOrderNEONVFPU) / sizeof(const ARMReg);
		return allocationOrderNEONVFPU;
	} else {
		count = sizeof(allocationOrderNEON) / sizeof(const ARMReg);
		return allocationOrderNEON;
	}
}
2014-12-06 12:26:58 +01:00
bool ArmRegCacheFPU : : IsMapped ( MIPSReg r ) {
return mr [ r ] . loc = = ML_ARMREG ;
}
2013-02-09 18:18:32 +01:00
// Maps a MIPS FPU/VFPU reg to an ARM S register, loading its value from memory
// unless MAP_NOINIT was requested. Reuses an existing mapping when possible,
// otherwise takes a free reg from the allocation order, spilling a non-locked
// reg if none is free. Returns S0 on NEON-mode misuse and INVALID_REG if all
// candidates are spill-locked.
ARMReg ArmRegCacheFPU::MapReg(MIPSReg mipsReg, int mapFlags) {
	// INFO_LOG(Log::JIT, "FPR MapReg: %i flags=%i", mipsReg, mapFlags);
	if (jo_->useNEONVFPU && mipsReg >= 32) {
		// In NEON mode, VFPU regs (index >= 32) go through the quad mapper instead.
		ERROR_LOG(Log::JIT, "Cannot map VFPU registers to ARM VFP registers in NEON mode. PC=%08x", js_->compilerPC);
		return S0;
	}

	pendingFlush = true;
	// Let's see if it's already mapped. If so we just need to update the dirty flag.
	// We don't need to check for ML_NOINIT because we assume that anyone who maps
	// with that flag immediately writes a "known" value to the register.
	if (mr[mipsReg].loc == ML_ARMREG) {
		if (ar[mr[mipsReg].reg].mipsReg != mipsReg) {
			ERROR_LOG(Log::JIT, "Reg mapping out of sync! MR %i", mipsReg);
		}
		if (mapFlags & MAP_DIRTY) {
			ar[mr[mipsReg].reg].isDirty = true;
		}
		//INFO_LOG(Log::JIT, "Already mapped %i to %i", mipsReg, mr[mipsReg].reg);
		return (ARMReg)(mr[mipsReg].reg + S0);
	}

	// Okay, not mapped, so we need to allocate an ARM register.
	int allocCount;
	const ARMReg *allocOrder = GetMIPSAllocationOrder(allocCount);

allocate:
	// First pass: look for a completely free register in the allocation order.
	for (int i = 0; i < allocCount; i++) {
		int reg = allocOrder[i] - S0;
		if (ar[reg].mipsReg == -1) {
			// That means it's free. Grab it, and load the value into it (if requested).
			ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
			if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) {
				// Temps (>= TEMP0) have no backing memory worth loading.
				if (mr[mipsReg].loc == ML_MEM && mipsReg < TEMP0) {
					emit_->VLDR((ARMReg)(reg + S0), CTXREG, GetMipsRegOffset(mipsReg));
				}
			}
			ar[reg].mipsReg = mipsReg;
			mr[mipsReg].loc = ML_ARMREG;
			mr[mipsReg].reg = reg;
			//INFO_LOG(Log::JIT, "Mapped %i to %i", mipsReg, mr[mipsReg].reg);
			return (ARMReg)(reg + S0);
		}
	}

	// Still nothing. Let's spill a reg and goto 10.
	// TODO: Use age or something to choose which register to spill?
	// TODO: Spill dirty regs first? or opposite?
	int bestToSpill = -1;
	for (int i = 0; i < allocCount; i++) {
		int reg = allocOrder[i] - S0;
		// Skip regs whose MIPS reg is spill- or temp-locked.
		if (ar[reg].mipsReg != -1 && (mr[ar[reg].mipsReg].spillLock || mr[ar[reg].mipsReg].tempLock))
			continue;
		bestToSpill = reg;
		break;
	}

	if (bestToSpill != -1) {
		FlushArmReg((ARMReg)(S0 + bestToSpill));
		goto allocate;
	}

	// Uh oh, we have all them spilllocked....
	ERROR_LOG(Log::JIT, "Out of spillable registers at PC %08x!!!", js_->compilerPC);
	return INVALID_REG;
}
// Map two source registers for reading, holding spill locks on both so that
// mapping one cannot evict the other.
void ArmRegCacheFPU::MapInIn(MIPSReg rd, MIPSReg rs) {
	SpillLock(rd, rs);
	MapReg(rd);
	MapReg(rs);
	ReleaseSpillLock(rd);
	ReleaseSpillLock(rs);
}
void ArmRegCacheFPU : : MapDirtyIn ( MIPSReg rd , MIPSReg rs , bool avoidLoad ) {
SpillLock ( rd , rs ) ;
2019-06-02 22:03:26 -07:00
bool load = ! avoidLoad | | rd = = rs ;
MapReg ( rd , load ? MAP_DIRTY : MAP_NOINIT ) ;
2013-02-09 18:18:32 +01:00
MapReg ( rs ) ;
2013-07-31 00:07:34 +02:00
ReleaseSpillLock ( rd ) ;
ReleaseSpillLock ( rs ) ;
2013-02-09 18:18:32 +01:00
}
void ArmRegCacheFPU : : MapDirtyInIn ( MIPSReg rd , MIPSReg rs , MIPSReg rt , bool avoidLoad ) {
SpillLock ( rd , rs , rt ) ;
2019-06-02 22:03:26 -07:00
bool load = ! avoidLoad | | ( rd = = rs | | rd = = rt ) ;
MapReg ( rd , load ? MAP_DIRTY : MAP_NOINIT ) ;
2013-02-09 18:18:32 +01:00
MapReg ( rt ) ;
MapReg ( rs ) ;
2013-07-31 00:07:34 +02:00
ReleaseSpillLock ( rd ) ;
ReleaseSpillLock ( rs ) ;
ReleaseSpillLock ( rt ) ;
2013-02-09 18:18:32 +01:00
}
2013-02-15 22:38:28 +01:00
void ArmRegCacheFPU : : SpillLockV ( const u8 * v , VectorSize sz ) {
for ( int i = 0 ; i < GetNumVectorElements ( sz ) ; i + + ) {
vr [ v [ i ] ] . spillLock = true ;
}
}
void ArmRegCacheFPU : : SpillLockV ( int vec , VectorSize sz ) {
u8 v [ 4 ] ;
GetVectorRegs ( v , sz , vec ) ;
SpillLockV ( v , sz ) ;
}
// Map a VFPU register; VFPU regs live at offset 32 in the mr table, so this
// just delegates to the scalar mapper.
void ArmRegCacheFPU::MapRegV(int vreg, int flags) {
	MapReg(vreg + 32, flags);
}
2013-07-28 16:13:19 +02:00
// Copy the current value of VFPU reg vreg into armReg. If vreg is already in
// an ARM register this is a plain reg-reg move; otherwise vreg is mapped first
// (loading it from memory) and then copied.
void ArmRegCacheFPU::LoadToRegV(ARMReg armReg, int vreg) {
	if (vr[vreg].loc == ML_ARMREG) {
		emit_->VMOV(armReg, (ARMReg)(S0 + vr[vreg].reg));
	} else {
		MapRegV(vreg);
		emit_->VMOV(armReg, V(vreg));
	}
}
2013-07-31 00:07:34 +02:00
void ArmRegCacheFPU : : MapRegsAndSpillLockV ( int vec , VectorSize sz , int flags ) {
2013-02-15 22:38:28 +01:00
u8 v [ 4 ] ;
GetVectorRegs ( v , sz , vec ) ;
SpillLockV ( v , sz ) ;
for ( int i = 0 ; i < GetNumVectorElements ( sz ) ; i + + ) {
MapRegV ( v [ i ] , flags ) ;
}
}
2013-07-31 00:07:34 +02:00
void ArmRegCacheFPU : : MapRegsAndSpillLockV ( const u8 * v , VectorSize sz , int flags ) {
2013-02-15 22:38:28 +01:00
SpillLockV ( v , sz ) ;
for ( int i = 0 ; i < GetNumVectorElements ( sz ) ; i + + ) {
MapRegV ( v [ i ] , flags ) ;
}
}
2013-07-28 12:13:43 +02:00
// Map two VFPU source registers for reading, spill-locking both during mapping.
void ArmRegCacheFPU::MapInInV(int vs, int vt) {
	SpillLockV(vs);
	SpillLockV(vt);
	MapRegV(vs);
	MapRegV(vt);
	ReleaseSpillLockV(vs);
	ReleaseSpillLockV(vt);
}
void ArmRegCacheFPU : : MapDirtyInV ( int vd , int vs , bool avoidLoad ) {
2019-06-02 22:03:26 -07:00
bool load = ! avoidLoad | | ( vd = = vs ) ;
2013-07-28 12:13:43 +02:00
SpillLockV ( vd ) ;
SpillLockV ( vs ) ;
2019-06-02 22:03:26 -07:00
MapRegV ( vd , load ? MAP_DIRTY : MAP_NOINIT ) ;
2013-07-28 12:13:43 +02:00
MapRegV ( vs ) ;
ReleaseSpillLockV ( vd ) ;
ReleaseSpillLockV ( vs ) ;
}
2013-07-30 18:15:48 +02:00
void ArmRegCacheFPU : : MapDirtyInInV ( int vd , int vs , int vt , bool avoidLoad ) {
2019-06-02 22:03:26 -07:00
bool load = ! avoidLoad | | ( vd = = vs | | vd = = vt ) ;
2013-07-30 18:15:48 +02:00
SpillLockV ( vd ) ;
SpillLockV ( vs ) ;
SpillLockV ( vt ) ;
2019-06-02 22:03:26 -07:00
MapRegV ( vd , load ? MAP_DIRTY : MAP_NOINIT ) ;
2013-07-30 18:15:48 +02:00
MapRegV ( vs ) ;
MapRegV ( vt ) ;
ReleaseSpillLockV ( vd ) ;
ReleaseSpillLockV ( vs ) ;
ReleaseSpillLockV ( vt ) ;
}
2013-02-09 18:18:32 +01:00
void ArmRegCacheFPU : : FlushArmReg ( ARMReg r ) {
2014-12-06 12:26:58 +01:00
if ( r > = S0 & & r < = S31 ) {
int reg = r - S0 ;
if ( ar [ reg ] . mipsReg = = - 1 ) {
// Nothing to do, reg not mapped.
return ;
2013-02-11 23:10:11 +01:00
}
2014-12-06 12:26:58 +01:00
if ( ar [ reg ] . mipsReg ! = - 1 ) {
if ( ar [ reg ] . isDirty & & mr [ ar [ reg ] . mipsReg ] . loc = = ML_ARMREG )
{
2024-07-14 14:42:59 +02:00
//INFO_LOG(Log::JIT, "Flushing ARM reg %i", reg);
2014-12-06 12:26:58 +01:00
emit_ - > VSTR ( r , CTXREG , GetMipsRegOffset ( ar [ reg ] . mipsReg ) ) ;
}
// IMMs won't be in an ARM reg.
mr [ ar [ reg ] . mipsReg ] . loc = ML_MEM ;
mr [ ar [ reg ] . mipsReg ] . reg = INVALID_REG ;
} else {
2024-07-14 14:42:59 +02:00
ERROR_LOG ( Log : : JIT , " Dirty but no mipsreg? " ) ;
2014-12-06 12:26:58 +01:00
}
ar [ reg ] . isDirty = false ;
ar [ reg ] . mipsReg = - 1 ;
} else if ( r > = D0 & & r < = D31 ) {
// TODO: Convert to S regs and flush them individually.
} else if ( r > = Q0 & & r < = Q15 ) {
QFlush ( r ) ;
2013-02-09 18:18:32 +01:00
}
}
2014-12-06 12:26:58 +01:00
// Flush a single VFPU register (VFPU regs are stored at offset 32 in mr).
void ArmRegCacheFPU::FlushV(MIPSReg r) {
	FlushR(r + 32);
}
/*
void ArmRegCacheFPU : : FlushQWithV ( MIPSReg r ) {
// Look for it in all the quads. If it's in any, flush that quad clean.
int flushCount = 0 ;
for ( int i = 0 ; i < MAX_ARMQUADS ; i + + ) {
if ( qr [ i ] . sz = = V_Invalid )
continue ;
int n = qr [ i ] . sz ;
bool flushThis = false ;
for ( int j = 0 ; j < n ; j + + ) {
if ( qr [ i ] . vregs [ j ] = = r ) {
flushThis = true ;
}
}
if ( flushThis ) {
QFlush ( i ) ;
flushCount + + ;
}
}
if ( flushCount > 1 ) {
2024-07-14 14:42:59 +02:00
WARN_LOG ( Log : : JIT , " ERROR: More than one quad was flushed to flush reg %i " , r ) ;
2014-12-06 12:26:58 +01:00
}
}
*/
2013-02-16 02:06:02 +01:00
// Write a single MIPS FPU/VFPU reg back to memory if dirty, and unmap it.
// Handles the rare case where the reg is currently one lane of a mapped quad.
void ArmRegCacheFPU::FlushR(MIPSReg r) {
	switch (mr[r].loc) {
	case ML_IMM:
		// IMM is always "dirty".
		// IMM is not allowed for FP (yet).
		ERROR_LOG(Log::JIT, "Imm in FP register?");
		break;

	case ML_ARMREG:
		if (mr[r].reg == INVALID_REG) {
			ERROR_LOG(Log::JIT, "FlushR: MipsReg had bad ArmReg");
		}

		if (mr[r].reg >= Q0 && mr[r].reg <= Q15) {
			// This should happen rarely, but occasionally we need to flush a single stray
			// mipsreg that's been part of a quad.
			int quad = mr[r].reg - Q0;
			if (qr[quad].isDirty) {
				WARN_LOG(Log::JIT, "FlushR found quad register %i - PC=%08x", quad, js_->compilerPC);
				// Store just this reg's lane of the quad back to MIPSState.
				emit_->ADDI2R(R0, CTXREG, GetMipsRegOffset(r), R1);
				emit_->VST1_lane(F_32, (ARMReg)mr[r].reg, R0, mr[r].lane, true);
			}
		} else {
			if (ar[mr[r].reg].isDirty) {
				//INFO_LOG(Log::JIT, "Flushing dirty reg %i", mr[r].reg);
				emit_->VSTR((ARMReg)(mr[r].reg + S0), CTXREG, GetMipsRegOffset(r));
				ar[mr[r].reg].isDirty = false;
			}
			ar[mr[r].reg].mipsReg = -1;
		}
		break;

	case ML_MEM:
		// Already there, nothing to do.
		break;

	default:
		//BAD
		break;
	}
	// Whatever happened above, r now lives in memory only.
	mr[r].loc = ML_MEM;
	mr[r].reg = (int)INVALID_REG;
}
2014-12-06 12:26:58 +01:00
// Scalar only. Need a similar one for sequential Q vectors.
2022-04-13 10:36:37 +02:00
int ArmRegCacheFPU : : FlushGetSequential ( int a ) {
2014-03-11 21:43:48 +01:00
int c = 1 ;
int lastMipsOffset = GetMipsRegOffset ( ar [ a ] . mipsReg ) ;
a + + ;
2022-04-13 10:36:37 +02:00
while ( a < 32 ) {
2014-03-11 21:43:48 +01:00
if ( ! ar [ a ] . isDirty | | ar [ a ] . mipsReg = = - 1 )
break ;
int mipsOffset = GetMipsRegOffset ( ar [ a ] . mipsReg ) ;
if ( mipsOffset ! = lastMipsOffset + 4 ) {
break ;
}
lastMipsOffset = mipsOffset ;
a + + ;
c + + ;
}
return c ;
}
2014-03-11 11:02:22 +01:00
// Write every dirty cached register (scalars and quads) back to MIPSState and
// unmap everything. Runs of registers with sequential memory offsets are
// stored with a single VSTMIA.
void ArmRegCacheFPU::FlushAll() {
	if (!pendingFlush) {
		// Nothing allocated. FPU regs are not nearly as common as GPR.
		return;
	}

	// Discard temps!
	for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; i++) {
		DiscardR(i);
	}

	// Flush quads!
	// These could also use sequential detection.
	for (int i = 4; i < NUM_ARMQUADS; i++) {
		QFlush(i);
	}

	// Loop through the ARM registers, then use GetMipsRegOffset to determine if MIPS registers are
	// sequential. This is necessary because we store VFPU registers in a staggered order to get
	// columns sequential (most VFPU math in nearly all games is in columns, not rows).
	int numArmRegs;
	// We rely on the allocation order being sequential.
	const ARMReg baseReg = GetMIPSAllocationOrder(numArmRegs)[0];

	for (int i = 0; i < numArmRegs; i++) {
		int a = (baseReg - S0) + i;
		int m = ar[a].mipsReg;

		if (ar[a].isDirty) {
			if (m == -1) {
				INFO_LOG(Log::JIT, "ARM reg %i is dirty but has no mipsreg", a);
				continue;
			}

			// How many regs from here on can be stored in one go?
			int c = FlushGetSequential(a);
			if (c == 1) {
				// INFO_LOG(Log::JIT, "Got single register: %i (%i)", a, m);
				emit_->VSTR((ARMReg)(a + S0), CTXREG, GetMipsRegOffset(m));
			} else if (c == 2) {
				// Probably not worth using VSTMIA for two.
				int offset = GetMipsRegOffset(m);
				emit_->VSTR((ARMReg)(a + S0), CTXREG, offset);
				emit_->VSTR((ARMReg)(a + 1 + S0), CTXREG, offset + 4);
			} else {
				// INFO_LOG(Log::JIT, "Got sequence: %i at %i (%i)", c, a, m);
				emit_->ADDI2R(SCRATCHREG1, CTXREG, GetMipsRegOffset(m), SCRATCHREG2);
				// INFO_LOG(Log::JIT, "VSTMIA R0, %i, %i", a, c);
				emit_->VSTMIA(SCRATCHREG1, false, (ARMReg)(S0 + a), c);
			}

			// Skip past, and mark as non-dirty.
			for (int j = 0; j < c; j++) {
				int b = a + j;
				mr[ar[b].mipsReg].loc = ML_MEM;
				mr[ar[b].mipsReg].reg = (int)INVALID_REG;
				ar[a + j].mipsReg = -1;
				ar[a + j].isDirty = false;
			}
			i += c - 1;
		} else {
			// Clean reg: just drop the mapping.
			if (m != -1) {
				mr[m].loc = ML_MEM;
				mr[m].reg = (int)INVALID_REG;
			}
			ar[a].mipsReg = -1;
			// already not dirty
		}
	}

	// Sanity check
	for (int i = 0; i < NUM_ARMFPUREG; i++) {
		if (ar[i].mipsReg != -1) {
			ERROR_LOG(Log::JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
		}
	}
	pendingFlush = false;
}
2013-02-16 02:06:02 +01:00
void ArmRegCacheFPU : : DiscardR ( MIPSReg r ) {
switch ( mr [ r ] . loc ) {
case ML_IMM :
// IMM is always "dirty".
// IMM is not allowed for FP (yet).
2024-07-14 14:42:59 +02:00
ERROR_LOG ( Log : : JIT , " Imm in FP register? " ) ;
2013-02-16 02:06:02 +01:00
break ;
2013-02-20 00:03:47 +01:00
2013-02-16 02:06:02 +01:00
case ML_ARMREG :
2014-12-14 14:04:33 -08:00
if ( mr [ r ] . reg = = INVALID_REG ) {
2024-07-14 14:42:59 +02:00
ERROR_LOG ( Log : : JIT , " DiscardR: MipsReg had bad ArmReg " ) ;
2014-03-11 21:43:48 +01:00
} else {
// Note that we DO NOT write it back here. That's the whole point of Discard.
ar [ mr [ r ] . reg ] . isDirty = false ;
ar [ mr [ r ] . reg ] . mipsReg = - 1 ;
2013-02-16 02:06:02 +01:00
}
break ;
case ML_MEM :
// Already there, nothing to do.
break ;
default :
//BAD
break ;
}
mr [ r ] . loc = ML_MEM ;
mr [ r ] . reg = ( int ) INVALID_REG ;
2013-02-20 00:03:47 +01:00
mr [ r ] . tempLock = false ;
2013-07-31 00:07:34 +02:00
mr [ r ] . spillLock = false ;
2013-02-16 02:06:02 +01:00
}
2013-02-20 00:03:47 +01:00
bool ArmRegCacheFPU : : IsTempX ( ARMReg r ) const {
return ar [ r - S0 ] . mipsReg > = TEMP0 ;
}
int ArmRegCacheFPU : : GetTempR ( ) {
2014-12-06 12:26:58 +01:00
if ( jo_ - > useNEONVFPU ) {
2024-07-14 14:42:59 +02:00
ERROR_LOG ( Log : : JIT , " VFP temps not allowed in NEON mode " ) ;
2014-12-06 12:26:58 +01:00
return 0 ;
}
2014-03-29 20:34:17 -07:00
pendingFlush = true ;
2013-02-20 00:03:47 +01:00
for ( int r = TEMP0 ; r < TEMP0 + NUM_TEMPS ; + + r ) {
if ( mr [ r ] . loc = = ML_MEM & & ! mr [ r ] . tempLock ) {
mr [ r ] . tempLock = true ;
return r ;
}
}
2024-07-14 14:42:59 +02:00
ERROR_LOG ( Log : : CPU , " Out of temp regs! Might need to DiscardR() some " ) ;
2020-07-19 17:47:02 +02:00
_assert_msg_ ( false , " Regcache ran out of temp regs, might need to DiscardR() some. " ) ;
2013-02-20 00:03:47 +01:00
return - 1 ;
}
2013-02-09 18:18:32 +01:00
int ArmRegCacheFPU : : GetMipsRegOffset ( MIPSReg r ) {
2013-07-31 10:33:44 +02:00
// These are offsets within the MIPSState structure. First there are the GPRS, then FPRS, then the "VFPURs", then the VFPU ctrls.
2013-11-27 22:45:17 +01:00
if ( r < 0 | | r > 32 + 128 + NUM_TEMPS ) {
2024-07-14 14:42:59 +02:00
ERROR_LOG ( Log : : JIT , " bad mips register %i, out of range " , r ) ;
2013-11-27 22:45:17 +01:00
return 0 ; // or what?
}
2015-01-17 11:57:59 -08:00
if ( r < 32 | | r > = 32 + 128 ) {
2013-11-27 22:45:17 +01:00
return ( 32 + r ) < < 2 ;
} else {
// r is between 32 and 128 + 32
return ( 32 + 32 + voffset [ r - 32 ] ) < < 2 ;
}
2013-02-09 18:18:32 +01:00
}
void ArmRegCacheFPU : : SpillLock ( MIPSReg r1 , MIPSReg r2 , MIPSReg r3 , MIPSReg r4 ) {
mr [ r1 ] . spillLock = true ;
if ( r2 ! = - 1 ) mr [ r2 ] . spillLock = true ;
if ( r3 ! = - 1 ) mr [ r3 ] . spillLock = true ;
if ( r4 ! = - 1 ) mr [ r4 ] . spillLock = true ;
}
// This is actually pretty slow with all the 160 regs...
2013-07-31 00:07:34 +02:00
void ArmRegCacheFPU : : ReleaseSpillLocksAndDiscardTemps ( ) {
2014-12-06 12:26:58 +01:00
for ( int i = 0 ; i < NUM_MIPSFPUREG ; i + + ) {
2013-02-09 18:18:32 +01:00
mr [ i ] . spillLock = false ;
2014-12-06 12:26:58 +01:00
}
for ( int i = TEMP0 ; i < TEMP0 + NUM_TEMPS ; + + i ) {
2013-02-20 00:03:47 +01:00
DiscardR ( i ) ;
2014-12-06 12:26:58 +01:00
}
2022-04-13 10:36:37 +02:00
for ( int i = 0 ; i < NUM_ARMQUADS ; i + + ) {
2014-12-06 12:26:58 +01:00
qr [ i ] . spillLock = false ;
if ( qr [ i ] . isTemp ) {
qr [ i ] . isTemp = false ;
qr [ i ] . sz = V_Invalid ;
}
}
2013-02-09 18:18:32 +01:00
}
2013-07-31 11:25:35 +02:00
// Returns the ARM S register currently holding MIPS reg mipsReg. Must only be
// called when the reg is known to be mapped; otherwise logs an error (with a
// disassembly of the current instruction) and returns INVALID_REG.
ARMReg ArmRegCacheFPU::R(int mipsReg) {
	if (mr[mipsReg].loc == ML_ARMREG) {
		return (ARMReg)(mr[mipsReg].reg + S0);
	} else {
		if (mipsReg < 32) {
			ERROR_LOG(Log::JIT, "FReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg, js_->compilerPC, MIPSDisasmAt(js_->compilerPC).c_str());
		} else if (mipsReg < 32 + 128) {
			ERROR_LOG(Log::JIT, "VReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC).c_str());
		} else {
			ERROR_LOG(Log::JIT, "Tempreg %i not in ARM reg. compilerPC = %08x : %s", mipsReg - 128 - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC).c_str());
		}
		return INVALID_REG;  // BAAAD
	}
}
2014-12-06 12:26:58 +01:00
// Each NEON quad Qn overlaps D registers D(2n) and D(2n+1); return the first.
inline ARMReg QuadAsD(int quad) {
	const int d = quad * 2;
	return ARMReg(D0 + d);
}
// Convert a quad index (0-15) to the corresponding Q register enum value.
inline ARMReg QuadAsQ(int quad) {
	return ARMReg(Q0 + quad);
}
// Q0-Q3 are reserved (they overlap the S registers scalar code uses);
// only Q4 and up may be allocated for VFPU quads.
bool MappableQ(int quad) {
	if (quad < 4)
		return false;
	return true;
}
// Load a full 4x4 matrix from memory into four quads. Not implemented yet -
// currently just logs an error.
void ArmRegCacheFPU::QLoad4x4(MIPSGPReg regPtr, int vquads[4]) {
	ERROR_LOG(Log::JIT, "QLoad4x4 not implemented");
	// TODO
}
void ArmRegCacheFPU : : QFlush ( int quad ) {
if ( ! MappableQ ( quad ) ) {
2024-07-14 14:42:59 +02:00
ERROR_LOG ( Log : : JIT , " Cannot flush non-mappable quad %i " , quad ) ;
2014-12-06 12:26:58 +01:00
return ;
}
if ( qr [ quad ] . isDirty & & ! qr [ quad ] . isTemp ) {
2024-07-14 14:42:59 +02:00
INFO_LOG ( Log : : JIT , " Flushing Q%i (%s) " , quad , GetVectorNotation ( qr [ quad ] . mipsVec , qr [ quad ] . sz ) . c_str ( ) ) ;
2014-12-06 12:26:58 +01:00
ARMReg q = QuadAsQ ( quad ) ;
// Unlike reads, when writing to the register file we need to be careful to write the correct
// number of floats.
switch ( qr [ quad ] . sz ) {
case V_Single :
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 0 ] ) , R1 ) ;
emit_ - > VST1_lane ( F_32 , q , R0 , 0 , true ) ;
2024-07-14 14:42:59 +02:00
// WARN_LOG(Log::JIT, "S: Falling back to individual flush: pc=%08x", js_->compilerPC);
2014-12-06 12:26:58 +01:00
break ;
case V_Pair :
if ( Consecutive ( qr [ quad ] . vregs [ 0 ] , qr [ quad ] . vregs [ 1 ] ) ) {
// Can combine, it's a column!
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 0 ] ) , R1 ) ;
emit_ - > VST1 ( F_32 , q , R0 , 1 , ALIGN_NONE ) ; // TODO: Allow ALIGN_64 when applicable
} else {
2024-07-14 14:42:59 +02:00
// WARN_LOG(Log::JIT, "P: Falling back to individual flush: pc=%08x", js_->compilerPC);
2014-12-06 12:26:58 +01:00
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 0 ] ) , R1 ) ;
emit_ - > VST1_lane ( F_32 , q , R0 , 0 , true ) ;
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 1 ] ) , R1 ) ;
emit_ - > VST1_lane ( F_32 , q , R0 , 1 , true ) ;
}
break ;
case V_Triple :
if ( Consecutive ( qr [ quad ] . vregs [ 0 ] , qr [ quad ] . vregs [ 1 ] , qr [ quad ] . vregs [ 2 ] ) ) {
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 0 ] ) , R1 ) ;
emit_ - > VST1 ( F_32 , QuadAsD ( quad ) , R0 , 1 , ALIGN_NONE , REG_UPDATE ) ; // TODO: Allow ALIGN_64 when applicable
emit_ - > VST1_lane ( F_32 , q , R0 , 2 , true ) ;
} else {
2024-07-14 14:42:59 +02:00
// WARN_LOG(Log::JIT, "T: Falling back to individual flush: pc=%08x", js_->compilerPC);
2014-12-06 12:26:58 +01:00
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 0 ] ) , R1 ) ;
emit_ - > VST1_lane ( F_32 , q , R0 , 0 , true ) ;
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 1 ] ) , R1 ) ;
emit_ - > VST1_lane ( F_32 , q , R0 , 1 , true ) ;
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 2 ] ) , R1 ) ;
emit_ - > VST1_lane ( F_32 , q , R0 , 2 , true ) ;
}
break ;
case V_Quad :
if ( Consecutive ( qr [ quad ] . vregs [ 0 ] , qr [ quad ] . vregs [ 1 ] , qr [ quad ] . vregs [ 2 ] , qr [ quad ] . vregs [ 3 ] ) ) {
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 0 ] ) , R1 ) ;
emit_ - > VST1 ( F_32 , QuadAsD ( quad ) , R0 , 2 , ALIGN_NONE ) ; // TODO: Allow ALIGN_64 when applicable
} else {
2024-07-14 14:42:59 +02:00
// WARN_LOG(Log::JIT, "Q: Falling back to individual flush: pc=%08x", js_->compilerPC);
2014-12-06 12:26:58 +01:00
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 0 ] ) , R1 ) ;
emit_ - > VST1_lane ( F_32 , q , R0 , 0 , true ) ;
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 1 ] ) , R1 ) ;
emit_ - > VST1_lane ( F_32 , q , R0 , 1 , true ) ;
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 2 ] ) , R1 ) ;
emit_ - > VST1_lane ( F_32 , q , R0 , 2 , true ) ;
emit_ - > ADDI2R ( R0 , CTXREG , GetMipsRegOffsetV ( qr [ quad ] . vregs [ 3 ] ) , R1 ) ;
emit_ - > VST1_lane ( F_32 , q , R0 , 3 , true ) ;
}
break ;
default :
2024-07-14 14:42:59 +02:00
ERROR_LOG ( Log : : JIT , " Unknown quad size %i " , qr [ quad ] . sz ) ;
2014-12-06 12:26:58 +01:00
break ;
}
qr [ quad ] . isDirty = false ;
int n = GetNumVectorElements ( qr [ quad ] . sz ) ;
for ( int i = 0 ; i < n ; i + + ) {
int vr = qr [ quad ] . vregs [ i ] ;
if ( vr < 0 | | vr > 128 ) {
2024-07-14 14:42:59 +02:00
ERROR_LOG ( Log : : JIT , " Bad vr %i " , vr ) ;
2014-12-06 12:26:58 +01:00
}
FPURegMIPS & m = mr [ 32 + vr ] ;
m . loc = ML_MEM ;
m . lane = - 1 ;
m . reg = - 1 ;
}
} else {
if ( qr [ quad ] . isTemp ) {
2024-07-14 14:42:59 +02:00
WARN_LOG ( Log : : JIT , " Not flushing quad %i; dirty = %i, isTemp = %i " , quad , qr [ quad ] . isDirty , qr [ quad ] . isTemp ) ;
2014-12-06 12:26:58 +01:00
}
}
qr [ quad ] . isTemp = false ;
qr [ quad ] . mipsVec = - 1 ;
qr [ quad ] . sz = V_Invalid ;
memset ( qr [ quad ] . vregs , 0xFF , 4 ) ;
}
// Finds a quad to use, scanning `count` quads starting at `start` (wrapping
// mod 16). Prefers a completely free quad; otherwise kicks out the
// best-scoring non-locked, non-temp quad (flushing it first). Returns -1 if
// everything is locked. `reason` is only used for logging.
int ArmRegCacheFPU::QGetFreeQuad(int start, int count, const char *reason) {
	// Search for a free quad. A quad is free if the first register in it is free.
	for (int i = 0; i < count; i++) {
		int q = (i + start) & 15;

		if (!MappableQ(q))
			continue;

		// Don't steal temp quads!
		if (qr[q].mipsVec == (int)INVALID_REG && !qr[q].isTemp) {
			// INFO_LOG(Log::JIT, "Free quad: %i", q);
			// Oh yeah! Free quad!
			return q;
		}
	}

	// Okay, find the "best scoring" reg to replace. Scoring algorithm TBD but may include some
	// sort of age.
	int bestQuad = -1;
	int bestScore = -1;
	for (int i = 0; i < count; i++) {
		int q = (i + start) & 15;

		if (!MappableQ(q))
			continue;
		if (qr[q].spillLock)
			continue;
		if (qr[q].isTemp)
			continue;

		int score = 0;
		if (!qr[q].isDirty) {
			// Prefer kicking clean quads - no store needed.
			score += 5;
		}

		if (score > bestScore) {
			bestQuad = q;
			bestScore = score;
		}
	}

	if (bestQuad == -1) {
		ERROR_LOG(Log::JIT, "Failed finding a free quad. Things will now go haywire!");
		return -1;
	} else {
		INFO_LOG(Log::JIT, "No register found in %i and the next %i, kicked out #%i (%s)", start, count, bestQuad, reason ? reason : "no reason");
		QFlush(bestQuad);
		return bestQuad;
	}
}
// Allocates a temp quad (not backed by any MIPS reg), spill-locked until
// ReleaseSpillLocksAndDiscardTemps. Returns the overlapping D register for
// single/pair sizes, the full Q register otherwise.
ARMReg ArmRegCacheFPU::QAllocTemp(VectorSize sz) {
	int q = QGetFreeQuad(8, 16, "allocating temporary");  // Prefer high quads as temps
	if (q < 0) {
		ERROR_LOG(Log::JIT, "Failed to allocate temp quad");
		q = 0;
	}
	qr[q].spillLock = true;
	qr[q].isTemp = true;
	qr[q].sz = sz;
	qr[q].isDirty = false;  // doesn't matter

	INFO_LOG(Log::JIT, "Allocated temp quad %i", q);

	if (sz == V_Single || sz == V_Pair) {
		return D_0(ARMReg(Q0 + q));
	} else {
		return ARMReg(Q0 + q);
	}
}
bool ArmRegCacheFPU : : Consecutive ( int v1 , int v2 ) const {
return ( voffset [ v1 ] + 1 ) = = voffset [ v2 ] ;
}
bool ArmRegCacheFPU : : Consecutive ( int v1 , int v2 , int v3 ) const {
return Consecutive ( v1 , v2 ) & & Consecutive ( v2 , v3 ) ;
}
bool ArmRegCacheFPU : : Consecutive ( int v1 , int v2 , int v3 , int v4 ) const {
return Consecutive ( v1 , v2 ) & & Consecutive ( v2 , v3 ) & & Consecutive ( v3 , v4 ) ;
}
// Maps a whole matrix: each row (if MAP_MTX_TRANSPOSED) or column is mapped to
// a quad, and the resulting ARM regs are written into regs[0..side).
void ArmRegCacheFPU::QMapMatrix(ARMReg *regs, int matrix, MatrixSize mz, int flags) {
	u8 vregs[4];
	if (flags & MAP_MTX_TRANSPOSED) {
		GetMatrixRows(matrix, mz, vregs);
	} else {
		GetMatrixColumns(matrix, mz, vregs);
	}

	// TODO: Zap existing mappings, reserve 4 consecutive regs, then do a fast load.
	int n = GetMatrixSide(mz);
	VectorSize vsz = GetVectorSize(mz);
	for (int i = 0; i < n; i++) {
		regs[i] = QMapReg(vregs[i], vsz, flags);
	}
}
// Maps the VFPU vector (vreg, sz) into a NEON quad register, flushing any
// conflicting mappings, and returns the ARM register holding it (a D register
// for singles/pairs, a Q register otherwise). Honors range constraints and
// dirtiness requested via flags.
ARMReg ArmRegCacheFPU::QMapReg(int vreg, VectorSize sz, int flags) {
	qTime_++;

	const int n = GetNumVectorElements(sz);
	u8 vregs[4];
	GetVectorRegs(vregs, sz, vreg);

	// Compute the range of quads we're allowed (or prefer) to use.
	int start = 0;
	int count = 16;
	if (flags & MAP_PREFER_HIGH) {
		start = 8;
	} else if (flags & MAP_PREFER_LOW) {
		start = 4;
	} else if (flags & MAP_FORCE_LOW) {
		start = 4;
		count = 4;
	} else if (flags & MAP_FORCE_HIGH) {
		start = 8;
		count = 8;
	}

	// Scan every quad: either the whole vector is already mapped (fast path),
	// or parts of it overlap existing mappings, which must then be flushed.
	// Later we can check for possible transposes as well.
	std::vector<int> quadsToFlush;
	for (int i = 0; i < 16; i++) {
		const int q = (i + start) & 15;
		if (!MappableQ(q))
			continue;

		// Skip unmapped quads.
		if (qr[q].sz == V_Invalid)
			continue;

		// Exact match? Re-lock it and transfer the dirty flag - unless it sits
		// outside the requested range, in which case we flush and remap below.
		if (vreg == qr[q].mipsVec && sz == qr[q].sz) {
			if (i < count) {
				INFO_LOG(Log::JIT, "Quad already mapped: %i : %i (size %i)", q, vreg, sz);
				qr[q].isDirty = qr[q].isDirty || (flags & MAP_DIRTY);
				qr[q].spillLock = true;

				// Sanity check vregs
				for (int j = 0; j < n; j++) {
					if (vregs[j] != qr[q].vregs[j]) {
						ERROR_LOG(Log::JIT, "Sanity check failed: %i vs %i", vregs[j], qr[q].vregs[j]);
					}
				}
				return (ARMReg)(Q0 + q);
			} else {
				INFO_LOG(Log::JIT, "Quad already mapped at %i which is out of requested range [%i-%i) (count = %i), needs moving. For now we flush.", q, start, start + count, count);
				quadsToFlush.push_back(q);
				continue;
			}
		}

		// Any element overlap with an existing mapping means that mapping has to go.
		const int origN = GetNumVectorElements(qr[q].sz);
		bool overlaps = false;
		for (int a = 0; a < n && !overlaps; a++) {
			for (int b = 0; b < origN; b++) {
				if (vregs[a] == qr[q].vregs[b]) {
					overlaps = true;
					break;
				}
			}
		}
		if (overlaps)
			quadsToFlush.push_back(q);
	}

	// We didn't find the register fully mapped, but we collected conflicts. Flush 'em.
	// Here we can check for opportunities to do a "transpose-flush" of row vectors, etc.
	if (!quadsToFlush.empty()) {
		INFO_LOG(Log::JIT, "New mapping %s collided with %d quads, flushing them.", GetVectorNotation(vreg, sz).c_str(), (int)quadsToFlush.size());
	}
	for (size_t i = 0; i < quadsToFlush.size(); i++) {
		QFlush(quadsToFlush[i]);
	}

	// Find where we want to map it, obeying the constraints we gave.
	const int quad = QGetFreeQuad(start, count, "mapping");
	if (quad < 0)
		return INVALID_REG;

	// If parts of our register are mapped elsewhere and we are dirty, write them back
	// before we reload in a new location.
	// This may be problematic if inputs overlap irregularly with output, say:
	// vdot S700, R000, C000
	// It might still work by accident...
	if (flags & MAP_DIRTY) {
		for (int i = 0; i < n; i++) {
			FlushV(vregs[i]);
		}
	}

	qr[quad].sz = sz;
	qr[quad].mipsVec = vreg;

	if ((flags & MAP_NOINIT) != MAP_NOINIT) {
		// Load the whole vector in as few instructions as possible. This is easy for
		// singles and - thanks to our register reordering - columns are actually
		// in-order in memory, so consecutive elements combine into a single VLD1.
		// Loads one scalar element into the given lane of the target quad.
		auto loadLane = [&](int v, int lane) {
			emit_->ADDI2R(R0, CTXREG, GetMipsRegOffsetV(v), R1);
			emit_->VLD1_lane(F_32, QuadAsQ(quad), R0, lane, true);
		};
		switch (sz) {
		case V_Single:
			loadLane(vregs[0], 0);
			break;
		case V_Pair:
			if (Consecutive(vregs[0], vregs[1])) {
				// Can combine, it's a column!
				emit_->ADDI2R(R0, CTXREG, GetMipsRegOffsetV(vregs[0]), R1);
				emit_->VLD1(F_32, QuadAsD(quad), R0, 1, ALIGN_NONE);  // TODO: Allow ALIGN_64 when applicable
			} else {
				loadLane(vregs[0], 0);
				loadLane(vregs[1], 1);
			}
			break;
		case V_Triple:
			if (Consecutive(vregs[0], vregs[1], vregs[2])) {
				// Bulk-load the first two, then pick up the third via the updated pointer.
				emit_->ADDI2R(R0, CTXREG, GetMipsRegOffsetV(vregs[0]), R1);
				emit_->VLD1(F_32, QuadAsD(quad), R0, 1, ALIGN_NONE, REG_UPDATE);  // TODO: Allow ALIGN_64 when applicable
				emit_->VLD1_lane(F_32, QuadAsQ(quad), R0, 2, true);
			} else {
				loadLane(vregs[0], 0);
				loadLane(vregs[1], 1);
				loadLane(vregs[2], 2);
			}
			break;
		case V_Quad:
			if (Consecutive(vregs[0], vregs[1], vregs[2], vregs[3])) {
				emit_->ADDI2R(R0, CTXREG, GetMipsRegOffsetV(vregs[0]), R1);
				emit_->VLD1(F_32, QuadAsD(quad), R0, 2, ALIGN_NONE);  // TODO: Allow ALIGN_64 when applicable
			} else {
				for (int lane = 0; lane < 4; lane++) {
					loadLane(vregs[lane], lane);
				}
			}
			break;
		default:
			;
		}
	}

	// Record the mapping in both directions so lookups and flushes agree.
	for (int i = 0; i < n; i++) {
		const int mipsReg = 32 + vregs[i];
		mr[mipsReg].loc = ML_ARMREG;
		mr[mipsReg].reg = QuadAsQ(quad);
		mr[mipsReg].lane = i;
		qr[quad].vregs[i] = vregs[i];
	}
	qr[quad].isDirty = (flags & MAP_DIRTY) != 0;
	qr[quad].spillLock = true;

	INFO_LOG(Log::JIT, "Mapped Q%i to vfpu %i (%s), sz=%i, dirty=%i", quad, vreg, GetVectorNotation(vreg, sz).c_str(), (int)sz, qr[quad].isDirty);

	// Singles and pairs live in the low D register of the quad.
	if (sz == V_Single || sz == V_Pair) {
		return D_0(QuadAsQ(quad));
	} else {
		return QuadAsQ(quad);
	}
}