2012-11-01 16:19:01 +01:00
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
2012-11-04 23:01:49 +01:00
// the Free Software Foundation, version 2.0 or later versions.
2012-11-01 16:19:01 +01:00
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
2017-07-05 15:06:44 +02:00
// Table 13.10 in http://agner.org/optimize/optimizing_assembly.pdf is cool - generate constants with
// short instruction sequences. Surprisingly many are possible.
2016-10-12 17:32:52 +02:00
# include "ppsspp_config.h"
# if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
2013-04-27 21:59:51 +02:00
# include <cmath>
# include <limits>
2017-03-12 17:16:38 +01:00
# include <emmintrin.h>
2013-12-17 23:40:27 +01:00
2020-10-04 00:25:21 +02:00
# include "Common/Math/math_util.h"
2012-11-01 16:19:01 +01:00
2014-11-08 00:39:40 -08:00
# include "Common/CPUDetect.h"
2020-08-15 12:25:39 +02:00
# include "Common/Log.h"
2019-08-04 21:37:33 -07:00
# include "Core/Compatibility.h"
2013-04-27 21:59:51 +02:00
# include "Core/Config.h"
2019-08-04 21:37:33 -07:00
# include "Core/MemMap.h"
2013-04-27 21:59:51 +02:00
# include "Core/Reporting.h"
2019-08-04 21:37:33 -07:00
# include "Core/System.h"
2013-04-27 21:59:51 +02:00
# include "Core/MIPS/MIPSAnalyst.h"
2013-08-24 19:31:12 -07:00
# include "Core/MIPS/MIPSCodeUtils.h"
2013-04-27 21:59:51 +02:00
# include "Core/MIPS/MIPSVFPUUtils.h"
2013-08-24 19:31:12 -07:00
# include "Core/MIPS/x86/Jit.h"
# include "Core/MIPS/x86/RegCache.h"
2012-11-01 16:19:01 +01:00
2012-11-12 14:35:10 +01:00
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
2013-07-30 18:15:48 +02:00
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocks(); Comp_Generic(op); return; }
2019-02-03 14:01:51 -08:00
# define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }
2013-02-20 00:03:47 +01:00
# define DISABLE { fpr.ReleaseSpillLocks(); Comp_Generic(op); return; }
2012-11-12 14:35:10 +01:00
2013-08-24 19:31:12 -07:00
# define _RS MIPS_GET_RS(op)
# define _RT MIPS_GET_RT(op)
# define _RD MIPS_GET_RD(op)
# define _FS MIPS_GET_FS(op)
# define _FT MIPS_GET_FT(op)
# define _FD MIPS_GET_FD(op)
# define _SA MIPS_GET_SA(op)
# define _POS ((op>> 6) & 0x1F)
# define _SIZE ((op>>11) & 0x1F)
# define _IMM16 (signed short)(op & 0xFFFF)
# define _IMM26 (op & 0x03FFFFFF)
2012-11-01 16:19:01 +01:00
namespace MIPSComp
{
2014-12-07 14:44:15 +01:00
using namespace Gen ;
2014-12-13 21:11:36 +01:00
using namespace X64JitConstants ;
2012-11-01 16:19:01 +01:00
2013-01-26 01:33:32 +01:00
// Constant data referenced by the emitted code below. Scalars are loaded via
// TEMPREG/RIP-relative addressing; the 16-byte-aligned vectors can be used
// directly as memory operands for aligned SSE instructions.
static const float one = 1.0f;
static const float minus_one = -1.0f;

alignas(16) const u32 noSignMask[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};  // ANDPS with this clears the sign bit (abs).
alignas(16) const u32 signBitAll[4] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};  // XORPS with this negates all four lanes.
alignas(16) const u32 signBitLower[4] = {0x80000000, 0, 0, 0};  // XORPS with this negates only the lowest lane.
alignas(16) const float oneOneOneOne[4] = {1.0f, 1.0f, 1.0f, 1.0f};
alignas(16) const u32 fourinfnan[4] = {0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};  // Exponent mask, used to detect inf/NaN bit patterns.
alignas(16) const float identityMatrix[4][4] = { { 1.0f, 0, 0, 0 }, { 0, 1.0f, 0, 0 }, { 0, 0, 1.0f, 0 }, { 0, 0, 0, 1.0f } };
2014-11-26 22:30:06 +01:00
2013-08-24 14:43:49 -07:00
void Jit : : Comp_VPFX ( MIPSOpcode op )
2013-01-26 01:33:32 +01:00
{
2019-02-03 14:01:51 -08:00
CONDITIONAL_DISABLE ( VFPU_XFER ) ;
2013-01-26 01:33:32 +01:00
int data = op & 0xFFFFF ;
int regnum = ( op > > 24 ) & 3 ;
switch ( regnum ) {
case 0 : // S
js . prefixS = data ;
2013-02-15 01:12:43 -08:00
js . prefixSFlag = JitState : : PREFIX_KNOWN_DIRTY ;
2013-01-26 01:33:32 +01:00
break ;
case 1 : // T
js . prefixT = data ;
2013-02-15 01:12:43 -08:00
js . prefixTFlag = JitState : : PREFIX_KNOWN_DIRTY ;
2013-01-26 01:33:32 +01:00
break ;
case 2 : // D
2019-03-10 20:28:01 -07:00
js . prefixD = data & 0x00000FFF ;
2013-02-15 01:12:43 -08:00
js . prefixDFlag = JitState : : PREFIX_KNOWN_DIRTY ;
2013-01-26 01:33:32 +01:00
break ;
}
}
// Applies an S or T operand prefix (swizzle / abs / negate / constant) at
// compile time by redirecting the affected entries of vregs[] to temp regs
// holding the prefixed values. vregs is updated in place; the original
// registers are never written.
void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
	// 0xE4 is the identity prefix (x, y, z, w with no modifiers) - nothing to do.
	if (prefix == 0xE4) return;

	int n = GetNumVectorElements(sz);
	u8 origV[4];
	// The 8 VFPU prefix constants; index = regnum + (abs << 2).
	static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

	// Snapshot the original register assignment, since entries of vregs[] may
	// be replaced by temps as we go but swizzles reference the originals.
	for (int i = 0; i < n; i++)
		origV[i] = vregs[i];

	for (int i = 0; i < n; i++) {
		// Decode the per-element prefix fields.
		int regnum = (prefix >> (i*2)) & 3;
		int abs = (prefix >> (8+i)) & 1;
		int negate = (prefix >> (16+i)) & 1;
		int constants = (prefix >> (12+i)) & 1;

		// Unchanged, hurray.
		if (!constants && regnum == i && !abs && !negate)
			continue;

		// This puts the value into a temp reg, so we won't write the modified value back.
		vregs[i] = fpr.GetTempV();
		fpr.MapRegV(vregs[i], MAP_NOINIT | MAP_DIRTY);

		if (!constants) {
			// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
			// TODO: But some ops seem to use const 0 instead?
			if (regnum >= n) {
				ERROR_LOG_REPORT(CPU, "Invalid VFPU swizzle: %08x / %d", prefix, sz);
				regnum = 0;
			}

			// Copy the swizzled source element into the temp.
			fpr.SimpleRegV(origV[regnum], 0);
			MOVSS(fpr.VX(vregs[i]), fpr.V(origV[regnum]));
			if (abs) {
				// Mask off the sign bit for abs.
				if (RipAccessible(&noSignMask)) {
					ANDPS(fpr.VX(vregs[i]), M(&noSignMask));  // rip accessible
				} else {
					MOV(PTRBITS, R(TEMPREG), ImmPtr(&noSignMask));
					ANDPS(fpr.VX(vregs[i]), MatR(TEMPREG));
				}
			}
		} else {
			// Constant prefix: load directly from the constant table.
			if (RipAccessible(constantArray)) {
				MOVSS(fpr.VX(vregs[i]), M(&constantArray[regnum + (abs << 2)]));  // rip accessible
			} else {
				MOV(PTRBITS, R(TEMPREG), ImmPtr(&constantArray[regnum + (abs << 2)]));
				MOVSS(fpr.VX(vregs[i]), MatR(TEMPREG));
			}
		}

		if (negate) {
			// Flip the sign bit of the lowest lane.
			if (RipAccessible(&signBitLower)) {
				XORPS(fpr.VX(vregs[i]), M(&signBitLower));  // rip accessible
			} else {
				MOV(PTRBITS, R(TEMPREG), ImmPtr(&signBitLower));
				XORPS(fpr.VX(vregs[i]), MatR(TEMPREG));
			}
		}

		// TODO: This probably means it will swap out soon, inefficiently...
		fpr.ReleaseSpillLockV(vregs[i]);
	}
}
2013-02-17 23:15:16 -08:00
void Jit : : GetVectorRegsPrefixD ( u8 * regs , VectorSize sz , int vectorReg ) {
2013-02-15 01:12:43 -08:00
_assert_ ( js . prefixDFlag & JitState : : PREFIX_KNOWN ) ;
2013-02-17 23:15:16 -08:00
GetVectorRegs ( regs , sz , vectorReg ) ;
if ( js . prefixD = = 0 )
return ;
2013-01-26 01:33:32 +01:00
int n = GetNumVectorElements ( sz ) ;
2017-07-05 13:51:27 +02:00
for ( int i = 0 ; i < n ; i + + ) {
2013-02-17 23:15:16 -08:00
// Hopefully this is rare, we'll just write it into a reg we drop.
if ( js . VfpuWriteMask ( i ) )
regs [ i ] = fpr . GetTempV ( ) ;
}
}
// Applies the D (destination) prefix saturation at compile time:
// sat == 1 clamps each written element to [0.0, 1.0], sat == 3 to [-1.0, 1.0].
// Write-masked elements were already redirected to temps by
// GetVectorRegsPrefixD, so they are skipped here.
void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
	_assert_(js.prefixDFlag & JitState::PREFIX_KNOWN);
	if (!js.prefixD) return;

	int n = GetNumVectorElements(sz);
	for (int i = 0; i < n; i++) {
		if (js.VfpuWriteMask(i))
			continue;

		int sat = (js.prefixD >> (i * 2)) & 3;
		if (sat == 1) {
			fpr.MapRegV(vregs[i], MAP_DIRTY);

			// Zero out XMM0 if it was <= +0.0f (but skip NAN.)
			MOVSS(R(XMM0), fpr.VX(vregs[i]));
			XORPS(XMM1, R(XMM1));
			CMPLESS(XMM0, R(XMM1));
			// ANDNPS keeps the value where the compare was false, zero elsewhere.
			ANDNPS(XMM0, fpr.V(vregs[i]));

			// Upper clamp to 1.0f via MINSS.
			// Retain a NAN in XMM0 (must be second operand.)
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
			MOVSS(fpr.VX(vregs[i]), MatR(TEMPREG));
			MINSS(fpr.VX(vregs[i]), R(XMM0));
		} else if (sat == 3) {
			fpr.MapRegV(vregs[i], MAP_DIRTY);

			// Check for < -1.0f, but careful of NANs.
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&minus_one));
			MOVSS(XMM1, MatR(TEMPREG));
			MOVSS(R(XMM0), fpr.VX(vregs[i]));
			CMPLESS(XMM0, R(XMM1));
			// If it was NOT less, the three ops below do nothing.
			// Otherwise, they replace the value with -1.0f.
			ANDPS(XMM1, R(XMM0));
			ANDNPS(XMM0, fpr.V(vregs[i]));
			ORPS(XMM0, R(XMM1));

			// Upper clamp to 1.0f via MINSS.
			// Retain a NAN in XMM0 (must be second operand.)
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
			MOVSS(fpr.VX(vregs[i]), MatR(TEMPREG));
			MINSS(fpr.VX(vregs[i]), R(XMM0));
		}
	}
}
2013-02-16 03:15:22 -08:00
// Vector regs can overlap in all sorts of swizzled ways.
2013-02-16 10:19:05 -08:00
// This does allow a single overlap in sregs[i].
2022-12-10 21:02:44 -08:00
bool IsOverlapSafeAllowS ( int dreg , int di , int sn , const u8 sregs [ ] , int tn = 0 , const u8 tregs [ ] = NULL ) {
2017-07-05 13:51:27 +02:00
for ( int i = 0 ; i < sn ; + + i ) {
2013-02-16 10:19:05 -08:00
if ( sregs [ i ] = = dreg & & i ! = di )
2013-02-16 21:26:32 -08:00
return false ;
2013-02-16 03:15:22 -08:00
}
2017-07-05 13:51:27 +02:00
for ( int i = 0 ; i < tn ; + + i ) {
2013-02-16 03:15:22 -08:00
if ( tregs [ i ] = = dreg )
2013-02-16 21:26:32 -08:00
return false ;
2013-02-16 03:15:22 -08:00
}
// Hurray, no overlap, we can write directly.
2013-02-16 21:26:32 -08:00
return true ;
}
2022-12-10 21:02:44 -08:00
bool IsOverlapSafe ( int dreg , int di , int sn , const u8 sregs [ ] , int tn = 0 , const u8 tregs [ ] = NULL ) {
2013-02-16 21:26:32 -08:00
return IsOverlapSafeAllowS ( dreg , di , sn , sregs , tn , tregs ) & & sregs [ di ] ! = dreg ;
2013-02-16 03:15:22 -08:00
}
2013-08-24 14:43:49 -07:00
// Compiles the single-element VFPU loads/stores lv.s (opcode 50) and
// sv.s (opcode 58). Uses JitSafeMem to emit a fast direct-memory path plus a
// slow fallback path through safeMemFuncs.
void Jit::Comp_SV(MIPSOpcode op) {
	CONDITIONAL_DISABLE(LSU_VFPU);
	s32 imm = (signed short)(op & 0xFFFC);
	// The VFPU register index is split across the opcode.
	int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5);
	MIPSGPReg rs = _RS;

	CheckMemoryBreakpoint(0, rs, imm);

	switch (op >> 26) {
	case 50: //lv.s  // VI(vt) = Memory::Read_U32(addr);
		{
			gpr.Lock(rs);
			// Destination is fully overwritten, so no need to load its old value.
			fpr.MapRegV(vt, MAP_DIRTY | MAP_NOINIT);

			JitSafeMem safe(this, rs, imm);
			OpArg src;
			if (safe.PrepareRead(src, 4)) {
				// Fast path: read straight from emulated memory.
				MOVSS(fpr.VX(vt), safe.NextFastAddress(0));
			}
			if (safe.PrepareSlowRead(safeMemFuncs.readU32)) {
				// Slow path: the helper returns the value in EAX.
				MOVD_xmm(fpr.VX(vt), R(EAX));
			}
			safe.Finish();

			gpr.UnlockAll();
			fpr.ReleaseSpillLocks();
		}
		break;

	case 58: //sv.s   // Memory::Write_U32(VI(vt), addr);
		{
			gpr.Lock(rs);
			fpr.MapRegV(vt, 0);

			JitSafeMem safe(this, rs, imm);
			OpArg dest;
			if (safe.PrepareWrite(dest, 4)) {
				// Fast path: store straight to emulated memory.
				MOVSS(safe.NextFastAddress(0), fpr.VX(vt));
			}
			if (safe.PrepareSlowWrite()) {
				// Slow path: stage the value in mips->temp, then call the write helper.
				MOVSS(MIPSSTATE_VAR(temp), fpr.VX(vt));
				safe.DoSlowWrite(safeMemFuncs.writeU32, MIPSSTATE_VAR(temp), 0);
			}
			safe.Finish();

			fpr.ReleaseSpillLocks();
			gpr.UnlockAll();
		}
		break;

	default:
		DISABLE;
	}
}
2017-07-05 13:51:27 +02:00
// Compiles the quad VFPU loads/stores: lvl.q/lvr.q (opcode 53, currently
// disabled), lv.q (54) and sv.q (62). Prefers a single 16-byte SIMD
// move when the quad maps to one XMM register, otherwise falls back to four
// scalar 4-byte accesses.
void Jit::Comp_SVQ(MIPSOpcode op) {
	CONDITIONAL_DISABLE(LSU_VFPU);

	int imm = (signed short)(op & 0xFFFC);
	// The VFPU register index is split across the opcode.
	int vt = (((op >> 16) & 0x1f)) | ((op & 1) << 5);
	MIPSGPReg rs = _RS;

	CheckMemoryBreakpoint(0, rs, imm);

	switch (op >> 26) {
	case 53: //lvl.q/lvr.q
		{
			if (!g_Config.bFastMemory) {
				DISABLE;
			}
			// NOTE: the whole case is currently disabled; code below is kept for reference.
			DISABLE;

			gpr.MapReg(rs, true, false);
			gpr.FlushLockX(ECX);
			u8 vregs[4];
			GetVectorRegs(vregs, V_Quad, vt);
			// EAX = address; ECX keeps a copy of the unshifted address.
			MOV(32, R(EAX), gpr.R(rs));
			ADD(32, R(EAX), Imm32(imm));
#ifdef MASKED_PSP_MEMORY
			AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
#endif
			MOV(32, R(ECX), R(EAX));
			// EAX = word offset within the 16-byte-aligned quad (0..3).
			SHR(32, R(EAX), Imm8(2));
			AND(32, R(EAX), Imm32(0x3));
			CMP(32, R(EAX), Imm32(0));
			FixupBranch next = J_CC(CC_NE);

			// Builds a memory operand into PSP memory for the given scaled register.
			auto PSPMemAddr = [](X64Reg scaled, int offset) {
#if PPSSPP_ARCH(X86)
				return MDisp(scaled, (u32)Memory::base + offset);
#else
				return MComplex(MEMBASEREG, scaled, 1, offset);
#endif
			};

			fpr.MapRegsV(vregs, V_Quad, MAP_DIRTY);

			// Dispatch on the word offset: load the 1..4 affected elements.
			// Offset = 0
			MOVSS(fpr.RX(vregs[3]), PSPMemAddr(EAX, 0));

			FixupBranch skip0 = J();
			SetJumpTarget(next);
			CMP(32, R(EAX), Imm32(1));
			next = J_CC(CC_NE);

			// Offset = 1
			MOVSS(fpr.RX(vregs[3]), PSPMemAddr(EAX, 4));
			MOVSS(fpr.RX(vregs[2]), PSPMemAddr(EAX, 0));

			FixupBranch skip1 = J();
			SetJumpTarget(next);
			CMP(32, R(EAX), Imm32(2));
			next = J_CC(CC_NE);

			// Offset = 2
			MOVSS(fpr.RX(vregs[3]), PSPMemAddr(EAX, 8));
			MOVSS(fpr.RX(vregs[2]), PSPMemAddr(EAX, 4));
			MOVSS(fpr.RX(vregs[1]), PSPMemAddr(EAX, 0));

			FixupBranch skip2 = J();
			SetJumpTarget(next);
			CMP(32, R(EAX), Imm32(3));
			next = J_CC(CC_NE);

			// Offset = 3
			MOVSS(fpr.RX(vregs[3]), PSPMemAddr(EAX, 12));
			MOVSS(fpr.RX(vregs[2]), PSPMemAddr(EAX, 8));
			MOVSS(fpr.RX(vregs[1]), PSPMemAddr(EAX, 4));
			MOVSS(fpr.RX(vregs[0]), PSPMemAddr(EAX, 0));

			SetJumpTarget(next);
			SetJumpTarget(skip0);
			SetJumpTarget(skip1);
			SetJumpTarget(skip2);

			gpr.UnlockAll();
			fpr.ReleaseSpillLocks();
		}
		break;

	case 54: //lv.q
		{
			gpr.Lock(rs);
			// This must be in a reg or an immediate.
			// Otherwise, it'll get put in EAX and we'll clobber that during NextSlowRead().
			if (!gpr.IsImm(rs))
				gpr.MapReg(rs, true, false);

			u8 vregs[4];
			GetVectorRegs(vregs, V_Quad, vt);

			// SIMD path: the whole quad lives in one XMM register.
			if (fpr.TryMapRegsVS(vregs, V_Quad, MAP_NOINIT | MAP_DIRTY)) {
				JitSafeMem safe(this, rs, imm);
				OpArg src;
				if (safe.PrepareRead(src, 16)) {
					// Should be safe, since lv.q must be aligned, but let's try to avoid crashing in safe mode.
					if (g_Config.bFastMemory) {
						MOVAPS(fpr.VSX(vregs), safe.NextFastAddress(0));
					} else {
						MOVUPS(fpr.VSX(vregs), safe.NextFastAddress(0));
					}
				}
				if (safe.PrepareSlowRead(safeMemFuncs.readU32)) {
					for (int i = 0; i < 4; i++) {
						safe.NextSlowRead(safeMemFuncs.readU32, i * 4);
						// We use XMM0 as a temporary since MOVSS and MOVD would clear the higher bits.
						MOVD_xmm(XMM0, R(EAX));
						MOVSS(fpr.VSX(vregs), R(XMM0));
						// Rotate things so we can read in the next higher float.
						// By the end (4 rotates), they'll all be back into place.
						SHUFPS(fpr.VSX(vregs), fpr.VS(vregs), _MM_SHUFFLE(0, 3, 2, 1));
					}
				}
				safe.Finish();
				gpr.UnlockAll();
				fpr.ReleaseSpillLocks();
				return;
			}

			// Scalar path: four separate float registers.
			fpr.MapRegsV(vregs, V_Quad, MAP_DIRTY | MAP_NOINIT);

			JitSafeMem safe(this, rs, imm);
			OpArg src;
			if (safe.PrepareRead(src, 16)) {
				// Just copy 4 words the easiest way while not wasting registers.
				for (int i = 0; i < 4; i++)
					MOVSS(fpr.VX(vregs[i]), safe.NextFastAddress(i * 4));
			}
			if (safe.PrepareSlowRead(safeMemFuncs.readU32)) {
				for (int i = 0; i < 4; i++) {
					safe.NextSlowRead(safeMemFuncs.readU32, i * 4);
					MOVD_xmm(fpr.VX(vregs[i]), R(EAX));
				}
			}
			safe.Finish();

			gpr.UnlockAll();
			fpr.ReleaseSpillLocks();
		}
		break;

	case 62: //sv.q
		{
			gpr.Lock(rs);
			// This must be in a reg or an immediate.
			// Otherwise, it'll get put in EAX and we'll clobber that during NextSlowRead().
			if (!gpr.IsImm(rs))
				gpr.MapReg(rs, true, false);

			u8 vregs[4];
			GetVectorRegs(vregs, V_Quad, vt);

			// SIMD path: store the whole quad from one XMM register.
			if (fpr.TryMapRegsVS(vregs, V_Quad, 0)) {
				JitSafeMem safe(this, rs, imm);
				OpArg dest;
				if (safe.PrepareWrite(dest, 16)) {
					// Should be safe, since sv.q must be aligned, but let's try to avoid crashing in safe mode.
					if (g_Config.bFastMemory) {
						MOVAPS(safe.NextFastAddress(0), fpr.VSX(vregs));
					} else {
						MOVUPS(safe.NextFastAddress(0), fpr.VSX(vregs));
					}
				}
				if (safe.PrepareSlowWrite()) {
					// Rotate one element at a time through mips->temp and the write helper.
					MOVAPS(XMM0, fpr.VS(vregs));
					for (int i = 0; i < 4; i++) {
						MOVSS(MIPSSTATE_VAR(temp), XMM0);
						SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 3, 2, 1));
						safe.DoSlowWrite(safeMemFuncs.writeU32, MIPSSTATE_VAR(temp), i * 4);
					}
				}
				safe.Finish();
				gpr.UnlockAll();
				fpr.ReleaseSpillLocks();
				return;
			}

			// Even if we don't use real SIMD there's still 8 or 16 scalar float registers.
			fpr.MapRegsV(vregs, V_Quad, 0);

			JitSafeMem safe(this, rs, imm);
			OpArg dest;
			if (safe.PrepareWrite(dest, 16)) {
				for (int i = 0; i < 4; i++)
					MOVSS(safe.NextFastAddress(i * 4), fpr.VX(vregs[i]));
			}
			if (safe.PrepareSlowWrite()) {
				for (int i = 0; i < 4; i++) {
					MOVSS(MIPSSTATE_VAR(temp), fpr.VX(vregs[i]));
					safe.DoSlowWrite(safeMemFuncs.writeU32, MIPSSTATE_VAR(temp), i * 4);
				}
			}
			safe.Finish();

			gpr.UnlockAll();
			fpr.ReleaseSpillLocks();
		}
		break;

	default:
		DISABLE;
		break;
	}
}
2013-08-24 14:43:49 -07:00
// Compiles vzero (type 6) and vone (type 7): fills the destination vector
// with all zeros or all ones. Prefers a single SIMD register fill, falling
// back to broadcasting XMM0 into each scalar element.
void Jit::Comp_VVectorInit(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_XFER);

	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int type = (op >> 16) & 0xF;
	u8 dregs[4];
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// SIMD path: destination lives in one XMM register.
	if (fpr.TryMapRegsVS(dregs, sz, MAP_NOINIT | MAP_DIRTY)) {
		if (type == 6) {
			// vzero: xor the register with itself.
			XORPS(fpr.VSX(dregs), fpr.VS(dregs));
		} else if (type == 7) {
			// vone: load {1,1,1,1}.
			if (RipAccessible(&oneOneOneOne)) {
				MOVAPS(fpr.VSX(dregs), M(&oneOneOneOne));  // rip accessible
			} else {
				MOV(PTRBITS, R(TEMPREG), ImmPtr(&oneOneOneOne));
				MOVAPS(fpr.VSX(dregs), MatR(TEMPREG));
			}
		} else {
			DISABLE;
		}
		ApplyPrefixD(dregs, sz);
		fpr.ReleaseSpillLocks();
		return;
	}

	// Scalar path: put the fill value in XMM0, then copy to each element.
	switch (type) {
	case 6: // v=zeros; break;  //vzero
		XORPS(XMM0, R(XMM0));
		break;
	case 7: // v=ones; break;  //vone
		if (RipAccessible(&one)) {
			MOVSS(XMM0, M(&one));  // rip accessible
		} else {
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
			MOVSS(XMM0, MatR(TEMPREG));
		}
		break;
	default:
		DISABLE;
		break;
	}

	int n = GetNumVectorElements(sz);
	fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
	for (int i = 0; i < n; ++i)
		MOVSS(fpr.VX(dregs[i]), R(XMM0));

	ApplyPrefixD(dregs, sz);
	fpr.ReleaseSpillLocks();
}
2013-08-24 14:43:49 -07:00
// Compiles vidt: writes one row of the identity matrix into the destination
// vector - a 1.0f in the lane selected by the low bits of vd, zeros elsewhere.
void Jit::Comp_VIdt(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_XFER);
	if (js.HasUnknownPrefix())
		DISABLE;

	int vd = _VD;
	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	u8 dregs[4];
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// SIMD path: load the matching identity-matrix row in one move.
	if (fpr.TryMapRegsVS(dregs, sz, MAP_NOINIT | MAP_DIRTY)) {
		// The low bits of vd select which lane receives the 1.0f.
		int row = vd & (n - 1);
		if (RipAccessible(identityMatrix)) {
			MOVAPS(fpr.VSX(dregs), M(identityMatrix[row]));  // rip accessible
		} else {
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&identityMatrix[row]));
			MOVAPS(fpr.VSX(dregs), MatR(TEMPREG));
		}
		ApplyPrefixD(dregs, sz);
		fpr.ReleaseSpillLocks();
		return;
	}

	// Scalar path: XMM0 = 0.0f, XMM1 = 1.0f, then pick per element.
	XORPS(XMM0, R(XMM0));
	if (RipAccessible(&one)) {
		MOVSS(XMM1, M(&one));  // rip accessible
	} else {
		MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
		MOVSS(XMM1, MatR(TEMPREG));
	}
	fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
	switch (sz) {
	case V_Pair:
		MOVSS(fpr.VX(dregs[0]), R((vd & 1) == 0 ? XMM1 : XMM0));
		MOVSS(fpr.VX(dregs[1]), R((vd & 1) == 1 ? XMM1 : XMM0));
		break;
	case V_Quad:
		MOVSS(fpr.VX(dregs[0]), R((vd & 3) == 0 ? XMM1 : XMM0));
		MOVSS(fpr.VX(dregs[1]), R((vd & 3) == 1 ? XMM1 : XMM0));
		MOVSS(fpr.VX(dregs[2]), R((vd & 3) == 2 ? XMM1 : XMM0));
		MOVSS(fpr.VX(dregs[3]), R((vd & 3) == 3 ? XMM1 : XMM0));
		break;
	default:
		_dbg_assert_msg_(false, "Trying to interpret instruction that can't be interpreted");
		break;
	}
	ApplyPrefixD(dregs, sz);
	fpr.ReleaseSpillLocks();
}
2013-08-24 14:43:49 -07:00
// Compiles vdot: dot product of two vectors into a single destination.
// Uses DPPS when SSE4.1 is available, an SSE2 multiply/shuffle/add reduction
// when operands are already in SIMD form, and a scalar MULSS/ADDSS loop as
// the general fallback.
void Jit::Comp_VDot(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);

	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	// TODO: Force read one of them into regs? probably not.
	u8 sregs[4], tregs[4], dregs[1];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixT(tregs, sz, _VT);
	GetVectorRegsPrefixD(dregs, V_Single, _VD);

	// With SSE2, these won't really give any performance benefit on their own, but may reduce
	// conversion costs from/to SIMD form. However, the SSE4.1 DPPS may be worth it.
	// Benchmarking will have to decide whether to enable this on < SSE4.1. Also a HADDPS version
	// for SSE3 could be written.
	if (fpr.TryMapDirtyInInVS(dregs, V_Single, sregs, sz, tregs, sz)) {
		switch (sz) {
		case V_Pair:
			if (cpu_info.bSSE4_1) {
				// DPPS mask 0x31: multiply lanes 0-1, store result in lane 0.
				if (fpr.VSX(dregs) != fpr.VSX(sregs) && fpr.VSX(dregs) != fpr.VSX(tregs)) {
					MOVAPS(fpr.VSX(dregs), fpr.VS(sregs));
					DPPS(fpr.VSX(dregs), fpr.VS(tregs), 0x31);
				} else {
					// Destination aliases an input - go through XMM0.
					MOVAPS(XMM0, fpr.VS(sregs));
					DPPS(XMM0, fpr.VS(tregs), 0x31);
					MOVAPS(fpr.VSX(dregs), R(XMM0));
				}
			} else {
				// SSE2: multiply, then add lane 1 into lane 0.
				MOVAPS(XMM0, fpr.VS(sregs));
				MULPS(XMM0, fpr.VS(tregs));
				MOVAPS(R(XMM1), XMM0);
				SHUFPS(XMM1, R(XMM0), _MM_SHUFFLE(1, 1, 1, 1));
				ADDPS(XMM1, R(XMM0));
				MOVAPS(fpr.VS(dregs), XMM1);
			}
			break;
		case V_Triple:
			if (cpu_info.bSSE4_1) {
				// DPPS mask 0x71: multiply lanes 0-2, store result in lane 0.
				if (fpr.VSX(dregs) != fpr.VSX(sregs) && fpr.VSX(dregs) != fpr.VSX(tregs)) {
					MOVAPS(fpr.VSX(dregs), fpr.VS(sregs));
					DPPS(fpr.VSX(dregs), fpr.VS(tregs), 0x71);
				} else {
					MOVAPS(XMM0, fpr.VS(sregs));
					DPPS(XMM0, fpr.VS(tregs), 0x71);
					MOVAPS(fpr.VSX(dregs), R(XMM0));
				}
			} else {
				// SSE2: multiply, then fold lanes 1 and 2 into lane 0.
				MOVAPS(XMM0, fpr.VS(sregs));
				MULPS(XMM0, fpr.VS(tregs));
				MOVAPS(R(XMM1), XMM0);
				SHUFPS(XMM1, R(XMM0), _MM_SHUFFLE(3, 2, 1, 1));
				ADDSS(XMM1, R(XMM0));
				SHUFPS(XMM0, R(XMM1), _MM_SHUFFLE(3, 2, 2, 2));
				ADDSS(XMM1, R(XMM0));
				MOVAPS(fpr.VS(dregs), XMM1);
			}
			break;
		case V_Quad:
			if (cpu_info.bSSE4_1) {
				// DPPS mask 0xF1: multiply all four lanes, store result in lane 0.
				if (fpr.VSX(dregs) != fpr.VSX(sregs) && fpr.VSX(dregs) != fpr.VSX(tregs)) {
					MOVAPS(fpr.VSX(dregs), fpr.VS(sregs));
					DPPS(fpr.VSX(dregs), fpr.VS(tregs), 0xF1);
				} else {
					MOVAPS(XMM0, fpr.VS(sregs));
					DPPS(XMM0, fpr.VS(tregs), 0xF1);
					MOVAPS(fpr.VSX(dregs), R(XMM0));
				}
			} /* else if (cpu_info.bSSE3) {  // This is slower than the SSE2 solution on my Ivy!
				MOVAPS(XMM0, fpr.VS(sregs));
				MOVAPS(XMM1, fpr.VS(tregs));
				HADDPS(XMM0, R(XMM1));
				HADDPS(XMM0, R(XMM0));
				MOVAPS(fpr.VSX(dregs), R(XMM0));
			} */ else {
				// SSE2 horizontal sum: pairwise swap+add, then cross swap+add.
				MOVAPS(XMM0, fpr.VS(sregs));
				MOVAPS(XMM1, fpr.VS(tregs));
				MULPS(XMM0, R(XMM1));
				MOVAPS(XMM1, R(XMM0));
				SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(2, 3, 0, 1));
				ADDPS(XMM0, R(XMM1));
				MOVAPS(XMM1, R(XMM0));
				SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(0, 1, 2, 3));
				ADDSS(XMM0, R(XMM1));
				MOVAPS(fpr.VSX(dregs), R(XMM0));
			}
			break;
		default:
			DISABLE;
		}
		ApplyPrefixD(dregs, V_Single);
		fpr.ReleaseSpillLocks();
		return;
	}

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	fpr.SimpleRegsV(tregs, sz, 0);
	fpr.SimpleRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);

	// Accumulate directly in the destination register when no operand aliases it.
	X64Reg tempxreg = XMM0;
	if (IsOverlapSafe(dregs[0], 0, n, sregs, n, tregs)) {
		fpr.MapRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);
		tempxreg = fpr.VX(dregs[0]);
	}

	// Need to start with +0.0f so it doesn't result in -0.0f.
	MOVSS(tempxreg, fpr.V(sregs[0]));
	MULSS(tempxreg, fpr.V(tregs[0]));
	for (int i = 1; i < n; i++)
	{
		// sum += s[i]*t[i];
		MOVSS(XMM1, fpr.V(sregs[i]));
		MULSS(XMM1, fpr.V(tregs[i]));
		ADDSS(tempxreg, R(XMM1));
	}

	// If we accumulated in XMM0, copy the result into the destination now.
	if (!fpr.V(dregs[0]).IsSimpleReg(tempxreg)) {
		fpr.MapRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);
		MOVSS(fpr.V(dregs[0]), tempxreg);
	}

	ApplyPrefixD(dregs, V_Single);
	fpr.ReleaseSpillLocks();
}
2013-08-24 14:43:49 -07:00
// Compiles vhdp (homogeneous dot product): like vdot, except the last element
// of s is treated as 1.0, i.e. the final term adds t[n-1] instead of
// s[n-1]*t[n-1]. Scalar-only implementation.
void Jit::Comp_VHdp(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);
	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	u8 sregs[4], tregs[4], dregs[1];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixT(tregs, sz, _VT);
	GetVectorRegsPrefixD(dregs, V_Single, _VD);

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	fpr.SimpleRegsV(tregs, sz, 0);
	fpr.SimpleRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);

	// Accumulate directly in the destination register when no operand aliases it.
	X64Reg tempxreg = XMM0;
	if (IsOverlapSafe(dregs[0], 0, n, sregs, n, tregs)) {
		fpr.MapRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);
		tempxreg = fpr.VX(dregs[0]);
	}

	// Need to start with +0.0f so it doesn't result in -0.0f.
	MOVSS(tempxreg, fpr.V(sregs[0]));
	MULSS(tempxreg, fpr.V(tregs[0]));
	for (int i = 1; i < n; i++) {
		// sum += (i == n-1) ? t[i] : s[i]*t[i];
		if (i == n - 1) {
			ADDSS(tempxreg, fpr.V(tregs[i]));
		} else {
			MOVSS(XMM1, fpr.V(sregs[i]));
			MULSS(XMM1, fpr.V(tregs[i]));
			ADDSS(tempxreg, R(XMM1));
		}
	}

	// If we accumulated in XMM0, copy the result into the destination now.
	if (!fpr.V(dregs[0]).IsSimpleReg(tempxreg)) {
		fpr.MapRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);
		MOVSS(fpr.V(dregs[0]), tempxreg);
	}

	ApplyPrefixD(dregs, V_Single);
	fpr.ReleaseSpillLocks();
}
2013-08-24 14:43:49 -07:00
void Jit : : Comp_VCrossQuat ( MIPSOpcode op ) {
2019-02-03 14:01:51 -08:00
CONDITIONAL_DISABLE ( VFPU_VEC ) ;
2013-08-01 00:14:45 +02:00
if ( js . HasUnknownPrefix ( ) )
DISABLE ;
VectorSize sz = GetVecSize ( op ) ;
u8 sregs [ 4 ] , tregs [ 4 ] , dregs [ 4 ] ;
GetVectorRegs ( sregs , sz , _VS ) ;
GetVectorRegs ( tregs , sz , _VT ) ;
GetVectorRegs ( dregs , sz , _VD ) ;
if ( sz = = V_Triple ) {
// Cross product vcrsp.t
2014-11-27 00:18:19 +01:00
if ( fpr . TryMapDirtyInInVS ( dregs , sz , sregs , sz , tregs , sz ) ) {
2014-11-27 00:07:17 -08:00
MOVAPS ( XMM0 , fpr . VS ( tregs ) ) ;
MOVAPS ( XMM1 , fpr . VS ( sregs ) ) ;
2014-11-27 00:18:19 +01:00
SHUFPS ( XMM0 , R ( XMM0 ) , _MM_SHUFFLE ( 3 , 0 , 2 , 1 ) ) ;
SHUFPS ( XMM1 , R ( XMM1 ) , _MM_SHUFFLE ( 3 , 0 , 2 , 1 ) ) ;
2014-11-27 00:07:17 -08:00
MULPS ( XMM0 , fpr . VS ( sregs ) ) ;
MULPS ( XMM1 , fpr . VS ( tregs ) ) ;
2014-11-27 00:18:19 +01:00
SUBPS ( XMM0 , R ( XMM1 ) ) ;
SHUFPS ( XMM0 , R ( XMM0 ) , _MM_SHUFFLE ( 3 , 0 , 2 , 1 ) ) ;
2014-11-27 00:07:17 -08:00
MOVAPS ( fpr . VS ( dregs ) , XMM0 ) ;
2014-11-27 00:18:19 +01:00
fpr . ReleaseSpillLocks ( ) ;
return ;
}
2014-11-28 01:06:32 +01:00
// Flush SIMD.
fpr . SimpleRegsV ( sregs , sz , 0 ) ;
fpr . SimpleRegsV ( tregs , sz , 0 ) ;
fpr . SimpleRegsV ( dregs , sz , MAP_NOINIT | MAP_DIRTY ) ;
2013-08-01 00:14:45 +02:00
fpr . MapRegsV ( sregs , sz , 0 ) ;
// Compute X
MOVSS ( XMM0 , fpr . V ( sregs [ 1 ] ) ) ;
MULSS ( XMM0 , fpr . V ( tregs [ 2 ] ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 2 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 1 ] ) ) ;
SUBSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( fpr . V ( dregs [ 0 ] ) , XMM0 ) ;
// Compute Y
MOVSS ( XMM0 , fpr . V ( sregs [ 2 ] ) ) ;
MULSS ( XMM0 , fpr . V ( tregs [ 0 ] ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 0 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 2 ] ) ) ;
SUBSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( fpr . V ( dregs [ 1 ] ) , XMM0 ) ;
// Compute Z
MOVSS ( XMM0 , fpr . V ( sregs [ 0 ] ) ) ;
MULSS ( XMM0 , fpr . V ( tregs [ 1 ] ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 1 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 0 ] ) ) ;
SUBSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( fpr . V ( dregs [ 2 ] ) , XMM0 ) ;
} else if ( sz = = V_Quad ) {
2014-11-28 01:06:32 +01:00
// Flush SIMD.
fpr . SimpleRegsV ( sregs , sz , 0 ) ;
fpr . SimpleRegsV ( tregs , sz , 0 ) ;
fpr . SimpleRegsV ( dregs , sz , MAP_NOINIT | MAP_DIRTY ) ;
2014-11-16 18:37:16 +01:00
// Quaternion product vqmul.q
2013-08-01 00:14:45 +02:00
fpr . MapRegsV ( sregs , sz , 0 ) ;
// Compute X
2014-11-16 18:37:16 +01:00
// d[0] = s[0] * t[3] + s[1] * t[2] - s[2] * t[1] + s[3] * t[0];
2013-08-01 00:14:45 +02:00
MOVSS ( XMM0 , fpr . V ( sregs [ 0 ] ) ) ;
MULSS ( XMM0 , fpr . V ( tregs [ 3 ] ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 1 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 2 ] ) ) ;
ADDSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 2 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 1 ] ) ) ;
SUBSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 3 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 0 ] ) ) ;
ADDSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( fpr . V ( dregs [ 0 ] ) , XMM0 ) ;
// Compute Y
2014-11-16 18:37:16 +01:00
//d[1] = s[1] * t[3] + s[2] * t[0] + s[3] * t[1] - s[0] * t[2];
2013-08-01 00:14:45 +02:00
MOVSS ( XMM0 , fpr . V ( sregs [ 1 ] ) ) ;
MULSS ( XMM0 , fpr . V ( tregs [ 3 ] ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 2 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 0 ] ) ) ;
ADDSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 3 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 1 ] ) ) ;
ADDSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 0 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 2 ] ) ) ;
SUBSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( fpr . V ( dregs [ 1 ] ) , XMM0 ) ;
// Compute Z
2014-11-16 18:37:16 +01:00
//d[2] = s[0] * t[1] - s[1] * t[0] + s[2] * t[3] + s[3] * t[2];
2013-08-01 00:14:45 +02:00
MOVSS ( XMM0 , fpr . V ( sregs [ 0 ] ) ) ;
MULSS ( XMM0 , fpr . V ( tregs [ 1 ] ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 1 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 0 ] ) ) ;
SUBSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 2 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 3 ] ) ) ;
ADDSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 3 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 2 ] ) ) ;
ADDSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( fpr . V ( dregs [ 2 ] ) , XMM0 ) ;
// Compute W
2014-11-16 18:37:16 +01:00
//d[3] = -s[0] * t[0] - s[1] * t[1] - s[2] * t[2] + s[3] * t[3];
2013-08-01 00:14:45 +02:00
MOVSS ( XMM0 , fpr . V ( sregs [ 3 ] ) ) ;
MULSS ( XMM0 , fpr . V ( tregs [ 3 ] ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 1 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 1 ] ) ) ;
SUBSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( XMM1 , fpr . V ( sregs [ 2 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 2 ] ) ) ;
2014-11-16 18:37:16 +01:00
SUBSS ( XMM0 , R ( XMM1 ) ) ;
2013-08-01 00:14:45 +02:00
MOVSS ( XMM1 , fpr . V ( sregs [ 0 ] ) ) ;
MULSS ( XMM1 , fpr . V ( tregs [ 0 ] ) ) ;
SUBSS ( XMM0 , R ( XMM1 ) ) ;
MOVSS ( fpr . V ( dregs [ 3 ] ) , XMM0 ) ;
}
fpr . ReleaseSpillLocks ( ) ;
}
2013-08-24 14:43:49 -07:00
void Jit : : Comp_Vcmov ( MIPSOpcode op ) {
2019-02-03 14:01:51 -08:00
CONDITIONAL_DISABLE ( VFPU_COMP ) ;
2013-08-06 13:28:37 +02:00
if ( js . HasUnknownPrefix ( ) )
DISABLE ;
VectorSize sz = GetVecSize ( op ) ;
int n = GetNumVectorElements ( sz ) ;
u8 sregs [ 4 ] , dregs [ 4 ] ;
GetVectorRegsPrefixS ( sregs , sz , _VS ) ;
GetVectorRegsPrefixD ( dregs , sz , _VD ) ;
int tf = ( op > > 19 ) & 1 ;
int imm3 = ( op > > 16 ) & 7 ;
2014-11-10 21:58:08 -08:00
// Flush SIMD.
fpr . SimpleRegsV ( sregs , sz , 0 ) ;
2013-08-06 13:28:37 +02:00
for ( int i = 0 ; i < n ; + + i ) {
// Simplification: Disable if overlap unsafe
if ( ! IsOverlapSafeAllowS ( dregs [ i ] , i , n , sregs ) ) {
DISABLE ;
}
}
if ( imm3 < 6 ) {
2014-06-27 22:30:45 -07:00
gpr . MapReg ( MIPS_REG_VFPUCC , true , false ) ;
2013-08-06 13:28:37 +02:00
fpr . MapRegsV ( dregs , sz , MAP_DIRTY ) ;
2014-06-27 22:30:45 -07:00
// Test one bit of CC. This bit decides whether none or all subregisters are copied.
TEST ( 32 , gpr . R ( MIPS_REG_VFPUCC ) , Imm32 ( 1 < < imm3 ) ) ;
2013-08-06 13:28:37 +02:00
FixupBranch skip = J_CC ( tf ? CC_NZ : CC_Z , true ) ;
for ( int i = 0 ; i < n ; i + + ) {
MOVSS ( fpr . VX ( dregs [ i ] ) , fpr . V ( sregs [ i ] ) ) ;
}
SetJumpTarget ( skip ) ;
} else {
2014-06-27 22:30:45 -07:00
gpr . MapReg ( MIPS_REG_VFPUCC , true , false ) ;
2013-08-06 13:28:37 +02:00
fpr . MapRegsV ( dregs , sz , MAP_DIRTY ) ;
2014-06-27 22:30:45 -07:00
// Look at the bottom four bits of CC to individually decide if the subregisters should be copied.
2013-08-06 13:28:37 +02:00
for ( int i = 0 ; i < n ; i + + ) {
2014-06-27 22:30:45 -07:00
TEST ( 32 , gpr . R ( MIPS_REG_VFPUCC ) , Imm32 ( 1 < < i ) ) ;
2013-08-06 13:28:37 +02:00
FixupBranch skip = J_CC ( tf ? CC_NZ : CC_Z , true ) ;
MOVSS ( fpr . VX ( dregs [ i ] ) , fpr . V ( sregs [ i ] ) ) ;
SetJumpTarget ( skip ) ;
}
}
2013-12-10 13:06:57 +01:00
ApplyPrefixD ( dregs , sz ) ;
2013-08-06 13:28:37 +02:00
fpr . ReleaseSpillLocks ( ) ;
}
2015-03-29 12:23:47 -07:00
static s32 DoVminSS ( s32 treg ) {
2017-07-07 11:28:49 +02:00
s32 sreg = currentMIPS - > temp ;
2015-03-29 12:23:47 -07:00
// If both are negative, we flip the comparison (not two's compliment.)
if ( sreg < 0 & & treg < 0 ) {
// If at least one side is NAN, we take the highest mantissa bits.
return treg < sreg ? sreg : treg ;
} else {
// Otherwise, we take the lowest value (negative or lowest mantissa.)
return treg > sreg ? sreg : treg ;
}
}
// Slow-path helper for vmax with NaN involved; mirror image of DoVminSS.
static s32 DoVmaxSS(s32 treg) {
	s32 sreg = currentMIPS->temp;

	// This is the same logic as vmin, just reversed.
	if (sreg < 0 && treg < 0) {
		return treg < sreg ? treg : sreg;
	}
	return treg > sreg ? treg : sreg;
}
2013-08-24 14:43:49 -07:00
void Jit : : Comp_VecDo3 ( MIPSOpcode op ) {
2019-02-03 14:01:51 -08:00
CONDITIONAL_DISABLE ( VFPU_VEC ) ;
2013-02-15 00:29:40 -08:00
2013-02-20 00:03:47 +01:00
if ( js . HasUnknownPrefix ( ) )
DISABLE ;
2013-02-15 00:29:40 -08:00
2013-08-10 18:32:48 +02:00
// Check that we can support the ops, and prepare temporary values for ops that need it.
2015-03-29 12:23:47 -07:00
bool allowSIMD = true ;
2013-08-10 18:32:48 +02:00
switch ( op > > 26 ) {
2013-02-15 00:29:40 -08:00
case 24 : //VFPU0
2013-08-10 18:32:48 +02:00
switch ( ( op > > 23 ) & 7 ) {
2013-02-15 00:29:40 -08:00
case 0 : // d[i] = s[i] + t[i]; break; //vadd
case 1 : // d[i] = s[i] - t[i]; break; //vsub
case 7 : // d[i] = s[i] / t[i]; break; //vdiv
break ;
2013-08-10 18:32:48 +02:00
default :
DISABLE ;
2013-02-15 00:29:40 -08:00
}
break ;
case 25 : //VFPU1
2013-08-10 18:32:48 +02:00
switch ( ( op > > 23 ) & 7 ) {
2013-02-15 00:29:40 -08:00
case 0 : // d[i] = s[i] * t[i]; break; //vmul
break ;
2013-08-10 18:32:48 +02:00
default :
DISABLE ;
2013-02-15 00:29:40 -08:00
}
break ;
2013-04-27 20:52:42 +02:00
case 27 : //VFPU3
2013-08-10 23:32:12 +02:00
switch ( ( op > > 23 ) & 7 ) {
2013-04-27 20:52:42 +02:00
case 2 : // vmin
case 3 : // vmax
2015-03-29 12:23:47 -07:00
allowSIMD = false ;
2013-04-27 20:52:42 +02:00
break ;
2013-08-10 18:32:48 +02:00
case 6 : // vsge
case 7 : // vslt
break ;
default :
DISABLE ;
2013-04-27 20:52:42 +02:00
}
break ;
default :
2013-08-10 18:32:48 +02:00
DISABLE ;
2013-04-27 20:52:42 +02:00
break ;
2013-02-15 00:29:40 -08:00
}
2013-02-20 00:03:47 +01:00
VectorSize sz = GetVecSize ( op ) ;
2013-02-15 00:29:40 -08:00
int n = GetNumVectorElements ( sz ) ;
2013-02-16 03:15:22 -08:00
2013-02-20 00:03:47 +01:00
u8 sregs [ 4 ] , tregs [ 4 ] , dregs [ 4 ] ;
GetVectorRegsPrefixS ( sregs , sz , _VS ) ;
GetVectorRegsPrefixT ( tregs , sz , _VT ) ;
GetVectorRegsPrefixD ( dregs , sz , _VD ) ;
2015-03-29 12:23:47 -07:00
if ( allowSIMD & & fpr . TryMapDirtyInInVS ( dregs , sz , sregs , sz , tregs , sz ) ) {
2014-11-14 23:51:48 -08:00
void ( XEmitter : : * opFunc ) ( X64Reg , OpArg ) = nullptr ;
bool symmetric = false ;
switch ( op > > 26 ) {
case 24 : //VFPU0
switch ( ( op > > 23 ) & 7 ) {
case 0 : // d[i] = s[i] + t[i]; break; //vadd
opFunc = & XEmitter : : ADDPS ;
symmetric = true ;
break ;
case 1 : // d[i] = s[i] - t[i]; break; //vsub
opFunc = & XEmitter : : SUBPS ;
break ;
case 7 : // d[i] = s[i] / t[i]; break; //vdiv
opFunc = & XEmitter : : DIVPS ;
break ;
}
break ;
case 25 : //VFPU1
switch ( ( op > > 23 ) & 7 )
{
case 0 : // d[i] = s[i] * t[i]; break; //vmul
opFunc = & XEmitter : : MULPS ;
symmetric = true ;
break ;
}
break ;
case 27 : //VFPU3
switch ( ( op > > 23 ) & 7 )
{
case 2 : // vmin
2015-03-29 12:23:47 -07:00
// TODO: Mishandles NaN. Disabled for now.
2014-11-27 00:07:17 -08:00
MOVAPS ( XMM1 , fpr . VS ( sregs ) ) ;
MINPS ( XMM1 , fpr . VS ( tregs ) ) ;
MOVAPS ( fpr . VSX ( dregs ) , R ( XMM1 ) ) ;
2014-11-14 23:51:48 -08:00
break ;
case 3 : // vmax
2015-03-29 12:23:47 -07:00
// TODO: Mishandles NaN. Disabled for now.
2014-11-27 00:07:17 -08:00
MOVAPS ( XMM1 , fpr . VS ( sregs ) ) ;
MAXPS ( XMM1 , fpr . VS ( tregs ) ) ;
MOVAPS ( fpr . VSX ( dregs ) , R ( XMM1 ) ) ;
2014-11-14 23:51:48 -08:00
break ;
case 6 : // vsge
2015-03-29 12:23:47 -07:00
MOVAPS ( XMM0 , fpr . VS ( tregs ) ) ;
2014-11-27 00:07:17 -08:00
MOVAPS ( XMM1 , fpr . VS ( sregs ) ) ;
2015-03-29 12:23:47 -07:00
CMPPS ( XMM0 , R ( XMM1 ) , CMP_ORD ) ;
2014-11-27 00:07:17 -08:00
CMPPS ( XMM1 , fpr . VS ( tregs ) , CMP_NLT ) ;
2015-03-29 12:23:47 -07:00
ANDPS ( XMM1 , R ( XMM0 ) ) ;
2017-07-05 14:10:36 +02:00
MOV ( PTRBITS , R ( TEMPREG ) , ImmPtr ( & oneOneOneOne ) ) ;
ANDPS ( XMM1 , MatR ( TEMPREG ) ) ;
2014-11-27 00:07:17 -08:00
MOVAPS ( fpr . VSX ( dregs ) , R ( XMM1 ) ) ;
2014-11-14 23:51:48 -08:00
break ;
case 7 : // vslt
2014-11-27 00:07:17 -08:00
MOVAPS ( XMM1 , fpr . VS ( sregs ) ) ;
CMPPS ( XMM1 , fpr . VS ( tregs ) , CMP_LT ) ;
2017-07-05 14:10:36 +02:00
MOV ( PTRBITS , R ( TEMPREG ) , ImmPtr ( & oneOneOneOne ) ) ;
ANDPS ( XMM1 , MatR ( TEMPREG ) ) ;
2014-11-27 00:07:17 -08:00
MOVAPS ( fpr . VSX ( dregs ) , R ( XMM1 ) ) ;
2014-11-14 23:51:48 -08:00
break ;
}
break ;
}
if ( opFunc ! = nullptr ) {
2014-11-27 00:07:17 -08:00
if ( fpr . VSX ( dregs ) ! = fpr . VSX ( tregs ) ) {
if ( fpr . VSX ( dregs ) ! = fpr . VSX ( sregs ) ) {
MOVAPS ( fpr . VSX ( dregs ) , fpr . VS ( sregs ) ) ;
2014-11-14 23:51:48 -08:00
}
2014-11-27 00:07:17 -08:00
( this - > * opFunc ) ( fpr . VSX ( dregs ) , fpr . VS ( tregs ) ) ;
2014-11-14 23:51:48 -08:00
} else if ( symmetric ) {
// We already know d = t.
2014-11-27 00:07:17 -08:00
( this - > * opFunc ) ( fpr . VSX ( dregs ) , fpr . VS ( sregs ) ) ;
2014-11-14 23:51:48 -08:00
} else {
2014-11-27 00:07:17 -08:00
MOVAPS ( XMM1 , fpr . VS ( sregs ) ) ;
( this - > * opFunc ) ( XMM1 , fpr . VS ( tregs ) ) ;
MOVAPS ( fpr . VSX ( dregs ) , R ( XMM1 ) ) ;
2014-11-14 23:51:48 -08:00
}
}
ApplyPrefixD ( dregs , sz ) ;
fpr . ReleaseSpillLocks ( ) ;
return ;
}
2014-11-10 21:58:08 -08:00
// Flush SIMD.
fpr . SimpleRegsV ( sregs , sz , 0 ) ;
fpr . SimpleRegsV ( tregs , sz , 0 ) ;
fpr . SimpleRegsV ( dregs , sz , MAP_NOINIT | MAP_DIRTY ) ;
2013-02-16 03:15:22 -08:00
X64Reg tempxregs [ 4 ] ;
for ( int i = 0 ; i < n ; + + i )
{
2013-02-16 21:26:32 -08:00
if ( ! IsOverlapSafeAllowS ( dregs [ i ] , i , n , sregs , n , tregs ) )
2013-02-16 03:15:22 -08:00
{
// On 32-bit we only have 6 xregs for mips regs, use XMM0/XMM1 if possible.
2015-03-29 12:23:47 -07:00
// But for vmin/vmax/vsge, we need XMM0/XMM1, so avoid.
if ( i < 2 & & ( op > > 26 ) ! = 27 )
2013-02-16 03:15:22 -08:00
tempxregs [ i ] = ( X64Reg ) ( XMM0 + i ) ;
else
{
2013-02-17 23:15:16 -08:00
int reg = fpr . GetTempV ( ) ;
fpr . MapRegV ( reg , MAP_NOINIT | MAP_DIRTY ) ;
fpr . SpillLockV ( reg ) ;
tempxregs [ i ] = fpr . VX ( reg ) ;
2013-02-16 03:15:22 -08:00
}
}
else
{
2014-11-29 00:14:08 -08:00
fpr . MapRegV ( dregs [ i ] , dregs [ i ] = = sregs [ i ] ? MAP_DIRTY : MAP_NOINIT ) ;
2013-02-16 03:15:22 -08:00
fpr . SpillLockV ( dregs [ i ] ) ;
tempxregs [ i ] = fpr . VX ( dregs [ i ] ) ;
}
}
2013-02-15 00:29:40 -08:00
for ( int i = 0 ; i < n ; + + i )
2013-02-16 10:19:05 -08:00
{
if ( ! fpr . V ( sregs [ i ] ) . IsSimpleReg ( tempxregs [ i ] ) )
MOVSS ( tempxregs [ i ] , fpr . V ( sregs [ i ] ) ) ;
}
2013-04-27 20:52:42 +02:00
2013-08-10 18:32:48 +02:00
for ( int i = 0 ; i < n ; + + i ) {
switch ( op > > 26 ) {
case 24 : //VFPU0
switch ( ( op > > 23 ) & 7 ) {
case 0 : // d[i] = s[i] + t[i]; break; //vadd
ADDSS ( tempxregs [ i ] , fpr . V ( tregs [ i ] ) ) ;
break ;
case 1 : // d[i] = s[i] - t[i]; break; //vsub
SUBSS ( tempxregs [ i ] , fpr . V ( tregs [ i ] ) ) ;
break ;
case 7 : // d[i] = s[i] / t[i]; break; //vdiv
DIVSS ( tempxregs [ i ] , fpr . V ( tregs [ i ] ) ) ;
break ;
}
break ;
case 25 : //VFPU1
switch ( ( op > > 23 ) & 7 )
{
case 0 : // d[i] = s[i] * t[i]; break; //vmul
MULSS ( tempxregs [ i ] , fpr . V ( tregs [ i ] ) ) ;
break ;
}
break ;
case 27 : //VFPU3
2013-08-10 23:32:12 +02:00
switch ( ( op > > 23 ) & 7 )
2013-08-10 18:32:48 +02:00
{
case 2 : // vmin
2015-03-29 12:23:47 -07:00
{
MOVSS ( XMM0 , fpr . V ( tregs [ i ] ) ) ;
UCOMISS ( tempxregs [ i ] , R ( XMM0 ) ) ;
FixupBranch skip = J_CC ( CC_NP , true ) ;
2017-07-07 11:28:49 +02:00
MOVSS ( MIPSSTATE_VAR ( temp ) , tempxregs [ i ] ) ;
2015-03-29 12:23:47 -07:00
MOVD_xmm ( R ( EAX ) , XMM0 ) ;
CallProtectedFunction ( & DoVminSS , R ( EAX ) ) ;
MOVD_xmm ( tempxregs [ i ] , R ( EAX ) ) ;
FixupBranch finish = J ( ) ;
SetJumpTarget ( skip ) ;
MINSS ( tempxregs [ i ] , R ( XMM0 ) ) ;
SetJumpTarget ( finish ) ;
}
2013-08-10 18:32:48 +02:00
break ;
case 3 : // vmax
2015-03-29 12:23:47 -07:00
{
MOVSS ( XMM0 , fpr . V ( tregs [ i ] ) ) ;
UCOMISS ( tempxregs [ i ] , R ( XMM0 ) ) ;
FixupBranch skip = J_CC ( CC_NP , true ) ;
2017-07-07 11:28:49 +02:00
MOVSS ( MIPSSTATE_VAR ( temp ) , tempxregs [ i ] ) ;
2015-03-29 12:23:47 -07:00
MOVD_xmm ( R ( EAX ) , XMM0 ) ;
CallProtectedFunction ( & DoVmaxSS , R ( EAX ) ) ;
MOVD_xmm ( tempxregs [ i ] , R ( EAX ) ) ;
FixupBranch finish = J ( ) ;
SetJumpTarget ( skip ) ;
MAXSS ( tempxregs [ i ] , R ( XMM0 ) ) ;
SetJumpTarget ( finish ) ;
}
2013-08-10 18:32:48 +02:00
break ;
case 6 : // vsge
2015-03-29 12:23:47 -07:00
// We can't just reverse, because of 0/-0.
MOVSS ( XMM0 , fpr . V ( tregs [ i ] ) ) ;
MOVSS ( XMM1 , R ( tempxregs [ i ] ) ) ;
CMPORDSS ( XMM1 , R ( XMM0 ) ) ;
CMPNLTSS ( tempxregs [ i ] , R ( XMM0 ) ) ;
ANDPS ( tempxregs [ i ] , R ( XMM1 ) ) ;
2017-07-05 14:10:36 +02:00
MOV ( PTRBITS , R ( TEMPREG ) , ImmPtr ( & oneOneOneOne ) ) ;
ANDPS ( tempxregs [ i ] , MatR ( TEMPREG ) ) ;
2013-08-10 18:32:48 +02:00
break ;
case 7 : // vslt
CMPLTSS ( tempxregs [ i ] , fpr . V ( tregs [ i ] ) ) ;
2017-07-05 14:10:36 +02:00
MOV ( PTRBITS , R ( TEMPREG ) , ImmPtr ( & oneOneOneOne ) ) ;
ANDPS ( tempxregs [ i ] , MatR ( TEMPREG ) ) ;
2013-08-10 18:32:48 +02:00
break ;
}
break ;
}
}
2013-04-27 20:52:42 +02:00
2013-02-15 00:29:40 -08:00
for ( int i = 0 ; i < n ; + + i )
2013-02-16 03:15:22 -08:00
{
2013-02-16 10:19:05 -08:00
if ( ! fpr . V ( dregs [ i ] ) . IsSimpleReg ( tempxregs [ i ] ) )
2013-02-16 03:15:22 -08:00
MOVSS ( fpr . V ( dregs [ i ] ) , tempxregs [ i ] ) ;
}
2013-02-15 00:29:40 -08:00
2013-02-18 00:09:19 -08:00
ApplyPrefixD ( dregs , sz ) ;
2013-02-15 00:29:40 -08:00
fpr . ReleaseSpillLocks ( ) ;
}
2017-08-31 01:14:51 +02:00
alignas ( 16 ) static const u32 vcmpMask [ 4 ] [ 4 ] = {
2014-11-09 19:24:37 -08:00
{ 0x00000031 , 0x00000000 , 0x00000000 , 0x00000000 } ,
2014-11-10 22:26:41 -08:00
{ 0x00000011 , 0x00000012 , 0x00000000 , 0x00000000 } ,
{ 0x00000011 , 0x00000012 , 0x00000014 , 0x00000000 } ,
{ 0x00000011 , 0x00000012 , 0x00000014 , 0x00000018 } ,
2014-11-09 19:24:37 -08:00
} ;
2013-08-24 14:43:49 -07:00
void Jit : : Comp_Vcmp ( MIPSOpcode op ) {
2019-02-03 14:01:51 -08:00
CONDITIONAL_DISABLE ( VFPU_COMP ) ;
2013-07-31 22:29:16 +02:00
if ( js . HasUnknownPrefix ( ) )
DISABLE ;
VectorSize sz = GetVecSize ( op ) ;
int n = GetNumVectorElements ( sz ) ;
VCondition cond = ( VCondition ) ( op & 0xF ) ;
u8 sregs [ 4 ] , tregs [ 4 ] ;
GetVectorRegsPrefixS ( sregs , sz , _VS ) ;
GetVectorRegsPrefixT ( tregs , sz , _VT ) ;
// Some, we just fall back to the interpreter.
switch ( cond ) {
case VC_EI : // c = my_isinf(s[i]); break;
case VC_NI : // c = !my_isinf(s[i]); break;
DISABLE ;
2013-11-12 00:18:49 +01:00
break ;
2013-11-12 14:07:48 +01:00
case VC_ES : // c = my_isnan(s[i]) || my_isinf(s[i]); break; // Tekken Dark Resurrection
case VC_NS : // c = !my_isnan(s[i]) && !my_isinf(s[i]); break;
2013-11-12 00:18:49 +01:00
case VC_EN : // c = my_isnan(s[i]); break;
case VC_NN : // c = !my_isnan(s[i]); break;
2013-11-12 14:07:48 +01:00
if ( _VS ! = _VT )
DISABLE ;
break ;
2013-10-13 12:12:53 -07:00
default :
break ;
2013-07-31 22:29:16 +02:00
}
// First, let's get the trivial ones.
2013-08-08 21:03:40 +02:00
static const int true_bits [ 4 ] = { 0x31 , 0x33 , 0x37 , 0x3f } ;
if ( cond = = VC_TR ) {
2014-06-27 22:30:45 -07:00
gpr . MapReg ( MIPS_REG_VFPUCC , true , true ) ;
OR ( 32 , gpr . R ( MIPS_REG_VFPUCC ) , Imm32 ( true_bits [ n - 1 ] ) ) ;
2013-08-08 21:03:40 +02:00
return ;
} else if ( cond = = VC_FL ) {
2014-06-27 22:30:45 -07:00
gpr . MapReg ( MIPS_REG_VFPUCC , true , true ) ;
AND ( 32 , gpr . R ( MIPS_REG_VFPUCC ) , Imm32 ( ~ true_bits [ n - 1 ] ) ) ;
2013-08-08 21:03:40 +02:00
return ;
}
2013-07-31 22:29:16 +02:00
2014-11-09 19:24:37 -08:00
if ( n > 1 )
gpr . FlushLockX ( ECX ) ;
// Start with zero in each lane for the compare to zero.
2014-11-10 22:26:41 -08:00
if ( cond = = VC_EZ | | cond = = VC_NZ ) {
XORPS ( XMM0 , R ( XMM0 ) ) ;
if ( n > 1 ) {
XORPS ( XMM1 , R ( XMM1 ) ) ;
}
}
2014-11-09 19:24:37 -08:00
bool inverse = false ;
if ( cond = = VC_GE | | cond = = VC_GT ) {
// We flip, and we need them in regs so we don't clear the high lanes.
2014-11-10 21:58:08 -08:00
fpr . SimpleRegsV ( sregs , sz , 0 ) ;
2014-11-09 19:24:37 -08:00
fpr . MapRegsV ( tregs , sz , 0 ) ;
} else {
2014-11-10 21:58:08 -08:00
fpr . SimpleRegsV ( tregs , sz , 0 ) ;
2014-11-09 19:24:37 -08:00
fpr . MapRegsV ( sregs , sz , 0 ) ;
}
2013-07-31 22:29:16 +02:00
2014-11-10 22:26:41 -08:00
// We go backwards because it's more convenient to put things in the right lanes.
2013-08-08 21:03:40 +02:00
int affected_bits = ( 1 < < 4 ) | ( 1 < < 5 ) ; // 4 and 5
2014-11-10 22:26:41 -08:00
for ( int i = n - 1 ; i > = 0 ; - - i ) {
// Alternate between XMM0 and XMM1
X64Reg reg = i = = 1 | | i = = 3 ? XMM1 : XMM0 ;
if ( ( i = = 0 | | i = = 1 ) & & n > 2 ) {
// We need to swap lanes... this also puts them in the right place.
SHUFPS ( reg , R ( reg ) , _MM_SHUFFLE ( 3 , 2 , 0 , 1 ) ) ;
2014-11-09 19:24:37 -08:00
}
2013-07-31 22:29:16 +02:00
// Let's only handle the easy ones, and fall back on the interpreter for the rest.
bool compareTwo = false ;
bool compareToZero = false ;
int comparison = - 1 ;
bool flip = false ;
2013-11-12 00:18:49 +01:00
2013-07-31 22:29:16 +02:00
switch ( cond ) {
2013-11-12 14:07:48 +01:00
case VC_ES :
2014-11-10 22:15:18 -08:00
comparison = - 1 ; // We will do the compare at the end. XMM1 will have the bits.
2014-11-10 22:26:41 -08:00
MOVSS ( reg , fpr . V ( sregs [ i ] ) ) ;
2013-11-12 14:07:48 +01:00
break ;
case VC_NS :
2014-11-10 22:15:18 -08:00
comparison = - 1 ; // We will do the compare at the end. XMM1 will have the bits.
2014-11-10 22:26:41 -08:00
MOVSS ( reg , fpr . V ( sregs [ i ] ) ) ;
2014-11-09 19:24:37 -08:00
// Note that we do this all at once at the end.
inverse = true ;
2013-11-12 14:07:48 +01:00
break ;
2013-11-12 00:18:49 +01:00
case VC_EN :
comparison = CMP_UNORD ;
compareTwo = true ;
break ;
case VC_NN :
comparison = CMP_UNORD ;
compareTwo = true ;
2014-11-09 19:24:37 -08:00
// Note that we do this all at once at the end.
2013-11-12 00:18:49 +01:00
inverse = true ;
break ;
2013-07-31 22:29:16 +02:00
case VC_EQ : // c = s[i] == t[i]; break;
2013-08-06 11:09:09 +02:00
comparison = CMP_EQ ;
2013-07-31 22:29:16 +02:00
compareTwo = true ;
break ;
case VC_LT : // c = s[i] < t[i]; break;
2013-08-06 11:09:09 +02:00
comparison = CMP_LT ;
2013-07-31 22:29:16 +02:00
compareTwo = true ;
break ;
case VC_LE : // c = s[i] <= t[i]; break;
2013-08-06 11:09:09 +02:00
comparison = CMP_LE ;
2013-07-31 22:29:16 +02:00
compareTwo = true ;
break ;
case VC_NE : // c = s[i] != t[i]; break;
2013-08-06 11:09:09 +02:00
comparison = CMP_NEQ ;
2013-07-31 22:29:16 +02:00
compareTwo = true ;
break ;
case VC_GE : // c = s[i] >= t[i]; break;
2013-08-06 11:09:09 +02:00
comparison = CMP_LE ;
2013-07-31 22:29:16 +02:00
flip = true ;
compareTwo = true ;
break ;
case VC_GT : // c = s[i] > t[i]; break;
2013-08-06 11:09:09 +02:00
comparison = CMP_LT ;
2013-07-31 22:29:16 +02:00
flip = true ;
compareTwo = true ;
break ;
case VC_EZ : // c = s[i] == 0.0f || s[i] == -0.0f; break;
2013-08-06 11:09:09 +02:00
comparison = CMP_EQ ;
2013-07-31 22:29:16 +02:00
compareToZero = true ;
break ;
case VC_NZ : // c = s[i] != 0; break;
2013-08-06 11:09:09 +02:00
comparison = CMP_NEQ ;
2013-07-31 22:29:16 +02:00
compareToZero = true ;
break ;
default :
DISABLE ;
}
2013-11-12 14:07:48 +01:00
if ( comparison ! = - 1 ) {
if ( compareTwo ) {
if ( ! flip ) {
2014-11-10 22:26:41 -08:00
MOVSS ( reg , fpr . V ( sregs [ i ] ) ) ;
CMPSS ( reg , fpr . V ( tregs [ i ] ) , comparison ) ;
2013-11-12 14:07:48 +01:00
} else {
2014-11-10 22:26:41 -08:00
MOVSS ( reg , fpr . V ( tregs [ i ] ) ) ;
CMPSS ( reg , fpr . V ( sregs [ i ] ) , comparison ) ;
2013-11-12 14:07:48 +01:00
}
} else if ( compareToZero ) {
2014-11-10 22:26:41 -08:00
CMPSS ( reg , fpr . V ( sregs [ i ] ) , comparison ) ;
2013-07-31 22:29:16 +02:00
}
2013-11-12 00:18:49 +01:00
}
2013-08-08 21:03:40 +02:00
2013-07-31 22:29:16 +02:00
affected_bits | = 1 < < i ;
}
2013-08-06 11:49:10 +02:00
if ( n > 1 ) {
2014-11-09 19:24:37 -08:00
XOR ( 32 , R ( ECX ) , R ( ECX ) ) ;
2014-11-10 22:26:41 -08:00
// This combines them together.
UNPCKLPS ( XMM0 , R ( XMM1 ) ) ;
// Finalize the comparison for ES/NS.
if ( cond = = VC_ES | | cond = = VC_NS ) {
2017-07-05 15:06:44 +02:00
MOV ( PTRBITS , R ( TEMPREG ) , ImmPtr ( & fourinfnan ) ) ;
ANDPS ( XMM0 , MatR ( TEMPREG ) ) ;
PCMPEQD ( XMM0 , MatR ( TEMPREG ) ) ; // Integer comparison
2014-11-10 22:26:41 -08:00
// It's inversed below for NS.
2014-11-09 19:24:37 -08:00
}
2014-11-10 22:26:41 -08:00
if ( inverse ) {
2017-07-05 15:06:44 +02:00
// The canonical way to generate a bunch of ones, see https://stackoverflow.com/questions/35085059/what-are-the-best-instruction-sequences-to-generate-vector-constants-on-the-fly
PCMPEQW ( XMM1 , R ( XMM1 ) ) ;
XORPS ( XMM0 , R ( XMM1 ) ) ;
2014-11-10 22:26:41 -08:00
}
2017-07-07 15:46:14 +02:00
MOV ( PTRBITS , R ( TEMPREG ) , ImmPtr ( & vcmpMask [ n - 1 ] ) ) ;
ANDPS ( XMM0 , MatR ( TEMPREG ) ) ;
2017-07-05 15:59:17 +02:00
MOVAPS ( MIPSSTATE_VAR ( vcmpResult ) , XMM0 ) ;
2014-11-09 19:24:37 -08:00
2017-07-05 15:59:17 +02:00
MOV ( 32 , R ( TEMPREG ) , MIPSSTATE_VAR ( vcmpResult [ 0 ] ) ) ;
2014-11-09 19:24:37 -08:00
for ( int i = 1 ; i < n ; + + i ) {
2017-07-07 12:59:23 +02:00
OR ( 32 , R ( TEMPREG ) , MIPSSTATE_VAR_ELEM32 ( vcmpResult [ 0 ] , i ) ) ;
2014-11-09 19:24:37 -08:00
}
// Aggregate the bits. Urgh, expensive. Can optimize for the case of one comparison,
// which is the most common after all.
2014-11-16 17:38:26 +01:00
CMP ( 32 , R ( TEMPREG ) , Imm8 ( affected_bits & 0x1F ) ) ;
2013-07-31 22:29:16 +02:00
SETcc ( CC_E , R ( ECX ) ) ;
SHL ( 32 , R ( ECX ) , Imm8 ( 5 ) ) ;
2014-11-16 17:38:26 +01:00
OR ( 32 , R ( TEMPREG ) , R ( ECX ) ) ;
2014-11-09 19:24:37 -08:00
} else {
2014-11-10 22:26:41 -08:00
// Finalize the comparison for ES/NS.
if ( cond = = VC_ES | | cond = = VC_NS ) {
2017-07-05 15:06:44 +02:00
MOV ( PTRBITS , R ( TEMPREG ) , ImmPtr ( & fourinfnan ) ) ;
ANDPS ( XMM0 , MatR ( TEMPREG ) ) ;
PCMPEQD ( XMM0 , MatR ( TEMPREG ) ) ; // Integer comparison
2014-11-10 22:26:41 -08:00
// It's inversed below for NS.
}
2014-11-16 17:38:26 +01:00
MOVD_xmm ( R ( TEMPREG ) , XMM0 ) ;
2014-11-09 19:24:37 -08:00
if ( inverse ) {
2014-11-16 17:38:26 +01:00
XOR ( 32 , R ( TEMPREG ) , Imm32 ( 0xFFFFFFFF ) ) ;
2014-11-09 19:24:37 -08:00
}
2014-11-16 17:38:26 +01:00
AND ( 32 , R ( TEMPREG ) , Imm32 ( 0x31 ) ) ;
2013-07-31 22:29:16 +02:00
}
2014-06-27 22:30:45 -07:00
gpr . UnlockAllX ( ) ;
gpr . MapReg ( MIPS_REG_VFPUCC , true , true ) ;
AND ( 32 , gpr . R ( MIPS_REG_VFPUCC ) , Imm32 ( ~ affected_bits ) ) ;
2014-11-16 17:38:26 +01:00
OR ( 32 , gpr . R ( MIPS_REG_VFPUCC ) , R ( TEMPREG ) ) ;
2013-07-31 22:29:16 +02:00
fpr . ReleaseSpillLocks ( ) ;
}
// There are no immediates for floating point, so we need to load these
// from RAM. Might as well have a table ready.
// Entry i holds 1 / 2^i, the scale factor for vi2f's 5-bit shift immediate.
extern const float mulTableVi2f[32] = {
	1.0f / (1UL << 0), 1.0f / (1UL << 1), 1.0f / (1UL << 2), 1.0f / (1UL << 3),
	1.0f / (1UL << 4), 1.0f / (1UL << 5), 1.0f / (1UL << 6), 1.0f / (1UL << 7),
	1.0f / (1UL << 8), 1.0f / (1UL << 9), 1.0f / (1UL << 10), 1.0f / (1UL << 11),
	1.0f / (1UL << 12), 1.0f / (1UL << 13), 1.0f / (1UL << 14), 1.0f / (1UL << 15),
	1.0f / (1UL << 16), 1.0f / (1UL << 17), 1.0f / (1UL << 18), 1.0f / (1UL << 19),
	1.0f / (1UL << 20), 1.0f / (1UL << 21), 1.0f / (1UL << 22), 1.0f / (1UL << 23),
	1.0f / (1UL << 24), 1.0f / (1UL << 25), 1.0f / (1UL << 26), 1.0f / (1UL << 27),
	1.0f / (1UL << 28), 1.0f / (1UL << 29), 1.0f / (1UL << 30), 1.0f / (1UL << 31),
};
2013-08-24 14:43:49 -07:00
void Jit : : Comp_Vi2f ( MIPSOpcode op ) {
2019-02-03 14:01:51 -08:00
CONDITIONAL_DISABLE ( VFPU_VEC ) ;
2013-04-27 21:46:00 +02:00
if ( js . HasUnknownPrefix ( ) )
DISABLE ;
VectorSize sz = GetVecSize ( op ) ;
int n = GetNumVectorElements ( sz ) ;
int imm = ( op > > 16 ) & 0x1f ;
2013-07-31 17:26:14 +02:00
const float * mult = & mulTableVi2f [ imm ] ;
2013-04-27 21:46:00 +02:00
u8 sregs [ 4 ] , dregs [ 4 ] ;
GetVectorRegsPrefixS ( sregs , sz , _VS ) ;
GetVectorRegsPrefixD ( dregs , sz , _VD ) ;
2014-11-10 21:58:08 -08:00
// Flush SIMD.
fpr . SimpleRegsV ( sregs , sz , 0 ) ;
fpr . SimpleRegsV ( dregs , sz , MAP_NOINIT | MAP_DIRTY ) ;
2013-08-06 19:08:06 +02:00
int tempregs [ 4 ] ;
for ( int i = 0 ; i < n ; + + i ) {
if ( ! IsOverlapSafe ( dregs [ i ] , i , n , sregs ) ) {
tempregs [ i ] = fpr . GetTempV ( ) ;
} else {
tempregs [ i ] = dregs [ i ] ;
}
}
2017-07-07 15:04:04 +02:00
if ( * mult ! = 1.0f ) {
if ( RipAccessible ( mult ) ) {
2017-08-29 11:45:12 +02:00
MOVSS ( XMM1 , M ( mult ) ) ; // rip accessible
2017-07-07 15:04:04 +02:00
} else {
MOV ( PTRBITS , R ( TEMPREG ) , ImmPtr ( mult ) ) ;
MOVSS ( XMM1 , MatR ( TEMPREG ) ) ;
}
}
2013-04-27 21:46:00 +02:00
for ( int i = 0 ; i < n ; i + + ) {
2014-11-29 00:14:08 -08:00
fpr . MapRegV ( tempregs [ i ] , sregs [ i ] = = dregs [ i ] ? MAP_DIRTY : MAP_NOINIT ) ;
2014-11-08 13:07:01 -08:00
if ( fpr . V ( sregs [ i ] ) . IsSimpleReg ( ) ) {
CVTDQ2PS ( fpr . VX ( tempregs [ i ] ) , fpr . V ( sregs [ i ] ) ) ;
} else {
MOVSS ( fpr . VX ( tempregs [ i ] ) , fpr . V ( sregs [ i ] ) ) ;
CVTDQ2PS ( fpr . VX ( tempregs [ i ] ) , R ( fpr . VX ( tempregs [ i ] ) ) ) ;
}
2013-08-06 15:22:19 +02:00
if ( * mult ! = 1.0f )
2014-11-08 13:07:01 -08:00
MULSS ( fpr . VX ( tempregs [ i ] ) , R ( XMM1 ) ) ;
2013-08-06 19:08:06 +02:00
}
for ( int i = 0 ; i < n ; + + i ) {
if ( dregs [ i ] ! = tempregs [ i ] ) {
fpr . MapRegV ( dregs [ i ] , MAP_DIRTY | MAP_NOINIT ) ;
MOVSS ( fpr . VX ( dregs [ i ] ) , fpr . V ( tempregs [ i ] ) ) ;
}
}
ApplyPrefixD ( dregs , sz ) ;
fpr . ReleaseSpillLocks ( ) ;
}
// Planning for true SIMD.
// Sequence for gathering sparse registers into one SIMD register:
// MOVSS(XMM0, fpr.R(sregs[0]));
// MOVSS(XMM1, fpr.R(sregs[1]));
// MOVSS(XMM2, fpr.R(sregs[2]));
// MOVSS(XMM3, fpr.R(sregs[3]));
// SHUFPS(XMM0, R(XMM1), _MM_SHUFFLE(0, 0, 0, 0));   // XMM0 = S1 S1 S0 S0
// SHUFPS(XMM2, R(XMM3), _MM_SHUFFLE(0, 0, 0, 0));   // XMM2 = S3 S3 S2 S2
// SHUFPS(XMM0, R(XMM2), _MM_SHUFFLE(2, 0, 2, 0));   // XMM0 = S3 S2 S1 S0
// Some punpckwd etc would also work.
// Alternatively, MOVSS and three PINSRD (SSE4) with mem source.
// Why PINSRD instead of INSERTPS?
// http://software.intel.com/en-us/blogs/2009/01/07/using-sse41-for-mp3-encoding-quantization

// Sequence for scattering a SIMD register to sparse registers:
// (Very serial though, better methods may be possible.)
// MOVSS(fpr.R(sregs[0]), XMM0);
// SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 3, 2, 1));
// MOVSS(fpr.R(sregs[1]), XMM0);
// SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 3, 2, 1));
// MOVSS(fpr.R(sregs[2]), XMM0);
// SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 3, 2, 1));
// MOVSS(fpr.R(sregs[3]), XMM0);
// On SSE4 we should use EXTRACTPS instead.
// Translation of ryg's half_to_float5_SSE2: expands VFPU 16-bit half floats
// (vh2f) to 32-bit floats. Each source register holds two packed halves, so
// the destination vector is twice the size of the source.
void Jit::Comp_Vh2f(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);
	if (js.HasUnknownPrefix())
		DISABLE;

	// Four identical 32-bit lanes so each constant works with packed SSE ops.
#define SSE_CONST4(name, val) alignas(16) static const u32 name[4] = { (val), (val), (val), (val) }

	SSE_CONST4(mask_nosign, 0x7fff);       // strips the half's sign bit
	SSE_CONST4(nan_mantissa, 0x800003ff);  // sign bit + half mantissa bits
	SSE_CONST4(magic, (254 - 15) << 23);   // exponent rebias factor, as a float
	SSE_CONST4(was_infnan, 0x7bff);        // anything above this had exponent 31 (inf/NaN)
	SSE_CONST4(exp_infnan, 255 << 23);     // float inf/NaN exponent field

	// Use RIP-relative addressing when the constants are in range; otherwise
	// load one base pointer and address the others via displacements from it.
	OpArg mask_nosign_arg, nan_mantissa_arg, magic_arg, was_infnan_arg, exp_infnan_arg;
	if (RipAccessible(mask_nosign)) {
		mask_nosign_arg = M(&mask_nosign[0]);
		nan_mantissa_arg = M(&nan_mantissa[0]);
		magic_arg = M(&magic[0]);
		was_infnan_arg = M(&was_infnan[0]);
		exp_infnan_arg = M(&exp_infnan[0]);
	} else {
		MOV(PTRBITS, R(TEMPREG), ImmPtr(&mask_nosign[0]));
		mask_nosign_arg = MAccessibleDisp(TEMPREG, &mask_nosign[0], &mask_nosign[0]);
		nan_mantissa_arg = MAccessibleDisp(TEMPREG, &mask_nosign[0], &nan_mantissa[0]);
		magic_arg = MAccessibleDisp(TEMPREG, &mask_nosign[0], &magic[0]);
		was_infnan_arg = MAccessibleDisp(TEMPREG, &mask_nosign[0], &was_infnan[0]);
		exp_infnan_arg = MAccessibleDisp(TEMPREG, &mask_nosign[0], &exp_infnan[0]);
	}
#undef SSE_CONST4

	VectorSize sz = GetVecSize(op);
	VectorSize outsize;
	// Output is twice as wide as the input (two halves per source register).
	switch (sz) {
	case V_Single:
		outsize = V_Pair;
		break;
	case V_Pair:
		outsize = V_Quad;
		break;
	default:
		DISABLE;
	}

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, outsize, _VD);

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);

	// Force ourselves an extra xreg as temp space.
	X64Reg tempR = fpr.GetFreeXReg();

	// Gather the packed halves into XMM0's low 64 bits.
	MOVSS(XMM0, fpr.V(sregs[0]));
	if (sz != V_Single) {
		MOVSS(XMM1, fpr.V(sregs[1]));
		PUNPCKLDQ(XMM0, R(XMM1));
	}
	// Zero-extend each 16-bit half into its own 32-bit lane.
	XORPS(XMM1, R(XMM1));
	PUNPCKLWD(XMM0, R(XMM1));

	// OK, 16 bits in each word.
	// Let's go. Deep magic here.
	MOVAPS(XMM1, R(XMM0));
	ANDPS(XMM0, mask_nosign_arg);  // xmm0 = expmant
	XORPS(XMM1, R(XMM0));          // xmm1 = justsign = expmant ^ xmm0
	MOVAPS(tempR, R(XMM0));        // keep unshifted expmant for the inf/NaN test below
	PSLLD(XMM0, 13);               // move expmant into float exponent/mantissa position
	MULPS(XMM0, magic_arg);        // xmm0 = scaled (multiply rebias of the exponent)
	PSLLD(XMM1, 16);               // xmm1 = sign, moved to float sign position
	ORPS(XMM0, R(XMM1));           // xmm0 = finite-case result

	// Now create a NAN mask, adding in the sign.
	ORPS(XMM1, R(tempR));            // xmm1 = sign + original expmant
	ANDPS(XMM1, nan_mantissa_arg);   // xmm1 = sign + original mantissa bits
	PCMPGTD(tempR, was_infnan_arg);  // tempR = b_wasinfnan (per-lane all-ones mask)
	ORPS(XMM1, exp_infnan_arg);      // xmm1 = infnan result
	ANDPS(XMM1, R(tempR));           // xmm1 = infnan result, or zero if not infnan
	ANDNPS(tempR, R(XMM0));          // tempR = finite result, or zero if infnan
	ORPS(XMM1, R(tempR));            // xmm1 = final combined result

	fpr.MapRegsV(dregs, outsize, MAP_NOINIT | MAP_DIRTY);

	// TODO: Could apply D-prefix in parallel here...
	// Scatter the lanes of XMM1 out to the destination registers, rotating
	// the next lane down after each store.
	MOVSS(fpr.V(dregs[0]), XMM1);
	SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(3, 3, 2, 1));
	MOVSS(fpr.V(dregs[1]), XMM1);
	if (sz != V_Single) {
		SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(3, 3, 2, 1));
		MOVSS(fpr.V(dregs[2]), XMM1);
		SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(3, 3, 2, 1));
		MOVSS(fpr.V(dregs[3]), XMM1);
	}
	ApplyPrefixD(dregs, outsize);
	gpr.UnlockAllX();
	fpr.ReleaseSpillLocks();
}
2014-11-08 00:39:40 -08:00
// The goal is to map (reversed byte order for clarity):
// AABBCCDD -> 000000AA 000000BB 000000CC 000000DD
// PSHUFB control mask: -1 zeroes the destination byte, otherwise selects that source byte.
alignas(16) static s8 vc2i_shuffle[16] = { -1, -1, -1, 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3 };
2014-11-08 00:39:40 -08:00
// AABBCCDD -> AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
// PSHUFB control mask replicating each source byte across its entire 32-bit lane.
alignas(16) static s8 vuc2i_shuffle[16] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 };
2014-11-08 00:39:40 -08:00
2013-10-07 21:59:23 +02:00
// vuc2i/vc2i/vus2i/vs2i: expand packed 8-bit or 16-bit integers into 32-bit
// integers, value left-aligned (at the top bits) in each destination lane.
void Jit::Comp_Vx2i(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);
	if (js.HasUnknownPrefix())
		DISABLE;

	int bits = ((op >> 16) & 2) == 0 ? 8 : 16; // vuc2i/vc2i (0/1), vus2i/vs2i (2/3)
	bool unsignedOp = ((op >> 16) & 1) == 0; // vuc2i (0), vus2i (2)

	// vs2i or vus2i unpack pairs of 16-bit integers into 32-bit integers, with the values
	// at the top.  vus2i shifts it an extra bit right afterward.
	// vc2i and vuc2i unpack quads of 8-bit integers into 32-bit integers, with the values
	// at the top too.  vuc2i is a bit special (see below.)
	// Let's do this similarly as h2f - we do a solution that works for both singles and pairs
	// then use it for both.

	VectorSize sz = GetVecSize(op);
	VectorSize outsize;
	if (bits == 8) {
		// Four bytes always expand to a full quad.
		outsize = V_Quad;
	} else {
		// Two 16-bit values per register: output doubles the element count.
		switch (sz) {
		case V_Single:
			outsize = V_Pair;
			break;
		case V_Pair:
			outsize = V_Quad;
			break;
		default:
			DISABLE;
		}
	}

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, outsize, _VD);

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);

	if (bits == 16) {
		// Gather up to two source registers into XMM1's low 64 bits.
		MOVSS(XMM1, fpr.V(sregs[0]));
		if (sz != V_Single) {
			MOVSS(XMM0, fpr.V(sregs[1]));
			PUNPCKLDQ(XMM1, R(XMM0));
		}

		// Unpack 16-bit words into 32-bit words, upper position, and we're done!
		PXOR(XMM0, R(XMM0));
		PUNPCKLWD(XMM0, R(XMM1));
	} else if (bits == 8) {
		if (unsignedOp) {
			// vuc2i is a bit special.  It spreads out the bits like this:
			// s[0] = 0xDDCCBBAA -> d[0] = (0xAAAAAAAA >> 1), d[1] = (0xBBBBBBBB >> 1), etc.
			MOVSS(XMM0, fpr.V(sregs[0]));
			if (cpu_info.bSSSE3 && RipAccessible(vuc2i_shuffle)) {
				// Not really different speed.  Generates a bit less code.
				PSHUFB(XMM0, M(&vuc2i_shuffle[0]));  // rip accessible
			} else {
				// First, we change 0xDDCCBBAA to 0xDDDDCCCCBBBBAAAA.
				PUNPCKLBW(XMM0, R(XMM0));
				// Now, interleave each 16 bits so they're all 32 bits wide.
				PUNPCKLWD(XMM0, R(XMM0));
			}
		} else {
			if (cpu_info.bSSSE3 && RipAccessible(vc2i_shuffle)) {
				MOVSS(XMM0, fpr.V(sregs[0]));
				PSHUFB(XMM0, M(&vc2i_shuffle[0]));
			} else {
				// Zero-fill the low 24 bits of each lane by interleaving with zero.
				PXOR(XMM1, R(XMM1));
				MOVSS(XMM0, fpr.V(sregs[0]));
				PUNPCKLBW(XMM1, R(XMM0));
				PXOR(XMM0, R(XMM0));
				PUNPCKLWD(XMM0, R(XMM1));
			}
		}
	}

	// At this point we have the regs in the 4 lanes.
	// In the "u" mode, we need to shift it out of the sign bit.
	if (unsignedOp) {
		PSRLD(XMM0, 1);
	}

	if (fpr.TryMapRegsVS(dregs, outsize, MAP_NOINIT | MAP_DIRTY)) {
		// Destination maps to one SIMD register - a single store finishes it.
		MOVAPS(fpr.VSX(dregs), R(XMM0));
	} else {
		// Done! TODO: The rest of this should be possible to extract into a function.
		fpr.MapRegsV(dregs, outsize, MAP_NOINIT | MAP_DIRTY);

		// TODO: Could apply D-prefix in parallel here...

		// Scatter lane by lane, shifting the register down 4 bytes each time.
		MOVSS(fpr.V(dregs[0]), XMM0);
		PSRLDQ(XMM0, 4);
		MOVSS(fpr.V(dregs[1]), XMM0);
		if (outsize != V_Pair) {
			PSRLDQ(XMM0, 4);
			MOVSS(fpr.V(dregs[2]), XMM0);
			PSRLDQ(XMM0, 4);
			MOVSS(fpr.V(dregs[3]), XMM0);
		}
	}

	ApplyPrefixD(dregs, outsize);
	gpr.UnlockAllX();
	fpr.ReleaseSpillLocks();
}
2013-08-07 21:30:57 +02:00
// Scale factors for vf2i: entry [imm] is 2^imm. Stored as doubles so the
// scaling multiply in Comp_Vf2i keeps enough precision for exact clamping.
extern const double mulTableVf2i[32] = {
	(1ULL << 0), (1ULL << 1), (1ULL << 2), (1ULL << 3),
	(1ULL << 4), (1ULL << 5), (1ULL << 6), (1ULL << 7),
	(1ULL << 8), (1ULL << 9), (1ULL << 10), (1ULL << 11),
	(1ULL << 12), (1ULL << 13), (1ULL << 14), (1ULL << 15),
	(1ULL << 16), (1ULL << 17), (1ULL << 18), (1ULL << 19),
	(1ULL << 20), (1ULL << 21), (1ULL << 22), (1ULL << 23),
	(1ULL << 24), (1ULL << 25), (1ULL << 26), (1ULL << 27),
	(1ULL << 28), (1ULL << 29), (1ULL << 30), (1ULL << 31),
};
2017-07-05 15:19:03 +02:00
static const double maxMinIntAsDouble [ 2 ] = { ( double ) 0x7fffffff , ( double ) ( int ) 0x80000000 } ; // that's not equal to 0x80000000
2014-11-03 23:22:08 -08:00
2013-08-24 14:43:49 -07:00
// vf2i: converts floats to integers with a scale factor of 2^imm, clamping to
// the int32 range, using the rounding mode encoded in the opcode.
void Jit::Comp_Vf2i(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);
	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	int imm = (op >> 16) & 0x1f;
	const double *mult = &mulTableVf2i[imm];

	// Value to put into the MXCSR rounding-control field (bits 13-14), or -1
	// if MXCSR doesn't need to change (truncate uses CVTT directly below.)
	int setMXCSR = -1;
	int rmode = (op >> 21) & 0x1f;
	switch (rmode) {
	case 17:
		break; //z - truncate. Easy to support.
	case 16:
		setMXCSR = 0;  // n - round to nearest
		break;
	case 18:
		setMXCSR = 2;  // u - round up (toward +inf)
		break;
	case 19:
		setMXCSR = 1;  // d - round down (toward -inf)
		break;
	}

	// Small optimization: 0 is our default mode anyway.
	if (setMXCSR == 0 && !js.hasSetRounding) {
		setMXCSR = -1;
	}

	// Except for truncate, we need to update MXCSR to our preferred rounding mode.
	if (setMXCSR != -1) {
		// Save the current MXCSR so it can be restored at the end.
		STMXCSR(MIPSSTATE_VAR(mxcsrTemp));
		MOV(32, R(TEMPREG), MIPSSTATE_VAR(mxcsrTemp));
		AND(32, R(TEMPREG), Imm32(~(3 << 13)));  // clear the RC field
		if (setMXCSR != 0) {
			OR(32, R(TEMPREG), Imm32(setMXCSR << 13));
		}
		MOV(32, MIPSSTATE_VAR(temp), R(TEMPREG));
		LDMXCSR(MIPSSTATE_VAR(temp));
	}

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// Really tricky to SIMD due to double precision requirement...

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	fpr.SimpleRegsV(dregs, sz, MAP_DIRTY | MAP_NOINIT);

	// Use temporaries where a destination would clobber a still-needed source.
	u8 tempregs[4];
	for (int i = 0; i < n; ++i) {
		if (!IsOverlapSafe(dregs[i], i, n, sregs)) {
			tempregs[i] = fpr.GetTempV();
		} else {
			tempregs[i] = dregs[i];
		}
	}

	// Preload the scale factor into XMM1 once, outside the loop.
	if (*mult != 1.0f) {
		if (RipAccessible(mult)) {
			MOVSD(XMM1, M(mult));  // rip accessible
		} else {
			MOV(PTRBITS, R(TEMPREG), ImmPtr(mult));
			MOVSD(XMM1, MatR(TEMPREG));
		}
	}

	fpr.MapRegsV(tempregs, sz, MAP_DIRTY | MAP_NOINIT);
	for (int i = 0; i < n; i++) {
		// Need to do this in double precision to clamp correctly as float
		// doesn't have enough precision to represent 0x7fffffff for example exactly.
		MOVSS(XMM0, fpr.V(sregs[i]));
		CVTSS2SD(XMM0, R(XMM0)); // convert to double precision
		if (*mult != 1.0f) {
			MULSD(XMM0, R(XMM1));
		}
		// Clamp into [INT_MIN, INT_MAX] before converting.
		MOV(PTRBITS, R(TEMPREG), ImmPtr(maxMinIntAsDouble));
		MINSD(XMM0, MDisp(TEMPREG, 0));
		MAXSD(XMM0, MDisp(TEMPREG, sizeof(double)));
		// We've set the rounding mode above, so this part's easy.
		switch ((op >> 21) & 0x1f) {
		case 16: CVTSD2SI(TEMPREG, R(XMM0)); break; //n
		case 17: CVTTSD2SI(TEMPREG, R(XMM0)); break; //z - truncate
		case 18: CVTSD2SI(TEMPREG, R(XMM0)); break; //u
		case 19: CVTSD2SI(TEMPREG, R(XMM0)); break; //d
		}
		MOVD_xmm(fpr.VX(tempregs[i]), R(TEMPREG));
	}

	// Copy results from temporaries into the real destinations.
	for (int i = 0; i < n; ++i) {
		if (dregs[i] != tempregs[i]) {
			fpr.MapRegV(dregs[i], MAP_DIRTY | MAP_NOINIT);
			MOVSS(fpr.VX(dregs[i]), fpr.V(tempregs[i]));
			fpr.DiscardV(tempregs[i]);
		}
	}

	// Restore the caller's MXCSR if we changed it.
	if (setMXCSR != -1) {
		LDMXCSR(MIPSSTATE_VAR(mxcsrTemp));
	}

	ApplyPrefixD(dregs, sz);
	fpr.ReleaseSpillLocks();
}
2013-08-24 14:43:49 -07:00
void Jit : : Comp_Vcst ( MIPSOpcode op ) {
2019-02-03 14:01:51 -08:00
CONDITIONAL_DISABLE ( VFPU_XFER ) ;
2013-04-27 21:59:51 +02:00
if ( js . HasUnknownPrefix ( ) )
DISABLE ;
int conNum = ( op > > 16 ) & 0x1f ;
int vd = _VD ;
VectorSize sz = GetVecSize ( op ) ;
int n = GetNumVectorElements ( sz ) ;
u8 dregs [ 4 ] ;
2021-02-15 09:28:07 -08:00
GetVectorRegsPrefixD ( dregs , sz , vd ) ;
2013-04-27 21:59:51 +02:00
2017-07-05 15:19:03 +02:00
if ( RipAccessible ( cst_constants ) ) {
2017-07-05 15:59:17 +02:00
MOVSS ( XMM0 , M ( & cst_constants [ conNum ] ) ) ; // rip accessible
2017-07-05 15:19:03 +02:00
} else {
MOV ( PTRBITS , R ( TEMPREG ) , ImmPtr ( & cst_constants [ conNum ] ) ) ;
MOVSS ( XMM0 , MatR ( TEMPREG ) ) ;
}
2014-11-25 00:52:39 +01:00
if ( fpr . TryMapRegsVS ( dregs , sz , MAP_NOINIT | MAP_DIRTY ) ) {
SHUFPS ( XMM0 , R ( XMM0 ) , _MM_SHUFFLE ( 0 , 0 , 0 , 0 ) ) ;
2014-11-27 00:07:17 -08:00
MOVAPS ( fpr . VS ( dregs ) , XMM0 ) ;
2014-11-25 00:52:39 +01:00
fpr . ReleaseSpillLocks ( ) ;
return ;
}
2013-04-27 21:59:51 +02:00
fpr . MapRegsV ( dregs , sz , MAP_NOINIT | MAP_DIRTY ) ;
for ( int i = 0 ; i < n ; i + + ) {
MOVSS ( fpr . V ( dregs [ i ] ) , XMM0 ) ;
}
ApplyPrefixD ( dregs , sz ) ;
fpr . ReleaseSpillLocks ( ) ;
}
2013-11-07 14:34:08 +01:00
// vsgn: d[i] = sign of s[i] as -1.0f, 0.0f or +1.0f.
void Jit::Comp_Vsgn(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);
	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

	// Pick a destination register per element, using a temp where writing the
	// destination early would clobber a still-needed source.
	X64Reg tempxregs[4];
	for (int i = 0; i < n; ++i) {
		if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs)) {
			int reg = fpr.GetTempV();
			fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
			fpr.SpillLockV(reg);
			tempxregs[i] = fpr.VX(reg);
		} else {
			fpr.MapRegV(dregs[i], dregs[i] == sregs[i] ? MAP_DIRTY : MAP_NOINIT);
			fpr.SpillLockV(dregs[i]);
			tempxregs[i] = fpr.VX(dregs[i]);
		}
	}

	// Would be nice with more temp regs here so we could put signBitLower and oneOneOneOne into regs...
	for (int i = 0; i < n; ++i) {
		XORPS(XMM0, R(XMM0));
		CMPEQSS(XMM0, fpr.V(sregs[i]));  // XMM0 = s[i] == 0.0f (all-ones mask if so)
		MOVSS(XMM1, fpr.V(sregs[i]));
		// Preserve sign bit, replace rest with ones (giving +/-1.0f)
		if (RipAccessible(signBitLower)) {
			ANDPS(XMM1, M(&signBitLower));  // rip accessible
			ORPS(XMM1, M(&oneOneOneOne));  // rip accessible
		} else {
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&signBitLower));
			ANDPS(XMM1, MatR(TEMPREG));
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&oneOneOneOne));
			ORPS(XMM1, MatR(TEMPREG));
		}
		// If really was equal to zero, zap. Note that ANDN negates the destination.
		ANDNPS(XMM0, R(XMM1));
		MOVAPS(tempxregs[i], R(XMM0));
	}

	// Write back any results that ended up in temporaries.
	for (int i = 0; i < n; ++i) {
		if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
			MOVSS(fpr.V(dregs[i]), tempxregs[i]);
	}

	ApplyPrefixD(dregs, sz);
	fpr.ReleaseSpillLocks();
}
2013-11-19 14:24:56 +01:00
// vocp: d[i] = 1.0f - s[i], implemented as (-s[i]) + 1.0f by forcing prefixes.
void Jit::Comp_Vocp(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);
	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	// This is a hack that modifies prefixes.  We eat them later, so just overwrite.
	// S prefix forces the negate flags.
	js.prefixS |= 0x000F0000;
	// T prefix forces constants on and regnum to 1.
	// That means negate still works, and abs activates a different constant.
	js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;

	u8 sregs[4], tregs[4], dregs[4];
	// Actually uses the T prefixes (despite being VS.)
	GetVectorRegsPrefixS(sregs, sz, _VS);
	// 0x0000F055 is the plain forced-constant T prefix (all elements = 1.0f);
	// only fetch T registers when some extra prefix modifier is in play.
	if (js.prefixT != 0x0000F055)
		GetVectorRegsPrefixT(tregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	if (js.prefixT != 0x0000F055)
		fpr.SimpleRegsV(tregs, sz, 0);
	fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

	// Pick a destination register per element, using a temp where writing the
	// destination early would clobber a still-needed source.
	X64Reg tempxregs[4];
	for (int i = 0; i < n; ++i) {
		if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs)) {
			int reg = fpr.GetTempV();
			fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
			fpr.SpillLockV(reg);
			tempxregs[i] = fpr.VX(reg);
		} else {
			fpr.MapRegV(dregs[i], dregs[i] == sregs[i] ? MAP_DIRTY : MAP_NOINIT);
			fpr.SpillLockV(dregs[i]);
			tempxregs[i] = fpr.VX(dregs[i]);
		}
	}

	// Plain constant case: preload 1.0f into XMM1 once, outside the loop.
	if (js.prefixT == 0x0000F055) {
		MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
		MOVSS(XMM1, MatR(TEMPREG));
	}
	for (int i = 0; i < n; ++i) {
		if (js.prefixT == 0x0000F055) {
			MOVSS(XMM0, R(XMM1));
		} else {
			MOVSS(XMM0, fpr.V(tregs[i]));
		}
		// The forced S prefix negates s[i], so this computes 1.0f - s[i].
		ADDSS(XMM0, fpr.V(sregs[i]));
		MOVSS(tempxregs[i], R(XMM0));
	}

	// Write back any results that ended up in temporaries.
	for (int i = 0; i < n; ++i) {
		if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
			MOVSS(fpr.V(dregs[i]), tempxregs[i]);
	}

	ApplyPrefixD(dregs, sz);
	fpr.ReleaseSpillLocks();
}
2014-12-03 23:18:53 +01:00
// vbfy1/vbfy2: butterfly add/subtract.
// vbfy1: (d0,d1) = (s0+s1, s0-s1), and likewise for (d2,d3) on quads.
// vbfy2: (d0,d1,d2,d3) = (s0+s2, s1+s3, s0-s2, s1-s3).
void Jit::Comp_Vbfy(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);
	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);
	if (n != 2 && n != 4) {
		DISABLE;
	}

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

	// Pick a destination register per element, using a temp where writing the
	// destination early would clobber a still-needed source.
	X64Reg tempxregs[4];
	for (int i = 0; i < n; ++i) {
		if (!IsOverlapSafe(dregs[i], i, n, sregs)) {
			int reg = fpr.GetTempV();
			fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
			fpr.SpillLockV(reg);
			tempxregs[i] = fpr.VX(reg);
		} else {
			fpr.MapRegV(dregs[i], dregs[i] == sregs[i] ? MAP_DIRTY : MAP_NOINIT);
			fpr.SpillLockV(dregs[i]);
			tempxregs[i] = fpr.VX(dregs[i]);
		}
	}

	int subop = (op >> 16) & 0x1F;
	if (subop == 3) {
		// vbfy2
		MOVSS(tempxregs[0], fpr.V(sregs[0]));
		MOVSS(tempxregs[1], fpr.V(sregs[1]));
		MOVSS(tempxregs[2], fpr.V(sregs[0]));
		MOVSS(tempxregs[3], fpr.V(sregs[1]));
		ADDSS(tempxregs[0], fpr.V(sregs[2]));
		ADDSS(tempxregs[1], fpr.V(sregs[3]));
		SUBSS(tempxregs[2], fpr.V(sregs[2]));
		SUBSS(tempxregs[3], fpr.V(sregs[3]));
	} else if (subop == 2) {
		// vbfy1
		MOVSS(tempxregs[0], fpr.V(sregs[0]));
		MOVSS(tempxregs[1], fpr.V(sregs[0]));
		ADDSS(tempxregs[0], fpr.V(sregs[1]));
		SUBSS(tempxregs[1], fpr.V(sregs[1]));
		if (n == 4) {
			MOVSS(tempxregs[2], fpr.V(sregs[2]));
			MOVSS(tempxregs[3], fpr.V(sregs[2]));
			ADDSS(tempxregs[2], fpr.V(sregs[3]));
			SUBSS(tempxregs[3], fpr.V(sregs[3]));
		}
	} else {
		DISABLE;
	}

	// Write back any results that ended up in temporaries.
	for (int i = 0; i < n; ++i) {
		if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
			MOVSS(fpr.V(dregs[i]), tempxregs[i]);
	}

	ApplyPrefixD(dregs, sz);
	fpr.ReleaseSpillLocks();
}
2017-07-05 14:15:42 +02:00
2014-11-03 22:13:38 -08:00
// On x86-32, float args end up on the stack with cdecl; this union lets the
// JIT pass the raw 32-bit pattern in a GPR while callees still see a float.
union u32float {
	u32 u;    // raw bit pattern
	float f;  // same bits viewed as a float

	operator float() const {
		return f;
	}

	inline u32float &operator *=(const float &other) {
		f *= other;
		return *this;
	}
};
2021-03-02 21:49:21 -08:00
// On x64 the argument arrives as a real float (in an XMM register per the ABI);
// on x86 we pass the raw bits via the u32float trick above.
#if PPSSPP_ARCH(AMD64)
typedef float SinCosArg;
#else
typedef u32float SinCosArg;
#endif
2017-07-05 16:44:22 +02:00
// Computes VFPU sine and cosine of angle into output[0] and output[1].
void SinCos(SinCosArg angle, float *output) {
	vfpu_sincos(angle, output[0], output[1]);
}
2017-07-05 16:44:22 +02:00
// Computes VFPU sine of angle into output[0].
void SinOnly(SinCosArg angle, float *output) {
	output[0] = vfpu_sin(angle);
}
2017-07-05 16:44:22 +02:00
// Computes negated VFPU sine of angle into output[0] (for vnsin).
void NegSinOnly(SinCosArg angle, float *output) {
	output[0] = -vfpu_sin(angle);
}
2017-07-05 16:44:22 +02:00
// Computes VFPU cosine of angle into output[1] (note: the cosine slot).
void CosOnly(SinCosArg angle, float *output) {
	output[1] = vfpu_cos(angle);
}
2023-06-04 10:09:20 +02:00
// Computes VFPU arcsine (scaled result, per vfpu_asin) into output[0] for vasin.
void ASinScaled(SinCosArg sine, float *output) {
	output[0] = vfpu_asin(sine);
}
2017-07-05 16:44:22 +02:00
// Like SinCos, but negates the sine result in output[0].
void SinCosNegSin(SinCosArg angle, float *output) {
	SinCos(angle, output);
	output[0] = -output[0];
}
2023-06-04 10:09:20 +02:00
// Computes VFPU 2^arg into output[0] (vexp2).
void Exp2(SinCosArg arg, float *output) {
	output[0] = vfpu_exp2(arg);
}
// Computes VFPU log2(arg) into output[0] (vlog2).
void Log2(SinCosArg arg, float *output) {
	output[0] = vfpu_log2(arg);
}
// Computes VFPU 2^-arg (reciprocal of exp2) into output[0] (vrexp2).
void RExp2(SinCosArg arg, float *output) {
	output[0] = vfpu_rexp2(arg);
}
2013-08-24 14:43:49 -07:00
// Dispatcher for the single-source VFPU vector ops (vmov, vabs, vneg, vsat0/1,
// vrcp, vrsq, vsin, vcos, vexp2, vlog2, vsqrt, vasin, vnrcp, vnsin, vrexp2),
// selected by bits 16-20 of the opcode.
void Jit::Comp_VV2Op(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);
	if (js.HasUnknownPrefix())
		DISABLE;

	// Emits a call to one of the SinCosArg helper functions above, loading the
	// argument from sreg and pointing the output at mips_->sincostemp.
	auto specialFuncCallHelper = [this](void (*specialFunc)(SinCosArg, float *output), u8 sreg) {
#if PPSSPP_ARCH(AMD64)
		MOVSS(XMM0, fpr.V(sreg));
		// TODO: This reg might be different on Linux...
#ifdef _WIN32
		LEA(64, RDX, MIPSSTATE_VAR(sincostemp[0]));
#else
		LEA(64, RDI, MIPSSTATE_VAR(sincostemp[0]));
#endif
		ABI_CallFunction(thunks.ProtectFunction((const void *)specialFunc, 0));
#else
		// Sigh, passing floats with cdecl isn't pretty, ends up on the stack.
		if (fpr.V(sreg).IsSimpleReg()) {
			MOVD_xmm(R(EAX), fpr.VX(sreg));
		} else {
			MOV(32, R(EAX), fpr.V(sreg));
		}
		CallProtectedFunction((const void *)specialFunc, R(EAX), Imm32((uint32_t)(uintptr_t)&mips_->sincostemp[0]));
#endif
	};

	// Pre-processing: Eliminate silly no-op VMOVs, common in Wipeout Pure
	if (((op >> 16) & 0x1f) == 0 && _VS == _VD && js.HasNoPrefix()) {
		return;
	}

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	bool canSIMD = false;
	// Some can be SIMD'd.
	switch ((op >> 16) & 0x1f) {
	case 0:  // vmov
	case 1:  // vabs
	case 2:  // vneg
		canSIMD = true;
		break;
	}

	// SIMD fast path: whole-vector move/abs/neg in one or two instructions.
	if (canSIMD && fpr.TryMapDirtyInVS(dregs, sz, sregs, sz)) {
		switch ((op >> 16) & 0x1f) {
		case 0:  // vmov
			MOVAPS(fpr.VSX(dregs), fpr.VS(sregs));
			break;
		case 1:  // vabs
			if (dregs[0] != sregs[0])
				MOVAPS(fpr.VSX(dregs), fpr.VS(sregs));
			// Clear the sign bit in every lane.
			if (RipAccessible(&noSignMask)) {
				ANDPS(fpr.VSX(dregs), M(&noSignMask));  // rip accessible
			} else {
				MOV(PTRBITS, R(TEMPREG), ImmPtr(&noSignMask));
				ANDPS(fpr.VSX(dregs), MatR(TEMPREG));
			}
			break;
		case 2:  // vneg
			if (dregs[0] != sregs[0])
				MOVAPS(fpr.VSX(dregs), fpr.VS(sregs));
			// Flip the sign bit in every lane.
			if (RipAccessible(&signBitAll)) {
				XORPS(fpr.VSX(dregs), M(&signBitAll));  // rip accessible
			} else {
				MOV(PTRBITS, R(TEMPREG), ImmPtr(&signBitAll));
				XORPS(fpr.VSX(dregs), MatR(TEMPREG));
			}
			break;
		}
		ApplyPrefixD(dregs, sz);
		fpr.ReleaseSpillLocks();
		return;
	}

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

	// Pick a destination register per element, using a temp where writing the
	// destination early would clobber a still-needed source.
	X64Reg tempxregs[4];
	for (int i = 0; i < n; ++i) {
		if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs)) {
			int reg = fpr.GetTempV();
			fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
			fpr.SpillLockV(reg);
			tempxregs[i] = fpr.VX(reg);
		} else {
			fpr.MapRegV(dregs[i], dregs[i] == sregs[i] ? MAP_DIRTY : MAP_NOINIT);
			fpr.SpillLockV(dregs[i]);
			tempxregs[i] = fpr.VX(dregs[i]);
		}
	}

	// Warning: sregs[i] and tempxregs[i] may be the same reg.
	// Helps for vmov, hurts for vrcp, etc.
	for (int i = 0; i < n; ++i) {
		switch ((op >> 16) & 0x1f) {
		case 0: // d[i] = s[i]; break; //vmov
			// Probably for swizzle.
			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				MOVSS(tempxregs[i], fpr.V(sregs[i]));
			break;
		case 1: // d[i] = fabsf(s[i]); break; //vabs
			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				MOVSS(tempxregs[i], fpr.V(sregs[i]));
			if (RipAccessible(&noSignMask)) {
				ANDPS(tempxregs[i], M(&noSignMask));  // rip accessible
			} else {
				MOV(PTRBITS, R(TEMPREG), ImmPtr(&noSignMask));
				ANDPS(tempxregs[i], MatR(TEMPREG));
			}
			break;
		case 2: // d[i] = -s[i]; break; //vneg
			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				MOVSS(tempxregs[i], fpr.V(sregs[i]));
			if (RipAccessible(&signBitLower)) {
				XORPS(tempxregs[i], M(&signBitLower));  // rip accessible
			} else {
				MOV(PTRBITS, R(TEMPREG), ImmPtr(&signBitLower));
				XORPS(tempxregs[i], MatR(TEMPREG));
			}
			break;
		case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;    // vsat0
			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				MOVSS(tempxregs[i], fpr.V(sregs[i]));

			// Zero out XMM0 if it was <= +0.0f (but skip NAN.)
			MOVSS(R(XMM0), tempxregs[i]);
			XORPS(XMM1, R(XMM1));
			CMPLESS(XMM0, R(XMM1));
			ANDNPS(XMM0, R(tempxregs[i]));

			// Retain a NAN in XMM0 (must be second operand.)
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
			MOVSS(tempxregs[i], MatR(TEMPREG));
			MINSS(tempxregs[i], R(XMM0));
			break;
		case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;  // vsat1
			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				MOVSS(tempxregs[i], fpr.V(sregs[i]));

			// Check for < -1.0f, but careful of NANs.
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&minus_one));
			MOVSS(XMM1, MatR(TEMPREG));
			MOVSS(R(XMM0), tempxregs[i]);
			CMPLESS(XMM0, R(XMM1));
			// If it was NOT less, the three ops below do nothing.
			// Otherwise, they replace the value with -1.0f.
			ANDPS(XMM1, R(XMM0));
			ANDNPS(XMM0, R(tempxregs[i]));
			ORPS(XMM0, R(XMM1));

			// Retain a NAN in XMM0 (must be second operand.)
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
			MOVSS(tempxregs[i], MatR(TEMPREG));
			MINSS(tempxregs[i], R(XMM0));
			break;
		case 16: // d[i] = 1.0f / s[i]; break; //vrcp
			if (RipAccessible(&one)) {
				MOVSS(XMM0, M(&one));  // rip accessible
			} else {
				MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
				MOVSS(XMM0, MatR(TEMPREG));
			}
			DIVSS(XMM0, fpr.V(sregs[i]));
			MOVSS(tempxregs[i], R(XMM0));
			break;
		case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
			SQRTSS(XMM0, fpr.V(sregs[i]));
			if (RipAccessible(&one)) {
				MOVSS(tempxregs[i], M(&one));  // rip accessible
			} else {
				MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
				MOVSS(tempxregs[i], MatR(TEMPREG));
			}
			DIVSS(tempxregs[i], R(XMM0));
			break;
		case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
			specialFuncCallHelper(&SinOnly, sregs[i]);
			MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0]));
			break;
		case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
			specialFuncCallHelper(&CosOnly, sregs[i]);
			MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[1]));
			break;
		case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2
			specialFuncCallHelper(&Exp2, sregs[i]);
			MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0]));
			break;
		case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2
			specialFuncCallHelper(&Log2, sregs[i]);
			MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0]));
			break;
		case 22: // d[i] = sqrtf(s[i]); break; //vsqrt
			SQRTSS(tempxregs[i], fpr.V(sregs[i]));
			// vsqrt never returns a negatively-signed result, so clear the sign bit.
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&noSignMask));
			ANDPS(tempxregs[i], MatR(TEMPREG));
			break;
		case 23: // d[i] = asinf(s[i]) / M_PI_2; break; //vasin
			specialFuncCallHelper(&ASinScaled, sregs[i]);
			MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0]));
			break;
		case 24: // d[i] = -1.0f / s[i]; break; // vnrcp
			// Rare so let's not bother checking for RipAccessible.
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&minus_one));
			MOVSS(XMM0, MatR(TEMPREG));
			DIVSS(XMM0, fpr.V(sregs[i]));
			MOVSS(tempxregs[i], R(XMM0));
			break;
		case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin
			specialFuncCallHelper(&NegSinOnly, sregs[i]);
			MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0]));
			break;
		case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2
			specialFuncCallHelper(&RExp2, sregs[i]);
			MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0]));
			break;
		}
	}

	// Write back any results that ended up in temporaries.
	for (int i = 0; i < n; ++i) {
		if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
			MOVSS(fpr.V(dregs[i]), tempxregs[i]);
	}

	ApplyPrefixD(dregs, sz);
	fpr.ReleaseSpillLocks();
}
2013-08-24 14:43:49 -07:00
// Compiles mfv/mfvc (VFPU reg / control reg -> GPR) and mtv/mtvc (GPR -> VFPU reg / control reg).
void Jit::Comp_Mftv(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_XFER);

	int imm = op & 0xFF;
	MIPSGPReg rt = _RT;
	switch ((op >> 21) & 0x1f) {
	case 3: //mfv / mfvc
		// rt = 0, imm = 255 appears to be used as a CPU interlock by some games.
		if (rt != MIPS_REG_ZERO) {
			if (imm < 128) { //R(rt) = VI(imm);
				// Make sure the vreg is not locked up inside a SIMD group before we look at it.
				fpr.SimpleRegV(imm, 0);
				if (fpr.V(imm).IsSimpleReg()) {
					fpr.MapRegV(imm, 0);
					gpr.MapReg(rt, false, true);
					MOVD_xmm(gpr.R(rt), fpr.VX(imm));
				} else {
					// Let's not bother mapping the vreg.
					gpr.MapReg(rt, false, true);
					MOV(32, gpr.R(rt), fpr.V(imm));
				}
			} else if (imm < 128 + VFPU_CTRL_MAX) { //mfvc
				if (imm - 128 == VFPU_CTRL_CC) {
					// VFPU_CC lives in a regcache-managed GPR, possibly as an immediate.
					if (gpr.IsImm(MIPS_REG_VFPUCC)) {
						gpr.SetImm(rt, gpr.GetImm(MIPS_REG_VFPUCC));
					} else {
						gpr.Lock(rt, MIPS_REG_VFPUCC);
						gpr.MapReg(rt, false, true);
						gpr.MapReg(MIPS_REG_VFPUCC, true, false);
						MOV(32, gpr.R(rt), gpr.R(MIPS_REG_VFPUCC));
						gpr.UnlockAll();
					}
				} else {
					// In case we have a saved prefix.
					FlushPrefixV();
					gpr.MapReg(rt, false, true);
					MOV(32, gpr.R(rt), MIPSSTATE_VAR_ELEM32(vfpuCtrl[0], imm - 128));
				}
			} else {
				//ERROR - maybe need to make this value too an "interlock" value?
				_dbg_assert_msg_(false, "mfv - invalid register");
			}
		}
		break;

	case 7: //mtv
		if (imm < 128) { // VI(imm) = R(rt);
			fpr.MapRegV(imm, MAP_DIRTY | MAP_NOINIT);
			// Let's not bother mapping rt if we don't have to.
			if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0) {
				// Writing zero: just clear the xmm reg directly.
				XORPS(fpr.VX(imm), fpr.V(imm));
			} else {
				gpr.KillImmediate(rt, true, false);
				MOVD_xmm(fpr.VX(imm), gpr.R(rt));
			}
		} else if (imm < 128 + VFPU_CTRL_MAX) { //mtvc //currentMIPS->vfpuCtrl[imm - 128] = R(rt);
			if (imm - 128 == VFPU_CTRL_CC) {
				if (gpr.IsImm(rt)) {
					gpr.SetImm(MIPS_REG_VFPUCC, gpr.GetImm(rt));
				} else {
					gpr.Lock(rt, MIPS_REG_VFPUCC);
					gpr.MapReg(rt, true, false);
					gpr.MapReg(MIPS_REG_VFPUCC, false, true);
					MOV(32, gpr.R(MIPS_REG_VFPUCC), gpr.R(rt));
					gpr.UnlockAll();
				}
			} else {
				gpr.MapReg(rt, true, false);
				MOV(32, MIPSSTATE_VAR_ELEM32(vfpuCtrl[0], imm - 128), gpr.R(rt));
			}

			// TODO: Optimization if rt is Imm?
			// Writing a prefix control reg invalidates anything we knew about prefixes in this block.
			if (imm - 128 == VFPU_CTRL_SPREFIX) {
				js.prefixSFlag = JitState::PREFIX_UNKNOWN;
				js.blockWrotePrefixes = true;
			} else if (imm - 128 == VFPU_CTRL_TPREFIX) {
				js.prefixTFlag = JitState::PREFIX_UNKNOWN;
				js.blockWrotePrefixes = true;
			} else if (imm - 128 == VFPU_CTRL_DPREFIX) {
				js.prefixDFlag = JitState::PREFIX_UNKNOWN;
				js.blockWrotePrefixes = true;
			}
		} else {
			//ERROR
			_dbg_assert_msg_(false, "mtv - invalid register");
		}
		break;

	default:
		DISABLE;
	}
}
2013-01-26 01:33:32 +01:00
2014-09-01 23:13:07 -07:00
void Jit : : Comp_Vmfvc ( MIPSOpcode op ) {
2019-02-03 14:01:51 -08:00
CONDITIONAL_DISABLE ( VFPU_XFER ) ;
2019-03-10 20:25:47 -07:00
int vd = _VD ;
2019-03-30 12:14:15 -07:00
int imm = ( op > > 8 ) & 0x7F ;
if ( imm < VFPU_CTRL_MAX ) {
2019-03-10 20:25:47 -07:00
fpr . MapRegV ( vd , MAP_DIRTY | MAP_NOINIT ) ;
2019-03-30 12:14:15 -07:00
if ( imm = = VFPU_CTRL_CC ) {
2014-09-01 23:13:07 -07:00
gpr . MapReg ( MIPS_REG_VFPUCC , true , false ) ;
2019-03-10 20:25:47 -07:00
MOVD_xmm ( fpr . VX ( vd ) , gpr . R ( MIPS_REG_VFPUCC ) ) ;
2014-09-01 23:13:07 -07:00
} else {
2019-03-30 12:14:15 -07:00
MOVSS ( fpr . VX ( vd ) , MIPSSTATE_VAR_ELEM32 ( vfpuCtrl [ 0 ] , imm ) ) ;
2014-09-01 23:13:07 -07:00
}
fpr . ReleaseSpillLocks ( ) ;
2019-03-30 12:14:15 -07:00
} else {
fpr . MapRegV ( vd , MAP_DIRTY | MAP_NOINIT ) ;
XORPS ( fpr . VX ( vd ) , fpr . V ( vd ) ) ;
fpr . ReleaseSpillLocks ( ) ;
2014-09-01 23:13:07 -07:00
}
}
2013-08-24 14:43:49 -07:00
void Jit : : Comp_Vmtvc ( MIPSOpcode op ) {
2019-02-03 14:01:51 -08:00
CONDITIONAL_DISABLE ( VFPU_XFER ) ;
2013-02-14 00:25:32 -08:00
int vs = _VS ;
2019-03-30 12:14:15 -07:00
int imm = op & 0x7F ;
if ( imm < VFPU_CTRL_MAX ) {
2013-02-14 00:25:32 -08:00
fpr . MapRegV ( vs , 0 ) ;
2019-03-30 12:14:15 -07:00
if ( imm = = VFPU_CTRL_CC ) {
2014-06-27 22:30:45 -07:00
gpr . MapReg ( MIPS_REG_VFPUCC , false , true ) ;
MOVD_xmm ( gpr . R ( MIPS_REG_VFPUCC ) , fpr . VX ( vs ) ) ;
} else {
2019-03-30 12:14:15 -07:00
MOVSS ( MIPSSTATE_VAR_ELEM32 ( vfpuCtrl [ 0 ] , imm ) , fpr . VX ( vs ) ) ;
2014-06-27 22:30:45 -07:00
}
2013-02-14 00:25:32 -08:00
fpr . ReleaseSpillLocks ( ) ;
2019-03-30 12:14:15 -07:00
if ( imm = = VFPU_CTRL_SPREFIX ) {
2013-02-15 01:12:43 -08:00
js . prefixSFlag = JitState : : PREFIX_UNKNOWN ;
2023-08-22 21:45:49 -07:00
js . blockWrotePrefixes = true ;
2019-03-30 12:14:15 -07:00
} else if ( imm = = VFPU_CTRL_TPREFIX ) {
2013-02-15 01:12:43 -08:00
js . prefixTFlag = JitState : : PREFIX_UNKNOWN ;
2023-08-22 21:45:49 -07:00
js . blockWrotePrefixes = true ;
2019-03-30 12:14:15 -07:00
} else if ( imm = = VFPU_CTRL_DPREFIX ) {
2013-02-15 01:12:43 -08:00
js . prefixDFlag = JitState : : PREFIX_UNKNOWN ;
2023-08-22 21:45:49 -07:00
js . blockWrotePrefixes = true ;
2013-02-14 00:25:32 -08:00
}
}
}
2013-08-24 14:43:49 -07:00
// Compiles vmidt/vmzero/vmone: initializes a whole matrix to identity, zeros, or ones.
// Opcode field ((op >> 16) & 0xF): 3 = vmidt, 6 = vmzero, 7 = vmone.
void Jit::Comp_VMatrixInit(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_XFER);
	if (js.HasUnknownPrefix())
		DISABLE;

	MatrixSize sz = GetMtxSize(op);
	int n = GetMatrixSide(sz);

	// Not really about trying here, it will work if enabled.
	if (jo.enableVFPUSIMD) {
		VectorSize vsz = GetVectorSize(sz);
		u8 vecs[4];
		GetMatrixColumns(_VD, sz, vecs);
		// Pre-load the source constants once - vmzero (case 6) needs none.
		switch ((op >> 16) & 0xF) {
		case 3:
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&identityMatrix[0]));
			break;
		case 7:
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&oneOneOneOne));
			MOVAPS(XMM0, MatR(TEMPREG));
			break;
		}
		for (int i = 0; i < n; i++) {
			u8 vec[4];
			GetVectorRegs(vec, vsz, vecs[i]);
			fpr.MapRegsVS(vec, vsz, MAP_NOINIT | MAP_DIRTY);
			switch ((op >> 16) & 0xF) {
			case 3:
				// Row i of the identity matrix (16 bytes per row).
				MOVAPS(fpr.VSX(vec), MDisp(TEMPREG, 16 * i));
				break;
			case 6:
				XORPS(fpr.VSX(vec), fpr.VS(vec));
				break;
			case 7:
				MOVAPS(fpr.VSX(vec), R(XMM0));
				break;
			}
		}
		fpr.ReleaseSpillLocks();
		return;
	}

	u8 dregs[16];
	GetMatrixRegs(dregs, sz, _VD);

	// Flush SIMD.
	fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

	switch ((op >> 16) & 0xF) {
	case 3: // vmidt
		// XMM0 = 0.0f, XMM1 = 1.0f; diagonal gets ones.
		XORPS(XMM0, R(XMM0));
		MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
		MOVSS(XMM1, MatR(TEMPREG));
		for (int a = 0; a < n; a++) {
			for (int b = 0; b < n; b++) {
				MOVSS(fpr.V(dregs[a * 4 + b]), a == b ? XMM1 : XMM0);
			}
		}
		break;
	case 6: // vmzero
		XORPS(XMM0, R(XMM0));
		for (int a = 0; a < n; a++) {
			for (int b = 0; b < n; b++) {
				MOVSS(fpr.V(dregs[a * 4 + b]), XMM0);
			}
		}
		break;
	case 7: // vmone
		MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
		MOVSS(XMM0, MatR(TEMPREG));
		for (int a = 0; a < n; a++) {
			for (int b = 0; b < n; b++) {
				MOVSS(fpr.V(dregs[a * 4 + b]), XMM0);
			}
		}
		break;
	}
	fpr.ReleaseSpillLocks();
}
2013-08-24 14:43:49 -07:00
// Compiles vmmov: copies matrix VS into matrix VD.
void Jit::Comp_Vmmov(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_MTX_VMMOV);

	// TODO: This probably ignores prefixes?
	if (js.HasUnknownPrefix())
		DISABLE;

	MatrixSize sz = GetMtxSize(op);
	int n = GetMatrixSide(sz);

	if (jo.enableVFPUSIMD) {
		VectorSize vsz = GetVectorSize(sz);
		u8 dest[4][4];
		MatrixOverlapType overlap = GetMatrixOverlap(_VD, _VS, sz);
		u8 vecs[4];
		if (overlap == OVERLAP_NONE) {
			// No overlap: copy straight into VD's columns.
			GetMatrixColumns(_VD, sz, vecs);
			for (int i = 0; i < n; ++i) {
				GetVectorRegs(dest[i], vsz, vecs[i]);
			}
		} else {
			// Overlap: stage the copy via temp vectors, then move to VD afterwards.
			for (int i = 0; i < n; ++i) {
				fpr.GetTempVS(dest[i], vsz);
			}
		}
		GetMatrixColumns(_VS, sz, vecs);
		for (int i = 0; i < n; i++) {
			u8 vec[4];
			GetVectorRegs(vec, vsz, vecs[i]);
			fpr.MapRegsVS(vec, vsz, 0);
			fpr.MapRegsVS(dest[i], vsz, MAP_NOINIT);
			MOVAPS(fpr.VSX(dest[i]), fpr.VS(vec));
			fpr.ReleaseSpillLocks();
		}
		if (overlap != OVERLAP_NONE) {
			// Okay, move from the temps to VD now.
			GetMatrixColumns(_VD, sz, vecs);
			for (int i = 0; i < n; i++) {
				u8 vec[4];
				GetVectorRegs(vec, vsz, vecs[i]);
				fpr.MapRegsVS(vec, vsz, MAP_NOINIT);
				fpr.MapRegsVS(dest[i], vsz, 0);
				MOVAPS(fpr.VSX(vec), fpr.VS(dest[i]));
				fpr.ReleaseSpillLocks();
			}
		}
		fpr.ReleaseSpillLocks();
		return;
	}

	u8 sregs[16], dregs[16];
	GetMatrixRegs(sregs, sz, _VS);
	GetMatrixRegs(dregs, sz, _VD);

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

	// TODO: gas doesn't allow overlap, what does the PSP do?
	// Potentially detect overlap or the safe direction to move in, or just DISABLE?
	// This is very not optimal, blows the regcache everytime.
	// Pass 1: copy every source element into a spilled temp.
	u8 tempregs[16];
	for (int a = 0; a < n; a++) {
		for (int b = 0; b < n; b++) {
			u8 temp = (u8)fpr.GetTempV();
			fpr.MapRegV(temp, MAP_NOINIT | MAP_DIRTY);
			MOVSS(fpr.VX(temp), fpr.V(sregs[a * 4 + b]));
			fpr.StoreFromRegisterV(temp);
			tempregs[a * 4 + b] = temp;
		}
	}
	// Pass 2: move the temps into the destination (safe even with overlap).
	for (int a = 0; a < n; a++) {
		for (int b = 0; b < n; b++) {
			u8 temp = tempregs[a * 4 + b];
			fpr.MapRegV(temp, 0);
			MOVSS(fpr.V(dregs[a * 4 + b]), fpr.VX(temp));
		}
	}
	fpr.ReleaseSpillLocks();
}
2013-08-24 14:43:49 -07:00
// Compiles vscl: d[i] = s[i] * t (scalar scale of a vector).
void Jit::Comp_VScl(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);
	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	u8 sregs[4], dregs[4], scale;
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixT(&scale, V_Single, _VT);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// Fast path: everything mapped as SIMD, broadcast the scale and do one MULPS.
	if (fpr.TryMapDirtyInInVS(dregs, sz, sregs, sz, &scale, V_Single, true)) {
		MOVSS(XMM0, fpr.VS(&scale));
		if (sz != V_Single)
			SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
		if (dregs[0] != sregs[0]) {
			MOVAPS(fpr.VSX(dregs), fpr.VS(sregs));
		}
		MULPS(fpr.VSX(dregs), R(XMM0));
		ApplyPrefixD(dregs, sz);
		fpr.ReleaseSpillLocks();
		return;
	}

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	fpr.SimpleRegsV(&scale, V_Single, 0);
	fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

	// Move to XMM0 early, so we don't have to worry about overlap with scale.
	MOVSS(XMM0, fpr.V(scale));

	// Pick a destination xmm for each lane: a temp where overlap would be unsafe,
	// otherwise the dest reg itself.
	X64Reg tempxregs[4];
	for (int i = 0; i < n; ++i) {
		if (dregs[i] != scale || !IsOverlapSafeAllowS(dregs[i], i, n, sregs)) {
			int reg = fpr.GetTempV();
			fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
			fpr.SpillLockV(reg);
			tempxregs[i] = fpr.VX(reg);
		} else {
			fpr.MapRegV(dregs[i], dregs[i] == sregs[i] ? MAP_DIRTY : MAP_NOINIT);
			fpr.SpillLockV(dregs[i]);
			tempxregs[i] = fpr.VX(dregs[i]);
		}
	}
	// Multiply each source lane by the scale.
	for (int i = 0; i < n; ++i) {
		if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
			MOVSS(tempxregs[i], fpr.V(sregs[i]));
		MULSS(tempxregs[i], R(XMM0));
	}
	// Write back any lanes that were computed in temps.
	for (int i = 0; i < n; ++i) {
		if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
			MOVSS(fpr.V(dregs[i]), tempxregs[i]);
	}
	ApplyPrefixD(dregs, sz);
	fpr.ReleaseSpillLocks();
}
2013-08-24 14:43:49 -07:00
// Compiles vmmul: matrix * matrix multiply (D = S^T * T in PSP convention).
void Jit::Comp_Vmmul(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_MTX_VMMUL);
	if (!js.HasNoPrefix()) {
		DISABLE;
	}

	if (PSP_CoreParameter().compat.flags().MoreAccurateVMMUL) {
		// Fall back to interpreter, which has the accurate implementation.
		// Later we might do something more optimized here.
		DISABLE;
	}

	MatrixSize sz = GetMtxSize(op);
	VectorSize vsz = GetVectorSize(sz);
	int n = GetMatrixSide(sz);

	MatrixOverlapType soverlap = GetMatrixOverlap(_VS, _VD, sz);
	MatrixOverlapType toverlap = GetMatrixOverlap(_VT, _VD, sz);
	// If these overlap, we won't be able to map T as singles.
	MatrixOverlapType stoverlap = GetMatrixOverlap(_VS, _VT, sz);

	if (jo.enableVFPUSIMD && !soverlap && !toverlap && !stoverlap) {
		u8 scols[4], dcols[4], tregs[16];

		int vs = _VS;
		int vd = _VD;
		int vt = _VT;

		bool transposeDest = false;
		bool transposeS = false;

		// A transposed (E-form) 4x4 dest: compute into the untransposed columns,
		// then transpose in place at the end.
		if ((vd & 0x20) && sz == M_4x4) {
			vd ^= 0x20;
			transposeDest = true;
		}

		// Our algorithm needs a transposed S (which is the usual).
		if (!(vs & 0x20) && sz == M_4x4) {
			vs ^= 0x20;
			transposeS = true;
		}

		// The T matrix we will address individually.
		GetMatrixColumns(vd, sz, dcols);
		GetMatrixRows(vs, sz, scols);
		memset(tregs, 255, sizeof(tregs));
		GetMatrixRegs(tregs, sz, vt);
		// T elements are read as singles from memory, so flush them out of any regs.
		for (int i = 0; i < 16; i++) {
			if (tregs[i] != 255)
				fpr.StoreFromRegisterV(tregs[i]);
		}

		u8 scol[4][4];

		// Map all of S's columns into registers.
		for (int i = 0; i < n; i++) {
			if (transposeS) {
				fpr.StoreFromRegisterV(scols[i]);
			}
			GetVectorRegs(scol[i], vsz, scols[i]);
			fpr.MapRegsVS(scol[i], vsz, 0);
			fpr.SpillLockV(scols[i], vsz);
		}

		// 4x4 in-register transpose via unpack instructions, using XMM0 as scratch.
		// Shorter than manually stuffing the registers. But it feels like ther'es room for optimization here...
		auto transposeInPlace = [=](u8 col[4][4]) {
			MOVAPS(XMM0, fpr.VS(col[0]));
			UNPCKLPS(fpr.VSX(col[0]), fpr.VS(col[2]));
			UNPCKHPS(XMM0, fpr.VS(col[2]));

			MOVAPS(fpr.VSX(col[2]), fpr.VS(col[1]));
			UNPCKLPS(fpr.VSX(col[1]), fpr.VS(col[3]));
			UNPCKHPS(fpr.VSX(col[2]), fpr.VS(col[3]));

			MOVAPS(fpr.VSX(col[3]), fpr.VS(col[0]));
			UNPCKLPS(fpr.VSX(col[0]), fpr.VS(col[1]));
			UNPCKHPS(fpr.VSX(col[3]), fpr.VS(col[1]));

			MOVAPS(fpr.VSX(col[1]), R(XMM0));
			UNPCKLPS(fpr.VSX(col[1]), fpr.VS(col[2]));
			UNPCKHPS(XMM0, fpr.VS(col[2]));

			MOVAPS(fpr.VSX(col[2]), fpr.VS(col[1]));
			MOVAPS(fpr.VSX(col[1]), fpr.VS(col[3]));
			MOVAPS(fpr.VSX(col[3]), R(XMM0));
		};

		// Some games pass in S as an E matrix (transposed). Let's just transpose the data before we do the multiplication instead.
		// This is shorter than trying to combine a discontinous matrix with lots of shufps.
		if (transposeS) {
			transposeInPlace(scol);
		}

		// Now, work our way through the matrix, loading things as we go.
		// TODO: With more temp registers, can generate much more efficient code.
		for (int i = 0; i < n; i++) {
			// D column i = sum over j of T[i][j] broadcast * S column j.
			MOVSS(XMM1, fpr.V(tregs[4 * i]));  // TODO: AVX broadcastss to replace this and the SHUFPS
			MOVSS(XMM0, fpr.V(tregs[4 * i + 1]));
			SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(0, 0, 0, 0));
			SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
			MULPS(XMM1, fpr.VS(scol[0]));
			MULPS(XMM0, fpr.VS(scol[1]));
			ADDPS(XMM1, R(XMM0));
			for (int j = 2; j < n; j++) {
				MOVSS(XMM0, fpr.V(tregs[4 * i + j]));
				SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
				MULPS(XMM0, fpr.VS(scol[j]));
				ADDPS(XMM1, R(XMM0));
			}
			// Map the D column.
#if !PPSSPP_ARCH(AMD64)
			u8 dcol[4];
			GetVectorRegs(dcol, vsz, dcols[i]);
			fpr.MapRegsVS(dcol, vsz, MAP_DIRTY | MAP_NOINIT | MAP_NOLOCK);
#else
			u8 dcol[4];
			GetVectorRegs(dcol, vsz, dcols[i]);
			fpr.MapRegsVS(dcol, vsz, MAP_DIRTY | MAP_NOINIT);
#endif
			MOVAPS(fpr.VS(dcol), XMM1);
		}

		// We transposed the S data in place above, so discard it rather than writing it back.
		if (transposeS) {
			for (int i = 0; i < n; i++) {
				fpr.DiscardVS(scols[i]);
			}
		}

#if !PPSSPP_ARCH(AMD64)
		fpr.ReleaseSpillLocks();
#endif
		if (transposeDest) {
			u8 dcol[4][4];
			for (int i = 0; i < n; i++) {
				GetVectorRegs(dcol[i], vsz, dcols[i]);
				fpr.MapRegsVS(dcol[i], vsz, MAP_DIRTY);
			}
			transposeInPlace(dcol);
		}
		fpr.ReleaseSpillLocks();
		return;
	}

	u8 sregs[16], tregs[16], dregs[16];
	GetMatrixRegs(sregs, sz, _VS);
	GetMatrixRegs(tregs, sz, _VT);
	GetMatrixRegs(dregs, sz, _VD);

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	fpr.SimpleRegsV(tregs, sz, 0);
	fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

	// Rough overlap check.
	bool overlap = false;
	if (GetMtx(_VS) == GetMtx(_VD) || GetMtx(_VT) == GetMtx(_VD)) {
		// Potential overlap (guaranteed for 3x3 or more).
		overlap = true;
	}

	if (overlap) {
		// Overlapping case: accumulate each result element into a spilled temp first,
		// then copy the temps into D in a second pass.
		u8 tempregs[16];
		for (int a = 0; a < n; a++) {
			for (int b = 0; b < n; b++) {
				MOVSS(XMM0, fpr.V(sregs[b * 4]));
				MULSS(XMM0, fpr.V(tregs[a * 4]));
				for (int c = 1; c < n; c++) {
					MOVSS(XMM1, fpr.V(sregs[b * 4 + c]));
					MULSS(XMM1, fpr.V(tregs[a * 4 + c]));
					ADDSS(XMM0, R(XMM1));
				}
				u8 temp = (u8)fpr.GetTempV();
				fpr.MapRegV(temp, MAP_NOINIT | MAP_DIRTY);
				MOVSS(fpr.VX(temp), R(XMM0));
				fpr.StoreFromRegisterV(temp);
				tempregs[a * 4 + b] = temp;
			}
		}
		for (int a = 0; a < n; a++) {
			for (int b = 0; b < n; b++) {
				u8 temp = tempregs[a * 4 + b];
				fpr.MapRegV(temp, 0);
				MOVSS(fpr.V(dregs[a * 4 + b]), fpr.VX(temp));
			}
		}
	} else {
		// No overlap: write each dot product straight into D.
		for (int a = 0; a < n; a++) {
			for (int b = 0; b < n; b++) {
				MOVSS(XMM0, fpr.V(sregs[b * 4]));
				MULSS(XMM0, fpr.V(tregs[a * 4]));
				for (int c = 1; c < n; c++) {
					MOVSS(XMM1, fpr.V(sregs[b * 4 + c]));
					MULSS(XMM1, fpr.V(tregs[a * 4 + c]));
					ADDSS(XMM0, R(XMM1));
				}
				MOVSS(fpr.V(dregs[a * 4 + b]), XMM0);
			}
		}
	}
	fpr.ReleaseSpillLocks();
}
2013-08-24 14:43:49 -07:00
void Jit : : Comp_Vmscl ( MIPSOpcode op ) {
2019-06-03 23:03:37 +02:00
CONDITIONAL_DISABLE ( VFPU_MTX_VMSCL ) ;
2013-04-20 01:34:16 -07:00
2017-07-07 15:46:14 +02:00
// TODO: This op probably ignores prefixes?
2013-09-28 12:33:16 +02:00
if ( js . HasUnknownPrefix ( ) )
2013-04-20 01:34:16 -07:00
DISABLE ;
MatrixSize sz = GetMtxSize ( op ) ;
int n = GetMatrixSide ( sz ) ;
u8 sregs [ 16 ] , dregs [ 16 ] , scale ;
GetMatrixRegs ( sregs , sz , _VS ) ;
GetVectorRegs ( & scale , V_Single , _VT ) ;
GetMatrixRegs ( dregs , sz , _VD ) ;
2014-11-10 21:58:08 -08:00
// Flush SIMD.
fpr . SimpleRegsV ( sregs , sz , 0 ) ;
fpr . SimpleRegsV ( & scale , V_Single , 0 ) ;
fpr . SimpleRegsV ( dregs , sz , MAP_NOINIT | MAP_DIRTY ) ;
2013-04-20 01:34:16 -07:00
// Move to XMM0 early, so we don't have to worry about overlap with scale.
MOVSS ( XMM0 , fpr . V ( scale ) ) ;
// TODO: test overlap, optimize.
u8 tempregs [ 16 ] ;
2017-07-05 13:51:27 +02:00
for ( int a = 0 ; a < n ; a + + ) {
for ( int b = 0 ; b < n ; b + + ) {
2013-04-20 01:34:16 -07:00
u8 temp = ( u8 ) fpr . GetTempV ( ) ;
fpr . MapRegV ( temp , MAP_NOINIT | MAP_DIRTY ) ;
MOVSS ( fpr . VX ( temp ) , fpr . V ( sregs [ a * 4 + b ] ) ) ;
MULSS ( fpr . VX ( temp ) , R ( XMM0 ) ) ;
fpr . StoreFromRegisterV ( temp ) ;
tempregs [ a * 4 + b ] = temp ;
}
}
2017-07-05 13:51:27 +02:00
for ( int a = 0 ; a < n ; a + + ) {
for ( int b = 0 ; b < n ; b + + ) {
2013-04-20 01:34:16 -07:00
u8 temp = tempregs [ a * 4 + b ] ;
fpr . MapRegV ( temp , 0 ) ;
MOVSS ( fpr . V ( dregs [ a * 4 + b ] ) , fpr . VX ( temp ) ) ;
}
}
fpr . ReleaseSpillLocks ( ) ;
}
2013-08-24 14:43:49 -07:00
// Compiles vtfm/vhtfm: matrix-times-vector transform. The homogenous (vhtfm)
// form treats the missing last vector element as 1.0f.
void Jit::Comp_Vtfm(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_MTX_VTFM);

	// TODO: This probably ignores prefixes?  Or maybe uses D?
	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	MatrixSize msz = GetMtxSize(op);
	int n = GetNumVectorElements(sz);
	int ins = (op >> 23) & 7;

	bool homogenous = false;
	if (n == ins) {
		// vhtfm: widen to the next size, the last input element is implicitly 1.0f.
		n++;
		sz = (VectorSize)((int)(sz) + 1);
		msz = (MatrixSize)((int)(msz) + 1);
		homogenous = true;
	}
	// Otherwise, n should already be ins + 1.
	else if (n != ins + 1) {
		DISABLE;
	}

	if (jo.enableVFPUSIMD) {
		u8 scols[4], dcol[4], tregs[4];

		int vs = _VS;
		int vd = _VD;
		int vt = _VT;  // vector!

		// The T matrix we will address individually.
		GetVectorRegs(dcol, sz, vd);
		GetMatrixRows(vs, msz, scols);
		GetVectorRegs(tregs, sz, vt);
		// T elements are read as singles from memory, so flush them out of any regs.
		for (int i = 0; i < n; i++) {
			fpr.StoreFromRegisterV(tregs[i]);
		}

		// We need the T regs in individual regs, but they could overlap with S regs.
		// If that happens, we copy the T reg to a temp.
		auto flushConflictingTRegsToTemps = [&](u8 regs[4]) {
			for (int i = 0; i < n; ++i) {
				for (int j = 0; j < n; ++j) {
					if (regs[i] != tregs[j]) {
						continue;
					}
					// They match.  Let's replace this treg with a temp reg.
					// Note that it will spill if there's contention, unfortunately...
					tregs[j] = fpr.GetTempV();
					fpr.MapRegV(tregs[j], MAP_NOINIT);
					MOVSS(fpr.VX(tregs[j]), fpr.V(regs[i]));
				}
			}
		};

		u8 scol[4][4];

		// Map all of S's columns into registers.
		for (int i = 0; i < n; i++) {
			GetVectorRegs(scol[i], sz, scols[i]);
			flushConflictingTRegsToTemps(scol[i]);
			fpr.MapRegsVS(scol[i], sz, 0);
		}

		// Now, work our way through the matrix, loading things as we go.
		// TODO: With more temp registers, can generate much more efficient code.
		MOVSS(XMM1, fpr.V(tregs[0]));  // TODO: AVX broadcastss to replace this and the SHUFPS (but take care of temps, unless we force store them.)
		SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(0, 0, 0, 0));
		MULPS(XMM1, fpr.VS(scol[0]));
		for (int j = 1; j < n; j++) {
			if (!homogenous || j != n - 1) {
				MOVSS(XMM0, fpr.V(tregs[j]));
				SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
				MULPS(XMM0, fpr.VS(scol[j]));
				ADDPS(XMM1, R(XMM0));
			} else {
				// Homogenous: the implicit 1.0f means we just add the last column.
				ADDPS(XMM1, fpr.VS(scol[j]));
			}
		}

		// Map the D column. Release first in case of overlap.
		for (int i = 0; i < n; i++) {
			fpr.ReleaseSpillLockV(scol[i], sz);
		}
		fpr.MapRegsVS(dcol, sz, MAP_DIRTY | MAP_NOINIT);
		MOVAPS(fpr.VS(dcol), XMM1);
		fpr.ReleaseSpillLocks();
		return;
	}

	u8 sregs[16], dregs[4], tregs[4];
	GetMatrixRegs(sregs, msz, _VS);
	GetVectorRegs(tregs, sz, _VT);
	GetVectorRegs(dregs, sz, _VD);

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, msz, 0);
	fpr.SimpleRegsV(tregs, sz, 0);
	fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

	// TODO: test overlap, optimize.
	// Each output element is a dot product of a matrix row with T, staged via temps.
	u8 tempregs[4];
	for (int i = 0; i < n; i++) {
		MOVSS(XMM0, fpr.V(sregs[i * 4]));
		MULSS(XMM0, fpr.V(tregs[0]));
		for (int k = 1; k < n; k++) {
			MOVSS(XMM1, fpr.V(sregs[i * 4 + k]));
			// Homogenous: skip the multiply for the implicit 1.0f last element.
			if (!homogenous || k != n - 1)
				MULSS(XMM1, fpr.V(tregs[k]));
			ADDSS(XMM0, R(XMM1));
		}
		u8 temp = (u8)fpr.GetTempV();
		fpr.MapRegV(temp, MAP_NOINIT | MAP_DIRTY);
		MOVSS(fpr.VX(temp), R(XMM0));
		fpr.StoreFromRegisterV(temp);
		tempregs[i] = temp;
	}
	for (int i = 0; i < n; i++) {
		u8 temp = tempregs[i];
		fpr.MapRegV(temp, 0);
		MOVSS(fpr.V(dregs[i]), fpr.VX(temp));
	}
	fpr.ReleaseSpillLocks();
}
2013-08-24 14:43:49 -07:00
// vcrs is not implemented in the x86 jit; falls back to the interpreter.
void Jit::Comp_VCrs(MIPSOpcode op) {
	DISABLE;
}
2013-08-24 14:43:49 -07:00
// vdet is not implemented in the x86 jit; falls back to the interpreter.
void Jit::Comp_VDet(MIPSOpcode op) {
	DISABLE;
}
2014-11-08 11:58:59 -08:00
// PSHUFB control masks used by Comp_Vi2x. A -1 (0x80 bit set) lane index makes
// PSHUFB write a zero byte to that position.
// The goal is to map (reversed byte order for clarity):
// 000000AA 000000BB 000000CC 000000DD -> AABBCCDD
alignas(16) static const s8 vi2xc_shuffle[16] = { 3, 7, 11, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
// 0000AAAA 0000BBBB 0000CCCC 0000DDDD -> AAAABBBB CCCCDDDD
alignas(16) static const s8 vi2xs_shuffle[16] = { 2, 3, 6, 7, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1 };
2013-08-24 14:43:49 -07:00
// Compiles vi2uc/vi2c/vi2us/vi2s: packs pairs or quads of ints into 8/16-bit lanes.
void Jit::Comp_Vi2x(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);
	if (js.HasUnknownPrefix())
		DISABLE;

	int bits = ((op >> 16) & 2) == 0 ? 8 : 16; // vi2uc/vi2c (0/1), vi2us/vi2s (2/3)
	bool unsignedOp = ((op >> 16) & 1) == 0; // vi2uc (0), vi2us (2)

	// These instructions pack pairs or quads of integers into 32 bits.
	// The unsigned (u) versions skip the sign bit when packing.
	VectorSize sz = GetVecSize(op);
	VectorSize outsize;
	if (bits == 8) {
		outsize = V_Single;
		if (sz != V_Quad) {
			DISABLE;
		}
	} else {
		switch (sz) {
		case V_Pair:
			outsize = V_Single;
			break;
		case V_Quad:
			outsize = V_Pair;
			break;
		default:
			DISABLE;
		}
	}

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, outsize, _VD);

	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	fpr.SimpleRegsV(dregs, outsize, MAP_NOINIT | MAP_DIRTY);

	// First, let's assemble the sregs into lanes of a single xmm reg.
	// For quad inputs, we need somewhere for the bottom regs. Ideally dregs[0].
	X64Reg dst0 = XMM0;
	if (sz == V_Quad) {
		int vreg = dregs[0];
		if (!IsOverlapSafeAllowS(dregs[0], 0, 4, sregs)) {
			// Will be discarded on release.
			vreg = fpr.GetTempV();
		}
		fpr.MapRegV(vreg, vreg == sregs[0] ? MAP_DIRTY : MAP_NOINIT);
		fpr.SpillLockV(vreg);
		dst0 = fpr.VX(vreg);
	} else {
		// Pair, let's check if we should use dregs[0] directly. No temp needed.
		int vreg = dregs[0];
		if (IsOverlapSafeAllowS(dregs[0], 0, 2, sregs)) {
			fpr.MapRegV(vreg, vreg == sregs[0] ? MAP_DIRTY : MAP_NOINIT);
			fpr.SpillLockV(vreg);
			dst0 = fpr.VX(vreg);
		}
	}

	if (!fpr.V(sregs[0]).IsSimpleReg(dst0)) {
		MOVSS(dst0, fpr.V(sregs[0]));
	}
	MOVSS(XMM1, fpr.V(sregs[1]));
	// With this, we have the lower half in dst0.
	PUNPCKLDQ(dst0, R(XMM1));
	if (sz == V_Quad) {
		MOVSS(XMM0, fpr.V(sregs[2]));
		MOVSS(XMM1, fpr.V(sregs[3]));
		PUNPCKLDQ(XMM0, R(XMM1));
		// Now we need to combine XMM0 into dst0.
		PUNPCKLQDQ(dst0, R(XMM0));
	} else {
		// Otherwise, we need to zero out the top 2.
		// We expect XMM1 to be zero below.
		PXOR(XMM1, R(XMM1));
		PUNPCKLQDQ(dst0, R(XMM1));
	}

	// For "u" type ops, we clamp to zero and shift off the sign bit first.
	if (unsignedOp) {
		if (cpu_info.bSSE4_1) {
			if (sz == V_Quad) {
				// Zeroed in the other case above.
				PXOR(XMM1, R(XMM1));
			}
			PMAXSD(dst0, R(XMM1));
			PSLLD(dst0, 1);
		} else {
			// Get a mask of the sign bit in dst0, then and in the values. This clamps to 0.
			MOVDQA(XMM1, R(dst0));
			PSRAD(dst0, 31);
			PSLLD(XMM1, 1);
			PANDN(dst0, R(XMM1));
		}
	}
	// At this point, everything is aligned in the high bits of our lanes.
	if (cpu_info.bSSSE3) {
		// SSSE3: a single PSHUFB picks out the top bytes of each lane.
		if (RipAccessible(vi2xc_shuffle)) {
			PSHUFB(dst0, bits == 8 ? M(vi2xc_shuffle) : M(vi2xs_shuffle));  // rip accessible
		} else {
			MOV(PTRBITS, R(TEMPREG), bits == 8 ? ImmPtr(vi2xc_shuffle) : ImmPtr(vi2xs_shuffle));
			PSHUFB(dst0, MatR(TEMPREG));
		}
	} else {
		// Let's *arithmetically* shift in the sign so we can use saturating packs.
		PSRAD(dst0, 32 - bits);
		// XMM1 used for the high part just so there's no dependency. It contains garbage or 0.
		PACKSSDW(dst0, R(XMM1));
		if (bits == 8) {
			PACKSSWB(dst0, R(XMM1));
		}
	}
	if (!fpr.V(dregs[0]).IsSimpleReg(dst0)) {
		MOVSS(fpr.V(dregs[0]), dst0);
	}
	if (outsize == V_Pair) {
		fpr.MapRegV(dregs[1], MAP_NOINIT | MAP_DIRTY);
		MOVDQA(fpr.V(dregs[1]), dst0);
		// Shift out the lower result to get the result we want.
		PSRLDQ(fpr.VX(dregs[1]), 4);
	}
	ApplyPrefixD(dregs, outsize);
	fpr.ReleaseSpillLocks();
}
2017-08-31 01:14:51 +02:00
alignas ( 16 ) static const float vavg_table [ 4 ] = { 1.0f , 1.0f / 2.0f , 1.0f / 3.0f , 1.0f / 4.0f } ;
2014-11-08 09:39:55 -08:00
// Compiles vfad (horizontal add of all lanes of vs) and vavg (vfad then * 1/n).
// The result is a single-element vector register; prefixes are applied on S and D.
void Jit::Comp_Vhoriz(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);
	if (js.HasUnknownPrefix())
		DISABLE;
	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);
	u8 sregs[4], dregs[1];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, V_Single, _VD);

	// Fast path: source is already mapped as a SIMD register (and dest can be too).
	if (fpr.TryMapDirtyInVS(dregs, V_Single, sregs, sz)) {
		if (cpu_info.bSSE4_1) {
			// DPPS with a vector of ones = horizontal sum; the immediate's high nibble
			// selects which input lanes participate, low nibble where the result goes.
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&oneOneOneOne));
			switch (sz) {
			case V_Pair:
				MOVAPS(XMM0, fpr.VS(sregs));
				DPPS(XMM0, MatR(TEMPREG), 0x31);
				MOVAPS(fpr.VSX(dregs), R(XMM0));
				break;
			case V_Triple:
				MOVAPS(XMM0, fpr.VS(sregs));
				DPPS(XMM0, MatR(TEMPREG), 0x71);
				MOVAPS(fpr.VSX(dregs), R(XMM0));
				break;
			case V_Quad:
				XORPS(XMM1, R(XMM1));
				MOVAPS(XMM0, fpr.VS(sregs));
				DPPS(XMM0, MatR(TEMPREG), 0xF1);
				// In every other case, +0.0 is selected by the mask and added.
				// But, here we need to manually add it to the result.
				ADDPS(XMM0, R(XMM1));
				MOVAPS(fpr.VSX(dregs), R(XMM0));
				break;
			default:
				DISABLE;
			}
		} else {
			// Pre-SSE4.1: shuffle each remaining lane down to lane 0 and accumulate.
			// Adding +0.0 first normalizes any -0.000 inputs.
			switch (sz) {
			case V_Pair:
				XORPS(XMM1, R(XMM1));
				MOVAPS(XMM0, fpr.VS(sregs));
				ADDPS(XMM1, R(XMM0));
				// Bring lane 1 into lane 0 of XMM1, then add: lane 0 = s0 + s1.
				SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(3, 2, 1, 1));
				ADDPS(XMM0, R(XMM1));
				MOVAPS(fpr.VSX(dregs), R(XMM0));
				break;
			case V_Triple:
				XORPS(XMM1, R(XMM1));
				MOVAPS(XMM0, fpr.VS(sregs));
				ADDPS(XMM1, R(XMM0));
				SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(3, 2, 1, 1));
				ADDPS(XMM0, R(XMM1));
				// Second shuffle brings (the shifted copy of) lane 2 into lane 0.
				SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(3, 2, 1, 2));
				ADDPS(XMM0, R(XMM1));
				MOVAPS(fpr.VSX(dregs), R(XMM0));
				break;
			case V_Quad:
				XORPS(XMM1, R(XMM1));
				MOVAPS(XMM0, fpr.VS(sregs));
				// This flips the sign of any -0.000.
				ADDPS(XMM0, R(XMM1));
				// Fold high pair onto low pair, then fold lane 1 onto lane 0.
				MOVHLPS(XMM1, XMM0);
				ADDPS(XMM0, R(XMM1));
				MOVAPS(XMM1, R(XMM0));
				SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(1, 1, 1, 1));
				ADDPS(XMM0, R(XMM1));
				MOVAPS(fpr.VSX(dregs), R(XMM0));
				break;
			default:
				DISABLE;
			}
		}
		if (((op >> 16) & 31) == 7) { // vavg
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&vavg_table[n - 1]));
			MULSS(fpr.VSX(dregs), MatR(TEMPREG));
		}
		ApplyPrefixD(dregs, V_Single);
		fpr.ReleaseSpillLocks();
		return;
	}

	// Slow path: scalar accumulation over individually mapped registers.
	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	fpr.SimpleRegsV(dregs, V_Single, MAP_NOINIT | MAP_DIRTY);

	X64Reg reg = XMM0;
	if (IsOverlapSafe(dregs[0], 0, n, sregs)) {
		// Accumulate directly into the dest register when it doesn't alias a source.
		fpr.MapRegV(dregs[0], dregs[0] == sregs[0] ? MAP_DIRTY : MAP_NOINIT);
		fpr.SpillLockV(dregs[0]);
		reg = fpr.VX(dregs[0]);
	}

	// We have to start at +0.000 in case any values are -0.000.
	XORPS(reg, R(reg));
	for (int i = 0; i < n; ++i) {
		ADDSS(reg, fpr.V(sregs[i]));
	}

	switch ((op >> 16) & 31) {
	case 6: // vfad
		break;
	case 7: // vavg
		MOV(PTRBITS, R(TEMPREG), ImmPtr(&vavg_table[n - 1]));
		MULSS(reg, MatR(TEMPREG));
		break;
	}

	// If we accumulated in the scratch register, store to the (possibly aliased) dest now.
	if (reg == XMM0) {
		MOVSS(fpr.V(dregs[0]), XMM0);
	}
	ApplyPrefixD(dregs, V_Single);
	fpr.ReleaseSpillLocks();
}
2013-08-24 14:43:49 -07:00
void Jit : : Comp_Viim ( MIPSOpcode op ) {
2019-02-03 14:01:51 -08:00
CONDITIONAL_DISABLE ( VFPU_XFER ) ;
2013-07-31 17:26:14 +02:00
2013-07-31 22:29:16 +02:00
if ( js . HasUnknownPrefix ( ) )
DISABLE ;
2013-07-31 17:26:14 +02:00
u8 dreg ;
GetVectorRegs ( & dreg , V_Single , _VT ) ;
2014-11-10 21:58:08 -08:00
// Flush SIMD.
fpr . SimpleRegsV ( & dreg , V_Single , MAP_NOINIT | MAP_DIRTY ) ;
2021-01-31 08:39:21 -08:00
s32 imm = SignExtend16ToS32 ( op ) ;
2013-07-31 18:21:23 +02:00
FP32 fp ;
fp . f = ( float ) imm ;
2014-11-16 17:38:26 +01:00
MOV ( 32 , R ( TEMPREG ) , Imm32 ( fp . u ) ) ;
2013-07-31 17:26:14 +02:00
fpr . MapRegV ( dreg , MAP_DIRTY | MAP_NOINIT ) ;
2014-11-16 17:38:26 +01:00
MOVD_xmm ( fpr . VX ( dreg ) , R ( TEMPREG ) ) ;
2013-07-31 17:26:14 +02:00
ApplyPrefixD ( & dreg , V_Single ) ;
fpr . ReleaseSpillLocks ( ) ;
2013-07-29 22:35:06 +02:00
}
2013-08-24 14:43:49 -07:00
void Jit : : Comp_Vfim ( MIPSOpcode op ) {
2019-02-03 14:01:51 -08:00
CONDITIONAL_DISABLE ( VFPU_XFER ) ;
2013-07-30 22:25:08 +02:00
2013-07-31 22:29:16 +02:00
if ( js . HasUnknownPrefix ( ) )
DISABLE ;
2013-07-30 22:25:08 +02:00
u8 dreg ;
GetVectorRegs ( & dreg , V_Single , _VT ) ;
2014-11-10 21:58:08 -08:00
// Flush SIMD.
fpr . SimpleRegsV ( & dreg , V_Single , MAP_NOINIT | MAP_DIRTY ) ;
2013-07-30 22:25:08 +02:00
FP16 half ;
half . u = op & 0xFFFF ;
FP32 fval = half_to_float_fast5 ( half ) ;
2014-11-16 17:38:26 +01:00
MOV ( 32 , R ( TEMPREG ) , Imm32 ( fval . u ) ) ;
2013-07-30 22:25:08 +02:00
fpr . MapRegV ( dreg , MAP_DIRTY | MAP_NOINIT ) ;
2014-11-16 17:38:26 +01:00
MOVD_xmm ( fpr . VX ( dreg ) , R ( TEMPREG ) ) ;
2013-07-30 22:25:08 +02:00
ApplyPrefixD ( & dreg , V_Single ) ;
fpr . ReleaseSpillLocks ( ) ;
2013-07-29 22:35:06 +02:00
}
2014-11-30 11:04:13 +01:00
// Distributes the sin/cos results of a vrot into the destination registers.
// Precondition: XMM0 holds the sine result, XMM1 the cosine result (loaded by Comp_VRot).
// imm selects which lane gets cos (imm & 3), which gets sin ((imm >> 2) & 3),
// and when those coincide, all lanes get sin (the 'S'-everywhere form).
// negSin flips the sign of every sine written.
void Jit::CompVrotShuffle(u8 *dregs, int imm, int n, bool negSin) {
	char what[4] = {'0', '0', '0', '0'};
	if (((imm >> 2) & 3) == (imm & 3)) {
		// Sin lane == cos lane encodes "sin in all lanes"; cos still overwrites its lane below.
		for (int i = 0; i < 4; i++)
			what[i] = 'S';
	}
	what[(imm >> 2) & 3] = 'S';
	what[imm & 3] = 'C';
	// TODO: shufps SIMD version
	for (int i = 0; i < n; i++) {
		fpr.MapRegV(dregs[i], MAP_DIRTY | MAP_NOINIT);
		switch (what[i]) {
		case 'C': MOVSS(fpr.V(dregs[i]), XMM1); break;
		case 'S':
			MOVSS(fpr.V(dregs[i]), XMM0);
			if (negSin) {
				// Negate by XORing in the sign bit of the low lane.
				if (RipAccessible(&signBitLower)) {
					XORPS(fpr.VX(dregs[i]), M(&signBitLower));  // rip accessible
				} else {
					MOV(PTRBITS, R(TEMPREG), ImmPtr(&signBitLower));
					XORPS(fpr.VX(dregs[i]), MatR(TEMPREG));
				}
			}
			break;
		case '0':
		{
			// Lanes not covered by sin/cos are zeroed.
			XORPS(fpr.VX(dregs[i]), fpr.V(dregs[i]));
			break;
		}
		default:
			ERROR_LOG(JIT, "Bad what in vrot");
			break;
		}
	}
}
2013-07-28 18:21:16 +02:00
// Very heavily used by FF:CC
2013-08-24 14:43:49 -07:00
// Compiles vrot: computes sin/cos of vs (scaled angle) and scatters the results
// into vd per the immediate. If the NEXT instruction is another vrot reading the
// same vs, the two are fused so the sin/cos call is shared.
void Jit::Comp_VRot(MIPSOpcode op) {
	CONDITIONAL_DISABLE(VFPU_VEC);
	if (js.HasUnknownPrefix()) {
		DISABLE;
	}
	if (!js.HasNoPrefix()) {
		// Prefixes work strangely for this, see IRCompVFPU.
		WARN_LOG_REPORT(JIT, "vrot instruction using prefixes at %08x", GetCompilerPC());
		DISABLE;
	}

	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);
	u8 dregs[4];
	u8 dregs2[4];

	MIPSOpcode nextOp = GetOffsetInstruction(1);
	int vd2 = -1;
	int imm2 = -1;
	// Opcode 60 with fmt 29 is vrot; check it reads the same source reg.
	if ((nextOp >> 26) == 60 && ((nextOp >> 21) & 0x1F) == 29 && _VS == MIPS_GET_VS(nextOp)) {
		// Pair of vrot with the same angle argument. Let's join them (can share sin/cos results).
		vd2 = MIPS_GET_VD(nextOp);
		imm2 = (nextOp >> 16) & 0x1f;
		// NOTICE_LOG(JIT, "Joint VFPU at %08x", js.blockStart);
	}

	u8 sreg;
	GetVectorRegs(dregs, sz, vd);
	if (vd2 >= 0)
		GetVectorRegs(dregs2, sz, vd2);
	GetVectorRegs(&sreg, V_Single, vs);

	// Flush SIMD.
	fpr.SimpleRegsV(&sreg, V_Single, 0);

	int imm = (op >> 16) & 0x1f;

	// We call out to a C helper, so all cached regs must be flushed first.
	gpr.FlushBeforeCall();
	fpr.Flush();

	// Bit 4 of the immediate selects the negated-sine variant.
	bool negSin1 = (imm & 0x10) ? true : false;

#if PPSSPP_ARCH(AMD64)
	// First (pointer) argument: where SinCos writes its {sin, cos} results.
#ifdef _WIN32
	LEA(64, RDX, MIPSSTATE_VAR(sincostemp));
#else
	LEA(64, RDI, MIPSSTATE_VAR(sincostemp));
#endif
	MOVSS(XMM0, fpr.V(sreg));
	ABI_CallFunction(negSin1 ? (const void *)&SinCosNegSin : (const void *)&SinCos);
#else
	// Sigh, passing floats with cdecl isn't pretty, ends up on the stack.
	ABI_CallFunctionAC(negSin1 ? (const void *)&SinCosNegSin : (const void *)&SinCos, fpr.V(sreg), (uintptr_t)mips_->sincostemp);
#endif

	// Load the results: XMM0 = sin, XMM1 = cos (as CompVrotShuffle expects).
	MOVSS(XMM0, MIPSSTATE_VAR(sincostemp[0]));
	MOVSS(XMM1, MIPSSTATE_VAR(sincostemp[1]));

	CompVrotShuffle(dregs, imm, n, false);
	if (vd2 != -1) {
		// If the negsin setting differs between the two joint invocations, we need to flip the second one.
		bool negSin2 = (imm2 & 0x10) ? true : false;
		CompVrotShuffle(dregs2, imm2, n, negSin1 != negSin2);
		EatInstruction(nextOp);
	}
	fpr.ReleaseSpillLocks();
}
2013-07-28 18:21:16 +02:00
2014-11-30 11:04:13 +01:00
void Jit : : Comp_ColorConv ( MIPSOpcode op ) {
2019-02-03 14:01:51 -08:00
CONDITIONAL_DISABLE ( VFPU_VEC ) ;
2016-05-15 13:16:03 -07:00
if ( js . HasUnknownPrefix ( ) )
DISABLE ;
2014-12-07 14:12:13 +01:00
int vd = _VD ;
int vs = _VS ;
2014-11-30 11:04:13 +01:00
DISABLE ;
2014-12-07 14:12:13 +01:00
#if 0
VectorSize sz = V_Quad ;
int n = GetNumVectorElements ( sz ) ;
switch ( ( op > > 16 ) & 3 ) {
case 1 :
break ;
default :
DISABLE ;
}
u8 sregs [ 4 ] ;
u8 dregs [ 1 ] ;
2016-05-15 13:16:03 -07:00
// WARNING: Prefixes.
2014-12-07 14:12:13 +01:00
GetVectorRegs ( sregs , sz , vs ) ;
GetVectorRegs ( dregs , V_Pair , vd ) ;
if ( fpr . TryMapDirtyInVS ( dregs , V_Single , sregs , sz ) ) {
switch ( ( op > > 16 ) & 3 ) {
case 1 : // 4444
{
//int a = ((in >> 24) & 0xFF) >> 4;
//int b = ((in >> 16) & 0xFF) >> 4;
//int g = ((in >> 8) & 0xFF) >> 4;
//int r = ((in)& 0xFF) >> 4;
//col = (a << 12) | (b << 8) | (g << 4) | (r);
//PACKUSW
break ;
}
case 2 : // 5551
{
//int a = ((in >> 24) & 0xFF) >> 7;
//int b = ((in >> 16) & 0xFF) >> 3;
//int g = ((in >> 8) & 0xFF) >> 3;
//int r = ((in)& 0xFF) >> 3;
//col = (a << 15) | (b << 10) | (g << 5) | (r);
break ;
}
case 3 : // 565
{
//int b = ((in >> 16) & 0xFF) >> 3;
//int g = ((in >> 8) & 0xFF) >> 2;
//int r = ((in)& 0xFF) >> 3;
//col = (b << 11) | (g << 5) | (r);
break ;
}
}
DISABLE ;
// Flush SIMD.
fpr . SimpleRegsV ( & sreg , V_Pair , MAP_NOINIT | MAP_DIRTY ) ;
fpr . SimpleRegsV ( & dreg , V_Pair , MAP_NOINIT | MAP_DIRTY ) ;
# endif
2014-11-30 11:04:13 +01:00
2013-02-19 00:50:33 +01:00
}
2014-12-07 14:12:13 +01:00
}
2016-10-12 17:32:52 +02:00
# endif // PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)