mirror of
https://github.com/HackerN64/F3DEX3.git
synced 2026-01-21 10:37:45 -08:00
331 lines
16 KiB
ArmAsm
331 lines
16 KiB
ArmAsm
.include "rsp/lighting/ltadv_regs.inc"
|
|
|
|
// Specular / Fresnel setup for the current vertex pair.
// ltadv_spec_fres_setup is entered from ltadv_normals_to_regs while laSpecFres
// is nonzero. It loads cameraWorldPos into both halves of aDPosI and jumps to
// ltadv_normal_to_vertex; that code branches back to ltadv_after_camera
// (it tests laSpecFres) with aDOT and aDIR computed.
// ltadv_after_camera saves the dot product into aOAFrs elems 0, 4 and clears
// laSpecFres. If the sign bit of laSpecular is clear it continues at
// ltadv_loop; otherwise it replaces vpWNrm with the reflected vector and jumps
// back to ltadv_normals_to_regs.
ltadv_spec_fres_setup: // Odd instruction
|
|
// Get aDIR = normalize(camera - vertex), aDOT = (vpWNrm dot aDIR)
|
|
ldv aDPosI[0], (cameraWorldPos - altBase)(altBaseReg) // Camera world pos
|
|
j ltadv_normal_to_vertex // Comes back at ltadv_after_camera since laSpecFres != 0
|
|
ldv aDPosI[8], (cameraWorldPos - altBase)(altBaseReg) // Delay slot: same 8 bytes into upper half
|
|
// nop; nop
|
|
ltadv_after_camera:
|
|
// vnop; vnop
|
|
vmov aOAFrs[0], aDOT[0] // Save Fresnel dot product in aOAFrs[0h]
|
|
vmov aOAFrs[4], aDOT[4] // elems 0, 4
|
|
bgez laSpecular, ltadv_loop // Sign bit clear = not specular
|
|
li laSpecFres, 0 // Delay slot (runs on both paths): clear flag for specular or fresnel
|
|
// aProj <- aLenF
|
|
vmulf aProj, vpWNrm, aDOT[0h] // Projection of camera vec onto normal
|
|
vmudh $v29, aDIR, $v31[1] // -camera vec
|
|
j ltadv_normals_to_regs // For specular, replace vpWNrm with reflected vector
|
|
// vnop; vnop
|
|
vmadh vpWNrm, aProj, $v31[3] // Delay slot: + 2 * projection
|
|
// vnop; vnop
|
|
// aDPosI <- aProj
|
|
|
|
// ltadv_xfrm: subroutine, returns via jr $ra. Called twice per vertex pair
// from ltadv_vtx_loop (once for the model position, once for the normals).
// Accumulates the vMTX0, vMTX1 and vMTX2 terms (int and frac parts) times
// vpMdl[0h], vpMdl[1h] and vpMdl[2h]; the result is written to aDPosI:aDPosF
// and is also left in the accumulator, so the caller can keep accumulating.
// Scalar work interleaved here: curLight is set to altBase + the byte at
// numLightsxSize, and vpRGBA is loaded from the vertex buffer. Because of the
// two calls, these scalar instructions run twice per pair.
ltadv_xfrm: // Even instruction
|
|
vmudn $v29, vMTX0F, vpMdl[0h]
|
|
lbu curLight, numLightsxSize // Scalar instructions here must be OK to do twice
|
|
vmadh $v29, vMTX0I, vpMdl[0h]
|
|
luv vpRGBA, (VTX_IN_TC + 0 * inputVtxSize)(laPtr) // Vtx 2:1 RGBA
|
|
vmadn $v29, vMTX1F, vpMdl[1h]
|
|
vmadh $v29, vMTX1I, vpMdl[1h]
|
|
addi curLight, curLight, altBase // Point to ambient light
|
|
vmadn aDPosF, vMTX2F, vpMdl[2h] // Frac part of result
|
|
jr $ra
|
|
vmadh aDPosI, vMTX2I, vpMdl[2h] // Delay slot: int part of result
|
|
|
|
// ltadv_after_mtx: one-time setup before the vertex loop.
// Initializes laPtr from inVtx and laVtxLeft from vtxLeft, starts building the
// packed-normals constants in aPNScl, and multiplies the vMTX1 terms by 0x7FFF
// (i.e. transforms the normal (0, 7FFF, 0)). It then jumps to ltadv_normalize,
// which exits to ltadv_continue_setup because laPtr == inVtx at this point.
// NOTE(review): ltadv_normalize reads the fractional part from vpWrlF while
// this code writes aDPosF; presumably the two names alias the same register
// in ltadv_regs.inc - confirm.
ltadv_after_mtx: // Even instruction
|
|
move laPtr, inVtx
|
|
vcopy aPNScl, vOne
|
|
move laVtxLeft, vtxLeft
|
|
vmudn aDPosF, vMTX1F, $v31[7] // 0x7FFF; transform a normal (0, 7FFF, 0)
|
|
// 0001 00[20 0800 XX]01 = (1<<0),(1<<5),(1<<11),XX, repeat
|
|
// The 4-byte loads start at byte 3 / 11, i.e. at the low byte of elem 1 / 5
|
|
llv aPNScl[3], (packedNormalsConstants - altBase)(altBaseReg)
|
|
vmadh aDPosI, vMTX1I, $v31[7]
|
|
j ltadv_normalize // Exits via ltadv_continue_setup
|
|
llv aPNScl[11], (packedNormalsConstants - altBase)(altBaseReg) // Delay slot: same constants for upper half
|
|
// ltadv_continue_setup: second half of the one-time setup, reached from
// ltadv_normalize on the first pass (laPtr == inVtx).
// Loads aParam from fxParams, keeps the reciprocal length returned in aRcpLn as
// the normal scale aNrmSc, finishes aPNScl, derives the laL2A and laTexgen
// flags from vGeomMid, calls while_wait_dma_busy, and preloads the model
// positions of the first vertex pair into vpMdl.
// If G_AMBOCCLUSION is not set, the vmrg replaces the elements of aParam not
// selected by VCC with 0.
ltadv_continue_setup:
|
|
lqv aParam, (fxParams - altBase)(altBaseReg)
|
|
vcopy aNrmSc, aRcpLn // aRcpLn[0:1] is int:frac scale (1 / length)
|
|
lsv aPNScl[6], (packedNormalsMaskConstant - altBase)(altBaseReg) // F800
|
|
vge $v29, $v31, $v31[3] // Set VCC to 00011111
|
|
andi $11, vGeomMid, G_AMBOCCLUSION >> 8
|
|
bnez $11, @@skip_zero_ao // AO enabled: keep aParam as loaded
|
|
andi laL2A, vGeomMid, G_LIGHTTOALPHA >> 8 // Delay slot (runs on both paths)
|
|
vmrg aParam, aParam, $v31[2] // 0
|
|
@@skip_zero_ao:
|
|
jal while_wait_dma_busy
|
|
andi laTexgen, vGeomMid, G_TEXTURE_GEN >> 8 // Delay slot
|
|
ldv vpMdl[0], (VTX_IN_OB + 1 * inputVtxSize)(laPtr) // Vtx 2 Model pos + PN
|
|
ldv vpMdl[8], (VTX_IN_OB + 0 * inputVtxSize)(laPtr) // Vtx 1 Model pos + PN
|
|
align_with_warning 8, "One instruction of padding before ltadv_vtx_loop"
|
|
// ltadv_vtx_loop: one iteration per pair of input vertices. laPtr advances by
// 2 * inputVtxSize and laVtxLeft decreases by the same amount each iteration.
//  1. Vtx 2 RGBA is stored over the Vtx 1 ST slot (the original ST is kept in
//     laSTKept), so the 8-byte loads at VTX_IN_TC fetch data for both vertices.
//  2. First ltadv_xfrm call plus the vMTX3 terms: model position ->
//     vpWrlI:vpWrlF, which is negated at the end of this block.
//  3. Second ltadv_xfrm call: normals (regular ones from the lpv, or the packed
//     ones merged in when G_PACKED_NORMALS is set), then scaled by aNrmSc
//     into vpWNrm.
//  4. vpLtTot is initialized from the light at curLight (ambient) and
//     multiplied by the ambient-occlusion factor aAOF2[3h].
// Also computes laPacked, laSpecular and laSpecFres from vGeomMid.
// Falls through into the code after this block (ltadv_normals_to_regs, or the
// CFG_DEBUG_NORMALS code).
ltadv_vtx_loop: // Even instruction
|
|
vmudm $v29, aPNScl, vpMdl[3h] // Packed normals from elem 3,7 of model pos
|
|
lw $11, (VTX_IN_CN + 1 * inputVtxSize)(laPtr) // Vtx 2 RGBA
|
|
vmadn vpNrmlY, $v31, $v31[2] // 0; load lower (vpMdl unsigned but must be T operand)
|
|
lw laSTKept,(VTX_IN_TC + 0 * inputVtxSize)(laPtr) // Vtx 1 ST
|
|
vand vpNrmlX, vpMdl, aPNScl[3] // 0xF800; X component masked in elem 3, 7
|
|
jal ltadv_xfrm // Transform model position
|
|
sw $11, (VTX_IN_TC + 0 * inputVtxSize)(laPtr) // Delay slot: Vtx 2 RGBA -> Vtx 1 ST
|
|
vmadn vpWrlF, vMTX3F, vOne // Finish vertex pos transform
|
|
vmadh vpWrlI, vMTX3I, vOne
|
|
andi laPacked, vGeomMid, G_PACKED_NORMALS >> 8
|
|
// aOAFrs <- vpST
|
|
vsub aOAFrs, vpRGBA, $v31[7] // 0x7FFF; offset alpha elems 3, 7
|
|
luv vpLtTot, (ltBufOfs + 0)(curLight) // Total light level, init to ambient
|
|
vne $v29, $v31, $v31[0h] // Set VCC to 01110111
|
|
beqz laPacked, @@skip_packed_normals
|
|
lpv vpMdl, (VTX_IN_TC + 0 * inputVtxSize)(laPtr) // Delay slot (always runs): Vtx 2:1 regular normals
|
|
vmrg vpMdl, vpNrmlY, vpNrmlX[3h] // Masked X to 0, 4; multiplied Y, Z in 1, 2, 5, 6
|
|
@@skip_packed_normals:
|
|
vmudh $v29, vOne, $v31[7] // Load accum mid with 0x7FFF (1 in s.15)
|
|
jal ltadv_xfrm // Transform normals
|
|
// aAOF2 <- aDOT
|
|
vmadm aAOF2, aOAFrs, aParam[0] // Delay slot: + (alpha - 1) * aoAmb factor; elems 3, 7
|
|
// aLTC <- vpMdl
|
|
vmulf vpLtTot, vpLtTot, aAOF2[3h] // light color *= ambient factor
|
|
// aDOT <- aAOF2
|
|
vmudn $v29, aDPosF, aNrmSc[0h] // Vec frac * int scaling, discard result
|
|
// aDIR <- aDPosF
|
|
addi laPtr, laPtr, 2 * inputVtxSize
|
|
vmadm $v29, aDPosI, aNrmSc[1h] // Vec int * frac scaling, discard result
|
|
addi laVtxLeft, laVtxLeft, -2 * inputVtxSize
|
|
// vpWNrm <- vpNrmlX
|
|
vmadh vpWNrm, aDPosI, aNrmSc[0h] // Vec int * int scaling
|
|
sll laSpecular, vGeomMid, (31 - 5) // G_LIGHTING_SPECULAR to sign bit
|
|
vmudn vpWrlF, vpWrlF, $v31[1] // -1; negate world pos so add light/cam pos to it
|
|
andi laSpecFres, vGeomMid, (G_LIGHTING_SPECULAR | G_FRESNEL_COLOR | G_FRESNEL_ALPHA) >> 8
|
|
vmadh vpWrlI, vpWrlI, $v31[1] // -1
|
|
|
|
// Debug build (CFG_DEBUG_NORMALS): this branch replaces the light loop and
// post-processing in the .else branch. The output color becomes
// 0x4000 + 0.5 * vpWNrm and laTexgen is forced to 0. The labels below are all
// defined at this same point, and execution continues with the code after
// .endif.
.if CFG_DEBUG_NORMALS
|
|
vmudh $v29, vOne, $v31[5] // 0x4000; middle gray
|
|
li laTexgen, 0
|
|
vmacf vpRGBA, vpWNrm, $v31[5] // 0x4000; + 0.5 * normal
|
|
// Labels defined in the .else branch for non-debug builds; here they all mark this point
|
|
ltadv_finish_light:
|
|
ltadv_loop:
|
|
ltadv_normals_to_regs:
|
|
ltadv_specular:
|
|
.else
|
|
|
|
// ltadv_normals_to_regs: spreads the Y and Z components of vpWNrm across
// vpNrmlY and vpNrmlZ, then branches to ltadv_spec_fres_setup while laSpecFres
// is nonzero.
// ltadv_loop: one iteration per light, walking curLight down by lightSize
// until it equals altBaseReg, at which point control goes to ltadv_post.
// A nonzero light type byte sends the light to ltadv_point; otherwise the
// directional dot product is computed here, with ltadv_specular called when
// the sign bit of laSpecular is set.
// ltadv_finish_light: shared tail (also the target of ltadv_point) that
// applies the factor in aAOF[3h] to the light color, clamps the dot product to
// >= 0, and accumulates light color * dot product into vpLtTot.
ltadv_normals_to_regs:
|
|
vmudh vpNrmlY, vOne, vpWNrm[1h] // Move normals to separate registers
|
|
bnez laSpecFres, ltadv_spec_fres_setup
|
|
vmudh vpNrmlZ, vOne, vpWNrm[2h] // Delay slot: per component, in elems 0-3, 4-7
|
|
// vpNrmlX <- vpWNrm
|
|
// aAOF <- aDPosI
|
|
align_with_warning 8, "One instruction of padding before ltadv_loop"
|
|
ltadv_loop: // Even instruction
|
|
vmudh $v29, vOne, $v31[7] // Load accum mid with 0x7FFF (1 in s.15)
|
|
lbu $11, (ltBufOfs + 3 - lightSize)(curLight) // Light type / constant attenuation
|
|
vmadm aAOF, aOAFrs, aParam[1] // + (alpha - 1) * aoDir factor; elems 3, 7
|
|
beq curLight, altBaseReg, ltadv_post // Exit loop once curLight reaches altBaseReg
|
|
lpv aDOT[0], (ltBufOfs + 8 - lightSize)(curLight) // Delay slot: Light or lookat 0 dir in elems 0-2
|
|
bnez $11, ltadv_point // Nonzero type byte = point light
|
|
luv aLTC, (ltBufOfs + 0 - lightSize)(curLight) // Delay slot: Light color
|
|
// vnop
|
|
vmulf $v29, vpNrmlX, aDOT[0]
|
|
vmacf $v29, vpNrmlY, aDOT[1]
|
|
bltzal laSpecular, ltadv_specular // Call only if sign bit set (specular)
|
|
vmacf aDOT, vpNrmlZ, aDOT[2] // Delay slot: finish normal dot light dir
|
|
// vnop; vnop
|
|
ltadv_finish_light:
|
|
vmulf aLTC, aLTC, aAOF[3h] // light color *= dir or point light factor
|
|
vge aDOT, aDOT, $v31[2] // 0; clamp dot product to >= 0
|
|
addi curLight, curLight, -lightSize
|
|
vmudh $v29, vOne, vpLtTot // Load accum mid with current light level
|
|
j ltadv_loop
|
|
// vnop; vnop
|
|
vmacf vpLtTot, aLTC, aDOT[0h] // Delay slot: + light color * dot product
|
|
|
|
// ltadv_specular: subroutine, returns via jr $ra. Called with bltzal from the
// directional path in ltadv_loop and from the point light path.
// Computes aDOT = 0x7FFF - ((0x7FFF - aDOT) * size factor), where the size
// factor is the signed byte at offset 0xF of the current light.
// Clobbers $11 and vpLtTot elem 3.
ltadv_specular: // aDOT in/out, uses vpLtTot[3] and $11 as temps
|
|
lb $11, (ltBufOfs + 0xF - lightSize)(curLight) // Light size factor
|
|
// nop; nop
|
|
mtc2 $11, vpLtTot[6] // Light size factor in elem 3 as temp
|
|
vxor aDOT, aDOT, $v31[7] // = 0x7FFF - dot product
|
|
// vnop; vnop; vnop
|
|
vmudh aDOT, aDOT, vpLtTot[3] // * size factor
|
|
jr $ra
|
|
// vnop; vnop; vnop
|
|
vxor aDOT, aDOT, $v31[7] // Delay slot: = 0x7FFF - result
|
|
// land then one vnop before vmulf; replaces two vnops if not specular
|
|
|
|
// ltadv_post: reached from ltadv_loop once curLight == altBaseReg.
// Builds the output for the vertex pair in vpRGBA, in this order:
//  - base: RGB = vpRGBA * vpLtTot, alpha = vertex alpha;
//  - if laL2A is set: RGB = vertex color, alpha = max light component;
//  - if laPacked is clear: RGB = vpLtTot;
//  - if G_FRESNEL_COLOR or G_FRESNEL_ALPHA is set in vGeomMid: color or alpha
//    is replaced with the Fresnel value derived from aOAFrs[0h].
// Also reloads vpMdl with the model positions at the (already advanced) laPtr.
.align 8
|
|
ltadv_post:
|
|
// aClOut <- vpWrlF
|
|
// aAlOut <- vpWrlI
|
|
// vpMdl <- aLTC
|
|
vge aAOF, vpLtTot, vpLtTot[1h] // elem 0 = max(R0, G0); elem 4 = max(R1, G1)
|
|
ldv vpMdl[0], (VTX_IN_OB + 1 * inputVtxSize)(laPtr) // Vtx 2 Model pos + PN
|
|
vmulf aClOut, vpRGBA, vpLtTot // RGB output is RGB * light
|
|
beqz laL2A, @@skip_cel
|
|
vcopy aAlOut, vpRGBA // Delay slot: Alpha output = vertex alpha (only 3, 7 matter)
|
|
// Cel: alpha = max of light components, RGB = vertex color
|
|
vge aAOF, aAOF, aAOF[2h] // elem 0 = max(R0, G0, B0); equiv for elem 4
|
|
vcopy aClOut, vpRGBA // RGB output is vertex color
|
|
vmudh aAlOut, vOne, aAOF[0h] // move light level elem 0, 4 to 3, 7
|
|
@@skip_cel:
|
|
vne $v29, $v31, $v31[3h] // Set VCC to 11101110
|
|
bnez laPacked, @@skip_novtxcolor
|
|
andi $11, vGeomMid, (G_FRESNEL_COLOR | G_FRESNEL_ALPHA) >> 8 // Delay slot (runs on both paths)
|
|
vcopy aClOut, vpLtTot // If no packed normals, base output is just light
|
|
@@skip_novtxcolor:
|
|
vmrg vpRGBA, aClOut, aAlOut // Merge base output and alpha output
|
|
beqz $11, ltadv_skip_fresnel // Neither Fresnel flag set
|
|
ldv vpMdl[8], (VTX_IN_OB + 0 * inputVtxSize)(laPtr) // Delay slot: Vtx 1 Model pos + PN
|
|
lsv aAOF[0], (vTRC_0100_addr - altBase)(altBaseReg) // Load constant 0x0100 to temp
|
|
vabs aOAFrs, aOAFrs, aOAFrs // Fresnel dot in aOAFrs[0h]; absolute value for underwater
|
|
andi $11, vGeomMid, G_FRESNEL_COLOR >> 8
|
|
vmudh $v29, vOne, aParam[7] // Fresnel offset
|
|
// vnop; vnop
|
|
vmacu aOAFrs, aOAFrs, aParam[6] // + factor * scale, clamp to >= 0.
|
|
beqz $11, @@skip // vmacu bad oflow bhv @ 7FFF is OK b/c here max values should be about 0200.
|
|
// vnop; vnop; vnop
|
|
vmudh aOAFrs, aOAFrs, aAOF[0] // Delay slot (runs for color and alpha): Result * 0x0100, clamped to 0x7FFF
|
|
veq $v29, $v31, $v31[3h] // Set VCC to 00010001 if G_FRESNEL_COLOR
|
|
@@skip:
|
|
// vnop; vnop
|
|
vmrg vpRGBA, vpRGBA, aOAFrs[0h] // Replace color or alpha with fresnel
|
|
// vnop; vnop
|
|
|
|
.endif // CFG_DEBUG_NORMALS
|
|
|
|
// ltadv_skip_fresnel: end of the per-pair work. Stores the lit colors of both
// vertices from vpRGBA, optionally generates texture coordinates (laTexgen),
// then restores the Vtx 1 ST slot that ltadv_vtx_loop overwrote and writes the
// real Vtx 2 RGBA. Offsets are relative to laPtr, which already points past
// this pair.
// If laVtxLeft is negative only vtx 1 is valid, so nothing is written for
// vtx 2. Loops back to ltadv_vtx_loop while laVtxLeft > 0, otherwise continues
// at vtx_setup_no_lighting.
ltadv_skip_fresnel:
|
|
beqz laTexgen, ltadv_after_texgen
|
|
suv vpRGBA, (VTX_IN_TC - 2 * inputVtxSize)(laPtr) // Delay slot (always runs): Vtx 2:1 RGBA
|
|
// Texgen: aDOT still contains lookat 0 in elems 0-2, lookat 1 in elems 4-6
|
|
// vpST <- aOAFrs
|
|
texgen_dots aDOT, aLkDt0, aLkDt1
|
|
texgen_body aDOT, aLkDt0, aLkDt1, 0h, ltadv_texgen_end
|
|
texgen_lastinstr aLkDt0, aLkDt1
|
|
ltadv_texgen_end: // Vtx 2 ST in vpST elem 0, 1; vtx 1 ST in vpST elem 4, 5
|
|
slv vpST[8], (tempVtx1ST)(rdpCmdBufEndP1) // Vtx 1 ST
|
|
bltz laVtxLeft, ltadv_after_texgen // Only vtx 1 is valid, don't write vtx 2
|
|
lw laSTKept, (tempVtx1ST)(rdpCmdBufEndP1) // Delay slot: Overwrite stored Vtx 1 ST
|
|
slv vpST[0], (VTX_IN_TC - 1 * inputVtxSize)(laPtr) // Vtx 2 ST
|
|
ltadv_after_texgen:
|
|
lw $11, (VTX_IN_TC - 2 * inputVtxSize)(laPtr) // Vtx 2 RGBA from vtx 1 ST slot
|
|
bltz laVtxLeft, vtx_setup_no_lighting // Only vtx 1 valid: done, skip the vtx 2 RGBA write
|
|
sw laSTKept, (VTX_IN_TC - 2 * inputVtxSize)(laPtr) // Delay slot: Restore vtx 1 ST
|
|
ltadv_vtx_loop_end:
|
|
bgtz laVtxLeft, ltadv_vtx_loop // More vertex pairs to process
|
|
sw $11, (VTX_IN_CN - 1 * inputVtxSize)(laPtr) // Delay slot: Real vtx 2 RGBA
|
|
j vtx_setup_no_lighting
|
|
// Delay slot is OK
|
|
|
|
// Point light / vector normalization code with three entry points:
//  - ltadv_point: from ltadv_loop when the light type byte ($11) is nonzero.
//    Loads the light position and the linear ($10) and quadratic ($24)
//    attenuation bytes.
//  - ltadv_normal_to_vertex: also from ltadv_spec_fres_setup, with the camera
//    position in aDPosI. Adds vpWrlI (the negated vertex world position).
//  - ltadv_normalize: also from ltadv_after_mtx during setup. Normalizes the
//    vector in aDPosI:vpWrlF.
// Exits:
//  - to ltadv_continue_setup when laPtr == inVtx (setup pass), returning aRcpLn;
//  - to ltadv_after_camera when laSpecFres is nonzero, returning aDOT and aDIR;
//  - otherwise to ltadv_finish_light with aLTC, aAOF and aDOT for this light.
// The scalar instructions interleaved in ltadv_normalize build the attenuation
// factors in aNrmSc: constant in elem 2, linear in elem 3, quadratic frac in
// elem 6 and quadratic int in elem 7. They execute on all three entry paths.
ltadv_point:
|
|
/*
|
|
Input vector 1 elem size 7FFF.0000 -> len^2 3FFF0001 -> 1/len 0001.0040 -> vec +801E.FFC0 -> clamped 7FFF
|
|
len^2 * 1/len = 400E.FFC1 so about half actual length
|
|
Input vector 1 elem size 0100.0000 -> len^2 00010000 -> 1/len 007F.FFC0 -> vec 7FFF.C000 -> clamped 7FFF
|
|
len^2 * 1/len = 007F.FFC0 so about half actual length
|
|
Input vector 1 elem size 0010.0000 -> len^2 00000100 -> 1/len 07FF.FC00 -> vec 7FFF.C000
|
|
Input vector 1 elem size 0001.0000 -> len^2 00000001 -> 1/len 7FFF.C000 -> vec 7FFF.C000
|
|
*/
|
|
// aDPosI <- aAOF
|
|
ldv aDPosI[0], (ltBufOfs + 8 - lightSize)(curLight) // Light position int part 0-3
|
|
ldv aDPosI[8], (ltBufOfs + 8 - lightSize)(curLight) // 4-7
|
|
lbu $10, (ltBufOfs + 7 - lightSize)(curLight) // PL: Linear factor
|
|
// vnop; vnop
|
|
lbu $24, (ltBufOfs + 0xE - lightSize)(curLight) // PL: Quadratic factor
|
|
ltadv_normal_to_vertex:
|
|
vadd aDPosI, aDPosI, vpWrlI // Not using aDPosF; frac part is just vpWrlF
|
|
// vnop; vnop; vnop
|
|
ltadv_normalize: // Normalize vector in aDPosI:vpWrlF i/f
|
|
vmudm $v29, aDPosI, vpWrlF // Squared. Don't care about frac*frac term
|
|
sll $11, $11, 8 // Constant factor, 00000100 - 0000FF00
|
|
vmadn $v29, vpWrlF, aDPosI
|
|
sll $10, $10, 6 // Linear factor, 00000040 - 00003FC0
|
|
vmadh $v29, aDPosI, aDPosI
|
|
mtc2 $11, aNrmSc[4] // Constant frac part in elem 2
|
|
// aLen2F <- aLTC
|
|
vreadacc aLen2F, ACC_MIDDLE
|
|
mtc2 $10, aNrmSc[6] // Linear frac part in elem 3
|
|
vreadacc aLen2I, ACC_UPPER
|
|
srl $11, $24, 5 // Top 3 bits
|
|
// vnop; vnop
|
|
vmudm $v29, vOne, aLen2F[2h] // Sum of squared components
|
|
andi $10, $24, 0x1F // Bottom 5 bits
|
|
vmadh $v29, vOne, aLen2I[2h]
|
|
ori $10, $10, 0x20 // Append leading 1 to mantissa
|
|
vmadm $v29, vOne, aLen2F[1h]
|
|
sllv $10, $10, $11 // Left shift to create floating point
|
|
vmadh $v29, vOne, aLen2I[1h]
|
|
sll $10, $10, 8 // Min range 00002000, 00002100... 00003F00, max 00100000...001F8000
|
|
vmadn aLen2F, aLen2F, vOne // elem 0; swapped so we can do vmadn and get result
|
|
bnez $24, @@skip // If original value is zero, set to zero
|
|
vmadh aLen2I, aLen2I, vOne // Delay slot (always runs)
|
|
li $10, 0
|
|
@@skip:
|
|
// vnop; vnop
|
|
// aRcpLn <- $v29
|
|
vrsqh aRcpLn[2], aLen2I[0] // High input, garbage output
|
|
vrsql aRcpLn[1], aLen2F[0] // Low input, low output
|
|
mtc2 $10, aNrmSc[12] // Quadratic frac part in elem 6
|
|
vrsqh aRcpLn[0], aLen2I[4] // High input, high output
|
|
srl $10, $10, 16
|
|
vrsql aRcpLn[5], aLen2F[4] // Low input, low output
|
|
beq laPtr, inVtx, ltadv_continue_setup // Return aRcpLn; cond works only iter 0
|
|
vrsqh aRcpLn[4], $v31[2] // Delay slot: 0 input, high output
|
|
// vnop; vnop; vnop
|
|
vmudn aDIR, vpWrlF, aRcpLn[0h] // Vec frac * int scaling, discard result
|
|
mtc2 $10, aNrmSc[14] // Quadratic int part in elem 7
|
|
vmadm aDIR, aDPosI, aRcpLn[1h] // Vec int * frac scaling, discard result
|
|
vmadh aDIR, aDPosI, aRcpLn[0h] // Vec int * int scaling
|
|
// aLenF <- aDPosI
|
|
vmudm aLenF, aLen2I, aRcpLn[1h] // len^2 int * 1/len frac; ignoring frac*frac
|
|
vmadn aLenF, aLen2F, aRcpLn[0h] // len^2 frac * 1/len int = len frac
|
|
// aLenI <- aRcpLn
|
|
vmadh aLenI, aLen2I, aRcpLn[0h] // len^2 int * 1/len int = len int
|
|
vmulf aDOT, vpNrmlX, aDIR[0h] // Normalized light dir * normalized normals
|
|
vmacf aDOT, vpNrmlY, aDIR[1h]
|
|
bnez laSpecFres, ltadv_after_camera // Return if initial spec/fres; returns aDOT, aDIR
|
|
vmacf aDOT, vpNrmlZ, aDIR[2h] // Delay slot: finish dot product
|
|
// $v29 <- aLenI
|
|
vmudm $v29, aLenI, aNrmSc[3] // len int * linear factor frac
|
|
vmadl $v29, aLenF, aNrmSc[3] // + len frac * linear factor frac
|
|
vmadm $v29, vOne, aNrmSc[2] // + 1 * constant factor frac
|
|
vmadl $v29, aLen2F, aNrmSc[6] // + len^2 frac * quadratic factor frac
|
|
vmadm $v29, aLen2I, aNrmSc[6] // + len^2 int * quadratic factor frac
|
|
// aPLFcF <- aLen2F
|
|
vmadn aPLFcF, aLen2F, aNrmSc[7] // + len^2 frac * quadratic factor int = aPLFcF frac
|
|
bltzal laSpecular, ltadv_specular // Call only if sign bit set (specular)
|
|
// aPLFcI <- aLen2I
|
|
vmadh aPLFcI, aLen2I, aNrmSc[7] // Delay slot: + len^2 int * quadratic factor int = aPLFcI int
|
|
// aAOF <- aLenF
|
|
vmudh aAOF, vOne, $v31[7] // Load accum mid with 0x7FFF (1 in s.15)
|
|
vmadm aAOF, aOAFrs, aParam[2] // + (alpha - 1) * aoPoint factor; elems 3, 7
|
|
// vnop
|
|
// aDotSc <- aDIR
|
|
vrcph aDotSc[1], aPLFcI[0] // 1/(2*light factor), input of 0000.8000 -> no change normals
|
|
vrcpl aDotSc[2], aPLFcF[0] // Light factor 0001.0000 -> normals /= 2
|
|
vrcph aDotSc[3], aPLFcI[4] // Light factor 0000.1000 -> normals *= 8 (with clamping)
|
|
// aLen2I <- aPLFcI
|
|
vrcpl aDotSc[6], aPLFcF[4] // Light factor 0010.0000 -> normals /= 32
|
|
vrcph aDotSc[7], $v31[2] // 0
|
|
// aLTC <- aPLFcF
|
|
luv aLTC, (ltBufOfs + 0 - lightSize)(curLight) // aLTC = light color
|
|
// vnop; vnop; vnop
|
|
// This is a scale on the dot product, not the light, because the scale can
|
|
// increase a small dot product (close to perpendicular), while it can't
|
|
// increase a light beyond white.
|
|
vmudm $v29, aDOT, aDotSc[2h] // Dot product int * scale frac
|
|
j ltadv_finish_light // Returns aLTC, aAOF, aDOT
|
|
vmadh aDOT, aDOT, aDotSc[3h] // Delay slot: Dot product int * scale int, clamp to 0x7FFF
|
|
// vnop
|
|
// aDIR <- aDotSc
|
|
|
|
/*
|
|
ltadv per vertex pair up to light loop: 36
|
|
ltadv per vertex pair last loop iter: 4
|
|
ltadv per vertex pair after to next vtx pair, no packed normals: 23
|
|
total ltadv per vertex pair: 63
|
|
light loop directional: 18
|
|
light loop point through jump: 6
|
|
point: 64
|
|
light loop point after return: 7
|
|
total point: 77
|
|
*/
|
|
|
|
// ovl4_end marks the address just past the last instruction above;
// ovl4_padded_end marks the same position after padding with .align 8.
// NOTE(review): presumably the end markers of overlay 4 - confirm where these
// labels are consumed.
ovl4_end:
|
|
.align 8
|
|
ovl4_padded_end:
|