From 274509f103afcf1b5851be21506db5a5d85b9545 Mon Sep 17 00:00:00 2001
From: Sauraen <sauraen@gmail.com>
Date: Sun, 22 Jun 2025 17:35:29 -0700
Subject: [PATCH] Implemented texgen in ltadv

---
 f3dex3.s | 108 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 65 insertions(+), 43 deletions(-)

diff --git a/f3dex3.s b/f3dex3.s
index 559bbbf..21d5dad 100644
--- a/f3dex3.s
+++ b/f3dex3.s
@@ -967,14 +967,16 @@ vpWrlF equ $v13 // vertex pair World position Frac part
 vpWrlI equ $v14 // vertex pair World position Int part
 aDPosF equ $v15 // ltadv Delta Position Frac part
 aDPosI equ $v16 // ltadv Delta Position Int part
-aOffsA equ $v17 // ltadv Offset Alpha
+aOAFrs equ $v17 // ltadv Offset Alpha (elem 3,7) and Fresnel (elem 0,4)
 // Uses vpRGBA, vpLtTot, vpNrmlX, vpNrmlY, vpNrmlZ = $v18, $v19, $v20, $v21, $v22
 aParam equ $v23 // ltadv Parameters = AO, texgen, and Fresnel params
 
 aAOF2  equ aLen2F // Version of aAOF in init, can't be aDPosI/F or vpMdl there
-aPLFcF equ aLen2F // Point Light Factor Frac part
+aPLFcF equ aLen2F // ltadv Point Light Factor Frac part
 aLTC   equ aLen2F // ltadv Light Color
-aPLFcI equ aLen2I // Point Light Factor Int part
+aLkDt0 equ aLen2F // ltadv Lookat Dot product 0 for texgen
+aPLFcI equ aLen2I // ltadv Point Light Factor Int part
+aLkDt1 equ aLen2I // ltadv Lookat Dot product 1 for texgen
 aDOT   equ vpMdl  // ltadv Dot product = normals dot direction; also briefly light dir
 aClOut equ vpWrlF // ltadv Color Out
 aAlOut equ vpWrlI // ltadv Alpha Out
@@ -983,6 +985,7 @@ aDotSc equ aDPosF // ltadv Dot product Scale factor
 aLenF  equ aDPosI // ltadv Length Frac part
 aAOF   equ aDPosI // ltadv Ambient Occlusion Factor
 aProj  equ aDPosI // ltadv Projection
+// vpST equ aOAFrs // ST used in texgen
 vpWNrm equ vpNrmlX // vertex pair World space Normals
 aRcpLn equ $v29 // ltadv Reciprocal of Length
 aLenI  equ $v29 // ltadv Length Int part
@@ -993,9 +996,8 @@ aLenI  equ $v29 // ltadv Length Int part
 // always be free during vtx load or clipping.
 tempVpRGBA            equ 0x00        // Only used during loop
 tempXfrmLt            equ tempVpRGBA  // ltbasic only used during init
-tempUnpackings        equ tempVpRGBA  // ltadv only during init
+tempVtx1ST            equ tempVpRGBA  // ltadv only during init
 tempAmbient           equ 0x10        // ltbasic set during init, used during loop
-tempNormalScales      equ tempAmbient // ltadv only during init
 tempPrevInvalVtxStart equ 0x20
 tempPrevInvalVtx      equ (tempPrevInvalVtxStart + vtxSize) // 0x46; fog writes here
 tempPrevInvalVtxEnd   equ (tempPrevInvalVtx + vtxSize)      // 0x6C; rest of vtx writes here
@@ -3297,35 +3299,45 @@ lLkDrs equ lDTC    // lighting Lookat Directions
 lLkDt0 equ vpLtTot // lighting Lookat Dot product 0
 lLkDt1 equ lDOT    // lighting Lookat Dot product 1
     lpv     lLkDrs[0], (xfrmLookatDirs + 0)($zero) // Lookat 0 in 0-2, 1 in 4-6
-    vmulf   $v29,   vpNrmlX, lLkDrs[0]  // Normals X elems 0, 4 * lookat 0 X
-    vmacf   $v29,   vpNrmlY, lLkDrs[1]  // Normals Y elems 0, 4 * lookat 0 Y
+.macro texgen_dots, lookats, dot0, dot1
+    vmulf   $v29, vpNrmlX, lookats[0]  // Normals X * lookat 0 X
+    vmacf   $v29, vpNrmlY, lookats[1]  // Normals Y * lookat 0 Y
+    vmacf   dot0, vpNrmlZ, lookats[2]  // Normals Z * lookat 0 Z
+    vmulf   $v29, vpNrmlX, lookats[4]  // Normals X * lookat 1 X
+    vmacf   $v29, vpNrmlY, lookats[5]  // Normals Y * lookat 1 Y
+    vmacf   dot1, vpNrmlZ, lookats[6]  // Normals Z * lookat 1 Z
+.endmacro
+    texgen_dots lLkDrs, lLkDt0, lLkDt1
 .if !CFG_NO_OCCLUSION_PLANE
     addi    outVtxBase, outVtxBase, -2*vtxSize // Undo doing this twice due to repeating ST scale
 .endif
-    vmacf   lLkDt0, vpNrmlZ, lLkDrs[2]  // Normals Z elems 0, 4 * lookat 0 Z
-    vmulf   $v29,   vpNrmlX, lLkDrs[4]  // Normals X elems 0, 4 * lookat 1 X
-    vmacf   $v29,   vpNrmlY, lLkDrs[5]  // Normals Y elems 0, 4 * lookat 1 Y
-    vmacf   lLkDt1, vpNrmlZ, lLkDrs[6]  // Normals Z elems 0, 4 * lookat 1 Z
-    vmudh   lLkDt0, vOne, lLkDt0[3h]    // Move lookat 0 dot product to elem 0
-lLkCns equ lLkDrs  // lighting Lookat Constants
-    llv     lLkCns[0], (texgenLinearCoeffs - altBase)(altBaseReg)
-    vne     $v29, $v31, $v31[1h]        // Set VCC to 10111011
+// In ltbasic, normals are in elems 3, 7; in ltadv, elems 0, 4
+    vmudh   lLkDt0, vOne, lLkDt0[3h] // Move dot 0 from elems 3, 7 to 0, 4
+.macro texgen_body, lookats, dot0, dot1, normalselem, branch_no_texgen_linear
+// lookats now holds texgen linear coefficients elems 0, 1
+    llv     lookats[0], (texgenLinearCoeffs - altBase)(altBaseReg)
+    vne     $v29, $v31, $v31[1h]    // Set VCC to 10111011
     andi    $11, vGeomMid, G_TEXTURE_GEN_LINEAR >> 8
-    vmrg    lLkDt0, lLkDt0, lLkDt1[3h]  // Dot products in elements 0, 1, 4, 5
-    vmudh   $v29, vOne, $v31[5]         // 1 * 0x4000
-    beqz    $11, vtx_return_from_texgen
-     vmacf  vpST, lLkDt0, $v31[5]       // + dot products * 0x4000 ( / 2)
+    vmrg    dot0, dot0, dot1[normalselem] // Dot products in elements 0, 1, 4, 5
+    vmudh   $v29, vOne, $v31[5]     // 1 * 0x4000
+    beqz    $11, branch_no_texgen_linear
+     vmacf  vpST, dot0, $v31[5]     // + dot products * 0x4000 ( / 2)
     // Texgen_Linear:
-    vmulf   vpST, lLkDt0, $v31[5]       // dot products * 0x4000 ( / 2)
-lLkST2 equ lLkDt0  // lighting Lookat ST squared
-    vmulf   lLkST2, vpST, vpST          // ST squared
-    vmulf   $v29, vpST, $v31[7]         // Move ST to accumulator (0x7FFF = 1)
-lLkTmp equ lLkDt1  // lighting Lookat Temp
-    vmacf   lLkTmp, vpST, lLkCns[1]     // + ST * 0x6CB3
-    vmudh   $v29, vOne, $v31[5]         // 1 * 0x4000
-    vmacf   vpST, vpST, lLkCns[0]       // + ST * 0x44D3
+    vmulf   vpST, dot0, $v31[5]     // dot products * 0x4000 ( / 2)
+// dot0 now holds lighting Lookat ST squared
+    vmulf   dot0, vpST, vpST        // ST squared
+    vmulf   $v29, vpST, $v31[7]     // Move ST to accumulator (0x7FFF = 1)
+// dot1 now holds lighting Lookat Temp
+    vmacf   dot1, vpST, lookats[1]  // + ST * 0x6CB3
+    vmudh   $v29, vOne, $v31[5]     // 1 * 0x4000
+    vmacf   vpST, vpST, lookats[0]  // + ST * 0x44D3
+.endmacro
+    texgen_body lLkDrs, lLkDt0, lLkDt1, 3h, vtx_return_from_texgen
     j       vtx_return_from_texgen
-     vmacf  vpST, lLkST2, lLkTmp        // + ST squared * (ST + ST * coeff)
+.macro texgen_lastinstr, dot0, dot1
+     vmacf  vpST, dot0, dot1        // + ST squared * (ST + ST * coeff)
+.endmacro
+     texgen_lastinstr lLkDt0, lLkDt1
     
 ovl2_end:
 .align 8
@@ -3396,7 +3408,8 @@ ltadv_vtx_loop:
     andi    laPacked, vGeomMid, G_PACKED_NORMALS >> 8
     vmadh   vpWrlI, vMTX3I, vOne
     luv     vpLtTot, (ltBufOfs + 0)(curLight) // Total light level, init to ambient
-    vsub    aOffsA, vpRGBA, $v31[7]  // 0x7FFF; offset alpha
+// aOAFrs <- vpST
+    vsub    aOAFrs, vpRGBA, $v31[7]  // 0x7FFF; offset alpha elems 3, 7
     vmudm   $v29, aPNScl, vpMdl[3h] // Packed normals from elem 3,7 of model pos
     bnez    laPacked, @@skip_regular_normals
      vmadn  vpMdl, $v31, $v31[2] // 0; load lower (vpMdl unsigned but must be T operand)
@@ -3404,7 +3417,7 @@ ltadv_vtx_loop:
 @@skip_regular_normals:
     vmudh   $v29, vOne, $v31[7] // Load accum mid with 0x7FFF (1 in s.15)
     jal     ltadv_xfrm
-     vmadm  aAOF2, aOffsA, aParam[0] // + (alpha - 1) * aoAmb factor; elems 3, 7
+     vmadm  aAOF2, aOAFrs, aParam[0] // + (alpha - 1) * aoAmb factor; elems 3, 7
 // aDOT <- vpMdl
     vmulf   vpLtTot, vpLtTot, aAOF2[3h] // light color *= ambient factor
 // aLTC <- aAOF2
@@ -3427,7 +3440,7 @@ ltadv_normals_to_regs:
 ltadv_loop:
     vmudh   $v29, vOne, $v31[7] // Load accum mid with 0x7FFF (1 in s.15)
     lbu     $11,     (ltBufOfs + 3 - lightSize)(curLight) // Light type / constant attenuation
-    vmadm   aAOF, aOffsA, aParam[1] // + (alpha - 1) * aoDir factor; elems 3, 7
+    vmadm   aAOF, aOAFrs, aParam[1] // + (alpha - 1) * aoDir factor; elems 3, 7
     beq     curLight, altBaseReg, ltadv_post
      lpv    aDOT[0], (ltBufOfs + 8 - lightSize)(curLight) // Light or lookat 0 dir in elems 0-2
     bnez    $11, ltadv_point
@@ -3462,8 +3475,8 @@ ltadv_spec_fres_setup:
     j       ltadv_normal_to_vertex
      ldv    aDPosI[8], (cameraWorldPos - altBase)(altBaseReg)
 ltadv_after_camera:
-    vmov    aOffsA[0], aDOT[0]       // Save Fresnel dot product in aOffsA[0h]
-    vmov    aOffsA[4], aDOT[4]
+    vmov    aOAFrs[0], aDOT[0]       // Save Fresnel dot product in aOAFrs[0h]
+    vmov    aOAFrs[4], aDOT[4]       // elems 0, 4
     bgez    laSpecular, ltadv_loop   // Sign bit clear = not specular
      li     laSpecFres, 0            // Clear flag for specular or fresnel
 // aProj <- aLenF
@@ -3501,26 +3514,35 @@ ltadv_post:
     beqz    $11, @@skip_fresnel
      vmrg   vpRGBA, aClOut, aAlOut     // Merge base output and alpha output
     lsv     aAOF[0], (vTRC_0100_addr - altBase)(altBaseReg) // Load constant 0x0100 to temp
-    vabs    aOffsA, aOffsA, aOffsA     // Fresnel dot in aOffsA[0h]; absolute value for underwater
+    vabs    aOAFrs, aOAFrs, aOAFrs     // Fresnel dot in aOAFrs[0h]; absolute value for underwater
     andi    $11, vGeomMid, G_FRESNEL_COLOR >> 8
     vmudh   $v29, vOne, aParam[7]      // Fresnel offset
-    vmacf   aOffsA, aOffsA, aParam[6]  // + factor * scale
+    vmacf   aOAFrs, aOAFrs, aParam[6]  // + factor * scale
     beqz    $11, @@skip
-     vmudh  aOffsA, aOffsA, aAOF[0]    // Result * 0x0100, clamped to 0x7FFF
+     vmudh  aOAFrs, aOAFrs, aAOF[0]    // Result * 0x0100, clamped to 0x7FFF
     veq     $v29, $v31, $v31[3h]       // Set VCC to 00010001 if G_FRESNEL_COLOR
 @@skip:
-    vmrg    vpRGBA, vpRGBA, aOffsA[0h] // Replace color or alpha with fresnel
+    vmrg    vpRGBA, vpRGBA, aOAFrs[0h] // Replace color or alpha with fresnel
     vge     vpRGBA, vpRGBA, $v31[2]    // Clamp to >= 0 for fresnel; doesn't affect others
 @@skip_fresnel:
-    beqz    laTexgen, @@skip_texgen    // no texgen
+    beqz    laTexgen, ltadv_after_texgen
      suv    vpRGBA,   (VTX_IN_TC - 2 * inputVtxSize)(laPtr) // Vtx 2:1 RGBA
-    // Texgen: aDOT still contains lookat 0 in elems 0-2, lookat 1 in elems 4-6
-    nop     // TODO
-@@skip_texgen:
+// Texgen: aDOT still contains lookat 0 in elems 0-2, lookat 1 in elems 4-6
+// vpST <- aOAFrs
+    texgen_dots aDOT, aLkDt0, aLkDt1
+    texgen_body aDOT, aLkDt0, aLkDt1, 0h, ltadv_texgen_end
+    texgen_lastinstr aLkDt0, aLkDt1
+ltadv_texgen_end:  // Vtx 2 ST in vpST elem 0, 1; vtx 1 ST in vpST elem 4, 5
+    slv     vpST[8],  (tempVtx1ST)(rdpCmdBufEndP1) // Vtx 1 ST
+    bltz    laVtxLeft, ltadv_after_texgen  // Only vtx 1 is valid, don't write vtx 2
+     lw     laSTKept, (tempVtx1ST)(rdpCmdBufEndP1) // Overwrite stored Vtx 1 ST
+    slv     vpST[0],  (VTX_IN_TC - 1 * inputVtxSize)(laPtr) // Vtx 2 ST
+ltadv_after_texgen:
 // vpMdl <- aDOT
     lw      $11,      (VTX_IN_TC - 2 * inputVtxSize)(laPtr) // Vtx 2 RGBA from vtx 1 ST slot
     bltz    laVtxLeft, vtx_setup_no_lighting
-     sw     laSTKept, (VTX_IN_TC - 2 * inputVtxSize)(laPtr) // Real vtx 1 ST
+     sw     laSTKept, (VTX_IN_TC - 2 * inputVtxSize)(laPtr) // Restore vtx 1 ST
+ltadv_vtx_loop_end:
     bgtz    laVtxLeft, ltadv_vtx_loop
      sw     $11,      (VTX_IN_CN - 1 * inputVtxSize)(laPtr) // Real vtx 2 RGBA
     j       vtx_setup_no_lighting
@@ -3605,7 +3627,7 @@ ltadv_normalize: // Normalize vector in aDPosI:vpWrlF i/f
      vmadh  aPLFcI, aLen2I, aNrmSc[7]  // + len^2 int * quadratic factor int  = aLen2I int
 // aAOF <- aLenF
     vmudh   aAOF, vOne, $v31[7]        // Load accum mid with 0x7FFF (1 in s.15)
-    vmadm   aAOF, aOffsA, aParam[2]    // + (alpha - 1) * aoPoint factor; elems 3, 7
+    vmadm   aAOF, aOAFrs, aParam[2]    // + (alpha - 1) * aoPoint factor; elems 3, 7
 // aDotSc <- aDIR
     vrcph   aDotSc[1], aPLFcI[0]       // 1/(2*light factor), input of 0000.8000 -> no change normals
     vrcpl   aDotSc[2], aPLFcF[0]       // Light factor 0001.0000 -> normals /= 2