From 818596fbf3de481cbcd5838056fd76dea4a18623 Mon Sep 17 00:00:00 2001 From: Sauraen Date: Fri, 17 Jan 2025 20:53:47 -0800 Subject: [PATCH] Builds --- Makefile | 25 +++-------- f3dex3.s | 127 +++++++++++++++++++++++++------------------------------ 2 files changed, 65 insertions(+), 87 deletions(-) diff --git a/Makefile b/Makefile index 29982be..393c4e6 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# To build, run something like `make F3DEX3_BrZ` or`make F3DEX3_BrW_LVP_NOC_PA`. +# To build, run something like `make F3DEX3_BrZ` or`make F3DEX3_BrW_NOC_PA`. # For an explanation of what all the suffixes mean, see README.md. MAKEFLAGS += --no-builtin-rules @@ -11,7 +11,6 @@ default: F3DEX3_BrZ F3DEX3_BrW ALL_OPTIONS := \ CFG_G_BRANCH_W \ CFG_NO_OCCLUSION_PLANE \ - CFG_LEGACY_VTX_PIPE \ CFG_PROFILING_A \ CFG_PROFILING_B \ CFG_PROFILING_C @@ -197,24 +196,14 @@ define rule_builder_noc $$(eval $$(call rule_builder_prof)) endef -define rule_builder_lvp - NAME_NOC := $(NAME_LVP) - OPTIONS_NOC := $(OPTIONS_LVP) - $$(eval $$(call rule_builder_noc)) - - NAME_NOC := $(NAME_LVP)_LVP - OPTIONS_NOC := $(OPTIONS_LVP) CFG_LEGACY_VTX_PIPE - $$(eval $$(call rule_builder_noc)) -endef - define rule_builder_br - NAME_LVP := $(NAME_BR)_BrZ - OPTIONS_LVP := $(OPTIONS_BR) - $$(eval $$(call rule_builder_lvp)) + NAME_NOC := $(NAME_BR)_BrZ + OPTIONS_NOC := $(OPTIONS_BR) + $$(eval $$(call rule_builder_noc)) - NAME_LVP := $(NAME_BR)_BrW - OPTIONS_LVP := $(OPTIONS_BR) CFG_G_BRANCH_W - $$(eval $$(call rule_builder_lvp)) + NAME_NOC := $(NAME_BR)_BrW + OPTIONS_NOC := $(OPTIONS_BR) CFG_G_BRANCH_W + $$(eval $$(call rule_builder_noc)) endef NAME_BR := diff --git a/f3dex3.s b/f3dex3.s index 7bbf107..6164b28 100644 --- a/f3dex3.s +++ b/f3dex3.s @@ -162,14 +162,15 @@ for boot code Overlay 0 Overlay 1 start task) handlers) (initialization) | | -Many command -handlers +Rest command handlers +Vertex start +All tri write cmds Overlay 2 Overlay 3 Overlay 4 -(Lighting) (Clipping) (mIT, rare cmds) +(Basic lighting) (Clipping, (Advanced + rare cmds) lighting) -Vertex and -tri handlers +Main vertex write DMA code @@ -1158,11 +1159,6 @@ G_SETxIMG_handler: j G_RDP_handler sb $7, materialCullMode -.if CFG_LEGACY_VTX_PIPE - -G_DMA_IO_handler: - instantiate_dma_io - G_BRANCH_WZ_handler: lhu $10, (vertexTable)(cmd_w0) // Vertex addr from byte 3 .if CFG_G_BRANCH_W // G_BRANCH_W/G_BRANCH_Z difference; this defines F3DZEX vs. F3DEX2 @@ -1176,11 +1172,6 @@ G_BRANCH_WZ_handler: j branch_dl // need $2 < 0 for nopush and cmd_w1_dram li cmd_w0, 0 // No count of DL cmds to skip -G_MEMSET_handler: - instantiate_memset - -.endif - G_LOAD_UCODE_handler: j load_overlay_0_and_enter // Delay slot is harmless G_MODIFYVTX_handler: @@ -1759,11 +1750,11 @@ clip_init_used_loop: li clipPolySelect, 6 // Everything being indexed from 6 saves one instruction at the end of the loop sh $1, (clipPoly - 6 + 0)(clipPolySelect) // Write the current three verts sh $2, (clipPoly - 6 + 2)(clipPolySelect) // as the initial polygon - sh $3, (clipPoly - 6 + 4)(clipPolySelect) // Initial state $3 = clipVLastOfs + sh $3, (clipPoly - 6 + 4)(clipPolySelect) // Initial state $3 = clipVLastOfsc sh $zero, (clipPoly)(clipPolySelect) // nullptr to mark end of polygon // Available locals here: $11, $1, $7, $20, $24, $10 clip_condlooptop: - lhu clipFlags, VTX_CLIP(clipVLastOfs) // Load flags for final vertex of the last polygon + lhu clipFlags, VTX_CLIP(clipVLastOfsc) // Load flags for final vertex of the last polygon addi clipPolyRead, clipPolySelect, -6 // Start reading at the beginning of the old polygon xori clipPolySelect, clipPolySelect, 6 ^ ((clipPoly2 - clipPoly) + 6) // Swap to the other polygon memory addi clipPolyWrite, clipPolySelect, -6 // Start writing at the beginning of the new polygon @@ -1790,7 +1781,7 @@ clip_find_unused_loop: move clipVOnsc, clipVLastOfsc // Otherwise swap; note we are overwriting move clipVLastOfsc, clipVNext // clipVLastOfsc but not clipVNext clip_skipswap23: - // Interpolate between clipVLastOfs and clipVOns; create a new vertex which is on the + // Interpolate between clipVLastOfsc and clipVOns; create a new vertex which is on the // clipping boundary (e.g. at the screen edge) cPosOnOfF equ vpClpF cPosOnOfI equ vpClpI @@ -1852,7 +1843,7 @@ clip_skipxy: addi $11, $11, 6 - ((MAX_CLIP_POLY_VERTS) * 2) // Write ptr to last zero slot vrcpl cRRF[3], cDiffF[3] // 1 / (x+y+z+w), vtx on screen - vtx off screen bgez $11, clip_done // If so, give up - vrcph $cRRI[3], $v31[2] // 0; get int result of reciprocal + vrcph cRRI[3], $v31[2] // 0; get int result of reciprocal vabs cTemp, cTemp, $v31[3] // 2; cTemp = +/- 2 based on sum positive (incl. zero) or negative lhu vGeomMid, geometryModeLabel + 1 // Load middle 2 bytes of geom mode, incl fog setting vmudn cRRF, cRRF, cTemp[3] // multiply reciprocal by +/- 2 @@ -2043,6 +2034,44 @@ ovl3_padded_end: .orga max(max(ovl2_padded_end - ovl2_start, ovl4_padded_end - ovl4_start) + orga(ovl3_start), orga()) ovl234_end: +G_MTX_end: +// Multiplies the temp loaded matrix into the M or VP matrix + lhu $6, (movememTable + G_MV_MMTX)($1) // Output; $1 holds 0 for M or 4 for VP. + li $3, tempMatrix // Input 1 = temp mem (loaded mtx) + jal while_wait_dma_busy + move $2, $6 // Input 0 = output +mtx_multiply: + // $3, $2 are input matrices; $6 is output matrix; $7 is 0 for return to vtx + addi $10, $3, 0x0018 +@@loop: + vmadn $v7, $v31, $v31[2] // 0 + addi $11, $3, 0x0008 + vmadh $v6, $v31, $v31[2] // 0 + addi $2, $2, -0x0020 + vmudh $v29, $v31, $v31[2] // 0 +@@innerloop: + ldv $v3[0], 0x0040($2) + ldv $v3[8], 0x0040($2) + lqv $v1[0], 0x0020($3) // Input 1 + ldv $v2[0], 0x0020($2) + ldv $v2[8], 0x0020($2) + lqv $v0[0], 0x0000($3) // Input 1 + vmadl $v29, $v3, $v1[0h] + addi $3, $3, 0x0002 + vmadm $v29, $v2, $v1[0h] + addi $2, $2, 0x0008 // Increment input 0 pointer + vmadn $v5, $v3, $v0[0h] + bne $3, $11, @@innerloop + vmadh $v4, $v2, $v0[0h] + bne $3, $10, @@loop + addi $3, $3, 0x0008 + sqv $v7[0], (0x0020)($6) + sqv $v6[0], (0x0000)($6) + beqz $7, vtx_after_mtx_multiply + sqv $v4[0], (0x0010)($6) + j run_next_DL_command + sqv $v5[0], (0x0030)($6) + vtx_after_dma: srl $2, cmd_w0, 11 // n << 1 sub $2, cmd_w0, $2 // = v0 << 1 @@ -2148,7 +2177,10 @@ vtx_after_lt_setup: llv vpST[0], (VTX_IN_TC + 0 * inputVtxSize)(inVtx) // ST in 0:1 j vtx_store_loop_entry llv vpST[8], (VTX_IN_TC + 1 * inputVtxSize)(inVtx) // ST in 4:5 - + +.if (. & 4) + .warning "One instruction of padding before vertex loop" +.endif .align 8 .if CFG_NO_OCCLUSION_PLANE @@ -2500,45 +2532,6 @@ tris_end: lqv $v30, (v30Value)($zero) // Restore value overwritten in vtx_store .endif -G_MTX_end: -// Multiplies the temp loaded matrix into the M or VP matrix - lhu $6, (movememTable + G_MV_MMTX)($1) // Output; $1 holds 0 for M or 4 for VP. - li $3, tempMatrix // Input 1 = temp mem (loaded mtx) - jal while_wait_dma_busy - move $2, $6 // Input 0 = output -mtx_multiply: - // $3, $2 are input matrices; $6 is output matrix; $7 is 0 for return to vtx - addi $10, $3, 0x0018 -@@loop: - vmadn $v7, $v31, $v31[2] // 0 - addi $11, $3, 0x0008 - vmadh $v6, $v31, $v31[2] // 0 - addi $2, $2, -0x0020 - vmudh $v29, $v31, $v31[2] // 0 -@@innerloop: - ldv $v3[0], 0x0040($2) - ldv $v3[8], 0x0040($2) - lqv $v1[0], 0x0020($3) // Input 1 - ldv $v2[0], 0x0020($2) - ldv $v2[8], 0x0020($2) - lqv $v0[0], 0x0000($3) // Input 1 - vmadl $v29, $v3, $v1[0h] - addi $3, $3, 0x0002 - vmadm $v29, $v2, $v1[0h] - addi $2, $2, 0x0008 // Increment input 0 pointer - vmadn $v5, $v3, $v0[0h] - bne $3, $11, @@innerloop - vmadh $v4, $v2, $v0[0h] - bne $3, $10, @@loop - addi $3, $3, 0x0008 - sqv $v7[0], (0x0020)($6) - sqv $v6[0], (0x0000)($6) -.if CFG_LEGACY_VTX_PIPE - beqz $7, vtx_after_mtx_multiply -.endif - sqv $v4[0], (0x0010)($6) - j run_next_DL_command - sqv $v5[0], (0x0030)($6) .if CFG_PROFILING_B @@ -2737,7 +2730,7 @@ ovl0_padded_end: .error "Automatic resizing for overlay 0 failed" .endif -// overlay 1 (0x170 bytes loaded into 0x1000) +// overlay 1 (0x178 bytes loaded into 0x1000) .headersize 0x00001000 - orga() ovl1_start: @@ -3252,7 +3245,10 @@ ltbasic_common_end: beqz $11, vtx_return_from_lighting vmrg vpRGBA, vLtRGBOut, vLtAOut // Merge base output and alpha output ltbasic_texgen: -// Texgen uses vLookat0:1 = vpLtTot and vAAA, vCCC:vDDD, and of course vpST. +// Texgen uses vpLtTot, vAAA, vCCC:vDDD, and of course vpST. +ltLookAt equ vCCC +vLookat0 equ vpLtTot +vLookat1 equ vAAA lpv ltLookAt[0], (xfrmLookatDirs + 0)($zero) // Lookat 0 in 0-2, 1 in 4-6; = vNrmOut vmulf $v29, vpNrmlX, ltLookAt[0] // Normals X elems 0, 4 * lookat 0 X vmacf $v29, vpNrmlY, ltLookAt[1] // Normals Y elems 0, 4 * lookat 0 Y @@ -3367,7 +3363,6 @@ vNrmOut equ $v18 // Output of lt_normalize (rarely used, but needed as all te .endif vPackPXY equ $v25 // = vCCC; positive X and Y in packed normals vPackZ equ $v26 // = vDDD; Z in packed normals -.if !CFG_LEGACY_VTX_PIPE vand vPackPXY, vAAA, $v31[6] // 0x7F00; positive X, Y vmudh $v29, vOne, $v31[1] // -1; set all elems of $v29 to -1 // vnop; vnop @@ -3399,14 +3394,12 @@ lt_skip_packed_normals: beqz $11, lt_after_xfrm_normals // Skip if G_NORMALSMODE_FAST vmadh vAAA, vM2I, vPairNrml[2h] // vAAA = normals int // Transform normals by M inverse transpose, for G_NORMALSMODE_AUTO or G_NORMALSMODE_MANUAL -.endif vLtMIT0I equ $v26 // = vDDD vLtMIT1I equ $v25 // = vCCC vLtMIT2I equ $v23 // = vAAA; last in multiply vLtMIT0F equ $v29 // = temp; first vLtMIT1F equ $v17 // = vPairLt vLtMIT2F equ $v24 // = vBBB; second to last -.if !CFG_LEGACY_VTX_PIPE lqv vLtMIT0I, (mvpMatrix + 0x00)($zero) // x int, y int lqv vLtMIT2I, (mvpMatrix + 0x10)($zero) // z int, x frac lqv vLtMIT1F, (mvpMatrix + 0x20)($zero) // y frac, z frac @@ -3504,10 +3497,8 @@ lt_skip_specular: lt_post: // Valid: vpMdlI/F, vpST, modified vpRGBA ([3h] = alpha - 1), // vPairNrml normal [0h:2h] fresnel [3h], vPairLt [0h:2h], vAAA lookat 0 dir -.endif vLtRGBOut equ $v25 // = vCCC: light / effects RGB output vLtAOut equ $v26 // = vDDD: light / effects alpha output -.if !CFG_LEGACY_VTX_PIPE vadd vpRGBA, vpRGBA, $v31[7] // 0x7FFF; undo change for ambient occlusion andi $11, vGeomMid, G_LIGHTTOALPHA >> 8 // vnop @@ -3560,11 +3551,9 @@ lt_skip_fresnel: vmudh $v29, vOne, vLookat1[0h] vmadh $v29, vOne, vLookat1[1h] vmadh vLookat1, vOne, vLookat1[2h] -.endif - TODO Rest of texgen shared by F3DEX3 native and LVP + TODO Rest of texgen -.if !CFG_LEGACY_VTX_PIPE lt_point: comment Input vector 1 elem size 7FFF.0000 -> len^2 3FFF0001 -> 1/len 0001.0040 -> vec +801E.FFC0 -> clamped 7FFF