// Patch notes (reviewer summary of the f3dex3.s hunks that follow):
//  - Hunk at 914: when CFG_LEGACY_VTX_PIPE is set, a G_MEMSET_handler label is added directly in front of the G_SPNOOP_handler / run_next_DL_command labels.
//  - Hunk at 3348: the G_MTX_end label under ovl234_ovl4_entrypoint is now guarded only by !CFG_NO_OCCLUSION_PLANE; the !CFG_LEGACY_VTX_PIPE condition is dropped.
//  - Hunk at 3428 (g_memset_real): the llv load of rdpHalf1Val is rewritten with an explicit ($zero) base operand.
//  - Hunk at 3448: `jr $ra` is replaced by `j wait_for_dma_and_run_next_command`.
//  - The gbi.h portion of the patch begins at the last "diff --git" header on this line.
// NOTE(review): hunk headers and +/- markers appear mid-line, so the patch's newlines look collapsed to spaces; presumably line breaks must be restored before this can be applied - confirm against the original commit.
diff --git a/f3dex3.s b/f3dex3.s index e0eb61a..ccdadfb 100644 --- a/f3dex3.s +++ b/f3dex3.s @@ -914,6 +914,9 @@ G_LIGHTTORDP_handler: vertex_end: tri_end: .endif +.if CFG_LEGACY_VTX_PIPE +G_MEMSET_handler: +.endif G_SPNOOP_handler: run_next_DL_command: mfc0 $1, SP_STATUS // load the status word into register $1 @@ -3348,7 +3351,7 @@ ovl234_lighting_entrypoint_ovl4ver: // same IMEM address as ovl234_lighti // Jump here for all overlay 4 features. If overlay 4 is loaded (this code), jumps // to the instruction selection below. ovl234_ovl4_entrypoint: -.if !CFG_LEGACY_VTX_PIPE && !CFG_NO_OCCLUSION_PLANE +.if !CFG_NO_OCCLUSION_PLANE G_MTX_end: .endif .if CFG_PROFILING_B @@ -3428,7 +3431,7 @@ g_memset_real: memsetBufferStart equ ((vertexBuffer + 0xF) & 0xFF0) memsetBufferEnd equ (clipTempVertsEnd & 0xFF0) memsetBufferSize equ (memsetBufferEnd - memsetBufferStart) - llv $v2[0], rdpHalf1Val // Load the memset value + llv $v2[0], (rdpHalf1Val)($zero) // Load the memset value sll cmd_w0, cmd_w0, 8 // Clear upper byte jal segmented_to_physical srl cmd_w0, cmd_w0, 8 // Number of bytes to memset (must be mult of 16) @@ -3448,7 +3451,7 @@ memsetBufferSize equ (memsetBufferEnd - memsetBufferStart) sub cmd_w0, cmd_w0, $2 bgtz cmd_w0, @@transaction_loop add cmd_w1_dram, cmd_w1_dram, $2 - jr $ra + j wait_for_dma_and_run_next_command // Delay slot harmless clamp_to_memset_buffer: addi $11, cmd_w0, -memsetBufferSize // Is more than a whole buffer left? diff --git a/gbi.h b/gbi.h index f179962..d6b493a 100644 --- a/gbi.h +++ b/gbi.h @@ -44,6 +44,7 @@ of warnings if you use -Wpedantic. 
*/ /*#define G_SPECIAL_3 0xD3 no-op in F3DEX2 */ /*#define G_SPECIAL_2 0xD4 no-op in F3DEX2 */ /*#define G_SPECIAL_1 0xD5 triggered MVP recalculation, not supported in F3DEX3 */ +#define G_MEMSET 0xD5 #define G_DMA_IO 0xD6 #define G_TEXTURE 0xD7 #define G_POPMTX 0xD8 @@ -2384,6 +2385,22 @@ _DW({ \ #define gSPDmaWrite(pkt,dmem,dram,size) gSPDma_io((pkt),1,(dmem),(dram),(size)) #define gsSPDmaWrite(dmem,dram,size) gsSPDma_io( 1,(dmem),(dram),(size)) +/** + * Use RSP DMAs to set a region of memory to a repeated 16-bit value. This can + * clear the color framebuffer or Z-buffer faster than the RDP can in fill mode. + * dram: Segmented or physical start address. Must be aligned to 16 bytes. + * value: 16-bit value to fill the memory with. e.g. 0 for color, 0xFFFC for Z. + * size: Size in bytes to fill, must be nonzero and a multiple of 16 bytes. + */ +#define gSPMemset(pkt, dram, value, size) \ +_DW({ \ + gImmp1(pkt, G_RDPHALF_1, ((value) & 0xFFFF)); \ + gDma0p(pkt, G_MEMSET, (dram), ((size) & 0xFFFFF0)); \ +}) + +#define gsSPMemset(dram, value, size) \ + gsImmp1(G_RDPHALF_1, ((value) & 0xFFFF)), \ + gsDma0p(G_MEMSET, (dram), ((size) & 0xFFFFF0)) /* * RSP short command (no DMA required) macros