Replaced tri strip and fan with SPTriSnake

This commit is contained in:
Sauraen
2025-07-21 22:05:12 -07:00
parent 0ecae1d20e
commit 87d1cf5681
3 changed files with 221 additions and 88 deletions

View File

@@ -254,13 +254,17 @@ otherMode0: // command byte included, same as above
otherMode1:
.dw 0x00000000
// TODO: This is unnecessary, the state only has to be saved between the two
// commands making up the texrect command. Could put this in the part of the
// clip buffer that's kept over yields.
// Saved texrect state for combining the multiple input commands into one RDP texrect command
texrectWord1:
.fill 4 // first word, has command byte, xh and yh
texrectWord2:
.fill 4 // second word, has tile, xl, yl
// First half of RDP value for split commands
// First half of RDP value for split commands. Also used as temp storage for
// tri vertices during tri commands.
rdpHalf1Val:
.fill 4
@@ -542,6 +546,8 @@ numLightsxSize:
// RDP/Immediate Command Mini Table
// 1 byte per entry, after << 2 points to an addr in first 1/4 of IMEM
miniTableEntry G_LIGHTTORDP_handler
miniTableEntry G_RELSEGMENT_handler
miniTableEntry G_FLUSH_handler
miniTableEntry G_MEMSET_handler
miniTableEntry G_DMA_IO_handler
@@ -595,10 +601,7 @@ miniTableEntry G_BRANCH_WZ_handler
miniTableEntry G_TRI1_handler
miniTableEntry G_TRI2_handler
miniTableEntry G_QUAD_handler
miniTableEntry G_TRISTRIP_handler
miniTableEntry G_TRIFAN_handler
miniTableEntry G_LIGHTTORDP_handler
miniTableEntry G_RELSEGMENT_handler
miniTableEntry G_TRISNAKE_handler
// The maximum number of generated vertices in a clip polygon. In reality, this
@@ -837,7 +840,7 @@ clipPolyWrite equ $21 // Write pointer within current polygon being clipped
viLtFlag equ $9 // Holds pointLightFlag or dirLightsXfrmValid
// Misc
nextRA equ $10 // Address to return to after overlay load
nextRA equ $10 // Address to return to after overlay load
ovlInitClock equ $16 // Temp for profiling
dmaLen equ $19 // DMA length in bytes minus 1
dmemAddr equ $20 // DMA address in DMEM or IMEM. Also = rdpCmdBufPtr - rdpCmdBufEndP1 for flush_rdp_buffer
@@ -1159,6 +1162,8 @@ call_ret_common:
sb $1, displayListStackLength
andi inputBufferPos, cmd_w0, 0x00F8 // Byte 3, how many cmds to drop from load (max 0xA0)
displaylist_dma:
li nextRA, run_next_DL_command
displaylist_dma_tri_snake:
// Load INPUT_BUFFER_SIZE_BYTES - inputBufferPos cmds (inputBufferPos >= 0, mult of 8)
addi inputBufferPos, inputBufferPos, -INPUT_BUFFER_SIZE_BYTES // inputBufferPos = - num cmds
.if CFG_PROFILING_A
@@ -1169,8 +1174,6 @@ displaylist_dma:
move cmd_w1_dram, taskDataPtr // set up the DRAM address to read from
sub taskDataPtr, taskDataPtr, inputBufferPos // increment the DRAM address to read from next time
addi dmemAddr, inputBufferPos, inputBufferEnd // set the address to DMA read to
dma_and_wait_goto_next_command:
li nextRA, run_next_DL_command
dma_and_wait_goto_next_ra:
j dma_read_write
li $ra, wait_goto_next_ra
@@ -1266,26 +1269,29 @@ G_MODIFYVTX_handler:
j do_moveword // Moveword adds cmd_w0 to $10 for final addr
lbu cmd_w0, (inputBufferEnd - 0x07)(inputBufferPos) // offset in vtx, bit 15 clear
G_TRIFAN_handler: // 17
li $1, 0x8000 // $ra negative = flag for G_TRIFAN
G_TRISTRIP_handler:
addi $ra, $1, tri_strip_fan_loop // otherwise $1 == 0
addi cmd_w0, inputBufferPos, inputBufferEnd - 8 // Start pointing to cmd byte
tri_strip_fan_loop:
lw cmd_w1_dram, 0(cmd_w0) // Load tri indices to lower 3 bytes of word
addi $11, inputBufferPos, inputBufferEnd - 3 // Off end of command
beq $11, cmd_w0, tris_end // If off end of command, exit
sll $10, cmd_w1_dram, 24 // Put sign bit of vtx 3 in sign bit
bltz $10, tris_end // If negative, exit
sw cmd_w1_dram, 4(rdpCmdBufPtr) // Store non-shuffled indices
bltz $ra, tri_fan_store // Finish handling G_TRIFAN
addi cmd_w0, cmd_w0, 1 // Increment
andi $11, cmd_w0, 1 // If odd, this is the 1st/3rd/5th tri
bnez $11, tri_main // Draw as is
srl $10, cmd_w1_dram, 8 // Move vtx 2 to LSBs
sb cmd_w1_dram, 6(rdpCmdBufPtr) // Store vtx 3 to spot for 2
// Index = bits 1-6; direction flag = bit 0; end flag = bit 7
// CM 02 01 03 04 05 06 07
// [bb^cc] Indices b and c
// |
// inputBufferPos + inputBufferEnd
G_TRISNAKE_handler:
sw cmd_w0, rdpHalf1Val // Store indices a, b, c
addi inputBufferPos, inputBufferPos, -5 // Point to byte 3, index c of 1st tri
tri_snake_loop:
lh $3, (inputBufferEnd - 1)(inputBufferPos) // Load indices b and c
tri_snake_loop_from_input_buffer:
lb $2, rdpHalf1Val + 1 // Old v1; == index b, except when bridging between old and new load
li $ra, tri_snake_loop // For tri_main
bltz $3, tri_snake_end // Upper bit of real index b set = done
andi $11, $3, 1 // Get direction flag from index c
beqz inputBufferPos, tri_snake_over_input_buffer // == 0 at end of input buffer
andi $3, $3, 0x7E // Mask out flags from index c
sb $3, rdpHalf1Val + 1 // Store index c as vertex 1
sb $2, (rdpHalf1Val + 2)($11) // Store old v1 as 2 if dir clear or 3 if set
j tri_main
sb $10, 7(rdpCmdBufPtr) // Store vtx 2 to spot for 3
addi inputBufferPos, inputBufferPos, 1 // Increment indices being read
// H = highest on screen = lowest Y value; then M = mid, L = low
tHAtF equ $v5
@@ -1304,15 +1310,15 @@ tPosHmM equ $v11
G_TRI2_handler:
G_QUAD_handler:
jal tri_main // Send second tri; return here for first tri
sw cmd_w1_dram, 4(rdpCmdBufPtr) // Store second tri indices
sw cmd_w1_dram, rdpHalf1Val // Store second tri indices
G_TRI1_handler:
li $ra, tris_end // After done with this tri, exit tri processing
sw cmd_w0, 4(rdpCmdBufPtr) // Store first tri indices
sw cmd_w0, rdpHalf1Val // Store first tri indices
tri_main:
lpv $v27[0], 0(rdpCmdBufPtr) // To vector unit
lbu $1, 5(rdpCmdBufPtr)
lbu $2, 6(rdpCmdBufPtr)
lbu $3, 7(rdpCmdBufPtr)
// The three vertex indices of the triangle to draw are in bytes 1-3 of
// rdpHalf1Val. rdpHalf1Val is a DMEM label, not a register, so it cannot be
// used as the base in offset(base) form; address it absolutely instead.
lpv $v27[4], (rdpHalf1Val)($zero) // To vector unit in elems 5-7
lbu $1, rdpHalf1Val + 1 // Vertex 1 index
lbu $2, rdpHalf1Val + 2 // Vertex 2 index
lbu $3, rdpHalf1Val + 3 // Vertex 3 index
vclr vZero
lhu $1, (vertexTable)($1)
vmudn $v29, vOne, vTRC_VB // Address of vertex buffer
@@ -1827,7 +1833,8 @@ g_dma_io_ovl3:
jal segmented_to_physical // Convert the provided segmented address (in cmd_w1_dram) to a virtual one
lh dmemAddr, (inputBufferEnd - 0x07)(inputBufferPos) // Get the 16 bits in the middle of the command word (since inputBufferPos was already incremented for the next command)
andi dmaLen, cmd_w0, 0x0FF8 // Mask out any bits in the length to ensure 8-byte alignment
j dma_and_wait_goto_next_command // Trigger a DMA read or write, depending on the G_DMA_IO flag (which will occupy the sign bit of dmemAddr)
li nextRA, run_next_DL_command
j dma_and_wait_goto_next_ra // Trigger a DMA read or write, depending on the G_DMA_IO flag (which will occupy the sign bit of dmemAddr)
// At this point, dmemAddr's highest bit is the flag, its next 13 bits are the DMEM address, and its last two bits are the upper 2 of size
// So an arithmetic shift right 2 will preserve the flag as being the sign bit and get rid of the 2 size bits, shifting the DMEM address to start at the LSbit
sra dmemAddr, dmemAddr, 2
@@ -2596,11 +2603,18 @@ tris_end:
lqv vTRC, (vTRCValue)($zero) // Restore value overwritten by matrix
.endif
tri_fan_store:
lb $11, (inputBufferEnd - 7)(inputBufferPos) // Load vtx 1
sh cmd_w1_dram, 5(rdpCmdBufPtr) // Store vtx N+2 and N+3 as 1 and 2
j tri_main
sb $11, 7(rdpCmdBufPtr) // Store vtx 1 as 3
// Exit path of the triangle snake loop (taken when the end flag is seen).
// The snake can stop partway through an 8-byte input command, so
// inputBufferPos is rounded up to the next multiple of 8 before control
// goes to tris_end.
tri_snake_end:
addi inputBufferPos, inputBufferPos, 7 // Round up to whole input command
// The mask is built in a register because andi would zero-extend 0xFFF8 and
// clear the upper bits of the (negative) inputBufferPos.
addi $11, $zero, 0xFFF8 // Sign-extend; andi is zero-extend!
j tris_end
// NOTE(review): this follows the jump, so it presumably runs in the delay slot.
and inputBufferPos, inputBufferPos, $11 // inputBufferPos has to be negative
// Reached from the snake loop when inputBufferPos == 0, i.e. the next index
// byte lies past the end of the data currently in the input buffer. Reloads
// the input buffer and then resumes the snake, rather than returning to
// normal command dispatch.
tri_snake_over_input_buffer:
j displaylist_dma_tri_snake // inputBufferPos is now 0; load whole buffer
// Entering at displaylist_dma_tri_snake skips the default nextRA setup, so the
// return address chosen here is the one used after the DMA.
li nextRA, tri_snake_ret_from_input_buffer
// Return point after the reload requested above.
tri_snake_ret_from_input_buffer:
j tri_snake_loop_from_input_buffer // inputBufferPos pointing to first byte loaded
// lbu zero-extends, so $3 is non-negative and the loop's bltz end test on
// index b does not fire for this bridging iteration.
lbu $3, (inputBufferEnd)(inputBufferPos) // Load c; clear real index b sign bit -> don't exit
// Converts the segmented address in cmd_w1_dram to the corresponding physical address
segmented_to_physical: // 7

210
gbi.h
View File

@@ -63,10 +63,12 @@ of warnings if you use -Wpedantic. */
/*
* GBI commands in order
*/
#define G_LIGHTTORDP 0xD2
/*#define G_SPECIAL_3 0xD3 no-op in F3DEX2 */
#define G_RELSEGMENT 0xD3
/*#define G_SPECIAL_2 0xD4 no-op in F3DEX2 */
/*#define G_SPECIAL_1 0xD5 triggered MVP recalculation in F3DEX2 for debug */
#define G_FLUSH 0xD4
/*#define G_SPECIAL_1 0xD5 triggered MVP recalculation in F3DEX2 for debug */
#define G_MEMSET 0xD5
#define G_DMA_IO 0xD6
#define G_TEXTURE 0xD7
@@ -118,10 +120,7 @@ of warnings if you use -Wpedantic. */
#define G_TRI1 0x05
#define G_TRI2 0x06
#define G_QUAD 0x07
#define G_TRISTRIP 0x08 /* = G_LINE3D was a no-op in F3DEX2, has been removed */
#define G_TRIFAN 0x09
#define G_LIGHTTORDP 0x0A
#define G_RELSEGMENT 0x0B
#define G_TRISNAKE 0x08 /* = G_LINE3D was a no-op in F3DEX2, has been removed */
/* names differ between F3DEX2 and F3DZEX */
#define G_BRANCH_Z G_BRANCH_WZ
@@ -2679,67 +2678,188 @@ _DW({ \
__gsSP1Triangle_w1f(v10, v11, v12, flag1) \
}
/*
* 5 Triangles base commands
/**
* Make the triangle snake turn left before drawing this triangle.
* @see gSPTriSnake
*/
#define _gSP5Triangles(pkt, cmd, v1, v2, v3, v4, v5, v6, v7) \
_DW({ \
Gfx *_g = (Gfx *)(pkt); \
_g->words.w0 = (_SHIFTL(cmd, 24, 8) | \
_SHIFTL((v1)*2, 16, 8) | \
_SHIFTL((v2)*2, 8, 8) | \
_SHIFTL((v3)*2, 0, 8)); \
_g->words.w1 = (_SHIFTL((v4)*2, 24, 8) | \
_SHIFTL((v5)*2, 16, 8) | \
_SHIFTL((v6)*2, 8, 8) | \
_SHIFTL((v7)*2, 0, 8)); \
#define G_SNAKE_LEFT 0
/**
* Make the triangle snake turn right before drawing this triangle.
* @see gSPTriSnake
*/
#define G_SNAKE_RIGHT 1
/**
* Logical-OR this into a triangle index to mark it as the last triangle of the
* snake. In other words, this gets OR'd into the last valid index, not the
* first invalid index.
*
* @note Due to tri indices being multiplied by 2 in the binary encoding, this
* is actually 0x80--the byte's sign bit--in the binary encoding.
*
* @see gSPTriSnake
*/
#define G_SNAKE_LAST 0x40
/* First word of a triangle snake command: the command byte, then i2, i1 and
 * i3 in that byte order. Each index is doubled, and i3 additionally carries
 * G_SNAKE_RIGHT in its low bit. */
#define _gSPTriSnakeW0(i1, i2, i3)                      \
    (_SHIFTL(G_TRISNAKE,                    24, 8) |    \
     _SHIFTL((i2) * 2,                      16, 8) |    \
     _SHIFTL((i1) * 2,                       8, 8) |    \
     _SHIFTL(((i3) * 2) | G_SNAKE_RIGHT,     0, 8))
/* One word holding four snake entries, most significant byte first. Each
 * byte is the doubled index OR'd with that entry's direction flag. */
#define _gSPTriSnakeW1(i4, i4d, i5, i5d, i6, i6d, i7, i7d)  \
    (_SHIFTL(((i4) * 2) | (i4d), 24, 8) |                   \
     _SHIFTL(((i5) * 2) | (i5d), 16, 8) |                   \
     _SHIFTL(((i6) * 2) | (i6d),  8, 8) |                   \
     _SHIFTL(((i7) * 2) | (i7d),  0, 8))
/**
* Triangle snake is F3DEX3's accelerated triangles command. It is a generalized
* form of a triangle strip or fan, which can represent any sequential chain of
* connected triangles by encoding which side of the current triangle the next
* triangle attaches to. This allows the chain of triangles to "snake" around
* and double back next to itself, unlike a triangle strip. For more information
* on the design, see Triangle Snake in the documentation.
*
* The drawing algorithm is:
* - Initialize 3 bytes of stored triangle indices, A-B-C, to i3-i1-i2, and draw
* this triangle. (This initialization and draw is actually implemented by
* storing i2-i1-i3 and then running the algorithm below with G_SNAKE_RIGHT,
* which ends up storing i2 to C and i3 to A, ultimately creating i3-i1-i2.)
* - Loop:
* - If the index in A has G_SNAKE_LAST or'd into it, exit.
* - Increment the input pointer, and read the next index and its direction
* flag (currently i4 and i4d).
* - If the direction flag is G_SNAKE_LEFT, copy A to B; else
* (G_SNAKE_RIGHT), copy A to C.
* - Store the new index (currently i4) to A.
* - Draw the triangle A-B-C and repeat the loop.
*
* For example, after drawing the first triangle i3-i1-i2, if i4 is
* G_SNAKE_LEFT, the snake turns left and draws i4-i3-i2:
* 4 -->-- 3
* \' /'\ (winding order and
* \ / \ first vertex for flat
* \ / \ shading are marked)
* 2 --<-- 1
* Conversely, after the first triangle i3-i1-i2, if i4 is G_SNAKE_RIGHT, the
* snake turns right and draws i4-i1-i3:
* 3 -->-- 4
* /'\ '/
* / \ /
* / \ /
* 2 --<-- 1
* If the snake turns in the same direction repeatedly, it will coil up, forming
* a triangle fan. If it slithers left and right alternately, this will form a
* triangle strip. Any combination of these is also possible. In particular, a
* useful shape is a triangle strip for a few tris, then a tri fan for a couple
* tris to "turn around", then another tri strip alongside the first, and so on.
* This shape can cover almost all tris of a typical surface with a single
* snake; the exception is tris which have two unconnected edges, which can
* only be the first or last tris of the snake.
*
* @see gSPContinueSnake to extend the snake to more than 5 triangles.
*/
#define gSPTriSnake(pkt, i1, i2, i3, i4, i4d, i5, i5d, i6, i6d, i7, i7d) \
_DW({ \
Gfx *_g = (Gfx *)(pkt); \
_g->words.w0 = _gSPTriSnakeW0(i1, i2, i3); \
_g->words.w1 = _gSPTriSnakeW1(i4, i4d, i5, i5d, i6, i6d, i7, i7d); \
})
#define _gsSP5Triangles(cmd, v1, v2, v3, v4, v5, v6, v7) \
{ \
(_SHIFTL(cmd, 24, 8) | \
_SHIFTL((v1)*2, 16, 8) | \
_SHIFTL((v2)*2, 8, 8) | \
_SHIFTL((v3)*2, 0, 8)), \
(_SHIFTL((v4)*2, 24, 8) | \
_SHIFTL((v5)*2, 16, 8) | \
_SHIFTL((v6)*2, 8, 8) | \
_SHIFTL((v7)*2, 0, 8)) \
/**
* Static (initializer) form of gSPTriSnake: expands to a brace-enclosed pair
* of command words instead of writing them through a Gfx pointer.
*
* @copydetails gSPTriSnake
*/
#define gsSPTriSnake(i1, i2, i3, i4, i4d, i5, i5d, i6, i6d, i7, i7d) \
{ \
_gSPTriSnakeW0(i1, i2, i3), \
_gSPTriSnakeW1(i4, i4d, i5, i5d, i6, i6d, i7, i7d) \
}
/**
* Continue a triangle snake for up to 8 more triangles. This is actually not
* a display list command--there's no command byte. The data is just the next
* 8 bytes of the display list data, still being processed by the previous
* gSPTriSnake. Note that the microcode implementation does correctly handle
* the case when the snake continues past the end of the current data in the
* input buffer (which is a copy in DMEM of a chunk of the display list); the
* input buffer is reloaded like it would be for more commands. So the snake can
* be an unlimited length by continuing to append gSPContinueSnake commands.
*/
#define gSPContinueSnake(pkt, i0, i0d, i1, i1d, i2, i2d, i3, i3d, \
i4, i4d, i5, i5d, i6, i6d, i7, i7d) \
_DW({ \
Gfx *_g = (Gfx *)(pkt); \
_g->words.w0 = _gSPTriSnakeW1(i0, i0d, i1, i1d, i2, i2d, i3, i3d); \
_g->words.w1 = _gSPTriSnakeW1(i4, i4d, i5, i5d, i6, i6d, i7, i7d); \
})
/**
 * Static (initializer) form of gSPContinueSnake: expands to a brace-enclosed
 * pair of words holding eight more snake entries (index plus direction flag
 * each). Neither word contains a command byte.
 *
 * @copydetails gSPContinueSnake
 */
#define gsSPContinueSnake(i0, i0d, i1, i1d, i2, i2d, i3, i3d, \
                          i4, i4d, i5, i5d, i6, i6d, i7, i7d) \
{ \
    /* The comma between the two words is required for a valid initializer */ \
    _gSPTriSnakeW1(i0, i0d, i1, i1d, i2, i2d, i3, i3d), \
    _gSPTriSnakeW1(i4, i4d, i5, i5d, i6, i6d, i7, i7d) \
}
/**
* 5 Triangles in strip arrangement. Draws the following tris:
* v1-v2-v3, v2-v4-v3, v3-v4-v5, v4-v6-v5, v5-v6-v7
* If you want to draw fewer tris, set indices to -1 from the right.
* e.g. to draw 4 tris, set v7 to -1; to draw 3 tris, set v6 to -1.
* v3-v1-v2, v4-v3-v2, v5-v3-v4, v6-v5-v4, v7-v5-v6
* To draw fewer than 5 tris, set indices to -1 from the right; for example to
* draw 4 tris, set v7 to -1, or to draw 3 tris set v6 to -1.
*
* @note Any set of 3 adjacent tris can be drawn with either SPTriStrip
* or SPTriFan. For arbitrary sets of 4 adjacent tris, four out of five of them
* can be drawn with one of SPTriStrip or SPTriFan. The 4-triangle formation
* which can't be drawn with either command looks like the Triforce--maybe
* F3DEX4 will support gsSPTriForce. :)
*
* @note The first index of each triangle drawn is different, so that in
* !G_SHADING_SMOOTH (flat shading) mode, the single color or single normal of
* each triangle can be set independently.
*
* @deprecated This used to be directly implemented in the microcode, but is
* now implemented as a special case of gSPTriSnake. The latter is more general
* and should be used directly.
*
* @note One of the two handednesses of a 4 tri strip cannot be drawn directly
* with gSPTriStrip, unless v1 and v2 are set to the same vertex to create a
* degenerate triangle, which costs a little performance. However, now this
* shape can be drawn with gSPTriSnake (directions right-left-right).
*/
#define gSPTriStrip(pkt, v1, v2, v3, v4, v5, v6, v7) \
_gSP5Triangles(pkt, G_TRISTRIP, v1, v2, v3, v4, v5, v6, v7)
/*
 * Implemented as a triangle snake that alternates left/right. An index gets
 * G_SNAKE_LAST when the index after it is -1 (sign bit set), which ends the
 * snake early. v7 is the final index this command can hold, so it is always
 * marked G_SNAKE_LAST; otherwise a full 5-triangle strip would never end and
 * the following display list bytes would be read as more snake indices.
 * (If v7 is -1 the OR leaves it as -1, so the encoding is unchanged.)
 * All arguments are parenthesized so expression arguments expand safely.
 */
#define gSPTriStrip(pkt, v1, v2, v3, v4, v5, v6, v7) \
    gSPTriSnake(pkt, (v1), (v2), \
        (v3) | (((v4) & 0x80) ? G_SNAKE_LAST : 0), \
        (v4) | (((v5) & 0x80) ? G_SNAKE_LAST : 0), G_SNAKE_LEFT, \
        (v5) | (((v6) & 0x80) ? G_SNAKE_LAST : 0), G_SNAKE_RIGHT, \
        (v6) | (((v7) & 0x80) ? G_SNAKE_LAST : 0), G_SNAKE_LEFT, \
        (v7) | G_SNAKE_LAST, G_SNAKE_RIGHT)
/**
* @copydetails gSPTriStrip
*/
#define gsSPTriStrip(v1, v2, v3, v4, v5, v6, v7) \
_gsSP5Triangles(G_TRISTRIP, v1, v2, v3, v4, v5, v6, v7)
/*
 * Static form of the strip: a triangle snake that alternates left/right. An
 * index gets G_SNAKE_LAST when the index after it is -1 (sign bit set). v7 is
 * the final index this command can hold, so it is always marked G_SNAKE_LAST;
 * otherwise a full 5-triangle strip would never end and the following display
 * list bytes would be read as more snake indices. (If v7 is -1 the OR leaves
 * it as -1.) All arguments are parenthesized so expressions expand safely.
 */
#define gsSPTriStrip(v1, v2, v3, v4, v5, v6, v7) \
    gsSPTriSnake((v1), (v2), \
        (v3) | (((v4) & 0x80) ? G_SNAKE_LAST : 0), \
        (v4) | (((v5) & 0x80) ? G_SNAKE_LAST : 0), G_SNAKE_LEFT, \
        (v5) | (((v6) & 0x80) ? G_SNAKE_LAST : 0), G_SNAKE_RIGHT, \
        (v6) | (((v7) & 0x80) ? G_SNAKE_LAST : 0), G_SNAKE_LEFT, \
        (v7) | G_SNAKE_LAST, G_SNAKE_RIGHT)
/**
* 5 Triangles in fan arrangement. Draws the following tris:
* v2-v3-v1, v3-v4-v1, v4-v5-v1, v5-v6-v1, v6-v7-v1
* Otherwise works the same as @see SPTriStrip.
* v3-v1-v2, v4-v1-v3, v5-v1-v4, v6-v1-v5, v7-v1-v6
* Otherwise works the same as @see gSPTriStrip.
*
* @deprecated Use gSPTriSnake directly.
*/
#define gSPTriFan(pkt, v1, v2, v3, v4, v5, v6, v7) \
_gSP5Triangles(pkt, G_TRIFAN, v1, v2, v3, v4, v5, v6, v7)
gSPTriSnake(pkt, v1, v2, \
v3 | ((v4 & 0x80) ? G_SNAKE_LAST : 0), \
v4 | ((v5 & 0x80) ? G_SNAKE_LAST : 0), G_SNAKE_RIGHT, \
v5 | ((v6 & 0x80) ? G_SNAKE_LAST : 0), G_SNAKE_RIGHT, \
v6 | ((v7 & 0x80) ? G_SNAKE_LAST : 0), G_SNAKE_RIGHT, \
v7, G_SNAKE_RIGHT)
/**
* @copydetails gSPTriFan
*/
#define gsSPTriFan(v1, v2, v3, v4, v5, v6, v7) \
_gsSP5Triangles(G_TRIFAN, v1, v2, v3, v4, v5, v6, v7)
gsSPTriSnake(v1, v2, \
v3 | ((v4 & 0x80) ? G_SNAKE_LAST : 0), \
v4 | ((v5 & 0x80) ? G_SNAKE_LAST : 0), G_SNAKE_RIGHT, \
v5 | ((v6 & 0x80) ? G_SNAKE_LAST : 0), G_SNAKE_RIGHT, \
v6 | ((v7 & 0x80) ? G_SNAKE_LAST : 0), G_SNAKE_RIGHT, \
v7, G_SNAKE_RIGHT)
/*

View File

@@ -31,6 +31,8 @@ G_SHADING_SMOOTH equ 0x00200000
G_TRI_FILL equ 0xc8 // not a GBI command
G_LIGHTTORDP equ 0xd2
G_RELSEGMENT equ 0xd3
G_FLUSH equ 0xd4
G_MEMSET equ 0xd5
G_DMA_IO equ 0xd6
@@ -83,10 +85,7 @@ G_BRANCH_WZ equ 0x04
G_TRI1 equ 0x05
G_TRI2 equ 0x06
G_QUAD equ 0x07
G_TRISTRIP equ 0x08
G_TRIFAN equ 0x09
G_LIGHTTORDP equ 0x0a
G_RELSEGMENT equ 0x0b
G_TRISNAKE equ 0x08
G_BRANCH_Z equ G_BRANCH_WZ