align_with_warning 8, "One instruction of padding before tris"

// Advance the "next triangle" vertex 1 address into the "current" slot.
// Clobbers element 1 of $v6.
.macro tri_v1_move
    vmov    $v6[1], $v7[5] // Move next to cur vertex 1 addr. Must be after main tri code cause $v6 not saved.
.endmacro

// -----------------------------------------------------------------------------
// Triangle setup: G_TRI2 / G_QUAD / G_TRI1 command handlers.
//
// Entry labels:
//   G_TRI2_handler, G_QUAD_handler
//       Load $ra with G_TRI1_handler minus (tris_end - G_TRI1_handler) and fall
//       through, so that the addi in G_TRI1_handler leaves $ra == G_TRI1_handler
//       on the first pass and $ra == tris_end on the second pass.
//   G_TRI1_handler
//       Expects vertex addresses in $v6 / $v7 (layout in the comments below).
//   tri_from_snake, tri_from_clip
//       Additional entry labels; their callers are outside this chunk.
//
// Early exits visible here:
//   return_and_end_mat            - all three verts share a CLIP_SCRN_NPXY /
//                                   CLIP_CAMPLANE flag, facing cull, zero cross
//                                   product, or the RDP buffer is not yet full
//   clip_triangle                 - any vert has an active clip flag set
//   tri_culled_by_occlusion_plane - all verts flagged CLIP_OCCLUDED
//                                   (only when !CFG_NO_OCCLUSION_PLANE)
// Side paths that come back in at tri_return_from_* labels:
//   tri_flat_shading, tri_alpha_compare_cull, tri_decal_fix_z
//
// Output: edge coefficients at 0x00-0x1F of rdpCmdBufPtr, then 0x40 bytes of
// shade coefficients if G_SHADE, 0x40 bytes of texture coefficients if textures
// are on, and 0x10 bytes of Z coefficients if G_ZBUFFER. rdpCmdBufPtr is
// advanced accordingly. Offsets 0x10 and 0x20-0x37 of rdpCmdBufPtr are also
// used as temporary storage before the final values are written.
//
// The instruction after each branch is its delay slot and executes whether or
// not the branch is taken. Scalar and vector instructions are interleaved and
// the "// N cycles" annotations are the original author's counts; presumably
// the order is tuned for dual issue, so do not reorder without re-measuring.
// Comments of the form "// name <- $vN" mark where a named vector register
// alias takes over from a previous use of the same register.
// -----------------------------------------------------------------------------
G_TRI2_handler: // If we jumped here, want $ra next to be G_TRI1_handler
G_QUAD_handler:
    li      $ra, (G_TRI1_handler - (tris_end - G_TRI1_handler))
G_TRI1_handler: // Whether we get here from cmd handler or prev tri, $ra == G_TRI1_handler
    // $v6: -- V1 -- -- -- -- -- -- This vertex address 1
    // $v7: -- -- V2 V3 -- N1 N2 N3 This and next vertex addresses
    mfc2    $2, $v7[4]
    mfc2    origV1Addr, $v6[2] // Can't move this up, $v6 is not ready yet when coming from return_and_end_mat
    vmudh   $v6, vOne, $v6[1] // elem 2 of v6 = vertex 1 addr
    addi    $ra, $ra, (tris_end - G_TRI1_handler) // So next go to tris_end
tri_from_snake:
    vmudh   $v4, vOne, $v7[2] // elem 2 of v4 = vertex 2 addr
.if !ENABLE_PROFILING
    addi    perfCounterB, perfCounterB, 0x4000 // Increment number of tris requested
.endif
    vmudh   $v8, vOne, $v7[3] // elem 2 of v8 = vertex 3 addr
    mfc2    $3, $v7[6]
    vmov    $v7[3], $v7[7] // Move next to cur vertex 3 addr.
tri_from_clip:
    vnxor   tHAtF, vZero, $v31[7] // v5 = 0x8000; init frac value for attrs for rounding
    llv     $v6[0], VTX_SCR_VEC(origV1Addr) // Load pixel coords of vertex 1 into v6 (elems 0, 1 = x, y)
    vnxor   tMAtF, vZero, $v31[7] // v7 = 0x8000; init frac value for attrs for rounding
    llv     $v4[0], VTX_SCR_VEC($2) // Load pixel coords of vertex 2 into v4
    vnxor   tLAtF, vZero, $v31[7] // v9 = 0x8000; init frac value for attrs for rounding
    llv     $v8[0], VTX_SCR_VEC($3) // Load pixel coords of vertex 3 into v8
    vmov    $v7[2], $v7[6] // Move next to cur vertex 2 addr.
    lhu     $6, VTX_CLIP(origV1Addr)
    vmudh   $v2, vOne, $v6[1] // v2 all elems = y-coord of vertex 1
    lhu     $7, VTX_CLIP($2)
    vsub    $v10, $v6, $v4 // v10 = vertex 1 - vertex 2 (x, y, addr)
    lhu     $8, VTX_CLIP($3)
    vsub    $v12, $v6, $v8 // v12 = vertex 1 - vertex 3 (x, y, addr)
    andi    $11, $6, CLIP_SCRN_NPXY | CLIP_CAMPLANE // All three verts on wrong side of same plane
    vsub    $v11, $v4, $v6 // v11 = vertex 2 - vertex 1 (x, y, addr)
    and     $11, $11, $7
    vlt     $v13, $v2, $v4[1] // v13 = min(v1.y, v2.y), VCO = v1.y < v2.y
    and     $11, $11, $8
    vmrg    tHPos, $v6, $v4 // v14 = v1.y < v2.y ? v1 : v2 (lower vertex of v1, v2)
    bnez    $11, return_and_end_mat // Then the whole tri is offscreen, cull
    // 16 cycles (for tri2 first tri; tri1/only subtract 1 from counts)
    vmudh   $v29, $v10, $v12[1] // x = (v1 - v2).x * (v1 - v3).y ...
    vmadh   $v26, $v12, $v11[1] // ... + (v1 - v3).x * (v2 - v1).y = cross product = dir tri is facing
    lhu     $24, activeClipPlanes
    vge     $v2, $v2, $v4[1] // v2 = max(vert1.y, vert2.y), VCO = vert1.y > vert2.y
    sll     $20, vGeomMid, 29 // Original bit 10 (now bit 2) in the sign bit, for facing cull
    // tLPos <- $v10
    vmrg    tLPos, $v6, $v4 // v10 = vert1.y > vert2.y ? vert1 : vert2 (higher vertex of vert1, vert2)
    or      $10, $6, $7
    vge     $v6, $v13, $v8[1] // v6 = max(max(vert1.y, vert2.y), vert3.y), VCO = max(vert1.y, vert2.y) > vert3.y
    or      $10, $10, $8 // $10 = all clip bits which are true for any verts
    vmrg    $v4, tHPos, $v8 // v4 = max(vert1.y, vert2.y) > vert3.y : higher(vert1, vert2) ? vert3 (highest vertex of vert1, vert2, vert3)
    mfc2    $9, $v26[0] // elem 0 = x = cross product => lower 16 bits, sign extended
    vmrg    tHPos, $v8, tHPos // v14 = max(vert1.y, vert2.y) > vert3.y : vert3 ? higher(vert1, vert2)
    and     $10, $10, $24 // If clipping is enabled, check clip flags
    vlt     $v29, $v6, $v2 // VCO = max(vert1.y, vert2.y, vert3.y) < max(vert1.y, vert2.y)
    bnez    $10, clip_triangle // Facing info and occlusion may be garbage if need to clip
    // 24 cycles
    srl     $11, $9, 31 // = 0 if x prod positive (back facing), 1 if x prod negative (front facing)
    vmudh   $v3, vOne, $v31[5] // 0x4000; some rounding factor
    sllv    $11, $20, $11 // Sign bit = bit 10 of geom mode if back facing, bit 9 if front facing
    // tMPos <- $v2
    vmrg    tMPos, $v4, tLPos // v2 = max(vert1.y, vert2.y, vert3.y) < max(vert1.y, vert2.y) : highest(vert1, vert2, vert3) ? highest(vert1, vert2)
    bltz    $11, return_and_end_mat // Cull if bit is set (culled based on facing)
    // 27 cycles
    vmrg    tLPos, tLPos, $v4 // v10 = max(vert1.y, vert2.y, vert3.y) < max(vert1.y, vert2.y) : highest(vert1, vert2) ? highest(vert1, vert2, vert3)
    // tSubPxHF <- $v4
    vmudn   tSubPxHF, tHPos, $v31[5] // 0x4000
    beqz    $9, return_and_end_mat // If cross product is 0, tri is degenerate (zero area), cull.
    // 29 cycles
.if !CFG_NO_OCCLUSION_PLANE
    and     $6, $6, $7
.endif
    // tPosMmH <- $v6
    vsub    tPosMmH, tMPos, tHPos
.if !CFG_NO_OCCLUSION_PLANE
    and     $6, $6, $8
.endif
    // tPosLmH <- $v8
    vsub    tPosLmH, tLPos, tHPos
.if !CFG_NO_OCCLUSION_PLANE
    andi    $6, $6, CLIP_OCCLUDED
.endif
    // tPosHmM <- $v11
    vsub    tPosHmM, tHPos, tMPos
.if !CFG_NO_OCCLUSION_PLANE
    bnez    $6, tri_culled_by_occlusion_plane // Cull if all verts occluded
    // 33 cycles
.endif
    mfc2    $1, tHPos[4] // tHPos = lowest Y value = highest on screen (x, y, addr)
    // 32 cycles if NOC (34 if occlusion plane)
    vsub    tPosCatI, tLPos, tMPos // 0 X L-M; 1 Y L-M; 2 X M-H; 3 X L-H; 4-7 garbage
    mfc2    $2, tMPos[4] // tMPos = mid vertex (x, y, addr)
    vmov    tPosCatI[2], tPosMmH[0]
.if !ENABLE_PROFILING
    andi    $11, vGeomMid, G_SHADING_SMOOTH >> 8
.endif
    vmudh   $v29, tPosMmH, tPosLmH[0]
    li      $20, -8 // 0xFFF8; constant for some mask below
    vmadh   $v29, tPosLmH, tPosHmM[0]
    mfc2    $3, tLPos[4] // tLPos = highest Y value = lowest on screen (x, y, addr)
    vreadacc tXPI, ACC_UPPER // Triangle cross product
    add     $19, origV1Addr, flatV1Offset
    vreadacc tXPF, ACC_MIDDLE
    lpv     tHAtI[0], VTX_COLOR_VEC($1) // Load vert color of vertex 1
    vrcp    $v20[0], tPosCatI[1]
    lpv     tMAtI[0], VTX_COLOR_VEC($2) // Load vert color of vertex 2
    vmov    tPosCatI[3], tPosLmH[0]
    lpv     tLAtI[0], VTX_COLOR_VEC($3) // Load vert color of vertex 3
    vrcph   $v22[0], tXPI[1]
.if !ENABLE_PROFILING
    lpv     $v25[0], VTX_COLOR_VEC($19) // Load RGB from orig vtx 1 for flat shading
.endif
    vrcpl   tXPRcpF[1], tXPF[1] // Reciprocal of cross product (becomes that * 4)
.if !ENABLE_PROFILING
    beqz    $11, tri_flat_shading // Branch if G_SHADING_SMOOTH is clear
.endif
    vrcph   tXPRcpI[1], $v31[2] // 0
tri_return_from_flat_shading:
    // Uses $v25
    // 43 cycles
    vrcp    $v20[2], tPosMmH[1]
    ssv     tPosMmH[2], 0x0030(rdpCmdBufPtr) // MmHY -> first short (temp mem)
    // t1WI <- $v13 // elems 0, 4, 6
    vrcph   $v22[2], tPosMmH[1]
    llv     t1WI[0], VTX_INV_W_VEC($1)
    vrcp    $v20[3], tPosLmH[1]
    llv     t1WI[8], VTX_INV_W_VEC($2)
    vrcph   $v22[3], tPosLmH[1]
    llv     t1WI[12], VTX_INV_W_VEC($3)
    vmudl   tHAtI, tHAtI, vTRC_0100 // vertex color 1 >>= 8
    lb      $11, (alphaCompareCullMode)($zero)
    vmudl   tMAtI, tMAtI, vTRC_0100 // vertex color 2 >>= 8
    lw      $6, VTX_INV_W_VEC($1) // $6, $7, $8 = 1/W for H, M, L
    vmudl   tLAtI, tLAtI, vTRC_0100 // vertex color 3 >>= 8
    lw      $7, VTX_INV_W_VEC($2)
    vmudl   $v29, $v20, vTRC_0020
    lw      $8, VTX_INV_W_VEC($3)
    vmadm   $v22, $v22, vTRC_0020
    bnez    $11, tri_alpha_compare_cull
    vmadn   $v20, $v31, $v31[2] // 0
    // $v6 <- tPosMmH; $v6 clobbered in alpha compare cull
tri_return_from_alpha_compare_cull:
    // Uses $v25, $v26
    // 53 cycles
    // tPosCatF <- $v25
    vmudm   tPosCatF, tPosCatI, vTRC_1000
    mtc2    $20, tMPos[14] // 0xFFF8; only elem 0, 1, 2 of this reg used now
    vmadn   tPosCatI, $v31, $v31[2] // 0
    sub     $11, $6, $7 // Four instr: $6 = max($6, $7)
    vsubc   tSubPxHF, vZero, tSubPxHF
    sra     $10, $11, 31
    // tSubPxHI <- $v26
    vsub    tSubPxHI, vZero, vZero
    and     $11, $11, $10
    vmudm   $v29, tPosCatF, $v20
    sub     $6, $6, $11
    vmadl   $v29, tPosCatI, $v20
    sub     $11, $6, $8 // Four instr: $6 = max($6, $8)
    vmadn   $v20, tPosCatI, $v22
    sra     $10, $11, 31
    vmadh   tPosCatI, tPosCatF, $v22
    and     $11, $11, $10
    vmudl   $v29, tXPRcpF, tXPF
    sub     $6, $6, $11
    vmadm   $v29, tXPRcpI, tXPF
    mfc2    $7, tXPI[1]
    vmadn   tXPF, tXPRcpF, tXPI
    lbu     $14, geometryModeLabel + 3 // Load lowest byte for G_SHADE, G_ZBUFFER. Also has G_ATTROFFSET_ST_ENABLE, but G_TRI_FILL will get OR'd into it and force that set.
    vmadh   tXPI, tXPRcpI, tXPI
    lbu     $9, textureSettings1 + 3 // Texture enabled = 0x2
    vand    $v22, $v20, tMPos[7] // 0xFFF8
    lsv     tMAtI[14], VTX_SCR_Z($2)
    vcr     tPosCatI, tPosCatI, vTRC_0100
    lsv     tLAtI[14], VTX_SCR_Z($3)
    vmudh   $v29, vOne, $v31[4] // 4
    ori     $11, $14, G_TRI_FILL // Combine geometry mode (only the low byte will matter) with the base triangle type to make the triangle command id
    vmadn   tXPF, tXPF, $v31[0] // -4
    or      $11, $11, $9 // Incorporate whether textures are enabled into the triangle command id
    vmadh   tXPI, tXPI, $v31[0] // -4
    sw      $6, 0x0010(rdpCmdBufPtr) // Store max of three verts' 1/W (upper) to temp mem
    // tMx1W <- tPosCatF
    vmudn   $v29, $v3, tHPos[0]
    llv     tMx1W[0], 0x0010(rdpCmdBufPtr) // Load max of three verts' 1/W
    vmadl   $v29, $v22, tSubPxHF[1]
    ssv     tMPos[2], 0x0004(rdpCmdBufPtr) // Store YM edge coefficient
    vmadm   $v29, tPosCatI, tSubPxHF[1]
    lsv     tMAtF[14], VTX_SCR_Z_FRAC($2)
    // $v2 <- tMPos
    vmadn   $v2, $v22, tSubPxHI[1]
    ssv     tLPos[2], 0x0002(rdpCmdBufPtr) // Store YL edge coefficient
    vmadh   $v3, tPosCatI, tSubPxHI[1]
    lsv     tLAtF[14], VTX_SCR_Z_FRAC($3)
    vrcph   $v29[0], tMx1W[0] // Reciprocal of max 1/W = min W
    ssv     tHPos[2], 0x0006(rdpCmdBufPtr) // Store YH edge coefficient
    // tMnWF <- tLPos
    vrcpl   tMnWF[0], tMx1W[1]
    lbu     $10, textureSettings1 + 2 // Level and tile
    // t1WF <- tHPos
    vmudh   t1WF, vOne, t1WI[1q]
    sb      $11, 0x0000(rdpCmdBufPtr) // Store the triangle command id
    // tMnWI <- tMx1W
    vrcph   tMnWI[0], $v31[2] // 0
    lw      $19, otherMode1
    // tSTWHMI <- $v22 // H = elems 0-2, M = elems 4-6; init W = 7FFF
    vmudh   tSTWHMI, vOne, $v31[7] // 0x7FFF
    sb      $zero, materialCullMode // Covers tri write (non early exit)
    vmudm   $v29, t1WI, tMnWF[0] // 1/W each vtx * min W = 1 for one of the verts, < 1 for others
    llv     tSTWHMI[0], VTX_TC_VEC($1)
    vmadl   $v29, t1WF, tMnWF[0]
    ssv     tPosLmH[0], 0x0032(rdpCmdBufPtr) // LmHX -> second short (temp mem)
    vmadn   t1WF, t1WF, tMnWI[0]
    llv     tSTWHMI[8], VTX_TC_VEC($2)
    vmadh   t1WI, t1WI, tMnWI[0]
    ssv     tPosHmM[0], 0x0034(rdpCmdBufPtr) // HmMX -> third short (temp mem)
    // tSTWLI <- tMnWF // L = elems 4-6; init W = 7FFF
    vmudh   tSTWLI, vOne, $v31[7] // 0x7FFF
    andi    $19, $19, ZMODE_DEC // Mask to two Z mode bits
    set_vcc_11110001 // select RGBA___Z or ____STW_
    llv     tSTWLI[8], VTX_TC_VEC($3)
    vmudm   $v29, tSTWHMI, t1WF[0h] // (S, T, 7FFF) * (1 or <1) for H and M
    addi    $19, $19, -ZMODE_DEC // Check if equal to decal mode
    vmadh   tSTWHMI, tSTWHMI, t1WI[0h]
    ldv     tPosLmH[8], 0x0030(rdpCmdBufPtr) // MmHY -> e4, LmHX -> e5, HmMX -> e6
    // tSTWHMF <- tMnWI
    vmadn   tSTWHMF, $v31, $v31[2] // 0
    andi    $7, $7, 0x0080 // Extract the left major flag from $7
    vmudm   $v29, tSTWLI, t1WF[6] // (S, T, 7FFF) * (1 or <1) for L
    or      $7, $7, $10 // Combine the left major flag with the level and tile from the texture settings
    vmadh   tSTWLI, tSTWLI, t1WI[6]
    sb      $7, 0x0001(rdpCmdBufPtr) // Store the left major flag, level, and tile settings
    // tSTWLF <- t1WI
    vmadn   tSTWLF, $v31, $v31[2] // 0
    sdv     tSTWHMI[0], 0x0020(rdpCmdBufPtr) // Move S, T, W Hi Int to temp mem
    vmrg    tMAtI, tMAtI, tSTWHMI // Merge S, T, W Mid into elems 4-6
    sdv     tSTWHMF[0], 0x0028(rdpCmdBufPtr) // Move S, T, W Hi Frac to temp mem
    vmrg    tMAtF, tMAtF, tSTWHMF // Merge S, T, W Mid into elems 4-6
    ldv     tHAtI[8], 0x0020(rdpCmdBufPtr) // Move S, T, W Hi Int from temp mem
    vmrg    tLAtI, tLAtI, tSTWLI // Merge S, T, W Low into elems 4-6
    ldv     tHAtF[8], 0x0028(rdpCmdBufPtr) // Move S, T, W Hi Frac from temp mem
    vmrg    tLAtF, tLAtF, tSTWLF // Merge S, T, W Low into elems 4-6
.if !ENABLE_PROFILING
    addi    perfCounterA, perfCounterA, 1 // Increment number of tris sent to RDP
.endif
    // 96 cycles
    vmudl   $v29, tXPF, tXPRcpF
    lsv     tHAtF[14], VTX_SCR_Z_FRAC($1)
    vmadm   $v29, tXPI, tXPRcpF
    lsv     tHAtI[14], VTX_SCR_Z($1) // contains R, G, B, A, S, T, W, Z
    vmadn   tXPRcpF, tXPF, tXPRcpI
    lh      $1, VTX_SCR_VEC($2)
    vmadh   tXPRcpI, tXPI, tXPRcpI
    addi    $2, rdpCmdBufPtr, 0x20 // Increment the triangle pointer by 0x20 bytes (edge coefficients)
    vmudh   tPosLmH, tPosLmH, $v31[0h] // e1 LmHY * -4 = 4*HmLY; e456 MmHY,LmHX,HmMX *= 4
    andi    $3, $14, G_SHADE
    // tAtLmHF <- tSTWLI
    vsubc   tAtLmHF, tLAtF, tHAtF
    sll     $1, $1, 14
    // tAtLmHI <- tLAtF
    vsub    tAtLmHI, tLAtI, tHAtI
    // NOTE(review): materialCullMode was already zeroed earlier in this routine
    // and nothing visible in between writes it, so this second store looks
    // redundant. It also occupies a scalar slot between two vector ops, so
    // confirm the scheduling impact before removing it.
    sb      $zero, materialCullMode // This covers tri write out
    // tAtMmHF <- tSTWLF
    vsubc   tAtMmHF, tMAtF, tHAtF
    sw      $1, 0x0008(rdpCmdBufPtr) // Store XL edge coefficient
    // tAtMmHI <- tMAtF
    vsub    tAtMmHI, tMAtI, tHAtI
    ssv     $v3[6], 0x0010(rdpCmdBufPtr) // Store XH edge coefficient (integer part)
    // DaDx = (v3 - v1) * factor + (v2 - v1) * factor
    vmudn   $v29, tAtLmHF, tPosLmH[4] // MmHY * 4
    ssv     $v2[6], 0x0012(rdpCmdBufPtr) // Store XH edge coefficient (fractional part)
    vmadh   $v29, tAtLmHI, tPosLmH[4] // MmHY * 4
    ssv     $v3[4], 0x0018(rdpCmdBufPtr) // Store XM edge coefficient (integer part)
    vmadn   $v29, tAtMmHF, tPosLmH[1] // LmHY * -4 = HmLY * 4
    ssv     $v2[4], 0x001A(rdpCmdBufPtr) // Store XM edge coefficient (fractional part)
    vmadh   $v29, tAtMmHI, tPosLmH[1] // LmHY * -4 = HmLY * 4
    ssv     tPosCatI[0], 0x000C(rdpCmdBufPtr) // Store DxLDy edge coefficient (integer part)
    // tDaDxF <- $v2
    vreadacc tDaDxF, ACC_MIDDLE
    ssv     $v20[0], 0x000E(rdpCmdBufPtr) // Store DxLDy edge coefficient (fractional part)
    // tDaDxI <- $v3
    vreadacc tDaDxI, ACC_UPPER
    ssv     tPosCatI[6], 0x0014(rdpCmdBufPtr) // Store DxHDy edge coefficient (integer part)
    // DaDy = (v2 - v1) * factor + (v3 - v1) * factor
    vmudn   $v29, tAtMmHF, tPosLmH[5] // LmHX * 4
    ssv     $v20[6], 0x0016(rdpCmdBufPtr) // Store DxHDy edge coefficient (fractional part)
    vmadh   $v29, tAtMmHI, tPosLmH[5] // LmHX * 4
    ssv     tPosCatI[4], 0x001C(rdpCmdBufPtr) // Store DxMDy edge coefficient (integer part)
    vmadn   $v29, tAtLmHF, tPosLmH[6] // HmMX * 4
    ssv     $v20[4], 0x001E(rdpCmdBufPtr) // Store DxMDy edge coefficient (fractional part)
    vmadh   $v29, tAtLmHI, tPosLmH[6] // HmMX * 4
    sll     $11, $3, 4 // Shift (geometry mode & G_SHADE) by 4 to get 0x40 if G_SHADE is set
    // tDaDyF <- $v6
    vreadacc tDaDyF, ACC_MIDDLE
    add     $1, $2, $11 // Increment the triangle pointer by 0x40 bytes (shade coefficients) if G_SHADE is set
    // tDaDyI <- tAtMmHI
    vreadacc tDaDyI, ACC_UPPER
    sll     $11, $9, 5 // Shift texture enabled (which is 2 when on) by 5 to get 0x40 if textures are on
    // DaDx, DaDy *= more factors
    vmudl   $v29, tDaDxF, tXPRcpF[1]
    add     rdpCmdBufPtr, $1, $11 // Increment the triangle pointer by 0x40 bytes (texture coefficients) if textures are on
    vmadm   $v29, tDaDxI, tXPRcpF[1]
    andi    $14, $14, G_ZBUFFER // Get the value of G_ZBUFFER from the current geometry mode
    vmadn   tDaDxF, tDaDxF, tXPRcpI[1]
    sll     $11, $14, 4 // Shift (geometry mode & G_ZBUFFER) by 4 to get 0x10 if G_ZBUFFER is set
    vmadh   tDaDxI, tDaDxI, tXPRcpI[1]
    move    $10, rdpCmdBufPtr // Write Z here
    vmudl   $v29, tDaDyF, tXPRcpF[1]
    add     rdpCmdBufPtr, rdpCmdBufPtr, $11 // Increment the triangle pointer by 0x10 bytes (depth coefficients) if G_ZBUFFER is set
    vmadm   $v29, tDaDyI, tXPRcpF[1]
    sub     dmemAddr, rdpCmdBufPtr, rdpCmdBufEndP1 // Check if we need to write out to RDP
    vmadn   tDaDyF, tDaDyF, tXPRcpI[1]
    sdv     tDaDxF[0], 0x0018($2) // Store DrDx, DgDx, DbDx, DaDx shade coefficients (fractional)
    vmadh   tDaDyI, tDaDyI, tXPRcpI[1]
    sdv     tDaDxI[0], 0x0008($2) // Store DrDx, DgDx, DbDx, DaDx shade coefficients (integer)
    // DaDe = DaDx * factor
    // NOTE(review): this sequence opens with vmadl (accumulate) rather than
    // vmudl, so the accumulator left by the DaDy multiply just above is part of
    // the sum, i.e. the result is DaDy + DaDx * factor. Presumably intentional;
    // confirm before "fixing" it to vmudl.
    // 125 cycles
    vmadl   $v29, tDaDxF, $v20[3]
    sdv     tDaDxF[8], 0x0018($1) // Store DsDx, DtDx, DwDx texture coefficients (fractional)
    vmadm   $v29, tDaDxI, $v20[3]
    sdv     tDaDxI[8], 0x0008($1) // Store DsDx, DtDx, DwDx texture coefficients (integer)
    // tDaDeF <- tPosLmH
    vmadn   tDaDeF, tDaDxF, tPosCatI[3]
    sdv     tDaDyF[0], 0x0038($2) // Store DrDy, DgDy, DbDy, DaDy shade coefficients (fractional)
    // tDaDeI <- tAtLmHI
    vmadh   tDaDeI, tDaDxI, tPosCatI[3]
    sdv     tDaDyI[0], 0x0028($2) // Store DrDy, DgDy, DbDy, DaDy shade coefficients (integer)
    // Base value += DaDe * factor
    vmudn   $v29, tHAtF, vOne[0]
    sdv     tDaDyF[8], 0x0038($1) // Store DsDy, DtDy, DwDy texture coefficients (fractional)
    vmadh   $v29, tHAtI, vOne[0]
    sdv     tDaDyI[8], 0x0028($1) // Store DsDy, DtDy, DwDy texture coefficients (integer)
    vmadl   $v29, tDaDeF, tSubPxHF[1]
    sdv     tDaDeF[0], 0x0030($2) // Store DrDe, DgDe, DbDe, DaDe shade coefficients (fractional)
    vmadm   $v29, tDaDeI, tSubPxHF[1]
    sdv     tDaDeI[0], 0x0020($2) // Store DrDe, DgDe, DbDe, DaDe shade coefficients (integer)
    vmadn   tHAtF, tDaDeF, tSubPxHI[1]
    sdv     tDaDeF[8], 0x0030($1) // Store DsDe, DtDe, DwDe texture coefficients (fractional)
    vmadh   tHAtI, tDaDeI, tSubPxHI[1]
    sdv     tDaDeI[8], 0x0020($1) // Store DsDe, DtDe, DwDe texture coefficients (integer)
    // All values start in element 7. "a", attribute, is Z. Need
    // tHAtI, tHAtF, tDaDxI, tDaDxF, tDaDeI, tDaDeF, tDaDyI, tDaDyF
    // VCC is still 11110001
    // 135 cycles
    vmrg    tDaDyI, tDaDyF, tDaDyI[7] // Elems 6-7: DzDyI:F
    beqz    $19, tri_decal_fix_z
    vmrg    tDaDxI, tDaDxF, tDaDxI[7] // Elems 6-7: DzDxI:F
tri_return_from_decal_fix_z:
    vmrg    tDaDeI, tDaDeF, tDaDeI[7] // Elems 6-7: DzDeI:F
    sdv     tHAtF[0], 0x0010($2) // Store RGBA shade color (fractional)
    // $v10 <- tAtLmHF
    vmrg    $v10, tHAtF, tHAtI[7] // Elems 6-7: ZI:F
    sdv     tHAtI[0], 0x0000($2) // Store RGBA shade color (integer)
    tri_v1_move // From return_and_end_mat, we didn't go there
    sdv     tHAtF[8], 0x0010($1) // Store S, T, W texture coefficients (fractional)
    sdv     tHAtI[8], 0x0000($1) // Store S, T, W texture coefficients (integer)
    slv     tDaDyI[12], 0x0C($10) // DzDyI:F
    slv     tDaDxI[12], 0x04($10) // DzDxI:F
    slv     tDaDeI[12], 0x08($10) // DzDeI:F
    bltz    dmemAddr, return_and_end_mat // Return if rdpCmdBufPtr < end+1 i.e. ptr <= end
    slv     $v10[12], 0x00($10) // ZI:F
    // 146 cycles

// Reached only when the branch above is not taken; presumably execution falls
// through into the included buffer-flush code (its contents are not visible here).
.include "rsp/sys/flush_rdp_buffer.s"