From 60eba7dff99400aeb61dbdfe13fd7a0cbd68efaa Mon Sep 17 00:00:00 2001 From: Luke Street Date: Wed, 18 Feb 2026 13:41:42 -0700 Subject: [PATCH] Fog & TCS FIFO-ification --- include/dolphin/gx/GXTexture.h | 2 + lib/dolphin/gx/GXFrameBuffer.cpp | 24 +- lib/dolphin/gx/GXGeometry.cpp | 8 +- lib/dolphin/gx/GXManage.cpp | 14 +- lib/dolphin/gx/GXPixel.cpp | 6 - lib/dolphin/gx/GXTev.cpp | 14 -- lib/dolphin/gx/GXTexture.cpp | 32 ++- lib/dolphin/gx/__gx.h | 1 + lib/gfx/command_processor.cpp | 175 +++++++++++--- lib/gfx/common.cpp | 8 +- lib/gfx/gx.hpp | 37 ++- lib/gfx/shader_info.cpp | 9 +- tests/gx_fifo_test.cpp | 379 +++++++++++++++++++++++++++++++ 13 files changed, 632 insertions(+), 77 deletions(-) diff --git a/include/dolphin/gx/GXTexture.h b/include/dolphin/gx/GXTexture.h index c7cc8e5..461be55 100644 --- a/include/dolphin/gx/GXTexture.h +++ b/include/dolphin/gx/GXTexture.h @@ -24,6 +24,8 @@ void GXInitTexObjWrapMode(GXTexObj* obj, GXTexWrapMode s, GXTexWrapMode t); void GXInitTlutObj(GXTlutObj* obj, const void* data, GXTlutFmt format, u16 entries); void GXLoadTlut(const GXTlutObj* obj, GXTlut idx); void GXSetTexCoordScaleManually(GXTexCoordID coord, GXBool enable, u16 ss, u16 ts); +void GXSetTexCoordCylWrap(GXTexCoordID coord, GXBool s_enable, GXBool t_enable); +void GXSetTexCoordBias(GXTexCoordID coord, GXBool s_enable, GXBool t_enable); void GXInitTexCacheRegion(GXTexRegion* region, GXBool is_32b_mipmap, u32 tmem_even, GXTexCacheSize size_even, u32 tmem_odd, GXTexCacheSize size_odd); GXTexRegionCallback GXSetTexRegionCallback(GXTexRegionCallback callback); diff --git a/lib/dolphin/gx/GXFrameBuffer.cpp b/lib/dolphin/gx/GXFrameBuffer.cpp index c942b0d..55f1e3a 100644 --- a/lib/dolphin/gx/GXFrameBuffer.cpp +++ b/lib/dolphin/gx/GXFrameBuffer.cpp @@ -1,4 +1,5 @@ #include "gx.hpp" +#include "__gx.h" #include "../../window.hpp" #include "../../webgpu/wgpu.hpp" @@ -39,7 +40,28 @@ void GXSetTexCopyDst(u16 wd, u16 ht, GXTexFmt fmt, GXBool mipmap) { u32 GXSetDispCopyYScale(f32 vscale) { return 0; } -void GXSetCopyClear(GXColor color, u32 depth) { update_gx_state(g_gxState.clearColor, from_gx_color(color)); } +void GXSetCopyClear(GXColor color, u32 depth) { + // BP 0x4F: clear color R + A + u32 reg0 = 0; + SET_REG_FIELD(0, reg0, 8, 0, color.r); + SET_REG_FIELD(0, reg0, 8, 8, color.a); + SET_REG_FIELD(0, reg0, 8, 24, 0x4F); + GX_WRITE_RAS_REG(reg0); + + // BP 0x50: clear color B + G + u32 reg1 = 0; + SET_REG_FIELD(0, reg1, 8, 0, color.b); + SET_REG_FIELD(0, reg1, 8, 8, color.g); + SET_REG_FIELD(0, reg1, 8, 24, 0x50); + GX_WRITE_RAS_REG(reg1); + + // BP 0x51: clear Z (24-bit) + u32 reg2 = 0; + SET_REG_FIELD(0, reg2, 24, 0, depth); + SET_REG_FIELD(0, reg2, 8, 24, 0x51); + GX_WRITE_RAS_REG(reg2); + __gx->bpSent = 1; +} void GXSetCopyFilter(GXBool aa, u8 sample_pattern[12][2], GXBool vf, u8 vfilter[7]) {} diff --git a/lib/dolphin/gx/GXGeometry.cpp b/lib/dolphin/gx/GXGeometry.cpp index 3061dab..ecc98f3 100644 --- a/lib/dolphin/gx/GXGeometry.cpp +++ b/lib/dolphin/gx/GXGeometry.cpp @@ -364,5 +364,11 @@ void GXSetLineWidth(u8 width, GXTexOffset offs) { } // TODO GXSetPointSize -// TODO GXEnableTexOffsets + +void GXEnableTexOffsets(GXTexCoordID coord, GXBool line_enable, GXBool point_enable) { + SET_REG_FIELD(0, __gx->suTs0[coord], 1, 18, line_enable); + SET_REG_FIELD(0, __gx->suTs0[coord], 1, 19, point_enable); + GX_WRITE_RAS_REG(__gx->suTs0[coord]); + __gx->bpSent = 1; +} } diff --git a/lib/dolphin/gx/GXManage.cpp b/lib/dolphin/gx/GXManage.cpp index e4e85ab..c52f4bf 100644 --- a/lib/dolphin/gx/GXManage.cpp +++ b/lib/dolphin/gx/GXManage.cpp @@ -250,8 +250,12 @@ static void __SetSURegs(u32 tmap, u32 tcoord) { void __GXSetSUTexRegs() { // Write SU texture size/bias registers for each active TEV stage and indirect stage. - // tcsManEnab == 0xFF means all coords are manually managed (skip auto-setup). - // Aurora doesn't use tcsManEnab, so we always auto-setup (tcsManEnab = 0). + // Skip coords that have manual scale enabled (tcsManEnab bit set). + // If all coords are manual (0xFF), skip entirely. + if (__gx->tcsManEnab == 0xFF) { + return; + } + u32 nStages = GET_REG_FIELD(__gx->genMode, 4, 10) + 1; u32 nIndStages = GET_REG_FIELD(__gx->genMode, 3, 16); @@ -276,7 +280,9 @@ void __GXSetSUTexRegs() { coord = GET_REG_FIELD(__gx->iref, 3, 21); break; } - __SetSURegs(tmap, coord); + if (!(__gx->tcsManEnab & (1 << coord))) { + __SetSURegs(tmap, coord); + } } // Direct TEV stages @@ -290,7 +296,7 @@ void __GXSetSUTexRegs() { } else { coord = GET_REG_FIELD(*ptref, 3, 3); } - if (tmap != 0xFF) { + if (tmap != 0xFF && !(__gx->tcsManEnab & (1 << coord))) { __SetSURegs(tmap, coord); } } diff --git a/lib/dolphin/gx/GXPixel.cpp b/lib/dolphin/gx/GXPixel.cpp index 200d349..be6b040 100644 --- a/lib/dolphin/gx/GXPixel.cpp +++ b/lib/dolphin/gx/GXPixel.cpp @@ -76,9 +76,6 @@ void GXSetFog(GXFogType type, float startZ, float endZ, float nearZ, float farZ, GX_WRITE_RAS_REG(fog3); GX_WRITE_RAS_REG(fogclr); __gx->bpSent = 1; - - // Side channel: direct update for inline rendering (full precision) - update_gx_state(g_gxState.fog, {type, startZ, endZ, nearZ, farZ, from_gx_color(color)}); } void GXSetFogColor(GXColor color) { @@ -90,9 +87,6 @@ void GXSetFogColor(GXColor color) { SET_REG_FIELD(0, fogclr, 8, 24, 0xF2); GX_WRITE_RAS_REG(fogclr); __gx->bpSent = 1; - - // Side channel: direct update for inline rendering - update_gx_state(g_gxState.fog.color, from_gx_color(color)); } void GXSetBlendMode(GXBlendMode mode, GXBlendFactor src, GXBlendFactor dst, GXLogicOp op) { diff --git a/lib/dolphin/gx/GXTev.cpp b/lib/dolphin/gx/GXTev.cpp index 9f46680..402d387 100644 --- a/lib/dolphin/gx/GXTev.cpp +++ b/lib/dolphin/gx/GXTev.cpp @@ -107,9 +107,6 @@ void GXSetTevColor(GXTevRegID id, GXColor color) { // We omit the redundant writes since they don't change the register value and // our software command processor doesn't need the sync delay. __gx->bpSent = 1; - - // Side channel: direct update for inline rendering (full precision) - update_gx_state(g_gxState.colorRegs[id], from_gx_color(color)); } void GXSetTevColorS10(GXTevRegID id, GXColorS10 color) { @@ -128,14 +125,6 @@ void GXSetTevColorS10(GXTevRegID id, GXColorS10 color) { // We omit the redundant writes since they don't change the register value and // our software command processor doesn't need the sync delay. __gx->bpSent = 1; - - // Side channel: direct update for inline rendering (full precision) - update_gx_state(g_gxState.colorRegs[id], aurora::Vec4{ - static_cast(color.r) / 255.f, - static_cast(color.g) / 255.f, - static_cast(color.b) / 255.f, - static_cast(color.a) / 255.f, - }); } void GXSetAlphaCompare(GXCompare comp0, u8 ref0, GXAlphaOp op, GXCompare comp1, u8 ref1) { @@ -204,9 +193,6 @@ void GXSetTevKColor(GXTevKColorID id, GXColor color) { GX_WRITE_RAS_REG(regRA); GX_WRITE_RAS_REG(regBG); __gx->bpSent = 1; - - // Side channel: direct update for inline rendering (full precision) - update_gx_state(g_gxState.kcolors[id], from_gx_color(color)); } void GXSetTevKColorSel(GXTevStageID id, GXTevKColorSel sel) { diff --git a/lib/dolphin/gx/GXTexture.cpp b/lib/dolphin/gx/GXTexture.cpp index 628a281..5eb91c7 100644 --- a/lib/dolphin/gx/GXTexture.cpp +++ b/lib/dolphin/gx/GXTexture.cpp @@ -1,4 +1,5 @@ #include "gx.hpp" +#include "__gx.h" #include "../../gfx/texture.hpp" @@ -234,8 +235,33 @@ void GXInvalidateTexAll() { // TODO GXSetTlutRegionCallback // TODO GXLoadTexObjPreLoaded void GXSetTexCoordScaleManually(GXTexCoordID coord, GXBool enable, u16 ss, u16 ts) { - // TODO + __gx->tcsManEnab = (__gx->tcsManEnab & ~(1 << coord)) | (enable << coord); + if (enable) { + SET_REG_FIELD(0, __gx->suTs0[coord], 16, 0, static_cast(ss - 1)); + SET_REG_FIELD(0, __gx->suTs1[coord], 16, 0, static_cast(ts - 1)); + GX_WRITE_RAS_REG(__gx->suTs0[coord]); + GX_WRITE_RAS_REG(__gx->suTs1[coord]); + __gx->bpSent = 1; + } +} + +void GXSetTexCoordCylWrap(GXTexCoordID coord, GXBool s_enable, GXBool t_enable) { + SET_REG_FIELD(0, __gx->suTs0[coord], 1, 17, s_enable); + SET_REG_FIELD(0, __gx->suTs1[coord], 1, 17, t_enable); + if (__gx->tcsManEnab & (1 << coord)) { + GX_WRITE_RAS_REG(__gx->suTs0[coord]); + GX_WRITE_RAS_REG(__gx->suTs1[coord]); + __gx->bpSent = 1; + } +} + +void GXSetTexCoordBias(GXTexCoordID coord, GXBool s_enable, GXBool t_enable) { + SET_REG_FIELD(0, __gx->suTs0[coord], 1, 16, s_enable); + SET_REG_FIELD(0, __gx->suTs1[coord], 1, 16, t_enable); + if (__gx->tcsManEnab & (1 << coord)) { + GX_WRITE_RAS_REG(__gx->suTs0[coord]); + GX_WRITE_RAS_REG(__gx->suTs1[coord]); + __gx->bpSent = 1; + } } -// TODO GXSetTexCoordCylWrap -// TODO GXSetTexCoordBias } \ No newline at end of file diff --git a/lib/dolphin/gx/__gx.h b/lib/dolphin/gx/__gx.h index a5c5157..31283b9 100644 --- a/lib/dolphin/gx/__gx.h +++ b/lib/dolphin/gx/__gx.h @@ -99,6 +99,7 @@ struct __GXData_struct { u32 suTs0[8]; // SU texture S0 registers u32 suTs1[8]; // SU texture S1 registers + u8 tcsManEnab; // bitmask: manual tex coord scale enabled per coord u32 suScis0; // scissor top-left u32 suScis1; // scissor bottom-right diff --git a/lib/gfx/command_processor.cpp b/lib/gfx/command_processor.cpp index 436914a..22f6fdf 100644 --- a/lib/gfx/command_processor.cpp +++ b/lib/gfx/command_processor.cpp @@ -8,6 +8,9 @@ #include +#include +#include + static aurora::Module Log("aurora::gfx::cp"); using aurora::gfx::gx::g_gxState; @@ -333,17 +336,11 @@ void process(const u8* data, u32 size, bool bigEndian) { while (pos < size) { u8 cmd = data[pos++]; - - if (cmd == CP_CMD_NOP) { - continue; - } - u8 opcode = cmd & CP_OPCODE_MASK; // Log.warn("Processing opcode {:02x} at pos {} (size {})", opcode, pos - 1, size); switch (opcode) { case CP_CMD_NOP: - // Already handled above, but could be 0x00 with VAT bits set continue; case CP_CMD_LOAD_BP_REG: { @@ -374,7 +371,7 @@ void process(const u8* data, u32 size, bool bigEndian) { case CP_CMD_LOAD_INDX_D: { // Indexed XF load: 4 bytes of data CHECK(pos + 4 <= size, "indexed XF read overrun"); - // TODO: handle indexed XF loads + Log.warn("Unimplemented indexed XF load (opcode 0x{:02X})", opcode); pos += 4; break; } @@ -388,7 +385,7 @@ void process(const u8* data, u32 size, bool bigEndian) { } case CP_CMD_INVAL_VTX: { - // Invalidate vertex cache - no data + // Invalidate vertex cache break; } @@ -533,7 +530,7 @@ static void handle_bp(u32 value, bool bigEndian) { // BP mask (0x0F) - internal, applies to next BP write case 0x0F: // The BP mask is used by the hardware to selectively update fields. - // For our purposes, we don't need to implement masking. + // TODO implement break; // TEV indirect stages (0x10-0x1F) @@ -559,21 +556,16 @@ static void handle_bp(u32 value, bool bigEndian) { } // Scissor registers (0x20, 0x21) - case 0x20: { - u32 top = bp_get(value, 11, 0) - 340; - u32 left = bp_get(value, 11, 12) - 340; - aurora::gfx::set_scissor(left, top, 1, 1); // Width/height set by 0x21 - break; - } - case 0x21: { - // Need both 0x20 and 0x21 to reconstruct full scissor rect. - // For now, just consume - the GX function already called set_scissor. + case 0x20: case 0x21: { + Log.warn("Unimplemented: BP register {:x} (scissor)", regId); break; } - // Line/point size (0x22) - informational, not used for rendering - case 0x22: + // Line/point size (0x22) + case 0x22: { + Log.warn("Unimplemented: BP register {:x} (line/point size)", regId); break; + } // Indirect texture scale (0x25, 0x26) case 0x25: { @@ -690,8 +682,10 @@ static void handle_bp(u32 value, bool bigEndian) { } // PE control (0x43) - zcomp location - case 0x43: + case 0x43: { + // Log.warn("Unimplemented: BP register {:x} (zcomp loc)", regId); break; + } // Alpha compare (0xF3) case 0xF3: { @@ -734,10 +728,62 @@ static void handle_bp(u32 value, bool bigEndian) { break; } - // Fog type from FOG3 (0xF1) - decode fog type for display list playback + // Fog A/B parameters (0xEE-0xF0) + // FOG0 (0xEE): A parameter - sign(1)|exp(8)|mantissa(11) partial IEEE 754 float + case 0xEE: { + g_gxState.fog.fog0Raw = value; + // Reconstruct A = a_encoded * 2^b_s + u32 a_mant = bp_get(value, 11, 0); + u32 a_exp = bp_get(value, 8, 11); + u32 a_sign = bp_get(value, 1, 19); + u32 a_bits = (a_sign << 31) | (a_exp << 23) | (a_mant << 12); + float a_encoded; + std::memcpy(&a_encoded, &a_bits, sizeof(a_encoded)); + u32 b_s = g_gxState.fog.fog2Raw & 0x1F; + g_gxState.fog.a = std::ldexp(a_encoded, static_cast(b_s)); + g_gxState.stateDirty = true; + break; + } + // FOG1 (0xEF): B mantissa (24-bit) + case 0xEF: { + g_gxState.fog.fog1Raw = value; + u32 b_m = bp_get(value, 24, 0); + u32 b_s = g_gxState.fog.fog2Raw & 0x1F; + float B_mant = static_cast(b_m) / 8388638.0f; + g_gxState.fog.b = std::ldexp(B_mant, static_cast(b_s) - 1); + g_gxState.stateDirty = true; + break; + } + // FOG2 (0xF0): B shift/exponent (5-bit) + case 0xF0: { + g_gxState.fog.fog2Raw = value; + u32 b_s = bp_get(value, 5, 0); + // Recompute A with updated b_s + u32 a_mant = bp_get(g_gxState.fog.fog0Raw, 11, 0); + u32 a_exp = bp_get(g_gxState.fog.fog0Raw, 8, 11); + u32 a_sign = bp_get(g_gxState.fog.fog0Raw, 1, 19); + u32 a_bits = (a_sign << 31) | (a_exp << 23) | (a_mant << 12); + float a_encoded; + std::memcpy(&a_encoded, &a_bits, sizeof(a_encoded)); + g_gxState.fog.a = std::ldexp(a_encoded, static_cast(b_s)); + // Recompute B with updated b_s + u32 b_m = bp_get(g_gxState.fog.fog1Raw, 24, 0); + float B_mant = static_cast(b_m) / 8388638.0f; + g_gxState.fog.b = std::ldexp(B_mant, static_cast(b_s) - 1); + g_gxState.stateDirty = true; + break; + } + + // Fog type + C parameter from FOG3 (0xF1) case 0xF1: { GXFogType fogType = static_cast(bp_get(value, 3, 21)); g_gxState.fog.type = fogType; + // Decode C parameter (same partial float encoding as A) + u32 c_mant = bp_get(value, 11, 0); + u32 c_exp = bp_get(value, 8, 11); + u32 c_sign = bp_get(value, 1, 19); + u32 c_bits = (c_sign << 31) | (c_exp << 23) | (c_mant << 12); + std::memcpy(&g_gxState.fog.c, &c_bits, sizeof(g_gxState.fog.c)); g_gxState.stateDirty = true; break; } @@ -757,12 +803,6 @@ static void handle_bp(u32 value, bool bigEndian) { break; } - // Fog A/B parameters (0xEE-0xF0) - these carry encoded float coefficients. - // Aurora uses the high-level fog params (startZ/endZ/nearZ/farZ) from the side channel, - // so we only need the type and color from the FIFO for DL playback. - case 0xEE: case 0xEF: case 0xF0: - break; - // TEV color registers / K color registers (0xE0-0xE7) // RA registers: 0xE0, 0xE2, 0xE4, 0xE6 (even) // BG registers: 0xE1, 0xE3, 0xE5, 0xE7 (odd) @@ -813,20 +853,91 @@ static void handle_bp(u32 value, bool bigEndian) { } // Indirect texture matrices (0x06-0x0E) + // Each matrix uses 3 consecutive registers (one per row of the 3x2 matrix). + // Matrix 0: 0x06-0x08, Matrix 1: 0x09-0x0B, Matrix 2: 0x0C-0x0E case 0x06: case 0x07: case 0x08: case 0x09: case 0x0A: case 0x0B: - case 0x0C: case 0x0D: case 0x0E: - // These carry 2x3 matrix values packed into BP registers. - // The matrix data is already applied via GXSetIndTexMtx which updates g_gxState directly. + case 0x0C: case 0x0D: case 0x0E: { + u32 idx = (regId - 0x06) / 3; // matrix index (0-2) + u32 row = (regId - 0x06) % 3; // row index (0-2) + auto& info = g_gxState.indTexMtxs[idx]; + + // Decode 11-bit signed matrix elements (scaled by 1024) + s32 col0 = bp_get(value, 11, 0); + if (col0 & 0x400) col0 |= ~0x7FF; // sign-extend from 11 bits + s32 col1 = bp_get(value, 11, 11); + if (col1 & 0x400) col1 |= ~0x7FF; + + auto& r = row == 0 ? info.mtx.m0 : (row == 1 ? info.mtx.m1 : info.mtx.m2); + r.x = static_cast(col0) / 1024.0f; + r.y = static_cast(col1) / 1024.0f; + + // Accumulate 2-bit scale exponent part (adjScale = scaleExp + 17, split across 3 registers) + u32 scaleBits = bp_get(value, 2, 22); + u32 shift = row * 2; + info.adjScaleRaw = (info.adjScaleRaw & ~(3u << shift)) | (scaleBits << shift); + info.scaleExp = static_cast(info.adjScaleRaw) - 17; + + g_gxState.stateDirty = true; break; + } + + // SU texture coordinate scale registers (0x30-0x3F) + // Even registers (suTs0): S-axis scale, bias, cyl wrap, line/point offset + // Odd registers (suTs1): T-axis scale, bias, cyl wrap + case 0x30: case 0x31: case 0x32: case 0x33: + case 0x34: case 0x35: case 0x36: case 0x37: + case 0x38: case 0x39: case 0x3A: case 0x3B: + case 0x3C: case 0x3D: case 0x3E: case 0x3F: { + u32 coordIdx = (regId - 0x30) / 2; + bool isT = (regId & 1) != 0; + auto& tcs = g_gxState.texCoordScales[coordIdx]; + if (isT) { + tcs.scaleT = static_cast(bp_get(value, 16, 0)); + tcs.biasT = bp_get(value, 1, 16) != 0; + tcs.cylWrapT = bp_get(value, 1, 17) != 0; + } else { + tcs.scaleS = static_cast(bp_get(value, 16, 0)); + tcs.biasS = bp_get(value, 1, 16) != 0; + tcs.cylWrapS = bp_get(value, 1, 17) != 0; + tcs.lineOffset = bp_get(value, 1, 18) != 0; + tcs.pointOffset = bp_get(value, 1, 19) != 0; + } + g_gxState.stateDirty = true; + break; + } + + // Copy clear color (0x4F-0x50) and depth (0x51) + case 0x4F: { + u8 r = bp_get(value, 8, 0); + u8 a = bp_get(value, 8, 8); + g_gxState.clearColor[0] = static_cast(r) / 255.f; + g_gxState.clearColor[3] = static_cast(a) / 255.f; + g_gxState.stateDirty = true; + break; + } + case 0x50: { + u8 b = bp_get(value, 8, 0); + u8 g = bp_get(value, 8, 8); + g_gxState.clearColor[2] = static_cast(b) / 255.f; + g_gxState.clearColor[1] = static_cast(g) / 255.f; + g_gxState.stateDirty = true; + break; + } + case 0x51: { + g_gxState.clearDepth = bp_get(value, 24, 0); + g_gxState.stateDirty = true; + break; + } // Texture mode/image registers (0x80-0xBB) - texture config default: if (regId >= 0x80 && regId <= 0xBB) { // Texture format/wrap/filter configuration. // These are handled pragmatically - GXLoadTexObj sets texture handles directly. + } else { + Log.warn("Unhandled BP register 0x{:02X} (value 0x{:06X})", regId, value & 0xFFFFFF); } - // Silently ignore unknown BP registers break; } } diff --git a/lib/gfx/common.cpp b/lib/gfx/common.cpp index 7f6b8f5..914cee8 100644 --- a/lib/gfx/common.cpp +++ b/lib/gfx/common.cpp @@ -151,6 +151,7 @@ struct RenderPass { TextureHandle resolveTarget; ClipRect resolveRect; Vec4 clearColor{0.f, 0.f, 0.f, 0.f}; + float clearDepth = gx::UseReversedZ ? 0.f : 1.f; CommandList commands; bool clear = true; }; @@ -269,6 +270,7 @@ void resolve_pass(TextureHandle texture, ClipRect rect, bool clear, Vec4 currentPass.resolveRect = rect; auto& newPass = g_renderPasses.emplace_back(); newPass.clearColor = clearColor; + newPass.clearDepth = g_renderPasses[g_currentRenderPass].clearDepth; newPass.clear = clear; ++g_currentRenderPass; } @@ -507,6 +509,10 @@ void begin_frame() { g_renderPasses.emplace_back(); g_renderPasses[0].clearColor = gx::g_gxState.clearColor; + { + float normalizedDepth = static_cast(gx::g_gxState.clearDepth) / 16777215.f; + g_renderPasses[0].clearDepth = gx::UseReversedZ ? (1.f - normalizedDepth) : normalizedDepth; + } g_currentRenderPass = 0; // push_command(CommandType::SetViewport, Command::Data{.setViewport = g_cachedViewport}); // push_command(CommandType::SetScissor, Command::Data{.setScissor = g_cachedScissor}); @@ -586,7 +592,7 @@ void render(wgpu::CommandEncoder& cmd) { .view = webgpu::g_depthBuffer.view, .depthLoadOp = passInfo.clear ? wgpu::LoadOp::Clear : wgpu::LoadOp::Load, .depthStoreOp = wgpu::StoreOp::Store, - .depthClearValue = gx::UseReversedZ ? 0.f : 1.f, + .depthClearValue = passInfo.clearDepth, }; const auto label = fmt::format("Render pass {}", i); const wgpu::RenderPassDescriptor renderPassDescriptor{ diff --git a/lib/gfx/gx.hpp b/lib/gfx/gx.hpp index 4c5d16d..3551a0b 100644 --- a/lib/gfx/gx.hpp +++ b/lib/gfx/gx.hpp @@ -167,15 +167,17 @@ struct TcgConfig { static_assert(std::has_unique_object_representations_v); struct FogState { GXFogType type = GX_FOG_NONE; - float startZ = 0.f; - float endZ = 0.f; - float nearZ = 0.f; - float farZ = 0.f; + float a = 0.f; + float b = 0.5f; + float c = 0.f; Vec4 color; + // Raw encoded register values for A/B reconstruction across separate BP writes + u32 fog0Raw = 0; // 0xEE: encoded A parameter + u32 fog1Raw = 0; // 0xEF: B mantissa + u32 fog2Raw = 0; // 0xF0: B shift bool operator==(const FogState& rhs) const { - return type == rhs.type && startZ == rhs.startZ && endZ == rhs.endZ && nearZ == rhs.nearZ && farZ == rhs.farZ && - color == rhs.color; + return type == rhs.type && a == rhs.a && b == rhs.b && c == rhs.c && color == rhs.color; } bool operator!=(const FogState& rhs) const { return !(*this == rhs); } }; @@ -204,11 +206,30 @@ struct AlphaCompare { static_assert(std::has_unique_object_representations_v); struct IndTexMtxInfo { aurora::Mat3x2 mtx; - s8 scaleExp; + s8 scaleExp = 0; + // Accumulated adjScale bits from BP registers (2 bits per row, 3 rows) + u8 adjScaleRaw = 0; bool operator==(const IndTexMtxInfo& rhs) const { return mtx == rhs.mtx && scaleExp == rhs.scaleExp; } bool operator!=(const IndTexMtxInfo& rhs) const { return !(*this == rhs); } }; +struct TexCoordScale { + u16 scaleS = 0; // texture width - 1 + u16 scaleT = 0; // texture height - 1 + bool biasS = false; + bool biasT = false; + bool cylWrapS = false; + bool cylWrapT = false; + bool lineOffset = false; + bool pointOffset = false; + + bool operator==(const TexCoordScale& rhs) const { + return scaleS == rhs.scaleS && scaleT == rhs.scaleT && biasS == rhs.biasS && biasT == rhs.biasT && + cylWrapS == rhs.cylWrapS && cylWrapT == rhs.cylWrapT && lineOffset == rhs.lineOffset && + pointOffset == rhs.pointOffset; + } + bool operator!=(const TexCoordScale& rhs) const { return !(*this == rhs); } +}; struct VtxAttrFmt { GXCompCnt cnt; GXCompType type; @@ -267,6 +288,7 @@ struct GXState { GXLogicOp blendOp = GX_LO_CLEAR; GXCompare depthFunc = GX_LEQUAL; Vec4 clearColor{0.f, 0.f, 0.f, 1.f}; + u32 clearDepth = 0xFFFFFF; u32 dstAlpha; // u8; UINT32_MAX = disabled AlphaCompare alphaCompare; std::array, MaxTevRegs> colorRegs; @@ -280,6 +302,7 @@ struct GXState { std::array texMtxs; std::array, MaxPTTexMtx> ptTexMtxs; std::array tcgs; + std::array texCoordScales; std::array vtxDesc; std::array vtxFmts; std::array tevSwapTable{ diff --git a/lib/gfx/shader_info.cpp b/lib/gfx/shader_info.cpp index f69a336..0bc60f0 100644 --- a/lib/gfx/shader_info.cpp +++ b/lib/gfx/shader_info.cpp @@ -317,14 +317,7 @@ Range build_uniform(const ShaderInfo& info) noexcept { } if (info.usesFog) { const auto& state = g_gxState.fog; - Fog fog{.color = state.color}; - if (state.nearZ != state.farZ && state.startZ != state.endZ) { - const float depthRange = state.farZ - state.nearZ; - const float fogRange = state.endZ - state.startZ; - fog.a = (state.farZ * state.nearZ) / (depthRange * fogRange); - fog.b = state.farZ / depthRange; - fog.c = state.startZ / fogRange; - } + Fog fog{.color = state.color, .a = state.a, .b = state.b, .c = state.c}; buf.append(fog); } for (int i = 0; i < info.sampledTextures.size(); ++i) { diff --git a/tests/gx_fifo_test.cpp b/tests/gx_fifo_test.cpp index 4f1fc49..7b0b2a3 100644 --- a/tests/gx_fifo_test.cpp +++ b/tests/gx_fifo_test.cpp @@ -1531,6 +1531,385 @@ TEST_F(GXFifoTest, ChanMatColor_Color1A1_Compound) { EXPECT_NEAR(stateA.matColor[3], 128.f / 255.f, 1.f / 255.f); } +// ============================================================================ +// GXSetFog (BP 0xEE-0xF2) - Fog A/B/C parameters, type, and color +// ============================================================================ + +// --- Fog with perspective linear fog, typical parameters --- +TEST_F(GXFifoTest, Fog_PerspLin_Typical) { + GXColor fogColor = {128, 200, 255, 255}; + GXSetFog(GX_FOG_PERSP_LIN, 100.f, 900.f, 0.1f, 1000.f, fogColor); + auto bytes = capture_fifo(); + + // Should produce 5 BP writes (0xEE-0xF2): 5 * 5 = 25 bytes + ASSERT_EQ(bytes.size(), 25u); + // Verify BP opcodes and register IDs + EXPECT_EQ(bytes[0], 0x61); + EXPECT_EQ(bytes[1], 0xEE); + EXPECT_EQ(bytes[5], 0x61); + EXPECT_EQ(bytes[6], 0xEF); + EXPECT_EQ(bytes[10], 0x61); + EXPECT_EQ(bytes[11], 0xF0); + EXPECT_EQ(bytes[15], 0x61); + EXPECT_EQ(bytes[16], 0xF1); + EXPECT_EQ(bytes[20], 0x61); + EXPECT_EQ(bytes[21], 0xF2); + + reset_gx_state(); + decode_fifo(bytes); + + // Compute expected A, B, C from the SDK formula + float nearZ = 0.1f, farZ = 1000.f, startZ = 100.f, endZ = 900.f; + float A = (farZ * nearZ) / ((farZ - nearZ) * (endZ - startZ)); + float B = farZ / (farZ - nearZ); + float C = startZ / (endZ - startZ); + + // Allow tolerance for encoding precision loss (11-bit mantissa) + EXPECT_NEAR(g_gxState.fog.a, A, std::abs(A) * 1e-3f); + EXPECT_NEAR(g_gxState.fog.b, B, std::abs(B) * 1e-3f); + EXPECT_NEAR(g_gxState.fog.c, C, std::abs(C) * 1e-3f); + EXPECT_EQ(g_gxState.fog.type, GX_FOG_PERSP_LIN); + EXPECT_NEAR(g_gxState.fog.color[0], 128.f / 255.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.fog.color[1], 200.f / 255.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.fog.color[2], 255.f / 255.f, 1.f / 255.f); +} + +// --- Fog with degenerate parameters (nearZ == farZ) --- +TEST_F(GXFifoTest, Fog_Degenerate_EqualDepths) { + GXColor fogColor = {0, 0, 0, 255}; + GXSetFog(GX_FOG_PERSP_EXP, 0.f, 100.f, 10.f, 10.f, fogColor); + auto bytes = capture_fifo(); + + reset_gx_state(); + decode_fifo(bytes); + + // When nearZ == farZ, SDK sets A=0, B=0.5, C=0 + EXPECT_FLOAT_EQ(g_gxState.fog.a, 0.f); + EXPECT_NEAR(g_gxState.fog.b, 0.5f, 1e-3f); + EXPECT_FLOAT_EQ(g_gxState.fog.c, 0.f); + EXPECT_EQ(g_gxState.fog.type, GX_FOG_PERSP_EXP); +} + +// --- Fog type: none --- +TEST_F(GXFifoTest, Fog_None) { + GXColor fogColor = {64, 64, 64, 255}; + GXSetFog(GX_FOG_NONE, 0.f, 0.f, 0.f, 0.f, fogColor); + auto bytes = capture_fifo(); + + reset_gx_state(); + decode_fifo(bytes); + + EXPECT_EQ(g_gxState.fog.type, GX_FOG_NONE); + EXPECT_FLOAT_EQ(g_gxState.fog.a, 0.f); + EXPECT_NEAR(g_gxState.fog.b, 0.5f, 1e-3f); + EXPECT_FLOAT_EQ(g_gxState.fog.c, 0.f); + EXPECT_NEAR(g_gxState.fog.color[0], 64.f / 255.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.fog.color[1], 64.f / 255.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.fog.color[2], 64.f / 255.f, 1.f / 255.f); +} + +// --- Fog with perspective reverse exponential squared type --- +TEST_F(GXFifoTest, Fog_PerspRevExp2) { + GXColor fogColor = {255, 0, 0, 255}; + GXSetFog(GX_FOG_PERSP_REVEXP2, 50.f, 500.f, 1.f, 1000.f, fogColor); + auto bytes = capture_fifo(); + + reset_gx_state(); + decode_fifo(bytes); + + float nearZ = 1.f, farZ = 1000.f, startZ = 50.f, endZ = 500.f; + float A = (farZ * nearZ) / ((farZ - nearZ) * (endZ - startZ)); + float B = farZ / (farZ - nearZ); + float C = startZ / (endZ - startZ); + + EXPECT_NEAR(g_gxState.fog.a, A, std::abs(A) * 1e-3f); + EXPECT_NEAR(g_gxState.fog.b, B, std::abs(B) * 1e-3f); + EXPECT_NEAR(g_gxState.fog.c, C, std::abs(C) * 1e-3f); + EXPECT_EQ(g_gxState.fog.type, GX_FOG_PERSP_REVEXP2); + EXPECT_NEAR(g_gxState.fog.color[0], 1.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.fog.color[1], 0.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.fog.color[2], 0.f, 1.f / 255.f); +} + +// ============================================================================ +// GXSetIndTexMtx (BP 0x06-0x0E) - Indirect texture matrix parameters +// ============================================================================ + +// --- IndTexMtx 0 with half-scale diagonal matrix --- +// Note: 11-bit signed range limits values to [-1.0, 0.999], so 1.0 is not representable. +TEST_F(GXFifoTest, IndTexMtx0_HalfScale) { + f32 mtx[2][3] = { + {0.5f, 0.0f, 0.0f}, + {0.0f, 0.5f, 0.0f}, + }; + GXSetIndTexMtx(GX_ITM_0, mtx, 0); + auto bytes = capture_fifo(); + + // Should produce 3 BP writes: 3 * 5 = 15 bytes + ASSERT_EQ(bytes.size(), 15u); + // Verify BP opcodes and register IDs (0x06, 0x07, 0x08 for matrix 0) + EXPECT_EQ(bytes[0], 0x61); + EXPECT_EQ(bytes[1], 0x06); + EXPECT_EQ(bytes[5], 0x61); + EXPECT_EQ(bytes[6], 0x07); + EXPECT_EQ(bytes[10], 0x61); + EXPECT_EQ(bytes[11], 0x08); + + reset_gx_state(); + decode_fifo(bytes); + + const auto& info = g_gxState.indTexMtxs[0]; + // 11-bit fixed-point (1/1024) precision + float tol = 1.0f / 1024.0f; + EXPECT_NEAR(info.mtx.m0.x, 0.5f, tol); + EXPECT_NEAR(info.mtx.m0.y, 0.0f, tol); + EXPECT_NEAR(info.mtx.m1.x, 0.0f, tol); + EXPECT_NEAR(info.mtx.m1.y, 0.5f, tol); + EXPECT_NEAR(info.mtx.m2.x, 0.0f, tol); + EXPECT_NEAR(info.mtx.m2.y, 0.0f, tol); + EXPECT_EQ(info.scaleExp, 0); +} + +// --- IndTexMtx 1 with fractional values and positive scale --- +TEST_F(GXFifoTest, IndTexMtx1_FractionalWithScale) { + f32 mtx[2][3] = { + {0.5f, 0.25f, -0.125f}, + {-0.5f, 0.75f, 0.0f}, + }; + GXSetIndTexMtx(GX_ITM_1, mtx, 3); + auto bytes = capture_fifo(); + + // Register IDs for matrix 1: 0x09, 0x0A, 0x0B + ASSERT_EQ(bytes.size(), 15u); + EXPECT_EQ(bytes[1], 0x09); + EXPECT_EQ(bytes[6], 0x0A); + EXPECT_EQ(bytes[11], 0x0B); + + reset_gx_state(); + decode_fifo(bytes); + + const auto& info = g_gxState.indTexMtxs[1]; + float tol = 1.0f / 1024.0f; + EXPECT_NEAR(info.mtx.m0.x, 0.5f, tol); + EXPECT_NEAR(info.mtx.m0.y, -0.5f, tol); + EXPECT_NEAR(info.mtx.m1.x, 0.25f, tol); + EXPECT_NEAR(info.mtx.m1.y, 0.75f, tol); + EXPECT_NEAR(info.mtx.m2.x, -0.125f, tol); + EXPECT_NEAR(info.mtx.m2.y, 0.0f, tol); + EXPECT_EQ(info.scaleExp, 3); +} + +// --- IndTexMtx 2 with negative scale exponent --- +TEST_F(GXFifoTest, IndTexMtx2_NegativeScale) { + f32 mtx[2][3] = { + {0.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 0.0f}, + }; + GXSetIndTexMtx(GX_ITM_2, mtx, -5); + auto bytes = capture_fifo(); + + // Register IDs for matrix 2: 0x0C, 0x0D, 0x0E + ASSERT_EQ(bytes.size(), 15u); + EXPECT_EQ(bytes[1], 0x0C); + EXPECT_EQ(bytes[6], 0x0D); + EXPECT_EQ(bytes[11], 0x0E); + + reset_gx_state(); + decode_fifo(bytes); + + const auto& info = g_gxState.indTexMtxs[2]; + EXPECT_EQ(info.scaleExp, -5); +} + +// --- IndTexMtx 0 does not affect matrix 1 --- +TEST_F(GXFifoTest, IndTexMtx0_Isolation) { + f32 mtx0[2][3] = { + {0.5f, 0.0f, 0.0f}, + {0.0f, 0.5f, 0.0f}, + }; + f32 mtx1[2][3] = { + {-1.0f, 0.0f, 0.0f}, + {0.0f, -1.0f, 0.0f}, + }; + GXSetIndTexMtx(GX_ITM_0, mtx0, 1); + GXSetIndTexMtx(GX_ITM_1, mtx1, -2); + auto bytes = capture_fifo(); + + reset_gx_state(); + decode_fifo(bytes); + + float tol = 1.0f / 1024.0f; + // Matrix 0 + EXPECT_NEAR(g_gxState.indTexMtxs[0].mtx.m0.x, 0.5f, tol); + EXPECT_NEAR(g_gxState.indTexMtxs[0].mtx.m1.y, 0.5f, tol); + EXPECT_EQ(g_gxState.indTexMtxs[0].scaleExp, 1); + // Matrix 1 + EXPECT_NEAR(g_gxState.indTexMtxs[1].mtx.m0.x, -1.0f, tol); + EXPECT_NEAR(g_gxState.indTexMtxs[1].mtx.m1.y, -1.0f, tol); + EXPECT_EQ(g_gxState.indTexMtxs[1].scaleExp, -2); +} + +// ============================================================================ +// SU Texture Coordinate Scale (BP 0x30-0x3F) +// ============================================================================ + +// --- GXSetTexCoordScaleManually sets width/height --- +TEST_F(GXFifoTest, TexCoordScale_Manual_Coord0) { + GXSetTexCoordScaleManually(GX_TEXCOORD0, GX_TRUE, 256, 128); + auto bytes = capture_fifo(); + + // Two BP writes (suTs0 + suTs1): 2 * 5 = 10 bytes + ASSERT_EQ(bytes.size(), 10u); + EXPECT_EQ(bytes[0], 0x61); + EXPECT_EQ(bytes[1], 0x30); // suTs0[0] + EXPECT_EQ(bytes[5], 0x61); + EXPECT_EQ(bytes[6], 0x31); // suTs1[0] + + reset_gx_state(); + decode_fifo(bytes); + + const auto& tcs = g_gxState.texCoordScales[0]; + EXPECT_EQ(tcs.scaleS, 255u); // width - 1 + EXPECT_EQ(tcs.scaleT, 127u); // height - 1 +} + +// --- GXSetTexCoordScaleManually for coord 3 --- +TEST_F(GXFifoTest, TexCoordScale_Manual_Coord3) { + GXSetTexCoordScaleManually(GX_TEXCOORD3, GX_TRUE, 512, 512); + auto bytes = capture_fifo(); + + ASSERT_EQ(bytes.size(), 10u); + EXPECT_EQ(bytes[1], 0x36); // suTs0[3] = 0x30 + 3*2 + EXPECT_EQ(bytes[6], 0x37); // suTs1[3] = 0x31 + 3*2 + + reset_gx_state(); + decode_fifo(bytes); + + const auto& tcs = g_gxState.texCoordScales[3]; + EXPECT_EQ(tcs.scaleS, 511u); + EXPECT_EQ(tcs.scaleT, 511u); +} + +// --- GXSetTexCoordScaleManually with bias and cyl wrap --- +TEST_F(GXFifoTest, TexCoordScale_BiasAndCylWrap) { + // Enable manual mode first, then set bias and cyl wrap + GXSetTexCoordScaleManually(GX_TEXCOORD0, GX_TRUE, 64, 64); + capture_fifo(); // discard + + GXSetTexCoordBias(GX_TEXCOORD0, GX_TRUE, GX_FALSE); + auto biasBytes = capture_fifo(); + + GXSetTexCoordCylWrap(GX_TEXCOORD0, GX_FALSE, GX_TRUE); + auto cylBytes = capture_fifo(); + + // Each writes 2 BP regs + ASSERT_EQ(biasBytes.size(), 10u); + ASSERT_EQ(cylBytes.size(), 10u); + + reset_gx_state(); + decode_fifo(biasBytes); + decode_fifo(cylBytes); + + const auto& tcs = g_gxState.texCoordScales[0]; + EXPECT_TRUE(tcs.biasS); + EXPECT_FALSE(tcs.biasT); + EXPECT_FALSE(tcs.cylWrapS); + EXPECT_TRUE(tcs.cylWrapT); +} + +// --- GXEnableTexOffsets --- +TEST_F(GXFifoTest, TexCoordScale_TexOffsets) { + GXEnableTexOffsets(GX_TEXCOORD2, GX_TRUE, GX_TRUE); + auto bytes = capture_fifo(); + + // One BP write (suTs0 only): 5 bytes + ASSERT_EQ(bytes.size(), 5u); + EXPECT_EQ(bytes[1], 0x34); // suTs0[2] = 0x30 + 2*2 + + reset_gx_state(); + decode_fifo(bytes); + + const auto& tcs = g_gxState.texCoordScales[2]; + EXPECT_TRUE(tcs.lineOffset); + EXPECT_TRUE(tcs.pointOffset); +} + +// --- Coord isolation: writing coord 0 doesn't affect coord 1 --- +TEST_F(GXFifoTest, TexCoordScale_Isolation) { + GXSetTexCoordScaleManually(GX_TEXCOORD0, GX_TRUE, 100, 200); + GXSetTexCoordScaleManually(GX_TEXCOORD1, GX_TRUE, 300, 400); + auto bytes = capture_fifo(); + + reset_gx_state(); + decode_fifo(bytes); + + EXPECT_EQ(g_gxState.texCoordScales[0].scaleS, 99u); + EXPECT_EQ(g_gxState.texCoordScales[0].scaleT, 199u); + EXPECT_EQ(g_gxState.texCoordScales[1].scaleS, 299u); + EXPECT_EQ(g_gxState.texCoordScales[1].scaleT, 399u); +} + +// ============================================================================ +// GXSetCopyClear (BP 0x4F-0x51) - Clear color and depth +// ============================================================================ + +// --- Clear color and depth round-trip --- +TEST_F(GXFifoTest, CopyClear_ColorAndDepth) { + GXColor color = {64, 128, 192, 255}; + GXSetCopyClear(color, 0x00ABCDEF); + auto bytes = capture_fifo(); + + // 3 BP writes: 3 * 5 = 15 bytes + ASSERT_EQ(bytes.size(), 15u); + EXPECT_EQ(bytes[0], 0x61); + EXPECT_EQ(bytes[1], 0x4F); // R + A + EXPECT_EQ(bytes[5], 0x61); + EXPECT_EQ(bytes[6], 0x50); // B + G + EXPECT_EQ(bytes[10], 0x61); + EXPECT_EQ(bytes[11], 0x51); // Z + + reset_gx_state(); + decode_fifo(bytes); + + EXPECT_NEAR(g_gxState.clearColor[0], 64.f / 255.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.clearColor[1], 128.f / 255.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.clearColor[2], 192.f / 255.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.clearColor[3], 255.f / 255.f, 1.f / 255.f); + EXPECT_EQ(g_gxState.clearDepth, 0x00ABCDEFu); +} + +// --- Clear with black and zero depth --- +TEST_F(GXFifoTest, CopyClear_BlackZeroDepth) { + GXColor color = {0, 0, 0, 0}; + GXSetCopyClear(color, 0); + auto bytes = capture_fifo(); + + reset_gx_state(); + decode_fifo(bytes); + + EXPECT_NEAR(g_gxState.clearColor[0], 0.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.clearColor[1], 0.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.clearColor[2], 0.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.clearColor[3], 0.f, 1.f / 255.f); + EXPECT_EQ(g_gxState.clearDepth, 0u); +} + +// --- Clear with max depth --- +TEST_F(GXFifoTest, CopyClear_MaxDepth) { + GXColor color = {255, 255, 255, 128}; + GXSetCopyClear(color, 0xFFFFFF); + auto bytes = capture_fifo(); + + reset_gx_state(); + decode_fifo(bytes); + + EXPECT_NEAR(g_gxState.clearColor[0], 1.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.clearColor[1], 1.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.clearColor[2], 1.f, 1.f / 255.f); + EXPECT_NEAR(g_gxState.clearColor[3], 128.f / 255.f, 1.f / 255.f); + EXPECT_EQ(g_gxState.clearDepth, 0xFFFFFFu); +} + // ============================================================================ // Composite tests (multiple state changes in a single FIFO stream) // ============================================================================