Fog & TCS FIFO-ification

2026-03-30 10:57:39 -07:00 · 2026-02-18 13:41:42 -07:00
parent 73b81f53aa
commit 60eba7dff9
13 changed files with 632 additions and 77 deletions
@@ -24,6 +24,8 @@ void GXInitTexObjWrapMode(GXTexObj* obj, GXTexWrapMode s, GXTexWrapMode t);
 void GXInitTlutObj(GXTlutObj* obj, const void* data, GXTlutFmt format, u16 entries);
 void GXLoadTlut(const GXTlutObj* obj, GXTlut idx);
 void GXSetTexCoordScaleManually(GXTexCoordID coord, GXBool enable, u16 ss, u16 ts);
+void GXSetTexCoordCylWrap(GXTexCoordID coord, GXBool s_enable, GXBool t_enable);
+void GXSetTexCoordBias(GXTexCoordID coord, GXBool s_enable, GXBool t_enable);
 void GXInitTexCacheRegion(GXTexRegion* region, GXBool is_32b_mipmap, u32 tmem_even, GXTexCacheSize size_even,
                          u32 tmem_odd, GXTexCacheSize size_odd);
 GXTexRegionCallback GXSetTexRegionCallback(GXTexRegionCallback callback);
@@ -1,4 +1,5 @@
 #include "gx.hpp"
+#include "__gx.h"

 #include "../../window.hpp"
 #include "../../webgpu/wgpu.hpp"
@@ -39,7 +40,28 @@ void GXSetTexCopyDst(u16 wd, u16 ht, GXTexFmt fmt, GXBool mipmap) {

 u32 GXSetDispCopyYScale(f32 vscale) { return 0; }

-void GXSetCopyClear(GXColor color, u32 depth) { update_gx_state(g_gxState.clearColor, from_gx_color(color)); }
+void GXSetCopyClear(GXColor color, u32 depth) { // Emits the clear colour/depth as three BP register writes (0x4F, 0x50, 0x51) rather than updating g_gxState.clearColor directly
+  // BP 0x4F: clear colour, red in bits 0-7 and alpha in bits 8-15
+  u32 reg0 = 0;
+  SET_REG_FIELD(0, reg0, 8, 0, color.r);
+  SET_REG_FIELD(0, reg0, 8, 8, color.a);
+  SET_REG_FIELD(0, reg0, 8, 24, 0x4F); // register ID lives in the top byte (bits 24-31)
+  GX_WRITE_RAS_REG(reg0);
+
+  // BP 0x50: clear colour, blue in bits 0-7 and green in bits 8-15
+  u32 reg1 = 0;
+  SET_REG_FIELD(0, reg1, 8, 0, color.b);
+  SET_REG_FIELD(0, reg1, 8, 8, color.g);
+  SET_REG_FIELD(0, reg1, 8, 24, 0x50);
+  GX_WRITE_RAS_REG(reg1);
+
+  // BP 0x51: clear Z, stored in the low 24 bits of the register
+  u32 reg2 = 0;
+  SET_REG_FIELD(0, reg2, 24, 0, depth); // NOTE(review): presumably SET_REG_FIELD masks depth to 24 bits - confirm against the macro
+  SET_REG_FIELD(0, reg2, 8, 24, 0x51);
+  GX_WRITE_RAS_REG(reg2);
+  __gx->bpSent = 1; // same post-write flag the other BP-emitting setters in this change set
+}

 void GXSetCopyFilter(GXBool aa, u8 sample_pattern[12][2], GXBool vf, u8 vfilter[7]) {}

@@ -364,5 +364,11 @@ void GXSetLineWidth(u8 width, GXTexOffset offs) {
 }

 // TODO GXSetPointSize
-// TODO GXEnableTexOffsets
+
+void GXEnableTexOffsets(GXTexCoordID coord, GXBool line_enable, GXBool point_enable) { // Updates the line/point offset flags in the shadowed suTs0 register for coord and emits it
+  SET_REG_FIELD(0, __gx->suTs0[coord], 1, 18, line_enable); // bit 18: line offset enable
+  SET_REG_FIELD(0, __gx->suTs0[coord], 1, 19, point_enable); // bit 19: point offset enable
+  GX_WRITE_RAS_REG(__gx->suTs0[coord]); // written unconditionally, unlike the bias/cyl-wrap setters which only write in manual-scale mode
+  __gx->bpSent = 1;
+}
 }
@@ -250,8 +250,12 @@ static void __SetSURegs(u32 tmap, u32 tcoord) {

 void __GXSetSUTexRegs() {
  // Write SU texture size/bias registers for each active TEV stage and indirect stage.
-  // tcsManEnab == 0xFF means all coords are manually managed (skip auto-setup).
-  // Aurora doesn't use tcsManEnab, so we always auto-setup (tcsManEnab = 0).
+  // Skip coords that have manual scale enabled (tcsManEnab bit set).
+  // If all coords are manual (0xFF), skip entirely.
+  if (__gx->tcsManEnab == 0xFF) {
+    return;
+  }
+
  u32 nStages = GET_REG_FIELD(__gx->genMode, 4, 10) + 1;
  u32 nIndStages = GET_REG_FIELD(__gx->genMode, 3, 16);

@@ -276,7 +280,9 @@ void __GXSetSUTexRegs() {
      coord = GET_REG_FIELD(__gx->iref, 3, 21);
      break;
    }
-    __SetSURegs(tmap, coord);
+    if (!(__gx->tcsManEnab & (1 << coord))) {
+      __SetSURegs(tmap, coord);
+    }
  }

  // Direct TEV stages
@@ -290,7 +296,7 @@ void __GXSetSUTexRegs() {
    } else {
      coord = GET_REG_FIELD(*ptref, 3, 3);
    }
-    if (tmap != 0xFF) {
+    if (tmap != 0xFF && !(__gx->tcsManEnab & (1 << coord))) {
      __SetSURegs(tmap, coord);
    }
  }
@@ -76,9 +76,6 @@ void GXSetFog(GXFogType type, float startZ, float endZ, float nearZ, float farZ,
  GX_WRITE_RAS_REG(fog3);
  GX_WRITE_RAS_REG(fogclr);
  __gx->bpSent = 1;
-
-  // Side channel: direct update for inline rendering (full precision)
-  update_gx_state(g_gxState.fog, {type, startZ, endZ, nearZ, farZ, from_gx_color(color)});
 }

 void GXSetFogColor(GXColor color) {
@@ -90,9 +87,6 @@ void GXSetFogColor(GXColor color) {
  SET_REG_FIELD(0, fogclr, 8, 24, 0xF2);
  GX_WRITE_RAS_REG(fogclr);
  __gx->bpSent = 1;
-
-  // Side channel: direct update for inline rendering
-  update_gx_state(g_gxState.fog.color, from_gx_color(color));
 }

 void GXSetBlendMode(GXBlendMode mode, GXBlendFactor src, GXBlendFactor dst, GXLogicOp op) {
@@ -107,9 +107,6 @@ void GXSetTevColor(GXTevRegID id, GXColor color) {
  // We omit the redundant writes since they don't change the register value and
  // our software command processor doesn't need the sync delay.
  __gx->bpSent = 1;
-
-  // Side channel: direct update for inline rendering (full precision)
-  update_gx_state(g_gxState.colorRegs[id], from_gx_color(color));
 }

 void GXSetTevColorS10(GXTevRegID id, GXColorS10 color) {
@@ -128,14 +125,6 @@ void GXSetTevColorS10(GXTevRegID id, GXColorS10 color) {
  // We omit the redundant writes since they don't change the register value and
  // our software command processor doesn't need the sync delay.
  __gx->bpSent = 1;
-
-  // Side channel: direct update for inline rendering (full precision)
-  update_gx_state(g_gxState.colorRegs[id], aurora::Vec4<float>{
-                                               static_cast<float>(color.r) / 255.f,
-                                               static_cast<float>(color.g) / 255.f,
-                                               static_cast<float>(color.b) / 255.f,
-                                               static_cast<float>(color.a) / 255.f,
-                                           });
 }

 void GXSetAlphaCompare(GXCompare comp0, u8 ref0, GXAlphaOp op, GXCompare comp1, u8 ref1) {
@@ -204,9 +193,6 @@ void GXSetTevKColor(GXTevKColorID id, GXColor color) {
  GX_WRITE_RAS_REG(regRA);
  GX_WRITE_RAS_REG(regBG);
  __gx->bpSent = 1;
-
-  // Side channel: direct update for inline rendering (full precision)
-  update_gx_state(g_gxState.kcolors[id], from_gx_color(color));
 }

 void GXSetTevKColorSel(GXTevStageID id, GXTevKColorSel sel) {
@@ -1,4 +1,5 @@
 #include "gx.hpp"
+#include "__gx.h"

 #include "../../gfx/texture.hpp"

@@ -234,8 +235,33 @@ void GXInvalidateTexAll() {
 // TODO GXSetTlutRegionCallback
 // TODO GXLoadTexObjPreLoaded
 void GXSetTexCoordScaleManually(GXTexCoordID coord, GXBool enable, u16 ss, u16 ts) { // Toggles manual SU scale for coord; when enabling, also stores and emits the given S/T scale
-  // TODO
+  __gx->tcsManEnab = (__gx->tcsManEnab & ~(1 << coord)) | (enable << coord); // NOTE(review): assumes enable is exactly 0 or 1; any other non-zero value would set bits of other coords - confirm callers
+  if (enable) {
+    SET_REG_FIELD(0, __gx->suTs0[coord], 16, 0, static_cast<u16>(ss - 1)); // low 16 bits hold scale minus one (ss == 0 wraps to 0xFFFF)
+    SET_REG_FIELD(0, __gx->suTs1[coord], 16, 0, static_cast<u16>(ts - 1));
+    GX_WRITE_RAS_REG(__gx->suTs0[coord]);
+    GX_WRITE_RAS_REG(__gx->suTs1[coord]);
+    __gx->bpSent = 1;
+  } // disabling only clears the tcsManEnab bit; no register write is issued here
+}
+
+void GXSetTexCoordCylWrap(GXTexCoordID coord, GXBool s_enable, GXBool t_enable) { // Sets the cyl-wrap flag (bit 17) in the shadowed suTs0 (S) and suTs1 (T) registers for coord
+  SET_REG_FIELD(0, __gx->suTs0[coord], 1, 17, s_enable);
+  SET_REG_FIELD(0, __gx->suTs1[coord], 1, 17, t_enable);
+  if (__gx->tcsManEnab & (1 << coord)) { // only emit now when the coord is manually scaled; NOTE(review): presumably non-manual coords are emitted later via __GXSetSUTexRegs - confirm
+    GX_WRITE_RAS_REG(__gx->suTs0[coord]);
+    GX_WRITE_RAS_REG(__gx->suTs1[coord]);
+    __gx->bpSent = 1;
+  }
+}
+
+void GXSetTexCoordBias(GXTexCoordID coord, GXBool s_enable, GXBool t_enable) { // Sets the bias flag (bit 16) in the shadowed suTs0 (S) and suTs1 (T) registers for coord
+  SET_REG_FIELD(0, __gx->suTs0[coord], 1, 16, s_enable);
+  SET_REG_FIELD(0, __gx->suTs1[coord], 1, 16, t_enable);
+  if (__gx->tcsManEnab & (1 << coord)) { // only emit now when the coord is manually scaled; NOTE(review): presumably non-manual coords are emitted later via __GXSetSUTexRegs - confirm
+    GX_WRITE_RAS_REG(__gx->suTs0[coord]);
+    GX_WRITE_RAS_REG(__gx->suTs1[coord]);
+    __gx->bpSent = 1;
+  }
 }
-// TODO GXSetTexCoordCylWrap
-// TODO GXSetTexCoordBias
 }
@@ -99,6 +99,7 @@ struct __GXData_struct {

  u32 suTs0[8];     // SU texture S0 registers
  u32 suTs1[8];     // SU texture S1 registers
+  u8 tcsManEnab;    // bitmask: manual tex coord scale enabled per coord
  u32 suScis0;      // scissor top-left
  u32 suScis1;      // scissor bottom-right

@@ -8,6 +8,9 @@

 #include <absl/container/flat_hash_map.h>

+#include <cmath>
+#include <cstring>
+
 static aurora::Module Log("aurora::gfx::cp");

 using aurora::gfx::gx::g_gxState;
@@ -333,17 +336,11 @@ void process(const u8* data, u32 size, bool bigEndian) {

  while (pos < size) {
    u8 cmd = data[pos++];
-
-    if (cmd == CP_CMD_NOP) {
-      continue;
-    }
-
    u8 opcode = cmd & CP_OPCODE_MASK;
    // Log.warn("Processing opcode {:02x} at pos {} (size {})", opcode, pos - 1, size);

    switch (opcode) {
    case CP_CMD_NOP:
-      // Already handled above, but could be 0x00 with VAT bits set
      continue;

    case CP_CMD_LOAD_BP_REG: {
@@ -374,7 +371,7 @@ void process(const u8* data, u32 size, bool bigEndian) {
    case CP_CMD_LOAD_INDX_D: {
      // Indexed XF load: 4 bytes of data
      CHECK(pos + 4 <= size, "indexed XF read overrun");
-      // TODO: handle indexed XF loads
+      Log.warn("Unimplemented indexed XF load (opcode 0x{:02X})", opcode);
      pos += 4;
      break;
    }
@@ -388,7 +385,7 @@ void process(const u8* data, u32 size, bool bigEndian) {
    }

    case CP_CMD_INVAL_VTX: {
-      // Invalidate vertex cache - no data
+      // Invalidate vertex cache
      break;
    }

@@ -533,7 +530,7 @@ static void handle_bp(u32 value, bool bigEndian) {
  // BP mask (0x0F) - internal, applies to next BP write
  case 0x0F:
    // The BP mask is used by the hardware to selectively update fields.
-    // For our purposes, we don't need to implement masking.
+    // TODO implement
    break;

  // TEV indirect stages (0x10-0x1F)
@@ -559,21 +556,16 @@ static void handle_bp(u32 value, bool bigEndian) {
  }

  // Scissor registers (0x20, 0x21)
-  case 0x20: {
-    u32 top = bp_get(value, 11, 0) - 340;
-    u32 left = bp_get(value, 11, 12) - 340;
-    aurora::gfx::set_scissor(left, top, 1, 1); // Width/height set by 0x21
-    break;
-  }
-  case 0x21: {
-    // Need both 0x20 and 0x21 to reconstruct full scissor rect.
-    // For now, just consume - the GX function already called set_scissor.
+  case 0x20: case 0x21: {
+    Log.warn("Unimplemented: BP register {:x} (scissor)", regId);
    break;
  }

-  // Line/point size (0x22) - informational, not used for rendering
-  case 0x22:
+  // Line/point size (0x22)
+  case 0x22: {
+    Log.warn("Unimplemented: BP register {:x} (line/point size)", regId);
    break;
+  }

  // Indirect texture scale (0x25, 0x26)
  case 0x25: {
@@ -690,8 +682,10 @@ static void handle_bp(u32 value, bool bigEndian) {
  }

  // PE control (0x43) - zcomp location
-  case 0x43:
+  case 0x43: {
+    // Log.warn("Unimplemented: BP register {:x} (zcomp loc)", regId);
    break;
+  }

  // Alpha compare (0xF3)
  case 0xF3: {
@@ -734,10 +728,62 @@ static void handle_bp(u32 value, bool bigEndian) {
    break;
  }

-  // Fog type from FOG3 (0xF1) - decode fog type for display list playback
+  // Fog A/B parameters (0xEE-0xF0)
+  // FOG0 (0xEE): A parameter - sign(1)|exp(8)|mantissa(11) partial IEEE 754 float
+  case 0xEE: {
+    g_gxState.fog.fog0Raw = value;
+    // Reconstruct A = a_encoded * 2^b_s
+    u32 a_mant = bp_get(value, 11, 0);
+    u32 a_exp = bp_get(value, 8, 11);
+    u32 a_sign = bp_get(value, 1, 19);
+    u32 a_bits = (a_sign << 31) | (a_exp << 23) | (a_mant << 12);
+    float a_encoded;
+    std::memcpy(&a_encoded, &a_bits, sizeof(a_encoded));
+    u32 b_s = g_gxState.fog.fog2Raw & 0x1F;
+    g_gxState.fog.a = std::ldexp(a_encoded, static_cast<int>(b_s));
+    g_gxState.stateDirty = true;
+    break;
+  }
+  // FOG1 (0xEF): B mantissa (24-bit)
+  case 0xEF: {
+    g_gxState.fog.fog1Raw = value;
+    u32 b_m = bp_get(value, 24, 0);
+    u32 b_s = g_gxState.fog.fog2Raw & 0x1F;
+    float B_mant = static_cast<float>(b_m) / 8388638.0f;
+    g_gxState.fog.b = std::ldexp(B_mant, static_cast<int>(b_s) - 1);
+    g_gxState.stateDirty = true;
+    break;
+  }
+  // FOG2 (0xF0): B shift/exponent (5-bit)
+  case 0xF0: {
+    g_gxState.fog.fog2Raw = value;
+    u32 b_s = bp_get(value, 5, 0);
+    // Recompute A with updated b_s
+    u32 a_mant = bp_get(g_gxState.fog.fog0Raw, 11, 0);
+    u32 a_exp = bp_get(g_gxState.fog.fog0Raw, 8, 11);
+    u32 a_sign = bp_get(g_gxState.fog.fog0Raw, 1, 19);
+    u32 a_bits = (a_sign << 31) | (a_exp << 23) | (a_mant << 12);
+    float a_encoded;
+    std::memcpy(&a_encoded, &a_bits, sizeof(a_encoded));
+    g_gxState.fog.a = std::ldexp(a_encoded, static_cast<int>(b_s));
+    // Recompute B with updated b_s
+    u32 b_m = bp_get(g_gxState.fog.fog1Raw, 24, 0);
+    float B_mant = static_cast<float>(b_m) / 8388638.0f;
+    g_gxState.fog.b = std::ldexp(B_mant, static_cast<int>(b_s) - 1);
+    g_gxState.stateDirty = true;
+    break;
+  }
+
+  // Fog type + C parameter from FOG3 (0xF1)
  case 0xF1: {
    GXFogType fogType = static_cast<GXFogType>(bp_get(value, 3, 21));
    g_gxState.fog.type = fogType;
+    // Decode C parameter (same partial float encoding as A)
+    u32 c_mant = bp_get(value, 11, 0);
+    u32 c_exp = bp_get(value, 8, 11);
+    u32 c_sign = bp_get(value, 1, 19);
+    u32 c_bits = (c_sign << 31) | (c_exp << 23) | (c_mant << 12);
+    std::memcpy(&g_gxState.fog.c, &c_bits, sizeof(g_gxState.fog.c));
    g_gxState.stateDirty = true;
    break;
  }
@@ -757,12 +803,6 @@ static void handle_bp(u32 value, bool bigEndian) {
    break;
  }

-  // Fog A/B parameters (0xEE-0xF0) - these carry encoded float coefficients.
-  // Aurora uses the high-level fog params (startZ/endZ/nearZ/farZ) from the side channel,
-  // so we only need the type and color from the FIFO for DL playback.
-  case 0xEE: case 0xEF: case 0xF0:
-    break;
-
  // TEV color registers / K color registers (0xE0-0xE7)
  // RA registers: 0xE0, 0xE2, 0xE4, 0xE6 (even)
  // BG registers: 0xE1, 0xE3, 0xE5, 0xE7 (odd)
@@ -813,20 +853,91 @@ static void handle_bp(u32 value, bool bigEndian) {
  }

  // Indirect texture matrices (0x06-0x0E)
+  // Each matrix uses 3 consecutive registers (one per row of the 3x2 matrix).
+  // Matrix 0: 0x06-0x08, Matrix 1: 0x09-0x0B, Matrix 2: 0x0C-0x0E
  case 0x06: case 0x07: case 0x08:
  case 0x09: case 0x0A: case 0x0B:
-  case 0x0C: case 0x0D: case 0x0E:
-    // These carry 2x3 matrix values packed into BP registers.
-    // The matrix data is already applied via GXSetIndTexMtx which updates g_gxState directly.
+  case 0x0C: case 0x0D: case 0x0E: {
+    u32 idx = (regId - 0x06) / 3; // matrix index (0-2)
+    u32 row = (regId - 0x06) % 3; // row index (0-2)
+    auto& info = g_gxState.indTexMtxs[idx];
+
+    // Decode 11-bit signed matrix elements (scaled by 1024)
+    s32 col0 = bp_get(value, 11, 0);
+    if (col0 & 0x400) col0 |= ~0x7FF; // sign-extend from 11 bits
+    s32 col1 = bp_get(value, 11, 11);
+    if (col1 & 0x400) col1 |= ~0x7FF;
+
+    auto& r = row == 0 ? info.mtx.m0 : (row == 1 ? info.mtx.m1 : info.mtx.m2);
+    r.x = static_cast<float>(col0) / 1024.0f;
+    r.y = static_cast<float>(col1) / 1024.0f;
+
+    // Accumulate 2-bit scale exponent part (adjScale = scaleExp + 17, split across 3 registers)
+    u32 scaleBits = bp_get(value, 2, 22);
+    u32 shift = row * 2;
+    info.adjScaleRaw = (info.adjScaleRaw & ~(3u << shift)) | (scaleBits << shift);
+    info.scaleExp = static_cast<s8>(info.adjScaleRaw) - 17;
+
+    g_gxState.stateDirty = true;
    break;
+  }
+
+  // SU texture coordinate scale registers (0x30-0x3F)
+  // Even registers (suTs0): S-axis scale, bias, cyl wrap, line/point offset
+  // Odd registers (suTs1): T-axis scale, bias, cyl wrap
+  case 0x30: case 0x31: case 0x32: case 0x33:
+  case 0x34: case 0x35: case 0x36: case 0x37:
+  case 0x38: case 0x39: case 0x3A: case 0x3B:
+  case 0x3C: case 0x3D: case 0x3E: case 0x3F: {
+    u32 coordIdx = (regId - 0x30) / 2;
+    bool isT = (regId & 1) != 0;
+    auto& tcs = g_gxState.texCoordScales[coordIdx];
+    if (isT) {
+      tcs.scaleT = static_cast<u16>(bp_get(value, 16, 0));
+      tcs.biasT = bp_get(value, 1, 16) != 0;
+      tcs.cylWrapT = bp_get(value, 1, 17) != 0;
+    } else {
+      tcs.scaleS = static_cast<u16>(bp_get(value, 16, 0));
+      tcs.biasS = bp_get(value, 1, 16) != 0;
+      tcs.cylWrapS = bp_get(value, 1, 17) != 0;
+      tcs.lineOffset = bp_get(value, 1, 18) != 0;
+      tcs.pointOffset = bp_get(value, 1, 19) != 0;
+    }
+    g_gxState.stateDirty = true;
+    break;
+  }
+
+  // Copy clear color (0x4F-0x50) and depth (0x51)
+  case 0x4F: {
+    u8 r = bp_get(value, 8, 0);
+    u8 a = bp_get(value, 8, 8);
+    g_gxState.clearColor[0] = static_cast<float>(r) / 255.f;
+    g_gxState.clearColor[3] = static_cast<float>(a) / 255.f;
+    g_gxState.stateDirty = true;
+    break;
+  }
+  case 0x50: {
+    u8 b = bp_get(value, 8, 0);
+    u8 g = bp_get(value, 8, 8);
+    g_gxState.clearColor[2] = static_cast<float>(b) / 255.f;
+    g_gxState.clearColor[1] = static_cast<float>(g) / 255.f;
+    g_gxState.stateDirty = true;
+    break;
+  }
+  case 0x51: {
+    g_gxState.clearDepth = bp_get(value, 24, 0);
+    g_gxState.stateDirty = true;
+    break;
+  }

  // Texture mode/image registers (0x80-0xBB) - texture config
  default:
    if (regId >= 0x80 && regId <= 0xBB) {
      // Texture format/wrap/filter configuration.
      // These are handled pragmatically - GXLoadTexObj sets texture handles directly.
+    } else {
+      Log.warn("Unhandled BP register 0x{:02X} (value 0x{:06X})", regId, value & 0xFFFFFF);
    }
-    // Silently ignore unknown BP registers
    break;
  }
 }
@@ -151,6 +151,7 @@ struct RenderPass {
  TextureHandle resolveTarget;
  ClipRect resolveRect;
  Vec4<float> clearColor{0.f, 0.f, 0.f, 0.f};
+  float clearDepth = gx::UseReversedZ ? 0.f : 1.f;
  CommandList commands;
  bool clear = true;
 };
@@ -269,6 +270,7 @@ void resolve_pass(TextureHandle texture, ClipRect rect, bool clear, Vec4<float>
  currentPass.resolveRect = rect;
  auto& newPass = g_renderPasses.emplace_back();
  newPass.clearColor = clearColor;
+  newPass.clearDepth = g_renderPasses[g_currentRenderPass].clearDepth;
  newPass.clear = clear;
  ++g_currentRenderPass;
 }
@@ -507,6 +509,10 @@ void begin_frame() {

  g_renderPasses.emplace_back();
  g_renderPasses[0].clearColor = gx::g_gxState.clearColor;
+  {
+    float normalizedDepth = static_cast<float>(gx::g_gxState.clearDepth) / 16777215.f;
+    g_renderPasses[0].clearDepth = gx::UseReversedZ ? (1.f - normalizedDepth) : normalizedDepth;
+  }
  g_currentRenderPass = 0;
  // push_command(CommandType::SetViewport, Command::Data{.setViewport = g_cachedViewport});
  // push_command(CommandType::SetScissor, Command::Data{.setScissor = g_cachedScissor});
@@ -586,7 +592,7 @@ void render(wgpu::CommandEncoder& cmd) {
        .view = webgpu::g_depthBuffer.view,
        .depthLoadOp = passInfo.clear ? wgpu::LoadOp::Clear : wgpu::LoadOp::Load,
        .depthStoreOp = wgpu::StoreOp::Store,
-        .depthClearValue = gx::UseReversedZ ? 0.f : 1.f,
+        .depthClearValue = passInfo.clearDepth,
    };
    const auto label = fmt::format("Render pass {}", i);
    const wgpu::RenderPassDescriptor renderPassDescriptor{
@@ -167,15 +167,17 @@ struct TcgConfig {
 static_assert(std::has_unique_object_representations_v<TcgConfig>);
 struct FogState {
  GXFogType type = GX_FOG_NONE;
-  float startZ = 0.f;
-  float endZ = 0.f;
-  float nearZ = 0.f;
-  float farZ = 0.f;
+  float a = 0.f; // decoded from FOG0 (0xEE) and scaled by 2^b_s, where b_s comes from FOG2
+  float b = 0.5f; // decoded from the FOG1 (0xEF) mantissa and the FOG2 (0xF0) shift
+  float c = 0.f; // decoded from the low 20 bits of FOG3 (0xF1)
  Vec4<float> color;
+  // Raw encoded register values, kept so A and B can be recomputed whenever any one of the three BP writes arrives
+  u32 fog0Raw = 0; // 0xEE: encoded A parameter
+  u32 fog1Raw = 0; // 0xEF: B mantissa
+  u32 fog2Raw = 0; // 0xF0: B shift

  bool operator==(const FogState& rhs) const {
-    return type == rhs.type && startZ == rhs.startZ && endZ == rhs.endZ && nearZ == rhs.nearZ && farZ == rhs.farZ &&
-           color == rhs.color;
+    return type == rhs.type && a == rhs.a && b == rhs.b && c == rhs.c && color == rhs.color; // the raw register copies are deliberately not part of equality
  }
  bool operator!=(const FogState& rhs) const { return !(*this == rhs); }
 };
@@ -204,11 +206,30 @@ struct AlphaCompare {
 static_assert(std::has_unique_object_representations_v<AlphaCompare>);
 struct IndTexMtxInfo {
  aurora::Mat3x2<float> mtx;
-  s8 scaleExp;
+  s8 scaleExp = 0; // recomputed by the BP handler as adjScaleRaw - 17
+  // Accumulated adjScale bits from BP registers (2 bits per row, 3 rows; row N occupies bits 2N..2N+1)
+  u8 adjScaleRaw = 0;

  bool operator==(const IndTexMtxInfo& rhs) const { return mtx == rhs.mtx && scaleExp == rhs.scaleExp; } // adjScaleRaw is not compared
  bool operator!=(const IndTexMtxInfo& rhs) const { return !(*this == rhs); }
 };
+struct TexCoordScale { // Decoded view of one coord's SU scale register pair (BP 0x30-0x3F: even = S, odd = T)
+  u16 scaleS = 0; // low 16 bits of the even register; GXSetTexCoordScaleManually stores ss - 1 here
+  u16 scaleT = 0; // low 16 bits of the odd register; GXSetTexCoordScaleManually stores ts - 1 here
+  bool biasS = false; // bit 16 of the even register
+  bool biasT = false; // bit 16 of the odd register
+  bool cylWrapS = false; // bit 17 of the even register
+  bool cylWrapT = false; // bit 17 of the odd register
+  bool lineOffset = false; // bit 18 of the even register
+  bool pointOffset = false; // bit 19 of the even register
+
+  bool operator==(const TexCoordScale& rhs) const {
+    return scaleS == rhs.scaleS && scaleT == rhs.scaleT && biasS == rhs.biasS && biasT == rhs.biasT &&
+           cylWrapS == rhs.cylWrapS && cylWrapT == rhs.cylWrapT && lineOffset == rhs.lineOffset &&
+           pointOffset == rhs.pointOffset;
+  }
+  bool operator!=(const TexCoordScale& rhs) const { return !(*this == rhs); }
+};
 struct VtxAttrFmt {
  GXCompCnt cnt;
  GXCompType type;
@@ -267,6 +288,7 @@ struct GXState {
  GXLogicOp blendOp = GX_LO_CLEAR;
  GXCompare depthFunc = GX_LEQUAL;
  Vec4<float> clearColor{0.f, 0.f, 0.f, 1.f};
+  u32 clearDepth = 0xFFFFFF;
  u32 dstAlpha; // u8; UINT32_MAX = disabled
  AlphaCompare alphaCompare;
  std::array<Vec4<float>, MaxTevRegs> colorRegs;
@@ -280,6 +302,7 @@ struct GXState {
  std::array<TexMtxVariant, MaxTexMtx> texMtxs;
  std::array<Mat3x4<float>, MaxPTTexMtx> ptTexMtxs;
  std::array<TcgConfig, MaxTexCoord> tcgs;
+  std::array<TexCoordScale, MaxTexCoord> texCoordScales;
  std::array<GXAttrType, MaxVtxAttr> vtxDesc;
  std::array<VtxFmt, MaxVtxFmt> vtxFmts;
  std::array<TevSwap, MaxTevSwap> tevSwapTable{
@@ -317,14 +317,7 @@ Range build_uniform(const ShaderInfo& info) noexcept {
  }
  if (info.usesFog) {
    const auto& state = g_gxState.fog;
-    Fog fog{.color = state.color};
-    if (state.nearZ != state.farZ && state.startZ != state.endZ) {
-      const float depthRange = state.farZ - state.nearZ;
-      const float fogRange = state.endZ - state.startZ;
-      fog.a = (state.farZ * state.nearZ) / (depthRange * fogRange);
-      fog.b = state.farZ / depthRange;
-      fog.c = state.startZ / fogRange;
-    }
+    Fog fog{.color = state.color, .a = state.a, .b = state.b, .c = state.c};
    buf.append(fog);
  }
  for (int i = 0; i < info.sampledTextures.size(); ++i) {
@@ -1531,6 +1531,385 @@ TEST_F(GXFifoTest, ChanMatColor_Color1A1_Compound) {
  EXPECT_NEAR(stateA.matColor[3], 128.f / 255.f, 1.f / 255.f);
 }

+// ============================================================================
+// GXSetFog (BP 0xEE-0xF2) - Fog A/B/C parameters, type, and color
+// ============================================================================
+
+// --- Fog with perspective linear fog, typical parameters ---
+TEST_F(GXFifoTest, Fog_PerspLin_Typical) {
+  GXColor fogColor = {128, 200, 255, 255};
+  GXSetFog(GX_FOG_PERSP_LIN, 100.f, 900.f, 0.1f, 1000.f, fogColor);
+  auto bytes = capture_fifo();
+
+  // Should produce 5 BP writes (0xEE-0xF2): 5 * 5 = 25 bytes
+  ASSERT_EQ(bytes.size(), 25u);
+  // Verify BP opcodes and register IDs
+  EXPECT_EQ(bytes[0], 0x61);
+  EXPECT_EQ(bytes[1], 0xEE);
+  EXPECT_EQ(bytes[5], 0x61);
+  EXPECT_EQ(bytes[6], 0xEF);
+  EXPECT_EQ(bytes[10], 0x61);
+  EXPECT_EQ(bytes[11], 0xF0);
+  EXPECT_EQ(bytes[15], 0x61);
+  EXPECT_EQ(bytes[16], 0xF1);
+  EXPECT_EQ(bytes[20], 0x61);
+  EXPECT_EQ(bytes[21], 0xF2);
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  // Compute expected A, B, C from the SDK formula
+  float nearZ = 0.1f, farZ = 1000.f, startZ = 100.f, endZ = 900.f;
+  float A = (farZ * nearZ) / ((farZ - nearZ) * (endZ - startZ));
+  float B = farZ / (farZ - nearZ);
+  float C = startZ / (endZ - startZ);
+
+  // Allow tolerance for encoding precision loss (11-bit mantissa)
+  EXPECT_NEAR(g_gxState.fog.a, A, std::abs(A) * 1e-3f);
+  EXPECT_NEAR(g_gxState.fog.b, B, std::abs(B) * 1e-3f);
+  EXPECT_NEAR(g_gxState.fog.c, C, std::abs(C) * 1e-3f);
+  EXPECT_EQ(g_gxState.fog.type, GX_FOG_PERSP_LIN);
+  EXPECT_NEAR(g_gxState.fog.color[0], 128.f / 255.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.fog.color[1], 200.f / 255.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.fog.color[2], 255.f / 255.f, 1.f / 255.f);
+}
+
+// --- Fog with degenerate parameters (nearZ == farZ) ---
+TEST_F(GXFifoTest, Fog_Degenerate_EqualDepths) {
+  GXColor fogColor = {0, 0, 0, 255};
+  GXSetFog(GX_FOG_PERSP_EXP, 0.f, 100.f, 10.f, 10.f, fogColor);
+  auto bytes = capture_fifo();
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  // When nearZ == farZ, SDK sets A=0, B=0.5, C=0
+  EXPECT_FLOAT_EQ(g_gxState.fog.a, 0.f);
+  EXPECT_NEAR(g_gxState.fog.b, 0.5f, 1e-3f);
+  EXPECT_FLOAT_EQ(g_gxState.fog.c, 0.f);
+  EXPECT_EQ(g_gxState.fog.type, GX_FOG_PERSP_EXP);
+}
+
+// --- Fog type: none ---
+TEST_F(GXFifoTest, Fog_None) {
+  GXColor fogColor = {64, 64, 64, 255};
+  GXSetFog(GX_FOG_NONE, 0.f, 0.f, 0.f, 0.f, fogColor);
+  auto bytes = capture_fifo();
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  EXPECT_EQ(g_gxState.fog.type, GX_FOG_NONE);
+  EXPECT_FLOAT_EQ(g_gxState.fog.a, 0.f);
+  EXPECT_NEAR(g_gxState.fog.b, 0.5f, 1e-3f);
+  EXPECT_FLOAT_EQ(g_gxState.fog.c, 0.f);
+  EXPECT_NEAR(g_gxState.fog.color[0], 64.f / 255.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.fog.color[1], 64.f / 255.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.fog.color[2], 64.f / 255.f, 1.f / 255.f);
+}
+
+// --- Fog with perspective reverse exponential squared type ---
+TEST_F(GXFifoTest, Fog_PerspRevExp2) {
+  GXColor fogColor = {255, 0, 0, 255};
+  GXSetFog(GX_FOG_PERSP_REVEXP2, 50.f, 500.f, 1.f, 1000.f, fogColor);
+  auto bytes = capture_fifo();
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  float nearZ = 1.f, farZ = 1000.f, startZ = 50.f, endZ = 500.f;
+  float A = (farZ * nearZ) / ((farZ - nearZ) * (endZ - startZ));
+  float B = farZ / (farZ - nearZ);
+  float C = startZ / (endZ - startZ);
+
+  EXPECT_NEAR(g_gxState.fog.a, A, std::abs(A) * 1e-3f);
+  EXPECT_NEAR(g_gxState.fog.b, B, std::abs(B) * 1e-3f);
+  EXPECT_NEAR(g_gxState.fog.c, C, std::abs(C) * 1e-3f);
+  EXPECT_EQ(g_gxState.fog.type, GX_FOG_PERSP_REVEXP2);
+  EXPECT_NEAR(g_gxState.fog.color[0], 1.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.fog.color[1], 0.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.fog.color[2], 0.f, 1.f / 255.f);
+}
+
+// ============================================================================
+// GXSetIndTexMtx (BP 0x06-0x0E) - Indirect texture matrix parameters
+// ============================================================================
+
+// --- IndTexMtx 0 with half-scale diagonal matrix ---
+// Note: 11-bit signed range limits values to [-1.0, 0.999], so 1.0 is not representable.
+TEST_F(GXFifoTest, IndTexMtx0_HalfScale) {
+  f32 mtx[2][3] = {
+      {0.5f, 0.0f, 0.0f},
+      {0.0f, 0.5f, 0.0f},
+  };
+  GXSetIndTexMtx(GX_ITM_0, mtx, 0);
+  auto bytes = capture_fifo();
+
+  // Should produce 3 BP writes: 3 * 5 = 15 bytes
+  ASSERT_EQ(bytes.size(), 15u);
+  // Verify BP opcodes and register IDs (0x06, 0x07, 0x08 for matrix 0)
+  EXPECT_EQ(bytes[0], 0x61);
+  EXPECT_EQ(bytes[1], 0x06);
+  EXPECT_EQ(bytes[5], 0x61);
+  EXPECT_EQ(bytes[6], 0x07);
+  EXPECT_EQ(bytes[10], 0x61);
+  EXPECT_EQ(bytes[11], 0x08);
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  const auto& info = g_gxState.indTexMtxs[0];
+  // 11-bit fixed-point (1/1024) precision
+  float tol = 1.0f / 1024.0f;
+  EXPECT_NEAR(info.mtx.m0.x, 0.5f, tol);
+  EXPECT_NEAR(info.mtx.m0.y, 0.0f, tol);
+  EXPECT_NEAR(info.mtx.m1.x, 0.0f, tol);
+  EXPECT_NEAR(info.mtx.m1.y, 0.5f, tol);
+  EXPECT_NEAR(info.mtx.m2.x, 0.0f, tol);
+  EXPECT_NEAR(info.mtx.m2.y, 0.0f, tol);
+  EXPECT_EQ(info.scaleExp, 0);
+}
+
+// --- IndTexMtx 1 with fractional values and positive scale ---
+TEST_F(GXFifoTest, IndTexMtx1_FractionalWithScale) {
+  f32 mtx[2][3] = {
+      {0.5f, 0.25f, -0.125f},
+      {-0.5f, 0.75f, 0.0f},
+  };
+  GXSetIndTexMtx(GX_ITM_1, mtx, 3);
+  auto bytes = capture_fifo();
+
+  // Register IDs for matrix 1: 0x09, 0x0A, 0x0B
+  ASSERT_EQ(bytes.size(), 15u);
+  EXPECT_EQ(bytes[1], 0x09);
+  EXPECT_EQ(bytes[6], 0x0A);
+  EXPECT_EQ(bytes[11], 0x0B);
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  const auto& info = g_gxState.indTexMtxs[1];
+  float tol = 1.0f / 1024.0f;
+  EXPECT_NEAR(info.mtx.m0.x, 0.5f, tol);
+  EXPECT_NEAR(info.mtx.m0.y, -0.5f, tol);
+  EXPECT_NEAR(info.mtx.m1.x, 0.25f, tol);
+  EXPECT_NEAR(info.mtx.m1.y, 0.75f, tol);
+  EXPECT_NEAR(info.mtx.m2.x, -0.125f, tol);
+  EXPECT_NEAR(info.mtx.m2.y, 0.0f, tol);
+  EXPECT_EQ(info.scaleExp, 3);
+}
+
+// --- IndTexMtx 2 with negative scale exponent ---
+TEST_F(GXFifoTest, IndTexMtx2_NegativeScale) {
+  f32 mtx[2][3] = {
+      {0.0f, 0.0f, 0.0f},
+      {0.0f, 0.0f, 0.0f},
+  };
+  GXSetIndTexMtx(GX_ITM_2, mtx, -5); // all-zero matrix: only the scale exponent is under test
+  auto bytes = capture_fifo();
+
+  // Register IDs for matrix 2: 0x0C, 0x0D, 0x0E (three 5-byte BP writes)
+  ASSERT_EQ(bytes.size(), 15u);
+  EXPECT_EQ(bytes[1], 0x0C);
+  EXPECT_EQ(bytes[6], 0x0D);
+  EXPECT_EQ(bytes[11], 0x0E);
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  const auto& info = g_gxState.indTexMtxs[2];
+  EXPECT_EQ(info.scaleExp, -5); // decoder reassembles the biased value from 2 bits per register, then subtracts 17
+}
+
+// --- IndTexMtx 0 does not affect matrix 1 ---
+TEST_F(GXFifoTest, IndTexMtx0_Isolation) {
+  f32 mtx0[2][3] = {
+      {0.5f, 0.0f, 0.0f},
+      {0.0f, 0.5f, 0.0f},
+  };
+  f32 mtx1[2][3] = {
+      {-1.0f, 0.0f, 0.0f},
+      {0.0f, -1.0f, 0.0f},
+  };
+  GXSetIndTexMtx(GX_ITM_0, mtx0, 1);
+  GXSetIndTexMtx(GX_ITM_1, mtx1, -2);
+  auto bytes = capture_fifo();
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  float tol = 1.0f / 1024.0f;
+  // Matrix 0
+  EXPECT_NEAR(g_gxState.indTexMtxs[0].mtx.m0.x, 0.5f, tol);
+  EXPECT_NEAR(g_gxState.indTexMtxs[0].mtx.m1.y, 0.5f, tol);
+  EXPECT_EQ(g_gxState.indTexMtxs[0].scaleExp, 1);
+  // Matrix 1
+  EXPECT_NEAR(g_gxState.indTexMtxs[1].mtx.m0.x, -1.0f, tol);
+  EXPECT_NEAR(g_gxState.indTexMtxs[1].mtx.m1.y, -1.0f, tol);
+  EXPECT_EQ(g_gxState.indTexMtxs[1].scaleExp, -2);
+}
+
+// ============================================================================
+// SU Texture Coordinate Scale (BP 0x30-0x3F)
+// ============================================================================
+
+// --- GXSetTexCoordScaleManually sets width/height ---
+TEST_F(GXFifoTest, TexCoordScale_Manual_Coord0) {
+  GXSetTexCoordScaleManually(GX_TEXCOORD0, GX_TRUE, 256, 128);
+  auto bytes = capture_fifo();
+
+  // Two BP writes (suTs0 + suTs1): 2 * 5 = 10 bytes
+  ASSERT_EQ(bytes.size(), 10u);
+  EXPECT_EQ(bytes[0], 0x61);
+  EXPECT_EQ(bytes[1], 0x30); // suTs0[0]
+  EXPECT_EQ(bytes[5], 0x61);
+  EXPECT_EQ(bytes[6], 0x31); // suTs1[0]
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  const auto& tcs = g_gxState.texCoordScales[0];
+  EXPECT_EQ(tcs.scaleS, 255u); // width - 1
+  EXPECT_EQ(tcs.scaleT, 127u); // height - 1
+}
+
+// --- GXSetTexCoordScaleManually for coord 3 ---
+TEST_F(GXFifoTest, TexCoordScale_Manual_Coord3) {
+  GXSetTexCoordScaleManually(GX_TEXCOORD3, GX_TRUE, 512, 512);
+  auto bytes = capture_fifo();
+
+  ASSERT_EQ(bytes.size(), 10u);
+  EXPECT_EQ(bytes[1], 0x36); // suTs0[3] = 0x30 + 3*2
+  EXPECT_EQ(bytes[6], 0x37); // suTs1[3] = 0x31 + 3*2
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  const auto& tcs = g_gxState.texCoordScales[3];
+  EXPECT_EQ(tcs.scaleS, 511u);
+  EXPECT_EQ(tcs.scaleT, 511u);
+}
+
+// --- GXSetTexCoordBias and GXSetTexCoordCylWrap on a manually scaled coord ---
+TEST_F(GXFifoTest, TexCoordScale_BiasAndCylWrap) {
+  // Enable manual mode first: the bias and cyl-wrap setters only emit BP writes for manually scaled coords
+  GXSetTexCoordScaleManually(GX_TEXCOORD0, GX_TRUE, 64, 64);
+  capture_fifo(); // discard
+
+  GXSetTexCoordBias(GX_TEXCOORD0, GX_TRUE, GX_FALSE);
+  auto biasBytes = capture_fifo();
+
+  GXSetTexCoordCylWrap(GX_TEXCOORD0, GX_FALSE, GX_TRUE);
+  auto cylBytes = capture_fifo();
+
+  // Each call writes 2 BP regs (suTs0 + suTs1), 5 bytes apiece
+  ASSERT_EQ(biasBytes.size(), 10u);
+  ASSERT_EQ(cylBytes.size(), 10u);
+
+  reset_gx_state();
+  decode_fifo(biasBytes);
+  decode_fifo(cylBytes); // re-sends the whole shadow registers, so the bias bits set above are still present
+
+  const auto& tcs = g_gxState.texCoordScales[0];
+  EXPECT_TRUE(tcs.biasS);
+  EXPECT_FALSE(tcs.biasT);
+  EXPECT_FALSE(tcs.cylWrapS);
+  EXPECT_TRUE(tcs.cylWrapT);
+}
+
+// --- GXEnableTexOffsets ---
+TEST_F(GXFifoTest, TexCoordScale_TexOffsets) {
+  GXEnableTexOffsets(GX_TEXCOORD2, GX_TRUE, GX_TRUE); // no manual-scale setup needed: this setter writes unconditionally
+  auto bytes = capture_fifo();
+
+  // One BP write (suTs0 only): 5 bytes
+  ASSERT_EQ(bytes.size(), 5u);
+  EXPECT_EQ(bytes[1], 0x34); // suTs0[2] = 0x30 + 2*2
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  const auto& tcs = g_gxState.texCoordScales[2];
+  EXPECT_TRUE(tcs.lineOffset); // bit 18 of the even register
+  EXPECT_TRUE(tcs.pointOffset); // bit 19 of the even register
+}
+
+// --- Coord isolation: writing coord 0 doesn't affect coord 1 ---
+TEST_F(GXFifoTest, TexCoordScale_Isolation) {
+  GXSetTexCoordScaleManually(GX_TEXCOORD0, GX_TRUE, 100, 200);
+  GXSetTexCoordScaleManually(GX_TEXCOORD1, GX_TRUE, 300, 400);
+  auto bytes = capture_fifo();
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  EXPECT_EQ(g_gxState.texCoordScales[0].scaleS, 99u);
+  EXPECT_EQ(g_gxState.texCoordScales[0].scaleT, 199u);
+  EXPECT_EQ(g_gxState.texCoordScales[1].scaleS, 299u);
+  EXPECT_EQ(g_gxState.texCoordScales[1].scaleT, 399u);
+}
+
+// ============================================================================
+// GXSetCopyClear (BP 0x4F-0x51) - Clear color and depth
+// ============================================================================
+
+// --- Clear color and depth round-trip ---
+TEST_F(GXFifoTest, CopyClear_ColorAndDepth) {
+  GXColor color = {64, 128, 192, 255};
+  GXSetCopyClear(color, 0x00ABCDEF);
+  auto bytes = capture_fifo();
+
+  // 3 BP writes: 3 * 5 = 15 bytes
+  ASSERT_EQ(bytes.size(), 15u);
+  EXPECT_EQ(bytes[0], 0x61);
+  EXPECT_EQ(bytes[1], 0x4F); // R + A
+  EXPECT_EQ(bytes[5], 0x61);
+  EXPECT_EQ(bytes[6], 0x50); // B + G
+  EXPECT_EQ(bytes[10], 0x61);
+  EXPECT_EQ(bytes[11], 0x51); // Z
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  EXPECT_NEAR(g_gxState.clearColor[0], 64.f / 255.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.clearColor[1], 128.f / 255.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.clearColor[2], 192.f / 255.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.clearColor[3], 255.f / 255.f, 1.f / 255.f);
+  EXPECT_EQ(g_gxState.clearDepth, 0x00ABCDEFu);
+}
+
+// --- Clear with black and zero depth ---
+TEST_F(GXFifoTest, CopyClear_BlackZeroDepth) {
+  GXColor color = {0, 0, 0, 0};
+  GXSetCopyClear(color, 0);
+  auto bytes = capture_fifo();
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  EXPECT_NEAR(g_gxState.clearColor[0], 0.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.clearColor[1], 0.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.clearColor[2], 0.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.clearColor[3], 0.f, 1.f / 255.f);
+  EXPECT_EQ(g_gxState.clearDepth, 0u);
+}
+
+// --- Clear with max depth ---
+TEST_F(GXFifoTest, CopyClear_MaxDepth) {
+  GXColor color = {255, 255, 255, 128};
+  GXSetCopyClear(color, 0xFFFFFF);
+  auto bytes = capture_fifo();
+
+  reset_gx_state();
+  decode_fifo(bytes);
+
+  EXPECT_NEAR(g_gxState.clearColor[0], 1.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.clearColor[1], 1.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.clearColor[2], 1.f, 1.f / 255.f);
+  EXPECT_NEAR(g_gxState.clearColor[3], 128.f / 255.f, 1.f / 255.f);
+  EXPECT_EQ(g_gxState.clearDepth, 0xFFFFFFu);
+}
+
 // ============================================================================
 // Composite tests (multiple state changes in a single FIFO stream)
 // ============================================================================