Fog & TCS FIFO-ification

This commit is contained in:
Luke Street
2026-02-18 13:41:42 -07:00
parent 73b81f53aa
commit 60eba7dff9
13 changed files with 632 additions and 77 deletions
+2
View File
@@ -24,6 +24,8 @@ void GXInitTexObjWrapMode(GXTexObj* obj, GXTexWrapMode s, GXTexWrapMode t);
void GXInitTlutObj(GXTlutObj* obj, const void* data, GXTlutFmt format, u16 entries);
void GXLoadTlut(const GXTlutObj* obj, GXTlut idx);
void GXSetTexCoordScaleManually(GXTexCoordID coord, GXBool enable, u16 ss, u16 ts);
// Sets the S and T cyl-wrap flags of `coord` (the implementation stores them in
// bit 17 of that coordinate's two SU texture registers).
void GXSetTexCoordCylWrap(GXTexCoordID coord, GXBool s_enable, GXBool t_enable);
// Sets the S and T bias flags of `coord` (the implementation stores them in
// bit 16 of that coordinate's two SU texture registers).
void GXSetTexCoordBias(GXTexCoordID coord, GXBool s_enable, GXBool t_enable);
void GXInitTexCacheRegion(GXTexRegion* region, GXBool is_32b_mipmap, u32 tmem_even, GXTexCacheSize size_even,
u32 tmem_odd, GXTexCacheSize size_odd);
GXTexRegionCallback GXSetTexRegionCallback(GXTexRegionCallback callback);
+23 -1
View File
@@ -1,4 +1,5 @@
#include "gx.hpp"
#include "__gx.h"
#include "../../window.hpp"
#include "../../webgpu/wgpu.hpp"
@@ -39,7 +40,28 @@ void GXSetTexCopyDst(u16 wd, u16 ht, GXTexFmt fmt, GXBool mipmap) {
u32 GXSetDispCopyYScale(f32 vscale) { return 0; }
void GXSetCopyClear(GXColor color, u32 depth) { update_gx_state(g_gxState.clearColor, from_gx_color(color)); }
// Packs the copy-clear color and depth into BP registers 0x4F, 0x50 and 0x51
// and emits them to the FIFO in ascending register order.
void GXSetCopyClear(GXColor color, u32 depth) {
  // Assemble all three register images up front; nothing is emitted yet.

  // 0x4F carries red (bits 0-7) and alpha (bits 8-15).
  u32 clearRA = 0;
  SET_REG_FIELD(0, clearRA, 8, 0, color.r);
  SET_REG_FIELD(0, clearRA, 8, 8, color.a);
  SET_REG_FIELD(0, clearRA, 8, 24, 0x4F);

  // 0x50 carries blue (bits 0-7) and green (bits 8-15).
  u32 clearBG = 0;
  SET_REG_FIELD(0, clearBG, 8, 0, color.b);
  SET_REG_FIELD(0, clearBG, 8, 8, color.g);
  SET_REG_FIELD(0, clearBG, 8, 24, 0x50);

  // 0x51 carries the 24-bit clear depth.
  u32 clearZ = 0;
  SET_REG_FIELD(0, clearZ, 24, 0, depth);
  SET_REG_FIELD(0, clearZ, 8, 24, 0x51);

  // Emit in register order: 0x4F, 0x50, 0x51.
  GX_WRITE_RAS_REG(clearRA);
  GX_WRITE_RAS_REG(clearBG);
  GX_WRITE_RAS_REG(clearZ);
  __gx->bpSent = 1;
}
void GXSetCopyFilter(GXBool aa, u8 sample_pattern[12][2], GXBool vf, u8 vfilter[7]) {}
+7 -1
View File
@@ -364,5 +364,11 @@ void GXSetLineWidth(u8 width, GXTexOffset offs) {
}
// TODO GXSetPointSize
// TODO GXEnableTexOffsets
// Records the line and point texture-offset enables for `coord` in bits 18 and
// 19 of its cached suTs0 register, then writes that register to the FIFO.
void GXEnableTexOffsets(GXTexCoordID coord, GXBool line_enable, GXBool point_enable) {
  u32& su0 = __gx->suTs0[coord];
  SET_REG_FIELD(0, su0, 1, 18, line_enable);
  SET_REG_FIELD(0, su0, 1, 19, point_enable);
  GX_WRITE_RAS_REG(su0);
  __gx->bpSent = 1;
}
}
+10 -4
View File
@@ -250,8 +250,12 @@ static void __SetSURegs(u32 tmap, u32 tcoord) {
void __GXSetSUTexRegs() {
// Write SU texture size/bias registers for each active TEV stage and indirect stage.
// tcsManEnab == 0xFF means all coords are manually managed (skip auto-setup).
// Aurora doesn't use tcsManEnab, so we always auto-setup (tcsManEnab = 0).
// Skip coords that have manual scale enabled (tcsManEnab bit set).
// If all coords are manual (0xFF), skip entirely.
if (__gx->tcsManEnab == 0xFF) {
return;
}
u32 nStages = GET_REG_FIELD(__gx->genMode, 4, 10) + 1;
u32 nIndStages = GET_REG_FIELD(__gx->genMode, 3, 16);
@@ -276,7 +280,9 @@ void __GXSetSUTexRegs() {
coord = GET_REG_FIELD(__gx->iref, 3, 21);
break;
}
__SetSURegs(tmap, coord);
if (!(__gx->tcsManEnab & (1 << coord))) {
__SetSURegs(tmap, coord);
}
}
// Direct TEV stages
@@ -290,7 +296,7 @@ void __GXSetSUTexRegs() {
} else {
coord = GET_REG_FIELD(*ptref, 3, 3);
}
if (tmap != 0xFF) {
if (tmap != 0xFF && !(__gx->tcsManEnab & (1 << coord))) {
__SetSURegs(tmap, coord);
}
}
-6
View File
@@ -76,9 +76,6 @@ void GXSetFog(GXFogType type, float startZ, float endZ, float nearZ, float farZ,
GX_WRITE_RAS_REG(fog3);
GX_WRITE_RAS_REG(fogclr);
__gx->bpSent = 1;
// Side channel: direct update for inline rendering (full precision)
update_gx_state(g_gxState.fog, {type, startZ, endZ, nearZ, farZ, from_gx_color(color)});
}
void GXSetFogColor(GXColor color) {
@@ -90,9 +87,6 @@ void GXSetFogColor(GXColor color) {
SET_REG_FIELD(0, fogclr, 8, 24, 0xF2);
GX_WRITE_RAS_REG(fogclr);
__gx->bpSent = 1;
// Side channel: direct update for inline rendering
update_gx_state(g_gxState.fog.color, from_gx_color(color));
}
void GXSetBlendMode(GXBlendMode mode, GXBlendFactor src, GXBlendFactor dst, GXLogicOp op) {
-14
View File
@@ -107,9 +107,6 @@ void GXSetTevColor(GXTevRegID id, GXColor color) {
// We omit the redundant writes since they don't change the register value and
// our software command processor doesn't need the sync delay.
__gx->bpSent = 1;
// Side channel: direct update for inline rendering (full precision)
update_gx_state(g_gxState.colorRegs[id], from_gx_color(color));
}
void GXSetTevColorS10(GXTevRegID id, GXColorS10 color) {
@@ -128,14 +125,6 @@ void GXSetTevColorS10(GXTevRegID id, GXColorS10 color) {
// We omit the redundant writes since they don't change the register value and
// our software command processor doesn't need the sync delay.
__gx->bpSent = 1;
// Side channel: direct update for inline rendering (full precision)
update_gx_state(g_gxState.colorRegs[id], aurora::Vec4<float>{
static_cast<float>(color.r) / 255.f,
static_cast<float>(color.g) / 255.f,
static_cast<float>(color.b) / 255.f,
static_cast<float>(color.a) / 255.f,
});
}
void GXSetAlphaCompare(GXCompare comp0, u8 ref0, GXAlphaOp op, GXCompare comp1, u8 ref1) {
@@ -204,9 +193,6 @@ void GXSetTevKColor(GXTevKColorID id, GXColor color) {
GX_WRITE_RAS_REG(regRA);
GX_WRITE_RAS_REG(regBG);
__gx->bpSent = 1;
// Side channel: direct update for inline rendering (full precision)
update_gx_state(g_gxState.kcolors[id], from_gx_color(color));
}
void GXSetTevKColorSel(GXTevStageID id, GXTevKColorSel sel) {
+29 -3
View File
@@ -1,4 +1,5 @@
#include "gx.hpp"
#include "__gx.h"
#include "../../gfx/texture.hpp"
@@ -234,8 +235,33 @@ void GXInvalidateTexAll() {
// TODO GXSetTlutRegionCallback
// TODO GXLoadTexObjPreLoaded
// Switches manual texture-coordinate scaling on or off for `coord`.
//
// coord  - texture coordinate whose bit in __gx->tcsManEnab is updated.
// enable - any non-zero value enables manual scaling; zero disables it.
// ss, ts - scale values for S and T; (ss - 1) and (ts - 1) are stored in the low
//          16 bits of suTs0/suTs1 and written to the FIFO, only when enabling.
//
// Disabling only clears the bookkeeping bit; no register is written.
void GXSetTexCoordScaleManually(GXTexCoordID coord, GXBool enable, u16 ss, u16 ts) {
  // Derive the mask bit from the truthiness of `enable`, not its raw value, so
  // the bitmask always agrees with the `if (enable)` branch below even when a
  // caller passes a non-canonical boolean such as 2.
  const u32 coordBit = 1u << coord;
  const u32 updated = enable ? (__gx->tcsManEnab | coordBit) : (__gx->tcsManEnab & ~coordBit);
  __gx->tcsManEnab = static_cast<u8>(updated);

  if (enable) {
    SET_REG_FIELD(0, __gx->suTs0[coord], 16, 0, static_cast<u16>(ss - 1));
    SET_REG_FIELD(0, __gx->suTs1[coord], 16, 0, static_cast<u16>(ts - 1));
    GX_WRITE_RAS_REG(__gx->suTs0[coord]);
    GX_WRITE_RAS_REG(__gx->suTs1[coord]);
    __gx->bpSent = 1;
  }
}
// Updates the cyl-wrap flags (bit 17) in the cached suTs0/suTs1 registers of
// `coord`. The registers are sent to the FIFO only when manual scaling is
// enabled for that coordinate; otherwise only the cached values change.
void GXSetTexCoordCylWrap(GXTexCoordID coord, GXBool s_enable, GXBool t_enable) {
  u32& su0 = __gx->suTs0[coord];
  u32& su1 = __gx->suTs1[coord];
  SET_REG_FIELD(0, su0, 1, 17, s_enable);
  SET_REG_FIELD(0, su1, 1, 17, t_enable);

  const bool manualScale = (__gx->tcsManEnab & (1 << coord)) != 0;
  if (!manualScale) {
    return;
  }
  GX_WRITE_RAS_REG(su0);
  GX_WRITE_RAS_REG(su1);
  __gx->bpSent = 1;
}
// Updates the bias flags (bit 16) in the cached suTs0/suTs1 registers of
// `coord`. The registers are sent to the FIFO only when manual scaling is
// enabled for that coordinate; otherwise only the cached values change.
void GXSetTexCoordBias(GXTexCoordID coord, GXBool s_enable, GXBool t_enable) {
  u32& su0 = __gx->suTs0[coord];
  u32& su1 = __gx->suTs1[coord];
  SET_REG_FIELD(0, su0, 1, 16, s_enable);
  SET_REG_FIELD(0, su1, 1, 16, t_enable);

  const bool manualScale = (__gx->tcsManEnab & (1 << coord)) != 0;
  if (!manualScale) {
    return;
  }
  GX_WRITE_RAS_REG(su0);
  GX_WRITE_RAS_REG(su1);
  __gx->bpSent = 1;
}
// TODO GXSetTexCoordCylWrap
// TODO GXSetTexCoordBias
}
+1
View File
@@ -99,6 +99,7 @@ struct __GXData_struct {
u32 suTs0[8]; // SU texture S0 registers
u32 suTs1[8]; // SU texture S1 registers
u8 tcsManEnab; // bitmask: manual tex coord scale enabled per coord
u32 suScis0; // scissor top-left
u32 suScis1; // scissor bottom-right
+143 -32
View File
@@ -8,6 +8,9 @@
#include <absl/container/flat_hash_map.h>
#include <cmath>
#include <cstring>
static aurora::Module Log("aurora::gfx::cp");
using aurora::gfx::gx::g_gxState;
@@ -333,17 +336,11 @@ void process(const u8* data, u32 size, bool bigEndian) {
while (pos < size) {
u8 cmd = data[pos++];
if (cmd == CP_CMD_NOP) {
continue;
}
u8 opcode = cmd & CP_OPCODE_MASK;
// Log.warn("Processing opcode {:02x} at pos {} (size {})", opcode, pos - 1, size);
switch (opcode) {
case CP_CMD_NOP:
// Already handled above, but could be 0x00 with VAT bits set
continue;
case CP_CMD_LOAD_BP_REG: {
@@ -374,7 +371,7 @@ void process(const u8* data, u32 size, bool bigEndian) {
case CP_CMD_LOAD_INDX_D: {
// Indexed XF load: 4 bytes of data
CHECK(pos + 4 <= size, "indexed XF read overrun");
// TODO: handle indexed XF loads
Log.warn("Unimplemented indexed XF load (opcode 0x{:02X})", opcode);
pos += 4;
break;
}
@@ -388,7 +385,7 @@ void process(const u8* data, u32 size, bool bigEndian) {
}
case CP_CMD_INVAL_VTX: {
// Invalidate vertex cache - no data
// Invalidate vertex cache
break;
}
@@ -533,7 +530,7 @@ static void handle_bp(u32 value, bool bigEndian) {
// BP mask (0x0F) - internal, applies to next BP write
case 0x0F:
// The BP mask is used by the hardware to selectively update fields.
// For our purposes, we don't need to implement masking.
// TODO implement
break;
// TEV indirect stages (0x10-0x1F)
@@ -559,21 +556,16 @@ static void handle_bp(u32 value, bool bigEndian) {
}
// Scissor registers (0x20, 0x21)
case 0x20: {
u32 top = bp_get(value, 11, 0) - 340;
u32 left = bp_get(value, 11, 12) - 340;
aurora::gfx::set_scissor(left, top, 1, 1); // Width/height set by 0x21
break;
}
case 0x21: {
// Need both 0x20 and 0x21 to reconstruct full scissor rect.
// For now, just consume - the GX function already called set_scissor.
case 0x20: case 0x21: {
Log.warn("Unimplemented: BP register {:x} (scissor)", regId);
break;
}
// Line/point size (0x22) - informational, not used for rendering
case 0x22:
// Line/point size (0x22)
case 0x22: {
Log.warn("Unimplemented: BP register {:x} (line/point size)", regId);
break;
}
// Indirect texture scale (0x25, 0x26)
case 0x25: {
@@ -690,8 +682,10 @@ static void handle_bp(u32 value, bool bigEndian) {
}
// PE control (0x43) - zcomp location
case 0x43:
case 0x43: {
// Log.warn("Unimplemented: BP register {:x} (zcomp loc)", regId);
break;
}
// Alpha compare (0xF3)
case 0xF3: {
@@ -734,10 +728,62 @@ static void handle_bp(u32 value, bool bigEndian) {
break;
}
// Fog type from FOG3 (0xF1) - decode fog type for display list playback
// Fog A/B parameters (0xEE-0xF0)
// FOG0 (0xEE): A parameter - sign(1)|exp(8)|mantissa(11) partial IEEE 754 float
case 0xEE: {
g_gxState.fog.fog0Raw = value;
// Reconstruct A = a_encoded * 2^b_s
u32 a_mant = bp_get(value, 11, 0);
u32 a_exp = bp_get(value, 8, 11);
u32 a_sign = bp_get(value, 1, 19);
u32 a_bits = (a_sign << 31) | (a_exp << 23) | (a_mant << 12);
float a_encoded;
std::memcpy(&a_encoded, &a_bits, sizeof(a_encoded));
u32 b_s = g_gxState.fog.fog2Raw & 0x1F;
g_gxState.fog.a = std::ldexp(a_encoded, static_cast<int>(b_s));
g_gxState.stateDirty = true;
break;
}
// FOG1 (0xEF): B mantissa (24-bit)
case 0xEF: {
g_gxState.fog.fog1Raw = value;
u32 b_m = bp_get(value, 24, 0);
u32 b_s = g_gxState.fog.fog2Raw & 0x1F;
float B_mant = static_cast<float>(b_m) / 8388638.0f;
g_gxState.fog.b = std::ldexp(B_mant, static_cast<int>(b_s) - 1);
g_gxState.stateDirty = true;
break;
}
// FOG2 (0xF0): B shift/exponent (5-bit)
case 0xF0: {
g_gxState.fog.fog2Raw = value;
u32 b_s = bp_get(value, 5, 0);
// Recompute A with updated b_s
u32 a_mant = bp_get(g_gxState.fog.fog0Raw, 11, 0);
u32 a_exp = bp_get(g_gxState.fog.fog0Raw, 8, 11);
u32 a_sign = bp_get(g_gxState.fog.fog0Raw, 1, 19);
u32 a_bits = (a_sign << 31) | (a_exp << 23) | (a_mant << 12);
float a_encoded;
std::memcpy(&a_encoded, &a_bits, sizeof(a_encoded));
g_gxState.fog.a = std::ldexp(a_encoded, static_cast<int>(b_s));
// Recompute B with updated b_s
u32 b_m = bp_get(g_gxState.fog.fog1Raw, 24, 0);
float B_mant = static_cast<float>(b_m) / 8388638.0f;
g_gxState.fog.b = std::ldexp(B_mant, static_cast<int>(b_s) - 1);
g_gxState.stateDirty = true;
break;
}
// Fog type + C parameter from FOG3 (0xF1)
case 0xF1: {
GXFogType fogType = static_cast<GXFogType>(bp_get(value, 3, 21));
g_gxState.fog.type = fogType;
// Decode C parameter (same partial float encoding as A)
u32 c_mant = bp_get(value, 11, 0);
u32 c_exp = bp_get(value, 8, 11);
u32 c_sign = bp_get(value, 1, 19);
u32 c_bits = (c_sign << 31) | (c_exp << 23) | (c_mant << 12);
std::memcpy(&g_gxState.fog.c, &c_bits, sizeof(g_gxState.fog.c));
g_gxState.stateDirty = true;
break;
}
@@ -757,12 +803,6 @@ static void handle_bp(u32 value, bool bigEndian) {
break;
}
// Fog A/B parameters (0xEE-0xF0) - these carry encoded float coefficients.
// Aurora uses the high-level fog params (startZ/endZ/nearZ/farZ) from the side channel,
// so we only need the type and color from the FIFO for DL playback.
case 0xEE: case 0xEF: case 0xF0:
break;
// TEV color registers / K color registers (0xE0-0xE7)
// RA registers: 0xE0, 0xE2, 0xE4, 0xE6 (even)
// BG registers: 0xE1, 0xE3, 0xE5, 0xE7 (odd)
@@ -813,20 +853,91 @@ static void handle_bp(u32 value, bool bigEndian) {
}
// Indirect texture matrices (0x06-0x0E)
// Each matrix uses 3 consecutive registers (one per row of the 3x2 matrix).
// Matrix 0: 0x06-0x08, Matrix 1: 0x09-0x0B, Matrix 2: 0x0C-0x0E
case 0x06: case 0x07: case 0x08:
case 0x09: case 0x0A: case 0x0B:
case 0x0C: case 0x0D: case 0x0E:
// These carry 2x3 matrix values packed into BP registers.
// The matrix data is already applied via GXSetIndTexMtx which updates g_gxState directly.
case 0x0C: case 0x0D: case 0x0E: {
u32 idx = (regId - 0x06) / 3; // matrix index (0-2)
u32 row = (regId - 0x06) % 3; // row index (0-2)
auto& info = g_gxState.indTexMtxs[idx];
// Decode 11-bit signed matrix elements (scaled by 1024)
s32 col0 = bp_get(value, 11, 0);
if (col0 & 0x400) col0 |= ~0x7FF; // sign-extend from 11 bits
s32 col1 = bp_get(value, 11, 11);
if (col1 & 0x400) col1 |= ~0x7FF;
auto& r = row == 0 ? info.mtx.m0 : (row == 1 ? info.mtx.m1 : info.mtx.m2);
r.x = static_cast<float>(col0) / 1024.0f;
r.y = static_cast<float>(col1) / 1024.0f;
// Accumulate 2-bit scale exponent part (adjScale = scaleExp + 17, split across 3 registers)
u32 scaleBits = bp_get(value, 2, 22);
u32 shift = row * 2;
info.adjScaleRaw = (info.adjScaleRaw & ~(3u << shift)) | (scaleBits << shift);
info.scaleExp = static_cast<s8>(info.adjScaleRaw) - 17;
g_gxState.stateDirty = true;
break;
}
// SU texture coordinate scale registers (0x30-0x3F)
// Even registers (suTs0): S-axis scale, bias, cyl wrap, line/point offset
// Odd registers (suTs1): T-axis scale, bias, cyl wrap
case 0x30: case 0x31: case 0x32: case 0x33:
case 0x34: case 0x35: case 0x36: case 0x37:
case 0x38: case 0x39: case 0x3A: case 0x3B:
case 0x3C: case 0x3D: case 0x3E: case 0x3F: {
u32 coordIdx = (regId - 0x30) / 2;
bool isT = (regId & 1) != 0;
auto& tcs = g_gxState.texCoordScales[coordIdx];
if (isT) {
tcs.scaleT = static_cast<u16>(bp_get(value, 16, 0));
tcs.biasT = bp_get(value, 1, 16) != 0;
tcs.cylWrapT = bp_get(value, 1, 17) != 0;
} else {
tcs.scaleS = static_cast<u16>(bp_get(value, 16, 0));
tcs.biasS = bp_get(value, 1, 16) != 0;
tcs.cylWrapS = bp_get(value, 1, 17) != 0;
tcs.lineOffset = bp_get(value, 1, 18) != 0;
tcs.pointOffset = bp_get(value, 1, 19) != 0;
}
g_gxState.stateDirty = true;
break;
}
// Copy clear color (0x4F-0x50) and depth (0x51)
case 0x4F: {
u8 r = bp_get(value, 8, 0);
u8 a = bp_get(value, 8, 8);
g_gxState.clearColor[0] = static_cast<float>(r) / 255.f;
g_gxState.clearColor[3] = static_cast<float>(a) / 255.f;
g_gxState.stateDirty = true;
break;
}
case 0x50: {
u8 b = bp_get(value, 8, 0);
u8 g = bp_get(value, 8, 8);
g_gxState.clearColor[2] = static_cast<float>(b) / 255.f;
g_gxState.clearColor[1] = static_cast<float>(g) / 255.f;
g_gxState.stateDirty = true;
break;
}
case 0x51: {
g_gxState.clearDepth = bp_get(value, 24, 0);
g_gxState.stateDirty = true;
break;
}
// Texture mode/image registers (0x80-0xBB) - texture config
default:
if (regId >= 0x80 && regId <= 0xBB) {
// Texture format/wrap/filter configuration.
// These are handled pragmatically - GXLoadTexObj sets texture handles directly.
} else {
Log.warn("Unhandled BP register 0x{:02X} (value 0x{:06X})", regId, value & 0xFFFFFF);
}
// Silently ignore unknown BP registers
break;
}
}
+7 -1
View File
@@ -151,6 +151,7 @@ struct RenderPass {
TextureHandle resolveTarget;
ClipRect resolveRect;
Vec4<float> clearColor{0.f, 0.f, 0.f, 0.f};
float clearDepth = gx::UseReversedZ ? 0.f : 1.f;
CommandList commands;
bool clear = true;
};
@@ -269,6 +270,7 @@ void resolve_pass(TextureHandle texture, ClipRect rect, bool clear, Vec4<float>
currentPass.resolveRect = rect;
auto& newPass = g_renderPasses.emplace_back();
newPass.clearColor = clearColor;
newPass.clearDepth = g_renderPasses[g_currentRenderPass].clearDepth;
newPass.clear = clear;
++g_currentRenderPass;
}
@@ -507,6 +509,10 @@ void begin_frame() {
g_renderPasses.emplace_back();
g_renderPasses[0].clearColor = gx::g_gxState.clearColor;
{
float normalizedDepth = static_cast<float>(gx::g_gxState.clearDepth) / 16777215.f;
g_renderPasses[0].clearDepth = gx::UseReversedZ ? (1.f - normalizedDepth) : normalizedDepth;
}
g_currentRenderPass = 0;
// push_command(CommandType::SetViewport, Command::Data{.setViewport = g_cachedViewport});
// push_command(CommandType::SetScissor, Command::Data{.setScissor = g_cachedScissor});
@@ -586,7 +592,7 @@ void render(wgpu::CommandEncoder& cmd) {
.view = webgpu::g_depthBuffer.view,
.depthLoadOp = passInfo.clear ? wgpu::LoadOp::Clear : wgpu::LoadOp::Load,
.depthStoreOp = wgpu::StoreOp::Store,
.depthClearValue = gx::UseReversedZ ? 0.f : 1.f,
.depthClearValue = passInfo.clearDepth,
};
const auto label = fmt::format("Render pass {}", i);
const wgpu::RenderPassDescriptor renderPassDescriptor{
+30 -7
View File
@@ -167,15 +167,17 @@ struct TcgConfig {
static_assert(std::has_unique_object_representations_v<TcgConfig>);
// Fog state held in GXState: fog type, color, the A/B/C coefficients and the raw
// BP register words they are rebuilt from.
// NOTE(review): this listing appears to interleave pre- and post-change lines
// (startZ/endZ/nearZ/farZ next to a/b/c, and two return statements in
// operator==) — confirm against the real header before relying on it.
struct FogState {
GXFogType type = GX_FOG_NONE;
float startZ = 0.f;
float endZ = 0.f;
float nearZ = 0.f;
float farZ = 0.f;
// Fog coefficients. The defaults (0, 0.5, 0) are the values the degenerate-fog
// test expects after decoding.
float a = 0.f;
float b = 0.5f;
float c = 0.f;
Vec4<float> color;
// Raw encoded register values for A/B reconstruction across separate BP writes
u32 fog0Raw = 0; // 0xEE: encoded A parameter
u32 fog1Raw = 0; // 0xEF: B mantissa
u32 fog2Raw = 0; // 0xF0: B shift
// Equality does not look at the raw register words.
bool operator==(const FogState& rhs) const {
return type == rhs.type && startZ == rhs.startZ && endZ == rhs.endZ && nearZ == rhs.nearZ && farZ == rhs.farZ &&
color == rhs.color;
// NOTE(review): unreachable as written (follows another return) — presumably
// the intended replacement for the statement above; confirm.
return type == rhs.type && a == rhs.a && b == rhs.b && c == rhs.c && color == rhs.color;
}
bool operator!=(const FogState& rhs) const { return !(*this == rhs); }
};
@@ -204,11 +206,30 @@ struct AlphaCompare {
static_assert(std::has_unique_object_representations_v<AlphaCompare>);
// One indirect texture matrix together with its scale exponent.
// NOTE(review): `scaleExp` is declared twice below, which looks like an old and
// a new line of a diff shown together — confirm which declaration is current.
struct IndTexMtxInfo {
aurora::Mat3x2<float> mtx;
s8 scaleExp;
s8 scaleExp = 0;
// Accumulated adjScale bits from BP registers (2 bits per row, 3 rows)
u8 adjScaleRaw = 0;
// Equality compares the matrix and scaleExp only; adjScaleRaw is not compared.
bool operator==(const IndTexMtxInfo& rhs) const { return mtx == rhs.mtx && scaleExp == rhs.scaleExp; }
bool operator!=(const IndTexMtxInfo& rhs) const { return !(*this == rhs); }
};
// Decoded contents of one texture coordinate's pair of SU scale registers.
struct TexCoordScale {
  u16 scaleS = 0; // texture width - 1
  u16 scaleT = 0; // texture height - 1
  bool biasS = false;
  bool biasT = false;
  bool cylWrapS = false;
  bool cylWrapT = false;
  bool lineOffset = false;
  bool pointOffset = false;

  // Two entries are equal only when every field matches.
  bool operator==(const TexCoordScale& rhs) const {
    if (scaleS != rhs.scaleS || scaleT != rhs.scaleT) {
      return false;
    }
    if (biasS != rhs.biasS || biasT != rhs.biasT) {
      return false;
    }
    if (cylWrapS != rhs.cylWrapS || cylWrapT != rhs.cylWrapT) {
      return false;
    }
    return lineOffset == rhs.lineOffset && pointOffset == rhs.pointOffset;
  }
  bool operator!=(const TexCoordScale& rhs) const { return !operator==(rhs); }
};
struct VtxAttrFmt {
GXCompCnt cnt;
GXCompType type;
@@ -267,6 +288,7 @@ struct GXState {
GXLogicOp blendOp = GX_LO_CLEAR;
GXCompare depthFunc = GX_LEQUAL;
Vec4<float> clearColor{0.f, 0.f, 0.f, 1.f};
u32 clearDepth = 0xFFFFFF;
u32 dstAlpha; // u8; UINT32_MAX = disabled
AlphaCompare alphaCompare;
std::array<Vec4<float>, MaxTevRegs> colorRegs;
@@ -280,6 +302,7 @@ struct GXState {
std::array<TexMtxVariant, MaxTexMtx> texMtxs;
std::array<Mat3x4<float>, MaxPTTexMtx> ptTexMtxs;
std::array<TcgConfig, MaxTexCoord> tcgs;
std::array<TexCoordScale, MaxTexCoord> texCoordScales;
std::array<GXAttrType, MaxVtxAttr> vtxDesc;
std::array<VtxFmt, MaxVtxFmt> vtxFmts;
std::array<TevSwap, MaxTevSwap> tevSwapTable{
+1 -8
View File
@@ -317,14 +317,7 @@ Range build_uniform(const ShaderInfo& info) noexcept {
}
if (info.usesFog) {
const auto& state = g_gxState.fog;
Fog fog{.color = state.color};
if (state.nearZ != state.farZ && state.startZ != state.endZ) {
const float depthRange = state.farZ - state.nearZ;
const float fogRange = state.endZ - state.startZ;
fog.a = (state.farZ * state.nearZ) / (depthRange * fogRange);
fog.b = state.farZ / depthRange;
fog.c = state.startZ / fogRange;
}
Fog fog{.color = state.color, .a = state.a, .b = state.b, .c = state.c};
buf.append(fog);
}
for (int i = 0; i < info.sampledTextures.size(); ++i) {
+379
View File
@@ -1531,6 +1531,385 @@ TEST_F(GXFifoTest, ChanMatColor_Color1A1_Compound) {
EXPECT_NEAR(stateA.matColor[3], 128.f / 255.f, 1.f / 255.f);
}
// ============================================================================
// GXSetFog (BP 0xEE-0xF2) - Fog A/B/C parameters, type, and color
// ============================================================================
// --- Fog with perspective linear fog, typical parameters ---
TEST_F(GXFifoTest, Fog_PerspLin_Typical) {
  const float fogStart = 100.f, fogEnd = 900.f, zNear = 0.1f, zFar = 1000.f;
  GXColor tint = {128, 200, 255, 255};
  GXSetFog(GX_FOG_PERSP_LIN, fogStart, fogEnd, zNear, zFar, tint);
  auto fifo = capture_fifo();

  // Five BP writes (0xEE through 0xF2), each an opcode byte plus four data bytes.
  ASSERT_EQ(fifo.size(), 25u);
  const int fogRegs[] = {0xEE, 0xEF, 0xF0, 0xF1, 0xF2};
  for (int i = 0; i < 5; ++i) {
    EXPECT_EQ(fifo[i * 5], 0x61);
    EXPECT_EQ(fifo[i * 5 + 1], fogRegs[i]);
  }

  reset_gx_state();
  decode_fifo(fifo);

  // Reference coefficients from the SDK formula.
  const float wantA = (zFar * zNear) / ((zFar - zNear) * (fogEnd - fogStart));
  const float wantB = zFar / (zFar - zNear);
  const float wantC = fogStart / (fogEnd - fogStart);

  const auto& fog = g_gxState.fog;
  // Relative tolerance covers the precision lost to the 11-bit mantissa.
  EXPECT_NEAR(fog.a, wantA, std::abs(wantA) * 1e-3f);
  EXPECT_NEAR(fog.b, wantB, std::abs(wantB) * 1e-3f);
  EXPECT_NEAR(fog.c, wantC, std::abs(wantC) * 1e-3f);
  EXPECT_EQ(fog.type, GX_FOG_PERSP_LIN);
  EXPECT_NEAR(fog.color[0], 128.f / 255.f, 1.f / 255.f);
  EXPECT_NEAR(fog.color[1], 200.f / 255.f, 1.f / 255.f);
  EXPECT_NEAR(fog.color[2], 255.f / 255.f, 1.f / 255.f);
}
// --- Fog with degenerate parameters (nearZ == farZ) ---
TEST_F(GXFifoTest, Fog_Degenerate_EqualDepths) {
  GXColor black = {0, 0, 0, 255};
  GXSetFog(GX_FOG_PERSP_EXP, 0.f, 100.f, 10.f, 10.f, black);
  auto fifo = capture_fifo();

  reset_gx_state();
  decode_fifo(fifo);

  // With nearZ == farZ the SDK falls back to A = 0, B = 0.5, C = 0.
  const auto& fog = g_gxState.fog;
  EXPECT_FLOAT_EQ(fog.a, 0.f);
  EXPECT_NEAR(fog.b, 0.5f, 1e-3f);
  EXPECT_FLOAT_EQ(fog.c, 0.f);
  EXPECT_EQ(fog.type, GX_FOG_PERSP_EXP);
}
// --- Fog type: none ---
TEST_F(GXFifoTest, Fog_None) {
  GXColor grey = {64, 64, 64, 255};
  GXSetFog(GX_FOG_NONE, 0.f, 0.f, 0.f, 0.f, grey);
  auto fifo = capture_fifo();

  reset_gx_state();
  decode_fifo(fifo);

  const auto& fog = g_gxState.fog;
  EXPECT_EQ(fog.type, GX_FOG_NONE);
  EXPECT_FLOAT_EQ(fog.a, 0.f);
  EXPECT_NEAR(fog.b, 0.5f, 1e-3f);
  EXPECT_FLOAT_EQ(fog.c, 0.f);
  // All three color channels were set to 64.
  for (int ch = 0; ch < 3; ++ch) {
    EXPECT_NEAR(fog.color[ch], 64.f / 255.f, 1.f / 255.f);
  }
}
// --- Fog with perspective reverse exponential squared type ---
TEST_F(GXFifoTest, Fog_PerspRevExp2) {
  const float fogStart = 50.f, fogEnd = 500.f, zNear = 1.f, zFar = 1000.f;
  GXColor red = {255, 0, 0, 255};
  GXSetFog(GX_FOG_PERSP_REVEXP2, fogStart, fogEnd, zNear, zFar, red);
  auto fifo = capture_fifo();

  reset_gx_state();
  decode_fifo(fifo);

  // Reference coefficients from the SDK formula.
  const float wantA = (zFar * zNear) / ((zFar - zNear) * (fogEnd - fogStart));
  const float wantB = zFar / (zFar - zNear);
  const float wantC = fogStart / (fogEnd - fogStart);

  const auto& fog = g_gxState.fog;
  EXPECT_NEAR(fog.a, wantA, std::abs(wantA) * 1e-3f);
  EXPECT_NEAR(fog.b, wantB, std::abs(wantB) * 1e-3f);
  EXPECT_NEAR(fog.c, wantC, std::abs(wantC) * 1e-3f);
  EXPECT_EQ(fog.type, GX_FOG_PERSP_REVEXP2);
  EXPECT_NEAR(fog.color[0], 1.f, 1.f / 255.f);
  EXPECT_NEAR(fog.color[1], 0.f, 1.f / 255.f);
  EXPECT_NEAR(fog.color[2], 0.f, 1.f / 255.f);
}
// ============================================================================
// GXSetIndTexMtx (BP 0x06-0x0E) - Indirect texture matrix parameters
// ============================================================================
// --- IndTexMtx 0 with half-scale diagonal matrix ---
// Note: 11-bit signed range limits values to [-1.0, 0.999], so 1.0 is not representable.
TEST_F(GXFifoTest, IndTexMtx0_HalfScale) {
  f32 halfDiag[2][3] = {
      {0.5f, 0.0f, 0.0f},
      {0.0f, 0.5f, 0.0f},
  };
  GXSetIndTexMtx(GX_ITM_0, halfDiag, 0);
  auto fifo = capture_fifo();

  // Matrix 0 lives in BP registers 0x06-0x08: three 5-byte writes.
  ASSERT_EQ(fifo.size(), 15u);
  for (int row = 0; row < 3; ++row) {
    EXPECT_EQ(fifo[row * 5], 0x61);
    EXPECT_EQ(fifo[row * 5 + 1], 0x06 + row);
  }

  reset_gx_state();
  decode_fifo(fifo);

  const auto& decoded = g_gxState.indTexMtxs[0];
  // Elements are fixed-point in steps of 1/1024.
  const float eps = 1.0f / 1024.0f;
  EXPECT_NEAR(decoded.mtx.m0.x, 0.5f, eps);
  EXPECT_NEAR(decoded.mtx.m0.y, 0.0f, eps);
  EXPECT_NEAR(decoded.mtx.m1.x, 0.0f, eps);
  EXPECT_NEAR(decoded.mtx.m1.y, 0.5f, eps);
  EXPECT_NEAR(decoded.mtx.m2.x, 0.0f, eps);
  EXPECT_NEAR(decoded.mtx.m2.y, 0.0f, eps);
  EXPECT_EQ(decoded.scaleExp, 0);
}
// --- IndTexMtx 1 with fractional values and positive scale ---
TEST_F(GXFifoTest, IndTexMtx1_FractionalWithScale) {
  f32 src[2][3] = {
      {0.5f, 0.25f, -0.125f},
      {-0.5f, 0.75f, 0.0f},
  };
  GXSetIndTexMtx(GX_ITM_1, src, 3);
  auto fifo = capture_fifo();

  // Matrix 1 lives in BP registers 0x09-0x0B.
  ASSERT_EQ(fifo.size(), 15u);
  for (int row = 0; row < 3; ++row) {
    EXPECT_EQ(fifo[row * 5 + 1], 0x09 + row);
  }

  reset_gx_state();
  decode_fifo(fifo);

  const auto& decoded = g_gxState.indTexMtxs[1];
  const float eps = 1.0f / 1024.0f;
  // Each decoded row holds one column of the source: x from row 0, y from row 1.
  EXPECT_NEAR(decoded.mtx.m0.x, 0.5f, eps);
  EXPECT_NEAR(decoded.mtx.m0.y, -0.5f, eps);
  EXPECT_NEAR(decoded.mtx.m1.x, 0.25f, eps);
  EXPECT_NEAR(decoded.mtx.m1.y, 0.75f, eps);
  EXPECT_NEAR(decoded.mtx.m2.x, -0.125f, eps);
  EXPECT_NEAR(decoded.mtx.m2.y, 0.0f, eps);
  EXPECT_EQ(decoded.scaleExp, 3);
}
// --- IndTexMtx 2 with negative scale exponent ---
TEST_F(GXFifoTest, IndTexMtx2_NegativeScale) {
  f32 zeros[2][3] = {
      {0.0f, 0.0f, 0.0f},
      {0.0f, 0.0f, 0.0f},
  };
  GXSetIndTexMtx(GX_ITM_2, zeros, -5);
  auto fifo = capture_fifo();

  // Matrix 2 lives in BP registers 0x0C-0x0E.
  ASSERT_EQ(fifo.size(), 15u);
  for (int row = 0; row < 3; ++row) {
    EXPECT_EQ(fifo[row * 5 + 1], 0x0C + row);
  }

  reset_gx_state();
  decode_fifo(fifo);

  EXPECT_EQ(g_gxState.indTexMtxs[2].scaleExp, -5);
}
// --- IndTexMtx 0 does not affect matrix 1 ---
TEST_F(GXFifoTest, IndTexMtx0_Isolation) {
  f32 first[2][3] = {
      {0.5f, 0.0f, 0.0f},
      {0.0f, 0.5f, 0.0f},
  };
  f32 second[2][3] = {
      {-1.0f, 0.0f, 0.0f},
      {0.0f, -1.0f, 0.0f},
  };
  GXSetIndTexMtx(GX_ITM_0, first, 1);
  GXSetIndTexMtx(GX_ITM_1, second, -2);
  auto fifo = capture_fifo();

  reset_gx_state();
  decode_fifo(fifo);

  const float eps = 1.0f / 1024.0f;
  const auto& m0 = g_gxState.indTexMtxs[0];
  const auto& m1 = g_gxState.indTexMtxs[1];

  // Slot 0 keeps its own diagonal and exponent.
  EXPECT_NEAR(m0.mtx.m0.x, 0.5f, eps);
  EXPECT_NEAR(m0.mtx.m1.y, 0.5f, eps);
  EXPECT_EQ(m0.scaleExp, 1);

  // Slot 1 keeps its own diagonal and exponent.
  EXPECT_NEAR(m1.mtx.m0.x, -1.0f, eps);
  EXPECT_NEAR(m1.mtx.m1.y, -1.0f, eps);
  EXPECT_EQ(m1.scaleExp, -2);
}
// ============================================================================
// SU Texture Coordinate Scale (BP 0x30-0x3F)
// ============================================================================
// --- GXSetTexCoordScaleManually sets width/height ---
TEST_F(GXFifoTest, TexCoordScale_Manual_Coord0) {
  GXSetTexCoordScaleManually(GX_TEXCOORD0, GX_TRUE, 256, 128);
  auto fifo = capture_fifo();

  // suTs0[0] then suTs1[0]: two 5-byte BP writes.
  ASSERT_EQ(fifo.size(), 10u);
  for (int i = 0; i < 2; ++i) {
    EXPECT_EQ(fifo[i * 5], 0x61);
    EXPECT_EQ(fifo[i * 5 + 1], 0x30 + i);
  }

  reset_gx_state();
  decode_fifo(fifo);

  const auto& scale = g_gxState.texCoordScales[0];
  EXPECT_EQ(scale.scaleS, 255u); // 256 - 1
  EXPECT_EQ(scale.scaleT, 127u); // 128 - 1
}
// --- GXSetTexCoordScaleManually for coord 3 ---
TEST_F(GXFifoTest, TexCoordScale_Manual_Coord3) {
  GXSetTexCoordScaleManually(GX_TEXCOORD3, GX_TRUE, 512, 512);
  auto fifo = capture_fifo();

  ASSERT_EQ(fifo.size(), 10u);
  // Coord 3 maps to registers 0x30 + 3*2 and 0x31 + 3*2.
  EXPECT_EQ(fifo[1], 0x36);
  EXPECT_EQ(fifo[6], 0x37);

  reset_gx_state();
  decode_fifo(fifo);

  const auto& scale = g_gxState.texCoordScales[3];
  EXPECT_EQ(scale.scaleS, 511u);
  EXPECT_EQ(scale.scaleT, 511u);
}
// --- GXSetTexCoordScaleManually with bias and cyl wrap ---
TEST_F(GXFifoTest, TexCoordScale_BiasAndCylWrap) {
  // Manual mode has to be on for the bias / cyl-wrap setters to emit registers.
  GXSetTexCoordScaleManually(GX_TEXCOORD0, GX_TRUE, 64, 64);
  capture_fifo(); // discard

  GXSetTexCoordBias(GX_TEXCOORD0, GX_TRUE, GX_FALSE);
  auto biasFifo = capture_fifo();

  GXSetTexCoordCylWrap(GX_TEXCOORD0, GX_FALSE, GX_TRUE);
  auto wrapFifo = capture_fifo();

  // Each setter emits both SU registers of the coordinate.
  ASSERT_EQ(biasFifo.size(), 10u);
  ASSERT_EQ(wrapFifo.size(), 10u);

  reset_gx_state();
  decode_fifo(biasFifo);
  decode_fifo(wrapFifo);

  const auto& scale = g_gxState.texCoordScales[0];
  EXPECT_TRUE(scale.biasS);
  EXPECT_FALSE(scale.biasT);
  EXPECT_FALSE(scale.cylWrapS);
  EXPECT_TRUE(scale.cylWrapT);
}
// --- GXEnableTexOffsets ---
TEST_F(GXFifoTest, TexCoordScale_TexOffsets) {
  GXEnableTexOffsets(GX_TEXCOORD2, GX_TRUE, GX_TRUE);
  auto fifo = capture_fifo();

  // Only suTs0 is written: a single 5-byte BP write.
  ASSERT_EQ(fifo.size(), 5u);
  EXPECT_EQ(fifo[1], 0x34); // 0x30 + 2*2

  reset_gx_state();
  decode_fifo(fifo);

  const auto& scale = g_gxState.texCoordScales[2];
  EXPECT_TRUE(scale.lineOffset);
  EXPECT_TRUE(scale.pointOffset);
}
// --- Coord isolation: writing coord 0 doesn't affect coord 1 ---
TEST_F(GXFifoTest, TexCoordScale_Isolation) {
  GXSetTexCoordScaleManually(GX_TEXCOORD0, GX_TRUE, 100, 200);
  GXSetTexCoordScaleManually(GX_TEXCOORD1, GX_TRUE, 300, 400);
  auto fifo = capture_fifo();

  reset_gx_state();
  decode_fifo(fifo);

  // Each coordinate must hold only its own (size - 1) values.
  const auto& c0 = g_gxState.texCoordScales[0];
  const auto& c1 = g_gxState.texCoordScales[1];
  EXPECT_EQ(c0.scaleS, 99u);
  EXPECT_EQ(c0.scaleT, 199u);
  EXPECT_EQ(c1.scaleS, 299u);
  EXPECT_EQ(c1.scaleT, 399u);
}
// ============================================================================
// GXSetCopyClear (BP 0x4F-0x51) - Clear color and depth
// ============================================================================
// --- Clear color and depth round-trip ---
TEST_F(GXFifoTest, CopyClear_ColorAndDepth) {
  GXColor clear = {64, 128, 192, 255};
  GXSetCopyClear(clear, 0x00ABCDEF);
  auto fifo = capture_fifo();

  // Three BP writes in order: 0x4F (R + A), 0x50 (B + G), 0x51 (Z).
  ASSERT_EQ(fifo.size(), 15u);
  for (int i = 0; i < 3; ++i) {
    EXPECT_EQ(fifo[i * 5], 0x61);
    EXPECT_EQ(fifo[i * 5 + 1], 0x4F + i);
  }

  reset_gx_state();
  decode_fifo(fifo);

  const auto& rgba = g_gxState.clearColor;
  EXPECT_NEAR(rgba[0], 64.f / 255.f, 1.f / 255.f);
  EXPECT_NEAR(rgba[1], 128.f / 255.f, 1.f / 255.f);
  EXPECT_NEAR(rgba[2], 192.f / 255.f, 1.f / 255.f);
  EXPECT_NEAR(rgba[3], 255.f / 255.f, 1.f / 255.f);
  EXPECT_EQ(g_gxState.clearDepth, 0x00ABCDEFu);
}
// --- Clear with black and zero depth ---
TEST_F(GXFifoTest, CopyClear_BlackZeroDepth) {
  GXColor transparentBlack = {0, 0, 0, 0};
  GXSetCopyClear(transparentBlack, 0);
  auto fifo = capture_fifo();

  reset_gx_state();
  decode_fifo(fifo);

  // Every channel, alpha included, decodes to zero.
  for (int ch = 0; ch < 4; ++ch) {
    EXPECT_NEAR(g_gxState.clearColor[ch], 0.f, 1.f / 255.f);
  }
  EXPECT_EQ(g_gxState.clearDepth, 0u);
}
// --- Clear with max depth ---
TEST_F(GXFifoTest, CopyClear_MaxDepth) {
  GXColor halfAlphaWhite = {255, 255, 255, 128};
  GXSetCopyClear(halfAlphaWhite, 0xFFFFFF);
  auto fifo = capture_fifo();

  reset_gx_state();
  decode_fifo(fifo);

  const auto& rgba = g_gxState.clearColor;
  // RGB are saturated; alpha is 128.
  for (int ch = 0; ch < 3; ++ch) {
    EXPECT_NEAR(rgba[ch], 1.f, 1.f / 255.f);
  }
  EXPECT_NEAR(rgba[3], 128.f / 255.f, 1.f / 255.f);
  EXPECT_EQ(g_gxState.clearDepth, 0xFFFFFFu);
}
// ============================================================================
// Composite tests (multiple state changes in a single FIFO stream)
// ============================================================================