diff --git a/docs/Documentation/Backwards Compatibility.md b/docs/Documentation/Backwards Compatibility.md index 1e89bc0..8f46dbe 100644 --- a/docs/Documentation/Backwards Compatibility.md +++ b/docs/Documentation/Backwards Compatibility.md @@ -2,68 +2,210 @@ # Backwards Compatibility with F3DEX2 -## C GBI Compatibility - -F3DEX3 is backwards compatible with F3DEX2 at the C GBI level for all features -and commands except: - -- The viewport Y scale has been negated, and `G_MAXZ` has been renamed as its - value has changed. See the comment near `G_MAXZ` in the GBI. -- For the same reason, in `BrZ` configuration, any Z threshold values in - `SPBranchLessZ*` which are hard-coded into display lists (not based on - `G_MAXZ`) must be multiplied by 0x20. -- The `G_SPECIAL_*` command IDs have been removed. `G_SPECIAL_2` and - `G_SPECIAL_3` were no-ops in F3DEX2, and `G_SPECIAL_1` was a trigger to - recalculate the MVP matrix. There is no MVP matrix in F3DEX3 so this is - useless. -- `G_LINE3D` (and `Gfx.line`) has been removed. This command did not actually - work in F3DEX2 (it behaved as a no-op). -- `G_MW_CLIP` has been removed, and `SPClipRatio` has been converted into a - no-op. Clipping is handled differently in F3DEX3 and the clip ratio cannot be - changed from 2. -- `G_MV_MATRIX`, `G_MW_MATRIX`, and `G_MW_FORCEMTX` have been removed, and - `SPForceMatrix` has been converted into a no-op. This is because there is no - MVP matrix in F3DEX3. -- `G_MV_POINT` has been removed. This was not used in any command; it would have - likely been used for debugging to copy vertices from DMEM to examine them. - This does not affect `SPModifyVertex`, which is still supported. -- `G_MW_PERSPNORM` has been removed; `SPPerspNormalize` is still supported but - is encoded differently, no longer using this define. -- `G_MVO_LOOKATX` and `G_MVO_LOOKATY` have been removed, and `SPLookAtX` and - `SPLookAtY` are deprecated. 
`SPLookAtX` has been changed to set both - directions and `SPLookAtY` has been converted to a no-op. To set the lookat - directions, use `SPLookAt`. The lookat directions are now in one 8-byte DMA - word, so they must always be set at the same time as each other. Most of the - non-functional fields (e.g. color) of `LookAt` and its sub-types have been - removed, so code which accesses these fields needs to change. Code which only - accesses lookat directions should be compatible with no changes. -- As discussed above, the `pad1` field of `Light_t` is renamed to `type` and - must be set to zero. -- If you do not raise the maximum number of lights from 7 to 9, the lighting GBI - commands are backwards compatible. However, if you do raise the number of - lights, you must use `SPAmbient` to write the ambient light, as discussed - above. Note that you can now load all your lights with one command, - `SPSetLights`, so it is not usually necessary to use `SPLight` and `SPAmbient` - at all. - -## Binary Display List Compatibility +F3DEX3 is backwards compatible with F3DEX2 at the C GBI level for almost all +features and commands. See @ref porting for the relatively small list of code +changes you have to make to your romhack codebase to move from F3DEX2 to F3DEX3. +Also, some relatively obscure internal GBI definitions have been removed. F3DEX3 is generally binary backwards compatible with OoT-style display lists for -objects, scenes, etc. **It is not compatible at the binary level with SM64-style +objects, scenes, etc. **It is not binary compatible with vanilla SM64-style display lists which encode object colors as light colors**, as all the command -encodings related to lighting have changed. Of course, if you recompile these -display lists with the new `gbi.h`, it can run them. +encodings related to lighting have changed. -The deprecated commands mentioned above in the C GBI section have had their -encodings changed (the original encodings will do bad things / crash). 
In -addition, all lighting-related commands--e.g. `gdSPDefLights*`, `SPNumLights`, -`SPLight`, `SPLightColor`, `SPLookAt`--have had their encodings changed, making -them binary incompatible. The lighting data structures, e.g. `Light_t`, -`PosLight_t`, `LookAt_t`, `Lightsn`, `Lights*`, `PosLights*`, etc., have also -changed--generally only slightly, so most code is compatible with no changes. +## GBI Changes Reference -`SPSegment` has been given a different command id (`G_RELSEGMENT` vs. -`G_MOVEWORD`) to facilitate relative segmented address translation. The -original binary encoding is still valid, but does not support relative -translation like the new encoding. However, recompiling with the C GBI will -always use the new encoding. +This is a reference if you run into GBI-related problems when building your +romhack after porting it to F3DEX3, or for HLE emulator authors implementing +changes from F3DEX2 to F3DEX3. The "Bin" and "C" columns indicate whether there +have been changes from F3DEX2 to F3DEX3 in binary encoding and C GBI usage +respectively. The "Perf" column indicates whether the performance of this +command (or the overall game performance if this command is used properly) has +significantly increased or decreased relative to F3DEX2 in a way that the +programmer should be aware of. The `g`, `gs`, or `gd` prefixes are all omitted, +e.g. `SPMatrix` refers to `gSPMatrix` and `gsSPMatrix`. `*` means wildcard. + +### RDP Commands + +| Command | Bin | C | Perf | Notes | +|----------------------|-----|-----|------|-------| +| `DPLoadTLUT*` | = | = | Up | Load is not sent to RDP if repeated in auto-batched rendering. See the GBI comment near `SPDontSkipTexLoadsAcross`. This is a performance optimization only and doesn't affect on-screen output unless the game is buggy / misusing the feature, so this behavior need not be emulated in HLE. | +| `DPLoadBlock*` | = | = | Up | Same as `DPLoadTLUT*` above.
| +| `DPLoadTile*` | = | = | Up | Same as `DPLoadTLUT*` above. | +| `SPSetOtherMode` | = | = | | | +| All other `DP*` | = | = | | Microcode generally can't change RDP command behavior. | + +### Main Drawing + +| Command | Bin | C | Perf | Notes | +|----------------------|-----|-----|------|-------| +| `SPVertex` | = | = | Up | Vertex buffer size in F3DEX3 is 56, up from 32 in F3DEX2. Also, many of the new features in F3DEX3 (new lighting, occlusion plane, etc.) are during `SPVertex` processing. | +| `Vtx_t` / `Vtx` | * | * | | Generally the same, but packed normals go in the `flag` field if enabled. | +| `SPModifyVertex` | = | = | | | +| `G_MWO_POINT_RGBA` | = | = | | | +| `G_MWO_POINT_ST` | = | = | | | +| `G_MWO_POINT_XYSCREEN` | = | = | | | +| `G_MWO_POINT_ZSCREEN` | = | = | | | +| `G_MV_POINT` | Rem | Rem | | Removed because the internal vertex format is no longer a multiple of 8 (DMA word). | +| `SPTexture` | = | = | | | +| `SPTextureL` | = | = | | HW V1 workaround; long since deprecated. | +| `SP1Triangle` | = | = | Up | Some of the new features in F3DEX3 (occlusion plane, alpha compare culling, decal fix) are during triangle processing. | +| `SP2Triangles` | = | = | Up | Same as `SP1Triangle` above. | +| `SP1Quadrangle` | = | = | Up | Same as `SP1Triangle` above. | +| `SPTriStrip` | New | New | Up | New command that draws 5 tris from 7 indexes, see GBI. | +| `SPTriFan` | New | New | Up | New command that draws 5 tris from 7 indexes, see GBI. | +| `SPMemset` | New | New | Up | New command that memsets a RDRAM region faster than the RDP can, for framebuffer or Z-buffer clear. | +| `G_LINE3D` | Rem | Rem | | Removed; no-op in F3DEX2.
| + +### Control Logic + +| Command | Bin | C | Perf | Notes | +|----------------------|-----|-----|------|-------| +| `SPNoOp` | = | = | | | +| `SPDisplayList*` | = | = | | Hints are encoded into previously unused bits, but this is a performance optimization only and will never affect on-screen output, so the hints encoding can be ignored by HLE. | +| `G_DL_PUSH` | = | = | | | +| `SPBranchList*` | = | = | | Same as `SPDisplayList*` above. | +| `G_DL_NOPUSH` | = | = | | | +| `SPEndDisplayList*` | = | = | | Same as `SPDisplayList*` above. | +| `SPCullDisplayList` | = | = | | | +| `SPBranchLess*` | * | * | | In `BrZ` configuration, Z threshold values which are hard-coded into display lists (not based on `G_MAXZ`) must be multiplied by 0x20. See `G_MAXZ` below. | +| `SPLoadUcode*` | = | = | | Note that F3DEX3_PC (CFG_PROFILING_C) may have compatibility problems with other microcodes. It is specially designed to work with S2DEX for OoT but other microcodes are not guaranteed to work. This is not a limitation in other F3DEX3 variants. | +| `SPDma*` | = | = | Down | Moved to Overlay 3 (slower) as it is rarely used. HLE can't emulate this by definition so must treat it as a no-op; games therefore use it for HLE/LLE detection. | +| `SPSegment` | * | * | | F3DEX3 supports F3DEX2 binary encoding for SPSegment, but this does not have the relative segment resolution behavior. The new behavior is obtained with the new command encoding with `G_RELSEGMENT`. | +| `G_MW_SEGMENT` | = | = | | | +| `G_MWO_SEGMENT_*` | = | = | | These were never needed. | +| `SPFlush` | New | New | Up | This is a performance optimization only and can't be HLE emulated, so it should be treated as a no-op. | +| `G*` (`Gfx` subtypes) | ? | ? | | Deprecated. These did not fully reflect the bits usage in actual commands even in F3DEX2. These have mostly not been updated for F3DEX3. 
| + +### 3D Space + +| Command | Bin | C | Perf | Notes | +|----------------------|-----|-----|------|-------| +| `Mtx` | = | = | | | +| `SPMatrix` | Chg | = | * | Encoding changed due to multiple flags below changing. | +| `G_MTX_PUSH` | = | = | Down | `SPMatrix` processing with `G_MTX_PUSH` set is moved to Overlay 3 (slower) as games should not use the RSP matrix stack for accuracy and performance reasons (see GBI). | +| `G_MTX_NOPUSH` | = | = | | | +| `G_MTX_LOAD` | Chg | = | | Encoding inverted (in SPMatrix, not in the definition of `G_MTX_LOAD`). | +| `G_MTX_MUL` | Chg | = | | Encoding inverted (in SPMatrix, not in the definition of `G_MTX_MUL`). | +| `G_MTX_MODEL` | = | New | | New name for `G_MTX_MODELVIEW` as the view matrix must be multiplied into the projection matrix stack in F3DEX3. | +| `G_MTX_VIEWPROJECTION` | = | New | | New name for `G_MTX_PROJECTION` as the view matrix must be multiplied into the projection matrix stack in F3DEX3. | +| `G_MV_MMTX` | Chg | New | | Encoding changed. | +| `G_MV_TEMPMTX0` | Chg | = | | Encoding changed. | +| `G_MV_VPMTX` | Chg | New | | New name for `G_MV_PMTX`, encoding changed. | +| `G_MV_TEMPMTX1` | Chg | = | | Encoding changed. | +| `SPPopMatrix*` | Chg | = | Down | Moved to Overlay 3 (slower) as games should not use the RSP matrix stack for accuracy and performance reasons (see GBI). Encoding is changed due to `G_MV_MMTX` changing. | +| `SPForceMatrix` | Chg | Chg | | Converted into no-op. | +| `G_MV_MATRIX` | Rem | Rem | | Removed. | +| `G_MW_MATRIX` | Rem | Rem | | Removed. | +| `G_MW_FORCEMTX` | Rem | Rem | | Removed. | +| `SPViewport` | * | * | | Command itself is the same, but see `Vp` below. | +| `Vp_t` / `Vp` | Chg | Chg | | The Y scale is now negated, and the Z values are different due to the change from `G_MAXZ` to `G_NEW_MAXZ`. | +| `G_MAXZ` | Rem | Rem | | Replaced with `G_NEW_MAXZ`.
The name change is to force you to update your code--especially viewport definitions with hardcoded constants which are NOT defined in terms of `G_MAXZ`. | +| `G_NEW_MAXZ` | New | New | | The equivalent of `G_MAXZ` constant used in viewport calculations. | +| `G_MV_VIEWPORT` | = | = | | | +| `SPPerspNormalize` | Chg | = | | Encoding changed. | +| `G_MW_PERSPNORM` | Rem | Rem | | Removed. The perspective normalization factor is set via `G_MW_FX` with the changed encoding of `SPPerspNormalize`. | +| `G_MWO_PERSPNORM` | New | New | | | +| `SPClipRatio` | Chg | Chg | | Converted into no-op. It is not possible to change the clip ratio from 2 in F3DEX3. | +| `G_MW_CLIP` | Rem | Rem | | Removed. See `SPClipRatio` above. | + +### Lighting + +| Command | Bin | C | Perf | Notes | +|----------------------|-----|-----|------|-------| +| `Light_t`, `Light` | Chg | * | | `type` field must be set to 0 (`LIGHT_TYPE_DIR`) to indicate directional light. `size` field for specular added. Otherwise the same, though note that now there is not an extra 8 bytes of padding between lights (the offset between them is 16, not 24). | +| `LIGHT_TYPE_DIR` | New | New | | New macro, but the encoding is the same as F3DEX2_PL. | +| `PointLight_t` | Chg | * | | Same changes as `Light_t`. Also note that the `kq` field is now interpreted as an E3M5 floating-point number. | +| `LIGHT_TYPE_POINT` | New | New | | New macro, but the encoding is the same as F3DEX2_PL. | +| `Ambient_t`, `Ambient` | = | = | | Note that you must use `Ambient`, not `Light`, for the ambient light if you have 9 directional/point lights. | +| `Lights1`, `Lights2`, ... | Chg | * | | The ambient light is at the end, not the beginning. The data layout matches the RSP internal data layout to enable `SPSetLights`. | +| `Lightsn` | Chg | * | | Same as `Lights1` etc. Also, now 9 directional/point lights. | +| `Lights0` | Chg | Chg | | Now only contains Ambient (no Light) because F3DEX3 properly supports zero directional/point lights. 
| +| `SPDefLights*` | Chg | = | | Matches changes in `Lights*`. Also, there is no need for these in a game with a real lighting engine. | +| `SPDefPointLights*` | Chg | = | | Matches changes in `Lights*`. Also, there is no need for these in a game with a real lighting engine. | +| `SPNumLights` | Chg | Chg | | Encoding changed. `ENABLE_POINT_LIGHTS` can now be included. Zero lights is properly supported unlike in F3DEX2. The maximum number of point/directional lights is 9, up from 7. | +| `G_MW_NUMLIGHT` | = | = | | | +| `G_MWO_NUMLIGHT` | = | = | | | +| `NUML` | Chg | = | | Encoding changed. | +| `NUMLIGHTS_*` | Chg | = | | Deprecated as these are just defined equal to their number, because F3DEX3 supports zero lights. | +| `LIGHT_*` | = | = | | Deprecated and were never useful. | +| `SPLight` | Chg | = | | Encoding changed. Note that you must use `SPAmbient`, not `SPLight`, for the ambient light if you have 9 directional/point lights. Also note that you should usually use `SPSetLights` unless you need to set individual lights without affecting the others. | +| `SPAmbient` | New | New | | New command to upload the ambient light. If you have 0-8 directional/point lights, you can also use `SPLight` for this (slightly slower), but if you have 9 directional/point lights you must use `SPAmbient`. | +| `SPLightColor*` | Chg | = | | Encoding changed. | +| `G_MW_LIGHTCOL` | = | = | | | +| `G_MV_LIGHT` | = | = | | | +| `SPSetLights*` | Chg | * | Up | Implementation completely different from F3DEX2, uses one DMA transaction regardless of the number of lights. In C, you can/should use dynamically allocated memory for the `Lights*` struct being uploaded, as opposed to `SPDefLights*`, but you need to dereference the pointer passed to `SPSetLights*`. | +| `G_MWO_aLIGHT_*` | Chg | = | | Encodings changed. No longer needed. | +| `G_MWO_bLIGHT_*` | Chg | = | | Encodings changed. No longer needed. | +| `G_MVO_L*` | Rem | Rem | | Removed. 
| +| `SPCameraWorld` | New | New | | New command to set the camera position for Fresnel. | +| `PlainVtx` | New | New | | For `SPCameraWorld`. | +| `SPLookAt` | New | New | | Replaces `SPLookAtX` and `SPLookAtY`. | +| `SPLookAtX` | Chg | * | | Encoding changed; in an attempt at backwards compatibility, defined as `SPLookAt`, which works with basic usage. | +| `SPLookAtY` | Chg | * | | Converted to no-op. | +| `G_MVO_LOOKAT*` | Rem | Rem | | Removed with `SPLookAt` changes. | +| `LookAt_t`, `LookAt` | Chg | * | | The size is different and most of the non-functional fields have been removed. Code which only accesses the functional fields does not need to change. | +| `Hilite_t`, `Hilite` | = | = | | | +| `SPFog*` | = | = | | | +| `G_MW_FOG` | = | = | | | +| `G_MWO_FOG` | = | = | | | + +### Geometry Mode and New Effect Parameters + +| Command | Bin | C | Perf | Notes | +|--------------------------|-----|-----|------|-------| +| `SP*GeometryMode*` | * | * | | Commands themselves are the same, but many new geometry mode flags, see below. | +| `G_ZBUFFER` | = | = | | | +| `G_TEXTURE_ENABLE` | = | = | | Very old (F3D / HW v1) display lists with this bit set will no longer crash on F3DEX3, unlike F3DEX2. | +| `G_SHADE` | = | = | | | +| `G_ATTROFFSET_ST_ENABLE` | New | New | | New geometry mode bit that enables ST attribute offsets, usually for smooth scrolling. | +| `SPAttrOffsetST` | New | New | | New command which writes ST attribute offsets using `G_MWO_ATTR_OFFSET_*`. | +| `G_MWO_ATTR_OFFSET_S` | New | New | | | +| `G_MWO_ATTR_OFFSET_T` | New | New | | | +| `G_AMBOCCLUSION` | New | New | | | +| `SPAmbOcclusion*` | New | New | | New commands which write ambient occlusion parameters using `G_MWO_AO_*`.
| +| `G_MWO_AO_AMBIENT` | New | New | | | +| `G_MWO_AO_DIRECTIONAL` | New | New | | | +| `G_MWO_AO_POINT` | New | New | | | +| `G_CULL_NEITHER` | = | = | | | +| `G_CULL_FRONT` | = | = | | | +| `G_CULL_BACK` | = | = | | | +| `G_CULL_BOTH` | = | = | | | +| `G_PACKED_NORMALS` | New | New | | New geometry mode bit that enables packed normals (simultaneous lighting and vertex colors). | +| `G_LIGHTTOALPHA` | New | New | | New geometry mode bit that moves the maximum of the three light color channels to shade alpha, usually for cel shading. | +| `G_LIGHTING_SPECULAR` | New | New | | New geometry mode bit that changes lighting from diffuse to specular. | +| `G_FRESNEL_COLOR` | New | New | | New geometry mode bit that computes Fresnel and places it in all three shade color channels. | +| `G_FRESNEL_ALPHA` | New | New | | New geometry mode bit that computes Fresnel and places it in shade alpha. | +| `SPFresnel*` | New | New | | New commands which write Fresnel parameters using `G_MWO_FRESNEL_*`. | +| `G_MWO_FRESNEL_SCALE` | New | New | | | +| `G_MWO_FRESNEL_OFFSET` | New | New | | | +| `G_FOG` | = | = | | | +| `G_LIGHTING` | = | = | | | +| `G_TEXTURE_GEN` | = | = | | | +| `G_TEXTURE_GEN_LINEAR` | = | = | | | +| `G_LOD` | = | = | | Ignored by all F3DEX* variants. | +| `G_SHADING_SMOOTH` | = | = | | | +| `G_LIGHTING_POSITIONAL` | Chg | Chg | | This bit is ignored by F3DEX3--both in order to allow point lighting on all vanilla geometry, and because the F3DEX2_PL design of having this as a property of an object/model rather than a property of the lights state is poor design. In F3DEX3, whether point lights are present or not is determined by the `ENABLE_POINT_LIGHTS` flag in `SPNumLights` and `SPSetLights*`. | +| `G_CLIPPING` | = | = | | Ignored by all F3DEX* variants. | + +### Miscellaneous + +| Command | Bin | C | Perf | Notes | +|----------------------|-----|-----|------|-------| +| `SPOcclusionPlane` | New | New | | New command that uploads the occlusion plane coefficients. 
| +| `OcclusionPlane*` | New | New | | Structs for occlusion plane. | +| `SPLightToRDP` | New | New | | New command that copies RSP light color to RDP color, see GBI. | +| `SPLightToPrimColor` | New | New | | Same as `SPLightToRDP` above. | +| `SPLightToFogColor` | New | New | | Same as `SPLightToRDP` above. | +| `SPDontSkipTexLoadsAcross` | New | New | Up | New command which locally cancels auto-batched rendering by writing an invalid address to `G_MWO_LAST_MAT_DL_ADDR`. | +| `G_MWO_LAST_MAT_DL_ADDR` | New | New | | | +| `SPAlphaCompareCull` | New | New | Up | New command which enables culling of tris based on shade alpha values, for cel shading. Normal use of this command in cel shading is a performance optimization only and doesn't affect on-screen output, so it can be treated as a no-op by an initial HLE implementation. But it is easy to write a display list where it does affect on-screen output, so a good HLE implementation should emulate it. | +| `G_ALPHA_COMPARE_CULL_DISABLE` | New | New | | Settings for `SPAlphaCompareCull`. | +| `G_ALPHA_COMPARE_CULL_BELOW` | New | New | | Settings for `SPAlphaCompareCull`. | +| `G_ALPHA_COMPARE_CULL_ABOVE` | New | New | | Settings for `SPAlphaCompareCull`. | +| `G_MWO_ALPHA_COMPARE_CULL` | New | New | | | +| `MoveWd` | = | = | | Regular/valid encodings are the same. | +| `MoveHalfwd` | New | New | | Like `MoveWd` but writes 2 bytes instead of 4. | +| `G_MW_FX` | New | New | | New moveword table index for base address for many parameters. | +| `G_SPECIAL_1` | Rem | Rem | | Removed; in F3DEX2, triggered MVP matrix recalculation. | +| `G_SPECIAL_2` | Rem | Rem | | Removed; no-op in F3DEX2. | +| `G_SPECIAL_3` | Rem | Rem | | Removed; no-op in F3DEX2. 
| diff --git a/f3dex3.s b/f3dex3.s index 21d5dad..e371f43 100644 --- a/f3dex3.s +++ b/f3dex3.s @@ -853,6 +853,7 @@ vOne equ $v28 // All elements = 1; global // Vertex / lighting vector regs: // Prefixes: v = vector register, vp = vertex pair, s = vertex store, // l = basic lighting, a = advanced lighting +// Sadly, "vp" stands for vertex pair, view*projection matrix, and viewport vMTX0I equ $v0 // Matrix rows int/frac; MVP normally, or M in ltadv vMTX1I equ $v1 @@ -960,7 +961,7 @@ sSTS equ vPerm4 // ltadv: aPNScl equ $v8 // ltadv Packed Normals Scales = (1<<0),(1<<5),(1<<11),XX, repeat aNrmSc equ $v9 // ltadv Normals Scale = [0h:1h] scale to normalize all normals; elems 2,3,6,7 used for point light factors -aLen2F equ $v10 // ltadv Length 2quared Frac part +aDOT equ $v10 // ltadv Dot product = normals dot direction; also briefly light dir aLen2I equ $v11 // ltadv Length 2quared Int part // Uses vpMdl = $v12 vpWrlF equ $v13 // vertex pair World position Frac part @@ -971,20 +972,20 @@ aOAFrs equ $v17 // ltadv Offset Alpha (elem 3,7) and Fresnel (elem 0,4) // Uses vpRGBA, vpLtTot, vpNrmlX, vpNrmlY, vpNrmlZ = $v18, $v19, $v20, $v21, $v22 aParam equ $v23 // ltadv Parameters = AO, texgen, and Fresnel params -aAOF2 equ aLen2F // Version of aAOF in init, can't be aDPosI/F or vpMdl there -aPLFcF equ aLen2F // ltadv Point Light Factor Frac part -aLTC equ aLen2F // ltadv Light Color -aLkDt0 equ aLen2F // ltadv Lookat Dot product 0 for texgen +aAOF2 equ aDOT // Version of aAOF in init, can't be aDPosI/F or vpMdl there aPLFcI equ aLen2I // ltadv Point Light Factor Int part -aLkDt1 equ aLen2I // ltadv Lookat Dot product 1 for texgen -aDOT equ vpMdl // ltadv Dot product = normals dot direction; also briefly light dir +aLen2F equ vpMdl // ltadv Length 2quared Frac part +aPLFcF equ vpMdl // ltadv Point Light Factor Frac part +aLTC equ vpMdl // ltadv Light Color aClOut equ vpWrlF // ltadv Color Out aAlOut equ vpWrlI // ltadv Alpha Out aDIR equ aDPosF // ltadv Direction = 
normalize(light or cam - vertex) aDotSc equ aDPosF // ltadv Dot product Scale factor +aLkDt0 equ aDPosF // ltadv Lookat Dot product 0 for texgen aLenF equ aDPosI // ltadv Length Frac part aAOF equ aDPosI // ltadv Ambient Occlusion Factor aProj equ aDPosI // ltadv Projection +aLkDt1 equ aDPosI // ltadv Lookat Dot product 1 for texgen // vpST equ aOAFrs // ST used in texgen vpWNrm equ vpNrmlX // vertex pair World space Normals aRcpLn equ $v29 // ltadv Reciprocal of Length @@ -1263,6 +1264,7 @@ G_MODIFYVTX_handler: j do_moveword // Moveword adds cmd_w0 to $10 for final addr lbu cmd_w0, (inputBufferEnd - 0x07)(inputBufferPos) // offset in vtx, bit 15 clear +// TODO check vtx 1 behavior G_TRIFAN_handler: // 17 li $1, 0x8000 // $ra negative = flag for G_TRIFAN G_TRISTRIP_handler: @@ -3376,7 +3378,36 @@ ovl234_clipmisc_entrypoint_ovl4ver: // same IMEM address as ovl234_clipmi jal load_overlays_2_3_4 // Not a call; returns to $ra-8 = here li cmd_w1_dram, orga(ovl3_start) // set up a load for overlay 3 -ltadv_after_mtx: +ltadv_spec_fres_setup: // Odd instruction + // Get aDIR = normalize(camera - vertex), aDOT = (vpWNrm dot aDIR) + ldv aDPosI[0], (cameraWorldPos - altBase)(altBaseReg) // Camera world pos + j ltadv_normal_to_vertex + ldv aDPosI[8], (cameraWorldPos - altBase)(altBaseReg) +ltadv_after_camera: + vmov aOAFrs[0], aDOT[0] // Save Fresnel dot product in aOAFrs[0h] + vmov aOAFrs[4], aDOT[4] // elems 0, 4 + bgez laSpecular, ltadv_loop // Sign bit clear = not specular + li laSpecFres, 0 // Clear flag for specular or fresnel +// aProj <- aLenF + vmulf aProj, vpWNrm, aDOT[0h] // Projection of camera vec onto normal + vmudh $v29, aDIR, $v31[1] // -camera vec + j ltadv_normals_to_regs // For specular, replace vpWNrm with reflected vector + vmadh vpWNrm, aProj, $v31[3] // + 2 * projection + // aDPosI <- aProj + +ltadv_xfrm: // Even instruction + vmudn $v29, vMTX0F, vpMdl[0h] + lbu curLight, numLightsxSize // Scalar instructions here must be OK to do twice + vmadh $v29,
vMTX0I, vpMdl[0h] + luv vpRGBA, (VTX_IN_TC + 0 * inputVtxSize)(laPtr) // Vtx 2:1 RGBA + vmadn $v29, vMTX1F, vpMdl[1h] + vmadh $v29, vMTX1I, vpMdl[1h] + addi curLight, curLight, altBase // Point to ambient light + vmadn aDPosF, vMTX2F, vpMdl[2h] + jr $ra + vmadh aDPosI, vMTX2I, vpMdl[2h] + +ltadv_after_mtx: // Even instruction move laPtr, inVtx vcopy aPNScl, vOne move laVtxLeft, vtxLeft @@ -3389,55 +3420,63 @@ ltadv_after_mtx: ltadv_continue_setup: lqv aParam, (fxParams - altBase)(altBaseReg) vcopy aNrmSc, aRcpLn // aRcpLn[0:1] is int:frac scale (1 / length) - andi $11, vGeomMid, G_AMBOCCLUSION >> 8 + lsv aPNScl[6], (packedNormalsMaskConstant - altBase)(altBaseReg) // F800 vge $v29, $v31, $v31[3] // Set VCC to 00011111 + andi $11, vGeomMid, G_AMBOCCLUSION >> 8 bnez $11, @@skip_zero_ao andi laL2A, vGeomMid, G_LIGHTTOALPHA >> 8 vmrg aParam, aParam, $v31[2] // 0 @@skip_zero_ao: jal while_wait_dma_busy andi laTexgen, vGeomMid, G_TEXTURE_GEN >> 8 -ltadv_vtx_loop: ldv vpMdl[0], (VTX_IN_OB + 1 * inputVtxSize)(laPtr) // Vtx 2 Model pos + PN ldv vpMdl[8], (VTX_IN_OB + 0 * inputVtxSize)(laPtr) // Vtx 1 Model pos + PN +align_with_warning 8, "One instruction of padding before ltadv_vtx_loop" +ltadv_vtx_loop: // Even instruction + vmudm $v29, aPNScl, vpMdl[3h] // Packed normals from elem 3,7 of model pos lw $11, (VTX_IN_CN + 1 * inputVtxSize)(laPtr) // Vtx 2 RGBA + vmadn vpNrmlY, $v31, $v31[2] // 0; load lower (vpMdl unsigned but must be T operand) lw laSTKept,(VTX_IN_TC + 0 * inputVtxSize)(laPtr) // Vtx 1 ST + vand vpNrmlX, vpMdl, aPNScl[3] // 0xF800; X component masked in elem 3, 7 jal ltadv_xfrm sw $11, (VTX_IN_TC + 0 * inputVtxSize)(laPtr) // Vtx 2 RGBA -> Vtx 1 ST vmadn vpWrlF, vMTX3F, vOne // Finish vertex pos transform - andi laPacked, vGeomMid, G_PACKED_NORMALS >> 8 vmadh vpWrlI, vMTX3I, vOne - luv vpLtTot, (ltBufOfs + 0)(curLight) // Total light level, init to ambient + andi laPacked, vGeomMid, G_PACKED_NORMALS >> 8 // aOAFrs <- vpST vsub aOAFrs, vpRGBA, $v31[7] // 
0x7FFF; offset alpha elems 3, 7 - vmudm $v29, aPNScl, vpMdl[3h] // Packed normals from elem 3,7 of model pos - bnez laPacked, @@skip_regular_normals - vmadn vpMdl, $v31, $v31[2] // 0; load lower (vpMdl unsigned but must be T operand) - lpv vpMdl, (VTX_IN_TC + 0 * inputVtxSize)(laPtr) // Vtx 2:1 regular normals -@@skip_regular_normals: + luv vpLtTot, (ltBufOfs + 0)(curLight) // Total light level, init to ambient + vne $v29, $v31, $v31[0h] // Set VCC to 01110111 + beqz laPacked, @@skip_packed_normals + lpv vpMdl, (VTX_IN_TC + 0 * inputVtxSize)(laPtr) // Vtx 2:1 regular normals + vmrg vpMdl, vpNrmlY, vpNrmlX[3h] // Masked X to 0, 4; multiplied Y, Z in 1, 2, 5, 6 +@@skip_packed_normals: vmudh $v29, vOne, $v31[7] // Load accum mid with 0x7FFF (1 in s.15) jal ltadv_xfrm +// aAOF2 <- aDOT vmadm aAOF2, aOAFrs, aParam[0] // + (alpha - 1) * aoAmb factor; elems 3, 7 -// aDOT <- vpMdl +// aLTC <- vpMdl vmulf vpLtTot, vpLtTot, aAOF2[3h] // light color *= ambient factor -// aLTC <- aAOF2 +// aDOT <- aAOF2 vmudn $v29, aDPosF, aNrmSc[0h] // Vec frac * int scaling, discard result // aDIR <- aDPosF addi laPtr, laPtr, 2 * inputVtxSize vmadm $v29, aDPosI, aNrmSc[1h] // Vec int * frac scaling, discard result addi laVtxLeft, laVtxLeft, -2 * inputVtxSize +// vpWNrm <- vpNrmlX vmadh vpWNrm, aDPosI, aNrmSc[0h] // Vec int * int scaling sll laSpecular, vGeomMid, (31 - 5) // G_LIGHTING_SPECULAR to sign bit vmudn vpWrlF, vpWrlF, $v31[1] // -1; negate world pos so add light/cam pos to it andi laSpecFres, vGeomMid, (G_LIGHTING_SPECULAR | G_FRESNEL_COLOR | G_FRESNEL_ALPHA) >> 8 vmadh vpWrlI, vpWrlI, $v31[1] // -1 ltadv_normals_to_regs: -// vpNrmlX <- vpWNrm vmudh vpNrmlY, vOne, vpWNrm[1h] // Move normals to separate registers bnez laSpecFres, ltadv_spec_fres_setup vmudh vpNrmlZ, vOne, vpWNrm[2h] // per component, in elems 0-3, 4-7 +// vpNrmlX <- vpWNrm // aAOF <- aDPosI -ltadv_loop: +align_with_warning 8, "One instruction of padding before ltadv_loop" +ltadv_loop: // Even instruction vmudh $v29, 
vOne, $v31[7] // Load accum mid with 0x7FFF (1 in s.15) lbu $11, (ltBufOfs + 3 - lightSize)(curLight) // Light type / constant attenuation vmadm aAOF, aOAFrs, aParam[1] // + (alpha - 1) * aoDir factor; elems 3, 7 @@ -3457,35 +3496,6 @@ ltadv_finish_light: j ltadv_loop vmacf vpLtTot, aLTC, aDOT[0h] // + light color * dot product -ltadv_xfrm: - vmudn $v29, vMTX0F, vpMdl[0h] - lbu curLight, numLightsxSize // Scalar instructions here must be OK to do twice - vmadh $v29, vMTX0I, vpMdl[0h] - luv vpRGBA, (VTX_IN_TC + 0 * inputVtxSize)(laPtr) // Vtx 2:1 RGBA - vmadn $v29, vMTX1F, vpMdl[1h] - vmadh $v29, vMTX1I, vpMdl[1h] - addi curLight, curLight, altBase // Point to ambient light - vmadn aDPosF, vMTX2F, vpMdl[2h] - jr $ra - vmadh aDPosI, vMTX2I, vpMdl[2h] - -ltadv_spec_fres_setup: - // Get aDIR = normalize(camera - vertex), aDOT = (vpWNrm dot aDIR) - ldv aDPosI[0], (cameraWorldPos - altBase)(altBaseReg) // Camera world pos - j ltadv_normal_to_vertex - ldv aDPosI[8], (cameraWorldPos - altBase)(altBaseReg) -ltadv_after_camera: - vmov aOAFrs[0], aDOT[0] // Save Fresnel dot product in aOAFrs[0h] - vmov aOAFrs[4], aDOT[4] // elems 0, 4 - bgez laSpecular, ltadv_loop // Sign bit clear = not specular - li laSpecFres, 0 // Clear flag for specular or fresnel -// aProj <- aLenF - vmulf aProj, vpWNrm, aDOT[0h] // Projection of camera vec onto normal - vmudh $v29, aDIR, $v31[1] // -camera vec - j ltadv_normals_to_regs // For specular, replace vpWNrm with reflected vector - vmadh vpWNrm, aProj, $v31[3] // + 2 * projection - // aDPosI <- aProj - ltadv_specular: // aDOT in/out, uses vpLtTot[3] and $11 as temps lb $11, (ltBufOfs + 0xF - lightSize)(curLight) // Light size factor mtc2 $11, vpLtTot[6] // Light size factor in elem 3 as temp @@ -3494,25 +3504,29 @@ ltadv_specular: // aDOT in/out, uses vpLtTot[3] and $11 as temps jr $ra vxor aDOT, aDOT, $v31[7] // = 0x7FFF - result +align_with_warning 8, "One instruction of padding before ltadv_post" ltadv_post: // aClOut <- vpWrlF // aAlOut <- 
vpWrlI +// vpMdl <- aLTC + vge aAOF, vpLtTot, vpLtTot[1h] // elem 0 = max(R0, G0); elem 4 = max(R1, G1) + ldv vpMdl[0], (VTX_IN_OB + 1 * inputVtxSize)(laPtr) // Vtx 2 Model pos + PN vmulf aClOut, vpRGBA, vpLtTot // RGB output is RGB * light beqz laL2A, @@skip_cel vcopy aAlOut, vpRGBA // Alpha output = vertex alpha (only 3, 7 matter) // Cel: alpha = max of light components, RGB = vertex color - vge aAlOut, vpLtTot, vpLtTot[1h] // elem 0 = max(R0, G0); elem 4 = max(R1, G1) - vge aAlOut, aAlOut, aAlOut[2h] // elem 0 = max(R0, G0, B0); equiv for elem 4 + vge aAOF, aAOF, aAOF[2h] // elem 0 = max(R0, G0, B0); equiv for elem 4 vcopy aClOut, vpRGBA // RGB output is vertex color - vmudh aAlOut, vOne, aAlOut[0h] // move light level elem 0, 4 to 3, 7 + vmudh aAlOut, vOne, aAOF[0h] // move light level elem 0, 4 to 3, 7 @@skip_cel: vne $v29, $v31, $v31[3h] // Set VCC to 11101110 bnez laPacked, @@skip_novtxcolor andi $11, vGeomMid, (G_FRESNEL_COLOR | G_FRESNEL_ALPHA) >> 8 vcopy aClOut, vpLtTot // If no packed normals, base output is just light @@skip_novtxcolor: + vmrg vpRGBA, aClOut, aAlOut // Merge base output and alpha output beqz $11, @@skip_fresnel - vmrg vpRGBA, aClOut, aAlOut // Merge base output and alpha output + ldv vpMdl[8], (VTX_IN_OB + 0 * inputVtxSize)(laPtr) // Vtx 1 Model pos + PN lsv aAOF[0], (vTRC_0100_addr - altBase)(altBaseReg) // Load constant 0x0100 to temp vabs aOAFrs, aOAFrs, aOAFrs // Fresnel dot in aOAFrs[0h]; absolute value for underwater andi $11, vGeomMid, G_FRESNEL_COLOR >> 8 @@ -3538,7 +3552,6 @@ ltadv_texgen_end: // Vtx 2 ST in vpST elem 0, 1; vtx 1 ST in vpST elem 4, 5 lw laSTKept, (tempVtx1ST)(rdpCmdBufEndP1) // Overwrite stored Vtx 1 ST slv vpST[0], (VTX_IN_TC - 1 * inputVtxSize)(laPtr) // Vtx 2 ST ltadv_after_texgen: -// vpMdl <- aDOT lw $11, (VTX_IN_TC - 2 * inputVtxSize)(laPtr) // Vtx 2 RGBA from vtx 1 ST slot bltz laVtxLeft, vtx_setup_no_lighting sw laSTKept, (VTX_IN_TC - 2 * inputVtxSize)(laPtr) // Restore vtx 1 ST diff --git a/gbi.h 
b/gbi.h index 7fe0323..50c6d6b 100644 --- a/gbi.h +++ b/gbi.h @@ -2775,7 +2775,7 @@ other segments. */ /** * @brief Load new MVP matrix directly. * - * This is no longer supported as there is no MVP matrix in F3DEX3. + * This is no longer supported as it was not used in production games. * @deprecated */ #define gSPForceMatrix(pkt, mptr) gSPNoOp(pkt) @@ -2949,7 +2949,7 @@ _DW({ \ /** * Alpha compare culling. Optimization for cel shading, could also be used for - * other scenarios where lots of tris are being drawn with alpha compare. + * other scenarios where tris are being drawn with alpha compare. * * If mode == G_ALPHA_COMPARE_CULL_DISABLE, tris are drawn normally. * diff --git a/rsp/gbi.inc b/rsp/gbi.inc index 19a0cf8..691ea80 100644 --- a/rsp/gbi.inc +++ b/rsp/gbi.inc @@ -56,14 +56,14 @@ G_RDPFULLSYNC equ 0xe9 G_SETKEYGB equ 0xea G_SETKEYR equ 0xeb G_SETCONVERT equ 0xec -G_SETSCISSOR equ 0xed // scis/oth handler +G_SETSCISSOR equ 0xed G_SETPRIMDEPTH equ 0xee -G_RDPSETOTHERMODE equ 0xef // scis/oth handler -G_LOADTLUT equ 0xf0 // load handler -G_RDPHALF_2 equ 0xf1 // own handler +G_RDPSETOTHERMODE equ 0xef +G_LOADTLUT equ 0xf0 +G_RDPHALF_2 equ 0xf1 G_SETTILESIZE equ 0xf2 -G_LOADBLOCK equ 0xf3 // load handler -G_LOADTILE equ 0xf4 // load handler +G_LOADBLOCK equ 0xf3 +G_LOADTILE equ 0xf4 G_SETTILE equ 0xf5 G_FILLRECT equ 0xf6 G_SETFILLCOLOR equ 0xf7