mirror of
https://github.com/HackerN64/F3DEX3.git
synced 2026-01-21 10:37:45 -08:00
Working, but minor graphical glitches
This commit is contained in:
@@ -74,8 +74,8 @@ However, it retains all other F3DEX3 features:
|
||||
- All features not related to vertex/lighting: auto-batched rendering, packed 5
|
||||
triangles commands, hints system, etc.
|
||||
|
||||
The performance of F3DEX3 vertex processing with both LVP and NOC is almost the
|
||||
same as that of F3DEX2; see the Performance Results section below.
|
||||
The performance of F3DEX3 vertex processing with both LVP and NOC is nearly
|
||||
identical to that of F3DEX2; see the Performance page.
|
||||
|
||||
## Profiling
|
||||
|
||||
|
||||
@@ -1,30 +1,53 @@
|
||||
@page performance Performance Results
|
||||
|
||||
# Philosophy
|
||||
|
||||
The base version of F3DEX3 was created for RDP bound games like OoT, where new
|
||||
visual effects are desired and increasing the RSP time a bit does not affect the
|
||||
overall performance. F3DEX3_LVP_NOC was created
|
||||
|
||||
# Performance Results
|
||||
|
||||
Cycle counts; lower is better. These are hand-counted timings taking into
|
||||
account all pipeline stalls and all dual-issue conditions. Instruction alignment
|
||||
is sometimes taken into account, otherwise assumed to be optimal.
|
||||
These are cycle counts for all the critical paths in the microcode. Lower is
|
||||
better. The timings are hand-counted taking into account all pipeline stalls and
|
||||
all dual-issue conditions. Instruction alignment is sometimes taken into
|
||||
account, otherwise assumed to be optimal.
|
||||
|
||||
Vertex / lighting numbers assume no special features (texgen, packed normals,
|
||||
etc.) Tri numbers assume texture, shade, and Z. Empty cells are "not measured
|
||||
yet".
|
||||
etc.). Tri numbers assume texture, shade, and Z. All numbers assume default
|
||||
profiling configuration. Empty cells are "not measured yet".
|
||||
|
||||
| | F3DEX2 | F3DEX3_LVP_NOC | F3DEX3_LVP | F3DEX3_NOC | F3DEX3 |
|
||||
|-----------------------|--------|----------------|------------|------------|--------|
|
||||
| Vtx pair, no lighting | 54 | 54 | 81 | 79 | 98 |
|
||||
| Vtx pair, 0 dir lts | Can't | | | | |
|
||||
| Vtx pair, 1 dir lt | 73 | 70 | 96 | 182 | 201 |
|
||||
| Vtx pair, 2 dir lts | 76 | 77 | 103 | 211 | 230 |
|
||||
| Vtx pair, 3 dir lts | 88 | 84 | 110 | 240 | 259 |
|
||||
| Vtx pair, 4 dir lts | 91 | 91 | 117 | 269 | 288 |
|
||||
| Vtx pair, 5 dir lts | 103 | 98 | 124 | 298 | 317 |
|
||||
| Vtx pair, 6 dir lts | 106 | 105 | 131 | 327 | 346 |
|
||||
| Vtx pair, 7 dir lts | 118 | 112 | 138 | 356 | 375 |
|
||||
| Vtx pair, 8 dir lts | Can't | 119 | 145 | 385 | 404 |
|
||||
| Vtx pair, 9 dir lts | Can't | 126 | 152 | 414 | 433 |
|
||||
| | F3DEX2 | F3DEX3_LVP_NOC | F3DEX3_LVP | F3DEX3_NOC | F3DEX3 |
|
||||
|----------------------------|--------|----------------|------------|------------|--------|
|
||||
| Vtx pair, no lighting | 54 | 54 | 81 | 79 | 98 |
|
||||
| Vtx pair, 0 dir lts | Can't | 64 | | | |
|
||||
| Vtx pair, 1 dir lt | 73 | 70 | 96 | 182 | 201 |
|
||||
| Vtx pair, 2 dir lts | 76 | 77 | 103 | 211 | 230 |
|
||||
| Vtx pair, 3 dir lts | 88 | 84 | 110 | 240 | 259 |
|
||||
| Vtx pair, 4 dir lts | 91 | 91 | 117 | 269 | 288 |
|
||||
| Vtx pair, 5 dir lts | 103 | 98 | 124 | 298 | 317 |
|
||||
| Vtx pair, 6 dir lts | 106 | 105 | 131 | 327 | 346 |
|
||||
| Vtx pair, 7 dir lts | 118 | 112 | 138 | 356 | 375 |
|
||||
| Vtx pair, 8 dir lts | Can't | 119 | 145 | 385 | 404 |
|
||||
| Vtx pair, 9 dir lts | Can't | 126 | 152 | 414 | 433 |
|
||||
| Command dispatch | 12 | 12 |
|
||||
| Only/2nd tri to offscreen | 27 | 29 |
|
||||
| 1st tri to offscreen | 28 | 29 |
|
||||
| Only/2nd tri to clip | 32 | 31 |
|
||||
| 1st tri to clip | 33 | 31 |
|
||||
| Only/2nd tri to backface | 38 | 40 |
|
||||
| 1st tri to backface | 39 | 40 |
|
||||
| Only/2nd tri to degenerate | 42 | 42 |
|
||||
| 1st tri to degenerate | 43 | 42 |
|
||||
| Only/2nd tri to occluded | Can't | Can't | 49 | Can't | 49 |
|
||||
| 1st tri to occluded | Can't | Can't | 49 | Can't | 49 |
|
||||
| Only/2nd tri to draw | 172 | 170 | 171 | 170 | 171 |
|
||||
| 1st tri to draw | 173 | 170 | 171 | 170 | 171 |
|
||||
|
||||
|
||||
Tri numbers are measured from the first cycle of the command handler inclusive,
|
||||
to the first cycle of whatever is after the return exclusive. This is in order
|
||||
to capture the extra mfc0 to mfc0 stall due to return_routine in F3DEX2.
|
||||
|
||||
|
||||
Vertex processing time as reported by the performance counter in the `PA`
|
||||
|
||||
9
f3dex3.s
9
f3dex3.s
@@ -2454,6 +2454,7 @@ tri_noinit: // ra is next cmd, second tri in TRI2, or middle of clipping
|
||||
and $10, $10, $24 // If clipping is enabled, check clip flags
|
||||
vge $v6, $v13, $v8[1] // v6 = max(max(vert1.y, vert2.y), vert3.y), VCO = max(vert1.y, vert2.y) > vert3.y
|
||||
bnez $10, ovl234_clipping_entrypoint // Facing info and occlusion may be garbage if need to clip
|
||||
// 29 cycles
|
||||
mfc2 $9, $v26[0] // elem 0 = x = cross product => lower 16 bits, sign extended
|
||||
vmrg $v4, $v14, $v8 // v4 = max(vert1.y, vert2.y) > vert3.y : higher(vert1, vert2) ? vert3 (highest vertex of vert1, vert2, vert3)
|
||||
and $5, $5, $7
|
||||
@@ -2484,9 +2485,11 @@ tri_noinit: // ra is next cmd, second tri in TRI2, or middle of clipping
|
||||
vsub $v15, $v10, $v2
|
||||
.if !CFG_NO_OCCLUSION_PLANE
|
||||
andi $5, $5, CLIP_OCCLUDED
|
||||
bnez $5, tri_culled_by_occlusion_plane // Cull if all verts occluded
|
||||
.endif
|
||||
vmudh $v29, $v6, $v8[0]
|
||||
.if !CFG_NO_OCCLUSION_PLANE
|
||||
bnez $5, tri_culled_by_occlusion_plane // Cull if all verts occluded
|
||||
.endif
|
||||
llv $v13[0], VTX_INV_W_VEC($1)
|
||||
vmadh $v29, $v8, $v11[0]
|
||||
lpv tV1AtI[0], VTX_COLOR_VEC($1) // Load vert color of vertex 1
|
||||
@@ -2699,6 +2702,7 @@ tDaDyI equ $v7
|
||||
// DaDe = DaDx * factor
|
||||
tDaDeF equ $v8
|
||||
tDaDeI equ $v9
|
||||
// 137 cycles
|
||||
vmadl $v29, tDaDxF, $v20[3]
|
||||
sdv tDaDxF[8], 0x0018($1) // Store DsDx, DtDx, DwDx texture coefficients (fractional)
|
||||
vmadm $v29, tDaDxI, $v20[3]
|
||||
@@ -2752,7 +2756,8 @@ tV1AtFF equ $v10
|
||||
// 162 cycles
|
||||
|
||||
.if CFG_NO_OCCLUSION_PLANE || CFG_LEGACY_VTX_PIPE
|
||||
// If we have room for the extra instructions. Z disabled is rare.
|
||||
// If we have room for the extra instructions. Z disabled is rare, so the
|
||||
// extra 8 cycles of finishing the dummy Z write above aren't too much of a problem.
|
||||
no_z_buffer:
|
||||
sdv tV1AtF[0], 0x0010($2) // Store RGBA shade color (fractional)
|
||||
sdv tV1AtI[0], 0x0000($2) // Store RGBA shade color (integer)
|
||||
|
||||
Reference in New Issue
Block a user