mirror of
https://github.com/HackerN64/F3DEX3.git
synced 2026-01-21 10:37:45 -08:00
Before dispatch change
This commit is contained in:
14
Makefile
14
Makefile
@@ -8,10 +8,12 @@ default: F3DEX3_BrZ F3DEX3_BrW
|
||||
ALL_OPTIONS := \
|
||||
CFG_G_BRANCH_W \
|
||||
CFG_DEBUG_NORMALS \
|
||||
CFG_GCLK_SAMPLE
|
||||
CFG_PROFILING_A \
|
||||
CFG_PROFILING_B \
|
||||
CFG_PROFILING_C
|
||||
|
||||
ARMIPS ?= armips
|
||||
PARENT_OUTPUT_DIR ?= ../test
|
||||
PARENT_OUTPUT_DIR ?= ./build
|
||||
ifeq ($(PARENT_OUTPUT_DIR),.)
|
||||
$(error Cannot build directly in repo directory; see Makefile for details.)
|
||||
# The problem is that we want to be able to have targets like F3DEX2_2.08,
|
||||
@@ -129,17 +131,15 @@ $(eval $(call reset_vars))
|
||||
NAME := F3DEX3_BrZ
|
||||
DESCRIPTION := Will make you want to finally ditch HLE (G_BRANCH_Z version)
|
||||
ID_STR := F3DEX3 by Sauraen & Nintendo, G_BRANCH_Z version______________________
|
||||
# Add options you want here, e.g. CFG_GCLK_SAMPLE
|
||||
# Add options you want here, e.g. CFG_PROFILING_A
|
||||
OPTIONS :=
|
||||
$(eval $(call ucode_rule))
|
||||
|
||||
NAME := F3DEX3_BrW
|
||||
DESCRIPTION := Will make you want to finally ditch HLE (G_BRANCH_W version)
|
||||
ID_STR := F3DEX3 by Sauraen & Nintendo, G_BRANCH_W version______________________
|
||||
# Add options you want here, e.g. CFG_GCLK_SAMPLE
|
||||
OPTIONS := \
|
||||
CFG_GCLK_SAMPLE \
|
||||
CFG_G_BRANCH_W
|
||||
# Add options you want here, e.g. CFG_PROFILING_A
|
||||
OPTIONS := CFG_PROFILING_A CFG_G_BRANCH_W
|
||||
$(eval $(call ucode_rule))
|
||||
|
||||
.PHONY: default ok all clean
|
||||
|
||||
150
cpu/counters.c
150
cpu/counters.c
@@ -1,27 +1,83 @@
|
||||
/* This example code is for HackerOoT. The F3DEX3PerfCounters struct and the
|
||||
method of reading it will be the same for any other game. */
|
||||
/* This example code is for HackerOoT. The structs and the general method of
|
||||
reading the counters will be the same for any game.
|
||||
|
||||
Build the microcode with one of the CFG_PROFILING_* options below to select one
|
||||
of these sets of performance counters, or without any CFG_PROFILING_* option for
|
||||
the default set. You can even include all the microcode versions in your game,
|
||||
and let the player/developer swap which one is used for a given frame in order
|
||||
to switch which set of performance counters they're seeing. You only need to
|
||||
keep the currently used one in RDRAM; you can load a different one from the cart
|
||||
over it when the user swaps.
|
||||
|
||||
For the options other than the default, the microcode uses the RDP's CLK counter
|
||||
for its own timing. You should clear this counter just before launching F3DEX3
|
||||
on the RSP (in the graphics task setup); usually you'd also read the counter
|
||||
value, to optionally print on screen, after the RDP is finished. Make sure not
|
||||
to clear/modify the CLK counter while the RSP is running, or the profiling
|
||||
results may be garbage.
|
||||
*/
|
||||
|
||||
/* In some header, needs to be accessible to variables.h */
|
||||
typedef struct { /* Default performance counters, if no CFG_PROFILING_* is enabled */
|
||||
/* Number of vertices processed by the RSP */
|
||||
u16 vertexCount;
|
||||
/* Number of tris actually drawn, after clipping and all types of culling */
|
||||
u16 rdpOutTriCount;
|
||||
/* Number of tris for which processing started on the RSP (before clipping / culling) */
|
||||
u32 rspInTriCount:18;
|
||||
/* Number of fill rects and tex rects drawn */
|
||||
u32 rectCount:14;
|
||||
u32 stallRDPFifoFullCycles;
|
||||
u32 dummy;
|
||||
} F3DEX3ProfilingDefault;
|
||||
|
||||
typedef struct { /* Counters for CFG_PROFILING_A */
|
||||
u32 vertexProcCycles;
|
||||
u16 fetchedDLCommandCount;
|
||||
u16 dlCommandCount;
|
||||
u32 stallRDPFifoFullCycles;
|
||||
u32 triProcCycles;
|
||||
} F3DEX3ProfilingA;
|
||||
|
||||
typedef struct { /* Counters for CFG_PROFILING_B */
|
||||
u16 vertexCount;
|
||||
u16 litVertexCount;
|
||||
u32 smallRDPCommandCount:18; /* All RDP commands except tris */
|
||||
u32 clippedTriCount:14; /* Number of RSP/input triangles which got clipped */
|
||||
u32 allOverlayLoadCount:18;
|
||||
u32 lightingOverlayLoadCount:14;
|
||||
u32 clippingOverlayLoadCount:18;
|
||||
u32 miscOverlayLoadCount:14;
|
||||
} F3DEX3ProfilingB;
|
||||
|
||||
typedef struct { /* Counters for CFG_PROFILING_C */
|
||||
/* Total cycles F3DEX3 believes it was running, not including SPLoadUcode */
|
||||
u32 ex3UcodeCycles;
|
||||
/* The "GCLK is alive" bit of the RDP status is sampled once every time a
|
||||
display list command is started. This counts the number of times that bit
|
||||
was 1. Divide by dlCommandCount to get an approximate measurement of the
|
||||
percentage of time the RDP was doing useful work, as opposed to waiting
|
||||
for framebuffer / Z buffer memory transactions to complete. */
|
||||
u16 commandsSampledGclkActive;
|
||||
u16 dlCommandCount;
|
||||
u32 stallRDPFifoFullCycles;
|
||||
u32 stallDMACycles;
|
||||
} F3DEX3ProfilingC;
|
||||
|
||||
typedef struct {
|
||||
union {
|
||||
F3DEX3ProfilingDefault def;
|
||||
F3DEX3ProfilingA a;
|
||||
F3DEX3ProfilingB b;
|
||||
F3DEX3ProfilingC c;
|
||||
u64 dummy_alignment[2];
|
||||
};
|
||||
u32 taskdataptr; /* Not a perf counter, can ignore */
|
||||
u32 ucode; /* Not a perf counter, can ignore */
|
||||
} F3DEX3YieldDataFooter;
|
||||
|
||||
/* In variables.h with the ENABLE_SPEEDMETER section */
|
||||
extern volatile u32 gRSPGfxRDPWaitCycles;
|
||||
extern volatile u16 gRSPGfxCommandsSampledGclkActive;
|
||||
extern volatile u16 gRSPGfxCommandCount;
|
||||
extern volatile u16 gRSPGfxVertexCount;
|
||||
extern volatile u16 gRSPGfxTriDrawCount;
|
||||
extern volatile u32 gRSPGfxTriRequestCount;
|
||||
extern volatile u16 gRSPGfxRectCount;
|
||||
|
||||
/* In sched.c somewhere before Sched_TaskComplete, or in some header */
|
||||
typedef struct {
|
||||
u32 rdpWaitCycles;
|
||||
u16 commandsSampledGclkActive;
|
||||
u16 commandCount;
|
||||
u16 vertexCount;
|
||||
u16 triDrawCount;
|
||||
u32 triRequestCount:18;
|
||||
u32 rectCount:14;
|
||||
u32 taskdataptr; /* Not a perf counter */
|
||||
u32 ucode; /* Not a perf counter */
|
||||
} F3DEX3YieldDataFooter;
|
||||
extern volatile F3DEX3YieldDataFooter gRSPProfilingResults;
|
||||
|
||||
/* In the true codepath of Sched_TaskComplete: */
|
||||
#ifdef ENABLE_SPEEDMETER
|
||||
@@ -31,35 +87,12 @@ typedef struct {
|
||||
(u8*)gGfxSPTaskYieldBuffer +
|
||||
OS_YIELD_DATA_SIZE - sizeof(F3DEX3YieldDataFooter));
|
||||
osInvalDCache(footer, sizeof(F3DEX3YieldDataFooter));
|
||||
gRSPGfxRDPWaitCycles = footer->rdpWaitCycles;
|
||||
gRSPGfxCommandsSampledGclkActive = footer->commandsSampledGclkActive;
|
||||
gRSPGfxCommandCount = footer->commandCount;
|
||||
gRSPGfxVertexCount = footer->vertexCount;
|
||||
gRSPGfxTriDrawCount = footer->triDrawCount;
|
||||
gRSPGfxTriRequestCount = footer->triRequestCount;
|
||||
gRSPGfxRectCount = footer->rectCount;
|
||||
bcopy(footer, &gRSPProfilingResults, sizeof(F3DEX3YieldDataFooter));
|
||||
}
|
||||
#endif
|
||||
|
||||
/* In speed_meter.c */
|
||||
/* Number of cycles the RSP is waiting for space in the RDP FIFO in DRAM */
|
||||
volatile u32 gRSPGfxRDPWaitCycles;
|
||||
/* If CFG_GCLK_SAMPLE is enabled, the "GCLK is alive" bit of the RDP status is
|
||||
sampled once every time a display list command is started. This counts the
|
||||
number of times that bit was 1. */
|
||||
volatile u16 gRSPGfxCommandsSampledGclkActive;
|
||||
/* Number of display list commands the microcode processed. If CFG_GCLK_SAMPLE
|
||||
is disabled, this will be zero, so be careful about dividing the GCLK sample count
|
||||
above by this. */
|
||||
volatile u16 gRSPGfxCommandCount;
|
||||
/* Number of vertices processed by the RSP */
|
||||
volatile u16 gRSPGfxVertexCount;
|
||||
/* Number of tris actually drawn, after clipping and all types of culling */
|
||||
volatile u16 gRSPGfxTriDrawCount;
|
||||
/* Number of tris for which processing started on the RSP (before clipping / culling) */
|
||||
volatile u32 gRSPGfxTriRequestCount;
|
||||
/* Number of fill rects and tex rects drawn */
|
||||
volatile u16 gRSPGfxRectCount;
|
||||
volatile F3DEX3YieldDataFooter gRSPProfilingResults;
|
||||
|
||||
/* You can display them on screen however you wish. Here is an example, in
|
||||
SpeedMeter_DrawTimeEntries */
|
||||
@@ -74,14 +107,21 @@ gSPDisplayList(OVERLAY_DISP++, gfx);
|
||||
GfxPrint_Open(&printer, gfx);
|
||||
|
||||
GfxPrint_SetColor(&printer, 255, 100, 0, 255);
|
||||
GfxPrint_SetPos(&printer, 33, 25);
|
||||
GfxPrint_Printf(&printer, "%5dV", gRSPGfxVertexCount);
|
||||
GfxPrint_SetPos(&printer, 33, 26);
|
||||
GfxPrint_Printf(&printer, "%5dt", gRSPGfxTriRequestCount);
|
||||
GfxPrint_SetPos(&printer, 33, 27);
|
||||
GfxPrint_Printf(&printer, "%5dT", gRSPGfxTriDrawCount);
|
||||
GfxPrint_SetPos(&printer, 33, 28);
|
||||
GfxPrint_Printf(&printer, "%5dR", gRSPGfxRectCount);
|
||||
if(f3dex3_version_CFG_PROFILING_A){
|
||||
|
||||
}else if(f3dex3_version_CFG_PROFILING_B){
|
||||
...
|
||||
}else if(f3dex3_version_CFG_PROFILING_C){
|
||||
...
|
||||
}else{
|
||||
GfxPrint_SetPos(&printer, 33, 25);
|
||||
GfxPrint_Printf(&printer, "%5dV", gRSPProfilingResults.def.vertexCount);
|
||||
GfxPrint_SetPos(&printer, 33, 26);
|
||||
GfxPrint_Printf(&printer, "%5dt", gRSPProfilingResults.def.rspInTriCount);
|
||||
GfxPrint_SetPos(&printer, 33, 27);
|
||||
GfxPrint_Printf(&printer, "%5dT", gRSPProfilingResults.def.rdpOutTriCount);
|
||||
...
|
||||
}
|
||||
|
||||
gfx = GfxPrint_Close(&printer);
|
||||
gSPEndDisplayList(gfx++);
|
||||
|
||||
44
f3dex3.s
44
f3dex3.s
@@ -63,11 +63,6 @@ ACC_LOWER equ 2
|
||||
// are removed, i.e. G_LIGHTTORDP behaves as a no-op and all tris are smooth
|
||||
// shaded.
|
||||
//
|
||||
ENABLE_PROFILING equ 0
|
||||
COUNTER_A_UPPER_VERTEX_COUNT equ 0
|
||||
COUNTER_B_LOWER_CMD_COUNT equ 0
|
||||
COUNTER_C_FIFO_FULL equ 1
|
||||
NEED_START_COUNTER_DMEM equ 0
|
||||
|
||||
// Config A TODO
|
||||
// perfCounterA:
|
||||
@@ -80,10 +75,14 @@ NEED_START_COUNTER_DMEM equ 0
|
||||
// perfCounterD:
|
||||
// cycles RSP spent processing triangle commands (incl. buffer flushes)
|
||||
.if CFG_PROFILING_A
|
||||
ENABLE_PROFILING equ 1
|
||||
COUNTER_B_LOWER_CMD_COUNT equ 1
|
||||
NEED_START_COUNTER_DMEM equ 1
|
||||
.if CFG_PROFILING_B || CFG_PROFILING_C
|
||||
.error "At most one CFG_PROFILING_ option can be enabled at a time"
|
||||
.endif
|
||||
ENABLE_PROFILING equ 1
|
||||
COUNTER_A_UPPER_VERTEX_COUNT equ 0
|
||||
COUNTER_B_LOWER_CMD_COUNT equ 1
|
||||
COUNTER_C_FIFO_FULL equ 1
|
||||
NEED_START_COUNTER_DMEM equ 1
|
||||
|
||||
// Config B TODO
|
||||
// perfCounterA:
|
||||
@@ -98,18 +97,19 @@ NEED_START_COUNTER_DMEM equ 1
|
||||
// perfCounterD:
|
||||
// upper 18 bits: overlay 3 (clipping) load count TODO
|
||||
// lower 14 bits: overlay 4 (misc) load count TODO
|
||||
.if CFG_PROFILING_B
|
||||
.if ENABLE_PROFILING
|
||||
.elseif CFG_PROFILING_B
|
||||
.if CFG_PROFILING_C
|
||||
.error "At most one CFG_PROFILING_ option can be enabled at a time"
|
||||
.endif
|
||||
ENABLE_PROFILING equ 1
|
||||
COUNTER_C_FIFO_FULL equ 0
|
||||
COUNTER_A_UPPER_VERTEX_COUNT equ 1
|
||||
.endif
|
||||
COUNTER_B_LOWER_CMD_COUNT equ 0
|
||||
COUNTER_C_FIFO_FULL equ 0
|
||||
NEED_START_COUNTER_DMEM equ 0
|
||||
|
||||
// Config C TODO
|
||||
// perfCounterA:
|
||||
// cycles RSP believes it was running
|
||||
// cycles RSP believes it was running (this ucode only)
|
||||
// perfCounterB:
|
||||
// upper 16 bits: number of samples in which GCLK was alive (sampled once per DL command)
|
||||
// lower 16 bits: DL command count
|
||||
@@ -117,14 +117,12 @@ COUNTER_A_UPPER_VERTEX_COUNT equ 1
|
||||
// cycles RSP was stalled because RDP FIFO was full
|
||||
// perfCounterD:
|
||||
// cycles RSP was stalled waiting for miscellaneous DMAs to finish
|
||||
.if CFG_PROFILING_C
|
||||
.if ENABLE_PROFILING
|
||||
.error "At most one CFG_PROFILING_ option can be enabled at a time"
|
||||
.endif
|
||||
.elseif CFG_PROFILING_C
|
||||
ENABLE_PROFILING equ 1
|
||||
COUNTER_A_UPPER_VERTEX_COUNT equ 0
|
||||
COUNTER_B_LOWER_CMD_COUNT equ 1
|
||||
COUNTER_C_FIFO_FULL equ 1
|
||||
NEED_START_COUNTER_DMEM equ 1
|
||||
.endif
|
||||
|
||||
// Default (extra profiling disabled)
|
||||
// perfCounterA:
|
||||
@@ -137,9 +135,15 @@ NEED_START_COUNTER_DMEM equ 1
|
||||
// cycles RSP was stalled because RDP FIFO was full
|
||||
// perfCounterD:
|
||||
// unused/zero
|
||||
.if !ENABLE_PROFILING
|
||||
.else
|
||||
ENABLE_PROFILING equ 0
|
||||
COUNTER_A_UPPER_VERTEX_COUNT equ 1
|
||||
COUNTER_B_LOWER_CMD_COUNT equ 0
|
||||
COUNTER_C_FIFO_FULL equ 1
|
||||
NEED_START_COUNTER_DMEM equ 0
|
||||
|
||||
.endif
|
||||
.warning "TODO matrix count"
|
||||
|
||||
/*
|
||||
There are two different memory spaces for the overlays: (a) IMEM and (b) the
|
||||
@@ -915,7 +919,7 @@ call_ret_common:
|
||||
j displaylist_dma_with_count
|
||||
sb $1, displayListStackLength
|
||||
|
||||
.if !CFG_GCLK_SAMPLE
|
||||
.if !ENABLE_PROFILING
|
||||
G_LIGHTTORDP_handler:
|
||||
lbu $11, numLightsxSize // Ambient light
|
||||
lbu $1, (inputBufferEnd - 0x6)(inputBufferPos) // Byte 2 = light count from end * size
|
||||
|
||||
Reference in New Issue
Block a user