Files
HackerOoT/include/debug/profiler.h
Sauraen ae2010e9b5 New profiler with RSP and CPU tracing, replacing vanilla SPEEDMETER (#116)
* Working on it

* Loading ucode from ROM working

* Menu implemented

* Fixed ifdefs in debug

* Fixed crashing

* Cleanup

* Suppressed make auto generated messages

* Reorganized RSP wrapper assembly files

* Fixed merge bug

* Removing speed_meter

* Porting profiler

* Basics working

* Port basically complete

* Basic tracing working

* CPU tracing working

* Added colors to CPU trace

* Fixed some issues

* Profiler basically done

* Update F3DEX3 to resolve issues

* Cleanup

* Cleaned up things for profiler or F3DEX3 disabled

* Fixed a couple counter names
2024-05-27 18:38:44 +02:00

155 lines
5.5 KiB
C

#ifndef PROFILER_H
#define PROFILER_H
// Capacity of the event log: the length of the eventTimes and eventTypes
// arrays in ProfilerState.
#define PROFILER_EVENT_COUNT 384
// Event type codes. Every value listed here fits in a u8, the element type of
// ProfilerState.eventTypes.
// These can't be an enum because they're used by asm.
#define PROFILER_EVENT_TYPE_MAINGFXSTART 0
#define PROFILER_EVENT_TYPE_RDPEND 1
#define PROFILER_EVENT_TYPE_RSPGFXLASTEND 2
#define PROFILER_EVENT_TYPE_MAINGFXEND 3
#define PROFILER_EVENT_TYPE_RSPGFXSTART 4
#define PROFILER_EVENT_TYPE_RSPGFXEND 5
#define PROFILER_EVENT_TYPE_RSPAUDIOSTART 6
#define PROFILER_EVENT_TYPE_RSPAUDIOEND 7
#define PROFILER_EVENT_TYPE_RSPOTHERSTART 8
#define PROFILER_EVENT_TYPE_RSPOTHEREND 9
// NOTE(review): THREADSTART and THREADEND are spaced 50 apart, which looks like
// room for a per-thread offset added to each base -- confirm against the code
// that records events.
#define PROFILER_EVENT_TYPE_THREADSTART 50
#define PROFILER_EVENT_TYPE_THREADEND 100
#ifdef _LANGUAGE_C
#if ENABLE_F3DEX3
/*
 * Identifies one of the four F3DEX3 performance counter sets: the default set
 * or profiling set A, B or C. The values are the consecutive integers 0..3.
 */
typedef enum {
    F3DEX3_PROF_DEF = 0,
    F3DEX3_PROF_A = 1,
    F3DEX3_PROF_B = 2,
    F3DEX3_PROF_C = 3
} F3DEX3ProfType;
/*
 * One of four alternative layouts for the F3DEX3 performance counters; it is
 * the `def` member of the union in F3DEX3YieldDataFooter.
 * NOTE(review): presumably filled in by the microcode, so field order and
 * bit-field widths must not be changed -- confirm. Assuming u16/u32 are 16/32
 * bits wide and the 18+14 bit-field pair shares one 32-bit word, the struct is
 * 16 bytes.
 */
typedef struct { /* Default performance counters, if no CFG_PROFILING_* is enabled */
/* Number of vertices processed by the RSP */
u16 vertexCount;
/* Number of tris actually drawn, after clipping and all types of culling */
u16 rdpOutTriCount;
/* Number of tris which processing started on the RSP (before clipping / culling) */
u32 rspInTriCount:18;
/* Number of fill rects and tex rects drawn */
u32 rectCount:14;
/* Number of cycles the RSP was stalled because the RDP FIFO was full */
u32 stallRDPFifoFullCycles;
/* Unused, zero */
u32 dummy;
} F3DEX3ProfilingDefault;
/*
 * One of four alternative layouts for the F3DEX3 performance counters; it is
 * the `a` member of the union in F3DEX3YieldDataFooter.
 * NOTE(review): presumably filled in by the microcode, so field order must not
 * be changed -- confirm. Assuming u16/u32 are 16/32 bits wide, the struct is
 * 16 bytes.
 */
typedef struct { /* Counters for CFG_PROFILING_A */
/* Number of cycles the RSP spent processing vertex commands, including vertex DMAs */
u32 vertexProcCycles;
/* Number of display list commands fetched from DRAM, >= dlCommandCount */
u16 fetchedDLCommandCount;
/* Number of display list commands executed */
u16 dlCommandCount;
/* Number of cycles the RSP was stalled because the RDP FIFO was full */
u32 stallRDPFifoFullCycles;
/* Number of cycles the RSP spent processing triangle commands, NOT including buffer flushes (i.e. FIFO full) */
u32 triProcCycles;
} F3DEX3ProfilingA;
/*
 * One of four alternative layouts for the F3DEX3 performance counters; it is
 * the `b` member of the union in F3DEX3YieldDataFooter.
 * NOTE(review): presumably filled in by the microcode, so field order and
 * bit-field widths must not be changed -- confirm. Assuming u16/u32 are 16/32
 * bits wide and each 18+14 bit-field pair shares one 32-bit word, the struct
 * is 16 bytes.
 */
typedef struct { /* Counters for CFG_PROFILING_B */
/* Number of vertices processed by the RSP */
u16 vertexCount;
/* Number of vertices processed which had lighting enabled */
u16 litVertexCount;
/* Number of tris culled by the occlusion plane */
u32 occlusionPlaneCullCount:18;
/* Number of RSP/input triangles which got clipped */
u32 clippedTriCount:14;
/* Number of times any microcode overlay was loaded */
u32 allOverlayLoadCount:18;
/* Number of times overlay 2 (lighting) was loaded */
u32 lightingOverlayLoadCount:14;
/* Number of times overlay 3 (clipping) was loaded */
u32 clippingOverlayLoadCount:18;
/* Number of times overlay 4 (mIT matrix, matrix multiply, etc.) was loaded */
u32 miscOverlayLoadCount:14;
} F3DEX3ProfilingB;
/*
 * One of four alternative layouts for the F3DEX3 performance counters; it is
 * the `c` member of the union in F3DEX3YieldDataFooter.
 * NOTE(review): presumably filled in by the microcode, so field order and
 * bit-field widths must not be changed -- confirm. Assuming u16/u32 are 16/32
 * bits wide and the 18+14 bit-field pair shares one 32-bit word, the struct is
 * 16 bytes.
 */
typedef struct { /* Counters for CFG_PROFILING_C */
/* Total cycles F3DEX3 believes it was running, not including SPLoadUcode */
u32 ex3UcodeCycles;
/* The "GCLK is alive" bit of the RDP status is sampled once every time a
display list command is started. This counts the number of times that bit
was 1. Divide by dlCommandCount to get an approximate measurement of the
percentage of time the RDP was doing useful work, as opposed to waiting
for framebuffer / Z buffer memory transactions to complete. */
u16 commandsSampledGclkActive;
/* Number of display list commands executed */
u16 dlCommandCount;
/* Number of commands sent to the RDP except for triangle commands */
u32 smallRDPCommandCount:18;
/* Number of matrix loads, of any type */
u32 matrixCount:14;
/* Number of cycles the RSP was stalled waiting for any DMAs: vertex loads,
matrix loads, copying command buffers to the RDP FIFO, overlay loads, etc. */
u32 stallDMACycles;
} F3DEX3ProfilingC;
/*
 * F3DEX3 performance counters followed by two extra words.
 * The union is anonymous, so its members are accessed directly on the footer
 * (footer.def, footer.a, footer.b, footer.c); all four views share the same
 * storage and only one of them is meaningful at a time.
 * NOTE(review): the name suggests this mirrors the tail of the RSP yield data
 * buffer written by the microcode -- confirm.
 */
typedef struct {
union {
F3DEX3ProfilingDefault def;
F3DEX3ProfilingA a;
F3DEX3ProfilingB b;
F3DEX3ProfilingC c;
/* Never read as data; a u64[2] member gives the union u64 alignment and a size of at least two u64s */
u64 dummy_alignment[2];
};
u32 taskdataptr; /* Not a perf counter, can ignore */
u32 ucode; /* Not a perf counter, can ignore */
} F3DEX3YieldDataFooter;
#endif
/*
 * One buffer of profiler data: a log of timestamped events, a few timestamps,
 * RDP counter values and, when ENABLE_F3DEX3 is set, the F3DEX3 counters.
 */
typedef struct {
// These fields are accessed via asm, so must maintain their layout.
// Both arrays hold PROFILER_EVENT_COUNT entries.
// NOTE(review): presumably eventTimes[i] and eventTypes[i] describe the same
// event, with eventTypes holding PROFILER_EVENT_TYPE_* codes -- confirm.
OSTime eventTimes[PROFILER_EVENT_COUNT];
u8 eventTypes[PROFILER_EVENT_COUNT];
// NOTE(review): presumably the number of entries currently used in the two arrays above -- confirm.
s32 numEvents;
// The rest are not
OSTime lastRSPStartTime;
OSTime traceStartTime;
OSTime traceEndTime;
u32 rdpClockCount; // counts clock (not gclk)
u32 rdpCmdCount; // counts cmd_busy "DP CMDBUF is not empty". RDP command shuffle FIFO not empty.
u32 rdpPipeCount; // counts pipe_busy, which is true until fullsync is complete.
u32 rdpTmemCount; // counts tmem_busy "DP TMEM is loading". Cycles valid tex data on copy bus, doesn't count waiting for RDRAM.
#if ENABLE_F3DEX3
// F3DEX3 performance counters plus the two trailing footer words.
F3DEX3YieldDataFooter footer;
// NOTE(review): the meaning of the stored values is not visible in this header -- check where it is assigned.
s8 f3dex3Version;
#endif
} ProfilerState;
// The profiler state is triple buffered (active, last, draw). With only two
// buffers, either (a) active and last could be swapped while drawing, or
// (b) if that swap were skipped whenever it would happen while drawing, one
// frame of data would be lost.
extern ProfilerState* activeProfilerState;
extern ProfilerState* lastProfilerState;
extern ProfilerState* drawProfilerState;
/*
 * Modes the profiler can be set to. PROFILER_MODE_COUNT is not a mode itself:
 * as the last enumerator it equals the number of modes (8).
 */
typedef enum {
    PROFILER_MODE_DISABLE = 0,
    PROFILER_MODE_REAL_FPS = 1,
    PROFILER_MODE_VIRTUAL_FPS = 2,
    PROFILER_MODE_GFX = 3,
    PROFILER_MODE_GFX_TRACE = 4,
    PROFILER_MODE_CPU = 5,
    PROFILER_MODE_CPU_TRACE = 6,
    PROFILER_MODE_ALL_TRACE = 7,
    PROFILER_MODE_COUNT = 8
} ProfilerMode;
// NOTE(review): presumably holds the selected ProfilerMode value, stored as a
// u8 rather than as the enum type -- confirm against the definition.
extern u8 gProfilerMode;
#endif
#endif