mirror of
https://github.com/HackerN64/F3DEX3.git
synced 2026-01-21 10:37:45 -08:00
247 lines
15 KiB
HTML
247 lines
15 KiB
HTML
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US">
|
|
<head>
|
|
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
|
<meta http-equiv="X-UA-Compatible" content="IE=11"/>
|
|
<meta name="generator" content="Doxygen 1.11.0"/>
|
|
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
|
<title>F3DEX3: Performance Counters</title>
|
|
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
|
<script type="text/javascript" src="jquery.js"></script>
|
|
<script type="text/javascript" src="dynsections.js"></script>
|
|
<script type="text/javascript" src="clipboard.js"></script>
|
|
<link href="navtree.css" rel="stylesheet" type="text/css"/>
|
|
<script type="text/javascript" src="navtreedata.js"></script>
|
|
<script type="text/javascript" src="navtree.js"></script>
|
|
<script type="text/javascript" src="resize.js"></script>
|
|
<script type="text/javascript" src="cookie.js"></script>
|
|
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
|
<script type="text/javascript" src="search/searchdata.js"></script>
|
|
<script type="text/javascript" src="search/search.js"></script>
|
|
<script type="text/javascript" src="darkmode_toggle.js"></script>
|
|
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
|
<link href="doxygen-awesome.css" rel="stylesheet" type="text/css"/>
|
|
<link href="doxygen-awesome-sidebar-only.css" rel="stylesheet" type="text/css"/>
|
|
<link href="doxygen-extra.css" rel="stylesheet" type="text/css"/>
|
|
</head>
|
|
<body>
|
|
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
|
<div id="titlearea">
|
|
<table cellspacing="0" cellpadding="0">
|
|
<tbody>
|
|
<tr id="projectrow">
|
|
<td id="projectalign">
|
|
<div id="projectname">F3DEX3
|
|
</div>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
<!-- end header part -->
|
|
<!-- Generated by Doxygen 1.11.0 -->
|
|
<script type="text/javascript">
|
|
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt MIT */
|
|
var searchBox = new SearchBox("searchBox", "search/",'.html');
|
|
/* @license-end */
|
|
</script>
|
|
<script type="text/javascript">
|
|
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt MIT */
|
|
$(function() { codefold.init(0); });
|
|
/* @license-end */
|
|
</script>
|
|
<script type="text/javascript" src="menudata.js"></script>
|
|
<script type="text/javascript" src="menu.js"></script>
|
|
<script type="text/javascript">
|
|
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt MIT */
|
|
$(function() {
|
|
initMenu('',true,false,'search.php','Search',true);
|
|
$(function() { init_search(); });
|
|
});
|
|
/* @license-end */
|
|
</script>
|
|
<div id="main-nav"></div>
|
|
</div><!-- top -->
|
|
<div id="side-nav" class="ui-resizable side-nav-resizable">
|
|
<div id="nav-tree">
|
|
<div id="nav-tree-contents">
|
|
<div id="nav-sync" class="sync"></div>
|
|
</div>
|
|
</div>
|
|
<div id="splitbar" style="-moz-user-select:none;"
|
|
class="ui-resizable-handle">
|
|
</div>
|
|
</div>
|
|
<script type="text/javascript">
|
|
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt MIT */
|
|
$(function(){initNavTree('counters.html',''); initResizable(true); });
|
|
/* @license-end */
|
|
</script>
|
|
<div id="doc-content">
|
|
<!-- window showing the filter options -->
|
|
<div id="MSearchSelectWindow"
|
|
onmouseover="return searchBox.OnSearchSelectShow()"
|
|
onmouseout="return searchBox.OnSearchSelectHide()"
|
|
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
|
</div>
|
|
|
|
<!-- iframe showing the search results (closed by default) -->
|
|
<div id="MSearchResultsWindow">
|
|
<div id="MSearchResults">
|
|
<div class="SRPage">
|
|
<div id="SRIndex">
|
|
<div id="SRResults"></div>
|
|
<div class="SRStatus" id="Loading">Loading...</div>
|
|
<div class="SRStatus" id="Searching">Searching...</div>
|
|
<div class="SRStatus" id="NoMatches">No Matches</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div><div class="header">
|
|
<div class="headertitle"><div class="title">Performance Counters</div></div>
|
|
</div><!--header-->
|
|
<div class="contents">
|
|
<div class="textblock"><p>This example code is for HackerOoT. The structs and the general method of reading the counters will be the same for any game. The structs can be used as-is, though the rest of the example code is somewhat simplistic. A full implementation exists in HackerOoT, which includes a full CPU+RSP profiler with tracing; see <code>src/debug/profiler.c</code> and related files.</p>
|
|
<p>Build the microcode with one of the <code>CFG_PROFILING_*</code> options below to select one of these sets of performance counters, or without any <code>CFG_PROFILING_*</code> option for the default set. You can even include all the microcode versions in your game, and let the player/developer swap which one is used for a given frame in order to switch which set of performance counters they're seeing. If you want, you only need to keep the currently used one in RDRAM; you can load a different one from the cart over it when the user swaps.</p>
|
|
<p>For the options other than the default, the microcode uses the RDP's CLK counter for its own timing. You should clear this counter just before launching F3DEX3 on the RSP (in the graphics task setup); usually you'd also read the counter value, to optionally print on screen, after the RDP is finished. Make sure not to clear/modify the CLK counter while the RSP is running, or the profiling results may be garbage.</p>
|
|
<p>Note that all "cycles" counters reported by F3DEX3 are RCP cycles, at 62.5 MHz.</p>
|
|
<p>Finally, note that the implementation of the stallDMACycles counter in <code>CFG_PROFILING_C</code> is compatible with loading S2DEX via SPLoadUcode, but it may not be compatible with other microcodes. If you run into crashes when using <code>CFG_PROFILING_C</code> but not A or B or the default, contact Sauraen, as you will need a customized implementation based on the other microcode you are using.</p>
|
|
<p>In some header that is accessible to variables.h: </p><div class="fragment"><div class="line">typedef struct { /* Default performance counters, if no CFG_PROFILING_* is enabled */</div>
|
|
<div class="line"> /* Number of vertices processed by the RSP */</div>
|
|
<div class="line"> u16 vertexCount;</div>
|
|
<div class="line"> /* Number of tris actually drawn, after clipping and all types of culling */</div>
|
|
<div class="line"> u16 rdpOutTriCount;</div>
|
|
<div class="line"> /* Number of tris which processing started on the RSP (before clipping / culling) */</div>
|
|
<div class="line"> u32 rspInTriCount:18;</div>
|
|
<div class="line"> /* Number of fill rects and tex rects drawn */</div>
|
|
<div class="line"> u32 rectCount:14;</div>
|
|
<div class="line"> /* Number of cycles the RSP was stalled because the RDP FIFO was full */</div>
|
|
<div class="line"> u32 stallRDPFifoFullCycles;</div>
|
|
<div class="line"> /* Unused, zero */</div>
|
|
<div class="line"> u32 dummy;</div>
|
|
<div class="line">} F3DEX3ProfilingDefault;</div>
|
|
<div class="line"> </div>
|
|
<div class="line">typedef struct { /* Counters for CFG_PROFILING_A */</div>
|
|
<div class="line"> /* Number of cycles the RSP spent processing vertex commands, including vertex DMAs */</div>
|
|
<div class="line"> u32 vertexProcCycles;</div>
|
|
<div class="line"> /* Number of display list commands fetched from DRAM, >= dlCommandCount */</div>
|
|
<div class="line"> u16 fetchedDLCommandCount;</div>
|
|
<div class="line"> /* Number of display list commands executed */</div>
|
|
<div class="line"> u16 dlCommandCount;</div>
|
|
<div class="line"> /* Number of cycles the RSP was stalled because the RDP FIFO was full */</div>
|
|
<div class="line"> u32 stallRDPFifoFullCycles;</div>
|
|
<div class="line"> /* Number of cycles the RSP spent processing triangle commands, NOT including buffer flushes (i.e. FIFO full) */</div>
|
|
<div class="line"> u32 triProcCycles;</div>
|
|
<div class="line">} F3DEX3ProfilingA;</div>
|
|
<div class="line"> </div>
|
|
<div class="line">typedef struct { /* Counters for CFG_PROFILING_B */</div>
|
|
<div class="line"> /* Number of vertices processed by the RSP */</div>
|
|
<div class="line"> u16 vertexCount;</div>
|
|
<div class="line"> /* Number of vertices processed which had lighting enabled */</div>
|
|
<div class="line"> u16 litVertexCount;</div>
|
|
<div class="line"> /* Number of tris culled by the occlusion plane */</div>
|
|
<div class="line"> u32 occlusionPlaneCullCount:18;</div>
|
|
<div class="line"> /* Number of RSP/input triangles which got clipped */</div>
|
|
<div class="line"> u32 clippedTriCount:14;</div>
|
|
<div class="line"> /* Number of times any microcode overlay was loaded */</div>
|
|
<div class="line"> u32 allOverlayLoadCount:18;</div>
|
|
<div class="line"> /* Number of times overlay 2 (lighting) was loaded */</div>
|
|
<div class="line"> u32 lightingOverlayLoadCount:14;</div>
|
|
<div class="line"> /* Number of times overlay 3 (clipping) was loaded */</div>
|
|
<div class="line"> u32 clippingOverlayLoadCount:18;</div>
|
|
<div class="line"> /* Number of times overlay 4 (mIT matrix, matrix multiply, etc.) was loaded */</div>
|
|
<div class="line"> u32 miscOverlayLoadCount:14;</div>
|
|
<div class="line">} F3DEX3ProfilingB;</div>
|
|
<div class="line"> </div>
|
|
<div class="line">typedef struct { /* Counters for CFG_PROFILING_C */</div>
|
|
<div class="line"> /* Total cycles F3DEX3 believes it was running, not including SPLoadUcode */</div>
|
|
<div class="line"> u32 ex3UcodeCycles;</div>
|
|
<div class="line"> /* The "GCLK is alive" bit of the RDP status is sampled once every time a</div>
|
|
<div class="line"> display list command is started. This counts the number of times that bit</div>
|
|
<div class="line"> was 1. Divide by dlCommandCount to get an approximate measurement of the</div>
|
|
<div class="line"> percentage of time the RDP was doing useful work, as opposed to waiting</div>
|
|
<div class="line"> for framebuffer / Z buffer memory transactions to complete. */</div>
|
|
<div class="line"> u16 commandsSampledGclkActive;</div>
|
|
<div class="line"> /* Number of display list commands executed */</div>
|
|
<div class="line"> u16 dlCommandCount;</div>
|
|
<div class="line"> /* Number of commands sent to the RDP except for triangle commands */</div>
|
|
<div class="line"> u32 smallRDPCommandCount:18;</div>
|
|
<div class="line"> /* Number of matrix loads, of any type */</div>
|
|
<div class="line"> u32 matrixCount:14;</div>
|
|
<div class="line"> /* Number of cycles the RSP was stalled waiting for any DMAs: vertex loads,</div>
|
|
<div class="line"> matrix loads, copying command buffers to the RDP FIFO, overlay loads, etc. */</div>
|
|
<div class="line"> u32 stallDMACycles;</div>
|
|
<div class="line">} F3DEX3ProfilingC;</div>
|
|
<div class="line"> </div>
|
|
<div class="line">typedef struct {</div>
|
|
<div class="line"> union {</div>
|
|
<div class="line"> F3DEX3ProfilingDefault def;</div>
|
|
<div class="line"> F3DEX3ProfilingA a;</div>
|
|
<div class="line"> F3DEX3ProfilingB b;</div>
|
|
<div class="line"> F3DEX3ProfilingC c;</div>
|
|
<div class="line"> u64 dummy_alignment[2];</div>
|
|
<div class="line"> };</div>
|
|
<div class="line"> u32 taskdataptr; /* Not a perf counter, can ignore */</div>
|
|
<div class="line"> u32 ucode; /* Not a perf counter, can ignore */</div>
|
|
<div class="line">} F3DEX3YieldDataFooter;</div>
|
|
</div><!-- fragment --><p>In variables.h with the ENABLE_SPEEDMETER section: </p><div class="fragment"><div class="line">extern volatile F3DEX3YieldDataFooter gRSPProfilingResults;</div>
|
|
</div><!-- fragment --><p>In the <code>true</code> codepath of Sched_TaskComplete: </p><div class="fragment"><div class="line">#ifdef ENABLE_SPEEDMETER</div>
|
|
<div class="line"> /* Fetch the performance counters from the footer at the end of the yield data */</div>
|
|
<div class="line"> if(task->list.t.type == M_GFXTASK){</div>
|
|
<div class="line"> F3DEX3YieldDataFooter* footer = (F3DEX3YieldDataFooter*)(</div>
|
|
<div class="line"> (u8*)gGfxSPTaskYieldBuffer +</div>
|
|
<div class="line"> OS_YIELD_DATA_SIZE - sizeof(F3DEX3YieldDataFooter));</div>
|
|
<div class="line"> osInvalDCache(footer, sizeof(F3DEX3YieldDataFooter));</div>
|
|
<div class="line"> bcopy(footer, &amp;gRSPProfilingResults, sizeof(F3DEX3YieldDataFooter));</div>
|
|
<div class="line"> /* The second invalidate is to get the footer out of the CPU cache,</div>
|
|
<div class="line"> because it could get written back and overwrite newer data in RAM */</div>
|
|
<div class="line"> osInvalDCache(footer, sizeof(F3DEX3YieldDataFooter));</div>
|
|
<div class="line"> }</div>
|
|
<div class="line">#endif</div>
|
|
</div><!-- fragment --><p>In speed_meter.c: </p><div class="fragment"><div class="line">volatile F3DEX3YieldDataFooter gRSPProfilingResults;</div>
|
|
</div><!-- fragment --><p>You can display them on screen however you wish. Here is an example, in SpeedMeter_DrawTimeEntries: </p><div class="fragment"><div class="line">GfxPrint printer;</div>
|
|
<div class="line">Gfx* opaStart;</div>
|
|
<div class="line">Gfx* gfx;</div>
|
|
<div class="line"> </div>
|
|
<div class="line">GfxPrint_Init(&amp;printer);</div>
|
|
<div class="line">opaStart = POLY_OPA_DISP;</div>
|
|
<div class="line">gfx = Graph_GfxPlusOne(POLY_OPA_DISP);</div>
|
|
<div class="line">gSPDisplayList(OVERLAY_DISP++, gfx);</div>
|
|
<div class="line">GfxPrint_Open(&amp;printer, gfx);</div>
|
|
<div class="line"> </div>
|
|
<div class="line">GfxPrint_SetColor(&amp;printer, 255, 100, 0, 255);</div>
|
|
<div class="line">if(f3dex3_version_CFG_PROFILING_A){</div>
|
|
<div class="line"> ...</div>
|
|
<div class="line">}else if(f3dex3_version_CFG_PROFILING_B){</div>
|
|
<div class="line"> ...</div>
|
|
<div class="line">}else if(f3dex3_version_CFG_PROFILING_C){</div>
|
|
<div class="line"> ...</div>
|
|
<div class="line">}else{</div>
|
|
<div class="line"> GfxPrint_SetPos(&amp;printer, 33, 25);</div>
|
|
<div class="line"> GfxPrint_Printf(&amp;printer, "%5dV", gRSPProfilingResults.def.vertexCount);</div>
|
|
<div class="line"> GfxPrint_SetPos(&amp;printer, 33, 26);</div>
|
|
<div class="line"> GfxPrint_Printf(&amp;printer, "%5dt", gRSPProfilingResults.def.rspInTriCount);</div>
|
|
<div class="line"> GfxPrint_SetPos(&amp;printer, 33, 27);</div>
|
|
<div class="line"> GfxPrint_Printf(&amp;printer, "%5dT", gRSPProfilingResults.def.rdpOutTriCount);</div>
|
|
<div class="line"> ...</div>
|
|
<div class="line">}</div>
|
|
<div class="line"> </div>
|
|
<div class="line">gfx = GfxPrint_Close(&amp;printer);</div>
|
|
<div class="line">gSPEndDisplayList(gfx++);</div>
|
|
<div class="line">Graph_BranchDlist(opaStart, gfx);</div>
|
|
<div class="line">POLY_OPA_DISP = gfx;</div>
|
|
</div><!-- fragment --> </div></div><!-- contents -->
|
|
</div><!-- PageDoc -->
|
|
</div><!-- doc-content -->
|
|
<!-- start footer part -->
|
|
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
|
|
<ul>
|
|
<li class="navelem"><a class="el" href="md_docs_2code.html">Code</a></li>
|
|
<li class="footer">Generated by <a href="https://www.doxygen.org/index.html"><img class="footer" src="doxygen.svg" width="104" height="31" alt="doxygen"/></a> 1.11.0 </li>
|
|
</ul>
|
|
</div>
|
|
</body>
|
|
</html>
|