From 031cc24fa67dc14b709914eb99e11275497c2c70 Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy <68165316+Mr-Wiseguy@users.noreply.github.com> Date: Sun, 1 May 2022 00:42:58 -0400 Subject: [PATCH] New profiler (#390) * Implemented new lightweight CPU/RDP profiler * Integrated new profiler into puppyprint debug and improved profiler visuals, removed existing puppyprint profiling code --- include/config/config_debug.h | 4 + include/config/config_safeguards.h | 45 ++--- src/audio/load.c | 30 ---- src/boot/main.c | 33 ++-- src/boot/memory.c | 6 - src/engine/math_util.c | 6 - src/engine/surface_collision.c | 48 ----- src/engine/surface_load.c | 12 -- src/game/area.c | 12 +- src/game/camera.c | 8 - src/game/crash_screen.c | 6 - src/game/fasttext.c | 87 +++++++++ src/game/fasttext.h | 14 ++ src/game/game_init.c | 29 +-- src/game/hud.c | 9 +- src/game/newfont2_swapped.bin | Bin 0 -> 4032 bytes src/game/object_list_processor.c | 32 ++-- src/game/profiling.c | 279 +++++++++++++++++++++++++++++ src/game/profiling.h | 61 +++++++ src/game/puppyprint.c | 249 ++++++------------------- src/game/puppyprint.h | 37 +--- src/game/sound_init.c | 39 +--- 22 files changed, 578 insertions(+), 468 deletions(-) create mode 100644 src/game/fasttext.c create mode 100644 src/game/fasttext.h create mode 100644 src/game/newfont2_swapped.bin create mode 100644 src/game/profiling.c create mode 100644 src/game/profiling.h diff --git a/include/config/config_debug.h b/include/config/config_debug.h index 1d5b3b88..fc5ac8e9 100644 --- a/include/config/config_debug.h +++ b/include/config/config_debug.h @@ -10,6 +10,10 @@ // Disables all debug options (except PUPPYPRINT). #define DISABLE_ALL +// Enables a comprehensive standalone profiler. Automatically enabled by PUPPYPRINT_DEBUG. +// If not using PUPPYPRINT_DEBUG, press L to toggle the profiler. +// #define USE_PROFILER + // TEST LEVEL // Uncomment this define and set a test level in order to boot straight into said level. // This allows you to quickly test the level you're working on. diff --git a/include/config/config_safeguards.h b/include/config/config_safeguards.h index d7729686..299226ce 100644 --- a/include/config/config_safeguards.h +++ b/include/config/config_safeguards.h @@ -33,15 +33,21 @@ * config_debug.h */ -#ifdef PUPPYPRINT_DEBUG - #undef PUPPYPRINT - #define PUPPYPRINT -#endif // PUPPYPRINT_DEBUG - -#ifdef COMPLETE_SAVE_FILE +#ifdef DISABLE_ALL + #undef DEBUG_ALL + #undef TEST_LEVEL + #undef DEBUG_LEVEL_SELECT + #undef ENABLE_DEBUG_FREE_MOVE + #undef VANILLA_DEBUG + #undef VANILLA_STYLE_CUSTOM_DEBUG + #undef PUPPYPRINT_DEBUG + #undef PUPPYPRINT_DEBUG_CYCLES + #undef VISUAL_DEBUG #undef UNLOCK_ALL - #define UNLOCK_ALL -#endif // COMPLETE_SAVE_FILE + #undef COMPLETE_SAVE_FILE + #undef DEBUG_FORCE_CRASH_ON_BOOT + #undef USE_PROFILER +#endif // DISABLE_ALL #ifdef DEBUG_ALL #undef DEBUG_LEVEL_SELECT @@ -66,20 +72,17 @@ #define COMPLETE_SAVE_FILE #endif // DEBUG_ALL -#ifdef DISABLE_ALL - #undef DEBUG_ALL - #undef TEST_LEVEL - #undef DEBUG_LEVEL_SELECT - #undef ENABLE_DEBUG_FREE_MOVE - #undef VANILLA_DEBUG - #undef VANILLA_STYLE_CUSTOM_DEBUG - #undef PUPPYPRINT_DEBUG - #undef PUPPYPRINT_DEBUG_CYCLES - #undef VISUAL_DEBUG +#ifdef PUPPYPRINT_DEBUG + #undef PUPPYPRINT + #define PUPPYPRINT + #undef USE_PROFILER + #define USE_PROFILER +#endif // PUPPYPRINT_DEBUG + +#ifdef COMPLETE_SAVE_FILE #undef UNLOCK_ALL - #undef COMPLETE_SAVE_FILE - #undef DEBUG_FORCE_CRASH_ON_BOOT -#endif // DISABLE_ALL + #define UNLOCK_ALL +#endif // COMPLETE_SAVE_FILE /***************** diff --git a/src/audio/load.c b/src/audio/load.c index 268fbf2a..f484871a 100644 --- a/src/audio/load.c +++ b/src/audio/load.c @@ -137,14 +137,8 @@ u8 audioString49[] = "BANK LOAD MISS! FOR %d\n"; * Performs an asynchronus (normal priority) DMA copy */ void audio_dma_copy_async(uintptr_t devAddr, void *vAddr, size_t nbytes, OSMesgQueue *queue, OSIoMesg *mesg) { -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif osInvalDCache(vAddr, nbytes); osPiStartDma(mesg, OS_MESG_PRI_NORMAL, OS_READ, devAddr, vAddr, nbytes, queue); -#if PUPPYPRINT_DEBUG - dmaAudioTime[perfIteration] += (osGetTime() - first); -#endif } /** @@ -152,18 +146,12 @@ void audio_dma_copy_async(uintptr_t devAddr, void *vAddr, size_t nbytes, OSMesgQ * to 0x1000 bytes transfer at once. */ void audio_dma_partial_copy_async(uintptr_t *devAddr, u8 **vAddr, ssize_t *remaining, OSMesgQueue *queue, OSIoMesg *mesg) { -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif ssize_t transfer = MIN(*remaining, 0x1000); *remaining -= transfer; osInvalDCache(*vAddr, transfer); osPiStartDma(mesg, OS_MESG_PRI_NORMAL, OS_READ, *devAddr, *vAddr, transfer, queue); *devAddr += transfer; *vAddr += transfer; -#if PUPPYPRINT_DEBUG - dmaAudioTime[perfIteration] += (osGetTime() - first); -#endif } void decrease_sample_dma_ttls() { @@ -208,9 +196,6 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) { u32 i; u32 dmaIndex; ssize_t bufferPos; -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif if (arg2 != 0 || *dmaIndexRef >= sSampleDmaListSize1) { for (i = sSampleDmaListSize1; i < gSampleDmaNumListItems; i++) { @@ -235,9 +220,6 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) { } dma->ttl = 60; *dmaIndexRef = (u8) i; -#if PUPPYPRINT_DEBUG - dmaAudioTime[perfIteration] += (osGetTime() - first); -#endif #if defined(VERSION_EU) return &dma->buffer[(devAddr - dma->source)]; #else @@ -277,14 +259,8 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) { } dma->ttl = 2; #if defined(VERSION_EU) - #if PUPPYPRINT_DEBUG - dmaAudioTime[perfIteration] += (osGetTime() - first); - #endif return dma->buffer + (devAddr - dma->source); #else - #if PUPPYPRINT_DEBUG - dmaAudioTime[perfIteration] += (osGetTime() - first); - #endif return (devAddr - dma->source) + dma->buffer; #endif } @@ -310,18 +286,12 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) { osPiStartDma(&gCurrAudioFrameDmaIoMesgBufs[gCurrAudioFrameDmaCount++], OS_MESG_PRI_NORMAL, OS_READ, dmaDevAddr, dma->buffer, transfer, &gCurrAudioFrameDmaQueue); *dmaIndexRef = dmaIndex; -#if PUPPYPRINT_DEBUG - dmaAudioTime[perfIteration] += (osGetTime() - first); -#endif return (devAddr - dmaDevAddr) + dma->buffer; #else gCurrAudioFrameDmaCount++; osPiStartDma(&gCurrAudioFrameDmaIoMesgBufs[gCurrAudioFrameDmaCount - 1], OS_MESG_PRI_NORMAL, OS_READ, dmaDevAddr, dma->buffer, transfer, &gCurrAudioFrameDmaQueue); *dmaIndexRef = dmaIndex; -#if PUPPYPRINT_DEBUG - dmaAudioTime[perfIteration] += (osGetTime() - first); -#endif return dma->buffer + (devAddr - dmaDevAddr); #endif } diff --git a/src/boot/main.c b/src/boot/main.c index d673fa05..c3f82d02 100644 --- a/src/boot/main.c +++ b/src/boot/main.c @@ -19,6 +19,7 @@ #endif #include "game/puppyprint.h" #include "game/puppylights.h" +#include "game/profiling.h" // Message IDs enum MessageIDs { @@ -185,10 +186,8 @@ void start_gfx_sptask(void) { if (gActiveSPTask == NULL && sCurrentDisplaySPTask != NULL && sCurrentDisplaySPTask->state == SPTASK_STATE_NOT_STARTED) { -#if PUPPYPRINT_DEBUG - rspDelta = osGetTime(); -#endif start_sptask(M_GFXTASK); + profiler_rsp_started(PROFILER_RSP_GFX); } } @@ -220,15 +219,14 @@ void handle_vblank(void) { } else { pretend_audio_sptask_done(); } + profiler_rsp_started(PROFILER_RSP_AUDIO); } } else { if (gActiveSPTask == NULL && sCurrentDisplaySPTask != NULL && sCurrentDisplaySPTask->state != SPTASK_STATE_FINISHED) { -#if PUPPYPRINT_DEBUG - rspDelta = osGetTime(); -#endif start_sptask(M_GFXTASK); + profiler_rsp_started(PROFILER_RSP_GFX); } } #if ENABLE_RUMBLE @@ -253,9 +251,9 @@ void handle_sp_complete(void) { // The gfx task completed before we had time to interrupt it. // Mark it finished, just like below. curSPTask->state = SPTASK_STATE_FINISHED; -#if PUPPYPRINT_DEBUG - profiler_update(rspGenTime, rspDelta); -#endif + profiler_rsp_completed(PROFILER_RSP_GFX); + } else { + profiler_rsp_yielded(); } // Start the audio task, as expected by handle_vblank. @@ -264,12 +262,19 @@ void handle_sp_complete(void) { } else { pretend_audio_sptask_done(); } + profiler_rsp_started(PROFILER_RSP_AUDIO); } else { curSPTask->state = SPTASK_STATE_FINISHED; if (curSPTask->task.t.type == M_AUDTASK) { + profiler_rsp_completed(PROFILER_RSP_AUDIO); // After audio tasks come gfx tasks. if ((sCurrentDisplaySPTask != NULL) && (sCurrentDisplaySPTask->state != SPTASK_STATE_FINISHED)) { + if (sCurrentDisplaySPTask->state == SPTASK_STATE_INTERRUPTED) { + profiler_rsp_resumed(); + } else { + profiler_rsp_started(PROFILER_RSP_GFX); + } start_sptask(M_GFXTASK); } sCurrentAudioSPTask = NULL; @@ -280,9 +285,7 @@ void handle_sp_complete(void) { // The SP process is done, but there is still a Display Processor notification // that needs to arrive before we can consider the task completely finished and // null out sCurrentDisplaySPTask. That happens in handle_dp_complete. -#if PUPPYPRINT_DEBUG - profiler_update(rspGenTime, rspDelta); -#endif + profiler_rsp_completed(PROFILER_RSP_GFX); } } } @@ -326,9 +329,6 @@ void thread3_main(UNUSED void *arg) { while (TRUE) { OSMesg msg; -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif osRecvMesg(&gIntrMesgQueue, &msg, OS_MESG_BLOCK); switch ((uintptr_t) msg) { case MESG_VI_VBLANK: @@ -347,9 +347,6 @@ void thread3_main(UNUSED void *arg) { handle_nmi_request(); break; } -#if PUPPYPRINT_DEBUG - profiler_update(taskTime, first); -#endif } } diff --git a/src/boot/memory.c b/src/boot/memory.c index 4c3bd940..5abf5c72 100644 --- a/src/boot/memory.c +++ b/src/boot/memory.c @@ -259,9 +259,6 @@ u32 main_pool_pop_state(void) { */ void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) { u32 size = ALIGN16(srcEnd - srcStart); -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif osInvalDCache(dest, size); while (size != 0) { @@ -275,9 +272,6 @@ void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) { srcStart += copySize; size -= copySize; } -#if PUPPYPRINT_DEBUG - dmaTime[perfIteration] += (osGetTime() - first); -#endif } /** diff --git a/src/engine/math_util.c b/src/engine/math_util.c index e7fd2f31..7732eec7 100644 --- a/src/engine/math_util.c +++ b/src/engine/math_util.c @@ -1377,9 +1377,6 @@ void find_surface_on_ray_list(struct SurfaceNode *list, Vec3f orig, Vec3f dir, f f32 length; Vec3f chk_hit_pos; f32 top, bottom; -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif // Get upper and lower bounds of ray if (dir[1] >= 0.0f) { // Ray is upwards. @@ -1403,9 +1400,6 @@ void find_surface_on_ray_list(struct SurfaceNode *list, Vec3f orig, Vec3f dir, f *max_length = length; } } -#if PUPPYPRINT_DEBUG - collisionTime[perfIteration] += osGetTime() - first; -#endif } void find_surface_on_ray_cell(s32 cellX, s32 cellZ, Vec3f orig, Vec3f normalized_dir, f32 dir_length, struct Surface **hit_surface, Vec3f hit_pos, f32 *max_length, s32 flags) { diff --git a/src/engine/surface_collision.c b/src/engine/surface_collision.c index 01f0a3c1..73bb7034 100644 --- a/src/engine/surface_collision.c +++ b/src/engine/surface_collision.c @@ -175,16 +175,10 @@ s32 find_wall_collisions(struct WallCollisionData *colData) { s32 numCollisions = 0; s32 x = colData->x; s32 z = colData->z; -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif colData->numWalls = 0; if (is_outside_level_bounds(x, z)) { -#if PUPPYPRINT_DEBUG - collisionTime[perfIteration] += osGetTime() - first; -#endif return numCollisions; } @@ -207,9 +201,6 @@ s32 find_wall_collisions(struct WallCollisionData *colData) { // Increment the debug tracker. gNumCalls.wall++; #endif -#if PUPPYPRINT_DEBUG - collisionTime[perfIteration] += osGetTime() - first; -#endif return numCollisions; } @@ -326,18 +317,12 @@ static struct Surface *find_ceil_from_list(struct SurfaceNode *surfaceNode, s32 f32 find_ceil(f32 posX, f32 posY, f32 posZ, struct Surface **pceil) { f32 height = CELL_HEIGHT_LIMIT; f32 dynamicHeight = CELL_HEIGHT_LIMIT; -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif s32 x = posX; s32 y = posY; s32 z = posZ; *pceil = NULL; if (is_outside_level_bounds(x, z)) { -#if PUPPYPRINT_DEBUG - collisionTime[perfIteration] += (osGetTime() - first); -#endif return height; } @@ -379,9 +364,6 @@ f32 find_ceil(f32 posX, f32 posY, f32 posZ, struct Surface **pceil) { // Increment the debug tracker. gNumCalls.ceil++; #endif -#if PUPPYPRINT_DEBUG - collisionTime[perfIteration] += osGetTime() - first; -#endif return height; } @@ -563,10 +545,6 @@ f32 unused_find_dynamic_floor(f32 xPos, f32 yPos, f32 zPos, struct Surface **pfl * Find the highest floor under a given position and return the height. */ f32 find_floor(f32 xPos, f32 yPos, f32 zPos, struct Surface **pfloor) { -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif - f32 height = FLOOR_LOWER_LIMIT; f32 dynamicHeight = FLOOR_LOWER_LIMIT; @@ -580,9 +558,6 @@ f32 find_floor(f32 xPos, f32 yPos, f32 zPos, struct Surface **pfloor) { *pfloor = NULL; if (is_outside_level_bounds(x, z)) { -#if PUPPYPRINT_DEBUG - collisionTime[perfIteration] += (osGetTime() - first); -#endif return height; } // Each level is split into cells to limit load, find the appropriate cell. @@ -626,9 +601,6 @@ f32 find_floor(f32 xPos, f32 yPos, f32 zPos, struct Surface **pfloor) { #ifdef VANILLA_DEBUG // Increment the debug tracker. gNumCalls.floor++; -#endif -#if PUPPYPRINT_DEBUG - collisionTime[perfIteration] += (osGetTime() - first); #endif return height; } @@ -682,9 +654,6 @@ s32 find_water_level_and_floor(s32 x, s32 y, s32 z, struct Surface **pfloor) { s32 loX, hiX, loZ, hiZ; TerrainData *p = gEnvironmentRegions; struct Surface *floor = NULL; -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif s32 waterLevel = find_water_floor(x, y, z, &floor); if (p != NULL && waterLevel == FLOOR_LOWER_LIMIT) { @@ -710,9 +679,6 @@ s32 find_water_level_and_floor(s32 x, s32 y, s32 z, struct Surface **pfloor) { *pfloor = floor; } -#if PUPPYPRINT_DEBUG - collisionTime[perfIteration] += (osGetTime() - first); -#endif return waterLevel; } @@ -724,9 +690,6 @@ s32 find_water_level(s32 x, s32 z) { // TODO: Allow y pos s32 loX, hiX, loZ, hiZ; TerrainData *p = gEnvironmentRegions; struct Surface *floor = NULL; -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif s32 waterLevel = find_water_floor(x, ((gCollisionFlags & COLLISION_FLAG_CAMERA) ? gLakituState.pos[1] : gMarioState->pos[1]), z, &floor); if ((p != NULL) && (waterLevel == FLOOR_LOWER_LIMIT)) { @@ -750,10 +713,6 @@ s32 find_water_level(s32 x, s32 z) { // TODO: Allow y pos } } -#if PUPPYPRINT_DEBUG - collisionTime[perfIteration] += osGetTime() - first; -#endif - return waterLevel; } @@ -765,9 +724,6 @@ s32 find_poison_gas_level(s32 x, s32 z) { s32 loX, hiX, loZ, hiZ; s32 gasLevel = FLOOR_LOWER_LIMIT; TerrainData *p = gEnvironmentRegions; -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif if (p != NULL) { s32 numRegions = *p++; @@ -794,10 +750,6 @@ s32 find_poison_gas_level(s32 x, s32 z) { } } -#if PUPPYPRINT_DEBUG - collisionTime[perfIteration] += osGetTime() - first; -#endif - return gasLevel; } diff --git a/src/engine/surface_load.c b/src/engine/surface_load.c index 1553bab4..786c97f0 100644 --- a/src/engine/surface_load.c +++ b/src/engine/surface_load.c @@ -470,9 +470,6 @@ u32 get_area_terrain_size(TerrainData *data) { void load_area_terrain(s32 index, TerrainData *data, RoomData *surfaceRooms, s16 *macroObjects) { s32 terrainLoadType; TerrainData *vertexData = NULL; -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif // Initialize the data for this. gEnvironmentRegions = NULL; @@ -519,9 +516,6 @@ void load_area_terrain(s32 index, TerrainData *data, RoomData *surfaceRooms, s16 gNumStaticSurfaceNodes = gSurfaceNodesAllocated; gNumStaticSurfaces = gSurfacesAllocated; -#if PUPPYPRINT_DEBUG - collisionTime[perfIteration] += osGetTime() - first; -#endif } /** @@ -646,9 +640,6 @@ static void get_optimal_coll_dist(struct Object *obj) { */ void load_object_collision_model(void) { TerrainData vertexData[600]; -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif TerrainData *collisionData = o->collisionData; f32 marioDist = o->oDistanceToMario; @@ -686,7 +677,4 @@ void load_object_collision_model(void) { } } COND_BIT((marioDist < o->oDrawingDistance), o->header.gfx.node.flags, GRAPH_RENDER_ACTIVE); -#if PUPPYPRINT_DEBUG - collisionTime[perfIteration] += osGetTime() - first; -#endif } diff --git a/src/game/area.c b/src/game/area.c index f52c3135..70e90707 100644 --- a/src/game/area.c +++ b/src/game/area.c @@ -25,6 +25,7 @@ #include "puppyprint.h" #include "debug_box.h" #include "engine/colors.h" +#include "profiling.h" struct SpawnInfo gPlayerSpawnInfos[1]; struct GraphNode *gGraphNodePointers[MODEL_ID_COUNT]; @@ -375,10 +376,6 @@ void play_transition_after_delay(s16 transType, s16 time, u8 red, u8 green, u8 b } void render_game(void) { -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); - OSTime colTime = collisionTime[perfIteration]; -#endif if (gCurrentArea != NULL && !gWarpTransition.pauseRendering) { if (gCurrentArea->graphNode) { geo_process_root(gCurrentArea->graphNode, gViewportOverride, gViewportClip, gFBSetColor); @@ -434,11 +431,10 @@ void render_game(void) { gViewportOverride = NULL; gViewportClip = NULL; - + + profiler_update(PROFILER_TIME_GFX); + profiler_print_times(); #if PUPPYPRINT_DEBUG - profiler_update(graphTime, first); - graphTime[perfIteration] -= (collisionTime[perfIteration] - colTime); - // graphTime[perfIteration] -= profilerTime[perfIteration]; //! Graph time is inaccurate and wrongly reaches 0 sometimes puppyprint_render_profiler(); #endif } diff --git a/src/game/camera.c b/src/game/camera.c index 819f235c..21d40773 100644 --- a/src/game/camera.c +++ b/src/game/camera.c @@ -2871,10 +2871,6 @@ void update_lakitu(struct Camera *c) { * Gets controller input, checks for cutscenes, handles mode changes, and moves the camera */ void update_camera(struct Camera *c) { -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); - OSTime colTime = collisionTime[perfIteration]; -#endif gCamera = c; update_camera_hud_status(c); if (c->cutscene == CUTSCENE_NONE @@ -3117,10 +3113,6 @@ void update_camera(struct Camera *c) { } #endif gLakituState.lastFrameAction = sMarioCamState->action; -#if PUPPYPRINT_DEBUG - profiler_update(cameraTime, first); - cameraTime[perfIteration] -= collisionTime[perfIteration]-colTime; -#endif } /** diff --git a/src/game/crash_screen.c b/src/game/crash_screen.c index 6214e8fb..f95b0286 100644 --- a/src/game/crash_screen.c +++ b/src/game/crash_screen.c @@ -402,9 +402,6 @@ void thread2_crash_screen(UNUSED void *arg) { osSetEventMesg(OS_EVENT_CPU_BREAK, &gCrashScreen.mesgQueue, (OSMesg) 1); osSetEventMesg(OS_EVENT_FAULT, &gCrashScreen.mesgQueue, (OSMesg) 2); while (TRUE) { -#if PUPPYPRINT_DEBUG - OSTime first = osGetTime(); -#endif if (thread == NULL) { osRecvMesg(&gCrashScreen.mesgQueue, &mesg, 1); thread = get_crashed_thread(); @@ -433,9 +430,6 @@ void thread2_crash_screen(UNUSED void *arg) { read_controller_inputs(THREAD_2_CRASH_SCREEN); draw_crash_screen(thread); } -#if PUPPYPRINT_DEBUG - profiler_update(faultTime, first); -#endif } } diff --git a/src/game/fasttext.c b/src/game/fasttext.c new file mode 100644 index 00000000..3cb5bf9c --- /dev/null +++ b/src/game/fasttext.c @@ -0,0 +1,87 @@ +// This file is a modification of a file from https://github.com/danbolt/n64-jam-1, which was licensed under the MPL-2.0 License +// See the original repo for more details. + +#include + +#define TEX_ASCII_START '!' +#define TAB_WIDTH 16 + +#define G_CC_TEXT PRIMITIVE, 0, TEXEL0, 0, 0, 0, 0, TEXEL0 + +__asm__( + ".section \".rodata\", \"a\", @progbits\n" + ".global fast_font\n" + "fast_font:\n" + ".incbin \"src/game/newfont2_swapped.bin\"\n" + ".previous\n" +); + +extern u8 fast_font[]; + +int computeS(unsigned char letter) { + int idx = letter; + if (letter > 'z') { + idx -= (3 + 2 + 3 + 1 + 3); + } else if (letter > '^') { + idx -= (2 + 3 + 1 + 3); + } else if (letter > 'Z') { + idx -= (3 + 1 + 3); + } else if (letter > '?') { + idx -= (1 + 3); + } else if (letter > ';') { + idx -= (3); + } + + return (idx - TEX_ASCII_START) * 8; +} + +static const u8 fast_text_font_kerning[] = { + /* */ 2, /*!*/ 4, /*"*/ 5, /*#*/ 0, /*$*/ 0, /*%*/ 7, /*&*/ 7, /*'*/ 2, /*(*/ 5, /*)*/ 5, /***/ 0, /*+*/ 7, /*,*/ 3, /*-*/ 7, /*.*/ 3, /*/*/ 7, + /*0*/ 7, /*1*/ 7, /*2*/ 7, /*3*/ 7, /*4*/ 7, /*5*/ 7, /*6*/ 7, /*7*/ 7, /*8*/ 7, /*9*/ 7, /*:*/ 3, /*;*/ 3, /*<*/ 0, /*=*/ 0, /*>*/ 0, /*?*/ 7, + /*@*/ 0, /*A*/ 7, /*B*/ 7, /*C*/ 7, /*D*/ 7, /*E*/ 7, /*F*/ 7, /*G*/ 7, /*H*/ 7, /*I*/ 6, /*J*/ 5, /*K*/ 7, /*L*/ 7, /*M*/ 7, /*N*/ 7, /*O*/ 7, + /*P*/ 7, /*Q*/ 7, /*R*/ 7, /*S*/ 7, /*T*/ 7, /*U*/ 7, /*V*/ 7, /*W*/ 7, /*X*/ 7, /*Y*/ 7, /*Z*/ 7, /*[*/ 0, /*\*/ 0, /*]*/ 0, /*^*/ 6, /*_*/ 0, + /*`*/ 0, /*a*/ 6, /*b*/ 6, /*c*/ 6, /*d*/ 6, /*e*/ 6, /*f*/ 6, /*g*/ 6, /*h*/ 6, /*i*/ 2, /*j*/ 6, /*k*/ 5, /*l*/ 3, /*m*/ 6, /*n*/ 6, /*o*/ 6, + /*p*/ 6, /*q*/ 6, /*r*/ 6, /*s*/ 6, /*t*/ 6, /*u*/ 6, /*v*/ 6, /*w*/ 6, /*x*/ 6, /*y*/ 6, /*z*/ 6, /*{*/ 0, /*|*/ 0, /*}*/ 0, /*~*/ 7, +}; + +void drawSmallString_impl(Gfx **dl, int x, int y, const char* string, int r, int g, int b) { + int i = 0; + int xPos = x; + int yPos = y; + int s = 0; + Gfx *dlHead = *dl; + + gDPLoadTextureBlock_4bS(dlHead++, fast_font, G_IM_FMT_IA, 672, 12, 0, G_TX_MIRROR | G_TX_WRAP, G_TX_MIRROR | G_TX_WRAP, G_TX_NOMASK, G_TX_NOMASK, G_TX_NOLOD, G_TX_NOLOD); + gDPSetPrimColor(dlHead++, 0, 0, r, g, b, 255); + gDPSetCombineMode(dlHead++, G_CC_TEXT, G_CC_TEXT); + gDPPipeSync(dlHead++); + + while (string[i] != '\0') { + unsigned int cur_char = string[i]; + + if (cur_char == '\n') { + xPos = x; + yPos += 12; + i++; + continue; + } + + if (cur_char == '\t') { + int xDist = xPos - x + 1; + int tabCount = (xDist + TAB_WIDTH - 1) / TAB_WIDTH; + xPos = tabCount * TAB_WIDTH + x; + } else { + if (cur_char != ' ') { + s = computeS(cur_char); + gSPTextureRectangle(dlHead++, (xPos + 0) << 2, (yPos + 0) << 2, (xPos + 8) << 2, (yPos + 12) << 2, 0, s << 5, 0, 1 << 10, 1 << 10); + } + xPos += fast_text_font_kerning[cur_char - ' ']; + } + + i++; + } + gDPSetPrimColor(dlHead++, 0, 0, 255, 255, 255, 255); + gDPPipeSync(dlHead++); + + *dl = dlHead; +} diff --git a/src/game/fasttext.h b/src/game/fasttext.h new file mode 100644 index 00000000..9842d164 --- /dev/null +++ b/src/game/fasttext.h @@ -0,0 +1,14 @@ +#ifndef __FASTTEXT_H__ +#define __FASTTEXT_H__ + +void drawSmallString_impl(Gfx**, int, int, const char*, int, int , int); + +static inline void drawSmallString(Gfx **dl, int x, int y, const char* string) { + drawSmallString_impl(dl, x, y, string, 255, 255, 255); +} + +static inline void drawSmallStringCol(Gfx **dl, int x, int y, const char* string, int r, int g, int b) { + drawSmallString_impl(dl, x, y, string, r, g, b); +} + +#endif diff --git a/src/game/game_init.c b/src/game/game_init.c index 8ab354f8..3fd9ff70 100644 --- a/src/game/game_init.c +++ b/src/game/game_init.c @@ -29,6 +29,7 @@ #include "puppycam2.h" #include "debug_box.h" #include "vc_check.h" +#include "profiling.h" // First 3 controller slots struct Controller gControllers[3]; @@ -729,10 +730,6 @@ void setup_game_memory(void) { * Main game loop thread. Runs forever as long as the game continues. */ void thread5_game_loop(UNUSED void *arg) { -#if PUPPYPRINT_DEBUG - OSTime lastTime = 0; -#endif - setup_game_memory(); #if ENABLE_RUMBLE init_rumble_pak_scheduler_queue(); @@ -762,19 +759,12 @@ void thread5_game_loop(UNUSED void *arg) { render_init(); while (TRUE) { + profiler_frame_setup(); // If the reset timer is active, run the process to reset the game. if (gResetTimer != 0) { draw_reset_bars(); continue; } -#if PUPPYPRINT_DEBUG - while (TRUE) { - lastTime = osGetTime(); - collisionTime[perfIteration] = 0; - // graphTime[perfIteration] = 0; - behaviourTime[perfIteration] = 0; - dmaTime[perfIteration] = 0; -#endif // If any controllers are plugged in, start read the data for when // read_controller_inputs is called later. @@ -788,25 +778,12 @@ void thread5_game_loop(UNUSED void *arg) { audio_game_loop_tick(); select_gfx_pool(); read_controller_inputs(THREAD_5_GAME_LOOP); + profiler_update(PROFILER_TIME_CONTROLLERS); addr = level_script_execute(addr); #if !PUPPYPRINT_DEBUG && defined(VISUAL_DEBUG) debug_box_input(); #endif #if PUPPYPRINT_DEBUG - profiler_update(scriptTime, lastTime); - scriptTime[perfIteration] -= profilerTime[perfIteration]; - scriptTime[perfIteration] -= profilerTime2[perfIteration]; - if (benchmarkLoop > 0 && benchOption == 0) { - benchmarkLoop--; - benchMark[benchmarkLoop] = (osGetTime() - lastTime); - if (benchmarkLoop == 0) { - puppyprint_profiler_finished(); - break; - } - } else { - break; - } - } puppyprint_profiler_process(); #endif diff --git a/src/game/hud.c b/src/game/hud.c index 93f7fa3f..2138e067 100644 --- a/src/game/hud.c +++ b/src/game/hud.c @@ -52,6 +52,9 @@ u8 curFrameTimeIndex = 0; #include "PR/os_convert.h" +#ifdef USE_PROFILER +float profiler_get_fps(); +#else // Call once per frame f32 calculate_and_update_fps() { OSTime newTime = osGetTime(); @@ -64,9 +67,14 @@ f32 calculate_and_update_fps() { } return ((f32)FRAMETIME_COUNT * 1000000.0f) / (s32)OS_CYCLES_TO_USEC(newTime - oldTime); } +#endif void print_fps(s32 x, s32 y) { +#ifdef USE_PROFILER + f32 fps = profiler_get_fps(); +#else f32 fps = calculate_and_update_fps(); +#endif char text[14]; sprintf(text, "FPS %2.2f", fps); @@ -75,7 +83,6 @@ void print_fps(s32 x, s32 y) { #else print_text(x, y, text); #endif - } // ------------ END OF FPS COUNER ----------------- diff --git a/src/game/newfont2_swapped.bin b/src/game/newfont2_swapped.bin new file mode 100644 index 0000000000000000000000000000000000000000..72cf2c53ed95e374a692e3647832d7c2fdec7223 GIT binary patch literal 4032 zcmZQzU|_S;1Hd#`Lue?74Z@+J5H^Sg=>&;^ zFo;c0%nEfl$fXc_p{&r*01yj?8NjIqMnY6#BU!P8L+Jm|uOPLcumoY&{{al3Fa@ze zIR0yB0PENI08m)6ehpv%sRoII(kw_H9J9s;g#P~@${PQb0Yrl^L+Jmn@&EsS1+if? zhz$yEF#i8P6l6|lXec-yKr|@KK^PR?AR2^0@eQKEdO#`>@dM%ofb=2MKvZIoU%!HK z7&uKp+zj#&D4qO=i$g>peuc0hDzTBQ;84RR!@v+9`X8P?L3te%rl2qbVQ^S}jSm2Y zBPjkLVqhmQFvKGajsF_|m4WqZ0EiC#|CNC?{{L4H8-hbaK{SK}X^RJkGe~V zG$^b=;SR=MOlz?9A=>O85;i;6n3Hi!Qlqxfzv9? zk6%Gy3c`>u1L;FxuugFJgTz3ZS;4&jq3|#TnF}`yqzZ&X!5Tp<7zW1=C|*MUgF`Iz z|JQ){|6c<@bm;&8U^*U5{||_d{|e573=HuBp!^96OHla3)I(^99+2Mn|6f5W13;-6 zj6=aBC|!UP8z_uG7!)?3Fa@ze7*rmB${mpTAUTj2EYpC}5l9R~gZ#h%3h>bXp`dgN z;MAQa*Qa2SF7j)Xz!6C@iCD%U`M0EN`o|KRY& zR391u77GC7SxB0PhGak}v`hh|K~Ok>FeofRVFY4>Fxalpui%UgvJ(`RU~y0q3ljVHtd`4wDtf*cADyHH4YfWjCYrUBrb9uOZ7 zO>>}d3I)~8&~OCtL1hJq2B{7G52>fW{s*T^P}&4xkR6~j2ntIO#z>!`;4%Z8K0$R7 zD9k|WK=m@nPaypu8YBny11SAK>vV9){QnQGH$eUc$%8ORJqSb0fRwRdaZnxx>Ho?A zDia_v0yPU9ieNb~3tU%$%mC{HrF(FB0%==-(qufS&Vu9%P?-X8|NsAJ_I?GIpP`_34kQ(W(<7*!0);&&kAl-Rs2%wqkuE{;1Wu#>A?*cF7=bV-e2~*8 zs2u`okb=SyBnMIh4m+?5LHPqD3UU)DzCnCY4Fzu5z}ugowhst{!Ve?^W`pW2P`E+r zLy#s=y$NpXf;b=yQU}5yHYSGH2@+=omD!-87SslY(jakgD-2W`LB&8WMXs+QW`pW{ zP>l}4p`f-Gxb6ql*Pw6!s}BH~1PXDm|G=$NkY2EVKrPo$P&*RT*8#PFz-r+2JIGED z28A04gV-<(&gY?!vH>IpN*^E^ghBBJqKU;|y`aD)Rs$||paw54IU>bD>5Y(5i2ooY zF14^S5hM=^I}ir3K^VjaVGtXPAwdr=KS4}vK>-p6t0j`aW(JX3(Ug&I5-9ni8HR%k RO0hVkAYE}3Nl@5 +#include +#include "game_init.h" + +#include "profiling.h" +#include "fasttext.h" + +#ifdef USE_PROFILER + +#define RDP_CYCLE_CONV(x) ((10 * (x)) / 625) // 62.5 million cycles per frame + +typedef struct { + u32 counts[PROFILING_BUFFER_SIZE]; + u32 total; +} ProfileTimeData; + +ProfileTimeData all_profiling_data[PROFILER_TIME_COUNT]; + +int profile_buffer_index = -1; +int rsp_buffer_indices[PROFILER_RSP_COUNT]; +// Holds either the start time if the task is running, or the amount of time the task has run for so far if yielded +u32 rsp_pending_times[PROFILER_RSP_COUNT]; +u32 prev_start; +u32 start; +u32 prev_time; +u32 audio_start; +u32 audio_buffer_index; +u32 preempted_time; + +static void buffer_update(ProfileTimeData* data, u32 new, int buffer_index) { + u32 old = data->counts[buffer_index]; + data->total -= old; + data->total += new; + data->counts[buffer_index] = new; +} + +void profiler_update(enum ProfilerTime which) { + u32 cur_time = osGetCount(); + u32 diff; + ProfileTimeData* cur_data = &all_profiling_data[which]; + + diff = cur_time - prev_time; + + u32 saved = __osDisableInt(); + u32 cur_preempted_time = preempted_time; + preempted_time = 0; + __osRestoreInt(saved); + if (cur_preempted_time > 0) { + diff -= cur_preempted_time; + start += cur_preempted_time; + } + + buffer_update(cur_data, diff, profile_buffer_index); + prev_time = cur_time; +} + +void profiler_rsp_started(enum ProfilerRSPTime which) { + rsp_pending_times[which] = osGetCount(); +} + +void profiler_rsp_completed(enum ProfilerRSPTime which) { + ProfileTimeData* cur_data = &all_profiling_data[PROFILER_TIME_RSP_GFX + which]; + int cur_index = rsp_buffer_indices[which]; + u32 time = osGetCount() - rsp_pending_times[which]; + rsp_pending_times[which] = 0; + + buffer_update(cur_data, time, cur_index); + cur_index++; + if (cur_index >= PROFILING_BUFFER_SIZE) { + cur_index = 0; + } + rsp_buffer_indices[which] = cur_index; +} + +void profiler_rsp_resumed() { + rsp_pending_times[PROFILER_RSP_GFX] = osGetCount() - rsp_pending_times[PROFILER_RSP_GFX]; +} + +// This ends up being the same math as resumed, so we just use resumed for both +// void profiler_rsp_yielded() { +// rsp_pending_times[PROFILER_RSP_GFX] = osGetCount() - rsp_pending_times[PROFILER_RSP_GFX]; +// } + +void profiler_audio_started() { + audio_start = osGetCount(); +} + +void profiler_audio_completed() { + ProfileTimeData* cur_data = &all_profiling_data[PROFILER_TIME_AUDIO]; + u32 time = osGetCount() - audio_start; + u32 cur_index = audio_buffer_index; + + preempted_time = time; + buffer_update(cur_data, time, cur_index); + cur_index++; + if (cur_index >= PROFILING_BUFFER_SIZE) { + cur_index = 0; + } + + audio_buffer_index = cur_index; +} + +static void update_fps_timer() { + u32 diff = start - prev_start; + + buffer_update(&all_profiling_data[PROFILER_TIME_FPS], diff, profile_buffer_index); + prev_start = start; +} + +static void update_total_timer() { + u32 saved = __osDisableInt(); + u32 cur_preempted_time = preempted_time; + preempted_time = 0; + __osRestoreInt(saved); + + prev_time = start + cur_preempted_time; + profiler_update(PROFILER_TIME_TOTAL); +} + +static void update_rdp_timers() { + u32 tmem = IO_READ(DPC_TMEM_REG); + u32 cmd = IO_READ(DPC_BUFBUSY_REG); + u32 pipe = IO_READ(DPC_PIPEBUSY_REG); + + if (gGlobalTimer > 5) { + IO_WRITE(DPC_STATUS_REG, (DPC_CLR_CLOCK_CTR | DPC_CLR_CMD_CTR | DPC_CLR_PIPE_CTR | DPC_CLR_TMEM_CTR)); + } + + buffer_update(&all_profiling_data[PROFILER_TIME_TMEM], tmem, profile_buffer_index); + buffer_update(&all_profiling_data[PROFILER_TIME_CMD], cmd, profile_buffer_index); + buffer_update(&all_profiling_data[PROFILER_TIME_PIPE], pipe, profile_buffer_index); +} + +#ifdef PUPPYPRINT_DEBUG +extern u8 sPPDebugPage; +extern u8 fDebug; +#endif + +float profiler_get_fps() { + return (1000000.0f * PROFILING_BUFFER_SIZE) / (OS_CYCLES_TO_USEC(all_profiling_data[PROFILER_TIME_FPS].total)); +} + +u32 profiler_get_cpu_cycles() { + u32 cpu_normal_time = all_profiling_data[PROFILER_TIME_TOTAL].total / PROFILING_BUFFER_SIZE; + u32 cpu_audio_time = all_profiling_data[PROFILER_TIME_AUDIO].total / PROFILING_BUFFER_SIZE; + return cpu_normal_time + cpu_audio_time * 2; +} + +u32 profiler_get_rsp_cycles() { + u32 rsp_graphics_time = all_profiling_data[PROFILER_TIME_RSP_GFX].total / PROFILING_BUFFER_SIZE; + u32 rsp_audio_time = all_profiling_data[PROFILER_TIME_RSP_AUDIO].total / PROFILING_BUFFER_SIZE; + return rsp_graphics_time + rsp_audio_time; +} + +u32 profiler_get_rdp_cycles() { + u32 rdp_pipe_cycles = all_profiling_data[PROFILER_TIME_PIPE].total; + u32 rdp_tmem_cycles = all_profiling_data[PROFILER_TIME_TMEM].total; + u32 rdp_cmd_cycles = all_profiling_data[PROFILER_TIME_CMD].total; + + u32 rdp_max_cycles = MAX(MAX(rdp_pipe_cycles, rdp_tmem_cycles), rdp_cmd_cycles); + + return rdp_max_cycles / PROFILING_BUFFER_SIZE; +} + +u32 profiler_get_cpu_microseconds() { + u32 cpu_normal_time = OS_CYCLES_TO_USEC(all_profiling_data[PROFILER_TIME_TOTAL].total / PROFILING_BUFFER_SIZE); + u32 cpu_audio_time = OS_CYCLES_TO_USEC(all_profiling_data[PROFILER_TIME_AUDIO].total / PROFILING_BUFFER_SIZE); + return cpu_normal_time + cpu_audio_time * 2; +} + +u32 profiler_get_rsp_microseconds() { + u32 rsp_graphics_time = OS_CYCLES_TO_USEC(all_profiling_data[PROFILER_TIME_RSP_GFX].total / PROFILING_BUFFER_SIZE); + u32 rsp_audio_time = OS_CYCLES_TO_USEC(all_profiling_data[PROFILER_TIME_RSP_AUDIO].total / PROFILING_BUFFER_SIZE); + return rsp_graphics_time + rsp_audio_time; +} + +u32 profiler_get_rdp_microseconds() { + u32 rdp_pipe_cycles = all_profiling_data[PROFILER_TIME_PIPE].total; + u32 rdp_tmem_cycles = all_profiling_data[PROFILER_TIME_TMEM].total; + u32 rdp_cmd_cycles = all_profiling_data[PROFILER_TIME_CMD].total; + + u32 rdp_max_cycles = MAX(MAX(rdp_pipe_cycles, rdp_tmem_cycles), rdp_cmd_cycles); + + return RDP_CYCLE_CONV(rdp_max_cycles / PROFILING_BUFFER_SIZE); +} + +void profiler_print_times() { + u32 microseconds[PROFILER_TIME_COUNT]; + char text_buffer[196]; + + update_fps_timer(); + update_total_timer(); + update_rdp_timers(); + +#ifndef PUPPYPRINT_DEBUG + static u8 show_profiler = 0; + if (gPlayer1Controller->buttonPressed & L_TRIG) { + show_profiler ^= 1; + } +#endif + +#ifdef PUPPYPRINT_DEBUG + if (fDebug && sPPDebugPage == 0) { +#else + if (show_profiler) { +#endif + for (int i = 0; i < PROFILER_TIME_COUNT; i++) { + if (i < PROFILER_TIME_TMEM) { + microseconds[i] = OS_CYCLES_TO_USEC(all_profiling_data[i].total / PROFILING_BUFFER_SIZE); + } else { + microseconds[i] = RDP_CYCLE_CONV(all_profiling_data[i].total / PROFILING_BUFFER_SIZE); + } + } + + // audio time is removed from the main thread profiling, so add it back here + u32 total_cpu = microseconds[PROFILER_TIME_TOTAL] + microseconds[PROFILER_TIME_AUDIO] * 2; + u32 total_rsp = microseconds[PROFILER_TIME_RSP_GFX] + microseconds[PROFILER_TIME_RSP_AUDIO]; + u32 max_rdp = MAX(MAX(microseconds[PROFILER_TIME_TMEM], microseconds[PROFILER_TIME_CMD]), microseconds[PROFILER_TIME_PIPE]); + + sprintf(text_buffer, + "FPS: %5.2f\n" + "\n" + "CPU\t\t%d (%d%%)\n" + " Input\t\t%d\n" + " Dynamic\t\t%d\n" + " Mario\t\t\t%d\n" + " Behavior\t\t%d\n" + " Graph\t\t%d\n" + " Audio\t\t\t%d\n" + "\n" + "RDP\t\t%d (%d%%)\n" + " Tmem\t\t\t%d\n" + " Cmd\t\t\t%d\n" + " Pipe\t\t\t%d\n" + "\n" + "RSP\t\t%d (%d%%)\n" + " Gfx\t\t\t%d\n" + " Audio\t\t\t%d\n", + 1000000.0f / microseconds[PROFILER_TIME_FPS], + total_cpu, total_cpu / 333, + microseconds[PROFILER_TIME_CONTROLLERS], + microseconds[PROFILER_TIME_DYNAMIC], + microseconds[PROFILER_TIME_MARIO], + microseconds[PROFILER_TIME_BEHAVIOR_BEFORE_MARIO] + microseconds[PROFILER_TIME_BEHAVIOR_AFTER_MARIO], + microseconds[PROFILER_TIME_GFX], + microseconds[PROFILER_TIME_AUDIO] * 2, // audio is 60Hz, so double the average + max_rdp, max_rdp / 333, + microseconds[PROFILER_TIME_TMEM], + microseconds[PROFILER_TIME_CMD], + microseconds[PROFILER_TIME_PIPE], + total_rsp, total_rsp / 333, + microseconds[PROFILER_TIME_RSP_GFX], + microseconds[PROFILER_TIME_RSP_AUDIO] + ); + + Gfx* dlHead = gDisplayListHead; + gDPPipeSync(dlHead++); + gDPSetCycleType(dlHead++, G_CYC_1CYCLE); + gDPSetRenderMode(dlHead++, G_RM_TEX_EDGE, G_RM_TEX_EDGE2); + gDPSetTexturePersp(dlHead++, G_TP_NONE); + gDPSetTextureFilter(dlHead++, G_TF_POINT); + gDPSetTextureLUT(dlHead++, G_TT_NONE); + drawSmallStringCol(&dlHead, 10, 10, text_buffer, 255, 255, 255); + gDisplayListHead = dlHead; + } +} + +void profiler_frame_setup() { + profile_buffer_index++; + preempted_time = 0; + + if (profile_buffer_index >= PROFILING_BUFFER_SIZE) { + profile_buffer_index = 0; + } + + prev_time = start = osGetCount(); +} + +#endif diff --git a/src/game/profiling.h b/src/game/profiling.h new file mode 100644 index 00000000..48bbe34c --- /dev/null +++ b/src/game/profiling.h @@ -0,0 +1,61 @@ +#ifndef __PROFILING_H__ +#define __PROFILING_H__ + +#include +#include "macros.h" +#include "config/config_debug.h" +#include "config/config_safeguards.h" + +#define PROFILING_BUFFER_SIZE 64 + +enum ProfilerTime { + PROFILER_TIME_FPS, + PROFILER_TIME_CONTROLLERS, + PROFILER_TIME_SPAWNER, + PROFILER_TIME_DYNAMIC, + PROFILER_TIME_BEHAVIOR_BEFORE_MARIO, + PROFILER_TIME_MARIO, + PROFILER_TIME_BEHAVIOR_AFTER_MARIO, + PROFILER_TIME_GFX, + PROFILER_TIME_AUDIO, + PROFILER_TIME_TOTAL, + PROFILER_TIME_RSP_GFX, + PROFILER_TIME_RSP_AUDIO, + PROFILER_TIME_TMEM, + PROFILER_TIME_PIPE, + PROFILER_TIME_CMD, + PROFILER_TIME_COUNT, +}; + +enum ProfilerRSPTime { + PROFILER_RSP_GFX, + PROFILER_RSP_AUDIO, + PROFILER_RSP_COUNT +}; + +#ifdef USE_PROFILER +void profiler_update(enum ProfilerTime which); +void profiler_print_times(); +void profiler_frame_setup(); +void profiler_rsp_started(enum ProfilerRSPTime which); +void profiler_rsp_completed(enum ProfilerRSPTime which); +void profiler_rsp_resumed(); +void profiler_audio_started(); +void profiler_audio_completed(); +// See profiling.c to see why profiler_rsp_yielded isn't its own function +static ALWAYS_INLINE void profiler_rsp_yielded() { + profiler_rsp_resumed(); +} +#else +#define profiler_update(which) +#define profiler_print_times() +#define profiler_frame_setup() +#define profiler_rsp_started(which) +#define profiler_rsp_completed(which) +#define profiler_rsp_resumed() +#define profiler_audio_started() +#define profiler_audio_completed() +#define profiler_rsp_yielded() +#endif + +#endif diff --git a/src/game/puppyprint.c b/src/game/puppyprint.c index 9987026b..ab9dce05 100644 --- a/src/game/puppyprint.c +++ b/src/game/puppyprint.c @@ -68,34 +68,9 @@ s32 benchmarkTimer = 0; s32 benchmarkProgramTimer = 0; s8 benchmarkType = 0; // General -u32 cpuTime = 0; u32 rspTime = 0; u32 rdpTime = 0; -u32 ramTime = 0; -u32 loadTime = 0; -u32 gLastOSTime = 0; -u32 rspDelta = 0; s32 benchMark[NUM_BENCH_ITERATIONS + 2]; -// CPU -u32 collisionTime[NUM_PERF_ITERATIONS + 1]; -u32 behaviourTime[NUM_PERF_ITERATIONS + 1]; -u32 scriptTime[NUM_PERF_ITERATIONS + 1]; -u32 graphTime[NUM_PERF_ITERATIONS + 1]; -u32 audioTime[NUM_PERF_ITERATIONS + 1]; -u32 dmaTime[NUM_PERF_ITERATIONS + 1]; -u32 dmaAudioTime[NUM_PERF_ITERATIONS + 1]; -u32 faultTime[NUM_PERF_ITERATIONS + 1]; -u32 taskTime[NUM_PERF_ITERATIONS + 1]; -u32 profilerTime[NUM_PERF_ITERATIONS + 1]; -u32 profilerTime2[NUM_PERF_ITERATIONS + 1]; -u32 cameraTime[NUM_PERF_ITERATIONS + 1]; -// RSP -u32 audioTime[NUM_PERF_ITERATIONS + 1]; -u32 rspGenTime[NUM_PERF_ITERATIONS + 1]; -// RDP -u32 bufferTime[NUM_PERF_ITERATIONS + 1]; -u32 tmemTime[NUM_PERF_ITERATIONS + 1]; -u32 busTime[NUM_PERF_ITERATIONS + 1]; // RAM s8 ramViewer = FALSE; s32 ramsizeSegment[NUM_TLB_SEGMENTS + 1] = { @@ -306,6 +281,7 @@ void print_ram_overview(void) { sprintf(textBytes, "RAM: %06X/%06X (%d_)", main_pool_available(), mempool, (s32)(((f32)main_pool_available() / (f32)mempool) * 100)); print_small_text(SCREEN_CENTER_X, (SCREEN_HEIGHT - 16), textBytes, PRINT_TEXT_ALIGN_CENTRE, PRINT_ALL, FONT_OUTLINE); + puppyprint_calculate_ram_usage(); print_ram_bar(); } @@ -477,12 +453,6 @@ extern s16 gVisualSurfaceCount; void puppyprint_render_collision(void) { char textBytes[200]; -#ifdef PUPPYPRINT_DEBUG_CYCLES - sprintf(textBytes, "Collision: %dc", collisionTime[NUM_PERF_ITERATIONS]); -#else - sprintf(textBytes, "Collision: %dus", collisionTime[NUM_PERF_ITERATIONS]); -#endif - print_small_text(304, 48, textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL, 1); sprintf(textBytes, "Pool Size: %X#Node Size: %X#Surfaces Allocated: %d#Nodes Allocated: %d#Current Cell: %d", (SURFACE_NODE_POOL_SIZE * sizeof(struct SurfaceNode)), (SURFACE_POOL_SIZE * sizeof(struct Surface)), gSurfacesAllocated, gSurfaceNodesAllocated, gVisualSurfaceCount); @@ -512,55 +482,47 @@ void puppyprint_render_collision(void) { #endif } -struct CPUBar { - u32 *time; - ColorRGB colour; - const char str[32]; -}; - extern void print_fps(s32 x, s32 y); -struct CPUBar cpu_ordering_table[] = { - { collisionTime, COLOR_RGB_RED, { "Collision: " }}, - { graphTime, COLOR_RGB_BLUE, { "Graph: " }}, - { behaviourTime, COLOR_RGB_GREEN, { "Behaviour: " }}, - { audioTime, COLOR_RGB_YELLOW, { "Audio: " }}, - { cameraTime, COLOR_RGB_CYAN, { "Camera: " }}, - { dmaTime, COLOR_RGB_MAGENTA, { "DMA: " }}, -}; +u32 profiler_get_cpu_cycles(); +u32 profiler_get_rsp_cycles(); +u32 profiler_get_rdp_cycles(); -#define CPU_TABLE_MAX (sizeof(cpu_ordering_table) / sizeof(struct CPUBar)) -#define ADDTIMES MAX(((collisionTime[MX] + graphTime[MX] + behaviourTime[MX] + audioTime[MX] + cameraTime[MX] + dmaTime[MX]) / 80), 1) +u32 profiler_get_cpu_microseconds(); +u32 profiler_get_rsp_microseconds(); +u32 profiler_get_rdp_microseconds(); void print_basic_profiling(void) { + u32 cpuTime; + u32 rspTime; + u32 rdpTime; char textBytes[90]; - print_fps(16, 40); + print_fps(16, 16); #ifdef PUPPYPRINT_DEBUG_CYCLES + cpuTime = profiler_get_cpu_cycles(); + rspTime = profiler_get_rsp_cycles(); + rdpTime = profiler_get_rdp_cycles(); sprintf(textBytes, "CPU: %dc (%d_)#RSP: %dc (%d_)#RDP: %dc (%d_)", cpuTime, (cpuTime / 15625), rspTime, (rspTime / 15625), rdpTime, (rdpTime / 15625)); #else + cpuTime = profiler_get_cpu_microseconds(); + rspTime = profiler_get_rsp_microseconds(); + rdpTime = profiler_get_rdp_microseconds(); sprintf(textBytes, "CPU: %dus (%d_)#RSP: %dus (%d_)#RDP: %dus (%d_)", cpuTime, (cpuTime / 333), rspTime, (rspTime / 333), rdpTime, (rdpTime / 333)); #endif - print_small_text(16, 52, textBytes, PRINT_TEXT_ALIGN_LEFT, PRINT_ALL, FONT_OUTLINE); + print_small_text(16, 28, textBytes, PRINT_TEXT_ALIGN_LEFT, PRINT_ALL, FONT_OUTLINE); } void puppyprint_render_standard(void) { - s32 perfPercentage[CPU_TABLE_MAX]; - s32 graphPos; - s32 prevGraph; - u32 i; - s32 viewedNums; char textBytes[80]; - print_basic_profiling(); - sprintf(textBytes, "OBJ: %d/%d", gObjectCounter, OBJECT_POOL_CAPACITY); - print_small_text(16, 124, textBytes, PRINT_TEXT_ALIGN_LEFT, PRINT_ALL, FONT_OUTLINE); + print_small_text((SCREEN_WIDTH - 16), 16, textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL, FONT_OUTLINE); #ifndef ENABLE_CREDITS_BENCHMARK // Very little point printing useless info if Mario doesn't even exist. @@ -571,7 +533,7 @@ void puppyprint_render_standard(void) { (s32)(gMarioState->pos[2]), (u16)(gMarioState->faceAngle[1]), (u32)(gMarioState->action & ACT_ID_MASK)); - print_small_text(16, 140, textBytes, PRINT_TEXT_ALIGN_LEFT, PRINT_ALL, FONT_OUTLINE); + print_small_text((SCREEN_WIDTH - 16), 32, textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL, FONT_OUTLINE); } // Same for the camera, especially so because this will crash otherwise. if (gCamera) { @@ -584,66 +546,51 @@ void puppyprint_render_standard(void) { } #endif - // Just to keep screen estate a little friendlier. -#define MX NUM_PERF_ITERATIONS - for (i = 0; i < CPU_TABLE_MAX; i++) { - perfPercentage[i] = MAX((cpu_ordering_table[i].time[MX] / ADDTIMES), 0); - } -#undef ADDTIMES -#undef MX + // Old profiler bar code, can be restored one day if desired to work with new profiler + // s32 graphPos; + // s32 prevGraph; + // u32 i; + // s32 viewedNums; - viewedNums = 0; - for (i = 0; i < CPU_TABLE_MAX; i++) { - s32 num = cpu_ordering_table[i].time[NUM_PERF_ITERATIONS]; - if (num != 0) { -#ifdef PUPPYPRINT_DEBUG_CYCLES - sprintf(textBytes, "%s%dc", cpu_ordering_table[i].str, num); -#else - sprintf(textBytes, "%s%dus", cpu_ordering_table[i].str, num); -#endif - print_small_text((SCREEN_WIDTH - 16), (40 + (viewedNums * 12)), textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL, FONT_OUTLINE); - viewedNums++; - } - } + // prepare_blank_box(); + // viewedNums = 0; - s32 barY = (28 + (viewedNums * 12)) + 16; - prepare_blank_box(); - viewedNums = 0; + // // Render CPU breakdown bar. + // for (i = 0; i < CPU_TABLE_MAX; i++) { + // if (perfPercentage[i] == 0 + // && (i != CPU_TABLE_MAX - 1)) { + // continue; + // } - // Render CPU breakdown bar. - for (i = 0; i < CPU_TABLE_MAX; i++) { - if (perfPercentage[i] == 0 - && (i != CPU_TABLE_MAX - 1)) { - continue; - } + // if (viewedNums == 0) { + // graphPos = ((SCREEN_WIDTH - 96) + perfPercentage[i]); + // render_blank_box((SCREEN_WIDTH - 96), barY, graphPos, (barY + 8), + // cpu_ordering_table[i].colour[0], + // cpu_ordering_table[i].colour[1], + // cpu_ordering_table[i].colour[2], 255); + // } else if (i == (CPU_TABLE_MAX - 1)) { + // graphPos = ((SCREEN_WIDTH - 96) + perfPercentage[i]); + // render_blank_box(prevGraph, barY, (SCREEN_WIDTH - 16), (barY + 8), + // cpu_ordering_table[i].colour[0], + // cpu_ordering_table[i].colour[1], + // cpu_ordering_table[i].colour[2], 255); + // } else { + // graphPos += perfPercentage[i]; + // render_blank_box(prevGraph, barY, graphPos, (barY + 8), + // cpu_ordering_table[i].colour[0], + // cpu_ordering_table[i].colour[1], + // cpu_ordering_table[i].colour[2], 255); + // } - if (viewedNums == 0) { - graphPos = ((SCREEN_WIDTH - 96) + perfPercentage[i]); - render_blank_box((SCREEN_WIDTH - 96), barY, graphPos, (barY + 8), - cpu_ordering_table[i].colour[0], - cpu_ordering_table[i].colour[1], - cpu_ordering_table[i].colour[2], 255); - } else if (i == (CPU_TABLE_MAX - 1)) { - graphPos = ((SCREEN_WIDTH - 96) + perfPercentage[i]); - render_blank_box(prevGraph, barY, (SCREEN_WIDTH - 16), (barY + 8), - cpu_ordering_table[i].colour[0], - cpu_ordering_table[i].colour[1], - cpu_ordering_table[i].colour[2], 255); - } else { - graphPos += perfPercentage[i]; - render_blank_box(prevGraph, barY, graphPos, (barY + 8), - cpu_ordering_table[i].colour[0], - cpu_ordering_table[i].colour[1], - cpu_ordering_table[i].colour[2], 255); - } + // viewedNums++; + // prevGraph = graphPos; + // } - viewedNums++; - prevGraph = graphPos; - } - - finish_blank_box(); + // finish_blank_box(); } + + void puppyprint_render_minimal(void) { print_basic_profiling(); } @@ -686,12 +633,10 @@ void render_page_menu(void) { } void puppyprint_render_profiler(void) { - OSTime first = osGetTime(); print_set_envcolour(255, 255, 255, 255); if (!fDebug) { - profiler_update(profilerTime, first); return; } @@ -700,39 +645,9 @@ void puppyprint_render_profiler(void) { if (sDebugMenu) { render_page_menu(); } - profiler_update(profilerTime, first); -} - -void profiler_update(u32 *time, OSTime time2) { - time[perfIteration] = (osGetTime() - time2); -} - -void get_average_perf_time(u32 *time, s32 is_rdp) { - // This takes all but the last index of the timer array, and creates an average value, which is written to the last index. - s32 i = 0; - s32 total = 0; - for (i = 0; i < NUM_PERF_ITERATIONS; i++) { - total += time[i]; - } - - total /= NUM_PERF_ITERATIONS; - total = MAX(total, 0); - if (is_rdp) - { - time[NUM_PERF_ITERATIONS] = RDP_CYCLE_CONV(total); - } - else - { - time[NUM_PERF_ITERATIONS] = CYCLE_CONV(total); - } } void puppyprint_profiler_process(void) { - bufferTime[perfIteration] = (IO_READ(DPC_BUFBUSY_REG)); - tmemTime[perfIteration] = (IO_READ(DPC_TMEM_REG)); - busTime[perfIteration] = (IO_READ(DPC_PIPEBUSY_REG)); - OSTime newTime = osGetTime(); - if (fDebug && (gPlayer1Controller->buttonPressed & L_TRIG)) { sDebugMenu ^= TRUE; if (sDebugMenu == FALSE) { @@ -760,54 +675,6 @@ void puppyprint_profiler_process(void) { sDebugOption = 0; } } - - if (!(gGlobalTimer % NUM_PERF_ITERATIONS)) { - get_average_perf_time( scriptTime, FALSE); - get_average_perf_time( behaviourTime, FALSE); - get_average_perf_time( collisionTime, FALSE); - get_average_perf_time( graphTime, FALSE); - get_average_perf_time( audioTime, FALSE); - get_average_perf_time( dmaTime, FALSE); - get_average_perf_time( dmaAudioTime, FALSE); - get_average_perf_time( faultTime, FALSE); - get_average_perf_time( taskTime, FALSE); - get_average_perf_time( profilerTime, FALSE); - get_average_perf_time( profilerTime2, FALSE); - get_average_perf_time( cameraTime, FALSE); - - // Performed twice a frame without fail, so doubled to have a more representative value. - audioTime[NUM_PERF_ITERATIONS] *= 2; - dmaAudioTime[NUM_PERF_ITERATIONS] *= 2; - dmaTime[NUM_PERF_ITERATIONS] += dmaAudioTime[NUM_PERF_ITERATIONS]; - - get_average_perf_time(rspGenTime, FALSE); - - get_average_perf_time(bufferTime, TRUE); - get_average_perf_time( tmemTime, TRUE); - get_average_perf_time( busTime, TRUE); - - rdpTime = bufferTime[NUM_PERF_ITERATIONS]; - rdpTime = MAX(rdpTime, tmemTime[NUM_PERF_ITERATIONS]); - rdpTime = MAX(rdpTime, busTime[NUM_PERF_ITERATIONS]); -#if BBPLAYER == 1 // iQue RDP registers need to be halved to be correct. - rdpTime /= 2; -#endif - cpuTime = (scriptTime[NUM_PERF_ITERATIONS] - + taskTime[NUM_PERF_ITERATIONS] - + faultTime[NUM_PERF_ITERATIONS] - + audioTime[NUM_PERF_ITERATIONS]); - rspTime = rspGenTime[NUM_PERF_ITERATIONS]; - puppyprint_calculate_ram_usage(); - } - - gLastOSTime = newTime; - if (gGlobalTimer > 5) { - IO_WRITE(DPC_STATUS_REG, (DPC_CLR_CLOCK_CTR | DPC_CLR_CMD_CTR | DPC_CLR_PIPE_CTR | DPC_CLR_TMEM_CTR)); - } - if (perfIteration++ == (NUM_PERF_ITERATIONS - 1)) { - perfIteration = 0; - } - profiler_update(profilerTime2, newTime); } #endif diff --git a/src/game/puppyprint.h b/src/game/puppyprint.h index 7608adc9..a43d51a1 100644 --- a/src/game/puppyprint.h +++ b/src/game/puppyprint.h @@ -2,17 +2,9 @@ #include "segment2.h" -// This is how many indexes of timers are saved at once. higher creates a smoother average, but naturally uses more RAM. 15's fine. -// #define NUM_PERF_ITERATIONS 15 -#define NUM_PERF_ITERATIONS 16 #define NUM_BENCH_ITERATIONS 150 #define LOG_BUFFER_SIZE 16 -#ifdef ENABLE_CREDITS_BENCHMARK -#undef NUM_PERF_ITERATIONS -#define NUM_PERF_ITERATIONS 60 -#endif - struct PuppyPrintPage{ void (*func)(); char name[32]; @@ -58,35 +50,8 @@ extern s32 mempool; extern u8 benchOption; // General -extern u32 cpuTime; -extern u32 rspTime; -extern u32 rdpTime; -extern u32 ramTime; -extern u32 loadTime; -extern u32 rspDelta; -extern s32 benchMark[NUM_BENCH_ITERATIONS + 2]; +extern s32 benchMark[NUM_BENCH_ITERATIONS + 2]; -// CPU -extern u32 collisionTime[NUM_PERF_ITERATIONS + 1]; -extern u32 behaviourTime[NUM_PERF_ITERATIONS + 1]; -extern u32 scriptTime[NUM_PERF_ITERATIONS + 1]; -extern u32 graphTime[NUM_PERF_ITERATIONS + 1]; -extern u32 audioTime[NUM_PERF_ITERATIONS + 1]; -extern u32 dmaTime[NUM_PERF_ITERATIONS + 1]; -extern u32 dmaAudioTime[NUM_PERF_ITERATIONS + 1]; -extern u32 faultTime[NUM_PERF_ITERATIONS + 1]; -extern u32 taskTime[NUM_PERF_ITERATIONS + 1]; -extern u32 cameraTime[NUM_PERF_ITERATIONS + 1]; -extern u32 profilerTime[NUM_PERF_ITERATIONS + 1]; -extern u32 profilerTime2[NUM_PERF_ITERATIONS + 1]; -// RSP -extern u32 rspGenTime[NUM_PERF_ITERATIONS + 1]; -// RDP -extern u32 bufferTime[NUM_PERF_ITERATIONS + 1]; -extern u32 tmemTime[NUM_PERF_ITERATIONS + 1]; -extern u32 busTime[NUM_PERF_ITERATIONS + 1]; - -extern void profiler_update(u32 *time, OSTime time2); extern void puppyprint_profiler_process(void); extern void puppyprint_render_profiler(void); extern void puppyprint_profiler_finished(void); diff --git a/src/game/sound_init.c b/src/game/sound_init.c index db5c3213..b7d4088a 100644 --- a/src/game/sound_init.c +++ b/src/game/sound_init.c @@ -15,6 +15,7 @@ #include "sound_init.h" #include "rumble_init.h" #include "puppyprint.h" +#include "profiling.h" #define MUSIC_NONE 0xFFFF @@ -341,10 +342,6 @@ void thread4_sound(UNUSED void *arg) { audio_init(); sound_init(); -#if PUPPYPRINT_DEBUG - OSTime lastTime; -#endif - osCreateMesgQueue(&sSoundMesgQueue, sSoundMesgBuf, ARRAY_COUNT(sSoundMesgBuf)); set_vblank_handler(1, &sSoundVblankHandler, &sSoundMesgQueue, (OSMesg) 512); @@ -352,34 +349,14 @@ void thread4_sound(UNUSED void *arg) { OSMesg msg; osRecvMesg(&sSoundMesgQueue, &msg, OS_MESG_BLOCK); -#if PUPPYPRINT_DEBUG - while (TRUE) { - lastTime = osGetTime(); - dmaAudioTime[perfIteration] = 0; -#endif - if (gResetTimer < 25) { - struct SPTask *spTask; - spTask = create_next_audio_frame_task(); - if (spTask != NULL) { - dispatch_audio_sptask(spTask); - } -#if PUPPYPRINT_DEBUG - profiler_update(audioTime, lastTime); - audioTime[perfIteration] -= dmaAudioTime[perfIteration]; - if (benchmarkLoop > 0 && benchOption == 1) { - benchmarkLoop--; - benchMark[benchmarkLoop] = osGetTime() - lastTime; - if (benchmarkLoop == 0) { - puppyprint_profiler_finished(); - break; - } - } else { - break; - } -#endif + profiler_audio_started(); + if (gResetTimer < 25) { + struct SPTask *spTask; + spTask = create_next_audio_frame_task(); + if (spTask != NULL) { + dispatch_audio_sptask(spTask); } -#if PUPPYPRINT_DEBUG } -#endif + profiler_audio_completed(); } }