New profiler (#390)

* Implemented new lightweight CPU/RDP profiler

* Integrated new profiler into puppyprint debug and improved profiler visuals, removed existing puppyprint profiling code
This commit is contained in:
Mr-Wiseguy
2022-05-01 00:42:58 -04:00
committed by GitHub
parent 8d06db06e1
commit 031cc24fa6
22 changed files with 578 additions and 468 deletions

View File

@@ -10,6 +10,10 @@
// Disables all debug options (except PUPPYPRINT).
#define DISABLE_ALL
// Enables a comprehensive standalone profiler. Automatically enabled by PUPPYPRINT_DEBUG.
// If not using PUPPYPRINT_DEBUG, press L to toggle the profiler.
// #define USE_PROFILER
// TEST LEVEL
// Uncomment this define and set a test level in order to boot straight into said level.
// This allows you to quickly test the level you're working on.

View File

@@ -33,15 +33,21 @@
* config_debug.h
*/
#ifdef PUPPYPRINT_DEBUG
#undef PUPPYPRINT
#define PUPPYPRINT
#endif // PUPPYPRINT_DEBUG
#ifdef COMPLETE_SAVE_FILE
#ifdef DISABLE_ALL
#undef DEBUG_ALL
#undef TEST_LEVEL
#undef DEBUG_LEVEL_SELECT
#undef ENABLE_DEBUG_FREE_MOVE
#undef VANILLA_DEBUG
#undef VANILLA_STYLE_CUSTOM_DEBUG
#undef PUPPYPRINT_DEBUG
#undef PUPPYPRINT_DEBUG_CYCLES
#undef VISUAL_DEBUG
#undef UNLOCK_ALL
#define UNLOCK_ALL
#endif // COMPLETE_SAVE_FILE
#undef COMPLETE_SAVE_FILE
#undef DEBUG_FORCE_CRASH_ON_BOOT
#undef USE_PROFILER
#endif // DISABLE_ALL
#ifdef DEBUG_ALL
#undef DEBUG_LEVEL_SELECT
@@ -66,20 +72,17 @@
#define COMPLETE_SAVE_FILE
#endif // DEBUG_ALL
#ifdef DISABLE_ALL
#undef DEBUG_ALL
#undef TEST_LEVEL
#undef DEBUG_LEVEL_SELECT
#undef ENABLE_DEBUG_FREE_MOVE
#undef VANILLA_DEBUG
#undef VANILLA_STYLE_CUSTOM_DEBUG
#undef PUPPYPRINT_DEBUG
#undef PUPPYPRINT_DEBUG_CYCLES
#undef VISUAL_DEBUG
#ifdef PUPPYPRINT_DEBUG
#undef PUPPYPRINT
#define PUPPYPRINT
#undef USE_PROFILER
#define USE_PROFILER
#endif // PUPPYPRINT_DEBUG
#ifdef COMPLETE_SAVE_FILE
#undef UNLOCK_ALL
#undef COMPLETE_SAVE_FILE
#undef DEBUG_FORCE_CRASH_ON_BOOT
#endif // DISABLE_ALL
#define UNLOCK_ALL
#endif // COMPLETE_SAVE_FILE
/*****************

View File

@@ -137,14 +137,8 @@ u8 audioString49[] = "BANK LOAD MISS! FOR %d\n";
* Performs an asynchronus (normal priority) DMA copy
*/
void audio_dma_copy_async(uintptr_t devAddr, void *vAddr, size_t nbytes, OSMesgQueue *queue, OSIoMesg *mesg) {
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
osInvalDCache(vAddr, nbytes);
osPiStartDma(mesg, OS_MESG_PRI_NORMAL, OS_READ, devAddr, vAddr, nbytes, queue);
#if PUPPYPRINT_DEBUG
dmaAudioTime[perfIteration] += (osGetTime() - first);
#endif
}
/**
@@ -152,18 +146,12 @@ void audio_dma_copy_async(uintptr_t devAddr, void *vAddr, size_t nbytes, OSMesgQ
* to 0x1000 bytes transfer at once.
*/
void audio_dma_partial_copy_async(uintptr_t *devAddr, u8 **vAddr, ssize_t *remaining, OSMesgQueue *queue, OSIoMesg *mesg) {
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
ssize_t transfer = MIN(*remaining, 0x1000);
*remaining -= transfer;
osInvalDCache(*vAddr, transfer);
osPiStartDma(mesg, OS_MESG_PRI_NORMAL, OS_READ, *devAddr, *vAddr, transfer, queue);
*devAddr += transfer;
*vAddr += transfer;
#if PUPPYPRINT_DEBUG
dmaAudioTime[perfIteration] += (osGetTime() - first);
#endif
}
void decrease_sample_dma_ttls() {
@@ -208,9 +196,6 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
u32 i;
u32 dmaIndex;
ssize_t bufferPos;
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
if (arg2 != 0 || *dmaIndexRef >= sSampleDmaListSize1) {
for (i = sSampleDmaListSize1; i < gSampleDmaNumListItems; i++) {
@@ -235,9 +220,6 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
}
dma->ttl = 60;
*dmaIndexRef = (u8) i;
#if PUPPYPRINT_DEBUG
dmaAudioTime[perfIteration] += (osGetTime() - first);
#endif
#if defined(VERSION_EU)
return &dma->buffer[(devAddr - dma->source)];
#else
@@ -277,14 +259,8 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
}
dma->ttl = 2;
#if defined(VERSION_EU)
#if PUPPYPRINT_DEBUG
dmaAudioTime[perfIteration] += (osGetTime() - first);
#endif
return dma->buffer + (devAddr - dma->source);
#else
#if PUPPYPRINT_DEBUG
dmaAudioTime[perfIteration] += (osGetTime() - first);
#endif
return (devAddr - dma->source) + dma->buffer;
#endif
}
@@ -310,18 +286,12 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
osPiStartDma(&gCurrAudioFrameDmaIoMesgBufs[gCurrAudioFrameDmaCount++], OS_MESG_PRI_NORMAL,
OS_READ, dmaDevAddr, dma->buffer, transfer, &gCurrAudioFrameDmaQueue);
*dmaIndexRef = dmaIndex;
#if PUPPYPRINT_DEBUG
dmaAudioTime[perfIteration] += (osGetTime() - first);
#endif
return (devAddr - dmaDevAddr) + dma->buffer;
#else
gCurrAudioFrameDmaCount++;
osPiStartDma(&gCurrAudioFrameDmaIoMesgBufs[gCurrAudioFrameDmaCount - 1], OS_MESG_PRI_NORMAL,
OS_READ, dmaDevAddr, dma->buffer, transfer, &gCurrAudioFrameDmaQueue);
*dmaIndexRef = dmaIndex;
#if PUPPYPRINT_DEBUG
dmaAudioTime[perfIteration] += (osGetTime() - first);
#endif
return dma->buffer + (devAddr - dmaDevAddr);
#endif
}

View File

@@ -19,6 +19,7 @@
#endif
#include "game/puppyprint.h"
#include "game/puppylights.h"
#include "game/profiling.h"
// Message IDs
enum MessageIDs {
@@ -185,10 +186,8 @@ void start_gfx_sptask(void) {
if (gActiveSPTask == NULL
&& sCurrentDisplaySPTask != NULL
&& sCurrentDisplaySPTask->state == SPTASK_STATE_NOT_STARTED) {
#if PUPPYPRINT_DEBUG
rspDelta = osGetTime();
#endif
start_sptask(M_GFXTASK);
profiler_rsp_started(PROFILER_RSP_GFX);
}
}
@@ -220,15 +219,14 @@ void handle_vblank(void) {
} else {
pretend_audio_sptask_done();
}
profiler_rsp_started(PROFILER_RSP_AUDIO);
}
} else {
if (gActiveSPTask == NULL
&& sCurrentDisplaySPTask != NULL
&& sCurrentDisplaySPTask->state != SPTASK_STATE_FINISHED) {
#if PUPPYPRINT_DEBUG
rspDelta = osGetTime();
#endif
start_sptask(M_GFXTASK);
profiler_rsp_started(PROFILER_RSP_GFX);
}
}
#if ENABLE_RUMBLE
@@ -253,9 +251,9 @@ void handle_sp_complete(void) {
// The gfx task completed before we had time to interrupt it.
// Mark it finished, just like below.
curSPTask->state = SPTASK_STATE_FINISHED;
#if PUPPYPRINT_DEBUG
profiler_update(rspGenTime, rspDelta);
#endif
profiler_rsp_completed(PROFILER_RSP_GFX);
} else {
profiler_rsp_yielded();
}
// Start the audio task, as expected by handle_vblank.
@@ -264,12 +262,19 @@ void handle_sp_complete(void) {
} else {
pretend_audio_sptask_done();
}
profiler_rsp_started(PROFILER_RSP_AUDIO);
} else {
curSPTask->state = SPTASK_STATE_FINISHED;
if (curSPTask->task.t.type == M_AUDTASK) {
profiler_rsp_completed(PROFILER_RSP_AUDIO);
// After audio tasks come gfx tasks.
if ((sCurrentDisplaySPTask != NULL)
&& (sCurrentDisplaySPTask->state != SPTASK_STATE_FINISHED)) {
if (sCurrentDisplaySPTask->state == SPTASK_STATE_INTERRUPTED) {
profiler_rsp_resumed();
} else {
profiler_rsp_started(PROFILER_RSP_GFX);
}
start_sptask(M_GFXTASK);
}
sCurrentAudioSPTask = NULL;
@@ -280,9 +285,7 @@ void handle_sp_complete(void) {
// The SP process is done, but there is still a Display Processor notification
// that needs to arrive before we can consider the task completely finished and
// null out sCurrentDisplaySPTask. That happens in handle_dp_complete.
#if PUPPYPRINT_DEBUG
profiler_update(rspGenTime, rspDelta);
#endif
profiler_rsp_completed(PROFILER_RSP_GFX);
}
}
}
@@ -326,9 +329,6 @@ void thread3_main(UNUSED void *arg) {
while (TRUE) {
OSMesg msg;
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
osRecvMesg(&gIntrMesgQueue, &msg, OS_MESG_BLOCK);
switch ((uintptr_t) msg) {
case MESG_VI_VBLANK:
@@ -347,9 +347,6 @@ void thread3_main(UNUSED void *arg) {
handle_nmi_request();
break;
}
#if PUPPYPRINT_DEBUG
profiler_update(taskTime, first);
#endif
}
}

View File

@@ -259,9 +259,6 @@ u32 main_pool_pop_state(void) {
*/
void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) {
u32 size = ALIGN16(srcEnd - srcStart);
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
osInvalDCache(dest, size);
while (size != 0) {
@@ -275,9 +272,6 @@ void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) {
srcStart += copySize;
size -= copySize;
}
#if PUPPYPRINT_DEBUG
dmaTime[perfIteration] += (osGetTime() - first);
#endif
}
/**

View File

@@ -1377,9 +1377,6 @@ void find_surface_on_ray_list(struct SurfaceNode *list, Vec3f orig, Vec3f dir, f
f32 length;
Vec3f chk_hit_pos;
f32 top, bottom;
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
// Get upper and lower bounds of ray
if (dir[1] >= 0.0f) {
// Ray is upwards.
@@ -1403,9 +1400,6 @@ void find_surface_on_ray_list(struct SurfaceNode *list, Vec3f orig, Vec3f dir, f
*max_length = length;
}
}
#if PUPPYPRINT_DEBUG
collisionTime[perfIteration] += osGetTime() - first;
#endif
}
void find_surface_on_ray_cell(s32 cellX, s32 cellZ, Vec3f orig, Vec3f normalized_dir, f32 dir_length, struct Surface **hit_surface, Vec3f hit_pos, f32 *max_length, s32 flags) {

View File

@@ -175,16 +175,10 @@ s32 find_wall_collisions(struct WallCollisionData *colData) {
s32 numCollisions = 0;
s32 x = colData->x;
s32 z = colData->z;
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
colData->numWalls = 0;
if (is_outside_level_bounds(x, z)) {
#if PUPPYPRINT_DEBUG
collisionTime[perfIteration] += osGetTime() - first;
#endif
return numCollisions;
}
@@ -207,9 +201,6 @@ s32 find_wall_collisions(struct WallCollisionData *colData) {
// Increment the debug tracker.
gNumCalls.wall++;
#endif
#if PUPPYPRINT_DEBUG
collisionTime[perfIteration] += osGetTime() - first;
#endif
return numCollisions;
}
@@ -326,18 +317,12 @@ static struct Surface *find_ceil_from_list(struct SurfaceNode *surfaceNode, s32
f32 find_ceil(f32 posX, f32 posY, f32 posZ, struct Surface **pceil) {
f32 height = CELL_HEIGHT_LIMIT;
f32 dynamicHeight = CELL_HEIGHT_LIMIT;
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
s32 x = posX;
s32 y = posY;
s32 z = posZ;
*pceil = NULL;
if (is_outside_level_bounds(x, z)) {
#if PUPPYPRINT_DEBUG
collisionTime[perfIteration] += (osGetTime() - first);
#endif
return height;
}
@@ -379,9 +364,6 @@ f32 find_ceil(f32 posX, f32 posY, f32 posZ, struct Surface **pceil) {
// Increment the debug tracker.
gNumCalls.ceil++;
#endif
#if PUPPYPRINT_DEBUG
collisionTime[perfIteration] += osGetTime() - first;
#endif
return height;
}
@@ -563,10 +545,6 @@ f32 unused_find_dynamic_floor(f32 xPos, f32 yPos, f32 zPos, struct Surface **pfl
* Find the highest floor under a given position and return the height.
*/
f32 find_floor(f32 xPos, f32 yPos, f32 zPos, struct Surface **pfloor) {
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
f32 height = FLOOR_LOWER_LIMIT;
f32 dynamicHeight = FLOOR_LOWER_LIMIT;
@@ -580,9 +558,6 @@ f32 find_floor(f32 xPos, f32 yPos, f32 zPos, struct Surface **pfloor) {
*pfloor = NULL;
if (is_outside_level_bounds(x, z)) {
#if PUPPYPRINT_DEBUG
collisionTime[perfIteration] += (osGetTime() - first);
#endif
return height;
}
// Each level is split into cells to limit load, find the appropriate cell.
@@ -626,9 +601,6 @@ f32 find_floor(f32 xPos, f32 yPos, f32 zPos, struct Surface **pfloor) {
#ifdef VANILLA_DEBUG
// Increment the debug tracker.
gNumCalls.floor++;
#endif
#if PUPPYPRINT_DEBUG
collisionTime[perfIteration] += (osGetTime() - first);
#endif
return height;
}
@@ -682,9 +654,6 @@ s32 find_water_level_and_floor(s32 x, s32 y, s32 z, struct Surface **pfloor) {
s32 loX, hiX, loZ, hiZ;
TerrainData *p = gEnvironmentRegions;
struct Surface *floor = NULL;
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
s32 waterLevel = find_water_floor(x, y, z, &floor);
if (p != NULL && waterLevel == FLOOR_LOWER_LIMIT) {
@@ -710,9 +679,6 @@ s32 find_water_level_and_floor(s32 x, s32 y, s32 z, struct Surface **pfloor) {
*pfloor = floor;
}
#if PUPPYPRINT_DEBUG
collisionTime[perfIteration] += (osGetTime() - first);
#endif
return waterLevel;
}
@@ -724,9 +690,6 @@ s32 find_water_level(s32 x, s32 z) { // TODO: Allow y pos
s32 loX, hiX, loZ, hiZ;
TerrainData *p = gEnvironmentRegions;
struct Surface *floor = NULL;
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
s32 waterLevel = find_water_floor(x, ((gCollisionFlags & COLLISION_FLAG_CAMERA) ? gLakituState.pos[1] : gMarioState->pos[1]), z, &floor);
if ((p != NULL) && (waterLevel == FLOOR_LOWER_LIMIT)) {
@@ -750,10 +713,6 @@ s32 find_water_level(s32 x, s32 z) { // TODO: Allow y pos
}
}
#if PUPPYPRINT_DEBUG
collisionTime[perfIteration] += osGetTime() - first;
#endif
return waterLevel;
}
@@ -765,9 +724,6 @@ s32 find_poison_gas_level(s32 x, s32 z) {
s32 loX, hiX, loZ, hiZ;
s32 gasLevel = FLOOR_LOWER_LIMIT;
TerrainData *p = gEnvironmentRegions;
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
if (p != NULL) {
s32 numRegions = *p++;
@@ -794,10 +750,6 @@ s32 find_poison_gas_level(s32 x, s32 z) {
}
}
#if PUPPYPRINT_DEBUG
collisionTime[perfIteration] += osGetTime() - first;
#endif
return gasLevel;
}

View File

@@ -470,9 +470,6 @@ u32 get_area_terrain_size(TerrainData *data) {
void load_area_terrain(s32 index, TerrainData *data, RoomData *surfaceRooms, s16 *macroObjects) {
s32 terrainLoadType;
TerrainData *vertexData = NULL;
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
// Initialize the data for this.
gEnvironmentRegions = NULL;
@@ -519,9 +516,6 @@ void load_area_terrain(s32 index, TerrainData *data, RoomData *surfaceRooms, s16
gNumStaticSurfaceNodes = gSurfaceNodesAllocated;
gNumStaticSurfaces = gSurfacesAllocated;
#if PUPPYPRINT_DEBUG
collisionTime[perfIteration] += osGetTime() - first;
#endif
}
/**
@@ -646,9 +640,6 @@ static void get_optimal_coll_dist(struct Object *obj) {
*/
void load_object_collision_model(void) {
TerrainData vertexData[600];
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
TerrainData *collisionData = o->collisionData;
f32 marioDist = o->oDistanceToMario;
@@ -686,7 +677,4 @@ void load_object_collision_model(void) {
}
}
COND_BIT((marioDist < o->oDrawingDistance), o->header.gfx.node.flags, GRAPH_RENDER_ACTIVE);
#if PUPPYPRINT_DEBUG
collisionTime[perfIteration] += osGetTime() - first;
#endif
}

View File

@@ -25,6 +25,7 @@
#include "puppyprint.h"
#include "debug_box.h"
#include "engine/colors.h"
#include "profiling.h"
struct SpawnInfo gPlayerSpawnInfos[1];
struct GraphNode *gGraphNodePointers[MODEL_ID_COUNT];
@@ -375,10 +376,6 @@ void play_transition_after_delay(s16 transType, s16 time, u8 red, u8 green, u8 b
}
void render_game(void) {
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
OSTime colTime = collisionTime[perfIteration];
#endif
if (gCurrentArea != NULL && !gWarpTransition.pauseRendering) {
if (gCurrentArea->graphNode) {
geo_process_root(gCurrentArea->graphNode, gViewportOverride, gViewportClip, gFBSetColor);
@@ -434,11 +431,10 @@ void render_game(void) {
gViewportOverride = NULL;
gViewportClip = NULL;
profiler_update(PROFILER_TIME_GFX);
profiler_print_times();
#if PUPPYPRINT_DEBUG
profiler_update(graphTime, first);
graphTime[perfIteration] -= (collisionTime[perfIteration] - colTime);
// graphTime[perfIteration] -= profilerTime[perfIteration]; //! Graph time is inaccurate and wrongly reaches 0 sometimes
puppyprint_render_profiler();
#endif
}

View File

@@ -2871,10 +2871,6 @@ void update_lakitu(struct Camera *c) {
* Gets controller input, checks for cutscenes, handles mode changes, and moves the camera
*/
void update_camera(struct Camera *c) {
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
OSTime colTime = collisionTime[perfIteration];
#endif
gCamera = c;
update_camera_hud_status(c);
if (c->cutscene == CUTSCENE_NONE
@@ -3117,10 +3113,6 @@ void update_camera(struct Camera *c) {
}
#endif
gLakituState.lastFrameAction = sMarioCamState->action;
#if PUPPYPRINT_DEBUG
profiler_update(cameraTime, first);
cameraTime[perfIteration] -= collisionTime[perfIteration]-colTime;
#endif
}
/**

View File

@@ -402,9 +402,6 @@ void thread2_crash_screen(UNUSED void *arg) {
osSetEventMesg(OS_EVENT_CPU_BREAK, &gCrashScreen.mesgQueue, (OSMesg) 1);
osSetEventMesg(OS_EVENT_FAULT, &gCrashScreen.mesgQueue, (OSMesg) 2);
while (TRUE) {
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
#endif
if (thread == NULL) {
osRecvMesg(&gCrashScreen.mesgQueue, &mesg, 1);
thread = get_crashed_thread();
@@ -433,9 +430,6 @@ void thread2_crash_screen(UNUSED void *arg) {
read_controller_inputs(THREAD_2_CRASH_SCREEN);
draw_crash_screen(thread);
}
#if PUPPYPRINT_DEBUG
profiler_update(faultTime, first);
#endif
}
}

87
src/game/fasttext.c Normal file
View File

@@ -0,0 +1,87 @@
// This file is a modification of a file from https://github.com/danbolt/n64-jam-1, which was licensed under the MPL-2.0 License
// See the original repo for more details.
#include <ultra64.h>
#define TEX_ASCII_START '!'
#define TAB_WIDTH 16
#define G_CC_TEXT PRIMITIVE, 0, TEXEL0, 0, 0, 0, 0, TEXEL0
__asm__(
".section \".rodata\", \"a\", @progbits\n"
".global fast_font\n"
"fast_font:\n"
".incbin \"src/game/newfont2_swapped.bin\"\n"
".previous\n"
);
extern u8 fast_font[];
int computeS(unsigned char letter) {
int idx = letter;
if (letter > 'z') {
idx -= (3 + 2 + 3 + 1 + 3);
} else if (letter > '^') {
idx -= (2 + 3 + 1 + 3);
} else if (letter > 'Z') {
idx -= (3 + 1 + 3);
} else if (letter > '?') {
idx -= (1 + 3);
} else if (letter > ';') {
idx -= (3);
}
return (idx - TEX_ASCII_START) * 8;
}
static const u8 fast_text_font_kerning[] = {
/* */ 2, /*!*/ 4, /*"*/ 5, /*#*/ 0, /*$*/ 0, /*%*/ 7, /*&*/ 7, /*'*/ 2, /*(*/ 5, /*)*/ 5, /***/ 0, /*+*/ 7, /*,*/ 3, /*-*/ 7, /*.*/ 3, /*/*/ 7,
/*0*/ 7, /*1*/ 7, /*2*/ 7, /*3*/ 7, /*4*/ 7, /*5*/ 7, /*6*/ 7, /*7*/ 7, /*8*/ 7, /*9*/ 7, /*:*/ 3, /*;*/ 3, /*<*/ 0, /*=*/ 0, /*>*/ 0, /*?*/ 7,
/*@*/ 0, /*A*/ 7, /*B*/ 7, /*C*/ 7, /*D*/ 7, /*E*/ 7, /*F*/ 7, /*G*/ 7, /*H*/ 7, /*I*/ 6, /*J*/ 5, /*K*/ 7, /*L*/ 7, /*M*/ 7, /*N*/ 7, /*O*/ 7,
/*P*/ 7, /*Q*/ 7, /*R*/ 7, /*S*/ 7, /*T*/ 7, /*U*/ 7, /*V*/ 7, /*W*/ 7, /*X*/ 7, /*Y*/ 7, /*Z*/ 7, /*[*/ 0, /*\*/ 0, /*]*/ 0, /*^*/ 6, /*_*/ 0,
/*`*/ 0, /*a*/ 6, /*b*/ 6, /*c*/ 6, /*d*/ 6, /*e*/ 6, /*f*/ 6, /*g*/ 6, /*h*/ 6, /*i*/ 2, /*j*/ 6, /*k*/ 5, /*l*/ 3, /*m*/ 6, /*n*/ 6, /*o*/ 6,
/*p*/ 6, /*q*/ 6, /*r*/ 6, /*s*/ 6, /*t*/ 6, /*u*/ 6, /*v*/ 6, /*w*/ 6, /*x*/ 6, /*y*/ 6, /*z*/ 6, /*{*/ 0, /*|*/ 0, /*}*/ 0, /*~*/ 7,
};
void drawSmallString_impl(Gfx **dl, int x, int y, const char* string, int r, int g, int b) {
int i = 0;
int xPos = x;
int yPos = y;
int s = 0;
Gfx *dlHead = *dl;
gDPLoadTextureBlock_4bS(dlHead++, fast_font, G_IM_FMT_IA, 672, 12, 0, G_TX_MIRROR | G_TX_WRAP, G_TX_MIRROR | G_TX_WRAP, G_TX_NOMASK, G_TX_NOMASK, G_TX_NOLOD, G_TX_NOLOD);
gDPSetPrimColor(dlHead++, 0, 0, r, g, b, 255);
gDPSetCombineMode(dlHead++, G_CC_TEXT, G_CC_TEXT);
gDPPipeSync(dlHead++);
while (string[i] != '\0') {
unsigned int cur_char = string[i];
if (cur_char == '\n') {
xPos = x;
yPos += 12;
i++;
continue;
}
if (cur_char == '\t') {
int xDist = xPos - x + 1;
int tabCount = (xDist + TAB_WIDTH - 1) / TAB_WIDTH;
xPos = tabCount * TAB_WIDTH + x;
} else {
if (cur_char != ' ') {
s = computeS(cur_char);
gSPTextureRectangle(dlHead++, (xPos + 0) << 2, (yPos + 0) << 2, (xPos + 8) << 2, (yPos + 12) << 2, 0, s << 5, 0, 1 << 10, 1 << 10);
}
xPos += fast_text_font_kerning[cur_char - ' '];
}
i++;
}
gDPSetPrimColor(dlHead++, 0, 0, 255, 255, 255, 255);
gDPPipeSync(dlHead++);
*dl = dlHead;
}

14
src/game/fasttext.h Normal file
View File

@@ -0,0 +1,14 @@
#ifndef __FASTTEXT_H__
#define __FASTTEXT_H__
void drawSmallString_impl(Gfx**, int, int, const char*, int, int , int);
static inline void drawSmallString(Gfx **dl, int x, int y, const char* string) {
drawSmallString_impl(dl, x, y, string, 255, 255, 255);
}
static inline void drawSmallStringCol(Gfx **dl, int x, int y, const char* string, int r, int g, int b) {
drawSmallString_impl(dl, x, y, string, r, g, b);
}
#endif

View File

@@ -29,6 +29,7 @@
#include "puppycam2.h"
#include "debug_box.h"
#include "vc_check.h"
#include "profiling.h"
// First 3 controller slots
struct Controller gControllers[3];
@@ -729,10 +730,6 @@ void setup_game_memory(void) {
* Main game loop thread. Runs forever as long as the game continues.
*/
void thread5_game_loop(UNUSED void *arg) {
#if PUPPYPRINT_DEBUG
OSTime lastTime = 0;
#endif
setup_game_memory();
#if ENABLE_RUMBLE
init_rumble_pak_scheduler_queue();
@@ -762,19 +759,12 @@ void thread5_game_loop(UNUSED void *arg) {
render_init();
while (TRUE) {
profiler_frame_setup();
// If the reset timer is active, run the process to reset the game.
if (gResetTimer != 0) {
draw_reset_bars();
continue;
}
#if PUPPYPRINT_DEBUG
while (TRUE) {
lastTime = osGetTime();
collisionTime[perfIteration] = 0;
// graphTime[perfIteration] = 0;
behaviourTime[perfIteration] = 0;
dmaTime[perfIteration] = 0;
#endif
// If any controllers are plugged in, start read the data for when
// read_controller_inputs is called later.
@@ -788,25 +778,12 @@ void thread5_game_loop(UNUSED void *arg) {
audio_game_loop_tick();
select_gfx_pool();
read_controller_inputs(THREAD_5_GAME_LOOP);
profiler_update(PROFILER_TIME_CONTROLLERS);
addr = level_script_execute(addr);
#if !PUPPYPRINT_DEBUG && defined(VISUAL_DEBUG)
debug_box_input();
#endif
#if PUPPYPRINT_DEBUG
profiler_update(scriptTime, lastTime);
scriptTime[perfIteration] -= profilerTime[perfIteration];
scriptTime[perfIteration] -= profilerTime2[perfIteration];
if (benchmarkLoop > 0 && benchOption == 0) {
benchmarkLoop--;
benchMark[benchmarkLoop] = (osGetTime() - lastTime);
if (benchmarkLoop == 0) {
puppyprint_profiler_finished();
break;
}
} else {
break;
}
}
puppyprint_profiler_process();
#endif

View File

@@ -52,6 +52,9 @@ u8 curFrameTimeIndex = 0;
#include "PR/os_convert.h"
#ifdef USE_PROFILER
float profiler_get_fps();
#else
// Call once per frame
f32 calculate_and_update_fps() {
OSTime newTime = osGetTime();
@@ -64,9 +67,14 @@ f32 calculate_and_update_fps() {
}
return ((f32)FRAMETIME_COUNT * 1000000.0f) / (s32)OS_CYCLES_TO_USEC(newTime - oldTime);
}
#endif
void print_fps(s32 x, s32 y) {
#ifdef USE_PROFILER
f32 fps = profiler_get_fps();
#else
f32 fps = calculate_and_update_fps();
#endif
char text[14];
sprintf(text, "FPS %2.2f", fps);
@@ -75,7 +83,6 @@ void print_fps(s32 x, s32 y) {
#else
print_text(x, y, text);
#endif
}
// ------------ END OF FPS COUNER -----------------

Binary file not shown.

View File

@@ -21,6 +21,7 @@
#include "spawn_object.h"
#include "puppyprint.h"
#include "puppylights.h"
#include "profiling.h"
/**
@@ -547,7 +548,10 @@ void clear_objects(void) {
*/
void update_terrain_objects(void) {
gObjectCounter = update_objects_in_list(&gObjectLists[OBJ_LIST_SPAWNER]);
profiler_update(PROFILER_TIME_SPAWNER);
gObjectCounter += update_objects_in_list(&gObjectLists[OBJ_LIST_SURFACE]);
profiler_update(PROFILER_TIME_DYNAMIC);
}
/**
@@ -559,7 +563,13 @@ void update_non_terrain_objects(void) {
s32 i = 2;
while ((listIndex = sObjectListUpdateOrder[i]) != -1) {
if (listIndex == OBJ_LIST_PLAYER) {
profiler_update(PROFILER_TIME_BEHAVIOR_BEFORE_MARIO);
}
gObjectCounter += update_objects_in_list(&gObjectLists[listIndex]);
if (listIndex == OBJ_LIST_PLAYER) {
profiler_update(PROFILER_TIME_MARIO);
}
i++;
}
}
@@ -606,13 +616,6 @@ UNUSED static u16 unused_get_elapsed_time(u64 *cycleCounts, s32 index) {
* and object surface management.
*/
void update_objects(UNUSED s32 unused) {
//s64 cycleCounts[30];
#if PUPPYPRINT_DEBUG
OSTime first = osGetTime();
OSTime colTime = collisionTime[perfIteration];
#endif
// cycleCounts[0] = get_current_clock();
gTimeStopState &= ~TIME_STOP_MARIO_OPENED_DOOR;
@@ -626,11 +629,9 @@ void update_objects(UNUSED s32 unused) {
gObjectLists = gObjectListArray;
// If time stop is not active, unload object surfaces
// cycleCounts[1] = get_clock_difference(cycleCounts[0]);
clear_dynamic_surfaces();
// Update spawners and objects with surfaces
// cycleCounts[2] = get_clock_difference(cycleCounts[0]);
update_terrain_objects();
// If Mario was touching a moving platform at the end of last frame, apply
@@ -640,24 +641,17 @@ void update_objects(UNUSED s32 unused) {
apply_mario_platform_displacement();
// Detect which objects are intersecting
// cycleCounts[3] = get_clock_difference(cycleCounts[0]);
detect_object_collisions();
// Update all other objects that haven't been updated yet
// cycleCounts[4] = get_clock_difference(cycleCounts[0]);
update_non_terrain_objects();
// Unload any objects that have been deactivated
// cycleCounts[5] = get_clock_difference(cycleCounts[0]);
unload_deactivated_objects();
// Check if Mario is on a platform object and save this object
// cycleCounts[6] = get_clock_difference(cycleCounts[0]);
update_mario_platform();
// cycleCounts[7] = get_clock_difference(cycleCounts[0]);
// cycleCounts[0] = 0;
try_print_debug_mario_object_info();
// If time stop was enabled this frame, activate it now so that it will
@@ -669,8 +663,6 @@ void update_objects(UNUSED s32 unused) {
}
gPrevFrameObjectCount = gObjectCounter;
#if PUPPYPRINT_DEBUG
profiler_update(behaviourTime, first);
behaviourTime[perfIteration] -= collisionTime[perfIteration] - colTime;
#endif
profiler_update(PROFILER_TIME_BEHAVIOR_AFTER_MARIO);
}

279
src/game/profiling.c Normal file
View File

@@ -0,0 +1,279 @@
#include <ultra64.h>
#include <PR/os_internal_reg.h>
#include "game_init.h"
#include "profiling.h"
#include "fasttext.h"
#ifdef USE_PROFILER
#define RDP_CYCLE_CONV(x) ((10 * (x)) / 625) // 62.5 million cycles per frame
typedef struct {
u32 counts[PROFILING_BUFFER_SIZE];
u32 total;
} ProfileTimeData;
ProfileTimeData all_profiling_data[PROFILER_TIME_COUNT];
int profile_buffer_index = -1;
int rsp_buffer_indices[PROFILER_RSP_COUNT];
// Holds either the start time if the task is running, or the amount of time the task has run for so far if yielded
u32 rsp_pending_times[PROFILER_RSP_COUNT];
u32 prev_start;
u32 start;
u32 prev_time;
u32 audio_start;
u32 audio_buffer_index;
u32 preempted_time;
static void buffer_update(ProfileTimeData* data, u32 new, int buffer_index) {
u32 old = data->counts[buffer_index];
data->total -= old;
data->total += new;
data->counts[buffer_index] = new;
}
void profiler_update(enum ProfilerTime which) {
u32 cur_time = osGetCount();
u32 diff;
ProfileTimeData* cur_data = &all_profiling_data[which];
diff = cur_time - prev_time;
u32 saved = __osDisableInt();
u32 cur_preempted_time = preempted_time;
preempted_time = 0;
__osRestoreInt(saved);
if (cur_preempted_time > 0) {
diff -= cur_preempted_time;
start += cur_preempted_time;
}
buffer_update(cur_data, diff, profile_buffer_index);
prev_time = cur_time;
}
void profiler_rsp_started(enum ProfilerRSPTime which) {
rsp_pending_times[which] = osGetCount();
}
void profiler_rsp_completed(enum ProfilerRSPTime which) {
ProfileTimeData* cur_data = &all_profiling_data[PROFILER_TIME_RSP_GFX + which];
int cur_index = rsp_buffer_indices[which];
u32 time = osGetCount() - rsp_pending_times[which];
rsp_pending_times[which] = 0;
buffer_update(cur_data, time, cur_index);
cur_index++;
if (cur_index >= PROFILING_BUFFER_SIZE) {
cur_index = 0;
}
rsp_buffer_indices[which] = cur_index;
}
void profiler_rsp_resumed() {
rsp_pending_times[PROFILER_RSP_GFX] = osGetCount() - rsp_pending_times[PROFILER_RSP_GFX];
}
// This ends up being the same math as resumed, so we just use resumed for both
// void profiler_rsp_yielded() {
// rsp_pending_times[PROFILER_RSP_GFX] = osGetCount() - rsp_pending_times[PROFILER_RSP_GFX];
// }
void profiler_audio_started() {
audio_start = osGetCount();
}
void profiler_audio_completed() {
ProfileTimeData* cur_data = &all_profiling_data[PROFILER_TIME_AUDIO];
u32 time = osGetCount() - audio_start;
u32 cur_index = audio_buffer_index;
preempted_time = time;
buffer_update(cur_data, time, cur_index);
cur_index++;
if (cur_index >= PROFILING_BUFFER_SIZE) {
cur_index = 0;
}
audio_buffer_index = cur_index;
}
static void update_fps_timer() {
u32 diff = start - prev_start;
buffer_update(&all_profiling_data[PROFILER_TIME_FPS], diff, profile_buffer_index);
prev_start = start;
}
static void update_total_timer() {
u32 saved = __osDisableInt();
u32 cur_preempted_time = preempted_time;
preempted_time = 0;
__osRestoreInt(saved);
prev_time = start + cur_preempted_time;
profiler_update(PROFILER_TIME_TOTAL);
}
static void update_rdp_timers() {
u32 tmem = IO_READ(DPC_TMEM_REG);
u32 cmd = IO_READ(DPC_BUFBUSY_REG);
u32 pipe = IO_READ(DPC_PIPEBUSY_REG);
if (gGlobalTimer > 5) {
IO_WRITE(DPC_STATUS_REG, (DPC_CLR_CLOCK_CTR | DPC_CLR_CMD_CTR | DPC_CLR_PIPE_CTR | DPC_CLR_TMEM_CTR));
}
buffer_update(&all_profiling_data[PROFILER_TIME_TMEM], tmem, profile_buffer_index);
buffer_update(&all_profiling_data[PROFILER_TIME_CMD], cmd, profile_buffer_index);
buffer_update(&all_profiling_data[PROFILER_TIME_PIPE], pipe, profile_buffer_index);
}
#ifdef PUPPYPRINT_DEBUG
extern u8 sPPDebugPage;
extern u8 fDebug;
#endif
float profiler_get_fps() {
return (1000000.0f * PROFILING_BUFFER_SIZE) / (OS_CYCLES_TO_USEC(all_profiling_data[PROFILER_TIME_FPS].total));
}
u32 profiler_get_cpu_cycles() {
u32 cpu_normal_time = all_profiling_data[PROFILER_TIME_TOTAL].total / PROFILING_BUFFER_SIZE;
u32 cpu_audio_time = all_profiling_data[PROFILER_TIME_AUDIO].total / PROFILING_BUFFER_SIZE;
return cpu_normal_time + cpu_audio_time * 2;
}
u32 profiler_get_rsp_cycles() {
u32 rsp_graphics_time = all_profiling_data[PROFILER_TIME_RSP_GFX].total / PROFILING_BUFFER_SIZE;
u32 rsp_audio_time = all_profiling_data[PROFILER_TIME_RSP_AUDIO].total / PROFILING_BUFFER_SIZE;
return rsp_graphics_time + rsp_audio_time;
}
u32 profiler_get_rdp_cycles() {
u32 rdp_pipe_cycles = all_profiling_data[PROFILER_TIME_PIPE].total;
u32 rdp_tmem_cycles = all_profiling_data[PROFILER_TIME_TMEM].total;
u32 rdp_cmd_cycles = all_profiling_data[PROFILER_TIME_CMD].total;
u32 rdp_max_cycles = MAX(MAX(rdp_pipe_cycles, rdp_tmem_cycles), rdp_cmd_cycles);
return rdp_max_cycles / PROFILING_BUFFER_SIZE;
}
u32 profiler_get_cpu_microseconds() {
u32 cpu_normal_time = OS_CYCLES_TO_USEC(all_profiling_data[PROFILER_TIME_TOTAL].total / PROFILING_BUFFER_SIZE);
u32 cpu_audio_time = OS_CYCLES_TO_USEC(all_profiling_data[PROFILER_TIME_AUDIO].total / PROFILING_BUFFER_SIZE);
return cpu_normal_time + cpu_audio_time * 2;
}
u32 profiler_get_rsp_microseconds() {
u32 rsp_graphics_time = OS_CYCLES_TO_USEC(all_profiling_data[PROFILER_TIME_RSP_GFX].total / PROFILING_BUFFER_SIZE);
u32 rsp_audio_time = OS_CYCLES_TO_USEC(all_profiling_data[PROFILER_TIME_RSP_AUDIO].total / PROFILING_BUFFER_SIZE);
return rsp_graphics_time + rsp_audio_time;
}
u32 profiler_get_rdp_microseconds() {
u32 rdp_pipe_cycles = all_profiling_data[PROFILER_TIME_PIPE].total;
u32 rdp_tmem_cycles = all_profiling_data[PROFILER_TIME_TMEM].total;
u32 rdp_cmd_cycles = all_profiling_data[PROFILER_TIME_CMD].total;
u32 rdp_max_cycles = MAX(MAX(rdp_pipe_cycles, rdp_tmem_cycles), rdp_cmd_cycles);
return RDP_CYCLE_CONV(rdp_max_cycles / PROFILING_BUFFER_SIZE);
}
void profiler_print_times() {
u32 microseconds[PROFILER_TIME_COUNT];
char text_buffer[196];
update_fps_timer();
update_total_timer();
update_rdp_timers();
#ifndef PUPPYPRINT_DEBUG
static u8 show_profiler = 0;
if (gPlayer1Controller->buttonPressed & L_TRIG) {
show_profiler ^= 1;
}
#endif
#ifdef PUPPYPRINT_DEBUG
if (fDebug && sPPDebugPage == 0) {
#else
if (show_profiler) {
#endif
for (int i = 0; i < PROFILER_TIME_COUNT; i++) {
if (i < PROFILER_TIME_TMEM) {
microseconds[i] = OS_CYCLES_TO_USEC(all_profiling_data[i].total / PROFILING_BUFFER_SIZE);
} else {
microseconds[i] = RDP_CYCLE_CONV(all_profiling_data[i].total / PROFILING_BUFFER_SIZE);
}
}
// audio time is removed from the main thread profiling, so add it back here
u32 total_cpu = microseconds[PROFILER_TIME_TOTAL] + microseconds[PROFILER_TIME_AUDIO] * 2;
u32 total_rsp = microseconds[PROFILER_TIME_RSP_GFX] + microseconds[PROFILER_TIME_RSP_AUDIO];
u32 max_rdp = MAX(MAX(microseconds[PROFILER_TIME_TMEM], microseconds[PROFILER_TIME_CMD]), microseconds[PROFILER_TIME_PIPE]);
sprintf(text_buffer,
"FPS: %5.2f\n"
"\n"
"CPU\t\t%d (%d%%)\n"
" Input\t\t%d\n"
" Dynamic\t\t%d\n"
" Mario\t\t\t%d\n"
" Behavior\t\t%d\n"
" Graph\t\t%d\n"
" Audio\t\t\t%d\n"
"\n"
"RDP\t\t%d (%d%%)\n"
" Tmem\t\t\t%d\n"
" Cmd\t\t\t%d\n"
" Pipe\t\t\t%d\n"
"\n"
"RSP\t\t%d (%d%%)\n"
" Gfx\t\t\t%d\n"
" Audio\t\t\t%d\n",
1000000.0f / microseconds[PROFILER_TIME_FPS],
total_cpu, total_cpu / 333,
microseconds[PROFILER_TIME_CONTROLLERS],
microseconds[PROFILER_TIME_DYNAMIC],
microseconds[PROFILER_TIME_MARIO],
microseconds[PROFILER_TIME_BEHAVIOR_BEFORE_MARIO] + microseconds[PROFILER_TIME_BEHAVIOR_AFTER_MARIO],
microseconds[PROFILER_TIME_GFX],
microseconds[PROFILER_TIME_AUDIO] * 2, // audio is 60Hz, so double the average
max_rdp, max_rdp / 333,
microseconds[PROFILER_TIME_TMEM],
microseconds[PROFILER_TIME_CMD],
microseconds[PROFILER_TIME_PIPE],
total_rsp, total_rsp / 333,
microseconds[PROFILER_TIME_RSP_GFX],
microseconds[PROFILER_TIME_RSP_AUDIO]
);
Gfx* dlHead = gDisplayListHead;
gDPPipeSync(dlHead++);
gDPSetCycleType(dlHead++, G_CYC_1CYCLE);
gDPSetRenderMode(dlHead++, G_RM_TEX_EDGE, G_RM_TEX_EDGE2);
gDPSetTexturePersp(dlHead++, G_TP_NONE);
gDPSetTextureFilter(dlHead++, G_TF_POINT);
gDPSetTextureLUT(dlHead++, G_TT_NONE);
drawSmallStringCol(&dlHead, 10, 10, text_buffer, 255, 255, 255);
gDisplayListHead = dlHead;
}
}
void profiler_frame_setup() {
profile_buffer_index++;
preempted_time = 0;
if (profile_buffer_index >= PROFILING_BUFFER_SIZE) {
profile_buffer_index = 0;
}
prev_time = start = osGetCount();
}
#endif

61
src/game/profiling.h Normal file
View File

@@ -0,0 +1,61 @@
#ifndef __PROFILING_H__
#define __PROFILING_H__
#include <ultra64.h>
#include "macros.h"
#include "config/config_debug.h"
#include "config/config_safeguards.h"
#define PROFILING_BUFFER_SIZE 64
enum ProfilerTime {
PROFILER_TIME_FPS,
PROFILER_TIME_CONTROLLERS,
PROFILER_TIME_SPAWNER,
PROFILER_TIME_DYNAMIC,
PROFILER_TIME_BEHAVIOR_BEFORE_MARIO,
PROFILER_TIME_MARIO,
PROFILER_TIME_BEHAVIOR_AFTER_MARIO,
PROFILER_TIME_GFX,
PROFILER_TIME_AUDIO,
PROFILER_TIME_TOTAL,
PROFILER_TIME_RSP_GFX,
PROFILER_TIME_RSP_AUDIO,
PROFILER_TIME_TMEM,
PROFILER_TIME_PIPE,
PROFILER_TIME_CMD,
PROFILER_TIME_COUNT,
};
enum ProfilerRSPTime {
PROFILER_RSP_GFX,
PROFILER_RSP_AUDIO,
PROFILER_RSP_COUNT
};
#ifdef USE_PROFILER
void profiler_update(enum ProfilerTime which);
void profiler_print_times();
void profiler_frame_setup();
void profiler_rsp_started(enum ProfilerRSPTime which);
void profiler_rsp_completed(enum ProfilerRSPTime which);
void profiler_rsp_resumed();
void profiler_audio_started();
void profiler_audio_completed();
// See profiling.c to see why profiler_rsp_yielded isn't its own function
static ALWAYS_INLINE void profiler_rsp_yielded() {
profiler_rsp_resumed();
}
#else
#define profiler_update(which)
#define profiler_print_times()
#define profiler_frame_setup()
#define profiler_rsp_started(which)
#define profiler_rsp_completed(which)
#define profiler_rsp_resumed()
#define profiler_audio_started()
#define profiler_audio_completed()
#define profiler_rsp_yielded()
#endif
#endif

View File

@@ -68,34 +68,9 @@ s32 benchmarkTimer = 0;
s32 benchmarkProgramTimer = 0;
s8 benchmarkType = 0;
// General
u32 cpuTime = 0;
u32 rspTime = 0;
u32 rdpTime = 0;
u32 ramTime = 0;
u32 loadTime = 0;
u32 gLastOSTime = 0;
u32 rspDelta = 0;
s32 benchMark[NUM_BENCH_ITERATIONS + 2];
// CPU
u32 collisionTime[NUM_PERF_ITERATIONS + 1];
u32 behaviourTime[NUM_PERF_ITERATIONS + 1];
u32 scriptTime[NUM_PERF_ITERATIONS + 1];
u32 graphTime[NUM_PERF_ITERATIONS + 1];
u32 audioTime[NUM_PERF_ITERATIONS + 1];
u32 dmaTime[NUM_PERF_ITERATIONS + 1];
u32 dmaAudioTime[NUM_PERF_ITERATIONS + 1];
u32 faultTime[NUM_PERF_ITERATIONS + 1];
u32 taskTime[NUM_PERF_ITERATIONS + 1];
u32 profilerTime[NUM_PERF_ITERATIONS + 1];
u32 profilerTime2[NUM_PERF_ITERATIONS + 1];
u32 cameraTime[NUM_PERF_ITERATIONS + 1];
// RSP
u32 audioTime[NUM_PERF_ITERATIONS + 1];
u32 rspGenTime[NUM_PERF_ITERATIONS + 1];
// RDP
u32 bufferTime[NUM_PERF_ITERATIONS + 1];
u32 tmemTime[NUM_PERF_ITERATIONS + 1];
u32 busTime[NUM_PERF_ITERATIONS + 1];
// RAM
s8 ramViewer = FALSE;
s32 ramsizeSegment[NUM_TLB_SEGMENTS + 1] = {
@@ -306,6 +281,7 @@ void print_ram_overview(void) {
sprintf(textBytes, "RAM: %06X/%06X (%d_)", main_pool_available(), mempool, (s32)(((f32)main_pool_available() / (f32)mempool) * 100));
print_small_text(SCREEN_CENTER_X, (SCREEN_HEIGHT - 16), textBytes, PRINT_TEXT_ALIGN_CENTRE, PRINT_ALL, FONT_OUTLINE);
puppyprint_calculate_ram_usage();
print_ram_bar();
}
@@ -477,12 +453,6 @@ extern s16 gVisualSurfaceCount;
void puppyprint_render_collision(void) {
char textBytes[200];
#ifdef PUPPYPRINT_DEBUG_CYCLES
sprintf(textBytes, "Collision:<COL_99505099> %dc", collisionTime[NUM_PERF_ITERATIONS]);
#else
sprintf(textBytes, "Collision:<COL_99505099> %dus", collisionTime[NUM_PERF_ITERATIONS]);
#endif
print_small_text(304, 48, textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL, 1);
sprintf(textBytes, "Pool Size: %X#Node Size: %X#Surfaces Allocated: %d#Nodes Allocated: %d#Current Cell: %d", (SURFACE_NODE_POOL_SIZE * sizeof(struct SurfaceNode)), (SURFACE_POOL_SIZE * sizeof(struct Surface)),
gSurfacesAllocated, gSurfaceNodesAllocated, gVisualSurfaceCount);
@@ -512,55 +482,47 @@ void puppyprint_render_collision(void) {
#endif
}
struct CPUBar {
u32 *time;
ColorRGB colour;
const char str[32];
};
extern void print_fps(s32 x, s32 y);
struct CPUBar cpu_ordering_table[] = {
{ collisionTime, COLOR_RGB_RED, { "Collision: <COL_99505099>" }},
{ graphTime, COLOR_RGB_BLUE, { "Graph: <COL_50509999>" }},
{ behaviourTime, COLOR_RGB_GREEN, { "Behaviour: <COL_50995099>" }},
{ audioTime, COLOR_RGB_YELLOW, { "Audio: <COL_99995099>" }},
{ cameraTime, COLOR_RGB_CYAN, { "Camera: <COL_50999999>" }},
{ dmaTime, COLOR_RGB_MAGENTA, { "DMA: <COL_99509999>" }},
};
u32 profiler_get_cpu_cycles();
u32 profiler_get_rsp_cycles();
u32 profiler_get_rdp_cycles();
#define CPU_TABLE_MAX (sizeof(cpu_ordering_table) / sizeof(struct CPUBar))
#define ADDTIMES MAX(((collisionTime[MX] + graphTime[MX] + behaviourTime[MX] + audioTime[MX] + cameraTime[MX] + dmaTime[MX]) / 80), 1)
u32 profiler_get_cpu_microseconds();
u32 profiler_get_rsp_microseconds();
u32 profiler_get_rdp_microseconds();
void print_basic_profiling(void) {
u32 cpuTime;
u32 rspTime;
u32 rdpTime;
char textBytes[90];
print_fps(16, 40);
print_fps(16, 16);
#ifdef PUPPYPRINT_DEBUG_CYCLES
cpuTime = profiler_get_cpu_cycles();
rspTime = profiler_get_rsp_cycles();
rdpTime = profiler_get_rdp_cycles();
sprintf(textBytes, "CPU: %dc (%d_)#RSP: %dc (%d_)#RDP: %dc (%d_)",
cpuTime, (cpuTime / 15625),
rspTime, (rspTime / 15625),
rdpTime, (rdpTime / 15625));
#else
cpuTime = profiler_get_cpu_microseconds();
rspTime = profiler_get_rsp_microseconds();
rdpTime = profiler_get_rdp_microseconds();
sprintf(textBytes, "CPU: %dus (%d_)#RSP: %dus (%d_)#RDP: %dus (%d_)",
cpuTime, (cpuTime / 333),
rspTime, (rspTime / 333),
rdpTime, (rdpTime / 333));
#endif
print_small_text(16, 52, textBytes, PRINT_TEXT_ALIGN_LEFT, PRINT_ALL, FONT_OUTLINE);
print_small_text(16, 28, textBytes, PRINT_TEXT_ALIGN_LEFT, PRINT_ALL, FONT_OUTLINE);
}
void puppyprint_render_standard(void) {
s32 perfPercentage[CPU_TABLE_MAX];
s32 graphPos;
s32 prevGraph;
u32 i;
s32 viewedNums;
char textBytes[80];
print_basic_profiling();
sprintf(textBytes, "OBJ: %d/%d", gObjectCounter, OBJECT_POOL_CAPACITY);
print_small_text(16, 124, textBytes, PRINT_TEXT_ALIGN_LEFT, PRINT_ALL, FONT_OUTLINE);
print_small_text((SCREEN_WIDTH - 16), 16, textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL, FONT_OUTLINE);
#ifndef ENABLE_CREDITS_BENCHMARK
// Very little point printing useless info if Mario doesn't even exist.
@@ -571,7 +533,7 @@ void puppyprint_render_standard(void) {
(s32)(gMarioState->pos[2]),
(u16)(gMarioState->faceAngle[1]),
(u32)(gMarioState->action & ACT_ID_MASK));
print_small_text(16, 140, textBytes, PRINT_TEXT_ALIGN_LEFT, PRINT_ALL, FONT_OUTLINE);
print_small_text((SCREEN_WIDTH - 16), 32, textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL, FONT_OUTLINE);
}
// Same for the camera, especially so because this will crash otherwise.
if (gCamera) {
@@ -584,66 +546,51 @@ void puppyprint_render_standard(void) {
}
#endif
// Just to keep screen estate a little friendlier.
#define MX NUM_PERF_ITERATIONS
for (i = 0; i < CPU_TABLE_MAX; i++) {
perfPercentage[i] = MAX((cpu_ordering_table[i].time[MX] / ADDTIMES), 0);
}
#undef ADDTIMES
#undef MX
// Old profiler bar code, can be restored one day if desired to work with new profiler
// s32 graphPos;
// s32 prevGraph;
// u32 i;
// s32 viewedNums;
viewedNums = 0;
for (i = 0; i < CPU_TABLE_MAX; i++) {
s32 num = cpu_ordering_table[i].time[NUM_PERF_ITERATIONS];
if (num != 0) {
#ifdef PUPPYPRINT_DEBUG_CYCLES
sprintf(textBytes, "%s%dc", cpu_ordering_table[i].str, num);
#else
sprintf(textBytes, "%s%dus", cpu_ordering_table[i].str, num);
#endif
print_small_text((SCREEN_WIDTH - 16), (40 + (viewedNums * 12)), textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL, FONT_OUTLINE);
viewedNums++;
}
}
// prepare_blank_box();
// viewedNums = 0;
s32 barY = (28 + (viewedNums * 12)) + 16;
prepare_blank_box();
viewedNums = 0;
// // Render CPU breakdown bar.
// for (i = 0; i < CPU_TABLE_MAX; i++) {
// if (perfPercentage[i] == 0
// && (i != CPU_TABLE_MAX - 1)) {
// continue;
// }
// Render CPU breakdown bar.
for (i = 0; i < CPU_TABLE_MAX; i++) {
if (perfPercentage[i] == 0
&& (i != CPU_TABLE_MAX - 1)) {
continue;
}
// if (viewedNums == 0) {
// graphPos = ((SCREEN_WIDTH - 96) + perfPercentage[i]);
// render_blank_box((SCREEN_WIDTH - 96), barY, graphPos, (barY + 8),
// cpu_ordering_table[i].colour[0],
// cpu_ordering_table[i].colour[1],
// cpu_ordering_table[i].colour[2], 255);
// } else if (i == (CPU_TABLE_MAX - 1)) {
// graphPos = ((SCREEN_WIDTH - 96) + perfPercentage[i]);
// render_blank_box(prevGraph, barY, (SCREEN_WIDTH - 16), (barY + 8),
// cpu_ordering_table[i].colour[0],
// cpu_ordering_table[i].colour[1],
// cpu_ordering_table[i].colour[2], 255);
// } else {
// graphPos += perfPercentage[i];
// render_blank_box(prevGraph, barY, graphPos, (barY + 8),
// cpu_ordering_table[i].colour[0],
// cpu_ordering_table[i].colour[1],
// cpu_ordering_table[i].colour[2], 255);
// }
if (viewedNums == 0) {
graphPos = ((SCREEN_WIDTH - 96) + perfPercentage[i]);
render_blank_box((SCREEN_WIDTH - 96), barY, graphPos, (barY + 8),
cpu_ordering_table[i].colour[0],
cpu_ordering_table[i].colour[1],
cpu_ordering_table[i].colour[2], 255);
} else if (i == (CPU_TABLE_MAX - 1)) {
graphPos = ((SCREEN_WIDTH - 96) + perfPercentage[i]);
render_blank_box(prevGraph, barY, (SCREEN_WIDTH - 16), (barY + 8),
cpu_ordering_table[i].colour[0],
cpu_ordering_table[i].colour[1],
cpu_ordering_table[i].colour[2], 255);
} else {
graphPos += perfPercentage[i];
render_blank_box(prevGraph, barY, graphPos, (barY + 8),
cpu_ordering_table[i].colour[0],
cpu_ordering_table[i].colour[1],
cpu_ordering_table[i].colour[2], 255);
}
// viewedNums++;
// prevGraph = graphPos;
// }
viewedNums++;
prevGraph = graphPos;
}
finish_blank_box();
// finish_blank_box();
}
void puppyprint_render_minimal(void) {
print_basic_profiling();
}
@@ -686,12 +633,10 @@ void render_page_menu(void) {
}
void puppyprint_render_profiler(void) {
OSTime first = osGetTime();
print_set_envcolour(255, 255, 255, 255);
if (!fDebug) {
profiler_update(profilerTime, first);
return;
}
@@ -700,39 +645,9 @@ void puppyprint_render_profiler(void) {
if (sDebugMenu) {
render_page_menu();
}
profiler_update(profilerTime, first);
}
void profiler_update(u32 *time, OSTime time2) {
time[perfIteration] = (osGetTime() - time2);
}
void get_average_perf_time(u32 *time, s32 is_rdp) {
// This takes all but the last index of the timer array, and creates an average value, which is written to the last index.
s32 i = 0;
s32 total = 0;
for (i = 0; i < NUM_PERF_ITERATIONS; i++) {
total += time[i];
}
total /= NUM_PERF_ITERATIONS;
total = MAX(total, 0);
if (is_rdp)
{
time[NUM_PERF_ITERATIONS] = RDP_CYCLE_CONV(total);
}
else
{
time[NUM_PERF_ITERATIONS] = CYCLE_CONV(total);
}
}
void puppyprint_profiler_process(void) {
bufferTime[perfIteration] = (IO_READ(DPC_BUFBUSY_REG));
tmemTime[perfIteration] = (IO_READ(DPC_TMEM_REG));
busTime[perfIteration] = (IO_READ(DPC_PIPEBUSY_REG));
OSTime newTime = osGetTime();
if (fDebug && (gPlayer1Controller->buttonPressed & L_TRIG)) {
sDebugMenu ^= TRUE;
if (sDebugMenu == FALSE) {
@@ -760,54 +675,6 @@ void puppyprint_profiler_process(void) {
sDebugOption = 0;
}
}
if (!(gGlobalTimer % NUM_PERF_ITERATIONS)) {
get_average_perf_time( scriptTime, FALSE);
get_average_perf_time( behaviourTime, FALSE);
get_average_perf_time( collisionTime, FALSE);
get_average_perf_time( graphTime, FALSE);
get_average_perf_time( audioTime, FALSE);
get_average_perf_time( dmaTime, FALSE);
get_average_perf_time( dmaAudioTime, FALSE);
get_average_perf_time( faultTime, FALSE);
get_average_perf_time( taskTime, FALSE);
get_average_perf_time( profilerTime, FALSE);
get_average_perf_time( profilerTime2, FALSE);
get_average_perf_time( cameraTime, FALSE);
// Performed twice a frame without fail, so doubled to have a more representative value.
audioTime[NUM_PERF_ITERATIONS] *= 2;
dmaAudioTime[NUM_PERF_ITERATIONS] *= 2;
dmaTime[NUM_PERF_ITERATIONS] += dmaAudioTime[NUM_PERF_ITERATIONS];
get_average_perf_time(rspGenTime, FALSE);
get_average_perf_time(bufferTime, TRUE);
get_average_perf_time( tmemTime, TRUE);
get_average_perf_time( busTime, TRUE);
rdpTime = bufferTime[NUM_PERF_ITERATIONS];
rdpTime = MAX(rdpTime, tmemTime[NUM_PERF_ITERATIONS]);
rdpTime = MAX(rdpTime, busTime[NUM_PERF_ITERATIONS]);
#if BBPLAYER == 1 // iQue RDP registers need to be halved to be correct.
rdpTime /= 2;
#endif
cpuTime = (scriptTime[NUM_PERF_ITERATIONS]
+ taskTime[NUM_PERF_ITERATIONS]
+ faultTime[NUM_PERF_ITERATIONS]
+ audioTime[NUM_PERF_ITERATIONS]);
rspTime = rspGenTime[NUM_PERF_ITERATIONS];
puppyprint_calculate_ram_usage();
}
gLastOSTime = newTime;
if (gGlobalTimer > 5) {
IO_WRITE(DPC_STATUS_REG, (DPC_CLR_CLOCK_CTR | DPC_CLR_CMD_CTR | DPC_CLR_PIPE_CTR | DPC_CLR_TMEM_CTR));
}
if (perfIteration++ == (NUM_PERF_ITERATIONS - 1)) {
perfIteration = 0;
}
profiler_update(profilerTime2, newTime);
}
#endif

Some files were not shown because too many files have changed in this diff Show More