diff --git a/bin/segment2.c b/bin/segment2.c index 0e0fdbd9..d46b1bde 100644 --- a/bin/segment2.c +++ b/bin/segment2.c @@ -9,6 +9,11 @@ #include "make_const_nonconst.h" // SM64 (US/JP/EU/SH) Segment 02 +#ifdef PUPPYPRINT +ALIGNED8 const Texture small_font[] = { +#include "textures/segment2/custom_text.i4.inc.c" +}; +#endif ALIGNED8 static const Texture texture_hud_char_0[] = { #include "textures/segment2/segment2.00000.rgba16.inc.c" diff --git a/include/config.h b/include/config.h index 2ca41605..9d6e78ba 100644 --- a/include/config.h +++ b/include/config.h @@ -111,6 +111,8 @@ #define ALL_SURFACES_HAVE_FORCE // Custom debug mode. Press DPAD left to show the debug UI. Press DPAD right to enter the noclip mode. //#define CUSTOM_DEBUG +// Include Puppyprint, a display library for text and large images. Also includes a custom, enhanced performance profiler. +#define PUPPYPRINT // BUG/GAME QOL FIXES // Fix instant warp offset not working when warping across different areas diff --git a/src/audio/heap.c b/src/audio/heap.c index d7580eed..dcfb849c 100644 --- a/src/audio/heap.c +++ b/src/audio/heap.c @@ -7,6 +7,7 @@ #include "seqplayer.h" #include "effects.h" #include "game/game_init.h" +#include "game/puppyprint.h" #define ALIGN16(val) (((val) + 0xF) & ~0xF) @@ -339,6 +340,10 @@ extern s32 D_SH_80315EE8; void sound_init_main_pools(s32 sizeForAudioInitPool) { sound_alloc_pool_init(&gAudioInitPool, gAudioHeap, sizeForAudioInitPool); sound_alloc_pool_init(&gAudioSessionPool, gAudioHeap + sizeForAudioInitPool, gAudioHeapSize - sizeForAudioInitPool); + #ifdef PUPPYPRINT + audioPool[0] = sizeForAudioInitPool; + audioPool[1] = gAudioHeapSize - sizeForAudioInitPool; + #endif } #ifdef VERSION_SH @@ -351,20 +356,32 @@ void session_pools_init(struct PoolSplit *a) { gAudioSessionPool.cur = gAudioSessionPool.start; sound_alloc_pool_init(&gNotesAndBuffersPool, SOUND_ALLOC_FUNC(&gAudioSessionPool, a->wantSeq), a->wantSeq); sound_alloc_pool_init(&gSeqAndBankPool, 
SOUND_ALLOC_FUNC(&gAudioSessionPool, a->wantCustom), a->wantCustom); + #ifdef PUPPYPRINT + audioPool[2] = a->wantSeq; + audioPool[3] = a->wantCustom; + #endif } void seq_and_bank_pool_init(struct PoolSplit2 *a) { gSeqAndBankPool.cur = gSeqAndBankPool.start; sound_alloc_pool_init(&gPersistentCommonPool, SOUND_ALLOC_FUNC(&gSeqAndBankPool, a->wantPersistent), a->wantPersistent); sound_alloc_pool_init(&gTemporaryCommonPool, SOUND_ALLOC_FUNC(&gSeqAndBankPool, a->wantTemporary), a->wantTemporary); + #ifdef PUPPYPRINT + audioPool[4] = a->wantPersistent; + audioPool[5] = a->wantTemporary; + #endif } void persistent_pools_init(struct PoolSplit *a) { gPersistentCommonPool.cur = gPersistentCommonPool.start; sound_alloc_pool_init(&gSeqLoadedPool.persistent.pool, SOUND_ALLOC_FUNC(&gPersistentCommonPool, a->wantSeq), a->wantSeq); sound_alloc_pool_init(&gBankLoadedPool.persistent.pool, SOUND_ALLOC_FUNC(&gPersistentCommonPool, a->wantBank), a->wantBank); - sound_alloc_pool_init(&gUnusedLoadedPool.persistent.pool, SOUND_ALLOC_FUNC(&gPersistentCommonPool, a->wantUnused), - a->wantUnused); + sound_alloc_pool_init(&gUnusedLoadedPool.persistent.pool, SOUND_ALLOC_FUNC(&gPersistentCommonPool, a->wantUnused), a->wantUnused); + #ifdef PUPPYPRINT + audioPool[6] = a->wantSeq; + audioPool[7] = a->wantBank; + audioPool[8] = a->wantUnused; + #endif persistent_pool_clear(&gSeqLoadedPool.persistent); persistent_pool_clear(&gBankLoadedPool.persistent); persistent_pool_clear(&gUnusedLoadedPool.persistent); @@ -374,8 +391,12 @@ void temporary_pools_init(struct PoolSplit *a) { gTemporaryCommonPool.cur = gTemporaryCommonPool.start; sound_alloc_pool_init(&gSeqLoadedPool.temporary.pool, SOUND_ALLOC_FUNC(&gTemporaryCommonPool, a->wantSeq), a->wantSeq); sound_alloc_pool_init(&gBankLoadedPool.temporary.pool, SOUND_ALLOC_FUNC(&gTemporaryCommonPool, a->wantBank), a->wantBank); - sound_alloc_pool_init(&gUnusedLoadedPool.temporary.pool, SOUND_ALLOC_FUNC(&gTemporaryCommonPool, a->wantUnused), - a->wantUnused); 
+ sound_alloc_pool_init(&gUnusedLoadedPool.temporary.pool, SOUND_ALLOC_FUNC(&gTemporaryCommonPool, a->wantUnused), a->wantUnused); + #ifdef PUPPYPRINT + audioPool[9] = a->wantSeq; + audioPool[10] = a->wantBank; + audioPool[11] = a->wantUnused; + #endif temporary_pool_clear(&gSeqLoadedPool.temporary); temporary_pool_clear(&gBankLoadedPool.temporary); temporary_pool_clear(&gUnusedLoadedPool.temporary); diff --git a/src/audio/load.c b/src/audio/load.c index 6e3e3ffb..d7d7b50a 100644 --- a/src/audio/load.c +++ b/src/audio/load.c @@ -7,6 +7,7 @@ #include "heap.h" #include "load.h" #include "seqplayer.h" +#include "game/puppyprint.h" #define ALIGN16(val) (((val) + 0xF) & ~0xF) @@ -141,8 +142,14 @@ u8 audioString49[] = "BANK LOAD MISS! FOR %d\n"; * Performs an asynchronus (normal priority) DMA copy */ void audio_dma_copy_async(uintptr_t devAddr, void *vAddr, size_t nbytes, OSMesgQueue *queue, OSIoMesg *mesg) { + #ifdef PUPPYPRINT + OSTime first = osGetTime(); + #endif osInvalDCache(vAddr, nbytes); osPiStartDma(mesg, OS_MESG_PRI_NORMAL, OS_READ, devAddr, vAddr, nbytes, queue); + #ifdef PUPPYPRINT + dmaAudioTime[perfIteration] += osGetTime()-first; + #endif } /** @@ -150,6 +157,9 @@ void audio_dma_copy_async(uintptr_t devAddr, void *vAddr, size_t nbytes, OSMesgQ * to 0x1000 bytes transfer at once. */ void audio_dma_partial_copy_async(uintptr_t *devAddr, u8 **vAddr, ssize_t *remaining, OSMesgQueue *queue, OSIoMesg *mesg) { + #ifdef PUPPYPRINT + OSTime first = osGetTime(); + #endif #if defined(VERSION_EU) ssize_t transfer = (*remaining >= 0x1000 ? 
0x1000 : *remaining); #else @@ -160,6 +170,9 @@ void audio_dma_partial_copy_async(uintptr_t *devAddr, u8 **vAddr, ssize_t *remai osPiStartDma(mesg, OS_MESG_PRI_NORMAL, OS_READ, *devAddr, *vAddr, transfer, queue); *devAddr += transfer; *vAddr += transfer; + #ifdef PUPPYPRINT + dmaAudioTime[perfIteration] += osGetTime()-first; + #endif } void decrease_sample_dma_ttls() { @@ -207,6 +220,9 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) { u32 dmaIndex; ssize_t bufferPos; UNUSED u32 pad; + #ifdef PUPPYPRINT + OSTime first = osGetTime(); + #endif if (arg2 != 0 || *dmaIndexRef >= sSampleDmaListSize1) { for (i = sSampleDmaListSize1; i < gSampleDmaNumListItems; i++) { @@ -232,8 +248,14 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) { dma->ttl = 60; *dmaIndexRef = (u8) i; #if defined(VERSION_EU) + #ifdef PUPPYPRINT + dmaAudioTime[perfIteration] += osGetTime()-first; + #endif return &dma->buffer[(devAddr - dma->source)]; #else + #ifdef PUPPYPRINT + dmaAudioTime[perfIteration] += osGetTime()-first; + #endif return (devAddr - dma->source) + dma->buffer; #endif } @@ -274,8 +296,14 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) { } dma->ttl = 2; #if defined(VERSION_EU) + #ifdef PUPPYPRINT + dmaAudioTime[perfIteration] += osGetTime()-first; + #endif return dma->buffer + (devAddr - dma->source); #else + #ifdef PUPPYPRINT + dmaAudioTime[perfIteration] += osGetTime()-first; + #endif return (devAddr - dma->source) + dma->buffer; #endif } @@ -301,12 +329,18 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) { osPiStartDma(&gCurrAudioFrameDmaIoMesgBufs[gCurrAudioFrameDmaCount++], OS_MESG_PRI_NORMAL, OS_READ, dmaDevAddr, dma->buffer, transfer, &gCurrAudioFrameDmaQueue); *dmaIndexRef = dmaIndex; + #ifdef PUPPYPRINT + dmaAudioTime[perfIteration] += osGetTime()-first; + #endif return (devAddr - dmaDevAddr) + dma->buffer; #else gCurrAudioFrameDmaCount++; 
osPiStartDma(&gCurrAudioFrameDmaIoMesgBufs[gCurrAudioFrameDmaCount - 1], OS_MESG_PRI_NORMAL, OS_READ, dmaDevAddr, dma->buffer, transfer, &gCurrAudioFrameDmaQueue); *dmaIndexRef = dmaIndex; + #ifdef PUPPYPRINT + dmaAudioTime[perfIteration] += osGetTime()-first; + #endif return dma->buffer + (devAddr - dmaDevAddr); #endif } diff --git a/src/engine/math_util.h b/src/engine/math_util.h index 2938061c..9c10baf4 100644 --- a/src/engine/math_util.h +++ b/src/engine/math_util.h @@ -31,6 +31,7 @@ extern f32 gCosineTable[]; #define min(a, b) ((a) <= (b) ? (a) : (b)) #define max(a, b) ((a) > (b) ? (a) : (b)) +#define CLAMP(x, low, high) (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x))) #define sqr(x) ((x) * (x)) diff --git a/src/engine/surface_collision.c b/src/engine/surface_collision.c index ece157b1..03482770 100644 --- a/src/engine/surface_collision.c +++ b/src/engine/surface_collision.c @@ -7,6 +7,7 @@ #include "game/object_list_processor.h" #include "surface_collision.h" #include "surface_load.h" +#include "game/puppyprint.h" /************************************************** * WALLS * @@ -187,6 +188,9 @@ s32 find_wall_collisions(struct WallCollisionData *colData) { s32 numCollisions = 0; s16 x = colData->x; s16 z = colData->z; + #ifdef PUPPYPRINT + OSTime first = osGetTime(); + #endif colData->numWalls = 0; @@ -213,6 +217,10 @@ s32 find_wall_collisions(struct WallCollisionData *colData) { // Increment the debug tracker. gNumCalls.wall += 1; + #ifdef PUPPYPRINT + collisionTime[perfIteration] += osGetTime()-first; + #endif + return numCollisions; } @@ -299,6 +307,9 @@ f32 find_ceil(f32 posX, f32 posY, f32 posZ, struct Surface **pceil) { f32 height = CELL_HEIGHT_LIMIT; f32 dynamicHeight = CELL_HEIGHT_LIMIT; s16 x, y, z; + #ifdef PUPPYPRINT + OSTime first = osGetTime(); + #endif //! (Parallel Universes) Because position is casted to an s16, reaching higher // float locations can return ceilings despite them not existing there. 
@@ -337,6 +348,10 @@ f32 find_ceil(f32 posX, f32 posY, f32 posZ, struct Surface **pceil) { // Increment the debug tracker. gNumCalls.ceil += 1; + #ifdef PUPPYPRINT + collisionTime[perfIteration] += osGetTime()-first; + #endif + return height; } @@ -562,6 +577,9 @@ f32 unused_find_dynamic_floor(f32 xPos, f32 yPos, f32 zPos, struct Surface **pfl */ f32 find_floor(f32 xPos, f32 yPos, f32 zPos, struct Surface **pfloor) { s16 cellZ, cellX; + #ifdef PUPPYPRINT + OSTime first = osGetTime(); + #endif struct Surface *floor, *dynamicFloor; struct SurfaceNode *surfaceList; @@ -579,9 +597,15 @@ f32 find_floor(f32 xPos, f32 yPos, f32 zPos, struct Surface **pfloor) { *pfloor = NULL; if (x <= -LEVEL_BOUNDARY_MAX || x >= LEVEL_BOUNDARY_MAX) { + #ifdef PUPPYPRINT + collisionTime[perfIteration] += osGetTime()-first; + #endif return height; } if (z <= -LEVEL_BOUNDARY_MAX || z >= LEVEL_BOUNDARY_MAX) { + #ifdef PUPPYPRINT + collisionTime[perfIteration] += osGetTime()-first; + #endif return height; } @@ -628,6 +652,10 @@ f32 find_floor(f32 xPos, f32 yPos, f32 zPos, struct Surface **pfloor) { // Increment the debug tracker. 
gNumCalls.floor += 1; + #ifdef PUPPYPRINT + collisionTime[perfIteration] += osGetTime()-first; + #endif + return height; } @@ -685,6 +713,9 @@ f32 find_water_level_and_floor(f32 x, f32 z, struct Surface **pfloor) { f32 waterLevel = FLOOR_LOWER_LIMIT; s16 *p = gEnvironmentRegions; struct Surface *floor = NULL; + #ifdef PUPPYPRINT + OSTime first = osGetTime(); + #endif if (gCheckingSurfaceCollisionsForCamera) { waterLevel = find_water_floor(x, gLakituState.pos[1], z, &floor); @@ -715,6 +746,10 @@ f32 find_water_level_and_floor(f32 x, f32 z, struct Surface **pfloor) { *pfloor = floor; } + #ifdef PUPPYPRINT + collisionTime[perfIteration] += osGetTime()-first; + #endif + return waterLevel; } @@ -729,6 +764,9 @@ f32 find_water_level(f32 x, f32 z) { f32 waterLevel = FLOOR_LOWER_LIMIT; s16 *p = gEnvironmentRegions; struct Surface *floor; + #ifdef PUPPYPRINT + OSTime first = osGetTime(); + #endif if (gCheckingSurfaceCollisionsForCamera) { waterLevel = find_water_floor(x, gLakituState.pos[1], z, &floor); @@ -757,6 +795,10 @@ f32 find_water_level(f32 x, f32 z) { } } + #ifdef PUPPYPRINT + collisionTime[perfIteration] += osGetTime()-first; + #endif + return waterLevel; } @@ -771,6 +813,9 @@ f32 find_poison_gas_level(f32 x, f32 z) { f32 loX, hiX, loZ, hiZ; f32 gasLevel = FLOOR_LOWER_LIMIT; s16 *p = gEnvironmentRegions; + #ifdef PUPPYPRINT + OSTime first = osGetTime(); + #endif if (p != NULL) { numRegions = *p++; @@ -797,6 +842,10 @@ f32 find_poison_gas_level(f32 x, f32 z) { } } + #ifdef PUPPYPRINT + collisionTime[perfIteration] += osGetTime()-first; + #endif + return gasLevel; } diff --git a/src/engine/surface_load.c b/src/engine/surface_load.c index 5072c02e..d889ea95 100644 --- a/src/engine/surface_load.c +++ b/src/engine/surface_load.c @@ -14,6 +14,7 @@ #include "game/mario.h" #include "game/object_list_processor.h" #include "surface_load.h" +#include "game/puppyprint.h" #include "config.h" @@ -617,6 +618,9 @@ void load_area_terrain(s16 index, s16 *data, s8 *surfaceRooms, s16 
*macroObjects s16 terrainLoadType; s16 *vertexData = NULL; UNUSED s32 unused; + #ifdef PUPPYPRINT + OSTime first = osGetTime(); + #endif // Initialize the data for this. gEnvironmentRegions = NULL; @@ -665,6 +669,9 @@ void load_area_terrain(s16 index, s16 *data, s8 *surfaceRooms, s16 *macroObjects gNumStaticSurfaceNodes = gSurfaceNodesAllocated; gNumStaticSurfaces = gSurfacesAllocated; + #ifdef PUPPYPRINT + collisionTime[perfIteration] += osGetTime()-first; + #endif } /** @@ -796,6 +803,9 @@ void load_object_surfaces(s16 **data, s16 *vertexData) { void load_object_collision_model(void) { UNUSED s32 unused; s16 vertexData[600]; + #ifdef PUPPYPRINT + OSTime first = osGetTime(); + #endif s16 *collisionData = gCurrentObject->collisionData; f32 marioDist = gCurrentObject->oDistanceToMario; @@ -830,4 +840,7 @@ void load_object_collision_model(void) { } else { gCurrentObject->header.gfx.node.flags &= ~GRAPH_RENDER_ACTIVE; } + #ifdef PUPPYPRINT + collisionTime[perfIteration] += osGetTime()-first; + #endif } diff --git a/src/game/area.c b/src/game/area.c index 7de856a2..75cb23fb 100644 --- a/src/game/area.c +++ b/src/game/area.c @@ -23,6 +23,7 @@ #include "save_file.h" #include "level_table.h" #include "dialog_ids.h" +#include "puppyprint.h" struct SpawnInfo gPlayerSpawnInfos[1]; struct GraphNode *gGraphNodePointers[MODEL_ID_COUNT]; @@ -413,6 +414,11 @@ void render_game(void) { } } + + #ifdef PUPPYPRINT + puppyprint_render_profiler(); + #endif + D_8032CE74 = NULL; D_8032CE78 = NULL; } diff --git a/src/game/game_init.c b/src/game/game_init.c index 504d5569..c608be8c 100644 --- a/src/game/game_init.c +++ b/src/game/game_init.c @@ -29,6 +29,7 @@ #ifdef SRAM #include "sram.h" #endif +#include "puppyprint.h" #include // First 3 controller slots @@ -421,6 +422,7 @@ void display_and_vsync(void) { gBorderHeight = BORDER_HEIGHT_CONSOLE; } profiler_log_thread5_time(BEFORE_DISPLAY_LISTS); + //gIsConsole = (IO_READ(DPC_PIPEBUSY_REG)); osRecvMesg(&gGfxVblankQueue, &gMainReceivedMesg, 
OS_MESG_BLOCK); if (gGoddardVblankCallback != NULL) { gGoddardVblankCallback(); @@ -709,6 +711,9 @@ void setup_game_memory(void) { */ void thread5_game_loop(UNUSED void *arg) { struct LevelCommand *addr; + #ifdef PUPPYPRINT + OSTime lastTime = 0; + #endif setup_game_memory(); #if ENABLE_RUMBLE @@ -742,6 +747,14 @@ void thread5_game_loop(UNUSED void *arg) { continue; } profiler_log_thread5_time(THREAD5_START); + #ifdef PUPPYPRINT + while (TRUE) + { + lastTime = osGetTime(); + collisionTime[perfIteration] = 0; + behaviourTime[perfIteration] = 0; + dmaTime[perfIteration] = 0; + #endif // If any controllers are plugged in, start read the data for when // read_controller_inputs is called later. @@ -756,6 +769,23 @@ void thread5_game_loop(UNUSED void *arg) { select_gfx_pool(); read_controller_inputs(); addr = level_script_execute(addr); + #ifdef PUPPYPRINT + profiler_update(scriptTime, lastTime); + if (benchmarkLoop > 0 && benchOption == 0) + { + benchmarkLoop--; + benchMark[benchmarkLoop] = osGetTime() - lastTime; + if (benchmarkLoop == 0) + { + puppyprint_profiler_finished(); + break; + } + } + else + break; + } + puppyprint_profiler_process(); + #endif display_and_vsync(); diff --git a/src/game/hud.c b/src/game/hud.c index cfbd0f0f..80406395 100644 --- a/src/game/hud.c +++ b/src/game/hud.c @@ -14,6 +14,7 @@ #include "save_file.h" #include "print.h" #include "engine/surface_load.h" +#include "puppyprint.h" #include "config.h" @@ -53,8 +54,12 @@ void print_fps(s32 x, s32 y) char text[14]; sprintf(text, "FPS %2.2f", fps); - + #ifdef PUPPYPRINT + print_small_text(x, y, text, PRINT_TEXT_ALIGN_LEFT, PRINT_ALL); + #else print_text(x, y, text); + #endif + } // ------------ END OF FPS COUNER ----------------- @@ -540,5 +545,8 @@ void render_hud(void) { render_debug_mode(); } #endif + #ifdef PUPPYPRINT + print_set_envcolour(255,255,255,255); + #endif } } diff --git a/src/game/main.c b/src/game/main.c index f2e25f43..5988a777 100644 --- a/src/game/main.c +++ b/src/game/main.c @@ 
-18,6 +18,7 @@ #include "usb/usb.h" #include "usb/debug.h" #endif +#include "puppyprint.h" // Message IDs #define MESG_SP_COMPLETE 100 @@ -188,6 +189,9 @@ void start_gfx_sptask(void) { if (gActiveSPTask == NULL && sCurrentDisplaySPTask != NULL && sCurrentDisplaySPTask->state == SPTASK_STATE_NOT_STARTED) { profiler_log_gfx_time(TASKS_QUEUED); + #ifdef PUPPYPRINT + rspDelta = osGetTime(); + #endif start_sptask(M_GFXTASK); } } @@ -233,6 +237,9 @@ void handle_vblank(void) { if (gActiveSPTask == NULL && sCurrentDisplaySPTask != NULL && sCurrentDisplaySPTask->state != SPTASK_STATE_FINISHED) { profiler_log_gfx_time(TASKS_QUEUED); + #ifdef PUPPYPRINT + rspDelta = osGetTime(); + #endif start_sptask(M_GFXTASK); } } @@ -265,6 +272,9 @@ void handle_sp_complete(void) { // The gfx task completed before we had time to interrupt it. // Mark it finished, just like below. curSPTask->state = SPTASK_STATE_FINISHED; + #ifdef PUPPYPRINT + profiler_update(rspGenTime, rspDelta); + #endif profiler_log_gfx_time(RSP_COMPLETE); } @@ -295,6 +305,9 @@ void handle_sp_complete(void) { // The SP process is done, but there is still a Display Processor notification // that needs to arrive before we can consider the task completely finished and // null out sCurrentDisplaySPTask. That happens in handle_dp_complete. 
+ #ifdef PUPPYPRINT + profiler_update(rspGenTime, rspDelta); + #endif profiler_log_gfx_time(RSP_COMPLETE); } } diff --git a/src/game/memory.c b/src/game/memory.c index 6903ecd9..e87d6613 100644 --- a/src/game/memory.c +++ b/src/game/memory.c @@ -21,6 +21,7 @@ #include "usb/usb.h" #include "usb/debug.h" #endif +#include "puppyprint.h" // round up to the next multiple @@ -134,6 +135,9 @@ void main_pool_init(void *start, void *end) { sPoolListHeadL->next = NULL; sPoolListHeadR->prev = NULL; sPoolListHeadR->next = NULL; + #ifdef PUPPYPRINT + mempool = sPoolFreeSpace; + #endif } /** @@ -256,6 +260,9 @@ u32 main_pool_pop_state(void) { */ void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) { u32 size = ALIGN16(srcEnd - srcStart); + #ifdef PUPPYPRINT + OSTime first = osGetTime(); + #endif osInvalDCache(dest, size); while (size != 0) { @@ -269,6 +276,9 @@ void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) { srcStart += copySize; size -= copySize; } + #ifdef PUPPYPRINT + dmaTime[perfIteration] += osGetTime()-first; + #endif } /** @@ -293,6 +303,9 @@ static void *dynamic_dma_read(u8 *srcStart, u8 *srcEnd, u32 side) { */ void *load_segment(s32 segment, u8 *srcStart, u8 *srcEnd, u32 side) { void *addr = dynamic_dma_read(srcStart, srcEnd, side); + #ifdef PUPPYPRINT + ramsizeSegment[segment+nameTable-2] = (s32)srcEnd- (s32)srcStart; + #endif if (addr != NULL) { set_segment_base_addr(segment, addr); @@ -374,6 +387,9 @@ void *load_segment_decompress(s32 segment, u8 *srcStart, u8 *srcEnd) { } } else { } + #ifdef PUPPYPRINT + ramsizeSegment[segment+nameTable-2] = (s32)srcEnd - (s32)srcStart; + #endif return dest; } diff --git a/src/game/object_list_processor.c b/src/game/object_list_processor.c index b523f14f..f071e217 100644 --- a/src/game/object_list_processor.c +++ b/src/game/object_list_processor.c @@ -19,6 +19,7 @@ #include "platform_displacement.h" #include "profiler.h" #include "spawn_object.h" +#include "puppyprint.h" /** @@ -625,6 +626,10 @@ UNUSED static u16 
unused_get_elapsed_time(u64 *cycleCounts, s32 index) {
  */
 void update_objects(UNUSED s32 unused) {
     s64 cycleCounts[30];
+    #ifdef PUPPYPRINT
+    OSTime first = osGetTime();
+    OSTime colTime = collisionTime[perfIteration]; // Collision time already accumulated before this call.
+    #endif
 
     cycleCounts[0] = get_current_clock();
 
@@ -683,4 +688,8 @@ void update_objects(UNUSED s32 unused) {
     }
 
     gPrevFrameObjectCount = gObjectCounter;
+    #ifdef PUPPYPRINT
+    profiler_update(behaviourTime, first);
+    behaviourTime[perfIteration] -= collisionTime[perfIteration] - colTime; // Exclude only the collision time accrued during this call.
+    #endif
 }
diff --git a/src/game/puppyprint.c b/src/game/puppyprint.c
new file mode 100644
index 00000000..80d8b25f
--- /dev/null
+++ b/src/game/puppyprint.c
@@ -0,0 +1,1000 @@
+/**
+--------------Puppyprint 1.0 by Fazana--------------
+Includes a few printing functions to fit any purpose.
+print_small_text is intended to replace print_generic_string in use, as it uses a far more optimised way of doing things,
+supports real time ascii conversion, and also supports many fun effects to spice up the text.
+Any usage of gDPSetEnvColor should ideally be replaced with print_set_envcolour because it helps with some optimisations.
+render_multi_image can be used to draw large texture rectangles consisting of multiple images on the screen.
+You only need to have the single image in its full form, with no need for splitting it, and simply just load it.
+
+As for the profiler, you can hold dpad up, and press L to toggle the display.
+Inside this display, if you press up on the dpad again, you can switch between performance, and memory view.
+If you press dpad down, you can toggle the benchmarking display.
+You can press dpad left or right to change which option, and you can measure game thread or audio thread performance by default.
+There's also a custom option that's left blank. It runs benchmark_custom which can contain anything of your choice.
+- Collision marks the time it takes to generate and process collision.
+- Behaviour marks the time it takes for objects to perform their behaviours. This excludes collision.
+- Graph measures the time it takes to process the node graphs, which is all the 3D geometry and rendering.
+- Audio measures the time it takes to process the audio samples, this excludes time spent loading.
+- DMA measures the time it takes to load things. In Vanilla, Mario's animations and audio samples are loaded from ROM as needed.
+**/
+
+#include <ultra64.h>
+#include "config.h" //PUPPYPRINT is defined in config.h, so it has to be included before the #ifdef below.
+
+#ifdef PUPPYPRINT
+
+#include "game_init.h"
+#include "memory.h"
+#include "print.h"
+#include "segment2.h"
+#include "string.h"
+#include "engine/math_util.h"
+#include "engine/behavior_script.h"
+#include "camera.h"
+#include "puppyprint.h"
+#include "level_update.h"
+#include "object_list_processor.h"
+#include "engine/surface_load.h"
+#include "audio/data.h"
+#include "hud.h"
+
+u8 currEnv[4];
+u8 fDebug = 0;
+s8 benchViewer = 0;
+u8 benchOption = 0;
+
+//Profiler values
+s8 perfIteration = 0;
+s16 benchmarkLoop = 0;
+s32 benchmarkTimer = 0;
+s32 benchmarkProgramTimer = 0;
+s8 benchmarkType = 0;
+//General
+OSTime cpuTime = 0;
+OSTime rspTime = 0;
+OSTime rdpTime = 0;
+OSTime ramTime = 0;
+OSTime loadTime = 0;
+OSTime gLastOSTime = 0;
+OSTime rspDelta = 0;
+s32 benchMark[NUM_BENCH_ITERATIONS+2]; //Samples in [0..NUM_BENCH_ITERATIONS-1], then the average, then the highest sample.
+//CPU. The last slot of each timer array receives the average written by get_average_perf_time.
+OSTime collisionTime[NUM_PERF_ITERATIONS+1];
+OSTime behaviourTime[NUM_PERF_ITERATIONS+1];
+OSTime scriptTime[NUM_PERF_ITERATIONS+1];
+OSTime graphTime[NUM_PERF_ITERATIONS+1];
+OSTime audioTime[NUM_PERF_ITERATIONS+1];
+OSTime dmaTime[NUM_PERF_ITERATIONS+1];
+OSTime dmaAudioTime[NUM_PERF_ITERATIONS+1];
+//RSP
+//audioTime is already defined with the CPU timers above.
+OSTime rspGenTime[NUM_PERF_ITERATIONS+1];
+//RDP
+OSTime bufferTime[NUM_PERF_ITERATIONS+1];
+OSTime tmemTime[NUM_PERF_ITERATIONS+1];
+OSTime busTime[NUM_PERF_ITERATIONS+1];
+//RAM
+s8 ramViewer = 0;
+s32 ramsizeSegment[33] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+s32 audioPool[12];
+s32 mempool;
+
+extern u8 _mainSegmentStart[];
+extern u8 _mainSegmentEnd[];
+extern u8 _engineSegmentStart[];
+extern u8 _engineSegmentEnd[];
+extern u8 _framebuffersSegmentBssStart[];
+extern u8 _framebuffersSegmentBssEnd[];
+extern u8 _buffersSegmentBssStart[];
+extern u8 _buffersSegmentBssEnd[];
+extern u8 _goddardSegmentStart[];
+extern u8 _goddardSegmentEnd[];
+
+//Start and end addresses of the global code segments, one {start, end} pair each. If you get rid of any, it's best to just write them as NULL.
+s32 ramP[5][2] = {
+    {&_buffersSegmentBssStart, &_buffersSegmentBssEnd},
+    {&_mainSegmentStart, &_mainSegmentEnd},
+    {&_engineSegmentStart, &_engineSegmentEnd},
+    {&_framebuffersSegmentBssStart, &_framebuffersSegmentBssEnd},
+    {&_goddardSegmentStart, &_goddardSegmentEnd},
+};
+//Fills ramsizeSegment[0..8] with the sizes of the fixed segments, the object/effect pools, collision pools and audio memory.
+void puppyprint_calculate_ram_usage(void)
+{
+    s32 temp[2];
+    s32 i = 0;
+
+    for (i = 0; i < 5; i++) //One pass per row of ramP.
+    {
+        if (!ramP[i][0] || !ramP[i][1]) //Rows written as NULL are skipped.
+            continue;
+        temp[0] = ramP[i][0];
+        temp[1] = ramP[i][1];
+        ramsizeSegment[i] = temp[1] - temp[0];
+    }
+
+    //These are a bit hacky, but what can ye do eh?
+    //gEffectsMemoryPool is 0x4000, gObjectsMemoryPool is 0x800. Epic C limitations mean I can't just sizeof their values :)
+    ramsizeSegment[5] = 0x4000 + 0x800;
+    ramsizeSegment[6] = (SURFACE_NODE_POOL_SIZE * sizeof(struct SurfaceNode)) + (SURFACE_POOL_SIZE * sizeof(struct Surface));
+    ramsizeSegment[7] = gAudioHeapSize + gAudioInitPoolSize; //NOTE(review): the init pool is carved out of gAudioHeap in sound_init_main_pools, so this may count it twice - confirm.
+    ramsizeSegment[8] = audioPool[0] + audioPool[1] + audioPool[2] + audioPool[3] + audioPool[4] + audioPool[5] +
+    audioPool[6] + audioPool[7] + audioPool[8] + audioPool[9] + audioPool[10] + audioPool[11];
+}
+//Called when a benchmark run ends. Writes the average sample to benchMark[NUM_BENCH_ITERATIONS] and the highest sample to benchMark[NUM_BENCH_ITERATIONS+1].
+void puppyprint_profiler_finished(void)
+{
+    s32 i = 0;
+    benchMark[NUM_BENCH_ITERATIONS] = 0;
+    benchMark[NUM_BENCH_ITERATIONS+1] = 0;
+    benchmarkTimer = 300; //Counted down once per call in puppyprint_render_profiler while the result box is shown.
+    benchViewer = 0;
+    for (i = 0; i < NUM_BENCH_ITERATIONS; i++) //Every recorded sample; the two result slots sit after them.
+    {
+        benchMark[NUM_BENCH_ITERATIONS] += benchMark[i];
+        if (benchMark[i] > benchMark[NUM_BENCH_ITERATIONS+1])
+            benchMark[NUM_BENCH_ITERATIONS+1] = benchMark[i];
+    }
+    benchMark[NUM_BENCH_ITERATIONS] /= NUM_BENCH_ITERATIONS;
+    benchmarkProgramTimer = OS_CYCLES_TO_USEC(osGetTime() - benchmarkProgramTimer); //Turns the stored start value into the elapsed time in microseconds.
+}
+
+//RGB colour lookup table for colouring all the funny ram prints. Only 31 rows are listed, so the last two entries are zero-initialised (black).
+u8 colourChart[33][3] = {
+    {255, 0, 0},
+    {0, 0, 255},
+    {0, 255, 0},
+    {255, 255, 0},
+    {255, 0, 255},
+    {255, 127, 0},
+    {0, 255, 255},
+    {51, 255, 51},
+    {255, 153, 153},
+    {204, 0, 102},
+    {0, 153, 153},
+    {153, 255, 153},
+    {0, 0, 128},
+    {128, 0, 128},
+    {218, 165, 32},
+    {107, 142, 35},
+    {188, 143, 143},
+    {210, 105, 30},
+    {154, 205, 50},
+    {165, 42, 42},
+    {255, 105, 180},
+    {139, 69, 19},
+    {250, 240, 230},
+    {95, 158, 160},
+    {60, 179, 113},
+    {255, 69, 0},
+    {128, 0, 0},
+    {216, 191, 216},
+    {244, 164, 96},
+    {176, 196, 222},
+    {255, 255, 255}};
+
+//Change this to alter the width of the bar at the bottom.
+#define BAR_LENGTH 200
+//Draws one horizontal bar at the bottom of the screen, split into coloured sections sized by each ramsizeSegment entry relative to osGetMemSize().
+void print_ram_bar(void)
+{
+    s32 i = 0;
+    f32 perfPercentage;
+    s32 graphPos = 0;
+    s32 prevGraph = 160-(BAR_LENGTH/2); //Left edge of the bar; the right edge is 160+(BAR_LENGTH/2).
+    s32 ramsize = osGetMemSize();
+
+    prepare_blank_box();
+
+    for (i = 0; i < 32; i++)
+    {
+        if (ramsizeSegment[i] == 0)
+            continue;
+        perfPercentage = (f32)ramsizeSegment[i]/ramsize;
+        graphPos = CLAMP(prevGraph + (s32)(BAR_LENGTH*perfPercentage), prevGraph + 1, 160+(BAR_LENGTH/2)); //At least one pixel wide, never past the right edge of the bar.
+        render_blank_box(prevGraph, 210, graphPos, 218, colourChart[i][0], colourChart[i][1], colourChart[i][2], 255);
+        prevGraph = graphPos;
+    }
+    perfPercentage = (f32)ramsizeSegment[32]/ramsize; //The last entry is always drawn, in white.
+    graphPos = CLAMP(prevGraph + (s32)(BAR_LENGTH*perfPercentage), prevGraph + 1, 160+(BAR_LENGTH/2));
+    render_blank_box(prevGraph, 210, graphPos, 218, 255, 255, 255, 255);
+    prevGraph = graphPos;
+
+    render_blank_box(prevGraph, 210, 160+(BAR_LENGTH/2), 218, 0, 0, 0, 255); //Whatever is left of the bar is filled black.
+
+    finish_blank_box();
+}
+//Another epic lookup table, for text this time.
+const char ramNames[9][32] = {
+    "Buffers",
+    "Main",
+    "Engine",
+    "Framebuffers",
+    "Goddard",
+    "Pools",
+    "Collision",
+    "Audio Heap",
+    "Audio Pools",
+};
+
+s8 nameTable = sizeof(ramNames)/32; //Number of named entries in ramNames.
+//Lists every non-empty ramsizeSegment entry as text, in two columns of 16, over a darkened screen.
+void print_ram_overview(void)
+{
+    s32 i = 0;
+    char textBytes[32];
+    s32 x = 80;
+    s32 y = 16;
+    s32 drawn = 0;
+    prepare_blank_box();
+    render_blank_box(0, 0, 320, 240, 0, 0, 0, 192);
+    finish_blank_box();
+
+    for (i = 0; i < 33; i++)
+    {
+        if (drawn == 16) //Start the second column after 16 printed entries.
+        {
+            x = 240;
+            y = 16;
+        }
+        if (ramsizeSegment[i] == 0)
+            continue;
+        if (i < nameTable) //Named entries come first; the rest are labelled by number.
+        {
+            sprintf(textBytes, "%s: %X", ramNames[i], ramsizeSegment[i]);
+        }
+        else
+        {
+            sprintf(textBytes, "Segment %02X: %X",i-nameTable+2, ramsizeSegment[i]);
+        }
+        print_set_envcolour(colourChart[i][0], colourChart[i][1], colourChart[i][2], 255);
+        print_small_text(x, y, textBytes, PRINT_TEXT_ALIGN_CENTRE, PRINT_ALL);
+        y += 12;
+        drawn++;
+    }
+}
+
+void benchmark_custom(void)
+{
+    OSTime lastTime;
+    while (TRUE)
+    {
+        lastTime = osGetTime();
+        //Insert your function here!
+
+        if (benchmarkLoop > 0 && benchOption == 2) //Only record samples while a run of the custom option is active.
+        {
+            benchmarkLoop--;
+            benchMark[benchmarkLoop] = osGetTime() - lastTime;
+            if (benchmarkLoop == 0)
+            {
+                puppyprint_profiler_finished();
+                break;
+            }
+        }
+        else
+            break;
+    }
+}
+//Display names of the benchmark options, indexed by benchOption.
+const char benchNames[][32] = {
+    "Game Thread",
+    "Audio Thread",
+    "Custom",
+};
+//Draws the benchmark selection box showing the currently selected option.
+void print_which_benchmark(void)
+{
+    char textBytes[40];
+
+    prepare_blank_box();
+    render_blank_box(110, 115, 210, 160, 0, 0, 0, 255);
+    finish_blank_box();
+    sprintf(textBytes, "Select Option#%s#L: Confirm", benchNames[benchOption]);
+    print_small_text(160,120, textBytes, PRINT_TEXT_ALIGN_CENTRE, PRINT_ALL);
+}
+
+extern void print_fps(s32 x, s32 y);
+//Draws the profiler overlay when fDebug is set: the RAM line and bar always, plus either the performance view, the RAM overview or the benchmark selection.
+void puppyprint_render_profiler(void)
+{
+    s32 perfPercentage[5];
+    s32 graphPos;
+    s32 prevGraph;
+    OSTime cpuCount = OS_CYCLES_TO_USEC(cpuTime+audioTime[NUM_PERF_ITERATIONS]+dmaAudioTime[NUM_PERF_ITERATIONS]);
+    char textBytes[80];
+
+    if (!fDebug)
+        return;
+
+    sprintf(textBytes, "RAM: %06X /%06X (%d_)", main_pool_available(), mempool, (s32)(((f32)main_pool_available()/(f32)mempool)*100));
+    print_small_text(160, 224, textBytes, PRINT_TEXT_ALIGN_CENTRE, PRINT_ALL);
+
+    if (!ramViewer && !benchViewer)
+    {
+        print_fps(16,40);
+        sprintf(textBytes, "CPU: %dus (%d_)#RSP: %dus (%d_)#RDP: %dus (%d_)", (s32)cpuCount, (s32)OS_CYCLES_TO_USEC(cpuTime)/333, (s32)OS_CYCLES_TO_USEC(rspTime), (s32)OS_CYCLES_TO_USEC(rspTime)/333, (s32)OS_CYCLES_TO_USEC(rdpTime), (s32)OS_CYCLES_TO_USEC(rdpTime)/333);
+        print_small_text(16, 52, textBytes, PRINT_TEXT_ALIGN_LEFT, PRINT_ALL);
+
+        sprintf(textBytes, "OBJ: %d/%d", gObjectCounter, OBJECT_POOL_CAPACITY);
+        print_small_text(16, 124, textBytes, PRINT_TEXT_ALIGN_LEFT, PRINT_ALL);
+
+        //Very little point printing useless info if Mayro doesn't even exist.
+        if (gMarioState->marioObj)
+        {
+            sprintf(textBytes, "Mario Pos#X: %d#Y: %d#Z: %d#D: %X", (s32)(gMarioState->pos[0]), (s32)(gMarioState->pos[1]), (s32)(gMarioState->pos[2]), (u16)(gMarioState->faceAngle[1]));
+            print_small_text(16, 140, textBytes, PRINT_TEXT_ALIGN_LEFT, PRINT_ALL);
+        }
+        //Same for the camera, especially so because this will crash otherwise.
+        if (gCamera)
+        {
+            sprintf(textBytes, "Camera Pos#X: %d#Y: %d#Z: %d#D: %X", (s32)(gCamera->pos[0]), (s32)(gCamera->pos[1]), (s32)(gCamera->pos[2]), (u16)(gCamera->yaw));
+            print_small_text(304, 140, textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL);
+        }
+
+        if (benchmarkTimer > 0) //Show the last benchmark result until the timer set in puppyprint_profiler_finished runs out.
+        {
+            benchmarkTimer--;
+            prepare_blank_box();
+            //sprintf(textBytes, "Benchmark: %dus#High: %dus", (s32)OS_CYCLES_TO_USEC(benchMark[NUM_BENCH_ITERATIONS]), (s32)OS_CYCLES_TO_USEC(benchMark[NUM_BENCH_ITERATIONS+1]));
+            sprintf(textBytes, "Done in %0.000f seconds#Benchmark: %dus#High: %dus", (f32)(benchmarkProgramTimer)*0.000001f, (s32)OS_CYCLES_TO_USEC(benchMark[NUM_BENCH_ITERATIONS]), (s32)OS_CYCLES_TO_USEC(benchMark[NUM_BENCH_ITERATIONS+1])); //NOTE(review): "%0.000f" asks for zero decimals in standard printf - confirm that is intended.
+            render_blank_box(160-(get_text_width(textBytes)/2)-4, 158, 160+(get_text_width(textBytes)/2)+4, 196, 0, 0, 0, 255);
+            print_set_envcolour(255, 255, 255, 255);
+            print_small_text(160, 160, textBytes, PRINT_TEXT_ALIGN_CENTRE, PRINT_ALL);
+            finish_blank_box();
+        }
+
+        #define ADDTIMES MAX((collisionTime[NUM_PERF_ITERATIONS] + graphTime[NUM_PERF_ITERATIONS] + behaviourTime[NUM_PERF_ITERATIONS] + audioTime[NUM_PERF_ITERATIONS] + dmaTime[NUM_PERF_ITERATIONS])/80, 1)
+        perfPercentage[0] = MAX((collisionTime[NUM_PERF_ITERATIONS]/ADDTIMES), 1); //Each share is scaled so the five sections add up to roughly 80 pixels, with a one pixel minimum.
+        perfPercentage[1] = MAX((graphTime[NUM_PERF_ITERATIONS]/ADDTIMES), 1);
+        perfPercentage[2] = MAX((behaviourTime[NUM_PERF_ITERATIONS]/ADDTIMES), 1);
+        perfPercentage[3] = MAX((audioTime[NUM_PERF_ITERATIONS]/ADDTIMES), 1);
+        perfPercentage[4] = MAX((dmaTime[NUM_PERF_ITERATIONS]/ADDTIMES), 1);
+        #undef ADDTIMES
+
+        sprintf(textBytes, "Collision: %dus", (s32)OS_CYCLES_TO_USEC(collisionTime[NUM_PERF_ITERATIONS]));
+        print_small_text(304, 40, textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL);
+        sprintf(textBytes, "Graph: %dus", (s32)OS_CYCLES_TO_USEC(graphTime[NUM_PERF_ITERATIONS]));
+        print_small_text(304, 52, textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL);
+        sprintf(textBytes, "Behaviour: %dus", (s32)OS_CYCLES_TO_USEC(behaviourTime[NUM_PERF_ITERATIONS]));
+        print_small_text(304, 64, textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL);
+        sprintf(textBytes, "Audio: %dus", (s32)OS_CYCLES_TO_USEC(audioTime[NUM_PERF_ITERATIONS]));
+        print_small_text(304, 76, textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL);
+        sprintf(textBytes, "DMA: %dus", (s32)OS_CYCLES_TO_USEC(dmaTime[NUM_PERF_ITERATIONS]));
+        print_small_text(304, 88, textBytes, PRINT_TEXT_ALIGN_RIGHT, PRINT_ALL);
+
+        //Render CPU breakdown bar.
+        prepare_blank_box();
+        graphPos = 224 + perfPercentage[0];
+        render_blank_box(224, 104, graphPos, 112, 255, 0, 0, 255);
+        prevGraph = graphPos;
+        graphPos += perfPercentage[1];
+        render_blank_box(prevGraph, 104, graphPos, 112, 0, 0, 255, 255);
+        prevGraph = graphPos;
+        graphPos += perfPercentage[2];
+        render_blank_box(prevGraph, 104, graphPos, 112, 0, 255, 0, 255);
+        prevGraph = graphPos;
+        graphPos += perfPercentage[3];
+        render_blank_box(prevGraph, 104, graphPos, 112, 255, 255, 0, 255);
+        prevGraph = graphPos;
+        graphPos += perfPercentage[4];
+        render_blank_box(prevGraph, 104, 304, 112, 255, 0, 255, 255); //The last section always runs to x=304. NOTE(review): no finish_blank_box() follows this prepare_blank_box(); print_ram_bar below prepares and finishes again - confirm that is enough.
+    }
+    else
+    if (ramViewer)
+        print_ram_overview();
+    else
+    if (benchViewer)
+        print_which_benchmark();
+
+    print_ram_bar();
+}
+//Stores the time elapsed since time2 into the current perfIteration slot of the given timer array.
+void profiler_update(OSTime *time, OSTime time2)
+{
+    time[perfIteration] = osGetTime() - time2;
+}
+
+void get_average_perf_time(OSTime *time)
+{
+    //This takes all but the last index of the timer array, and creates an average value, which is written to the last index.
+ s32 i = 0; + s32 total = 0; + for (i = 0; i < NUM_PERF_ITERATIONS-1; i++) + { + total += time[i]; + } + time[NUM_PERF_ITERATIONS] = total/NUM_PERF_ITERATIONS; +} + +void puppyprint_profiler_process(void) +{ + bufferTime[perfIteration] = (IO_READ(DPC_BUFBUSY_REG)); + tmemTime[perfIteration] = (IO_READ(DPC_TMEM_REG)); + busTime[perfIteration] = (IO_READ(DPC_PIPEBUSY_REG)); + OSTime newTime = osGetTime(); + + if (gGlobalTimer % 15 == 0) + { + get_average_perf_time(scriptTime); + get_average_perf_time(behaviourTime); + get_average_perf_time(collisionTime); + get_average_perf_time(graphTime); + get_average_perf_time(audioTime); + get_average_perf_time(dmaTime); + get_average_perf_time(dmaAudioTime); + + dmaTime[NUM_PERF_ITERATIONS] += dmaAudioTime[NUM_PERF_ITERATIONS]; + + get_average_perf_time(rspGenTime); + + get_average_perf_time(bufferTime); + get_average_perf_time(tmemTime); + get_average_perf_time(busTime); + + rdpTime = bufferTime[NUM_PERF_ITERATIONS]; + rdpTime = MAX(rdpTime, tmemTime[NUM_PERF_ITERATIONS]); + rdpTime = MAX(rdpTime, busTime[NUM_PERF_ITERATIONS]); + cpuTime = scriptTime[NUM_PERF_ITERATIONS]; + rspTime = rspGenTime[NUM_PERF_ITERATIONS]; + puppyprint_calculate_ram_usage(); + } + + gLastOSTime = newTime; + if (gGlobalTimer > 5) + IO_WRITE(DPC_STATUS_REG, DPC_CLR_CLOCK_CTR | DPC_CLR_CMD_CTR | DPC_CLR_PIPE_CTR | DPC_CLR_TMEM_CTR); + + if (fDebug) + { + if (gPlayer1Controller->buttonPressed & D_JPAD) + { + benchViewer ^= 1; + ramViewer = 0; + } + else + if (gPlayer1Controller->buttonPressed & U_JPAD && !(gPlayer1Controller->buttonPressed & L_TRIG)) + { + ramViewer ^= 1; + benchViewer = 0; + } + if (benchViewer) + { + if (gPlayer1Controller->buttonPressed & R_JPAD) + benchOption++; + if (gPlayer1Controller->buttonPressed & L_JPAD) + benchOption--; + if (benchOption == 255) + benchOption = 2; + if (benchOption > 2) + benchOption = 0; + if (gPlayer1Controller->buttonPressed & L_TRIG) + { + benchmarkLoop = NUM_BENCH_ITERATIONS; + benchmarkProgramTimer = 
osGetTime(); + } + } + benchmark_custom(); + } + if (gPlayer1Controller->buttonDown & U_JPAD && gPlayer1Controller->buttonPressed & L_TRIG) + { + ramViewer = 0; + benchViewer = 0; + fDebug ^= 1; + } + + + if (perfIteration++ == NUM_PERF_ITERATIONS-1) + perfIteration = 0; +} + +void print_set_envcolour(s32 r, s32 g, s32 b, s32 a) +{ + if (r != currEnv[0] || g != currEnv[1] || b != currEnv[2] || a != currEnv[3]) + { + gDPSetEnvColor(gDisplayListHead++, (u8)r, (u8)g, (u8)b, (u8)a); + currEnv[0] = r; + currEnv[1] = g; + currEnv[2] = b; + currEnv[3] = a; + } +} + +#define BLANK 0, 0, 0, ENVIRONMENT, 0, 0, 0, ENVIRONMENT + +void prepare_blank_box(void) +{ + gDPSetCombineMode(gDisplayListHead++, BLANK, BLANK); +} + +void finish_blank_box(void) +{ + print_set_envcolour(255, 255, 255, 255); + gSPDisplayList(gDisplayListHead++,dl_hud_img_end); +} + +//This does some epic shenanigans to figure out the optimal way to draw this. +//If the width is a multiple of 4, then use fillmode (fastest) +//Otherwise, if there's transparency, it uses that rendermode, which is slower than using opaque rendermodes. 
+void render_blank_box(s16 x1, s16 y1, s16 x2, s16 y2, u8 r, u8 g, u8 b, u8 a)
+{
+    //Draws a flat rectangle from (x1, y1) to (x2, y2) in the given colour. Expects prepare_blank_box to have been called first.
+    s32 cycleadd = 0;
+    if (ABS(x1 - x2) % 4 == 0 && a == 255)
+    {
+        //Opaque box whose width is a multiple of 4: fill mode.
+        gDPSetCycleType(gDisplayListHead++, G_CYC_FILL);
+        gDPSetRenderMode(gDisplayListHead++, G_RM_NOOP, G_RM_NOOP);
+        //In this mode the lower right corner is pulled in by one pixel (see the rectangle call below).
+        cycleadd = 1;
+    }
+    else
+    {
+        gDPSetCycleType(gDisplayListHead++, G_CYC_1CYCLE);
+        if (a == 255)
+        {
+            gDPSetRenderMode(gDisplayListHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
+        }
+        else
+        {
+            gDPSetRenderMode(gDisplayListHead++, G_RM_XLU_SURF, G_RM_XLU_SURF2);
+        }
+        cycleadd = 0;
+    }
+    gDPPipeSync(gDisplayListHead++);
+    //The fill colour is the 16 bit colour packed twice into one 32 bit word. The env colour is what the 1-cycle path draws with.
+    gDPSetFillColor(gDisplayListHead++, GPACK_RGBA5551(r, g, b, 1) << 16 | GPACK_RGBA5551(r, g, b, 1));
+    print_set_envcolour(r, g, b, a);
+    gDPFillRectangle(gDisplayListHead++, x1, y1, x2-cycleadd, y2-cycleadd);
+}
+
+
+//Advance width of every glyph in the small font, in the same order as the glyphs sit in the font sheet (16 per row, 5 rows).
+u8 textLen[] = {
+    /*0*/ 6, /*1*/ 5, /*2*/ 7, /*3*/ 7, /*4*/ 7, /*5*/ 7, /*6*/ 8, /*7*/ 7, /*8*/ 7, /*9*/ 6, /*-*/ 8, /*+*/ 8, /*(*/ 5, /*)*/ 5, /*!*/ 4, /*?*/ 6,
+    /*A*/ 7, /*B*/ 7, /*C*/ 7, /*D*/ 7, /*E*/ 6, /*F*/ 5, /*G*/ 8, /*H*/ 6, /*I*/ 6, /*J*/ 5, /*K*/ 7, /*L*/ 6, /*M*/ 7, /*N*/ 7, /*O*/ 7, /*P*/ 6,
+    /*Q*/ 8, /*R*/ 6, /*S*/ 7, /*T*/ 7, /*U*/ 7, /*V*/ 7, /*W*/ 8, /*X*/ 7, /*Y*/ 7, /*Z*/ 7, /*"*/ 5, /*'*/ 2, /*:*/ 3, /*;*/ 3, /*.*/ 3, /*,*/ 3,
+    /*a*/ 7, /*b*/ 7, /*c*/ 6, /*d*/ 7, /*e*/ 7, /*f*/ 7, /*g*/ 7, /*h*/ 7, /*i*/ 3, /*j*/ 5, /*k*/ 8, /*l*/ 4, /*m*/ 7, /*n*/ 7, /*o*/ 7, /*p*/ 7,
+    /*q*/ 7, /*r*/ 6, /*s*/ 6, /*t*/ 6, /*u*/ 6, /*v*/ 7, /*w*/ 8, /*x*/ 6, /*y*/ 8, /*z*/ 7, /*~*/ 8, /*..*/ 7, /*^*/ 8, /*/*/ 8, /*%*/ 8, /*&*/ 8,
+};
+
+#include "level_update.h"
+
+//Looks up one character of the small font.
+//Outputs: textX/textY = position of the glyph in the font sheet (in the units print_small_text feeds to the texture rectangle),
+//spaceX = advance width from textLen, offsetY = how far the glyph is moved down on the line.
+//Any character without a glyph is treated as a space (textX 128, width 2).
+void get_char_from_byte(u8 letter, s32 *textX, s32 *textY, s32 *spaceX, s32 *offsetY)
+{
+    *offsetY = 0;
+    //Line 1
+    if (letter >= '0' && letter <= '9')
+    {
+        *textX = (letter - '0') * 4;
+        *textY = 0;
+        *spaceX = textLen[letter - '0'];
+    }
+    else
+    //Line 2
+    if (letter >= 'A' && letter <= 'P')
+    {
+        *textX = ((letter - 'A') * 4);
+        *textY = 6;
+        *spaceX = textLen[letter - 'A'+16];
+    }
+    else
+    //Line 3
+    if (letter >= 'Q' && letter <= 'Z')
+    {
+        *textX = ((letter - 'Q') * 4);
+        *textY = 12;
+        *spaceX = textLen[letter - 'Q'+32];
+    }
+    else
+    //Line 4
+    if (letter >= 'a' && letter <= 'p')
+    {
+        *textX = ((letter - 'a') * 4);
+        *textY = 18;
+        *spaceX = textLen[letter - 'a'+48];
+    }
+    else
+    //Line 5
+    if (letter >= 'q' && letter <= 'z')
+    {
+        *textX = ((letter - 'q') * 4);
+        *textY = 24;
+        *spaceX = textLen[letter - 'q'+64];
+    }
+    else
+    {//Space, the final frontier.
+        *textX = 128;
+        *textY = 0;
+        *spaceX = 2;
+    }
+
+    //Punctuation fills the ends of rows 1, 3 and 5 of the sheet. These cases overwrite the "space" default set above.
+    switch (letter)
+    {
+        case '-': *textX = 40; *textY = 0; *spaceX = textLen[10]; break; //Hyphen
+        case '+': *textX = 44; *textY = 0; *spaceX = textLen[11]; break; //Plus
+        case '(': *textX = 48; *textY = 0; *spaceX = textLen[12]; break; //Open Bracket
+        case ')': *textX = 52; *textY = 0; *spaceX = textLen[13]; break; //Close Bracket
+        case '!': *textX = 56; *textY = 0; *spaceX = textLen[14]; break; //Exclamation mark
+        case '?': *textX = 60; *textY = 0; *spaceX = textLen[15]; break; //Question mark
+
+        case '"': *textX = 40; *textY = 12; *spaceX = textLen[42]; break; //Speech mark
+        case 0x27: *textX = 44; *textY = 12; *spaceX = textLen[43]; break; //Apostrophe.
+        case ':': *textX = 48; *textY = 12; *spaceX = textLen[44]; break; //Colon
+        case ';': *textX = 52; *textY = 12; *spaceX = textLen[45]; break; //Semicolon
+        case '.': *textX = 56; *textY = 12; *spaceX = textLen[46]; break; //Full stop
+        case ',': *textX = 60; *textY = 12; *spaceX = textLen[47]; break; //Comma
+
+        case '~': *textX = 40; *textY = 24; *spaceX = textLen[74]; break; //Tilde
+        case '@': *textX = 44; *textY = 24; *spaceX = textLen[75]; break; //'@' in a string selects the umlaut glyph
+        case '^': *textX = 48; *textY = 24; *spaceX = textLen[76]; break; //Caret
+        case '/': *textX = 52; *textY = 24; *spaceX = textLen[77]; break; //Slash
+        case '_': *textX = 56; *textY = 24; *spaceX = textLen[78]; break; //'_' in a string selects the percent glyph
+        case '&': *textX = 60; *textY = 24; *spaceX = textLen[79]; break; //Ampersand
+
+        //This is for the letters that sit differently on the line. It just moves them down a bit.
+        case 'g': *offsetY = 1; break;
+        case 'q': *offsetY = 1; break;
+        case 'p': *offsetY = 3; break;
+        case 'y': *offsetY = 1; break;
+    }
+}
+
+//Text effect switches. print_small_text clears them at the start of every call; the shake and wave tags flip them.
+s8 shakeToggle = 0;
+s8 waveToggle = 0;
+
+//Handles a "<...>" command tag that starts at str[i].
+//Returns how many characters the tag occupies, including the closing '>' when there is one, so the caller can skip it.
+//When runCMD is non-zero the tag is also executed (colour change or effect toggle); otherwise it is only measured.
+//NOTE(review): the five tag literals below were empty strings in the text this was restored from, so no tag could ever match.
+//They are rebuilt from the prefix lengths and digit offsets the code already used - confirm against the upstream source.
+s32 text_iterate_command(const char *str, s32 i, s32 runCMD)
+{
+    s32 strLen = (signed)strlen(str);
+    s32 len = 0;
+
+    //Check the bound before reading the character, and only count the '>' if it is really there.
+    //An unterminated tag now stops at the terminator instead of stepping one past it.
+    while (i+len < strLen && str[i+len] != '>')
+        len++;
+    if (i+len < strLen)
+        len++;
+
+    if (runCMD)
+    {
+        //Simple text colour effect, written <COL_rrggbbaa>. Goes up to 99 for each, so 99000000 is red.
+        //A complete tag is 14 characters; anything shorter is skipped so the digit reads below stay inside the tag.
+        if (len >= 14 && strncmp(str+i, "<COL_", 5) == 0)
+        {
+            s32 r, g, b, a;
+            //Each value is taken from the string. The first digit is multiplied by 10, because it's the more significant one, then the next digit is added onto it.
+            r = (str[i+5] - '0')*10;
+            r += str[i+6] - '0';
+            g = (str[i+7] - '0')*10;
+            g += str[i+8] - '0';
+            b = (str[i+9] - '0')*10;
+            b += str[i+10] - '0';
+            a = (str[i+11] - '0')*10;
+            a += str[i+12] - '0';
+            //Multiply each value afterwards by 2.575f to stretch 0-99 out to roughly 0-255.
+            print_set_envcolour(r*2.575f, g*2.575f, b*2.575f, a*2.575f);
+        }
+        else
+        //Same as above, except it fades between two colours: <FADE_rrggbbaa,rrggbbaa,ss>. The third number is the speed it fades.
+        //A complete tag is 27 characters.
+        if (len >= 27 && strncmp(str+i, "<FADE_", 6) == 0)
+        {
+            s32 r, g, b, a, r2, g2, b2, a2, spd, r3, g3, b3, a3, r4, g4, b4, a4;
+            f32 wave;
+            r = (str[i+6] - '0')*10;
+            r += str[i+7] - '0';
+            g = (str[i+8] - '0')*10;
+            g += str[i+9] - '0';
+            b = (str[i+10] - '0')*10;
+            b += str[i+11] - '0';
+            a = (str[i+12] - '0')*10;
+            a += str[i+13] - '0';
+            r2 = (str[i+15] - '0')*10;
+            r2 += str[i+16] - '0';
+            g2 = (str[i+17] - '0')*10;
+            g2 += str[i+18] - '0';
+            b2 = (str[i+19] - '0')*10;
+            b2 += str[i+20] - '0';
+            a2 = (str[i+21] - '0')*10;
+            a2 += str[i+22] - '0';
+            spd = (str[i+24] - '0')*10;
+            spd += str[i+25] - '0';
+
+            //Find the midpoint of the two colours (1.2875f is half of the 2.575f scale used above).
+            r3 = (r + r2)*1.2875f;
+            g3 = (g + g2)*1.2875f;
+            b3 = (b + b2)*1.2875f;
+            a3 = (a + a2)*1.2875f;
+            //Find half the difference.
+            r4 = (r - r2)*1.2875f;
+            g4 = (g - g2)*1.2875f;
+            b4 = (b - b2)*1.2875f;
+            a4 = (a - a2)*1.2875f;
+            //Now start from the midpoint, and wave from end to end with the difference, to create the fading effect.
+            wave = sins(gGlobalTimer*spd*50);
+            print_set_envcolour(r3 + (wave * r4), g3 + (wave * g4), b3 + (wave * b4), a3 + (wave * a4));
+        }
+        else
+        //Sets the happy colours :o) The colour cycles with gGlobalTimer, with the three channels a third of a turn apart.
+        if (strncmp(str+i, "<RAINBOW>", 8) == 0)
+        {
+            s32 r, g, b;
+            r = (coss(gGlobalTimer*600)+1)*127;
+            g = (coss((gGlobalTimer*600)+21845)+1)*127;
+            b = (coss((gGlobalTimer*600)-21845)+1)*127;
+            print_set_envcolour(r, g, b, 255);
+        }
+        else
+        //Toggles text that shakes on the spot. Do it again to disable it.
+        if (strncmp(str+i, "<SHAKE>", 7) == 0)
+        {
+            shakeToggle^=1;
+        }
+        else
+        //Toggles text that waves around. Do it again to disable it.
+        if (strncmp(str+i, "<WAVE>", 6) == 0)
+        {
+            waveToggle^=1;
+        }
+
+    }
+
+    return len;
+}
+
+//Returns the width in pixels of the widest line of str. '#' starts a new line and "<...>" tags take up no width.
+s32 get_text_width(const char *str)
+{
+    s32 i= 0;
+    s32 textPos = 0;
+    s32 wideX = 0;
+    s32 textX, textY, offsetY, spaceX;
+    s32 strLen = (signed)strlen(str);
+
+    for (i = 0; i < strLen; i++)
+    {
+        if (str[i] == '#')
+        {
+            i++;
+            textPos = 0;
+        }
+        if (str[i] == '<')
+        {
+            i+= text_iterate_command(str, i, FALSE);
+        }
+        get_char_from_byte(str[i], &textX, &textY, &spaceX, &offsetY);
+        textPos+=spaceX+1;
+        wideX = MAX(textPos, wideX);
+    }
+    return wideX;
+}
+
+//Returns 12 pixels for every '#' line break in str. The first line itself is not counted.
+s32 get_text_height(const char *str)
+{
+    s32 i= 0;
+    s32 textPos = 0;
+    s32 strLen = (signed)strlen(str);
+
+    for (i = 0; i < strLen; i++)
+    {
+        if (str[i] == '#')
+        {
+            i++;
+            textPos+=12;
+        }
+    }
+    return textPos;
+}
+
+//Draws str with the small font at (x, y).
+//align is one of PRINT_TEXT_ALIGN_LEFT/CENTRE/RIGHT and is applied to every line separately.
+//amount is how many characters of str to draw; PRINT_ALL (or anything past the end of the string) draws all of it.
+//'#' starts a new line 12 pixels further down; "<...>" tags are run through text_iterate_command.
+void print_small_text(s32 x, s32 y, const char *str, s32 align, s32 amount)
+{
+    s32 textX = 0;
+    s32 textY = 0;
+    s32 offsetY = 0;
+    s32 i = 0;
+    s32 textPos[2] = {0,0};
+    s32 spaceX = 0;
+    s32 wideX[12] = {0,0,0,0,0,0,0,0,0,0,0,0};
+    //Highest usable index into wideX. Text with more lines than that shares the last slot instead of writing past the array.
+    s32 maxLine = (s32)(sizeof(wideX)/sizeof(wideX[0])) - 1;
+    s32 strLen = (signed)strlen(str);
+    s32 tx = amount;
+    s32 shakePos[2];
+    s32 wavePos;
+    s32 lines = 0;
+    s32 xlu = currEnv[3];
+    s32 prevxlu = 256; //Set out of bounds, so it will *always* be different at first.
+
+    shakeToggle = 0;
+    waveToggle = 0;
+
+    //Never draw past the end of the string.
+    if (amount == PRINT_ALL || amount > strLen)
+        tx = strLen;
+    gDPSetCycleType(gDisplayListHead++, G_CYC_1CYCLE);
+    gDPSetTexturePersp(gDisplayListHead++, G_TP_NONE);
+    gDPSetCombineMode(gDisplayListHead++, G_CC_FADEA, G_CC_FADEA);
+    gDPSetTextureFilter(gDisplayListHead++, G_TF_POINT);
+    //Centred and right aligned text needs the width of every line before anything is drawn.
+    if (align == PRINT_TEXT_ALIGN_CENTRE)
+    {
+        for (i = 0; i < strLen; i++)
+        {
+            if (str[i] == '#')
+            {
+                i++;
+                textPos[0] = 0;
+                if (lines < maxLine)
+                    lines++;
+            }
+            if (str[i] == '<')
+            {
+                i+= text_iterate_command(str, i, FALSE);
+            }
+            get_char_from_byte(str[i], &textX, &textY, &spaceX, &offsetY);
+            textPos[0]+=spaceX+1;
+            wideX[lines] = MAX(textPos[0], wideX[lines]);
+        }
+        textPos[0] = -(wideX[0]/2);
+    }
+    else
+    if (align == PRINT_TEXT_ALIGN_RIGHT)
+    {
+        for (i = 0; i < strLen; i++)
+        {
+            if (str[i] == '#')
+            {
+                i++;
+                textPos[0] = 0;
+                if (lines < maxLine)
+                    lines++;
+            }
+            else
+            {
+                //Adds the width of the previous character (spaceX still holds it from the last pass through the loop).
+                textPos[0]+=spaceX+1;
+            }
+            if (str[i] == '<')
+            {
+                i+= text_iterate_command(str, i, FALSE);
+            }
+            get_char_from_byte(str[i], &textX, &textY, &spaceX, &offsetY);
+
+            wideX[lines] = MAX(textPos[0], wideX[lines]);
+        }
+        textPos[0] = -wideX[0];
+    }
+    lines = 0;
+    gDPLoadTextureBlock_4b(gDisplayListHead++, segmented_to_virtual(small_font), G_IM_FMT_I, 128, 60, G_TX_NOMIRROR | G_TX_CLAMP, G_TX_NOMIRROR | G_TX_CLAMP, 0, 0, 0, 0, 0);
+    for (i = 0; i < tx; i++)
+    {
+        if (str[i] == '#')
+        {
+            i++;
+            if (lines < maxLine)
+                lines++;
+            if (align == PRINT_TEXT_ALIGN_RIGHT)
+                textPos[0] = -(wideX[lines]);
+            else
+                textPos[0] = -(wideX[lines]/2);
+            textPos[1] += 12;
+        }
+        if (str[i] == '<')
+        {
+            i+= text_iterate_command(str, i, TRUE);
+        }
+        if (shakeToggle)
+        {
+            shakePos[0] = -1+(random_u16() % 2);
+            shakePos[1] = -1+(random_u16() % 2);
+        }
+        else
+        {
+            shakePos[0] = 0;
+            shakePos[1] = 0;
+        }
+        if (waveToggle)
+        {
+            wavePos = (sins((gGlobalTimer*3000)+(i*10000)))*2;
+        }
+        else
+        {
+            wavePos = 0;
+        }
+        get_char_from_byte(str[i], &textX, &textY, &spaceX, &offsetY);
+        //xlu is the env alpha read once on entry, so the render mode is only set on the first character.
+        if (xlu != prevxlu)
+        {
+            prevxlu = xlu;
+            if (xlu > 250)
+            {
+                gDPSetRenderMode(gDisplayListHead++, G_RM_TEX_EDGE, G_RM_TEX_EDGE2);
+            }
+            else
+            {
+                //Second argument is the cycle 2 variant, like every other render mode pair in this file.
+                gDPSetRenderMode(gDisplayListHead++, G_RM_XLU_SURF, G_RM_XLU_SURF2);
+            }
+        }
+        gSPScisTextureRectangle(gDisplayListHead++, (x+shakePos[0]+textPos[0]) << 2, (y+shakePos[1]+offsetY+textPos[1]+wavePos) << 2, (x+textPos[0]+shakePos[0]+8) << 2, (y+wavePos+offsetY+shakePos[1]+12+textPos[1]) << 2, G_TX_RENDERTILE, textX << 6, textY << 6, 1 << 10, 1 << 10);
+        textPos[0]+=spaceX+1;
+    }
+    gSPDisplayList(gDisplayListHead++, dl_rgba16_text_end);
+}
+
+//Draws an RGBA16 image of any size at (x, y) by cutting it into tiles that are loaded and drawn one at a time.
+//mode is the cycle type: G_CYC_COPY uses the copy path, anything else is drawn with the translucent render mode.
+//scaleX and scaleY are accepted but not used by this implementation.
+//If no usable tile size is found, an error message is printed on screen and nothing is drawn.
+void render_multi_image(Texture *image, s32 x, s32 y, s32 width, s32 height, s32 scaleX, s32 scaleY, s32 mode)
+{
+    s32 posW, posH, imW, imH, peakH, maskW, maskH, cycles, num, i, modeSC, mOne;
+    i = 0;
+    num = 256;
+    maskW = 1;
+    maskH = 1;
+
+    gDPSetCycleType(gDisplayListHead++, mode);
+    if (mode == G_CYC_COPY)
+    {
+        gDPSetRenderMode(gDisplayListHead++, G_RM_NOOP, G_RM_NOOP2);
+        //Copy mode uses a horizontal step of 4 and a lower right corner pulled in by one pixel.
+        modeSC = 4;
+        mOne = 1;
+    }
+    else
+    {
+        gDPSetRenderMode(gDisplayListHead++, G_RM_XLU_SURF, G_RM_XLU_SURF2);
+        modeSC = 1;
+        mOne = 0;
+    }
+
+
+    //Find how best to separate the horizontal: the largest power of two, from 256 down to 2, that divides the width exactly.
+    while (1)
+    {
+        if (width >= num && width % num == 0)
+        {
+            imW = num;
+            break;
+        }
+        num /= 2;
+        if (num == 1)
+        {
+            print_text(32,32,"IMAGE WIDTH FAILURE");
+            return;
+        }
+    }
+    //Find the tile height: a tile holds 2048 texels (32x64, 64x32 and so on).
+    //Same result as 64/(imW/32) for tile widths of 32 and up, but that form divided by zero for widths below 32.
+    imH = 2048/imW;
+
+    num = 2;
+    //Find the width mask (log2 of the tile width)
+    while (1)
+    {
+        if ((s32) num == imW)
+            break;
+
+        num*=2;
+        maskW++;
+        if (maskW == 9)
+        {
+            print_text(32,32,"WIDTH MASK FAILURE");
+            return;
+        }
+    }
+    num = 2;
+    //Find the height mask (log2 of the tile height)
+    while (1)
+    {
+        if ((s32) num == imH)
+            break;
+
+        num*=2;
+        maskH++;
+        if (maskH == 9)
+        {
+            print_text(32,32,"HEIGHT MASK FAILURE");
+            return;
+        }
+    }
+    //Find the height remainder: peakH is the part of the height that full tiles cover.
+    peakH = height - (height % imH);
+    cycles = (width*peakH)/(imW*imH);
+
+    //Pass 1: the full tiles, walking down each column of tiles before moving right to the next one.
+    for (i = 0; i < cycles; i++)
+    {
+        posW = 0;
+        posH = (i*imH);
+        while (posH >= peakH)
+        {
+            posW += imW;
+            posH -= peakH;
+        }
+        gDPLoadSync(gDisplayListHead++);
+        gDPLoadTextureTile(gDisplayListHead++, image, G_IM_FMT_RGBA, G_IM_SIZ_16b, width, height, posW, posH, posW+imW-1, posH+imH-1, 0, G_TX_NOMIRROR | G_TX_WRAP, G_TX_NOMIRROR | G_TX_WRAP, maskW, maskH, 0, 0);
+        gSPScisTextureRectangle(gDisplayListHead++, (x + posW) << 2, (y + posH) << 2, (x + posW+imW-mOne) << 2,(y + posH + imH-mOne) << 2, G_TX_RENDERTILE, 0, 0, modeSC << 10, 1 << 10);
+    }
+    //If there's a remainder on the vertical side, then it will cycle through that too.
+    if (height-peakH != 0)
+    {
+        posW = 0;
+        posH = peakH;
+        for (i = 0; i < (width/imW); i++)
+        {
+            posW = i*imW;
+            gDPLoadSync(gDisplayListHead++);
+            gDPLoadTextureTile(gDisplayListHead++, image, G_IM_FMT_RGBA, G_IM_SIZ_16b, width, height, posW, posH, posW+imW-1, height-1, 0, G_TX_NOMIRROR | G_TX_WRAP, G_TX_NOMIRROR | G_TX_WRAP, maskW, maskH, 0, 0);
+            gSPScisTextureRectangle(gDisplayListHead++, (x + posW) << 2, (y + posH) << 2, (x + posW+imW-mOne) << 2,(y + posH + imH-mOne) << 2, G_TX_RENDERTILE, 0, 0, modeSC << 10, 1 << 10);
+        }
+    }
+}
+
+#endif
diff --git a/src/game/puppyprint.h b/src/game/puppyprint.h
new file mode 100644
index 00000000..f970b27d
--- /dev/null
+++ b/src/game/puppyprint.h
@@ -0,0 +1,72 @@
+#ifndef PUPPYPRINT_H
+#define PUPPYPRINT_H
+
+#ifdef PUPPYPRINT
+
+//This is how many indexes of timers are saved at once.
higher creates a smoother average, but naturally uses more RAM. 15's fine.
+#define NUM_PERF_ITERATIONS 15
+#define NUM_BENCH_ITERATIONS 150 //Timed runs per benchmark. benchMark[] has two more slots after them, printed as "Benchmark" and "High".
+
+#define BENCHMARK_GAME 1
+#define BENCHMARK_AUDIO 2
+#define BENCHMARK_GRAPHICS 3 //NOTE(review): the code in this patch compares benchOption with the literals 1 and 2, not with these - confirm they are still wanted.
+
+#define PRINT_TEXT_ALIGN_LEFT 0
+#define PRINT_TEXT_ALIGN_CENTRE 1
+#define PRINT_TEXT_ALIGN_RIGHT 2
+#define PRINT_ALL -1 //Pass as 'amount' to print_small_text to draw the whole string.
+
+extern Texture small_font[];
+extern s8 perfIteration;
+extern s16 benchmarkLoop;
+extern s32 benchmarkTimer;
+extern u8 currEnv[4];
+extern s32 ramsizeSegment[33];
+extern s32 audioPool[12];
+extern s8 nameTable;
+extern s32 mempool;
+extern u8 benchOption;
+
+//General
+extern OSTime cpuTime;
+extern OSTime rspTime;
+extern OSTime rdpTime;
+extern OSTime ramTime;
+extern OSTime loadTime;
+extern OSTime rspDelta;
+extern s32 benchMark[NUM_BENCH_ITERATIONS+2];
+
+//CPU. Every timer array below holds NUM_PERF_ITERATIONS samples plus one last slot for their average (see get_average_perf_time).
+extern OSTime collisionTime[NUM_PERF_ITERATIONS+1];
+extern OSTime behaviourTime[NUM_PERF_ITERATIONS+1];
+extern OSTime scriptTime[NUM_PERF_ITERATIONS+1];
+extern OSTime graphTime[NUM_PERF_ITERATIONS+1];
+extern OSTime audioTime[NUM_PERF_ITERATIONS+1];
+extern OSTime dmaTime[NUM_PERF_ITERATIONS+1];
+extern OSTime dmaAudioTime[NUM_PERF_ITERATIONS+1];
+//RSP
+extern OSTime rspGenTime[NUM_PERF_ITERATIONS+1];
+//RDP
+extern OSTime bufferTime[NUM_PERF_ITERATIONS+1];
+extern OSTime tmemTime[NUM_PERF_ITERATIONS+1];
+extern OSTime busTime[NUM_PERF_ITERATIONS+1];
+
+extern void profiler_update(OSTime *time, OSTime time2);
+extern void puppyprint_profiler_process(void);
+extern void puppyprint_render_profiler(void);
+extern void puppyprint_profiler_finished(void);
+extern void print_set_envcolour(s32 r, s32 g, s32 b, s32 a);
+extern void prepare_blank_box(void);
+extern void finish_blank_box(void);
+extern void render_blank_box(s16 x1, s16 y1, s16 x2, s16 y2, u8 r, u8 g, u8 b, u8 a);
+extern void print_small_text(s32 x, s32 y, const char *str, s32 align, s32 amount);
+extern void render_multi_image(Texture *image, s32 x, s32 y, s32 width, s32 height, s32 scaleX, s32 scaleY, s32 mode);
+extern s32 get_text_height(const char *str);
+extern s32 get_text_width(const char *str);
+extern void prepare_blank_box(void); //This and the next two lines repeat declarations already made above.
+extern void finish_blank_box(void);
+extern void render_blank_box(s16 x1, s16 y1, s16 x2, s16 y2, u8 r, u8 g, u8 b, u8 a);
+
+#endif
+
+#endif // PUPPYPRINT_H
diff --git a/src/game/rendering_graph_node.c b/src/game/rendering_graph_node.c
index b1e00dbf..65627bab 100644
--- a/src/game/rendering_graph_node.c
+++ b/src/game/rendering_graph_node.c
@@ -12,6 +12,7 @@
 #include "sm64.h"
 #include "game_init.h"
 #include "engine/extended_bounds.h"
+#include "puppyprint.h"
 
 #include "config.h"
 
@@ -1107,6 +1108,9 @@ void geo_process_node_and_siblings(struct GraphNode *firstNode) {
  */
 void geo_process_root(struct GraphNodeRoot *node, Vp *b, Vp *c, s32 clearColor) {
     UNUSED s32 unused;
+    #ifdef PUPPYPRINT
+    OSTime first = osGetTime();
+    #endif
 
     if (node->node.flags & GRAPH_RENDER_ACTIVE) {
         Mtx *initialMatrix;
@@ -1147,4 +1151,7 @@ void geo_process_root(struct GraphNodeRoot *node, Vp *b, Vp *c, s32 clearColor)
         }
         main_pool_free(gDisplayListHeap);
     }
+    #ifdef PUPPYPRINT
+    profiler_update(graphTime, first);
+    #endif
 }
diff --git a/src/game/sound_init.c b/src/game/sound_init.c
index c4bad21c..77069c6e 100644
--- a/src/game/sound_init.c
+++ b/src/game/sound_init.c
@@ -15,6 +15,7 @@
 #include "sm64.h"
 #include "sound_init.h"
 #include "rumble_init.h"
+#include "puppyprint.h"
 
 #define MUSIC_NONE 0xFFFF
 
@@ -335,6 +336,9 @@ void audio_game_loop_tick(void) {
 void thread4_sound(UNUSED void *arg) {
     audio_init();
     sound_init();
+    #ifdef PUPPYPRINT
+    OSTime lastTime;
+    #endif
 
     // Zero-out unused vector
     vec3f_copy(unused80339DC0, gVec3fZero);
@@ -342,18 +346,48 @@ void thread4_sound(UNUSED void *arg) {
     osCreateMesgQueue(&sSoundMesgQueue, sSoundMesgBuf, ARRAY_COUNT(sSoundMesgBuf));
     set_vblank_handler(1, &sSoundVblankHandler, &sSoundMesgQueue, (OSMesg) 512);
 
-    while (TRUE) {
+    while (TRUE)
+    {
         OSMesg msg;
 
         osRecvMesg(&sSoundMesgQueue, &msg, OS_MESG_BLOCK);
-        if (gResetTimer < 25) {
-            struct SPTask *spTask;
-            profiler_log_thread4_time();
-            spTask = create_next_audio_frame_task();
-            if (spTask != NULL) {
-                dispatch_audio_sptask(spTask);
+        #ifdef PUPPYPRINT
+        // The audio work runs in this inner loop so an audio benchmark (benchOption == 1) can repeat it back to back; otherwise it runs once.
+        while (TRUE)
+        {
+            lastTime = osGetTime();
+            dmaAudioTime[perfIteration] = 0;
+        #endif
+            if (gResetTimer < 25) {
+                struct SPTask *spTask;
+                profiler_log_thread4_time();
+                spTask = create_next_audio_frame_task();
+                if (spTask != NULL) {
+                    dispatch_audio_sptask(spTask);
+                }
+                profiler_log_thread4_time();
+                #ifdef PUPPYPRINT
+                profiler_update(audioTime, lastTime);
+                audioTime[perfIteration] -= dmaAudioTime[perfIteration];
+                if (benchmarkLoop > 0 && benchOption == 1)
+                {
+                    benchmarkLoop--;
+                    benchMark[benchmarkLoop] = osGetTime() - lastTime;
+                    if (benchmarkLoop == 0)
+                    {
+                        puppyprint_profiler_finished();
+                        break;
+                    }
+                }
+                else
+                    break;
+                #endif
             }
-            profiler_log_thread4_time();
+        #ifdef PUPPYPRINT
+            // Also leave the inner loop when the audio work is skipped, otherwise it spins here forever and never gets back to osRecvMesg.
+            else
+                break;
         }
+        #endif
     }
 }
diff --git a/textures/segment2/custom_text.i4.png b/textures/segment2/custom_text.i4.png
new file mode 100644
index 00000000..02dd2d17
Binary files /dev/null and b/textures/segment2/custom_text.i4.png differ