Compare commits

...

3 Commits

Author SHA1 Message Date
Mr-Wiseguy
a7b208e8e8 Added CPU audio to lightweight profiler 2022-01-17 19:23:48 -05:00
Mr-Wiseguy
52600eb453 Added RSP profiling to the lightweight profiler 2022-01-16 23:31:13 -05:00
Mr-Wiseguy
ee5f5adc9d Implemented new lightweight CPU/RDP profiler 2022-01-16 17:18:11 -05:00
10 changed files with 368 additions and 0 deletions

View File

@@ -19,6 +19,7 @@
#endif
#include "game/puppyprint.h"
#include "game/puppylights.h"
#include "game/profiling.h"
// Message IDs
enum MessageIDs {
@@ -189,6 +190,7 @@ void start_gfx_sptask(void) {
rspDelta = osGetTime();
#endif
start_sptask(M_GFXTASK);
fast_profiler_rsp_started(PROFILER_RSP_GFX);
}
}
@@ -220,6 +222,7 @@ void handle_vblank(void) {
} else {
pretend_audio_sptask_done();
}
fast_profiler_rsp_started(PROFILER_RSP_AUDIO);
}
} else {
if (gActiveSPTask == NULL
@@ -229,6 +232,7 @@ void handle_vblank(void) {
rspDelta = osGetTime();
#endif
start_sptask(M_GFXTASK);
fast_profiler_rsp_started(PROFILER_RSP_GFX);
}
}
#if ENABLE_RUMBLE
@@ -256,6 +260,9 @@ void handle_sp_complete(void) {
#if PUPPYPRINT_DEBUG
profiler_update(rspGenTime, rspDelta);
#endif
fast_profiler_rsp_completed(PROFILER_RSP_GFX);
} else {
fast_profiler_rsp_yielded();
}
// Start the audio task, as expected by handle_vblank.
@@ -264,12 +271,19 @@ void handle_sp_complete(void) {
} else {
pretend_audio_sptask_done();
}
fast_profiler_rsp_started(PROFILER_RSP_AUDIO);
} else {
curSPTask->state = SPTASK_STATE_FINISHED;
if (curSPTask->task.t.type == M_AUDTASK) {
fast_profiler_rsp_completed(PROFILER_RSP_AUDIO);
// After audio tasks come gfx tasks.
if ((sCurrentDisplaySPTask != NULL)
&& (sCurrentDisplaySPTask->state != SPTASK_STATE_FINISHED)) {
if (sCurrentDisplaySPTask->state == SPTASK_STATE_INTERRUPTED) {
fast_profiler_rsp_resumed();
} else {
fast_profiler_rsp_started(PROFILER_RSP_GFX);
}
start_sptask(M_GFXTASK);
}
sCurrentAudioSPTask = NULL;
@@ -283,6 +297,7 @@ void handle_sp_complete(void) {
#if PUPPYPRINT_DEBUG
profiler_update(rspGenTime, rspDelta);
#endif
fast_profiler_rsp_completed(PROFILER_RSP_GFX);
}
}
}

View File

@@ -25,6 +25,7 @@
#include "puppyprint.h"
#include "debug_box.h"
#include "engine/colors.h"
#include "profiling.h"
struct SpawnInfo gPlayerSpawnInfos[1];
struct GraphNode *gGraphNodePointers[MODEL_ID_COUNT];
@@ -434,6 +435,9 @@ void render_game(void) {
gViewportOverride = NULL;
gViewportClip = NULL;
fast_profiler_update(PROFILER_TIME_GFX);
fast_profiler_print_times();
#if PUPPYPRINT_DEBUG
profiler_update(graphTime, first);

76
src/game/fasttext.c Normal file
View File

@@ -0,0 +1,76 @@
// This file is a modification of a file from https://github.com/danbolt/n64-jam-1, which was licensed under the MPL-2.0 License
// See the original repo for more details.
#include <ultra64.h>
#define TEX_ASCII_START '!'
#define G_CC_TEXT PRIMITIVE, 0, TEXEL0, 0, 0, 0, 0, TEXEL0
__asm__(
".section \".rodata\", \"a\", @progbits\n"
".global fastfont\n"
"fastfont:\n"
".incbin \"src/game/newfont2_swapped.bin\"\n"
".previous\n"
);
extern u8 fastfont[];
// Naive strlen copied from
// https://stackoverflow.com/questions/22520413/c-strlen-implementation-in-one-line-of-code
void my_strlen(const char *str, u32 *len)
{
for (*len = 0; str[*len]; (*len)++);
}
int computeS(unsigned char letter) {
int idx = letter;
if (letter > 'z') {
idx -= (3 + 2 + 3 + 1 + 3);
} else if (letter > '^') {
idx -= (2 + 3 + 1 + 3);
} else if (letter > 'Z') {
idx -= (3 + 1 + 3);
} else if (letter > '?') {
idx -= (1 + 3);
} else if (letter > ';') {
idx -= (3);
}
return (idx - TEX_ASCII_START) * 8;
}
void drawSmallString_impl(Gfx **dl, int x, int y, const char* string, int r, int g, int b) {
int i = 0;
int xPos = x;
int yPos = y;
int s = 0;
Gfx *dlHead = *dl;
r = g = b = 255;
gDPLoadTextureBlock_4bS(dlHead++, fastfont, G_IM_FMT_IA, 672, 12, 0, G_TX_MIRROR | G_TX_WRAP, G_TX_MIRROR | G_TX_WRAP, G_TX_NOMASK, G_TX_NOMASK, G_TX_NOLOD, G_TX_NOLOD);
gDPSetPrimColor(dlHead++, 0, 0, r, g, b, 255);
gDPSetCombineMode(dlHead++, G_CC_TEXT, G_CC_TEXT);
gDPPipeSync(dlHead++);
while (string[i] != '\0') {
if (string[i] == '\n') {
xPos = x;
yPos += 12;
i++;
continue;
}
if (string[i] != ' ') {
s = computeS(string[i]);
gSPTextureRectangle(dlHead++, (xPos + 0) << 2, (yPos + 0) << 2, (xPos + 8) << 2, (yPos + 12) << 2, 0, s << 5, 0, 1 << 10, 1 << 10);
}
xPos += 8;
i++;
}
gDPSetPrimColor(dlHead++, 0, 0, 255, 255, 255, 255);
gDPPipeSync(dlHead++);
*dl = dlHead;
}

14
src/game/fasttext.h Normal file
View File

@@ -0,0 +1,14 @@
#ifndef __FASTTEXT_H__
#define __FASTTEXT_H__
void drawSmallString_impl(Gfx**, int, int, const char*, int, int , int);
static inline void drawSmallString(Gfx **dl, int x, int y, const char* string) {
drawSmallString_impl(dl, x, y, string, 255, 255, 255);
}
static inline void drawSmallStringCol(Gfx **dl, int x, int y, const char* string, int r, int g, int b) {
drawSmallString_impl(dl, x, y, string, r, g, b);
}
#endif

View File

@@ -29,6 +29,7 @@
#include "puppycam2.h"
#include "debug_box.h"
#include "vc_check.h"
#include "profiling.h"
// First 3 controller slots
struct Controller gControllers[3];
@@ -762,6 +763,7 @@ void thread5_game_loop(UNUSED void *arg) {
render_init();
while (TRUE) {
fast_profiler_frame_setup();
// If the reset timer is active, run the process to reset the game.
if (gResetTimer != 0) {
draw_reset_bars();
@@ -788,6 +790,7 @@ void thread5_game_loop(UNUSED void *arg) {
audio_game_loop_tick();
select_gfx_pool();
read_controller_inputs(THREAD_5_GAME_LOOP);
fast_profiler_update(PROFILER_TIME_CONTROLLERS);
addr = level_script_execute(addr);
#if !PUPPYPRINT_DEBUG && defined(VISUAL_DEBUG)
debug_box_input();

Binary file not shown.

View File

@@ -21,6 +21,7 @@
#include "spawn_object.h"
#include "puppyprint.h"
#include "puppylights.h"
#include "profiling.h"
/**
@@ -611,6 +612,7 @@ void update_objects(UNUSED s32 unused) {
OSTime first = osGetTime();
OSTime colTime = collisionTime[perfIteration];
#endif
fast_profiler_update(PROFILER_TIME_LEVEL_SCRIPT);
// cycleCounts[0] = get_current_clock();
@@ -632,6 +634,8 @@ void update_objects(UNUSED s32 unused) {
// Update spawners and objects with surfaces
// cycleCounts[2] = get_clock_difference(cycleCounts[0]);
update_terrain_objects();
fast_profiler_update(PROFILER_TIME_OBJECTS1);
// If Mario was touching a moving platform at the end of last frame, apply
// displacement now
@@ -673,4 +677,6 @@ void update_objects(UNUSED s32 unused) {
profiler_update(behaviourTime, first);
behaviourTime[perfIteration] -= collisionTime[perfIteration] - colTime;
#endif
fast_profiler_update(PROFILER_TIME_OBJECTS2);
}

202
src/game/profiling.c Normal file
View File

@@ -0,0 +1,202 @@
#include <ultra64.h>
#include <PR/os_internal_reg.h>
#include "game_init.h"
#include "profiling.h"
#include "fasttext.h"
#define RDP_CYCLE_CONV(x) ((10 * (x)) / 625) // 62.5 million cycles per frame
typedef struct {
u32 counts[PROFILING_BUFFER_SIZE];
u32 total;
} ProfileTimeData;
ProfileTimeData all_profiling_data[PROFILER_TIME_COUNT];
int profile_buffer_index = -1;
int rsp_buffer_indices[PROFILER_RSP_COUNT];
// Holds either the start time if the task is running, or the amount of time the task has run for so far if yielded
u32 rsp_pending_times[PROFILER_RSP_COUNT];
u32 prev_start;
u32 start;
u32 prev_time;
u32 audio_start;
u32 audio_buffer_index;
u32 preempted_time;
static void buffer_update(ProfileTimeData* data, u32 new, int buffer_index) {
u32 old = data->counts[buffer_index];
data->total -= old;
data->total += new;
data->counts[buffer_index] = new;
}
void fast_profiler_update(enum ProfilerTime which) {
u32 cur_time = osGetCount();
u32 diff;
ProfileTimeData* cur_data = &all_profiling_data[which];
diff = cur_time - prev_time;
u32 saved = __osDisableInt();
u32 cur_preempted_time = preempted_time;
preempted_time = 0;
__osRestoreInt(saved);
if (cur_preempted_time > 0) {
diff -= cur_preempted_time;
start += cur_preempted_time;
}
buffer_update(cur_data, diff, profile_buffer_index);
prev_time = cur_time;
}
void fast_profiler_rsp_started(enum ProfilerRSPTime which) {
rsp_pending_times[which] = osGetCount();
}
void fast_profiler_rsp_completed(enum ProfilerRSPTime which) {
ProfileTimeData* cur_data = &all_profiling_data[PROFILER_TIME_RSP_GFX + which];
int cur_index = rsp_buffer_indices[which];
u32 time = osGetCount() - rsp_pending_times[which];
rsp_pending_times[which] = 0;
buffer_update(cur_data, time, cur_index);
cur_index++;
if (cur_index >= PROFILING_BUFFER_SIZE) {
cur_index = 0;
}
rsp_buffer_indices[which] = cur_index;
}
void fast_profiler_rsp_resumed() {
rsp_pending_times[PROFILER_RSP_GFX] = osGetCount() - rsp_pending_times[PROFILER_RSP_GFX];
}
// This ends up being the same math as resumed, so we just use resumed for both
// void fast_profiler_rsp_yielded() {
// rsp_pending_times[PROFILER_RSP_GFX] = osGetCount() - rsp_pending_times[PROFILER_RSP_GFX];
// }
void fast_profiler_audio_started() {
audio_start = osGetCount();
}
void fast_profiler_audio_completed() {
ProfileTimeData* cur_data = &all_profiling_data[PROFILER_TIME_AUDIO];
u32 time = osGetCount() - audio_start;
u32 cur_index = audio_buffer_index;
preempted_time = time;
buffer_update(cur_data, time, cur_index);
cur_index++;
if (cur_index >= PROFILING_BUFFER_SIZE) {
cur_index = 0;
}
audio_buffer_index = cur_index;
}
static void update_fps_timer() {
u32 diff = start - prev_start;
buffer_update(&all_profiling_data[PROFILER_TIME_FPS], diff, profile_buffer_index);
prev_start = start;
}
static void update_total_timer() {
u32 saved = __osDisableInt();
u32 cur_preempted_time = preempted_time;
preempted_time = 0;
__osRestoreInt(saved);
prev_time = start + cur_preempted_time;
fast_profiler_update(PROFILER_TIME_TOTAL);
}
static void update_rdp_timers() {
u32 tmem = IO_READ(DPC_TMEM_REG);
u32 cmd = IO_READ(DPC_BUFBUSY_REG);
u32 pipe = IO_READ(DPC_PIPEBUSY_REG);
if (gGlobalTimer > 5) {
IO_WRITE(DPC_STATUS_REG, (DPC_CLR_CLOCK_CTR | DPC_CLR_CMD_CTR | DPC_CLR_PIPE_CTR | DPC_CLR_TMEM_CTR));
}
buffer_update(&all_profiling_data[PROFILER_TIME_TMEM], tmem, profile_buffer_index);
buffer_update(&all_profiling_data[PROFILER_TIME_CMD], cmd, profile_buffer_index);
buffer_update(&all_profiling_data[PROFILER_TIME_PIPE], pipe, profile_buffer_index);
}
void fast_profiler_print_times() {
u32 microseconds[PROFILER_TIME_COUNT];
char text_buffer[196];
update_fps_timer();
update_total_timer();
update_rdp_timers();
for (int i = 0; i < PROFILER_TIME_COUNT; i++) {
if (i < PROFILER_TIME_TMEM) {
microseconds[i] = OS_CYCLES_TO_USEC(all_profiling_data[i].total / PROFILING_BUFFER_SIZE);
} else {
microseconds[i] = RDP_CYCLE_CONV(all_profiling_data[i].total / PROFILING_BUFFER_SIZE);
}
}
sprintf(text_buffer,
"FPS: %5.2f\n"
"CPU\n"
"CONT: %7d\n"
"LEVEL:%7d\n"
"OBJ1: %7d\n"
"OBJ2: %7d\n"
"GFX: %7d\n"
"AUDIO:%7d\n"
"TOTAL:%7d\n"
"RDP\n"
"TMEM: %7d\n"
"CMD: %7d\n"
"PIPE: %7d\n"
"RSP\n"
"GFX: %7d\n"
"AUDIO:%7d\n"
"TOTAL:%7d",
1000000.0f / microseconds[PROFILER_TIME_FPS],
microseconds[PROFILER_TIME_CONTROLLERS],
microseconds[PROFILER_TIME_LEVEL_SCRIPT],
microseconds[PROFILER_TIME_OBJECTS1],
microseconds[PROFILER_TIME_OBJECTS2],
microseconds[PROFILER_TIME_GFX],
microseconds[PROFILER_TIME_AUDIO] * 2, // audio is 60Hz, so double the average
microseconds[PROFILER_TIME_TOTAL] + microseconds[PROFILER_TIME_AUDIO] * 2, // audio time is removed from the main thread profiling, so add it back here
microseconds[PROFILER_TIME_TMEM],
microseconds[PROFILER_TIME_CMD],
microseconds[PROFILER_TIME_PIPE],
microseconds[PROFILER_TIME_RSP_GFX],
microseconds[PROFILER_TIME_RSP_AUDIO],
microseconds[PROFILER_TIME_RSP_GFX] + microseconds[PROFILER_TIME_RSP_AUDIO]
);
Gfx* dlHead = gDisplayListHead;
gDPPipeSync(dlHead++);
gDPSetCycleType(dlHead++, G_CYC_1CYCLE);
gDPSetRenderMode(dlHead++, G_RM_TEX_EDGE, G_RM_TEX_EDGE2);
gDPSetTexturePersp(dlHead++, G_TP_NONE);
gDPSetTextureFilter(dlHead++, G_TF_POINT);
gDPSetTextureLUT(dlHead++, G_TT_NONE);
drawSmallStringCol(&dlHead, 10, 10, text_buffer, 0, 0, 0);
gDisplayListHead = dlHead;
}
void fast_profiler_frame_setup() {
profile_buffer_index++;
preempted_time = 0;
if (profile_buffer_index >= PROFILING_BUFFER_SIZE) {
profile_buffer_index = 0;
}
prev_time = start = osGetCount();
}

45
src/game/profiling.h Normal file
View File

@@ -0,0 +1,45 @@
#ifndef __PROFILING_H__
#define __PROFILING_H__
#include <ultra64.h>
#include "macros.h"
#define PROFILING_BUFFER_SIZE 64
enum ProfilerTime {
PROFILER_TIME_FPS,
PROFILER_TIME_CONTROLLERS,
PROFILER_TIME_LEVEL_SCRIPT,
PROFILER_TIME_OBJECTS1,
PROFILER_TIME_OBJECTS2,
PROFILER_TIME_GFX,
PROFILER_TIME_AUDIO,
PROFILER_TIME_TOTAL,
PROFILER_TIME_TMEM,
PROFILER_TIME_PIPE,
PROFILER_TIME_CMD,
PROFILER_TIME_RSP_GFX,
PROFILER_TIME_RSP_AUDIO,
PROFILER_TIME_COUNT,
};
enum ProfilerRSPTime {
PROFILER_RSP_GFX,
PROFILER_RSP_AUDIO,
PROFILER_RSP_COUNT
};
void fast_profiler_update(enum ProfilerTime which);
void fast_profiler_print_times();
void fast_profiler_frame_setup();
void fast_profiler_rsp_started(enum ProfilerRSPTime which);
void fast_profiler_rsp_completed(enum ProfilerRSPTime which);
void fast_profiler_rsp_resumed();
void fast_profiler_audio_started();
void fast_profiler_audio_completed();
// See profiling.c to see why fast_profiler_rsp_yielded isn't its own function
static ALWAYS_INLINE void fast_profiler_rsp_yielded() {
fast_profiler_rsp_resumed();
}
#endif

View File

@@ -15,6 +15,7 @@
#include "sound_init.h"
#include "rumble_init.h"
#include "puppyprint.h"
#include "profiling.h"
#define MUSIC_NONE 0xFFFF
@@ -352,6 +353,7 @@ void thread4_sound(UNUSED void *arg) {
OSMesg msg;
osRecvMesg(&sSoundMesgQueue, &msg, OS_MESG_BLOCK);
fast_profiler_audio_started();
#if PUPPYPRINT_DEBUG
while (TRUE) {
lastTime = osGetTime();
@@ -381,5 +383,6 @@ void thread4_sound(UNUSED void *arg) {
#if PUPPYPRINT_DEBUG
}
#endif
fast_profiler_audio_completed();
}
}