From 26e00f42c0e652102d1f1681fb155759ee4148c9 Mon Sep 17 00:00:00 2001 From: Sauraen Date: Tue, 30 Sep 2025 21:17:29 -0700 Subject: [PATCH] Some counters fixes --- f3dex3.s | 27 ++++++++++++--------------- rsp/rsp_defs.inc | 4 ++-- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/f3dex3.s b/f3dex3.s index c9fc251..8782042 100644 --- a/f3dex3.s +++ b/f3dex3.s @@ -82,7 +82,6 @@ ACC_LOWER equ 2 .endif ENABLE_PROFILING equ 1 COUNTER_A_UPPER_VERTEX_COUNT equ 0 -COUNTER_B_LOWER_CMD_COUNT equ 1 COUNTER_C_FIFO_FULL equ 1 // Profiling Configuration B @@ -104,7 +103,6 @@ COUNTER_C_FIFO_FULL equ 1 .endif ENABLE_PROFILING equ 1 COUNTER_A_UPPER_VERTEX_COUNT equ 1 -COUNTER_B_LOWER_CMD_COUNT equ 0 COUNTER_C_FIFO_FULL equ 0 // Profiling Configuration C @@ -121,7 +119,6 @@ COUNTER_C_FIFO_FULL equ 0 .elseif CFG_PROFILING_C ENABLE_PROFILING equ 1 COUNTER_A_UPPER_VERTEX_COUNT equ 0 -COUNTER_B_LOWER_CMD_COUNT equ 1 COUNTER_C_FIFO_FULL equ 0 // Default (extra profiling disabled) @@ -138,7 +135,6 @@ COUNTER_C_FIFO_FULL equ 0 .else ENABLE_PROFILING equ 0 COUNTER_A_UPPER_VERTEX_COUNT equ 1 -COUNTER_B_LOWER_CMD_COUNT equ 0 COUNTER_C_FIFO_FULL equ 1 .endif @@ -725,7 +721,8 @@ clipPolySgn equ (-(0x1000 - clipPoly)) // Underflow DMEM address // See rsp_defs.inc about why these are not used and we can reuse them. 
startCounterTime equ (OSTask + OSTask_ucode_size) xfrmLookatDirs equ -(0x1000 - (OSTask + OSTask_ucode_data)) // and OSTask_ucode_data_size -dumpDmemBuffer equ (OSTask + OSTask_yield_data_size) +dumpDmemBuffer equ (OSTask + OSTask_yield_data_size) // CFG_PROFILING_B only +startFifoStallTime equ dumpDmemBuffer // CFG_PROFILING_A only memsetBufferStart equ ((vertexBuffer + 0xF) & 0xFF0) memsetBufferMaxEnd equ (rdpCmdBuffer1 & 0xFF0) @@ -1241,22 +1238,20 @@ run_next_DL_command: lw cmd_w0, (inputBufferEnd)(inputBufferPos) // Word 0 vmudl $v5, $v4, vTRC_VS // Vtx indices times length lw cmd_w1_dram, (inputBufferEnd + 4)(inputBufferPos) // Word 1 +.if CFG_PROFILING_C + mfc0 $10, DPC_STATUS +.endif vmadn $v7, vOne, vTRC_VB // Plus address of vertex buffer sll $ra, $ra, 2 // Convert to a number of instructions .if CFG_PROFILING_C - mfc0 $10, DPC_STATUS + addi perfCounterB, perfCounterB, 1 // Count commands andi $10, $10, DPC_STATUS_GCLK_ALIVE // Sample whether GCLK is active now sll $10, $10, 16 - 3 // move from bit 3 to bit 16 add perfCounterB, perfCounterB, $10 // Add to the perf counter -.endif -.if CFG_PROFILING_A +.elseif CFG_PROFILING_A mfc0 $10, DPC_CLOCK -.endif -.if COUNTER_B_LOWER_CMD_COUNT // A or C + sw perfCounterC, startFifoStallTime // Save initial FIFO stall time addi perfCounterB, perfCounterB, 1 // Count commands -.endif -.if CFG_PROFILING_A - add perfCounterD, perfCounterD, perfCounterC // Add initial FIFO stall time to tri time; will subtract final FIFO time later sw $10, startCounterTime .endif vmadl $v6, $v31, $v31[2] // 0; copy in v6 @@ -2642,10 +2637,12 @@ tris_end: sub $11, $11, $10 beqz $ra, run_next_DL_command // $ra != 0 if from tri cmds add perfCounterA, perfCounterA, $11 // Add to vert cycles perf counter - sub perfCounterA, perfCounterA, $11 // From tris, undo add to vert perf counter + lw $10, startFifoStallTime // From tris + sub perfCounterA, perfCounterA, $11 // Undo add to vert perf counter add perfCounterD, perfCounterD, $11 // Add to 
tri cycles perf counter + sub $10, perfCounterC, $10 // RDP FIFO stall time elapsed during tri draw j run_next_DL_command - sub perfCounterD, perfCounterD, perfCounterC // Subtract final RDP FIFO stall time; ends up subtracting any FIFO stall time during these tris + sub perfCounterD, perfCounterD, $10 // Subtract the FIFO stall time elapsed during these tris from tri time .else j run_next_DL_command lqv vTRC, (vTRCValue)($zero) // Restore value overwritten by matrix diff --git a/rsp/rsp_defs.inc b/rsp/rsp_defs.inc index fc40644..bbd0662 100644 --- a/rsp/rsp_defs.inc +++ b/rsp/rsp_defs.inc @@ -16,7 +16,7 @@ OSTask_flags equ 0x0004 // see note below | OSTask_ucode_boot equ 0x0008 // | OSTask_ucode_boot_size equ 0x000C // v OSTask_ucode equ 0x0010 // used in F3D, S2D, and boot -OSTask_ucode_size equ 0x0014 // EX3: startCounterTime +OSTask_ucode_size equ 0x0014 // EX3: startCounterTime (CFG_PROFILING_A and C only) OSTask_ucode_data equ 0x0018 // used in boot only. EX3: xfrmLookatDirs ^ OSTask_ucode_data_size equ 0x001C // used in boot only v OSTask_dram_stack equ 0x0020 // used in F3D and S2D @@ -26,7 +26,7 @@ OSTask_output_buff_size equ 0x002C // used in F3D and S2D; actually end pointer OSTask_data_ptr equ 0x0030 // used in F3D and S2D OSTask_data_size equ 0x0034 // EX3: 2 bytes: savedOrigV1Addr OSTask_yield_data_ptr equ 0x0038 // used in F3D and S2D -OSTask_yield_data_size equ 0x003C // EX3: dumpDmemBuffer (CFG_PROFILING_B only) +OSTask_yield_data_size equ 0x003C // EX3: dumpDmemBuffer (CFG_PROFILING_B only), startFifoStallTime (CFG_PROFILING_A only) // Unlabeled members are never used. In particular, boot does NOT use OSTask_ucode_size, // it always fills all of IMEM after start. // A hypothetical other microcode could use OSTask_dram_stack_size or OSTask_data_size