Optimize BETTER_REVERB lightweight by an additional ~20% CPU, while also cutting memory requirements for it by ~30% (#744)

2026-01-21 10:17:19 -08:00 · 2023-12-30 22:22:05 -06:00
parent 6148582f5e
commit 1d7a690361
3 changed files with 40 additions and 41 deletions
--- a/src/audio/data.c
+++ b/src/audio/data.c
@@ -73,7 +73,7 @@ u8 sReverbMultsArr[][NUM_ALLPASS / 3] = {

 /**
 * Format:
- * - useLightweightSettings (Reduce some runtime configurability options in favor of a slight speed boost during processing; Light configurability settings are found in synthesis.h)
+ * - useLightweightSettings (Reduce some runtime configurability options in favor of a significant speed boost during processing; Light configurability settings are found in synthesis.h)
 * - downsampleRate         (Higher values exponentially reduce the number of input samples to process, improving performance at cost of quality; number <= 0 signifies use of vanilla reverb)
 * - isMono                 (Only process reverb on the left channel and share it with the right channel, improving performance at cost of quality)
 * - filterCount            (Number of filters to process data with; in general, more filters means higher quality at the cost of performance demand; always 3 with light settings)
@@ -85,8 +85,8 @@ u8 sReverbMultsArr[][NUM_ALLPASS / 3] = {
 * 
 * - *delaysL               (Advanced parameter; array of variable audio buffer sizes / delays for each respective filter [left channel])
 * - *delaysR               (Advanced parameter; array of variable audio buffer sizes / delays for each respective filter [right channel])
- * - *reverbMultsL          (Advanced parameter; array of multipliers applied to the final output of each group of 3 filters [left channel]; overridden when using light settings)
- * - *reverbMultsR          (Advanced parameter; array of multipliers applied to the final output of each group of 3 filters [right channel]; overridden when using light settings)
+ * - *reverbMultsL          (Advanced parameter; array of multipliers applied to the final output of each group of 3 filters [left channel]; unused when using light settings)
+ * - *reverbMultsR          (Advanced parameter; array of multipliers applied to the final output of each group of 3 filters [right channel]; unused when using light settings)
 * 
 * NOTE: The first entry will always be used by default when not using the level commands to specify a preset.
 * Please reference the HackerSM64 Wiki for more descriptive documentation of these parameters and usage of BETTER_REVERB in general.
--- a/src/audio/synthesis.c
+++ b/src/audio/synthesis.c
@@ -47,11 +47,11 @@ u8 toggleBetterReverb = FALSE;
 u8 betterReverbLightweight = FALSE;
 u8 monoReverb;
 s8 betterReverbDownsampleRate;
-static s32        reverbMults[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS / 3] = {0};
-static s32         allpassIdx[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS] = {0};
-static s32 betterReverbDelays[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS] = {0};
-static s32     lastDelayLight[SYNTH_CHANNEL_STEREO_COUNT];
-static s16        **delayBufs[SYNTH_CHANNEL_STEREO_COUNT];
+static s32         reverbMults[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS / 3] = {0};
+static s32          allpassIdx[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS] = {0};
+static s32  betterReverbDelays[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS] = {0};
+static s32 historySamplesLight[SYNTH_CHANNEL_STEREO_COUNT];
+static s16         **delayBufs[SYNTH_CHANNEL_STEREO_COUNT];
 u8 *gReverbMults[SYNTH_CHANNEL_STEREO_COUNT];
 s32 reverbLastFilterIndex;
 s32 reverbFilterCount;
@@ -106,6 +106,7 @@ static void reverb_samples(s16 *start, s16 *end, s16 *downsampleBuffer, s32 chan
    j = 0;

    for (; start < end; start++, downsampleBuffer += downsampleIncrement) {
+        // Scale the very last filter's output by revIndex (divided by 256) and mix it with the new incoming sample
        tmpCarryover = ((delayBufsLocal[lastFilterIndex][allpassIdxLocal[lastFilterIndex]] * revIndex) >> 8) + *downsampleBuffer;
        outSampleTotal = 0;
        i = 0;
@@ -134,7 +135,6 @@ static void reverb_samples(s16 *start, s16 *end, s16 *downsampleBuffer, s32 chan
    }
 }

-#define FILTERS_MINUS_1 (BETTER_REVERB_FILTER_COUNT_LIGHT - 1)
 static void reverb_samples_light(s16 *start, s16 *end, s16 *downsampleBuffer, s32 channel) {
    s16 *curDelaySample;
    s32 historySample;
@@ -144,14 +144,16 @@ static void reverb_samples_light(s16 *start, s16 *end, s16 *downsampleBuffer, s3
    s32 downsampleIncrement = gReverbDownsampleRate;
    s32 *delaysLocal = betterReverbDelays[channel];
    s32 *allpassIdxLocal = allpassIdx[channel];
-    s32 lastDelayLightLocal = lastDelayLight[channel];
    s16 **delayBufsLocal = delayBufs[channel];

-    for (; start < end; start++, downsampleBuffer += downsampleIncrement) {
-        tmpCarryover = (((delayBufsLocal[FILTERS_MINUS_1][allpassIdxLocal[FILTERS_MINUS_1]] * BETTER_REVERB_REVERB_INDEX_LIGHT) >> 8) + *downsampleBuffer);
-        i = 0;
+    // Get history sample from last processing tick
+    tmpCarryover = historySamplesLight[channel];

-        for (; i < FILTERS_MINUS_1; ++i) {
+    for (; start < end; start++, downsampleBuffer += downsampleIncrement) {
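+        // Scale the value carried over from the previous sample by BETTER_REVERB_REVERB_INDEX_LIGHT (divided by 256) and mix it with the new incoming sample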
+        tmpCarryover = ((tmpCarryover * BETTER_REVERB_REVERB_INDEX_LIGHT) >> 8) + *downsampleBuffer;
+
+        for (i = 0; i < BETTER_REVERB_FILTER_COUNT_LIGHT; ++i) {
            curDelaySample = &delayBufsLocal[i][allpassIdxLocal[i]];
            historySample = *curDelaySample;

@@ -162,16 +164,13 @@ static void reverb_samples_light(s16 *start, s16 *end, s16 *downsampleBuffer, s3
            if (++allpassIdxLocal[i] == delaysLocal[i]) allpassIdxLocal[i] = 0;
        }

-        curDelaySample = &delayBufsLocal[FILTERS_MINUS_1][allpassIdxLocal[FILTERS_MINUS_1]];
-        historySample = ((*curDelaySample * BETTER_REVERB_MULTIPLE_LIGHT) >> 8); // outSampleTotal variable not needed, as there is no sample addition happening here. Not really a history sample though.
-        *curDelaySample = CLAMP_S16(tmpCarryover);
-
-        if (++allpassIdxLocal[FILTERS_MINUS_1] == lastDelayLightLocal) allpassIdxLocal[FILTERS_MINUS_1] = 0;
-
-        *start = CLAMP_S16(historySample);
+        // Lightweight does not use the final filter type at all, unlike standard reverb processing
+        *start = CLAMP_S16(tmpCarryover);
    }
+    
+    // Save the carried-over sample in persistent per-channel state so the next processing tick can resume from it
+    historySamplesLight[channel] = tmpCarryover;
 }
-#undef FILTERS_MINUS_1

 void initialize_better_reverb_buffers(void) {
    delayBufs[SYNTH_CHANNEL_LEFT] = (s16**) soundAlloc(&gBetterReverbPool, BETTER_REVERB_PTR_SIZE);
@@ -180,8 +179,11 @@ void initialize_better_reverb_buffers(void) {

 void set_better_reverb_buffers(u32 *inputDelaysL, u32 *inputDelaysR) {
    s32 bufOffset = 0;
-    s32 i;
    s32 filterCount = reverbFilterCount;
+    u32 *inputDelayPtrs[SYNTH_CHANNEL_STEREO_COUNT] = {
+        [SYNTH_CHANNEL_LEFT]  = inputDelaysL,
+        [SYNTH_CHANNEL_RIGHT] = inputDelaysR,
+    };

    if (betterReverbLightweight)
        filterCount = BETTER_REVERB_FILTER_COUNT_LIGHT;
@@ -194,20 +196,17 @@ void set_better_reverb_buffers(u32 *inputDelaysL, u32 *inputDelaysR) {

    // NOTE: Using filterCount over NUM_ALLPASS will report less memory usage with fewer filters, but poses an additional
    // risk to anybody testing on console with performance compromises, as emulator can be easily overlooked.
-    for (i = 0; i < filterCount; ++i) {
-        betterReverbDelays[SYNTH_CHANNEL_LEFT][i] = (s32) (inputDelaysL[i] / gReverbDownsampleRate);
-        betterReverbDelays[SYNTH_CHANNEL_RIGHT][i] = (s32) (inputDelaysR[i] / gReverbDownsampleRate);
-        delayBufs[SYNTH_CHANNEL_LEFT][i] = soundAlloc(&gBetterReverbPool, betterReverbDelays[SYNTH_CHANNEL_LEFT][i] * sizeof(s16));
-        bufOffset += betterReverbDelays[SYNTH_CHANNEL_LEFT][i];
-        delayBufs[SYNTH_CHANNEL_RIGHT][i] = soundAlloc(&gBetterReverbPool, betterReverbDelays[SYNTH_CHANNEL_RIGHT][i] * sizeof(s16));
-        bufOffset += betterReverbDelays[SYNTH_CHANNEL_RIGHT][i];
+    for (s32 channel = 0; channel < SYNTH_CHANNEL_STEREO_COUNT; channel++) {
+        historySamplesLight[channel] = 0;
+        for (s32 filter = 0; filter < filterCount; filter++) {
+            betterReverbDelays[channel][filter] = (s32) (inputDelayPtrs[channel][filter] / gReverbDownsampleRate);
+            delayBufs[channel][filter] = soundAlloc(&gBetterReverbPool, betterReverbDelays[channel][filter] * sizeof(s16));
+            bufOffset += betterReverbDelays[channel][filter];
+        }
    }

    aggress(bufOffset * sizeof(s16) <= BETTER_REVERB_SIZE - BETTER_REVERB_PTR_SIZE, "BETTER_REVERB_SIZE is too small for this preset!");

-    lastDelayLight[SYNTH_CHANNEL_LEFT] = betterReverbDelays[SYNTH_CHANNEL_LEFT][filterCount-1];
-    lastDelayLight[SYNTH_CHANNEL_RIGHT] = betterReverbDelays[SYNTH_CHANNEL_RIGHT][filterCount-1];
-
    bzero(allpassIdx, sizeof(allpassIdx));
 }
 #endif
--- a/src/audio/synthesis.h
+++ b/src/audio/synthesis.h
@@ -42,13 +42,13 @@ enum ChannelIndexes {

 /* ------ BETTER REVERB LIGHTWEIGHT PARAMETER OVERRIDES ------ */

-// Filter count works differently than normal when used with light settings and can support numbers that are not multiples of 3, though 3 is generally recommended.
-// This can be reduced to 2 to save a third of runtime overhead, but substantially reduces reverb saturation.
-// Similarly this can be increased from 3, but likely won't have beneficial outcomes worth the runtime expense compared to the modification of other parameters without using light settings.
-#define BETTER_REVERB_FILTER_COUNT_LIGHT 3
-#define BETTER_REVERB_GAIN_INDEX_LIGHT 0xA0 // Advanced parameter; used to tune the outputs of every filter except for the final one
-#define BETTER_REVERB_REVERB_INDEX_LIGHT 0x30 // Advanced parameter; used to tune the incoming output of the final filter
-#define BETTER_REVERB_MULTIPLE_LIGHT 0xD0 // Advanced parameter; multiplier applied to the final output signal for both the left and right channels (divided by 256)
+// Filter count works differently than normal when used with light settings and can support numbers that are not multiples of 3.
+// A value of 2 is generally recommended, as it behaves most similarly to non-lightweight reverb.
+// This can be reduced to 1 to save additional runtime overhead, but will reduce some reverb saturation as a consequence.
+// Similarly, this can be increased above 2, but doing so likely won't have beneficial outcomes worth the runtime expense compared to modifying other parameters without using light settings.
+#define BETTER_REVERB_FILTER_COUNT_LIGHT 2
+#define BETTER_REVERB_GAIN_INDEX_LIGHT 0xA0 // Advanced parameter; used to tune the outputs of every filter except for the final one (multiples of 0x10 will compile more efficiently)
+#define BETTER_REVERB_REVERB_INDEX_LIGHT 0x30 // Advanced parameter; used to tune the reuse of the previously processed output sample (multiples of 0x10 will compile more efficiently)


 /* ------------ BETTER REVERB EXTERNED VARIABLES ------------ */
@@ -74,7 +74,7 @@ void set_better_reverb_buffers(u32 *inputDelaysL, u32 *inputDelaysR);
 /* -------------- BETTER REVERB STATIC ASSERTS -------------- */

 STATIC_ASSERT(NUM_ALLPASS % 3 == 0, "NUM_ALLPASS must be a multiple of 3!");
-STATIC_ASSERT(BETTER_REVERB_FILTER_COUNT_LIGHT >= 2, "BETTER_REVERB_FILTER_COUNT_LIGHT should be no less than 2!");
+STATIC_ASSERT(BETTER_REVERB_FILTER_COUNT_LIGHT > 0, "BETTER_REVERB_FILTER_COUNT_LIGHT must be greater than 0!");
 STATIC_ASSERT(BETTER_REVERB_FILTER_COUNT_LIGHT <= NUM_ALLPASS, "BETTER_REVERB_FILTER_COUNT_LIGHT cannot be larger than NUM_ALLPASS!");

 #else