From dcfcf50a802a6e2635cd694f14a11e718f51dd67 Mon Sep 17 00:00:00 2001
From: arthurtilly <32559225+arthurtilly@users.noreply.github.com>
Date: Fri, 15 Dec 2023 17:24:03 +1300
Subject: [PATCH 01/23] wall fix (#734)

---
 data/behavior_data.c           |  2 +-
 src/engine/surface_collision.c | 31 ++++++++++++++++---------------
 src/engine/surface_load.c      | 14 --------------
 3 files changed, 17 insertions(+), 30 deletions(-)

diff --git a/data/behavior_data.c b/data/behavior_data.c
index 2e2c2dc1d..07803591a 100644
--- a/data/behavior_data.c
+++ b/data/behavior_data.c
@@ -5873,7 +5873,7 @@ const BehaviorScript bhvRacingPenguin[] = {
     OR_INT(oFlags, (OBJ_FLAG_COMPUTE_ANGLE_TO_MARIO | OBJ_FLAG_ACTIVE_FROM_AFAR | OBJ_FLAG_COMPUTE_DIST_TO_MARIO | OBJ_FLAG_SET_FACE_YAW_TO_MOVE_YAW | OBJ_FLAG_UPDATE_GFX_POS_AND_ANGLE)),
     LOAD_ANIMATIONS(oAnimations, penguin_seg5_anims_05008B74),
     ANIMATE(PENGUIN_ANIM_IDLE),
-    SET_OBJ_PHYSICS(/*Wall hitbox radius*/ 300, /*Gravity*/ -800, /*Bounciness*/ -5, /*Drag strength*/ 0, /*Friction*/ 0, /*Buoyancy*/ 0, /*Unused*/ 0, 0),
+    SET_OBJ_PHYSICS(/*Wall hitbox radius*/ 200, /*Gravity*/ -800, /*Bounciness*/ -5, /*Drag strength*/ 0, /*Friction*/ 0, /*Buoyancy*/ 0, /*Unused*/ 0, 0),
     SCALE(/*Unused*/ 0, /*Field*/ 400),
     CALL_NATIVE(bhv_racing_penguin_init),
     BEGIN_LOOP(),
diff --git a/src/engine/surface_collision.c b/src/engine/surface_collision.c
index 007c5748c..8b54ef8af 100644
--- a/src/engine/surface_collision.c
+++ b/src/engine/surface_collision.c
@@ -63,11 +63,6 @@ static s32 find_wall_collisions_from_list(struct SurfaceNode *surfaceNode, struc
     TerrainData type = SURFACE_DEFAULT;
     s32 numCols = 0;
 
-    // Max collision radius = 200
-    if (radius > 200) {
-        radius = 200;
-    }
-
     f32 margin_radius = radius - 1.0f;
 
     // Stay in this loop until out of walls.
@@ -216,19 +211,25 @@ s32 find_wall_collisions(struct WallCollisionData *colData) {
     }
 
     // World (level) consists of a 16x16 grid. Find where the collision is on the grid (round toward -inf)
-    s32 cellX = GET_CELL_COORD(x);
-    s32 cellZ = GET_CELL_COORD(z);
+    s32 minCellX = GET_CELL_COORD(x - colData->radius);
+    s32 minCellZ = GET_CELL_COORD(z - colData->radius);
+    s32 maxCellX = GET_CELL_COORD(x + colData->radius);
+    s32 maxCellZ = GET_CELL_COORD(z + colData->radius);
 
-    if (!(gCollisionFlags & COLLISION_FLAG_EXCLUDE_DYNAMIC)) {
-        // Check for surfaces belonging to objects.
-        node = gDynamicSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WALLS].next;
-        numCollisions += find_wall_collisions_from_list(node, colData);
+    for (s32 cellX = minCellX; cellX <= maxCellX; cellX++) {
+        for (s32 cellZ = minCellZ; cellZ <= maxCellZ; cellZ++) {
+            if (!(gCollisionFlags & COLLISION_FLAG_EXCLUDE_DYNAMIC)) {
+                // Check for surfaces belonging to objects.
+                node = gDynamicSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WALLS].next;
+                numCollisions += find_wall_collisions_from_list(node, colData);
+            }
+
+            // Check for surfaces that are a part of level geometry.
+            node = gStaticSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WALLS].next;
+            numCollisions += find_wall_collisions_from_list(node, colData);
+        }
     }
 
-    // Check for surfaces that are a part of level geometry.
-    node = gStaticSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WALLS].next;
-    numCollisions += find_wall_collisions_from_list(node, colData);
-
     gCollisionFlags &= ~(COLLISION_FLAG_RETURN_FIRST | COLLISION_FLAG_EXCLUDE_DYNAMIC | COLLISION_FLAG_INCLUDE_INTANGIBLE);
 #ifdef VANILLA_DEBUG
     // Increment the debug tracker.
diff --git a/src/engine/surface_load.c b/src/engine/surface_load.c
index 4474bbb2e..f2444fb73 100644
--- a/src/engine/surface_load.c
+++ b/src/engine/surface_load.c
@@ -187,13 +187,6 @@ static s32 lower_cell_index(s32 coord) {
     // [0, NUM_CELLS)
     s32 index = coord / CELL_SIZE;
 
-    // Include extra cell if close to boundary
-    //! Some wall checks are larger than the buffer, meaning wall checks can
-    //  miss walls that are near a cell border.
-    if (coord % CELL_SIZE < 50) {
-        index--;
-    }
-
     // Potentially > NUM_CELLS - 1, but since the upper index is <= NUM_CELLS - 1, not exploitable
     return MAX(0, index);
 }
@@ -213,13 +206,6 @@ static s32 upper_cell_index(s32 coord) {
     // [0, NUM_CELLS)
     s32 index = coord / CELL_SIZE;
 
-    // Include extra cell if close to boundary
-    //! Some wall checks are larger than the buffer, meaning wall checks can
-    //  miss walls that are near a cell border.
-    if (coord % CELL_SIZE > CELL_SIZE - 50) {
-        index++;
-    }
-
     // Potentially < 0, but since lower index is >= 0, not exploitable
     return MIN((NUM_CELLS - 1), index);
 }

From 16467023937b3bbc786181a5e8d9efbf4d52f718 Mon Sep 17 00:00:00 2001
From: arthurtilly <32559225+arthurtilly@users.noreply.github.com>
Date: Fri, 15 Dec 2023 17:24:35 +1300
Subject: [PATCH 02/23] shorten call_native command (#733)

---
 data/behavior_data.c         |  7 +++----
 src/engine/behavior_script.c | 25 +++++++++++++------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/data/behavior_data.c b/data/behavior_data.c
index 07803591a..a0d4303e8 100644
--- a/data/behavior_data.c
+++ b/data/behavior_data.c
@@ -66,6 +66,7 @@
 #define BC_HH(a, b) (_SHIFTL(a, 16, 16) | _SHIFTL(b, 0, 16))
 #define BC_W(a) ((uintptr_t)(u32)(a))
 #define BC_PTR(a) ((uintptr_t)(a))
+#define BC_BPTR(a, b) (_SHIFTL(a, 24, 8) + OS_K0_TO_PHYSICAL(b))
 
 enum BehaviorCommands {
     /*0x00*/ BHV_CMD_BEGIN,
@@ -180,8 +181,7 @@ enum BehaviorCommands {
 
 // Executes a native game function.
 #define CALL_NATIVE(func) \
-    BC_B(BHV_CMD_CALL_NATIVE), \
-    BC_PTR(func)
+    BC_BPTR(BHV_CMD_CALL_NATIVE, func)
 
 // Adds a float to the specified field.
 #define ADD_FLOAT(field, value) \
@@ -386,8 +386,7 @@ enum BehaviorCommands {
 
 // Spawns a water droplet with the given parameters.
 #define SPAWN_WATER_DROPLET(dropletParams) \
-    BC_B(BHV_CMD_SPAWN_WATER_DROPLET), \
-    BC_PTR(dropletParams)
+    BC_BPTR(BHV_CMD_SPAWN_WATER_DROPLET, dropletParams)
 
 
 const BehaviorScript bhvStarDoor[] = {
diff --git a/src/engine/behavior_script.c b/src/engine/behavior_script.c
index 299eec42f..9ae8b9004 100644
--- a/src/engine/behavior_script.c
+++ b/src/engine/behavior_script.c
@@ -17,16 +17,17 @@
 #include "game/puppylights.h"
 
 // Macros for retrieving arguments from behavior scripts.
-#define BHV_CMD_GET_1ST_U8(index)  (u8)((gCurBhvCommand[index] >> 24) & 0xFF) // unused
-#define BHV_CMD_GET_2ND_U8(index)  (u8)((gCurBhvCommand[index] >> 16) & 0xFF)
-#define BHV_CMD_GET_3RD_U8(index)  (u8)((gCurBhvCommand[index] >> 8) & 0xFF)
-#define BHV_CMD_GET_4TH_U8(index)  (u8)((gCurBhvCommand[index]) & 0xFF)
+#define BHV_CMD_GET_1ST_U8(index)     (u8)((gCurBhvCommand[index] >> 24) & 0xFF) // unused
+#define BHV_CMD_GET_2ND_U8(index)     (u8)((gCurBhvCommand[index] >> 16) & 0xFF)
+#define BHV_CMD_GET_3RD_U8(index)     (u8)((gCurBhvCommand[index] >> 8) & 0xFF)
+#define BHV_CMD_GET_4TH_U8(index)     (u8)((gCurBhvCommand[index]) & 0xFF)
 
-#define BHV_CMD_GET_1ST_S16(index) (s16)(gCurBhvCommand[index] >> 16)
-#define BHV_CMD_GET_2ND_S16(index) (s16)(gCurBhvCommand[index] & 0xFFFF)
+#define BHV_CMD_GET_1ST_S16(index)    (s16)(gCurBhvCommand[index] >> 16)
+#define BHV_CMD_GET_2ND_S16(index)    (s16)(gCurBhvCommand[index] & 0xFFFF)
 
-#define BHV_CMD_GET_U32(index)     (u32)(gCurBhvCommand[index])
-#define BHV_CMD_GET_VPTR(index)    (void *)(gCurBhvCommand[index])
+#define BHV_CMD_GET_U32(index)        (u32)(gCurBhvCommand[index])
+#define BHV_CMD_GET_VPTR(index)       (void *)(gCurBhvCommand[index])
+#define BHV_CMD_GET_VPTR_SMALL(index) (void *)(OS_PHYSICAL_TO_K0(gCurBhvCommand[index] & 0xFFFFFF))
 
 #define BHV_CMD_GET_ADDR_OF_CMD(index) (uintptr_t)(&gCurBhvCommand[index])
 
@@ -316,11 +317,11 @@ static s32 bhv_cmd_end_loop(void) {
 // Usage: CALL_NATIVE(func)
 typedef void (*NativeBhvFunc)(void);
 static s32 bhv_cmd_call_native(void) {
-    NativeBhvFunc behaviorFunc = BHV_CMD_GET_VPTR(1);
+    NativeBhvFunc behaviorFunc = BHV_CMD_GET_VPTR_SMALL(0);
 
     behaviorFunc();
 
-    gCurBhvCommand += 2;
+    gCurBhvCommand++;
     return BHV_PROC_CONTINUE;
 }
 
@@ -734,11 +735,11 @@ static s32 bhv_cmd_parent_bit_clear(void) {
 // Command 0x37: Spawns a water droplet with the given parameters.
 // Usage: SPAWN_WATER_DROPLET(dropletParams)
 static s32 bhv_cmd_spawn_water_droplet(void) {
-    struct WaterDropletParams *dropletParams = BHV_CMD_GET_VPTR(1);
+    struct WaterDropletParams *dropletParams = BHV_CMD_GET_VPTR_SMALL(0);
 
     spawn_water_droplet(gCurrentObject, dropletParams);
 
-    gCurBhvCommand += 2;
+    gCurBhvCommand++;
     return BHV_PROC_CONTINUE;
 }
 

From 76dae5e8dd2c2d3b59478a96ba8f1278c32a9be7 Mon Sep 17 00:00:00 2001
From: someone2639 <someone2639@users.noreply.github.com>
Date: Thu, 14 Dec 2023 23:27:50 -0500
Subject: [PATCH 03/23] Add file select skip define (#723)

* Add config define to skip file select

* Implement file select skip

* Add more coverage to the skip file select tweak

---------

Co-authored-by: someone2639 <someone2639@gmail.com>
---
 include/config/config_cutscenes.h | 6 ++++++
 levels/menu/script.c              | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/include/config/config_cutscenes.h b/include/config/config_cutscenes.h
index e1b0bdc5d..ab2616b30 100644
--- a/include/config/config_cutscenes.h
+++ b/include/config/config_cutscenes.h
@@ -17,3 +17,9 @@
  * Skips the title/splash screen (Super Mario 64 logo).
  */
 // #define SKIP_TITLE_SCREEN
+
+/**
+ * Skips the File Select screen
+ */
+// #define SKIP_FILE_SELECT
+
diff --git a/levels/menu/script.c b/levels/menu/script.c
index a4b032e2f..72474a6cc 100644
--- a/levels/menu/script.c
+++ b/levels/menu/script.c
@@ -6,6 +6,8 @@
 #include "segment_symbols.h"
 #include "level_commands.h"
 
+#include "config/config_cutscenes.h"
+
 #include "game/area.h"
 #include "game/level_update.h"
 #include "menu/file_select.h"
@@ -19,6 +21,7 @@
 #include "levels/menu/header.h"
 
 const LevelScript level_main_menu_entry_file_select[] = {
+#ifndef SKIP_FILE_SELECT
     INIT_LEVEL(),
     LOAD_GODDARD(),
     LOAD_LEVEL_DATA(menu),
@@ -53,6 +56,7 @@ const LevelScript level_main_menu_entry_file_select[] = {
     SLEEP(/*frames*/ 16),
     CLEAR_LEVEL(),
     SLEEP_BEFORE_EXIT(/*frames*/ 1),
+#endif // SKIP_FILE_SELECT
     SET_REG(/*value*/ START_LEVEL),
     EXIT_AND_EXECUTE(/*seg*/ SEGMENT_GLOBAL_LEVEL_SCRIPT, _scriptsSegmentRomStart, _scriptsSegmentRomEnd, level_main_scripts_entry),
 };

From 9fc928ed70303d101714ca76fa1dbf9888d6f86d Mon Sep 17 00:00:00 2001
From: arthurtilly <32559225+arthurtilly@users.noreply.github.com>
Date: Sat, 16 Dec 2023 02:48:32 +1300
Subject: [PATCH 04/23] Remove OBJECTS_REJ and clean up core render func (By
 Fazana) (#735)

* crowd pleaser

* Fix debug boxes

* fix comments

* renamed temp graphics head to be consistent

---------

Co-authored-by: Fazana <52551480+FazanaJ@users.noreply.github.com>
---
 include/config/config_safeguards.h |  20 ---
 include/object_constants.h         |   2 -
 include/types.h                    |   3 -
 src/engine/behavior_script.c       |  13 --
 src/engine/graph_node.h            |  13 +-
 src/game/debug_box.c               |  53 +++-----
 src/game/debug_box.h               |  12 +-
 src/game/game_init.c               |   4 -
 src/game/rendering_graph_node.c    | 198 +++++++----------------------
 src/game/rendering_graph_node.h    |  36 +-----
 src/game/spawn_object.c            |   3 -
 11 files changed, 75 insertions(+), 282 deletions(-)

diff --git a/include/config/config_safeguards.h b/include/config/config_safeguards.h
index 3a7b96594..271544453 100644
--- a/include/config/config_safeguards.h
+++ b/include/config/config_safeguards.h
@@ -49,26 +49,6 @@
     #undef BETTER_REVERB
 #endif
 
-
-/*****************
- * config_graphics.h
- */
-
-#ifndef F3DEX_GBI_2
-    #undef OBJECTS_REJ // OBJECTS_REJ requires f3dex2.
-#endif // !F3DEX_GBI_2
-
-#ifndef F3DEX_GBI_SHARED
-    #undef OBJECTS_REJ // Non F3DEX-based ucodes do NOT support ucode switching.
-#endif // !F3DEX_GBI_SHARED
-
-#ifdef OBJECTS_REJ
-    // Enable required ucodes.
-    #define F3DEX2_REJ_GBI
-    #define F3DLX2_REJ_GBI
-#endif // OBJECTS_REJ
-
-
 /*****************
  * config_debug.h
  */
diff --git a/include/object_constants.h b/include/object_constants.h
index 7ee6ad171..949dd8d6c 100644
--- a/include/object_constants.h
+++ b/include/object_constants.h
@@ -49,8 +49,6 @@ enum ObjFlags {
     OBJ_FLAG_PERSISTENT_RESPAWN                = (1 << 14), // 0x00004000
     OBJ_FLAG_VELOCITY_PLATFORM                 = (1 << 15), // 0x00008000
     OBJ_FLAG_DONT_CALC_COLL_DIST               = (1 << 16), // 0x00010000
-    OBJ_FLAG_UCODE_SMALL                       = (1 << 17), // 0x00020000
-    OBJ_FLAG_UCODE_LARGE                       = (1 << 18), // 0x00040000
     OBJ_FLAG_SILHOUETTE                        = (1 << 19), // 0x00080000
     OBJ_FLAG_OCCLUDE_SILHOUETTE                = (1 << 20), // 0x00100000
     OBJ_FLAG_OPACITY_FROM_CAMERA_DIST          = (1 << 21), // 0x00200000
diff --git a/include/types.h b/include/types.h
index 81b4813f6..eeeeac153 100644
--- a/include/types.h
+++ b/include/types.h
@@ -248,9 +248,6 @@ struct GraphNodeObject {
     /*0x4C*/ struct SpawnInfo *spawnInfo;
     /*0x50*/ Mat4 *throwMatrix; // matrix ptr
     /*0x54*/ Vec3f cameraToObject;
-#ifdef OBJECTS_REJ
-    u16 ucode;
-#endif
 };
 
 struct ObjectNode {
diff --git a/src/engine/behavior_script.c b/src/engine/behavior_script.c
index 9ae8b9004..8c918392f 100644
--- a/src/engine/behavior_script.c
+++ b/src/engine/behavior_script.c
@@ -50,11 +50,6 @@ void obj_update_gfx_pos_and_angle(struct Object *obj) {
 #define OBJ_OPACITY_LENGTH 512.0f
 void obj_set_opacity_from_cam_dist(struct Object *obj) {
     s32 opacityDist = ((-obj->header.gfx.cameraToObject[2] - OBJ_OPACITY_NEAR) * (256.0f / OBJ_OPACITY_LENGTH));
-#ifdef OBJECTS_REJ
-    if (opacityDist > 0) {
-        obj->header.gfx.ucode = GRAPH_NODE_UCODE_REJ;
-    }
-#endif
     obj->oOpacity = CLAMP(opacityDist, 0x00, 0xFF);
 }
 #undef OBJ_OPACITY_NEAR
@@ -910,14 +905,6 @@ void cur_obj_update(void) {
     COND_BIT((objFlags & OBJ_FLAG_OCCLUDE_SILHOUETTE), o->header.gfx.node.flags, GRAPH_RENDER_OCCLUDE_SILHOUETTE);
 #endif
 
-#ifdef OBJECTS_REJ
-    if ((objFlags & OBJ_FLAG_SILHOUETTE) || (objFlags & OBJ_FLAG_UCODE_SMALL)) {
-        o->header.gfx.ucode = GRAPH_NODE_UCODE_REJ;
-    } else {
-        o->header.gfx.ucode = GRAPH_NODE_UCODE_DEFAULT;
-    }
-#endif
-
 #ifdef OBJ_OPACITY_BY_CAM_DIST
     if (objFlags & OBJ_FLAG_OPACITY_FROM_CAMERA_DIST) {
         obj_set_opacity_from_cam_dist(o);
diff --git a/src/engine/graph_node.h b/src/engine/graph_node.h
index 4d0d4d2a9..bccb8f2b3 100644
--- a/src/engine/graph_node.h
+++ b/src/engine/graph_node.h
@@ -9,15 +9,6 @@
 #include "geo_commands.h"
 #include "game/memory.h"
 
-// UCode indices for listHeads & listTails
-enum GraphNodeUCodes {
-    GRAPH_NODE_UCODE_DEFAULT,
-#ifdef OBJECTS_REJ
-    GRAPH_NODE_UCODE_REJ,
-#endif
-    GRAPH_NODE_NUM_UCODES,
-};
-
 enum GraphRenderFlags {
     GRAPH_RENDER_ACTIVE             = (1 << 0), // 0x0001
     GRAPH_RENDER_CHILDREN_FIRST     = (1 << 1), // 0x0002
@@ -156,8 +147,8 @@ struct DisplayListNode {
  */
 struct GraphNodeMasterList {
     /*0x00*/ struct GraphNode node;
-    /*0x14*/ struct DisplayListNode *listHeads[GRAPH_NODE_NUM_UCODES][LAYER_COUNT];
-    /*0x34*/ struct DisplayListNode *listTails[GRAPH_NODE_NUM_UCODES][LAYER_COUNT];
+    /*0x14*/ struct DisplayListNode *listHeads[LAYER_COUNT];
+    /*0x34*/ struct DisplayListNode *listTails[LAYER_COUNT];
 };
 
 /** Simply used as a parent to group multiple children.
diff --git a/src/game/debug_box.c b/src/game/debug_box.c
index cd7a9e73c..f186c8e96 100644
--- a/src/game/debug_box.c
+++ b/src/game/debug_box.c
@@ -263,15 +263,9 @@ void iterate_surfaces_envbox(Vtx *verts) {
 }
 
 // VERTCOUNT = The highest number divisible by 6, which is less than the maximum vertex buffer divided by 2.
-// The vertex buffer is 64 if OBJECTS_REJ is enabled, 32 otherwise.
-//! TODO: Why can this only use half of the vertex buffer?
-#ifdef OBJECTS_REJ
 #define VERTCOUNT 30
-#else
-#define VERTCOUNT 12
-#endif // OBJECTS_REJ
 
-void visual_surface_display(Vtx *verts, s32 iteration) {
+void visual_surface_display(Gfx **gfx, Vtx *verts, s32 iteration) {
     s32 vts = (iteration ? gVisualOffset : gVisualSurfaceCount);
     s32 vtl = 0;
     s32 count = VERTCOUNT;
@@ -280,13 +274,13 @@ void visual_surface_display(Vtx *verts, s32 iteration) {
     while (vts > 0) {
         if (count == VERTCOUNT) {
             ntx = MIN(VERTCOUNT, vts);
-            gSPVertex(gDisplayListHead++, VIRTUAL_TO_PHYSICAL(verts + (gVisualSurfaceCount - vts)), ntx, 0);
+            gSPVertex((*gfx)++, VIRTUAL_TO_PHYSICAL(verts + (gVisualSurfaceCount - vts)), ntx, 0);
             count = 0;
             vtl   = VERTCOUNT;
         }
 
         if (vtl >= 6) {
-            gSP2Triangles(gDisplayListHead++, (count + 0),
+            gSP2Triangles((*gfx)++, (count + 0),
                                               (count + 1),
                                               (count + 2), 0x0,
                                               (count + 3),
@@ -296,7 +290,7 @@ void visual_surface_display(Vtx *verts, s32 iteration) {
             vtl   -= 6;
             count += 6;
         } else if (vtl >= 3) {
-            gSP1Triangle(gDisplayListHead++, (count + 0),
+            gSP1Triangle((*gfx)++, (count + 0),
                                              (count + 1),
                                              (count + 2), 0x0);
             vts   -= 3;
@@ -343,7 +337,7 @@ s32 iterate_surface_count(s32 x, s32 z) {
     return j;
 }
 
-void visual_surface_loop(void) {
+void visual_surface_loop(Gfx **gfx) {
     if (!gSurfaceNodesAllocated
      || !gSurfacesAllocated
      || !gMarioState->marioObj) {
@@ -358,19 +352,20 @@ void visual_surface_loop(void) {
         return;
     }
 
-    gSPDisplayList(gDisplayListHead++, dl_visual_surface);
+    gSPDisplayList((*gfx)++, dl_visual_surface);
 
     iterate_surfaces_visual(gMarioState->pos[0], gMarioState->pos[2], verts);
 
-    visual_surface_display(verts, 0);
+    visual_surface_display(gfx, verts, 0);
 
+    gDPPipeSync((*gfx)++);
     iterate_surfaces_envbox(verts);
 
-    gDPSetRenderMode(gDisplayListHead++, G_RM_ZB_XLU_SURF, G_RM_NOOP2);
+    gDPSetRenderMode((*gfx)++, G_RM_ZB_XLU_SURF, G_RM_NOOP2);
 
-    visual_surface_display(verts, 1);
+    visual_surface_display(gfx, verts, 1);
 
-    gSPDisplayList(gDisplayListHead++, dl_debug_box_end);
+    gSPDisplayList((*gfx)++, dl_debug_box_end);
 }
 
 /**
@@ -388,9 +383,6 @@ static void append_debug_box(Vec3f center, Vec3f bounds, s16 yaw, s32 type) {
         sBoxes[sNumBoxes].yaw   = yaw;
         sBoxes[sNumBoxes].color = sCurBoxColor;
         sBoxes[sNumBoxes].type  = type;
-        if (!(sBoxes[sNumBoxes].type & (DEBUG_UCODE_REJ | DEBUG_UCODE_DEFAULT))) {
-            sBoxes[sNumBoxes].type |= DEBUG_UCODE_DEFAULT;
-        }
         ++sNumBoxes;
     }
 }
@@ -448,7 +440,7 @@ void debug_box_pos_rot(Vec3f pMin, Vec3f pMax, s16 yaw, s32 type) {
     append_debug_box(center, bounds, yaw, type);
 }
 
-static void render_box(int index) {
+static void render_box(Gfx **gfx, int index) {
     struct DebugBox *box = &sBoxes[index];
     s32 color = box->color;
     Mat4 mtxFloat;
@@ -477,20 +469,20 @@ static void render_box(int index) {
     mtxf_to_mtx(mtx, mtxFloat);
 
     // Load the calculated matrix
-    gSPMatrix(gDisplayListHead++, mtx, G_MTX_MODELVIEW | G_MTX_LOAD | G_MTX_NOPUSH);
+    gSPMatrix((*gfx)++, mtx, G_MTX_MODELVIEW | G_MTX_LOAD | G_MTX_NOPUSH);
 
     // Set env color to the color of this box
-    gDPSetColor(gDisplayListHead++, G_SETENVCOLOR, color);
+    gDPSetColor((*gfx)++, G_SETENVCOLOR, color);
 
     if (box->type & DEBUG_SHAPE_BOX) {
-        gSPDisplayList(gDisplayListHead++, dl_debug_box_verts);
+        gSPDisplayList((*gfx)++, dl_debug_box_verts);
     }
     if (box->type & DEBUG_SHAPE_CYLINDER) {
-        gSPDisplayList(gDisplayListHead++, dl_debug_cylinder_verts);
+        gSPDisplayList((*gfx)++, dl_debug_cylinder_verts);
     }
 }
 
-void render_debug_boxes(s32 type) {
+void render_debug_boxes(Gfx **gfx) {
     s32 i;
 
     debug_box_color(DBG_BOX_DEF_COLOR);
@@ -498,17 +490,14 @@ void render_debug_boxes(s32 type) {
     if (sNumBoxes == 0) return;
     if (gAreaUpdateCounter < 3) return;
 
-    gSPDisplayList(gDisplayListHead++, dl_debug_box_begin);
+    gSPDisplayList((*gfx)++, dl_debug_box_begin);
 
     for (i = 0; i < sNumBoxes; ++i) {
-        if ((type & DEBUG_UCODE_DEFAULT) && (sBoxes[i].type & DEBUG_UCODE_DEFAULT)) render_box(i);
-        if ((type & DEBUG_UCODE_REJ    ) && (sBoxes[i].type & DEBUG_UCODE_REJ    )) render_box(i);
+        render_box(gfx, i);
     }
 
-    if (type & DEBUG_BOX_CLEAR) {
-        sNumBoxes = 0;
-    }
-    gSPDisplayList(gDisplayListHead++, dl_debug_box_end);
+    sNumBoxes = 0;
+    gSPDisplayList((*gfx)++, dl_debug_box_end);
 }
 
 #endif
diff --git a/src/game/debug_box.h b/src/game/debug_box.h
index 1041c51c6..0786d77b2 100644
--- a/src/game/debug_box.h
+++ b/src/game/debug_box.h
@@ -19,13 +19,7 @@
 enum DebugBoxFlags {
     DEBUG_SHAPE_BOX      = (1 << 0), // 0x01
     DEBUG_SHAPE_CYLINDER = (1 << 1), // 0x02
-    DEBUG_UCODE_DEFAULT  = (1 << 2), // 0x04
-#ifdef OBJECTS_REJ
-    DEBUG_UCODE_REJ      = (1 << 3), // 0x08
-#else
-    DEBUG_UCODE_REJ      = DEBUG_UCODE_DEFAULT,
-#endif
-    DEBUG_BOX_CLEAR      = (1 << 4), // 0x10
+    DEBUG_BOX_CLEAR      = (1 << 2), // 0x04
 };
 
 extern u8 hitboxView;
@@ -39,8 +33,8 @@ void debug_box_rot(Vec3f center, Vec3f bounds, s16 yaw, s32 type);
 void debug_box_pos(Vec3f pMin, Vec3f pMax, s32 type);
 void debug_box_pos_rot(Vec3f pMin, Vec3f pMax, s16 yaw, s32 type);
 
-void render_debug_boxes(s32 type);
-extern void visual_surface_loop(void);
+void render_debug_boxes(Gfx **gfx);
+extern void visual_surface_loop(Gfx **gfx);
 
 #endif
 
diff --git a/src/game/game_init.c b/src/game/game_init.c
index 4d92e9aa1..4b415138f 100644
--- a/src/game/game_init.c
+++ b/src/game/game_init.c
@@ -277,11 +277,7 @@ void create_gfx_task_structure(void) {
     gGfxSPTask->task.t.type = M_GFXTASK;
     gGfxSPTask->task.t.ucode_boot = rspbootTextStart;
     gGfxSPTask->task.t.ucode_boot_size = ((u8 *) rspbootTextEnd - (u8 *) rspbootTextStart);
-#if defined(F3DEX_GBI_SHARED) && defined(OBJECTS_REJ)
     gGfxSPTask->task.t.flags = (OS_TASK_LOADABLE | OS_TASK_DP_WAIT);
-#else
-    gGfxSPTask->task.t.flags = 0x0;
-#endif
 #ifdef  L3DEX2_ALONE
     gGfxSPTask->task.t.ucode = gspL3DEX2_fifoTextStart;
     gGfxSPTask->task.t.ucode_data = gspL3DEX2_fifoDataStart;
diff --git a/src/game/rendering_graph_node.c b/src/game/rendering_graph_node.c
index 4fc8c7e5c..9e33e9ad4 100644
--- a/src/game/rendering_graph_node.c
+++ b/src/game/rendering_graph_node.c
@@ -195,76 +195,10 @@ static const Gfx dl_silhouette_end[] = {
 struct RenderPhase {
     u8 startLayer;
     u8 endLayer;
-#ifdef OBJECTS_REJ
-    u8 ucode;
-#endif
 };
 
 static struct RenderPhase sRenderPhases[] = {
-#ifdef OBJECTS_REJ
- #if SILHOUETTE
-    // Silhouette, .rej
-    [RENDER_PHASE_ZEX_BEFORE_SILHOUETTE]   = {
-        .startLayer = LAYER_FIRST,
-        .endLayer   = LAYER_LAST_BEFORE_SILHOUETTE,
-        .ucode      = GRAPH_NODE_UCODE_DEFAULT
-    },
-    [RENDER_PHASE_REJ_ZB]                  = {
-        .startLayer = LAYER_ZB_FIRST,
-        .endLayer   = LAYER_LAST_BEFORE_SILHOUETTE,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
-    [RENDER_PHASE_REJ_SILHOUETTE]          = {
-        .startLayer = LAYER_SILHOUETTE_FIRST,
-        .endLayer   = LAYER_SILHOUETTE_LAST,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
-    [RENDER_PHASE_REJ_NON_SILHOUETTE]      = {
-        .startLayer = LAYER_SILHOUETTE_FIRST,
-        .endLayer   = LAYER_SILHOUETTE_LAST,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
-    [RENDER_PHASE_REJ_OCCLUDE_SILHOUETTE]  = {
-        .startLayer = LAYER_OCCLUDE_SILHOUETTE_FIRST,
-        .endLayer   = LAYER_OCCLUDE_SILHOUETTE_LAST,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
-    [RENDER_PHASE_ZEX_AFTER_SILHOUETTE]    = {
-        .startLayer = LAYER_OCCLUDE_SILHOUETTE_FIRST,
-        .endLayer   = LAYER_LAST,
-        .ucode      = GRAPH_NODE_UCODE_DEFAULT
-    },
-    [RENDER_PHASE_REJ_NON_ZB]              = {
-        .startLayer = LAYER_NON_ZB_FIRST,
-        .endLayer   = LAYER_LAST,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
- #else
-    // No silhouette, .rej
-    [RENDER_PHASE_ZEX_BG]                  = {
-        .startLayer = LAYER_FIRST,
-        .endLayer   = LAYER_FIRST,
-        .ucode      = GRAPH_NODE_UCODE_DEFAULT
-    },
-    [RENDER_PHASE_REJ_ZB]                  = {
-        .startLayer = LAYER_ZB_FIRST,
-        .endLayer   = LAYER_ZB_LAST,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
-    [RENDER_PHASE_ZEX_ALL]                 = {
-        .startLayer = LAYER_ZB_FIRST,
-        .endLayer   = LAYER_LAST,
-        .ucode      = GRAPH_NODE_UCODE_DEFAULT
-    },
-    [RENDER_PHASE_REJ_NON_ZB]              = {
-        .startLayer = LAYER_NON_ZB_FIRST,
-        .endLayer   = LAYER_LAST,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
- #endif
-#else
- #if SILHOUETTE
-    // Silhouette, no .rej
+#if SILHOUETTE
     [RENDER_PHASE_ZEX_BEFORE_SILHOUETTE]   = {
         .startLayer = LAYER_FIRST,
         .endLayer   = LAYER_LAST_BEFORE_SILHOUETTE,
@@ -289,46 +223,16 @@ static struct RenderPhase sRenderPhases[] = {
         .startLayer = LAYER_OCCLUDE_SILHOUETTE_FIRST,
         .endLayer   = LAYER_LAST,
     },
-
- #else
-    // No silhouette, no .rej
+#else
     [RENDER_PHASE_ZEX_ALL]                 = {
         .startLayer = LAYER_FIRST,
         .endLayer   = LAYER_LAST,
     },
-
- #endif
 #endif
 };
 
 extern const Gfx init_rsp[];
 
-#ifdef OBJECTS_REJ
-void switch_ucode(s32 ucode) {
-    // Set the ucode and RCP settings
-    switch (ucode) {
-        default: // GRAPH_NODE_UCODE_DEFAULT
-        case GRAPH_NODE_UCODE_DEFAULT:
-            gSPLoadUcodeL(gDisplayListHead++, gspF3DZEX2_NoN_PosLight_fifo); // F3DZEX2_PosLight
-            // Reload the necessary RSP settings
-            gSPDisplayList(gDisplayListHead++, init_rsp);
-            break;
-        case GRAPH_NODE_UCODE_REJ:
-            // Use .rej Microcode, skip sub-pixel processing on console
-            if (gEmulator & EMU_CONSOLE) {
-                gSPLoadUcodeL(gDisplayListHead++, gspF3DLX2_Rej_fifo); // F3DLX2_Rej
-            } else {
-                gSPLoadUcodeL(gDisplayListHead++, gspF3DEX2_Rej_fifo); // F3DEX2_Rej
-            }
-            // Reload the necessary RSP settings
-            gSPDisplayList(gDisplayListHead++, init_rsp);
-            // Set the clip ratio (see init_rsp)
-            gSPClipRatio(gDisplayListHead++, FRUSTRATIO_2);
-            break;
-    }
-}
-#endif
-
 #define UPPER_FIXED(x) ((int)((unsigned int)((x) * 0x10000) >> 16))
 #define LOWER_FIXED(x) ((int)((unsigned int)((x) * 0x10000) & 0xFFFF))
 
@@ -357,67 +261,65 @@ void geo_process_master_list_sub(struct GraphNodeMasterList *node) {
     s32 currLayer     = LAYER_FIRST;
     s32 startLayer    = LAYER_FIRST;
     s32 endLayer      = LAYER_LAST;
-    s32 ucode         = GRAPH_NODE_UCODE_DEFAULT;
     s32 phaseIndex    = RENDER_PHASE_FIRST;
     s32 enableZBuffer = (node->node.flags & GRAPH_RENDER_Z_BUFFER) != 0;
+    s32 finalPhase    = enableZBuffer ? RENDER_PHASE_END : 1;
     struct RenderModeContainer *mode1List = &renderModeTable_1Cycle[enableZBuffer];
     struct RenderModeContainer *mode2List = &renderModeTable_2Cycle[enableZBuffer];
+    Gfx *tempGfxHead = gDisplayListHead;
 
     // Loop through the render phases
-    for (phaseIndex = RENDER_PHASE_FIRST; phaseIndex < RENDER_PHASE_END; phaseIndex++) {
-        // Get the render phase information.
-        renderPhase = &sRenderPhases[phaseIndex];
-        startLayer  = renderPhase->startLayer;
-        endLayer    = renderPhase->endLayer;
-#ifdef OBJECTS_REJ
-        ucode       = renderPhase->ucode;
-        // Set the ucode for the current render phase
-        switch_ucode(ucode);
-        gSPLookAt(gDisplayListHead++, gCurLookAt);
-#endif
+    for (phaseIndex = RENDER_PHASE_FIRST; phaseIndex < finalPhase; phaseIndex++) {
         if (enableZBuffer) {
+            // Get the render phase information.
+            renderPhase = &sRenderPhases[phaseIndex];
+            startLayer  = renderPhase->startLayer;
+            endLayer    = renderPhase->endLayer;
             // Enable z buffer.
-            gDPPipeSync(gDisplayListHead++);
-            gSPSetGeometryMode(gDisplayListHead++, G_ZBUFFER);
+            gDPPipeSync(tempGfxHead++);
+            gSPSetGeometryMode(tempGfxHead++, G_ZBUFFER);
+        } else {
+            startLayer = LAYER_FORCE;
+            endLayer = LAYER_TRANSPARENT;
         }
         // Iterate through the layers on the current render phase.
         for (currLayer = startLayer; currLayer <= endLayer; currLayer++) {
             // Set 'currList' to the first DisplayListNode on the current layer.
-            currList = node->listHeads[ucode][currLayer];
+            currList = node->listHeads[currLayer];
 #if defined(DISABLE_AA) || !SILHOUETTE
             // Set the render mode for the current layer.
-            gDPSetRenderMode(gDisplayListHead++, mode1List->modes[currLayer],
+            gDPSetRenderMode(tempGfxHead++, mode1List->modes[currLayer],
                                                  mode2List->modes[currLayer]);
 #else
             if (phaseIndex == RENDER_PHASE_NON_SILHOUETTE) {
                 // To properly cover the silhouette, disable AA.
                 // The silhouette model does not have AA due to the hack used to prevent triangle overlap.
-                gDPSetRenderMode(gDisplayListHead++, (mode1List->modes[currLayer] & ~IM_RD),
+                gDPSetRenderMode(tempGfxHead++, (mode1List->modes[currLayer] & ~IM_RD),
                                                      (mode2List->modes[currLayer] & ~IM_RD));
             } else {
                 // Set the render mode for the current dl.
-                gDPSetRenderMode(gDisplayListHead++, mode1List->modes[currLayer],
+                gDPSetRenderMode(tempGfxHead++, mode1List->modes[currLayer],
                                                      mode2List->modes[currLayer]);
             }
 #endif
             // Iterate through all the displaylists on the current layer.
             while (currList != NULL) {
                 // Add the display list's transformation to the master list.
-                gSPMatrix(gDisplayListHead++, VIRTUAL_TO_PHYSICAL(currList->transform),
+                gSPMatrix(tempGfxHead++, VIRTUAL_TO_PHYSICAL(currList->transform),
                           (G_MTX_MODELVIEW | G_MTX_LOAD | G_MTX_NOPUSH));
 #if SILHOUETTE
                 if (phaseIndex == RENDER_PHASE_SILHOUETTE) {
                     // Add the current display list to the master list, with silhouette F3D.
-                    gSPDisplayList(gDisplayListHead++, dl_silhouette_begin);
-                    gSPDisplayList(gDisplayListHead++, currList->displayList);
-                    gSPDisplayList(gDisplayListHead++, dl_silhouette_end);
+                    gSPDisplayList(tempGfxHead++, dl_silhouette_begin);
+                    gSPDisplayList(tempGfxHead++, currList->displayList);
+                    gSPDisplayList(tempGfxHead++, dl_silhouette_end);
                 } else {
                     // Add the current display list to the master list.
-                    gSPDisplayList(gDisplayListHead++, currList->displayList);
+                    gSPDisplayList(tempGfxHead++, currList->displayList);
                 }
 #else
                 // Add the current display list to the master list.
-                gSPDisplayList(gDisplayListHead++, currList->displayList);
+                gSPDisplayList(tempGfxHead++, currList->displayList);
 #endif
                 // Move to the next DisplayListNode.
                 currList = currList->next;
@@ -427,21 +329,17 @@ void geo_process_master_list_sub(struct GraphNodeMasterList *node) {
 
     if (enableZBuffer) {
         // Disable z buffer.
-        gDPPipeSync(gDisplayListHead++);
-        gSPClearGeometryMode(gDisplayListHead++, G_ZBUFFER);
-    }
-#ifdef OBJECTS_REJ
- #if defined(F3DEX_GBI_2) && defined(VISUAL_DEBUG)
-    if (hitboxView) render_debug_boxes(DEBUG_UCODE_REJ);
- #endif
-    switch_ucode(GRAPH_NODE_UCODE_DEFAULT);
-#endif
+        gDPPipeSync(tempGfxHead++);
+        gSPClearGeometryMode(tempGfxHead++, G_ZBUFFER);
 #ifdef VISUAL_DEBUG
-    if ( hitboxView) render_debug_boxes(DEBUG_UCODE_DEFAULT | DEBUG_BOX_CLEAR);
-    // Load the world scale identity matrix
-    gSPMatrix(gDisplayListHead++, &identityMatrixWorldScale, G_MTX_MODELVIEW | G_MTX_LOAD | G_MTX_NOPUSH);
-    if (surfaceView) visual_surface_loop();
+        // Load the world scale identity matrix
+        gSPMatrix(tempGfxHead++, &identityMatrixWorldScale, G_MTX_MODELVIEW | G_MTX_LOAD | G_MTX_NOPUSH);
+        if (surfaceView) visual_surface_loop(&tempGfxHead);
+        render_debug_boxes(&tempGfxHead);
 #endif
+    }
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
@@ -450,16 +348,11 @@ void geo_process_master_list_sub(struct GraphNodeMasterList *node) {
  * render modes of layers.
  */
 void geo_append_display_list(void *displayList, s32 layer) {
-    s32 ucode = GRAPH_NODE_UCODE_DEFAULT;
 #ifdef F3DEX_GBI_2
     gSPLookAt(gDisplayListHead++, gCurLookAt);
 #endif
-#if defined(OBJECTS_REJ) || SILHOUETTE
+#if SILHOUETTE
     if (gCurGraphNodeObject != NULL) {
- #ifdef OBJECTS_REJ
-        ucode = gCurGraphNodeObject->ucode;
- #endif
- #if SILHOUETTE
         if (gCurGraphNodeObject->node.flags & GRAPH_RENDER_SILHOUETTE) {
             switch (layer) {
                 case LAYER_OPAQUE: layer = LAYER_SILHOUETTE_OPAQUE; break;
@@ -472,7 +365,6 @@ void geo_append_display_list(void *displayList, s32 layer) {
                 case LAYER_ALPHA:  layer = LAYER_OCCLUDE_SILHOUETTE_ALPHA;  break;
             }
         }
- #endif // SILHOUETTE
     }
 #endif // F3DEX_GBI_2 || SILHOUETTE
     if (gCurGraphNodeMasterList != NULL) {
@@ -482,12 +374,12 @@ void geo_append_display_list(void *displayList, s32 layer) {
         listNode->transform = gMatStackFixed[gMatStackIndex];
         listNode->displayList = displayList;
         listNode->next = NULL;
-        if (gCurGraphNodeMasterList->listHeads[ucode][layer] == NULL) {
-            gCurGraphNodeMasterList->listHeads[ucode][layer] = listNode;
+        if (gCurGraphNodeMasterList->listHeads[layer] == NULL) {
+            gCurGraphNodeMasterList->listHeads[layer] = listNode;
         } else {
-            gCurGraphNodeMasterList->listTails[ucode][layer]->next = listNode;
+            gCurGraphNodeMasterList->listTails[layer]->next = listNode;
         }
-        gCurGraphNodeMasterList->listTails[ucode][layer] = listNode;
+        gCurGraphNodeMasterList->listTails[layer] = listNode;
     }
 }
 
@@ -512,14 +404,12 @@ static void append_dl_and_return(struct GraphNodeDisplayList *node) {
  * Process the master list node.
  */
 void geo_process_master_list(struct GraphNodeMasterList *node) {
-    s32 ucode, layer;
+    s32 layer;
 
     if (gCurGraphNodeMasterList == NULL && node->node.children != NULL) {
         gCurGraphNodeMasterList = node;
-        for (ucode = 0; ucode < GRAPH_NODE_NUM_UCODES; ucode++) {
-            for (layer = LAYER_FIRST; layer < LAYER_COUNT; layer++) {
-                node->listHeads[ucode][layer] = NULL;
-            }
+        for (layer = LAYER_FIRST; layer < LAYER_COUNT; layer++) {
+            node->listHeads[layer] = NULL;
         }
         geo_process_node_and_siblings(node->node.children);
         geo_process_master_list_sub(gCurGraphNodeMasterList);
@@ -1128,16 +1018,16 @@ void visualise_object_hitbox(struct Object *node) {
             debug_box_color(COLOR_RGBA32_DEBUG_HITBOX);
         }
 
-        debug_box(bnds1, bnds2, (DEBUG_SHAPE_CYLINDER | DEBUG_UCODE_REJ));
+        debug_box(bnds1, bnds2, (DEBUG_SHAPE_CYLINDER));
         vec3f_set(bnds1, node->oPosX, (node->oPosY - node->hitboxDownOffset), node->oPosZ);
         vec3f_set(bnds2, node->hurtboxRadius, node->hurtboxHeight, node->hurtboxRadius);
         debug_box_color(COLOR_RGBA32_DEBUG_HURTBOX);
-        debug_box(bnds1, bnds2, (DEBUG_SHAPE_CYLINDER | DEBUG_UCODE_REJ));
+        debug_box(bnds1, bnds2, (DEBUG_SHAPE_CYLINDER));
     } else {
         vec3f_set(bnds1, node->oPosX, (node->oPosY - 15), node->oPosZ);
         vec3f_set(bnds2, 30, 30, 30);
         debug_box_color(COLOR_RGBA32_DEBUG_POSITION);
-        debug_box(bnds1, bnds2, (DEBUG_SHAPE_BOX | DEBUG_UCODE_REJ));
+        debug_box(bnds1, bnds2, (DEBUG_SHAPE_BOX));
     }
 }
 #endif
diff --git a/src/game/rendering_graph_node.h b/src/game/rendering_graph_node.h
index f681163a1..ee72feea4 100644
--- a/src/game/rendering_graph_node.h
+++ b/src/game/rendering_graph_node.h
@@ -15,6 +15,9 @@ extern struct GraphNodeHeldObject  *gCurGraphNodeHeldObject;
 extern u16 gAreaUpdateCounter;
 extern Vec3f globalLightDirection;
 
+#define GRAPH_ROOT_PERSP 0
+#define GRAPH_ROOT_ORTHO 1
+
 enum AnimType {
     // after processing an object, the type is reset to this
     ANIM_TYPE_NONE,
@@ -41,34 +44,7 @@ struct RenderModeContainer {
     u32 modes[LAYER_COUNT];
 };
 
-#ifdef OBJECTS_REJ
- #if SILHOUETTE
-    // Silhouette, .rej
-    enum RenderPhases {
-        RENDER_PHASE_ZEX_BEFORE_SILHOUETTE,
-        RENDER_PHASE_REJ_ZB,
-        RENDER_PHASE_REJ_SILHOUETTE,
-        RENDER_PHASE_REJ_NON_SILHOUETTE,
-        RENDER_PHASE_REJ_OCCLUDE_SILHOUETTE,
-        RENDER_PHASE_ZEX_AFTER_SILHOUETTE,
-        RENDER_PHASE_REJ_NON_ZB,
-        RENDER_PHASE_END,
-    };
-    #define RENDER_PHASE_SILHOUETTE RENDER_PHASE_REJ_SILHOUETTE
-    #define RENDER_PHASE_NON_SILHOUETTE RENDER_PHASE_REJ_NON_SILHOUETTE
- #else
-    // No silhouette, .rej
-    enum RenderPhases {
-        RENDER_PHASE_ZEX_BG,
-        RENDER_PHASE_REJ_ZB,
-        RENDER_PHASE_ZEX_ALL,
-        RENDER_PHASE_REJ_NON_ZB,
-        RENDER_PHASE_END,
-    };
- #endif
-#else
- #if SILHOUETTE
-    // Silhouette, no .rej
+#if SILHOUETTE
     enum RenderPhases {
         RENDER_PHASE_ZEX_BEFORE_SILHOUETTE,
         RENDER_PHASE_ZEX_SILHOUETTE,
@@ -79,13 +55,11 @@ struct RenderModeContainer {
     };
     #define RENDER_PHASE_SILHOUETTE RENDER_PHASE_ZEX_SILHOUETTE
     #define RENDER_PHASE_NON_SILHOUETTE RENDER_PHASE_ZEX_NON_SILHOUETTE
- #else
-    // No silhouette, no .rej
+#else
     enum RenderPhases {
         RENDER_PHASE_ZEX_ALL,
         RENDER_PHASE_END,
     };
- #endif
 #endif
 
 #if SILHOUETTE
diff --git a/src/game/spawn_object.c b/src/game/spawn_object.c
index adb191bbc..92f7beaea 100644
--- a/src/game/spawn_object.c
+++ b/src/game/spawn_object.c
@@ -188,9 +188,6 @@ struct Object *allocate_object(struct ObjectNode *objList) {
     obj->oRoom = -1;
 
     obj->header.gfx.node.flags &= ~GRAPH_RENDER_INVISIBLE;
-#ifdef OBJECTS_REJ
-    obj->header.gfx.ucode = GRAPH_NODE_UCODE_REJ;
-#endif
     vec3_same(obj->header.gfx.pos, -10000.0f);
     obj->header.gfx.throwMatrix = NULL;
 #ifdef PUPPYLIGHTS

From b73d8dd178ecbd306f9b0e08cb981c88c198ed94 Mon Sep 17 00:00:00 2001
From: arthurtilly <32559225+arthurtilly@users.noreply.github.com>
Date: Sat, 16 Dec 2023 17:44:06 +1300
Subject: [PATCH 05/23] Math util cleanup (#566)

* math util cleanup

* clean up the three-value min/max helpers and also the distance and angle functions

* improve pipelining and make macros safe

* macros safer and cleaned approaches

* fix build

* made sure puppycam builds (and fixed unused var warnings)

* added 32-bit mins and maxes
---
 src/engine/colors.c                  |   6 +-
 src/engine/colors.h                  |   8 +-
 src/engine/math_util.c               | 614 +++-----------------
 src/engine/math_util.h               | 814 +++++++++++++++------------
 src/engine/surface_load.c            |   9 +-
 src/game/behaviors/chain_chomp.inc.c |   6 +-
 src/game/behaviors/hoot.inc.c        |   3 +-
 src/game/behaviors/intro_peach.inc.c |   3 +-
 src/game/behaviors/spawn_star.inc.c  |   3 +-
 src/game/camera.c                    |   2 +-
 src/game/gamecube_controller.c       |   6 +-
 src/game/mario.c                     |   3 +-
 src/game/mario_actions_moving.c      |   3 -
 src/game/mario_actions_submerged.c   |   2 +-
 src/game/object_helpers.c            |   6 +-
 src/game/puppycam2.c                 |   7 +-
 src/game/rendering_graph_node.c      |   2 +-
 src/game/skybox.c                    |   5 +-
 18 files changed, 563 insertions(+), 939 deletions(-)

diff --git a/src/engine/colors.c b/src/engine/colors.c
index 32d7fe4b5..41f67d498 100644
--- a/src/engine/colors.c
+++ b/src/engine/colors.c
@@ -81,8 +81,8 @@ void rgba32_to_colorRGBAf(ColorRGBAf dst, RGBA32 src) {
     dst[3] = COMPOSITE_TO_COLORF(src, MSK_RGBA32_A, IDX_RGBA32_A);
 }
 
-void colorRGB_to_colorRGBf(ColorRGBf dst, ColorRGB src) { vec3_quot_val(dst, src, 255.0f); }
-void colorRGBf_to_colorRGB(ColorRGB dst, ColorRGBf src) { vec3_prod_val(dst, src, 255.0f); }
+void colorRGB_to_colorRGBf(ColorRGBf dst, ColorRGB src) { vec3_scale_dest(dst, src, 1/255.0f); }
+void colorRGBf_to_colorRGB(ColorRGB dst, ColorRGBf src) { vec3_scale_dest(dst, src, 255.0f); }
 
 RGBA16Return32 colorRGBf_to_rgba16(ColorRGBf src) {
     return (COLORF_TO_COMPOSITE(src[0], MSK_RGBA16_C, IDX_RGBA16_R)
@@ -137,7 +137,7 @@ Bool32 colorRGBA_average_3(ColorRGBA dst, ColorRGBA c1, ColorRGBA c2, ColorRGBA
 RGBA16Return32 rgba16_make_grayscale(RGBA16 rgba) {
     ColorRGBf color;
     rgba16_to_colorRGBf(color, rgba);
-    ColorF avg = vec3_average(color);
+    ColorF avg = (color[0] + color[1] + color[2]) / 3.f;
     vec3_same(color, avg);
     return colorRGBf_to_rgba16(color);
 }
diff --git a/src/engine/colors.h b/src/engine/colors.h
index 7db18298c..866e68424 100644
--- a/src/engine/colors.h
+++ b/src/engine/colors.h
@@ -103,10 +103,10 @@
 #define COMPOSITE_TO_COLORF(src, bitmask, index)    ((ColorF)(((src) >> (index)) & (bitmask)) / (bitmask))
 #define COLORF_TO_COMPOSITE(src, bitmask, index)    (((CompositeColor)((src) * (bitmask)) & (bitmask)) << (index))
 
-#define COLORRGB_TO_COLORRGBF(  dst, src) vec3_quot_val((dst), (src), 255.0f)
-#define COLORRGBF_TO_COLORRGB(  dst, src) vec3_prod_val((dst), (src), 255.0f)
-#define COLORRGBA_TO_COLORRGBAF(dst, src) vec4_quot_val((dst), (src), 255.0f)
-#define COLORRGBAF_TO_COLORRGBA(dst, src) vec4_prod_val((dst), (src), 255.0f)
+#define COLORRGB_TO_COLORRGBF(  dst, src) vec3_scale_dest((dst), (src), 1/255.0f)
+#define COLORRGBF_TO_COLORRGB(  dst, src) vec3_scale_dest((dst), (src), 255.0f)
+#define COLORRGBA_TO_COLORRGBAF(dst, src) vec4_scale_dest((dst), (src), 1/255.0f)
+#define COLORRGBAF_TO_COLORRGBA(dst, src) vec4_scale_dest((dst), (src), 255.0f)
 
 #define colorRGB_set(    dst, r, g, b) vec3_set( (dst), (r), (g), (b))
 #define colorRGB_copy(   dst, src    ) vec3_copy((dst), (src)        )
diff --git a/src/engine/math_util.c b/src/engine/math_util.c
index 16efd0277..bcaca282d 100644
--- a/src/engine/math_util.c
+++ b/src/engine/math_util.c
@@ -63,28 +63,7 @@ s32 random_sign(void) {
     return ((random_u16() >= 0x7FFF) ? 1 : -1);
 }
 
-/// Returns the lowest of three values.
-#define min_3_func(a0, a1, a2) {\
-    if (a1 < a0) a0 = a1;       \
-    if (a2 < a0) a0 = a2;       \
-    return a0;                  \
-}
-
-f32 min_3f(f32 a, f32 b, f32 c) { min_3_func(a, b, c); }
-s32 min_3i(s32 a, s32 b, s32 c) { min_3_func(a, b, c); }
-s32 min_3s(s16 a, s16 b, s16 c) { min_3_func(a, b, c); }
-
-/// Returns the highest of three values.
-#define max_3_func(a0, a1, a2) {\
-    if (a1 > a0) a0 = a1;       \
-    if (a2 > a0) a0 = a2;       \
-    return a0;                  \
-}
-f32 max_3f(f32 a, f32 b, f32 c) { max_3_func(a, b, c); }
-s32 max_3i(s32 a, s32 b, s32 c) { max_3_func(a, b, c); }
-s32 max_3s(s16 a, s16 b, s16 c) { max_3_func(a, b, c); }
-
-/// A combination of the above.
+// Get the maximum and minimum of three numbers at the same time.
 #define min_max_3_func(a, b, c, min, max) { \
     if (b < a) {                            \
         *max = a;                           \
@@ -96,232 +75,24 @@ s32 max_3s(s16 a, s16 b, s16 c) { max_3_func(a, b, c); }
     if (c < *min) *min = c;                 \
     if (c > *max) *max = c;                 \
 }
+
 void min_max_3f(f32 a, f32 b, f32 c, f32 *min, f32 *max) { min_max_3_func(a, b, c, min, max); }
 void min_max_3i(s32 a, s32 b, s32 c, s32 *min, s32 *max) { min_max_3_func(a, b, c, min, max); }
 void min_max_3s(s16 a, s16 b, s16 c, s16 *min, s16 *max) { min_max_3_func(a, b, c, min, max); }
 
-/// Perform a bitwise copy from vector 'src' to 'dest'
-#define vec3_copy_bits(destFmt, dest, srcFmt, src) { \
-    register destFmt x = ((srcFmt *) src)[0];        \
-    register destFmt y = ((srcFmt *) src)[1];        \
-    register destFmt z = ((srcFmt *) src)[2];        \
-    ((destFmt *) dest)[0] = x;                       \
-    ((destFmt *) dest)[1] = y;                       \
-    ((destFmt *) dest)[2] = z;                       \
-}
-void vec3f_copy    (Vec3f dest, const Vec3f src) { vec3_copy_bits(f32, dest, f32, src); } // 32 -> 32
-void vec3i_copy    (Vec3i dest, const Vec3i src) { vec3_copy_bits(s32, dest, s32, src); } // 32 -> 32
-void vec3s_copy    (Vec3s dest, const Vec3s src) { vec3_copy_bits(s16, dest, s16, src); } // 16 -> 16
-void vec3s_to_vec3i(Vec3i dest, const Vec3s src) { vec3_copy_bits(s32, dest, s16, src); } // 16 -> 32
-void vec3s_to_vec3f(Vec3f dest, const Vec3s src) { vec3_copy_bits(f32, dest, s16, src); } // 16 -> 32
-void vec3i_to_vec3s(Vec3s dest, const Vec3i src) { vec3_copy_bits(s16, dest, s32, src); } // 32 -> 16
-void vec3i_to_vec3f(Vec3f dest, const Vec3i src) { vec3_copy_bits(f32, dest, s32, src); } // 32 -> 32
-
-void surface_normal_to_vec3f(Vec3f dest, struct Surface *surf) {
-    register f32 x = surf->normal.x;
-    register f32 y = surf->normal.y;
-    register f32 z = surf->normal.z;
-    ((f32 *) dest)[0] = x;
-    ((f32 *) dest)[1] = y;
-    ((f32 *) dest)[2] = z;
-}
-
-/// Convert float vector a to a short vector 'dest' by rounding the components to the nearest integer.
-#define vec3_copy_bits_roundf(fmt, dest, src) { \
-    register fmt x = roundf(src[0]);            \
-    register fmt y = roundf(src[1]);            \
-    register fmt z = roundf(src[2]);            \
-    ((fmt *) dest)[0] = x;                      \
-    ((fmt *) dest)[1] = y;                      \
-    ((fmt *) dest)[2] = z;                      \
-}
-void vec3f_to_vec3s(Vec3s dest, const Vec3f src) { vec3_copy_bits_roundf(s16, dest, src); } // 32 -> 16
-void vec3f_to_vec3i(Vec3i dest, const Vec3f src) { vec3_copy_bits_roundf(s32, dest, src); } // 32 -> 32
-#undef vec3_copy_bits_roundf
-
-#define vec3_copy_y_off_func(destFmt, dest, srcFmt, src, yOff) {\
-    register destFmt x = ((srcFmt *) src)[0];                   \
-    register destFmt y = ((srcFmt *) src)[1] + yOff;            \
-    register destFmt z = ((srcFmt *) src)[2];                   \
-    ((destFmt *) dest)[0] = x;                                  \
-    ((destFmt *) dest)[1] = y;                                  \
-    ((destFmt *) dest)[2] = z;                                  \
-}
-void vec3f_copy_y_off(Vec3f dest, Vec3f src, f32 yOff) { vec3_copy_y_off_func(f32, dest, f32, src, yOff); }
-#undef vec3_copy_y_off_func
-
-/// Set vector 'dest' to (x, y, z)
-inline void vec3f_set(Vec3f dest, const f32 x, const f32 y, const f32 z) { vec3_set(dest, x, y, z); }
-inline void vec3i_set(Vec3i dest, const s32 x, const s32 y, const s32 z) { vec3_set(dest, x, y, z); }
-inline void vec3s_set(Vec3s dest, const s16 x, const s16 y, const s16 z) { vec3_set(dest, x, y, z); }
-
-/// Add vector 'a' to 'dest'
-#define vec3_add_func(fmt, dest, a) {   \
-    register fmt *temp = (fmt *)(dest); \
-    register fmt sum, sum2;             \
-    register s32 i;                     \
-    for (i = 0; i < 3; i++) {           \
-        sum = *(a);                     \
-        (a)++;                          \
-        sum2 = *temp;                   \
-        *temp = (sum + sum2);           \
-        temp++;                         \
-    }                                   \
-}
-void vec3f_add(Vec3f dest, const Vec3f a) { vec3_add_func(f32, dest, a); }
-void vec3i_add(Vec3i dest, const Vec3i a) { vec3_add_func(s32, dest, a); }
-void vec3s_add(Vec3s dest, const Vec3s a) { vec3_add_func(s16, dest, a); }
-#undef vec3_add_func
-
-/// Make 'dest' the sum of vectors a and b.
-#define vec3_sum_func(fmt, dest, a, b) {\
-    register fmt *temp = (fmt *)(dest); \
-    register fmt sum, sum2;             \
-    register s32 i;                     \
-    for (i = 0; i < 3; i++) {           \
-        sum = *(a);                     \
-        (a)++;                          \
-        sum2 = *(b);                    \
-        (b)++;                          \
-        *temp = (sum + sum2);           \
-        temp++;                         \
-    }                                   \
-}
-void vec3f_sum(Vec3f dest, const Vec3f a, const Vec3f b) { vec3_sum_func(f32, dest, a, b); }
-void vec3i_sum(Vec3i dest, const Vec3i a, const Vec3i b) { vec3_sum_func(s32, dest, a, b); }
-void vec3s_sum(Vec3s dest, const Vec3s a, const Vec3s b) { vec3_sum_func(s16, dest, a, b); }
-#undef vec3_sum_func
-
-/// Subtract vector a from 'dest'
-#define vec3_sub_func(fmt, dest, a) {   \
-    register fmt x = ((fmt *) a)[0];    \
-    register fmt y = ((fmt *) a)[1];    \
-    register fmt z = ((fmt *) a)[2];    \
-    ((fmt *) dest)[0] -= x;             \
-    ((fmt *) dest)[1] -= y;             \
-    ((fmt *) dest)[2] -= z;             \
-}
-void vec3f_sub(Vec3f dest, const Vec3f a) { vec3_sub_func(f32, dest, a); }
-void vec3i_sub(Vec3i dest, const Vec3i a) { vec3_sub_func(s32, dest, a); }
-void vec3s_sub(Vec3s dest, const Vec3s a) { vec3_sub_func(s16, dest, a); }
-#undef vec3_sub_func
-
-/// Make 'dest' the difference of vectors a and b.
-#define vec3_diff_func(fmt, dest, a, b) {   \
-    register fmt x1 = ((fmt *) a)[0];       \
-    register fmt y1 = ((fmt *) a)[1];       \
-    register fmt z1 = ((fmt *) a)[2];       \
-    register fmt x2 = ((fmt *) b)[0];       \
-    register fmt y2 = ((fmt *) b)[1];       \
-    register fmt z2 = ((fmt *) b)[2];       \
-    ((fmt *) dest)[0] = (x1 - x2);          \
-    ((fmt *) dest)[1] = (y1 - y2);          \
-    ((fmt *) dest)[2] = (z1 - z2);          \
-}
-void vec3f_diff(Vec3f dest, const Vec3f a, const Vec3f b) { vec3_diff_func(f32, dest, a, b); }
-void vec3i_diff(Vec3i dest, const Vec3i a, const Vec3i b) { vec3_diff_func(s32, dest, a, b); }
-void vec3s_diff(Vec3s dest, const Vec3s a, const Vec3s b) { vec3_diff_func(s16, dest, a, b); }
-#undef vec3_diff_func
-
-/// Multiply vector 'a' into 'dest'
-#define vec3_mul_func(fmt, dest, a) {   \
-    register fmt x = ((fmt *) a)[0];    \
-    register fmt y = ((fmt *) a)[1];    \
-    register fmt z = ((fmt *) a)[2];    \
-    ((fmt *) dest)[0] *= x;             \
-    ((fmt *) dest)[1] *= y;             \
-    ((fmt *) dest)[2] *= z;             \
-}
-void vec3f_mul(Vec3f dest, const Vec3f a) { vec3_mul_func(f32, dest, a); }
-void vec3i_mul(Vec3i dest, const Vec3i a) { vec3_mul_func(s32, dest, a); }
-void vec3s_mul(Vec3s dest, const Vec3s a) { vec3_mul_func(s16, dest, a); }
-#undef vec3_mul_func
-
-/// Make 'dest' the product of vectors a and b.
-#define vec3_prod_func(fmt, dest, a, b) {   \
-    register fmt x1 = ((fmt *) a)[0];       \
-    register fmt y1 = ((fmt *) a)[1];       \
-    register fmt z1 = ((fmt *) a)[2];       \
-    register fmt x2 = ((fmt *) b)[0];       \
-    register fmt y2 = ((fmt *) b)[1];       \
-    register fmt z2 = ((fmt *) b)[2];       \
-    ((fmt *) dest)[0] = (x1 * x2);          \
-    ((fmt *) dest)[1] = (y1 * y2);          \
-    ((fmt *) dest)[2] = (z1 * z2);          \
-}
-void vec3f_prod(Vec3f dest, const Vec3f a, const Vec3f b) { vec3_prod_func(f32, dest, a, b); }
-void vec3i_prod(Vec3i dest, const Vec3i a, const Vec3i b) { vec3_prod_func(s32, dest, a, b); }
-void vec3s_prod(Vec3s dest, const Vec3s a, const Vec3s b) { vec3_prod_func(s16, dest, a, b); }
-#undef vec3_prod_func
-
-
-/// Performs element-wise division of two 3-vectors
-#define vec3_div_func(fmt, dest, a) {   \
-    register fmt x = ((fmt *) a)[0];    \
-    register fmt y = ((fmt *) a)[1];    \
-    register fmt z = ((fmt *) a)[2];    \
-    ((fmt *) dest)[0] /= x;             \
-    ((fmt *) dest)[1] /= y;             \
-    ((fmt *) dest)[2] /= z;             \
-}
-void vec3f_div(Vec3f dest, const Vec3f a) { vec3_div_func(f32, dest, a); }
-void vec3i_div(Vec3i dest, const Vec3i a) { vec3_div_func(s32, dest, a); }
-void vec3s_div(Vec3s dest, const Vec3s a) { vec3_div_func(s16, dest, a); }
-#undef vec3_div_func
-
-/// Make 'dest' the quotient of vectors a and b.
-#define vec3_quot_func(fmt, dest, a, b) {   \
-    register fmt x1 = ((fmt *) a)[0];       \
-    register fmt y1 = ((fmt *) a)[1];       \
-    register fmt z1 = ((fmt *) a)[2];       \
-    register fmt x2 = ((fmt *) b)[0];       \
-    register fmt y2 = ((fmt *) b)[1];       \
-    register fmt z2 = ((fmt *) b)[2];       \
-    ((fmt *) dest)[0] = (x1 / x2);          \
-    ((fmt *) dest)[1] = (y1 / y2);          \
-    ((fmt *) dest)[2] = (z1 / z2);          \
-}
-void vec3f_quot(Vec3f dest, const Vec3f a, const Vec3f b) { vec3_quot_func(f32, dest, a, b); }
-void vec3i_quot(Vec3i dest, const Vec3i a, const Vec3i b) { vec3_quot_func(s32, dest, a, b); }
-void vec3s_quot(Vec3s dest, const Vec3s a, const Vec3s b) { vec3_quot_func(s16, dest, a, b); }
-#undef vec3_quot_func
-
-/// Return the dot product of vectors a and b.
-f32 vec3f_dot(const Vec3f a, const Vec3f b) {
-    return vec3_dot(a, b);
-}
-
-/// Make vector 'dest' the cross product of vectors a and b.
-void vec3f_cross(Vec3f dest, const Vec3f a, const Vec3f b) {
-    vec3_cross(dest, a, b);
-}
-
-/// Scale vector 'dest' so it has length 1
-void vec3f_normalize(Vec3f dest) {
-    register f32 mag = (sqr(dest[0]) + sqr(dest[1]) + sqr(dest[2]));
-    if (mag > NEAR_ZERO) {
-        register f32 invsqrt = (1.0f / sqrtf(mag));
-        vec3_mul_val(dest, invsqrt);
-    } else {
-        // Default to up vector.
-        dest[0] = 0;
-        ((u32 *) dest)[1] = FLOAT_ONE;
-        dest[2] = 0;
-    }
-}
-
 /// Struct the same data size as a Mat4
 struct CopyMat4 {
     f32 a[0x10];
 };
 
 /// Copy matrix 'src' to 'dest' by casting to a struct CopyMat4 pointer.
-void mtxf_copy(register Mat4 dest, register Mat4 src) {
+void mtxf_copy(Mat4 dest, Mat4 src) {
     *((struct CopyMat4 *) dest) = *((struct CopyMat4 *) src);
 }
 
 /// Set mtx to the identity matrix.
-void mtxf_identity(register Mat4 mtx) {
+
+void mtxf_identity(Mat4 mtx) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
     s32 i;
     f32 *dest;
@@ -336,8 +107,8 @@ void mtxf_identity(register Mat4 mtx) {
 /// Set dest to a translation matrix of vector b.
 void mtxf_translate(Mat4 dest, Vec3f b) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register s32 i;
-    register f32 *pen;
+    s32 i;
+    f32 *pen;
     for (pen = ((f32 *) dest + 1), i = 0; i < 12; pen++, i++) {
         *pen = 0;
     }
@@ -347,65 +118,20 @@ void mtxf_translate(Mat4 dest, Vec3f b) {
     vec3f_copy(&dest[3][0], &b[0]);
 }
 
-/**
- * Multiply a vector by a matrix of the form
- * | ? ? ? 0 |
- * | ? ? ? 0 |
- * | ? ? ? 0 |
- * | 0 0 0 1 |
- * i.e. a matrix representing a linear transformation over 3 space.
- */
-void linear_mtxf_mul_vec3f(Mat4 m, Vec3f dst, Vec3f v) {
-    PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    s32 i;
-    for (i = 0; i < 3; i++) {
-        dst[i] = ((m[0][i] * v[0])
-                + (m[1][i] * v[1])
-                + (m[2][i] * v[2]));
-    }
-}
-
-void linear_mtxf_mul_vec3f_and_translate(Mat4 m, Vec3f dst, Vec3f v) {
-    PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    s32 i;
-    for (i = 0; i < 3; i++) {
-        dst[i] = ((m[0][i] * v[0])
-                + (m[1][i] * v[1])
-                + (m[2][i] * v[2])
-                +  m[3][i]);
-    }
-}
-
-/**
- * Multiply a vector by the transpose of a matrix of the form
- * | ? ? ? 0 |
- * | ? ? ? 0 |
- * | ? ? ? 0 |
- * | 0 0 0 1 |
- * i.e. a matrix representing a linear transformation over 3 space.
- */
-void linear_mtxf_transpose_mul_vec3f(Mat4 m, Vec3f dst, Vec3f v) {
-    PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    s32 i;
-    for (i = 0; i < 3; i++) {
-        dst[i] = vec3_dot(m[i], v);
-    }
-}
-
 /// Build a matrix that rotates around the z axis, then the x axis, then the y axis, and then translates.
 void mtxf_rotate_zxy_and_translate(Mat4 dest, Vec3f trans, Vec3s rot) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register f32 sx   = sins(rot[0]);
-    register f32 cx   = coss(rot[0]);
-    register f32 sy   = sins(rot[1]);
-    register f32 cy   = coss(rot[1]);
-    register f32 sz   = sins(rot[2]);
-    register f32 cz   = coss(rot[2]);
-    register f32 sysz = (sy * sz);
-    register f32 cycz = (cy * cz);
+    f32 sx   = sins(rot[0]);
+    f32 cx   = coss(rot[0]);
+    f32 sy   = sins(rot[1]);
+    f32 cy   = coss(rot[1]);
+    f32 sz   = sins(rot[2]);
+    f32 cz   = coss(rot[2]);
+    f32 sysz = (sy * sz);
+    f32 cycz = (cy * cz);
     dest[0][0] = ((sysz * sx) + cycz);
-    register f32 cysz = (cy * sz);
-    register f32 sycz = (sy * cz);
+    f32 cysz = (cy * sz);
+    f32 sycz = (sy * cz);
     dest[1][0] = ((sycz * sx) - cysz);
     dest[2][0] = (cx * sy);
     dest[0][1] = (cx * sz);
@@ -421,20 +147,20 @@ void mtxf_rotate_zxy_and_translate(Mat4 dest, Vec3f trans, Vec3s rot) {
 /// Build a matrix that rotates around the x axis, then the y axis, then the z axis, and then translates.
 UNUSED void mtxf_rotate_xyz_and_translate(Mat4 dest, Vec3f trans, Vec3s rot) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register f32 sx   = sins(rot[0]);
-    register f32 cx   = coss(rot[0]);
-    register f32 sy   = sins(rot[1]);
-    register f32 cy   = coss(rot[1]);
-    register f32 sz   = sins(rot[2]);
-    register f32 cz   = coss(rot[2]);
+    f32 sx   = sins(rot[0]);
+    f32 cx   = coss(rot[0]);
+    f32 sy   = sins(rot[1]);
+    f32 cy   = coss(rot[1]);
+    f32 sz   = sins(rot[2]);
+    f32 cz   = coss(rot[2]);
     dest[0][0] = (cy * cz);
     dest[0][1] = (cy * sz);
     dest[0][2] = -sy;
-    register f32 sxcz = (sx * cz);
-    register f32 cxsz = (cx * sz);
+    f32 sxcz = (sx * cz);
+    f32 cxsz = (cx * sz);
     dest[1][0] = ((sxcz * sy) - cxsz);
-    register f32 sxsz = (sx * sz);
-    register f32 cxcz = (cx * cz);
+    f32 sxsz = (sx * sz);
+    f32 cxcz = (cx * cz);
     dest[1][1] = ((sxsz * sy) + cxcz);
     dest[1][2] = (sx * cy);
     dest[2][0] = ((cxcz * sy) + sxsz);
@@ -447,19 +173,19 @@ UNUSED void mtxf_rotate_xyz_and_translate(Mat4 dest, Vec3f trans, Vec3s rot) {
 /// Build a matrix that rotates around the z axis, then the x axis, then the y axis, and then translates and multiplies.
 void mtxf_rotate_zxy_and_translate_and_mul(Vec3s rot, Vec3f trans, Mat4 dest, Mat4 src) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register f32 sx = sins(rot[0]);
-    register f32 cx = coss(rot[0]);
-    register f32 sy = sins(rot[1]);
-    register f32 cy = coss(rot[1]);
-    register f32 sz = sins(rot[2]);
-    register f32 cz = coss(rot[2]);
+    f32 sx = sins(rot[0]);
+    f32 cx = coss(rot[0]);
+    f32 sy = sins(rot[1]);
+    f32 cy = coss(rot[1]);
+    f32 sz = sins(rot[2]);
+    f32 cz = coss(rot[2]);
     Vec3f entry;
-    register f32 sysz = (sy * sz);
-    register f32 cycz = (cy * cz);
+    f32 sysz = (sy * sz);
+    f32 cycz = (cy * cz);
     entry[0] = ((sysz * sx) + cycz);
     entry[1] = (sz * cx);
-    register f32 cysz = (cy * sz);
-    register f32 sycz = (sy * cz);
+    f32 cysz = (cy * sz);
+    f32 sycz = (sy * cz);
     entry[2] = ((cysz * sx) - sycz);
     linear_mtxf_mul_vec3f(src, dest[0], entry);
     entry[0] = ((sycz * sx) - cysz);
@@ -478,22 +204,22 @@ void mtxf_rotate_zxy_and_translate_and_mul(Vec3s rot, Vec3f trans, Mat4 dest, Ma
 /// Build a matrix that rotates around the x axis, then the y axis, then the z axis, and then translates and multiplies.
 void mtxf_rotate_xyz_and_translate_and_mul(Vec3s rot, Vec3f trans, Mat4 dest, Mat4 src) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register f32 sx = sins(rot[0]);
-    register f32 cx = coss(rot[0]);
-    register f32 sy = sins(rot[1]);
-    register f32 cy = coss(rot[1]);
-    register f32 sz = sins(rot[2]);
-    register f32 cz = coss(rot[2]);
+    f32 sx = sins(rot[0]);
+    f32 cx = coss(rot[0]);
+    f32 sy = sins(rot[1]);
+    f32 cy = coss(rot[1]);
+    f32 sz = sins(rot[2]);
+    f32 cz = coss(rot[2]);
     Vec3f entry;
     entry[0] = (cy * cz);
     entry[1] = (cy * sz);
     entry[2] = -sy;
     linear_mtxf_mul_vec3f(src, dest[0], entry);
-    register f32 sxcz = (sx * cz);
-    register f32 cxsz = (cx * sz);
+    f32 sxcz = (sx * cz);
+    f32 cxsz = (cx * sz);
     entry[0] = ((sxcz * sy) - cxsz);
-    register f32 sxsz = (sx * sz);
-    register f32 cxcz = (cx * cz);
+    f32 sxsz = (sx * sz);
+    f32 cxcz = (cx * cz);
     entry[1] = ((sxsz * sy) + cxcz);
     entry[2] = (sx * cy);
     linear_mtxf_mul_vec3f(src, dest[1], entry);
@@ -515,9 +241,9 @@ void mtxf_rotate_xyz_and_translate_and_mul(Vec3s rot, Vec3f trans, Mat4 dest, Ma
 void mtxf_lookat(Mat4 mtx, Vec3f from, Vec3f to, s16 roll) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
     Vec3f colX, colY, colZ;
-    register f32 dx = (to[0] - from[0]);
-    register f32 dz = (to[2] - from[2]);
-    register f32 invLength = sqrtf(sqr(dx) + sqr(dz));
+    f32 dx = (to[0] - from[0]);
+    f32 dz = (to[2] - from[2]);
+    f32 invLength = sqrtf(sqr(dx) + sqr(dz));
     invLength = -(1.0f / MAX(invLength, NEAR_ZERO));
     dx *= invLength;
     dz *= invLength;
@@ -555,10 +281,10 @@ void mtxf_lookat(Mat4 mtx, Vec3f from, Vec3f to, s16 roll) {
  */
 void mtxf_billboard(Mat4 dest, Mat4 mtx, Vec3f position, Vec3f scale, s16 angle) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register s32 i;
-    register f32 sx = scale[0];
-    register f32 sy = scale[1];
-    register f32 sz = scale[2];
+    s32 i;
+    f32 sx = scale[0];
+    f32 sy = scale[1];
+    f32 sz = scale[2];
     Mat4* cameraMat = &gCameraTransform;
     for (i = 0; i < 3; i++) {
         for (int j = 0; j < 3; j++) {
@@ -676,7 +402,7 @@ void mtxf_align_terrain_triangle(Mat4 mtx, Vec3f pos, s16 yaw, f32 radius) {
     if ((point1[1] - pos[1]) < minY) point1[1] = pos[1];
     if ((point2[1] - pos[1]) < minY) point2[1] = pos[1];
 
-    f32 avgY = average_3(point0[1], point1[1], point2[1]);
+    f32 avgY = (point0[1] + point1[1] + point2[1]) / 3.f;
 
     vec3f_set(forward, sins(yaw), 0.0f, coss(yaw));
     find_vector_perpendicular_to_plane(yColumn, point0, point1, point2);
@@ -707,10 +433,10 @@ void mtxf_align_terrain_triangle(Mat4 mtx, Vec3f pos, s16 yaw, f32 radius) {
 void mtxf_mul(Mat4 dest, Mat4 a, Mat4 b) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
     Vec3f entry;
-    register f32 *temp  = (f32 *)a;
-    register f32 *temp2 = (f32 *)dest;
-    register f32 *temp3;
-    register s32 i;
+    f32 *temp  = (f32 *)a;
+    f32 *temp2 = (f32 *)dest;
+    f32 *temp3;
+    s32 i;
     for (i = 0; i < 16; i++) {
         vec3_copy(entry, temp);
         for (temp3 = (f32 *)b; (i & 3) != 3; i++) {
@@ -731,11 +457,11 @@ void mtxf_mul(Mat4 dest, Mat4 a, Mat4 b) {
 /**
  * Set matrix 'dest' to 'mtx' scaled by vector s
  */
-void mtxf_scale_vec3f(Mat4 dest, Mat4 mtx, register Vec3f s) {
+void mtxf_scale_vec3f(Mat4 dest, Mat4 mtx, Vec3f s) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register f32 *temp  = (f32 *)dest;
-    register f32 *temp2 = (f32 *)mtx;
-    register s32 i;
+    f32 *temp  = (f32 *)dest;
+    f32 *temp2 = (f32 *)mtx;
+    s32 i;
 
     for (i = 0; i < 4; i++) {
         temp[ 0] = temp2[ 0] * s[0];
@@ -747,29 +473,6 @@ void mtxf_scale_vec3f(Mat4 dest, Mat4 mtx, register Vec3f s) {
     }
 }
 
-/**
- * Multiply a vector with a transformation matrix, which applies the transformation
- * to the point. Note that the bottom row is assumed to be [0, 0, 0, 1], which is
- * true for transformation matrices if the translation has a w component of 1.
- */
-UNUSED void mtxf_mul_vec3s(Mat4 mtx, Vec3s b) {
-    PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register f32 x = b[0];
-    register f32 y = b[1];
-    register f32 z = b[2];
-    register f32 *temp2 = (f32 *)mtx;
-    register s32 i;
-    register s16 *c = b;
-    for (i = 0; i < 3; i++) {
-        c[0] = ((x * temp2[ 0])
-              + (y * temp2[ 4])
-              + (z * temp2[ 8])
-              +      temp2[12]);
-        c++;
-        temp2++;
-    }
-}
-
 /**
  * Set 'mtx' to a transformation matrix that rotates around the z axis.
  */
@@ -778,10 +481,10 @@ UNUSED void mtxf_mul_vec3s(Mat4 mtx, Vec3s b) {
     ((s16 *) mtx)[a + 16] = (((s32) b) & 0xFFFF);
 void mtxf_rotate_xy(Mtx *mtx, s16 angle) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register s32 i = (coss(angle) * 0x10000);
-    register s32 j = (sins(angle) * 0x10000);
-    register f32 *temp = (f32 *)mtx;
-    register s32 k;
+    s32 i = (coss(angle) * 0x10000);
+    s32 j = (sins(angle) * 0x10000);
+    f32 *temp = (f32 *)mtx;
+    s32 k;
     for (k = 0; k < 16; k++) {
         *temp = 0;
         temp++;
@@ -794,168 +497,13 @@ void mtxf_rotate_xy(Mtx *mtx, s16 angle) {
     ((s16 *) mtx)[15] = 1;
 }
 
-/**
- * Take the vector starting at 'from' pointed at 'to' an retrieve the length
- * of that vector, as well as the yaw and pitch angles.
- * Basically it converts the direction to spherical coordinates.
- */
-
-/// Finds the horizontal distance between two vectors.
-void vec3f_get_lateral_dist(Vec3f from, Vec3f to, f32 *lateralDist) {
-    register f32 dx = (to[0] - from[0]);
-    register f32 dz = (to[2] - from[2]);
-    *lateralDist = sqrtf(sqr(dx) + sqr(dz));
-}
-
-/// Finds the squared horizontal distance between two vectors. Avoids a sqrtf call.
-void vec3f_get_lateral_dist_squared(Vec3f from, Vec3f to, f32 *lateralDist) {
-    register f32 dx = (to[0] - from[0]);
-    register f32 dz = (to[2] - from[2]);
-    *lateralDist = (sqr(dx) + sqr(dz));
-}
-
-/// Finds the distance between two vectors.
-void vec3f_get_dist(Vec3f from, Vec3f to, f32 *dist) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *dist = vec3_mag(d);
-}
-
-/// Finds the squared distance between two vectors. Avoids a sqrtf call.
-void vec3f_get_dist_squared(Vec3f from, Vec3f to, f32 *dist) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *dist = vec3_sumsq(d);
-}
-
-/// Finds the distance and yaw etween two vectors.
-void vec3f_get_dist_and_yaw(Vec3f from, Vec3f to, f32 *dist, s16 *yaw) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *dist = vec3_mag(d);
-    *yaw = atan2s(d[2], d[0]);
-}
-
-/// Finds the pitch between two vectors.
-void vec3f_get_pitch(Vec3f from, Vec3f to, s16 *pitch) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *pitch = atan2s(sqrtf(sqr(d[0]) + sqr(d[2])), d[1]);
-}
-
-/// Finds the yaw between two vectors.
-void vec3f_get_yaw(Vec3f from, Vec3f to, s16 *yaw) {
-    register f32 dx = (to[0] - from[0]);
-    register f32 dz = (to[2] - from[2]);
-    *yaw = atan2s(dz, dx);
-}
-
-/// Finds the pitch and yaw between two vectors.
-void vec3f_get_angle(Vec3f from, Vec3f to, s16 *pitch, s16 *yaw) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *pitch = atan2s(sqrtf(sqr(d[0]) + sqr(d[2])), d[1]);
-    *yaw   = atan2s(d[2], d[0]);
-}
-
-/// Finds the horizontal distance and pitch between two vectors.
-void vec3f_get_lateral_dist_and_pitch(Vec3f from, Vec3f to, f32 *lateralDist, s16 *pitch) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *lateralDist = sqrtf(sqr(d[0]) + sqr(d[2]));
-    *pitch       = atan2s(*lateralDist, d[1]);
-}
-
-/// Finds the horizontal distance and yaw between two vectors.
-void vec3f_get_lateral_dist_and_yaw(Vec3f from, Vec3f to, f32 *lateralDist, s16 *yaw) {
-    register f32 dx = (to[0] - from[0]);
-    register f32 dz = (to[2] - from[2]);
-    *lateralDist = sqrtf(sqr(dx) + sqr(dz));
-    *yaw         = atan2s(dz, dx);
-}
-
-/// Finds the horizontal distance and angles between two vectors.
-void vec3f_get_lateral_dist_and_angle(Vec3f from, Vec3f to, f32 *lateralDist, s16 *pitch, s16 *yaw) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *lateralDist = sqrtf(sqr(d[0]) + sqr(d[2]));
-    *pitch       = atan2s(*lateralDist, d[1]);
-    *yaw         = atan2s(d[2], d[0]);
-}
-
-/// Finds the distance and angles between two vectors.
-void vec3f_get_dist_and_angle(Vec3f from, Vec3f to, f32 *dist, s16 *pitch, s16 *yaw) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    register f32 xz = (sqr(d[0]) + sqr(d[2]));
-    *dist           = sqrtf(xz + sqr(d[1]));
-    *pitch          = atan2s(sqrtf(xz), d[1]);
-    *yaw            = atan2s(d[2], d[0]);
-}
-void vec3s_get_dist_and_angle(Vec3s from, Vec3s to, s16 *dist, s16 *pitch, s16 *yaw) {
-    Vec3s d;
-    vec3_diff(d, to, from);
-    register f32 xz = (sqr(d[0]) + sqr(d[2]));
-    *dist           = sqrtf(xz + sqr(d[1]));
-    *pitch          = atan2s(sqrtf(xz), d[1]);
-    *yaw            = atan2s(d[2], d[0]);
-}
-void vec3f_to_vec3s_get_dist_and_angle(Vec3f from, Vec3s to, f32 *dist, s16 *pitch, s16 *yaw) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    register f32 xz = (sqr(d[0]) + sqr(d[2]));
-    *dist           = sqrtf(xz + sqr(d[1]));
-    *pitch          = atan2s(sqrtf(xz), d[1]);
-    *yaw            = atan2s(d[2], d[0]);
-}
-
-/// Finds the distance, horizontal distance, and angles between two vectors.
-void vec3f_get_dist_and_lateral_dist_and_angle(Vec3f from, Vec3f to, f32 *dist, f32 *lateralDist, s16 *pitch, s16 *yaw) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    register f32 xz = (sqr(d[0]) + sqr(d[2]));
-    *dist           = sqrtf(xz + sqr(d[1]));
-    *lateralDist    = sqrtf(xz);
-    *pitch          = atan2s(*lateralDist, d[1]);
-    *yaw            = atan2s(d[2], d[0]);
-}
-
-/**
- * Construct the 'to' point which is distance 'dist' away from the 'from' position,
- * and has the angles pitch and yaw.
- */
-#define vec3_set_dist_and_angle(from, to, dist, pitch, yaw) { \
-    register f32 dcos = (dist * coss(pitch)); \
-    to[0] = (from[0] + (dcos * sins(yaw  ))); \
-    to[1] = (from[1] + (dist * sins(pitch))); \
-    to[2] = (from[2] + (dcos * coss(yaw  ))); \
-}
-void vec3f_set_dist_and_angle(Vec3f from, Vec3f to, f32 dist, s16 pitch, s16 yaw) {
-    vec3_set_dist_and_angle(from, to, dist, pitch, yaw);
-}
-void vec3s_set_dist_and_angle(Vec3s from, Vec3s to, s16 dist, s16 pitch, s16 yaw) {
-    vec3_set_dist_and_angle(from, to, dist, pitch, yaw);
-}
-
 /**
  * Similar to approach_s32, but converts to s16 and allows for overflow between 32767 and -32768
  */
-s16 approach_angle(s16 current, s16 target, s16 inc) {
-    s32 dist = (s16)(target - current);
-    if (dist < 0) {
-        dist += inc;
-        if (dist > 0) dist = 0;
-    } else if (dist > 0) {
-        dist -= inc;
-        if (dist < 0) dist = 0;
-    }
-    return (target - dist);
-}
-Bool32 approach_angle_bool(s16 *current, s16 target, s16 inc) {
-    *current = approach_angle(*current, target, inc);
+Bool32 approach_s16_bool(s16 *current, s16 target, s16 inc, s16 dec) {
+    *current = approach_s16(*current, target, inc, dec);
     return (*current != target);
 }
-
 s16 approach_s16(s16 current, s16 target, s16 inc, s16 dec) {
     s16 dist = (target - current);
     if (dist >= 0) { // target >= current
@@ -965,15 +513,15 @@ s16 approach_s16(s16 current, s16 target, s16 inc, s16 dec) {
     }
     return current;
 }
-Bool32 approach_s16_bool(s16 *current, s16 target, s16 inc, s16 dec) {
-    *current = approach_s16(*current, target, inc, dec);
-    return (*current != target);
-}
 
 /**
  * Return the value 'current' after it tries to approach target, going up at
  * most 'inc' and going down at most 'dec'.
  */
+Bool32 approach_s32_bool(s32 *current, s32 target, s32 inc, s32 dec) {
+    *current = approach_s32(*current, target, inc, dec);
+    return (*current != target);
+}
 s32 approach_s32(s32 current, s32 target, s32 inc, s32 dec) {
     s32 dist = (target - current);
     if (dist > 0) { // current < target
@@ -983,15 +531,15 @@ s32 approach_s32(s32 current, s32 target, s32 inc, s32 dec) {
     }
     return current;
 }
-Bool32 approach_s32_bool(s32 *current, s32 target, s32 inc, s32 dec) {
-    *current = approach_s32(*current, target, inc, dec);
-    return (*current != target);
-}
 
 /**
  * Return the value 'current' after it tries to approach target, going up at
  * most 'inc' and going down at most 'dec'.
  */
+Bool32 approach_f32_bool(f32 *current, f32 target, f32 inc, f32 dec) {
+    *current = approach_f32(*current, target, inc, dec);
+    return !(*current == target);
+}
 f32 approach_f32(f32 current, f32 target, f32 inc, f32 dec) {
     f32 dist = (target - current);
     if (dist >= 0.0f) { // target >= current
@@ -1001,10 +549,6 @@ f32 approach_f32(f32 current, f32 target, f32 inc, f32 dec) {
     }
     return current;
 }
-Bool32 approach_f32_bool(f32 *current, f32 target, f32 inc, f32 dec) {
-    *current = approach_f32(*current, target, inc, dec);
-    return !(*current == target);
-}
 
 s32 approach_f32_signed(f32 *current, f32 target, f32 inc) {
     *current += inc;
@@ -1077,7 +621,7 @@ s16 approach_s16_asymptotic(s16 current, s16 target, s16 divisor) {
 }
 
 s16 abs_angle_diff(s16 a0, s16 a1) {
-    register s16 diff = (a1 - a0);
+    s16 diff = (a1 - a0);
     if (diff == -0x8000) return 0x7FFF;
     return abss(diff);
 }
@@ -1334,7 +878,7 @@ s32 ray_surface_intersect(Vec3f orig, Vec3f dir, f32 dir_length, struct Surface
     // Successful contact.
     // Make 'add_dir' into 'dir' scaled by 'length'.
     Vec3f add_dir;
-    vec3_prod_val(add_dir, dir, *length);
+    vec3_scale_dest(add_dir, dir, *length);
     // Make 'hit_pos' into the sum of 'orig' and 'add_dir'.
     vec3f_sum(hit_pos, orig, add_dir);
     return TRUE;
diff --git a/src/engine/math_util.h b/src/engine/math_util.h
index 7a80952b3..5554791af 100644
--- a/src/engine/math_util.h
+++ b/src/engine/math_util.h
@@ -4,6 +4,7 @@
 #include <PR/ultratypes.h>
 
 #include "types.h"
+#include "game/puppyprint.h"
 
 #define NEAR_ZERO   __FLT_EPSILON__
 #define NEAR_ONE    (1.0f - __FLT_EPSILON__)
@@ -22,26 +23,18 @@ extern Vec3i gVec3iZero;
 extern Vec3f gVec3fOne;
 extern Vec3s gVec3sOne;
 
+
+// Angles
+
 /**
  * Converts an angle in degrees to sm64's s16 angle units. For example, DEGREES(90) == 0x4000
  * This should be used mainly to make camera code clearer at first glance.
  */
 // #define DEGREES(x) ((x) * 0x10000 / 360)
 #define DEGREES(x) ((x) * 0x2000 / 45)
-// #define DEGREES(x) (((x) << 13) / 45)
 
-/*
- * The sine and cosine tables overlap, but "#define gCosineTable (gSineTable +
- * 0x400)" doesn't give expected codegen; gSineTable and gCosineTable need to
- * be different symbols for code to match. Most likely the tables were placed
- * adjacent to each other, and gSineTable cut short, such that reads overflow
- * into gCosineTable.
- *
- * These kinds of out of bounds reads are undefined behavior, and break on
- * e.g. GCC (which doesn't place the tables next to each other, and probably
- * exploits array sizes for range analysis-based optimizations as well).
- * Thus, for non-IDO compilers we use the standard-compliant version.
- */
+// Trig functions
+
 extern f32 gSineTable[];
 #define gCosineTable (gSineTable + 0x400)
 
@@ -51,6 +44,8 @@ extern f32 gSineTable[];
 #define cots(x) (coss(x) / sins(x))
 #define atans(x) gArctanTable[(s32)((((x) * 1024) + 0.5f))] // is this correct? used for atan2_lookup
 
+// Angle conversion macros
+
 #define RAD_PER_DEG (M_PI / 180.0f)
 #define DEG_PER_RAD (180.0f / M_PI)
 
@@ -61,90 +56,121 @@ extern f32 gSineTable[];
 #define degrees_to_radians(x) (f32)( (f32)(x) * RAD_PER_DEG       )
 #define radians_to_degrees(x) (f32)( (f32)(x) * DEG_PER_RAD       )
 
-#define signum_positive(x) ((x < 0) ? -1 : 1)
 
-// #define min(a, b) MIN((a), (b)) // ((a) < (b) ? (a) : (b))
-// #define max(a, b) MAX((a), (b)) // ((a) > (b) ? (a) : (b))
-#define CLAMP(x, low, high)  (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x)))
+// Various basic helper macros
 
-// from limits.h
-#define S8_MAX __SCHAR_MAX__
-#define S8_MIN (-S8_MAX - 1)
-#define U8_MAX (S8_MAX * 2 + 1)
-#define S16_MAX __SHRT_MAX__
-#define S16_MIN (-S16_MAX - 1)
-#define U16_MAX (S16_MAX * 2 + 1)
-#define S32_MAX __INT_MAX__
-#define S32_MIN (-S32_MAX - 1)
-#define U32_MAX (S32_MAX * 2U + 1U)
-#define S64_MAX __LONG_LONG_MAX__
-#define S64_MIN (-S64_MAX - 1LL)
-#define U64_MAX (S64_MAX * 2ULL + 1ULL)
-#define F32_MAX __FLT_MAX__
-#define F32_MIN __FLT_MIN__
-#define F64_MAX __DBL_MAX__
-#define F64_MIN __DBL_MIN__
+// Get the square of a number
+#define sqr(x) ({         \
+    __auto_type _x = (x); \
+    _x * _x; })
 
-#define CLAMP_U8( x)        CLAMP((x),     0x0,  U8_MAX)
+// Get the sign of a number: -1 if negative, otherwise 1 (zero counts as positive)
+#define signum_positive(x) (((x) < 0) ? -1 : 1)
+
+// Absolute value
+#define ABS(x) ({         \
+    __auto_type _x = (x); \
+    _x > 0 ? _x : -_x; })
+#define absi ABS
+#define abss ABS
+
+// Absolute value of a float (faster than using the above macro)
+ALWAYS_INLINE f32 absf(f32 in) {
+    f32 out;
+    __asm__("abs.s %0,%1" : "=f" (out) : "f" (in));
+    return out;
+}
+
+// Get the minimum / maximum of a set of numbers
+#undef MIN
+#define MIN(a, b) ({      \
+    __auto_type _a = (a); \
+    __auto_type _b = (b); \
+    _a < _b ? _a : _b; })
+
+#undef MAX
+#define MAX(a, b) ({      \
+    __auto_type _a = (a); \
+    __auto_type _b = (b); \
+    _a > _b ? _a : _b; })
+
+#define min_3(a, b, c) MIN(MIN(a, b), c)
+
+#define max_3(a, b, c) MAX(MAX(a, b), c)
+
+#define min_3f min_3
+#define min_3i min_3
+#define min_3s min_3
+
+#define max_3f max_3
+#define max_3i max_3
+#define max_3s max_3
+
+void min_max_3f(f32 a, f32 b, f32 c, f32 *min, f32 *max);
+void min_max_3i(s32 a, s32 b, s32 c, s32 *min, s32 *max);
+void min_max_3s(s16 a, s16 b, s16 c, s16 *min, s16 *max);
+
+// From Wiseguy
+// Round a float to the nearest integer
+ALWAYS_INLINE s32 roundf(f32 in) {
+    f32 tmp;
+    s32 out;
+    __asm__("round.w.s %0,%1" : "=f" (tmp) : "f" (in ));
+    __asm__("mfc1      %0,%1" : "=r" (out) : "f" (tmp));
+    return out;
+}
+
+#define round_float roundf
+
+#define FLT_IS_NONZERO(x) (absf(x) > NEAR_ZERO)
+
+
+// Integer limits and clamping
+
+#define S8_MAX   127
+#define S8_MIN  (-128)
+#define U8_MAX   255
+#define S16_MAX  32767
+#define S16_MIN (-32768)
+#define U16_MAX  65535
+#define S32_MAX  2147483647
+#define S32_MIN (-2147483647 - 1) // not -2147483648: 2147483648 does not fit a 32-bit int, so that form is a wider type
+#define U32_MAX  4294967295U      // 'U' suffix keeps this unsigned rather than a signed wider type
+
+// Clamp a value to the inclusive range [low, high]
+#define CLAMP(x, low, high)  MIN(MAX((x), (low)), (high))
+
+// Clamp a value to the range of a specific data type
+#define CLAMP_U8( x)        CLAMP((x),       0,  U8_MAX)
 #define CLAMP_S8( x)        CLAMP((x),  S8_MIN,  S8_MAX)
-#define CLAMP_U16(x)        CLAMP((x),     0x0, U16_MAX)
+#define CLAMP_U16(x)        CLAMP((x),       0, U16_MAX)
 #define CLAMP_S16(x)        CLAMP((x), S16_MIN, S16_MAX)
-#define CLAMP_U32(x)        CLAMP((x),     0x0, U32_MAX)
-#define CLAMP_S32(x)        CLAMP((x), S32_MIN, S32_MAX)
-#define CLAMP_U64(x)        CLAMP((x),     0x0, U64_MAX)
-#define CLAMP_S64(x)        CLAMP((x), S64_MIN, S64_MAX)
-#define CLAMP_F32(x)        CLAMP((x), F32_MIN, F32_MAX)
-#define CLAMP_F64(x)        CLAMP((x), F64_MIN, F64_MAX)
 
-#define SWAP(a, b)          { ((a) ^= (b)); ((b) ^= (a)); ((a) ^= (b)); }
 
-#define sqr(x)              (    (x) * (x))
-#define cube(x)             ( sqr(x) * (x))
-#define quad(x)             (cube(x) * (x))
-
-#define average_2(a, b      )   (((a) + (b)            ) / 2.0f)
-#define average_3(a, b, c   )   (((a) + (b) + (c)      ) / 3.0f)
-#define average_4(a, b, c, d)   (((a) + (b) + (c) + (d)) / 4.0f)
+// Vector operations
 
+// Set all elements of a vector to the same constant
 #define vec2_same(v, s)     (((v)[0]) = ((v)[1])                       = (s))
 #define vec3_same(v, s)     (((v)[0]) = ((v)[1]) = ((v)[2])            = (s))
 #define vec4_same(v, s)     (((v)[0]) = ((v)[1]) = ((v)[2]) = ((v)[3]) = (s))
 
+// Set all elements of a vector to zero
 #define vec2_zero(v)        (vec2_same((v), 0))
 #define vec3_zero(v)        (vec3_same((v), 0))
 #define vec4_zero(v)        (vec4_same((v), 0))
 
-#define vec2_c(v)           (   (v)[0] + (v)[1])
-#define vec3_c(v)           (vec2_c(v) + (v)[2])
-#define vec4_c(v)           (vec3_c(v) + (v)[3])
-
-#define vec2_average(v)     (vec2_c(v) / 2.0f)
-#define vec3_average(v)     (vec3_c(v) / 3.0f)
-#define vec4_average(v)     (vec4_c(v) / 4.0f)
-
-#define vec2_sumsq(v)       (  sqr((v)[0]) + sqr((v)[1]))
-#define vec3_sumsq(v)       (vec2_sumsq(v) + sqr((v)[2]))
-#define vec4_sumsq(v)       (vec3_sumsq(v) + sqr((v)[3]))
+// Sum of the squares of all elements of a vector
+#define vec2_sumsq(v)       (sqr((v)[0]) + sqr((v)[1]))
+#define vec3_sumsq(v)       (sqr((v)[0]) + sqr((v)[1]) + sqr((v)[2]))
+#define vec4_sumsq(v)       (sqr((v)[0]) + sqr((v)[1]) + sqr((v)[2]) + sqr((v)[3]))
 
+// Calculate the magnitude of a vector
 #define vec2_mag(v)         (sqrtf(vec2_sumsq(v)))
 #define vec3_mag(v)         (sqrtf(vec3_sumsq(v)))
 #define vec4_mag(v)         (sqrtf(vec4_sumsq(v)))
 
-#define vec3_yaw(from, to)  (atan2s(((to)[2] - (from)[2]), ((to)[0] - (from)[0])))
-
-#define vec2_dot(a, b)       (((a)[0] * (b)[0]) + ((a)[1] * (b)[1]))
-#define vec3_dot(a, b)      (vec2_dot((a), (b)) + ((a)[2] * (b)[2]))
-#define vec4_dot(a, b)      (vec3_dot((a), (b)) + ((a)[3] * (b)[3]))
-
-/// Make vector 'dest' the cross product of vectors a and b.
-#define vec3_cross(dst, a, b) {                         \
-    (dst)[0] = (((a)[1] * (b)[2]) - ((a)[2] * (b)[1])); \
-    (dst)[1] = (((a)[2] * (b)[0]) - ((a)[0] * (b)[2])); \
-    (dst)[2] = (((a)[0] * (b)[1]) - ((a)[1] * (b)[0])); \
-}
-
 /**
- * Set 'dest' the normal vector of a triangle with vertices a, b and c.
+ * Set 'dest' to the normal vector of a triangle with vertices a, b and c.
  * Equivalent to cross((c-b), (c-a)).
  */
 #define find_vector_perpendicular_to_plane(dest, a, b, c) {                                     \
@@ -153,247 +179,413 @@ extern f32 gSineTable[];
     (dest)[2] = ((b)[0] - (a)[0]) * ((c)[1] - (b)[1]) - ((c)[0] - (b)[0]) * ((b)[1] - (a)[1]);  \
 }
 
-/**
- * | ? ? ? 0 |
- * | ? ? ? 0 |
- * | ? ? ? 0 |
- * | 0 0 0 1 |
- * i.e. a matrix representing a linear transformation over 3 space.
- */
-// Multiply a vector by a matrix of the form
-#define linear_mtxf_mul_vec3(mtx, dstV, srcV) {                                                     \
-    (dstV)[0] = (((mtx)[0][0] * (srcV)[0]) + ((mtx)[1][0] * (srcV)[1]) + ((mtx)[2][0] * (srcV)[2]));\
-    (dstV)[1] = (((mtx)[0][1] * (srcV)[0]) + ((mtx)[1][1] * (srcV)[1]) + ((mtx)[2][1] * (srcV)[2]));\
-    (dstV)[2] = (((mtx)[0][2] * (srcV)[0]) + ((mtx)[1][2] * (srcV)[1]) + ((mtx)[2][2] * (srcV)[2]));\
-}
-
-#define linear_mtxf_mul_vec3_and_translate(mtx, dstV, srcV) {   \
-    linear_mtxf_mul_vec3((mtx), (dstV), (srcV));                \
-    vec3_add((dstV), (mtx)[3]);                                 \
-}
-
-// Multiply a vector by the transpose of a matrix of the form
-#define linear_mtxf_transpose_mul_vec3(mtx, dstV, srcV) {   \
-    (dstV)[0] = vec3_dot((mtx)[0], (srcV));                 \
-    (dstV)[1] = vec3_dot((mtx)[1], (srcV));                 \
-    (dstV)[2] = vec3_dot((mtx)[2], (srcV));                 \
-}
-
+// Set the elements of vector 'dst' to the given values
 #define vec2_set(dst, x, y) {           \
     (dst)[0] = (x);                     \
     (dst)[1] = (y);                     \
 }
 #define vec3_set(dst, x, y, z) {        \
-    vec2_set((dst), (x), (y));          \
+    (dst)[0] = (x);                     \
+    (dst)[1] = (y);                     \
     (dst)[2] = (z);                     \
 }
 #define vec4_set(dst, x, y, z, w) {     \
-    vec3_set((dst), (x), (y), (z));     \
+    (dst)[0] = (x);                     \
+    (dst)[1] = (y);                     \
+    (dst)[2] = (z);                     \
     (dst)[3] = (w);                     \
 }
 
+#define vec3f_set vec3_set
+#define vec3i_set vec3_set
+#define vec3s_set vec3_set
+
+// Copy vector 'src' to vector 'dst'
 #define vec2_copy(dst, src) {           \
-    (dst)[0] = (src)[0];                \
-    (dst)[1] = (src)[1];                \
+    __auto_type _x = (src)[0];          \
+    __auto_type _y = (src)[1];          \
+    (dst)[0] = _x;                      \
+    (dst)[1] = _y;                      \
 }
 #define vec3_copy(dst, src) {           \
-    vec2_copy((dst), (src));            \
-    (dst)[2] = (src)[2];                \
+    __auto_type _x = (src)[0];          \
+    __auto_type _y = (src)[1];          \
+    __auto_type _z = (src)[2];          \
+    (dst)[0] = _x;                      \
+    (dst)[1] = _y;                      \
+    (dst)[2] = _z;                      \
 }
 #define vec4_copy(dst, src) {           \
-    vec3_copy((dst), (src));            \
-    (dst)[3] = (src)[3];                \
+    __auto_type _x = (src)[0];          \
+    __auto_type _y = (src)[1];          \
+    __auto_type _z = (src)[2];          \
+    __auto_type _w = (src)[3];          \
+    (dst)[0] = _x;                      \
+    (dst)[1] = _y;                      \
+    (dst)[2] = _z;                      \
+    (dst)[3] = _w;                      \
 }
 
+#define vec3f_copy vec3_copy
+#define vec3i_copy vec3_copy
+#define vec3s_copy vec3_copy
+#define vec3s_to_vec3i vec3_copy
+#define vec3s_to_vec3f vec3_copy
+#define vec3i_to_vec3s vec3_copy
+#define vec3i_to_vec3f vec3_copy
+#define vec3f_to_vec3s vec3_copy
+#define vec3f_to_vec3i vec3_copy
+
+#define surface_normal_to_vec3f(dst, surf) vec3f_copy((dst), &((surf)->normal.x))
+
+// Copy vector 'src' to vector 'dst' and add a scalar to the y component
 #define vec3_copy_y_off(dst, src, y) {  \
-    (dst)[0] =  (src)[0];               \
-    (dst)[1] = ((src)[1] + (y));        \
-    (dst)[2] =  (src)[2];               \
+    __auto_type _x = (src)[0];          \
+    __auto_type _y = (src)[1] + (y);    \
+    __auto_type _z = (src)[2];          \
+    (dst)[0] = _x;                      \
+    (dst)[1] = _y;                      \
+    (dst)[2] = _z;                      \
 }
 
-#define vec2_copy_roundf(dst, src) {    \
-    (dst)[0] = roundf((src)[0]);        \
-    (dst)[1] = roundf((src)[1]);        \
+#define vec3f_copy_y_off vec3_copy_y_off
+
+// Set vector 'dst' to the sum of vectors 'src1' and 'src2'
+#define vec2_sum(dst, src1, src2) {         \
+    __auto_type _x = (src1)[0] + (src2)[0]; \
+    __auto_type _y = (src1)[1] + (src2)[1]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
 }
-#define vec3_copy_roundf(dst, src) {    \
-    vec2_copy_roundf((dst), (src));     \
-    (dst)[2] = roundf((src)[2]);        \
+#define vec3_sum(dst, src1, src2) {         \
+    __auto_type _x = (src1)[0] + (src2)[0]; \
+    __auto_type _y = (src1)[1] + (src2)[1]; \
+    __auto_type _z = (src1)[2] + (src2)[2]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
 }
-#define vec4_copy_roundf(dst, src) {    \
-    vec3_copy_roundf((dst), (src));     \
-    (dst)[3] = roundf((src)[3]);        \
+#define vec4_sum(dst, src1, src2) {         \
+    __auto_type _x = (src1)[0] + (src2)[0]; \
+    __auto_type _y = (src1)[1] + (src2)[1]; \
+    __auto_type _z = (src1)[2] + (src2)[2]; \
+    __auto_type _w = (src1)[3] + (src2)[3]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
+    (dst)[3] = _w;                          \
 }
 
-#define vec2_copy_inverse(dst, src) {   \
-    (dst)[0] = (src)[1];                \
-    (dst)[1] = (src)[0];                \
-}
-#define vec3_copy_inverse(dst, src) {   \
-    (dst)[0] = (src)[2];                \
-    (dst)[1] = (src)[1];                \
-    (dst)[2] = (src)[0];                \
-}
-#define vec4_copy_inverse(dst, src) {   \
-    (dst)[0] = (src)[3];                \
-    (dst)[1] = (src)[2];                \
-    (dst)[2] = (src)[1];                \
-    (dst)[3] = (src)[0];                \
-}
-
-#define vec3_copy_offset_m1(dst, src) { \
-    (dst)[0] = (src)[1];                \
-    (dst)[1] = (src)[2];                \
-    (dst)[2] = (src)[0];                \
-}
-
-#define vec2_copy_negative(dst, src) {  \
-    (dst)[0] = -(src)[0];               \
-    (dst)[1] = -(src)[1];               \
-}
-#define vec3_copy_negative(dst, src) {  \
-    vec2_copy_negative((dst), (src));   \
-    (dst)[2] = -(src)[2];               \
-}
-#define vec4_copy_negative(dst, src) {  \
-    vec3_copy_negative((dst), (src));   \
-    (dst)[3] = -(src)[3];               \
-}
-
-#define vec2_sum(dst, src1, src2) {     \
-    (dst)[0] = ((src1)[0] + (src2)[0]); \
-    (dst)[1] = ((src1)[1] + (src2)[1]); \
-}
-#define vec3_sum(dst, src1, src2) {     \
-    vec2_sum((dst), (src1), (src2));    \
-    (dst)[2] = ((src1)[2] + (src2)[2]); \
-}
-#define vec4_sum(dst, src1, src2) {     \
-    vec3_sum((dst), (src1), (src2));    \
-    (dst)[3] = ((src1)[3] + (src2)[3]); \
-}
+#define vec3f_sum vec3_sum
+#define vec3i_sum vec3_sum
+#define vec3s_sum vec3_sum
 
+// Add the vector 'src' to vector 'dst'
 #define vec2_add(dst, src) vec2_sum((dst), (dst), (src))
 #define vec3_add(dst, src) vec3_sum((dst), (dst), (src))
 #define vec4_add(dst, src) vec4_sum((dst), (dst), (src))
 
-#define vec2_sum_val(dst, src, x) {     \
-    (dst)[0] = ((src)[0] + (x));        \
-    (dst)[1] = ((src)[1] + (x));        \
+#define vec3f_add vec3_add
+#define vec3i_add vec3_add
+#define vec3s_add vec3_add
+
+// Set vector 'dst' to the difference of vectors 'src1' and 'src2'
+#define vec2_diff(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] - (src2)[0]; \
+    __auto_type _y = (src1)[1] - (src2)[1]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
 }
-#define vec3_sum_val(dst, src, x) {     \
-    vec2_sum_val((dst), (src), (x));    \
-    (dst)[2] = ((src)[2] + (x));        \
+#define vec3_diff(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] - (src2)[0]; \
+    __auto_type _y = (src1)[1] - (src2)[1]; \
+    __auto_type _z = (src1)[2] - (src2)[2]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
 }
-#define vec4_sum_val(dst, src, x) {     \
-    vec3_sum_val((dst), (src), (x));    \
-    (dst)[3] = ((src)[2] + (x));        \
+#define vec4_diff(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] - (src2)[0]; \
+    __auto_type _y = (src1)[1] - (src2)[1]; \
+    __auto_type _z = (src1)[2] - (src2)[2]; \
+    __auto_type _w = (src1)[3] - (src2)[3]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
+    (dst)[3] = _w;                          \
 }
 
-#define vec2_add_val(dst, x) vec2_sum_val((dst), (dst), (x))
-#define vec3_add_val(dst, x) vec3_sum_val((dst), (dst), (x))
-#define vec4_add_val(dst, x) vec4_sum_val((dst), (dst), (x))
-
-#define vec2_diff(dst, src1, src2) {    \
-    (dst)[0] = ((src1)[0] - (src2)[0]); \
-    (dst)[1] = ((src1)[1] - (src2)[1]); \
-}
-#define vec3_diff(dst, src1, src2) {    \
-    vec2_diff((dst), (src1), (src2));   \
-    (dst)[2] = ((src1)[2] - (src2)[2]); \
-}
-#define vec4_diff(dst, src1, src2) {    \
-    vec3_diff((dst), (src1), (src2));   \
-    (dst)[3] = ((src1)[3] - (src2)[3]); \
-}
+#define vec3f_diff vec3_diff
+#define vec3i_diff vec3_diff
+#define vec3s_diff vec3_diff
 
+// Subtract the vector 'src' from vector 'dst'
 #define vec2_sub(dst, src) vec2_diff((dst), (dst), (src))
 #define vec3_sub(dst, src) vec3_diff((dst), (dst), (src))
 #define vec4_sub(dst, src) vec4_diff((dst), (dst), (src))
 
-#define vec2_diff_val(dst, src, x) {    \
-    (dst)[0] = ((src)[0] - (x));        \
-    (dst)[1] = ((src)[1] - (x));        \
+#define vec3f_sub vec3_sub
+#define vec3i_sub vec3_sub
+#define vec3s_sub vec3_sub
+
+// Set vector 'dst' to the element-wise product of vectors 'src1' and 'src2'
+#define vec2_prod(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] * (src2)[0]; \
+    __auto_type _y = (src1)[1] * (src2)[1]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
 }
-#define vec3_diff_val(dst, src, x) {    \
-    vec2_diff_val((dst), (src), (x));   \
-    (dst)[2] = ((src)[2] - (x));        \
+#define vec3_prod(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] * (src2)[0]; \
+    __auto_type _y = (src1)[1] * (src2)[1]; \
+    __auto_type _z = (src1)[2] * (src2)[2]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
 }
-#define vec4_diff_val(dst, src, x) {    \
-    vec3_diff_val((dst), (src), (x));   \
-    (dst)[3] = ((src)[3] - (x));        \
+#define vec4_prod(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] * (src2)[0]; \
+    __auto_type _y = (src1)[1] * (src2)[1]; \
+    __auto_type _z = (src1)[2] * (src2)[2]; \
+    __auto_type _w = (src1)[3] * (src2)[3]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
+    (dst)[3] = _w;                          \
 }
 
-#define vec2_sub_val(dst, x) vec2_diff_val((dst), (dst), (x))
-#define vec3_sub_val(dst, x) vec3_diff_val((dst), (dst), (x))
-#define vec4_sub_val(dst, x) vec4_diff_val((dst), (dst), (x))
-
-#define vec2_prod(dst, src1, src2) {    \
-    (dst)[0] = ((src1)[0] * (src2)[0]); \
-    (dst)[1] = ((src1)[1] * (src2)[1]); \
-}
-#define vec3_prod(dst, src1, src2) {    \
-    vec2_prod((dst), (src1), (src2));   \
-    (dst)[2] = ((src1)[2] * (src2)[2]); \
-}
-#define vec4_prod(dst, src1, src2) {    \
-    vec3_prod((dst), (src1), (src2));   \
-    (dst)[3] = ((src1)[3] * (src2)[3]); \
-}
+#define vec3f_prod vec3_prod
+#define vec3i_prod vec3_prod
+#define vec3s_prod vec3_prod
 
+// Multiply vector 'dst' element-wise by vector 'src'
 #define vec2_mul(dst, src) vec2_prod((dst), (dst), (src))
 #define vec3_mul(dst, src) vec3_prod((dst), (dst), (src))
 #define vec4_mul(dst, src) vec4_prod((dst), (dst), (src))
 
-#define vec2_prod_val(dst, src, x) {    \
-    (dst)[0] = ((src)[0] * (x));        \
-    (dst)[1] = ((src)[1] * (x));        \
+#define vec3f_mul vec3_mul
+#define vec3i_mul vec3_mul
+#define vec3s_mul vec3_mul
+
+// Set vector 'dst' to vector 'src' scaled by the scalar 'x'
+#define vec2_scale_dest(dst, src, x) {  \
+    __auto_type _x = (src)[0] * (x);    \
+    __auto_type _y = (src)[1] * (x);    \
+    (dst)[0] = _x;                      \
+    (dst)[1] = _y;                      \
 }
-#define vec3_prod_val(dst, src, x) {    \
-    vec2_prod_val((dst), (src), (x));   \
-    (dst)[2] = ((src)[2] * (x));        \
+#define vec3_scale_dest(dst, src, x) {  \
+    __auto_type _x = (src)[0] * (x);    \
+    __auto_type _y = (src)[1] * (x);    \
+    __auto_type _z = (src)[2] * (x);    \
+    (dst)[0] = _x;                      \
+    (dst)[1] = _y;                      \
+    (dst)[2] = _z;                      \
 }
-#define vec4_prod_val(dst, src, x) {    \
-    vec3_prod_val((dst), (src), (x));   \
-    (dst)[3] = ((src)[3] * (x));        \
+#define vec4_scale_dest(dst, src, x) {  \
+    __auto_type _x = (src)[0] * (x);    \
+    __auto_type _y = (src)[1] * (x);    \
+    __auto_type _z = (src)[2] * (x);    \
+    __auto_type _w = (src)[3] * (x);    \
+    (dst)[0] = _x;                      \
+    (dst)[1] = _y;                      \
+    (dst)[2] = _z;                      \
+    (dst)[3] = _w;                      \
 }
 
-#define vec2_mul_val(dst, x) vec2_prod_val(dst, dst, x)
-#define vec3_mul_val(dst, x) vec3_prod_val(dst, dst, x)
-#define vec4_mul_val(dst, x) vec4_prod_val(dst, dst, x)
+// Scale vector 'dst' by the scalar 'x'
+#define vec2_scale(dst, x) vec2_scale_dest(dst, dst, x)
+#define vec3_scale(dst, x) vec3_scale_dest(dst, dst, x)
+#define vec4_scale(dst, x) vec4_scale_dest(dst, dst, x)
 
-#define vec2_quot(dst, src1, src2) {    \
-    (dst)[0] = ((src1)[0] / (src2)[0]); \
-    (dst)[1] = ((src1)[1] / (src2)[1]); \
+// Set vector 'dst' to vector 'src1' divided by vector 'src2'
+#define vec2_quot(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] / (src2)[0]; \
+    __auto_type _y = (src1)[1] / (src2)[1]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
 }
-#define vec3_quot(dst, src1, src2) {    \
-    vec2_quot((dst), (src1), (src2));   \
-    (dst)[2] = ((src1)[2] / (src2)[2]); \
+#define vec3_quot(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] / (src2)[0]; \
+    __auto_type _y = (src1)[1] / (src2)[1]; \
+    __auto_type _z = (src1)[2] / (src2)[2]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
 }
-#define vec4_quot(dst, src1, src2) {    \
-    vec3_quot((dst), (src1), (src2));   \
-    (dst)[3] = ((src1)[3] / (src2)[3]); \
+#define vec4_quot(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] / (src2)[0]; \
+    __auto_type _y = (src1)[1] / (src2)[1]; \
+    __auto_type _z = (src1)[2] / (src2)[2]; \
+    __auto_type _w = (src1)[3] / (src2)[3]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
+    (dst)[3] = _w;                          \
 }
 
+#define vec3f_quot vec3_quot
+#define vec3i_quot vec3_quot
+#define vec3s_quot vec3_quot
+
+// Divide vector 'dst' by vector 'src'
 #define vec2_div(dst, src) vec2_quot((dst), (dst), (src))
 #define vec3_div(dst, src) vec3_quot((dst), (dst), (src))
 #define vec4_div(dst, src) vec4_quot((dst), (dst), (src))
 
-#define vec2_quot_val(dst, src, x) {    \
-    (dst)[0] = ((src)[0] / (x));        \
-    (dst)[1] = ((src)[1] / (x));        \
-}
-#define vec3_quot_val(dst, src, x) {    \
-    vec2_quot_val((dst), (src), (x));   \
-    (dst)[2] = ((src)[2] / (x));        \
-}
-#define vec4_quot_val(dst, src, x) {    \
-    vec3_quot_val((dst), (src), (x));   \
-    (dst)[3] = ((src)[3] / (x));        \
+#define vec3f_div vec3_div
+#define vec3i_div vec3_div
+#define vec3s_div vec3_div
+
+// The yaw between two points in 3D space
+#define vec3_yaw(from, to)  (atan2s(((to)[2] - (from)[2]), ((to)[0] - (from)[0])))
+
+// Calculate the dot product of two vectors
+#define vec2_dot(a, b)       (((a)[0] * (b)[0]) + ((a)[1] * (b)[1]))
+#define vec3_dot(a, b)      (vec2_dot((a), (b)) + ((a)[2] * (b)[2]))
+#define vec4_dot(a, b)      (vec3_dot((a), (b)) + ((a)[3] * (b)[3]))
+
+#define vec3f_dot vec3_dot
+
+// Make vector 'dst' the cross product of vectors 'a' and 'b'.
+#define vec3_cross(dst, a, b) {                             \
+    __auto_type _x = ((a)[1] * (b)[2]) - ((a)[2] * (b)[1]); \
+    __auto_type _y = ((a)[2] * (b)[0]) - ((a)[0] * (b)[2]); \
+    __auto_type _z = ((a)[0] * (b)[1]) - ((a)[1] * (b)[0]); \
+    (dst)[0] = _x;                                          \
+    (dst)[1] = _y;                                          \
+    (dst)[2] = _z;                                          \
 }
 
-#define vec2_div_val(dst, x) vec2_quot_val((dst), (dst), (x))
-#define vec3_div_val(dst, x) vec3_quot_val((dst), (dst), (x))
-#define vec4_div_val(dst, x) vec4_quot_val((dst), (dst), (x))
+#define vec3f_cross vec3_cross
+
+// Rescale vector 'v' to unit length (magnitude is clamped to at least NEAR_ZERO before dividing)
+#define vec3_normalize(v) {                           \
+    f32 _v_mag = vec3_mag((v));                       \
+    f32 _v_invmag = (1.0f / MAX(_v_mag, NEAR_ZERO)); \
+    vec3_scale((v), _v_invmag);                       \
+}
+
+#define vec3f_normalize vec3_normalize
+
+// Clamp the magnitude of vector 'v' to at most 'max'; shorter vectors are left untouched
+#define vec3_set_max_dist(v, max) {  \
+    f32 _v_mag = vec3_mag((v));      \
+    f32 _max = (max);                \
+    _v_mag = MAX(_v_mag, NEAR_ZERO); \
+    if (_v_mag > _max) {             \
+        _v_mag = (_max / _v_mag);    \
+        vec3_scale((v), _v_mag);     \
+    }                                \
+}
+
+// Transform the vector 'srcV' by the matrix 'mtx' and store the result in 'dstV'. Ignores translation.
+#define linear_mtxf_mul_vec3(mtx, dstV, srcV) {                                                         \
+    PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);                                                   \
+    __auto_type _x = ((mtx)[0][0] * (srcV)[0]) + ((mtx)[1][0] * (srcV)[1]) + ((mtx)[2][0] * (srcV)[2]); \
+    __auto_type _y = ((mtx)[0][1] * (srcV)[0]) + ((mtx)[1][1] * (srcV)[1]) + ((mtx)[2][1] * (srcV)[2]); \
+    __auto_type _z = ((mtx)[0][2] * (srcV)[0]) + ((mtx)[1][2] * (srcV)[1]) + ((mtx)[2][2] * (srcV)[2]); \
+    (dstV)[0] = _x;                                                                                     \
+    (dstV)[1] = _y;                                                                                     \
+    (dstV)[2] = _z;                                                                                     \
+}
+
+// Transform the vector 'srcV' by the matrix 'mtx' including translation, and store the result in 'dstV'
+#define linear_mtxf_mul_vec3_and_translate(mtx, dstV, srcV) { \
+    linear_mtxf_mul_vec3((mtx), (dstV), (srcV));              \
+    vec3_add((dstV), (mtx)[3]);                               \
+}
+
+// Transform the vector 'srcV' by the transpose of the matrix 'mtx'
+// and store the result in 'dstV'. Ignores translation.
+// For most transformation matrices, this will apply the inverse of the transformation.
+#define linear_mtxf_transpose_mul_vec3(mtx, dstV, srcV) { \
+    PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);     \
+    __auto_type _x = vec3_dot((mtx)[0], (srcV));          \
+    __auto_type _y = vec3_dot((mtx)[1], (srcV));          \
+    __auto_type _z = vec3_dot((mtx)[2], (srcV));          \
+    (dstV)[0] = _x;                                       \
+    (dstV)[1] = _y;                                       \
+    (dstV)[2] = _z;                                       \
+}
+
+#define linear_mtxf_mul_vec3f linear_mtxf_mul_vec3
+#define linear_mtxf_mul_vec3f_and_translate linear_mtxf_mul_vec3_and_translate
+#define linear_mtxf_transpose_mul_vec3f linear_mtxf_transpose_mul_vec3
+
+
+// Angles and distances between vectors
+
+/// Finds the distance between two vectors
+#define vec3_get_dist(from, to, dist) { \
+    Vec3f _d;                           \
+    vec3_diff(_d, (to), (from));        \
+    *(dist) = vec3_mag((_d));           \
+}
+
+#define vec3f_get_dist vec3_get_dist
+#define vec3s_get_dist vec3_get_dist
+
+/// Finds the horizontal distance between two vectors
+#define vec3_get_lateral_dist(from, to, lateralDist) { \
+    Vec3f _d;                                          \
+    vec3_diff(_d, (to), (from));                       \
+    *(lateralDist) = sqrtf(sqr(_d[0]) + sqr(_d[2]));   \
+}
+
+#define vec3f_get_lateral_dist vec3_get_lateral_dist
+#define vec3s_get_lateral_dist vec3_get_lateral_dist
+
+/// Finds the pitch between two vectors
+#define vec3_get_pitch(from, to, pitch) {                     \
+    Vec3f _d;                                                 \
+    vec3_diff(_d, (to), (from));                              \
+    *(pitch) = atan2s(sqrtf(sqr(_d[0]) + sqr(_d[2])), _d[1]); \
+}
+
+#define vec3f_get_pitch vec3_get_pitch
+#define vec3s_get_pitch vec3_get_pitch
+
+/// Finds the yaw between two vectors
+#define vec3_get_yaw(from, to, yaw) { \
+    f32 _dx = ((to)[0] - (from)[0]);  \
+    f32 _dz = ((to)[2] - (from)[2]);  \
+    *(yaw) = atan2s(_dz, _dx);        \
+}
+
+#define vec3f_get_yaw vec3_get_yaw
+#define vec3s_get_yaw vec3_get_yaw
+
+// Finds the distance, pitch, and yaw from 'from' to 'to', written through the pointers 'dist', 'pitch', and 'yaw'
+#define vec3_get_dist_and_angle(from, to, dist, pitch, yaw) { \
+    Vec3f _d;                                                 \
+    vec3_diff(_d, (to), (from));                              \
+    f32 _xz = (sqr(_d[0]) + sqr(_d[2]));                      \
+    *(dist)  = sqrtf(_xz + sqr(_d[1]));                       \
+    *(pitch) = atan2s(sqrtf(_xz), _d[1]);                     \
+    *(yaw)   = atan2s(_d[2], _d[0]);                          \
+}
+
+#define vec3f_get_dist_and_angle vec3_get_dist_and_angle
+#define vec3s_get_dist_and_angle vec3_get_dist_and_angle
+
+// Constructs the 'to' point which is distance 'dist' away from the 'from' position,
+// and has the angles pitch and yaw.
+#define vec3_set_dist_and_angle(from, to, dist, pitch, yaw) { \
+    f32 _dcos = ((dist) * coss(pitch));                         \
+    __auto_type _x = ((from)[0] + (_dcos  * sins(yaw)));       \
+    __auto_type _y = ((from)[1] + ((dist) * sins(pitch)));     \
+    __auto_type _z = ((from)[2] + (_dcos  * coss(yaw)));       \
+    (to)[0] = _x;                                             \
+    (to)[1] = _y;                                             \
+    (to)[2] = _z;                                             \
+}
+
+#define vec3f_set_dist_and_angle vec3_set_dist_and_angle
+#define vec3s_set_dist_and_angle vec3_set_dist_and_angle
+
+
+// Matrices
 
 #define MAT4_VEC_DOT_PROD(R, A, B, row, col) {              \
     (R)[(row)][(col)]  = ((A)[(row)][0] * (B)[0][(col)]);   \
@@ -431,96 +623,10 @@ extern f32 gSineTable[];
     ((u32 *)(mtx))[15] = FLOAT_ONE;             \
 }
 
-#define NAME_INVMAG(v) v##_invmag
-
-/// Scale vector 'v' so it has length 1
-#define vec3_normalize(v) {                                     \
-    register f32 NAME_INVMAG(v) = vec3_mag((v));                \
-    NAME_INVMAG(v) = (1.0f / MAX(NAME_INVMAG(v), NEAR_ZERO));   \
-    vec3_mul_val((v), NAME_INVMAG(v));                          \
-}
-
-#define vec3_normalize_max(v, max) {    \
-    register f32 v##_mag = vec3_mag(v); \
-    v##_mag = MAX(v##_mag, NEAR_ZERO);  \
-    if (v##_mag > max) {                \
-        v##_mag = (max / v##_mag);      \
-        vec3_mul_val(v, v##_mag);       \
-    }                                   \
-}
-
-#define ABS(x)  (((x) > 0) ? (x) : -(x))
-
-extern s32 roundf(f32);
-// backwards compatibility
-#define round_float(in) roundf(in)
-
-#define absf ABS
-#define absi ABS
-#define abss ABS
-
-#define FLT_IS_NONZERO(x) (absf(x) > NEAR_ZERO)
-
 u16 random_u16(void);
 f32 random_float(void);
 s32 random_sign(void);
 
-f32  min_3f(   f32 a, f32 b, f32 c);
-s32  min_3i(   s32 a, s32 b, s32 c);
-s32  min_3s(   s16 a, s16 b, s16 c);
-f32  max_3f(   f32 a, f32 b, f32 c);
-s32  max_3i(   s32 a, s32 b, s32 c);
-s32  max_3s(   s16 a, s16 b, s16 c);
-void min_max_3f(f32 a, f32 b, f32 c, f32 *min, f32 *max);
-void min_max_3i(s32 a, s32 b, s32 c, s32 *min, s32 *max);
-void min_max_3s(s16 a, s16 b, s16 c, s16 *min, s16 *max);
-
-void vec3f_copy    (Vec3f dest, const Vec3f src);
-void vec3i_copy    (Vec3i dest, const Vec3i src);
-void vec3s_copy    (Vec3s dest, const Vec3s src);
-void vec3s_to_vec3i(Vec3i dest, const Vec3s src);
-void vec3s_to_vec3f(Vec3f dest, const Vec3s src);
-void vec3i_to_vec3s(Vec3s dest, const Vec3i src);
-void vec3i_to_vec3f(Vec3f dest, const Vec3i src);
-void vec3f_to_vec3s(Vec3s dest, const Vec3f src);
-void vec3f_to_vec3i(Vec3i dest, const Vec3f src);
-
-void vec3f_copy_y_off(Vec3f dest, Vec3f src, f32 yOff);
-
-void surface_normal_to_vec3f(Vec3f dest, struct Surface *surf);
-
-void vec3f_set(Vec3f dest, const f32 x, const f32 y, const f32 z);
-void vec3i_set(Vec3i dest, const s32 x, const s32 y, const s32 z);
-void vec3s_set(Vec3s dest, const s16 x, const s16 y, const s16 z);
-
-void vec3f_add (Vec3f dest, const Vec3f a               );
-void vec3i_add (Vec3i dest, const Vec3i a               );
-void vec3s_add (Vec3s dest, const Vec3s a               );
-void vec3f_sum (Vec3f dest, const Vec3f a, const Vec3f b);
-void vec3i_sum (Vec3i dest, const Vec3i a, const Vec3i b);
-void vec3s_sum (Vec3s dest, const Vec3s a, const Vec3s b);
-void vec3f_sub (Vec3f dest, const Vec3f a               );
-void vec3i_sub (Vec3i dest, const Vec3i a               );
-void vec3s_sub (Vec3s dest, const Vec3s a               );
-void vec3f_diff(Vec3f dest, const Vec3f a, const Vec3f b);
-void vec3i_diff(Vec3i dest, const Vec3i a, const Vec3i b);
-void vec3s_diff(Vec3s dest, const Vec3s a, const Vec3s b);
-void vec3f_mul (Vec3f dest, const Vec3f a               );
-void vec3i_mul (Vec3i dest, const Vec3i a               );
-void vec3s_mul (Vec3s dest, const Vec3s a               );
-void vec3f_prod(Vec3f dest, const Vec3f a, const Vec3f b);
-void vec3i_prod(Vec3i dest, const Vec3i a, const Vec3i b);
-void vec3s_prod(Vec3s dest, const Vec3s a, const Vec3s b);
-void vec3f_div (Vec3f dest, const Vec3f a               );
-void vec3i_div (Vec3i dest, const Vec3i a               );
-void vec3s_div (Vec3s dest, const Vec3s a               );
-void vec3f_quot(Vec3f dest, const Vec3f a, const Vec3f b);
-void vec3i_quot(Vec3i dest, const Vec3i a, const Vec3i b);
-void vec3s_quot(Vec3s dest, const Vec3s a, const Vec3s b);
-
-f32  vec3f_dot(              const Vec3f a, const Vec3f b);
-void vec3f_cross(Vec3f dest, const Vec3f a, const Vec3f b);
-void vec3f_normalize(Vec3f dest);
 void mtxf_copy(Mat4 dest, Mat4 src);
 void mtxf_identity(Mat4 mtx);
 void mtxf_translate(Mat4 dest, Vec3f b);
@@ -537,41 +643,17 @@ void mtxf_mul(Mat4 dest, Mat4 a, Mat4 b);
 void mtxf_scale_vec3f(Mat4 dest, Mat4 mtx, Vec3f s);
 void mtxf_mul_vec3s(Mat4 mtx, Vec3s b);
 
-extern void mtxf_to_mtx_fast(register s16 *dest, register float *src);
-ALWAYS_INLINE void mtxf_to_mtx(register void *dest, register void *src) {
+extern void mtxf_to_mtx_fast(s16 *dest, float *src);
+ALWAYS_INLINE void mtxf_to_mtx(void *dest, void *src) {
     mtxf_to_mtx_fast((s16*)dest, (float*)src);
     // guMtxF2L(src, dest);
 }
 
 void mtxf_rotate_xy(Mtx *mtx, s16 angle);
-void linear_mtxf_mul_vec3f(Mat4 m, Vec3f dst, Vec3f v);
-void linear_mtxf_mul_vec3f_and_translate(Mat4 m, Vec3f dst, Vec3f v);
-void linear_mtxf_transpose_mul_vec3f(Mat4 m, Vec3f dst, Vec3f v);
 
-void vec2f_get_lateral_dist(                   Vec2f from, Vec2f to,            f32 *lateralDist                      );
-void vec3f_get_lateral_dist(                   Vec3f from, Vec3f to,            f32 *lateralDist                      );
-void vec3f_get_lateral_dist_squared(           Vec3f from, Vec3f to,            f32 *lateralDist                      );
-void vec3f_get_dist(                           Vec3f from, Vec3f to, f32 *dist                                        );
-void vec3f_get_dist_squared(                   Vec3f from, Vec3f to, f32 *dist                                        );
-void vec3f_get_dist_and_yaw(                   Vec3f from, Vec3f to, f32 *dist,                               s16 *yaw);
-void vec3f_get_pitch(                          Vec3f from, Vec3f to,                              s16 *pitch          );
-void vec3f_get_yaw(                            Vec3f from, Vec3f to,                                          s16 *yaw);
-void vec3f_get_angle(                          Vec3f from, Vec3f to,                              s16 *pitch, s16 *yaw);
-void vec3f_get_lateral_dist_and_pitch(         Vec3f from, Vec3f to,            f32 *lateralDist, s16 *pitch          );
-void vec3f_get_lateral_dist_and_yaw(           Vec3f from, Vec3f to,            f32 *lateralDist,             s16 *yaw);
-void vec3f_get_lateral_dist_and_angle(         Vec3f from, Vec3f to,            f32 *lateralDist, s16 *pitch, s16 *yaw);
-void vec3f_get_dist_and_lateral_dist_and_angle(Vec3f from, Vec3f to, f32 *dist, f32 *lateralDist, s16 *pitch, s16 *yaw);
-void vec3f_get_dist_and_angle(                 Vec3f from, Vec3f to, f32 *dist,                   s16 *pitch, s16 *yaw);
-void vec3s_get_dist_and_angle(                 Vec3s from, Vec3s to, s16 *dist,                   s16 *pitch, s16 *yaw);
-void vec3f_to_vec3s_get_dist_and_angle(        Vec3f from, Vec3s to, f32 *dist,                   s16 *pitch, s16 *yaw);
-void vec3s_set_dist_and_angle(                 Vec3s from, Vec3s to, s16  dist,                   s16  pitch, s16  yaw);
-void vec3f_set_dist_and_angle(                 Vec3f from, Vec3f to, f32  dist,                   s16  pitch, s16  yaw);
-
-s16 approach_angle(s16 current, s16 target, s16 inc);
 s16 approach_s16(s16 current, s16 target, s16 inc, s16 dec);
 s32 approach_s32(s32 current, s32 target, s32 inc, s32 dec);
 f32 approach_f32(f32 current, f32 target, f32 inc, f32 dec);
-Bool32 approach_angle_bool(s16 *current, s16 target, s16 inc);
 Bool32 approach_s16_bool(s16 *current, s16 target, s16 inc, s16 dec);
 Bool32 approach_s32_bool(s32 *current, s32 target, s32 inc, s32 dec);
 Bool32 approach_f32_bool(f32 *current, f32 target, f32 inc, f32 dec);
@@ -581,6 +663,8 @@ Bool32 approach_f32_bool(f32 *current, f32 target, f32 inc, f32 dec);
 #define approach_s16_symmetric_bool(current, target, inc) approach_s16_bool((current), (target), (inc), (inc))
 #define approach_s32_symmetric_bool(current, target, inc) approach_s32_bool((current), (target), (inc), (inc))
 #define approach_f32_symmetric_bool(current, target, inc) approach_f32_bool((current), (target), (inc), (inc))
+#define approach_angle approach_s16_symmetric
+#define approach_angle_bool approach_s16_symmetric_bool
 s32 approach_f32_signed(f32 *current, f32 target, f32 inc);
 s32 approach_f32_asymptotic_bool(f32 *current, f32 target, f32 multiplier);
 f32 approach_f32_asymptotic(f32 current, f32 target, f32 multiplier);
diff --git a/src/engine/surface_load.c b/src/engine/surface_load.c
index f2444fb73..a68362d69 100644
--- a/src/engine/surface_load.c
+++ b/src/engine/surface_load.c
@@ -248,7 +248,7 @@ static struct Surface *read_surface_data(TerrainData *vertexData, TerrainData **
     Vec3t offset;
     s16 min, max;
 
-    vec3_prod_val(offset, (*vertexIndices), 3);
+    vec3_scale_dest(offset, (*vertexIndices), 3);
 
     vec3s_copy(v[0], (vertexData + offset[0]));
     vec3s_copy(v[1], (vertexData + offset[1]));
@@ -265,7 +265,7 @@ static struct Surface *read_surface_data(TerrainData *vertexData, TerrainData **
     }
 #endif
     mag = 1.0f / sqrtf(mag);
-    vec3_mul_val(n, mag);
+    vec3_scale(n, mag);
 
     struct Surface *surface = alloc_surface(dynamic);
 
@@ -686,8 +686,9 @@ void load_object_collision_model(void) {
     PUPPYPRINT_GET_SNAPSHOT();
     TerrainData *collisionData = o->collisionData;
 
-    f32 sqrLateralDist;
-    vec3f_get_lateral_dist_squared(&o->oPosVec, &gMarioObject->oPosVec, &sqrLateralDist);
+    Vec3f dist;
+    vec3_diff(dist, &o->oPosVec, &gMarioObject->oPosVec);
+    f32 sqrLateralDist = sqr(dist[0]) + sqr(dist[2]);
 
     f32 verticalMarioDiff = gMarioObject->oPosY - o->oPosY;
 
diff --git a/src/game/behaviors/chain_chomp.inc.c b/src/game/behaviors/chain_chomp.inc.c
index a23480d1e..f3831896f 100644
--- a/src/game/behaviors/chain_chomp.inc.c
+++ b/src/game/behaviors/chain_chomp.inc.c
@@ -104,12 +104,12 @@ static void chain_chomp_update_chain_segments(void) {
         // Cap distance to previous chain part (so that the tail follows the chomp)
         Vec3f offset;
         vec3f_diff(offset, segment->pos, prevSegment->pos);
-        vec3_normalize_max(offset, o->oChainChompMaxDistBetweenChainParts);
+        vec3_set_max_dist(offset, o->oChainChompMaxDistBetweenChainParts);
 
         // Cap distance to pivot (so that it stretches when the chomp moves far from the wooden post)
         vec3f_add(offset, prevSegment->pos);
         f32 maxTotalDist = o->oChainChompMaxDistFromPivotPerChainPart * (CHAIN_CHOMP_NUM_SEGMENTS - i);
-        vec3_normalize_max(offset, maxTotalDist);
+        vec3_set_max_dist(offset, maxTotalDist);
 
         vec3f_copy(segment->pos, offset);
     }
@@ -365,7 +365,7 @@ static void chain_chomp_act_move(void) {
             f32 ratio = maxDistToPivot / o->oChainChompDistToPivot;
             o->oChainChompDistToPivot = maxDistToPivot;
 
-            vec3_mul_val(o->oChainChompSegments[0].pos, ratio);
+            vec3_scale(o->oChainChompSegments[0].pos, ratio);
 
             if (o->oChainChompReleaseStatus == CHAIN_CHOMP_NOT_RELEASED) {
                 // Restrict chain chomp position
diff --git a/src/game/behaviors/hoot.inc.c b/src/game/behaviors/hoot.inc.c
index 48073fc2a..cd4078398 100644
--- a/src/game/behaviors/hoot.inc.c
+++ b/src/game/behaviors/hoot.inc.c
@@ -194,7 +194,8 @@ void hoot_action_loop(void) {
 
 void hoot_turn_to_home(void) {
     s16 pitchToHome, yawToHome;
-    vec3f_get_angle(&o->oPosVec, &o->oHomeVec, &pitchToHome, &yawToHome);
+    f32 distToHome;
+    vec3f_get_dist_and_angle(&o->oPosVec, &o->oHomeVec, &distToHome, &pitchToHome, &yawToHome);
 
     o->oMoveAngleYaw = approach_s16_symmetric(o->oMoveAngleYaw, yawToHome, 0x140);
     o->oMoveAnglePitch = approach_s16_symmetric(o->oMoveAnglePitch, -pitchToHome, 0x140);
diff --git a/src/game/behaviors/intro_peach.inc.c b/src/game/behaviors/intro_peach.inc.c
index 781938fe5..cfe3d0a9c 100644
--- a/src/game/behaviors/intro_peach.inc.c
+++ b/src/game/behaviors/intro_peach.inc.c
@@ -7,8 +7,9 @@
 void intro_peach_set_pos_and_opacity(struct Object *obj, f32 targetOpacity, f32 increment) {
     Vec3f newPos;
     s16 focusPitch, focusYaw;
+    f32 dist;
 
-    vec3f_get_angle(gLakituState.pos, gLakituState.focus, &focusPitch, &focusYaw);
+    vec3f_get_dist_and_angle(gLakituState.pos, gLakituState.focus, &dist, &focusPitch, &focusYaw);
     vec3f_set_dist_and_angle(gLakituState.pos, newPos, obj->oIntroPeachDistToCamera,
                              obj->oIntroPeachPitchFromFocus + focusPitch,
                              obj->oIntroPeachYawFromFocus + focusYaw);
diff --git a/src/game/behaviors/spawn_star.inc.c b/src/game/behaviors/spawn_star.inc.c
index b3f6408d3..84135dbe0 100644
--- a/src/game/behaviors/spawn_star.inc.c
+++ b/src/game/behaviors/spawn_star.inc.c
@@ -40,7 +40,8 @@ void bhv_collect_star_loop(void) {
 
 void bhv_star_spawn_init(void) {
     s16 yaw;
-    vec3f_get_lateral_dist_and_yaw(&o->oPosVec, &o->oHomeVec, &o->oStarSpawnDisFromHome, &yaw);
+    vec3f_get_yaw(&o->oPosVec, &o->oHomeVec, &yaw);
+    vec3f_get_lateral_dist(&o->oPosVec, &o->oHomeVec, &o->oStarSpawnDisFromHome);
     o->oMoveAngleYaw = yaw;
     o->oVelY = (o->oHomeY - o->oPosY) / 30.0f;
     o->oForwardVel = o->oStarSpawnDisFromHome / 30.0f;
diff --git a/src/game/camera.c b/src/game/camera.c
index 9ef41f806..8cd629d0a 100644
--- a/src/game/camera.c
+++ b/src/game/camera.c
@@ -3462,7 +3462,7 @@ void evaluate_cubic_spline(f32 u, Vec3f Q, Vec3f spline1, Vec3f spline2, Vec3f s
     register f32 su = sqr(u);
     register f32 hcu = (su * u) / 2.0f;
 
-    B[0] = cube(nu) / 6.0f;
+    B[0] = (nu * nu * nu) / 6.0f;
     B[1] = hcu - su + (2.0f / 3.0f);
     B[2] = -hcu + (su / 2.0f) + (u / 2.0f) + (1.0f / 6.0f);
     B[3] =  hcu / 3.0f;
diff --git a/src/game/gamecube_controller.c b/src/game/gamecube_controller.c
index 1a44c7da0..760c618b8 100644
--- a/src/game/gamecube_controller.c
+++ b/src/game/gamecube_controller.c
@@ -1,4 +1,5 @@
 #include "PR/os_internal.h"
+#include "engine/math_util.h"
 
 #include "game_init.h"
 
@@ -9,11 +10,6 @@
 #define ARRLEN(x) ((s32)(sizeof(x) / sizeof(x[0])))
 #define CHNL_ERR(format) (((format).rxsize & CHNL_ERR_MASK) >> 4)
 
-#define CLAMP(x, low, high)  (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x)))
-#define S8_MAX __SCHAR_MAX__
-#define S8_MIN (-S8_MAX - 1)
-#define CLAMP_S8( x)        CLAMP((x),  S8_MIN,  S8_MAX)
-
 #define CHNL_ERR_MASK		0xC0	/* Bit 6-7: channel errors */
 
 typedef struct
diff --git a/src/game/mario.c b/src/game/mario.c
index b2603c0e9..af2678113 100644
--- a/src/game/mario.c
+++ b/src/game/mario.c
@@ -1706,7 +1706,8 @@ s32 execute_mario_action(UNUSED struct Object *obj) {
     s32 inLoop = TRUE;
 
     // Updates once per frame:
-    vec3f_get_dist_and_lateral_dist_and_angle(gMarioState->prevPos, gMarioState->pos, &gMarioState->moveSpeed, &gMarioState->lateralSpeed, &gMarioState->movePitch, &gMarioState->moveYaw);
+    vec3f_get_dist_and_angle(gMarioState->prevPos, gMarioState->pos, &gMarioState->moveSpeed, &gMarioState->movePitch, &gMarioState->moveYaw);
+    vec3f_get_lateral_dist(gMarioState->prevPos, gMarioState->pos, &gMarioState->lateralSpeed);
     vec3f_copy(gMarioState->prevPos, gMarioState->pos);
 
     if (gMarioState->action) {
diff --git a/src/game/mario_actions_moving.c b/src/game/mario_actions_moving.c
index 902a18232..2c72df9de 100644
--- a/src/game/mario_actions_moving.c
+++ b/src/game/mario_actions_moving.c
@@ -1353,9 +1353,6 @@ void tilt_body_butt_slide(struct MarioState *m) {
 }
 
 void common_slide_action(struct MarioState *m, u32 endAction, u32 airAction, s32 animation) {
-    Vec3f pos;
-
-    vec3f_copy(pos, m->pos);
     play_sound(SOUND_MOVING_TERRAIN_SLIDE + m->terrainSoundAddend, m->marioObj->header.gfx.cameraToObject);
 
 #if ENABLE_RUMBLE
diff --git a/src/game/mario_actions_submerged.c b/src/game/mario_actions_submerged.c
index 47ebaaac7..6c8a73729 100644
--- a/src/game/mario_actions_submerged.c
+++ b/src/game/mario_actions_submerged.c
@@ -137,7 +137,7 @@ static void apply_water_current(struct MarioState *m, Vec3f step) {
         if (whirlpool != NULL) {
             strength = 0.0f;
 
-            vec3f_to_vec3s_get_dist_and_angle(m->pos, whirlpool->pos, &distance, &pitchToWhirlpool, &yawToWhirlpool);
+            vec3_get_dist_and_angle(m->pos, whirlpool->pos, &distance, &pitchToWhirlpool, &yawToWhirlpool);
 
             yawToWhirlpool -= (s16)(0x2000 * 1000.0f / (distance + 1000.0f));
 
diff --git a/src/game/object_helpers.c b/src/game/object_helpers.c
index 2f9673415..5df500a47 100644
--- a/src/game/object_helpers.c
+++ b/src/game/object_helpers.c
@@ -476,9 +476,9 @@ void obj_init_animation(struct Object *obj, s32 animIndex) {
 void obj_apply_scale_to_transform(struct Object *obj) {
     Vec3f scale;
     vec3f_copy(scale, obj->header.gfx.scale);
-    vec3_mul_val(obj->transform[0], scale[0]);
-    vec3_mul_val(obj->transform[1], scale[1]);
-    vec3_mul_val(obj->transform[2], scale[2]);
+    vec3_scale(obj->transform[0], scale[0]);
+    vec3_scale(obj->transform[1], scale[1]);
+    vec3_scale(obj->transform[2], scale[2]);
 }
 
 void obj_copy_scale(struct Object *dst, struct Object *src) {
diff --git a/src/game/puppycam2.c b/src/game/puppycam2.c
index c6f9669ea..bc9fc26cf 100644
--- a/src/game/puppycam2.c
+++ b/src/game/puppycam2.c
@@ -223,7 +223,6 @@ s32 puppycam_move_spline(struct sPuppySpline splinePos[], struct sPuppySpline sp
     f32 tempProgress[2] = {0.0f, 0.0f};
     f32 progChange = 0.0f;
     s32 i;
-    Vec3f prevPos;
 
     if (gPuppyCam.splineIndex == 65000) {
         gPuppyCam.splineIndex = index;
@@ -236,7 +235,6 @@ s32 puppycam_move_spline(struct sPuppySpline splinePos[], struct sPuppySpline sp
             return TRUE;
         }
     }
-    vec3f_set(prevPos, gPuppyCam.pos[0], gPuppyCam.pos[1], gPuppyCam.pos[2]);
 
     for (i = 0; i < 4; i++) {
         vec3f_set(tempPoints[i], splinePos[gPuppyCam.splineIndex + i].pos[0], splinePos[gPuppyCam.splineIndex + i].pos[1], splinePos[gPuppyCam.splineIndex + i].pos[2]);
@@ -680,7 +678,6 @@ static void puppycam_input_hold_preset2(f32 ivX) {
 
 // Another alternative control scheme. This one aims to mimic the parallel camera scheme down to the last bit from the original game.
 static void puppycam_input_hold_preset3(f32 ivX) {
-    f32 stickMag[2] = {gPlayer1Controller->rawStickX*0.65f, gPlayer1Controller->rawStickY*0.2f};
     // Just in case it happens to be nonzero.
     gPuppyCam.yawAcceleration = 0;
 
@@ -1374,7 +1371,7 @@ static void puppycam_collision(void) {
     vec3f_normalize(dirToCam);
     // Get the vector from mario's head to the camera plus the extra check dist
     Vec3f vecToCam;
-    vec3_prod_val(vecToCam, dirToCam, colCheckDist);
+    vec3_scale_dest(vecToCam, dirToCam, colCheckDist);
 
     dist[0] = find_surface_on_ray(target[0], vecToCam, &surf[0], hitpos[0], RAYCAST_FIND_FLOOR | RAYCAST_FIND_CEIL | RAYCAST_FIND_WALL);
     dist[1] = find_surface_on_ray(target[1], vecToCam, &surf[1], hitpos[1], RAYCAST_FIND_FLOOR | RAYCAST_FIND_CEIL | RAYCAST_FIND_WALL);
@@ -1391,7 +1388,7 @@ static void puppycam_collision(void) {
             closestDist -= surfOffset;
             // Allow the camera to ride right up next to the wall (mario's wall radius is 50u so this is safe)
             closestDist = MAX(closestDist, 50);
-            vec3_mul_val(dirToCam, closestDist);
+            vec3_scale(dirToCam, closestDist);
             vec3_sum(gPuppyCam.pos, target[0], dirToCam);
 
             // If the camera is uncomfortably close to the wall, move it up a bit
diff --git a/src/game/rendering_graph_node.c b/src/game/rendering_graph_node.c
index 9e33e9ad4..934cc6958 100644
--- a/src/game/rendering_graph_node.c
+++ b/src/game/rendering_graph_node.c
@@ -1123,7 +1123,7 @@ void geo_process_held_object(struct GraphNodeHeldObject *node) {
         node->fnNode.func(GEO_CONTEXT_RENDER, &node->fnNode.node, gMatStack[gMatStackIndex]);
     }
     if (node->objNode != NULL && node->objNode->header.gfx.sharedChild != NULL) {
-        vec3_prod_val(translation, node->translation, 0.25f);
+        vec3_scale_dest(translation, node->translation, 0.25f);
 
         mtxf_translate(mat, translation);
         mtxf_copy(gMatStack[gMatStackIndex + 1], *gCurGraphNodeObject->throwMatrix);
diff --git a/src/game/skybox.c b/src/game/skybox.c
index 490d32576..7485d3832 100644
--- a/src/game/skybox.c
+++ b/src/game/skybox.c
@@ -61,7 +61,7 @@ struct Skybox {
 
 struct Skybox sSkyBoxInfo[2];
 
-typedef const Texture *const SkyboxTexture[80 * sqr(SKYBOX_SIZE)];
+typedef const Texture *const SkyboxTexture[80 * SKYBOX_SIZE * SKYBOX_SIZE];
 
 extern SkyboxTexture bbh_skybox_ptrlist;
 extern SkyboxTexture bidw_skybox_ptrlist;
@@ -304,7 +304,8 @@ Gfx *create_skybox_facing_camera(s8 player, s8 background, f32 fov, Vec3f pos, V
     //! the first frame, which causes a floating point divide by 0
     fov = 90.0f;
     s16 yaw;
-    vec3f_get_angle(pos, focus, &sSkyBoxInfo[player].pitch, &yaw);
+    f32 dist;
+    vec3f_get_dist_and_angle(pos, focus, &dist, &sSkyBoxInfo[player].pitch, &yaw);
     sSkyBoxInfo[player].yaw = yaw;
     sSkyBoxInfo[player].scaledX = calculate_skybox_scaled_x(player, fov);
     sSkyBoxInfo[player].scaledY = calculate_skybox_scaled_y(player, fov);

From d7c840b8f1b9efb7028ca99c1e81c8ad1e12ec89 Mon Sep 17 00:00:00 2001
From: someone2639 <someone2639@users.noreply.github.com>
Date: Fri, 15 Dec 2023 23:56:40 -0500
Subject: [PATCH 06/23] Cache master displaylist on "hot" code paths (#724)

* add displaylist macros

* implement displaylist macro in memory.c

* API update

* implement displaylist macro on all 'hot' code paths

* Remove the macro entirely

* rename tmpDL

* remove the other two macros

* catch the last 2 missing statements

---------

Co-authored-by: someone2639 <someone2639@gmail.com>
---
 src/boot/memory.c            |  8 ++--
 src/game/debug_box.c         |  4 ++
 src/game/game_init.c         | 82 ++++++++++++++++++++++--------------
 src/game/hud.c               | 65 +++++++++++++++++-----------
 src/game/puppycam2.c         | 24 ++++++-----
 src/game/puppyprint.c        | 16 ++++---
 src/game/screen_transition.c | 56 +++++++++++++-----------
 7 files changed, 156 insertions(+), 99 deletions(-)

diff --git a/src/boot/memory.c b/src/boot/memory.c
index c086f8b0b..0066f8ba5 100644
--- a/src/boot/memory.c
+++ b/src/boot/memory.c
@@ -94,11 +94,13 @@ void *virtual_to_segmented(u32 segment, const void *addr) {
 }
 
 void move_segment_table_to_dmem(void) {
-    s32 i;
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    for (i = 0; i < 16; i++) {
-        gSPSegment(gDisplayListHead++, i, sSegmentTable[i]);
+    for (s32 i = 0; i < 16; i++) {
+        gSPSegment(tempGfxHead++, i, sSegmentTable[i]);
     }
+
+    gDisplayListHead = tempGfxHead;
 }
 #else
 void *segmented_to_virtual(const void *addr) {
diff --git a/src/game/debug_box.c b/src/game/debug_box.c
index f186c8e96..9281625fe 100644
--- a/src/game/debug_box.c
+++ b/src/game/debug_box.c
@@ -271,6 +271,8 @@ void visual_surface_display(Gfx **gfx, Vtx *verts, s32 iteration) {
     s32 count = VERTCOUNT;
     s32 ntx = 0;
 
+    Gfx *tempGfxHead = gDisplayListHead;
+
     while (vts > 0) {
         if (count == VERTCOUNT) {
             ntx = MIN(VERTCOUNT, vts);
@@ -298,6 +300,8 @@ void visual_surface_display(Gfx **gfx, Vtx *verts, s32 iteration) {
             count += 3;
         }
     }
+
+    gDisplayListHead = tempGfxHead;
 }
 
 s32 iterate_surface_count(s32 x, s32 z) {
diff --git a/src/game/game_init.c b/src/game/game_init.c
index 4b415138f..bec0cfd3e 100644
--- a/src/game/game_init.c
+++ b/src/game/game_init.c
@@ -156,32 +156,40 @@ void my_rsp_init(void) {
  * Initialize the z buffer for the current frame.
  */
 void init_z_buffer(s32 resetZB) {
-    gDPPipeSync(gDisplayListHead++);
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    gDPSetDepthSource(gDisplayListHead++, G_ZS_PIXEL);
-    gDPSetDepthImage(gDisplayListHead++, gPhysicalZBuffer);
+    gDPPipeSync(tempGfxHead++);
 
-    gDPSetColorImage(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, SCREEN_WIDTH, gPhysicalZBuffer);
+    gDPSetDepthSource(tempGfxHead++, G_ZS_PIXEL);
+    gDPSetDepthImage(tempGfxHead++, gPhysicalZBuffer);
+
+    gDPSetColorImage(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, SCREEN_WIDTH, gPhysicalZBuffer);
-    if (!resetZB)
-        return;
-    gDPSetFillColor(gDisplayListHead++,
-                    GPACK_ZDZ(G_MAXFBZ, 0) << 16 | GPACK_ZDZ(G_MAXFBZ, 0));
-
-    gDPFillRectangle(gDisplayListHead++, 0, gBorderHeight, SCREEN_WIDTH - 1,
-                     SCREEN_HEIGHT - 1 - gBorderHeight);
+    // Clearing is optional, but the setup above must always reach gDisplayListHead.
+    if (resetZB) {
+        gDPSetFillColor(tempGfxHead++,
+                        GPACK_ZDZ(G_MAXFBZ, 0) << 16 | GPACK_ZDZ(G_MAXFBZ, 0));
+        gDPFillRectangle(tempGfxHead++, 0, gBorderHeight, SCREEN_WIDTH - 1,
+                         SCREEN_HEIGHT - 1 - gBorderHeight);
+    }
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
  * Tells the RDP which of the three framebuffers it shall draw to.
  */
 void select_framebuffer(void) {
-    gDPPipeSync(gDisplayListHead++);
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    gDPSetCycleType(gDisplayListHead++, G_CYC_1CYCLE);
-    gDPSetColorImage(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, SCREEN_WIDTH,
+    gDPPipeSync(tempGfxHead++);
+
+    gDPSetCycleType(tempGfxHead++, G_CYC_1CYCLE);
+    gDPSetColorImage(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, SCREEN_WIDTH,
                      gPhysicalFramebuffers[sRenderingFramebuffer]);
-    gDPSetScissor(gDisplayListHead++, G_SC_NON_INTERLACE, 0, gBorderHeight, SCREEN_WIDTH,
+    gDPSetScissor(tempGfxHead++, G_SC_NON_INTERLACE, 0, gBorderHeight, SCREEN_WIDTH,
                   SCREEN_HEIGHT - gBorderHeight);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
@@ -189,19 +197,23 @@ void select_framebuffer(void) {
  * Information about the color argument: https://jrra.zone/n64/doc/n64man/gdp/gDPSetFillColor.htm
  */
 void clear_framebuffer(s32 color) {
-    gDPPipeSync(gDisplayListHead++);
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    gDPSetRenderMode(gDisplayListHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
-    gDPSetCycleType(gDisplayListHead++, G_CYC_FILL);
+    gDPPipeSync(tempGfxHead++);
 
-    gDPSetFillColor(gDisplayListHead++, color);
-    gDPFillRectangle(gDisplayListHead++,
+    gDPSetRenderMode(tempGfxHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
+    gDPSetCycleType(tempGfxHead++, G_CYC_FILL);
+
+    gDPSetFillColor(tempGfxHead++, color);
+    gDPFillRectangle(tempGfxHead++,
                      GFX_DIMENSIONS_RECT_FROM_LEFT_EDGE(0), gBorderHeight,
                      GFX_DIMENSIONS_RECT_FROM_RIGHT_EDGE(0) - 1, SCREEN_HEIGHT - gBorderHeight - 1);
 
-    gDPPipeSync(gDisplayListHead++);
+    gDPPipeSync(tempGfxHead++);
 
-    gDPSetCycleType(gDisplayListHead++, G_CYC_1CYCLE);
+    gDPSetCycleType(tempGfxHead++, G_CYC_1CYCLE);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
@@ -218,38 +230,46 @@ void clear_viewport(Vp *viewport, s32 color) {
     vpLrx = GFX_DIMENSIONS_RECT_FROM_RIGHT_EDGE(SCREEN_WIDTH - vpLrx);
 #endif
 
-    gDPPipeSync(gDisplayListHead++);
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    gDPSetRenderMode(gDisplayListHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
-    gDPSetCycleType(gDisplayListHead++, G_CYC_FILL);
+    gDPPipeSync(tempGfxHead++);
 
-    gDPSetFillColor(gDisplayListHead++, color);
-    gDPFillRectangle(gDisplayListHead++, vpUlx, vpUly, vpLrx, vpLry);
+    gDPSetRenderMode(tempGfxHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
+    gDPSetCycleType(tempGfxHead++, G_CYC_FILL);
 
-    gDPPipeSync(gDisplayListHead++);
+    gDPSetFillColor(tempGfxHead++, color);
+    gDPFillRectangle(tempGfxHead++, vpUlx, vpUly, vpLrx, vpLry);
 
-    gDPSetCycleType(gDisplayListHead++, G_CYC_1CYCLE);
+    gDPPipeSync(tempGfxHead++);
+
+    gDPSetCycleType(tempGfxHead++, G_CYC_1CYCLE);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
  * Draw the horizontal screen borders.
  */
 void draw_screen_borders(void) {
-    gDPPipeSync(gDisplayListHead++);
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    gDPSetScissor(gDisplayListHead++, G_SC_NON_INTERLACE, 0, 0, SCREEN_WIDTH, SCREEN_HEIGHT);
-    gDPSetRenderMode(gDisplayListHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
-    gDPSetCycleType(gDisplayListHead++, G_CYC_FILL);
+    gDPPipeSync(tempGfxHead++);
 
-    gDPSetFillColor(gDisplayListHead++, GPACK_RGBA5551(0, 0, 0, 0) << 16 | GPACK_RGBA5551(0, 0, 0, 0));
+    gDPSetScissor(tempGfxHead++, G_SC_NON_INTERLACE, 0, 0, SCREEN_WIDTH, SCREEN_HEIGHT);
+    gDPSetRenderMode(tempGfxHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
+    gDPSetCycleType(tempGfxHead++, G_CYC_FILL);
+
+    gDPSetFillColor(tempGfxHead++, GPACK_RGBA5551(0, 0, 0, 0) << 16 | GPACK_RGBA5551(0, 0, 0, 0));
 
     if (gBorderHeight) {
-        gDPFillRectangle(gDisplayListHead++, GFX_DIMENSIONS_RECT_FROM_LEFT_EDGE(0), 0,
+        gDPFillRectangle(tempGfxHead++, GFX_DIMENSIONS_RECT_FROM_LEFT_EDGE(0), 0,
                         GFX_DIMENSIONS_RECT_FROM_RIGHT_EDGE(0) - 1, gBorderHeight - 1);
-        gDPFillRectangle(gDisplayListHead++,
+        gDPFillRectangle(tempGfxHead++,
                         GFX_DIMENSIONS_RECT_FROM_LEFT_EDGE(0), SCREEN_HEIGHT - gBorderHeight,
                         GFX_DIMENSIONS_RECT_FROM_RIGHT_EDGE(0) - 1, SCREEN_HEIGHT - 1);
     }
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
diff --git a/src/game/hud.c b/src/game/hud.c
index 75118b56b..88b8e7ca0 100644
--- a/src/game/hud.c
+++ b/src/game/hud.c
@@ -117,29 +117,37 @@ static struct CameraHUD sCameraHUD = { CAM_STATUS_NONE };
  * Renders a rgba16 16x16 glyph texture from a table list.
  */
 void render_hud_tex_lut(s32 x, s32 y, Texture *texture) {
-    gDPPipeSync(gDisplayListHead++);
-    gDPSetTextureImage(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1, texture);
-    gSPDisplayList(gDisplayListHead++, &dl_hud_img_load_tex_block);
-    gSPTextureRectangle(gDisplayListHead++, x << 2, y << 2, (x + 15) << 2, (y + 15) << 2,
+    Gfx *tempGfxHead = gDisplayListHead;
+
+    gDPPipeSync(tempGfxHead++);
+    gDPSetTextureImage(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1, texture);
+    gSPDisplayList(tempGfxHead++, &dl_hud_img_load_tex_block);
+    gSPTextureRectangle(tempGfxHead++, x << 2, y << 2, (x + 15) << 2, (y + 15) << 2,
                         G_TX_RENDERTILE, 0, 0, 4 << 10, 1 << 10);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
  * Renders a rgba16 8x8 glyph texture from a table list.
  */
 void render_hud_small_tex_lut(s32 x, s32 y, Texture *texture) {
-    gDPSetTile(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 0, 0, G_TX_LOADTILE, 0,
+    Gfx *tempGfxHead = gDisplayListHead;
+
+    gDPSetTile(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 0, 0, G_TX_LOADTILE, 0,
                 G_TX_WRAP | G_TX_NOMIRROR, G_TX_NOMASK, G_TX_NOLOD, G_TX_WRAP | G_TX_NOMIRROR, G_TX_NOMASK, G_TX_NOLOD);
-    gDPTileSync(gDisplayListHead++);
-    gDPSetTile(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 2, 0, G_TX_RENDERTILE, 0,
+    gDPTileSync(tempGfxHead++);
+    gDPSetTile(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 2, 0, G_TX_RENDERTILE, 0,
                 G_TX_CLAMP, 3, G_TX_NOLOD, G_TX_CLAMP, 3, G_TX_NOLOD);
-    gDPSetTileSize(gDisplayListHead++, G_TX_RENDERTILE, 0, 0, (8 - 1) << G_TEXTURE_IMAGE_FRAC, (8 - 1) << G_TEXTURE_IMAGE_FRAC);
-    gDPPipeSync(gDisplayListHead++);
-    gDPSetTextureImage(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1, texture);
-    gDPLoadSync(gDisplayListHead++);
-    gDPLoadBlock(gDisplayListHead++, G_TX_LOADTILE, 0, 0, 8 * 8 - 1, CALC_DXT(8, G_IM_SIZ_16b_BYTES));
-    gSPTextureRectangle(gDisplayListHead++, x << 2, y << 2, (x + 7) << 2, (y + 7) << 2, G_TX_RENDERTILE,
+    gDPSetTileSize(tempGfxHead++, G_TX_RENDERTILE, 0, 0, (8 - 1) << G_TEXTURE_IMAGE_FRAC, (8 - 1) << G_TEXTURE_IMAGE_FRAC);
+    gDPPipeSync(tempGfxHead++);
+    gDPSetTextureImage(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1, texture);
+    gDPLoadSync(tempGfxHead++);
+    gDPLoadBlock(tempGfxHead++, G_TX_LOADTILE, 0, 0, 8 * 8 - 1, CALC_DXT(8, G_IM_SIZ_16b_BYTES));
+    gSPTextureRectangle(tempGfxHead++, x << 2, y << 2, (x + 7) << 2, (y + 7) << 2, G_TX_RENDERTILE,
                         0, 0, 4 << 10, 1 << 10);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
@@ -147,14 +155,17 @@ void render_hud_small_tex_lut(s32 x, s32 y, Texture *texture) {
  */
 void render_power_meter_health_segment(s16 numHealthWedges) {
     Texture *(*healthLUT)[] = segmented_to_virtual(&power_meter_health_segments_lut);
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    gDPPipeSync(gDisplayListHead++);
-    gDPSetTextureImage(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1,
+    gDPPipeSync(tempGfxHead++);
+    gDPSetTextureImage(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1,
                        (*healthLUT)[numHealthWedges - 1]);
-    gDPLoadSync(gDisplayListHead++);
-    gDPLoadBlock(gDisplayListHead++, G_TX_LOADTILE, 0, 0, 32 * 32 - 1, CALC_DXT(32, G_IM_SIZ_16b_BYTES));
-    gSP1Triangle(gDisplayListHead++, 0, 1, 2, 0);
-    gSP1Triangle(gDisplayListHead++, 0, 2, 3, 0);
+    gDPLoadSync(tempGfxHead++);
+    gDPLoadBlock(tempGfxHead++, G_TX_LOADTILE, 0, 0, 32 * 32 - 1, CALC_DXT(32, G_IM_SIZ_16b_BYTES));
+    gSP1Triangle(tempGfxHead++, 0, 1, 2, 0);
+    gSP1Triangle(tempGfxHead++, 0, 2, 3, 0);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
@@ -293,12 +304,16 @@ void render_hud_power_meter(void) {
 void render_breath_meter_segment(s16 numBreathWedges) {
     Texture *(*breathLUT)[];
     breathLUT = segmented_to_virtual(&breath_meter_segments_lut);
-    gDPPipeSync(gDisplayListHead++);
-    gDPSetTextureImage(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1, (*breathLUT)[numBreathWedges - 1]);
-    gDPLoadSync(gDisplayListHead++);
-    gDPLoadBlock(gDisplayListHead++, G_TX_LOADTILE, 0, 0, 32 * 32 - 1, CALC_DXT(32, G_IM_SIZ_16b_BYTES));
-    gSP1Triangle(gDisplayListHead++, 0, 1, 2, 0);
-    gSP1Triangle(gDisplayListHead++, 0, 2, 3, 0);
+    Gfx *tempGfxHead = gDisplayListHead;
+
+    gDPPipeSync(tempGfxHead++);
+    gDPSetTextureImage(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1, (*breathLUT)[numBreathWedges - 1]);
+    gDPLoadSync(tempGfxHead++);
+    gDPLoadBlock(tempGfxHead++, G_TX_LOADTILE, 0, 0, 32 * 32 - 1, CALC_DXT(32, G_IM_SIZ_16b_BYTES));
+    gSP1Triangle(tempGfxHead++, 0, 1, 2, 0);
+    gSP1Triangle(tempGfxHead++, 0, 2, 3, 0);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
diff --git a/src/game/puppycam2.c b/src/game/puppycam2.c
index bc9fc26cf..384d8af0c 100644
--- a/src/game/puppycam2.c
+++ b/src/game/puppycam2.c
@@ -286,19 +286,23 @@ static void puppycam_process_cutscene(void) {
 #define BLANK 0, 0, 0, ENVIRONMENT, 0, 0, 0, ENVIRONMENT
 
 static void puppycam_display_box(s32 x1, s32 y1, s32 x2, s32 y2, u8 r, u8 g, u8 b, u8 a) {
-    gDPSetCombineMode(gDisplayListHead++, BLANK, BLANK);
-    gDPSetCycleType(  gDisplayListHead++, G_CYC_1CYCLE);
+    Gfx *tempGfxHead = gDisplayListHead;
+
+    gDPSetCombineMode(tempGfxHead++, BLANK, BLANK);
+    gDPSetCycleType(  tempGfxHead++, G_CYC_1CYCLE);
     if (a !=255) {
-        gDPSetRenderMode(gDisplayListHead++, G_RM_XLU_SURF, G_RM_XLU_SURF2);
+        gDPSetRenderMode(tempGfxHead++, G_RM_XLU_SURF, G_RM_XLU_SURF2);
     } else {
-        gDPSetRenderMode(gDisplayListHead++, G_RM_OPA_SURF, G_RM_OPA_SURF);
+        gDPSetRenderMode(tempGfxHead++, G_RM_OPA_SURF, G_RM_OPA_SURF);
     }
-    gDPSetEnvColor(   gDisplayListHead++, r, g, b, a);
-    gDPFillRectangle( gDisplayListHead++, x1, y1, x2, y2);
-    gDPPipeSync(      gDisplayListHead++);
-    gDPSetEnvColor(   gDisplayListHead++, 255, 255, 255, 255);
-    gDPSetCycleType(  gDisplayListHead++, G_CYC_1CYCLE);
-    gSPDisplayList(   gDisplayListHead++,dl_hud_img_end);
+    gDPSetEnvColor(   tempGfxHead++, r, g, b, a);
+    gDPFillRectangle( tempGfxHead++, x1, y1, x2, y2);
+    gDPPipeSync(      tempGfxHead++);
+    gDPSetEnvColor(   tempGfxHead++, 255, 255, 255, 255);
+    gDPSetCycleType(  tempGfxHead++, G_CYC_1CYCLE);
+    gSPDisplayList(   tempGfxHead++,dl_hud_img_end);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 //I actually took the time to redo this, properly. Lmao. Please don't bully me over this anymore :(
diff --git a/src/game/puppyprint.c b/src/game/puppyprint.c
index 9ef3b863f..5da36fdd4 100644
--- a/src/game/puppyprint.c
+++ b/src/game/puppyprint.c
@@ -522,12 +522,16 @@ void puppyprint_render_minimal(void) {
 }
 
 void render_coverage_map(void) {
-    gDPSetCycleType(gDisplayListHead++, G_CYC_1CYCLE);
-    gDPSetBlendColor(gDisplayListHead++, 0xFF, 0xFF, 0xFF, 0xFF);
-    gDPSetPrimDepth(gDisplayListHead++, 0xFFFF, 0xFFFF);
-    gDPSetDepthSource(gDisplayListHead++, G_ZS_PRIM);
-    gDPSetRenderMode(gDisplayListHead++, G_RM_VISCVG, G_RM_VISCVG2);
-    gDPFillRectangle(gDisplayListHead++, 0,0, SCREEN_WIDTH-1, SCREEN_HEIGHT-1);
+    Gfx *tempGfxHead = gDisplayListHead;
+
+    gDPSetCycleType(tempGfxHead++, G_CYC_1CYCLE);
+    gDPSetBlendColor(tempGfxHead++, 0xFF, 0xFF, 0xFF, 0xFF);
+    gDPSetPrimDepth(tempGfxHead++, 0xFFFF, 0xFFFF);
+    gDPSetDepthSource(tempGfxHead++, G_ZS_PRIM);
+    gDPSetRenderMode(tempGfxHead++, G_RM_VISCVG, G_RM_VISCVG2);
+    gDPFillRectangle(tempGfxHead++, 0,0, SCREEN_WIDTH-1, SCREEN_HEIGHT-1);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 void puppycamera_debug_view(void) {
diff --git a/src/game/screen_transition.c b/src/game/screen_transition.c
index 2376b1f74..a59e25592 100644
--- a/src/game/screen_transition.c
+++ b/src/game/screen_transition.c
@@ -140,43 +140,47 @@ s32 render_textured_transition(s8 transTime, struct WarpTransitionData *transDat
 
 
     if (verts != NULL) {
+        Gfx *tempGfxHead = gDisplayListHead;
+
         make_tex_transition_vertices(verts, centerTransX, centerTransY, texTransRadius, transTexType);
-        gSPDisplayList(gDisplayListHead++, dl_proj_mtx_fullscreen);
+        gSPDisplayList(tempGfxHead++, dl_proj_mtx_fullscreen);
 
         u8 r = transData->red;
         u8 g = transData->green;
         u8 b = transData->blue;
-        gDPSetPrimColor(gDisplayListHead++, 0, 0, r, g, b, 255);
+        gDPSetPrimColor(tempGfxHead++, 0, 0, r, g, b, 255);
 
-        gDPSetCombineMode(gDisplayListHead++, G_CC_PRIMITIVE, G_CC_PRIMITIVE);
-        gDPSetRenderMode(gDisplayListHead++, G_RM_AA_OPA_SURF, G_RM_AA_OPA_SURF2);
+        gDPSetCombineMode(tempGfxHead++, G_CC_PRIMITIVE, G_CC_PRIMITIVE);
+        gDPSetRenderMode(tempGfxHead++, G_RM_AA_OPA_SURF, G_RM_AA_OPA_SURF2);
 
-        gSPVertex(gDisplayListHead++, VIRTUAL_TO_PHYSICAL(verts), 8, 0);
-        gSPDisplayList(gDisplayListHead++, dl_transition_draw_filled_region);
-        gDPPipeSync(gDisplayListHead++);
+        gSPVertex(tempGfxHead++, VIRTUAL_TO_PHYSICAL(verts), 8, 0);
+        gSPDisplayList(tempGfxHead++, dl_transition_draw_filled_region);
+        gDPPipeSync(tempGfxHead++);
 
-        gDPSetCombineLERP(gDisplayListHead++, 0, 0, 0, PRIMITIVE, 0, 0, 0, TEXEL0, 
+        gDPSetCombineLERP(tempGfxHead++, 0, 0, 0, PRIMITIVE, 0, 0, 0, TEXEL0, 
                                               0, 0, 0, PRIMITIVE, 0, 0, 0, TEXEL0);
 
-        gDPSetRenderMode(gDisplayListHead++, G_RM_AA_XLU_SURF, G_RM_AA_XLU_SURF2);
-        gDPSetTextureFilter(gDisplayListHead++, G_TF_BILERP);
+        gDPSetRenderMode(tempGfxHead++, G_RM_AA_XLU_SURF, G_RM_AA_XLU_SURF2);
+        gDPSetTextureFilter(tempGfxHead++, G_TF_BILERP);
 
         switch (transTexType) {
             case TRANS_TYPE_MIRROR:
-                gDPLoadTextureBlock(gDisplayListHead++, sTextureTransitionID[texID], G_IM_FMT_IA, G_IM_SIZ_8b, 32, 64, 0,
+                gDPLoadTextureBlock(tempGfxHead++, sTextureTransitionID[texID], G_IM_FMT_IA, G_IM_SIZ_8b, 32, 64, 0,
                     G_TX_WRAP | G_TX_MIRROR, G_TX_WRAP | G_TX_MIRROR, 5, 6, G_TX_NOLOD, G_TX_NOLOD);
                 break;
             case TRANS_TYPE_CLAMP:
-                gDPLoadTextureBlock(gDisplayListHead++, sTextureTransitionID[texID], G_IM_FMT_IA, G_IM_SIZ_8b, 64, 64, 0,
+                gDPLoadTextureBlock(tempGfxHead++, sTextureTransitionID[texID], G_IM_FMT_IA, G_IM_SIZ_8b, 64, 64, 0,
                     G_TX_CLAMP, G_TX_CLAMP, 6, 6, G_TX_NOLOD, G_TX_NOLOD);
                 break;
         }
-        gSPTexture(gDisplayListHead++, 0xFFFF, 0xFFFF, 0, G_TX_RENDERTILE, G_ON);
-        gSPVertex(gDisplayListHead++, VIRTUAL_TO_PHYSICAL(verts), 4, 0);
-        gSPDisplayList(gDisplayListHead++, dl_draw_quad_verts_0123);
-        gSPTexture(gDisplayListHead++, 0xFFFF, 0xFFFF, 0, G_TX_RENDERTILE, G_OFF);
-        gSPDisplayList(gDisplayListHead++, dl_screen_transition_end);
+        gSPTexture(tempGfxHead++, 0xFFFF, 0xFFFF, 0, G_TX_RENDERTILE, G_ON);
+        gSPVertex(tempGfxHead++, VIRTUAL_TO_PHYSICAL(verts), 4, 0);
+        gSPDisplayList(tempGfxHead++, dl_draw_quad_verts_0123);
+        gSPTexture(tempGfxHead++, 0xFFFF, 0xFFFF, 0, G_TX_RENDERTILE, G_OFF);
+        gSPDisplayList(tempGfxHead++, dl_screen_transition_end);
         sTransitionTextureAngle += transData->angleSpeed;
+
+        gDisplayListHead = tempGfxHead;
     }
     return set_and_reset_transition_fade_timer(transTime);
 }
@@ -197,17 +201,21 @@ s32 dl_transition_color(u8 transTime, struct WarpTransitionData *transData, u8 a
     Vtx *verts = vertex_transition_color();
 
     if (verts != NULL) {
+        Gfx *tempGfxHead = gDisplayListHead;
+
         u8 r = transData->red;
         u8 g = transData->green;
         u8 b = transData->blue;
-        gDPSetPrimColor(gDisplayListHead++, 0, 0, r, g, b, alpha);
+        gDPSetPrimColor(tempGfxHead++, 0, 0, r, g, b, alpha);
 
-        gSPDisplayList(gDisplayListHead++, dl_proj_mtx_fullscreen);
-        gDPSetCombineMode(gDisplayListHead++, G_CC_PRIMITIVE, G_CC_PRIMITIVE);
-        gDPSetRenderMode(gDisplayListHead++, G_RM_AA_XLU_SURF, G_RM_AA_XLU_SURF2);
-        gSPVertex(gDisplayListHead++, VIRTUAL_TO_PHYSICAL(verts), 4, 0);
-        gSPDisplayList(gDisplayListHead++, dl_draw_quad_verts_0123);
-        gSPDisplayList(gDisplayListHead++, dl_screen_transition_end);
+        gSPDisplayList(tempGfxHead++, dl_proj_mtx_fullscreen);
+        gDPSetCombineMode(tempGfxHead++, G_CC_PRIMITIVE, G_CC_PRIMITIVE);
+        gDPSetRenderMode(tempGfxHead++, G_RM_AA_XLU_SURF, G_RM_AA_XLU_SURF2);
+        gSPVertex(tempGfxHead++, VIRTUAL_TO_PHYSICAL(verts), 4, 0);
+        gSPDisplayList(tempGfxHead++, dl_draw_quad_verts_0123);
+        gSPDisplayList(tempGfxHead++, dl_screen_transition_end);
+
+        gDisplayListHead = tempGfxHead;
     }
     return set_and_reset_transition_fade_timer(transTime);
 }

From 913cda93b80cd1686181d6ab29d00ea9a1ba179a Mon Sep 17 00:00:00 2001
From: Gregory Heskett <gheskett@gmail.com>
Date: Fri, 15 Dec 2023 23:57:19 -0500
Subject: [PATCH 07/23] Audio cleanup checkpoint: Large portion of synthesis.c
 taken care of + occasional audio optimizations (#710)

---
 src/audio/effects.c   |   4 +-
 src/audio/external.c  |   6 +-
 src/audio/internal.h  |  15 +-
 src/audio/playback.c  |  22 +-
 src/audio/seqplayer.c |  13 +-
 src/audio/synthesis.c | 801 +++++-------------------------------------
 6 files changed, 108 insertions(+), 753 deletions(-)

diff --git a/src/audio/effects.c b/src/audio/effects.c
index 9b635e287..ec2abc25d 100644
--- a/src/audio/effects.c
+++ b/src/audio/effects.c
@@ -3,7 +3,9 @@
 #include "effects.h"
 #include "load.h"
 #include "data.h"
+#include "external.h"
 #include "seqplayer.h"
+#include "game/game_init.h"
 #include "game/main.h"
 #include "engine/math_util.h"
 
@@ -66,7 +68,7 @@ static void sequence_channel_process_sound(struct SequenceChannel *seqChannel) {
     for (i = 0; i < 4; i++) {
         struct SequenceChannelLayer *layer = seqChannel->layers[i];
         if (layer != NULL && layer->enabled && layer->note != NULL) {
-            layer->noteFreqScale = layer->freqScale * seqChannel->freqScale * gConfig.audioFrequency;
+            layer->noteFreqScale = layer->freqScale * seqChannel->freqScale;
             layer->noteVelocity = layer->velocitySquare * channelVolume;
             layer->notePan = (layer->pan * panLayerWeight) + panFromChannel;
         }
diff --git a/src/audio/external.c b/src/audio/external.c
index cf33cd378..bed409f6e 100644
--- a/src/audio/external.c
+++ b/src/audio/external.c
@@ -1129,7 +1129,7 @@ static f32 get_sound_freq_scale(u8 bank, u8 item) {
 
     // Goes from 1 at the camera to 1 + 1/15 at AUDIO_MAX_DISTANCE (and continues rising
     // farther than that)
-    return amount / 15.0f + 1.0f;
+    return (amount / 15.0f + 1.0f) * gConfig.audioFrequency;
 }
 
 /**
@@ -1311,7 +1311,7 @@ static void update_game_sound(void) {
 #else
                             gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->volume = 1.0f;
                             gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->pan = 0.5f;
-                            gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->freqScale = 1.0f;
+                            gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->freqScale = gConfig.audioFrequency;
 #endif
                             break;
                         case SOUND_BANK_ACTION:
@@ -1477,7 +1477,7 @@ static void update_game_sound(void) {
 #else
                             gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->volume = 1.0f;
                             gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->pan = 0.5f;
-                            gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->freqScale = 1.0f;
+                            gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->freqScale = gConfig.audioFrequency;
 #endif
                             break;
                         case SOUND_BANK_ACTION:
diff --git a/src/audio/internal.h b/src/audio/internal.h
index e9c65bc93..a70d55660 100644
--- a/src/audio/internal.h
+++ b/src/audio/internal.h
@@ -706,17 +706,14 @@ struct Note {
     /*0x8C*/ struct AudioListItem listItem;
     /*0x9C*/ s16 curVolLeft; // Q1.15, but will always be non-negative
     /*0x9E*/ s16 curVolRight; // Q1.15, but will always be non-negative
-    /*0xA0*/ s16 reverbVolShifted; // Q1.15
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
-    /*0xA2*/ u16 headsetPanRight;
-    /*0xA4*/ u16 headsetPanLeft;
-    /*0xA6*/ u16 prevHeadsetPanRight;
-    /*0xA8*/ u16 prevHeadsetPanLeft;
-    /*    */ u8 align16Padding[0x06];
-#else
-    /*    */ u8 align16Padding[0x0E];
+    /*0xA0*/ u16 headsetPanRight;
+    /*0xA2*/ u16 headsetPanLeft;
+    /*0xA4*/ u16 prevHeadsetPanRight;
+    /*0xA6*/ u16 prevHeadsetPanLeft;
+    /*    */ u8 align16Padding[0x08];
 #endif
-}; // size = 0xB0
+}; // size = 0xA0, 0xB0
 #endif
 
 struct NoteSynthesisBuffers {
diff --git a/src/audio/playback.c b/src/audio/playback.c
index 50622a4b9..b36faeeb9 100644
--- a/src/audio/playback.c
+++ b/src/audio/playback.c
@@ -359,7 +359,6 @@ void process_notes(void) {
 #endif
     u8 bookOffset;
 #endif
-    struct NoteAttributes *attributes;
 #if defined(VERSION_JP) || defined(VERSION_US)
     struct AudioListItem *it;
 #endif
@@ -580,17 +579,18 @@ void process_notes(void) {
 
             adsr_update(note);
             note_vibrato_update(note);
-            attributes = &note->attributes;
             if (note->priority == NOTE_PRIORITY_STOPPING) {
+                struct NoteAttributes *attributes = &note->attributes;
                 frequency = attributes->freqScale;
                 velocity = attributes->velocity;
                 pan = attributes->pan;
                 reverbVol = attributes->reverbVol;
             } else {
-                frequency = note->parentLayer->noteFreqScale;
-                velocity = note->parentLayer->noteVelocity;
-                pan = note->parentLayer->notePan;
-                reverbVol = note->parentLayer->seqChannel->reverbVol;
+                struct SequenceChannelLayer *parentLayer = note->parentLayer;
+                frequency = parentLayer->noteFreqScale;
+                velocity = parentLayer->noteVelocity;
+                pan = parentLayer->notePan;
+                reverbVol = parentLayer->seqChannel->reverbVol;
             }
 
             scale = note->adsrVolScale;
@@ -875,14 +875,8 @@ void build_synthetic_wave(struct Note *note, struct SequenceChannelLayer *seqLay
     // Repeat sample
     for (offset = note->sampleCount; offset < 0x40; offset += note->sampleCount) {
         lim = note->sampleCount;
-        if (offset < 0 || offset > 0) {
-            for (j = 0; j < lim; j++) {
-                note->synthesisBuffers->samples[offset + j] = note->synthesisBuffers->samples[j];
-            }
-        } else {
-            for (j = 0; j < lim; j++) {
-                note->synthesisBuffers->samples[offset + j] = note->synthesisBuffers->samples[j];
-            }
+        for (j = 0; j < lim; j++) {
+            note->synthesisBuffers->samples[offset + j] = note->synthesisBuffers->samples[j];
         }
     }
 
diff --git a/src/audio/seqplayer.c b/src/audio/seqplayer.c
index 1e139b2b1..65a948be3 100644
--- a/src/audio/seqplayer.c
+++ b/src/audio/seqplayer.c
@@ -6,6 +6,7 @@
 #include "heap.h"
 #include "load.h"
 #include "seqplayer.h"
+#include "game/main.h"
 
 #ifdef VERSION_SH
 void seq_channel_layer_process_script_part1(struct SequenceChannelLayer *layer);
@@ -44,7 +45,7 @@ void sequence_channel_init(struct SequenceChannel *seqChannel) {
     seqChannel->scriptState.depth = 0;
     seqChannel->volume = 1.0f;
     seqChannel->volumeScale = 1.0f;
-    seqChannel->freqScale = 1.0f;
+    seqChannel->freqScale = gConfig.audioFrequency;
     seqChannel->pan = 0.5f;
     seqChannel->panChannelWeight = 1.0f;
     seqChannel->noteUnused = NULL;
@@ -77,7 +78,7 @@ void sequence_channel_init(struct SequenceChannel *seqChannel) {
 #if defined(VERSION_EU) || defined(VERSION_SH)
     seqChannel->volume = 1.0f;
     seqChannel->volumeScale = 1.0f;
-    seqChannel->freqScale = 1.0f;
+    seqChannel->freqScale = gConfig.audioFrequency;
 #endif
 
     for (i = 0; i < 8; i++) {
@@ -1684,7 +1685,7 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
 
                     case 0xde: // chan_freqscale; pitch bend using raw frequency multiplier N/2^15 (N is u16)
                         sp5A = m64_read_s16(state);
-                        seqChannel->freqScale = FLOAT_CAST(sp5A) / 32768.0f;
+                        seqChannel->freqScale = FLOAT_CAST(sp5A) / 32768.0f * gConfig.audioFrequency;
 #if defined(VERSION_EU) || defined(VERSION_SH)
                         seqChannel->changes.as_bitfields.freqScale = TRUE;
 #endif
@@ -1697,7 +1698,7 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
 #else
                         cmd = m64_read_u8(state) + 127;
 #endif
-                        seqChannel->freqScale = gPitchBendFrequencyScale[cmd];
+                        seqChannel->freqScale = gPitchBendFrequencyScale[cmd] * gConfig.audioFrequency;
 #if defined(VERSION_EU) || defined(VERSION_SH)
                         seqChannel->changes.as_bitfields.freqScale = TRUE;
 #endif
@@ -1706,7 +1707,7 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
 #ifdef VERSION_SH
                     case 0xee:
                         cmd = m64_read_u8(state) + 0x80;
-                        seqChannel->freqScale = unk_sh_data_1[cmd];
+                        seqChannel->freqScale = unk_sh_data_1[cmd] * gConfig.audioFrequency;
                         seqChannel->changes.as_bitfields.freqScale = TRUE;
                         break;
 #endif
@@ -1955,7 +1956,7 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
                         seqChannel->vibratoRateTarget = 0;
                         seqChannel->vibratoRateStart = 0;
                         seqChannel->vibratoRateChangeDelay = 0;
-                        seqChannel->freqScale = 1.0f;
+                        seqChannel->freqScale = gConfig.audioFrequency;
                         break;
 
                     case 0xe9: // chan_setnotepriority
diff --git a/src/audio/synthesis.c b/src/audio/synthesis.c
index 6d027440e..4b866a726 100644
--- a/src/audio/synthesis.c
+++ b/src/audio/synthesis.c
@@ -12,7 +12,6 @@
 #include "game/debug.h"
 #include "engine/math_util.h"
 
-
 #define DMEM_ADDR_TEMP 0x0
 #define DMEM_ADDR_RESAMPLED 0x20
 #define DMEM_ADDR_RESAMPLED2 0x160
@@ -61,7 +60,6 @@ s32 betterReverbRevIndex; // This one is okay to adjust whenever
 s32 betterReverbGainIndex; // This one is okay to adjust whenever
 #endif
 
-
 struct VolumeChange {
     u16 sourceLeft;
     u16 sourceRight;
@@ -69,33 +67,20 @@ struct VolumeChange {
     u16 targetRight;
 };
 
-u64 *synthesis_do_one_audio_update(s16 *aiBuf, s32 bufLen, u64 *cmd, s32 updateIndex);
-#ifdef VERSION_EU
-u64 *synthesis_process_note(struct Note *note, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *synthesisState, s16 *aiBuf, s32 bufLen, u64 *cmd);
-u64 *load_wave_samples(u64 *cmd, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *synthesisState, s32 nSamplesToLoad);
-u64 *process_envelope(u64 *cmd, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *synthesisState, s32 nSamples, u16 inBuf, s32 headsetPanSettings, u32 flags);
-u64 *note_apply_headset_pan_effects(u64 *cmd, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *note, s32 bufLen, s32 flags, s32 leftRight);
-#else
-u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd);
+u64 *synthesis_do_one_audio_update(s16 *aiBuf, u32 bufLen, u64 *cmd, s32 updateIndex);
+u64 *synthesis_process_notes(s16 *aiBuf, u32 bufLen, u64 *cmd);
 u64 *load_wave_samples(u64 *cmd, struct Note *note, s32 nSamplesToLoad);
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
 u64 *process_envelope(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf, s32 headsetPanSettings);
-u64 *process_envelope_inner(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf,
-                            s32 headsetPanSettings, struct VolumeChange *vol);
 u64 *note_apply_headset_pan_effects(u64 *cmd, struct Note *note, s32 bufLen, s32 flags, s32 leftRight);
 #else
 u64 *process_envelope(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf);
-u64 *process_envelope_inner(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf, struct VolumeChange *vol);
-#endif
 #endif
 
-#ifdef VERSION_EU
-struct SynthesisReverb gSynthesisReverbs[4];
-u8 sAudioSynthesisPad[0x10];
-#else
 struct SynthesisReverb gSynthesisReverb;
-u8 sAudioSynthesisPad[0x20];
-#endif
+
+f32 *currentRampingTableLeft;
+f32 *currentRampingTableRight;
 
 #ifdef BETTER_REVERB
 static void reverb_samples(s16 *start, s16 *end, s16 *downsampleBuffer, s32 channel) {
@@ -227,74 +212,6 @@ void set_better_reverb_buffers(u32 *inputDelaysL, u32 *inputDelaysR) {
 }
 #endif
 
-#ifdef VERSION_EU
-s16 gVolume;
-s8 gUseReverb;
-s8 gNumSynthesisReverbs;
-struct NoteSubEu *gNoteSubsEu;
-#endif
-
-#ifdef VERSION_EU
-f32 gLeftVolRampings[3][1024];
-f32 gRightVolRampings[3][1024];
-f32 *gCurrentLeftVolRamping; // Points to any of the three left buffers above
-f32 *gCurrentRightVolRamping; // Points to any of the three right buffers above
-
-u8 audioString1[] = "pitch %x: delaybytes %d : olddelay %d\n";
-u8 audioString2[] = "cont %x: delaybytes %d : olddelay %d\n";
-#endif
-
-#ifdef VERSION_EU
-// Equivalent functionality as the US/JP version,
-// just that the reverb structure is chosen from an array with index
-void prepare_reverb_ring_buffer(s32 chunkLen, u32 updateIndex, s32 reverbIndex) {
-    struct ReverbRingBufferItem *item;
-    struct SynthesisReverb *reverb = &gSynthesisReverbs[reverbIndex];
-    s32 srcPos, dstPos;
-    s32 nSamples;
-    s32 excessiveSamples;
-    if (reverb->downsampleRate != 1) {
-        if (reverb->framesLeftToIgnore == 0) {
-            // Now that the RSP has finished, downsample the samples produced two frames ago by skipping
-            // samples.
-            item = &reverb->items[reverb->curFrame][updateIndex];
-
-            // Touches both left and right since they are adjacent in memory
-            osInvalDCache(item->toDownsampleLeft, DEFAULT_LEN_2CH);
-
-            for (srcPos = 0, dstPos = 0; dstPos < item->lengthA / 2;
-                 srcPos += reverb->downsampleRate, dstPos++) {
-                reverb->ringBuffer.left[item->startPos + dstPos] = item->toDownsampleLeft[srcPos];
-                reverb->ringBuffer.right[item->startPos + dstPos] = item->toDownsampleRight[srcPos];
-            }
-            for (dstPos = 0; dstPos < item->lengthB / 2; srcPos += reverb->downsampleRate, dstPos++) {
-                reverb->ringBuffer.left[dstPos] = item->toDownsampleLeft[srcPos];
-                reverb->ringBuffer.right[dstPos] = item->toDownsampleRight[srcPos];
-            }
-        }
-    }
-
-    item = &reverb->items[reverb->curFrame][updateIndex];
-    nSamples = chunkLen / reverb->downsampleRate;
-    excessiveSamples = (nSamples + reverb->nextRingBufferPos) - reverb->bufSizePerChannel;
-    if (excessiveSamples < 0) {
-        // There is space in the ring buffer before it wraps around
-        item->lengthA = nSamples * 2;
-        item->lengthB = 0;
-        item->startPos = (s32) reverb->nextRingBufferPos;
-        reverb->nextRingBufferPos += nSamples;
-    } else {
-        // Ring buffer wrapped around
-        item->lengthA = (nSamples - excessiveSamples) * 2;
-        item->lengthB = excessiveSamples * 2;
-        item->startPos = reverb->nextRingBufferPos;
-        reverb->nextRingBufferPos = excessiveSamples;
-    }
-    // These fields are never read later
-    item->numSamplesAfterDownsampling = nSamples;
-    item->chunkLen = chunkLen;
-}
-#else
 void prepare_reverb_ring_buffer(s32 chunkLen, u32 updateIndex) {
     struct ReverbRingBufferItem *item;
     s32 srcPos, dstPos;
@@ -406,108 +323,10 @@ void prepare_reverb_ring_buffer(s32 chunkLen, u32 updateIndex) {
     item->numSamplesAfterDownsampling = numSamplesAfterDownsampling;
     item->chunkLen = chunkLen;
 }
-#endif
 
-#ifdef VERSION_EU
-u64 *synthesis_load_reverb_ring_buffer(u64 *cmd, u16 addr, u16 srcOffset, s32 len, s32 reverbIndex) {
-    aSetBuffer(cmd++, 0, addr, 0, len);
-    aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(&gSynthesisReverbs[reverbIndex].ringBuffer.left[srcOffset]));
-
-    aSetBuffer(cmd++, 0, addr + DEFAULT_LEN_1CH, 0, len);
-    aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(&gSynthesisReverbs[reverbIndex].ringBuffer.right[srcOffset]));
-
-    return cmd;
-}
-#endif
-
-#ifdef VERSION_EU
-u64 *synthesis_save_reverb_ring_buffer(u64 *cmd, u16 addr, u16 destOffset, s32 len, s32 reverbIndex) {
-    aSetBuffer(cmd++, 0, 0, addr, len);
-    aSaveBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(&gSynthesisReverbs[reverbIndex].ringBuffer.left[destOffset]));
-
-    aSetBuffer(cmd++, 0, 0, addr + DEFAULT_LEN_1CH, len);
-    aSaveBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(&gSynthesisReverbs[reverbIndex].ringBuffer.right[destOffset]));
-
-    return cmd;
-}
-#endif
-
-#ifdef VERSION_EU
-void synthesis_load_note_subs_eu(s32 updateIndex) {
-    struct NoteSubEu *src;
-    struct NoteSubEu *dest;
-    s32 i;
-
-    for (i = 0; i < gMaxSimultaneousNotes; i++) {
-        src = &gNotes[i].noteSubEu;
-        dest = &gNoteSubsEu[gMaxSimultaneousNotes * updateIndex + i];
-        if (src->enabled) {
-            *dest = *src;
-            src->needsInit = FALSE;
-        } else {
-            dest->enabled = FALSE;
-        }
-    }
-}
-#endif
-
-#ifdef VERSION_EU
-// TODO: (Scrub C) pointless mask and whitespace
-u64 *synthesis_execute(u64 *cmdBuf, s32 *writtenCmds, s16 *aiBuf, s32 bufLen) {
-    s32 i, j;
-    f32 *leftVolRamp;
-    f32 *rightVolRamp;
-    u32 *aiBufPtr;
-    u64 *cmd = cmdBuf;
-    s32 chunkLen;
-    s32 nextVolRampTable = 0;
-
-    for (i = gAudioBufferParameters.updatesPerFrame; i > 0; i--) {
-        process_sequences(i - 1);
-        synthesis_load_note_subs_eu(gAudioBufferParameters.updatesPerFrame - i);
-    }
-    aSegment(cmd++, 0, 0);
-    aiBufPtr = (u32 *) aiBuf;
-    for (i = gAudioBufferParameters.updatesPerFrame; i > 0; i--) {
-        if (i == 1) {
-            // self-assignment has no affect when added here, could possibly simplify a macro definition
-            chunkLen = bufLen;
-            leftVolRamp = gLeftVolRampings[nextVolRampTable];
-            rightVolRamp = gRightVolRampings[nextVolRampTable & 0xFFFFFFFF];
-        } else {
-            if (bufLen / i >= gAudioBufferParameters.samplesPerUpdateMax) {
-                chunkLen = gAudioBufferParameters.samplesPerUpdateMax; nextVolRampTable = 2; leftVolRamp = gLeftVolRampings[2]; rightVolRamp = gRightVolRampings[2];
-            } else if (bufLen / i <= gAudioBufferParameters.samplesPerUpdateMin) {
-                chunkLen = gAudioBufferParameters.samplesPerUpdateMin; nextVolRampTable = 0; leftVolRamp = gLeftVolRampings[0]; rightVolRamp = gRightVolRampings[0];
-            } else {
-                chunkLen = gAudioBufferParameters.samplesPerUpdate; nextVolRampTable = 1; leftVolRamp = gLeftVolRampings[1]; rightVolRamp = gRightVolRampings[1];
-            }
-        }
-        gCurrentLeftVolRamping = leftVolRamp;
-        gCurrentRightVolRamping = rightVolRamp;
-        for (j = 0; j < gNumSynthesisReverbs; j++) {
-            if (gSynthesisReverbs[j].useReverb) {
-                prepare_reverb_ring_buffer(chunkLen, gAudioBufferParameters.updatesPerFrame - i, j);
-            }
-        }
-        cmd = synthesis_do_one_audio_update((s16 *) aiBufPtr, chunkLen, cmd, gAudioBufferParameters.updatesPerFrame - i);
-        bufLen -= chunkLen;
-        aiBufPtr += chunkLen;
-    }
-
-    for (j = 0; j < gNumSynthesisReverbs; j++) {
-        if (gSynthesisReverbs[j].framesLeftToIgnore != 0) {
-            gSynthesisReverbs[j].framesLeftToIgnore--;
-        }
-        gSynthesisReverbs[j].curFrame ^= 1;
-    }
-    *writtenCmds = cmd - cmdBuf;
-    return cmd;
-}
-#else
 // bufLen will be divisible by 16
 u64 *synthesis_execute(u64 *cmdBuf, s32 *writtenCmds, s16 *aiBuf, s32 bufLen) {
-    s32 chunkLen;
+    u32 chunkLen;
     s32 i;
     u32 *aiBufPtr = (u32 *) aiBuf;
     u64 *cmd = cmdBuf + 1;
@@ -567,7 +386,7 @@ u64 *synthesis_execute(u64 *cmdBuf, s32 *writtenCmds, s16 *aiBuf, s32 bufLen) {
         if (gSynthesisReverb.useReverb) {
             prepare_reverb_ring_buffer(chunkLen, gAudioUpdatesPerFrame - i);
         }
-        cmd = synthesis_do_one_audio_update((s16 *) aiBufPtr, chunkLen, cmd, gAudioUpdatesPerFrame - i);
+        cmd = synthesis_do_one_audio_update((s16 *) aiBufPtr, chunkLen * 2, cmd, gAudioUpdatesPerFrame - i);
 
         AUDIO_PROFILER_COMPLETE_AND_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_ENVELOPE_REVERB, PROFILER_TIME_SUB_AUDIO_SYNTHESIS, PROFILER_TIME_SUB_AUDIO_UPDATE);
 
@@ -581,152 +400,8 @@ u64 *synthesis_execute(u64 *cmdBuf, s32 *writtenCmds, s16 *aiBuf, s32 bufLen) {
     *writtenCmds = cmd - cmdBuf;
     return cmd;
 }
-#endif
 
-
-#ifdef VERSION_EU
-u64 *synthesis_resample_and_mix_reverb(u64 *cmd, s32 bufLen, s16 reverbIndex, s16 updateIndex) {
-    struct ReverbRingBufferItem *item;
-    s16 startPad;
-    s16 paddedLengthA;
-
-    item = &gSynthesisReverbs[reverbIndex].items[gSynthesisReverbs[reverbIndex].curFrame][updateIndex];
-
-    aClearBuffer(cmd++, DMEM_ADDR_WET_LEFT_CH, DEFAULT_LEN_2CH);
-    if (gSynthesisReverbs[reverbIndex].downsampleRate == 1) {
-        cmd = synthesis_load_reverb_ring_buffer(cmd, DMEM_ADDR_WET_LEFT_CH, item->startPos, item->lengthA, reverbIndex);
-        if (item->lengthB != 0) {
-            cmd = synthesis_load_reverb_ring_buffer(cmd, DMEM_ADDR_WET_LEFT_CH + item->lengthA, 0, item->lengthB, reverbIndex);
-        }
-        aSetBuffer(cmd++, 0, 0, 0, DEFAULT_LEN_2CH);
-        aMix(cmd++, 0, 0x7fff, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_LEFT_CH);
-        aMix(cmd++, 0, 0x8000 + gSynthesisReverbs[reverbIndex].reverbGain, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_WET_LEFT_CH);
-    } else {
-        startPad = (item->startPos & 0x7) * 2;
-        paddedLengthA = ALIGN16(startPad + item->lengthA);
-
-        cmd = synthesis_load_reverb_ring_buffer(cmd, DMEM_ADDR_RESAMPLED, (item->startPos - startPad / 2), DEFAULT_LEN_1CH, reverbIndex);
-        if (item->lengthB != 0) {
-            cmd = synthesis_load_reverb_ring_buffer(cmd, DMEM_ADDR_RESAMPLED + paddedLengthA, 0, DEFAULT_LEN_1CH - paddedLengthA, reverbIndex);
-        }
-
-        aSetBuffer(cmd++, 0, DMEM_ADDR_RESAMPLED + startPad, DMEM_ADDR_WET_LEFT_CH, bufLen * 2);
-        aResample(cmd++, gSynthesisReverbs[reverbIndex].resampleFlags, gSynthesisReverbs[reverbIndex].resampleRate, VIRTUAL_TO_PHYSICAL2(gSynthesisReverbs[reverbIndex].resampleStateLeft));
-
-        aSetBuffer(cmd++, 0, DMEM_ADDR_RESAMPLED2 + startPad, DMEM_ADDR_WET_RIGHT_CH, bufLen * 2);
-        aResample(cmd++, gSynthesisReverbs[reverbIndex].resampleFlags, gSynthesisReverbs[reverbIndex].resampleRate, VIRTUAL_TO_PHYSICAL2(gSynthesisReverbs[reverbIndex].resampleStateRight));
-
-        aSetBuffer(cmd++, 0, 0, 0, DEFAULT_LEN_2CH);
-        aMix(cmd++, 0, 0x7fff, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_LEFT_CH);
-        aMix(cmd++, 0, 0x8000 + gSynthesisReverbs[reverbIndex].reverbGain, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_WET_LEFT_CH);
-    }
-    return cmd;
-}
-
-u64 *synthesis_save_reverb_samples(u64 *cmd, s16 reverbIndex, s16 updateIndex) {
-    struct ReverbRingBufferItem *item;
-
-    item = &gSynthesisReverbs[reverbIndex].items[gSynthesisReverbs[reverbIndex].curFrame][updateIndex];
-    if (gSynthesisReverbs[reverbIndex].useReverb) {
-        switch (gSynthesisReverbs[reverbIndex].downsampleRate) {
-            case 1:
-                // Put the oldest samples in the ring buffer into the wet channels
-                cmd = synthesis_save_reverb_ring_buffer(cmd, DMEM_ADDR_WET_LEFT_CH, item->startPos, item->lengthA, reverbIndex);
-                if (item->lengthB != 0) {
-                    // Ring buffer wrapped
-                    cmd = synthesis_save_reverb_ring_buffer(cmd, DMEM_ADDR_WET_LEFT_CH + item->lengthA, 0, item->lengthB, reverbIndex);
-                }
-                break;
-
-            default:
-                // Downsampling is done later by CPU when RSP is done, therefore we need to have double
-                // buffering. Left and right buffers are adjacent in memory.
-                aSetBuffer(cmd++, 0, 0, DMEM_ADDR_WET_LEFT_CH, DEFAULT_LEN_2CH);
-                aSaveBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(gSynthesisReverbs[reverbIndex].items[gSynthesisReverbs[reverbIndex].curFrame][updateIndex].toDownsampleLeft));
-                gSynthesisReverbs[reverbIndex].resampleFlags = 0;
-                break;
-        }
-    }
-    return cmd;
-}
-#endif
-
-#ifdef VERSION_EU
-u64 *synthesis_do_one_audio_update(s16 *aiBuf, s32 bufLen, u64 *cmd, s32 updateIndex) {
-    struct NoteSubEu *noteSubEu;
-    u8 noteIndices[56];
-    s32 temp;
-    s32 i;
-    s16 j;
-    s16 notePos = 0;
-
-    if (gNumSynthesisReverbs == 0) {
-        for (i = 0; i < gMaxSimultaneousNotes; i++) {
-            if (gNoteSubsEu[gMaxSimultaneousNotes * updateIndex + i].enabled) {
-                noteIndices[notePos++] = i;
-            }
-        }
-    } else {
-        for (j = 0; j < gNumSynthesisReverbs; j++) {
-            for (i = 0; i < gMaxSimultaneousNotes; i++) {
-                noteSubEu = &gNoteSubsEu[gMaxSimultaneousNotes * updateIndex + i];
-                if (noteSubEu->enabled && j == noteSubEu->reverbIndex) {
-                    noteIndices[notePos++] = i;
-                }
-            }
-        }
-
-        for (i = 0; i < gMaxSimultaneousNotes; i++) {
-            noteSubEu = &gNoteSubsEu[gMaxSimultaneousNotes * updateIndex + i];
-            if (noteSubEu->enabled && noteSubEu->reverbIndex >= gNumSynthesisReverbs) {
-                noteIndices[notePos++] = i;
-            }
-        }
-    }
-    aClearBuffer(cmd++, DMEM_ADDR_LEFT_CH, DEFAULT_LEN_2CH);
-    i = 0;
-    for (j = 0; j < gNumSynthesisReverbs; j++) {
-        gUseReverb = gSynthesisReverbs[j].useReverb;
-        if (gUseReverb) {
-            cmd = synthesis_resample_and_mix_reverb(cmd, bufLen, j, updateIndex);
-        }
-        for (; i < notePos; i++) {
-            temp = updateIndex * gMaxSimultaneousNotes;
-            if (j == gNoteSubsEu[temp + noteIndices[i]].reverbIndex) {
-                cmd = synthesis_process_note(&gNotes[noteIndices[i]],
-                                             &gNoteSubsEu[temp + noteIndices[i]],
-                                             &gNotes[noteIndices[i]].synthesisState,
-                                             aiBuf, bufLen, cmd);
-                continue;
-            } else {
-                break;
-            }
-        }
-        if (gSynthesisReverbs[j].useReverb) {
-            cmd = synthesis_save_reverb_samples(cmd, j, updateIndex);
-        }
-    }
-    for (; i < notePos; i++) {
-        temp = updateIndex * gMaxSimultaneousNotes;
-        if (IS_BANK_LOAD_COMPLETE(gNoteSubsEu[temp + noteIndices[i]].bankId)) {
-            cmd = synthesis_process_note(&gNotes[noteIndices[i]],
-                                         &gNoteSubsEu[temp + noteIndices[i]],
-                                         &gNotes[noteIndices[i]].synthesisState,
-                                         aiBuf, bufLen, cmd);
-        } else {
-            gAudioErrorFlags = (gNoteSubsEu[temp + noteIndices[i]].bankId + (i << 8)) + 0x10000000;
-        }
-    }
-
-    temp = bufLen * 2;
-    aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, temp);
-    aInterleave(cmd++, DMEM_ADDR_LEFT_CH, DMEM_ADDR_RIGHT_CH);
-    aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, temp * 2);
-    aSaveBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(aiBuf));
-    return cmd;
-}
-#else
-u64 *synthesis_do_one_audio_update(s16 *aiBuf, s32 bufLen, u64 *cmd, s32 updateIndex) {
+u64 *synthesis_do_one_audio_update(s16 *aiBuf, u32 bufLen, u64 *cmd, s32 updateIndex) {
     s16 ra;
     s16 t4;
     struct ReverbRingBufferItem *v1;
@@ -766,9 +441,9 @@ u64 *synthesis_do_one_audio_update(s16 *aiBuf, s32 bufLen, u64 *cmd, s32 updateI
                 // Ring buffer wrapped
                 aSetLoadBufferPair(cmd++, ra, 0);
             }
-            aSetBuffer(cmd++, 0, t4 + DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_LEFT_CH, bufLen << 1);
+            aSetBuffer(cmd++, 0, t4 + DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_LEFT_CH, bufLen);
             aResample(cmd++, gSynthesisReverb.resampleFlags, (u16) gSynthesisReverb.resampleRate, VIRTUAL_TO_PHYSICAL2(gSynthesisReverb.resampleStateLeft));
-            aSetBuffer(cmd++, 0, t4 + DMEM_ADDR_WET_RIGHT_CH, DMEM_ADDR_RIGHT_CH, bufLen << 1);
+            aSetBuffer(cmd++, 0, t4 + DMEM_ADDR_WET_RIGHT_CH, DMEM_ADDR_RIGHT_CH, bufLen);
             aResample(cmd++, gSynthesisReverb.resampleFlags, (u16) gSynthesisReverb.resampleRate, VIRTUAL_TO_PHYSICAL2(gSynthesisReverb.resampleStateRight));
             aDMEMMove(cmd++, DMEM_ADDR_LEFT_CH, DMEM_ADDR_WET_LEFT_CH, DEFAULT_LEN_2CH);
             aSetBuffer(cmd++, 0, 0, 0, DEFAULT_LEN_2CH);
@@ -795,57 +470,28 @@ u64 *synthesis_do_one_audio_update(s16 *aiBuf, s32 bufLen, u64 *cmd, s32 updateI
     }
     return cmd;
 }
-#endif
 
-#ifdef VERSION_EU
-// Processes just one note, not all
-u64 *synthesis_process_note(struct Note *note, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *synthesisState, UNUSED s16 *aiBuf, s32 bufLen, u64 *cmd) {
-#else
-u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
+u64 *synthesis_process_notes(s16 *aiBuf, u32 bufLen, u64 *cmd) {
     s32 noteIndex;                           // sp174
     struct Note *note;                       // s7
-#endif
     struct AudioBankSample *audioBookSample; // sp164, sp138
     struct AdpcmLoop *loopInfo;              // sp160, sp134
     s16 *curLoadedBook = NULL;               // sp154, sp130
-#ifndef VERSION_EU
-    u16 resamplingRateFixedPoint;            // sp5c, sp11A
-#endif
     s32 noteFinished;                        // 150 t2, sp124
     s32 restart;                             // 14c t3, sp120
     s32 flags;                               // sp148, sp11C
-#ifdef VERSION_EU
-    u16 resamplingRateFixedPoint;            // sp5c, sp11A
-#endif
-    UNUSED s32 tempBufLen;
     s32 sp130 = 0;  //sp128, sp104
     s32 nAdpcmSamplesProcessed; // signed required for US
     s32 t0;
-#ifdef VERSION_EU
     u8 *sampleAddr;                          // sp120, spF4
     s32 s6;
-#else
-    s32 s6;
-    u8 *sampleAddr;                          // sp120, spF4
-#endif
 
-#ifdef VERSION_EU
-    s32 samplesLenAdjusted; // 108,      spEC
-    // Might have been used to store (samplesLenFixedPoint >> 0x10), but doing so causes strange
+    // Might have been used to store (samplesLenFixedPoint >> 16), but doing so causes strange
     // behavior with the break near the end of the loop, causing US and JP to need a goto instead
-    UNUSED s32 samplesLenInt;
-    s32 endPos;             // sp110,    spE4
-    s32 nSamplesToProcess;  // sp10c/a0, spE0
-    s32 s2;
-#else
-    // Might have been used to store (samplesLenFixedPoint >> 0x10), but doing so causes strange
-    // behavior with the break near the end of the loop, causing US and JP to need a goto instead
-    UNUSED s32 samplesLenInt;
     s32 samplesLenAdjusted; // 108
     s32 s2;
     s32 endPos;             // sp110,    spE4
     s32 nSamplesToProcess;  // sp10c/a0, spE0
-#endif
 
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
     s32 leftRight;
@@ -856,68 +502,49 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
     u32 samplesLenFixedPoint;    // v1_1
     s32 nSamplesInThisIteration; // v1_2
     u32 a3;
-#ifndef VERSION_EU
-    s32 t9;
-#endif
     u8 *v0_2;
     s32 nParts;                 // spE8, spBC
     s32 curPart;                // spE4, spB8
 
-#ifndef VERSION_EU
     f32 resamplingRate; // f12
-#endif
     s32 temp;
 
-#ifdef VERSION_EU
     s32 s5Aligned;
-#endif
     s32 resampledTempLen;                    // spD8, spAC
     u16 noteSamplesDmemAddrBeforeResampling = 0; // spD6, spAA
+    u16 resamplingRateFixedPoint;            // sp5c, sp11A
 
+    switch (bufLen) {
+        case (128 * 2):
+            currentRampingTableLeft = gVolRampingLhs128;
+            currentRampingTableRight = gVolRampingRhs128;
+            break;
+        case (144 * 2):
+            currentRampingTableLeft = gVolRampingLhs144;
+            currentRampingTableRight = gVolRampingRhs144;
+            break;
+        case (136 * 2):
+        default:
+            currentRampingTableLeft = gVolRampingLhs136;
+            currentRampingTableRight = gVolRampingRhs136;
+            break;
+    }
 
-#ifndef VERSION_EU
     for (noteIndex = 0; noteIndex < gMaxSimultaneousNotes; noteIndex++) {
         note = &gNotes[noteIndex];
-#ifdef VERSION_US
         //! This function requires note->enabled to be volatile, but it breaks other functions like note_enable.
         //! Casting to a struct with just the volatile bitfield works, but there may be a better way to match.
         if (((struct vNote *)note)->enabled && !IS_BANK_LOAD_COMPLETE(note->bankId)) {
-#else
-        if (!IS_BANK_LOAD_COMPLETE(note->bankId)) {
-#endif
             gAudioErrorFlags = (note->bankId << 8) + noteIndex + 0x1000000;
         } else if (((struct vNote *)note)->enabled) {
-#else
-        if (note->noteSubEu.enabled == FALSE) {
-            return cmd;
-        } else {
-#endif
             flags = 0;
-#ifdef VERSION_EU
-            tempBufLen = bufLen;
-#endif
 
-#ifdef VERSION_EU
-            if (noteSubEu->needsInit == TRUE) {
-#else
             if (note->needsInit == TRUE) {
-#endif
                 flags = A_INIT;
-#ifndef VERSION_EU
                 note->samplePosInt = 0;
                 note->samplePosFrac = 0;
-#else
-                synthesisState->restart = FALSE;
-                synthesisState->samplePosInt = 0;
-                synthesisState->samplePosFrac = 0;
-                synthesisState->curVolLeft = 1;
-                synthesisState->curVolRight = 1;
-                synthesisState->prevHeadsetPanRight = 0;
-                synthesisState->prevHeadsetPanLeft = 0;
-#endif
             }
 
-#ifndef VERSION_EU
             if (note->frequency < 2.0f) {
                 nParts = 1;
                 if (note->frequency > 1.99996f) {
@@ -934,39 +561,20 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
             }
 
             resamplingRateFixedPoint = (u16)(s32)(resamplingRate * 32768.0f);
-            samplesLenFixedPoint = note->samplePosFrac + (resamplingRateFixedPoint * bufLen) * 2;
+            samplesLenFixedPoint = note->samplePosFrac + (resamplingRateFixedPoint * bufLen);
             note->samplePosFrac = samplesLenFixedPoint & 0xFFFF; // 16-bit store, can't reuse
-#else
-            resamplingRateFixedPoint = noteSubEu->resamplingRateFixedPoint;
-            nParts = noteSubEu->hasTwoAdpcmParts + 1;
-            samplesLenFixedPoint = (resamplingRateFixedPoint * tempBufLen * 2) + synthesisState->samplePosFrac;
-            synthesisState->samplePosFrac = samplesLenFixedPoint & 0xFFFF;
-#endif
 
-#ifdef VERSION_EU
-            if (noteSubEu->isSyntheticWave) {
-                cmd = load_wave_samples(cmd, noteSubEu, synthesisState, samplesLenFixedPoint >> 0x10);
-                noteSamplesDmemAddrBeforeResampling = (synthesisState->samplePosInt * 2) + DMEM_ADDR_UNCOMPRESSED_NOTE;
-                synthesisState->samplePosInt += samplesLenFixedPoint >> 0x10;
-            }
-#else
             if (note->sound == NULL) {
                 // A wave synthesis note (not ADPCM)
 
-                cmd = load_wave_samples(cmd, note, samplesLenFixedPoint >> 0x10);
+                cmd = load_wave_samples(cmd, note, samplesLenFixedPoint >> 16);
                 noteSamplesDmemAddrBeforeResampling = DMEM_ADDR_UNCOMPRESSED_NOTE + note->samplePosInt * 2;
-                note->samplePosInt += (samplesLenFixedPoint >> 0x10);
+                note->samplePosInt += (samplesLenFixedPoint >> 16);
                 flags = 0;
             }
-#endif
             else {
                 // ADPCM note
-
-#ifdef VERSION_EU
-                audioBookSample = noteSubEu->sound.audioBankSound->sample;
-#else
                 audioBookSample = note->sound->sample;
-#endif
 
                 loopInfo = audioBookSample->loop;
                 endPos = loopInfo->end;
@@ -977,32 +585,21 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                     s5 = 0;                     // s4
 
                     if (nParts == 1) {
-                        samplesLenAdjusted = samplesLenFixedPoint >> 0x10;
-                    } else if ((samplesLenFixedPoint >> 0x10) & 1) {
-                        samplesLenAdjusted = ((samplesLenFixedPoint >> 0x10) & ~1) + (curPart * 2);
+                        samplesLenAdjusted = samplesLenFixedPoint >> 16;
+                    } else if ((samplesLenFixedPoint >> 16) & 1) {
+                        samplesLenAdjusted = ((samplesLenFixedPoint >> 16) & ~1) + (curPart * 2);
                     }
                     else {
-                        samplesLenAdjusted = (samplesLenFixedPoint >> 0x10);
+                        samplesLenAdjusted = (samplesLenFixedPoint >> 16);
                     }
 
                     if (curLoadedBook != audioBookSample->book->book) {
                         u32 nEntries; // v1
                         curLoadedBook = audioBookSample->book->book;
-#ifdef VERSION_EU
-                        nEntries = 16 * audioBookSample->book->order * audioBookSample->book->npredictors;
-                        aLoadADPCM(cmd++, nEntries, VIRTUAL_TO_PHYSICAL2(curLoadedBook + noteSubEu->bookOffset));
-#else
-                        nEntries = audioBookSample->book->order * audioBookSample->book->npredictors;
-                        aLoadADPCM(cmd++, nEntries * 16, VIRTUAL_TO_PHYSICAL2(curLoadedBook));
-#endif
+                        nEntries = audioBookSample->book->order * audioBookSample->book->npredictors * 16U;
+                        aLoadADPCM(cmd++, nEntries, VIRTUAL_TO_PHYSICAL2(curLoadedBook));
                     }
 
-#ifdef VERSION_EU
-                    if (noteSubEu->bookOffset) {
-                        curLoadedBook = euUnknownData_80301950; // what's this? never read
-                    }
-#endif
-
                     while (nAdpcmSamplesProcessed != samplesLenAdjusted) {
                         s32 samplesRemaining; // v1
                         s32 s0;
@@ -1010,23 +607,13 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                         noteFinished = FALSE;
                         restart = FALSE;
                         nSamplesToProcess = samplesLenAdjusted - nAdpcmSamplesProcessed;
-#ifdef VERSION_EU
-                        s2 = synthesisState->samplePosInt & 0xf;
-                        samplesRemaining = endPos - synthesisState->samplePosInt;
-#else
                         s2 = note->samplePosInt & 0xf;
                         samplesRemaining = endPos - note->samplePosInt;
-#endif
 
-#ifdef VERSION_EU
-                        if (s2 == 0 && synthesisState->restart == FALSE) {
-                            s2 = 16;
-                        }
-#else
                         if (s2 == 0 && note->restart == FALSE) {
                             s2 = 16;
                         }
-#endif
+
                         s6 = 16 - s2; // a1
 
                         if (nSamplesToProcess < samplesRemaining) {
@@ -1034,11 +621,7 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                             s0 = t0 * 16;
                             s3 = s6 + s0 - nSamplesToProcess;
                         } else {
-#ifndef VERSION_EU
-                            s0 = samplesRemaining + s2 - 0x10;
-#else
                             s0 = samplesRemaining - s6;
-#endif
                             s3 = 0;
                             if (s0 <= 0) {
                                 s0 = 0;
@@ -1054,22 +637,7 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                         }
 
                         if (t0 != 0) {
-#ifdef VERSION_EU
-                            temp = (synthesisState->samplePosInt - s2 + 0x10) / 16;
-                            if (audioBookSample->loaded == 0x81) {
-                                v0_2 = sampleAddr + temp * 9;
-                            } else {
-                                AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_DMA);
-
-                                v0_2 = dma_sample_data(
-                                    (uintptr_t) (sampleAddr + temp * 9),
-                                    t0 * 9, flags, &synthesisState->sampleDmaIndex);
-
-                                AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_DMA, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING);
-                            }
-#else
-                            // HACKERSM64_TODO: Is the EU thing above applicable to US? Could potentially save some resources.
-                            temp = (note->samplePosInt - s2 + 0x10) / 16;
+                            temp = (note->samplePosInt - s2 + 16) / 16;
             
                             AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_DMA);
 
@@ -1078,7 +646,7 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                                 t0 * 9, flags, &note->sampleDmaIndex);
 
                             AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_DMA, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING);
-#endif
+
                             a3 = (u32)((uintptr_t) v0_2 & 0xf);
                             aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA, 0, t0 * 9 + a3);
                             aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(v0_2 - a3));
@@ -1087,48 +655,23 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                             a3 = 0;
                         }
 
-#ifdef VERSION_EU
-                        if (synthesisState->restart != FALSE) {
-                            aSetLoop(cmd++, VIRTUAL_TO_PHYSICAL2(audioBookSample->loop->state));
-                            flags = A_LOOP; // = 2
-                            synthesisState->restart = FALSE;
-                        }
-#else
                         if (note->restart != FALSE) {
                             aSetLoop(cmd++, VIRTUAL_TO_PHYSICAL2(audioBookSample->loop->state));
                             flags = A_LOOP; // = 2
                             note->restart = FALSE;
                         }
-#endif
 
                         nSamplesInThisIteration = s0 + s6 - s3;
-#ifdef VERSION_EU
-                        if (nAdpcmSamplesProcessed == 0) {
-                            aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3,
-                                       DMEM_ADDR_UNCOMPRESSED_NOTE, s0 * 2);
-                            aADPCMdec(cmd++, flags,
-                                      VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
-                            sp130 = s2 * 2;
-                        } else {
-                            s5Aligned = ALIGN32(s5);
-                            aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3,
-                                       DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned, s0 * 2);
-                            aADPCMdec(cmd++, flags,
-                                      VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
-                            aDMEMMove(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned + (s2 * 2),
-                                      DMEM_ADDR_UNCOMPRESSED_NOTE + s5, (nSamplesInThisIteration) * 2);
-                        }
-#else
                         if (nAdpcmSamplesProcessed == 0) {
                             aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3, DMEM_ADDR_UNCOMPRESSED_NOTE, s0 * 2);
                             aADPCMdec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->adpcmdecState));
                             sp130 = s2 * 2;
                         } else {
-                            aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3, DMEM_ADDR_UNCOMPRESSED_NOTE + ALIGN32(s5), s0 * 2);
+                            s5Aligned = ALIGN32(s5);
+                            aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3, DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned, s0 * 2);
                             aADPCMdec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->adpcmdecState));
-                            aDMEMMove(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + ALIGN32(s5) + (s2 * 2), DMEM_ADDR_UNCOMPRESSED_NOTE + s5, (nSamplesInThisIteration) * 2);
+                            aDMEMMove(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned + (s2 * 2), DMEM_ADDR_UNCOMPRESSED_NOTE + s5, (nSamplesInThisIteration) * 2);
                         }
-#endif
 
                         nAdpcmSamplesProcessed += nSamplesInThisIteration;
 
@@ -1160,32 +703,18 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                         if (noteFinished) {
                             aClearBuffer(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + s5,
                                          (samplesLenAdjusted - nAdpcmSamplesProcessed) * 2);
-#ifdef VERSION_EU
-                            noteSubEu->finished = 1;
-                            note->noteSubEu.finished = 1;
-                            note->noteSubEu.enabled = 0;
-#else
                             note->samplePosInt = 0;
                             note->finished = 1;
                             ((struct vNote *)note)->enabled = 0;
-#endif
                             break;
                         }
-#ifdef VERSION_EU
-                        if (restart) {
-                            synthesisState->restart = TRUE;
-                            synthesisState->samplePosInt = loopInfo->start;
-                        } else {
-                            synthesisState->samplePosInt += nSamplesToProcess;
-                        }
-#else
+
                         if (restart) {
                             note->restart = TRUE;
                             note->samplePosInt = loopInfo->start;
                         } else {
                             note->samplePosInt += nSamplesToProcess;
                         }
-#endif
                     }
 
                     switch (nParts) {
@@ -1197,19 +726,11 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                             switch (curPart) {
                                 case 0:
                                     aSetBuffer(cmd++, 0, DMEM_ADDR_UNCOMPRESSED_NOTE + sp130, DMEM_ADDR_RESAMPLED, samplesLenAdjusted + 4);
-#ifdef VERSION_EU
-                                    aResample(cmd++, A_INIT, 0xff60, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->dummyResampleState));
-#else
                                     aResample(cmd++, A_INIT, 0xff60, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->dummyResampleState));
-#endif
                                     resampledTempLen = samplesLenAdjusted + 4;
                                     noteSamplesDmemAddrBeforeResampling = DMEM_ADDR_RESAMPLED + 4;
-#ifdef VERSION_EU
-                                    if (noteSubEu->finished != FALSE) {
-#else
                                     if (note->finished != FALSE) {
-#endif
-                                        aClearBuffer(cmd++, DMEM_ADDR_RESAMPLED + resampledTempLen, samplesLenAdjusted + 0x10);
+                                        aClearBuffer(cmd++, DMEM_ADDR_RESAMPLED + resampledTempLen, samplesLenAdjusted + 16);
                                     }
                                     break;
 
@@ -1217,15 +738,9 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                                     aSetBuffer(cmd++, 0, DMEM_ADDR_UNCOMPRESSED_NOTE + sp130,
                                                DMEM_ADDR_RESAMPLED2,
                                                samplesLenAdjusted + 8);
-#ifdef VERSION_EU
-                                    aResample(cmd++, A_INIT, 0xff60,
-                                              VIRTUAL_TO_PHYSICAL2(
-                                                  synthesisState->synthesisBuffers->dummyResampleState));
-#else
                                     aResample(cmd++, A_INIT, 0xff60,
                                               VIRTUAL_TO_PHYSICAL2(
                                                   note->synthesisBuffers->dummyResampleState));
-#endif
                                     aDMEMMove(cmd++, DMEM_ADDR_RESAMPLED2 + 4,
                                               DMEM_ADDR_RESAMPLED + resampledTempLen,
                                               samplesLenAdjusted + 4);
@@ -1233,137 +748,86 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                             }
                     }
 
-#ifdef VERSION_EU
-                    if (noteSubEu->finished != FALSE) {
-#else
                     if (note->finished != FALSE) {
-#endif
                         break;
                     }
                 }
             }
 
             flags = 0;
-
-#ifdef VERSION_EU
-            if (noteSubEu->needsInit == TRUE) {
-                flags = A_INIT;
-                noteSubEu->needsInit = FALSE;
-            }
-
-            // final resample
-            aSetBuffer(cmd++, /*flags*/ 0, noteSamplesDmemAddrBeforeResampling, /*dmemout*/ DMEM_ADDR_TEMP, bufLen * 2);
-            aResample(cmd++, flags, resamplingRateFixedPoint, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->finalResampleState));
-#else
             if (note->needsInit == TRUE) {
                 flags = A_INIT;
                 note->needsInit = FALSE;
             }
 
             // final resample
-            aSetBuffer(cmd++, /*flags*/ 0, noteSamplesDmemAddrBeforeResampling, /*dmemout*/ DMEM_ADDR_TEMP, bufLen * 2);
+            aSetBuffer(cmd++, /*flags*/ 0, noteSamplesDmemAddrBeforeResampling, /*dmemout*/ DMEM_ADDR_TEMP, bufLen);
             aResample(cmd++, flags, resamplingRateFixedPoint, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->finalResampleState));
-#endif
 
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
-#ifdef VERSION_EU
-            if (noteSubEu->headsetPanRight != 0 || synthesisState->prevHeadsetPanRight != 0) {
-                leftRight = 1;
-            } else if (noteSubEu->headsetPanLeft != 0 || synthesisState->prevHeadsetPanLeft != 0) {
-                leftRight = 2;
-#else
             if (note->headsetPanRight != 0 || note->prevHeadsetPanRight != 0) {
                 leftRight = 1;
             } else if (note->headsetPanLeft != 0 || note->prevHeadsetPanLeft != 0) {
                 leftRight = 2;
-#endif
             } else {
                 leftRight = 0;
             }
 
             AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_ENVELOPE_REVERB);
-#ifdef VERSION_EU
-            cmd = process_envelope(cmd, noteSubEu, synthesisState, bufLen, 0, leftRight, flags);
-#else
             cmd = process_envelope(cmd, note, bufLen, 0, leftRight);
-#endif
             AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_ENVELOPE_REVERB, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING);
 
-#ifdef VERSION_EU
-            if (noteSubEu->usesHeadsetPanEffects) {
-                cmd = note_apply_headset_pan_effects(cmd, noteSubEu, synthesisState, bufLen * 2, flags, leftRight);
-            }
-#else
             if (note->usesHeadsetPanEffects) {
-                cmd = note_apply_headset_pan_effects(cmd, note, bufLen * 2, flags, leftRight);
+                cmd = note_apply_headset_pan_effects(cmd, note, bufLen, flags, leftRight);
             }
-#endif
 #else
             AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_ENVELOPE_REVERB);
             cmd = process_envelope(cmd, note, bufLen, 0);
             AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_ENVELOPE_REVERB, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING);
 #endif
         }
-#ifndef VERSION_EU
     }
 
-    t9 = bufLen * 2;
-    aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, t9);
+    aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, bufLen);
     aInterleave(cmd++, DMEM_ADDR_LEFT_CH, DMEM_ADDR_RIGHT_CH);
-    t9 *= 2;
-    aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, t9);
+    aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, bufLen * 2);
     aSaveBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(aiBuf));
-#endif
 
     return cmd;
 }
 
-#ifdef VERSION_EU
-u64 *load_wave_samples(u64 *cmd, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *synthesisState, s32 nSamplesToLoad) {
+u64 *load_wave_samples(u64 *cmd, struct Note *note, s32 nSamplesToLoad) {
     s32 a3;
     s32 repeats;
     s32 i;
-    aSetBuffer(cmd++, /*flags*/ 0, /*dmemin*/ DMEM_ADDR_UNCOMPRESSED_NOTE, /*dmemout*/ 0, /*count*/ 128);
-    aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(noteSubEu->sound.samples));
+    aSetBuffer(cmd++, /*flags*/ 0, /*dmemin*/ DMEM_ADDR_UNCOMPRESSED_NOTE, /*dmemout*/ 0,
+               /*count*/ sizeof(note->synthesisBuffers->samples));
+    aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->samples));
 
-    synthesisState->samplePosInt &= 0x3f;
-    a3 = 64 - synthesisState->samplePosInt;
+    note->samplePosInt &= (note->sampleCount - 1);
+    a3 = 64 - note->samplePosInt;
     if (a3 < nSamplesToLoad) {
         repeats = (nSamplesToLoad - a3 + 63) / 64;
         for (i = 0; i < repeats; i++) {
             aDMEMMove(cmd++,
                       /*dmemin*/ DMEM_ADDR_UNCOMPRESSED_NOTE,
-                      /*dmemout*/ DMEM_ADDR_UNCOMPRESSED_NOTE + (1 + i) * 128,
-                      /*count*/ 128);
+                      /*dmemout*/ DMEM_ADDR_UNCOMPRESSED_NOTE + (1 + i) * sizeof(note->synthesisBuffers->samples),
+                      /*count*/ sizeof(note->synthesisBuffers->samples));
         }
     }
     return cmd;
 }
-#else
-u64 *load_wave_samples(u64 *cmd, struct Note *note, s32 nSamplesToLoad) {
-    s32 a3;
-    s32 i;
-    aSetBuffer(cmd++, /*flags*/ 0, /*dmemin*/ DMEM_ADDR_UNCOMPRESSED_NOTE, /*dmemout*/ 0,
-               /*count*/ sizeof(note->synthesisBuffers->samples));
-    aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->samples));
-    note->samplePosInt &= (note->sampleCount - 1);
-    a3 = 64 - note->samplePosInt;
-    if (a3 < nSamplesToLoad) {
-        for (i = 0; i <= (nSamplesToLoad - a3 + 63) / 64 - 1; i++) {
-            aDMEMMove(cmd++, /*dmemin*/ DMEM_ADDR_UNCOMPRESSED_NOTE, /*dmemout*/ DMEM_ADDR_UNCOMPRESSED_NOTE + (1 + i) * sizeof(note->synthesisBuffers->samples), /*count*/ sizeof(note->synthesisBuffers->samples));
-        }
-    }
-    return cmd;
-}
-#endif
 
-#ifndef VERSION_EU
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
 u64 *process_envelope(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf, s32 headsetPanSettings) {
 #else
 u64 *process_envelope(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf) {
 #endif
+    u8 mixerFlags;
+    s32 rampLeft;
+    s32 rampRight;
     struct VolumeChange vol;
+
     if (note->initFullVelocity) {
         note->initFullVelocity = FALSE;
         vol.sourceLeft = note->targetVolLeft;
@@ -1376,45 +840,6 @@ u64 *process_envelope(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf) {
     vol.targetRight = note->targetVolRight;
     note->curVolLeft = vol.targetLeft;
     note->curVolRight = vol.targetRight;
-#ifdef ENABLE_STEREO_HEADSET_EFFECTS
-    return process_envelope_inner(cmd, note, nSamples, inBuf, headsetPanSettings, &vol);
-#else
-    return process_envelope_inner(cmd, note, nSamples, inBuf, &vol);
-#endif
-}
-
-#ifdef ENABLE_STEREO_HEADSET_EFFECTS
-u64 *process_envelope_inner(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf,
-                            s32 headsetPanSettings, struct VolumeChange *vol) {
-#else
-u64 *process_envelope_inner(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf,
-                            struct VolumeChange *vol) {
-#endif
-    u8 mixerFlags;
-    s32 rampLeft, rampRight;
-#elif defined(VERSION_EU)
-u64 *process_envelope(u64 *cmd, struct NoteSubEu *note, struct NoteSynthesisState *synthesisState, s32 nSamples, u16 inBuf, s32 headsetPanSettings, UNUSED u32 flags) {
-    u16 sourceRight;
-    u16 sourceLeft;
-    u16 targetLeft;
-    u16 targetRight;
-    s32 mixerFlags;
-    s32 rampLeft;
-    s32 rampRight;
-
-    sourceLeft = synthesisState->curVolLeft;
-    sourceRight = synthesisState->curVolRight;
-    targetLeft = (note->targetVolLeft << 5);
-    targetRight = (note->targetVolRight << 5);
-    if (targetLeft == 0) {
-        targetLeft++;
-    }
-    if (targetRight == 0) {
-        targetRight++;
-    }
-    synthesisState->curVolLeft = targetLeft;
-    synthesisState->curVolRight = targetRight;
-#endif
 
     // For aEnvMixer, five buffers and count are set using aSetBuffer.
     // in, dry left, count without A_AUX flag.
@@ -1426,17 +851,17 @@ u64 *process_envelope(u64 *cmd, struct NoteSubEu *note, struct NoteSynthesisStat
 
         switch (headsetPanSettings) {
             case 1:
-                aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_NOTE_PAN_TEMP, nSamples * 2);
+                aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_NOTE_PAN_TEMP, nSamples);
                 aSetBuffer(cmd++, A_AUX, DMEM_ADDR_RIGHT_CH, DMEM_ADDR_WET_LEFT_CH,
                            DMEM_ADDR_WET_RIGHT_CH);
                 break;
             case 2:
-                aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples * 2);
+                aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples);
                 aSetBuffer(cmd++, A_AUX, DMEM_ADDR_NOTE_PAN_TEMP, DMEM_ADDR_WET_LEFT_CH,
                            DMEM_ADDR_WET_RIGHT_CH);
                 break;
             default:
-                aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples * 2);
+                aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples);
                 aSetBuffer(cmd++, A_AUX, DMEM_ADDR_RIGHT_CH, DMEM_ADDR_WET_LEFT_CH,
                            DMEM_ADDR_WET_RIGHT_CH);
                 break;
@@ -1447,106 +872,66 @@ u64 *process_envelope(u64 *cmd, struct NoteSubEu *note, struct NoteSynthesisStat
         // mixed into a temporary buffer and then subtracted from the normal buffer.
         if (note->stereoStrongRight) {
             aClearBuffer(cmd++, DMEM_ADDR_STEREO_STRONG_TEMP_DRY, DEFAULT_LEN_2CH);
-            aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_STEREO_STRONG_TEMP_DRY, nSamples * 2);
+            aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_STEREO_STRONG_TEMP_DRY, nSamples);
             aSetBuffer(cmd++, A_AUX, DMEM_ADDR_RIGHT_CH, DMEM_ADDR_STEREO_STRONG_TEMP_WET,
                        DMEM_ADDR_WET_RIGHT_CH);
         } else if (note->stereoStrongLeft) {
             aClearBuffer(cmd++, DMEM_ADDR_STEREO_STRONG_TEMP_DRY, DEFAULT_LEN_2CH);
-            aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples * 2);
+            aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples);
             aSetBuffer(cmd++, A_AUX, DMEM_ADDR_STEREO_STRONG_TEMP_DRY, DMEM_ADDR_WET_LEFT_CH,
                        DMEM_ADDR_STEREO_STRONG_TEMP_WET);
         } else {
-            aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples * 2);
+            aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples);
             aSetBuffer(cmd++, A_AUX, DMEM_ADDR_RIGHT_CH, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_WET_RIGHT_CH);
         }
     }
 #else
-    aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples * 2);
+    aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples);
     aSetBuffer(cmd++, A_AUX, DMEM_ADDR_RIGHT_CH, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_WET_RIGHT_CH);
 #endif
 
-#ifdef VERSION_EU
-    if (targetLeft == sourceLeft && targetRight == sourceRight && !note->envMixerNeedsInit) {
-#else
-    if (vol->targetLeft == vol->sourceLeft && vol->targetRight == vol->sourceRight
+    if (vol.targetLeft == vol.sourceLeft && vol.targetRight == vol.sourceRight
         && !note->envMixerNeedsInit) {
-#endif
         mixerFlags = A_CONTINUE;
     } else {
         mixerFlags = A_INIT;
 
-#ifdef VERSION_EU
-        rampLeft = gCurrentLeftVolRamping[targetLeft >> 5] * gCurrentRightVolRamping[sourceLeft >> 5];
-        rampRight = gCurrentLeftVolRamping[targetRight >> 5] * gCurrentRightVolRamping[sourceRight >> 5];
-#else
         // volume ramping
         // This roughly computes 2^16 * (targetVol / sourceVol) ^ (8 / arg2),
         // but with discretizations of targetVol, sourceVol and arg2.
-        switch (nSamples) {
-            case 128:
-                rampLeft = gVolRampingLhs128[vol->targetLeft >> (15 - VOL_RAMPING_EXPONENT)] * gVolRampingRhs128[vol->sourceLeft >> (15 - VOL_RAMPING_EXPONENT)];
-                rampRight = gVolRampingLhs128[vol->targetRight >> (15 - VOL_RAMPING_EXPONENT)] * gVolRampingRhs128[vol->sourceRight >> (15 - VOL_RAMPING_EXPONENT)];
-                break;
-            case 144:
-                rampLeft = gVolRampingLhs144[vol->targetLeft >> (15 - VOL_RAMPING_EXPONENT)] * gVolRampingRhs144[vol->sourceLeft >> (15 - VOL_RAMPING_EXPONENT)];
-                rampRight = gVolRampingLhs144[vol->targetRight >> (15 - VOL_RAMPING_EXPONENT)] * gVolRampingRhs144[vol->sourceRight >> (15 - VOL_RAMPING_EXPONENT)];
-                break;
-            case 136:
-            default:
-                rampLeft = gVolRampingLhs136[vol->targetLeft >> (15 - VOL_RAMPING_EXPONENT)] * gVolRampingRhs136[vol->sourceLeft >> (15 - VOL_RAMPING_EXPONENT)];
-                rampRight = gVolRampingLhs136[vol->targetRight >> (15 - VOL_RAMPING_EXPONENT)] * gVolRampingRhs136[vol->sourceRight >> (15 - VOL_RAMPING_EXPONENT)];
-                break;
-        }
-#endif
+        rampLeft = currentRampingTableLeft[vol.targetLeft >> (15 - VOL_RAMPING_EXPONENT)] * currentRampingTableRight[vol.sourceLeft >> (15 - VOL_RAMPING_EXPONENT)];
+        rampRight = currentRampingTableLeft[vol.targetRight >> (15 - VOL_RAMPING_EXPONENT)] * currentRampingTableRight[vol.sourceRight >> (15 - VOL_RAMPING_EXPONENT)];
 
         // The operation's parameters change meanings depending on flags
-#ifdef VERSION_EU
-        aSetVolume(cmd++, A_VOL | A_LEFT, sourceLeft, 0, 0);
-        aSetVolume(cmd++, A_VOL | A_RIGHT, sourceRight, 0, 0);
-        aSetVolume32(cmd++, A_RATE | A_LEFT, targetLeft, rampLeft);
-        aSetVolume32(cmd++, A_RATE | A_RIGHT, targetRight, rampRight);
+        aSetVolume(cmd++, A_VOL | A_LEFT, vol.sourceLeft, 0, 0);
+        aSetVolume(cmd++, A_VOL | A_RIGHT, vol.sourceRight, 0, 0);
+        aSetVolume32(cmd++, A_RATE | A_LEFT, vol.targetLeft, rampLeft);
+        aSetVolume32(cmd++, A_RATE | A_RIGHT, vol.targetRight, rampRight);
         aSetVolume(cmd++, A_AUX, gVolume, 0, note->reverbVol << 8);
-#else
-        aSetVolume(cmd++, A_VOL | A_LEFT, vol->sourceLeft, 0, 0);
-        aSetVolume(cmd++, A_VOL | A_RIGHT, vol->sourceRight, 0, 0);
-        aSetVolume32(cmd++, A_RATE | A_LEFT, vol->targetLeft, rampLeft);
-        aSetVolume32(cmd++, A_RATE | A_RIGHT, vol->targetRight, rampRight);
-        aSetVolume(cmd++, A_AUX, gVolume, 0, note->reverbVolShifted);
-#endif
     }
 
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
-#ifdef VERSION_EU
-    if (gUseReverb && note->reverbVol != 0) {
-        aEnvMixer(cmd++, mixerFlags | A_AUX,
-                  VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->mixEnvelopeState));
-#else
     if (gSynthesisReverb.useReverb && note->reverbVol != 0) {
         aEnvMixer(cmd++, mixerFlags | A_AUX,
                   VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->mixEnvelopeState));
-#endif
         if (note->stereoStrongRight) {
-            aSetBuffer(cmd++, 0, 0, 0, nSamples * 2);
+            aSetBuffer(cmd++, 0, 0, 0, nSamples);
             // 0x8000 is -100%, so subtract sound instead of adding...
             aMix(cmd++, 0, /*gain*/ 0x8000, /*in*/ DMEM_ADDR_STEREO_STRONG_TEMP_DRY, /*out*/ DMEM_ADDR_LEFT_CH);
             aMix(cmd++, 0, /*gain*/ 0x8000, /*in*/ DMEM_ADDR_STEREO_STRONG_TEMP_WET, /*out*/ DMEM_ADDR_WET_LEFT_CH);
         } else if (note->stereoStrongLeft) {
-            aSetBuffer(cmd++, 0, 0, 0, nSamples * 2);
+            aSetBuffer(cmd++, 0, 0, 0, nSamples);
             aMix(cmd++, 0, /*gain*/ 0x8000, /*in*/ DMEM_ADDR_STEREO_STRONG_TEMP_DRY, /*out*/ DMEM_ADDR_RIGHT_CH);
             aMix(cmd++, 0, /*gain*/ 0x8000, /*in*/ DMEM_ADDR_STEREO_STRONG_TEMP_WET, /*out*/ DMEM_ADDR_WET_RIGHT_CH);
         }
     } else {
-#ifdef VERSION_EU
-        aEnvMixer(cmd++, mixerFlags, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->mixEnvelopeState));
-#else
         aEnvMixer(cmd++, mixerFlags, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->mixEnvelopeState));
-#endif
         if (note->stereoStrongRight) {
-            aSetBuffer(cmd++, 0, 0, 0, nSamples * 2);
+            aSetBuffer(cmd++, 0, 0, 0, nSamples);
             aMix(cmd++, 0, /*gain*/ 0x8000, /*in*/ DMEM_ADDR_STEREO_STRONG_TEMP_DRY,
                  /*out*/ DMEM_ADDR_LEFT_CH);
         } else if (note->stereoStrongLeft) {
-            aSetBuffer(cmd++, 0, 0, 0, nSamples * 2);
+            aSetBuffer(cmd++, 0, 0, 0, nSamples);
             aMix(cmd++, 0, /*gain*/ 0x8000, /*in*/ DMEM_ADDR_STEREO_STRONG_TEMP_DRY,
                  /*out*/ DMEM_ADDR_RIGHT_CH);
         }
@@ -1561,41 +946,23 @@ u64 *process_envelope(u64 *cmd, struct NoteSubEu *note, struct NoteSynthesisStat
 }
 
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
-#ifdef VERSION_EU
-u64 *note_apply_headset_pan_effects(u64 *cmd, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *note, s32 bufLen, s32 flags, s32 leftRight) {
-#else
 u64 *note_apply_headset_pan_effects(u64 *cmd, struct Note *note, s32 bufLen, s32 flags, s32 leftRight) {
-#endif
     u16 dest;
     u16 pitch;
-#ifdef VERSION_EU
-    u8 prevPanShift;
-    u8 panShift;
-    UNUSED u8 unkDebug;
-#else
     u16 prevPanShift;
     u16 panShift;
-#endif
 
     switch (leftRight) {
         case 1:
             dest = DMEM_ADDR_LEFT_CH;
-#ifdef VERSION_EU
-            panShift = noteSubEu->headsetPanRight;
-#else
             panShift = note->headsetPanRight;
-#endif
             note->prevHeadsetPanLeft = 0;
             prevPanShift = note->prevHeadsetPanRight;
             note->prevHeadsetPanRight = panShift;
             break;
         case 2:
             dest = DMEM_ADDR_RIGHT_CH;
-#ifdef VERSION_EU
-            panShift = noteSubEu->headsetPanLeft;
-#else
             panShift = note->headsetPanLeft;
-#endif
             note->prevHeadsetPanRight = 0;
 
             prevPanShift = note->prevHeadsetPanLeft;
@@ -1611,8 +978,8 @@ u64 *note_apply_headset_pan_effects(u64 *cmd, struct Note *note, s32 bufLen, s32
             // Kind of a hack that moves the first samples into the resample state
             aDMEMMove(cmd++, DMEM_ADDR_NOTE_PAN_TEMP, DMEM_ADDR_TEMP, 8);
             aClearBuffer(cmd++, 8, 8); // Set pitch accumulator to 0 in the resample state
-            aDMEMMove(cmd++, DMEM_ADDR_NOTE_PAN_TEMP, DMEM_ADDR_TEMP + 0x10,
-                      0x10); // No idea, result seems to be overwritten later
+            aDMEMMove(cmd++, DMEM_ADDR_NOTE_PAN_TEMP, DMEM_ADDR_TEMP + 16,
+                      16); // No idea, result seems to be overwritten later
 
             aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, 32);
             aSaveBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->panResampleState));
@@ -1658,14 +1025,10 @@ u64 *note_apply_headset_pan_effects(u64 *cmd, struct Note *note, s32 bufLen, s32
 }
 #endif
 
-#ifndef VERSION_EU
-// Moved to playback.c in EU
-
 void note_init_volume(struct Note *note) {
     note->targetVolLeft = 0;
     note->targetVolRight = 0;
     note->reverbVol = 0;
-    note->reverbVolShifted = 0;
     note->curVolLeft = 1;
     note->curVolRight = 1;
     note->frequency = 0.0f;
@@ -1728,7 +1091,6 @@ void note_set_vel_pan_reverb(struct Note *note, f32 velocity, f32 pan, u8 reverb
     }
     if (note->reverbVol != reverbVol) {
         note->reverbVol = reverbVol;
-        note->reverbVolShifted = reverbVol << 8;
         note->envMixerNeedsInit = TRUE;
         return;
     }
@@ -1770,4 +1132,3 @@ void note_disable(struct Note *note) {
     note->prevParentLayer = NO_LAYER;
 }
 #endif
-#endif

From 6148582f5e363e6cc13c4731d6cc06a463b82463 Mon Sep 17 00:00:00 2001
From: arthurtilly <32559225+arthurtilly@users.noreply.github.com>
Date: Sun, 17 Dec 2023 17:26:31 +1300
Subject: [PATCH 08/23] remove puppylights (#736)

---
 README.md                        |   1 -
 include/level_commands.h         |   5 +-
 include/object_fields.h          |   3 -
 include/types.h                  |  21 --
 src/boot/main.c                  |   4 -
 src/engine/behavior_script.c     |   5 -
 src/engine/level_script.c        |  49 ----
 src/game/behavior_actions.c      |   1 -
 src/game/level_update.c          |  27 +--
 src/game/obj_behaviors.c         |   1 -
 src/game/obj_behaviors_2.c       |   1 -
 src/game/object_helpers.c        |   4 -
 src/game/object_list_processor.c |   5 -
 src/game/puppylights.c           | 382 -------------------------------
 src/game/puppylights.h           |  57 -----
 src/game/spawn_object.c          |   4 -
 16 files changed, 2 insertions(+), 568 deletions(-)
 delete mode 100644 src/game/puppylights.c
 delete mode 100644 src/game/puppylights.h

diff --git a/README.md b/README.md
index b97dc4722..37850f2d2 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,6 @@ Thanks to Frame#5375 and AloXado320 for also helping with silhouette stuff
 
 **Lighting Engine by Wiseguy**
 - Lighting Engine is available on a separate branch ([base/lighting-engine](https://github.com/Reonu/HackerSM64/tree/base/lighting-engine)). Instructions on how to use it are in the readme of that branch.
-- Alternatively, the main repo has `Puppylights` available, which is a more lightweight, but limited lighting library intended to be used to modify existing light properties. You can look at `puppylights.c` to find out how to use it.
 
 **Puppycam**
 - Puppycam is available on the master branch now, you can toggle it in `config/config_camera.h`. *
diff --git a/include/level_commands.h b/include/level_commands.h
index 323312863..fa8f8d136 100644
--- a/include/level_commands.h
+++ b/include/level_commands.h
@@ -5,7 +5,6 @@
 
 #include "level_table.h"
 #include "config.h"
-#include "game/puppylights.h"
 
 enum LevelCommands {
     /*0x00*/ LEVEL_CMD_LOAD_AND_EXECUTE,
@@ -71,9 +70,7 @@ enum LevelCommands {
     /*0x3C*/ LEVEL_CMD_GET_OR_SET_VAR,
     /*0x3D*/ LEVEL_CMD_PUPPYVOLUME,
     /*0x3E*/ LEVEL_CMD_CHANGE_AREA_SKYBOX,
-    /*0x3F*/ LEVEL_CMD_PUPPYLIGHT_ENVIRONMENT,
-    /*0x40*/ LEVEL_CMD_PUPPYLIGHT_NODE,
-    /*0x41*/ LEVEL_CMD_SET_ECHO,
+    /*0x3F*/ LEVEL_CMD_SET_ECHO,
 };
 
 enum LevelActs {
diff --git a/include/object_fields.h b/include/object_fields.h
index d3487dadc..aa00f163f 100644
--- a/include/object_fields.h
+++ b/include/object_fields.h
@@ -169,9 +169,6 @@
 #define /*0x1BC*/ oAngleToHome                OBJECT_FIELD_S32(0x4D)
 #define /*0x1C0*/ oFloor                      OBJECT_FIELD_SURFACE(0x4E)
 #define /*0x1C4*/ oDeathSound                 OBJECT_FIELD_S32(0x4F)
-#ifdef PUPPYLIGHTS
-#define /*0x1C4*/ oLightID                                      OBJECT_FIELD_S32(0x50)
-#endif
 
 /* Pathed (see obj_follow_path) */
 #define /*0x0FC*/ oPathedStartWaypoint     OBJECT_FIELD_WAYPOINT(0x1D)
diff --git a/include/types.h b/include/types.h
index eeeeac153..68eb13792 100644
--- a/include/types.h
+++ b/include/types.h
@@ -256,27 +256,9 @@ struct ObjectNode {
     struct ObjectNode *prev;
 };
 
-#ifdef PUPPYLIGHTS
-struct PuppyLight {
-    Vec3t pos[2];   // The location of the light. First index is the absolute position, second index are offsets.
-    s16 yaw;        // Used by cubes. Allows epic rotating of the volume.
-    RoomData room;  // Which room to use. -1 is visible from all rooms.
-    s8 epicentre;   // What percentage inside the volume you'll be before maximum light strength is applied. (E.g: 100 will be full strength always, and 0 will be full strength at the centre.)
-    u8 flags;       // Some stuff to define how the volume is used. Mostly just shape stuff, but can potentially have other uses.
-    ColorRGBA rgba; // Colour. Go on, take even the tiniest guess as to what this entails.
-    u8 area;        // Which section of the level this light is stored in.
-    u8 active: 1;   // Whether the light will actually work. Mostly intended to be used for objects.
-};
-#endif
-
 // NOTE: Since ObjectNode is the first member of Object, it is difficult to determine
 // whether some of these pointers point to ObjectNode or Object.
-
-#ifdef PUPPYLIGHTS
-#define MAX_OBJECT_FIELDS 0x51
-#else
 #define MAX_OBJECT_FIELDS 0x50
-#endif
 
 struct Object {
     /*0x000*/ struct ObjectNode header;
@@ -335,9 +317,6 @@ struct Object {
     /*0x218*/ void *collisionData;
     /*0x21C*/ Mat4 transform;
     /*0x25C*/ void *respawnInfo;
-#ifdef PUPPYLIGHTS
-    struct PuppyLight puppylight;
-#endif
 };
 
 struct ObjectHitbox {
diff --git a/src/boot/main.c b/src/boot/main.c
index 9d812aadc..2fe93aaff 100644
--- a/src/boot/main.c
+++ b/src/boot/main.c
@@ -20,7 +20,6 @@
 #include "usb/debug.h"
 #endif
 #include "game/puppyprint.h"
-#include "game/puppylights.h"
 #include "game/profiling.h"
 #include "game/emutest.h"
 
@@ -115,9 +114,6 @@ void alloc_pool(void) {
 
     main_pool_init(start, end);
     gEffectsMemoryPool = mem_pool_init(EFFECTS_MEMORY_POOL, MEMORY_POOL_LEFT);
-#ifdef PUPPYLIGHTS
-    gLightsPool = mem_pool_init(PUPPYLIGHTS_POOL, MEMORY_POOL_LEFT);
-#endif
 }
 
 void create_thread(OSThread *thread, OSId id, void (*entry)(void *), void *arg, void *sp, OSPri pri) {
diff --git a/src/engine/behavior_script.c b/src/engine/behavior_script.c
index 8c918392f..282b8b3b1 100644
--- a/src/engine/behavior_script.c
+++ b/src/engine/behavior_script.c
@@ -14,7 +14,6 @@
 #include "math_util.h"
 #include "graph_node.h"
 #include "surface_collision.h"
-#include "game/puppylights.h"
 
 // Macros for retrieving arguments from behavior scripts.
 #define BHV_CMD_GET_1ST_U8(index)     (u8)((gCurBhvCommand[index] >> 24) & 0xFF) // unused
@@ -911,10 +910,6 @@ void cur_obj_update(void) {
     }
 #endif
 
-#ifdef PUPPYLIGHTS
-    puppylights_object_emit(o);
-#endif
-
     // Handle visibility of object
     if (o->oRoom != -1) {
         // If the object is in a room, only show it when Mario is in the room.
diff --git a/src/engine/level_script.c b/src/engine/level_script.c
index 3dad0c35b..7c471b134 100644
--- a/src/engine/level_script.c
+++ b/src/engine/level_script.c
@@ -28,7 +28,6 @@
 #include "string.h"
 #include "game/puppycam2.h"
 #include "game/puppyprint.h"
-#include "game/puppylights.h"
 #include "game/emutest.h"
 
 #include "config.h"
@@ -859,52 +858,6 @@ static void level_cmd_puppyvolume(void) {
     sCurrentCmd = CMD_NEXT;
 }
 
-static void level_cmd_puppylight_environment(void) {
-#ifdef PUPPYLIGHTS
-    Lights1 temp = gdSPDefLights1(CMD_GET(u8, 2), CMD_GET(u8, 3), CMD_GET(u8, 4),
-                                  CMD_GET(u8, 5), CMD_GET(u8, 6), CMD_GET(u8, 7),
-                                  CMD_GET(u8, 8), CMD_GET(u8, 9), CMD_GET(u8, 10));
-
-    memcpy(&gLevelLight, &temp, sizeof(Lights1));
-    levelAmbient = TRUE;
-#endif
-    sCurrentCmd = CMD_NEXT;
-}
-
-static void level_cmd_puppylight_node(void) {
-#ifdef PUPPYLIGHTS
-    gPuppyLights[gNumLights] = mem_pool_alloc(gLightsPool, sizeof(struct PuppyLight));
-    if (gPuppyLights[gNumLights] == NULL) {
-        append_puppyprint_log("Puppylight allocation failed.");
-        sCurrentCmd = CMD_NEXT;
-        return;
-    }
-
-    vec4_set(gPuppyLights[gNumLights]->rgba, CMD_GET(u8,   2),
-                                             CMD_GET(u8,   3),
-                                             CMD_GET(u8,   4),
-                                             CMD_GET(u8,   5));
-
-    vec3s_set(gPuppyLights[gNumLights]->pos[0], CMD_GET(s16,  6),
-                                                CMD_GET(s16,  8),
-                                                CMD_GET(s16, 10));
-
-    vec3s_set(gPuppyLights[gNumLights]->pos[1], CMD_GET(s16, 12),
-                                                CMD_GET(s16, 14),
-                                                CMD_GET(s16, 16));
-    gPuppyLights[gNumLights]->yaw       = CMD_GET(s16, 18);
-    gPuppyLights[gNumLights]->epicentre = CMD_GET(u8,  20);
-    gPuppyLights[gNumLights]->flags     = CMD_GET(u8,  21);
-    gPuppyLights[gNumLights]->active    = TRUE;
-    gPuppyLights[gNumLights]->area      = sCurrAreaIndex;
-    gPuppyLights[gNumLights]->room      = CMD_GET(s16, 22);
-
-    gNumLights++;
-
-#endif
-    sCurrentCmd = CMD_NEXT;
-}
-
 static void level_cmd_set_echo(void) {
     if (sCurrAreaIndex >= 0 && sCurrAreaIndex < AREA_COUNT) {
         gAreaData[sCurrAreaIndex].useEchoOverride = TRUE;
@@ -980,8 +933,6 @@ static void (*LevelScriptJumpTable[])(void) = {
     /*LEVEL_CMD_GET_OR_SET_VAR              */ level_cmd_get_or_set_var,
     /*LEVEL_CMD_PUPPYVOLUME                 */ level_cmd_puppyvolume,
     /*LEVEL_CMD_CHANGE_AREA_SKYBOX          */ level_cmd_change_area_skybox,
-    /*LEVEL_CMD_PUPPYLIGHT_ENVIRONMENT      */ level_cmd_puppylight_environment,
-    /*LEVEL_CMD_PUPPYLIGHT_NODE             */ level_cmd_puppylight_node,
     /*LEVEL_CMD_SET_ECHO                    */ level_cmd_set_echo,
 };
 
diff --git a/src/game/behavior_actions.c b/src/game/behavior_actions.c
index d2f80578a..d400977ab 100644
--- a/src/game/behavior_actions.c
+++ b/src/game/behavior_actions.c
@@ -44,7 +44,6 @@
 #include "spawn_object.h"
 #include "spawn_sound.h"
 #include "rumble_init.h"
-#include "puppylights.h"
 
 #include "behaviors/star_door.inc.c"
 #include "behaviors/mr_i.inc.c"
diff --git a/src/game/level_update.c b/src/game/level_update.c
index ade153134..086e99722 100644
--- a/src/game/level_update.c
+++ b/src/game/level_update.c
@@ -30,7 +30,6 @@
 #include "rumble_init.h"
 #include "puppycam2.h"
 #include "puppyprint.h"
-#include "puppylights.h"
 #include "level_commands.h"
 
 #include "config.h"
@@ -630,30 +629,16 @@ void initiate_warp(s16 destLevel, s16 destArea, s16 destWarpNode, s32 warpFlags)
     sWarpDest.areaIdx = destArea;
     sWarpDest.nodeId = destWarpNode;
     sWarpDest.arg = warpFlags;
-#if defined(PUPPYCAM) || defined(PUPPYLIGHTS)
-    s32 i = 0;
-#endif
 #ifdef PUPPYCAM
     if (sWarpDest.type == WARP_TYPE_CHANGE_LEVEL)
     {
-        for (i = 0; i < gPuppyVolumeCount; i++)
+        for (s32 i = 0; i < gPuppyVolumeCount; i++)
         {
             mem_pool_free(gPuppyMemoryPool, sPuppyVolumeStack[i]);
         }
         gPuppyVolumeCount = 0;
     }
 #endif
-#ifdef PUPPYLIGHTS
-    if (sWarpDest.type == WARP_TYPE_CHANGE_LEVEL)
-    {
-        for (i = 0; i < gNumLights; i++)
-        {
-            mem_pool_free(gLightsPool, gPuppyLights[i]);
-        }
-        gNumLights = 0;
-        levelAmbient = FALSE;
-    }
-#endif
 }
 
 // From Surface 0xD3 to 0xFC
@@ -992,9 +977,6 @@ void update_hud_values(void) {
 void basic_update(void) {
     area_update_objects();
     update_hud_values();
-#ifdef PUPPYLIGHTS
-    delete_lights();
-#endif
 
     if (gCurrentArea != NULL) {
         update_camera(gCurrentArea->camera);
@@ -1035,9 +1017,6 @@ s32 play_mode_normal(void) {
     area_update_objects();
 #endif
     update_hud_values();
-#ifdef PUPPYLIGHTS
-    delete_lights();
-#endif
     if (gCurrentArea != NULL) {
 #ifdef PUPPYPRINT_DEBUG
 #ifdef BETTER_REVERB
@@ -1318,10 +1297,6 @@ s32 init_level(void) {
         sound_banks_disable(SEQ_PLAYER_SFX, SOUND_BANKS_DISABLED_DURING_INTRO_CUTSCENE);
     }
 
-#ifdef PUPPYLIGHTS
-    puppylights_allocate();
-#endif
-
     append_puppyprint_log("Level loaded in %d" PP_CYCLE_STRING ".", (s32)(PP_CYCLE_CONV(osGetTime() - first)));
     return TRUE;
 }
diff --git a/src/game/obj_behaviors.c b/src/game/obj_behaviors.c
index 46c699df0..0e83b4b38 100644
--- a/src/game/obj_behaviors.c
+++ b/src/game/obj_behaviors.c
@@ -32,7 +32,6 @@
 #include "spawn_object.h"
 #include "spawn_sound.h"
 #include "rumble_init.h"
-#include "puppylights.h"
 
 /**
  * @file obj_behaviors.c
diff --git a/src/game/obj_behaviors_2.c b/src/game/obj_behaviors_2.c
index e7626ab16..90b18e76b 100644
--- a/src/game/obj_behaviors_2.c
+++ b/src/game/obj_behaviors_2.c
@@ -45,7 +45,6 @@
 #include "save_file.h"
 #include "seq_ids.h"
 #include "spawn_sound.h"
-#include "puppylights.h"
 
 //! TODO: remove static
 
diff --git a/src/game/object_helpers.c b/src/game/object_helpers.c
index 5df500a47..33aabfa4c 100644
--- a/src/game/object_helpers.c
+++ b/src/game/object_helpers.c
@@ -26,7 +26,6 @@
 #include "rendering_graph_node.h"
 #include "spawn_object.h"
 #include "spawn_sound.h"
-#include "puppylights.h"
 
 static s32 clear_move_flag(u32 *bitSet, s32 flag);
 
@@ -878,9 +877,6 @@ s32 cur_obj_clear_interact_status_flag(s32 flag) {
  * Mark an object to be unloaded at the end of the frame.
  */
 void obj_mark_for_deletion(struct Object *obj) {
-#ifdef PUPPYLIGHTS
-    obj_disable_light(obj);
-#endif
     //! This clears all activeFlags. Since some of these flags disable behavior,
     //  setting it to 0 could potentially enable unexpected behavior. After an
     //  object is marked for deletion, it still updates on that frame (I think),
diff --git a/src/game/object_list_processor.c b/src/game/object_list_processor.c
index abb281834..5dd018e24 100644
--- a/src/game/object_list_processor.c
+++ b/src/game/object_list_processor.c
@@ -20,7 +20,6 @@
 #include "platform_displacement.h"
 #include "spawn_object.h"
 #include "puppyprint.h"
-#include "puppylights.h"
 #include "profiling.h"
 
 
@@ -382,10 +381,6 @@ s32 unload_deactivated_objects_in_list(struct ObjectNode *objList) {
         obj = obj->next;
 
         if ((gCurrentObject->activeFlags & ACTIVE_FLAG_ACTIVE) != ACTIVE_FLAG_ACTIVE) {
-#ifdef PUPPYLIGHTS
-            if (gCurrentObject->oLightID != 0xFFFF)
-                obj_disable_light(gCurrentObject);
-#endif
             // Prevent object from respawning after exiting and re-entering the
             // area
             if (!(gCurrentObject->oFlags & OBJ_FLAG_PERSISTENT_RESPAWN)) {
diff --git a/src/game/puppylights.c b/src/game/puppylights.c
deleted file mode 100644
index ad01a4e20..000000000
--- a/src/game/puppylights.c
+++ /dev/null
@@ -1,382 +0,0 @@
-///Puppylights 2.0 by Fazana. What happened to 1.0? Tragic accident.
-/**
-Intended for use with manipulating existing Lights1 structs for objects in real time.
-Can support static lights that are loaded with the level, or lights created by objects.
-
-Puppylights is generally intended to be used with things that don't directly use lights to colour
-themselves. Inside the main function, you can pass through a colour to override the default light
-but it will not be affected by environmental tinting. If you wish for an object to emit a light,
-simply set the object flag OBJ_FLAG_EMIT_LIGHT and set some values to o->puppylight.
-
-For easy light modification, you can call set_light_properties, so set all the attributes of any
-given loaded puppylight struct. Objects will ignore x, y, z, active and room, as it will set all
-of these automatically. It will force the PUPPYLIGHT_DYNAMIC flag, too.
-
-If you're introducing a static light in the level script with PUPPYLIGHT_NODE, ensure it's contained
-inside the respective area node it's going to be inside, otherwise it will not show up. If you do not
-use rooms in your level, or if you wish for this light to be seen from any room, use -1 for that param.
-
-If you have visual debug enabled, light nodes will show up as magenta in the world. They will be
-shaped and rotated correctly, for accurate representation of their properties.
-**/
-
-#include <ultra64.h>
-#include "types.h"
-#include "puppylights.h"
-#include "area.h"
-#include "engine/math_util.h"
-#include "string.h"
-#include "object_fields.h"
-#include "object_constants.h"
-#include "camera.h"
-#include "memory.h"
-#include "print.h"
-#include "debug_box.h"
-#include "object_list_processor.h"
-#include "level_update.h"
-#include "engine/surface_collision.h"
-#include "surface_terrains.h"
-
-#ifdef PUPPYLIGHTS
-
-Lights1 gLevelLight; // Existing ambient light in the area. Will be set by the level script, though can always be changed afterwards if desired.
-u8 levelAmbient = FALSE;
-Lights1 *sLightBase; // The base value where lights are written to when worked with.
-Lights1 sDefaultLights = gdSPDefLights1(0x7F, 0x7F, 0x7F, 0xFE, 0xFE, 0xFE, 0x28, 0x28, 0x28); // Default lights default lights
-u16 gNumLights = 0; // How many lights are loaded.
-u16 gDynLightStart = 0; // Where the dynamic lights will start.
-struct PuppyLight *gPuppyLights[MAX_LIGHTS]; // This contains all the loaded data.
-struct MemoryPool *gLightsPool; // The memory pool where the above is stored.
-
-// Runs after an area load, allocates the dynamic light slots.
-void puppylights_allocate(void) {
-    s32 numAllocate = MIN(MAX_LIGHTS - gNumLights, MAX_LIGHTS_DYNAMIC);
-    s32 i;
-
-    gDynLightStart = gNumLights;
-
-    if (numAllocate <= 0) { // If this happens you've allocated too many static lights and therefore cucked dynamic.
-        return;
-    }
-    // Now it has the number it wants, it will allocate this many extra lights, intended for dynamic lights.
-    for (i = 0; i < numAllocate; i++) {
-        gPuppyLights[gNumLights] = mem_pool_alloc(gLightsPool, sizeof(struct PuppyLight));
-        if (gPuppyLights[gNumLights] == NULL) {
-            return;
-        }
-        gPuppyLights[gNumLights]->active = FALSE;
-        gPuppyLights[gNumLights]->flags = 0;
-        gNumLights++;
-    }
-}
-
-extern Mat4 gMatStack[32];
-
-// Function that iterates through each light.
-void puppylights_iterate(struct PuppyLight *light, Lights1 *src, struct Object *obj, s32 flags) {
-    Lights1 *tempLight;
-    s32 lightPos[2];
-    Vec3i lightRelative;
-    Vec3i lightDir = {0, 0, 0};
-    s32 i;
-    s32 colour;
-    s32 ambient;
-    f64 scaleOrig;
-    f32 scale;
-    f32 scale2;
-    f64 scaleVal = 1.0f;
-    Vec3f debugPos[2];
-
-    // Relative positions of the object vs. the centre of the node.
-    lightRelative[0] = light->pos[0][0] - obj->oPosX;
-    lightRelative[1] = light->pos[0][1] - obj->oPosY;
-    lightRelative[2] = light->pos[0][2] - obj->oPosZ;
-
-    // If the nodes X and Z values are equal, then a check is made if the angle is a derivative of 90.
-    // If so, then it will completely skip over the calculation that figures out position from rotation.
-    // If it's a cylinder, then it ignores that check, simply because an equal sided cylinder will have the
-    // same result no matter the yaw. If neither is true, then it simply checks if it's 180 degrees, since
-    // That will just be the same as 0.
-    if (light->pos[1][0] == light->pos[1][2]) {
-        if (light->yaw % 0x4000 == 0 || light->flags & PUPPYLIGHT_SHAPE_CYLINDER) {
-            lightPos[0] = lightRelative[0];
-            lightPos[1] = lightRelative[2];
-            goto skippingTrig;
-        }
-    } else if (light->yaw % 0x8000 == 0) {
-        lightPos[0] = lightRelative[0];
-        lightPos[1] = lightRelative[2];
-        goto skippingTrig;
-    }
-
-    // Get the position based off the rotation of the box.
-    lightPos[0] = lightRelative[2] * sins(-light->yaw) + lightRelative[0] * coss(-light->yaw);
-    lightPos[1] = lightRelative[2] * coss(-light->yaw) - lightRelative[0] * sins(-light->yaw);
-    skippingTrig:
-
-#ifdef VISUAL_DEBUG
-    vec3f_set(debugPos[0], light->pos[0][0], light->pos[0][1], light->pos[0][2]);
-    vec3f_set(debugPos[1], light->pos[1][0], light->pos[1][1], light->pos[1][2]);
-    debug_box_color(0xFF00FF08);
-    if (light->flags & PUPPYLIGHT_SHAPE_CYLINDER) {
-        debug_box_rot(debugPos[0], debugPos[1], light->yaw, DEBUG_SHAPE_CYLINDER | DEBUG_UCODE_DEFAULT);
-    } else {
-        debug_box_rot(debugPos[0], debugPos[1], light->yaw, DEBUG_SHAPE_BOX | DEBUG_UCODE_DEFAULT);
-    }
-#endif
-    // Check if the object is inside the box, after correcting it for rotation.
-    if (-light->pos[1][0] < lightPos[0] && lightPos[0] < light->pos[1][0] &&
-        -light->pos[1][1] < lightRelative[1] && lightRelative[1] < light->pos[1][1] &&
-        -light->pos[1][2] < lightPos[1] && lightPos[1] < light->pos[1][2]) {
-        // If so, then start making preparations to see how alongside they're in.
-        // This takes the largest side of the box and multiplies the other axis to match the numbers.
-        // This way, the colour value will scale correctly, no matter which side is entered.
-        // Because positions are a vector, and Y is up, it means tempID needs to be multiplied
-        // By 2 in order to reach the X and Z axis. Thanks SM64.
-        // It will skip scaling the opposite axis if there's no need to.
-
-        // Every axis needs to be the same as Z, so X and Y, if necessary, will be scaled to match it.
-        // This is done, so that when calculating scale, it's done spherically.
-        if (light->pos[1][0] != light->pos[1][2]) {
-            lightPos[0] /= ((f32)light->pos[1][0] / light->pos[1][2]);
-        }
-        // Same for Y axis.
-        if (light->pos[1][1] != light->pos[1][2]) {
-            lightRelative[1] /= ((f32)light->pos[1][1] / light->pos[1][2]);
-        }
-        if (light->flags & PUPPYLIGHT_IGNORE_Y) {
-            scaleOrig = sqr(lightPos[0]) + sqr(lightPos[1]);
-        } else {
-            scaleOrig = sqr(lightPos[0]) + sqr(lightRelative[1]) + sqr(lightPos[1]);
-        }
-        scaleVal = (light->pos[1][2]*light->pos[1][2]);
-        // If it's a cylinder, then bin anything outside it.
-        if (light->flags & PUPPYLIGHT_SHAPE_CYLINDER) {
-            if (scaleOrig > scaleVal) {
-                return;
-            }
-        }
-    }
-    else
-        return;
-
-    f32 epc = (f32)(light->epicentre/100.0f);
-    tempLight = segmented_to_virtual(src);
-    //Now we have a scale value and a scale factor, we can start lighting things up.
-    // Convert to a percentage.
-    scale = CLAMP(scaleOrig/scaleVal, 0.0f, 1.0f);
-    // Reduce scale2 by the epicentre.
-    scale2 = CLAMP((scale - epc) * (1 + epc), 0.0f, 1.0f);
-
-    // Get the direction numbers we want by applying some maths to the relative positions. We use 64 because light directions range from -64 to 63.
-    // Note: can this be optimised further? Simply squaring lightRelative and then dividing it by preScale doesn't work.
-    if (light->flags & PUPPYLIGHT_DIRECTIONAL) {
-        lightDir[0] = ((lightRelative[0]) * 64.0f) / light->pos[1][0];
-        lightDir[1] = ((lightRelative[1]) * 64.0f) / light->pos[1][1];
-        lightDir[2] = ((lightRelative[2]) * 64.0f) / light->pos[1][2];
-    }
-    //Get direction if applicable.
-    for (i = 0; i < 3; i++) {
-        //So it works by starting from the final colour, and then lerping to the original colour, by a factor of the epicentre corrected scale. Light opacity affects this further.
-        colour = approach_f32_asymptotic(light->rgba[i], tempLight->l[0].l.col[i], scale2 * ((f32)light->rgba[3]/255.0f));
-        // If it's a directional light, then increase the current ambient by 50%, to give the effect better.
-        // Otherwise, just normalise the brightness to keep it in line with the current ambient.
-        // And now to apply the values.
-        tempLight->l[0].l.col[i] = colour;
-        tempLight->l[0].l.colc[i] = colour;
-        // Ambient, too.
-        if (!(light->flags & PUPPYLIGHT_DIRECTIONAL)) {
-            ambient = approach_f32_asymptotic(light->rgba[i]/2, tempLight->a.l.col[i], scale*((f32)light->rgba[3] / 255.0f));
-            tempLight->a.l.col[i] = ambient;
-            tempLight->a.l.colc[i] = ambient;
-        }
-        // A slightly hacky way to offset the ambient lighting in order to prevent directional lighting from having a noticeable change in ambient brightness.
-        if (flags & LIGHTFLAG_DIRECTIONAL_OFFSET) {
-            ambient = approach_f32_asymptotic(MIN(tempLight->a.l.col[i] * 2, 0xFF), tempLight->a.l.col[i], scale2*((f32)light->rgba[3] / 255.0f));
-            tempLight->a.l.col[i] = ambient;
-            tempLight->a.l.colc[i] = ambient;
-        }
-        // Apply direction. It takes the relative positions, and then multiplies them with the perspective matrix to get a correct direction.
-        // Index 1 of the first dimension of gMatStack is perspective. Note that if you ever decide to cheat your way into rendering things after the game does :^)
-        if (light->flags & PUPPYLIGHT_DIRECTIONAL) {
-            tempLight->l->l.dir[i] = approach_f32_asymptotic((s8)(lightDir[0] * gMatStack[1][0][i] + lightDir[1] * gMatStack[1][1][i] + lightDir[2] * gMatStack[1][2][i]), tempLight->l->l.dir[i], scale);
-        }
-    }
-}
-
-// Main function. Run this in the object you wish to illuminate, and just give it its light, object pointer and any potential flags if you want to use them.
-// If the object has multiple lights, then you run this for each light.
-void puppylights_run(Lights1 *src, struct Object *obj, s32 flags, u32 baseColour) {
-    s32 i;
-    s32 numlights = 0;
-    s32 offsetPlaced = 0;
-    s32 lightFlags = flags;
-
-    if (gCurrLevelNum < LEVEL_BBH) {
-        return;
-    }
-    // Checks if there's a hardset colour. Colours are only the first 3 bytes, so you can really put whatever you want in the last.
-    // If there isn't a colour, then it decides whether to apply the ambient lighting, or the default lighting as the baseline.
-    // Otherwise, it hardsets a colour to begin with. I don't recommend you use this, simply because it's intended to be used
-    // As a hacky quick-fix for models coloured by lights. Lightcoloured models don't blend nearly as nicely as ones coloured
-    // By other means.
-    if (baseColour < 0x100) {
-        sLightBase = (levelAmbient ? &gLevelLight : &sDefaultLights);
-    } else {
-        s32 colour;
-        sLightBase = (levelAmbient) ? &gLevelLight : &sDefaultLights;
-        for (i = 0; i < 3; i++) {
-            colour = (((baseColour >> (24-(i*8)))) & 0xFF);
-            sLightBase->l[0].l.col[i] = colour;
-            sLightBase->l[0].l.colc[i] = colour;
-            sLightBase->a.l.col[i] = colour/2;
-            sLightBase->a.l.colc[i] = colour/2;
-            sLightBase->l->l.dir[i] = 0x28;
-        }
-    }
-    memcpy(segmented_to_virtual(src), &sLightBase[0], sizeof(Lights1));
-
-    for (i = 0; i < gNumLights; i++) {
-        if (gPuppyLights[i]->rgba[3] > 0 && gPuppyLights[i]->active == TRUE && gPuppyLights[i]->area == gCurrAreaIndex && (gPuppyLights[i]->room == -1 || gPuppyLights[i]->room == gMarioCurrentRoom)) {
-            if (gPuppyLights[i]->flags & PUPPYLIGHT_DIRECTIONAL && !offsetPlaced) {
-                lightFlags |= LIGHTFLAG_DIRECTIONAL_OFFSET;
-                offsetPlaced = 1;
-            } else {
-                lightFlags &= ~LIGHTFLAG_DIRECTIONAL_OFFSET;
-            }
-            puppylights_iterate(gPuppyLights[i], src, obj, lightFlags);
-            numlights++;
-        }
-    }
-}
-
-// Sets and updates dynamic lights from objects.
-// 0xFFFF is essentially the null ID. If the display flag is met, it will find and set an ID, otherwise it frees up the spot.
-void puppylights_object_emit(struct Object *obj) {
-    s32 i;
-    if (gCurrLevelNum < LEVEL_BBH) {
-        return;
-    }
-    if (obj->oFlags & OBJ_FLAG_EMIT_LIGHT) {
-        f64 dist = ((obj->oPosX - gMarioState->pos[0]) * (obj->oPosX - gMarioState->pos[0])) +
-               ((obj->oPosY - gMarioState->pos[1]) * (obj->oPosY - gMarioState->pos[1])) +
-               ((obj->oPosZ - gMarioState->pos[2]) * (obj->oPosZ - gMarioState->pos[2]));
-        f64 lightSize = ((obj->puppylight.pos[1][0]) * (obj->puppylight.pos[1][0])) +
-                        ((obj->puppylight.pos[1][1]) * (obj->puppylight.pos[1][1])) +
-                        ((obj->puppylight.pos[1][2]) * (obj->puppylight.pos[1][2]));
-        if (dist > lightSize) {
-            goto deallocate; // That's right. I used a goto. Eat your heart out xkcd.
-        }
-        if (obj->oLightID == 0xFFFF) {
-            s32 fadingExists = FALSE;
-            if (ABS(gNumLights - gDynLightStart) < MAX_LIGHTS_DYNAMIC) {
-                goto deallocate;
-            }
-            for (i = gDynLightStart; i < MIN(gDynLightStart+MAX_LIGHTS_DYNAMIC, MAX_LIGHTS); i++) {
-                if (gPuppyLights[i]->active == TRUE) {
-                    if (gPuppyLights[i]->flags & PUPPYLIGHT_DELETE) {
-                        fadingExists = TRUE;
-                    }
-                    continue;
-                }
-                memcpy(gPuppyLights[i], &obj->puppylight, sizeof(struct PuppyLight));
-                gPuppyLights[i]->active = TRUE;
-                gPuppyLights[i]->area = gCurrAreaIndex;
-                gPuppyLights[i]->room = obj->oRoom;
-                obj->oLightID = i;
-                goto updatepos;
-            }
-            // Go through all the lights again, now this time, ignore the fading light flag and overwrite them.
-            if (fadingExists) {
-                for (i = gDynLightStart; i < MIN(gDynLightStart+MAX_LIGHTS_DYNAMIC, MAX_LIGHTS); i++) {
-                    if (gPuppyLights[i]->active == TRUE && !(gPuppyLights[i]->flags & PUPPYLIGHT_DELETE)) {
-                        continue;
-                    }
-                    memcpy(gPuppyLights[i], &obj->puppylight, sizeof(struct PuppyLight));
-                    gPuppyLights[i]->active = TRUE;
-                    gPuppyLights[i]->area = gCurrAreaIndex;
-                    gPuppyLights[i]->room = obj->oRoom;
-                    gPuppyLights[i]->flags &= ~PUPPYLIGHT_DELETE;
-                    obj->oLightID = i;
-                    goto updatepos;
-                }
-            }
-        } else {
-            updatepos:
-            gPuppyLights[obj->oLightID]->pos[0][0] = obj->oPosX;
-            gPuppyLights[obj->oLightID]->pos[0][1] = obj->oPosY;
-            gPuppyLights[obj->oLightID]->pos[0][2] = obj->oPosZ;
-        }
-    } else {
-        deallocate:
-        if (obj->oLightID != 0xFFFF) {
-            gPuppyLights[obj->oLightID]->active = FALSE;
-            gPuppyLights[obj->oLightID]->flags = 0;
-        }
-        obj->oLightID = 0xFFFF;
-    }
-}
-
-// A bit unorthodox, but anything to avoid having to set up data to pass through in the original function.
-// Objects will completely ignore X, Y, Z and active though.
-void set_light_properties(struct PuppyLight *light, s32 x, s32 y, s32 z, s32 offsetX, s32 offsetY, s32 offsetZ, s32 yaw, s32 epicentre, s32 colour, s32 flags, s32 room, s32 active) {
-    light->active = active;
-    light->pos[0][0] = x;
-    light->pos[0][1] = y;
-    light->pos[0][2] = z;
-    light->pos[1][0] = MAX(offsetX, 10);
-    light->pos[1][1] = MAX(offsetY, 10);
-    light->pos[1][2] = MAX(offsetZ, 10);
-    light->rgba[0] = (colour >> 24) & 0xFF;
-    light->rgba[1] = (colour >> 16) & 0xFF;
-    light->rgba[2] = (colour >>  8) & 0xFF;
-    light->rgba[3] = colour & 0xFF;
-    light->yaw = yaw;
-    light->area = gCurrAreaIndex;
-    light->room = room;
-    light->epicentre = epicentre;
-    if (!(flags & PUPPYLIGHT_SHAPE_CYLINDER) && flags & PUPPYLIGHT_SHAPE_CUBE)
-        light->flags |= PUPPYLIGHT_SHAPE_CYLINDER;
-    light->flags |= flags | PUPPYLIGHT_DYNAMIC;
-}
-
-// You can run these in objects to enable or disable their light properties.
-void cur_obj_enable_light(void) {
-    gCurrentObject->oFlags |= OBJ_FLAG_EMIT_LIGHT;
-}
-
-void cur_obj_disable_light(void) {
-    gCurrentObject->oFlags &= ~OBJ_FLAG_EMIT_LIGHT;
-    if (gPuppyLights[gCurrentObject->oLightID] && gCurrentObject->oLightID != 0xFFFF)
-        gPuppyLights[gCurrentObject->oLightID]->flags |= PUPPYLIGHT_DELETE;
-}
-
-void obj_enable_light(struct Object *obj) {
-    obj->oFlags |= OBJ_FLAG_EMIT_LIGHT;
-}
-
-void obj_disable_light(struct Object *obj) {
-    obj->oFlags &= ~OBJ_FLAG_EMIT_LIGHT;
-    if (gPuppyLights[obj->oLightID] && obj->oLightID != 0xFFFF) {
-        gPuppyLights[obj->oLightID]->flags |= PUPPYLIGHT_DELETE;
-    }
-}
-
-// This is ran during a standard area update
-void delete_lights(void) {
-    s32 i;
-
-    for (i = 0; i < gNumLights; i++) {
-        if (gPuppyLights[i]->active == TRUE && gPuppyLights[i]->flags & PUPPYLIGHT_DELETE) {
-            gPuppyLights[i]->pos[1][0] = approach_f32_asymptotic(gPuppyLights[i]->pos[1][0], 0, 0.15f);
-            gPuppyLights[i]->pos[1][1] = approach_f32_asymptotic(gPuppyLights[i]->pos[1][1], 0, 0.15f);
-            gPuppyLights[i]->pos[1][2] = approach_f32_asymptotic(gPuppyLights[i]->pos[1][2], 0, 0.15f);
-            if (gPuppyLights[i]->pos[1][0] < 1.0f && gPuppyLights[i]->pos[1][1] < 1.0f && gPuppyLights[i]->pos[1][2] < 1.0f) {
-                gPuppyLights[i]->flags &= ~ PUPPYLIGHT_DELETE;
-                gPuppyLights[i]->active = FALSE;
-            }
-        }
-    }
-}
-
-#endif
diff --git a/src/game/puppylights.h b/src/game/puppylights.h
deleted file mode 100644
index bd0bcbcf9..000000000
--- a/src/game/puppylights.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifdef PUPPYLIGHTS
-#ifndef PUPPYLIGHTS_H
-#define PUPPYLIGHTS_H
-
-#include "types.h"
-#include "command_macros_base.h"
-
-// The maximum number of lights that can be loaded at once. Any further lights that attempt to be created past this will simply not spawn.
-#define MAX_LIGHTS 32
-// The maximum number of dynamic lights available at one time.
-#define MAX_LIGHTS_DYNAMIC 8
-
-// Two shapes. Choose your destiny.
-#define PUPPYLIGHT_SHAPE_CUBE     (1 << 0) // 0x01
-#define PUPPYLIGHT_SHAPE_CYLINDER (1 << 1) // 0x02
-#define PUPPYLIGHT_DYNAMIC        (1 << 2) // 0x04
-#define PUPPYLIGHT_DIRECTIONAL    (1 << 3) // 0x08
-#define PUPPYLIGHT_SHADOW         (1 << 4) // 0x10
-#define PUPPYLIGHT_WET            (1 << 5) // 0x20
-#define PUPPYLIGHT_DELETE         (1 << 6) // 0x40
-#define PUPPYLIGHT_IGNORE_Y       (1 << 7) // 0x80
-
-#define LIGHTFLAG_DIRECTIONAL_OFFSET    0x1
-
-#define PUPPYLIGHT_ENVIRONMENT(ambientR, ambientG, ambientB, diffuseR, diffuseG, diffuseB, diffuseX, diffuseY, diffuseZ) \
-    CMD_BBBB(0x3F, 0x0C, ambientR, ambientG), \
-    CMD_BBBB(ambientB, diffuseR, diffuseG, diffuseB), \
-    CMD_BBBB(diffuseX, diffuseY, diffuseZ, 0x0)
-
-#define PUPPYLIGHT_NODE(r, g, b, a, x, y, z, offsetX, offsetY, offsetZ, yaw, epicentre, flags, room) \
-    CMD_BBBB(0x40, 0x18, r, g), \
-    CMD_BBH(b, a, x), \
-    CMD_HH(y, z), \
-    CMD_HH(offsetX, offsetY), \
-    CMD_HH(offsetZ, yaw), \
-    CMD_BBH(epicentre, flags, room)
-
-//How much RAM is allocated to puppylights
-#define PUPPYLIGHTS_POOL sizeof(struct PuppyLight) * MAX_LIGHTS
-
-extern Lights1 gLevelLight;
-extern u16 gNumLights;
-extern u8 levelAmbient;
-extern struct PuppyLight *gPuppyLights[MAX_LIGHTS];
-extern struct MemoryPool *gLightsPool;
-extern void puppylights_run(Lights1 *src, struct Object *obj, s32 flags, u32 baseColour);
-extern void puppylights_object_emit(struct Object *obj);
-extern void cur_obj_enable_light(void);
-extern void cur_obj_disable_light(void);
-extern void obj_enable_light(struct Object *obj);
-extern void obj_disable_light(struct Object *obj);
-extern void set_light_properties(struct PuppyLight *light, s32 x, s32 y, s32 z, s32 offsetX, s32 offsetY, s32 offsetZ, s32 yaw, s32 epicentre, s32 colour, s32 flags, s32 room, s32 active);
-extern void puppylights_allocate(void);
-extern void delete_lights(void);
-
-#endif
-#endif
diff --git a/src/game/spawn_object.c b/src/game/spawn_object.c
index 92f7beaea..c0ec32332 100644
--- a/src/game/spawn_object.c
+++ b/src/game/spawn_object.c
@@ -12,7 +12,6 @@
 #include "object_list_processor.h"
 #include "spawn_object.h"
 #include "types.h"
-#include "puppylights.h"
 
 /**
  * Attempt to allocate an object from freeList (singly linked) and append it
@@ -190,9 +189,6 @@ struct Object *allocate_object(struct ObjectNode *objList) {
     obj->header.gfx.node.flags &= ~GRAPH_RENDER_INVISIBLE;
     vec3_same(obj->header.gfx.pos, -10000.0f);
     obj->header.gfx.throwMatrix = NULL;
-#ifdef PUPPYLIGHTS
-    obj->oLightID = 0xFFFF;
-#endif
 
     return obj;
 }

From 1d7a690361057f55afa6de99cd2ea37f87f0be92 Mon Sep 17 00:00:00 2001
From: Gregory Heskett <gheskett@gmail.com>
Date: Sat, 30 Dec 2023 22:22:05 -0600
Subject: [PATCH 09/23] Optimize BETTER_REVERB lightweight by an additional
 ~20% CPU, while also cutting memory requirements for it by ~30% (#744)

---
 src/audio/data.c      |  6 ++---
 src/audio/synthesis.c | 59 +++++++++++++++++++++----------------------
 src/audio/synthesis.h | 16 ++++++------
 3 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/src/audio/data.c b/src/audio/data.c
index bf8f0de33..2c16194ba 100644
--- a/src/audio/data.c
+++ b/src/audio/data.c
@@ -73,7 +73,7 @@ u8 sReverbMultsArr[][NUM_ALLPASS / 3] = {
 
 /**
  * Format:
- * - useLightweightSettings (Reduce some runtime configurability options in favor of a slight speed boost during processing; Light configurability settings are found in synthesis.h)
+ * - useLightweightSettings (Reduce some runtime configurability options in favor of a significant speed boost during processing; Light configurability settings are found in synthesis.h)
  * - downsampleRate         (Higher values exponentially reduce the number of input samples to process, improving perfomance at cost of quality; number <= 0 signifies use of vanilla reverb)
  * - isMono                 (Only process reverb on the left channel and share it with the right channel, improving performance at cost of quality)
  * - filterCount            (Number of filters to process data with; in general, more filters means higher quality at the cost of performance demand; always 3 with light settings)
@@ -85,8 +85,8 @@ u8 sReverbMultsArr[][NUM_ALLPASS / 3] = {
  * 
  * - *delaysL               (Advanced parameter; array of variable audio buffer sizes / delays for each respective filter [left channel])
  * - *delaysR               (Advanced parameter; array of variable audio buffer sizes / delays for each respective filter [right channel])
- * - *reverbMultsL          (Advanced parameter; array of multipliers applied to the final output of each group of 3 filters [left channel]; overridden when using light settings)
- * - *reverbMultsR          (Advanced parameter; array of multipliers applied to the final output of each group of 3 filters [right channel]; overridden when using light settings)
+ * - *reverbMultsL          (Advanced parameter; array of multipliers applied to the final output of each group of 3 filters [left channel]; unused when using light settings)
+ * - *reverbMultsR          (Advanced parameter; array of multipliers applied to the final output of each group of 3 filters [right channel]; unused when using light settings)
  * 
  * NOTE: The first entry will always be used by default when not using the level commands to specify a preset.
  * Please reference the HackerSM64 Wiki for more descriptive documentation of these parameters and usage of BETTER_REVERB in general.
diff --git a/src/audio/synthesis.c b/src/audio/synthesis.c
index 4b866a726..435f219aa 100644
--- a/src/audio/synthesis.c
+++ b/src/audio/synthesis.c
@@ -47,11 +47,11 @@ u8 toggleBetterReverb = FALSE;
 u8 betterReverbLightweight = FALSE;
 u8 monoReverb;
 s8 betterReverbDownsampleRate;
-static s32        reverbMults[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS / 3] = {0};
-static s32         allpassIdx[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS] = {0};
-static s32 betterReverbDelays[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS] = {0};
-static s32     lastDelayLight[SYNTH_CHANNEL_STEREO_COUNT];
-static s16        **delayBufs[SYNTH_CHANNEL_STEREO_COUNT];
+static s32         reverbMults[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS / 3] = {0};
+static s32          allpassIdx[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS] = {0};
+static s32  betterReverbDelays[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS] = {0};
+static s32 historySamplesLight[SYNTH_CHANNEL_STEREO_COUNT];
+static s16         **delayBufs[SYNTH_CHANNEL_STEREO_COUNT];
 u8 *gReverbMults[SYNTH_CHANNEL_STEREO_COUNT];
 s32 reverbLastFilterIndex;
 s32 reverbFilterCount;
@@ -106,6 +106,7 @@ static void reverb_samples(s16 *start, s16 *end, s16 *downsampleBuffer, s32 chan
     j = 0;
 
     for (; start < end; start++, downsampleBuffer += downsampleIncrement) {
+        // Mix the very last filter output with new incoming sample
         tmpCarryover = ((delayBufsLocal[lastFilterIndex][allpassIdxLocal[lastFilterIndex]] * revIndex) >> 8) + *downsampleBuffer;
         outSampleTotal = 0;
         i = 0;
@@ -134,7 +135,6 @@ static void reverb_samples(s16 *start, s16 *end, s16 *downsampleBuffer, s32 chan
     }
 }
 
-#define FILTERS_MINUS_1 (BETTER_REVERB_FILTER_COUNT_LIGHT - 1)
 static void reverb_samples_light(s16 *start, s16 *end, s16 *downsampleBuffer, s32 channel) {
     s16 *curDelaySample;
     s32 historySample;
@@ -144,14 +144,16 @@ static void reverb_samples_light(s16 *start, s16 *end, s16 *downsampleBuffer, s3
     s32 downsampleIncrement = gReverbDownsampleRate;
     s32 *delaysLocal = betterReverbDelays[channel];
     s32 *allpassIdxLocal = allpassIdx[channel];
-    s32 lastDelayLightLocal = lastDelayLight[channel];
     s16 **delayBufsLocal = delayBufs[channel];
 
-    for (; start < end; start++, downsampleBuffer += downsampleIncrement) {
-        tmpCarryover = (((delayBufsLocal[FILTERS_MINUS_1][allpassIdxLocal[FILTERS_MINUS_1]] * BETTER_REVERB_REVERB_INDEX_LIGHT) >> 8) + *downsampleBuffer);
-        i = 0;
+    // Get history sample from last processing tick
+    tmpCarryover = historySamplesLight[channel];
 
-        for (; i < FILTERS_MINUS_1; ++i) {
+    for (; start < end; start++, downsampleBuffer += downsampleIncrement) {
+        // Mix previous sample with new incoming sample
+        tmpCarryover = ((tmpCarryover * BETTER_REVERB_REVERB_INDEX_LIGHT) >> 8) + *downsampleBuffer;
+
+        for (i = 0; i < BETTER_REVERB_FILTER_COUNT_LIGHT; ++i) {
             curDelaySample = &delayBufsLocal[i][allpassIdxLocal[i]];
             historySample = *curDelaySample;
 
@@ -162,16 +164,13 @@ static void reverb_samples_light(s16 *start, s16 *end, s16 *downsampleBuffer, s3
             if (++allpassIdxLocal[i] == delaysLocal[i]) allpassIdxLocal[i] = 0;
         }
 
-        curDelaySample = &delayBufsLocal[FILTERS_MINUS_1][allpassIdxLocal[FILTERS_MINUS_1]];
-        historySample = ((*curDelaySample * BETTER_REVERB_MULTIPLE_LIGHT) >> 8); // outSampleTotal variable not needed, as there is no sample addition happening here. Not really a history sample though.
-        *curDelaySample = CLAMP_S16(tmpCarryover);
-
-        if (++allpassIdxLocal[FILTERS_MINUS_1] == lastDelayLightLocal) allpassIdxLocal[FILTERS_MINUS_1] = 0;
-
-        *start = CLAMP_S16(historySample);
+        // Lightweight does not use the final filter type at all, unlike standard reverb processing
+        *start = CLAMP_S16(tmpCarryover);
     }
+    
+    // Copy history sample to temporary buffer for processing next tick
+    historySamplesLight[channel] = tmpCarryover;
 }
-#undef FILTERS_MINUS_1
 
 void initialize_better_reverb_buffers(void) {
     delayBufs[SYNTH_CHANNEL_LEFT] = (s16**) soundAlloc(&gBetterReverbPool, BETTER_REVERB_PTR_SIZE);
@@ -180,8 +179,11 @@ void initialize_better_reverb_buffers(void) {
 
 void set_better_reverb_buffers(u32 *inputDelaysL, u32 *inputDelaysR) {
     s32 bufOffset = 0;
-    s32 i;
     s32 filterCount = reverbFilterCount;
+    u32 *inputDelayPtrs[SYNTH_CHANNEL_STEREO_COUNT] = {
+        [SYNTH_CHANNEL_LEFT]  = inputDelaysL,
+        [SYNTH_CHANNEL_RIGHT] = inputDelaysR,
+    };
 
     if (betterReverbLightweight)
         filterCount = BETTER_REVERB_FILTER_COUNT_LIGHT;
@@ -194,20 +196,17 @@ void set_better_reverb_buffers(u32 *inputDelaysL, u32 *inputDelaysR) {
 
     // NOTE: Using filterCount over NUM_ALLPASS will report less memory usage with fewer filters, but poses an additional
     // risk to anybody testing on console with performance compromises, as emulator can be easily overlooked.
-    for (i = 0; i < filterCount; ++i) {
-        betterReverbDelays[SYNTH_CHANNEL_LEFT][i] = (s32) (inputDelaysL[i] / gReverbDownsampleRate);
-        betterReverbDelays[SYNTH_CHANNEL_RIGHT][i] = (s32) (inputDelaysR[i] / gReverbDownsampleRate);
-        delayBufs[SYNTH_CHANNEL_LEFT][i] = soundAlloc(&gBetterReverbPool, betterReverbDelays[SYNTH_CHANNEL_LEFT][i] * sizeof(s16));
-        bufOffset += betterReverbDelays[SYNTH_CHANNEL_LEFT][i];
-        delayBufs[SYNTH_CHANNEL_RIGHT][i] = soundAlloc(&gBetterReverbPool, betterReverbDelays[SYNTH_CHANNEL_RIGHT][i] * sizeof(s16));
-        bufOffset += betterReverbDelays[SYNTH_CHANNEL_RIGHT][i];
+    for (s32 channel = 0; channel < SYNTH_CHANNEL_STEREO_COUNT; channel++) {
+        historySamplesLight[channel] = 0;
+        for (s32 filter = 0; filter < filterCount; filter++) {
+            betterReverbDelays[channel][filter] = (s32) (inputDelayPtrs[channel][filter] / gReverbDownsampleRate);
+            delayBufs[channel][filter] = soundAlloc(&gBetterReverbPool, betterReverbDelays[channel][filter] * sizeof(s16));
+            bufOffset += betterReverbDelays[channel][filter];
+        }
     }
 
     aggress(bufOffset * sizeof(s16) <= BETTER_REVERB_SIZE - BETTER_REVERB_PTR_SIZE, "BETTER_REVERB_SIZE is too small for this preset!");
 
-    lastDelayLight[SYNTH_CHANNEL_LEFT] = betterReverbDelays[SYNTH_CHANNEL_LEFT][filterCount-1];
-    lastDelayLight[SYNTH_CHANNEL_RIGHT] = betterReverbDelays[SYNTH_CHANNEL_RIGHT][filterCount-1];
-
     bzero(allpassIdx, sizeof(allpassIdx));
 }
 #endif
diff --git a/src/audio/synthesis.h b/src/audio/synthesis.h
index 5c4050267..f66267b76 100644
--- a/src/audio/synthesis.h
+++ b/src/audio/synthesis.h
@@ -42,13 +42,13 @@ enum ChannelIndexes {
 
 /* ------ BETTER REVERB LIGHTWEIGHT PARAMETER OVERRIDES ------ */
 
-// Filter count works differently than normal when used with light settings and can support numbers that are not multiples of 3, though 3 is generally recommended.
-// This can be reduced to 2 to save a third of runtime overhead, but substantially reduces reverb saturation.
-// Similarly this can be increased from 3, but likely won't have beneficial outcomes worth the runtime expense compared to the modification of other parameters without using light settings.
-#define BETTER_REVERB_FILTER_COUNT_LIGHT 3
-#define BETTER_REVERB_GAIN_INDEX_LIGHT 0xA0 // Advanced parameter; used to tune the outputs of every filter except for the final one
-#define BETTER_REVERB_REVERB_INDEX_LIGHT 0x30 // Advanced parameter; used to tune the incoming output of the final filter
-#define BETTER_REVERB_MULTIPLE_LIGHT 0xD0 // Advanced parameter; multiplier applied to the final output signal for both the left and right channels (divided by 256)
+// Filter count works differently than normal when used with light settings and can support numbers that are not multiples of 3.
+// A value of 2 is generally recommended for behavior most similar to non-lightweight reverb.
+// This can be reduced to 1 to save additional runtime overhead, but will reduce some reverb saturation as a consequence.
+// Similarly this can be increased from 2, but likely won't have beneficial outcomes worth the runtime expense compared to the modification of other parameters without using light settings.
+#define BETTER_REVERB_FILTER_COUNT_LIGHT 2
+#define BETTER_REVERB_GAIN_INDEX_LIGHT 0xA0 // Advanced parameter; used to tune the outputs of every filter except for the final one (multiples of 0x10 will compile more efficiently)
+#define BETTER_REVERB_REVERB_INDEX_LIGHT 0x30 // Advanced parameter; used to tune the reuse of the previously processed output sample (multiples of 0x10 will compile more efficiently)
 
 
 /* ------------ BETTER REVERB EXTERNED VARIABLES ------------ */
@@ -74,7 +74,7 @@ void set_better_reverb_buffers(u32 *inputDelaysL, u32 *inputDelaysR);
 /* -------------- BETTER REVERB STATIC ASSERTS -------------- */
 
 STATIC_ASSERT(NUM_ALLPASS % 3 == 0, "NUM_ALLPASS must be a multiple of 3!");
-STATIC_ASSERT(BETTER_REVERB_FILTER_COUNT_LIGHT >= 2, "BETTER_REVERB_FILTER_COUNT_LIGHT should be no less than 2!");
+STATIC_ASSERT(BETTER_REVERB_FILTER_COUNT_LIGHT > 0, "BETTER_REVERB_FILTER_COUNT_LIGHT must be greater than 0!");
 STATIC_ASSERT(BETTER_REVERB_FILTER_COUNT_LIGHT <= NUM_ALLPASS, "BETTER_REVERB_FILTER_COUNT_LIGHT cannot be larger than NUM_ALLPASS!");
 
 #else

From bf924a8ec32422cb83eeb3f4678bdf559e085b1b Mon Sep 17 00:00:00 2001
From: DNVIC <37513362+DNVIC@users.noreply.github.com>
Date: Sun, 21 Jan 2024 14:11:32 -0500
Subject: [PATCH 10/23] Added a define for slope fix (#749)

* implemented a form of slope fix

* fixed the slope fix to not allow for hyper cheese

* implemented gheskett's suggestions

* Added space between comment and define

Co-authored-by: Gregory Heskett <gheskett@gmail.com>

---------

Co-authored-by: dnvic <business@dnvic.com>
Co-authored-by: Gregory Heskett <gheskett@gmail.com>
---
 include/config/config_movement.h |  5 +++++
 src/game/mario_actions_moving.c  | 15 +++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/include/config/config_movement.h b/include/config/config_movement.h
index 882ae3b97..7034d1d86 100644
--- a/include/config/config_movement.h
+++ b/include/config/config_movement.h
@@ -113,6 +113,11 @@
  */
 #define DONT_LEDGE_GRAB_STEEP_SLOPES
 
+/**
+ * Buffers an A input if you jump off a slope during the landing lag
+ */
+// #define SLOPE_BUFFER
+
 /**
  * Disables BLJs and crushes SimpleFlips's dreams.
  */
diff --git a/src/game/mario_actions_moving.c b/src/game/mario_actions_moving.c
index 2c72df9de..0f9c11e07 100644
--- a/src/game/mario_actions_moving.c
+++ b/src/game/mario_actions_moving.c
@@ -1402,6 +1402,20 @@ void common_slide_action(struct MarioState *m, u32 endAction, u32 airAction, s32
 
 s32 common_slide_action_with_jump(struct MarioState *m, u32 stopAction, u32 jumpAction, u32 airAction,
                                   s32 animation) {
+#ifdef SLOPE_BUFFER
+    if (m->input & INPUT_A_PRESSED) {
+        m->actionState = 1;
+    } else if (!(m->input & INPUT_A_DOWN)) {
+        m->actionState = 0;
+    }
+    if (m->actionTimer == 5) {
+        if (m->actionState == 1) {
+            return set_jumping_action(m, jumpAction, 0);
+        }
+    } else {
+        m->actionTimer++;
+    }
+#else
     if (m->actionTimer == 5) {
         if (m->input & INPUT_A_PRESSED) {
             return set_jumping_action(m, jumpAction, 0);
@@ -1409,6 +1423,7 @@ s32 common_slide_action_with_jump(struct MarioState *m, u32 stopAction, u32 jump
     } else {
         m->actionTimer++;
     }
+#endif
 
     if (update_sliding(m, 4.0f)) {
         return set_mario_action(m, stopAction, 0);

From b9ba17a4d6d52c3e0174117f9a6dc06c0a848146 Mon Sep 17 00:00:00 2001
From: Gregory Heskett <gheskett@gmail.com>
Date: Fri, 2 Feb 2024 22:08:34 -0500
Subject: [PATCH 11/23] Add libpl submodule (#753)

* Add libpl submodule

* Clone libpl submodule automatically while building (if enabled)
---
 .gitmodules                   |  3 +++
 .vscode/c_cpp_properties.json |  1 +
 Makefile                      | 24 ++++++++++++++++++++++++
 lib/libpl                     |  1 +
 sm64.ld                       | 12 ++++++++++++
 src/game/emutest.c            |  9 ++++++++-
 src/game/emutest.h            |  5 +++++
 7 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 .gitmodules
 create mode 160000 lib/libpl

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..9fd99fb91
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "lib/libpl"]
+	path = lib/libpl
+	url = https://gitlab.com/parallel-launcher/libpl.git
diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
index 3ff5fdaef..226d6bef2 100644
--- a/.vscode/c_cpp_properties.json
+++ b/.vscode/c_cpp_properties.json
@@ -27,6 +27,7 @@
                 "F3DEX_GBI_2=1",
                 "F3DZEX_NON_GBI_2=1",
                 "F3DEX_GBI_SHARED=1"
+                "LIBPL=1",
             ],
             "compilerPath": "/usr/bin/mips-linux-gnu-gcc",
             "cStandard": "gnu17",
diff --git a/Makefile b/Makefile
index 35fb2a12d..5d7791592 100644
--- a/Makefile
+++ b/Makefile
@@ -253,6 +253,18 @@ ifeq ($(HVQM),1)
   SRC_DIRS += src/hvqm
 endif
 
+# LIBPL - whether to include libpl library for interfacing with Parallel Launcher
+# (library will be pulled into repo after building with this enabled for the first time)
+#   1 - includes code in ROM
+#   0 - does not
+LIBPL ?= 0
+LIBPL_DIR := lib/libpl
+$(eval $(call validate-option,LIBPL,0 1))
+ifeq ($(LIBPL),1)
+  DEFINES += LIBPL=1
+  SRC_DIRS += $(LIBPL_DIR)
+endif
+
 BUILD_DIR_BASE := build
 # BUILD_DIR is the location where all build artifacts are placed
 BUILD_DIR      := $(BUILD_DIR_BASE)/$(VERSION)_$(CONSOLE)
@@ -335,6 +347,18 @@ ifeq ($(filter clean distclean print-%,$(MAKECMDGOALS)),)
     ifeq ($(DUMMY),FAIL)
       $(error Failed to build tools)
     endif
+
+  # Clone any needed submodules
+  ifeq ($(LIBPL),1)
+    ifeq ($(wildcard $(LIBPL_DIR)),)
+      $(info Cloning libpl submodule...)
+      DUMMY != git submodule update --init $(LIBPL_DIR) > /dev/null || echo FAIL
+      ifeq ($(DUMMY),FAIL)
+        $(error Failed to clone libpl submodule)
+      endif
+    endif
+  endif
+
   $(info Building ROM...)
 
 endif
diff --git a/lib/libpl b/lib/libpl
new file mode 160000
index 000000000..d6b3a90d0
--- /dev/null
+++ b/lib/libpl
@@ -0,0 +1 @@
+Subproject commit d6b3a90d096183df54d2c69a4d64e0ced8dda811
diff --git a/sm64.ld b/sm64.ld
index ebb6af13e..186b4d4e3 100755
--- a/sm64.ld
+++ b/sm64.ld
@@ -171,6 +171,9 @@ SECTIONS
       BUILD_DIR/src/audio*.o(.text*);
 #ifdef S2DEX_TEXT_ENGINE
       BUILD_DIR/src/s2d_engine*.o(.text*);
+#endif
+#ifdef LIBPL
+      BUILD_DIR/lib/libpl*.o(.text*);
 #endif
       */ULTRALIB.a:*.o(.text*);
       */libnustd.a:*.o(.text*);
@@ -191,6 +194,9 @@ SECTIONS
       BUILD_DIR/src/audio*.o(.*data*);
 #ifdef S2DEX_TEXT_ENGINE
       BUILD_DIR/src/s2d_engine*.o(.*data*);
+#endif
+#ifdef LIBPL
+      BUILD_DIR/lib/libpl*.o(.*data*);
 #endif
       */ULTRALIB.a:*.o(.data*);
       */libhvqm2.a:*.o(.data*);
@@ -207,6 +213,9 @@ SECTIONS
       BUILD_DIR/src/audio*.o(.rodata*);
 #ifdef S2DEX_TEXT_ENGINE
       BUILD_DIR/src/s2d_engine*.o(.rodata*);
+#endif
+#ifdef LIBPL
+      BUILD_DIR/lib/libpl*.o(.rodata*);
 #endif
       */ULTRALIB.a:*.o(.rodata*);
       */libgcc.a:*.o(.rodata*);
@@ -224,6 +233,9 @@ SECTIONS
       BUILD_DIR/src/audio*.o(.*bss*);
 #ifdef S2DEX_TEXT_ENGINE
       BUILD_DIR/src/s2d_engine*.o(.*bss*);
+#endif
+#ifdef LIBPL
+      BUILD_DIR/lib/libpl*.o(.*bss*);
 #endif
       */ULTRALIB.a:*.o(COMMON);
       */ULTRALIB.a:*.o(.scommon);
diff --git a/src/game/emutest.c b/src/game/emutest.c
index ef2b912c9..872e51de4 100644
--- a/src/game/emutest.c
+++ b/src/game/emutest.c
@@ -9,7 +9,10 @@
 #include <string.h>
 #include "emutest_vc.h"
 #include "float.h"
-#include "types.h"
+
+#ifdef LIBPL
+#include "lib/libpl/libpl-emu.h"
+#endif
 
 extern OSMesgQueue gSIEventMesgQueue;
 extern u8 __osContPifRam[];
@@ -20,6 +23,7 @@ extern void __osPiGetAccess(void);
 extern void __osPiRelAccess(void);
 
 enum Emulator gEmulator = EMU_CONSOLE;
+u8 gSupportsLibpl = FALSE;
 
 u32 pj64_get_count_factor_asm(void); // defined in asm/pj64_get_count_factor_asm.s
 u32 emux_detect(void); // defined in asm/emux.s
@@ -140,6 +144,9 @@ void detect_emulator() {
             if (magic == 0x00500000u) {
                 // libpl is supported. Must be ParallelN64
                 gEmulator = EMU_PARALLELN64;
+#ifdef LIBPL
+                gSupportsLibpl = libpl_is_supported(LPL_ABI_VERSION_CURRENT);
+#endif
                 return;
             }
             
diff --git a/src/game/emutest.h b/src/game/emutest.h
index 8ffb2f5d9..7afd57d49 100644
--- a/src/game/emutest.h
+++ b/src/game/emutest.h
@@ -1,6 +1,8 @@
 #ifndef EMUTEST_H
 #define EMUTEST_H
 
+#include "types.h"
+
 enum Emulator {
     EMU_WIIVC = 0x0001,
     EMU_PROJECT64_ANY = 0x001E,
@@ -37,6 +39,9 @@ extern void detect_emulator();
  */
 extern enum Emulator gEmulator;
 
+// determines whether libpl is safe to use
+extern u8 gSupportsLibpl;
+
 // Included for backwards compatibility when upgrading from HackerSM64 2.0
 #define gIsConsole ((gEmulator & EMU_CONSOLE) != 0)
 

From 3a5a2f14dd07d371fe6d2e2b84204082adc7da82 Mon Sep 17 00:00:00 2001
From: Gregory Heskett <gheskett@gmail.com>
Date: Fri, 2 Feb 2024 22:27:48 -0500
Subject: [PATCH 12/23] Missed a comma

---
 .vscode/c_cpp_properties.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
index 226d6bef2..3de36d2bb 100644
--- a/.vscode/c_cpp_properties.json
+++ b/.vscode/c_cpp_properties.json
@@ -26,8 +26,8 @@
                 "VERSION_US=1",
                 "F3DEX_GBI_2=1",
                 "F3DZEX_NON_GBI_2=1",
-                "F3DEX_GBI_SHARED=1"
-                "LIBPL=1",
+                "F3DEX_GBI_SHARED=1",
+                "LIBPL=1"
             ],
             "compilerPath": "/usr/bin/mips-linux-gnu-gcc",
             "cStandard": "gnu17",

From 88c2cdb0a0f5a85ba46c47cf0ade96fb0fb2acce Mon Sep 17 00:00:00 2001
From: YoshiMilkman <77122601+YoshiMilkman@users.noreply.github.com>
Date: Fri, 2 Feb 2024 22:52:10 -0500
Subject: [PATCH 13/23] Fix wall sidling with negative speed (#762)

---
 src/game/mario_actions_moving.c | 55 ++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 28 deletions(-)

diff --git a/src/game/mario_actions_moving.c b/src/game/mario_actions_moving.c
index 0f9c11e07..5f6fb46e6 100644
--- a/src/game/mario_actions_moving.c
+++ b/src/game/mario_actions_moving.c
@@ -678,35 +678,34 @@ void push_or_sidle_wall(struct MarioState *m, Vec3f startPos) {
     if (m->forwardVel > 6.0f) {
         mario_set_forward_vel(m, 6.0f);
     }
-
-    if (m->forwardVel > 0.0f) {
-        if (m->wall != NULL) {
-            wallAngle = m->wallYaw;
-            dWallAngle = wallAngle - m->faceAngle[1];
-        }
-
-        if (m->wall == NULL || dWallAngle <= -DEGREES(160) || dWallAngle >= DEGREES(160)) {
-            m->flags |= MARIO_PUSHING;
-            set_mario_animation(m, MARIO_ANIM_PUSHING);
-            play_step_sound(m, 6, 18);
-        } else {
-            if (dWallAngle < 0) {
-                set_mario_anim_with_accel(m, MARIO_ANIM_SIDESTEP_RIGHT, animSpeed);
-            } else {
-                set_mario_anim_with_accel(m, MARIO_ANIM_SIDESTEP_LEFT, animSpeed);
-            }
-
-            if (m->marioObj->header.gfx.animInfo.animFrame < 20) {
-                play_sound((SOUND_MOVING_TERRAIN_SLIDE + m->terrainSoundAddend), m->marioObj->header.gfx.cameraToObject);
-                m->particleFlags |= PARTICLE_DUST;
-            }
-
-            m->actionState = ACT_STATE_PUSH_OR_SIDLE_WALL_SIDLING;
-            m->actionArg = wallAngle + 0x8000;
-            m->marioObj->header.gfx.angle[1] = wallAngle + 0x8000;
-            m->marioObj->header.gfx.angle[2] = find_floor_slope(m, 0x4000);
-        }
+    
+    if (m->wall != NULL) {
+        wallAngle = m->wallYaw;
+        dWallAngle = wallAngle - m->faceAngle[1];
     }
+
+    if (m->wall == NULL || dWallAngle <= -DEGREES(160) || dWallAngle >= DEGREES(160)) {
+        m->flags |= MARIO_PUSHING;
+        set_mario_animation(m, MARIO_ANIM_PUSHING);
+        play_step_sound(m, 6, 18);
+    } else {
+        if (dWallAngle < 0) {
+            set_mario_anim_with_accel(m, MARIO_ANIM_SIDESTEP_RIGHT, animSpeed);
+        } else {
+            set_mario_anim_with_accel(m, MARIO_ANIM_SIDESTEP_LEFT, animSpeed);
+        }
+
+        if (m->marioObj->header.gfx.animInfo.animFrame < 20) {
+            play_sound((SOUND_MOVING_TERRAIN_SLIDE + m->terrainSoundAddend), m->marioObj->header.gfx.cameraToObject);
+            m->particleFlags |= PARTICLE_DUST;
+        }
+
+        m->actionState = ACT_STATE_PUSH_OR_SIDLE_WALL_SIDLING;
+        m->actionArg = wallAngle + 0x8000;
+        m->marioObj->header.gfx.angle[1] = wallAngle + 0x8000;
+        m->marioObj->header.gfx.angle[2] = find_floor_slope(m, 0x4000);
+    }
+    
 }
 
 void tilt_body_walking(struct MarioState *m, s16 startYaw) {

From 1de4b9144b83cae16bd6261372157fe194092b8f Mon Sep 17 00:00:00 2001
From: Gregory Heskett <gheskett@gmail.com>
Date: Sun, 4 Feb 2024 02:29:38 -0500
Subject: [PATCH 14/23] Fix floating point crash with pitch bend table +
 various audio optimizations (#746)

* Move audio TTLs to improve dcache

Saves over 100us even when audio is idle

* Improve additional audio runtime by reducing some unnecessary float math

* Pause channel/layer processing on sfx channels when idle

* Fix possible floating point UB/crash with gPitchBendFrequencyScale
---
 include/seq_macros.inc            | 16 +++---
 sound/sequences/00_sound_player.s | 38 +++++--------
 src/audio/data.c                  | 63 ++++++++++-----------
 src/audio/data.h                  |  4 --
 src/audio/effects.c               | 39 ++++++-------
 src/audio/external.c              |  1 +
 src/audio/heap.c                  |  2 +-
 src/audio/load.c                  | 91 +++++++++----------------------
 src/audio/load.h                  |  2 +-
 src/audio/load_sh.c               |  2 +-
 src/audio/seqplayer.c             | 26 ++++-----
 src/audio/synthesis.c             |  6 +-
 12 files changed, 113 insertions(+), 177 deletions(-)

diff --git a/include/seq_macros.inc b/include/seq_macros.inc
index 1d12e1ab5..a551cc736 100644
--- a/include/seq_macros.inc
+++ b/include/seq_macros.inc
@@ -475,8 +475,8 @@
     .byte \a
 .endm
 
-.macro chan_testlayerfinished a
-    .byte 0x80 + \a
+.macro chan_testlayersfinished
+    .byte 0x80
 .endm
 
 .macro chan_setlayer a, b
@@ -488,14 +488,14 @@
     .byte 0x60 + \a
 .endm
 
-.macro chan_freelayer a
-    .byte 0x90 + \a
+.macro chan_freelayers
+    .byte 0x90
 .endm
 
 #else
 
-.macro chan_testlayerfinished a
-    .byte 0x0 + \a
+.macro chan_testlayersfinished
+    .byte 0x00
 .endm
 
 .macro chan_ioreadval a
@@ -507,8 +507,8 @@
     .byte (\b - sequence_start) >> 8, (\b - sequence_start) & 0xff
 .endm
 
-.macro chan_freelayer a
-    .byte 0xa0 + \a
+.macro chan_freelayers
+    .byte 0xa0
 .endm
 
 #ifdef VERSION_EU
diff --git a/sound/sequences/00_sound_player.s b/sound/sequences/00_sound_player.s
index 1a7111618..464bc0d6e 100644
--- a/sound/sequences/00_sound_player.s
+++ b/sound/sequences/00_sound_player.s
@@ -73,13 +73,11 @@ chan_jump .main_loop_023589
 
 // Main loop for standard, non-continuous sound effects
 .main_loop_023589:
-chan_delay1
+chan_hang
 chan_ioreadval 0
 chan_bltz .main_loop_023589
 .start_playing_023589:
-chan_freelayer 0
-chan_freelayer 1
-chan_freelayer 2
+chan_freelayers
 chan_setval 0
 chan_iowriteval 5
 chan_ioreadval 4
@@ -93,13 +91,11 @@ chan_bltz .skip_023589 // if we have a signal:
   chan_beqz .force_stop_023589 // told to stop
   chan_jump .start_playing_023589 // told to play something else
 .skip_023589:
-chan_testlayerfinished 0
-chan_beqz .poll_023589 // if layer 0 hasn't finished, keep polling
+chan_testlayersfinished
+chan_beqz .poll_023589 // if all layers haven't finished, keep polling
 chan_jump .main_loop_023589 // otherwise go back to the main loop
 .force_stop_023589:
-chan_freelayer 0
-chan_freelayer 1
-chan_freelayer 2
+chan_freelayers
 chan_jump .main_loop_023589
 
 .channel1:
@@ -140,13 +136,11 @@ chan_jump .main_loop_146
 
 // Main loop for moving, env and air sound effects, which play continuously
 .main_loop_146:
-chan_delay1
+chan_hang
 chan_ioreadval 0
 chan_bltz .main_loop_146
 .start_playing_146:
-chan_freelayer 0
-chan_freelayer 1
-chan_freelayer 2
+chan_freelayers
 chan_setvolscale 127
 chan_setval 0
 chan_iowriteval 5
@@ -161,9 +155,7 @@ chan_bltz .poll_146
 chan_beqz .force_stop_146
 chan_jump .start_playing_146
 .force_stop_146:
-chan_freelayer 0
-chan_freelayer 1
-chan_freelayer 2
+chan_freelayers
 chan_jump .main_loop_146
 
 .channel7:
@@ -177,13 +169,11 @@ chan_setdyntable .channel7_table
 
 // Loop for menu sound effects
 .main_loop_7:
-chan_delay1
+chan_hang
 chan_ioreadval 0
 chan_bltz .main_loop_7
 .start_playing_7:
-chan_freelayer 0
-chan_freelayer 1
-chan_freelayer 2
+chan_freelayers
 chan_setval 0
 chan_iowriteval 5
 chan_setreverb 0
@@ -201,14 +191,12 @@ chan_bltz .skip_7 // if we have a signal:
   chan_unreservenotes
   chan_jump .start_playing_7 // told to play something else
 .skip_7:
-chan_testlayerfinished 0
-chan_beqz .poll_7 // if layer 0 hasn't finished, keep polling
+chan_testlayersfinished
+chan_beqz .poll_7 // if all layers haven't finished, keep polling
 chan_unreservenotes
 chan_jump .main_loop_7 // otherwise go back to the main loop
 .force_stop_7:
-chan_freelayer 0
-chan_freelayer 1
-chan_freelayer 2
+chan_freelayers
 chan_unreservenotes
 chan_jump .main_loop_7
 
diff --git a/src/audio/data.c b/src/audio/data.c
index 2c16194ba..28332c845 100644
--- a/src/audio/data.c
+++ b/src/audio/data.c
@@ -244,41 +244,36 @@ u16 gAudioCosineTable[128] = {
 // between -1 and +1 octave.
 // gPitchBendFrequencyScale[k] = (0.5 * 2^(k/127))
 #ifndef VERSION_SH
-#if defined(VERSION_EU)
 f32 gPitchBendFrequencyScale[256] = {
-    0.5f,
-#else
-f32 gPitchBendFrequencyScale[255] = {
-#endif
-    0.500000f, 0.502736f, 0.505488f, 0.508254f, 0.511036f, 0.513833f, 0.516645f, 0.519472f, 0.522315f,
-    0.525174f, 0.528048f, 0.530938f, 0.533843f, 0.536765f, 0.539702f, 0.542656f, 0.545626f, 0.548612f,
-    0.551614f, 0.554633f, 0.557669f, 0.560721f, 0.563789f, 0.566875f, 0.569977f, 0.573097f, 0.576233f,
-    0.579387f, 0.582558f, 0.585746f, 0.588951f, 0.592175f, 0.595415f, 0.598674f, 0.601950f, 0.605245f,
-    0.608557f, 0.611888f, 0.615236f, 0.618603f, 0.621989f, 0.625393f, 0.628815f, 0.632257f, 0.635717f,
-    0.639196f, 0.642694f, 0.646212f, 0.649748f, 0.653304f, 0.656880f, 0.660475f, 0.664089f, 0.667724f,
-    0.671378f, 0.675052f, 0.678747f, 0.682461f, 0.686196f, 0.689952f, 0.693727f, 0.697524f, 0.701341f,
-    0.705180f, 0.709039f, 0.712919f, 0.716821f, 0.720744f, 0.724689f, 0.728655f, 0.732642f, 0.736652f,
-    0.740684f, 0.744737f, 0.748813f, 0.752911f, 0.757031f, 0.761175f, 0.765340f, 0.769529f, 0.773740f,
-    0.777975f, 0.782232f, 0.786513f, 0.790818f, 0.795146f, 0.799497f, 0.803873f, 0.808272f, 0.812696f,
-    0.817144f, 0.821616f, 0.826112f, 0.830633f, 0.835179f, 0.839750f, 0.844346f, 0.848966f, 0.853613f,
-    0.858284f, 0.862982f, 0.867704f, 0.872453f, 0.877228f, 0.882029f, 0.886856f, 0.891709f, 0.896590f,
-    0.901496f, 0.906430f, 0.911391f, 0.916379f, 0.921394f, 0.926436f, 0.931507f, 0.936604f, 0.941730f,
-    0.946884f, 0.952066f, 0.957277f, 0.962516f, 0.967783f, 0.973080f, 0.978405f, 0.983760f, 0.989144f,
-    0.994557f, 1.000000f, 1.005473f, 1.010975f, 1.016508f, 1.022071f, 1.027665f, 1.033289f, 1.038944f,
-    1.044630f, 1.050347f, 1.056095f, 1.061875f, 1.067687f, 1.073530f, 1.079405f, 1.085312f, 1.091252f,
-    1.097224f, 1.103229f, 1.109267f, 1.115337f, 1.121441f, 1.127579f, 1.133750f, 1.139955f, 1.146193f,
-    1.152466f, 1.158773f, 1.165115f, 1.171491f, 1.177903f, 1.184349f, 1.190831f, 1.197348f, 1.203901f,
-    1.210489f, 1.217114f, 1.223775f, 1.230473f, 1.237207f, 1.243978f, 1.250786f, 1.257631f, 1.264514f,
-    1.271434f, 1.278392f, 1.285389f, 1.292423f, 1.299497f, 1.306608f, 1.313759f, 1.320949f, 1.328178f,
-    1.335447f, 1.342756f, 1.350104f, 1.357493f, 1.364922f, 1.372392f, 1.379903f, 1.387455f, 1.395048f,
-    1.402683f, 1.410360f, 1.418078f, 1.425839f, 1.433642f, 1.441488f, 1.449377f, 1.457309f, 1.465285f,
-    1.473304f, 1.481367f, 1.489474f, 1.497626f, 1.505822f, 1.514063f, 1.522349f, 1.530681f, 1.539058f,
-    1.547481f, 1.555950f, 1.564465f, 1.573027f, 1.581636f, 1.590292f, 1.598995f, 1.607746f, 1.616545f,
-    1.625392f, 1.634287f, 1.643231f, 1.652224f, 1.661266f, 1.670358f, 1.679500f, 1.688691f, 1.697933f,
-    1.707225f, 1.716569f, 1.725963f, 1.735409f, 1.744906f, 1.754456f, 1.764058f, 1.773712f, 1.783419f,
-    1.793179f, 1.802993f, 1.812860f, 1.822782f, 1.832757f, 1.842788f, 1.852873f, 1.863013f, 1.873209f,
-    1.883461f, 1.893768f, 1.904132f, 1.914553f, 1.925031f, 1.935567f, 1.946159f, 1.956810f, 1.967520f,
-    1.978287f, 1.989114f, 2.000000f
+    0.500000f, 0.500000f, 0.502736f, 0.505488f, 0.508254f, 0.511036f, 0.513833f, 0.516645f, 0.519472f,
+    0.522315f, 0.525174f, 0.528048f, 0.530938f, 0.533843f, 0.536765f, 0.539702f, 0.542656f, 0.545626f,
+    0.548612f, 0.551614f, 0.554633f, 0.557669f, 0.560721f, 0.563789f, 0.566875f, 0.569977f, 0.573097f,
+    0.576233f, 0.579387f, 0.582558f, 0.585746f, 0.588951f, 0.592175f, 0.595415f, 0.598674f, 0.601950f,
+    0.605245f, 0.608557f, 0.611888f, 0.615236f, 0.618603f, 0.621989f, 0.625393f, 0.628815f, 0.632257f,
+    0.635717f, 0.639196f, 0.642694f, 0.646212f, 0.649748f, 0.653304f, 0.656880f, 0.660475f, 0.664089f,
+    0.667724f, 0.671378f, 0.675052f, 0.678747f, 0.682461f, 0.686196f, 0.689952f, 0.693727f, 0.697524f,
+    0.701341f, 0.705180f, 0.709039f, 0.712919f, 0.716821f, 0.720744f, 0.724689f, 0.728655f, 0.732642f,
+    0.736652f, 0.740684f, 0.744737f, 0.748813f, 0.752911f, 0.757031f, 0.761175f, 0.765340f, 0.769529f,
+    0.773740f, 0.777975f, 0.782232f, 0.786513f, 0.790818f, 0.795146f, 0.799497f, 0.803873f, 0.808272f,
+    0.812696f, 0.817144f, 0.821616f, 0.826112f, 0.830633f, 0.835179f, 0.839750f, 0.844346f, 0.848966f,
+    0.853613f, 0.858284f, 0.862982f, 0.867704f, 0.872453f, 0.877228f, 0.882029f, 0.886856f, 0.891709f,
+    0.896590f, 0.901496f, 0.906430f, 0.911391f, 0.916379f, 0.921394f, 0.926436f, 0.931507f, 0.936604f,
+    0.941730f, 0.946884f, 0.952066f, 0.957277f, 0.962516f, 0.967783f, 0.973080f, 0.978405f, 0.983760f,
+    0.989144f, 0.994557f, 1.000000f, 1.005473f, 1.010975f, 1.016508f, 1.022071f, 1.027665f, 1.033289f,
+    1.038944f, 1.044630f, 1.050347f, 1.056095f, 1.061875f, 1.067687f, 1.073530f, 1.079405f, 1.085312f,
+    1.091252f, 1.097224f, 1.103229f, 1.109267f, 1.115337f, 1.121441f, 1.127579f, 1.133750f, 1.139955f,
+    1.146193f, 1.152466f, 1.158773f, 1.165115f, 1.171491f, 1.177903f, 1.184349f, 1.190831f, 1.197348f,
+    1.203901f, 1.210489f, 1.217114f, 1.223775f, 1.230473f, 1.237207f, 1.243978f, 1.250786f, 1.257631f,
+    1.264514f, 1.271434f, 1.278392f, 1.285389f, 1.292423f, 1.299497f, 1.306608f, 1.313759f, 1.320949f,
+    1.328178f, 1.335447f, 1.342756f, 1.350104f, 1.357493f, 1.364922f, 1.372392f, 1.379903f, 1.387455f,
+    1.395048f, 1.402683f, 1.410360f, 1.418078f, 1.425839f, 1.433642f, 1.441488f, 1.449377f, 1.457309f,
+    1.465285f, 1.473304f, 1.481367f, 1.489474f, 1.497626f, 1.505822f, 1.514063f, 1.522349f, 1.530681f,
+    1.539058f, 1.547481f, 1.555950f, 1.564465f, 1.573027f, 1.581636f, 1.590292f, 1.598995f, 1.607746f,
+    1.616545f, 1.625392f, 1.634287f, 1.643231f, 1.652224f, 1.661266f, 1.670358f, 1.679500f, 1.688691f,
+    1.697933f, 1.707225f, 1.716569f, 1.725963f, 1.735409f, 1.744906f, 1.754456f, 1.764058f, 1.773712f,
+    1.783419f, 1.793179f, 1.802993f, 1.812860f, 1.822782f, 1.832757f, 1.842788f, 1.852873f, 1.863013f,
+    1.873209f, 1.883461f, 1.893768f, 1.904132f, 1.914553f, 1.925031f, 1.935567f, 1.946159f, 1.956810f,
+    1.967520f, 1.978287f, 1.989114f, 2.000000f
 };
 
 // Frequencies for notes using the standard twelve-tone equal temperament scale.
diff --git a/src/audio/data.h b/src/audio/data.h
index acca13717..b4706868d 100644
--- a/src/audio/data.h
+++ b/src/audio/data.h
@@ -65,11 +65,7 @@ extern u8 gReverbMultsArrCount;
 #endif // PUPPYPRINT_DEBUG
 #endif // BETTER_REVERB
 
-#if defined(VERSION_EU) || defined(VERSION_SH)
 extern f32 gPitchBendFrequencyScale[256];
-#else
-extern f32 gPitchBendFrequencyScale[255];
-#endif
 extern f32 gNoteFrequencies[128];
 
 extern u8 gDefaultShortNoteVelocityTable[16];
diff --git a/src/audio/effects.c b/src/audio/effects.c
index ec2abc25d..4fd6dc15f 100644
--- a/src/audio/effects.c
+++ b/src/audio/effects.c
@@ -55,19 +55,26 @@ void sequence_channel_process_sound(struct SequenceChannel *seqChannel, s32 reca
 }
 #else
 static void sequence_channel_process_sound(struct SequenceChannel *seqChannel) {
-    s32 i;
+    s32 hasProcessedChannel = FALSE;
+    f32 channelVolume;
+    f32 panFromChannel;
+    f32 panLayerWeight;
 
-    f32 channelVolume = seqChannel->volume * seqChannel->volumeScale * seqChannel->seqPlayer->fadeVolume;
-    if (seqChannel->seqPlayer->muted && (seqChannel->muteBehavior & MUTE_BEHAVIOR_SOFTEN) != 0) {
-        channelVolume *= seqChannel->seqPlayer->muteVolumeScale;
-    }
-
-    f32 panFromChannel = seqChannel->pan * seqChannel->panChannelWeight;
-    f32 panLayerWeight = 1.0f - seqChannel->panChannelWeight;
-
-    for (i = 0; i < 4; i++) {
+    for (s32 i = 0; i < LAYERS_MAX; i++) {
         struct SequenceChannelLayer *layer = seqChannel->layers[i];
         if (layer != NULL && layer->enabled && layer->note != NULL) {
+            if (!hasProcessedChannel) {
+                hasProcessedChannel = TRUE;
+
+                channelVolume = seqChannel->volume * seqChannel->volumeScale * seqChannel->seqPlayer->fadeVolume;
+                if (seqChannel->seqPlayer->muted && (seqChannel->muteBehavior & MUTE_BEHAVIOR_SOFTEN) != 0) {
+                    channelVolume *= seqChannel->seqPlayer->muteVolumeScale;
+                }
+
+                panFromChannel = seqChannel->pan * seqChannel->panChannelWeight;
+                panLayerWeight = 1.0f - seqChannel->panChannelWeight;
+            }
+
             layer->noteFreqScale = layer->freqScale * seqChannel->freqScale;
             layer->noteVelocity = layer->velocitySquare * channelVolume;
             layer->notePan = (layer->pan * panLayerWeight) + panFromChannel;
@@ -143,19 +150,11 @@ f32 get_portamento_freq_scale(struct Portamento *p) {
     p->cur += p->speed;
     u32 v0 = (u32) p->cur;
 
-#if defined(VERSION_EU) || defined(VERSION_SH)
     if (v0 > 127) {
-#else
-    if (v0 >= 127) {
-#endif
         v0 = 127;
     }
 
-#if defined(VERSION_EU) || defined(VERSION_SH)
     return (1.0f + (p->extent * (gPitchBendFrequencyScale[v0 + 128] - 1.0f)));
-#else
-    return (1.0f + (p->extent * (gPitchBendFrequencyScale[v0 + 127] - 1.0f)));
-#endif
 }
 
 s32 get_vibrato_pitch_change(struct VibratoState *vib) {
@@ -242,11 +241,7 @@ f32 get_vibrato_freq_scale(struct VibratoState *vib) {
     s32 pitchChange = get_vibrato_pitch_change(vib);
     f32 extent = (f32) vib->extent / 4096.0f;
 
-#if defined(VERSION_EU) || defined(VERSION_SH)
     return 1.0f + extent * (gPitchBendFrequencyScale[pitchChange + 128] - 1.0f);
-#else
-    return 1.0f + extent * (gPitchBendFrequencyScale[pitchChange + 127] - 1.0f);
-#endif
 }
 
 void note_vibrato_update(struct Note *note) {
diff --git a/src/audio/external.c b/src/audio/external.c
index bed409f6e..ddf28c31d 100644
--- a/src/audio/external.c
+++ b/src/audio/external.c
@@ -1236,6 +1236,7 @@ static void update_game_sound(void) {
                     // Begin playing the sound
                     gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->soundScriptIO[4] = soundId;
                     gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->soundScriptIO[0] = 1;
+                    gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->stopScript = FALSE;
 
                     switch (bank) {
                         case SOUND_BANK_MOVING:
diff --git a/src/audio/heap.c b/src/audio/heap.c
index c19f61f41..c9ec78c5c 100644
--- a/src/audio/heap.c
+++ b/src/audio/heap.c
@@ -1431,7 +1431,7 @@ void audio_reset_session(void) {
     init_reverb_us(reverbPresetId);
 #endif
 
-    init_sample_dma_buffers(gMaxSimultaneousNotes);
+    init_sample_dma_buffers();
 
 #if defined(VERSION_EU)
     build_vol_rampings_table(0, gAudioBufferParameters.samplesPerUpdate);
diff --git a/src/audio/load.c b/src/audio/load.c
index 295490e9d..c663d93c1 100644
--- a/src/audio/load.c
+++ b/src/audio/load.c
@@ -12,11 +12,9 @@
 struct SharedDma {
     /*0x0*/ u8 *buffer;       // target, points to pre-allocated buffer
     /*0x4*/ uintptr_t source; // device address
-    /*0x8*/ u16 sizeUnused;   // set to bufSize, never read
-    /*0xA*/ u16 bufSize;      // size of buffer
-    /*0xC*/ u8 unused2;       // set to 0, never read
-    /*0xD*/ u8 reuseIndex;    // position in sSampleDmaReuseQueue1/2, if ttl == 0
-    /*0xE*/ u8 ttl;           // duration after which the DMA can be discarded
+    /*0x8*/ u32 bufSize;      // size of buffer (converted from u16 for intentional padding to size 0x10)
+    /*0xC*/ u8 reuseIndex;    // position in sSampleDmaReuseQueue1/2, if ttl == 0
+    /*   */ // u8 pad[3];
 };                            // size = 0x10
 
 // EU only
@@ -43,6 +41,7 @@ OSMesg gAudioDmaMesg;
 OSIoMesg gAudioDmaIoMesg;
 
 struct SharedDma sSampleDmas[MAX_SIMULTANEOUS_NOTES * 4];
+u8 sSampleTTLs[MAX_SIMULTANEOUS_NOTES * 4];
 u32 gSampleDmaNumListItems; // sh: 0x803503D4
 u32 sSampleDmaListSize1; // sh: 0x803503D8
 
@@ -71,7 +70,6 @@ struct AudioBufferParametersEU gAudioBufferParameters;
 s32 gAiFrequency;
 #endif
 
-u32 sDmaBufSize;
 s32 gMaxAudioCmds;
 s32 gMaxSimultaneousNotes;
 
@@ -158,30 +156,20 @@ void decrease_sample_dma_ttls() {
     u32 i;
 
     for (i = 0; i < sSampleDmaListSize1; i++) {
-#if defined(VERSION_EU)
-        struct SharedDma *temp = &sSampleDmas[i];
-#else
-        struct SharedDma *temp = sSampleDmas + i;
-#endif
-        if (temp->ttl != 0) {
-            temp->ttl--;
-            if (temp->ttl == 0) {
-                temp->reuseIndex = sSampleDmaReuseQueueHead1;
+        if (sSampleTTLs[i] != 0) {
+            sSampleTTLs[i]--;
+            if (sSampleTTLs[i] == 0) {
+                sSampleDmas[i].reuseIndex = sSampleDmaReuseQueueHead1;
                 sSampleDmaReuseQueue1[sSampleDmaReuseQueueHead1++] = (u8) i;
             }
         }
     }
 
     for (i = sSampleDmaListSize1; i < gSampleDmaNumListItems; i++) {
-#if defined(VERSION_EU)
-        struct SharedDma *temp = &sSampleDmas[i];
-#else
-        struct SharedDma *temp = sSampleDmas + i;
-#endif
-        if (temp->ttl != 0) {
-            temp->ttl--;
-            if (temp->ttl == 0) {
-                temp->reuseIndex = sSampleDmaReuseQueueHead2;
+        if (sSampleTTLs[i] != 0) {
+            sSampleTTLs[i]--;
+            if (sSampleTTLs[i] == 0) {
+                sSampleDmas[i].reuseIndex = sSampleDmaReuseQueueHead2;
                 sSampleDmaReuseQueue2[sSampleDmaReuseQueueHead2++] = (u8) i;
             }
         }
@@ -203,7 +191,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
             bufferPos = devAddr - dma->source;
             if (0 <= bufferPos && (size_t) bufferPos <= dma->bufSize - size) {
                 // We already have a DMA request for this memory range.
-                if (dma->ttl == 0 && sSampleDmaReuseQueueTail2 != sSampleDmaReuseQueueHead2) {
+                if (sSampleTTLs[i] == 0 && sSampleDmaReuseQueueTail2 != sSampleDmaReuseQueueHead2) {
                     // Move the DMA out of the reuse queue, by swapping it with the
                     // tail, and then incrementing the tail.
                     if (dma->reuseIndex != sSampleDmaReuseQueueTail2) {
@@ -214,7 +202,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
                     }
                     sSampleDmaReuseQueueTail2++;
                 }
-                dma->ttl = 60;
+                sSampleTTLs[i] = 60;
                 *dmaIndexRef = (u8) i;
                 return (devAddr - dma->source) + dma->buffer;
             }
@@ -226,6 +214,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
             dmaIndex = sSampleDmaReuseQueue2[sSampleDmaReuseQueueTail2];
             sSampleDmaReuseQueueTail2++;
             dma = sSampleDmas + dmaIndex;
+            sSampleTTLs[dmaIndex] = 2;
             hasDma = TRUE;
         }
     } else {
@@ -233,7 +222,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
         bufferPos = devAddr - dma->source;
         if (0 <= bufferPos && (size_t) bufferPos <= dma->bufSize - size) {
             // We already have DMA for this memory range.
-            if (dma->ttl == 0) {
+            if (sSampleTTLs[*dmaIndexRef] == 0) {
                 // Move the DMA out of the reuse queue, by swapping it with the
                 // tail, and then incrementing the tail.
                 if (dma->reuseIndex != sSampleDmaReuseQueueTail1) {
@@ -244,7 +233,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
                 }
                 sSampleDmaReuseQueueTail1++;
             }
-            dma->ttl = 2;
+            sSampleTTLs[*dmaIndexRef] = 2;
             return dma->buffer + (devAddr - dma->source);
         }
     }
@@ -254,14 +243,13 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
         // be empty, since TTL 2 is so small.
         dmaIndex = sSampleDmaReuseQueue1[sSampleDmaReuseQueueTail1++];
         dma = sSampleDmas + dmaIndex;
+        sSampleTTLs[dmaIndex] = 2;
         hasDma = TRUE;
     }
 
     transfer = dma->bufSize;
     dmaDevAddr = devAddr & ~0xF;
-    dma->ttl = 2;
     dma->source = dmaDevAddr;
-    dma->sizeUnused = transfer;
 #ifdef VERSION_US // TODO: Is there a reason this only exists in US?
     osInvalDCache(dma->buffer, transfer);
 #endif
@@ -272,14 +260,10 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
 }
 
 
-void init_sample_dma_buffers(UNUSED s32 arg0) {
+void init_sample_dma_buffers() {
     s32 i;
-#if defined(VERSION_EU)
-#define j i
-#else
-    s32 j;
-#endif
-
+    s32 sDmaBufSize;
+    
     sDmaBufSize = DMA_BUF_SIZE_0;
 
 #if defined(VERSION_EU)
@@ -289,30 +273,21 @@ void init_sample_dma_buffers(UNUSED s32 arg0) {
 #endif
         sSampleDmas[gSampleDmaNumListItems].buffer = soundAlloc(&gNotesAndBuffersPool, sDmaBufSize);
         if (sSampleDmas[gSampleDmaNumListItems].buffer == NULL) {
-#if defined(VERSION_EU)
             break;
-#else
-            goto out1;
-#endif
         }
         sSampleDmas[gSampleDmaNumListItems].bufSize = sDmaBufSize;
         sSampleDmas[gSampleDmaNumListItems].source = 0;
-        sSampleDmas[gSampleDmaNumListItems].sizeUnused = 0;
-        sSampleDmas[gSampleDmaNumListItems].unused2 = 0;
-        sSampleDmas[gSampleDmaNumListItems].ttl = 0;
+        sSampleTTLs[gSampleDmaNumListItems] = 0;
         gSampleDmaNumListItems++;
     }
-#if defined(VERSION_JP) || defined(VERSION_US)
-out1:
-#endif
 
     for (i = 0; (u32) i < gSampleDmaNumListItems; i++) {
         sSampleDmaReuseQueue1[i] = (u8) i;
         sSampleDmas[i].reuseIndex = (u8) i;
     }
 
-    for (j = gSampleDmaNumListItems; j < 0x100; j++) {
-        sSampleDmaReuseQueue1[j] = 0;
+    for (i = gSampleDmaNumListItems; i < ARRAY_COUNT(sSampleDmaReuseQueue1); i++) {
+        sSampleDmaReuseQueue1[i] = 0;
     }
 
     sSampleDmaReuseQueueTail1 = 0;
@@ -324,22 +299,13 @@ out1:
     for (i = 0; i < gMaxSimultaneousNotes; i++) {
         sSampleDmas[gSampleDmaNumListItems].buffer = soundAlloc(&gNotesAndBuffersPool, sDmaBufSize);
         if (sSampleDmas[gSampleDmaNumListItems].buffer == NULL) {
-#if defined(VERSION_EU)
             break;
-#else
-            goto out2;
-#endif
         }
         sSampleDmas[gSampleDmaNumListItems].bufSize = sDmaBufSize;
         sSampleDmas[gSampleDmaNumListItems].source = 0;
-        sSampleDmas[gSampleDmaNumListItems].sizeUnused = 0;
-        sSampleDmas[gSampleDmaNumListItems].unused2 = 0;
-        sSampleDmas[gSampleDmaNumListItems].ttl = 0;
+        sSampleTTLs[gSampleDmaNumListItems] = 0;
         gSampleDmaNumListItems++;
     }
-#if defined(VERSION_JP) || defined(VERSION_US)
-out2:
-#endif
 
     for (i = sSampleDmaListSize1; (u32) i < gSampleDmaNumListItems; i++) {
         sSampleDmaReuseQueue2[i - sSampleDmaListSize1] = (u8) i;
@@ -348,15 +314,12 @@ out2:
 
     // This probably meant to touch the range size1..size2 as well... but it
     // doesn't matter, since these values are never read anyway.
-    for (j = gSampleDmaNumListItems; j < 0x100; j++) {
-        sSampleDmaReuseQueue2[j] = sSampleDmaListSize1;
+    for (i = gSampleDmaNumListItems; i < ARRAY_COUNT(sSampleDmaReuseQueue2); i++) {
+        sSampleDmaReuseQueue2[i] = sSampleDmaListSize1;
     }
 
     sSampleDmaReuseQueueTail2 = 0;
     sSampleDmaReuseQueueHead2 = gSampleDmaNumListItems - sSampleDmaListSize1;
-#if defined(VERSION_EU)
-#undef j
-#endif
 }
 
 #if defined(VERSION_JP) || defined(VERSION_US)
diff --git a/src/audio/load.h b/src/audio/load.h
index 12b6ed6c6..1c69c688e 100644
--- a/src/audio/load.h
+++ b/src/audio/load.h
@@ -87,7 +87,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef, s3
 #else
 void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef);
 #endif
-void init_sample_dma_buffers(s32 arg0);
+void init_sample_dma_buffers();
 #if defined(VERSION_SH)
 void patch_audio_bank(s32 bankId, struct AudioBank *mem, struct PatchStruct *patchInfo);
 #else
diff --git a/src/audio/load_sh.c b/src/audio/load_sh.c
index bc24891b3..ac6097a54 100644
--- a/src/audio/load_sh.c
+++ b/src/audio/load_sh.c
@@ -237,7 +237,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef, s3
     return (devAddr - dmaDevAddr) + dma->buffer;
 }
 
-void init_sample_dma_buffers(UNUSED s32 arg0) {
+void init_sample_dma_buffers() {
     s32 i;
 
     sDmaBufSize = 0x2D0;
diff --git a/src/audio/seqplayer.c b/src/audio/seqplayer.c
index 65a948be3..373ad9689 100644
--- a/src/audio/seqplayer.c
+++ b/src/audio/seqplayer.c
@@ -6,6 +6,7 @@
 #include "heap.h"
 #include "load.h"
 #include "seqplayer.h"
+#include "game/debug.h"
 #include "game/main.h"
 
 #ifdef VERSION_SH
@@ -1693,11 +1694,7 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
 
                     case 0xd3: // chan_pitchbend; pitch bend by <= 1 octave in either direction (-127..127)
                         // (m64_read_u8(state) is really s8 here)
-#ifdef VERSION_SH
                         cmd = m64_read_u8(state) + 128;
-#else
-                        cmd = m64_read_u8(state) + 127;
-#endif
                         seqChannel->freqScale = gPitchBendFrequencyScale[cmd] * gConfig.audioFrequency;
 #if defined(VERSION_EU) || defined(VERSION_SH)
                         seqChannel->changes.as_bitfields.freqScale = TRUE;
@@ -2075,15 +2072,14 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
                         }
                         break;
 #else
-                    case 0x00: // chan_testlayerfinished
-                        if (seqChannel->layers[loBits] != NULL) {
-                            value = seqChannel->layers[loBits]->finished;
+                    case 0x00: // chan_testlayersfinished (NOTE: does not use loBits)
+                        value = TRUE;
+                        for (i = 0; i < LAYERS_MAX; i++) {
+                            if (seqChannel->layers[i] != NULL && !seqChannel->layers[i]->finished) {
+                                value = FALSE;
+                                break;
+                            }
                         }
-#ifdef VERSION_EU
-                        else {
-                            value = -1;
-                        }
-#endif
                         break;
 #endif
 
@@ -2123,8 +2119,10 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
                         }
                         break;
 
-                    case 0xa0: // chan_freelayer
-                        seq_channel_layer_free(seqChannel, loBits);
+                    case 0xa0: // chan_freelayers (NOTE: does not use loBits)
+                        for (i = 0; i < LAYERS_MAX; i++) {
+                            seq_channel_layer_free(seqChannel, i);
+                        }
                         break;
 
                     case 0xb0: // chan_dynsetlayer
diff --git a/src/audio/synthesis.c b/src/audio/synthesis.c
index ade7b4319..feec87855 100644
--- a/src/audio/synthesis.c
+++ b/src/audio/synthesis.c
@@ -712,7 +712,7 @@ u64 *synthesis_process_notes(s16 *aiBuf, u32 bufLen, u64 *cmd) {
                             aClearBuffer(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + s5,
                                          (samplesLenAdjusted - nAdpcmSamplesProcessed) * 2);
                             note->samplePosInt = 0;
-                            note->finished = 1;
+                            note->finished = TRUE;
                             ((struct vNote *)note)->enabled = 0;
                             break;
                         }
@@ -737,7 +737,7 @@ u64 *synthesis_process_notes(s16 *aiBuf, u32 bufLen, u64 *cmd) {
                                     aResample(cmd++, A_INIT, 0xff60, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->dummyResampleState));
                                     resampledTempLen = samplesLenAdjusted + 4;
                                     noteSamplesDmemAddrBeforeResampling = DMEM_ADDR_RESAMPLED + 4;
-                                    if (note->finished != FALSE) {
+                                    if (note->finished) {
                                         aClearBuffer(cmd++, DMEM_ADDR_RESAMPLED + resampledTempLen, samplesLenAdjusted + 16);
                                     }
                                     break;
@@ -756,7 +756,7 @@ u64 *synthesis_process_notes(s16 *aiBuf, u32 bufLen, u64 *cmd) {
                             }
                     }
 
-                    if (note->finished != FALSE) {
+                    if (note->finished) {
                         break;
                     }
                 }

From d07e6383f62e2cbc50bd72a59b1a0cf4f72663dc Mon Sep 17 00:00:00 2001
From: arthurtilly <32559225+arthurtilly@users.noreply.github.com>
Date: Sat, 10 Feb 2024 15:07:31 +1300
Subject: [PATCH 15/23] Fixed bugs relating to objects dying in lava (#766)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Changed the generic object death function to also check that an object has landed and is over lava before killing it. Previously the object died as soon as it was above lava, even while still airborne, which made objects appear to die at random over lava.

• Also changed the same function to kill a non-buoyant, killable object (e.g. a goomba) as soon as it touches water. Previously it would slowly sink to the bottom and only die upon touching the floor underwater, which looked strange.

• Moved the lava checks so that they are performed in the same place as the other object move-flag checks. This avoids bugs where, for example, an object dies when touching the edge of a platform that is over lava: the ground flags were updated first, then the object's floor was updated to the lava surface, and then the lava move flags were updated, so the object believed it was over lava and touching the ground at the same time (making it die).

• Fixed the coin flicker define (COIN_LAVA_FLICKER) for regular non-bully coins, which apparently never worked without anyone noticing. Only the coins that come from bullies worked with this define, presumably because coins in lava were only ever tested with bullies. The coin behavior was rewritten to be a little closer to the bully one. In the future the two types of moving coins could probably be merged into one object, since they do not act visibly different at all.

Tested the PR with a lot of goombas being dropped into lava/water and also this stuff is in Mario Builder so I've been testing it there too. Feel free to test more.
---
 src/game/behaviors/coin.inc.c        | 50 ++++++++++++++++------------
 src/game/behaviors/moving_coin.inc.c |  3 --
 src/game/obj_behaviors.h             |  1 +
 src/game/obj_behaviors_2.c           | 30 ++++++++++-------
 src/game/object_helpers.c            | 17 +++++-----
 5 files changed, 56 insertions(+), 45 deletions(-)

diff --git a/src/game/behaviors/coin.inc.c b/src/game/behaviors/coin.inc.c
index 802ceda27..0162df336 100644
--- a/src/game/behaviors/coin.inc.c
+++ b/src/game/behaviors/coin.inc.c
@@ -102,20 +102,33 @@ void bhv_coin_loop(void) {
     struct Surface *floor = o->oFloor;
 
     if (floor != NULL) {
-        if (o->oMoveFlags & OBJ_MOVE_ON_GROUND) {
-            o->oAction = BOUNCING_COIN_ACT_BOUNCING;
-        }
-        if (o->oAction == BOUNCING_COIN_ACT_BOUNCING) {
-            o->oBounciness = 0;
-            if (floor->normal.y < 0.9f) {
-                s16 targetYaw = SURFACE_YAW(floor);
-                cur_obj_rotate_yaw_toward(targetYaw, 0x400);
-            }
-        }
-    }
+        switch (o->oAction) {
+            case BOUNCING_COIN_ACT_FALLING:
+                if (o->oTimer == 0) {
+                    cur_obj_play_sound_2(SOUND_GENERAL_COIN_SPURT);
+                }
+                if (o->oMoveFlags & OBJ_MOVE_LANDED) {
+                    o->oAction = BOUNCING_COIN_ACT_BOUNCING;
+                }
+                
+                break;
 
-    if (o->oTimer == 0) {
-        cur_obj_play_sound_2(SOUND_GENERAL_COIN_SPURT);
+            case BOUNCING_COIN_ACT_BOUNCING:
+                o->oBounciness = 0;
+                if (floor->normal.y < 0.9f) {
+                    s16 targetYaw = SURFACE_YAW(floor);
+                    cur_obj_rotate_yaw_toward(targetYaw, 0x400);
+                }
+                break;
+
+            case OBJ_ACT_LAVA_DEATH:
+#ifdef COIN_LAVA_FLICKER
+                obj_flicker_and_disappear(o, 0);
+#else
+                obj_mark_for_deletion(o);
+#endif
+                break;
+        }
     }
 
     if (o->oVelY < 0) {
@@ -123,16 +136,11 @@ void bhv_coin_loop(void) {
     }
 
     if (o->oMoveFlags & OBJ_MOVE_LANDED) {
-#ifdef COIN_LAVA_FLICKER
-        if ((o->oMoveFlags & OBJ_MOVE_ABOVE_DEATH_BARRIER)
-        || ((o->oMoveFlags & OBJ_MOVE_ABOVE_LAVA) && cur_obj_wait_then_blink(0, 20))) {
+        if (o->oMoveFlags & OBJ_MOVE_ABOVE_DEATH_BARRIER) {
             obj_mark_for_deletion(o);
+        } else if (o->oMoveFlags & OBJ_MOVE_ABOVE_LAVA) {
+            o->oAction = OBJ_ACT_LAVA_DEATH;
         }
-#else
-        if (o->oMoveFlags & (OBJ_MOVE_ABOVE_DEATH_BARRIER | OBJ_MOVE_ABOVE_LAVA)) {
-            obj_mark_for_deletion(o);
-        }
-#endif
     }
 
     if (o->oMoveFlags & OBJ_MOVE_BOUNCE) {
diff --git a/src/game/behaviors/moving_coin.inc.c b/src/game/behaviors/moving_coin.inc.c
index 61d1f9d2d..dabf74915 100644
--- a/src/game/behaviors/moving_coin.inc.c
+++ b/src/game/behaviors/moving_coin.inc.c
@@ -93,9 +93,6 @@ void bhv_moving_yellow_coin_loop(void) {
             break;
     }
 
-#ifdef COIN_LAVA_FLICKER
-    if (o->oMoveFlags & OBJ_MOVE_ABOVE_LAVA) moving_coin_flicker();
-#endif
     if (o->oInteractStatus & INT_STATUS_INTERACTED) {
         coin_collected();
         o->oInteractStatus = INT_STATUS_NONE;
diff --git a/src/game/obj_behaviors.h b/src/game/obj_behaviors.h
index d71530894..5e367daa2 100644
--- a/src/game/obj_behaviors.h
+++ b/src/game/obj_behaviors.h
@@ -18,6 +18,7 @@ enum ObjCollisionFlags {
 
 //! Lots of these are duplicates
 void set_yoshi_as_not_dead(void);
+s32 obj_flicker_and_disappear(struct Object *obj, s16 lifeSpan);
 s32 coin_step(s16 *collisionFlagsPtr);
 void moving_coin_flicker(void);
 void coin_collected(void);
diff --git a/src/game/obj_behaviors_2.c b/src/game/obj_behaviors_2.c
index 90b18e76b..eacaceaf9 100644
--- a/src/game/obj_behaviors_2.c
+++ b/src/game/obj_behaviors_2.c
@@ -521,24 +521,30 @@ static void obj_set_squished_action(void) {
 }
 
 static s32 obj_die_if_above_lava_and_health_non_positive(void) {
-    if (o->oMoveFlags & OBJ_MOVE_UNDERWATER_ON_GROUND) {
+    if (o->oMoveFlags & OBJ_MOVE_MASK_IN_WATER) {
         if (o->oGravity + o->oBuoyancy > 0.0f
-            || find_water_level(o->oPosX, o->oPosZ) - o->oPosY < 150.0f) {
+            || find_water_level(o->oPosX, o->oPosZ) - o->oPosY < 10.0f) {
             return FALSE;
         }
-    } else if (!(o->oMoveFlags & OBJ_MOVE_ABOVE_LAVA)) {
-        if (o->oMoveFlags & OBJ_MOVE_ENTERED_WATER) {
-            if (o->oWallHitboxRadius < 200.0f) {
-                cur_obj_play_sound_2(SOUND_OBJ_DIVING_INTO_WATER);
-            } else {
-                cur_obj_play_sound_2(SOUND_OBJ_DIVING_IN_WATER);
-            }
+        obj_die_if_health_non_positive();
+        return TRUE;
+
+    } else if (o->oMoveFlags & OBJ_MOVE_ABOVE_LAVA) {
+        if (o->oMoveFlags & (OBJ_MOVE_ON_GROUND | OBJ_MOVE_LANDED)) {
+            obj_die_if_health_non_positive();
+            return TRUE;
         }
-        return FALSE;
     }
 
-    obj_die_if_health_non_positive();
-    return TRUE;
+    if (o->oMoveFlags & OBJ_MOVE_ENTERED_WATER) {
+        if (o->oWallHitboxRadius < 200.0f) {
+            cur_obj_play_sound_2(SOUND_OBJ_DIVING_INTO_WATER);
+        } else {
+            cur_obj_play_sound_2(SOUND_OBJ_DIVING_IN_WATER);
+        }
+    }
+
+    return FALSE;
 }
 
 static s32 obj_handle_attacks(struct ObjectHitbox *hitbox, s32 attackedMarioAction,
diff --git a/src/game/object_helpers.c b/src/game/object_helpers.c
index 33aabfa4c..3a617cda7 100644
--- a/src/game/object_helpers.c
+++ b/src/game/object_helpers.c
@@ -1040,6 +1040,14 @@ static void cur_obj_move_update_ground_air_flags(UNUSED f32 gravity, f32 bouncin
         }
     }
 
+    o->oMoveFlags &= ~(OBJ_MOVE_ABOVE_LAVA | OBJ_MOVE_ABOVE_DEATH_BARRIER);
+    if (o->oFloorType == SURFACE_BURNING) {
+        o->oMoveFlags |= OBJ_MOVE_ABOVE_LAVA;
+    } else if ((o->oFloorType == SURFACE_DEATH_PLANE) || (o->oFloorType == SURFACE_VERTICAL_WIND)) {
+        //! This maybe misses SURFACE_WARP
+        o->oMoveFlags |= OBJ_MOVE_ABOVE_DEATH_BARRIER;
+    }
+
     o->oMoveFlags &= ~OBJ_MOVE_MASK_IN_WATER;
 }
 
@@ -1368,13 +1376,6 @@ static void cur_obj_update_floor(void) {
 
     if (floor != NULL) {
         SurfaceType floorType = floor->type;
-        if (floorType == SURFACE_BURNING) {
-            o->oMoveFlags |= OBJ_MOVE_ABOVE_LAVA;
-        } else if ((floorType == SURFACE_DEATH_PLANE) || (floorType == SURFACE_VERTICAL_WIND)) {
-            //! This maybe misses SURFACE_WARP
-            o->oMoveFlags |= OBJ_MOVE_ABOVE_DEATH_BARRIER;
-        }
-
         o->oFloorType = floorType;
         o->oFloorRoom = floor->room;
     } else {
@@ -1384,8 +1385,6 @@ static void cur_obj_update_floor(void) {
 }
 
 static void cur_obj_update_floor_and_resolve_wall_collisions(s16 steepSlopeDegrees) {
-    o->oMoveFlags &= ~(OBJ_MOVE_ABOVE_LAVA | OBJ_MOVE_ABOVE_DEATH_BARRIER);
-
     if (o->activeFlags & (ACTIVE_FLAG_FAR_AWAY | ACTIVE_FLAG_IN_DIFFERENT_ROOM)) {
         cur_obj_update_floor();
         o->oMoveFlags &= ~(OBJ_MOVE_HIT_WALL | OBJ_MOVE_MASK_IN_WATER);

From fc5ffa74386a37a2e818a7698e71a4dc63426038 Mon Sep 17 00:00:00 2001
From: Matt Pharoah <mtpharoah@gmail.com>
Date: Fri, 16 Feb 2024 12:45:29 -0500
Subject: [PATCH 16/23] Update libpl (#767)

* Update libpl

* Update libpl again

* would you believe its another libpl update
---
 lib/libpl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/libpl b/lib/libpl
index d6b3a90d0..cfc74e589 160000
--- a/lib/libpl
+++ b/lib/libpl
@@ -1 +1 @@
-Subproject commit d6b3a90d096183df54d2c69a4d64e0ced8dda811
+Subproject commit cfc74e5898945699bb1f5aeee8dd5507164e2384

From 524e35c97452b3a5b0eb76fcca02a3d793716bae Mon Sep 17 00:00:00 2001
From: Gregory Heskett <gheskett@gmail.com>
Date: Thu, 22 Feb 2024 00:38:19 -0500
Subject: [PATCH 17/23] Add `-falign-functions=32` to makefile flags (#745)

Seemingly provides a minor performance benefit and, more importantly, should (hopefully) substantially reduce "perf lotto", i.e. performance that varies with how functions happen to be aligned in the build.
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 5d7791592..16db20d8d 100644
--- a/Makefile
+++ b/Makefile
@@ -135,7 +135,7 @@ endif
 #==============================================================================#
 
 # Default non-gcc opt flags
-DEFAULT_OPT_FLAGS = -Ofast
+DEFAULT_OPT_FLAGS = -Ofast -falign-functions=32
 # Note: -fno-associative-math is used here to suppress warnings, ideally we would enable this as an optimization but
 # this conflicts with -ftrapping-math apparently.
 # TODO: Figure out how to allow -fassociative-math to be enabled

From 4c8233fe6a93ceefab16a5aefddedddf1ba9bd96 Mon Sep 17 00:00:00 2001
From: Gregory Heskett <gheskett@gmail.com>
Date: Tue, 27 Feb 2024 10:29:47 -0500
Subject: [PATCH 18/23] Fix automatic libpl cloning (#771)

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 16db20d8d..b63fe099a 100644
--- a/Makefile
+++ b/Makefile
@@ -350,7 +350,7 @@ ifeq ($(filter clean distclean print-%,$(MAKECMDGOALS)),)
 
   # Clone any needed submodules
   ifeq ($(LIBPL),1)
-    ifeq ($(wildcard $(LIBPL_DIR)),)
+    ifeq ($(wildcard $(LIBPL_DIR)/*.h),)
       $(info Cloning libpl submodule...)
       DUMMY != git submodule update --init $(LIBPL_DIR) > /dev/null || echo FAIL
       ifeq ($(DUMMY),FAIL)

From c96118bcd8641231f0cb7ccc9fe826aa3c87a75e Mon Sep 17 00:00:00 2001
From: arthurtilly <32559225+arthurtilly@users.noreply.github.com>
Date: Fri, 1 Mar 2024 13:03:08 +1300
Subject: [PATCH 19/23] Fix objects using throwmatrix having incorrect rotation
 when paused (#769)

* throwmatrix fix

* ok fixed it and removed some cringe matching

* happy bitch
---
 src/game/obj_behaviors.c        | 28 ++++++++++------------------
 src/game/rendering_graph_node.c |  4 +++-
 2 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/src/game/obj_behaviors.c b/src/game/obj_behaviors.c
index 0e83b4b38..6fbb7fdda 100644
--- a/src/game/obj_behaviors.c
+++ b/src/game/obj_behaviors.c
@@ -187,8 +187,6 @@ s8 turn_obj_away_from_steep_floor(struct Surface *objFloor, f32 floorY, f32 objV
 void obj_orient_graph(struct Object *obj, f32 normalX, f32 normalY, f32 normalZ) {
     Vec3f objVisualPosition, surfaceNormals;
 
-    Mat4 *throwMatrix;
-
     // Passes on orienting certain objects that shouldn't be oriented, like boulders.
     if (!sOrientObjWithFloor) {
         return;
@@ -199,17 +197,11 @@ void obj_orient_graph(struct Object *obj, f32 normalX, f32 normalY, f32 normalZ)
         return;
     }
 
-    throwMatrix = alloc_display_list(sizeof(*throwMatrix));
-    // If out of memory, fail to try orienting the object.
-    if (throwMatrix == NULL) {
-        return;
-    }
-
     vec3f_copy_y_off(objVisualPosition, &obj->oPosVec, obj->oGraphYOffset);
     vec3f_set(surfaceNormals, normalX, normalY, normalZ);
 
-    mtxf_align_terrain_normal(*throwMatrix, surfaceNormals, objVisualPosition, obj->oFaceAngleYaw);
-    obj->header.gfx.throwMatrix = throwMatrix;
+    mtxf_align_terrain_normal(obj->transform, surfaceNormals, objVisualPosition, obj->oFaceAngleYaw);
+    obj->header.gfx.throwMatrix = &obj->transform;
 }
 
 /**
@@ -255,10 +247,7 @@ void calc_new_obj_vel_and_pos_y(struct Surface *objFloor, f32 objFloorY, f32 obj
         }
     }
 
-    //! (Obj Position Crash) If you got an object with height past 2^31, the game would crash.
-    if ((s32) o->oPosY >= (s32) objFloorY && (s32) o->oPosY < (s32) objFloorY + 37) {
-        obj_orient_graph(o, floor_nX, floor_nY, floor_nZ);
-
+    if ((o->oPosY >= objFloorY) && (o->oPosY < objFloorY + 37)) {
         // Adds horizontal component of gravity for horizontal speed.
         f32 nxz = sqr(floor_nX) + sqr(floor_nZ);
         f32 vel = ((nxz) / (nxz + sqr(floor_nY))) * o->oGravity * 2;
@@ -312,9 +301,7 @@ void calc_new_obj_vel_and_pos_y_underwater(struct Surface *objFloor, f32 floorY,
         o->oVelY = -o->oVelY;
     }
 
-    if ((s32) o->oPosY >= (s32) floorY && (s32) o->oPosY < (s32) floorY + 37) {
-        obj_orient_graph(o, floor_nX, floor_nY, floor_nZ);
-
+    if ((o->oPosY >= floorY) && (o->oPosY < floorY + 37)) {
         // Adds horizontal component of gravity for horizontal speed.
         f32 nxz = sqr(floor_nX) + sqr(floor_nZ);
         f32 velm = (nxz / (nxz + sqr(floor_nY))) * netYAccel * 2;
@@ -411,6 +398,11 @@ s16 object_step(void) {
     }
 
     obj_update_pos_vel_xz();
+
+    if (sObjFloor && (o->oPosY >= floorY) && (o->oPosY < floorY + 37)) {
+        obj_orient_graph(o, sObjFloor->normal.x, sObjFloor->normal.y, sObjFloor->normal.z);
+    }
+
     if ((s32) o->oPosY == (s32) floorY) {
         collisionFlags += OBJ_COL_FLAG_GROUNDED;
     }
@@ -420,7 +412,7 @@ s16 object_step(void) {
     }
 
     // Generate a splash if in water.
-    obj_splash((s32) waterY, (s32) o->oPosY);
+    obj_splash(waterY, o->oPosY);
     return collisionFlags;
 }
 
diff --git a/src/game/rendering_graph_node.c b/src/game/rendering_graph_node.c
index 44a902ddc..cb0d62cbb 100644
--- a/src/game/rendering_graph_node.c
+++ b/src/game/rendering_graph_node.c
@@ -1039,6 +1039,8 @@ void geo_process_object(struct Object *node) {
     if (node->header.gfx.areaIndex == gCurGraphNodeRoot->areaIndex) {
         s32 isInvisible = (node->header.gfx.node.flags & GRAPH_RENDER_INVISIBLE);
         s32 noThrowMatrix = (node->header.gfx.throwMatrix == NULL);
+        // Maintain throw matrix pointer if the game is paused as it won't be updated.
+        Mat4 *oldThrowMatrix = (sCurrPlayMode == PLAY_MODE_PAUSED) ? node->header.gfx.throwMatrix : NULL;
 
         // If the throw matrix is null and the object is invisible, there is no need
         // to update billboarding, scale, rotation, etc. 
@@ -1087,7 +1089,7 @@ void geo_process_object(struct Object *node) {
 
         gMatStackIndex--;
         gCurrAnimType = ANIM_TYPE_NONE;
-        node->header.gfx.throwMatrix = NULL;
+        node->header.gfx.throwMatrix = oldThrowMatrix;
     }
 }
 

From 78747e98363e309dbfc0b83f0f829d91e8dd499e Mon Sep 17 00:00:00 2001
From: arthurtilly <32559225+arthurtilly@users.noreply.github.com>
Date: Fri, 1 Mar 2024 13:03:43 +1300
Subject: [PATCH 20/23] Toggle the default status of a few defines to avoid
 confusing new people (#740)

* define toggles

* disable blue preview
---
 include/config/config_graphics.h |  4 ++--
 include/config/config_menu.h     |  2 +-
 include/config/config_movement.h | 12 ++++++------
 include/config/config_objects.h  |  4 ++--
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/config/config_graphics.h b/include/config/config_graphics.h
index 179c833ae..ada791221 100644
--- a/include/config/config_graphics.h
+++ b/include/config/config_graphics.h
@@ -60,12 +60,12 @@
 /**
  * Makes the coins ia8 64x64 instead of ia16 32x32. Uses new ia8 textures so that vanilla coins look better.
  */
-#define IA8_COINS
+// #define IA8_COINS
 
 /**
  * Similar to the above, but 30 FPS (Textures by InTheBeef, cleaned up by Arceveti).
  */
-#define IA8_30FPS_COINS
+// #define IA8_30FPS_COINS
 
 /**
  * Use .rej microcode for certain objects (experimental - only should be used when F3DEX_GBI_2 is defined).
diff --git a/include/config/config_menu.h b/include/config/config_menu.h
index f5d434a62..2ac93b44a 100644
--- a/include/config/config_menu.h
+++ b/include/config/config_menu.h
@@ -14,7 +14,7 @@
 /**
  * Decides whether you can exit course while moving (has no effect if you disable Exit Course).
  */
-#define EXIT_COURSE_WHILE_MOVING
+// #define EXIT_COURSE_WHILE_MOVING
 
 /**
  * Decides whether to treat exiting course as if the player had died. 
diff --git a/include/config/config_movement.h b/include/config/config_movement.h
index 7034d1d86..c574ac141 100644
--- a/include/config/config_movement.h
+++ b/include/config/config_movement.h
@@ -26,12 +26,12 @@
  * - Precise turning control.
  * - Prevents falling from the edges.
  */
-#define BETTER_HANGING
+// #define BETTER_HANGING
 
 /**
  * Change the movement speed when hanging from a ceiling (the vanilla value is 4.0f, has no effect if BETTER_HANGING is enabled).
  */
-#define HANGING_SPEED 12.0f
+#define HANGING_SPEED 4.0f
 
 /**
  * Prevents Mario from falling asleep while idle.
@@ -81,12 +81,12 @@
 /**
  * If A and Z are pressed on the same frame while running, Mario will long jump instead of ground pound.
  */
-#define EASIER_LONG_JUMPS
+// #define EASIER_LONG_JUMPS
 
 /**
  * Enables the ability to hold Z while twirling to descend faster.
  */
-#define Z_TWIRL
+// #define Z_TWIRL
 
 /**
  * Disables bonks when ground pounding next to a wall.
@@ -101,7 +101,7 @@
 /**
  * Allows Mario to grab hangable ceilings from any state.
  */
-#define HANGING_FIX
+// #define HANGING_FIX
 
 /**
  * The maximum angle the player can wall kick, in degrees. 0..90. To allow 45 degree wall kicks, you must supply `46` to allow 45 and under.
@@ -134,7 +134,7 @@
  * be interrupted with text
  * Uncomment this to fix this bug, and frustrate speedrunners
  */
-// #define BUGFIX_DIALOG_TIME_STOP
+#define BUGFIX_DIALOG_TIME_STOP
 
 /**
  * Enables Platform Displacement 2, an upgrade to the physics involving moving platforms and how Mario interacts with them.
diff --git a/include/config/config_objects.h b/include/config/config_objects.h
index 72c37f639..10502b6bb 100644
--- a/include/config/config_objects.h
+++ b/include/config/config_objects.h
@@ -17,7 +17,7 @@
 /**
  * Moving Coins flicker and disappear when they hit lava instead of being instantly deleted.
  */
-#define COIN_LAVA_FLICKER
+// #define COIN_LAVA_FLICKER
 
 /**
  * Allows for retries on collecting the remaining blue coins from a blue coin switch.
@@ -56,7 +56,7 @@
 /**
  * Causes leaf particles to occasionally fall from trees which contain Hoot.
  */
-#define HOOT_TREE_PARTICLES
+// #define HOOT_TREE_PARTICLES
 
 /**************
  * -- MR I --

From 8257f77c575eb3ae3938b50f838d5de3d79339ab Mon Sep 17 00:00:00 2001
From: Gregory Heskett <gheskett@gmail.com>
Date: Fri, 1 Mar 2024 17:00:46 -0500
Subject: [PATCH 21/23] A few various Puppyprint bugfixes (#770)

---
 bin/segment2.c        | 8 ++++----
 src/game/fasttext.c   | 5 +++--
 src/game/puppyprint.c | 8 ++++----
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/bin/segment2.c b/bin/segment2.c
index 062eed45d..260e007fe 100644
--- a/bin/segment2.c
+++ b/bin/segment2.c
@@ -30,7 +30,7 @@ const u8 small_font_kerning_default[] = {
     /*A*/ 7, /*B*/ 7, /*C*/ 7, /*D*/ 7, /*E*/ 6, /*F*/ 5, /*G*/ 8, /*H*/ 6, /*I*/ 6, /*J*/ 5, /*K*/ 7, /*L*/ 6, /*M*/ 7, /*N*/ 7, /*O*/ 7, /*P*/ 6, 
     /*Q*/ 8, /*R*/ 6, /*S*/ 7, /*T*/ 7, /*U*/ 7, /*V*/ 7, /*W*/ 8, /*X*/ 7, /*Y*/ 7, /*Z*/ 7, /*[*/ 0, /*\\*/ 0, /*]*/ 0, /*^*/ 8, /*_*/ 0, /*`*/ 0, 
     /*a*/ 7, /*b*/ 7, /*c*/ 6, /*d*/ 7, /*e*/ 7, /*f*/ 7, /*g*/ 7, /*h*/ 7, /*i*/ 3, /*j*/ 5, /*k*/ 8, /*l*/ 4, /*m*/ 7, /*n*/ 7, /*o*/ 7, /*p*/ 7, 
-    /*q*/ 7, /*r*/ 6, /*s*/ 6, /*t*/ 6, /*u*/ 6, /*v*/ 7, /*w*/ 8, /*x*/ 6, /*y*/ 8, /*z*/ 7, /*~*/ 8, 
+    /*q*/ 7, /*r*/ 6, /*s*/ 6, /*t*/ 6, /*u*/ 6, /*v*/ 7, /*w*/ 8, /*x*/ 6, /*y*/ 8, /*z*/ 7, /*{*/ 0, /*|*/ 0, /*}*/ 0, /*~*/ 8, 
 };
 
 const u16 small_font_offsets_default[] = {
@@ -49,7 +49,7 @@ static const u8 small_font_kerning_outline[] = {
     /*A*/ 6, /*B*/ 6, /*C*/ 6, /*D*/ 6, /*E*/ 6, /*F*/ 6, /*G*/ 6, /*H*/ 6, /*I*/ 5, /*J*/ 7, /*K*/ 6, /*L*/ 6, /*M*/ 6, /*N*/ 6, /*O*/ 6, /*P*/ 6, 
     /*Q*/ 6, /*R*/ 6, /*S*/ 6, /*T*/ 6, /*U*/ 6, /*V*/ 6, /*W*/ 6, /*X*/ 6, /*Y*/ 6, /*Z*/ 6, /*[*/ 0, /*\\*/ 0, /*]*/ 0, /*^*/ 7, /*_*/ 0, /*`*/ 0,  
     /*a*/ 5, /*b*/ 5, /*c*/ 5, /*d*/ 5, /*e*/ 5, /*f*/ 5, /*g*/ 5, /*h*/ 5, /*i*/ 2, /*j*/ 6, /*k*/ 4, /*l*/ 2, /*m*/ 5, /*n*/ 5, /*o*/ 5, /*p*/ 5, 
-    /*q*/ 5, /*r*/ 5, /*s*/ 5, /*t*/ 5, /*u*/ 5, /*v*/ 5, /*w*/ 5, /*x*/ 5, /*y*/ 5, /*z*/ 5, /*~*/ 6,   
+    /*q*/ 5, /*r*/ 5, /*s*/ 5, /*t*/ 5, /*u*/ 5, /*v*/ 5, /*w*/ 5, /*x*/ 5, /*y*/ 5, /*z*/ 5, /*{*/ 0, /*|*/ 0, /*}*/ 0, /*~*/ 6,   
 };
 
 const u16 small_font_offsets_outline[] = {
@@ -68,7 +68,7 @@ static const u8 small_font_kerning_plain[] = {
     /*A*/ 6, /*B*/ 6, /*C*/ 6, /*D*/ 6, /*E*/ 6, /*F*/ 6, /*G*/ 6, /*H*/ 6, /*I*/ 4, /*J*/ 6, /*K*/ 6, /*L*/ 6, /*M*/ 7, /*N*/ 7, /*O*/ 6, /*P*/ 6, 
     /*Q*/ 6, /*R*/ 6, /*S*/ 6, /*T*/ 6, /*U*/ 6, /*V*/ 6, /*W*/ 7, /*X*/ 6, /*Y*/ 6, /*Z*/ 6, /*[*/ 0, /*\\*/ 0, /*]*/ 0, /*^*/ 7, /*_*/ 0, /*`*/ 0,
     /*a*/ 6, /*b*/ 6, /*c*/ 6, /*d*/ 6, /*e*/ 6, /*f*/ 6, /*g*/ 6, /*h*/ 6, /*i*/ 3, /*j*/ 4, /*k*/ 6, /*l*/ 5, /*m*/ 7, /*n*/ 6, /*o*/ 6, /*p*/ 6, 
-    /*q*/ 6, /*r*/ 6, /*s*/ 6, /*t*/ 6, /*u*/ 6, /*v*/ 6, /*w*/ 7, /*x*/ 6, /*y*/ 6, /*z*/ 6, /*~*/ 7,   
+    /*q*/ 6, /*r*/ 6, /*s*/ 6, /*t*/ 6, /*u*/ 6, /*v*/ 6, /*w*/ 7, /*x*/ 6, /*y*/ 6, /*z*/ 6, /*{*/ 0, /*|*/ 0, /*}*/ 0, /*~*/ 7,   
 };
 
 const u16 small_font_offsets_plain[] = {
@@ -87,7 +87,7 @@ static const u8 small_font_kerning_vanilla[] = {
     /*A*/ 5, /*B*/ 5, /*C*/ 5, /*D*/ 5, /*E*/ 5, /*F*/ 5, /*G*/ 5, /*H*/ 5, /*I*/ 3, /*J*/ 5, /*K*/ 5, /*L*/ 5, /*M*/ 7, /*N*/ 7, /*O*/ 6, /*P*/ 5, 
     /*Q*/ 6, /*R*/ 5, /*S*/ 5, /*T*/ 5, /*U*/ 5, /*V*/ 5, /*W*/ 7, /*X*/ 6, /*Y*/ 5, /*Z*/ 5, /*[*/ 0, /*\\*/ 0, /*]*/ 0, /*^*/ 7, /*_*/ 0, /*`*/ 0, 
     /*a*/ 5, /*b*/ 4, /*c*/ 4, /*d*/ 4, /*e*/ 4, /*f*/ 5, /*g*/ 5, /*h*/ 4, /*i*/ 3, /*j*/ 4, /*k*/ 3, /*l*/ 2, /*m*/ 6, /*n*/ 4, /*o*/ 4, /*p*/ 4, 
-    /*q*/ 5, /*r*/ 4, /*s*/ 4, /*t*/ 4, /*u*/ 4, /*v*/ 4, /*w*/ 7, /*x*/ 5, /*y*/ 4, /*z*/ 5, /*~*/ 6,   
+    /*q*/ 5, /*r*/ 4, /*s*/ 4, /*t*/ 4, /*u*/ 4, /*v*/ 4, /*w*/ 7, /*x*/ 5, /*y*/ 4, /*z*/ 5, /*{*/ 0, /*|*/ 0, /*}*/ 0, /*~*/ 6,   
 };
 
 const u16 small_font_offsets_vanilla[] = {
diff --git a/src/game/fasttext.c b/src/game/fasttext.c
index 89b9387b6..20357cb72 100644
--- a/src/game/fasttext.c
+++ b/src/game/fasttext.c
@@ -59,7 +59,7 @@ void drawSmallString_impl(Gfx **dl, int x, int y, const char* string, int r, int
 
     while (string[i] != '\0') {
         unsigned int cur_char = string[i];
-        s32 goddamnJMeasure = string[i] == 'j' ? -1 : 0;
+        s32 goddamnJMeasure;
 
         if (cur_char == '\n') {
             xPos = x;
@@ -75,7 +75,8 @@ void drawSmallString_impl(Gfx **dl, int x, int y, const char* string, int r, int
         } else {
             if (cur_char != ' ') {
                 s = computeS(cur_char);
-                gSPTextureRectangle(dlHead++, (xPos + 0) << 2, (yPos + 0) << 2, (xPos + 8) << 2, (yPos + 12) << 2, 0, (s << 5) - goddamnJMeasure, 0, 1 << 10, 1 << 10);
+                goddamnJMeasure = (s == 512) ? 1 : 0;
+                gSPTextureRectangle(dlHead++, (xPos + 0) << 2, (yPos + 0) << 2, (xPos + 8) << 2, (yPos + 12) << 2, 0, (s << 5) + goddamnJMeasure, 0, 1 << 10, 1 << 10);
             }
             xPos += fast_text_font_kerning[cur_char - ' '];
         }
diff --git a/src/game/puppyprint.c b/src/game/puppyprint.c
index 5da36fdd4..6afdc709a 100644
--- a/src/game/puppyprint.c
+++ b/src/game/puppyprint.c
@@ -1643,7 +1643,7 @@ void print_small_text(s32 x, s32 y, const char *str, s32 align, s32 amount, u8 f
         }
 
         get_char_from_byte(&textX, &textPos[0], str[i], &widthX, &spaceX, &offsetY, font);
-        s32 goddamnJMeasure = textX == 256 ? -1 : 0; // Hack to fix a rendering bug.
+        s32 goddamnJMeasure = textX == 256 ? 1 : 0; // Hack to fix a rendering bug.
         if (str[i] != ' ' && str[i] != '\t') {
             if (xlu != prevxlu) {
                 prevxlu = xlu;
@@ -1736,7 +1736,7 @@ void print_small_text_light(s32 x, s32 y, const char *str, s32 align, s32 amount
         }
 
         get_char_from_byte(&textX, &textPos[0], str[i], &widthX, &spaceX, &offsetY, font);
-        s32 goddamnJMeasure = textX == 256 ? -1 : 0; // Hack to fix a rendering bug.
+        s32 goddamnJMeasure = textX == 256 ? 1 : 0; // Hack to fix a rendering bug.
         if (str[i] != ' ' && str[i] != '\t') {
             if (xlu != prevxlu) {
                 prevxlu = xlu;
@@ -2092,7 +2092,7 @@ void render_multi_image(Texture *image, s32 x, s32 y, s32 width, s32 height, UNU
 
         gDPLoadSync(gDisplayListHead++);
         gDPLoadTextureTile(gDisplayListHead++,
-            image, G_IM_FMT_RGBA, G_IM_SIZ_16b, width, height, posW, posH, ((posW + imW) - 1), ((posH + imH) - 1), 0, (G_TX_NOMIRROR | G_TX_CLAMP), (G_TX_NOMIRROR | G_TX_CLAMP), maskW, maskH, 0, 0);
+            image, G_IM_FMT_RGBA, G_IM_SIZ_16b, width, height, posW, posH, ((posW + imW) - 1), ((posH + imH) - 1), 0, (G_TX_NOMIRROR | G_TX_WRAP), (G_TX_NOMIRROR | G_TX_WRAP), maskW, maskH, 0, 0);
         gSPScisTextureRectangle(gDisplayListHead++,
             ((x + posW) << 2),
             ((y + posH) << 2),
@@ -2108,7 +2108,7 @@ void render_multi_image(Texture *image, s32 x, s32 y, s32 width, s32 height, UNU
             posW = i * imW;
             gDPLoadSync(gDisplayListHead++);
             gDPLoadTextureTile(gDisplayListHead++,
-                image, G_IM_FMT_RGBA, G_IM_SIZ_16b, width, height, posW, posH, ((posW + imW) - 1), (height - 1), 0, (G_TX_NOMIRROR | G_TX_CLAMP), (G_TX_NOMIRROR | G_TX_CLAMP), maskW, maskH, 0, 0);
+                image, G_IM_FMT_RGBA, G_IM_SIZ_16b, width, height, posW, posH, ((posW + imW) - 1), (height - 1), 0, (G_TX_NOMIRROR | G_TX_WRAP), (G_TX_NOMIRROR | G_TX_WRAP), maskW, maskH, 0, 0);
             gSPScisTextureRectangle(gDisplayListHead++,
                 (x + posW) << 2,
                 (y + posH) << 2,

From c5d9f535eec102952e8403f467341116a0dfcbd8 Mon Sep 17 00:00:00 2001
From: Gregory Heskett <gheskett@gmail.com>
Date: Fri, 1 Mar 2024 17:03:30 -0500
Subject: [PATCH 22/23] HackerSM64 v2.2.0

---
 VERSION.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION.txt b/VERSION.txt
index 535b2f4d4..a4b6ac3de 100644
--- a/VERSION.txt
+++ b/VERSION.txt
@@ -1 +1 @@
-v2.1.3
+v2.2.0

From 197d8074e335b9937f5ac4b96a1b2a3cd50a3f85 Mon Sep 17 00:00:00 2001
From: Gregory Heskett <gheskett@gmail.com>
Date: Fri, 1 Mar 2024 17:47:05 -0500
Subject: [PATCH 23/23] Vanilla bugfix: obj_init_animation referencing o
 instead of obj

---
 src/game/object_helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/game/object_helpers.c b/src/game/object_helpers.c
index 3a617cda7..82435a4f6 100644
--- a/src/game/object_helpers.c
+++ b/src/game/object_helpers.c
@@ -468,7 +468,7 @@ void obj_set_gfx_pos_from_pos(struct Object *obj) {
 }
 
 void obj_init_animation(struct Object *obj, s32 animIndex) {
-    struct Animation **anims = o->oAnimations;
+    struct Animation **anims = obj->oAnimations;
     geo_obj_init_animation(&obj->header.gfx, &anims[animIndex]);
 }