diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..9fd99fb91
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "lib/libpl"]
+	path = lib/libpl
+	url = https://gitlab.com/parallel-launcher/libpl.git
diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
index 3ff5fdaef..3de36d2bb 100644
--- a/.vscode/c_cpp_properties.json
+++ b/.vscode/c_cpp_properties.json
@@ -26,7 +26,8 @@
                 "VERSION_US=1",
                 "F3DEX_GBI_2=1",
                 "F3DZEX_NON_GBI_2=1",
-                "F3DEX_GBI_SHARED=1"
+                "F3DEX_GBI_SHARED=1",
+                "LIBPL=1"
             ],
             "compilerPath": "/usr/bin/mips-linux-gnu-gcc",
             "cStandard": "gnu17",
diff --git a/Makefile b/Makefile
index 35fb2a12d..b63fe099a 100644
--- a/Makefile
+++ b/Makefile
@@ -135,7 +135,7 @@ endif
 #==============================================================================#
 
 # Default non-gcc opt flags
-DEFAULT_OPT_FLAGS = -Ofast
+DEFAULT_OPT_FLAGS = -Ofast -falign-functions=32
 # Note: -fno-associative-math is used here to suppress warnings, ideally we would enable this as an optimization but
 # this conflicts with -ftrapping-math apparently.
 # TODO: Figure out how to allow -fassociative-math to be enabled
@@ -253,6 +253,18 @@ ifeq ($(HVQM),1)
   SRC_DIRS += src/hvqm
 endif
 
+# LIBPL - whether to include libpl library for interfacing with Parallel Launcher
+# (the libpl submodule is cloned automatically the first time you build with this enabled)
+#   1 - includes code in ROM
+#   0 - does not
+LIBPL ?= 0
+LIBPL_DIR := lib/libpl
+$(eval $(call validate-option,LIBPL,0 1))
+ifeq ($(LIBPL),1)
+  DEFINES += LIBPL=1
+  SRC_DIRS += $(LIBPL_DIR)
+endif
+
 BUILD_DIR_BASE := build
 # BUILD_DIR is the location where all build artifacts are placed
 BUILD_DIR      := $(BUILD_DIR_BASE)/$(VERSION)_$(CONSOLE)
@@ -335,6 +347,18 @@ ifeq ($(filter clean distclean print-%,$(MAKECMDGOALS)),)
     ifeq ($(DUMMY),FAIL)
       $(error Failed to build tools)
     endif
+
+  # Clone any needed submodules
+  ifeq ($(LIBPL),1)
+    ifeq ($(wildcard $(LIBPL_DIR)/*.h),)
+      $(info Cloning libpl submodule...)
+      DUMMY != git submodule update --init $(LIBPL_DIR) > /dev/null || echo FAIL
+      ifeq ($(DUMMY),FAIL)
+        $(error Failed to clone libpl submodule)
+      endif
+    endif
+  endif
+
   $(info Building ROM...)
 
 endif
diff --git a/README.md b/README.md
index b97dc4722..37850f2d2 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,6 @@ Thanks to Frame#5375 and AloXado320 for also helping with silhouette stuff
 
 **Lighting Engine by Wiseguy**
 - Lighting Engine is available on a separate branch ([base/lighting-engine](https://github.com/Reonu/HackerSM64/tree/base/lighting-engine)). Instructions on how to use it are in the readme of that branch.
-- Alternatively, the main repo has `Puppylights` available, which is a more lightweight, but limited lighting library intended to be used to modify existing light properties. You can look at `puppylights.c` to find out how to use it.
 
 **Puppycam**
 - Puppycam is available on the master branch now, you can toggle it in `config/config_camera.h`. *
diff --git a/VERSION.txt b/VERSION.txt
index 535b2f4d4..a4b6ac3de 100644
--- a/VERSION.txt
+++ b/VERSION.txt
@@ -1 +1 @@
-v2.1.3
+v2.2.0
diff --git a/bin/segment2.c b/bin/segment2.c
index 062eed45d..260e007fe 100644
--- a/bin/segment2.c
+++ b/bin/segment2.c
@@ -30,7 +30,7 @@ const u8 small_font_kerning_default[] = {
     /*A*/ 7, /*B*/ 7, /*C*/ 7, /*D*/ 7, /*E*/ 6, /*F*/ 5, /*G*/ 8, /*H*/ 6, /*I*/ 6, /*J*/ 5, /*K*/ 7, /*L*/ 6, /*M*/ 7, /*N*/ 7, /*O*/ 7, /*P*/ 6, 
     /*Q*/ 8, /*R*/ 6, /*S*/ 7, /*T*/ 7, /*U*/ 7, /*V*/ 7, /*W*/ 8, /*X*/ 7, /*Y*/ 7, /*Z*/ 7, /*[*/ 0, /*\\*/ 0, /*]*/ 0, /*^*/ 8, /*_*/ 0, /*`*/ 0, 
     /*a*/ 7, /*b*/ 7, /*c*/ 6, /*d*/ 7, /*e*/ 7, /*f*/ 7, /*g*/ 7, /*h*/ 7, /*i*/ 3, /*j*/ 5, /*k*/ 8, /*l*/ 4, /*m*/ 7, /*n*/ 7, /*o*/ 7, /*p*/ 7, 
-    /*q*/ 7, /*r*/ 6, /*s*/ 6, /*t*/ 6, /*u*/ 6, /*v*/ 7, /*w*/ 8, /*x*/ 6, /*y*/ 8, /*z*/ 7, /*~*/ 8, 
+    /*q*/ 7, /*r*/ 6, /*s*/ 6, /*t*/ 6, /*u*/ 6, /*v*/ 7, /*w*/ 8, /*x*/ 6, /*y*/ 8, /*z*/ 7, /*{*/ 0, /*|*/ 0, /*}*/ 0, /*~*/ 8, 
 };
 
 const u16 small_font_offsets_default[] = {
@@ -49,7 +49,7 @@ static const u8 small_font_kerning_outline[] = {
     /*A*/ 6, /*B*/ 6, /*C*/ 6, /*D*/ 6, /*E*/ 6, /*F*/ 6, /*G*/ 6, /*H*/ 6, /*I*/ 5, /*J*/ 7, /*K*/ 6, /*L*/ 6, /*M*/ 6, /*N*/ 6, /*O*/ 6, /*P*/ 6, 
     /*Q*/ 6, /*R*/ 6, /*S*/ 6, /*T*/ 6, /*U*/ 6, /*V*/ 6, /*W*/ 6, /*X*/ 6, /*Y*/ 6, /*Z*/ 6, /*[*/ 0, /*\\*/ 0, /*]*/ 0, /*^*/ 7, /*_*/ 0, /*`*/ 0,  
     /*a*/ 5, /*b*/ 5, /*c*/ 5, /*d*/ 5, /*e*/ 5, /*f*/ 5, /*g*/ 5, /*h*/ 5, /*i*/ 2, /*j*/ 6, /*k*/ 4, /*l*/ 2, /*m*/ 5, /*n*/ 5, /*o*/ 5, /*p*/ 5, 
-    /*q*/ 5, /*r*/ 5, /*s*/ 5, /*t*/ 5, /*u*/ 5, /*v*/ 5, /*w*/ 5, /*x*/ 5, /*y*/ 5, /*z*/ 5, /*~*/ 6,   
+    /*q*/ 5, /*r*/ 5, /*s*/ 5, /*t*/ 5, /*u*/ 5, /*v*/ 5, /*w*/ 5, /*x*/ 5, /*y*/ 5, /*z*/ 5, /*{*/ 0, /*|*/ 0, /*}*/ 0, /*~*/ 6,   
 };
 
 const u16 small_font_offsets_outline[] = {
@@ -68,7 +68,7 @@ static const u8 small_font_kerning_plain[] = {
     /*A*/ 6, /*B*/ 6, /*C*/ 6, /*D*/ 6, /*E*/ 6, /*F*/ 6, /*G*/ 6, /*H*/ 6, /*I*/ 4, /*J*/ 6, /*K*/ 6, /*L*/ 6, /*M*/ 7, /*N*/ 7, /*O*/ 6, /*P*/ 6, 
     /*Q*/ 6, /*R*/ 6, /*S*/ 6, /*T*/ 6, /*U*/ 6, /*V*/ 6, /*W*/ 7, /*X*/ 6, /*Y*/ 6, /*Z*/ 6, /*[*/ 0, /*\\*/ 0, /*]*/ 0, /*^*/ 7, /*_*/ 0, /*`*/ 0,
     /*a*/ 6, /*b*/ 6, /*c*/ 6, /*d*/ 6, /*e*/ 6, /*f*/ 6, /*g*/ 6, /*h*/ 6, /*i*/ 3, /*j*/ 4, /*k*/ 6, /*l*/ 5, /*m*/ 7, /*n*/ 6, /*o*/ 6, /*p*/ 6, 
-    /*q*/ 6, /*r*/ 6, /*s*/ 6, /*t*/ 6, /*u*/ 6, /*v*/ 6, /*w*/ 7, /*x*/ 6, /*y*/ 6, /*z*/ 6, /*~*/ 7,   
+    /*q*/ 6, /*r*/ 6, /*s*/ 6, /*t*/ 6, /*u*/ 6, /*v*/ 6, /*w*/ 7, /*x*/ 6, /*y*/ 6, /*z*/ 6, /*{*/ 0, /*|*/ 0, /*}*/ 0, /*~*/ 7,   
 };
 
 const u16 small_font_offsets_plain[] = {
@@ -87,7 +87,7 @@ static const u8 small_font_kerning_vanilla[] = {
     /*A*/ 5, /*B*/ 5, /*C*/ 5, /*D*/ 5, /*E*/ 5, /*F*/ 5, /*G*/ 5, /*H*/ 5, /*I*/ 3, /*J*/ 5, /*K*/ 5, /*L*/ 5, /*M*/ 7, /*N*/ 7, /*O*/ 6, /*P*/ 5, 
     /*Q*/ 6, /*R*/ 5, /*S*/ 5, /*T*/ 5, /*U*/ 5, /*V*/ 5, /*W*/ 7, /*X*/ 6, /*Y*/ 5, /*Z*/ 5, /*[*/ 0, /*\\*/ 0, /*]*/ 0, /*^*/ 7, /*_*/ 0, /*`*/ 0, 
     /*a*/ 5, /*b*/ 4, /*c*/ 4, /*d*/ 4, /*e*/ 4, /*f*/ 5, /*g*/ 5, /*h*/ 4, /*i*/ 3, /*j*/ 4, /*k*/ 3, /*l*/ 2, /*m*/ 6, /*n*/ 4, /*o*/ 4, /*p*/ 4, 
-    /*q*/ 5, /*r*/ 4, /*s*/ 4, /*t*/ 4, /*u*/ 4, /*v*/ 4, /*w*/ 7, /*x*/ 5, /*y*/ 4, /*z*/ 5, /*~*/ 6,   
+    /*q*/ 5, /*r*/ 4, /*s*/ 4, /*t*/ 4, /*u*/ 4, /*v*/ 4, /*w*/ 7, /*x*/ 5, /*y*/ 4, /*z*/ 5, /*{*/ 0, /*|*/ 0, /*}*/ 0, /*~*/ 6,   
 };
 
 const u16 small_font_offsets_vanilla[] = {
diff --git a/data/behavior_data.c b/data/behavior_data.c
index 2e2c2dc1d..a0d4303e8 100644
--- a/data/behavior_data.c
+++ b/data/behavior_data.c
@@ -66,6 +66,7 @@
 #define BC_HH(a, b) (_SHIFTL(a, 16, 16) | _SHIFTL(b, 0, 16))
 #define BC_W(a) ((uintptr_t)(u32)(a))
 #define BC_PTR(a) ((uintptr_t)(a))
+#define BC_BPTR(a, b) (_SHIFTL(a, 24, 8) + OS_K0_TO_PHYSICAL(b)) // One entry: command byte `a` in bits 24-31 plus OS_K0_TO_PHYSICAL(b). NOTE(review): assumes that address fits in the low 24 bits - confirm.
 
 enum BehaviorCommands {
     /*0x00*/ BHV_CMD_BEGIN,
@@ -180,8 +181,7 @@ enum BehaviorCommands {
 
 // Executes a native game function.
 #define CALL_NATIVE(func) \
-    BC_B(BHV_CMD_CALL_NATIVE), \
-    BC_PTR(func)
+    BC_BPTR(BHV_CMD_CALL_NATIVE, func)
 
 // Adds a float to the specified field.
 #define ADD_FLOAT(field, value) \
@@ -386,8 +386,7 @@ enum BehaviorCommands {
 
 // Spawns a water droplet with the given parameters.
 #define SPAWN_WATER_DROPLET(dropletParams) \
-    BC_B(BHV_CMD_SPAWN_WATER_DROPLET), \
-    BC_PTR(dropletParams)
+    BC_BPTR(BHV_CMD_SPAWN_WATER_DROPLET, dropletParams)
 
 
 const BehaviorScript bhvStarDoor[] = {
@@ -5873,7 +5872,7 @@ const BehaviorScript bhvRacingPenguin[] = {
     OR_INT(oFlags, (OBJ_FLAG_COMPUTE_ANGLE_TO_MARIO | OBJ_FLAG_ACTIVE_FROM_AFAR | OBJ_FLAG_COMPUTE_DIST_TO_MARIO | OBJ_FLAG_SET_FACE_YAW_TO_MOVE_YAW | OBJ_FLAG_UPDATE_GFX_POS_AND_ANGLE)),
     LOAD_ANIMATIONS(oAnimations, penguin_seg5_anims_05008B74),
     ANIMATE(PENGUIN_ANIM_IDLE),
-    SET_OBJ_PHYSICS(/*Wall hitbox radius*/ 300, /*Gravity*/ -800, /*Bounciness*/ -5, /*Drag strength*/ 0, /*Friction*/ 0, /*Buoyancy*/ 0, /*Unused*/ 0, 0),
+    SET_OBJ_PHYSICS(/*Wall hitbox radius*/ 200, /*Gravity*/ -800, /*Bounciness*/ -5, /*Drag strength*/ 0, /*Friction*/ 0, /*Buoyancy*/ 0, /*Unused*/ 0, 0),
     SCALE(/*Unused*/ 0, /*Field*/ 400),
     CALL_NATIVE(bhv_racing_penguin_init),
     BEGIN_LOOP(),
diff --git a/include/config/config_cutscenes.h b/include/config/config_cutscenes.h
index e1b0bdc5d..ab2616b30 100644
--- a/include/config/config_cutscenes.h
+++ b/include/config/config_cutscenes.h
@@ -17,3 +17,9 @@
  * Skips the title/splash screen (Super Mario 64 logo).
  */
 // #define SKIP_TITLE_SCREEN
+
+/**
+ * Skips the file select screen and proceeds directly to START_LEVEL.
+ */
+// #define SKIP_FILE_SELECT
+
diff --git a/include/config/config_graphics.h b/include/config/config_graphics.h
index 179c833ae..ada791221 100644
--- a/include/config/config_graphics.h
+++ b/include/config/config_graphics.h
@@ -60,12 +60,12 @@
 /**
  * Makes the coins ia8 64x64 instead of ia16 32x32. Uses new ia8 textures so that vanilla coins look better.
  */
-#define IA8_COINS
+// #define IA8_COINS
 
 /**
  * Similar to the above, but 30 FPS (Textures by InTheBeef, cleaned up by Arceveti).
  */
-#define IA8_30FPS_COINS
+// #define IA8_30FPS_COINS
 
 /**
  * Use .rej microcode for certain objects (experimental - only should be used when F3DEX_GBI_2 is defined).
diff --git a/include/config/config_menu.h b/include/config/config_menu.h
index f5d434a62..2ac93b44a 100644
--- a/include/config/config_menu.h
+++ b/include/config/config_menu.h
@@ -14,7 +14,7 @@
 /**
  * Decides whether you can exit course while moving (has no effect if you disable Exit Course).
  */
-#define EXIT_COURSE_WHILE_MOVING
+// #define EXIT_COURSE_WHILE_MOVING
 
 /**
  * Decides whether to treat exiting course as if the player had died. 
diff --git a/include/config/config_movement.h b/include/config/config_movement.h
index 882ae3b97..c574ac141 100644
--- a/include/config/config_movement.h
+++ b/include/config/config_movement.h
@@ -26,12 +26,12 @@
  * - Precise turning control.
  * - Prevents falling from the edges.
  */
-#define BETTER_HANGING
+// #define BETTER_HANGING
 
 /**
  * Change the movement speed when hanging from a ceiling (the vanilla value is 4.0f, has no effect if BETTER_HANGING is enabled).
  */
-#define HANGING_SPEED 12.0f
+#define HANGING_SPEED 4.0f
 
 /**
  * Prevents Mario from falling asleep while idle.
@@ -81,12 +81,12 @@
 /**
  * If A and Z are pressed on the same frame while running, Mario will long jump instead of ground pound.
  */
-#define EASIER_LONG_JUMPS
+// #define EASIER_LONG_JUMPS
 
 /**
  * Enables the ability to hold Z while twirling to descend faster.
  */
-#define Z_TWIRL
+// #define Z_TWIRL
 
 /**
  * Disables bonks when ground pounding next to a wall.
@@ -101,7 +101,7 @@
 /**
  * Allows Mario to grab hangable ceilings from any state.
  */
-#define HANGING_FIX
+// #define HANGING_FIX
 
 /**
  * The maximum angle the player can wall kick, in degrees. 0..90. To allow 45 degree wall kicks, you must supply `46` to allow 45 and under.
@@ -113,6 +113,11 @@
  */
 #define DONT_LEDGE_GRAB_STEEP_SLOPES
 
+/**
+ * Buffers an A input if you jump off a slope during the landing lag
+ */
+// #define SLOPE_BUFFER
+
 /**
  * Disables BLJs and crushes SimpleFlips's dreams.
  */
@@ -129,7 +134,7 @@
  * be interrupted with text
  * Uncomment this to fix this bug, and frustrate speedrunners
  */
-// #define BUGFIX_DIALOG_TIME_STOP
+#define BUGFIX_DIALOG_TIME_STOP
 
 /**
  * Enables Platform Displacement 2, an upgrade to the physics involving moving platforms and how Mario interacts with them.
diff --git a/include/config/config_objects.h b/include/config/config_objects.h
index 72c37f639..10502b6bb 100644
--- a/include/config/config_objects.h
+++ b/include/config/config_objects.h
@@ -17,7 +17,7 @@
 /**
  * Moving Coins flicker and disappear when they hit lava instead of being instantly deleted.
  */
-#define COIN_LAVA_FLICKER
+// #define COIN_LAVA_FLICKER
 
 /**
  * Allows for retries on collecting the remaining blue coins from a blue coin switch.
@@ -56,7 +56,7 @@
 /**
  * Causes leaf particles to occasionally fall from trees which contain Hoot.
  */
-#define HOOT_TREE_PARTICLES
+// #define HOOT_TREE_PARTICLES
 
 /**************
  * -- MR I --
diff --git a/include/config/config_safeguards.h b/include/config/config_safeguards.h
index 3c7dce3f5..6fb47fc08 100644
--- a/include/config/config_safeguards.h
+++ b/include/config/config_safeguards.h
@@ -49,26 +49,6 @@
     #undef BETTER_REVERB
 #endif
 
-
-/*****************
- * config_graphics.h
- */
-
-#ifndef F3DEX_GBI_2
-    #undef OBJECTS_REJ // OBJECTS_REJ requires f3dex2.
-#endif // !F3DEX_GBI_2
-
-#ifndef F3DEX_GBI_SHARED
-    #undef OBJECTS_REJ // Non F3DEX-based ucodes do NOT support ucode switching.
-#endif // !F3DEX_GBI_SHARED
-
-#ifdef OBJECTS_REJ
-    // Enable required ucodes.
-    #define F3DEX2_REJ_GBI
-    #define F3DLX2_REJ_GBI
-#endif // OBJECTS_REJ
-
-
 /*****************
  * config_debug.h
  */
diff --git a/include/level_commands.h b/include/level_commands.h
index 323312863..fa8f8d136 100644
--- a/include/level_commands.h
+++ b/include/level_commands.h
@@ -5,7 +5,6 @@
 
 #include "level_table.h"
 #include "config.h"
-#include "game/puppylights.h"
 
 enum LevelCommands {
     /*0x00*/ LEVEL_CMD_LOAD_AND_EXECUTE,
@@ -71,9 +70,7 @@ enum LevelCommands {
     /*0x3C*/ LEVEL_CMD_GET_OR_SET_VAR,
     /*0x3D*/ LEVEL_CMD_PUPPYVOLUME,
     /*0x3E*/ LEVEL_CMD_CHANGE_AREA_SKYBOX,
-    /*0x3F*/ LEVEL_CMD_PUPPYLIGHT_ENVIRONMENT,
-    /*0x40*/ LEVEL_CMD_PUPPYLIGHT_NODE,
-    /*0x41*/ LEVEL_CMD_SET_ECHO,
+    /*0x3F*/ LEVEL_CMD_SET_ECHO,
 };
 
 enum LevelActs {
diff --git a/include/object_constants.h b/include/object_constants.h
index 7ee6ad171..949dd8d6c 100644
--- a/include/object_constants.h
+++ b/include/object_constants.h
@@ -49,8 +49,6 @@ enum ObjFlags {
     OBJ_FLAG_PERSISTENT_RESPAWN                = (1 << 14), // 0x00004000
     OBJ_FLAG_VELOCITY_PLATFORM                 = (1 << 15), // 0x00008000
     OBJ_FLAG_DONT_CALC_COLL_DIST               = (1 << 16), // 0x00010000
-    OBJ_FLAG_UCODE_SMALL                       = (1 << 17), // 0x00020000
-    OBJ_FLAG_UCODE_LARGE                       = (1 << 18), // 0x00040000
     OBJ_FLAG_SILHOUETTE                        = (1 << 19), // 0x00080000
     OBJ_FLAG_OCCLUDE_SILHOUETTE                = (1 << 20), // 0x00100000
     OBJ_FLAG_OPACITY_FROM_CAMERA_DIST          = (1 << 21), // 0x00200000
diff --git a/include/object_fields.h b/include/object_fields.h
index d3487dadc..aa00f163f 100644
--- a/include/object_fields.h
+++ b/include/object_fields.h
@@ -169,9 +169,6 @@
 #define /*0x1BC*/ oAngleToHome                OBJECT_FIELD_S32(0x4D)
 #define /*0x1C0*/ oFloor                      OBJECT_FIELD_SURFACE(0x4E)
 #define /*0x1C4*/ oDeathSound                 OBJECT_FIELD_S32(0x4F)
-#ifdef PUPPYLIGHTS
-#define /*0x1C4*/ oLightID                                      OBJECT_FIELD_S32(0x50)
-#endif
 
 /* Pathed (see obj_follow_path) */
 #define /*0x0FC*/ oPathedStartWaypoint     OBJECT_FIELD_WAYPOINT(0x1D)
diff --git a/include/seq_macros.inc b/include/seq_macros.inc
index 1d12e1ab5..a551cc736 100644
--- a/include/seq_macros.inc
+++ b/include/seq_macros.inc
@@ -475,8 +475,8 @@
     .byte \a
 .endm
 
-.macro chan_testlayerfinished a
-    .byte 0x80 + \a
+.macro chan_testlayersfinished
+    .byte 0x80
 .endm
 
 .macro chan_setlayer a, b
@@ -488,14 +488,14 @@
     .byte 0x60 + \a
 .endm
 
-.macro chan_freelayer a
-    .byte 0x90 + \a
+.macro chan_freelayers
+    .byte 0x90
 .endm
 
 #else
 
-.macro chan_testlayerfinished a
-    .byte 0x0 + \a
+.macro chan_testlayersfinished
+    .byte 0x00
 .endm
 
 .macro chan_ioreadval a
@@ -507,8 +507,8 @@
     .byte (\b - sequence_start) >> 8, (\b - sequence_start) & 0xff
 .endm
 
-.macro chan_freelayer a
-    .byte 0xa0 + \a
+.macro chan_freelayers
+    .byte 0xa0
 .endm
 
 #ifdef VERSION_EU
diff --git a/include/types.h b/include/types.h
index 81b4813f6..68eb13792 100644
--- a/include/types.h
+++ b/include/types.h
@@ -248,9 +248,6 @@ struct GraphNodeObject {
     /*0x4C*/ struct SpawnInfo *spawnInfo;
     /*0x50*/ Mat4 *throwMatrix; // matrix ptr
     /*0x54*/ Vec3f cameraToObject;
-#ifdef OBJECTS_REJ
-    u16 ucode;
-#endif
 };
 
 struct ObjectNode {
@@ -259,27 +256,9 @@ struct ObjectNode {
     struct ObjectNode *prev;
 };
 
-#ifdef PUPPYLIGHTS
-struct PuppyLight {
-    Vec3t pos[2];   // The location of the light. First index is the absolute position, second index are offsets.
-    s16 yaw;        // Used by cubes. Allows epic rotating of the volume.
-    RoomData room;  // Which room to use. -1 is visible from all rooms.
-    s8 epicentre;   // What percentage inside the volume you'll be before maximum light strength is applied. (E.g: 100 will be full strength always, and 0 will be full strength at the centre.)
-    u8 flags;       // Some stuff to define how the volume is used. Mostly just shape stuff, but can potentially have other uses.
-    ColorRGBA rgba; // Colour. Go on, take even the tiniest guess as to what this entails.
-    u8 area;        // Which section of the level this light is stored in.
-    u8 active: 1;   // Whether the light will actually work. Mostly intended to be used for objects.
-};
-#endif
-
 // NOTE: Since ObjectNode is the first member of Object, it is difficult to determine
 // whether some of these pointers point to ObjectNode or Object.
-
-#ifdef PUPPYLIGHTS
-#define MAX_OBJECT_FIELDS 0x51
-#else
 #define MAX_OBJECT_FIELDS 0x50
-#endif
 
 struct Object {
     /*0x000*/ struct ObjectNode header;
@@ -338,9 +317,6 @@ struct Object {
     /*0x218*/ void *collisionData;
     /*0x21C*/ Mat4 transform;
     /*0x25C*/ void *respawnInfo;
-#ifdef PUPPYLIGHTS
-    struct PuppyLight puppylight;
-#endif
 };
 
 struct ObjectHitbox {
diff --git a/levels/menu/script.c b/levels/menu/script.c
index a4b032e2f..72474a6cc 100644
--- a/levels/menu/script.c
+++ b/levels/menu/script.c
@@ -6,6 +6,8 @@
 #include "segment_symbols.h"
 #include "level_commands.h"
 
+#include "config/config_cutscenes.h"
+
 #include "game/area.h"
 #include "game/level_update.h"
 #include "menu/file_select.h"
@@ -19,6 +21,7 @@
 #include "levels/menu/header.h"
 
 const LevelScript level_main_menu_entry_file_select[] = {
+#ifndef SKIP_FILE_SELECT
     INIT_LEVEL(),
     LOAD_GODDARD(),
     LOAD_LEVEL_DATA(menu),
@@ -53,6 +56,7 @@ const LevelScript level_main_menu_entry_file_select[] = {
     SLEEP(/*frames*/ 16),
     CLEAR_LEVEL(),
     SLEEP_BEFORE_EXIT(/*frames*/ 1),
+#endif // !SKIP_FILE_SELECT
     SET_REG(/*value*/ START_LEVEL),
     EXIT_AND_EXECUTE(/*seg*/ SEGMENT_GLOBAL_LEVEL_SCRIPT, _scriptsSegmentRomStart, _scriptsSegmentRomEnd, level_main_scripts_entry),
 };
diff --git a/lib/libpl b/lib/libpl
new file mode 160000
index 000000000..cfc74e589
--- /dev/null
+++ b/lib/libpl
@@ -0,0 +1 @@
+Subproject commit cfc74e5898945699bb1f5aeee8dd5507164e2384
diff --git a/sm64.ld b/sm64.ld
index ebb6af13e..186b4d4e3 100755
--- a/sm64.ld
+++ b/sm64.ld
@@ -171,6 +171,9 @@ SECTIONS
       BUILD_DIR/src/audio*.o(.text*);
 #ifdef S2DEX_TEXT_ENGINE
       BUILD_DIR/src/s2d_engine*.o(.text*);
+#endif
+#ifdef LIBPL
+      BUILD_DIR/lib/libpl*.o(.text*);
 #endif
       */ULTRALIB.a:*.o(.text*);
       */libnustd.a:*.o(.text*);
@@ -191,6 +194,9 @@ SECTIONS
       BUILD_DIR/src/audio*.o(.*data*);
 #ifdef S2DEX_TEXT_ENGINE
       BUILD_DIR/src/s2d_engine*.o(.*data*);
+#endif
+#ifdef LIBPL
+      BUILD_DIR/lib/libpl*.o(.*data*);
 #endif
       */ULTRALIB.a:*.o(.data*);
       */libhvqm2.a:*.o(.data*);
@@ -207,6 +213,9 @@ SECTIONS
       BUILD_DIR/src/audio*.o(.rodata*);
 #ifdef S2DEX_TEXT_ENGINE
       BUILD_DIR/src/s2d_engine*.o(.rodata*);
+#endif
+#ifdef LIBPL
+      BUILD_DIR/lib/libpl*.o(.rodata*);
 #endif
       */ULTRALIB.a:*.o(.rodata*);
       */libgcc.a:*.o(.rodata*);
@@ -224,6 +233,9 @@ SECTIONS
       BUILD_DIR/src/audio*.o(.*bss*);
 #ifdef S2DEX_TEXT_ENGINE
       BUILD_DIR/src/s2d_engine*.o(.*bss*);
+#endif
+#ifdef LIBPL
+      BUILD_DIR/lib/libpl*.o(.*bss*);
 #endif
       */ULTRALIB.a:*.o(COMMON);
       */ULTRALIB.a:*.o(.scommon);
diff --git a/sound/sequences/00_sound_player.s b/sound/sequences/00_sound_player.s
index 1a7111618..464bc0d6e 100644
--- a/sound/sequences/00_sound_player.s
+++ b/sound/sequences/00_sound_player.s
@@ -73,13 +73,11 @@ chan_jump .main_loop_023589
 
 // Main loop for standard, non-continuous sound effects
 .main_loop_023589:
-chan_delay1
+chan_hang
 chan_ioreadval 0
 chan_bltz .main_loop_023589
 .start_playing_023589:
-chan_freelayer 0
-chan_freelayer 1
-chan_freelayer 2
+chan_freelayers
 chan_setval 0
 chan_iowriteval 5
 chan_ioreadval 4
@@ -93,13 +91,11 @@ chan_bltz .skip_023589 // if we have a signal:
   chan_beqz .force_stop_023589 // told to stop
   chan_jump .start_playing_023589 // told to play something else
 .skip_023589:
-chan_testlayerfinished 0
-chan_beqz .poll_023589 // if layer 0 hasn't finished, keep polling
+chan_testlayersfinished
+chan_beqz .poll_023589 // if not all layers have finished, keep polling
 chan_jump .main_loop_023589 // otherwise go back to the main loop
 .force_stop_023589:
-chan_freelayer 0
-chan_freelayer 1
-chan_freelayer 2
+chan_freelayers
 chan_jump .main_loop_023589
 
 .channel1:
@@ -140,13 +136,11 @@ chan_jump .main_loop_146
 
 // Main loop for moving, env and air sound effects, which play continuously
 .main_loop_146:
-chan_delay1
+chan_hang
 chan_ioreadval 0
 chan_bltz .main_loop_146
 .start_playing_146:
-chan_freelayer 0
-chan_freelayer 1
-chan_freelayer 2
+chan_freelayers
 chan_setvolscale 127
 chan_setval 0
 chan_iowriteval 5
@@ -161,9 +155,7 @@ chan_bltz .poll_146
 chan_beqz .force_stop_146
 chan_jump .start_playing_146
 .force_stop_146:
-chan_freelayer 0
-chan_freelayer 1
-chan_freelayer 2
+chan_freelayers
 chan_jump .main_loop_146
 
 .channel7:
@@ -177,13 +169,11 @@ chan_setdyntable .channel7_table
 
 // Loop for menu sound effects
 .main_loop_7:
-chan_delay1
+chan_hang
 chan_ioreadval 0
 chan_bltz .main_loop_7
 .start_playing_7:
-chan_freelayer 0
-chan_freelayer 1
-chan_freelayer 2
+chan_freelayers
 chan_setval 0
 chan_iowriteval 5
 chan_setreverb 0
@@ -201,14 +191,12 @@ chan_bltz .skip_7 // if we have a signal:
   chan_unreservenotes
   chan_jump .start_playing_7 // told to play something else
 .skip_7:
-chan_testlayerfinished 0
-chan_beqz .poll_7 // if layer 0 hasn't finished, keep polling
+chan_testlayersfinished
+chan_beqz .poll_7 // if not all layers have finished, keep polling
 chan_unreservenotes
 chan_jump .main_loop_7 // otherwise go back to the main loop
 .force_stop_7:
-chan_freelayer 0
-chan_freelayer 1
-chan_freelayer 2
+chan_freelayers
 chan_unreservenotes
 chan_jump .main_loop_7
 
diff --git a/src/audio/data.c b/src/audio/data.c
index bf8f0de33..28332c845 100644
--- a/src/audio/data.c
+++ b/src/audio/data.c
@@ -73,7 +73,7 @@ u8 sReverbMultsArr[][NUM_ALLPASS / 3] = {
 
 /**
  * Format:
- * - useLightweightSettings (Reduce some runtime configurability options in favor of a slight speed boost during processing; Light configurability settings are found in synthesis.h)
+ * - useLightweightSettings (Reduce some runtime configurability options in favor of a significant speed boost during processing; Light configurability settings are found in synthesis.h)
  * - downsampleRate         (Higher values exponentially reduce the number of input samples to process, improving perfomance at cost of quality; number <= 0 signifies use of vanilla reverb)
  * - isMono                 (Only process reverb on the left channel and share it with the right channel, improving performance at cost of quality)
  * - filterCount            (Number of filters to process data with; in general, more filters means higher quality at the cost of performance demand; always 3 with light settings)
@@ -85,8 +85,8 @@ u8 sReverbMultsArr[][NUM_ALLPASS / 3] = {
  * 
  * - *delaysL               (Advanced parameter; array of variable audio buffer sizes / delays for each respective filter [left channel])
  * - *delaysR               (Advanced parameter; array of variable audio buffer sizes / delays for each respective filter [right channel])
- * - *reverbMultsL          (Advanced parameter; array of multipliers applied to the final output of each group of 3 filters [left channel]; overridden when using light settings)
- * - *reverbMultsR          (Advanced parameter; array of multipliers applied to the final output of each group of 3 filters [right channel]; overridden when using light settings)
+ * - *reverbMultsL          (Advanced parameter; array of multipliers applied to the final output of each group of 3 filters [left channel]; unused when using light settings)
+ * - *reverbMultsR          (Advanced parameter; array of multipliers applied to the final output of each group of 3 filters [right channel]; unused when using light settings)
  * 
  * NOTE: The first entry will always be used by default when not using the level commands to specify a preset.
  * Please reference the HackerSM64 Wiki for more descriptive documentation of these parameters and usage of BETTER_REVERB in general.
@@ -244,41 +244,36 @@ u16 gAudioCosineTable[128] = {
 // between -1 and +1 octave.
 // gPitchBendFrequencyScale[k] = (0.5 * 2^(k/127))
 #ifndef VERSION_SH
-#if defined(VERSION_EU)
 f32 gPitchBendFrequencyScale[256] = {
-    0.5f,
-#else
-f32 gPitchBendFrequencyScale[255] = {
-#endif
-    0.500000f, 0.502736f, 0.505488f, 0.508254f, 0.511036f, 0.513833f, 0.516645f, 0.519472f, 0.522315f,
-    0.525174f, 0.528048f, 0.530938f, 0.533843f, 0.536765f, 0.539702f, 0.542656f, 0.545626f, 0.548612f,
-    0.551614f, 0.554633f, 0.557669f, 0.560721f, 0.563789f, 0.566875f, 0.569977f, 0.573097f, 0.576233f,
-    0.579387f, 0.582558f, 0.585746f, 0.588951f, 0.592175f, 0.595415f, 0.598674f, 0.601950f, 0.605245f,
-    0.608557f, 0.611888f, 0.615236f, 0.618603f, 0.621989f, 0.625393f, 0.628815f, 0.632257f, 0.635717f,
-    0.639196f, 0.642694f, 0.646212f, 0.649748f, 0.653304f, 0.656880f, 0.660475f, 0.664089f, 0.667724f,
-    0.671378f, 0.675052f, 0.678747f, 0.682461f, 0.686196f, 0.689952f, 0.693727f, 0.697524f, 0.701341f,
-    0.705180f, 0.709039f, 0.712919f, 0.716821f, 0.720744f, 0.724689f, 0.728655f, 0.732642f, 0.736652f,
-    0.740684f, 0.744737f, 0.748813f, 0.752911f, 0.757031f, 0.761175f, 0.765340f, 0.769529f, 0.773740f,
-    0.777975f, 0.782232f, 0.786513f, 0.790818f, 0.795146f, 0.799497f, 0.803873f, 0.808272f, 0.812696f,
-    0.817144f, 0.821616f, 0.826112f, 0.830633f, 0.835179f, 0.839750f, 0.844346f, 0.848966f, 0.853613f,
-    0.858284f, 0.862982f, 0.867704f, 0.872453f, 0.877228f, 0.882029f, 0.886856f, 0.891709f, 0.896590f,
-    0.901496f, 0.906430f, 0.911391f, 0.916379f, 0.921394f, 0.926436f, 0.931507f, 0.936604f, 0.941730f,
-    0.946884f, 0.952066f, 0.957277f, 0.962516f, 0.967783f, 0.973080f, 0.978405f, 0.983760f, 0.989144f,
-    0.994557f, 1.000000f, 1.005473f, 1.010975f, 1.016508f, 1.022071f, 1.027665f, 1.033289f, 1.038944f,
-    1.044630f, 1.050347f, 1.056095f, 1.061875f, 1.067687f, 1.073530f, 1.079405f, 1.085312f, 1.091252f,
-    1.097224f, 1.103229f, 1.109267f, 1.115337f, 1.121441f, 1.127579f, 1.133750f, 1.139955f, 1.146193f,
-    1.152466f, 1.158773f, 1.165115f, 1.171491f, 1.177903f, 1.184349f, 1.190831f, 1.197348f, 1.203901f,
-    1.210489f, 1.217114f, 1.223775f, 1.230473f, 1.237207f, 1.243978f, 1.250786f, 1.257631f, 1.264514f,
-    1.271434f, 1.278392f, 1.285389f, 1.292423f, 1.299497f, 1.306608f, 1.313759f, 1.320949f, 1.328178f,
-    1.335447f, 1.342756f, 1.350104f, 1.357493f, 1.364922f, 1.372392f, 1.379903f, 1.387455f, 1.395048f,
-    1.402683f, 1.410360f, 1.418078f, 1.425839f, 1.433642f, 1.441488f, 1.449377f, 1.457309f, 1.465285f,
-    1.473304f, 1.481367f, 1.489474f, 1.497626f, 1.505822f, 1.514063f, 1.522349f, 1.530681f, 1.539058f,
-    1.547481f, 1.555950f, 1.564465f, 1.573027f, 1.581636f, 1.590292f, 1.598995f, 1.607746f, 1.616545f,
-    1.625392f, 1.634287f, 1.643231f, 1.652224f, 1.661266f, 1.670358f, 1.679500f, 1.688691f, 1.697933f,
-    1.707225f, 1.716569f, 1.725963f, 1.735409f, 1.744906f, 1.754456f, 1.764058f, 1.773712f, 1.783419f,
-    1.793179f, 1.802993f, 1.812860f, 1.822782f, 1.832757f, 1.842788f, 1.852873f, 1.863013f, 1.873209f,
-    1.883461f, 1.893768f, 1.904132f, 1.914553f, 1.925031f, 1.935567f, 1.946159f, 1.956810f, 1.967520f,
-    1.978287f, 1.989114f, 2.000000f
+    0.500000f, 0.500000f, 0.502736f, 0.505488f, 0.508254f, 0.511036f, 0.513833f, 0.516645f, 0.519472f,
+    0.522315f, 0.525174f, 0.528048f, 0.530938f, 0.533843f, 0.536765f, 0.539702f, 0.542656f, 0.545626f,
+    0.548612f, 0.551614f, 0.554633f, 0.557669f, 0.560721f, 0.563789f, 0.566875f, 0.569977f, 0.573097f,
+    0.576233f, 0.579387f, 0.582558f, 0.585746f, 0.588951f, 0.592175f, 0.595415f, 0.598674f, 0.601950f,
+    0.605245f, 0.608557f, 0.611888f, 0.615236f, 0.618603f, 0.621989f, 0.625393f, 0.628815f, 0.632257f,
+    0.635717f, 0.639196f, 0.642694f, 0.646212f, 0.649748f, 0.653304f, 0.656880f, 0.660475f, 0.664089f,
+    0.667724f, 0.671378f, 0.675052f, 0.678747f, 0.682461f, 0.686196f, 0.689952f, 0.693727f, 0.697524f,
+    0.701341f, 0.705180f, 0.709039f, 0.712919f, 0.716821f, 0.720744f, 0.724689f, 0.728655f, 0.732642f,
+    0.736652f, 0.740684f, 0.744737f, 0.748813f, 0.752911f, 0.757031f, 0.761175f, 0.765340f, 0.769529f,
+    0.773740f, 0.777975f, 0.782232f, 0.786513f, 0.790818f, 0.795146f, 0.799497f, 0.803873f, 0.808272f,
+    0.812696f, 0.817144f, 0.821616f, 0.826112f, 0.830633f, 0.835179f, 0.839750f, 0.844346f, 0.848966f,
+    0.853613f, 0.858284f, 0.862982f, 0.867704f, 0.872453f, 0.877228f, 0.882029f, 0.886856f, 0.891709f,
+    0.896590f, 0.901496f, 0.906430f, 0.911391f, 0.916379f, 0.921394f, 0.926436f, 0.931507f, 0.936604f,
+    0.941730f, 0.946884f, 0.952066f, 0.957277f, 0.962516f, 0.967783f, 0.973080f, 0.978405f, 0.983760f,
+    0.989144f, 0.994557f, 1.000000f, 1.005473f, 1.010975f, 1.016508f, 1.022071f, 1.027665f, 1.033289f,
+    1.038944f, 1.044630f, 1.050347f, 1.056095f, 1.061875f, 1.067687f, 1.073530f, 1.079405f, 1.085312f,
+    1.091252f, 1.097224f, 1.103229f, 1.109267f, 1.115337f, 1.121441f, 1.127579f, 1.133750f, 1.139955f,
+    1.146193f, 1.152466f, 1.158773f, 1.165115f, 1.171491f, 1.177903f, 1.184349f, 1.190831f, 1.197348f,
+    1.203901f, 1.210489f, 1.217114f, 1.223775f, 1.230473f, 1.237207f, 1.243978f, 1.250786f, 1.257631f,
+    1.264514f, 1.271434f, 1.278392f, 1.285389f, 1.292423f, 1.299497f, 1.306608f, 1.313759f, 1.320949f,
+    1.328178f, 1.335447f, 1.342756f, 1.350104f, 1.357493f, 1.364922f, 1.372392f, 1.379903f, 1.387455f,
+    1.395048f, 1.402683f, 1.410360f, 1.418078f, 1.425839f, 1.433642f, 1.441488f, 1.449377f, 1.457309f,
+    1.465285f, 1.473304f, 1.481367f, 1.489474f, 1.497626f, 1.505822f, 1.514063f, 1.522349f, 1.530681f,
+    1.539058f, 1.547481f, 1.555950f, 1.564465f, 1.573027f, 1.581636f, 1.590292f, 1.598995f, 1.607746f,
+    1.616545f, 1.625392f, 1.634287f, 1.643231f, 1.652224f, 1.661266f, 1.670358f, 1.679500f, 1.688691f,
+    1.697933f, 1.707225f, 1.716569f, 1.725963f, 1.735409f, 1.744906f, 1.754456f, 1.764058f, 1.773712f,
+    1.783419f, 1.793179f, 1.802993f, 1.812860f, 1.822782f, 1.832757f, 1.842788f, 1.852873f, 1.863013f,
+    1.873209f, 1.883461f, 1.893768f, 1.904132f, 1.914553f, 1.925031f, 1.935567f, 1.946159f, 1.956810f,
+    1.967520f, 1.978287f, 1.989114f, 2.000000f
 };
 
 // Frequencies for notes using the standard twelve-tone equal temperament scale.
diff --git a/src/audio/data.h b/src/audio/data.h
index acca13717..b4706868d 100644
--- a/src/audio/data.h
+++ b/src/audio/data.h
@@ -65,11 +65,7 @@ extern u8 gReverbMultsArrCount;
 #endif // PUPPYPRINT_DEBUG
 #endif // BETTER_REVERB
 
-#if defined(VERSION_EU) || defined(VERSION_SH)
 extern f32 gPitchBendFrequencyScale[256];
-#else
-extern f32 gPitchBendFrequencyScale[255];
-#endif
 extern f32 gNoteFrequencies[128];
 
 extern u8 gDefaultShortNoteVelocityTable[16];
diff --git a/src/audio/effects.c b/src/audio/effects.c
index 9b635e287..4fd6dc15f 100644
--- a/src/audio/effects.c
+++ b/src/audio/effects.c
@@ -3,7 +3,9 @@
 #include "effects.h"
 #include "load.h"
 #include "data.h"
+#include "external.h"
 #include "seqplayer.h"
+#include "game/game_init.h"
 #include "game/main.h"
 #include "engine/math_util.h"
 
@@ -53,20 +55,27 @@ void sequence_channel_process_sound(struct SequenceChannel *seqChannel, s32 reca
 }
 #else
 static void sequence_channel_process_sound(struct SequenceChannel *seqChannel) {
-    s32 i;
+    s32 hasProcessedChannel = FALSE;
+    f32 channelVolume;
+    f32 panFromChannel;
+    f32 panLayerWeight;
 
-    f32 channelVolume = seqChannel->volume * seqChannel->volumeScale * seqChannel->seqPlayer->fadeVolume;
-    if (seqChannel->seqPlayer->muted && (seqChannel->muteBehavior & MUTE_BEHAVIOR_SOFTEN) != 0) {
-        channelVolume *= seqChannel->seqPlayer->muteVolumeScale;
-    }
-
-    f32 panFromChannel = seqChannel->pan * seqChannel->panChannelWeight;
-    f32 panLayerWeight = 1.0f - seqChannel->panChannelWeight;
-
-    for (i = 0; i < 4; i++) {
+    for (s32 i = 0; i < LAYERS_MAX; i++) {
         struct SequenceChannelLayer *layer = seqChannel->layers[i];
         if (layer != NULL && layer->enabled && layer->note != NULL) {
-            layer->noteFreqScale = layer->freqScale * seqChannel->freqScale * gConfig.audioFrequency;
+            if (!hasProcessedChannel) {
+                hasProcessedChannel = TRUE;
+
+                channelVolume = seqChannel->volume * seqChannel->volumeScale * seqChannel->seqPlayer->fadeVolume;
+                if (seqChannel->seqPlayer->muted && (seqChannel->muteBehavior & MUTE_BEHAVIOR_SOFTEN) != 0) {
+                    channelVolume *= seqChannel->seqPlayer->muteVolumeScale;
+                }
+
+                panFromChannel = seqChannel->pan * seqChannel->panChannelWeight;
+                panLayerWeight = 1.0f - seqChannel->panChannelWeight;
+            }
+
+            layer->noteFreqScale = layer->freqScale * seqChannel->freqScale;
             layer->noteVelocity = layer->velocitySquare * channelVolume;
             layer->notePan = (layer->pan * panLayerWeight) + panFromChannel;
         }
@@ -141,19 +150,11 @@ f32 get_portamento_freq_scale(struct Portamento *p) {
     p->cur += p->speed;
     u32 v0 = (u32) p->cur;
 
-#if defined(VERSION_EU) || defined(VERSION_SH)
     if (v0 > 127) {
-#else
-    if (v0 >= 127) {
-#endif
         v0 = 127;
     }
 
-#if defined(VERSION_EU) || defined(VERSION_SH)
     return (1.0f + (p->extent * (gPitchBendFrequencyScale[v0 + 128] - 1.0f)));
-#else
-    return (1.0f + (p->extent * (gPitchBendFrequencyScale[v0 + 127] - 1.0f)));
-#endif
 }
 
 s32 get_vibrato_pitch_change(struct VibratoState *vib) {
@@ -240,11 +241,7 @@ f32 get_vibrato_freq_scale(struct VibratoState *vib) {
     s32 pitchChange = get_vibrato_pitch_change(vib);
     f32 extent = (f32) vib->extent / 4096.0f;
 
-#if defined(VERSION_EU) || defined(VERSION_SH)
     return 1.0f + extent * (gPitchBendFrequencyScale[pitchChange + 128] - 1.0f);
-#else
-    return 1.0f + extent * (gPitchBendFrequencyScale[pitchChange + 127] - 1.0f);
-#endif
 }
 
 void note_vibrato_update(struct Note *note) {
diff --git a/src/audio/external.c b/src/audio/external.c
index cf33cd378..ddf28c31d 100644
--- a/src/audio/external.c
+++ b/src/audio/external.c
@@ -1129,7 +1129,7 @@ static f32 get_sound_freq_scale(u8 bank, u8 item) {
 
     // Goes from 1 at the camera to 1 + 1/15 at AUDIO_MAX_DISTANCE (and continues rising
     // farther than that)
-    return amount / 15.0f + 1.0f;
+    return (amount / 15.0f + 1.0f) * gConfig.audioFrequency;
 }
 
 /**
@@ -1236,6 +1236,7 @@ static void update_game_sound(void) {
                     // Begin playing the sound
                     gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->soundScriptIO[4] = soundId;
                     gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->soundScriptIO[0] = 1;
+                    gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->stopScript = FALSE;
 
                     switch (bank) {
                         case SOUND_BANK_MOVING:
@@ -1311,7 +1312,7 @@ static void update_game_sound(void) {
 #else
                             gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->volume = 1.0f;
                             gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->pan = 0.5f;
-                            gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->freqScale = 1.0f;
+                            gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->freqScale = gConfig.audioFrequency;
 #endif
                             break;
                         case SOUND_BANK_ACTION:
@@ -1477,7 +1478,7 @@ static void update_game_sound(void) {
 #else
                             gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->volume = 1.0f;
                             gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->pan = 0.5f;
-                            gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->freqScale = 1.0f;
+                            gSequencePlayers[SEQ_PLAYER_SFX].channels[channelIndex]->freqScale = gConfig.audioFrequency;
 #endif
                             break;
                         case SOUND_BANK_ACTION:
diff --git a/src/audio/heap.c b/src/audio/heap.c
index c19f61f41..c9ec78c5c 100644
--- a/src/audio/heap.c
+++ b/src/audio/heap.c
@@ -1431,7 +1431,7 @@ void audio_reset_session(void) {
     init_reverb_us(reverbPresetId);
 #endif
 
-    init_sample_dma_buffers(gMaxSimultaneousNotes);
+    init_sample_dma_buffers();
 
 #if defined(VERSION_EU)
     build_vol_rampings_table(0, gAudioBufferParameters.samplesPerUpdate);
diff --git a/src/audio/internal.h b/src/audio/internal.h
index e9c65bc93..a70d55660 100644
--- a/src/audio/internal.h
+++ b/src/audio/internal.h
@@ -706,17 +706,14 @@ struct Note {
     /*0x8C*/ struct AudioListItem listItem;
     /*0x9C*/ s16 curVolLeft; // Q1.15, but will always be non-negative
     /*0x9E*/ s16 curVolRight; // Q1.15, but will always be non-negative
-    /*0xA0*/ s16 reverbVolShifted; // Q1.15
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
-    /*0xA2*/ u16 headsetPanRight;
-    /*0xA4*/ u16 headsetPanLeft;
-    /*0xA6*/ u16 prevHeadsetPanRight;
-    /*0xA8*/ u16 prevHeadsetPanLeft;
-    /*    */ u8 align16Padding[0x06];
-#else
-    /*    */ u8 align16Padding[0x0E];
+    /*0xA0*/ u16 headsetPanRight;
+    /*0xA2*/ u16 headsetPanLeft;
+    /*0xA4*/ u16 prevHeadsetPanRight;
+    /*0xA6*/ u16 prevHeadsetPanLeft;
+    /*    */ u8 align16Padding[0x08];
 #endif
-}; // size = 0xB0
+}; // size = 0xA0, 0xB0
 #endif
 
 struct NoteSynthesisBuffers {
diff --git a/src/audio/load.c b/src/audio/load.c
index 295490e9d..c663d93c1 100644
--- a/src/audio/load.c
+++ b/src/audio/load.c
@@ -12,11 +12,9 @@
 struct SharedDma {
     /*0x0*/ u8 *buffer;       // target, points to pre-allocated buffer
     /*0x4*/ uintptr_t source; // device address
-    /*0x8*/ u16 sizeUnused;   // set to bufSize, never read
-    /*0xA*/ u16 bufSize;      // size of buffer
-    /*0xC*/ u8 unused2;       // set to 0, never read
-    /*0xD*/ u8 reuseIndex;    // position in sSampleDmaReuseQueue1/2, if ttl == 0
-    /*0xE*/ u8 ttl;           // duration after which the DMA can be discarded
+    /*0x8*/ u32 bufSize;      // size of buffer (converted from u16 for intentional padding to size 0x10)
+    /*0xC*/ u8 reuseIndex;    // position in sSampleDmaReuseQueue1/2, if ttl == 0
+    /*   */ // u8 pad[3];
 };                            // size = 0x10
 
 // EU only
@@ -43,6 +41,7 @@ OSMesg gAudioDmaMesg;
 OSIoMesg gAudioDmaIoMesg;
 
 struct SharedDma sSampleDmas[MAX_SIMULTANEOUS_NOTES * 4];
+u8 sSampleTTLs[MAX_SIMULTANEOUS_NOTES * 4];
 u32 gSampleDmaNumListItems; // sh: 0x803503D4
 u32 sSampleDmaListSize1; // sh: 0x803503D8
 
@@ -71,7 +70,6 @@ struct AudioBufferParametersEU gAudioBufferParameters;
 s32 gAiFrequency;
 #endif
 
-u32 sDmaBufSize;
 s32 gMaxAudioCmds;
 s32 gMaxSimultaneousNotes;
 
@@ -158,30 +156,20 @@ void decrease_sample_dma_ttls() {
     u32 i;
 
     for (i = 0; i < sSampleDmaListSize1; i++) {
-#if defined(VERSION_EU)
-        struct SharedDma *temp = &sSampleDmas[i];
-#else
-        struct SharedDma *temp = sSampleDmas + i;
-#endif
-        if (temp->ttl != 0) {
-            temp->ttl--;
-            if (temp->ttl == 0) {
-                temp->reuseIndex = sSampleDmaReuseQueueHead1;
+        if (sSampleTTLs[i] != 0) {
+            sSampleTTLs[i]--;
+            if (sSampleTTLs[i] == 0) {
+                sSampleDmas[i].reuseIndex = sSampleDmaReuseQueueHead1;
                 sSampleDmaReuseQueue1[sSampleDmaReuseQueueHead1++] = (u8) i;
             }
         }
     }
 
     for (i = sSampleDmaListSize1; i < gSampleDmaNumListItems; i++) {
-#if defined(VERSION_EU)
-        struct SharedDma *temp = &sSampleDmas[i];
-#else
-        struct SharedDma *temp = sSampleDmas + i;
-#endif
-        if (temp->ttl != 0) {
-            temp->ttl--;
-            if (temp->ttl == 0) {
-                temp->reuseIndex = sSampleDmaReuseQueueHead2;
+        if (sSampleTTLs[i] != 0) {
+            sSampleTTLs[i]--;
+            if (sSampleTTLs[i] == 0) {
+                sSampleDmas[i].reuseIndex = sSampleDmaReuseQueueHead2;
                 sSampleDmaReuseQueue2[sSampleDmaReuseQueueHead2++] = (u8) i;
             }
         }
@@ -203,7 +191,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
             bufferPos = devAddr - dma->source;
             if (0 <= bufferPos && (size_t) bufferPos <= dma->bufSize - size) {
                 // We already have a DMA request for this memory range.
-                if (dma->ttl == 0 && sSampleDmaReuseQueueTail2 != sSampleDmaReuseQueueHead2) {
+                if (sSampleTTLs[i] == 0 && sSampleDmaReuseQueueTail2 != sSampleDmaReuseQueueHead2) {
                     // Move the DMA out of the reuse queue, by swapping it with the
                     // tail, and then incrementing the tail.
                     if (dma->reuseIndex != sSampleDmaReuseQueueTail2) {
@@ -214,7 +202,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
                     }
                     sSampleDmaReuseQueueTail2++;
                 }
-                dma->ttl = 60;
+                sSampleTTLs[i] = 60;
                 *dmaIndexRef = (u8) i;
                 return (devAddr - dma->source) + dma->buffer;
             }
@@ -226,6 +214,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
             dmaIndex = sSampleDmaReuseQueue2[sSampleDmaReuseQueueTail2];
             sSampleDmaReuseQueueTail2++;
             dma = sSampleDmas + dmaIndex;
+            sSampleTTLs[dmaIndex] = 2;
             hasDma = TRUE;
         }
     } else {
@@ -233,7 +222,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
         bufferPos = devAddr - dma->source;
         if (0 <= bufferPos && (size_t) bufferPos <= dma->bufSize - size) {
             // We already have DMA for this memory range.
-            if (dma->ttl == 0) {
+            if (sSampleTTLs[*dmaIndexRef] == 0) {
                 // Move the DMA out of the reuse queue, by swapping it with the
                 // tail, and then incrementing the tail.
                 if (dma->reuseIndex != sSampleDmaReuseQueueTail1) {
@@ -244,7 +233,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
                 }
                 sSampleDmaReuseQueueTail1++;
             }
-            dma->ttl = 2;
+            sSampleTTLs[*dmaIndexRef] = 2;
             return dma->buffer + (devAddr - dma->source);
         }
     }
@@ -254,14 +243,13 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
         // be empty, since TTL 2 is so small.
         dmaIndex = sSampleDmaReuseQueue1[sSampleDmaReuseQueueTail1++];
         dma = sSampleDmas + dmaIndex;
+        sSampleTTLs[dmaIndex] = 2;
         hasDma = TRUE;
     }
 
     transfer = dma->bufSize;
     dmaDevAddr = devAddr & ~0xF;
-    dma->ttl = 2;
     dma->source = dmaDevAddr;
-    dma->sizeUnused = transfer;
 #ifdef VERSION_US // TODO: Is there a reason this only exists in US?
     osInvalDCache(dma->buffer, transfer);
 #endif
@@ -272,14 +260,10 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef) {
 }
 
 
-void init_sample_dma_buffers(UNUSED s32 arg0) {
+void init_sample_dma_buffers() {
     s32 i;
-#if defined(VERSION_EU)
-#define j i
-#else
-    s32 j;
-#endif
-
+    s32 sDmaBufSize;
+    
     sDmaBufSize = DMA_BUF_SIZE_0;
 
 #if defined(VERSION_EU)
@@ -289,30 +273,21 @@ void init_sample_dma_buffers(UNUSED s32 arg0) {
 #endif
         sSampleDmas[gSampleDmaNumListItems].buffer = soundAlloc(&gNotesAndBuffersPool, sDmaBufSize);
         if (sSampleDmas[gSampleDmaNumListItems].buffer == NULL) {
-#if defined(VERSION_EU)
             break;
-#else
-            goto out1;
-#endif
         }
         sSampleDmas[gSampleDmaNumListItems].bufSize = sDmaBufSize;
         sSampleDmas[gSampleDmaNumListItems].source = 0;
-        sSampleDmas[gSampleDmaNumListItems].sizeUnused = 0;
-        sSampleDmas[gSampleDmaNumListItems].unused2 = 0;
-        sSampleDmas[gSampleDmaNumListItems].ttl = 0;
+        sSampleTTLs[gSampleDmaNumListItems] = 0;
         gSampleDmaNumListItems++;
     }
-#if defined(VERSION_JP) || defined(VERSION_US)
-out1:
-#endif
 
     for (i = 0; (u32) i < gSampleDmaNumListItems; i++) {
         sSampleDmaReuseQueue1[i] = (u8) i;
         sSampleDmas[i].reuseIndex = (u8) i;
     }
 
-    for (j = gSampleDmaNumListItems; j < 0x100; j++) {
-        sSampleDmaReuseQueue1[j] = 0;
+    for (i = gSampleDmaNumListItems; i < ARRAY_COUNT(sSampleDmaReuseQueue1); i++) {
+        sSampleDmaReuseQueue1[i] = 0;
     }
 
     sSampleDmaReuseQueueTail1 = 0;
@@ -324,22 +299,13 @@ out1:
     for (i = 0; i < gMaxSimultaneousNotes; i++) {
         sSampleDmas[gSampleDmaNumListItems].buffer = soundAlloc(&gNotesAndBuffersPool, sDmaBufSize);
         if (sSampleDmas[gSampleDmaNumListItems].buffer == NULL) {
-#if defined(VERSION_EU)
             break;
-#else
-            goto out2;
-#endif
         }
         sSampleDmas[gSampleDmaNumListItems].bufSize = sDmaBufSize;
         sSampleDmas[gSampleDmaNumListItems].source = 0;
-        sSampleDmas[gSampleDmaNumListItems].sizeUnused = 0;
-        sSampleDmas[gSampleDmaNumListItems].unused2 = 0;
-        sSampleDmas[gSampleDmaNumListItems].ttl = 0;
+        sSampleTTLs[gSampleDmaNumListItems] = 0;
         gSampleDmaNumListItems++;
     }
-#if defined(VERSION_JP) || defined(VERSION_US)
-out2:
-#endif
 
     for (i = sSampleDmaListSize1; (u32) i < gSampleDmaNumListItems; i++) {
         sSampleDmaReuseQueue2[i - sSampleDmaListSize1] = (u8) i;
@@ -348,15 +314,12 @@ out2:
 
     // This probably meant to touch the range size1..size2 as well... but it
     // doesn't matter, since these values are never read anyway.
-    for (j = gSampleDmaNumListItems; j < 0x100; j++) {
-        sSampleDmaReuseQueue2[j] = sSampleDmaListSize1;
+    for (i = gSampleDmaNumListItems; i < ARRAY_COUNT(sSampleDmaReuseQueue2); i++) {
+        sSampleDmaReuseQueue2[i] = sSampleDmaListSize1;
     }
 
     sSampleDmaReuseQueueTail2 = 0;
     sSampleDmaReuseQueueHead2 = gSampleDmaNumListItems - sSampleDmaListSize1;
-#if defined(VERSION_EU)
-#undef j
-#endif
 }
 
 #if defined(VERSION_JP) || defined(VERSION_US)
diff --git a/src/audio/load.h b/src/audio/load.h
index 12b6ed6c6..1c69c688e 100644
--- a/src/audio/load.h
+++ b/src/audio/load.h
@@ -87,7 +87,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef, s3
 #else
 void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef);
 #endif
-void init_sample_dma_buffers(s32 arg0);
+void init_sample_dma_buffers();
 #if defined(VERSION_SH)
 void patch_audio_bank(s32 bankId, struct AudioBank *mem, struct PatchStruct *patchInfo);
 #else
diff --git a/src/audio/load_sh.c b/src/audio/load_sh.c
index bc24891b3..ac6097a54 100644
--- a/src/audio/load_sh.c
+++ b/src/audio/load_sh.c
@@ -237,7 +237,7 @@ void *dma_sample_data(uintptr_t devAddr, u32 size, s32 arg2, u8 *dmaIndexRef, s3
     return (devAddr - dmaDevAddr) + dma->buffer;
 }
 
-void init_sample_dma_buffers(UNUSED s32 arg0) {
+void init_sample_dma_buffers() {
     s32 i;
 
     sDmaBufSize = 0x2D0;
diff --git a/src/audio/playback.c b/src/audio/playback.c
index 50622a4b9..b36faeeb9 100644
--- a/src/audio/playback.c
+++ b/src/audio/playback.c
@@ -359,7 +359,6 @@ void process_notes(void) {
 #endif
     u8 bookOffset;
 #endif
-    struct NoteAttributes *attributes;
 #if defined(VERSION_JP) || defined(VERSION_US)
     struct AudioListItem *it;
 #endif
@@ -580,17 +579,18 @@ void process_notes(void) {
 
             adsr_update(note);
             note_vibrato_update(note);
-            attributes = &note->attributes;
             if (note->priority == NOTE_PRIORITY_STOPPING) {
+                struct NoteAttributes *attributes = &note->attributes;
                 frequency = attributes->freqScale;
                 velocity = attributes->velocity;
                 pan = attributes->pan;
                 reverbVol = attributes->reverbVol;
             } else {
-                frequency = note->parentLayer->noteFreqScale;
-                velocity = note->parentLayer->noteVelocity;
-                pan = note->parentLayer->notePan;
-                reverbVol = note->parentLayer->seqChannel->reverbVol;
+                struct SequenceChannelLayer *parentLayer = note->parentLayer;
+                frequency = parentLayer->noteFreqScale;
+                velocity = parentLayer->noteVelocity;
+                pan = parentLayer->notePan;
+                reverbVol = parentLayer->seqChannel->reverbVol;
             }
 
             scale = note->adsrVolScale;
@@ -875,14 +875,8 @@ void build_synthetic_wave(struct Note *note, struct SequenceChannelLayer *seqLay
     // Repeat sample
     for (offset = note->sampleCount; offset < 0x40; offset += note->sampleCount) {
         lim = note->sampleCount;
-        if (offset < 0 || offset > 0) {
-            for (j = 0; j < lim; j++) {
-                note->synthesisBuffers->samples[offset + j] = note->synthesisBuffers->samples[j];
-            }
-        } else {
-            for (j = 0; j < lim; j++) {
-                note->synthesisBuffers->samples[offset + j] = note->synthesisBuffers->samples[j];
-            }
+        for (j = 0; j < lim; j++) {
+            note->synthesisBuffers->samples[offset + j] = note->synthesisBuffers->samples[j];
         }
     }
 
diff --git a/src/audio/seqplayer.c b/src/audio/seqplayer.c
index 1e139b2b1..373ad9689 100644
--- a/src/audio/seqplayer.c
+++ b/src/audio/seqplayer.c
@@ -6,6 +6,8 @@
 #include "heap.h"
 #include "load.h"
 #include "seqplayer.h"
+#include "game/debug.h"
+#include "game/main.h"
 
 #ifdef VERSION_SH
 void seq_channel_layer_process_script_part1(struct SequenceChannelLayer *layer);
@@ -44,7 +46,7 @@ void sequence_channel_init(struct SequenceChannel *seqChannel) {
     seqChannel->scriptState.depth = 0;
     seqChannel->volume = 1.0f;
     seqChannel->volumeScale = 1.0f;
-    seqChannel->freqScale = 1.0f;
+    seqChannel->freqScale = gConfig.audioFrequency;
     seqChannel->pan = 0.5f;
     seqChannel->panChannelWeight = 1.0f;
     seqChannel->noteUnused = NULL;
@@ -77,7 +79,7 @@ void sequence_channel_init(struct SequenceChannel *seqChannel) {
 #if defined(VERSION_EU) || defined(VERSION_SH)
     seqChannel->volume = 1.0f;
     seqChannel->volumeScale = 1.0f;
-    seqChannel->freqScale = 1.0f;
+    seqChannel->freqScale = gConfig.audioFrequency;
 #endif
 
     for (i = 0; i < 8; i++) {
@@ -1684,7 +1686,7 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
 
                     case 0xde: // chan_freqscale; pitch bend using raw frequency multiplier N/2^15 (N is u16)
                         sp5A = m64_read_s16(state);
-                        seqChannel->freqScale = FLOAT_CAST(sp5A) / 32768.0f;
+                        seqChannel->freqScale = FLOAT_CAST(sp5A) / 32768.0f * gConfig.audioFrequency;
 #if defined(VERSION_EU) || defined(VERSION_SH)
                         seqChannel->changes.as_bitfields.freqScale = TRUE;
 #endif
@@ -1692,12 +1694,8 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
 
                     case 0xd3: // chan_pitchbend; pitch bend by <= 1 octave in either direction (-127..127)
                         // (m64_read_u8(state) is really s8 here)
-#ifdef VERSION_SH
                         cmd = m64_read_u8(state) + 128;
-#else
-                        cmd = m64_read_u8(state) + 127;
-#endif
-                        seqChannel->freqScale = gPitchBendFrequencyScale[cmd];
+                        seqChannel->freqScale = gPitchBendFrequencyScale[cmd] * gConfig.audioFrequency;
 #if defined(VERSION_EU) || defined(VERSION_SH)
                         seqChannel->changes.as_bitfields.freqScale = TRUE;
 #endif
@@ -1706,7 +1704,7 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
 #ifdef VERSION_SH
                     case 0xee:
                         cmd = m64_read_u8(state) + 0x80;
-                        seqChannel->freqScale = unk_sh_data_1[cmd];
+                        seqChannel->freqScale = unk_sh_data_1[cmd] * gConfig.audioFrequency;
                         seqChannel->changes.as_bitfields.freqScale = TRUE;
                         break;
 #endif
@@ -1955,7 +1953,7 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
                         seqChannel->vibratoRateTarget = 0;
                         seqChannel->vibratoRateStart = 0;
                         seqChannel->vibratoRateChangeDelay = 0;
-                        seqChannel->freqScale = 1.0f;
+                        seqChannel->freqScale = gConfig.audioFrequency;
                         break;
 
                     case 0xe9: // chan_setnotepriority
@@ -2074,15 +2072,14 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
                         }
                         break;
 #else
-                    case 0x00: // chan_testlayerfinished
-                        if (seqChannel->layers[loBits] != NULL) {
-                            value = seqChannel->layers[loBits]->finished;
+                    case 0x00: // chan_testlayersfinished (NOTE: does not use loBits)
+                        value = TRUE;
+                        for (i = 0; i < LAYERS_MAX; i++) {
+                            if (seqChannel->layers[i] != NULL && !seqChannel->layers[i]->finished) {
+                                value = FALSE;
+                                break;
+                            }
                         }
-#ifdef VERSION_EU
-                        else {
-                            value = -1;
-                        }
-#endif
                         break;
 #endif
 
@@ -2122,8 +2119,10 @@ void sequence_channel_process_script(struct SequenceChannel *seqChannel) {
                         }
                         break;
 
-                    case 0xa0: // chan_freelayer
-                        seq_channel_layer_free(seqChannel, loBits);
+                    case 0xa0: // chan_freelayers (NOTE: does not use loBits)
+                        for (i = 0; i < LAYERS_MAX; i++) {
+                            seq_channel_layer_free(seqChannel, i);
+                        }
                         break;
 
                     case 0xb0: // chan_dynsetlayer
diff --git a/src/audio/synthesis.c b/src/audio/synthesis.c
index 6e38a781a..feec87855 100644
--- a/src/audio/synthesis.c
+++ b/src/audio/synthesis.c
@@ -12,7 +12,6 @@
 #include "game/debug.h"
 #include "engine/math_util.h"
 
-
 #define DMEM_ADDR_TEMP 0x0
 #define DMEM_ADDR_RESAMPLED 0x20
 #define DMEM_ADDR_RESAMPLED2 0x160
@@ -48,11 +47,11 @@ u8 toggleBetterReverb = FALSE;
 u8 betterReverbLightweight = FALSE;
 u8 monoReverb;
 s8 betterReverbDownsampleRate;
-static s32        reverbMults[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS / 3] = {0};
-static s32         allpassIdx[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS] = {0};
-static s32 betterReverbDelays[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS] = {0};
-static s32     lastDelayLight[SYNTH_CHANNEL_STEREO_COUNT];
-static s16        **delayBufs[SYNTH_CHANNEL_STEREO_COUNT];
+static s32         reverbMults[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS / 3] = {0};
+static s32          allpassIdx[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS] = {0};
+static s32  betterReverbDelays[SYNTH_CHANNEL_STEREO_COUNT][NUM_ALLPASS] = {0};
+static s32 historySamplesLight[SYNTH_CHANNEL_STEREO_COUNT];
+static s16         **delayBufs[SYNTH_CHANNEL_STEREO_COUNT];
 u8 *gReverbMults[SYNTH_CHANNEL_STEREO_COUNT];
 s32 reverbLastFilterIndex;
 s32 reverbFilterCount;
@@ -61,7 +60,6 @@ s32 betterReverbRevIndex; // This one is okay to adjust whenever
 s32 betterReverbGainIndex; // This one is okay to adjust whenever
 #endif
 
-
 struct VolumeChange {
     u16 sourceLeft;
     u16 sourceRight;
@@ -69,33 +67,20 @@ struct VolumeChange {
     u16 targetRight;
 };
 
-u64 *synthesis_do_one_audio_update(s16 *aiBuf, s32 bufLen, u64 *cmd, s32 updateIndex);
-#ifdef VERSION_EU
-u64 *synthesis_process_note(struct Note *note, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *synthesisState, s16 *aiBuf, s32 bufLen, u64 *cmd);
-u64 *load_wave_samples(u64 *cmd, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *synthesisState, s32 nSamplesToLoad);
-u64 *process_envelope(u64 *cmd, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *synthesisState, s32 nSamples, u16 inBuf, s32 headsetPanSettings, u32 flags);
-u64 *note_apply_headset_pan_effects(u64 *cmd, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *note, s32 bufLen, s32 flags, s32 leftRight);
-#else
-u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd);
+u64 *synthesis_do_one_audio_update(s16 *aiBuf, u32 bufLen, u64 *cmd, s32 updateIndex);
+u64 *synthesis_process_notes(s16 *aiBuf, u32 bufLen, u64 *cmd);
 u64 *load_wave_samples(u64 *cmd, struct Note *note, s32 nSamplesToLoad);
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
 u64 *process_envelope(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf, s32 headsetPanSettings);
-u64 *process_envelope_inner(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf,
-                            s32 headsetPanSettings, struct VolumeChange *vol);
 u64 *note_apply_headset_pan_effects(u64 *cmd, struct Note *note, s32 bufLen, s32 flags, s32 leftRight);
 #else
 u64 *process_envelope(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf);
-u64 *process_envelope_inner(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf, struct VolumeChange *vol);
-#endif
 #endif
 
-#ifdef VERSION_EU
-struct SynthesisReverb gSynthesisReverbs[4];
-u8 sAudioSynthesisPad[0x10];
-#else
 struct SynthesisReverb gSynthesisReverb;
-u8 sAudioSynthesisPad[0x20];
-#endif
+
+f32 *currentRampingTableLeft;
+f32 *currentRampingTableRight;
 
 #ifdef BETTER_REVERB
 static void reverb_samples(s16 *start, s16 *end, s16 *downsampleBuffer, s32 channel) {
@@ -121,6 +106,7 @@ static void reverb_samples(s16 *start, s16 *end, s16 *downsampleBuffer, s32 chan
     j = 0;
 
     for (; start < end; start++, downsampleBuffer += downsampleIncrement) {
+        // Mix the very last filter output with new incoming sample
         tmpCarryover = ((delayBufsLocal[lastFilterIndex][allpassIdxLocal[lastFilterIndex]] * revIndex) >> 8) + *downsampleBuffer;
         outSampleTotal = 0;
         i = 0;
@@ -149,7 +135,6 @@ static void reverb_samples(s16 *start, s16 *end, s16 *downsampleBuffer, s32 chan
     }
 }
 
-#define FILTERS_MINUS_1 (BETTER_REVERB_FILTER_COUNT_LIGHT - 1)
 static void reverb_samples_light(s16 *start, s16 *end, s16 *downsampleBuffer, s32 channel) {
     s16 *curDelaySample;
     s32 historySample;
@@ -159,14 +144,16 @@ static void reverb_samples_light(s16 *start, s16 *end, s16 *downsampleBuffer, s3
     s32 downsampleIncrement = gReverbDownsampleRate;
     s32 *delaysLocal = betterReverbDelays[channel];
     s32 *allpassIdxLocal = allpassIdx[channel];
-    s32 lastDelayLightLocal = lastDelayLight[channel];
     s16 **delayBufsLocal = delayBufs[channel];
 
-    for (; start < end; start++, downsampleBuffer += downsampleIncrement) {
-        tmpCarryover = (((delayBufsLocal[FILTERS_MINUS_1][allpassIdxLocal[FILTERS_MINUS_1]] * BETTER_REVERB_REVERB_INDEX_LIGHT) >> 8) + *downsampleBuffer);
-        i = 0;
+    // Get history sample from last processing tick
+    tmpCarryover = historySamplesLight[channel];
 
-        for (; i < FILTERS_MINUS_1; ++i) {
+    for (; start < end; start++, downsampleBuffer += downsampleIncrement) {
+        // Mix previous sample with new incoming sample
+        tmpCarryover = ((tmpCarryover * BETTER_REVERB_REVERB_INDEX_LIGHT) >> 8) + *downsampleBuffer;
+
+        for (i = 0; i < BETTER_REVERB_FILTER_COUNT_LIGHT; ++i) {
             curDelaySample = &delayBufsLocal[i][allpassIdxLocal[i]];
             historySample = *curDelaySample;
 
@@ -177,16 +164,13 @@ static void reverb_samples_light(s16 *start, s16 *end, s16 *downsampleBuffer, s3
             if (++allpassIdxLocal[i] == delaysLocal[i]) allpassIdxLocal[i] = 0;
         }
 
-        curDelaySample = &delayBufsLocal[FILTERS_MINUS_1][allpassIdxLocal[FILTERS_MINUS_1]];
-        historySample = ((*curDelaySample * BETTER_REVERB_MULTIPLE_LIGHT) >> 8); // outSampleTotal variable not needed, as there is no sample addition happening here. Not really a history sample though.
-        *curDelaySample = CLAMP_S16(tmpCarryover);
-
-        if (++allpassIdxLocal[FILTERS_MINUS_1] == lastDelayLightLocal) allpassIdxLocal[FILTERS_MINUS_1] = 0;
-
-        *start = CLAMP_S16(historySample);
+        // Lightweight does not use the final filter type at all, unlike standard reverb processing
+        *start = CLAMP_S16(tmpCarryover);
     }
+    
+    // Save the final carryover sample so the next processing tick can resume from it
+    historySamplesLight[channel] = tmpCarryover;
 }
-#undef FILTERS_MINUS_1
 
 void initialize_better_reverb_buffers(void) {
     delayBufs[SYNTH_CHANNEL_LEFT] = (s16**) soundAlloc(&gBetterReverbPool, BETTER_REVERB_PTR_SIZE);
@@ -195,8 +179,11 @@ void initialize_better_reverb_buffers(void) {
 
 void set_better_reverb_buffers(u32 *inputDelaysL, u32 *inputDelaysR) {
     s32 bufOffset = 0;
-    s32 i;
     s32 filterCount = reverbFilterCount;
+    u32 *inputDelayPtrs[SYNTH_CHANNEL_STEREO_COUNT] = {
+        [SYNTH_CHANNEL_LEFT]  = inputDelaysL,
+        [SYNTH_CHANNEL_RIGHT] = inputDelaysR,
+    };
 
     if (betterReverbLightweight)
         filterCount = BETTER_REVERB_FILTER_COUNT_LIGHT;
@@ -209,92 +196,21 @@ void set_better_reverb_buffers(u32 *inputDelaysL, u32 *inputDelaysR) {
 
     // NOTE: Using filterCount over NUM_ALLPASS will report less memory usage with fewer filters, but poses an additional
     // risk to anybody testing on console with performance compromises, as emulator can be easily overlooked.
-    for (i = 0; i < filterCount; ++i) {
-        betterReverbDelays[SYNTH_CHANNEL_LEFT][i] = (s32) (inputDelaysL[i] / gReverbDownsampleRate);
-        betterReverbDelays[SYNTH_CHANNEL_RIGHT][i] = (s32) (inputDelaysR[i] / gReverbDownsampleRate);
-        delayBufs[SYNTH_CHANNEL_LEFT][i] = soundAlloc(&gBetterReverbPool, betterReverbDelays[SYNTH_CHANNEL_LEFT][i] * sizeof(s16));
-        bufOffset += betterReverbDelays[SYNTH_CHANNEL_LEFT][i];
-        delayBufs[SYNTH_CHANNEL_RIGHT][i] = soundAlloc(&gBetterReverbPool, betterReverbDelays[SYNTH_CHANNEL_RIGHT][i] * sizeof(s16));
-        bufOffset += betterReverbDelays[SYNTH_CHANNEL_RIGHT][i];
+    for (s32 channel = 0; channel < SYNTH_CHANNEL_STEREO_COUNT; channel++) {
+        historySamplesLight[channel] = 0;
+        for (s32 filter = 0; filter < filterCount; filter++) {
+            betterReverbDelays[channel][filter] = (s32) (inputDelayPtrs[channel][filter] / gReverbDownsampleRate);
+            delayBufs[channel][filter] = soundAlloc(&gBetterReverbPool, betterReverbDelays[channel][filter] * sizeof(s16));
+            bufOffset += betterReverbDelays[channel][filter];
+        }
     }
 
     aggress(bufOffset * sizeof(s16) <= BETTER_REVERB_SIZE - BETTER_REVERB_PTR_SIZE, "BETTER_REVERB_SIZE is too small for this preset!");
 
-    lastDelayLight[SYNTH_CHANNEL_LEFT] = betterReverbDelays[SYNTH_CHANNEL_LEFT][filterCount-1];
-    lastDelayLight[SYNTH_CHANNEL_RIGHT] = betterReverbDelays[SYNTH_CHANNEL_RIGHT][filterCount-1];
-
     bzero(allpassIdx, sizeof(allpassIdx));
 }
 #endif
 
-#ifdef VERSION_EU
-s16 gVolume;
-s8 gUseReverb;
-s8 gNumSynthesisReverbs;
-struct NoteSubEu *gNoteSubsEu;
-#endif
-
-#ifdef VERSION_EU
-f32 gLeftVolRampings[3][1024];
-f32 gRightVolRampings[3][1024];
-f32 *gCurrentLeftVolRamping; // Points to any of the three left buffers above
-f32 *gCurrentRightVolRamping; // Points to any of the three right buffers above
-
-u8 audioString1[] = "pitch %x: delaybytes %d : olddelay %d\n";
-u8 audioString2[] = "cont %x: delaybytes %d : olddelay %d\n";
-#endif
-
-#ifdef VERSION_EU
-// Equivalent functionality as the US/JP version,
-// just that the reverb structure is chosen from an array with index
-void prepare_reverb_ring_buffer(s32 chunkLen, u32 updateIndex, s32 reverbIndex) {
-    struct ReverbRingBufferItem *item;
-    struct SynthesisReverb *reverb = &gSynthesisReverbs[reverbIndex];
-    s32 srcPos, dstPos;
-    s32 nSamples;
-    s32 excessiveSamples;
-    if (reverb->downsampleRate != 1) {
-        if (reverb->framesLeftToIgnore == 0) {
-            // Now that the RSP has finished, downsample the samples produced two frames ago by skipping
-            // samples.
-            item = &reverb->items[reverb->curFrame][updateIndex];
-
-            // Touches both left and right since they are adjacent in memory
-            osInvalDCache(item->toDownsampleLeft, DEFAULT_LEN_2CH);
-
-            for (srcPos = 0, dstPos = 0; dstPos < item->lengthA / 2;
-                 srcPos += reverb->downsampleRate, dstPos++) {
-                reverb->ringBuffer.left[item->startPos + dstPos] = item->toDownsampleLeft[srcPos];
-                reverb->ringBuffer.right[item->startPos + dstPos] = item->toDownsampleRight[srcPos];
-            }
-            for (dstPos = 0; dstPos < item->lengthB / 2; srcPos += reverb->downsampleRate, dstPos++) {
-                reverb->ringBuffer.left[dstPos] = item->toDownsampleLeft[srcPos];
-                reverb->ringBuffer.right[dstPos] = item->toDownsampleRight[srcPos];
-            }
-        }
-    }
-
-    item = &reverb->items[reverb->curFrame][updateIndex];
-    nSamples = chunkLen / reverb->downsampleRate;
-    excessiveSamples = (nSamples + reverb->nextRingBufferPos) - reverb->bufSizePerChannel;
-    if (excessiveSamples < 0) {
-        // There is space in the ring buffer before it wraps around
-        item->lengthA = nSamples * 2;
-        item->lengthB = 0;
-        item->startPos = (s32) reverb->nextRingBufferPos;
-        reverb->nextRingBufferPos += nSamples;
-    } else {
-        // Ring buffer wrapped around
-        item->lengthA = (nSamples - excessiveSamples) * 2;
-        item->lengthB = excessiveSamples * 2;
-        item->startPos = reverb->nextRingBufferPos;
-        reverb->nextRingBufferPos = excessiveSamples;
-    }
-    // These fields are never read later
-    item->numSamplesAfterDownsampling = nSamples;
-    item->chunkLen = chunkLen;
-}
-#else
 void prepare_reverb_ring_buffer(s32 chunkLen, u32 updateIndex) {
     struct ReverbRingBufferItem *item;
     s32 srcPos, dstPos;
@@ -406,108 +322,10 @@ void prepare_reverb_ring_buffer(s32 chunkLen, u32 updateIndex) {
     item->numSamplesAfterDownsampling = numSamplesAfterDownsampling;
     item->chunkLen = chunkLen;
 }
-#endif
 
-#ifdef VERSION_EU
-u64 *synthesis_load_reverb_ring_buffer(u64 *cmd, u16 addr, u16 srcOffset, s32 len, s32 reverbIndex) {
-    aSetBuffer(cmd++, 0, addr, 0, len);
-    aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(&gSynthesisReverbs[reverbIndex].ringBuffer.left[srcOffset]));
-
-    aSetBuffer(cmd++, 0, addr + DEFAULT_LEN_1CH, 0, len);
-    aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(&gSynthesisReverbs[reverbIndex].ringBuffer.right[srcOffset]));
-
-    return cmd;
-}
-#endif
-
-#ifdef VERSION_EU
-u64 *synthesis_save_reverb_ring_buffer(u64 *cmd, u16 addr, u16 destOffset, s32 len, s32 reverbIndex) {
-    aSetBuffer(cmd++, 0, 0, addr, len);
-    aSaveBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(&gSynthesisReverbs[reverbIndex].ringBuffer.left[destOffset]));
-
-    aSetBuffer(cmd++, 0, 0, addr + DEFAULT_LEN_1CH, len);
-    aSaveBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(&gSynthesisReverbs[reverbIndex].ringBuffer.right[destOffset]));
-
-    return cmd;
-}
-#endif
-
-#ifdef VERSION_EU
-void synthesis_load_note_subs_eu(s32 updateIndex) {
-    struct NoteSubEu *src;
-    struct NoteSubEu *dest;
-    s32 i;
-
-    for (i = 0; i < gMaxSimultaneousNotes; i++) {
-        src = &gNotes[i].noteSubEu;
-        dest = &gNoteSubsEu[gMaxSimultaneousNotes * updateIndex + i];
-        if (src->enabled) {
-            *dest = *src;
-            src->needsInit = FALSE;
-        } else {
-            dest->enabled = FALSE;
-        }
-    }
-}
-#endif
-
-#ifdef VERSION_EU
-// TODO: (Scrub C) pointless mask and whitespace
-u64 *synthesis_execute(u64 *cmdBuf, s32 *writtenCmds, s16 *aiBuf, s32 bufLen) {
-    s32 i, j;
-    f32 *leftVolRamp;
-    f32 *rightVolRamp;
-    u32 *aiBufPtr;
-    u64 *cmd = cmdBuf;
-    s32 chunkLen;
-    s32 nextVolRampTable = 0;
-
-    for (i = gAudioBufferParameters.updatesPerFrame; i > 0; i--) {
-        process_sequences(i - 1);
-        synthesis_load_note_subs_eu(gAudioBufferParameters.updatesPerFrame - i);
-    }
-    aSegment(cmd++, 0, 0);
-    aiBufPtr = (u32 *) aiBuf;
-    for (i = gAudioBufferParameters.updatesPerFrame; i > 0; i--) {
-        if (i == 1) {
-            // self-assignment has no affect when added here, could possibly simplify a macro definition
-            chunkLen = bufLen;
-            leftVolRamp = gLeftVolRampings[nextVolRampTable];
-            rightVolRamp = gRightVolRampings[nextVolRampTable & 0xFFFFFFFF];
-        } else {
-            if (bufLen / i >= gAudioBufferParameters.samplesPerUpdateMax) {
-                chunkLen = gAudioBufferParameters.samplesPerUpdateMax; nextVolRampTable = 2; leftVolRamp = gLeftVolRampings[2]; rightVolRamp = gRightVolRampings[2];
-            } else if (bufLen / i <= gAudioBufferParameters.samplesPerUpdateMin) {
-                chunkLen = gAudioBufferParameters.samplesPerUpdateMin; nextVolRampTable = 0; leftVolRamp = gLeftVolRampings[0]; rightVolRamp = gRightVolRampings[0];
-            } else {
-                chunkLen = gAudioBufferParameters.samplesPerUpdate; nextVolRampTable = 1; leftVolRamp = gLeftVolRampings[1]; rightVolRamp = gRightVolRampings[1];
-            }
-        }
-        gCurrentLeftVolRamping = leftVolRamp;
-        gCurrentRightVolRamping = rightVolRamp;
-        for (j = 0; j < gNumSynthesisReverbs; j++) {
-            if (gSynthesisReverbs[j].useReverb) {
-                prepare_reverb_ring_buffer(chunkLen, gAudioBufferParameters.updatesPerFrame - i, j);
-            }
-        }
-        cmd = synthesis_do_one_audio_update((s16 *) aiBufPtr, chunkLen, cmd, gAudioBufferParameters.updatesPerFrame - i);
-        bufLen -= chunkLen;
-        aiBufPtr += chunkLen;
-    }
-
-    for (j = 0; j < gNumSynthesisReverbs; j++) {
-        if (gSynthesisReverbs[j].framesLeftToIgnore != 0) {
-            gSynthesisReverbs[j].framesLeftToIgnore--;
-        }
-        gSynthesisReverbs[j].curFrame ^= 1;
-    }
-    *writtenCmds = cmd - cmdBuf;
-    return cmd;
-}
-#else
 // bufLen will be divisible by 16
 u64 *synthesis_execute(u64 *cmdBuf, s32 *writtenCmds, s16 *aiBuf, s32 bufLen) {
-    s32 chunkLen;
+    u32 chunkLen;
     s32 i;
     u32 *aiBufPtr = (u32 *) aiBuf;
     u64 *cmd = cmdBuf + 1;
@@ -567,7 +385,7 @@ u64 *synthesis_execute(u64 *cmdBuf, s32 *writtenCmds, s16 *aiBuf, s32 bufLen) {
         if (gSynthesisReverb.useReverb) {
             prepare_reverb_ring_buffer(chunkLen, gAudioUpdatesPerFrame - i);
         }
-        cmd = synthesis_do_one_audio_update((s16 *) aiBufPtr, chunkLen, cmd, gAudioUpdatesPerFrame - i);
+        cmd = synthesis_do_one_audio_update((s16 *) aiBufPtr, chunkLen * 2, cmd, gAudioUpdatesPerFrame - i);
 
         AUDIO_PROFILER_COMPLETE_AND_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_ENVELOPE_REVERB, PROFILER_TIME_SUB_AUDIO_SYNTHESIS, PROFILER_TIME_SUB_AUDIO_UPDATE);
 
@@ -581,152 +399,8 @@ u64 *synthesis_execute(u64 *cmdBuf, s32 *writtenCmds, s16 *aiBuf, s32 bufLen) {
     *writtenCmds = cmd - cmdBuf;
     return cmd;
 }
-#endif
 
-
-#ifdef VERSION_EU
-u64 *synthesis_resample_and_mix_reverb(u64 *cmd, s32 bufLen, s16 reverbIndex, s16 updateIndex) {
-    struct ReverbRingBufferItem *item;
-    s16 startPad;
-    s16 paddedLengthA;
-
-    item = &gSynthesisReverbs[reverbIndex].items[gSynthesisReverbs[reverbIndex].curFrame][updateIndex];
-
-    aClearBuffer(cmd++, DMEM_ADDR_WET_LEFT_CH, DEFAULT_LEN_2CH);
-    if (gSynthesisReverbs[reverbIndex].downsampleRate == 1) {
-        cmd = synthesis_load_reverb_ring_buffer(cmd, DMEM_ADDR_WET_LEFT_CH, item->startPos, item->lengthA, reverbIndex);
-        if (item->lengthB != 0) {
-            cmd = synthesis_load_reverb_ring_buffer(cmd, DMEM_ADDR_WET_LEFT_CH + item->lengthA, 0, item->lengthB, reverbIndex);
-        }
-        aSetBuffer(cmd++, 0, 0, 0, DEFAULT_LEN_2CH);
-        aMix(cmd++, 0, 0x7fff, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_LEFT_CH);
-        aMix(cmd++, 0, 0x8000 + gSynthesisReverbs[reverbIndex].reverbGain, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_WET_LEFT_CH);
-    } else {
-        startPad = (item->startPos & 0x7) * 2;
-        paddedLengthA = ALIGN16(startPad + item->lengthA);
-
-        cmd = synthesis_load_reverb_ring_buffer(cmd, DMEM_ADDR_RESAMPLED, (item->startPos - startPad / 2), DEFAULT_LEN_1CH, reverbIndex);
-        if (item->lengthB != 0) {
-            cmd = synthesis_load_reverb_ring_buffer(cmd, DMEM_ADDR_RESAMPLED + paddedLengthA, 0, DEFAULT_LEN_1CH - paddedLengthA, reverbIndex);
-        }
-
-        aSetBuffer(cmd++, 0, DMEM_ADDR_RESAMPLED + startPad, DMEM_ADDR_WET_LEFT_CH, bufLen * 2);
-        aResample(cmd++, gSynthesisReverbs[reverbIndex].resampleFlags, gSynthesisReverbs[reverbIndex].resampleRate, VIRTUAL_TO_PHYSICAL2(gSynthesisReverbs[reverbIndex].resampleStateLeft));
-
-        aSetBuffer(cmd++, 0, DMEM_ADDR_RESAMPLED2 + startPad, DMEM_ADDR_WET_RIGHT_CH, bufLen * 2);
-        aResample(cmd++, gSynthesisReverbs[reverbIndex].resampleFlags, gSynthesisReverbs[reverbIndex].resampleRate, VIRTUAL_TO_PHYSICAL2(gSynthesisReverbs[reverbIndex].resampleStateRight));
-
-        aSetBuffer(cmd++, 0, 0, 0, DEFAULT_LEN_2CH);
-        aMix(cmd++, 0, 0x7fff, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_LEFT_CH);
-        aMix(cmd++, 0, 0x8000 + gSynthesisReverbs[reverbIndex].reverbGain, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_WET_LEFT_CH);
-    }
-    return cmd;
-}
-
-u64 *synthesis_save_reverb_samples(u64 *cmd, s16 reverbIndex, s16 updateIndex) {
-    struct ReverbRingBufferItem *item;
-
-    item = &gSynthesisReverbs[reverbIndex].items[gSynthesisReverbs[reverbIndex].curFrame][updateIndex];
-    if (gSynthesisReverbs[reverbIndex].useReverb) {
-        switch (gSynthesisReverbs[reverbIndex].downsampleRate) {
-            case 1:
-                // Put the oldest samples in the ring buffer into the wet channels
-                cmd = synthesis_save_reverb_ring_buffer(cmd, DMEM_ADDR_WET_LEFT_CH, item->startPos, item->lengthA, reverbIndex);
-                if (item->lengthB != 0) {
-                    // Ring buffer wrapped
-                    cmd = synthesis_save_reverb_ring_buffer(cmd, DMEM_ADDR_WET_LEFT_CH + item->lengthA, 0, item->lengthB, reverbIndex);
-                }
-                break;
-
-            default:
-                // Downsampling is done later by CPU when RSP is done, therefore we need to have double
-                // buffering. Left and right buffers are adjacent in memory.
-                aSetBuffer(cmd++, 0, 0, DMEM_ADDR_WET_LEFT_CH, DEFAULT_LEN_2CH);
-                aSaveBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(gSynthesisReverbs[reverbIndex].items[gSynthesisReverbs[reverbIndex].curFrame][updateIndex].toDownsampleLeft));
-                gSynthesisReverbs[reverbIndex].resampleFlags = 0;
-                break;
-        }
-    }
-    return cmd;
-}
-#endif
-
-#ifdef VERSION_EU
-u64 *synthesis_do_one_audio_update(s16 *aiBuf, s32 bufLen, u64 *cmd, s32 updateIndex) {
-    struct NoteSubEu *noteSubEu;
-    u8 noteIndices[56];
-    s32 temp;
-    s32 i;
-    s16 j;
-    s16 notePos = 0;
-
-    if (gNumSynthesisReverbs == 0) {
-        for (i = 0; i < gMaxSimultaneousNotes; i++) {
-            if (gNoteSubsEu[gMaxSimultaneousNotes * updateIndex + i].enabled) {
-                noteIndices[notePos++] = i;
-            }
-        }
-    } else {
-        for (j = 0; j < gNumSynthesisReverbs; j++) {
-            for (i = 0; i < gMaxSimultaneousNotes; i++) {
-                noteSubEu = &gNoteSubsEu[gMaxSimultaneousNotes * updateIndex + i];
-                if (noteSubEu->enabled && j == noteSubEu->reverbIndex) {
-                    noteIndices[notePos++] = i;
-                }
-            }
-        }
-
-        for (i = 0; i < gMaxSimultaneousNotes; i++) {
-            noteSubEu = &gNoteSubsEu[gMaxSimultaneousNotes * updateIndex + i];
-            if (noteSubEu->enabled && noteSubEu->reverbIndex >= gNumSynthesisReverbs) {
-                noteIndices[notePos++] = i;
-            }
-        }
-    }
-    aClearBuffer(cmd++, DMEM_ADDR_LEFT_CH, DEFAULT_LEN_2CH);
-    i = 0;
-    for (j = 0; j < gNumSynthesisReverbs; j++) {
-        gUseReverb = gSynthesisReverbs[j].useReverb;
-        if (gUseReverb) {
-            cmd = synthesis_resample_and_mix_reverb(cmd, bufLen, j, updateIndex);
-        }
-        for (; i < notePos; i++) {
-            temp = updateIndex * gMaxSimultaneousNotes;
-            if (j == gNoteSubsEu[temp + noteIndices[i]].reverbIndex) {
-                cmd = synthesis_process_note(&gNotes[noteIndices[i]],
-                                             &gNoteSubsEu[temp + noteIndices[i]],
-                                             &gNotes[noteIndices[i]].synthesisState,
-                                             aiBuf, bufLen, cmd);
-                continue;
-            } else {
-                break;
-            }
-        }
-        if (gSynthesisReverbs[j].useReverb) {
-            cmd = synthesis_save_reverb_samples(cmd, j, updateIndex);
-        }
-    }
-    for (; i < notePos; i++) {
-        temp = updateIndex * gMaxSimultaneousNotes;
-        if (IS_BANK_LOAD_COMPLETE(gNoteSubsEu[temp + noteIndices[i]].bankId)) {
-            cmd = synthesis_process_note(&gNotes[noteIndices[i]],
-                                         &gNoteSubsEu[temp + noteIndices[i]],
-                                         &gNotes[noteIndices[i]].synthesisState,
-                                         aiBuf, bufLen, cmd);
-        } else {
-            gAudioErrorFlags = (gNoteSubsEu[temp + noteIndices[i]].bankId + (i << 8)) + 0x10000000;
-        }
-    }
-
-    temp = bufLen * 2;
-    aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, temp);
-    aInterleave(cmd++, DMEM_ADDR_LEFT_CH, DMEM_ADDR_RIGHT_CH);
-    aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, temp * 2);
-    aSaveBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(aiBuf));
-    return cmd;
-}
-#else
-u64 *synthesis_do_one_audio_update(s16 *aiBuf, s32 bufLen, u64 *cmd, s32 updateIndex) {
+u64 *synthesis_do_one_audio_update(s16 *aiBuf, u32 bufLen, u64 *cmd, s32 updateIndex) {
     s16 ra;
     s16 t4;
     struct ReverbRingBufferItem *v1;
@@ -766,9 +440,9 @@ u64 *synthesis_do_one_audio_update(s16 *aiBuf, s32 bufLen, u64 *cmd, s32 updateI
                 // Ring buffer wrapped
                 aSetLoadBufferPair(cmd++, ra, 0);
             }
-            aSetBuffer(cmd++, 0, t4 + DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_LEFT_CH, bufLen << 1);
+            aSetBuffer(cmd++, 0, t4 + DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_LEFT_CH, bufLen);
             aResample(cmd++, gSynthesisReverb.resampleFlags, (u16) gSynthesisReverb.resampleRate, VIRTUAL_TO_PHYSICAL2(gSynthesisReverb.resampleStateLeft));
-            aSetBuffer(cmd++, 0, t4 + DMEM_ADDR_WET_RIGHT_CH, DMEM_ADDR_RIGHT_CH, bufLen << 1);
+            aSetBuffer(cmd++, 0, t4 + DMEM_ADDR_WET_RIGHT_CH, DMEM_ADDR_RIGHT_CH, bufLen);
             aResample(cmd++, gSynthesisReverb.resampleFlags, (u16) gSynthesisReverb.resampleRate, VIRTUAL_TO_PHYSICAL2(gSynthesisReverb.resampleStateRight));
 #ifdef BETTER_REVERB
             // NOTE: Technically using an if/else here means using BETTER_REVERB vanilla presets with downsampling won't match 1-to-1 in volume with BETTER_REVERB being disabled.
@@ -804,57 +478,28 @@ u64 *synthesis_do_one_audio_update(s16 *aiBuf, s32 bufLen, u64 *cmd, s32 updateI
     }
     return cmd;
 }
-#endif
 
-#ifdef VERSION_EU
-// Processes just one note, not all
-u64 *synthesis_process_note(struct Note *note, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *synthesisState, UNUSED s16 *aiBuf, s32 bufLen, u64 *cmd) {
-#else
-u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
+u64 *synthesis_process_notes(s16 *aiBuf, u32 bufLen, u64 *cmd) {
     s32 noteIndex;                           // sp174
     struct Note *note;                       // s7
-#endif
     struct AudioBankSample *audioBookSample; // sp164, sp138
     struct AdpcmLoop *loopInfo;              // sp160, sp134
     s16 *curLoadedBook = NULL;               // sp154, sp130
-#ifndef VERSION_EU
-    u16 resamplingRateFixedPoint;            // sp5c, sp11A
-#endif
     s32 noteFinished;                        // 150 t2, sp124
     s32 restart;                             // 14c t3, sp120
     s32 flags;                               // sp148, sp11C
-#ifdef VERSION_EU
-    u16 resamplingRateFixedPoint;            // sp5c, sp11A
-#endif
-    UNUSED s32 tempBufLen;
     s32 sp130 = 0;  //sp128, sp104
     s32 nAdpcmSamplesProcessed; // signed required for US
     s32 t0;
-#ifdef VERSION_EU
     u8 *sampleAddr;                          // sp120, spF4
     s32 s6;
-#else
-    s32 s6;
-    u8 *sampleAddr;                          // sp120, spF4
-#endif
 
-#ifdef VERSION_EU
-    s32 samplesLenAdjusted; // 108,      spEC
-    // Might have been used to store (samplesLenFixedPoint >> 0x10), but doing so causes strange
+    // The removed samplesLenInt local might have been used to store (samplesLenFixedPoint >> 16), but doing so causes strange
     // behavior with the break near the end of the loop, causing US and JP to need a goto instead
-    UNUSED s32 samplesLenInt;
-    s32 endPos;             // sp110,    spE4
-    s32 nSamplesToProcess;  // sp10c/a0, spE0
-    s32 s2;
-#else
-    // Might have been used to store (samplesLenFixedPoint >> 0x10), but doing so causes strange
-    // behavior with the break near the end of the loop, causing US and JP to need a goto instead
-    UNUSED s32 samplesLenInt;
     s32 samplesLenAdjusted; // 108
     s32 s2;
     s32 endPos;             // sp110,    spE4
     s32 nSamplesToProcess;  // sp10c/a0, spE0
-#endif
 
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
     s32 leftRight;
@@ -865,68 +510,49 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
     u32 samplesLenFixedPoint;    // v1_1
     s32 nSamplesInThisIteration; // v1_2
     u32 a3;
-#ifndef VERSION_EU
-    s32 t9;
-#endif
     u8 *v0_2;
     s32 nParts;                 // spE8, spBC
     s32 curPart;                // spE4, spB8
 
-#ifndef VERSION_EU
     f32 resamplingRate; // f12
-#endif
     s32 temp;
 
-#ifdef VERSION_EU
     s32 s5Aligned;
-#endif
     s32 resampledTempLen;                    // spD8, spAC
     u16 noteSamplesDmemAddrBeforeResampling = 0; // spD6, spAA
+    u16 resamplingRateFixedPoint;            // sp5c, sp11A
 
+    switch (bufLen) {
+        case (128 * 2):
+            currentRampingTableLeft = gVolRampingLhs128;
+            currentRampingTableRight = gVolRampingRhs128;
+            break;
+        case (144 * 2):
+            currentRampingTableLeft = gVolRampingLhs144;
+            currentRampingTableRight = gVolRampingRhs144;
+            break;
+        case (136 * 2):
+        default:
+            currentRampingTableLeft = gVolRampingLhs136;
+            currentRampingTableRight = gVolRampingRhs136;
+            break;
+    }
 
-#ifndef VERSION_EU
     for (noteIndex = 0; noteIndex < gMaxSimultaneousNotes; noteIndex++) {
         note = &gNotes[noteIndex];
-#ifdef VERSION_US
         //! This function requires note->enabled to be volatile, but it breaks other functions like note_enable.
         //! Casting to a struct with just the volatile bitfield works, but there may be a better way to match.
         if (((struct vNote *)note)->enabled && !IS_BANK_LOAD_COMPLETE(note->bankId)) {
-#else
-        if (!IS_BANK_LOAD_COMPLETE(note->bankId)) {
-#endif
             gAudioErrorFlags = (note->bankId << 8) + noteIndex + 0x1000000;
         } else if (((struct vNote *)note)->enabled) {
-#else
-        if (note->noteSubEu.enabled == FALSE) {
-            return cmd;
-        } else {
-#endif
             flags = 0;
-#ifdef VERSION_EU
-            tempBufLen = bufLen;
-#endif
 
-#ifdef VERSION_EU
-            if (noteSubEu->needsInit == TRUE) {
-#else
             if (note->needsInit == TRUE) {
-#endif
                 flags = A_INIT;
-#ifndef VERSION_EU
                 note->samplePosInt = 0;
                 note->samplePosFrac = 0;
-#else
-                synthesisState->restart = FALSE;
-                synthesisState->samplePosInt = 0;
-                synthesisState->samplePosFrac = 0;
-                synthesisState->curVolLeft = 1;
-                synthesisState->curVolRight = 1;
-                synthesisState->prevHeadsetPanRight = 0;
-                synthesisState->prevHeadsetPanLeft = 0;
-#endif
             }
 
-#ifndef VERSION_EU
             if (note->frequency < 2.0f) {
                 nParts = 1;
                 if (note->frequency > 1.99996f) {
@@ -943,39 +569,20 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
             }
 
             resamplingRateFixedPoint = (u16)(s32)(resamplingRate * 32768.0f);
-            samplesLenFixedPoint = note->samplePosFrac + (resamplingRateFixedPoint * bufLen) * 2;
+            samplesLenFixedPoint = note->samplePosFrac + (resamplingRateFixedPoint * bufLen);
             note->samplePosFrac = samplesLenFixedPoint & 0xFFFF; // 16-bit store, can't reuse
-#else
-            resamplingRateFixedPoint = noteSubEu->resamplingRateFixedPoint;
-            nParts = noteSubEu->hasTwoAdpcmParts + 1;
-            samplesLenFixedPoint = (resamplingRateFixedPoint * tempBufLen * 2) + synthesisState->samplePosFrac;
-            synthesisState->samplePosFrac = samplesLenFixedPoint & 0xFFFF;
-#endif
 
-#ifdef VERSION_EU
-            if (noteSubEu->isSyntheticWave) {
-                cmd = load_wave_samples(cmd, noteSubEu, synthesisState, samplesLenFixedPoint >> 0x10);
-                noteSamplesDmemAddrBeforeResampling = (synthesisState->samplePosInt * 2) + DMEM_ADDR_UNCOMPRESSED_NOTE;
-                synthesisState->samplePosInt += samplesLenFixedPoint >> 0x10;
-            }
-#else
             if (note->sound == NULL) {
                 // A wave synthesis note (not ADPCM)
 
-                cmd = load_wave_samples(cmd, note, samplesLenFixedPoint >> 0x10);
+                cmd = load_wave_samples(cmd, note, samplesLenFixedPoint >> 16);
                 noteSamplesDmemAddrBeforeResampling = DMEM_ADDR_UNCOMPRESSED_NOTE + note->samplePosInt * 2;
-                note->samplePosInt += (samplesLenFixedPoint >> 0x10);
+                note->samplePosInt += (samplesLenFixedPoint >> 16);
                 flags = 0;
             }
-#endif
             else {
                 // ADPCM note
-
-#ifdef VERSION_EU
-                audioBookSample = noteSubEu->sound.audioBankSound->sample;
-#else
                 audioBookSample = note->sound->sample;
-#endif
 
                 loopInfo = audioBookSample->loop;
                 endPos = loopInfo->end;
@@ -986,32 +593,21 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                     s5 = 0;                     // s4
 
                     if (nParts == 1) {
-                        samplesLenAdjusted = samplesLenFixedPoint >> 0x10;
-                    } else if ((samplesLenFixedPoint >> 0x10) & 1) {
-                        samplesLenAdjusted = ((samplesLenFixedPoint >> 0x10) & ~1) + (curPart * 2);
+                        samplesLenAdjusted = samplesLenFixedPoint >> 16;
+                    } else if ((samplesLenFixedPoint >> 16) & 1) {
+                        samplesLenAdjusted = ((samplesLenFixedPoint >> 16) & ~1) + (curPart * 2);
                     }
                     else {
-                        samplesLenAdjusted = (samplesLenFixedPoint >> 0x10);
+                        samplesLenAdjusted = (samplesLenFixedPoint >> 16);
                     }
 
                     if (curLoadedBook != audioBookSample->book->book) {
                         u32 nEntries; // v1
                         curLoadedBook = audioBookSample->book->book;
-#ifdef VERSION_EU
-                        nEntries = 16 * audioBookSample->book->order * audioBookSample->book->npredictors;
-                        aLoadADPCM(cmd++, nEntries, VIRTUAL_TO_PHYSICAL2(curLoadedBook + noteSubEu->bookOffset));
-#else
-                        nEntries = audioBookSample->book->order * audioBookSample->book->npredictors;
-                        aLoadADPCM(cmd++, nEntries * 16, VIRTUAL_TO_PHYSICAL2(curLoadedBook));
-#endif
+                        nEntries = audioBookSample->book->order * audioBookSample->book->npredictors * 16U;
+                        aLoadADPCM(cmd++, nEntries, VIRTUAL_TO_PHYSICAL2(curLoadedBook));
                     }
 
-#ifdef VERSION_EU
-                    if (noteSubEu->bookOffset) {
-                        curLoadedBook = euUnknownData_80301950; // what's this? never read
-                    }
-#endif
-
                     while (nAdpcmSamplesProcessed != samplesLenAdjusted) {
                         s32 samplesRemaining; // v1
                         s32 s0;
@@ -1019,23 +615,13 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                         noteFinished = FALSE;
                         restart = FALSE;
                         nSamplesToProcess = samplesLenAdjusted - nAdpcmSamplesProcessed;
-#ifdef VERSION_EU
-                        s2 = synthesisState->samplePosInt & 0xf;
-                        samplesRemaining = endPos - synthesisState->samplePosInt;
-#else
                         s2 = note->samplePosInt & 0xf;
                         samplesRemaining = endPos - note->samplePosInt;
-#endif
 
-#ifdef VERSION_EU
-                        if (s2 == 0 && synthesisState->restart == FALSE) {
-                            s2 = 16;
-                        }
-#else
                         if (s2 == 0 && note->restart == FALSE) {
                             s2 = 16;
                         }
-#endif
+
                         s6 = 16 - s2; // a1
 
                         if (nSamplesToProcess < samplesRemaining) {
@@ -1043,11 +629,7 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                             s0 = t0 * 16;
                             s3 = s6 + s0 - nSamplesToProcess;
                         } else {
-#ifndef VERSION_EU
-                            s0 = samplesRemaining + s2 - 0x10;
-#else
                             s0 = samplesRemaining - s6;
-#endif
                             s3 = 0;
                             if (s0 <= 0) {
                                 s0 = 0;
@@ -1063,22 +645,7 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                         }
 
                         if (t0 != 0) {
-#ifdef VERSION_EU
-                            temp = (synthesisState->samplePosInt - s2 + 0x10) / 16;
-                            if (audioBookSample->loaded == 0x81) {
-                                v0_2 = sampleAddr + temp * 9;
-                            } else {
-                                AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_DMA);
-
-                                v0_2 = dma_sample_data(
-                                    (uintptr_t) (sampleAddr + temp * 9),
-                                    t0 * 9, flags, &synthesisState->sampleDmaIndex);
-
-                                AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_DMA, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING);
-                            }
-#else
-                            // HACKERSM64_TODO: Is the EU thing above applicable to US? Could potentially save some resources.
-                            temp = (note->samplePosInt - s2 + 0x10) / 16;
+                            temp = (note->samplePosInt - s2 + 16) / 16;
             
                             AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_DMA);
 
@@ -1087,7 +654,7 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                                 t0 * 9, flags, &note->sampleDmaIndex);
 
                             AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_DMA, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING);
-#endif
+
                             a3 = (u32)((uintptr_t) v0_2 & 0xf);
                             aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA, 0, t0 * 9 + a3);
                             aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(v0_2 - a3));
@@ -1096,48 +663,23 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                             a3 = 0;
                         }
 
-#ifdef VERSION_EU
-                        if (synthesisState->restart != FALSE) {
-                            aSetLoop(cmd++, VIRTUAL_TO_PHYSICAL2(audioBookSample->loop->state));
-                            flags = A_LOOP; // = 2
-                            synthesisState->restart = FALSE;
-                        }
-#else
                         if (note->restart != FALSE) {
                             aSetLoop(cmd++, VIRTUAL_TO_PHYSICAL2(audioBookSample->loop->state));
                             flags = A_LOOP; // = 2
                             note->restart = FALSE;
                         }
-#endif
 
                         nSamplesInThisIteration = s0 + s6 - s3;
-#ifdef VERSION_EU
-                        if (nAdpcmSamplesProcessed == 0) {
-                            aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3,
-                                       DMEM_ADDR_UNCOMPRESSED_NOTE, s0 * 2);
-                            aADPCMdec(cmd++, flags,
-                                      VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
-                            sp130 = s2 * 2;
-                        } else {
-                            s5Aligned = ALIGN32(s5);
-                            aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3,
-                                       DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned, s0 * 2);
-                            aADPCMdec(cmd++, flags,
-                                      VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
-                            aDMEMMove(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned + (s2 * 2),
-                                      DMEM_ADDR_UNCOMPRESSED_NOTE + s5, (nSamplesInThisIteration) * 2);
-                        }
-#else
                         if (nAdpcmSamplesProcessed == 0) {
                             aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3, DMEM_ADDR_UNCOMPRESSED_NOTE, s0 * 2);
                             aADPCMdec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->adpcmdecState));
                             sp130 = s2 * 2;
                         } else {
-                            aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3, DMEM_ADDR_UNCOMPRESSED_NOTE + ALIGN32(s5), s0 * 2);
+                            s5Aligned = ALIGN32(s5);
+                            aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3, DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned, s0 * 2);
                             aADPCMdec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->adpcmdecState));
-                            aDMEMMove(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + ALIGN32(s5) + (s2 * 2), DMEM_ADDR_UNCOMPRESSED_NOTE + s5, (nSamplesInThisIteration) * 2);
+                            aDMEMMove(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned + (s2 * 2), DMEM_ADDR_UNCOMPRESSED_NOTE + s5, (nSamplesInThisIteration) * 2);
                         }
-#endif
 
                         nAdpcmSamplesProcessed += nSamplesInThisIteration;
 
@@ -1169,32 +711,18 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                         if (noteFinished) {
                             aClearBuffer(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + s5,
                                          (samplesLenAdjusted - nAdpcmSamplesProcessed) * 2);
-#ifdef VERSION_EU
-                            noteSubEu->finished = 1;
-                            note->noteSubEu.finished = 1;
-                            note->noteSubEu.enabled = 0;
-#else
                             note->samplePosInt = 0;
-                            note->finished = 1;
+                            note->finished = TRUE;
                             ((struct vNote *)note)->enabled = 0;
-#endif
                             break;
                         }
-#ifdef VERSION_EU
-                        if (restart) {
-                            synthesisState->restart = TRUE;
-                            synthesisState->samplePosInt = loopInfo->start;
-                        } else {
-                            synthesisState->samplePosInt += nSamplesToProcess;
-                        }
-#else
+
                         if (restart) {
                             note->restart = TRUE;
                             note->samplePosInt = loopInfo->start;
                         } else {
                             note->samplePosInt += nSamplesToProcess;
                         }
-#endif
                     }
 
                     switch (nParts) {
@@ -1206,19 +734,11 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                             switch (curPart) {
                                 case 0:
                                     aSetBuffer(cmd++, 0, DMEM_ADDR_UNCOMPRESSED_NOTE + sp130, DMEM_ADDR_RESAMPLED, samplesLenAdjusted + 4);
-#ifdef VERSION_EU
-                                    aResample(cmd++, A_INIT, 0xff60, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->dummyResampleState));
-#else
                                     aResample(cmd++, A_INIT, 0xff60, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->dummyResampleState));
-#endif
                                     resampledTempLen = samplesLenAdjusted + 4;
                                     noteSamplesDmemAddrBeforeResampling = DMEM_ADDR_RESAMPLED + 4;
-#ifdef VERSION_EU
-                                    if (noteSubEu->finished != FALSE) {
-#else
-                                    if (note->finished != FALSE) {
-#endif
-                                        aClearBuffer(cmd++, DMEM_ADDR_RESAMPLED + resampledTempLen, samplesLenAdjusted + 0x10);
+                                    if (note->finished) {
+                                        aClearBuffer(cmd++, DMEM_ADDR_RESAMPLED + resampledTempLen, samplesLenAdjusted + 16);
                                     }
                                     break;
 
@@ -1226,15 +746,9 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                                     aSetBuffer(cmd++, 0, DMEM_ADDR_UNCOMPRESSED_NOTE + sp130,
                                                DMEM_ADDR_RESAMPLED2,
                                                samplesLenAdjusted + 8);
-#ifdef VERSION_EU
-                                    aResample(cmd++, A_INIT, 0xff60,
-                                              VIRTUAL_TO_PHYSICAL2(
-                                                  synthesisState->synthesisBuffers->dummyResampleState));
-#else
                                     aResample(cmd++, A_INIT, 0xff60,
                                               VIRTUAL_TO_PHYSICAL2(
                                                   note->synthesisBuffers->dummyResampleState));
-#endif
                                     aDMEMMove(cmd++, DMEM_ADDR_RESAMPLED2 + 4,
                                               DMEM_ADDR_RESAMPLED + resampledTempLen,
                                               samplesLenAdjusted + 4);
@@ -1242,137 +756,86 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                             }
                     }
 
-#ifdef VERSION_EU
-                    if (noteSubEu->finished != FALSE) {
-#else
-                    if (note->finished != FALSE) {
-#endif
+                    if (note->finished) {
                         break;
                     }
                 }
             }
 
             flags = 0;
-
-#ifdef VERSION_EU
-            if (noteSubEu->needsInit == TRUE) {
-                flags = A_INIT;
-                noteSubEu->needsInit = FALSE;
-            }
-
-            // final resample
-            aSetBuffer(cmd++, /*flags*/ 0, noteSamplesDmemAddrBeforeResampling, /*dmemout*/ DMEM_ADDR_TEMP, bufLen * 2);
-            aResample(cmd++, flags, resamplingRateFixedPoint, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->finalResampleState));
-#else
             if (note->needsInit == TRUE) {
                 flags = A_INIT;
                 note->needsInit = FALSE;
             }
 
             // final resample
-            aSetBuffer(cmd++, /*flags*/ 0, noteSamplesDmemAddrBeforeResampling, /*dmemout*/ DMEM_ADDR_TEMP, bufLen * 2);
+            aSetBuffer(cmd++, /*flags*/ 0, noteSamplesDmemAddrBeforeResampling, /*dmemout*/ DMEM_ADDR_TEMP, bufLen);
             aResample(cmd++, flags, resamplingRateFixedPoint, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->finalResampleState));
-#endif
 
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
-#ifdef VERSION_EU
-            if (noteSubEu->headsetPanRight != 0 || synthesisState->prevHeadsetPanRight != 0) {
-                leftRight = 1;
-            } else if (noteSubEu->headsetPanLeft != 0 || synthesisState->prevHeadsetPanLeft != 0) {
-                leftRight = 2;
-#else
             if (note->headsetPanRight != 0 || note->prevHeadsetPanRight != 0) {
                 leftRight = 1;
             } else if (note->headsetPanLeft != 0 || note->prevHeadsetPanLeft != 0) {
                 leftRight = 2;
-#endif
             } else {
                 leftRight = 0;
             }
 
             AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_ENVELOPE_REVERB);
-#ifdef VERSION_EU
-            cmd = process_envelope(cmd, noteSubEu, synthesisState, bufLen, 0, leftRight, flags);
-#else
             cmd = process_envelope(cmd, note, bufLen, 0, leftRight);
-#endif
             AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_ENVELOPE_REVERB, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING);
 
-#ifdef VERSION_EU
-            if (noteSubEu->usesHeadsetPanEffects) {
-                cmd = note_apply_headset_pan_effects(cmd, noteSubEu, synthesisState, bufLen * 2, flags, leftRight);
-            }
-#else
             if (note->usesHeadsetPanEffects) {
-                cmd = note_apply_headset_pan_effects(cmd, note, bufLen * 2, flags, leftRight);
+                cmd = note_apply_headset_pan_effects(cmd, note, bufLen, flags, leftRight);
             }
-#endif
 #else
             AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_ENVELOPE_REVERB);
             cmd = process_envelope(cmd, note, bufLen, 0);
             AUDIO_PROFILER_SWITCH(PROFILER_TIME_SUB_AUDIO_SYNTHESIS_ENVELOPE_REVERB, PROFILER_TIME_SUB_AUDIO_SYNTHESIS_PROCESSING);
 #endif
         }
-#ifndef VERSION_EU
     }
 
-    t9 = bufLen * 2;
-    aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, t9);
+    aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, bufLen);
     aInterleave(cmd++, DMEM_ADDR_LEFT_CH, DMEM_ADDR_RIGHT_CH);
-    t9 *= 2;
-    aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, t9);
+    aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, bufLen * 2);
     aSaveBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(aiBuf));
-#endif
 
     return cmd;
 }
 
-#ifdef VERSION_EU
-u64 *load_wave_samples(u64 *cmd, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *synthesisState, s32 nSamplesToLoad) {
+u64 *load_wave_samples(u64 *cmd, struct Note *note, s32 nSamplesToLoad) { // Appends load/copy commands for note's wave samples at cmd; returns the advanced cmd pointer.
     s32 a3;
     s32 repeats;
     s32 i;
-    aSetBuffer(cmd++, /*flags*/ 0, /*dmemin*/ DMEM_ADDR_UNCOMPRESSED_NOTE, /*dmemout*/ 0, /*count*/ 128);
-    aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(noteSubEu->sound.samples));
+    aSetBuffer(cmd++, /*flags*/ 0, /*dmemin*/ DMEM_ADDR_UNCOMPRESSED_NOTE, /*dmemout*/ 0,
+               /*count*/ sizeof(note->synthesisBuffers->samples));
+    aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->samples)); // Loads from note->synthesisBuffers->samples; the count set above is sizeof that member.
 
-    synthesisState->samplePosInt &= 0x3f;
-    a3 = 64 - synthesisState->samplePosInt;
+    note->samplePosInt &= (note->sampleCount - 1); // Masks the position with sampleCount - 1; presumably sampleCount is a power of two -- TODO confirm.
+    a3 = 64 - note->samplePosInt; // NOTE(review): 64 is hard-coded here and below while the mask above uses sampleCount -- confirm they agree.
     if (a3 < nSamplesToLoad) {
         repeats = (nSamplesToLoad - a3 + 63) / 64;
         for (i = 0; i < repeats; i++) {
             aDMEMMove(cmd++,
                       /*dmemin*/ DMEM_ADDR_UNCOMPRESSED_NOTE,
-                      /*dmemout*/ DMEM_ADDR_UNCOMPRESSED_NOTE + (1 + i) * 128,
-                      /*count*/ 128);
+                      /*dmemout*/ DMEM_ADDR_UNCOMPRESSED_NOTE + (1 + i) * sizeof(note->synthesisBuffers->samples),
+                      /*count*/ sizeof(note->synthesisBuffers->samples)); // Copy number i is placed (1 + i) samples-buffer sizes past DMEM_ADDR_UNCOMPRESSED_NOTE.
         }
     }
     return cmd;
 }
-#else
-u64 *load_wave_samples(u64 *cmd, struct Note *note, s32 nSamplesToLoad) {
-    s32 a3;
-    s32 i;
-    aSetBuffer(cmd++, /*flags*/ 0, /*dmemin*/ DMEM_ADDR_UNCOMPRESSED_NOTE, /*dmemout*/ 0,
-               /*count*/ sizeof(note->synthesisBuffers->samples));
-    aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->samples));
-    note->samplePosInt &= (note->sampleCount - 1);
-    a3 = 64 - note->samplePosInt;
-    if (a3 < nSamplesToLoad) {
-        for (i = 0; i <= (nSamplesToLoad - a3 + 63) / 64 - 1; i++) {
-            aDMEMMove(cmd++, /*dmemin*/ DMEM_ADDR_UNCOMPRESSED_NOTE, /*dmemout*/ DMEM_ADDR_UNCOMPRESSED_NOTE + (1 + i) * sizeof(note->synthesisBuffers->samples), /*count*/ sizeof(note->synthesisBuffers->samples));
-        }
-    }
-    return cmd;
-}
-#endif
 
-#ifndef VERSION_EU
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
 u64 *process_envelope(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf, s32 headsetPanSettings) {
 #else
 u64 *process_envelope(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf) {
 #endif
+    u8 mixerFlags;
+    s32 rampLeft;
+    s32 rampRight;
     struct VolumeChange vol;
+
     if (note->initFullVelocity) {
         note->initFullVelocity = FALSE;
         vol.sourceLeft = note->targetVolLeft;
@@ -1385,45 +848,6 @@ u64 *process_envelope(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf) {
     vol.targetRight = note->targetVolRight;
     note->curVolLeft = vol.targetLeft;
     note->curVolRight = vol.targetRight;
-#ifdef ENABLE_STEREO_HEADSET_EFFECTS
-    return process_envelope_inner(cmd, note, nSamples, inBuf, headsetPanSettings, &vol);
-#else
-    return process_envelope_inner(cmd, note, nSamples, inBuf, &vol);
-#endif
-}
-
-#ifdef ENABLE_STEREO_HEADSET_EFFECTS
-u64 *process_envelope_inner(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf,
-                            s32 headsetPanSettings, struct VolumeChange *vol) {
-#else
-u64 *process_envelope_inner(u64 *cmd, struct Note *note, s32 nSamples, u16 inBuf,
-                            struct VolumeChange *vol) {
-#endif
-    u8 mixerFlags;
-    s32 rampLeft, rampRight;
-#elif defined(VERSION_EU)
-u64 *process_envelope(u64 *cmd, struct NoteSubEu *note, struct NoteSynthesisState *synthesisState, s32 nSamples, u16 inBuf, s32 headsetPanSettings, UNUSED u32 flags) {
-    u16 sourceRight;
-    u16 sourceLeft;
-    u16 targetLeft;
-    u16 targetRight;
-    s32 mixerFlags;
-    s32 rampLeft;
-    s32 rampRight;
-
-    sourceLeft = synthesisState->curVolLeft;
-    sourceRight = synthesisState->curVolRight;
-    targetLeft = (note->targetVolLeft << 5);
-    targetRight = (note->targetVolRight << 5);
-    if (targetLeft == 0) {
-        targetLeft++;
-    }
-    if (targetRight == 0) {
-        targetRight++;
-    }
-    synthesisState->curVolLeft = targetLeft;
-    synthesisState->curVolRight = targetRight;
-#endif
 
     // For aEnvMixer, five buffers and count are set using aSetBuffer.
     // in, dry left, count without A_AUX flag.
@@ -1435,17 +859,17 @@ u64 *process_envelope(u64 *cmd, struct NoteSubEu *note, struct NoteSynthesisStat
 
         switch (headsetPanSettings) {
             case 1:
-                aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_NOTE_PAN_TEMP, nSamples * 2);
+                aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_NOTE_PAN_TEMP, nSamples);
                 aSetBuffer(cmd++, A_AUX, DMEM_ADDR_RIGHT_CH, DMEM_ADDR_WET_LEFT_CH,
                            DMEM_ADDR_WET_RIGHT_CH);
                 break;
             case 2:
-                aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples * 2);
+                aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples);
                 aSetBuffer(cmd++, A_AUX, DMEM_ADDR_NOTE_PAN_TEMP, DMEM_ADDR_WET_LEFT_CH,
                            DMEM_ADDR_WET_RIGHT_CH);
                 break;
             default:
-                aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples * 2);
+                aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples);
                 aSetBuffer(cmd++, A_AUX, DMEM_ADDR_RIGHT_CH, DMEM_ADDR_WET_LEFT_CH,
                            DMEM_ADDR_WET_RIGHT_CH);
                 break;
@@ -1456,106 +880,66 @@ u64 *process_envelope(u64 *cmd, struct NoteSubEu *note, struct NoteSynthesisStat
         // mixed into a temporary buffer and then subtracted from the normal buffer.
         if (note->stereoStrongRight) {
             aClearBuffer(cmd++, DMEM_ADDR_STEREO_STRONG_TEMP_DRY, DEFAULT_LEN_2CH);
-            aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_STEREO_STRONG_TEMP_DRY, nSamples * 2);
+            aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_STEREO_STRONG_TEMP_DRY, nSamples);
             aSetBuffer(cmd++, A_AUX, DMEM_ADDR_RIGHT_CH, DMEM_ADDR_STEREO_STRONG_TEMP_WET,
                        DMEM_ADDR_WET_RIGHT_CH);
         } else if (note->stereoStrongLeft) {
             aClearBuffer(cmd++, DMEM_ADDR_STEREO_STRONG_TEMP_DRY, DEFAULT_LEN_2CH);
-            aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples * 2);
+            aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples);
             aSetBuffer(cmd++, A_AUX, DMEM_ADDR_STEREO_STRONG_TEMP_DRY, DMEM_ADDR_WET_LEFT_CH,
                        DMEM_ADDR_STEREO_STRONG_TEMP_WET);
         } else {
-            aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples * 2);
+            aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples);
             aSetBuffer(cmd++, A_AUX, DMEM_ADDR_RIGHT_CH, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_WET_RIGHT_CH);
         }
     }
 #else
-    aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples * 2);
+    aSetBuffer(cmd++, 0, inBuf, DMEM_ADDR_LEFT_CH, nSamples);
     aSetBuffer(cmd++, A_AUX, DMEM_ADDR_RIGHT_CH, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_WET_RIGHT_CH);
 #endif
 
-#ifdef VERSION_EU
-    if (targetLeft == sourceLeft && targetRight == sourceRight && !note->envMixerNeedsInit) {
-#else
-    if (vol->targetLeft == vol->sourceLeft && vol->targetRight == vol->sourceRight
+    if (vol.targetLeft == vol.sourceLeft && vol.targetRight == vol.sourceRight
         && !note->envMixerNeedsInit) {
-#endif
         mixerFlags = A_CONTINUE;
     } else {
         mixerFlags = A_INIT;
 
-#ifdef VERSION_EU
-        rampLeft = gCurrentLeftVolRamping[targetLeft >> 5] * gCurrentRightVolRamping[sourceLeft >> 5];
-        rampRight = gCurrentLeftVolRamping[targetRight >> 5] * gCurrentRightVolRamping[sourceRight >> 5];
-#else
         // volume ramping
         // This roughly computes 2^16 * (targetVol / sourceVol) ^ (8 / arg2),
         // but with discretizations of targetVol, sourceVol and arg2.
-        switch (nSamples) {
-            case 128:
-                rampLeft = gVolRampingLhs128[vol->targetLeft >> (15 - VOL_RAMPING_EXPONENT)] * gVolRampingRhs128[vol->sourceLeft >> (15 - VOL_RAMPING_EXPONENT)];
-                rampRight = gVolRampingLhs128[vol->targetRight >> (15 - VOL_RAMPING_EXPONENT)] * gVolRampingRhs128[vol->sourceRight >> (15 - VOL_RAMPING_EXPONENT)];
-                break;
-            case 144:
-                rampLeft = gVolRampingLhs144[vol->targetLeft >> (15 - VOL_RAMPING_EXPONENT)] * gVolRampingRhs144[vol->sourceLeft >> (15 - VOL_RAMPING_EXPONENT)];
-                rampRight = gVolRampingLhs144[vol->targetRight >> (15 - VOL_RAMPING_EXPONENT)] * gVolRampingRhs144[vol->sourceRight >> (15 - VOL_RAMPING_EXPONENT)];
-                break;
-            case 136:
-            default:
-                rampLeft = gVolRampingLhs136[vol->targetLeft >> (15 - VOL_RAMPING_EXPONENT)] * gVolRampingRhs136[vol->sourceLeft >> (15 - VOL_RAMPING_EXPONENT)];
-                rampRight = gVolRampingLhs136[vol->targetRight >> (15 - VOL_RAMPING_EXPONENT)] * gVolRampingRhs136[vol->sourceRight >> (15 - VOL_RAMPING_EXPONENT)];
-                break;
-        }
-#endif
+        rampLeft = currentRampingTableLeft[vol.targetLeft >> (15 - VOL_RAMPING_EXPONENT)] * currentRampingTableRight[vol.sourceLeft >> (15 - VOL_RAMPING_EXPONENT)];
+        rampRight = currentRampingTableLeft[vol.targetRight >> (15 - VOL_RAMPING_EXPONENT)] * currentRampingTableRight[vol.sourceRight >> (15 - VOL_RAMPING_EXPONENT)];
 
         // The operation's parameters change meanings depending on flags
-#ifdef VERSION_EU
-        aSetVolume(cmd++, A_VOL | A_LEFT, sourceLeft, 0, 0);
-        aSetVolume(cmd++, A_VOL | A_RIGHT, sourceRight, 0, 0);
-        aSetVolume32(cmd++, A_RATE | A_LEFT, targetLeft, rampLeft);
-        aSetVolume32(cmd++, A_RATE | A_RIGHT, targetRight, rampRight);
+        aSetVolume(cmd++, A_VOL | A_LEFT, vol.sourceLeft, 0, 0);
+        aSetVolume(cmd++, A_VOL | A_RIGHT, vol.sourceRight, 0, 0);
+        aSetVolume32(cmd++, A_RATE | A_LEFT, vol.targetLeft, rampLeft);
+        aSetVolume32(cmd++, A_RATE | A_RIGHT, vol.targetRight, rampRight);
         aSetVolume(cmd++, A_AUX, gVolume, 0, note->reverbVol << 8);
-#else
-        aSetVolume(cmd++, A_VOL | A_LEFT, vol->sourceLeft, 0, 0);
-        aSetVolume(cmd++, A_VOL | A_RIGHT, vol->sourceRight, 0, 0);
-        aSetVolume32(cmd++, A_RATE | A_LEFT, vol->targetLeft, rampLeft);
-        aSetVolume32(cmd++, A_RATE | A_RIGHT, vol->targetRight, rampRight);
-        aSetVolume(cmd++, A_AUX, gVolume, 0, note->reverbVolShifted);
-#endif
     }
 
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
-#ifdef VERSION_EU
-    if (gUseReverb && note->reverbVol != 0) {
-        aEnvMixer(cmd++, mixerFlags | A_AUX,
-                  VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->mixEnvelopeState));
-#else
     if (gSynthesisReverb.useReverb && note->reverbVol != 0) {
         aEnvMixer(cmd++, mixerFlags | A_AUX,
                   VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->mixEnvelopeState));
-#endif
         if (note->stereoStrongRight) {
-            aSetBuffer(cmd++, 0, 0, 0, nSamples * 2);
+            aSetBuffer(cmd++, 0, 0, 0, nSamples);
             // 0x8000 is -100%, so subtract sound instead of adding...
             aMix(cmd++, 0, /*gain*/ 0x8000, /*in*/ DMEM_ADDR_STEREO_STRONG_TEMP_DRY, /*out*/ DMEM_ADDR_LEFT_CH);
             aMix(cmd++, 0, /*gain*/ 0x8000, /*in*/ DMEM_ADDR_STEREO_STRONG_TEMP_WET, /*out*/ DMEM_ADDR_WET_LEFT_CH);
         } else if (note->stereoStrongLeft) {
-            aSetBuffer(cmd++, 0, 0, 0, nSamples * 2);
+            aSetBuffer(cmd++, 0, 0, 0, nSamples);
             aMix(cmd++, 0, /*gain*/ 0x8000, /*in*/ DMEM_ADDR_STEREO_STRONG_TEMP_DRY, /*out*/ DMEM_ADDR_RIGHT_CH);
             aMix(cmd++, 0, /*gain*/ 0x8000, /*in*/ DMEM_ADDR_STEREO_STRONG_TEMP_WET, /*out*/ DMEM_ADDR_WET_RIGHT_CH);
         }
     } else {
-#ifdef VERSION_EU
-        aEnvMixer(cmd++, mixerFlags, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->mixEnvelopeState));
-#else
         aEnvMixer(cmd++, mixerFlags, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->mixEnvelopeState));
-#endif
         if (note->stereoStrongRight) {
-            aSetBuffer(cmd++, 0, 0, 0, nSamples * 2);
+            aSetBuffer(cmd++, 0, 0, 0, nSamples);
             aMix(cmd++, 0, /*gain*/ 0x8000, /*in*/ DMEM_ADDR_STEREO_STRONG_TEMP_DRY,
                  /*out*/ DMEM_ADDR_LEFT_CH);
         } else if (note->stereoStrongLeft) {
-            aSetBuffer(cmd++, 0, 0, 0, nSamples * 2);
+            aSetBuffer(cmd++, 0, 0, 0, nSamples);
             aMix(cmd++, 0, /*gain*/ 0x8000, /*in*/ DMEM_ADDR_STEREO_STRONG_TEMP_DRY,
                  /*out*/ DMEM_ADDR_RIGHT_CH);
         }
@@ -1570,41 +954,23 @@ u64 *process_envelope(u64 *cmd, struct NoteSubEu *note, struct NoteSynthesisStat
 }
 
 #ifdef ENABLE_STEREO_HEADSET_EFFECTS
-#ifdef VERSION_EU
-u64 *note_apply_headset_pan_effects(u64 *cmd, struct NoteSubEu *noteSubEu, struct NoteSynthesisState *note, s32 bufLen, s32 flags, s32 leftRight) {
-#else
 u64 *note_apply_headset_pan_effects(u64 *cmd, struct Note *note, s32 bufLen, s32 flags, s32 leftRight) {
-#endif
     u16 dest;
     u16 pitch;
-#ifdef VERSION_EU
-    u8 prevPanShift;
-    u8 panShift;
-    UNUSED u8 unkDebug;
-#else
     u16 prevPanShift;
     u16 panShift;
-#endif
 
     switch (leftRight) {
         case 1:
             dest = DMEM_ADDR_LEFT_CH;
-#ifdef VERSION_EU
-            panShift = noteSubEu->headsetPanRight;
-#else
             panShift = note->headsetPanRight;
-#endif
             note->prevHeadsetPanLeft = 0;
             prevPanShift = note->prevHeadsetPanRight;
             note->prevHeadsetPanRight = panShift;
             break;
         case 2:
             dest = DMEM_ADDR_RIGHT_CH;
-#ifdef VERSION_EU
-            panShift = noteSubEu->headsetPanLeft;
-#else
             panShift = note->headsetPanLeft;
-#endif
             note->prevHeadsetPanRight = 0;
 
             prevPanShift = note->prevHeadsetPanLeft;
@@ -1620,8 +986,8 @@ u64 *note_apply_headset_pan_effects(u64 *cmd, struct Note *note, s32 bufLen, s32
             // Kind of a hack that moves the first samples into the resample state
             aDMEMMove(cmd++, DMEM_ADDR_NOTE_PAN_TEMP, DMEM_ADDR_TEMP, 8);
             aClearBuffer(cmd++, 8, 8); // Set pitch accumulator to 0 in the resample state
-            aDMEMMove(cmd++, DMEM_ADDR_NOTE_PAN_TEMP, DMEM_ADDR_TEMP + 0x10,
-                      0x10); // No idea, result seems to be overwritten later
+            aDMEMMove(cmd++, DMEM_ADDR_NOTE_PAN_TEMP, DMEM_ADDR_TEMP + 16,
+                      16); // No idea, result seems to be overwritten later
 
             aSetBuffer(cmd++, 0, 0, DMEM_ADDR_TEMP, 32);
             aSaveBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->panResampleState));
@@ -1667,14 +1033,10 @@ u64 *note_apply_headset_pan_effects(u64 *cmd, struct Note *note, s32 bufLen, s32
 }
 #endif
 
-#ifndef VERSION_EU
-// Moved to playback.c in EU
-
 void note_init_volume(struct Note *note) {
     note->targetVolLeft = 0;
     note->targetVolRight = 0;
     note->reverbVol = 0;
-    note->reverbVolShifted = 0;
     note->curVolLeft = 1;
     note->curVolRight = 1;
     note->frequency = 0.0f;
@@ -1737,7 +1099,6 @@ void note_set_vel_pan_reverb(struct Note *note, f32 velocity, f32 pan, u8 reverb
     }
     if (note->reverbVol != reverbVol) {
         note->reverbVol = reverbVol;
-        note->reverbVolShifted = reverbVol << 8;
         note->envMixerNeedsInit = TRUE;
         return;
     }
@@ -1779,4 +1140,3 @@ void note_disable(struct Note *note) {
     note->prevParentLayer = NO_LAYER;
 }
 #endif
-#endif
diff --git a/src/audio/synthesis.h b/src/audio/synthesis.h
index 5c4050267..f66267b76 100644
--- a/src/audio/synthesis.h
+++ b/src/audio/synthesis.h
@@ -42,13 +42,13 @@ enum ChannelIndexes {
 
 /* ------ BETTER REVERB LIGHTWEIGHT PARAMETER OVERRIDES ------ */
 
-// Filter count works differently than normal when used with light settings and can support numbers that are not multiples of 3, though 3 is generally recommended.
-// This can be reduced to 2 to save a third of runtime overhead, but substantially reduces reverb saturation.
-// Similarly this can be increased from 3, but likely won't have beneficial outcomes worth the runtime expense compared to the modification of other parameters without using light settings.
-#define BETTER_REVERB_FILTER_COUNT_LIGHT 3
-#define BETTER_REVERB_GAIN_INDEX_LIGHT 0xA0 // Advanced parameter; used to tune the outputs of every filter except for the final one
-#define BETTER_REVERB_REVERB_INDEX_LIGHT 0x30 // Advanced parameter; used to tune the incoming output of the final filter
-#define BETTER_REVERB_MULTIPLE_LIGHT 0xD0 // Advanced parameter; multiplier applied to the final output signal for both the left and right channels (divided by 256)
+// Filter count works differently than normal when used with light settings and can support numbers that are not multiples of 3.
+// A value of 2 is generally recommended, as it behaves most similarly to non-lightweight reverb.
+// This can be reduced to 1 to save additional runtime overhead, but will reduce some reverb saturation as a consequence.
+// Similarly, this can be increased from 2, but doing so likely won't have beneficial outcomes worth the runtime expense compared to modifying other parameters without using light settings.
+#define BETTER_REVERB_FILTER_COUNT_LIGHT 2
+#define BETTER_REVERB_GAIN_INDEX_LIGHT 0xA0 // Advanced parameter; used to tune the outputs of every filter except for the final one (multiples of 0x10 will compile more efficiently)
+#define BETTER_REVERB_REVERB_INDEX_LIGHT 0x30 // Advanced parameter; used to tune the reuse of the previously processed output sample (multiples of 0x10 will compile more efficiently)
 
 
 /* ------------ BETTER REVERB EXTERNED VARIABLES ------------ */
@@ -74,7 +74,7 @@ void set_better_reverb_buffers(u32 *inputDelaysL, u32 *inputDelaysR);
 /* -------------- BETTER REVERB STATIC ASSERTS -------------- */
 
 STATIC_ASSERT(NUM_ALLPASS % 3 == 0, "NUM_ALLPASS must be a multiple of 3!");
-STATIC_ASSERT(BETTER_REVERB_FILTER_COUNT_LIGHT >= 2, "BETTER_REVERB_FILTER_COUNT_LIGHT should be no less than 2!");
+STATIC_ASSERT(BETTER_REVERB_FILTER_COUNT_LIGHT > 0, "BETTER_REVERB_FILTER_COUNT_LIGHT must be greater than 0!");
 STATIC_ASSERT(BETTER_REVERB_FILTER_COUNT_LIGHT <= NUM_ALLPASS, "BETTER_REVERB_FILTER_COUNT_LIGHT cannot be larger than NUM_ALLPASS!");
 
 #else
diff --git a/src/boot/main.c b/src/boot/main.c
index 9d812aadc..2fe93aaff 100644
--- a/src/boot/main.c
+++ b/src/boot/main.c
@@ -20,7 +20,6 @@
 #include "usb/debug.h"
 #endif
 #include "game/puppyprint.h"
-#include "game/puppylights.h"
 #include "game/profiling.h"
 #include "game/emutest.h"
 
@@ -115,9 +114,6 @@ void alloc_pool(void) {
 
     main_pool_init(start, end);
     gEffectsMemoryPool = mem_pool_init(EFFECTS_MEMORY_POOL, MEMORY_POOL_LEFT);
-#ifdef PUPPYLIGHTS
-    gLightsPool = mem_pool_init(PUPPYLIGHTS_POOL, MEMORY_POOL_LEFT);
-#endif
 }
 
 void create_thread(OSThread *thread, OSId id, void (*entry)(void *), void *arg, void *sp, OSPri pri) {
diff --git a/src/boot/memory.c b/src/boot/memory.c
index c086f8b0b..0066f8ba5 100644
--- a/src/boot/memory.c
+++ b/src/boot/memory.c
@@ -94,11 +94,13 @@ void *virtual_to_segmented(u32 segment, const void *addr) {
 }
 
 void move_segment_table_to_dmem(void) {
-    s32 i;
+    Gfx *tempGfxHead = gDisplayListHead; // Local copy of the display list cursor; written back to gDisplayListHead after the loop.
 
-    for (i = 0; i < 16; i++) {
-        gSPSegment(gDisplayListHead++, i, sSegmentTable[i]);
+    for (s32 i = 0; i < 16; i++) { // Emits one gSPSegment command for each segment index 0-15.
+        gSPSegment(tempGfxHead++, i, sSegmentTable[i]);
     }
+
+    gDisplayListHead = tempGfxHead; // Store the advanced cursor back into the global.
 }
 #else
 void *segmented_to_virtual(const void *addr) {
diff --git a/src/engine/behavior_script.c b/src/engine/behavior_script.c
index 299eec42f..282b8b3b1 100644
--- a/src/engine/behavior_script.c
+++ b/src/engine/behavior_script.c
@@ -14,19 +14,19 @@
 #include "math_util.h"
 #include "graph_node.h"
 #include "surface_collision.h"
-#include "game/puppylights.h"
 
 // Macros for retrieving arguments from behavior scripts.
-#define BHV_CMD_GET_1ST_U8(index)  (u8)((gCurBhvCommand[index] >> 24) & 0xFF) // unused
-#define BHV_CMD_GET_2ND_U8(index)  (u8)((gCurBhvCommand[index] >> 16) & 0xFF)
-#define BHV_CMD_GET_3RD_U8(index)  (u8)((gCurBhvCommand[index] >> 8) & 0xFF)
-#define BHV_CMD_GET_4TH_U8(index)  (u8)((gCurBhvCommand[index]) & 0xFF)
+#define BHV_CMD_GET_1ST_U8(index)     (u8)((gCurBhvCommand[index] >> 24) & 0xFF) // unused
+#define BHV_CMD_GET_2ND_U8(index)     (u8)((gCurBhvCommand[index] >> 16) & 0xFF)
+#define BHV_CMD_GET_3RD_U8(index)     (u8)((gCurBhvCommand[index] >> 8) & 0xFF)
+#define BHV_CMD_GET_4TH_U8(index)     (u8)((gCurBhvCommand[index]) & 0xFF)
 
-#define BHV_CMD_GET_1ST_S16(index) (s16)(gCurBhvCommand[index] >> 16)
-#define BHV_CMD_GET_2ND_S16(index) (s16)(gCurBhvCommand[index] & 0xFFFF)
+#define BHV_CMD_GET_1ST_S16(index)    (s16)(gCurBhvCommand[index] >> 16)
+#define BHV_CMD_GET_2ND_S16(index)    (s16)(gCurBhvCommand[index] & 0xFFFF)
 
-#define BHV_CMD_GET_U32(index)     (u32)(gCurBhvCommand[index])
-#define BHV_CMD_GET_VPTR(index)    (void *)(gCurBhvCommand[index])
+#define BHV_CMD_GET_U32(index)        (u32)(gCurBhvCommand[index])
+#define BHV_CMD_GET_VPTR(index)       (void *)(gCurBhvCommand[index])
+#define BHV_CMD_GET_VPTR_SMALL(index) (void *)(OS_PHYSICAL_TO_K0(gCurBhvCommand[index] & 0xFFFFFF))
 
 #define BHV_CMD_GET_ADDR_OF_CMD(index) (uintptr_t)(&gCurBhvCommand[index])
 
@@ -49,11 +49,6 @@ void obj_update_gfx_pos_and_angle(struct Object *obj) {
 #define OBJ_OPACITY_LENGTH 512.0f
 void obj_set_opacity_from_cam_dist(struct Object *obj) {
     s32 opacityDist = ((-obj->header.gfx.cameraToObject[2] - OBJ_OPACITY_NEAR) * (256.0f / OBJ_OPACITY_LENGTH));
-#ifdef OBJECTS_REJ
-    if (opacityDist > 0) {
-        obj->header.gfx.ucode = GRAPH_NODE_UCODE_REJ;
-    }
-#endif
     obj->oOpacity = CLAMP(opacityDist, 0x00, 0xFF);
 }
 #undef OBJ_OPACITY_NEAR
@@ -316,11 +311,11 @@ static s32 bhv_cmd_end_loop(void) {
 // Usage: CALL_NATIVE(func)
 typedef void (*NativeBhvFunc)(void);
 static s32 bhv_cmd_call_native(void) {
-    NativeBhvFunc behaviorFunc = BHV_CMD_GET_VPTR(1);
+    NativeBhvFunc behaviorFunc = BHV_CMD_GET_VPTR_SMALL(0);
 
     behaviorFunc();
 
-    gCurBhvCommand += 2;
+    gCurBhvCommand++;
     return BHV_PROC_CONTINUE;
 }
 
@@ -734,11 +729,11 @@ static s32 bhv_cmd_parent_bit_clear(void) {
 // Command 0x37: Spawns a water droplet with the given parameters.
 // Usage: SPAWN_WATER_DROPLET(dropletParams)
 static s32 bhv_cmd_spawn_water_droplet(void) {
-    struct WaterDropletParams *dropletParams = BHV_CMD_GET_VPTR(1);
+    struct WaterDropletParams *dropletParams = BHV_CMD_GET_VPTR_SMALL(0);
 
     spawn_water_droplet(gCurrentObject, dropletParams);
 
-    gCurBhvCommand += 2;
+    gCurBhvCommand++;
     return BHV_PROC_CONTINUE;
 }
 
@@ -909,24 +904,12 @@ void cur_obj_update(void) {
     COND_BIT((objFlags & OBJ_FLAG_OCCLUDE_SILHOUETTE), o->header.gfx.node.flags, GRAPH_RENDER_OCCLUDE_SILHOUETTE);
 #endif
 
-#ifdef OBJECTS_REJ
-    if ((objFlags & OBJ_FLAG_SILHOUETTE) || (objFlags & OBJ_FLAG_UCODE_SMALL)) {
-        o->header.gfx.ucode = GRAPH_NODE_UCODE_REJ;
-    } else {
-        o->header.gfx.ucode = GRAPH_NODE_UCODE_DEFAULT;
-    }
-#endif
-
 #ifdef OBJ_OPACITY_BY_CAM_DIST
     if (objFlags & OBJ_FLAG_OPACITY_FROM_CAMERA_DIST) {
         obj_set_opacity_from_cam_dist(o);
     }
 #endif
 
-#ifdef PUPPYLIGHTS
-    puppylights_object_emit(o);
-#endif
-
     // Handle visibility of object
     if (o->oRoom != -1) {
         // If the object is in a room, only show it when Mario is in the room.
diff --git a/src/engine/colors.c b/src/engine/colors.c
index 32d7fe4b5..41f67d498 100644
--- a/src/engine/colors.c
+++ b/src/engine/colors.c
@@ -81,8 +81,8 @@ void rgba32_to_colorRGBAf(ColorRGBAf dst, RGBA32 src) {
     dst[3] = COMPOSITE_TO_COLORF(src, MSK_RGBA32_A, IDX_RGBA32_A);
 }
 
-void colorRGB_to_colorRGBf(ColorRGBf dst, ColorRGB src) { vec3_quot_val(dst, src, 255.0f); }
-void colorRGBf_to_colorRGB(ColorRGB dst, ColorRGBf src) { vec3_prod_val(dst, src, 255.0f); }
+void colorRGB_to_colorRGBf(ColorRGBf dst, ColorRGB src) { vec3_scale_dest(dst, src, (1.0f / 255.0f)); } // reciprocal of 255, parenthesized in case vec3_scale_dest is a macro
+void colorRGBf_to_colorRGB(ColorRGB dst, ColorRGBf src) { vec3_scale_dest(dst, src, 255.0f); }
 
 RGBA16Return32 colorRGBf_to_rgba16(ColorRGBf src) {
     return (COLORF_TO_COMPOSITE(src[0], MSK_RGBA16_C, IDX_RGBA16_R)
@@ -137,7 +137,7 @@ Bool32 colorRGBA_average_3(ColorRGBA dst, ColorRGBA c1, ColorRGBA c2, ColorRGBA
 RGBA16Return32 rgba16_make_grayscale(RGBA16 rgba) {
     ColorRGBf color;
     rgba16_to_colorRGBf(color, rgba);
-    ColorF avg = vec3_average(color);
+    ColorF avg = (color[0] + color[1] + color[2]) / 3.f;
     vec3_same(color, avg);
     return colorRGBf_to_rgba16(color);
 }
diff --git a/src/engine/colors.h b/src/engine/colors.h
index 7db18298c..866e68424 100644
--- a/src/engine/colors.h
+++ b/src/engine/colors.h
@@ -103,10 +103,10 @@
 #define COMPOSITE_TO_COLORF(src, bitmask, index)    ((ColorF)(((src) >> (index)) & (bitmask)) / (bitmask))
 #define COLORF_TO_COMPOSITE(src, bitmask, index)    (((CompositeColor)((src) * (bitmask)) & (bitmask)) << (index))
 
-#define COLORRGB_TO_COLORRGBF(  dst, src) vec3_quot_val((dst), (src), 255.0f)
-#define COLORRGBF_TO_COLORRGB(  dst, src) vec3_prod_val((dst), (src), 255.0f)
-#define COLORRGBA_TO_COLORRGBAF(dst, src) vec4_quot_val((dst), (src), 255.0f)
-#define COLORRGBAF_TO_COLORRGBA(dst, src) vec4_prod_val((dst), (src), 255.0f)
+#define COLORRGB_TO_COLORRGBF(  dst, src) vec3_scale_dest((dst), (src), (1.0f / 255.0f))
+#define COLORRGBF_TO_COLORRGB(  dst, src) vec3_scale_dest((dst), (src), 255.0f)
+#define COLORRGBA_TO_COLORRGBAF(dst, src) vec4_scale_dest((dst), (src), (1.0f / 255.0f))
+#define COLORRGBAF_TO_COLORRGBA(dst, src) vec4_scale_dest((dst), (src), 255.0f)
 
 #define colorRGB_set(    dst, r, g, b) vec3_set( (dst), (r), (g), (b))
 #define colorRGB_copy(   dst, src    ) vec3_copy((dst), (src)        )
diff --git a/src/engine/graph_node.h b/src/engine/graph_node.h
index 4d0d4d2a9..bccb8f2b3 100644
--- a/src/engine/graph_node.h
+++ b/src/engine/graph_node.h
@@ -9,15 +9,6 @@
 #include "geo_commands.h"
 #include "game/memory.h"
 
-// UCode indices for listHeads & listTails
-enum GraphNodeUCodes {
-    GRAPH_NODE_UCODE_DEFAULT,
-#ifdef OBJECTS_REJ
-    GRAPH_NODE_UCODE_REJ,
-#endif
-    GRAPH_NODE_NUM_UCODES,
-};
-
 enum GraphRenderFlags {
     GRAPH_RENDER_ACTIVE             = (1 << 0), // 0x0001
     GRAPH_RENDER_CHILDREN_FIRST     = (1 << 1), // 0x0002
@@ -156,8 +147,8 @@ struct DisplayListNode {
  */
 struct GraphNodeMasterList {
     /*0x00*/ struct GraphNode node;
-    /*0x14*/ struct DisplayListNode *listHeads[GRAPH_NODE_NUM_UCODES][LAYER_COUNT];
-    /*0x34*/ struct DisplayListNode *listTails[GRAPH_NODE_NUM_UCODES][LAYER_COUNT];
+    /*0x14*/ struct DisplayListNode *listHeads[LAYER_COUNT];
+    /*0x34*/ struct DisplayListNode *listTails[LAYER_COUNT];
 };
 
 /** Simply used as a parent to group multiple children.
diff --git a/src/engine/level_script.c b/src/engine/level_script.c
index 927c64946..c7a47e6d6 100644
--- a/src/engine/level_script.c
+++ b/src/engine/level_script.c
@@ -29,7 +29,6 @@
 #include "string.h"
 #include "game/puppycam2.h"
 #include "game/puppyprint.h"
-#include "game/puppylights.h"
 #include "game/emutest.h"
 
 #include "config.h"
@@ -863,52 +862,6 @@ static void level_cmd_puppyvolume(void) {
     sCurrentCmd = CMD_NEXT;
 }
 
-static void level_cmd_puppylight_environment(void) {
-#ifdef PUPPYLIGHTS
-    Lights1 temp = gdSPDefLights1(CMD_GET(u8, 2), CMD_GET(u8, 3), CMD_GET(u8, 4),
-                                  CMD_GET(u8, 5), CMD_GET(u8, 6), CMD_GET(u8, 7),
-                                  CMD_GET(u8, 8), CMD_GET(u8, 9), CMD_GET(u8, 10));
-
-    memcpy(&gLevelLight, &temp, sizeof(Lights1));
-    levelAmbient = TRUE;
-#endif
-    sCurrentCmd = CMD_NEXT;
-}
-
-static void level_cmd_puppylight_node(void) {
-#ifdef PUPPYLIGHTS
-    gPuppyLights[gNumLights] = mem_pool_alloc(gLightsPool, sizeof(struct PuppyLight));
-    if (gPuppyLights[gNumLights] == NULL) {
-        append_puppyprint_log("Puppylight allocation failed.");
-        sCurrentCmd = CMD_NEXT;
-        return;
-    }
-
-    vec4_set(gPuppyLights[gNumLights]->rgba, CMD_GET(u8,   2),
-                                             CMD_GET(u8,   3),
-                                             CMD_GET(u8,   4),
-                                             CMD_GET(u8,   5));
-
-    vec3s_set(gPuppyLights[gNumLights]->pos[0], CMD_GET(s16,  6),
-                                                CMD_GET(s16,  8),
-                                                CMD_GET(s16, 10));
-
-    vec3s_set(gPuppyLights[gNumLights]->pos[1], CMD_GET(s16, 12),
-                                                CMD_GET(s16, 14),
-                                                CMD_GET(s16, 16));
-    gPuppyLights[gNumLights]->yaw       = CMD_GET(s16, 18);
-    gPuppyLights[gNumLights]->epicentre = CMD_GET(u8,  20);
-    gPuppyLights[gNumLights]->flags     = CMD_GET(u8,  21);
-    gPuppyLights[gNumLights]->active    = TRUE;
-    gPuppyLights[gNumLights]->area      = sCurrAreaIndex;
-    gPuppyLights[gNumLights]->room      = CMD_GET(s16, 22);
-
-    gNumLights++;
-
-#endif
-    sCurrentCmd = CMD_NEXT;
-}
-
 static void level_cmd_set_echo(void) {
     if (sCurrAreaIndex >= 0 && sCurrAreaIndex < AREA_COUNT) {
         gAreaData[sCurrAreaIndex].useEchoOverride = TRUE;
@@ -984,8 +937,6 @@ static void (*LevelScriptJumpTable[])(void) = {
     /*LEVEL_CMD_GET_OR_SET_VAR              */ level_cmd_get_or_set_var,
     /*LEVEL_CMD_PUPPYVOLUME                 */ level_cmd_puppyvolume,
     /*LEVEL_CMD_CHANGE_AREA_SKYBOX          */ level_cmd_change_area_skybox,
-    /*LEVEL_CMD_PUPPYLIGHT_ENVIRONMENT      */ level_cmd_puppylight_environment,
-    /*LEVEL_CMD_PUPPYLIGHT_NODE             */ level_cmd_puppylight_node,
     /*LEVEL_CMD_SET_ECHO                    */ level_cmd_set_echo,
 };
 
diff --git a/src/engine/math_util.c b/src/engine/math_util.c
index 16efd0277..bcaca282d 100644
--- a/src/engine/math_util.c
+++ b/src/engine/math_util.c
@@ -63,28 +63,7 @@ s32 random_sign(void) {
     return ((random_u16() >= 0x7FFF) ? 1 : -1);
 }
 
-/// Returns the lowest of three values.
-#define min_3_func(a0, a1, a2) {\
-    if (a1 < a0) a0 = a1;       \
-    if (a2 < a0) a0 = a2;       \
-    return a0;                  \
-}
-
-f32 min_3f(f32 a, f32 b, f32 c) { min_3_func(a, b, c); }
-s32 min_3i(s32 a, s32 b, s32 c) { min_3_func(a, b, c); }
-s32 min_3s(s16 a, s16 b, s16 c) { min_3_func(a, b, c); }
-
-/// Returns the highest of three values.
-#define max_3_func(a0, a1, a2) {\
-    if (a1 > a0) a0 = a1;       \
-    if (a2 > a0) a0 = a2;       \
-    return a0;                  \
-}
-f32 max_3f(f32 a, f32 b, f32 c) { max_3_func(a, b, c); }
-s32 max_3i(s32 a, s32 b, s32 c) { max_3_func(a, b, c); }
-s32 max_3s(s16 a, s16 b, s16 c) { max_3_func(a, b, c); }
-
-/// A combination of the above.
+/// Finds the minimum and maximum of three values in one pass, storing them through 'min' and 'max'.
 #define min_max_3_func(a, b, c, min, max) { \
     if (b < a) {                            \
         *max = a;                           \
@@ -96,232 +75,24 @@ s32 max_3s(s16 a, s16 b, s16 c) { max_3_func(a, b, c); }
     if (c < *min) *min = c;                 \
     if (c > *max) *max = c;                 \
 }
+
 void min_max_3f(f32 a, f32 b, f32 c, f32 *min, f32 *max) { min_max_3_func(a, b, c, min, max); }
 void min_max_3i(s32 a, s32 b, s32 c, s32 *min, s32 *max) { min_max_3_func(a, b, c, min, max); }
 void min_max_3s(s16 a, s16 b, s16 c, s16 *min, s16 *max) { min_max_3_func(a, b, c, min, max); }
 
-/// Perform a bitwise copy from vector 'src' to 'dest'
-#define vec3_copy_bits(destFmt, dest, srcFmt, src) { \
-    register destFmt x = ((srcFmt *) src)[0];        \
-    register destFmt y = ((srcFmt *) src)[1];        \
-    register destFmt z = ((srcFmt *) src)[2];        \
-    ((destFmt *) dest)[0] = x;                       \
-    ((destFmt *) dest)[1] = y;                       \
-    ((destFmt *) dest)[2] = z;                       \
-}
-void vec3f_copy    (Vec3f dest, const Vec3f src) { vec3_copy_bits(f32, dest, f32, src); } // 32 -> 32
-void vec3i_copy    (Vec3i dest, const Vec3i src) { vec3_copy_bits(s32, dest, s32, src); } // 32 -> 32
-void vec3s_copy    (Vec3s dest, const Vec3s src) { vec3_copy_bits(s16, dest, s16, src); } // 16 -> 16
-void vec3s_to_vec3i(Vec3i dest, const Vec3s src) { vec3_copy_bits(s32, dest, s16, src); } // 16 -> 32
-void vec3s_to_vec3f(Vec3f dest, const Vec3s src) { vec3_copy_bits(f32, dest, s16, src); } // 16 -> 32
-void vec3i_to_vec3s(Vec3s dest, const Vec3i src) { vec3_copy_bits(s16, dest, s32, src); } // 32 -> 16
-void vec3i_to_vec3f(Vec3f dest, const Vec3i src) { vec3_copy_bits(f32, dest, s32, src); } // 32 -> 32
-
-void surface_normal_to_vec3f(Vec3f dest, struct Surface *surf) {
-    register f32 x = surf->normal.x;
-    register f32 y = surf->normal.y;
-    register f32 z = surf->normal.z;
-    ((f32 *) dest)[0] = x;
-    ((f32 *) dest)[1] = y;
-    ((f32 *) dest)[2] = z;
-}
-
-/// Convert float vector a to a short vector 'dest' by rounding the components to the nearest integer.
-#define vec3_copy_bits_roundf(fmt, dest, src) { \
-    register fmt x = roundf(src[0]);            \
-    register fmt y = roundf(src[1]);            \
-    register fmt z = roundf(src[2]);            \
-    ((fmt *) dest)[0] = x;                      \
-    ((fmt *) dest)[1] = y;                      \
-    ((fmt *) dest)[2] = z;                      \
-}
-void vec3f_to_vec3s(Vec3s dest, const Vec3f src) { vec3_copy_bits_roundf(s16, dest, src); } // 32 -> 16
-void vec3f_to_vec3i(Vec3i dest, const Vec3f src) { vec3_copy_bits_roundf(s32, dest, src); } // 32 -> 32
-#undef vec3_copy_bits_roundf
-
-#define vec3_copy_y_off_func(destFmt, dest, srcFmt, src, yOff) {\
-    register destFmt x = ((srcFmt *) src)[0];                   \
-    register destFmt y = ((srcFmt *) src)[1] + yOff;            \
-    register destFmt z = ((srcFmt *) src)[2];                   \
-    ((destFmt *) dest)[0] = x;                                  \
-    ((destFmt *) dest)[1] = y;                                  \
-    ((destFmt *) dest)[2] = z;                                  \
-}
-void vec3f_copy_y_off(Vec3f dest, Vec3f src, f32 yOff) { vec3_copy_y_off_func(f32, dest, f32, src, yOff); }
-#undef vec3_copy_y_off_func
-
-/// Set vector 'dest' to (x, y, z)
-inline void vec3f_set(Vec3f dest, const f32 x, const f32 y, const f32 z) { vec3_set(dest, x, y, z); }
-inline void vec3i_set(Vec3i dest, const s32 x, const s32 y, const s32 z) { vec3_set(dest, x, y, z); }
-inline void vec3s_set(Vec3s dest, const s16 x, const s16 y, const s16 z) { vec3_set(dest, x, y, z); }
-
-/// Add vector 'a' to 'dest'
-#define vec3_add_func(fmt, dest, a) {   \
-    register fmt *temp = (fmt *)(dest); \
-    register fmt sum, sum2;             \
-    register s32 i;                     \
-    for (i = 0; i < 3; i++) {           \
-        sum = *(a);                     \
-        (a)++;                          \
-        sum2 = *temp;                   \
-        *temp = (sum + sum2);           \
-        temp++;                         \
-    }                                   \
-}
-void vec3f_add(Vec3f dest, const Vec3f a) { vec3_add_func(f32, dest, a); }
-void vec3i_add(Vec3i dest, const Vec3i a) { vec3_add_func(s32, dest, a); }
-void vec3s_add(Vec3s dest, const Vec3s a) { vec3_add_func(s16, dest, a); }
-#undef vec3_add_func
-
-/// Make 'dest' the sum of vectors a and b.
-#define vec3_sum_func(fmt, dest, a, b) {\
-    register fmt *temp = (fmt *)(dest); \
-    register fmt sum, sum2;             \
-    register s32 i;                     \
-    for (i = 0; i < 3; i++) {           \
-        sum = *(a);                     \
-        (a)++;                          \
-        sum2 = *(b);                    \
-        (b)++;                          \
-        *temp = (sum + sum2);           \
-        temp++;                         \
-    }                                   \
-}
-void vec3f_sum(Vec3f dest, const Vec3f a, const Vec3f b) { vec3_sum_func(f32, dest, a, b); }
-void vec3i_sum(Vec3i dest, const Vec3i a, const Vec3i b) { vec3_sum_func(s32, dest, a, b); }
-void vec3s_sum(Vec3s dest, const Vec3s a, const Vec3s b) { vec3_sum_func(s16, dest, a, b); }
-#undef vec3_sum_func
-
-/// Subtract vector a from 'dest'
-#define vec3_sub_func(fmt, dest, a) {   \
-    register fmt x = ((fmt *) a)[0];    \
-    register fmt y = ((fmt *) a)[1];    \
-    register fmt z = ((fmt *) a)[2];    \
-    ((fmt *) dest)[0] -= x;             \
-    ((fmt *) dest)[1] -= y;             \
-    ((fmt *) dest)[2] -= z;             \
-}
-void vec3f_sub(Vec3f dest, const Vec3f a) { vec3_sub_func(f32, dest, a); }
-void vec3i_sub(Vec3i dest, const Vec3i a) { vec3_sub_func(s32, dest, a); }
-void vec3s_sub(Vec3s dest, const Vec3s a) { vec3_sub_func(s16, dest, a); }
-#undef vec3_sub_func
-
-/// Make 'dest' the difference of vectors a and b.
-#define vec3_diff_func(fmt, dest, a, b) {   \
-    register fmt x1 = ((fmt *) a)[0];       \
-    register fmt y1 = ((fmt *) a)[1];       \
-    register fmt z1 = ((fmt *) a)[2];       \
-    register fmt x2 = ((fmt *) b)[0];       \
-    register fmt y2 = ((fmt *) b)[1];       \
-    register fmt z2 = ((fmt *) b)[2];       \
-    ((fmt *) dest)[0] = (x1 - x2);          \
-    ((fmt *) dest)[1] = (y1 - y2);          \
-    ((fmt *) dest)[2] = (z1 - z2);          \
-}
-void vec3f_diff(Vec3f dest, const Vec3f a, const Vec3f b) { vec3_diff_func(f32, dest, a, b); }
-void vec3i_diff(Vec3i dest, const Vec3i a, const Vec3i b) { vec3_diff_func(s32, dest, a, b); }
-void vec3s_diff(Vec3s dest, const Vec3s a, const Vec3s b) { vec3_diff_func(s16, dest, a, b); }
-#undef vec3_diff_func
-
-/// Multiply vector 'a' into 'dest'
-#define vec3_mul_func(fmt, dest, a) {   \
-    register fmt x = ((fmt *) a)[0];    \
-    register fmt y = ((fmt *) a)[1];    \
-    register fmt z = ((fmt *) a)[2];    \
-    ((fmt *) dest)[0] *= x;             \
-    ((fmt *) dest)[1] *= y;             \
-    ((fmt *) dest)[2] *= z;             \
-}
-void vec3f_mul(Vec3f dest, const Vec3f a) { vec3_mul_func(f32, dest, a); }
-void vec3i_mul(Vec3i dest, const Vec3i a) { vec3_mul_func(s32, dest, a); }
-void vec3s_mul(Vec3s dest, const Vec3s a) { vec3_mul_func(s16, dest, a); }
-#undef vec3_mul_func
-
-/// Make 'dest' the product of vectors a and b.
-#define vec3_prod_func(fmt, dest, a, b) {   \
-    register fmt x1 = ((fmt *) a)[0];       \
-    register fmt y1 = ((fmt *) a)[1];       \
-    register fmt z1 = ((fmt *) a)[2];       \
-    register fmt x2 = ((fmt *) b)[0];       \
-    register fmt y2 = ((fmt *) b)[1];       \
-    register fmt z2 = ((fmt *) b)[2];       \
-    ((fmt *) dest)[0] = (x1 * x2);          \
-    ((fmt *) dest)[1] = (y1 * y2);          \
-    ((fmt *) dest)[2] = (z1 * z2);          \
-}
-void vec3f_prod(Vec3f dest, const Vec3f a, const Vec3f b) { vec3_prod_func(f32, dest, a, b); }
-void vec3i_prod(Vec3i dest, const Vec3i a, const Vec3i b) { vec3_prod_func(s32, dest, a, b); }
-void vec3s_prod(Vec3s dest, const Vec3s a, const Vec3s b) { vec3_prod_func(s16, dest, a, b); }
-#undef vec3_prod_func
-
-
-/// Performs element-wise division of two 3-vectors
-#define vec3_div_func(fmt, dest, a) {   \
-    register fmt x = ((fmt *) a)[0];    \
-    register fmt y = ((fmt *) a)[1];    \
-    register fmt z = ((fmt *) a)[2];    \
-    ((fmt *) dest)[0] /= x;             \
-    ((fmt *) dest)[1] /= y;             \
-    ((fmt *) dest)[2] /= z;             \
-}
-void vec3f_div(Vec3f dest, const Vec3f a) { vec3_div_func(f32, dest, a); }
-void vec3i_div(Vec3i dest, const Vec3i a) { vec3_div_func(s32, dest, a); }
-void vec3s_div(Vec3s dest, const Vec3s a) { vec3_div_func(s16, dest, a); }
-#undef vec3_div_func
-
-/// Make 'dest' the quotient of vectors a and b.
-#define vec3_quot_func(fmt, dest, a, b) {   \
-    register fmt x1 = ((fmt *) a)[0];       \
-    register fmt y1 = ((fmt *) a)[1];       \
-    register fmt z1 = ((fmt *) a)[2];       \
-    register fmt x2 = ((fmt *) b)[0];       \
-    register fmt y2 = ((fmt *) b)[1];       \
-    register fmt z2 = ((fmt *) b)[2];       \
-    ((fmt *) dest)[0] = (x1 / x2);          \
-    ((fmt *) dest)[1] = (y1 / y2);          \
-    ((fmt *) dest)[2] = (z1 / z2);          \
-}
-void vec3f_quot(Vec3f dest, const Vec3f a, const Vec3f b) { vec3_quot_func(f32, dest, a, b); }
-void vec3i_quot(Vec3i dest, const Vec3i a, const Vec3i b) { vec3_quot_func(s32, dest, a, b); }
-void vec3s_quot(Vec3s dest, const Vec3s a, const Vec3s b) { vec3_quot_func(s16, dest, a, b); }
-#undef vec3_quot_func
-
-/// Return the dot product of vectors a and b.
-f32 vec3f_dot(const Vec3f a, const Vec3f b) {
-    return vec3_dot(a, b);
-}
-
-/// Make vector 'dest' the cross product of vectors a and b.
-void vec3f_cross(Vec3f dest, const Vec3f a, const Vec3f b) {
-    vec3_cross(dest, a, b);
-}
-
-/// Scale vector 'dest' so it has length 1
-void vec3f_normalize(Vec3f dest) {
-    register f32 mag = (sqr(dest[0]) + sqr(dest[1]) + sqr(dest[2]));
-    if (mag > NEAR_ZERO) {
-        register f32 invsqrt = (1.0f / sqrtf(mag));
-        vec3_mul_val(dest, invsqrt);
-    } else {
-        // Default to up vector.
-        dest[0] = 0;
-        ((u32 *) dest)[1] = FLOAT_ONE;
-        dest[2] = 0;
-    }
-}
-
 /// Struct the same data size as a Mat4
 struct CopyMat4 {
     f32 a[0x10];
 };
 
 /// Copy matrix 'src' to 'dest' by casting to a struct CopyMat4 pointer.
-void mtxf_copy(register Mat4 dest, register Mat4 src) {
+void mtxf_copy(Mat4 dest, Mat4 src) {
     *((struct CopyMat4 *) dest) = *((struct CopyMat4 *) src);
 }
 
 /// Set mtx to the identity matrix.
-void mtxf_identity(register Mat4 mtx) {
+/// (Ones on the main diagonal, zeros everywhere else.)
+void mtxf_identity(Mat4 mtx) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
     s32 i;
     f32 *dest;
@@ -336,8 +107,8 @@ void mtxf_identity(register Mat4 mtx) {
 /// Set dest to a translation matrix of vector b.
 void mtxf_translate(Mat4 dest, Vec3f b) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register s32 i;
-    register f32 *pen;
+    s32 i;
+    f32 *pen;
     for (pen = ((f32 *) dest + 1), i = 0; i < 12; pen++, i++) {
         *pen = 0;
     }
@@ -347,65 +118,20 @@ void mtxf_translate(Mat4 dest, Vec3f b) {
     vec3f_copy(&dest[3][0], &b[0]);
 }
 
-/**
- * Multiply a vector by a matrix of the form
- * | ? ? ? 0 |
- * | ? ? ? 0 |
- * | ? ? ? 0 |
- * | 0 0 0 1 |
- * i.e. a matrix representing a linear transformation over 3 space.
- */
-void linear_mtxf_mul_vec3f(Mat4 m, Vec3f dst, Vec3f v) {
-    PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    s32 i;
-    for (i = 0; i < 3; i++) {
-        dst[i] = ((m[0][i] * v[0])
-                + (m[1][i] * v[1])
-                + (m[2][i] * v[2]));
-    }
-}
-
-void linear_mtxf_mul_vec3f_and_translate(Mat4 m, Vec3f dst, Vec3f v) {
-    PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    s32 i;
-    for (i = 0; i < 3; i++) {
-        dst[i] = ((m[0][i] * v[0])
-                + (m[1][i] * v[1])
-                + (m[2][i] * v[2])
-                +  m[3][i]);
-    }
-}
-
-/**
- * Multiply a vector by the transpose of a matrix of the form
- * | ? ? ? 0 |
- * | ? ? ? 0 |
- * | ? ? ? 0 |
- * | 0 0 0 1 |
- * i.e. a matrix representing a linear transformation over 3 space.
- */
-void linear_mtxf_transpose_mul_vec3f(Mat4 m, Vec3f dst, Vec3f v) {
-    PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    s32 i;
-    for (i = 0; i < 3; i++) {
-        dst[i] = vec3_dot(m[i], v);
-    }
-}
-
 /// Build a matrix that rotates around the z axis, then the x axis, then the y axis, and then translates.
 void mtxf_rotate_zxy_and_translate(Mat4 dest, Vec3f trans, Vec3s rot) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register f32 sx   = sins(rot[0]);
-    register f32 cx   = coss(rot[0]);
-    register f32 sy   = sins(rot[1]);
-    register f32 cy   = coss(rot[1]);
-    register f32 sz   = sins(rot[2]);
-    register f32 cz   = coss(rot[2]);
-    register f32 sysz = (sy * sz);
-    register f32 cycz = (cy * cz);
+    f32 sx   = sins(rot[0]);
+    f32 cx   = coss(rot[0]);
+    f32 sy   = sins(rot[1]);
+    f32 cy   = coss(rot[1]);
+    f32 sz   = sins(rot[2]);
+    f32 cz   = coss(rot[2]);
+    f32 sysz = (sy * sz);
+    f32 cycz = (cy * cz);
     dest[0][0] = ((sysz * sx) + cycz);
-    register f32 cysz = (cy * sz);
-    register f32 sycz = (sy * cz);
+    f32 cysz = (cy * sz);
+    f32 sycz = (sy * cz);
     dest[1][0] = ((sycz * sx) - cysz);
     dest[2][0] = (cx * sy);
     dest[0][1] = (cx * sz);
@@ -421,20 +147,20 @@ void mtxf_rotate_zxy_and_translate(Mat4 dest, Vec3f trans, Vec3s rot) {
 /// Build a matrix that rotates around the x axis, then the y axis, then the z axis, and then translates.
 UNUSED void mtxf_rotate_xyz_and_translate(Mat4 dest, Vec3f trans, Vec3s rot) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register f32 sx   = sins(rot[0]);
-    register f32 cx   = coss(rot[0]);
-    register f32 sy   = sins(rot[1]);
-    register f32 cy   = coss(rot[1]);
-    register f32 sz   = sins(rot[2]);
-    register f32 cz   = coss(rot[2]);
+    f32 sx   = sins(rot[0]);
+    f32 cx   = coss(rot[0]);
+    f32 sy   = sins(rot[1]);
+    f32 cy   = coss(rot[1]);
+    f32 sz   = sins(rot[2]);
+    f32 cz   = coss(rot[2]);
     dest[0][0] = (cy * cz);
     dest[0][1] = (cy * sz);
     dest[0][2] = -sy;
-    register f32 sxcz = (sx * cz);
-    register f32 cxsz = (cx * sz);
+    f32 sxcz = (sx * cz);
+    f32 cxsz = (cx * sz);
     dest[1][0] = ((sxcz * sy) - cxsz);
-    register f32 sxsz = (sx * sz);
-    register f32 cxcz = (cx * cz);
+    f32 sxsz = (sx * sz);
+    f32 cxcz = (cx * cz);
     dest[1][1] = ((sxsz * sy) + cxcz);
     dest[1][2] = (sx * cy);
     dest[2][0] = ((cxcz * sy) + sxsz);
@@ -447,19 +173,19 @@ UNUSED void mtxf_rotate_xyz_and_translate(Mat4 dest, Vec3f trans, Vec3s rot) {
 /// Build a matrix that rotates around the z axis, then the x axis, then the y axis, and then translates and multiplies.
 void mtxf_rotate_zxy_and_translate_and_mul(Vec3s rot, Vec3f trans, Mat4 dest, Mat4 src) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register f32 sx = sins(rot[0]);
-    register f32 cx = coss(rot[0]);
-    register f32 sy = sins(rot[1]);
-    register f32 cy = coss(rot[1]);
-    register f32 sz = sins(rot[2]);
-    register f32 cz = coss(rot[2]);
+    f32 sx = sins(rot[0]);
+    f32 cx = coss(rot[0]);
+    f32 sy = sins(rot[1]);
+    f32 cy = coss(rot[1]);
+    f32 sz = sins(rot[2]);
+    f32 cz = coss(rot[2]);
     Vec3f entry;
-    register f32 sysz = (sy * sz);
-    register f32 cycz = (cy * cz);
+    f32 sysz = (sy * sz);
+    f32 cycz = (cy * cz);
     entry[0] = ((sysz * sx) + cycz);
     entry[1] = (sz * cx);
-    register f32 cysz = (cy * sz);
-    register f32 sycz = (sy * cz);
+    f32 cysz = (cy * sz);
+    f32 sycz = (sy * cz);
     entry[2] = ((cysz * sx) - sycz);
     linear_mtxf_mul_vec3f(src, dest[0], entry);
     entry[0] = ((sycz * sx) - cysz);
@@ -478,22 +204,22 @@ void mtxf_rotate_zxy_and_translate_and_mul(Vec3s rot, Vec3f trans, Mat4 dest, Ma
 /// Build a matrix that rotates around the x axis, then the y axis, then the z axis, and then translates and multiplies.
 void mtxf_rotate_xyz_and_translate_and_mul(Vec3s rot, Vec3f trans, Mat4 dest, Mat4 src) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register f32 sx = sins(rot[0]);
-    register f32 cx = coss(rot[0]);
-    register f32 sy = sins(rot[1]);
-    register f32 cy = coss(rot[1]);
-    register f32 sz = sins(rot[2]);
-    register f32 cz = coss(rot[2]);
+    f32 sx = sins(rot[0]);
+    f32 cx = coss(rot[0]);
+    f32 sy = sins(rot[1]);
+    f32 cy = coss(rot[1]);
+    f32 sz = sins(rot[2]);
+    f32 cz = coss(rot[2]);
     Vec3f entry;
     entry[0] = (cy * cz);
     entry[1] = (cy * sz);
     entry[2] = -sy;
     linear_mtxf_mul_vec3f(src, dest[0], entry);
-    register f32 sxcz = (sx * cz);
-    register f32 cxsz = (cx * sz);
+    f32 sxcz = (sx * cz);
+    f32 cxsz = (cx * sz);
     entry[0] = ((sxcz * sy) - cxsz);
-    register f32 sxsz = (sx * sz);
-    register f32 cxcz = (cx * cz);
+    f32 sxsz = (sx * sz);
+    f32 cxcz = (cx * cz);
     entry[1] = ((sxsz * sy) + cxcz);
     entry[2] = (sx * cy);
     linear_mtxf_mul_vec3f(src, dest[1], entry);
@@ -515,9 +241,9 @@ void mtxf_rotate_xyz_and_translate_and_mul(Vec3s rot, Vec3f trans, Mat4 dest, Ma
 void mtxf_lookat(Mat4 mtx, Vec3f from, Vec3f to, s16 roll) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
     Vec3f colX, colY, colZ;
-    register f32 dx = (to[0] - from[0]);
-    register f32 dz = (to[2] - from[2]);
-    register f32 invLength = sqrtf(sqr(dx) + sqr(dz));
+    f32 dx = (to[0] - from[0]);
+    f32 dz = (to[2] - from[2]);
+    f32 invLength = sqrtf(sqr(dx) + sqr(dz));
     invLength = -(1.0f / MAX(invLength, NEAR_ZERO));
     dx *= invLength;
     dz *= invLength;
@@ -555,10 +281,10 @@ void mtxf_lookat(Mat4 mtx, Vec3f from, Vec3f to, s16 roll) {
  */
 void mtxf_billboard(Mat4 dest, Mat4 mtx, Vec3f position, Vec3f scale, s16 angle) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register s32 i;
-    register f32 sx = scale[0];
-    register f32 sy = scale[1];
-    register f32 sz = scale[2];
+    s32 i;
+    f32 sx = scale[0];
+    f32 sy = scale[1];
+    f32 sz = scale[2];
     Mat4* cameraMat = &gCameraTransform;
     for (i = 0; i < 3; i++) {
         for (int j = 0; j < 3; j++) {
@@ -676,7 +402,7 @@ void mtxf_align_terrain_triangle(Mat4 mtx, Vec3f pos, s16 yaw, f32 radius) {
     if ((point1[1] - pos[1]) < minY) point1[1] = pos[1];
     if ((point2[1] - pos[1]) < minY) point2[1] = pos[1];
 
-    f32 avgY = average_3(point0[1], point1[1], point2[1]);
+    f32 avgY = (point0[1] + point1[1] + point2[1]) / 3.f;
 
     vec3f_set(forward, sins(yaw), 0.0f, coss(yaw));
     find_vector_perpendicular_to_plane(yColumn, point0, point1, point2);
@@ -707,10 +433,10 @@ void mtxf_align_terrain_triangle(Mat4 mtx, Vec3f pos, s16 yaw, f32 radius) {
 void mtxf_mul(Mat4 dest, Mat4 a, Mat4 b) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
     Vec3f entry;
-    register f32 *temp  = (f32 *)a;
-    register f32 *temp2 = (f32 *)dest;
-    register f32 *temp3;
-    register s32 i;
+    f32 *temp  = (f32 *)a;
+    f32 *temp2 = (f32 *)dest;
+    f32 *temp3;
+    s32 i;
     for (i = 0; i < 16; i++) {
         vec3_copy(entry, temp);
         for (temp3 = (f32 *)b; (i & 3) != 3; i++) {
@@ -731,11 +457,11 @@ void mtxf_mul(Mat4 dest, Mat4 a, Mat4 b) {
 /**
  * Set matrix 'dest' to 'mtx' scaled by vector s
  */
-void mtxf_scale_vec3f(Mat4 dest, Mat4 mtx, register Vec3f s) {
+void mtxf_scale_vec3f(Mat4 dest, Mat4 mtx, Vec3f s) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register f32 *temp  = (f32 *)dest;
-    register f32 *temp2 = (f32 *)mtx;
-    register s32 i;
+    f32 *temp  = (f32 *)dest;
+    f32 *temp2 = (f32 *)mtx;
+    s32 i;
 
     for (i = 0; i < 4; i++) {
         temp[ 0] = temp2[ 0] * s[0];
@@ -747,29 +473,6 @@ void mtxf_scale_vec3f(Mat4 dest, Mat4 mtx, register Vec3f s) {
     }
 }
 
-/**
- * Multiply a vector with a transformation matrix, which applies the transformation
- * to the point. Note that the bottom row is assumed to be [0, 0, 0, 1], which is
- * true for transformation matrices if the translation has a w component of 1.
- */
-UNUSED void mtxf_mul_vec3s(Mat4 mtx, Vec3s b) {
-    PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register f32 x = b[0];
-    register f32 y = b[1];
-    register f32 z = b[2];
-    register f32 *temp2 = (f32 *)mtx;
-    register s32 i;
-    register s16 *c = b;
-    for (i = 0; i < 3; i++) {
-        c[0] = ((x * temp2[ 0])
-              + (y * temp2[ 4])
-              + (z * temp2[ 8])
-              +      temp2[12]);
-        c++;
-        temp2++;
-    }
-}
-
 /**
  * Set 'mtx' to a transformation matrix that rotates around the z axis.
  */
@@ -778,10 +481,10 @@ UNUSED void mtxf_mul_vec3s(Mat4 mtx, Vec3s b) {
     ((s16 *) mtx)[a + 16] = (((s32) b) & 0xFFFF);
 void mtxf_rotate_xy(Mtx *mtx, s16 angle) {
     PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);
-    register s32 i = (coss(angle) * 0x10000);
-    register s32 j = (sins(angle) * 0x10000);
-    register f32 *temp = (f32 *)mtx;
-    register s32 k;
+    s32 i = (coss(angle) * 0x10000);
+    s32 j = (sins(angle) * 0x10000);
+    f32 *temp = (f32 *)mtx;
+    s32 k;
     for (k = 0; k < 16; k++) {
         *temp = 0;
         temp++;
@@ -794,168 +497,13 @@ void mtxf_rotate_xy(Mtx *mtx, s16 angle) {
     ((s16 *) mtx)[15] = 1;
 }
 
-/**
- * Take the vector starting at 'from' pointed at 'to' an retrieve the length
- * of that vector, as well as the yaw and pitch angles.
- * Basically it converts the direction to spherical coordinates.
- */
-
-/// Finds the horizontal distance between two vectors.
-void vec3f_get_lateral_dist(Vec3f from, Vec3f to, f32 *lateralDist) {
-    register f32 dx = (to[0] - from[0]);
-    register f32 dz = (to[2] - from[2]);
-    *lateralDist = sqrtf(sqr(dx) + sqr(dz));
-}
-
-/// Finds the squared horizontal distance between two vectors. Avoids a sqrtf call.
-void vec3f_get_lateral_dist_squared(Vec3f from, Vec3f to, f32 *lateralDist) {
-    register f32 dx = (to[0] - from[0]);
-    register f32 dz = (to[2] - from[2]);
-    *lateralDist = (sqr(dx) + sqr(dz));
-}
-
-/// Finds the distance between two vectors.
-void vec3f_get_dist(Vec3f from, Vec3f to, f32 *dist) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *dist = vec3_mag(d);
-}
-
-/// Finds the squared distance between two vectors. Avoids a sqrtf call.
-void vec3f_get_dist_squared(Vec3f from, Vec3f to, f32 *dist) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *dist = vec3_sumsq(d);
-}
-
-/// Finds the distance and yaw etween two vectors.
-void vec3f_get_dist_and_yaw(Vec3f from, Vec3f to, f32 *dist, s16 *yaw) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *dist = vec3_mag(d);
-    *yaw = atan2s(d[2], d[0]);
-}
-
-/// Finds the pitch between two vectors.
-void vec3f_get_pitch(Vec3f from, Vec3f to, s16 *pitch) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *pitch = atan2s(sqrtf(sqr(d[0]) + sqr(d[2])), d[1]);
-}
-
-/// Finds the yaw between two vectors.
-void vec3f_get_yaw(Vec3f from, Vec3f to, s16 *yaw) {
-    register f32 dx = (to[0] - from[0]);
-    register f32 dz = (to[2] - from[2]);
-    *yaw = atan2s(dz, dx);
-}
-
-/// Finds the pitch and yaw between two vectors.
-void vec3f_get_angle(Vec3f from, Vec3f to, s16 *pitch, s16 *yaw) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *pitch = atan2s(sqrtf(sqr(d[0]) + sqr(d[2])), d[1]);
-    *yaw   = atan2s(d[2], d[0]);
-}
-
-/// Finds the horizontal distance and pitch between two vectors.
-void vec3f_get_lateral_dist_and_pitch(Vec3f from, Vec3f to, f32 *lateralDist, s16 *pitch) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *lateralDist = sqrtf(sqr(d[0]) + sqr(d[2]));
-    *pitch       = atan2s(*lateralDist, d[1]);
-}
-
-/// Finds the horizontal distance and yaw between two vectors.
-void vec3f_get_lateral_dist_and_yaw(Vec3f from, Vec3f to, f32 *lateralDist, s16 *yaw) {
-    register f32 dx = (to[0] - from[0]);
-    register f32 dz = (to[2] - from[2]);
-    *lateralDist = sqrtf(sqr(dx) + sqr(dz));
-    *yaw         = atan2s(dz, dx);
-}
-
-/// Finds the horizontal distance and angles between two vectors.
-void vec3f_get_lateral_dist_and_angle(Vec3f from, Vec3f to, f32 *lateralDist, s16 *pitch, s16 *yaw) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    *lateralDist = sqrtf(sqr(d[0]) + sqr(d[2]));
-    *pitch       = atan2s(*lateralDist, d[1]);
-    *yaw         = atan2s(d[2], d[0]);
-}
-
-/// Finds the distance and angles between two vectors.
-void vec3f_get_dist_and_angle(Vec3f from, Vec3f to, f32 *dist, s16 *pitch, s16 *yaw) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    register f32 xz = (sqr(d[0]) + sqr(d[2]));
-    *dist           = sqrtf(xz + sqr(d[1]));
-    *pitch          = atan2s(sqrtf(xz), d[1]);
-    *yaw            = atan2s(d[2], d[0]);
-}
-void vec3s_get_dist_and_angle(Vec3s from, Vec3s to, s16 *dist, s16 *pitch, s16 *yaw) {
-    Vec3s d;
-    vec3_diff(d, to, from);
-    register f32 xz = (sqr(d[0]) + sqr(d[2]));
-    *dist           = sqrtf(xz + sqr(d[1]));
-    *pitch          = atan2s(sqrtf(xz), d[1]);
-    *yaw            = atan2s(d[2], d[0]);
-}
-void vec3f_to_vec3s_get_dist_and_angle(Vec3f from, Vec3s to, f32 *dist, s16 *pitch, s16 *yaw) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    register f32 xz = (sqr(d[0]) + sqr(d[2]));
-    *dist           = sqrtf(xz + sqr(d[1]));
-    *pitch          = atan2s(sqrtf(xz), d[1]);
-    *yaw            = atan2s(d[2], d[0]);
-}
-
-/// Finds the distance, horizontal distance, and angles between two vectors.
-void vec3f_get_dist_and_lateral_dist_and_angle(Vec3f from, Vec3f to, f32 *dist, f32 *lateralDist, s16 *pitch, s16 *yaw) {
-    Vec3f d;
-    vec3_diff(d, to, from);
-    register f32 xz = (sqr(d[0]) + sqr(d[2]));
-    *dist           = sqrtf(xz + sqr(d[1]));
-    *lateralDist    = sqrtf(xz);
-    *pitch          = atan2s(*lateralDist, d[1]);
-    *yaw            = atan2s(d[2], d[0]);
-}
-
-/**
- * Construct the 'to' point which is distance 'dist' away from the 'from' position,
- * and has the angles pitch and yaw.
- */
-#define vec3_set_dist_and_angle(from, to, dist, pitch, yaw) { \
-    register f32 dcos = (dist * coss(pitch)); \
-    to[0] = (from[0] + (dcos * sins(yaw  ))); \
-    to[1] = (from[1] + (dist * sins(pitch))); \
-    to[2] = (from[2] + (dcos * coss(yaw  ))); \
-}
-void vec3f_set_dist_and_angle(Vec3f from, Vec3f to, f32 dist, s16 pitch, s16 yaw) {
-    vec3_set_dist_and_angle(from, to, dist, pitch, yaw);
-}
-void vec3s_set_dist_and_angle(Vec3s from, Vec3s to, s16 dist, s16 pitch, s16 yaw) {
-    vec3_set_dist_and_angle(from, to, dist, pitch, yaw);
-}
-
 /**
  * Similar to approach_s32, but converts to s16 and allows for overflow between 32767 and -32768
  */
-s16 approach_angle(s16 current, s16 target, s16 inc) {
-    s32 dist = (s16)(target - current);
-    if (dist < 0) {
-        dist += inc;
-        if (dist > 0) dist = 0;
-    } else if (dist > 0) {
-        dist -= inc;
-        if (dist < 0) dist = 0;
-    }
-    return (target - dist);
-}
-Bool32 approach_angle_bool(s16 *current, s16 target, s16 inc) {
-    *current = approach_angle(*current, target, inc);
+Bool32 approach_s16_bool(s16 *current, s16 target, s16 inc, s16 dec) {
+    *current = approach_s16(*current, target, inc, dec);
     return (*current != target);
 }
-
 s16 approach_s16(s16 current, s16 target, s16 inc, s16 dec) {
     s16 dist = (target - current);
     if (dist >= 0) { // target >= current
@@ -965,15 +513,15 @@ s16 approach_s16(s16 current, s16 target, s16 inc, s16 dec) {
     }
     return current;
 }
-Bool32 approach_s16_bool(s16 *current, s16 target, s16 inc, s16 dec) {
-    *current = approach_s16(*current, target, inc, dec);
-    return (*current != target);
-}
 
 /**
  * Return the value 'current' after it tries to approach target, going up at
  * most 'inc' and going down at most 'dec'.
  */
+Bool32 approach_s32_bool(s32 *current, s32 target, s32 inc, s32 dec) {
+    *current = approach_s32(*current, target, inc, dec);
+    return (*current != target);
+}
 s32 approach_s32(s32 current, s32 target, s32 inc, s32 dec) {
     s32 dist = (target - current);
     if (dist > 0) { // current < target
@@ -983,15 +531,15 @@ s32 approach_s32(s32 current, s32 target, s32 inc, s32 dec) {
     }
     return current;
 }
-Bool32 approach_s32_bool(s32 *current, s32 target, s32 inc, s32 dec) {
-    *current = approach_s32(*current, target, inc, dec);
-    return (*current != target);
-}
 
 /**
  * Return the value 'current' after it tries to approach target, going up at
  * most 'inc' and going down at most 'dec'.
  */
+Bool32 approach_f32_bool(f32 *current, f32 target, f32 inc, f32 dec) {
+    *current = approach_f32(*current, target, inc, dec);
+    return !(*current == target);
+}
 f32 approach_f32(f32 current, f32 target, f32 inc, f32 dec) {
     f32 dist = (target - current);
     if (dist >= 0.0f) { // target >= current
@@ -1001,10 +549,6 @@ f32 approach_f32(f32 current, f32 target, f32 inc, f32 dec) {
     }
     return current;
 }
-Bool32 approach_f32_bool(f32 *current, f32 target, f32 inc, f32 dec) {
-    *current = approach_f32(*current, target, inc, dec);
-    return !(*current == target);
-}
 
 s32 approach_f32_signed(f32 *current, f32 target, f32 inc) {
     *current += inc;
@@ -1077,7 +621,7 @@ s16 approach_s16_asymptotic(s16 current, s16 target, s16 divisor) {
 }
 
 s16 abs_angle_diff(s16 a0, s16 a1) {
-    register s16 diff = (a1 - a0);
+    s16 diff = (a1 - a0);
     if (diff == -0x8000) return 0x7FFF;
     return abss(diff);
 }
@@ -1334,7 +878,7 @@ s32 ray_surface_intersect(Vec3f orig, Vec3f dir, f32 dir_length, struct Surface
     // Successful contact.
     // Make 'add_dir' into 'dir' scaled by 'length'.
     Vec3f add_dir;
-    vec3_prod_val(add_dir, dir, *length);
+    vec3_scale_dest(add_dir, dir, *length);
     // Make 'hit_pos' into the sum of 'orig' and 'add_dir'.
     vec3f_sum(hit_pos, orig, add_dir);
     return TRUE;
diff --git a/src/engine/math_util.h b/src/engine/math_util.h
index 7a80952b3..5554791af 100644
--- a/src/engine/math_util.h
+++ b/src/engine/math_util.h
@@ -4,6 +4,7 @@
 #include <PR/ultratypes.h>
 
 #include "types.h"
+#include "game/puppyprint.h"
 
 #define NEAR_ZERO   __FLT_EPSILON__
 #define NEAR_ONE    (1.0f - __FLT_EPSILON__)
@@ -22,26 +23,18 @@ extern Vec3i gVec3iZero;
 extern Vec3f gVec3fOne;
 extern Vec3s gVec3sOne;
 
+
+// Angles
+
 /**
  * Converts an angle in degrees to sm64's s16 angle units. For example, DEGREES(90) == 0x4000
  * This should be used mainly to make camera code clearer at first glance.
  */
 // #define DEGREES(x) ((x) * 0x10000 / 360)
 #define DEGREES(x) ((x) * 0x2000 / 45)
-// #define DEGREES(x) (((x) << 13) / 45)
 
-/*
- * The sine and cosine tables overlap, but "#define gCosineTable (gSineTable +
- * 0x400)" doesn't give expected codegen; gSineTable and gCosineTable need to
- * be different symbols for code to match. Most likely the tables were placed
- * adjacent to each other, and gSineTable cut short, such that reads overflow
- * into gCosineTable.
- *
- * These kinds of out of bounds reads are undefined behavior, and break on
- * e.g. GCC (which doesn't place the tables next to each other, and probably
- * exploits array sizes for range analysis-based optimizations as well).
- * Thus, for non-IDO compilers we use the standard-compliant version.
- */
+// Trig functions
+
 extern f32 gSineTable[];
 #define gCosineTable (gSineTable + 0x400)
 
@@ -51,6 +44,8 @@ extern f32 gSineTable[];
 #define cots(x) (coss(x) / sins(x))
 #define atans(x) gArctanTable[(s32)((((x) * 1024) + 0.5f))] // is this correct? used for atan2_lookup
 
+// Angle conversion macros
+
 #define RAD_PER_DEG (M_PI / 180.0f)
 #define DEG_PER_RAD (180.0f / M_PI)
 
@@ -61,90 +56,121 @@ extern f32 gSineTable[];
 #define degrees_to_radians(x) (f32)( (f32)(x) * RAD_PER_DEG       )
 #define radians_to_degrees(x) (f32)( (f32)(x) * DEG_PER_RAD       )
 
-#define signum_positive(x) ((x < 0) ? -1 : 1)
 
-// #define min(a, b) MIN((a), (b)) // ((a) < (b) ? (a) : (b))
-// #define max(a, b) MAX((a), (b)) // ((a) > (b) ? (a) : (b))
-#define CLAMP(x, low, high)  (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x)))
+// Various basic helper macros
 
-// from limits.h
-#define S8_MAX __SCHAR_MAX__
-#define S8_MIN (-S8_MAX - 1)
-#define U8_MAX (S8_MAX * 2 + 1)
-#define S16_MAX __SHRT_MAX__
-#define S16_MIN (-S16_MAX - 1)
-#define U16_MAX (S16_MAX * 2 + 1)
-#define S32_MAX __INT_MAX__
-#define S32_MIN (-S32_MAX - 1)
-#define U32_MAX (S32_MAX * 2U + 1U)
-#define S64_MAX __LONG_LONG_MAX__
-#define S64_MIN (-S64_MAX - 1LL)
-#define U64_MAX (S64_MAX * 2ULL + 1ULL)
-#define F32_MAX __FLT_MAX__
-#define F32_MIN __FLT_MIN__
-#define F64_MAX __DBL_MAX__
-#define F64_MIN __DBL_MIN__
+// Get the square of a number
+#define sqr(x) ({         \
+    __auto_type _x = (x); \
+    _x * _x; })
 
-#define CLAMP_U8( x)        CLAMP((x),     0x0,  U8_MAX)
+// Get the sign of a number as -1 or 1 (zero is treated as positive)
+#define signum_positive(x) (((x) < 0) ? -1 : 1)
+
+// Absolute value
+#define ABS(x) ({         \
+    __auto_type _x = (x); \
+    _x > 0 ? _x : -_x; })
+#define absi ABS
+#define abss ABS
+
+// Absolute value of a float (faster than using the above macro)
+ALWAYS_INLINE f32 absf(f32 in) {
+    f32 out;
+    __asm__("abs.s %0,%1" : "=f" (out) : "f" (in));
+    return out;
+}
+
+// Get the minimum / maximum of a set of numbers
+#undef MIN
+#define MIN(a, b) ({      \
+    __auto_type _a = (a); \
+    __auto_type _b = (b); \
+    _a < _b ? _a : _b; })
+
+#undef MAX
+#define MAX(a, b) ({      \
+    __auto_type _a = (a); \
+    __auto_type _b = (b); \
+    _a > _b ? _a : _b; })
+
+#define min_3(a, b, c) MIN(MIN(a, b), c)
+
+#define max_3(a, b, c) MAX(MAX(a, b), c)
+
+#define min_3f min_3
+#define min_3i min_3
+#define min_3s min_3
+
+#define max_3f max_3
+#define max_3i max_3
+#define max_3s max_3
+
+void min_max_3f(f32 a, f32 b, f32 c, f32 *min, f32 *max);
+void min_max_3i(s32 a, s32 b, s32 c, s32 *min, s32 *max);
+void min_max_3s(s16 a, s16 b, s16 c, s16 *min, s16 *max);
+
+// From Wiseguy
+// Round a float to the nearest integer
+ALWAYS_INLINE s32 roundf(f32 in) {
+    f32 tmp;
+    s32 out;
+    __asm__("round.w.s %0,%1" : "=f" (tmp) : "f" (in ));
+    __asm__("mfc1      %0,%1" : "=r" (out) : "f" (tmp));
+    return out;
+}
+
+#define round_float roundf
+
+#define FLT_IS_NONZERO(x) (absf(x) > NEAR_ZERO)
+
+
+// Integer limits and clamping
+
+#define S8_MAX   127
+#define S8_MIN  (-128)
+#define U8_MAX   255
+#define S16_MAX  32767
+#define S16_MIN (-32768)
+#define U16_MAX  65535
+#define S32_MAX  2147483647
+#define S32_MIN (-2147483647 - 1) // written this way because the literal 2147483648 does not fit in a 32-bit int
+#define U32_MAX  4294967295U      // 'U' suffix keeps this an unsigned 32-bit constant instead of a signed 64-bit one
+
+// Clamp a value to the range [low, high]
+#define CLAMP(x, low, high)  MIN(MAX((x), (low)), (high))
+
+// Clamp a value to the range of a specific data type
+#define CLAMP_U8( x)        CLAMP((x),       0,  U8_MAX)
 #define CLAMP_S8( x)        CLAMP((x),  S8_MIN,  S8_MAX)
-#define CLAMP_U16(x)        CLAMP((x),     0x0, U16_MAX)
+#define CLAMP_U16(x)        CLAMP((x),       0, U16_MAX)
 #define CLAMP_S16(x)        CLAMP((x), S16_MIN, S16_MAX)
-#define CLAMP_U32(x)        CLAMP((x),     0x0, U32_MAX)
-#define CLAMP_S32(x)        CLAMP((x), S32_MIN, S32_MAX)
-#define CLAMP_U64(x)        CLAMP((x),     0x0, U64_MAX)
-#define CLAMP_S64(x)        CLAMP((x), S64_MIN, S64_MAX)
-#define CLAMP_F32(x)        CLAMP((x), F32_MIN, F32_MAX)
-#define CLAMP_F64(x)        CLAMP((x), F64_MIN, F64_MAX)
 
-#define SWAP(a, b)          { ((a) ^= (b)); ((b) ^= (a)); ((a) ^= (b)); }
 
-#define sqr(x)              (    (x) * (x))
-#define cube(x)             ( sqr(x) * (x))
-#define quad(x)             (cube(x) * (x))
-
-#define average_2(a, b      )   (((a) + (b)            ) / 2.0f)
-#define average_3(a, b, c   )   (((a) + (b) + (c)      ) / 3.0f)
-#define average_4(a, b, c, d)   (((a) + (b) + (c) + (d)) / 4.0f)
+// Vector operations
 
+// Set all elements of a vector to the same constant
 #define vec2_same(v, s)     (((v)[0]) = ((v)[1])                       = (s))
 #define vec3_same(v, s)     (((v)[0]) = ((v)[1]) = ((v)[2])            = (s))
 #define vec4_same(v, s)     (((v)[0]) = ((v)[1]) = ((v)[2]) = ((v)[3]) = (s))
 
+// Set all elements of a vector to zero
 #define vec2_zero(v)        (vec2_same((v), 0))
 #define vec3_zero(v)        (vec3_same((v), 0))
 #define vec4_zero(v)        (vec4_same((v), 0))
 
-#define vec2_c(v)           (   (v)[0] + (v)[1])
-#define vec3_c(v)           (vec2_c(v) + (v)[2])
-#define vec4_c(v)           (vec3_c(v) + (v)[3])
-
-#define vec2_average(v)     (vec2_c(v) / 2.0f)
-#define vec3_average(v)     (vec3_c(v) / 3.0f)
-#define vec4_average(v)     (vec4_c(v) / 4.0f)
-
-#define vec2_sumsq(v)       (  sqr((v)[0]) + sqr((v)[1]))
-#define vec3_sumsq(v)       (vec2_sumsq(v) + sqr((v)[2]))
-#define vec4_sumsq(v)       (vec3_sumsq(v) + sqr((v)[3]))
+// Sum of the squares of all elements of a vector
+#define vec2_sumsq(v)       (sqr((v)[0]) + sqr((v)[1]))
+#define vec3_sumsq(v)       (sqr((v)[0]) + sqr((v)[1]) + sqr((v)[2]))
+#define vec4_sumsq(v)       (sqr((v)[0]) + sqr((v)[1]) + sqr((v)[2]) + sqr((v)[3]))
 
+// Calculate the magnitude of a vector
 #define vec2_mag(v)         (sqrtf(vec2_sumsq(v)))
 #define vec3_mag(v)         (sqrtf(vec3_sumsq(v)))
 #define vec4_mag(v)         (sqrtf(vec4_sumsq(v)))
 
-#define vec3_yaw(from, to)  (atan2s(((to)[2] - (from)[2]), ((to)[0] - (from)[0])))
-
-#define vec2_dot(a, b)       (((a)[0] * (b)[0]) + ((a)[1] * (b)[1]))
-#define vec3_dot(a, b)      (vec2_dot((a), (b)) + ((a)[2] * (b)[2]))
-#define vec4_dot(a, b)      (vec3_dot((a), (b)) + ((a)[3] * (b)[3]))
-
-/// Make vector 'dest' the cross product of vectors a and b.
-#define vec3_cross(dst, a, b) {                         \
-    (dst)[0] = (((a)[1] * (b)[2]) - ((a)[2] * (b)[1])); \
-    (dst)[1] = (((a)[2] * (b)[0]) - ((a)[0] * (b)[2])); \
-    (dst)[2] = (((a)[0] * (b)[1]) - ((a)[1] * (b)[0])); \
-}
-
 /**
- * Set 'dest' the normal vector of a triangle with vertices a, b and c.
+ * Set 'dest' to the normal vector of a triangle with vertices a, b and c.
  * Equivalent to cross((c-b), (c-a)).
  */
 #define find_vector_perpendicular_to_plane(dest, a, b, c) {                                     \
@@ -153,247 +179,413 @@ extern f32 gSineTable[];
     (dest)[2] = ((b)[0] - (a)[0]) * ((c)[1] - (b)[1]) - ((c)[0] - (b)[0]) * ((b)[1] - (a)[1]);  \
 }
 
-/**
- * | ? ? ? 0 |
- * | ? ? ? 0 |
- * | ? ? ? 0 |
- * | 0 0 0 1 |
- * i.e. a matrix representing a linear transformation over 3 space.
- */
-// Multiply a vector by a matrix of the form
-#define linear_mtxf_mul_vec3(mtx, dstV, srcV) {                                                     \
-    (dstV)[0] = (((mtx)[0][0] * (srcV)[0]) + ((mtx)[1][0] * (srcV)[1]) + ((mtx)[2][0] * (srcV)[2]));\
-    (dstV)[1] = (((mtx)[0][1] * (srcV)[0]) + ((mtx)[1][1] * (srcV)[1]) + ((mtx)[2][1] * (srcV)[2]));\
-    (dstV)[2] = (((mtx)[0][2] * (srcV)[0]) + ((mtx)[1][2] * (srcV)[1]) + ((mtx)[2][2] * (srcV)[2]));\
-}
-
-#define linear_mtxf_mul_vec3_and_translate(mtx, dstV, srcV) {   \
-    linear_mtxf_mul_vec3((mtx), (dstV), (srcV));                \
-    vec3_add((dstV), (mtx)[3]);                                 \
-}
-
-// Multiply a vector by the transpose of a matrix of the form
-#define linear_mtxf_transpose_mul_vec3(mtx, dstV, srcV) {   \
-    (dstV)[0] = vec3_dot((mtx)[0], (srcV));                 \
-    (dstV)[1] = vec3_dot((mtx)[1], (srcV));                 \
-    (dstV)[2] = vec3_dot((mtx)[2], (srcV));                 \
-}
-
+// Set the elements of vector 'dst' to the given values
 #define vec2_set(dst, x, y) {           \
     (dst)[0] = (x);                     \
     (dst)[1] = (y);                     \
 }
 #define vec3_set(dst, x, y, z) {        \
-    vec2_set((dst), (x), (y));          \
+    (dst)[0] = (x);                     \
+    (dst)[1] = (y);                     \
     (dst)[2] = (z);                     \
 }
 #define vec4_set(dst, x, y, z, w) {     \
-    vec3_set((dst), (x), (y), (z));     \
+    (dst)[0] = (x);                     \
+    (dst)[1] = (y);                     \
+    (dst)[2] = (z);                     \
     (dst)[3] = (w);                     \
 }
 
+#define vec3f_set vec3_set
+#define vec3i_set vec3_set
+#define vec3s_set vec3_set
+
+// Copy vector 'src' to vector 'dst'
 #define vec2_copy(dst, src) {           \
-    (dst)[0] = (src)[0];                \
-    (dst)[1] = (src)[1];                \
+    __auto_type _x = (src)[0];          \
+    __auto_type _y = (src)[1];          \
+    (dst)[0] = _x;                      \
+    (dst)[1] = _y;                      \
 }
 #define vec3_copy(dst, src) {           \
-    vec2_copy((dst), (src));            \
-    (dst)[2] = (src)[2];                \
+    __auto_type _x = (src)[0];          \
+    __auto_type _y = (src)[1];          \
+    __auto_type _z = (src)[2];          \
+    (dst)[0] = _x;                      \
+    (dst)[1] = _y;                      \
+    (dst)[2] = _z;                      \
 }
 #define vec4_copy(dst, src) {           \
-    vec3_copy((dst), (src));            \
-    (dst)[3] = (src)[3];                \
+    __auto_type _x = (src)[0];          \
+    __auto_type _y = (src)[1];          \
+    __auto_type _z = (src)[2];          \
+    __auto_type _w = (src)[3];          \
+    (dst)[0] = _x;                      \
+    (dst)[1] = _y;                      \
+    (dst)[2] = _z;                      \
+    (dst)[3] = _w;                      \
 }
 
+#define vec3f_copy vec3_copy
+#define vec3i_copy vec3_copy
+#define vec3s_copy vec3_copy
+#define vec3s_to_vec3i vec3_copy
+#define vec3s_to_vec3f vec3_copy
+#define vec3i_to_vec3s vec3_copy
+#define vec3i_to_vec3f vec3_copy
+#define vec3f_to_vec3s vec3_copy
+#define vec3f_to_vec3i vec3_copy
+
+#define surface_normal_to_vec3f(dst, surf) vec3f_copy((dst), &((surf)->normal.x))
+
+// Copy vector 'src' to vector 'dst' and add a scalar to the y component
 #define vec3_copy_y_off(dst, src, y) {  \
-    (dst)[0] =  (src)[0];               \
-    (dst)[1] = ((src)[1] + (y));        \
-    (dst)[2] =  (src)[2];               \
+    __auto_type _x = (src)[0];          \
+    __auto_type _y = (src)[1] + (y);    \
+    __auto_type _z = (src)[2];          \
+    (dst)[0] = _x;                      \
+    (dst)[1] = _y;                      \
+    (dst)[2] = _z;                      \
 }
 
-#define vec2_copy_roundf(dst, src) {    \
-    (dst)[0] = roundf((src)[0]);        \
-    (dst)[1] = roundf((src)[1]);        \
+#define vec3f_copy_y_off vec3_copy_y_off
+
+// Set vector 'dst' to the sum of vectors 'src1' and 'src2'
+#define vec2_sum(dst, src1, src2) {         \
+    __auto_type _x = (src1)[0] + (src2)[0]; \
+    __auto_type _y = (src1)[1] + (src2)[1]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
 }
-#define vec3_copy_roundf(dst, src) {    \
-    vec2_copy_roundf((dst), (src));     \
-    (dst)[2] = roundf((src)[2]);        \
+#define vec3_sum(dst, src1, src2) {         \
+    __auto_type _x = (src1)[0] + (src2)[0]; \
+    __auto_type _y = (src1)[1] + (src2)[1]; \
+    __auto_type _z = (src1)[2] + (src2)[2]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
 }
-#define vec4_copy_roundf(dst, src) {    \
-    vec3_copy_roundf((dst), (src));     \
-    (dst)[3] = roundf((src)[3]);        \
+#define vec4_sum(dst, src1, src2) {         \
+    __auto_type _x = (src1)[0] + (src2)[0]; \
+    __auto_type _y = (src1)[1] + (src2)[1]; \
+    __auto_type _z = (src1)[2] + (src2)[2]; \
+    __auto_type _w = (src1)[3] + (src2)[3]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
+    (dst)[3] = _w;                          \
 }
 
-#define vec2_copy_inverse(dst, src) {   \
-    (dst)[0] = (src)[1];                \
-    (dst)[1] = (src)[0];                \
-}
-#define vec3_copy_inverse(dst, src) {   \
-    (dst)[0] = (src)[2];                \
-    (dst)[1] = (src)[1];                \
-    (dst)[2] = (src)[0];                \
-}
-#define vec4_copy_inverse(dst, src) {   \
-    (dst)[0] = (src)[3];                \
-    (dst)[1] = (src)[2];                \
-    (dst)[2] = (src)[1];                \
-    (dst)[3] = (src)[0];                \
-}
-
-#define vec3_copy_offset_m1(dst, src) { \
-    (dst)[0] = (src)[1];                \
-    (dst)[1] = (src)[2];                \
-    (dst)[2] = (src)[0];                \
-}
-
-#define vec2_copy_negative(dst, src) {  \
-    (dst)[0] = -(src)[0];               \
-    (dst)[1] = -(src)[1];               \
-}
-#define vec3_copy_negative(dst, src) {  \
-    vec2_copy_negative((dst), (src));   \
-    (dst)[2] = -(src)[2];               \
-}
-#define vec4_copy_negative(dst, src) {  \
-    vec3_copy_negative((dst), (src));   \
-    (dst)[3] = -(src)[3];               \
-}
-
-#define vec2_sum(dst, src1, src2) {     \
-    (dst)[0] = ((src1)[0] + (src2)[0]); \
-    (dst)[1] = ((src1)[1] + (src2)[1]); \
-}
-#define vec3_sum(dst, src1, src2) {     \
-    vec2_sum((dst), (src1), (src2));    \
-    (dst)[2] = ((src1)[2] + (src2)[2]); \
-}
-#define vec4_sum(dst, src1, src2) {     \
-    vec3_sum((dst), (src1), (src2));    \
-    (dst)[3] = ((src1)[3] + (src2)[3]); \
-}
+#define vec3f_sum vec3_sum
+#define vec3i_sum vec3_sum
+#define vec3s_sum vec3_sum
 
+// Add the vector 'src' to vector 'dst'
 #define vec2_add(dst, src) vec2_sum((dst), (dst), (src))
 #define vec3_add(dst, src) vec3_sum((dst), (dst), (src))
 #define vec4_add(dst, src) vec4_sum((dst), (dst), (src))
 
-#define vec2_sum_val(dst, src, x) {     \
-    (dst)[0] = ((src)[0] + (x));        \
-    (dst)[1] = ((src)[1] + (x));        \
+#define vec3f_add vec3_add
+#define vec3i_add vec3_add
+#define vec3s_add vec3_add
+
+// Set vector 'dst' to the difference of vectors 'src1' and 'src2'
+#define vec2_diff(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] - (src2)[0]; \
+    __auto_type _y = (src1)[1] - (src2)[1]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
 }
-#define vec3_sum_val(dst, src, x) {     \
-    vec2_sum_val((dst), (src), (x));    \
-    (dst)[2] = ((src)[2] + (x));        \
+#define vec3_diff(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] - (src2)[0]; \
+    __auto_type _y = (src1)[1] - (src2)[1]; \
+    __auto_type _z = (src1)[2] - (src2)[2]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
 }
-#define vec4_sum_val(dst, src, x) {     \
-    vec3_sum_val((dst), (src), (x));    \
-    (dst)[3] = ((src)[2] + (x));        \
+#define vec4_diff(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] - (src2)[0]; \
+    __auto_type _y = (src1)[1] - (src2)[1]; \
+    __auto_type _z = (src1)[2] - (src2)[2]; \
+    __auto_type _w = (src1)[3] - (src2)[3]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
+    (dst)[3] = _w;                          \
 }
 
-#define vec2_add_val(dst, x) vec2_sum_val((dst), (dst), (x))
-#define vec3_add_val(dst, x) vec3_sum_val((dst), (dst), (x))
-#define vec4_add_val(dst, x) vec4_sum_val((dst), (dst), (x))
-
-#define vec2_diff(dst, src1, src2) {    \
-    (dst)[0] = ((src1)[0] - (src2)[0]); \
-    (dst)[1] = ((src1)[1] - (src2)[1]); \
-}
-#define vec3_diff(dst, src1, src2) {    \
-    vec2_diff((dst), (src1), (src2));   \
-    (dst)[2] = ((src1)[2] - (src2)[2]); \
-}
-#define vec4_diff(dst, src1, src2) {    \
-    vec3_diff((dst), (src1), (src2));   \
-    (dst)[3] = ((src1)[3] - (src2)[3]); \
-}
+#define vec3f_diff vec3_diff
+#define vec3i_diff vec3_diff
+#define vec3s_diff vec3_diff
 
+// Subtract the vector 'src' from vector 'dst'
 #define vec2_sub(dst, src) vec2_diff((dst), (dst), (src))
 #define vec3_sub(dst, src) vec3_diff((dst), (dst), (src))
 #define vec4_sub(dst, src) vec4_diff((dst), (dst), (src))
 
-#define vec2_diff_val(dst, src, x) {    \
-    (dst)[0] = ((src)[0] - (x));        \
-    (dst)[1] = ((src)[1] - (x));        \
+#define vec3f_sub vec3_sub
+#define vec3i_sub vec3_sub
+#define vec3s_sub vec3_sub
+
+// Set vector 'dst' to the component-wise product of vectors 'src1' and 'src2'
+#define vec2_prod(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] * (src2)[0]; \
+    __auto_type _y = (src1)[1] * (src2)[1]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
 }
-#define vec3_diff_val(dst, src, x) {    \
-    vec2_diff_val((dst), (src), (x));   \
-    (dst)[2] = ((src)[2] - (x));        \
+#define vec3_prod(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] * (src2)[0]; \
+    __auto_type _y = (src1)[1] * (src2)[1]; \
+    __auto_type _z = (src1)[2] * (src2)[2]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
 }
-#define vec4_diff_val(dst, src, x) {    \
-    vec3_diff_val((dst), (src), (x));   \
-    (dst)[3] = ((src)[3] - (x));        \
+#define vec4_prod(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] * (src2)[0]; \
+    __auto_type _y = (src1)[1] * (src2)[1]; \
+    __auto_type _z = (src1)[2] * (src2)[2]; \
+    __auto_type _w = (src1)[3] * (src2)[3]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
+    (dst)[3] = _w;                          \
 }
 
-#define vec2_sub_val(dst, x) vec2_diff_val((dst), (dst), (x))
-#define vec3_sub_val(dst, x) vec3_diff_val((dst), (dst), (x))
-#define vec4_sub_val(dst, x) vec4_diff_val((dst), (dst), (x))
-
-#define vec2_prod(dst, src1, src2) {    \
-    (dst)[0] = ((src1)[0] * (src2)[0]); \
-    (dst)[1] = ((src1)[1] * (src2)[1]); \
-}
-#define vec3_prod(dst, src1, src2) {    \
-    vec2_prod((dst), (src1), (src2));   \
-    (dst)[2] = ((src1)[2] * (src2)[2]); \
-}
-#define vec4_prod(dst, src1, src2) {    \
-    vec3_prod((dst), (src1), (src2));   \
-    (dst)[3] = ((src1)[3] * (src2)[3]); \
-}
+#define vec3f_prod vec3_prod
+#define vec3i_prod vec3_prod
+#define vec3s_prod vec3_prod
 
+// Multiply vector 'dst' by vector 'src', component by component
 #define vec2_mul(dst, src) vec2_prod((dst), (dst), (src))
 #define vec3_mul(dst, src) vec3_prod((dst), (dst), (src))
 #define vec4_mul(dst, src) vec4_prod((dst), (dst), (src))
 
-#define vec2_prod_val(dst, src, x) {    \
-    (dst)[0] = ((src)[0] * (x));        \
-    (dst)[1] = ((src)[1] * (x));        \
+#define vec3f_mul vec3_mul
+#define vec3i_mul vec3_mul
+#define vec3s_mul vec3_mul
+
+// Set vector 'dst' to vector 'src' scaled by the scalar 'x'
+#define vec2_scale_dest(dst, src, x) {  \
+    __auto_type _s = (x); /* evaluate the scalar only once */ \
+    __auto_type _x = (src)[0] * _s;     \
+    __auto_type _y = (src)[1] * _s;     \
+    (dst)[0] = _x; (dst)[1] = _y; /* stored last so 'dst' may alias 'src' */ \
 }
-#define vec3_prod_val(dst, src, x) {    \
-    vec2_prod_val((dst), (src), (x));   \
-    (dst)[2] = ((src)[2] * (x));        \
+#define vec3_scale_dest(dst, src, x) {  \
+    __auto_type _s = (x); /* evaluate the scalar only once */ \
+    __auto_type _x = (src)[0] * _s;     \
+    __auto_type _y = (src)[1] * _s;     \
+    __auto_type _z = (src)[2] * _s;     \
+    (dst)[0] = _x; (dst)[1] = _y; /* stored last so 'dst' may alias 'src' */ \
+    (dst)[2] = _z;                      \
 }
-#define vec4_prod_val(dst, src, x) {    \
-    vec3_prod_val((dst), (src), (x));   \
-    (dst)[3] = ((src)[3] * (x));        \
+#define vec4_scale_dest(dst, src, x) {  \
+    __auto_type _s = (x); /* evaluate the scalar only once */ \
+    __auto_type _x = (src)[0] * _s;     \
+    __auto_type _y = (src)[1] * _s;     \
+    __auto_type _z = (src)[2] * _s;     \
+    __auto_type _w = (src)[3] * _s;     \
+    (dst)[0] = _x; (dst)[1] = _y; /* stored last so 'dst' may alias 'src' */ \
+    (dst)[2] = _z;                      \
+    (dst)[3] = _w;                      \
 }
 
-#define vec2_mul_val(dst, x) vec2_prod_val(dst, dst, x)
-#define vec3_mul_val(dst, x) vec3_prod_val(dst, dst, x)
-#define vec4_mul_val(dst, x) vec4_prod_val(dst, dst, x)
+// Scale vector 'dst' by the scalar 'x'
+#define vec2_scale(dst, x) vec2_scale_dest(dst, dst, x)
+#define vec3_scale(dst, x) vec3_scale_dest(dst, dst, x)
+#define vec4_scale(dst, x) vec4_scale_dest(dst, dst, x)
 
-#define vec2_quot(dst, src1, src2) {    \
-    (dst)[0] = ((src1)[0] / (src2)[0]); \
-    (dst)[1] = ((src1)[1] / (src2)[1]); \
+// Set vector 'dst' to the component-wise quotient of vector 'src1' divided by vector 'src2'
+#define vec2_quot(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] / (src2)[0]; \
+    __auto_type _y = (src1)[1] / (src2)[1]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
 }
-#define vec3_quot(dst, src1, src2) {    \
-    vec2_quot((dst), (src1), (src2));   \
-    (dst)[2] = ((src1)[2] / (src2)[2]); \
+#define vec3_quot(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] / (src2)[0]; \
+    __auto_type _y = (src1)[1] / (src2)[1]; \
+    __auto_type _z = (src1)[2] / (src2)[2]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
 }
-#define vec4_quot(dst, src1, src2) {    \
-    vec3_quot((dst), (src1), (src2));   \
-    (dst)[3] = ((src1)[3] / (src2)[3]); \
+#define vec4_quot(dst, src1, src2) {        \
+    __auto_type _x = (src1)[0] / (src2)[0]; \
+    __auto_type _y = (src1)[1] / (src2)[1]; \
+    __auto_type _z = (src1)[2] / (src2)[2]; \
+    __auto_type _w = (src1)[3] / (src2)[3]; \
+    (dst)[0] = _x;                          \
+    (dst)[1] = _y;                          \
+    (dst)[2] = _z;                          \
+    (dst)[3] = _w;                          \
 }
 
+#define vec3f_quot vec3_quot
+#define vec3i_quot vec3_quot
+#define vec3s_quot vec3_quot
+
+// Divide vector 'dst' by vector 'src'
 #define vec2_div(dst, src) vec2_quot((dst), (dst), (src))
 #define vec3_div(dst, src) vec3_quot((dst), (dst), (src))
 #define vec4_div(dst, src) vec4_quot((dst), (dst), (src))
 
-#define vec2_quot_val(dst, src, x) {    \
-    (dst)[0] = ((src)[0] / (x));        \
-    (dst)[1] = ((src)[1] / (x));        \
-}
-#define vec3_quot_val(dst, src, x) {    \
-    vec2_quot_val((dst), (src), (x));   \
-    (dst)[2] = ((src)[2] / (x));        \
-}
-#define vec4_quot_val(dst, src, x) {    \
-    vec3_quot_val((dst), (src), (x));   \
-    (dst)[3] = ((src)[3] / (x));        \
+#define vec3f_div vec3_div
+#define vec3i_div vec3_div
+#define vec3s_div vec3_div
+
+// The yaw between two points in 3D space
+#define vec3_yaw(from, to)  (atan2s(((to)[2] - (from)[2]), ((to)[0] - (from)[0])))
+
+// Calculate the dot product of two vectors
+#define vec2_dot(a, b)       (((a)[0] * (b)[0]) + ((a)[1] * (b)[1]))
+#define vec3_dot(a, b)      (vec2_dot((a), (b)) + ((a)[2] * (b)[2]))
+#define vec4_dot(a, b)      (vec3_dot((a), (b)) + ((a)[3] * (b)[3]))
+
+#define vec3f_dot vec3_dot
+
+// Make vector 'dst' the cross product of vectors 'a' and 'b' ('dst' may alias 'a' or 'b').
+#define vec3_cross(dst, a, b) {                             \
+    __auto_type _x = ((a)[1] * (b)[2]) - ((a)[2] * (b)[1]); \
+    __auto_type _y = ((a)[2] * (b)[0]) - ((a)[0] * (b)[2]); \
+    __auto_type _z = ((a)[0] * (b)[1]) - ((a)[1] * (b)[0]); \
+    (dst)[0] = _x;                                          \
+    (dst)[1] = _y;                                          \
+    (dst)[2] = _z;                                          \
 }
 
-#define vec2_div_val(dst, x) vec2_quot_val((dst), (dst), (x))
-#define vec3_div_val(dst, x) vec3_quot_val((dst), (dst), (x))
-#define vec4_div_val(dst, x) vec4_quot_val((dst), (dst), (x))
+#define vec3f_cross vec3_cross
+
+// Scale vector 'v' so it has length 1
+#define vec3_normalize(v) {                       \
+    f32 _v_invmag = vec3_mag((v));                 \
+    _v_invmag = (1.0f / MAX(_v_invmag, NEAR_ZERO)); \
+    vec3_scale((v), _v_invmag);                    \
+}
+
+#define vec3f_normalize vec3_normalize
+
+// If the magnitude of vector 'v' is greater than 'max', scale 'v' down to length 'max' ('max' is evaluated once)
+#define vec3_set_max_dist(v, max) {  \
+    f32 _v_mag = vec3_mag((v));      \
+    f32 _max = (max);                \
+    _v_mag = MAX(_v_mag, NEAR_ZERO); \
+    if (_v_mag > _max) {             \
+        _v_mag = (_max / _v_mag);    \
+        vec3_scale((v), _v_mag);     \
+    }                                \
+}
+
+// Transform the vector 'srcV' by the matrix 'mtx' and store the result in 'dstV'. Ignores translation.
+#define linear_mtxf_mul_vec3(mtx, dstV, srcV) {                                                         \
+    PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);                                                   \
+    __auto_type _x = ((mtx)[0][0] * (srcV)[0]) + ((mtx)[1][0] * (srcV)[1]) + ((mtx)[2][0] * (srcV)[2]); \
+    __auto_type _y = ((mtx)[0][1] * (srcV)[0]) + ((mtx)[1][1] * (srcV)[1]) + ((mtx)[2][1] * (srcV)[2]); \
+    __auto_type _z = ((mtx)[0][2] * (srcV)[0]) + ((mtx)[1][2] * (srcV)[1]) + ((mtx)[2][2] * (srcV)[2]); \
+    (dstV)[0] = _x;                                                                                     \
+    (dstV)[1] = _y;                                                                                     \
+    (dstV)[2] = _z;                                                                                     \
+}
+
+// Transform the vector 'srcV' by the matrix 'mtx' including translation, and store the result in 'dstV'
+#define linear_mtxf_mul_vec3_and_translate(mtx, dstV, srcV) { \
+    linear_mtxf_mul_vec3((mtx), (dstV), (srcV));              \
+    vec3_add((dstV), (mtx)[3]);                               \
+}
+
+// Transform the vector 'srcV' by the transpose of the matrix 'mtx'
+// and store the result in 'dstV'. Ignores translation.
+// For most transformation matrices, this will apply the inverse of the transformation.
+#define linear_mtxf_transpose_mul_vec3(mtx, dstV, srcV) { \
+    PUPPYPRINT_ADD_COUNTER(gPuppyCallCounter.matrix);     \
+    __auto_type _x = vec3_dot((mtx)[0], (srcV));          \
+    __auto_type _y = vec3_dot((mtx)[1], (srcV));          \
+    __auto_type _z = vec3_dot((mtx)[2], (srcV));          \
+    (dstV)[0] = _x;                                       \
+    (dstV)[1] = _y;                                       \
+    (dstV)[2] = _z;                                       \
+}
+
+#define linear_mtxf_mul_vec3f linear_mtxf_mul_vec3
+#define linear_mtxf_mul_vec3f_and_translate linear_mtxf_mul_vec3_and_translate
+#define linear_mtxf_transpose_mul_vec3f linear_mtxf_transpose_mul_vec3
+
+
+// Angles and distances between vectors
+
+/// Finds the distance between two vectors
+#define vec3_get_dist(from, to, dist) { \
+    Vec3f _d;                           \
+    vec3_diff(_d, (to), (from));        \
+    *(dist) = vec3_mag((_d));           \
+}
+
+#define vec3f_get_dist vec3_get_dist
+#define vec3s_get_dist vec3_get_dist
+
+/// Finds the horizontal distance between two vectors
+#define vec3_get_lateral_dist(from, to, lateralDist) { \
+    Vec3f _d;                                          \
+    vec3_diff(_d, (to), (from));                       \
+    *(lateralDist) = sqrtf(sqr(_d[0]) + sqr(_d[2]));   \
+}
+
+#define vec3f_get_lateral_dist vec3_get_lateral_dist
+#define vec3s_get_lateral_dist vec3_get_lateral_dist
+
+/// Finds the pitch between two vectors
+#define vec3_get_pitch(from, to, pitch) {                     \
+    Vec3f _d;                                                 \
+    vec3_diff(_d, (to), (from));                              \
+    *(pitch) = atan2s(sqrtf(sqr(_d[0]) + sqr(_d[2])), _d[1]); \
+}
+
+#define vec3f_get_pitch vec3_get_pitch
+#define vec3s_get_pitch vec3_get_pitch
+
+/// Finds the yaw between two vectors
+#define vec3_get_yaw(from, to, yaw) { \
+    f32 _dx = ((to)[0] - (from)[0]);  \
+    f32 _dz = ((to)[2] - (from)[2]);  \
+    *(yaw) = atan2s(_dz, _dx);        \
+}
+
+#define vec3f_get_yaw vec3_get_yaw
+#define vec3s_get_yaw vec3_get_yaw
+
+/// Finds the distance, pitch, and yaw between two vectors
+#define vec3_get_dist_and_angle(from, to, dist, pitch, yaw) { \
+    Vec3f _d;                                                 \
+    vec3_diff(_d, (to), (from));                              \
+    f32 _xz = (sqr(_d[0]) + sqr(_d[2]));                      \
+    *(dist)  = sqrtf(_xz + sqr(_d[1]));                       \
+    *(pitch) = atan2s(sqrtf(_xz), _d[1]);                     \
+    *(yaw)   = atan2s(_d[2], _d[0]);                          \
+}
+
+#define vec3f_get_dist_and_angle vec3_get_dist_and_angle
+#define vec3s_get_dist_and_angle vec3_get_dist_and_angle
+
+// Constructs the 'to' point which is distance 'dist' away from the 'from' position,
+// and has the angles pitch and yaw.
+#define vec3_set_dist_and_angle(from, to, dist, pitch, yaw) { \
+    f32 _dcos = ((dist) * coss(pitch));                         \
+    __auto_type _x = ((from)[0] + (_dcos  * sins(yaw)));       \
+    __auto_type _y = ((from)[1] + ((dist) * sins(pitch)));     \
+    __auto_type _z = ((from)[2] + (_dcos  * coss(yaw)));       \
+    (to)[0] = _x;                                             \
+    (to)[1] = _y;                                             \
+    (to)[2] = _z;                                             \
+}
+
+#define vec3f_set_dist_and_angle vec3_set_dist_and_angle
+#define vec3s_set_dist_and_angle vec3_set_dist_and_angle
+
+
+// Matrices
 
 #define MAT4_VEC_DOT_PROD(R, A, B, row, col) {              \
     (R)[(row)][(col)]  = ((A)[(row)][0] * (B)[0][(col)]);   \
@@ -431,96 +623,10 @@ extern f32 gSineTable[];
     ((u32 *)(mtx))[15] = FLOAT_ONE;             \
 }
 
-#define NAME_INVMAG(v) v##_invmag
-
-/// Scale vector 'v' so it has length 1
-#define vec3_normalize(v) {                                     \
-    register f32 NAME_INVMAG(v) = vec3_mag((v));                \
-    NAME_INVMAG(v) = (1.0f / MAX(NAME_INVMAG(v), NEAR_ZERO));   \
-    vec3_mul_val((v), NAME_INVMAG(v));                          \
-}
-
-#define vec3_normalize_max(v, max) {    \
-    register f32 v##_mag = vec3_mag(v); \
-    v##_mag = MAX(v##_mag, NEAR_ZERO);  \
-    if (v##_mag > max) {                \
-        v##_mag = (max / v##_mag);      \
-        vec3_mul_val(v, v##_mag);       \
-    }                                   \
-}
-
-#define ABS(x)  (((x) > 0) ? (x) : -(x))
-
-extern s32 roundf(f32);
-// backwards compatibility
-#define round_float(in) roundf(in)
-
-#define absf ABS
-#define absi ABS
-#define abss ABS
-
-#define FLT_IS_NONZERO(x) (absf(x) > NEAR_ZERO)
-
 u16 random_u16(void);
 f32 random_float(void);
 s32 random_sign(void);
 
-f32  min_3f(   f32 a, f32 b, f32 c);
-s32  min_3i(   s32 a, s32 b, s32 c);
-s32  min_3s(   s16 a, s16 b, s16 c);
-f32  max_3f(   f32 a, f32 b, f32 c);
-s32  max_3i(   s32 a, s32 b, s32 c);
-s32  max_3s(   s16 a, s16 b, s16 c);
-void min_max_3f(f32 a, f32 b, f32 c, f32 *min, f32 *max);
-void min_max_3i(s32 a, s32 b, s32 c, s32 *min, s32 *max);
-void min_max_3s(s16 a, s16 b, s16 c, s16 *min, s16 *max);
-
-void vec3f_copy    (Vec3f dest, const Vec3f src);
-void vec3i_copy    (Vec3i dest, const Vec3i src);
-void vec3s_copy    (Vec3s dest, const Vec3s src);
-void vec3s_to_vec3i(Vec3i dest, const Vec3s src);
-void vec3s_to_vec3f(Vec3f dest, const Vec3s src);
-void vec3i_to_vec3s(Vec3s dest, const Vec3i src);
-void vec3i_to_vec3f(Vec3f dest, const Vec3i src);
-void vec3f_to_vec3s(Vec3s dest, const Vec3f src);
-void vec3f_to_vec3i(Vec3i dest, const Vec3f src);
-
-void vec3f_copy_y_off(Vec3f dest, Vec3f src, f32 yOff);
-
-void surface_normal_to_vec3f(Vec3f dest, struct Surface *surf);
-
-void vec3f_set(Vec3f dest, const f32 x, const f32 y, const f32 z);
-void vec3i_set(Vec3i dest, const s32 x, const s32 y, const s32 z);
-void vec3s_set(Vec3s dest, const s16 x, const s16 y, const s16 z);
-
-void vec3f_add (Vec3f dest, const Vec3f a               );
-void vec3i_add (Vec3i dest, const Vec3i a               );
-void vec3s_add (Vec3s dest, const Vec3s a               );
-void vec3f_sum (Vec3f dest, const Vec3f a, const Vec3f b);
-void vec3i_sum (Vec3i dest, const Vec3i a, const Vec3i b);
-void vec3s_sum (Vec3s dest, const Vec3s a, const Vec3s b);
-void vec3f_sub (Vec3f dest, const Vec3f a               );
-void vec3i_sub (Vec3i dest, const Vec3i a               );
-void vec3s_sub (Vec3s dest, const Vec3s a               );
-void vec3f_diff(Vec3f dest, const Vec3f a, const Vec3f b);
-void vec3i_diff(Vec3i dest, const Vec3i a, const Vec3i b);
-void vec3s_diff(Vec3s dest, const Vec3s a, const Vec3s b);
-void vec3f_mul (Vec3f dest, const Vec3f a               );
-void vec3i_mul (Vec3i dest, const Vec3i a               );
-void vec3s_mul (Vec3s dest, const Vec3s a               );
-void vec3f_prod(Vec3f dest, const Vec3f a, const Vec3f b);
-void vec3i_prod(Vec3i dest, const Vec3i a, const Vec3i b);
-void vec3s_prod(Vec3s dest, const Vec3s a, const Vec3s b);
-void vec3f_div (Vec3f dest, const Vec3f a               );
-void vec3i_div (Vec3i dest, const Vec3i a               );
-void vec3s_div (Vec3s dest, const Vec3s a               );
-void vec3f_quot(Vec3f dest, const Vec3f a, const Vec3f b);
-void vec3i_quot(Vec3i dest, const Vec3i a, const Vec3i b);
-void vec3s_quot(Vec3s dest, const Vec3s a, const Vec3s b);
-
-f32  vec3f_dot(              const Vec3f a, const Vec3f b);
-void vec3f_cross(Vec3f dest, const Vec3f a, const Vec3f b);
-void vec3f_normalize(Vec3f dest);
 void mtxf_copy(Mat4 dest, Mat4 src);
 void mtxf_identity(Mat4 mtx);
 void mtxf_translate(Mat4 dest, Vec3f b);
@@ -537,41 +643,17 @@ void mtxf_mul(Mat4 dest, Mat4 a, Mat4 b);
 void mtxf_scale_vec3f(Mat4 dest, Mat4 mtx, Vec3f s);
 void mtxf_mul_vec3s(Mat4 mtx, Vec3s b);
 
-extern void mtxf_to_mtx_fast(register s16 *dest, register float *src);
-ALWAYS_INLINE void mtxf_to_mtx(register void *dest, register void *src) {
+extern void mtxf_to_mtx_fast(s16 *dest, float *src);
+ALWAYS_INLINE void mtxf_to_mtx(void *dest, void *src) {
     mtxf_to_mtx_fast((s16*)dest, (float*)src);
     // guMtxF2L(src, dest);
 }
 
 void mtxf_rotate_xy(Mtx *mtx, s16 angle);
-void linear_mtxf_mul_vec3f(Mat4 m, Vec3f dst, Vec3f v);
-void linear_mtxf_mul_vec3f_and_translate(Mat4 m, Vec3f dst, Vec3f v);
-void linear_mtxf_transpose_mul_vec3f(Mat4 m, Vec3f dst, Vec3f v);
 
-void vec2f_get_lateral_dist(                   Vec2f from, Vec2f to,            f32 *lateralDist                      );
-void vec3f_get_lateral_dist(                   Vec3f from, Vec3f to,            f32 *lateralDist                      );
-void vec3f_get_lateral_dist_squared(           Vec3f from, Vec3f to,            f32 *lateralDist                      );
-void vec3f_get_dist(                           Vec3f from, Vec3f to, f32 *dist                                        );
-void vec3f_get_dist_squared(                   Vec3f from, Vec3f to, f32 *dist                                        );
-void vec3f_get_dist_and_yaw(                   Vec3f from, Vec3f to, f32 *dist,                               s16 *yaw);
-void vec3f_get_pitch(                          Vec3f from, Vec3f to,                              s16 *pitch          );
-void vec3f_get_yaw(                            Vec3f from, Vec3f to,                                          s16 *yaw);
-void vec3f_get_angle(                          Vec3f from, Vec3f to,                              s16 *pitch, s16 *yaw);
-void vec3f_get_lateral_dist_and_pitch(         Vec3f from, Vec3f to,            f32 *lateralDist, s16 *pitch          );
-void vec3f_get_lateral_dist_and_yaw(           Vec3f from, Vec3f to,            f32 *lateralDist,             s16 *yaw);
-void vec3f_get_lateral_dist_and_angle(         Vec3f from, Vec3f to,            f32 *lateralDist, s16 *pitch, s16 *yaw);
-void vec3f_get_dist_and_lateral_dist_and_angle(Vec3f from, Vec3f to, f32 *dist, f32 *lateralDist, s16 *pitch, s16 *yaw);
-void vec3f_get_dist_and_angle(                 Vec3f from, Vec3f to, f32 *dist,                   s16 *pitch, s16 *yaw);
-void vec3s_get_dist_and_angle(                 Vec3s from, Vec3s to, s16 *dist,                   s16 *pitch, s16 *yaw);
-void vec3f_to_vec3s_get_dist_and_angle(        Vec3f from, Vec3s to, f32 *dist,                   s16 *pitch, s16 *yaw);
-void vec3s_set_dist_and_angle(                 Vec3s from, Vec3s to, s16  dist,                   s16  pitch, s16  yaw);
-void vec3f_set_dist_and_angle(                 Vec3f from, Vec3f to, f32  dist,                   s16  pitch, s16  yaw);
-
-s16 approach_angle(s16 current, s16 target, s16 inc);
 s16 approach_s16(s16 current, s16 target, s16 inc, s16 dec);
 s32 approach_s32(s32 current, s32 target, s32 inc, s32 dec);
 f32 approach_f32(f32 current, f32 target, f32 inc, f32 dec);
-Bool32 approach_angle_bool(s16 *current, s16 target, s16 inc);
 Bool32 approach_s16_bool(s16 *current, s16 target, s16 inc, s16 dec);
 Bool32 approach_s32_bool(s32 *current, s32 target, s32 inc, s32 dec);
 Bool32 approach_f32_bool(f32 *current, f32 target, f32 inc, f32 dec);
@@ -581,6 +663,8 @@ Bool32 approach_f32_bool(f32 *current, f32 target, f32 inc, f32 dec);
 #define approach_s16_symmetric_bool(current, target, inc) approach_s16_bool((current), (target), (inc), (inc))
 #define approach_s32_symmetric_bool(current, target, inc) approach_s32_bool((current), (target), (inc), (inc))
 #define approach_f32_symmetric_bool(current, target, inc) approach_f32_bool((current), (target), (inc), (inc))
+#define approach_angle approach_s16_symmetric
+#define approach_angle_bool approach_s16_symmetric_bool
 s32 approach_f32_signed(f32 *current, f32 target, f32 inc);
 s32 approach_f32_asymptotic_bool(f32 *current, f32 target, f32 multiplier);
 f32 approach_f32_asymptotic(f32 current, f32 target, f32 multiplier);
diff --git a/src/engine/surface_collision.c b/src/engine/surface_collision.c
index 007c5748c..8b54ef8af 100644
--- a/src/engine/surface_collision.c
+++ b/src/engine/surface_collision.c
@@ -63,11 +63,6 @@ static s32 find_wall_collisions_from_list(struct SurfaceNode *surfaceNode, struc
     TerrainData type = SURFACE_DEFAULT;
     s32 numCols = 0;
 
-    // Max collision radius = 200
-    if (radius > 200) {
-        radius = 200;
-    }
-
     f32 margin_radius = radius - 1.0f;
 
     // Stay in this loop until out of walls.
@@ -216,19 +211,25 @@ s32 find_wall_collisions(struct WallCollisionData *colData) {
     }
 
     // World (level) consists of a 16x16 grid. Find where the collision is on the grid (round toward -inf)
-    s32 cellX = GET_CELL_COORD(x);
-    s32 cellZ = GET_CELL_COORD(z);
+    s32 minCellX = GET_CELL_COORD(x - colData->radius);
+    s32 minCellZ = GET_CELL_COORD(z - colData->radius);
+    s32 maxCellX = GET_CELL_COORD(x + colData->radius);
+    s32 maxCellZ = GET_CELL_COORD(z + colData->radius);
 
-    if (!(gCollisionFlags & COLLISION_FLAG_EXCLUDE_DYNAMIC)) {
-        // Check for surfaces belonging to objects.
-        node = gDynamicSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WALLS].next;
-        numCollisions += find_wall_collisions_from_list(node, colData);
+    for (s32 cellX = minCellX; cellX <= maxCellX; cellX++) {
+        for (s32 cellZ = minCellZ; cellZ <= maxCellZ; cellZ++) {
+            if (!(gCollisionFlags & COLLISION_FLAG_EXCLUDE_DYNAMIC)) {
+                // Check for surfaces belonging to objects.
+                node = gDynamicSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WALLS].next;
+                numCollisions += find_wall_collisions_from_list(node, colData);
+            }
+
+            // Check for surfaces that are a part of level geometry.
+            node = gStaticSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WALLS].next;
+            numCollisions += find_wall_collisions_from_list(node, colData);
+        }
     }
 
-    // Check for surfaces that are a part of level geometry.
-    node = gStaticSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WALLS].next;
-    numCollisions += find_wall_collisions_from_list(node, colData);
-
     gCollisionFlags &= ~(COLLISION_FLAG_RETURN_FIRST | COLLISION_FLAG_EXCLUDE_DYNAMIC | COLLISION_FLAG_INCLUDE_INTANGIBLE);
 #ifdef VANILLA_DEBUG
     // Increment the debug tracker.
diff --git a/src/engine/surface_load.c b/src/engine/surface_load.c
index 4474bbb2e..a68362d69 100644
--- a/src/engine/surface_load.c
+++ b/src/engine/surface_load.c
@@ -187,13 +187,6 @@ static s32 lower_cell_index(s32 coord) {
     // [0, NUM_CELLS)
     s32 index = coord / CELL_SIZE;
 
-    // Include extra cell if close to boundary
-    //! Some wall checks are larger than the buffer, meaning wall checks can
-    //  miss walls that are near a cell border.
-    if (coord % CELL_SIZE < 50) {
-        index--;
-    }
-
     // Potentially > NUM_CELLS - 1, but since the upper index is <= NUM_CELLS - 1, not exploitable
     return MAX(0, index);
 }
@@ -213,13 +206,6 @@ static s32 upper_cell_index(s32 coord) {
     // [0, NUM_CELLS)
     s32 index = coord / CELL_SIZE;
 
-    // Include extra cell if close to boundary
-    //! Some wall checks are larger than the buffer, meaning wall checks can
-    //  miss walls that are near a cell border.
-    if (coord % CELL_SIZE > CELL_SIZE - 50) {
-        index++;
-    }
-
     // Potentially < 0, but since lower index is >= 0, not exploitable
     return MIN((NUM_CELLS - 1), index);
 }
@@ -262,7 +248,7 @@ static struct Surface *read_surface_data(TerrainData *vertexData, TerrainData **
     Vec3t offset;
     s16 min, max;
 
-    vec3_prod_val(offset, (*vertexIndices), 3);
+    vec3_scale_dest(offset, (*vertexIndices), 3);
 
     vec3s_copy(v[0], (vertexData + offset[0]));
     vec3s_copy(v[1], (vertexData + offset[1]));
@@ -279,7 +265,7 @@ static struct Surface *read_surface_data(TerrainData *vertexData, TerrainData **
     }
 #endif
     mag = 1.0f / sqrtf(mag);
-    vec3_mul_val(n, mag);
+    vec3_scale(n, mag);
 
     struct Surface *surface = alloc_surface(dynamic);
 
@@ -700,8 +686,9 @@ void load_object_collision_model(void) {
     PUPPYPRINT_GET_SNAPSHOT();
     TerrainData *collisionData = o->collisionData;
 
-    f32 sqrLateralDist;
-    vec3f_get_lateral_dist_squared(&o->oPosVec, &gMarioObject->oPosVec, &sqrLateralDist);
+    Vec3f dist;
+    vec3_diff(dist, &o->oPosVec, &gMarioObject->oPosVec);
+    f32 sqrLateralDist = sqr(dist[0]) + sqr(dist[2]);
 
     f32 verticalMarioDiff = gMarioObject->oPosY - o->oPosY;
 
diff --git a/src/game/behavior_actions.c b/src/game/behavior_actions.c
index d2f80578a..d400977ab 100644
--- a/src/game/behavior_actions.c
+++ b/src/game/behavior_actions.c
@@ -44,7 +44,6 @@
 #include "spawn_object.h"
 #include "spawn_sound.h"
 #include "rumble_init.h"
-#include "puppylights.h"
 
 #include "behaviors/star_door.inc.c"
 #include "behaviors/mr_i.inc.c"
diff --git a/src/game/behaviors/chain_chomp.inc.c b/src/game/behaviors/chain_chomp.inc.c
index a23480d1e..f3831896f 100644
--- a/src/game/behaviors/chain_chomp.inc.c
+++ b/src/game/behaviors/chain_chomp.inc.c
@@ -104,12 +104,12 @@ static void chain_chomp_update_chain_segments(void) {
         // Cap distance to previous chain part (so that the tail follows the chomp)
         Vec3f offset;
         vec3f_diff(offset, segment->pos, prevSegment->pos);
-        vec3_normalize_max(offset, o->oChainChompMaxDistBetweenChainParts);
+        vec3_set_max_dist(offset, o->oChainChompMaxDistBetweenChainParts);
 
         // Cap distance to pivot (so that it stretches when the chomp moves far from the wooden post)
         vec3f_add(offset, prevSegment->pos);
         f32 maxTotalDist = o->oChainChompMaxDistFromPivotPerChainPart * (CHAIN_CHOMP_NUM_SEGMENTS - i);
-        vec3_normalize_max(offset, maxTotalDist);
+        vec3_set_max_dist(offset, maxTotalDist);
 
         vec3f_copy(segment->pos, offset);
     }
@@ -365,7 +365,7 @@ static void chain_chomp_act_move(void) {
             f32 ratio = maxDistToPivot / o->oChainChompDistToPivot;
             o->oChainChompDistToPivot = maxDistToPivot;
 
-            vec3_mul_val(o->oChainChompSegments[0].pos, ratio);
+            vec3_scale(o->oChainChompSegments[0].pos, ratio);
 
             if (o->oChainChompReleaseStatus == CHAIN_CHOMP_NOT_RELEASED) {
                 // Restrict chain chomp position
diff --git a/src/game/behaviors/coin.inc.c b/src/game/behaviors/coin.inc.c
index 802ceda27..0162df336 100644
--- a/src/game/behaviors/coin.inc.c
+++ b/src/game/behaviors/coin.inc.c
@@ -102,20 +102,33 @@ void bhv_coin_loop(void) {
     struct Surface *floor = o->oFloor;
 
     if (floor != NULL) {
-        if (o->oMoveFlags & OBJ_MOVE_ON_GROUND) {
-            o->oAction = BOUNCING_COIN_ACT_BOUNCING;
-        }
-        if (o->oAction == BOUNCING_COIN_ACT_BOUNCING) {
-            o->oBounciness = 0;
-            if (floor->normal.y < 0.9f) {
-                s16 targetYaw = SURFACE_YAW(floor);
-                cur_obj_rotate_yaw_toward(targetYaw, 0x400);
-            }
-        }
-    }
+        switch (o->oAction) {
+            case BOUNCING_COIN_ACT_FALLING:
+                if (o->oTimer == 0) {
+                    cur_obj_play_sound_2(SOUND_GENERAL_COIN_SPURT);
+                }
+                if (o->oMoveFlags & OBJ_MOVE_LANDED) {
+                    o->oAction = BOUNCING_COIN_ACT_BOUNCING;
+                }
+                
+                break;
 
-    if (o->oTimer == 0) {
-        cur_obj_play_sound_2(SOUND_GENERAL_COIN_SPURT);
+            case BOUNCING_COIN_ACT_BOUNCING:
+                o->oBounciness = 0;
+                if (floor->normal.y < 0.9f) {
+                    s16 targetYaw = SURFACE_YAW(floor);
+                    cur_obj_rotate_yaw_toward(targetYaw, 0x400);
+                }
+                break;
+
+            case OBJ_ACT_LAVA_DEATH:
+#ifdef COIN_LAVA_FLICKER
+                obj_flicker_and_disappear(o, 0);
+#else
+                obj_mark_for_deletion(o);
+#endif
+                break;
+        }
     }
 
     if (o->oVelY < 0) {
@@ -123,16 +136,11 @@ void bhv_coin_loop(void) {
     }
 
     if (o->oMoveFlags & OBJ_MOVE_LANDED) {
-#ifdef COIN_LAVA_FLICKER
-        if ((o->oMoveFlags & OBJ_MOVE_ABOVE_DEATH_BARRIER)
-        || ((o->oMoveFlags & OBJ_MOVE_ABOVE_LAVA) && cur_obj_wait_then_blink(0, 20))) {
+        if (o->oMoveFlags & OBJ_MOVE_ABOVE_DEATH_BARRIER) {
             obj_mark_for_deletion(o);
+        } else if (o->oMoveFlags & OBJ_MOVE_ABOVE_LAVA) {
+            o->oAction = OBJ_ACT_LAVA_DEATH;
         }
-#else
-        if (o->oMoveFlags & (OBJ_MOVE_ABOVE_DEATH_BARRIER | OBJ_MOVE_ABOVE_LAVA)) {
-            obj_mark_for_deletion(o);
-        }
-#endif
     }
 
     if (o->oMoveFlags & OBJ_MOVE_BOUNCE) {
diff --git a/src/game/behaviors/hoot.inc.c b/src/game/behaviors/hoot.inc.c
index 48073fc2a..cd4078398 100644
--- a/src/game/behaviors/hoot.inc.c
+++ b/src/game/behaviors/hoot.inc.c
@@ -194,7 +194,8 @@ void hoot_action_loop(void) {
 
 void hoot_turn_to_home(void) {
     s16 pitchToHome, yawToHome;
-    vec3f_get_angle(&o->oPosVec, &o->oHomeVec, &pitchToHome, &yawToHome);
+    f32 distToHome;
+    vec3f_get_dist_and_angle(&o->oPosVec, &o->oHomeVec, &distToHome, &pitchToHome, &yawToHome);
 
     o->oMoveAngleYaw = approach_s16_symmetric(o->oMoveAngleYaw, yawToHome, 0x140);
     o->oMoveAnglePitch = approach_s16_symmetric(o->oMoveAnglePitch, -pitchToHome, 0x140);
diff --git a/src/game/behaviors/intro_peach.inc.c b/src/game/behaviors/intro_peach.inc.c
index 781938fe5..cfe3d0a9c 100644
--- a/src/game/behaviors/intro_peach.inc.c
+++ b/src/game/behaviors/intro_peach.inc.c
@@ -7,8 +7,9 @@
 void intro_peach_set_pos_and_opacity(struct Object *obj, f32 targetOpacity, f32 increment) {
     Vec3f newPos;
     s16 focusPitch, focusYaw;
+    f32 dist;
 
-    vec3f_get_angle(gLakituState.pos, gLakituState.focus, &focusPitch, &focusYaw);
+    vec3f_get_dist_and_angle(gLakituState.pos, gLakituState.focus, &dist, &focusPitch, &focusYaw);
     vec3f_set_dist_and_angle(gLakituState.pos, newPos, obj->oIntroPeachDistToCamera,
                              obj->oIntroPeachPitchFromFocus + focusPitch,
                              obj->oIntroPeachYawFromFocus + focusYaw);
diff --git a/src/game/behaviors/moving_coin.inc.c b/src/game/behaviors/moving_coin.inc.c
index 61d1f9d2d..dabf74915 100644
--- a/src/game/behaviors/moving_coin.inc.c
+++ b/src/game/behaviors/moving_coin.inc.c
@@ -93,9 +93,6 @@ void bhv_moving_yellow_coin_loop(void) {
             break;
     }
 
-#ifdef COIN_LAVA_FLICKER
-    if (o->oMoveFlags & OBJ_MOVE_ABOVE_LAVA) moving_coin_flicker();
-#endif
     if (o->oInteractStatus & INT_STATUS_INTERACTED) {
         coin_collected();
         o->oInteractStatus = INT_STATUS_NONE;
diff --git a/src/game/behaviors/spawn_star.inc.c b/src/game/behaviors/spawn_star.inc.c
index b3f6408d3..84135dbe0 100644
--- a/src/game/behaviors/spawn_star.inc.c
+++ b/src/game/behaviors/spawn_star.inc.c
@@ -40,7 +40,8 @@ void bhv_collect_star_loop(void) {
 
 void bhv_star_spawn_init(void) {
     s16 yaw;
-    vec3f_get_lateral_dist_and_yaw(&o->oPosVec, &o->oHomeVec, &o->oStarSpawnDisFromHome, &yaw);
+    vec3f_get_yaw(&o->oPosVec, &o->oHomeVec, &yaw);
+    vec3f_get_lateral_dist(&o->oPosVec, &o->oHomeVec, &o->oStarSpawnDisFromHome);
     o->oMoveAngleYaw = yaw;
     o->oVelY = (o->oHomeY - o->oPosY) / 30.0f;
     o->oForwardVel = o->oStarSpawnDisFromHome / 30.0f;
diff --git a/src/game/camera.c b/src/game/camera.c
index 9ef41f806..8cd629d0a 100644
--- a/src/game/camera.c
+++ b/src/game/camera.c
@@ -3462,7 +3462,7 @@ void evaluate_cubic_spline(f32 u, Vec3f Q, Vec3f spline1, Vec3f spline2, Vec3f s
     register f32 su = sqr(u);
     register f32 hcu = (su * u) / 2.0f;
 
-    B[0] = cube(nu) / 6.0f;
+    B[0] = (nu * nu * nu) / 6.0f;
     B[1] = hcu - su + (2.0f / 3.0f);
     B[2] = -hcu + (su / 2.0f) + (u / 2.0f) + (1.0f / 6.0f);
     B[3] =  hcu / 3.0f;
diff --git a/src/game/debug_box.c b/src/game/debug_box.c
index cd7a9e73c..9281625fe 100644
--- a/src/game/debug_box.c
+++ b/src/game/debug_box.c
@@ -263,30 +263,26 @@ void iterate_surfaces_envbox(Vtx *verts) {
 }
 
 // VERTCOUNT = The highest number divisible by 6, which is less than the maximum vertex buffer divided by 2.
-// The vertex buffer is 64 if OBJECTS_REJ is enabled, 32 otherwise.
-//! TODO: Why can this only use half of the vertex buffer?
-#ifdef OBJECTS_REJ
 #define VERTCOUNT 30
-#else
-#define VERTCOUNT 12
-#endif // OBJECTS_REJ
 
-void visual_surface_display(Vtx *verts, s32 iteration) {
+void visual_surface_display(Gfx **gfx, Vtx *verts, s32 iteration) {
     s32 vts = (iteration ? gVisualOffset : gVisualSurfaceCount);
     s32 vtl = 0;
     s32 count = VERTCOUNT;
     s32 ntx = 0;
 
+    Gfx *tempGfxHead = gDisplayListHead;
+
     while (vts > 0) {
         if (count == VERTCOUNT) {
             ntx = MIN(VERTCOUNT, vts);
-            gSPVertex(gDisplayListHead++, VIRTUAL_TO_PHYSICAL(verts + (gVisualSurfaceCount - vts)), ntx, 0);
+            gSPVertex((*gfx)++, VIRTUAL_TO_PHYSICAL(verts + (gVisualSurfaceCount - vts)), ntx, 0);
             count = 0;
             vtl   = VERTCOUNT;
         }
 
         if (vtl >= 6) {
-            gSP2Triangles(gDisplayListHead++, (count + 0),
+            gSP2Triangles((*gfx)++, (count + 0),
                                               (count + 1),
                                               (count + 2), 0x0,
                                               (count + 3),
@@ -296,7 +292,7 @@ void visual_surface_display(Vtx *verts, s32 iteration) {
             vtl   -= 6;
             count += 6;
         } else if (vtl >= 3) {
-            gSP1Triangle(gDisplayListHead++, (count + 0),
+            gSP1Triangle((*gfx)++, (count + 0),
                                              (count + 1),
                                              (count + 2), 0x0);
             vts   -= 3;
@@ -304,6 +300,8 @@ void visual_surface_display(Vtx *verts, s32 iteration) {
             count += 3;
         }
     }
+
+    gDisplayListHead = tempGfxHead;
 }
 
 s32 iterate_surface_count(s32 x, s32 z) {
@@ -343,7 +341,7 @@ s32 iterate_surface_count(s32 x, s32 z) {
     return j;
 }
 
-void visual_surface_loop(void) {
+void visual_surface_loop(Gfx **gfx) {
     if (!gSurfaceNodesAllocated
      || !gSurfacesAllocated
      || !gMarioState->marioObj) {
@@ -358,19 +356,20 @@ void visual_surface_loop(void) {
         return;
     }
 
-    gSPDisplayList(gDisplayListHead++, dl_visual_surface);
+    gSPDisplayList((*gfx)++, dl_visual_surface);
 
     iterate_surfaces_visual(gMarioState->pos[0], gMarioState->pos[2], verts);
 
-    visual_surface_display(verts, 0);
+    visual_surface_display(gfx, verts, 0);
 
+    gDPPipeSync((*gfx)++);
     iterate_surfaces_envbox(verts);
 
-    gDPSetRenderMode(gDisplayListHead++, G_RM_ZB_XLU_SURF, G_RM_NOOP2);
+    gDPSetRenderMode((*gfx)++, G_RM_ZB_XLU_SURF, G_RM_NOOP2);
 
-    visual_surface_display(verts, 1);
+    visual_surface_display(gfx, verts, 1);
 
-    gSPDisplayList(gDisplayListHead++, dl_debug_box_end);
+    gSPDisplayList((*gfx)++, dl_debug_box_end);
 }
 
 /**
@@ -388,9 +387,6 @@ static void append_debug_box(Vec3f center, Vec3f bounds, s16 yaw, s32 type) {
         sBoxes[sNumBoxes].yaw   = yaw;
         sBoxes[sNumBoxes].color = sCurBoxColor;
         sBoxes[sNumBoxes].type  = type;
-        if (!(sBoxes[sNumBoxes].type & (DEBUG_UCODE_REJ | DEBUG_UCODE_DEFAULT))) {
-            sBoxes[sNumBoxes].type |= DEBUG_UCODE_DEFAULT;
-        }
         ++sNumBoxes;
     }
 }
@@ -448,7 +444,7 @@ void debug_box_pos_rot(Vec3f pMin, Vec3f pMax, s16 yaw, s32 type) {
     append_debug_box(center, bounds, yaw, type);
 }
 
-static void render_box(int index) {
+static void render_box(Gfx **gfx, int index) {
     struct DebugBox *box = &sBoxes[index];
     s32 color = box->color;
     Mat4 mtxFloat;
@@ -477,20 +473,20 @@ static void render_box(int index) {
     mtxf_to_mtx(mtx, mtxFloat);
 
     // Load the calculated matrix
-    gSPMatrix(gDisplayListHead++, mtx, G_MTX_MODELVIEW | G_MTX_LOAD | G_MTX_NOPUSH);
+    gSPMatrix((*gfx)++, mtx, G_MTX_MODELVIEW | G_MTX_LOAD | G_MTX_NOPUSH);
 
     // Set env color to the color of this box
-    gDPSetColor(gDisplayListHead++, G_SETENVCOLOR, color);
+    gDPSetColor((*gfx)++, G_SETENVCOLOR, color);
 
     if (box->type & DEBUG_SHAPE_BOX) {
-        gSPDisplayList(gDisplayListHead++, dl_debug_box_verts);
+        gSPDisplayList((*gfx)++, dl_debug_box_verts);
     }
     if (box->type & DEBUG_SHAPE_CYLINDER) {
-        gSPDisplayList(gDisplayListHead++, dl_debug_cylinder_verts);
+        gSPDisplayList((*gfx)++, dl_debug_cylinder_verts);
     }
 }
 
-void render_debug_boxes(s32 type) {
+void render_debug_boxes(Gfx **gfx) {
     s32 i;
 
     debug_box_color(DBG_BOX_DEF_COLOR);
@@ -498,17 +494,14 @@ void render_debug_boxes(s32 type) {
     if (sNumBoxes == 0) return;
     if (gAreaUpdateCounter < 3) return;
 
-    gSPDisplayList(gDisplayListHead++, dl_debug_box_begin);
+    gSPDisplayList((*gfx)++, dl_debug_box_begin);
 
     for (i = 0; i < sNumBoxes; ++i) {
-        if ((type & DEBUG_UCODE_DEFAULT) && (sBoxes[i].type & DEBUG_UCODE_DEFAULT)) render_box(i);
-        if ((type & DEBUG_UCODE_REJ    ) && (sBoxes[i].type & DEBUG_UCODE_REJ    )) render_box(i);
+        render_box(gfx, i);
     }
 
-    if (type & DEBUG_BOX_CLEAR) {
-        sNumBoxes = 0;
-    }
-    gSPDisplayList(gDisplayListHead++, dl_debug_box_end);
+    sNumBoxes = 0;
+    gSPDisplayList((*gfx)++, dl_debug_box_end);
 }
 
 #endif
diff --git a/src/game/debug_box.h b/src/game/debug_box.h
index 1041c51c6..0786d77b2 100644
--- a/src/game/debug_box.h
+++ b/src/game/debug_box.h
@@ -19,13 +19,7 @@
 enum DebugBoxFlags {
     DEBUG_SHAPE_BOX      = (1 << 0), // 0x01
     DEBUG_SHAPE_CYLINDER = (1 << 1), // 0x02
-    DEBUG_UCODE_DEFAULT  = (1 << 2), // 0x04
-#ifdef OBJECTS_REJ
-    DEBUG_UCODE_REJ      = (1 << 3), // 0x08
-#else
-    DEBUG_UCODE_REJ      = DEBUG_UCODE_DEFAULT,
-#endif
-    DEBUG_BOX_CLEAR      = (1 << 4), // 0x10
+    DEBUG_BOX_CLEAR      = (1 << 2), // 0x04
 };
 
 extern u8 hitboxView;
@@ -39,8 +33,8 @@ void debug_box_rot(Vec3f center, Vec3f bounds, s16 yaw, s32 type);
 void debug_box_pos(Vec3f pMin, Vec3f pMax, s32 type);
 void debug_box_pos_rot(Vec3f pMin, Vec3f pMax, s16 yaw, s32 type);
 
-void render_debug_boxes(s32 type);
-extern void visual_surface_loop(void);
+void render_debug_boxes(Gfx **gfx);
+extern void visual_surface_loop(Gfx **gfx);
 
 #endif
 
diff --git a/src/game/emutest.c b/src/game/emutest.c
index ef2b912c9..872e51de4 100644
--- a/src/game/emutest.c
+++ b/src/game/emutest.c
@@ -9,7 +9,10 @@
 #include <string.h>
 #include "emutest_vc.h"
 #include "float.h"
-#include "types.h"
+
+#ifdef LIBPL
+#include "lib/libpl/libpl-emu.h"
+#endif
 
 extern OSMesgQueue gSIEventMesgQueue;
 extern u8 __osContPifRam[];
@@ -20,6 +23,7 @@ extern void __osPiGetAccess(void);
 extern void __osPiRelAccess(void);
 
 enum Emulator gEmulator = EMU_CONSOLE;
+u8 gSupportsLibpl = FALSE;
 
 u32 pj64_get_count_factor_asm(void); // defined in asm/pj64_get_count_factor_asm.s
 u32 emux_detect(void); // defined in asm/emux.s
@@ -140,6 +144,9 @@ void detect_emulator() {
             if (magic == 0x00500000u) {
                 // libpl is supported. Must be ParallelN64
                 gEmulator = EMU_PARALLELN64;
+#ifdef LIBPL
+                gSupportsLibpl = libpl_is_supported(LPL_ABI_VERSION_CURRENT);
+#endif
                 return;
             }
             
diff --git a/src/game/emutest.h b/src/game/emutest.h
index 8ffb2f5d9..7afd57d49 100644
--- a/src/game/emutest.h
+++ b/src/game/emutest.h
@@ -1,6 +1,8 @@
 #ifndef EMUTEST_H
 #define EMUTEST_H
 
+#include "types.h"
+
 enum Emulator {
     EMU_WIIVC = 0x0001,
     EMU_PROJECT64_ANY = 0x001E,
@@ -37,6 +39,9 @@ extern void detect_emulator();
  */
 extern enum Emulator gEmulator;
 
+// determines whether libpl is safe to use
+extern u8 gSupportsLibpl;
+
 // Included for backwards compatibility when upgrading from HackerSM64 2.0
 #define gIsConsole ((gEmulator & EMU_CONSOLE) != 0)
 
diff --git a/src/game/fasttext.c b/src/game/fasttext.c
index 89b9387b6..20357cb72 100644
--- a/src/game/fasttext.c
+++ b/src/game/fasttext.c
@@ -59,7 +59,7 @@ void drawSmallString_impl(Gfx **dl, int x, int y, const char* string, int r, int
 
     while (string[i] != '\0') {
         unsigned int cur_char = string[i];
-        s32 goddamnJMeasure = string[i] == 'j' ? -1 : 0;
+        s32 goddamnJMeasure;
 
         if (cur_char == '\n') {
             xPos = x;
@@ -75,7 +75,8 @@ void drawSmallString_impl(Gfx **dl, int x, int y, const char* string, int r, int
         } else {
             if (cur_char != ' ') {
                 s = computeS(cur_char);
-                gSPTextureRectangle(dlHead++, (xPos + 0) << 2, (yPos + 0) << 2, (xPos + 8) << 2, (yPos + 12) << 2, 0, (s << 5) - goddamnJMeasure, 0, 1 << 10, 1 << 10);
+                goddamnJMeasure = (s == 512) ? 1 : 0;
+                gSPTextureRectangle(dlHead++, (xPos + 0) << 2, (yPos + 0) << 2, (xPos + 8) << 2, (yPos + 12) << 2, 0, (s << 5) + goddamnJMeasure, 0, 1 << 10, 1 << 10);
             }
             xPos += fast_text_font_kerning[cur_char - ' '];
         }
diff --git a/src/game/game_init.c b/src/game/game_init.c
index 4d92e9aa1..bec0cfd3e 100644
--- a/src/game/game_init.c
+++ b/src/game/game_init.c
@@ -156,32 +156,43 @@ void my_rsp_init(void) {
  * Initialize the z buffer for the current frame.
  */
 void init_z_buffer(s32 resetZB) {
-    gDPPipeSync(gDisplayListHead++);
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    gDPSetDepthSource(gDisplayListHead++, G_ZS_PIXEL);
-    gDPSetDepthImage(gDisplayListHead++, gPhysicalZBuffer);
+    gDPPipeSync(tempGfxHead++);
 
-    gDPSetColorImage(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, SCREEN_WIDTH, gPhysicalZBuffer);
+    gDPSetDepthSource(tempGfxHead++, G_ZS_PIXEL);
+    gDPSetDepthImage(tempGfxHead++, gPhysicalZBuffer);
+
+    gDPSetColorImage(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, SCREEN_WIDTH, gPhysicalZBuffer);
-    if (!resetZB)
+    if (!resetZB) {
+        // Commit the commands written so far; returning without this drops them.
+        gDisplayListHead = tempGfxHead;
         return;
+    }
-    gDPSetFillColor(gDisplayListHead++,
+    gDPSetFillColor(tempGfxHead++,
                     GPACK_ZDZ(G_MAXFBZ, 0) << 16 | GPACK_ZDZ(G_MAXFBZ, 0));
 
-    gDPFillRectangle(gDisplayListHead++, 0, gBorderHeight, SCREEN_WIDTH - 1,
+    gDPFillRectangle(tempGfxHead++, 0, gBorderHeight, SCREEN_WIDTH - 1,
                      SCREEN_HEIGHT - 1 - gBorderHeight);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
  * Tells the RDP which of the three framebuffers it shall draw to.
  */
 void select_framebuffer(void) {
-    gDPPipeSync(gDisplayListHead++);
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    gDPSetCycleType(gDisplayListHead++, G_CYC_1CYCLE);
-    gDPSetColorImage(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, SCREEN_WIDTH,
+    gDPPipeSync(tempGfxHead++);
+
+    gDPSetCycleType(tempGfxHead++, G_CYC_1CYCLE);
+    gDPSetColorImage(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, SCREEN_WIDTH,
                      gPhysicalFramebuffers[sRenderingFramebuffer]);
-    gDPSetScissor(gDisplayListHead++, G_SC_NON_INTERLACE, 0, gBorderHeight, SCREEN_WIDTH,
+    gDPSetScissor(tempGfxHead++, G_SC_NON_INTERLACE, 0, gBorderHeight, SCREEN_WIDTH,
                   SCREEN_HEIGHT - gBorderHeight);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
@@ -189,19 +200,23 @@ void select_framebuffer(void) {
  * Information about the color argument: https://jrra.zone/n64/doc/n64man/gdp/gDPSetFillColor.htm
  */
 void clear_framebuffer(s32 color) {
-    gDPPipeSync(gDisplayListHead++);
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    gDPSetRenderMode(gDisplayListHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
-    gDPSetCycleType(gDisplayListHead++, G_CYC_FILL);
+    gDPPipeSync(tempGfxHead++);
 
-    gDPSetFillColor(gDisplayListHead++, color);
-    gDPFillRectangle(gDisplayListHead++,
+    gDPSetRenderMode(tempGfxHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
+    gDPSetCycleType(tempGfxHead++, G_CYC_FILL);
+
+    gDPSetFillColor(tempGfxHead++, color);
+    gDPFillRectangle(tempGfxHead++,
                      GFX_DIMENSIONS_RECT_FROM_LEFT_EDGE(0), gBorderHeight,
                      GFX_DIMENSIONS_RECT_FROM_RIGHT_EDGE(0) - 1, SCREEN_HEIGHT - gBorderHeight - 1);
 
-    gDPPipeSync(gDisplayListHead++);
+    gDPPipeSync(tempGfxHead++);
 
-    gDPSetCycleType(gDisplayListHead++, G_CYC_1CYCLE);
+    gDPSetCycleType(tempGfxHead++, G_CYC_1CYCLE);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
@@ -218,38 +233,46 @@ void clear_viewport(Vp *viewport, s32 color) {
     vpLrx = GFX_DIMENSIONS_RECT_FROM_RIGHT_EDGE(SCREEN_WIDTH - vpLrx);
 #endif
 
-    gDPPipeSync(gDisplayListHead++);
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    gDPSetRenderMode(gDisplayListHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
-    gDPSetCycleType(gDisplayListHead++, G_CYC_FILL);
+    gDPPipeSync(tempGfxHead++);
 
-    gDPSetFillColor(gDisplayListHead++, color);
-    gDPFillRectangle(gDisplayListHead++, vpUlx, vpUly, vpLrx, vpLry);
+    gDPSetRenderMode(tempGfxHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
+    gDPSetCycleType(tempGfxHead++, G_CYC_FILL);
 
-    gDPPipeSync(gDisplayListHead++);
+    gDPSetFillColor(tempGfxHead++, color);
+    gDPFillRectangle(tempGfxHead++, vpUlx, vpUly, vpLrx, vpLry);
 
-    gDPSetCycleType(gDisplayListHead++, G_CYC_1CYCLE);
+    gDPPipeSync(tempGfxHead++);
+
+    gDPSetCycleType(tempGfxHead++, G_CYC_1CYCLE);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
  * Draw the horizontal screen borders.
  */
 void draw_screen_borders(void) {
-    gDPPipeSync(gDisplayListHead++);
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    gDPSetScissor(gDisplayListHead++, G_SC_NON_INTERLACE, 0, 0, SCREEN_WIDTH, SCREEN_HEIGHT);
-    gDPSetRenderMode(gDisplayListHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
-    gDPSetCycleType(gDisplayListHead++, G_CYC_FILL);
+    gDPPipeSync(tempGfxHead++);
 
-    gDPSetFillColor(gDisplayListHead++, GPACK_RGBA5551(0, 0, 0, 0) << 16 | GPACK_RGBA5551(0, 0, 0, 0));
+    gDPSetScissor(tempGfxHead++, G_SC_NON_INTERLACE, 0, 0, SCREEN_WIDTH, SCREEN_HEIGHT);
+    gDPSetRenderMode(tempGfxHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2);
+    gDPSetCycleType(tempGfxHead++, G_CYC_FILL);
+
+    gDPSetFillColor(tempGfxHead++, GPACK_RGBA5551(0, 0, 0, 0) << 16 | GPACK_RGBA5551(0, 0, 0, 0));
 
     if (gBorderHeight) {
-        gDPFillRectangle(gDisplayListHead++, GFX_DIMENSIONS_RECT_FROM_LEFT_EDGE(0), 0,
+        gDPFillRectangle(tempGfxHead++, GFX_DIMENSIONS_RECT_FROM_LEFT_EDGE(0), 0,
                         GFX_DIMENSIONS_RECT_FROM_RIGHT_EDGE(0) - 1, gBorderHeight - 1);
-        gDPFillRectangle(gDisplayListHead++,
+        gDPFillRectangle(tempGfxHead++,
                         GFX_DIMENSIONS_RECT_FROM_LEFT_EDGE(0), SCREEN_HEIGHT - gBorderHeight,
                         GFX_DIMENSIONS_RECT_FROM_RIGHT_EDGE(0) - 1, SCREEN_HEIGHT - 1);
     }
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
@@ -277,11 +300,7 @@ void create_gfx_task_structure(void) {
     gGfxSPTask->task.t.type = M_GFXTASK;
     gGfxSPTask->task.t.ucode_boot = rspbootTextStart;
     gGfxSPTask->task.t.ucode_boot_size = ((u8 *) rspbootTextEnd - (u8 *) rspbootTextStart);
-#if defined(F3DEX_GBI_SHARED) && defined(OBJECTS_REJ)
     gGfxSPTask->task.t.flags = (OS_TASK_LOADABLE | OS_TASK_DP_WAIT);
-#else
-    gGfxSPTask->task.t.flags = 0x0;
-#endif
 #ifdef  L3DEX2_ALONE
     gGfxSPTask->task.t.ucode = gspL3DEX2_fifoTextStart;
     gGfxSPTask->task.t.ucode_data = gspL3DEX2_fifoDataStart;
diff --git a/src/game/gamecube_controller.c b/src/game/gamecube_controller.c
index 1a44c7da0..760c618b8 100644
--- a/src/game/gamecube_controller.c
+++ b/src/game/gamecube_controller.c
@@ -1,4 +1,5 @@
 #include "PR/os_internal.h"
+#include "engine/math_util.h"
 
 #include "game_init.h"
 
@@ -9,11 +10,6 @@
 #define ARRLEN(x) ((s32)(sizeof(x) / sizeof(x[0])))
 #define CHNL_ERR(format) (((format).rxsize & CHNL_ERR_MASK) >> 4)
 
-#define CLAMP(x, low, high)  (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x)))
-#define S8_MAX __SCHAR_MAX__
-#define S8_MIN (-S8_MAX - 1)
-#define CLAMP_S8( x)        CLAMP((x),  S8_MIN,  S8_MAX)
-
 #define CHNL_ERR_MASK		0xC0	/* Bit 6-7: channel errors */
 
 typedef struct
diff --git a/src/game/hud.c b/src/game/hud.c
index 75118b56b..88b8e7ca0 100644
--- a/src/game/hud.c
+++ b/src/game/hud.c
@@ -117,29 +117,37 @@ static struct CameraHUD sCameraHUD = { CAM_STATUS_NONE };
  * Renders a rgba16 16x16 glyph texture from a table list.
  */
 void render_hud_tex_lut(s32 x, s32 y, Texture *texture) {
-    gDPPipeSync(gDisplayListHead++);
-    gDPSetTextureImage(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1, texture);
-    gSPDisplayList(gDisplayListHead++, &dl_hud_img_load_tex_block);
-    gSPTextureRectangle(gDisplayListHead++, x << 2, y << 2, (x + 15) << 2, (y + 15) << 2,
+    Gfx *tempGfxHead = gDisplayListHead;
+
+    gDPPipeSync(tempGfxHead++);
+    gDPSetTextureImage(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1, texture);
+    gSPDisplayList(tempGfxHead++, &dl_hud_img_load_tex_block);
+    gSPTextureRectangle(tempGfxHead++, x << 2, y << 2, (x + 15) << 2, (y + 15) << 2,
                         G_TX_RENDERTILE, 0, 0, 4 << 10, 1 << 10);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
  * Renders a rgba16 8x8 glyph texture from a table list.
  */
 void render_hud_small_tex_lut(s32 x, s32 y, Texture *texture) {
-    gDPSetTile(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 0, 0, G_TX_LOADTILE, 0,
+    Gfx *tempGfxHead = gDisplayListHead;
+
+    gDPSetTile(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 0, 0, G_TX_LOADTILE, 0,
                 G_TX_WRAP | G_TX_NOMIRROR, G_TX_NOMASK, G_TX_NOLOD, G_TX_WRAP | G_TX_NOMIRROR, G_TX_NOMASK, G_TX_NOLOD);
-    gDPTileSync(gDisplayListHead++);
-    gDPSetTile(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 2, 0, G_TX_RENDERTILE, 0,
+    gDPTileSync(tempGfxHead++);
+    gDPSetTile(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 2, 0, G_TX_RENDERTILE, 0,
                 G_TX_CLAMP, 3, G_TX_NOLOD, G_TX_CLAMP, 3, G_TX_NOLOD);
-    gDPSetTileSize(gDisplayListHead++, G_TX_RENDERTILE, 0, 0, (8 - 1) << G_TEXTURE_IMAGE_FRAC, (8 - 1) << G_TEXTURE_IMAGE_FRAC);
-    gDPPipeSync(gDisplayListHead++);
-    gDPSetTextureImage(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1, texture);
-    gDPLoadSync(gDisplayListHead++);
-    gDPLoadBlock(gDisplayListHead++, G_TX_LOADTILE, 0, 0, 8 * 8 - 1, CALC_DXT(8, G_IM_SIZ_16b_BYTES));
-    gSPTextureRectangle(gDisplayListHead++, x << 2, y << 2, (x + 7) << 2, (y + 7) << 2, G_TX_RENDERTILE,
+    gDPSetTileSize(tempGfxHead++, G_TX_RENDERTILE, 0, 0, (8 - 1) << G_TEXTURE_IMAGE_FRAC, (8 - 1) << G_TEXTURE_IMAGE_FRAC);
+    gDPPipeSync(tempGfxHead++);
+    gDPSetTextureImage(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1, texture);
+    gDPLoadSync(tempGfxHead++);
+    gDPLoadBlock(tempGfxHead++, G_TX_LOADTILE, 0, 0, 8 * 8 - 1, CALC_DXT(8, G_IM_SIZ_16b_BYTES));
+    gSPTextureRectangle(tempGfxHead++, x << 2, y << 2, (x + 7) << 2, (y + 7) << 2, G_TX_RENDERTILE,
                         0, 0, 4 << 10, 1 << 10);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
@@ -147,14 +155,17 @@ void render_hud_small_tex_lut(s32 x, s32 y, Texture *texture) {
  */
 void render_power_meter_health_segment(s16 numHealthWedges) {
     Texture *(*healthLUT)[] = segmented_to_virtual(&power_meter_health_segments_lut);
+    Gfx *tempGfxHead = gDisplayListHead;
 
-    gDPPipeSync(gDisplayListHead++);
-    gDPSetTextureImage(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1,
+    gDPPipeSync(tempGfxHead++);
+    gDPSetTextureImage(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1,
                        (*healthLUT)[numHealthWedges - 1]);
-    gDPLoadSync(gDisplayListHead++);
-    gDPLoadBlock(gDisplayListHead++, G_TX_LOADTILE, 0, 0, 32 * 32 - 1, CALC_DXT(32, G_IM_SIZ_16b_BYTES));
-    gSP1Triangle(gDisplayListHead++, 0, 1, 2, 0);
-    gSP1Triangle(gDisplayListHead++, 0, 2, 3, 0);
+    gDPLoadSync(tempGfxHead++);
+    gDPLoadBlock(tempGfxHead++, G_TX_LOADTILE, 0, 0, 32 * 32 - 1, CALC_DXT(32, G_IM_SIZ_16b_BYTES));
+    gSP1Triangle(tempGfxHead++, 0, 1, 2, 0);
+    gSP1Triangle(tempGfxHead++, 0, 2, 3, 0);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
@@ -293,12 +304,16 @@ void render_hud_power_meter(void) {
 void render_breath_meter_segment(s16 numBreathWedges) {
     Texture *(*breathLUT)[];
     breathLUT = segmented_to_virtual(&breath_meter_segments_lut);
-    gDPPipeSync(gDisplayListHead++);
-    gDPSetTextureImage(gDisplayListHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1, (*breathLUT)[numBreathWedges - 1]);
-    gDPLoadSync(gDisplayListHead++);
-    gDPLoadBlock(gDisplayListHead++, G_TX_LOADTILE, 0, 0, 32 * 32 - 1, CALC_DXT(32, G_IM_SIZ_16b_BYTES));
-    gSP1Triangle(gDisplayListHead++, 0, 1, 2, 0);
-    gSP1Triangle(gDisplayListHead++, 0, 2, 3, 0);
+    Gfx *tempGfxHead = gDisplayListHead;
+
+    gDPPipeSync(tempGfxHead++);
+    gDPSetTextureImage(tempGfxHead++, G_IM_FMT_RGBA, G_IM_SIZ_16b, 1, (*breathLUT)[numBreathWedges - 1]);
+    gDPLoadSync(tempGfxHead++);
+    gDPLoadBlock(tempGfxHead++, G_TX_LOADTILE, 0, 0, 32 * 32 - 1, CALC_DXT(32, G_IM_SIZ_16b_BYTES));
+    gSP1Triangle(tempGfxHead++, 0, 1, 2, 0);
+    gSP1Triangle(tempGfxHead++, 0, 2, 3, 0);
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
diff --git a/src/game/level_update.c b/src/game/level_update.c
index 6674dc49a..fc4938fd8 100644
--- a/src/game/level_update.c
+++ b/src/game/level_update.c
@@ -30,7 +30,6 @@
 #include "rumble_init.h"
 #include "puppycam2.h"
 #include "puppyprint.h"
-#include "puppylights.h"
 #include "level_commands.h"
 
 #include "config.h"
@@ -625,30 +624,16 @@ void initiate_warp(s16 destLevel, s16 destArea, s16 destWarpNode, s32 warpFlags)
     sWarpDest.areaIdx = destArea;
     sWarpDest.nodeId = destWarpNode;
     sWarpDest.arg = warpFlags;
-#if defined(PUPPYCAM) || defined(PUPPYLIGHTS)
-    s32 i = 0;
-#endif
 #ifdef PUPPYCAM
     if (sWarpDest.type == WARP_TYPE_CHANGE_LEVEL)
     {
-        for (i = 0; i < gPuppyVolumeCount; i++)
+        for (s32 i = 0; i < gPuppyVolumeCount; i++)
         {
             mem_pool_free(gPuppyMemoryPool, sPuppyVolumeStack[i]);
         }
         gPuppyVolumeCount = 0;
     }
 #endif
-#ifdef PUPPYLIGHTS
-    if (sWarpDest.type == WARP_TYPE_CHANGE_LEVEL)
-    {
-        for (i = 0; i < gNumLights; i++)
-        {
-            mem_pool_free(gLightsPool, gPuppyLights[i]);
-        }
-        gNumLights = 0;
-        levelAmbient = FALSE;
-    }
-#endif
 }
 
 // From Surface 0xD3 to 0xFC
@@ -987,9 +972,6 @@ void update_hud_values(void) {
 void basic_update(void) {
     area_update_objects();
     update_hud_values();
-#ifdef PUPPYLIGHTS
-    delete_lights();
-#endif
 
     if (gCurrentArea != NULL) {
         update_camera(gCurrentArea->camera);
@@ -1030,9 +1012,6 @@ s32 play_mode_normal(void) {
     area_update_objects();
 #endif
     update_hud_values();
-#ifdef PUPPYLIGHTS
-    delete_lights();
-#endif
     if (gCurrentArea != NULL) {
 #ifdef PUPPYPRINT_DEBUG
 #ifdef BETTER_REVERB
@@ -1313,10 +1292,6 @@ s32 init_level(void) {
         sound_banks_disable(SEQ_PLAYER_SFX, SOUND_BANKS_DISABLED_DURING_INTRO_CUTSCENE);
     }
 
-#ifdef PUPPYLIGHTS
-    puppylights_allocate();
-#endif
-
     append_puppyprint_log("Level loaded in %d" PP_CYCLE_STRING ".", (s32)(PP_CYCLE_CONV(osGetTime() - first)));
     return TRUE;
 }
diff --git a/src/game/mario.c b/src/game/mario.c
index b2603c0e9..af2678113 100644
--- a/src/game/mario.c
+++ b/src/game/mario.c
@@ -1706,7 +1706,8 @@ s32 execute_mario_action(UNUSED struct Object *obj) {
     s32 inLoop = TRUE;
 
     // Updates once per frame:
-    vec3f_get_dist_and_lateral_dist_and_angle(gMarioState->prevPos, gMarioState->pos, &gMarioState->moveSpeed, &gMarioState->lateralSpeed, &gMarioState->movePitch, &gMarioState->moveYaw);
+    vec3f_get_dist_and_angle(gMarioState->prevPos, gMarioState->pos, &gMarioState->moveSpeed, &gMarioState->movePitch, &gMarioState->moveYaw);
+    vec3f_get_lateral_dist(gMarioState->prevPos, gMarioState->pos, &gMarioState->lateralSpeed);
     vec3f_copy(gMarioState->prevPos, gMarioState->pos);
 
     if (gMarioState->action) {
diff --git a/src/game/mario_actions_moving.c b/src/game/mario_actions_moving.c
index 902a18232..5f6fb46e6 100644
--- a/src/game/mario_actions_moving.c
+++ b/src/game/mario_actions_moving.c
@@ -678,35 +678,34 @@ void push_or_sidle_wall(struct MarioState *m, Vec3f startPos) {
     if (m->forwardVel > 6.0f) {
         mario_set_forward_vel(m, 6.0f);
     }
-
-    if (m->forwardVel > 0.0f) {
-        if (m->wall != NULL) {
-            wallAngle = m->wallYaw;
-            dWallAngle = wallAngle - m->faceAngle[1];
-        }
-
-        if (m->wall == NULL || dWallAngle <= -DEGREES(160) || dWallAngle >= DEGREES(160)) {
-            m->flags |= MARIO_PUSHING;
-            set_mario_animation(m, MARIO_ANIM_PUSHING);
-            play_step_sound(m, 6, 18);
-        } else {
-            if (dWallAngle < 0) {
-                set_mario_anim_with_accel(m, MARIO_ANIM_SIDESTEP_RIGHT, animSpeed);
-            } else {
-                set_mario_anim_with_accel(m, MARIO_ANIM_SIDESTEP_LEFT, animSpeed);
-            }
-
-            if (m->marioObj->header.gfx.animInfo.animFrame < 20) {
-                play_sound((SOUND_MOVING_TERRAIN_SLIDE + m->terrainSoundAddend), m->marioObj->header.gfx.cameraToObject);
-                m->particleFlags |= PARTICLE_DUST;
-            }
-
-            m->actionState = ACT_STATE_PUSH_OR_SIDLE_WALL_SIDLING;
-            m->actionArg = wallAngle + 0x8000;
-            m->marioObj->header.gfx.angle[1] = wallAngle + 0x8000;
-            m->marioObj->header.gfx.angle[2] = find_floor_slope(m, 0x4000);
-        }
+    
+    if (m->wall != NULL) {
+        wallAngle = m->wallYaw;
+        dWallAngle = wallAngle - m->faceAngle[1];
     }
+
+    if (m->wall == NULL || dWallAngle <= -DEGREES(160) || dWallAngle >= DEGREES(160)) {
+        m->flags |= MARIO_PUSHING;
+        set_mario_animation(m, MARIO_ANIM_PUSHING);
+        play_step_sound(m, 6, 18);
+    } else {
+        if (dWallAngle < 0) {
+            set_mario_anim_with_accel(m, MARIO_ANIM_SIDESTEP_RIGHT, animSpeed);
+        } else {
+            set_mario_anim_with_accel(m, MARIO_ANIM_SIDESTEP_LEFT, animSpeed);
+        }
+
+        if (m->marioObj->header.gfx.animInfo.animFrame < 20) {
+            play_sound((SOUND_MOVING_TERRAIN_SLIDE + m->terrainSoundAddend), m->marioObj->header.gfx.cameraToObject);
+            m->particleFlags |= PARTICLE_DUST;
+        }
+
+        m->actionState = ACT_STATE_PUSH_OR_SIDLE_WALL_SIDLING;
+        m->actionArg = wallAngle + 0x8000;
+        m->marioObj->header.gfx.angle[1] = wallAngle + 0x8000;
+        m->marioObj->header.gfx.angle[2] = find_floor_slope(m, 0x4000);
+    }
+    
 }
 
 void tilt_body_walking(struct MarioState *m, s16 startYaw) {
@@ -1353,9 +1352,6 @@ void tilt_body_butt_slide(struct MarioState *m) {
 }
 
 void common_slide_action(struct MarioState *m, u32 endAction, u32 airAction, s32 animation) {
-    Vec3f pos;
-
-    vec3f_copy(pos, m->pos);
     play_sound(SOUND_MOVING_TERRAIN_SLIDE + m->terrainSoundAddend, m->marioObj->header.gfx.cameraToObject);
 
 #if ENABLE_RUMBLE
@@ -1405,6 +1401,20 @@ void common_slide_action(struct MarioState *m, u32 endAction, u32 airAction, s32
 
 s32 common_slide_action_with_jump(struct MarioState *m, u32 stopAction, u32 jumpAction, u32 airAction,
                                   s32 animation) {
+#ifdef SLOPE_BUFFER
+    if (m->input & INPUT_A_PRESSED) {
+        m->actionState = 1;
+    } else if (!(m->input & INPUT_A_DOWN)) {
+        m->actionState = 0;
+    }
+    if (m->actionTimer == 5) {
+        if (m->actionState == 1) {
+            return set_jumping_action(m, jumpAction, 0);
+        }
+    } else {
+        m->actionTimer++;
+    }
+#else
     if (m->actionTimer == 5) {
         if (m->input & INPUT_A_PRESSED) {
             return set_jumping_action(m, jumpAction, 0);
@@ -1412,6 +1422,7 @@ s32 common_slide_action_with_jump(struct MarioState *m, u32 stopAction, u32 jump
     } else {
         m->actionTimer++;
     }
+#endif
 
     if (update_sliding(m, 4.0f)) {
         return set_mario_action(m, stopAction, 0);
diff --git a/src/game/mario_actions_submerged.c b/src/game/mario_actions_submerged.c
index 47ebaaac7..6c8a73729 100644
--- a/src/game/mario_actions_submerged.c
+++ b/src/game/mario_actions_submerged.c
@@ -137,7 +137,7 @@ static void apply_water_current(struct MarioState *m, Vec3f step) {
         if (whirlpool != NULL) {
             strength = 0.0f;
 
-            vec3f_to_vec3s_get_dist_and_angle(m->pos, whirlpool->pos, &distance, &pitchToWhirlpool, &yawToWhirlpool);
+            vec3_get_dist_and_angle(m->pos, whirlpool->pos, &distance, &pitchToWhirlpool, &yawToWhirlpool);
 
             yawToWhirlpool -= (s16)(0x2000 * 1000.0f / (distance + 1000.0f));
 
diff --git a/src/game/obj_behaviors.c b/src/game/obj_behaviors.c
index 46c699df0..6fbb7fdda 100644
--- a/src/game/obj_behaviors.c
+++ b/src/game/obj_behaviors.c
@@ -32,7 +32,6 @@
 #include "spawn_object.h"
 #include "spawn_sound.h"
 #include "rumble_init.h"
-#include "puppylights.h"
 
 /**
  * @file obj_behaviors.c
@@ -188,8 +187,6 @@ s8 turn_obj_away_from_steep_floor(struct Surface *objFloor, f32 floorY, f32 objV
 void obj_orient_graph(struct Object *obj, f32 normalX, f32 normalY, f32 normalZ) {
     Vec3f objVisualPosition, surfaceNormals;
 
-    Mat4 *throwMatrix;
-
     // Passes on orienting certain objects that shouldn't be oriented, like boulders.
     if (!sOrientObjWithFloor) {
         return;
@@ -200,17 +197,11 @@ void obj_orient_graph(struct Object *obj, f32 normalX, f32 normalY, f32 normalZ)
         return;
     }
 
-    throwMatrix = alloc_display_list(sizeof(*throwMatrix));
-    // If out of memory, fail to try orienting the object.
-    if (throwMatrix == NULL) {
-        return;
-    }
-
     vec3f_copy_y_off(objVisualPosition, &obj->oPosVec, obj->oGraphYOffset);
     vec3f_set(surfaceNormals, normalX, normalY, normalZ);
 
-    mtxf_align_terrain_normal(*throwMatrix, surfaceNormals, objVisualPosition, obj->oFaceAngleYaw);
-    obj->header.gfx.throwMatrix = throwMatrix;
+    mtxf_align_terrain_normal(obj->transform, surfaceNormals, objVisualPosition, obj->oFaceAngleYaw);
+    obj->header.gfx.throwMatrix = &obj->transform;
 }
 
 /**
@@ -256,10 +247,7 @@ void calc_new_obj_vel_and_pos_y(struct Surface *objFloor, f32 objFloorY, f32 obj
         }
     }
 
-    //! (Obj Position Crash) If you got an object with height past 2^31, the game would crash.
-    if ((s32) o->oPosY >= (s32) objFloorY && (s32) o->oPosY < (s32) objFloorY + 37) {
-        obj_orient_graph(o, floor_nX, floor_nY, floor_nZ);
-
+    if ((o->oPosY >= objFloorY) && (o->oPosY < objFloorY + 37)) {
         // Adds horizontal component of gravity for horizontal speed.
         f32 nxz = sqr(floor_nX) + sqr(floor_nZ);
         f32 vel = ((nxz) / (nxz + sqr(floor_nY))) * o->oGravity * 2;
@@ -313,9 +301,7 @@ void calc_new_obj_vel_and_pos_y_underwater(struct Surface *objFloor, f32 floorY,
         o->oVelY = -o->oVelY;
     }
 
-    if ((s32) o->oPosY >= (s32) floorY && (s32) o->oPosY < (s32) floorY + 37) {
-        obj_orient_graph(o, floor_nX, floor_nY, floor_nZ);
-
+    if ((o->oPosY >= floorY) && (o->oPosY < floorY + 37)) {
         // Adds horizontal component of gravity for horizontal speed.
         f32 nxz = sqr(floor_nX) + sqr(floor_nZ);
         f32 velm = (nxz / (nxz + sqr(floor_nY))) * netYAccel * 2;
@@ -412,6 +398,11 @@ s16 object_step(void) {
     }
 
     obj_update_pos_vel_xz();
+
+    if (sObjFloor && (o->oPosY >= floorY) && (o->oPosY < floorY + 37)) {
+        obj_orient_graph(o, sObjFloor->normal.x, sObjFloor->normal.y, sObjFloor->normal.z);
+    }
+
     if ((s32) o->oPosY == (s32) floorY) {
         collisionFlags += OBJ_COL_FLAG_GROUNDED;
     }
@@ -421,7 +412,7 @@ s16 object_step(void) {
     }
 
     // Generate a splash if in water.
-    obj_splash((s32) waterY, (s32) o->oPosY);
+    obj_splash(waterY, o->oPosY);
     return collisionFlags;
 }
 
diff --git a/src/game/obj_behaviors.h b/src/game/obj_behaviors.h
index d71530894..5e367daa2 100644
--- a/src/game/obj_behaviors.h
+++ b/src/game/obj_behaviors.h
@@ -18,6 +18,7 @@ enum ObjCollisionFlags {
 
 //! Lots of these are duplicates
 void set_yoshi_as_not_dead(void);
+s32 obj_flicker_and_disappear(struct Object *obj, s16 lifeSpan);
 s32 coin_step(s16 *collisionFlagsPtr);
 void moving_coin_flicker(void);
 void coin_collected(void);
diff --git a/src/game/obj_behaviors_2.c b/src/game/obj_behaviors_2.c
index e7626ab16..eacaceaf9 100644
--- a/src/game/obj_behaviors_2.c
+++ b/src/game/obj_behaviors_2.c
@@ -45,7 +45,6 @@
 #include "save_file.h"
 #include "seq_ids.h"
 #include "spawn_sound.h"
-#include "puppylights.h"
 
 //! TODO: remove static
 
@@ -522,24 +521,30 @@ static void obj_set_squished_action(void) {
 }
 
 static s32 obj_die_if_above_lava_and_health_non_positive(void) {
-    if (o->oMoveFlags & OBJ_MOVE_UNDERWATER_ON_GROUND) {
+    if (o->oMoveFlags & OBJ_MOVE_MASK_IN_WATER) {
         if (o->oGravity + o->oBuoyancy > 0.0f
-            || find_water_level(o->oPosX, o->oPosZ) - o->oPosY < 150.0f) {
+            || find_water_level(o->oPosX, o->oPosZ) - o->oPosY < 10.0f) {
             return FALSE;
         }
-    } else if (!(o->oMoveFlags & OBJ_MOVE_ABOVE_LAVA)) {
-        if (o->oMoveFlags & OBJ_MOVE_ENTERED_WATER) {
-            if (o->oWallHitboxRadius < 200.0f) {
-                cur_obj_play_sound_2(SOUND_OBJ_DIVING_INTO_WATER);
-            } else {
-                cur_obj_play_sound_2(SOUND_OBJ_DIVING_IN_WATER);
-            }
+        obj_die_if_health_non_positive();
+        return TRUE;
+
+    } else if (o->oMoveFlags & OBJ_MOVE_ABOVE_LAVA) {
+        if (o->oMoveFlags & (OBJ_MOVE_ON_GROUND | OBJ_MOVE_LANDED)) {
+            obj_die_if_health_non_positive();
+            return TRUE;
         }
-        return FALSE;
     }
 
-    obj_die_if_health_non_positive();
-    return TRUE;
+    if (o->oMoveFlags & OBJ_MOVE_ENTERED_WATER) {
+        if (o->oWallHitboxRadius < 200.0f) {
+            cur_obj_play_sound_2(SOUND_OBJ_DIVING_INTO_WATER);
+        } else {
+            cur_obj_play_sound_2(SOUND_OBJ_DIVING_IN_WATER);
+        }
+    }
+
+    return FALSE;
 }
 
 static s32 obj_handle_attacks(struct ObjectHitbox *hitbox, s32 attackedMarioAction,
diff --git a/src/game/object_helpers.c b/src/game/object_helpers.c
index 2f9673415..82435a4f6 100644
--- a/src/game/object_helpers.c
+++ b/src/game/object_helpers.c
@@ -26,7 +26,6 @@
 #include "rendering_graph_node.h"
 #include "spawn_object.h"
 #include "spawn_sound.h"
-#include "puppylights.h"
 
 static s32 clear_move_flag(u32 *bitSet, s32 flag);
 
@@ -469,16 +468,16 @@ void obj_set_gfx_pos_from_pos(struct Object *obj) {
 }
 
 void obj_init_animation(struct Object *obj, s32 animIndex) {
-    struct Animation **anims = o->oAnimations;
+    struct Animation **anims = obj->oAnimations;
     geo_obj_init_animation(&obj->header.gfx, &anims[animIndex]);
 }
 
 void obj_apply_scale_to_transform(struct Object *obj) {
     Vec3f scale;
     vec3f_copy(scale, obj->header.gfx.scale);
-    vec3_mul_val(obj->transform[0], scale[0]);
-    vec3_mul_val(obj->transform[1], scale[1]);
-    vec3_mul_val(obj->transform[2], scale[2]);
+    vec3_scale(obj->transform[0], scale[0]);
+    vec3_scale(obj->transform[1], scale[1]);
+    vec3_scale(obj->transform[2], scale[2]);
 }
 
 void obj_copy_scale(struct Object *dst, struct Object *src) {
@@ -878,9 +877,6 @@ s32 cur_obj_clear_interact_status_flag(s32 flag) {
  * Mark an object to be unloaded at the end of the frame.
  */
 void obj_mark_for_deletion(struct Object *obj) {
-#ifdef PUPPYLIGHTS
-    obj_disable_light(obj);
-#endif
     //! This clears all activeFlags. Since some of these flags disable behavior,
     //  setting it to 0 could potentially enable unexpected behavior. After an
     //  object is marked for deletion, it still updates on that frame (I think),
@@ -1044,6 +1040,14 @@ static void cur_obj_move_update_ground_air_flags(UNUSED f32 gravity, f32 bouncin
         }
     }
 
+    o->oMoveFlags &= ~(OBJ_MOVE_ABOVE_LAVA | OBJ_MOVE_ABOVE_DEATH_BARRIER);
+    if (o->oFloorType == SURFACE_BURNING) {
+        o->oMoveFlags |= OBJ_MOVE_ABOVE_LAVA;
+    } else if ((o->oFloorType == SURFACE_DEATH_PLANE) || (o->oFloorType == SURFACE_VERTICAL_WIND)) {
+        //! This maybe misses SURFACE_WARP
+        o->oMoveFlags |= OBJ_MOVE_ABOVE_DEATH_BARRIER;
+    }
+
     o->oMoveFlags &= ~OBJ_MOVE_MASK_IN_WATER;
 }
 
@@ -1372,13 +1376,6 @@ static void cur_obj_update_floor(void) {
 
     if (floor != NULL) {
         SurfaceType floorType = floor->type;
-        if (floorType == SURFACE_BURNING) {
-            o->oMoveFlags |= OBJ_MOVE_ABOVE_LAVA;
-        } else if ((floorType == SURFACE_DEATH_PLANE) || (floorType == SURFACE_VERTICAL_WIND)) {
-            //! This maybe misses SURFACE_WARP
-            o->oMoveFlags |= OBJ_MOVE_ABOVE_DEATH_BARRIER;
-        }
-
         o->oFloorType = floorType;
         o->oFloorRoom = floor->room;
     } else {
@@ -1388,8 +1385,6 @@ static void cur_obj_update_floor(void) {
 }
 
 static void cur_obj_update_floor_and_resolve_wall_collisions(s16 steepSlopeDegrees) {
-    o->oMoveFlags &= ~(OBJ_MOVE_ABOVE_LAVA | OBJ_MOVE_ABOVE_DEATH_BARRIER);
-
     if (o->activeFlags & (ACTIVE_FLAG_FAR_AWAY | ACTIVE_FLAG_IN_DIFFERENT_ROOM)) {
         cur_obj_update_floor();
         o->oMoveFlags &= ~(OBJ_MOVE_HIT_WALL | OBJ_MOVE_MASK_IN_WATER);
diff --git a/src/game/object_list_processor.c b/src/game/object_list_processor.c
index abb281834..5dd018e24 100644
--- a/src/game/object_list_processor.c
+++ b/src/game/object_list_processor.c
@@ -20,7 +20,6 @@
 #include "platform_displacement.h"
 #include "spawn_object.h"
 #include "puppyprint.h"
-#include "puppylights.h"
 #include "profiling.h"
 
 
@@ -382,10 +381,6 @@ s32 unload_deactivated_objects_in_list(struct ObjectNode *objList) {
         obj = obj->next;
 
         if ((gCurrentObject->activeFlags & ACTIVE_FLAG_ACTIVE) != ACTIVE_FLAG_ACTIVE) {
-#ifdef PUPPYLIGHTS
-            if (gCurrentObject->oLightID != 0xFFFF)
-                obj_disable_light(gCurrentObject);
-#endif
             // Prevent object from respawning after exiting and re-entering the
             // area
             if (!(gCurrentObject->oFlags & OBJ_FLAG_PERSISTENT_RESPAWN)) {
diff --git a/src/game/puppycam2.c b/src/game/puppycam2.c
index c6f9669ea..384d8af0c 100644
--- a/src/game/puppycam2.c
+++ b/src/game/puppycam2.c
@@ -223,7 +223,6 @@ s32 puppycam_move_spline(struct sPuppySpline splinePos[], struct sPuppySpline sp
     f32 tempProgress[2] = {0.0f, 0.0f};
     f32 progChange = 0.0f;
     s32 i;
-    Vec3f prevPos;
 
     if (gPuppyCam.splineIndex == 65000) {
         gPuppyCam.splineIndex = index;
@@ -236,7 +235,6 @@ s32 puppycam_move_spline(struct sPuppySpline splinePos[], struct sPuppySpline sp
             return TRUE;
         }
     }
-    vec3f_set(prevPos, gPuppyCam.pos[0], gPuppyCam.pos[1], gPuppyCam.pos[2]);
 
     for (i = 0; i < 4; i++) {
         vec3f_set(tempPoints[i], splinePos[gPuppyCam.splineIndex + i].pos[0], splinePos[gPuppyCam.splineIndex + i].pos[1], splinePos[gPuppyCam.splineIndex + i].pos[2]);
@@ -288,19 +286,23 @@ static void puppycam_process_cutscene(void) {
 #define BLANK 0, 0, 0, ENVIRONMENT, 0, 0, 0, ENVIRONMENT
 
 static void puppycam_display_box(s32 x1, s32 y1, s32 x2, s32 y2, u8 r, u8 g, u8 b, u8 a) {
-    gDPSetCombineMode(gDisplayListHead++, BLANK, BLANK);
-    gDPSetCycleType(  gDisplayListHead++, G_CYC_1CYCLE);
+    Gfx *tempGfxHead = gDisplayListHead; // Append through a local cursor; it is stored back to gDisplayListHead once at the end.
+
+    gDPSetCombineMode(tempGfxHead++, BLANK, BLANK); // BLANK (defined above) selects ENVIRONMENT, so the box takes the env colour set below.
+    gDPSetCycleType(  tempGfxHead++, G_CYC_1CYCLE);
     if (a !=255) {
-        gDPSetRenderMode(gDisplayListHead++, G_RM_XLU_SURF, G_RM_XLU_SURF2);
+        gDPSetRenderMode(tempGfxHead++, G_RM_XLU_SURF, G_RM_XLU_SURF2); // Any alpha other than 255 uses the translucent surface mode.
     } else {
-        gDPSetRenderMode(gDisplayListHead++, G_RM_OPA_SURF, G_RM_OPA_SURF);
+        gDPSetRenderMode(tempGfxHead++, G_RM_OPA_SURF, G_RM_OPA_SURF2); // Pair the mode with its "2" variant, matching the translucent branch.
     }
-    gDPSetEnvColor(   gDisplayListHead++, r, g, b, a);
-    gDPFillRectangle( gDisplayListHead++, x1, y1, x2, y2);
-    gDPPipeSync(      gDisplayListHead++);
-    gDPSetEnvColor(   gDisplayListHead++, 255, 255, 255, 255);
-    gDPSetCycleType(  gDisplayListHead++, G_CYC_1CYCLE);
-    gSPDisplayList(   gDisplayListHead++,dl_hud_img_end);
+    gDPSetEnvColor(   tempGfxHead++, r, g, b, a); // Box colour and opacity.
+    gDPFillRectangle( tempGfxHead++, x1, y1, x2, y2);
+    gDPPipeSync(      tempGfxHead++);
+    gDPSetEnvColor(   tempGfxHead++, 255, 255, 255, 255); // Put the env colour back to opaque white after drawing.
+    gDPSetCycleType(  tempGfxHead++, G_CYC_1CYCLE);
+    gSPDisplayList(   tempGfxHead++,dl_hud_img_end);
+
+    gDisplayListHead = tempGfxHead; // Publish everything appended above.
 }
 
 //I actually took the time to redo this, properly. Lmao. Please don't bully me over this anymore :(
@@ -680,7 +682,6 @@ static void puppycam_input_hold_preset2(f32 ivX) {
 
 // Another alternative control scheme. This one aims to mimic the parallel camera scheme down to the last bit from the original game.
 static void puppycam_input_hold_preset3(f32 ivX) {
-    f32 stickMag[2] = {gPlayer1Controller->rawStickX*0.65f, gPlayer1Controller->rawStickY*0.2f};
     // Just in case it happens to be nonzero.
     gPuppyCam.yawAcceleration = 0;
 
@@ -1374,7 +1375,7 @@ static void puppycam_collision(void) {
     vec3f_normalize(dirToCam);
     // Get the vector from mario's head to the camera plus the extra check dist
     Vec3f vecToCam;
-    vec3_prod_val(vecToCam, dirToCam, colCheckDist);
+    vec3_scale_dest(vecToCam, dirToCam, colCheckDist);
 
     dist[0] = find_surface_on_ray(target[0], vecToCam, &surf[0], hitpos[0], RAYCAST_FIND_FLOOR | RAYCAST_FIND_CEIL | RAYCAST_FIND_WALL);
     dist[1] = find_surface_on_ray(target[1], vecToCam, &surf[1], hitpos[1], RAYCAST_FIND_FLOOR | RAYCAST_FIND_CEIL | RAYCAST_FIND_WALL);
@@ -1391,7 +1392,7 @@ static void puppycam_collision(void) {
             closestDist -= surfOffset;
             // Allow the camera to ride right up next to the wall (mario's wall radius is 50u so this is safe)
             closestDist = MAX(closestDist, 50);
-            vec3_mul_val(dirToCam, closestDist);
+            vec3_scale(dirToCam, closestDist);
             vec3_sum(gPuppyCam.pos, target[0], dirToCam);
 
             // If the camera is uncomfortably close to the wall, move it up a bit
diff --git a/src/game/puppylights.c b/src/game/puppylights.c
deleted file mode 100644
index ad01a4e20..000000000
--- a/src/game/puppylights.c
+++ /dev/null
@@ -1,382 +0,0 @@
-///Puppylights 2.0 by Fazana. What happened to 1.0? Tragic accident.
-/**
-Intended for use with manipulating existing Lights1 structs for objects in real time.
-Can support static lights that are loaded with the level, or lights created by objects.
-
-Puppylights is generally intended to be used with things that don't directly use lights to colour
-themselves. Inside the main function, you can pass through a colour to override the default light
-but it will not be affected by environmental tinting. If you wish for an object to emit a light,
-simply set the object flag OBJ_FLAG_EMIT_LIGHT and set some values to o->puppylight.
-
-For easy light modification, you can call set_light_properties, so set all the attributes of any
-given loaded puppylight struct. Objects will ignore x, y, z, active and room, as it will set all
-of these automatically. It will force the PUPPYLIGHT_DYNAMIC flag, too.
-
-If you're introducing a static light in the level script with PUPPYLIGHT_NODE, ensure it's contained
-inside the respective area node it's going to be inside, otherwise it will not show up. If you do not
-use rooms in your level, or if you wish for this light to be seen from any room, use -1 for that param.
-
-If you have visual debug enabled, light nodes will show up as magenta in the world. They will be
-shaped and rotated correctly, for accurate representation of their properties.
-**/
-
-#include <ultra64.h>
-#include "types.h"
-#include "puppylights.h"
-#include "area.h"
-#include "engine/math_util.h"
-#include "string.h"
-#include "object_fields.h"
-#include "object_constants.h"
-#include "camera.h"
-#include "memory.h"
-#include "print.h"
-#include "debug_box.h"
-#include "object_list_processor.h"
-#include "level_update.h"
-#include "engine/surface_collision.h"
-#include "surface_terrains.h"
-
-#ifdef PUPPYLIGHTS
-
-Lights1 gLevelLight; // Existing ambient light in the area. Will be set by the level script, though can always be changed afterwards if desired.
-u8 levelAmbient = FALSE;
-Lights1 *sLightBase; // The base value where lights are written to when worked with.
-Lights1 sDefaultLights = gdSPDefLights1(0x7F, 0x7F, 0x7F, 0xFE, 0xFE, 0xFE, 0x28, 0x28, 0x28); // Default lights default lights
-u16 gNumLights = 0; // How many lights are loaded.
-u16 gDynLightStart = 0; // Where the dynamic lights will start.
-struct PuppyLight *gPuppyLights[MAX_LIGHTS]; // This contains all the loaded data.
-struct MemoryPool *gLightsPool; // The memory pool where the above is stored.
-
-// Runs after an area load, allocates the dynamic light slots.
-void puppylights_allocate(void) {
-    s32 numAllocate = MIN(MAX_LIGHTS - gNumLights, MAX_LIGHTS_DYNAMIC);
-    s32 i;
-
-    gDynLightStart = gNumLights;
-
-    if (numAllocate <= 0) { // If this happens you've allocated too many static lights and therefore cucked dynamic.
-        return;
-    }
-    // Now it has the number it wants, it will allocate this many extra lights, intended for dynamic lights.
-    for (i = 0; i < numAllocate; i++) {
-        gPuppyLights[gNumLights] = mem_pool_alloc(gLightsPool, sizeof(struct PuppyLight));
-        if (gPuppyLights[gNumLights] == NULL) {
-            return;
-        }
-        gPuppyLights[gNumLights]->active = FALSE;
-        gPuppyLights[gNumLights]->flags = 0;
-        gNumLights++;
-    }
-}
-
-extern Mat4 gMatStack[32];
-
-// Function that iterates through each light.
-void puppylights_iterate(struct PuppyLight *light, Lights1 *src, struct Object *obj, s32 flags) {
-    Lights1 *tempLight;
-    s32 lightPos[2];
-    Vec3i lightRelative;
-    Vec3i lightDir = {0, 0, 0};
-    s32 i;
-    s32 colour;
-    s32 ambient;
-    f64 scaleOrig;
-    f32 scale;
-    f32 scale2;
-    f64 scaleVal = 1.0f;
-    Vec3f debugPos[2];
-
-    // Relative positions of the object vs. the centre of the node.
-    lightRelative[0] = light->pos[0][0] - obj->oPosX;
-    lightRelative[1] = light->pos[0][1] - obj->oPosY;
-    lightRelative[2] = light->pos[0][2] - obj->oPosZ;
-
-    // If the nodes X and Z values are equal, then a check is made if the angle is a derivative of 90.
-    // If so, then it will completely skip over the calculation that figures out position from rotation.
-    // If it's a cylinder, then it ignores that check, simply because an equal sided cylinder will have the
-    // same result no matter the yaw. If neither is true, then it simply checks if it's 180 degrees, since
-    // That will just be the same as 0.
-    if (light->pos[1][0] == light->pos[1][2]) {
-        if (light->yaw % 0x4000 == 0 || light->flags & PUPPYLIGHT_SHAPE_CYLINDER) {
-            lightPos[0] = lightRelative[0];
-            lightPos[1] = lightRelative[2];
-            goto skippingTrig;
-        }
-    } else if (light->yaw % 0x8000 == 0) {
-        lightPos[0] = lightRelative[0];
-        lightPos[1] = lightRelative[2];
-        goto skippingTrig;
-    }
-
-    // Get the position based off the rotation of the box.
-    lightPos[0] = lightRelative[2] * sins(-light->yaw) + lightRelative[0] * coss(-light->yaw);
-    lightPos[1] = lightRelative[2] * coss(-light->yaw) - lightRelative[0] * sins(-light->yaw);
-    skippingTrig:
-
-#ifdef VISUAL_DEBUG
-    vec3f_set(debugPos[0], light->pos[0][0], light->pos[0][1], light->pos[0][2]);
-    vec3f_set(debugPos[1], light->pos[1][0], light->pos[1][1], light->pos[1][2]);
-    debug_box_color(0xFF00FF08);
-    if (light->flags & PUPPYLIGHT_SHAPE_CYLINDER) {
-        debug_box_rot(debugPos[0], debugPos[1], light->yaw, DEBUG_SHAPE_CYLINDER | DEBUG_UCODE_DEFAULT);
-    } else {
-        debug_box_rot(debugPos[0], debugPos[1], light->yaw, DEBUG_SHAPE_BOX | DEBUG_UCODE_DEFAULT);
-    }
-#endif
-    // Check if the object is inside the box, after correcting it for rotation.
-    if (-light->pos[1][0] < lightPos[0] && lightPos[0] < light->pos[1][0] &&
-        -light->pos[1][1] < lightRelative[1] && lightRelative[1] < light->pos[1][1] &&
-        -light->pos[1][2] < lightPos[1] && lightPos[1] < light->pos[1][2]) {
-        // If so, then start making preparations to see how alongside they're in.
-        // This takes the largest side of the box and multiplies the other axis to match the numbers.
-        // This way, the colour value will scale correctly, no matter which side is entered.
-        // Because positions are a vector, and Y is up, it means tempID needs to be multiplied
-        // By 2 in order to reach the X and Z axis. Thanks SM64.
-        // It will skip scaling the opposite axis if there's no need to.
-
-        // Every axis needs to be the same as Z, so X and Y, if necessary, will be scaled to match it.
-        // This is done, so that when calculating scale, it's done spherically.
-        if (light->pos[1][0] != light->pos[1][2]) {
-            lightPos[0] /= ((f32)light->pos[1][0] / light->pos[1][2]);
-        }
-        // Same for Y axis.
-        if (light->pos[1][1] != light->pos[1][2]) {
-            lightRelative[1] /= ((f32)light->pos[1][1] / light->pos[1][2]);
-        }
-        if (light->flags & PUPPYLIGHT_IGNORE_Y) {
-            scaleOrig = sqr(lightPos[0]) + sqr(lightPos[1]);
-        } else {
-            scaleOrig = sqr(lightPos[0]) + sqr(lightRelative[1]) + sqr(lightPos[1]);
-        }
-        scaleVal = (light->pos[1][2]*light->pos[1][2]);
-        // If it's a cylinder, then bin anything outside it.
-        if (light->flags & PUPPYLIGHT_SHAPE_CYLINDER) {
-            if (scaleOrig > scaleVal) {
-                return;
-            }
-        }
-    }
-    else
-        return;
-
-    f32 epc = (f32)(light->epicentre/100.0f);
-    tempLight = segmented_to_virtual(src);
-    //Now we have a scale value and a scale factor, we can start lighting things up.
-    // Convert to a percentage.
-    scale = CLAMP(scaleOrig/scaleVal, 0.0f, 1.0f);
-    // Reduce scale2 by the epicentre.
-    scale2 = CLAMP((scale - epc) * (1 + epc), 0.0f, 1.0f);
-
-    // Get the direction numbers we want by applying some maths to the relative positions. We use 64 because light directions range from -64 to 63.
-    // Note: can this be optimised further? Simply squaring lightRelative and then dividing it by preScale doesn't work.
-    if (light->flags & PUPPYLIGHT_DIRECTIONAL) {
-        lightDir[0] = ((lightRelative[0]) * 64.0f) / light->pos[1][0];
-        lightDir[1] = ((lightRelative[1]) * 64.0f) / light->pos[1][1];
-        lightDir[2] = ((lightRelative[2]) * 64.0f) / light->pos[1][2];
-    }
-    //Get direction if applicable.
-    for (i = 0; i < 3; i++) {
-        //So it works by starting from the final colour, and then lerping to the original colour, by a factor of the epicentre corrected scale. Light opacity affects this further.
-        colour = approach_f32_asymptotic(light->rgba[i], tempLight->l[0].l.col[i], scale2 * ((f32)light->rgba[3]/255.0f));
-        // If it's a directional light, then increase the current ambient by 50%, to give the effect better.
-        // Otherwise, just normalise the brightness to keep it in line with the current ambient.
-        // And now to apply the values.
-        tempLight->l[0].l.col[i] = colour;
-        tempLight->l[0].l.colc[i] = colour;
-        // Ambient, too.
-        if (!(light->flags & PUPPYLIGHT_DIRECTIONAL)) {
-            ambient = approach_f32_asymptotic(light->rgba[i]/2, tempLight->a.l.col[i], scale*((f32)light->rgba[3] / 255.0f));
-            tempLight->a.l.col[i] = ambient;
-            tempLight->a.l.colc[i] = ambient;
-        }
-        // A slightly hacky way to offset the ambient lighting in order to prevent directional lighting from having a noticeable change in ambient brightness.
-        if (flags & LIGHTFLAG_DIRECTIONAL_OFFSET) {
-            ambient = approach_f32_asymptotic(MIN(tempLight->a.l.col[i] * 2, 0xFF), tempLight->a.l.col[i], scale2*((f32)light->rgba[3] / 255.0f));
-            tempLight->a.l.col[i] = ambient;
-            tempLight->a.l.colc[i] = ambient;
-        }
-        // Apply direction. It takes the relative positions, and then multiplies them with the perspective matrix to get a correct direction.
-        // Index 1 of the first dimension of gMatStack is perspective. Note that if you ever decide to cheat your way into rendering things after the game does :^)
-        if (light->flags & PUPPYLIGHT_DIRECTIONAL) {
-            tempLight->l->l.dir[i] = approach_f32_asymptotic((s8)(lightDir[0] * gMatStack[1][0][i] + lightDir[1] * gMatStack[1][1][i] + lightDir[2] * gMatStack[1][2][i]), tempLight->l->l.dir[i], scale);
-        }
-    }
-}
-
-// Main function. Run this in the object you wish to illuminate, and just give it its light, object pointer and any potential flags if you want to use them.
-// If the object has multiple lights, then you run this for each light.
-void puppylights_run(Lights1 *src, struct Object *obj, s32 flags, u32 baseColour) {
-    s32 i;
-    s32 numlights = 0;
-    s32 offsetPlaced = 0;
-    s32 lightFlags = flags;
-
-    if (gCurrLevelNum < LEVEL_BBH) {
-        return;
-    }
-    // Checks if there's a hardset colour. Colours are only the first 3 bytes, so you can really put whatever you want in the last.
-    // If there isn't a colour, then it decides whether to apply the ambient lighting, or the default lighting as the baseline.
-    // Otherwise, it hardsets a colour to begin with. I don't recommend you use this, simply because it's intended to be used
-    // As a hacky quick-fix for models coloured by lights. Lightcoloured models don't blend nearly as nicely as ones coloured
-    // By other means.
-    if (baseColour < 0x100) {
-        sLightBase = (levelAmbient ? &gLevelLight : &sDefaultLights);
-    } else {
-        s32 colour;
-        sLightBase = (levelAmbient) ? &gLevelLight : &sDefaultLights;
-        for (i = 0; i < 3; i++) {
-            colour = (((baseColour >> (24-(i*8)))) & 0xFF);
-            sLightBase->l[0].l.col[i] = colour;
-            sLightBase->l[0].l.colc[i] = colour;
-            sLightBase->a.l.col[i] = colour/2;
-            sLightBase->a.l.colc[i] = colour/2;
-            sLightBase->l->l.dir[i] = 0x28;
-        }
-    }
-    memcpy(segmented_to_virtual(src), &sLightBase[0], sizeof(Lights1));
-
-    for (i = 0; i < gNumLights; i++) {
-        if (gPuppyLights[i]->rgba[3] > 0 && gPuppyLights[i]->active == TRUE && gPuppyLights[i]->area == gCurrAreaIndex && (gPuppyLights[i]->room == -1 || gPuppyLights[i]->room == gMarioCurrentRoom)) {
-            if (gPuppyLights[i]->flags & PUPPYLIGHT_DIRECTIONAL && !offsetPlaced) {
-                lightFlags |= LIGHTFLAG_DIRECTIONAL_OFFSET;
-                offsetPlaced = 1;
-            } else {
-                lightFlags &= ~LIGHTFLAG_DIRECTIONAL_OFFSET;
-            }
-            puppylights_iterate(gPuppyLights[i], src, obj, lightFlags);
-            numlights++;
-        }
-    }
-}
-
-// Sets and updates dynamic lights from objects.
-// 0xFFFF is essentially the null ID. If the display flag is met, it will find and set an ID, otherwise it frees up the spot.
-void puppylights_object_emit(struct Object *obj) {
-    s32 i;
-    if (gCurrLevelNum < LEVEL_BBH) {
-        return;
-    }
-    if (obj->oFlags & OBJ_FLAG_EMIT_LIGHT) {
-        f64 dist = ((obj->oPosX - gMarioState->pos[0]) * (obj->oPosX - gMarioState->pos[0])) +
-               ((obj->oPosY - gMarioState->pos[1]) * (obj->oPosY - gMarioState->pos[1])) +
-               ((obj->oPosZ - gMarioState->pos[2]) * (obj->oPosZ - gMarioState->pos[2]));
-        f64 lightSize = ((obj->puppylight.pos[1][0]) * (obj->puppylight.pos[1][0])) +
-                        ((obj->puppylight.pos[1][1]) * (obj->puppylight.pos[1][1])) +
-                        ((obj->puppylight.pos[1][2]) * (obj->puppylight.pos[1][2]));
-        if (dist > lightSize) {
-            goto deallocate; // That's right. I used a goto. Eat your heart out xkcd.
-        }
-        if (obj->oLightID == 0xFFFF) {
-            s32 fadingExists = FALSE;
-            if (ABS(gNumLights - gDynLightStart) < MAX_LIGHTS_DYNAMIC) {
-                goto deallocate;
-            }
-            for (i = gDynLightStart; i < MIN(gDynLightStart+MAX_LIGHTS_DYNAMIC, MAX_LIGHTS); i++) {
-                if (gPuppyLights[i]->active == TRUE) {
-                    if (gPuppyLights[i]->flags & PUPPYLIGHT_DELETE) {
-                        fadingExists = TRUE;
-                    }
-                    continue;
-                }
-                memcpy(gPuppyLights[i], &obj->puppylight, sizeof(struct PuppyLight));
-                gPuppyLights[i]->active = TRUE;
-                gPuppyLights[i]->area = gCurrAreaIndex;
-                gPuppyLights[i]->room = obj->oRoom;
-                obj->oLightID = i;
-                goto updatepos;
-            }
-            // Go through all the lights again, now this time, ignore the fading light flag and overwrite them.
-            if (fadingExists) {
-                for (i = gDynLightStart; i < MIN(gDynLightStart+MAX_LIGHTS_DYNAMIC, MAX_LIGHTS); i++) {
-                    if (gPuppyLights[i]->active == TRUE && !(gPuppyLights[i]->flags & PUPPYLIGHT_DELETE)) {
-                        continue;
-                    }
-                    memcpy(gPuppyLights[i], &obj->puppylight, sizeof(struct PuppyLight));
-                    gPuppyLights[i]->active = TRUE;
-                    gPuppyLights[i]->area = gCurrAreaIndex;
-                    gPuppyLights[i]->room = obj->oRoom;
-                    gPuppyLights[i]->flags &= ~PUPPYLIGHT_DELETE;
-                    obj->oLightID = i;
-                    goto updatepos;
-                }
-            }
-        } else {
-            updatepos:
-            gPuppyLights[obj->oLightID]->pos[0][0] = obj->oPosX;
-            gPuppyLights[obj->oLightID]->pos[0][1] = obj->oPosY;
-            gPuppyLights[obj->oLightID]->pos[0][2] = obj->oPosZ;
-        }
-    } else {
-        deallocate:
-        if (obj->oLightID != 0xFFFF) {
-            gPuppyLights[obj->oLightID]->active = FALSE;
-            gPuppyLights[obj->oLightID]->flags = 0;
-        }
-        obj->oLightID = 0xFFFF;
-    }
-}
-
-// A bit unorthodox, but anything to avoid having to set up data to pass through in the original function.
-// Objects will completely ignore X, Y, Z and active though.
-void set_light_properties(struct PuppyLight *light, s32 x, s32 y, s32 z, s32 offsetX, s32 offsetY, s32 offsetZ, s32 yaw, s32 epicentre, s32 colour, s32 flags, s32 room, s32 active) {
-    light->active = active;
-    light->pos[0][0] = x;
-    light->pos[0][1] = y;
-    light->pos[0][2] = z;
-    light->pos[1][0] = MAX(offsetX, 10);
-    light->pos[1][1] = MAX(offsetY, 10);
-    light->pos[1][2] = MAX(offsetZ, 10);
-    light->rgba[0] = (colour >> 24) & 0xFF;
-    light->rgba[1] = (colour >> 16) & 0xFF;
-    light->rgba[2] = (colour >>  8) & 0xFF;
-    light->rgba[3] = colour & 0xFF;
-    light->yaw = yaw;
-    light->area = gCurrAreaIndex;
-    light->room = room;
-    light->epicentre = epicentre;
-    if (!(flags & PUPPYLIGHT_SHAPE_CYLINDER) && flags & PUPPYLIGHT_SHAPE_CUBE)
-        light->flags |= PUPPYLIGHT_SHAPE_CYLINDER;
-    light->flags |= flags | PUPPYLIGHT_DYNAMIC;
-}
-
-// You can run these in objects to enable or disable their light properties.
-void cur_obj_enable_light(void) {
-    gCurrentObject->oFlags |= OBJ_FLAG_EMIT_LIGHT;
-}
-
-void cur_obj_disable_light(void) {
-    gCurrentObject->oFlags &= ~OBJ_FLAG_EMIT_LIGHT;
-    if (gPuppyLights[gCurrentObject->oLightID] && gCurrentObject->oLightID != 0xFFFF)
-        gPuppyLights[gCurrentObject->oLightID]->flags |= PUPPYLIGHT_DELETE;
-}
-
-void obj_enable_light(struct Object *obj) {
-    obj->oFlags |= OBJ_FLAG_EMIT_LIGHT;
-}
-
-void obj_disable_light(struct Object *obj) {
-    obj->oFlags &= ~OBJ_FLAG_EMIT_LIGHT;
-    if (gPuppyLights[obj->oLightID] && obj->oLightID != 0xFFFF) {
-        gPuppyLights[obj->oLightID]->flags |= PUPPYLIGHT_DELETE;
-    }
-}
-
-// This is ran during a standard area update
-void delete_lights(void) {
-    s32 i;
-
-    for (i = 0; i < gNumLights; i++) {
-        if (gPuppyLights[i]->active == TRUE && gPuppyLights[i]->flags & PUPPYLIGHT_DELETE) {
-            gPuppyLights[i]->pos[1][0] = approach_f32_asymptotic(gPuppyLights[i]->pos[1][0], 0, 0.15f);
-            gPuppyLights[i]->pos[1][1] = approach_f32_asymptotic(gPuppyLights[i]->pos[1][1], 0, 0.15f);
-            gPuppyLights[i]->pos[1][2] = approach_f32_asymptotic(gPuppyLights[i]->pos[1][2], 0, 0.15f);
-            if (gPuppyLights[i]->pos[1][0] < 1.0f && gPuppyLights[i]->pos[1][1] < 1.0f && gPuppyLights[i]->pos[1][2] < 1.0f) {
-                gPuppyLights[i]->flags &= ~ PUPPYLIGHT_DELETE;
-                gPuppyLights[i]->active = FALSE;
-            }
-        }
-    }
-}
-
-#endif
diff --git a/src/game/puppylights.h b/src/game/puppylights.h
deleted file mode 100644
index bd0bcbcf9..000000000
--- a/src/game/puppylights.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifdef PUPPYLIGHTS
-#ifndef PUPPYLIGHTS_H
-#define PUPPYLIGHTS_H
-
-#include "types.h"
-#include "command_macros_base.h"
-
-// The maximum number of lights that can be loaded at once. Any further lights that attempt to be created past this will simply not spawn.
-#define MAX_LIGHTS 32
-// The maximum number of dynamic lights available at one time.
-#define MAX_LIGHTS_DYNAMIC 8
-
-// Two shapes. Choose your destiny.
-#define PUPPYLIGHT_SHAPE_CUBE     (1 << 0) // 0x01
-#define PUPPYLIGHT_SHAPE_CYLINDER (1 << 1) // 0x02
-#define PUPPYLIGHT_DYNAMIC        (1 << 2) // 0x04
-#define PUPPYLIGHT_DIRECTIONAL    (1 << 3) // 0x08
-#define PUPPYLIGHT_SHADOW         (1 << 4) // 0x10
-#define PUPPYLIGHT_WET            (1 << 5) // 0x20
-#define PUPPYLIGHT_DELETE         (1 << 6) // 0x40
-#define PUPPYLIGHT_IGNORE_Y       (1 << 7) // 0x80
-
-#define LIGHTFLAG_DIRECTIONAL_OFFSET    0x1
-
-#define PUPPYLIGHT_ENVIRONMENT(ambientR, ambientG, ambientB, diffuseR, diffuseG, diffuseB, diffuseX, diffuseY, diffuseZ) \
-    CMD_BBBB(0x3F, 0x0C, ambientR, ambientG), \
-    CMD_BBBB(ambientB, diffuseR, diffuseG, diffuseB), \
-    CMD_BBBB(diffuseX, diffuseY, diffuseZ, 0x0)
-
-#define PUPPYLIGHT_NODE(r, g, b, a, x, y, z, offsetX, offsetY, offsetZ, yaw, epicentre, flags, room) \
-    CMD_BBBB(0x40, 0x18, r, g), \
-    CMD_BBH(b, a, x), \
-    CMD_HH(y, z), \
-    CMD_HH(offsetX, offsetY), \
-    CMD_HH(offsetZ, yaw), \
-    CMD_BBH(epicentre, flags, room)
-
-//How much RAM is allocated to puppylights
-#define PUPPYLIGHTS_POOL sizeof(struct PuppyLight) * MAX_LIGHTS
-
-extern Lights1 gLevelLight;
-extern u16 gNumLights;
-extern u8 levelAmbient;
-extern struct PuppyLight *gPuppyLights[MAX_LIGHTS];
-extern struct MemoryPool *gLightsPool;
-extern void puppylights_run(Lights1 *src, struct Object *obj, s32 flags, u32 baseColour);
-extern void puppylights_object_emit(struct Object *obj);
-extern void cur_obj_enable_light(void);
-extern void cur_obj_disable_light(void);
-extern void obj_enable_light(struct Object *obj);
-extern void obj_disable_light(struct Object *obj);
-extern void set_light_properties(struct PuppyLight *light, s32 x, s32 y, s32 z, s32 offsetX, s32 offsetY, s32 offsetZ, s32 yaw, s32 epicentre, s32 colour, s32 flags, s32 room, s32 active);
-extern void puppylights_allocate(void);
-extern void delete_lights(void);
-
-#endif
-#endif
diff --git a/src/game/puppyprint.c b/src/game/puppyprint.c
index 9ef3b863f..6afdc709a 100644
--- a/src/game/puppyprint.c
+++ b/src/game/puppyprint.c
@@ -522,12 +522,16 @@ void puppyprint_render_minimal(void) {
 }
 
 void render_coverage_map(void) {
-    gDPSetCycleType(gDisplayListHead++, G_CYC_1CYCLE);
-    gDPSetBlendColor(gDisplayListHead++, 0xFF, 0xFF, 0xFF, 0xFF);
-    gDPSetPrimDepth(gDisplayListHead++, 0xFFFF, 0xFFFF);
-    gDPSetDepthSource(gDisplayListHead++, G_ZS_PRIM);
-    gDPSetRenderMode(gDisplayListHead++, G_RM_VISCVG, G_RM_VISCVG2);
-    gDPFillRectangle(gDisplayListHead++, 0,0, SCREEN_WIDTH-1, SCREEN_HEIGHT-1);
+    Gfx *tempGfxHead = gDisplayListHead; // Append through a local cursor; it is stored back to gDisplayListHead once at the end.
+
+    gDPSetCycleType(tempGfxHead++, G_CYC_1CYCLE);
+    gDPSetBlendColor(tempGfxHead++, 0xFF, 0xFF, 0xFF, 0xFF);
+    gDPSetPrimDepth(tempGfxHead++, 0xFFFF, 0xFFFF);
+    gDPSetDepthSource(tempGfxHead++, G_ZS_PRIM); // Depth source is the primitive depth set on the previous line.
+    gDPSetRenderMode(tempGfxHead++, G_RM_VISCVG, G_RM_VISCVG2); // Coverage-visualisation render mode (per the G_RM_VISCVG name).
+    gDPFillRectangle(tempGfxHead++, 0,0, SCREEN_WIDTH-1, SCREEN_HEIGHT-1); // Rectangle spans the whole screen.
+
+    gDisplayListHead = tempGfxHead; // Publish everything appended above.
 }
 
 void puppycamera_debug_view(void) {
@@ -1639,7 +1643,7 @@ void print_small_text(s32 x, s32 y, const char *str, s32 align, s32 amount, u8 f
         }
 
         get_char_from_byte(&textX, &textPos[0], str[i], &widthX, &spaceX, &offsetY, font);
-        s32 goddamnJMeasure = textX == 256 ? -1 : 0; // Hack to fix a rendering bug.
+        s32 goddamnJMeasure = textX == 256 ? 1 : 0; // Hack to fix a rendering bug.
         if (str[i] != ' ' && str[i] != '\t') {
             if (xlu != prevxlu) {
                 prevxlu = xlu;
@@ -1732,7 +1736,7 @@ void print_small_text_light(s32 x, s32 y, const char *str, s32 align, s32 amount
         }
 
         get_char_from_byte(&textX, &textPos[0], str[i], &widthX, &spaceX, &offsetY, font);
-        s32 goddamnJMeasure = textX == 256 ? -1 : 0; // Hack to fix a rendering bug.
+        s32 goddamnJMeasure = textX == 256 ? 1 : 0; // Hack to fix a rendering bug.
         if (str[i] != ' ' && str[i] != '\t') {
             if (xlu != prevxlu) {
                 prevxlu = xlu;
@@ -2088,7 +2092,7 @@ void render_multi_image(Texture *image, s32 x, s32 y, s32 width, s32 height, UNU
 
         gDPLoadSync(gDisplayListHead++);
         gDPLoadTextureTile(gDisplayListHead++,
-            image, G_IM_FMT_RGBA, G_IM_SIZ_16b, width, height, posW, posH, ((posW + imW) - 1), ((posH + imH) - 1), 0, (G_TX_NOMIRROR | G_TX_CLAMP), (G_TX_NOMIRROR | G_TX_CLAMP), maskW, maskH, 0, 0);
+            image, G_IM_FMT_RGBA, G_IM_SIZ_16b, width, height, posW, posH, ((posW + imW) - 1), ((posH + imH) - 1), 0, (G_TX_NOMIRROR | G_TX_WRAP), (G_TX_NOMIRROR | G_TX_WRAP), maskW, maskH, 0, 0);
         gSPScisTextureRectangle(gDisplayListHead++,
             ((x + posW) << 2),
             ((y + posH) << 2),
@@ -2104,7 +2108,7 @@ void render_multi_image(Texture *image, s32 x, s32 y, s32 width, s32 height, UNU
             posW = i * imW;
             gDPLoadSync(gDisplayListHead++);
             gDPLoadTextureTile(gDisplayListHead++,
-                image, G_IM_FMT_RGBA, G_IM_SIZ_16b, width, height, posW, posH, ((posW + imW) - 1), (height - 1), 0, (G_TX_NOMIRROR | G_TX_CLAMP), (G_TX_NOMIRROR | G_TX_CLAMP), maskW, maskH, 0, 0);
+                image, G_IM_FMT_RGBA, G_IM_SIZ_16b, width, height, posW, posH, ((posW + imW) - 1), (height - 1), 0, (G_TX_NOMIRROR | G_TX_WRAP), (G_TX_NOMIRROR | G_TX_WRAP), maskW, maskH, 0, 0);
             gSPScisTextureRectangle(gDisplayListHead++,
                 (x + posW) << 2,
                 (y + posH) << 2,
diff --git a/src/game/rendering_graph_node.c b/src/game/rendering_graph_node.c
index 7ea44b152..cb0d62cbb 100644
--- a/src/game/rendering_graph_node.c
+++ b/src/game/rendering_graph_node.c
@@ -195,76 +195,10 @@ static const Gfx dl_silhouette_end[] = {
 struct RenderPhase {
     u8 startLayer;
     u8 endLayer;
-#ifdef OBJECTS_REJ
-    u8 ucode;
-#endif
 };
 
 static struct RenderPhase sRenderPhases[] = {
-#ifdef OBJECTS_REJ
- #if SILHOUETTE
-    // Silhouette, .rej
-    [RENDER_PHASE_ZEX_BEFORE_SILHOUETTE]   = {
-        .startLayer = LAYER_FIRST,
-        .endLayer   = LAYER_LAST_BEFORE_SILHOUETTE,
-        .ucode      = GRAPH_NODE_UCODE_DEFAULT
-    },
-    [RENDER_PHASE_REJ_ZB]                  = {
-        .startLayer = LAYER_ZB_FIRST,
-        .endLayer   = LAYER_LAST_BEFORE_SILHOUETTE,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
-    [RENDER_PHASE_REJ_SILHOUETTE]          = {
-        .startLayer = LAYER_SILHOUETTE_FIRST,
-        .endLayer   = LAYER_SILHOUETTE_LAST,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
-    [RENDER_PHASE_REJ_NON_SILHOUETTE]      = {
-        .startLayer = LAYER_SILHOUETTE_FIRST,
-        .endLayer   = LAYER_SILHOUETTE_LAST,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
-    [RENDER_PHASE_REJ_OCCLUDE_SILHOUETTE]  = {
-        .startLayer = LAYER_OCCLUDE_SILHOUETTE_FIRST,
-        .endLayer   = LAYER_OCCLUDE_SILHOUETTE_LAST,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
-    [RENDER_PHASE_ZEX_AFTER_SILHOUETTE]    = {
-        .startLayer = LAYER_OCCLUDE_SILHOUETTE_FIRST,
-        .endLayer   = LAYER_LAST,
-        .ucode      = GRAPH_NODE_UCODE_DEFAULT
-    },
-    [RENDER_PHASE_REJ_NON_ZB]              = {
-        .startLayer = LAYER_NON_ZB_FIRST,
-        .endLayer   = LAYER_LAST,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
- #else
-    // No silhouette, .rej
-    [RENDER_PHASE_ZEX_BG]                  = {
-        .startLayer = LAYER_FIRST,
-        .endLayer   = LAYER_FIRST,
-        .ucode      = GRAPH_NODE_UCODE_DEFAULT
-    },
-    [RENDER_PHASE_REJ_ZB]                  = {
-        .startLayer = LAYER_ZB_FIRST,
-        .endLayer   = LAYER_ZB_LAST,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
-    [RENDER_PHASE_ZEX_ALL]                 = {
-        .startLayer = LAYER_ZB_FIRST,
-        .endLayer   = LAYER_LAST,
-        .ucode      = GRAPH_NODE_UCODE_DEFAULT
-    },
-    [RENDER_PHASE_REJ_NON_ZB]              = {
-        .startLayer = LAYER_NON_ZB_FIRST,
-        .endLayer   = LAYER_LAST,
-        .ucode      = GRAPH_NODE_UCODE_REJ
-    },
- #endif
-#else
- #if SILHOUETTE
-    // Silhouette, no .rej
+#if SILHOUETTE
     [RENDER_PHASE_ZEX_BEFORE_SILHOUETTE]   = {
         .startLayer = LAYER_FIRST,
         .endLayer   = LAYER_LAST_BEFORE_SILHOUETTE,
@@ -289,46 +223,16 @@ static struct RenderPhase sRenderPhases[] = {
         .startLayer = LAYER_NON_ZB_FIRST,
         .endLayer   = LAYER_LAST,
     },
-
- #else
-    // No silhouette, no .rej
+#else
     [RENDER_PHASE_ZEX_ALL]                 = {
         .startLayer = LAYER_FIRST,
         .endLayer   = LAYER_LAST,
     },
-
- #endif
 #endif
 };
 
 extern const Gfx init_rsp[];
 
-#ifdef OBJECTS_REJ
-void switch_ucode(s32 ucode) {
-    // Set the ucode and RCP settings
-    switch (ucode) {
-        default: // GRAPH_NODE_UCODE_DEFAULT
-        case GRAPH_NODE_UCODE_DEFAULT:
-            gSPLoadUcodeL(gDisplayListHead++, gspF3DZEX2_NoN_PosLight_fifo); // F3DZEX2_PosLight
-            // Reload the necessary RSP settings
-            gSPDisplayList(gDisplayListHead++, init_rsp);
-            break;
-        case GRAPH_NODE_UCODE_REJ:
-            // Use .rej Microcode, skip sub-pixel processing on console
-            if (gEmulator & EMU_CONSOLE) {
-                gSPLoadUcodeL(gDisplayListHead++, gspF3DLX2_Rej_fifo); // F3DLX2_Rej
-            } else {
-                gSPLoadUcodeL(gDisplayListHead++, gspF3DEX2_Rej_fifo); // F3DEX2_Rej
-            }
-            // Reload the necessary RSP settings
-            gSPDisplayList(gDisplayListHead++, init_rsp);
-            // Set the clip ratio (see init_rsp)
-            gSPClipRatio(gDisplayListHead++, FRUSTRATIO_2);
-            break;
-    }
-}
-#endif
-
 #define UPPER_FIXED(x) ((int)((unsigned int)((x) * 0x10000) >> 16))
 #define LOWER_FIXED(x) ((int)((unsigned int)((x) * 0x10000) & 0xFFFF))
 
@@ -357,67 +261,65 @@ void geo_process_master_list_sub(struct GraphNodeMasterList *node) {
     s32 currLayer     = LAYER_FIRST;
     s32 startLayer    = LAYER_FIRST;
     s32 endLayer      = LAYER_LAST;
-    s32 ucode         = GRAPH_NODE_UCODE_DEFAULT;
     s32 phaseIndex    = RENDER_PHASE_FIRST;
     s32 enableZBuffer = (node->node.flags & GRAPH_RENDER_Z_BUFFER) != 0;
+    s32 finalPhase    = enableZBuffer ? RENDER_PHASE_END : 1;
     struct RenderModeContainer *mode1List = &renderModeTable_1Cycle[enableZBuffer];
     struct RenderModeContainer *mode2List = &renderModeTable_2Cycle[enableZBuffer];
+    Gfx *tempGfxHead = gDisplayListHead;
 
     // Loop through the render phases
-    for (phaseIndex = RENDER_PHASE_FIRST; phaseIndex < RENDER_PHASE_END; phaseIndex++) {
-        // Get the render phase information.
-        renderPhase = &sRenderPhases[phaseIndex];
-        startLayer  = renderPhase->startLayer;
-        endLayer    = renderPhase->endLayer;
-#ifdef OBJECTS_REJ
-        ucode       = renderPhase->ucode;
-        // Set the ucode for the current render phase
-        switch_ucode(ucode);
-        gSPLookAt(gDisplayListHead++, gCurLookAt);
-#endif
+    for (phaseIndex = RENDER_PHASE_FIRST; phaseIndex < finalPhase; phaseIndex++) {
         if (enableZBuffer) {
+            // Get the render phase information.
+            renderPhase = &sRenderPhases[phaseIndex];
+            startLayer  = renderPhase->startLayer;
+            endLayer    = renderPhase->endLayer;
             // Enable z buffer.
-            gDPPipeSync(gDisplayListHead++);
-            gSPSetGeometryMode(gDisplayListHead++, G_ZBUFFER);
+            gDPPipeSync(tempGfxHead++);
+            gSPSetGeometryMode(tempGfxHead++, G_ZBUFFER);
+        } else {
+            startLayer = LAYER_FORCE;
+            endLayer = LAYER_TRANSPARENT;
         }
         // Iterate through the layers on the current render phase.
         for (currLayer = startLayer; currLayer <= endLayer; currLayer++) {
             // Set 'currList' to the first DisplayListNode on the current layer.
-            currList = node->listHeads[ucode][currLayer];
+            currList = node->listHeads[currLayer];
 #if defined(DISABLE_AA) || !SILHOUETTE
             // Set the render mode for the current layer.
-            gDPSetRenderMode(gDisplayListHead++, mode1List->modes[currLayer],
+            gDPSetRenderMode(tempGfxHead++, mode1List->modes[currLayer],
                                                  mode2List->modes[currLayer]);
 #else
             if (phaseIndex == RENDER_PHASE_NON_SILHOUETTE) {
                 // To properly cover the silhouette, disable AA.
                 // The silhouette model does not have AA due to the hack used to prevent triangle overlap.
-                gDPSetRenderMode(gDisplayListHead++, (mode1List->modes[currLayer] & ~IM_RD),
+                gDPSetRenderMode(tempGfxHead++, (mode1List->modes[currLayer] & ~IM_RD),
                                                      (mode2List->modes[currLayer] & ~IM_RD));
             } else {
                 // Set the render mode for the current dl.
-                gDPSetRenderMode(gDisplayListHead++, mode1List->modes[currLayer],
+                gDPSetRenderMode(tempGfxHead++, mode1List->modes[currLayer],
                                                      mode2List->modes[currLayer]);
             }
 #endif
             // Iterate through all the displaylists on the current layer.
             while (currList != NULL) {
                 // Add the display list's transformation to the master list.
-                gSPMatrix(gDisplayListHead++, VIRTUAL_TO_PHYSICAL(currList->transform),
+                gSPMatrix(tempGfxHead++, VIRTUAL_TO_PHYSICAL(currList->transform),
                           (G_MTX_MODELVIEW | G_MTX_LOAD | G_MTX_NOPUSH));
 #if SILHOUETTE
                 if (phaseIndex == RENDER_PHASE_SILHOUETTE) {
                     // Add the current display list to the master list, with silhouette F3D.
-                    gSPDisplayList(gDisplayListHead++, dl_silhouette_begin);
-                    gSPDisplayList(gDisplayListHead++, currList->displayList);
-                    gSPDisplayList(gDisplayListHead++, dl_silhouette_end);
+                    gSPDisplayList(tempGfxHead++, dl_silhouette_begin);
+                    gSPDisplayList(tempGfxHead++, currList->displayList);
+                    gSPDisplayList(tempGfxHead++, dl_silhouette_end);
                 } else {
                     // Add the current display list to the master list.
-                    gSPDisplayList(gDisplayListHead++, currList->displayList);
+                    gSPDisplayList(tempGfxHead++, currList->displayList);
                 }
 #else
                 // Add the current display list to the master list.
-                gSPDisplayList(gDisplayListHead++, currList->displayList);
+                gSPDisplayList(tempGfxHead++, currList->displayList);
 #endif
                 // Move to the next DisplayListNode.
                 currList = currList->next;
@@ -427,21 +329,17 @@ void geo_process_master_list_sub(struct GraphNodeMasterList *node) {
 
     if (enableZBuffer) {
         // Disable z buffer.
-        gDPPipeSync(gDisplayListHead++);
-        gSPClearGeometryMode(gDisplayListHead++, G_ZBUFFER);
-    }
-#ifdef OBJECTS_REJ
- #if defined(F3DEX_GBI_2) && defined(VISUAL_DEBUG)
-    if (hitboxView) render_debug_boxes(DEBUG_UCODE_REJ);
- #endif
-    switch_ucode(GRAPH_NODE_UCODE_DEFAULT);
-#endif
+        gDPPipeSync(tempGfxHead++);
+        gSPClearGeometryMode(tempGfxHead++, G_ZBUFFER);
 #ifdef VISUAL_DEBUG
-    if ( hitboxView) render_debug_boxes(DEBUG_UCODE_DEFAULT | DEBUG_BOX_CLEAR);
-    // Load the world scale identity matrix
-    gSPMatrix(gDisplayListHead++, &identityMatrixWorldScale, G_MTX_MODELVIEW | G_MTX_LOAD | G_MTX_NOPUSH);
-    if (surfaceView) visual_surface_loop();
+        // Load the world scale identity matrix
+        gSPMatrix(tempGfxHead++, &identityMatrixWorldScale, G_MTX_MODELVIEW | G_MTX_LOAD | G_MTX_NOPUSH);
+        if (surfaceView) visual_surface_loop(&tempGfxHead);
+        render_debug_boxes(&tempGfxHead);
 #endif
+    }
+
+    gDisplayListHead = tempGfxHead;
 }
 
 /**
@@ -450,16 +348,11 @@ void geo_process_master_list_sub(struct GraphNodeMasterList *node) {
  * render modes of layers.
  */
 void geo_append_display_list(void *displayList, s32 layer) {
-    s32 ucode = GRAPH_NODE_UCODE_DEFAULT;
 #ifdef F3DEX_GBI_2
     gSPLookAt(gDisplayListHead++, gCurLookAt);
 #endif
-#if defined(OBJECTS_REJ) || SILHOUETTE
+#if SILHOUETTE
     if (gCurGraphNodeObject != NULL) {
- #ifdef OBJECTS_REJ
-        ucode = gCurGraphNodeObject->ucode;
- #endif
- #if SILHOUETTE
         if (gCurGraphNodeObject->node.flags & GRAPH_RENDER_SILHOUETTE) {
             switch (layer) {
                 case LAYER_OPAQUE: layer = LAYER_SILHOUETTE_OPAQUE; break;
@@ -472,7 +365,6 @@ void geo_append_display_list(void *displayList, s32 layer) {
                 case LAYER_ALPHA:  layer = LAYER_OCCLUDE_SILHOUETTE_ALPHA;  break;
             }
         }
- #endif // SILHOUETTE
     }
-#endif // F3DEX_GBI_2 || SILHOUETTE
+#endif // SILHOUETTE
     if (gCurGraphNodeMasterList != NULL) {
@@ -482,12 +374,12 @@ void geo_append_display_list(void *displayList, s32 layer) {
         listNode->transform = gMatStackFixed[gMatStackIndex];
         listNode->displayList = displayList;
         listNode->next = NULL;
-        if (gCurGraphNodeMasterList->listHeads[ucode][layer] == NULL) {
-            gCurGraphNodeMasterList->listHeads[ucode][layer] = listNode;
+        if (gCurGraphNodeMasterList->listHeads[layer] == NULL) {
+            gCurGraphNodeMasterList->listHeads[layer] = listNode;
         } else {
-            gCurGraphNodeMasterList->listTails[ucode][layer]->next = listNode;
+            gCurGraphNodeMasterList->listTails[layer]->next = listNode;
         }
-        gCurGraphNodeMasterList->listTails[ucode][layer] = listNode;
+        gCurGraphNodeMasterList->listTails[layer] = listNode;
     }
 }
 
@@ -512,14 +404,12 @@ static void append_dl_and_return(struct GraphNodeDisplayList *node) {
  * Process the master list node.
  */
 void geo_process_master_list(struct GraphNodeMasterList *node) {
-    s32 ucode, layer;
+    s32 layer;
 
     if (gCurGraphNodeMasterList == NULL && node->node.children != NULL) {
         gCurGraphNodeMasterList = node;
-        for (ucode = 0; ucode < GRAPH_NODE_NUM_UCODES; ucode++) {
-            for (layer = LAYER_FIRST; layer < LAYER_COUNT; layer++) {
-                node->listHeads[ucode][layer] = NULL;
-            }
+        for (layer = LAYER_FIRST; layer < LAYER_COUNT; layer++) {
+            node->listHeads[layer] = NULL;
         }
         geo_process_node_and_siblings(node->node.children);
         geo_process_master_list_sub(gCurGraphNodeMasterList);
@@ -1128,16 +1018,16 @@ void visualise_object_hitbox(struct Object *node) {
             debug_box_color(COLOR_RGBA32_DEBUG_HITBOX);
         }
 
-        debug_box(bnds1, bnds2, (DEBUG_SHAPE_CYLINDER | DEBUG_UCODE_REJ));
+        debug_box(bnds1, bnds2, (DEBUG_SHAPE_CYLINDER));
         vec3f_set(bnds1, node->oPosX, (node->oPosY - node->hitboxDownOffset), node->oPosZ);
         vec3f_set(bnds2, node->hurtboxRadius, node->hurtboxHeight, node->hurtboxRadius);
         debug_box_color(COLOR_RGBA32_DEBUG_HURTBOX);
-        debug_box(bnds1, bnds2, (DEBUG_SHAPE_CYLINDER | DEBUG_UCODE_REJ));
+        debug_box(bnds1, bnds2, (DEBUG_SHAPE_CYLINDER));
     } else {
         vec3f_set(bnds1, node->oPosX, (node->oPosY - 15), node->oPosZ);
         vec3f_set(bnds2, 30, 30, 30);
         debug_box_color(COLOR_RGBA32_DEBUG_POSITION);
-        debug_box(bnds1, bnds2, (DEBUG_SHAPE_BOX | DEBUG_UCODE_REJ));
+        debug_box(bnds1, bnds2, (DEBUG_SHAPE_BOX));
     }
 }
 #endif
@@ -1149,6 +1039,8 @@ void geo_process_object(struct Object *node) {
     if (node->header.gfx.areaIndex == gCurGraphNodeRoot->areaIndex) {
         s32 isInvisible = (node->header.gfx.node.flags & GRAPH_RENDER_INVISIBLE);
         s32 noThrowMatrix = (node->header.gfx.throwMatrix == NULL);
+        // Preserve the throw matrix pointer while the game is paused, since the pointer won't be set again during the pause.
+        Mat4 *oldThrowMatrix = (sCurrPlayMode == PLAY_MODE_PAUSED) ? node->header.gfx.throwMatrix : NULL;
 
         // If the throw matrix is null and the object is invisible, there is no need
         // to update billboarding, scale, rotation, etc. 
@@ -1197,7 +1089,7 @@ void geo_process_object(struct Object *node) {
 
         gMatStackIndex--;
         gCurrAnimType = ANIM_TYPE_NONE;
-        node->header.gfx.throwMatrix = NULL;
+        node->header.gfx.throwMatrix = oldThrowMatrix;
     }
 }
 
@@ -1233,7 +1125,7 @@ void geo_process_held_object(struct GraphNodeHeldObject *node) {
         node->fnNode.func(GEO_CONTEXT_RENDER, &node->fnNode.node, gMatStack[gMatStackIndex]);
     }
     if (node->objNode != NULL && node->objNode->header.gfx.sharedChild != NULL) {
-        vec3_prod_val(translation, node->translation, 0.25f);
+        vec3_scale_dest(translation, node->translation, 0.25f);
 
         mtxf_translate(mat, translation);
         mtxf_copy(gMatStack[gMatStackIndex + 1], *gCurGraphNodeObject->throwMatrix);
diff --git a/src/game/rendering_graph_node.h b/src/game/rendering_graph_node.h
index f681163a1..ee72feea4 100644
--- a/src/game/rendering_graph_node.h
+++ b/src/game/rendering_graph_node.h
@@ -15,6 +15,9 @@ extern struct GraphNodeHeldObject  *gCurGraphNodeHeldObject;
 extern u16 gAreaUpdateCounter;
 extern Vec3f globalLightDirection;
 
+#define GRAPH_ROOT_PERSP 0
+#define GRAPH_ROOT_ORTHO 1
+
 enum AnimType {
     // after processing an object, the type is reset to this
     ANIM_TYPE_NONE,
@@ -41,34 +44,7 @@ struct RenderModeContainer {
     u32 modes[LAYER_COUNT];
 };
 
-#ifdef OBJECTS_REJ
- #if SILHOUETTE
-    // Silhouette, .rej
-    enum RenderPhases {
-        RENDER_PHASE_ZEX_BEFORE_SILHOUETTE,
-        RENDER_PHASE_REJ_ZB,
-        RENDER_PHASE_REJ_SILHOUETTE,
-        RENDER_PHASE_REJ_NON_SILHOUETTE,
-        RENDER_PHASE_REJ_OCCLUDE_SILHOUETTE,
-        RENDER_PHASE_ZEX_AFTER_SILHOUETTE,
-        RENDER_PHASE_REJ_NON_ZB,
-        RENDER_PHASE_END,
-    };
-    #define RENDER_PHASE_SILHOUETTE RENDER_PHASE_REJ_SILHOUETTE
-    #define RENDER_PHASE_NON_SILHOUETTE RENDER_PHASE_REJ_NON_SILHOUETTE
- #else
-    // No silhouette, .rej
-    enum RenderPhases {
-        RENDER_PHASE_ZEX_BG,
-        RENDER_PHASE_REJ_ZB,
-        RENDER_PHASE_ZEX_ALL,
-        RENDER_PHASE_REJ_NON_ZB,
-        RENDER_PHASE_END,
-    };
- #endif
-#else
- #if SILHOUETTE
-    // Silhouette, no .rej
+#if SILHOUETTE
     enum RenderPhases {
         RENDER_PHASE_ZEX_BEFORE_SILHOUETTE,
         RENDER_PHASE_ZEX_SILHOUETTE,
@@ -79,13 +55,11 @@ struct RenderModeContainer {
     };
     #define RENDER_PHASE_SILHOUETTE RENDER_PHASE_ZEX_SILHOUETTE
     #define RENDER_PHASE_NON_SILHOUETTE RENDER_PHASE_ZEX_NON_SILHOUETTE
- #else
-    // No silhouette, no .rej
+#else
     enum RenderPhases {
         RENDER_PHASE_ZEX_ALL,
         RENDER_PHASE_END,
     };
- #endif
 #endif
 
 #if SILHOUETTE
diff --git a/src/game/screen_transition.c b/src/game/screen_transition.c
index 5e17ac835..4b82cb371 100644
--- a/src/game/screen_transition.c
+++ b/src/game/screen_transition.c
@@ -139,43 +139,47 @@ s32 render_textured_transition(u8 transTime, struct WarpTransitionData *transDat
     Vtx *verts = alloc_display_list(8 * sizeof(Vtx));
 
     if (verts != NULL) {
+        Gfx *tempGfxHead = gDisplayListHead;
+
         make_tex_transition_vertices(verts, centerTransX, centerTransY, texTransRadius, transTexType);
-        gSPDisplayList(gDisplayListHead++, dl_proj_mtx_fullscreen);
+        gSPDisplayList(tempGfxHead++, dl_proj_mtx_fullscreen);
 
         u8 r = transData->red;
         u8 g = transData->green;
         u8 b = transData->blue;
-        gDPSetPrimColor(gDisplayListHead++, 0, 0, r, g, b, 255);
+        gDPSetPrimColor(tempGfxHead++, 0, 0, r, g, b, 255);
 
-        gDPSetCombineMode(gDisplayListHead++, G_CC_PRIMITIVE, G_CC_PRIMITIVE);
-        gDPSetRenderMode(gDisplayListHead++, G_RM_AA_OPA_SURF, G_RM_AA_OPA_SURF2);
+        gDPSetCombineMode(tempGfxHead++, G_CC_PRIMITIVE, G_CC_PRIMITIVE);
+        gDPSetRenderMode(tempGfxHead++, G_RM_AA_OPA_SURF, G_RM_AA_OPA_SURF2);
 
-        gSPVertex(gDisplayListHead++, VIRTUAL_TO_PHYSICAL(verts), 8, 0);
-        gSPDisplayList(gDisplayListHead++, dl_transition_draw_filled_region);
-        gDPPipeSync(gDisplayListHead++);
+        gSPVertex(tempGfxHead++, VIRTUAL_TO_PHYSICAL(verts), 8, 0);
+        gSPDisplayList(tempGfxHead++, dl_transition_draw_filled_region);
+        gDPPipeSync(tempGfxHead++);
 
-        gDPSetCombineLERP(gDisplayListHead++, 0, 0, 0, PRIMITIVE, 0, 0, 0, TEXEL0, 
+        gDPSetCombineLERP(tempGfxHead++, 0, 0, 0, PRIMITIVE, 0, 0, 0, TEXEL0, 
                                               0, 0, 0, PRIMITIVE, 0, 0, 0, TEXEL0);
 
-        gDPSetRenderMode(gDisplayListHead++, G_RM_AA_XLU_SURF, G_RM_AA_XLU_SURF2);
-        gDPSetTextureFilter(gDisplayListHead++, G_TF_BILERP);
+        gDPSetRenderMode(tempGfxHead++, G_RM_AA_XLU_SURF, G_RM_AA_XLU_SURF2);
+        gDPSetTextureFilter(tempGfxHead++, G_TF_BILERP);
 
         switch (transTexType) {
             case TRANS_TYPE_MIRROR:
-                gDPLoadTextureBlock(gDisplayListHead++, sTextureTransitionID[texID], G_IM_FMT_IA, G_IM_SIZ_8b, 32, 64, 0,
+                gDPLoadTextureBlock(tempGfxHead++, sTextureTransitionID[texID], G_IM_FMT_IA, G_IM_SIZ_8b, 32, 64, 0,
                     G_TX_WRAP | G_TX_MIRROR, G_TX_WRAP | G_TX_MIRROR, 5, 6, G_TX_NOLOD, G_TX_NOLOD);
                 break;
             case TRANS_TYPE_CLAMP:
-                gDPLoadTextureBlock(gDisplayListHead++, sTextureTransitionID[texID], G_IM_FMT_IA, G_IM_SIZ_8b, 64, 64, 0,
+                gDPLoadTextureBlock(tempGfxHead++, sTextureTransitionID[texID], G_IM_FMT_IA, G_IM_SIZ_8b, 64, 64, 0,
                     G_TX_CLAMP, G_TX_CLAMP, 6, 6, G_TX_NOLOD, G_TX_NOLOD);
                 break;
         }
-        gSPTexture(gDisplayListHead++, 0xFFFF, 0xFFFF, 0, G_TX_RENDERTILE, G_ON);
-        gSPVertex(gDisplayListHead++, VIRTUAL_TO_PHYSICAL(verts), 4, 0);
-        gSPDisplayList(gDisplayListHead++, dl_draw_quad_verts_0123);
-        gSPTexture(gDisplayListHead++, 0xFFFF, 0xFFFF, 0, G_TX_RENDERTILE, G_OFF);
-        gSPDisplayList(gDisplayListHead++, dl_screen_transition_end);
+        gSPTexture(tempGfxHead++, 0xFFFF, 0xFFFF, 0, G_TX_RENDERTILE, G_ON);
+        gSPVertex(tempGfxHead++, VIRTUAL_TO_PHYSICAL(verts), 4, 0);
+        gSPDisplayList(tempGfxHead++, dl_draw_quad_verts_0123);
+        gSPTexture(tempGfxHead++, 0xFFFF, 0xFFFF, 0, G_TX_RENDERTILE, G_OFF);
+        gSPDisplayList(tempGfxHead++, dl_screen_transition_end);
         sTransitionTextureAngle += transData->angleSpeed;
+
+        gDisplayListHead = tempGfxHead;
     }
     return set_and_reset_transition_fade_timer(transTime);
 }
@@ -196,17 +200,21 @@ s32 dl_transition_color(u8 transTime, struct WarpTransitionData *transData, u8 a
     Vtx *verts = vertex_transition_color();
 
     if (verts != NULL) {
+        Gfx *tempGfxHead = gDisplayListHead;
+
         u8 r = transData->red;
         u8 g = transData->green;
         u8 b = transData->blue;
-        gDPSetPrimColor(gDisplayListHead++, 0, 0, r, g, b, alpha);
+        gDPSetPrimColor(tempGfxHead++, 0, 0, r, g, b, alpha);
 
-        gSPDisplayList(gDisplayListHead++, dl_proj_mtx_fullscreen);
-        gDPSetCombineMode(gDisplayListHead++, G_CC_PRIMITIVE, G_CC_PRIMITIVE);
-        gDPSetRenderMode(gDisplayListHead++, G_RM_AA_XLU_SURF, G_RM_AA_XLU_SURF2);
-        gSPVertex(gDisplayListHead++, VIRTUAL_TO_PHYSICAL(verts), 4, 0);
-        gSPDisplayList(gDisplayListHead++, dl_draw_quad_verts_0123);
-        gSPDisplayList(gDisplayListHead++, dl_screen_transition_end);
+        gSPDisplayList(tempGfxHead++, dl_proj_mtx_fullscreen);
+        gDPSetCombineMode(tempGfxHead++, G_CC_PRIMITIVE, G_CC_PRIMITIVE);
+        gDPSetRenderMode(tempGfxHead++, G_RM_AA_XLU_SURF, G_RM_AA_XLU_SURF2);
+        gSPVertex(tempGfxHead++, VIRTUAL_TO_PHYSICAL(verts), 4, 0);
+        gSPDisplayList(tempGfxHead++, dl_draw_quad_verts_0123);
+        gSPDisplayList(tempGfxHead++, dl_screen_transition_end);
+
+        gDisplayListHead = tempGfxHead;
     }
     return set_and_reset_transition_fade_timer(transTime);
 }
diff --git a/src/game/skybox.c b/src/game/skybox.c
index 490d32576..7485d3832 100644
--- a/src/game/skybox.c
+++ b/src/game/skybox.c
@@ -61,7 +61,7 @@ struct Skybox {
 
 struct Skybox sSkyBoxInfo[2];
 
-typedef const Texture *const SkyboxTexture[80 * sqr(SKYBOX_SIZE)];
+typedef const Texture *const SkyboxTexture[80 * SKYBOX_SIZE * SKYBOX_SIZE];
 
 extern SkyboxTexture bbh_skybox_ptrlist;
 extern SkyboxTexture bidw_skybox_ptrlist;
@@ -304,7 +304,8 @@ Gfx *create_skybox_facing_camera(s8 player, s8 background, f32 fov, Vec3f pos, V
     //! the first frame, which causes a floating point divide by 0
     fov = 90.0f;
     s16 yaw;
-    vec3f_get_angle(pos, focus, &sSkyBoxInfo[player].pitch, &yaw);
+    f32 dist;
+    vec3f_get_dist_and_angle(pos, focus, &dist, &sSkyBoxInfo[player].pitch, &yaw);
     sSkyBoxInfo[player].yaw = yaw;
     sSkyBoxInfo[player].scaledX = calculate_skybox_scaled_x(player, fov);
     sSkyBoxInfo[player].scaledY = calculate_skybox_scaled_y(player, fov);
diff --git a/src/game/spawn_object.c b/src/game/spawn_object.c
index adb191bbc..c0ec32332 100644
--- a/src/game/spawn_object.c
+++ b/src/game/spawn_object.c
@@ -12,7 +12,6 @@
 #include "object_list_processor.h"
 #include "spawn_object.h"
 #include "types.h"
-#include "puppylights.h"
 
 /**
  * Attempt to allocate an object from freeList (singly linked) and append it
@@ -188,14 +187,8 @@ struct Object *allocate_object(struct ObjectNode *objList) {
     obj->oRoom = -1;
 
     obj->header.gfx.node.flags &= ~GRAPH_RENDER_INVISIBLE;
-#ifdef OBJECTS_REJ
-    obj->header.gfx.ucode = GRAPH_NODE_UCODE_REJ;
-#endif
     vec3_same(obj->header.gfx.pos, -10000.0f);
     obj->header.gfx.throwMatrix = NULL;
-#ifdef PUPPYLIGHTS
-    obj->oLightID = 0xFFFF;
-#endif
 
     return obj;
 }