diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0b3b0f98..9c0cd8be 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+0.5.2
+=====
+- AudioFlinger: optimize WAV volume scaling
+
 0.5.1
 =====
 - Fri3d Camp 2024 Board: add startup light and sound
diff --git a/internal_filesystem/lib/mpos/audio/stream_wav.py b/internal_filesystem/lib/mpos/audio/stream_wav.py
index e08261b5..84723800 100644
--- a/internal_filesystem/lib/mpos/audio/stream_wav.py
+++ b/internal_filesystem/lib/mpos/audio/stream_wav.py
@@ -3,6 +3,7 @@
 # Ported from MusicPlayer's AudioPlayer class
 
 import machine
+import micropython
 import os
 import time
 import sys
@@ -10,7 +11,6 @@ import sys
 # Volume scaling function - Viper-optimized for ESP32 performance
 # NOTE: The line below is automatically commented out by build_mpos.sh during
 # Unix/macOS builds (cross-compiler doesn't support Viper), then uncommented after build.
-import micropython
 @micropython.viper
 def _scale_audio(buf: ptr8, num_bytes: int, scale_fixed: int):
     """Fast volume scaling for 16-bit audio samples using Viper (ESP32 native code emitter)."""
@@ -28,99 +28,46 @@ def _scale_audio(buf: ptr8, num_bytes: int, scale_fixed: int):
         buf[i] = sample & 255
         buf[i + 1] = (sample >> 8) & 255
 
-import micropython
 @micropython.viper
 def _scale_audio_optimized(buf: ptr8, num_bytes: int, scale_fixed: int):
-    """
-    Very fast 16-bit volume scaling using only shifts + adds.
-    - 100 % and above → no change
-    - < ~12.5 %        → pure right-shift (fastest)
-    - otherwise        → high-quality shift/add approximation
-    """
-    if scale_fixed >= 32768:                    # 100 % or more
+    if scale_fixed >= 32768:
         return
-    if scale_fixed <= 0:                        # muted
+    if scale_fixed <= 0:
         for i in range(num_bytes):
             buf[i] = 0
         return
 
-    # --------------------------------------------------------------
-    # Very low volumes → simple right-shift (super cheap)
-    # --------------------------------------------------------------
-    if scale_fixed < 4096:                      # < ~12.5 %
-        shift: int = 0
-        tmp: int = 32768
-        while tmp > scale_fixed:
-            shift += 1
-            tmp >>= 1
-        for i in range(0, num_bytes, 2):
-            lo: int = int(buf[i])
-            hi: int = int(buf[i + 1])
-            s: int = (hi << 8) | lo
-            if hi & 128:                       # sign extend
-                s -= 65536
-            s >>= shift
-            buf[i]     = s & 255
-            buf[i + 1] = (s >> 8) & 255
-        return
+    mask: int = scale_fixed
 
-    # --------------------------------------------------------------
-    # Medium → high volumes:  sample * scale_fixed // 32768
-    # approximated with shifts + adds only
-    # --------------------------------------------------------------
-    # Build a 16-bit mask:
-    #   bit 0  → add (s >> 15)
-    #   bit 1  → add (s >> 14)
-    #   ...
-    #   bit 15 → add s          (>> 0)
-    mask: int = 0
-    bit_value: int = 16384                     # starts at 2^-1
-    remaining: int = scale_fixed
-
-    shift_idx: int = 1                         # corresponds to >>1
-    while bit_value > 0:
-        if remaining >= bit_value:
-            mask |= (1 << (16 - shift_idx))    # correct bit position
-            remaining -= bit_value
-        bit_value >>= 1
-        shift_idx += 1
-
-    # Apply the mask
     for i in range(0, num_bytes, 2):
-        lo: int = int(buf[i])
-        hi: int = int(buf[i + 1])
-        s: int = (hi << 8) | lo
-        if hi & 128:
-            s -= 65536
+        s: int = int(buf[i]) | (int(buf[i+1]) << 8)
+        if s >= 0x8000:
+            s -= 0x10000
 
-        result: int = 0
-        if mask & 0x8000: result += s               # >>0
-        if mask & 0x4000: result += (s >> 1)
-        if mask & 0x2000: result += (s >> 2)
-        if mask & 0x1000: result += (s >> 3)
-        if mask & 0x0800: result += (s >> 4)
-        if mask & 0x0400: result += (s >> 5)
-        if mask & 0x0200: result += (s >> 6)
-        if mask & 0x0100: result += (s >> 7)
-        if mask & 0x0080: result += (s >> 8)
-        if mask & 0x0040: result += (s >> 9)
-        if mask & 0x0020: result += (s >>10)
-        if mask & 0x0010: result += (s >>11)
-        if mask & 0x0008: result += (s >>12)
-        if mask & 0x0004: result += (s >>13)
-        if mask & 0x0002: result += (s >>14)
-        if mask & 0x0001: result += (s >>15)
+        r: int = 0
+        if mask & 0x8000: r += s
+        if mask & 0x4000: r += s>>1
+        if mask & 0x2000: r += s>>2
+        if mask & 0x1000: r += s>>3
+        if mask & 0x0800: r += s>>4
+        if mask & 0x0400: r += s>>5
+        if mask & 0x0200: r += s>>6
+        if mask & 0x0100: r += s>>7
+        if mask & 0x0080: r += s>>8
+        if mask & 0x0040: r += s>>9
+        if mask & 0x0020: r += s>>10
+        if mask & 0x0010: r += s>>11
+        if mask & 0x0008: r += s>>12
+        if mask & 0x0004: r += s>>13
+        if mask & 0x0002: r += s>>14
+        if mask & 0x0001: r += s>>15
 
-        # Clamp to 16-bit signed range
-        if result > 32767:
-            result = 32767
-        elif result < -32768:
-            result = -32768
+        if r > 32767:  r = 32767
+        if r < -32768: r = -32768
 
-        buf[i]     = result & 255
-        buf[i + 1] = (result >> 8) & 255
+        buf[i]   = r & 0xFF
+        buf[i+1] = (r >> 8) & 0xFF
 
-import micropython
 @micropython.viper
 def _scale_audio_rough(buf: ptr8, num_bytes: int, scale_fixed: int):
     """Rough volume scaling for 16-bit audio samples using right shifts for performance."""
@@ -375,9 +322,12 @@ class WAVStream:
 
                 # smaller chunk size means less jerks but buffer can run empty
                 # at 22050 Hz, 16-bit, 2-ch, 4096/4 = 1024 samples / 22050 = 46ms
+                # with rough volume scaling:
                 # 4096 => audio stutters during quasibird
                 # 8192 => no audio stutters and quasibird runs at ~16 fps => good compromise!
                 # 16384 => no audio stutters during quasibird but low framerate (~8fps)
+                # with optimized volume scaling:
+                # 8192 => no audio stutters and quasibird runs at ~12fps
                 chunk_size = 4096*2
                 bytes_per_original_sample = (bits_per_sample // 8) * channels
                 total_original = 0
diff --git a/scripts/install.sh b/scripts/install.sh
index 7dd15113..984121af 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -47,6 +47,8 @@ fi
 # The issue is that this brings all the .git folders with it:
 #$mpremote fs cp -r apps :/
 
+$mpremote fs cp -r lib :/
+
 $mpremote fs mkdir :/apps
 $mpremote fs cp -r apps/com.micropythonos.* :/apps/
 find apps/ -maxdepth 1 -type l | while read symlink; do
@@ -59,7 +61,6 @@ done
 #echo "Unmounting builtin/ so that it can be customized..." # not sure this is necessary
 #$mpremote exec "import os ; os.umount('/builtin')"
 $mpremote fs cp -r builtin :/
-$mpremote fs cp -r lib :/
 
 #$mpremote fs cp -r data :/
 #$mpremote fs cp -r data/images :/data/