Update macros.h + allow ALIGN macros to be used anywhere (#574)

* Update macros.h + allow ALIGN macros to be used anywhere
* Fix wrong alignment in synthesis_sh.c
* Revert bitmask in VIRTUAL_TO_PHYSICAL macro
2026-01-21 10:17:19 -08:00 · 2023-01-31 15:50:19 -05:00
parent 1fadb78632
commit 337dac828f
15 changed files with 134 additions and 97 deletions
--- a/include/ique/PR/region.h
+++ b/include/ique/PR/region.h
@@ -73,7 +73,9 @@ extern "C" {
 */

 /* Perform alignment on input 's' */
+#ifndef ALIGN
 #define	ALIGN(s, align)	(((u32)(s) + ((align)-1)) & ~((align)-1))
+#endif


 /***************************************
--- a/include/macros.h
+++ b/include/macros.h
@@ -12,82 +12,130 @@
 #define GLUE(a, b) a ## b
 #define GLUE2(a, b) GLUE(a, b)

-// Avoid compiler warnings for unused variables
+// Avoid compiler warnings for unused variables.
 #ifdef __GNUC__
 #define UNUSED __attribute__((unused))
 #else
 #define UNUSED
 #endif

-#ifdef __GNUC__
-#define FALL_THROUGH __attribute__((fallthrough))
-#else
-#define FALL_THROUGH
-#endif
-
-// Avoid undefined behaviour for non-returning functions
+// Avoid undefined behaviour for non-returning functions.
 #ifdef __GNUC__
 #define NORETURN __attribute__((noreturn))
 #else
 #define NORETURN
 #endif

-// Static assertions
-#ifdef __GNUC__
-#define STATIC_ASSERT(cond, msg) _Static_assert(cond, msg)
-#else
-#define STATIC_ASSERT(cond, msg) typedef char GLUE2(static_assertion_failed, __LINE__)[(cond) ? 1 : -1]
-#endif
-
-// Align to 8-byte boundary for DMA requirements
-#ifdef __GNUC__
-#define ALIGNED8 __attribute__((aligned(8)))
-#else
-#define ALIGNED8
-#endif
-
-// Align to 16-byte boundary for audio lib requirements
-#ifdef __GNUC__
-#define ALIGNED16 __attribute__((aligned(16)))
-#else
-#define ALIGNED16
-#endif
-
-// Align to 16-byte boundary for audio lib requirements
-#ifdef __GNUC__
-#define ALIGNED64 __attribute__((aligned(64)))
-#else
-#define ALIGNED64
-#endif
-
-// Align to 16-byte boundary for audio lib requirements
+// Always inline a function.
 #ifdef __GNUC__
 #define ALWAYS_INLINE inline __attribute__((always_inline))
 #else
 #define ALWAYS_INLINE inline
 #endif

+// Mark an intentional fall-through between switch cases.
+#ifdef __GNUC__
+#define FALL_THROUGH __attribute__((fallthrough))
+#else
+#define FALL_THROUGH
+#endif
+
+// Use Og when compiling the function.
+#ifdef __GNUC__
+#define OPTIMIZE_OG __attribute__((optimize("Og")))
+#else
+#define OPTIMIZE_OG 
+#endif
+
+// Use Os when compiling the function.
+#ifdef __GNUC__
+#define OPTIMIZE_OS __attribute__((optimize("Os")))
+#else
+#define OPTIMIZE_OS 
+#endif
+
+// Use Ofast when compiling the function.
+#ifdef __GNUC__
+#define OPTIMIZE_OFAST __attribute__((optimize("Ofast")))
+#else
+#define OPTIMIZE_OFAST 
+#endif
+
+// Pack a struct: no padding between members (natural alignment is ignored).
+#ifdef __GNUC__
+#define PACKED __attribute__((packed))
+#else
+#define PACKED 
+#endif
+
+// Align to 4-byte boundary.
+#ifdef __GNUC__
+#define ALIGNED4 __attribute__((aligned(4)))
+#else
+#define ALIGNED4
+#endif
+
+// Align to 8-byte boundary (for DMA requirements).
+#ifdef __GNUC__
+#define ALIGNED8 __attribute__((aligned(8)))
+#else
+#define ALIGNED8
+#endif
+
+// Align to 16-byte boundary (for audio lib requirements).
+#ifdef __GNUC__
+#define ALIGNED16 __attribute__((aligned(16)))
+#else
+#define ALIGNED16
+#endif
+
+// Align to 32-byte boundary.
+#ifdef __GNUC__
+#define ALIGNED32 __attribute__((aligned(32)))
+#else
+#define ALIGNED32
+#endif
+
+// Align to 64-byte boundary.
+#ifdef __GNUC__
+#define ALIGNED64 __attribute__((aligned(64)))
+#else
+#define ALIGNED64
+#endif
+
+#ifndef ALIGN
+#define ALIGN(VAL_, ALIGNMENT_) (((VAL_) + ((ALIGNMENT_) - 1)) & ~((ALIGNMENT_) - 1))
+#endif
+
+// Round val up to the next multiple of 4, 8, 16, 32 or 64.
+#define ALIGN4(val)  ALIGN((val),  4)
+#define ALIGN8(val)  ALIGN((val),  8)
+#define ALIGN16(val) ALIGN((val), 16)
+#define ALIGN32(val) ALIGN((val), 32)
+#define ALIGN64(val) ALIGN((val), 64)
+
 #ifndef NO_SEGMENTED_MEMORY
-// convert a virtual address to physical.
+// Convert a virtual address to physical.
 #define VIRTUAL_TO_PHYSICAL(addr)   ((uintptr_t)(addr) & 0x1FFFFFFF)

-// convert a physical address to virtual.
+// Convert a physical address to virtual.
 #define PHYSICAL_TO_VIRTUAL(addr)   ((uintptr_t)(addr) | 0x80000000)

-// another way of converting virtual to physical
+// Another way of converting virtual to physical.
 #define VIRTUAL_TO_PHYSICAL2(addr)  ((u8 *)(addr) - 0x80000000U)
-#else
-// no conversion needed other than cast
+#else // NO_SEGMENTED_MEMORY
+// No conversion needed other than cast.
 #define VIRTUAL_TO_PHYSICAL(addr)   ((uintptr_t)(addr))
 #define PHYSICAL_TO_VIRTUAL(addr)   ((uintptr_t)(addr))
 #define VIRTUAL_TO_PHYSICAL2(addr)  ((void *)(addr))
-#endif
+#endif // NO_SEGMENTED_MEMORY

-enum VIModes {
-    MODE_NTSC,
-    MODE_MPAL,
-    MODE_PAL,
-};
+// Static (compile-time) assertions.
+#ifdef __GNUC__
+#define STATIC_ASSERT(cond, msg) _Static_assert(cond, msg)
+#else
+#define STATIC_ASSERT(cond, msg) typedef char GLUE2(static_assertion_failed, __LINE__)[(cond) ? 1 : -1]
+#endif

 #define FORCE_CRASH { *(vs8*)0 = 0; }

--- a/include/n64/PR/region.h
+++ b/include/n64/PR/region.h
@@ -73,8 +73,9 @@ extern "C" {
 */

 /* Perform alignment on input 's' */
+#ifndef ALIGN
 #define	ALIGN(s, align)	(((u32)(s) + ((align)-1)) & ~((align)-1))
-//commented out due to sm64 conflict
+#endif

 /***************************************
 *
--- a/src/audio/load.c
+++ b/src/audio/load.c
@@ -9,8 +9,6 @@
 #include "seqplayer.h"
 #include "game/puppyprint.h"

-#define ALIGN16(val) (((val) + 0xF) & ~0xF)
-
 struct SharedDma {
    /*0x0*/ u8 *buffer;       // target, points to pre-allocated buffer
    /*0x4*/ uintptr_t source; // device address
--- a/src/audio/load_sh.c
+++ b/src/audio/load_sh.c
@@ -8,8 +8,6 @@
 #include "load.h"
 #include "seqplayer.h"

-#define ALIGN16(val) (((val) + 0xF) & ~0xF)
-
 struct SharedDma {
    /*0x0*/ u8 *buffer;       // target, points to pre-allocated buffer
    /*0x4*/ uintptr_t source; // device address
--- a/src/audio/synthesis.c
+++ b/src/audio/synthesis.c
@@ -38,8 +38,6 @@
    aSetBuffer(pkt, 0, 0, c + DMEM_ADDR_WET_RIGHT_CH, d);                                              \
    aSaveBuffer(pkt, VIRTUAL_TO_PHYSICAL2(gSynthesisReverb.ringBuffer.right + (off)));

-#define AUDIO_ALIGN(val, amnt) (((val) + (1 << amnt) - 1) & ~((1 << amnt) - 1))
-
 #ifdef BETTER_REVERB
 // Do not touch these values manually, unless you want potential for problems.
 u8 gBetterReverbPreset = 0;
@@ -575,7 +573,7 @@ u64 *synthesis_resample_and_mix_reverb(u64 *cmd, s32 bufLen, s16 reverbIndex, s1
        aMix(cmd++, 0, 0x8000 + gSynthesisReverbs[reverbIndex].reverbGain, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_WET_LEFT_CH);
    } else {
        startPad = (item->startPos & 0x7) * 2;
-        paddedLengthA = AUDIO_ALIGN(startPad + item->lengthA, 4);
+        paddedLengthA = ALIGN16(startPad + item->lengthA);

        cmd = synthesis_load_reverb_ring_buffer(cmd, DMEM_ADDR_RESAMPLED, (item->startPos - startPad / 2), DEFAULT_LEN_1CH, reverbIndex);
        if (item->lengthB != 0) {
@@ -729,7 +727,7 @@ u64 *synthesis_do_one_audio_update(s16 *aiBuf, s32 bufLen, u64 *cmd, s32 updateI
        } else {
            // Same as above but upsample the previously downsampled samples used for reverb first
            t4 = (v1->startPos & 7) * 2;
-            ra = AUDIO_ALIGN(v1->lengthA + t4, 4);
+            ra = ALIGN16(v1->lengthA + t4);
            aSetLoadBufferPair(cmd++, 0, v1->startPos - t4 / 2);
            if (v1->lengthB != 0) {
                // Ring buffer wrapped
@@ -1063,7 +1061,7 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                                      VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
                            sp130 = s2 * 2;
                        } else {
-                            s5Aligned = AUDIO_ALIGN(s5, 5);
+                            s5Aligned = ALIGN32(s5);
                            aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3,
                                       DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned, s0 * 2);
                            aADPCMdec(cmd++, flags,
@@ -1077,9 +1075,9 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
                            aADPCMdec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->adpcmdecState));
                            sp130 = s2 * 2;
                        } else {
-                            aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3, DMEM_ADDR_UNCOMPRESSED_NOTE + AUDIO_ALIGN(s5, 5), s0 * 2);
+                            aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3, DMEM_ADDR_UNCOMPRESSED_NOTE + ALIGN32(s5), s0 * 2);
                            aADPCMdec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->adpcmdecState));
-                            aDMEMMove(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + AUDIO_ALIGN(s5, 5) + (s2 * 2), DMEM_ADDR_UNCOMPRESSED_NOTE + s5, (nSamplesInThisIteration) * 2);
+                            aDMEMMove(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + ALIGN32(s5) + (s2 * 2), DMEM_ADDR_UNCOMPRESSED_NOTE + s5, (nSamplesInThisIteration) * 2);
                        }
 #endif

--- a/src/audio/synthesis.h
+++ b/src/audio/synthesis.h
@@ -17,8 +17,6 @@
 #define MAX_UPDATES_PER_FRAME 4
 #endif

-#define ALIGN16(val) (((val) + 0xF) & ~0xF)
-
 #ifdef BETTER_REVERB

 #define NUM_ALLPASS 12 // Maximum number of delay filters to use with better reverb; do not change this value if you don't know what you're doing.
--- a/src/audio/synthesis_sh.c
+++ b/src/audio/synthesis_sh.c
@@ -33,9 +33,6 @@
    aSetBuffer(pkt, 0, 0, c + DMEM_ADDR_WET_RIGHT_CH, d);                                              \
    aSaveBuffer(pkt, VIRTUAL_TO_PHYSICAL2(gSynthesisReverb.ringBuffer.right + (off)));

-#undef ALIGN
-#define ALIGN(val, amnt) (((val) + (1 << amnt) - 1) & ~((1 << amnt) - 1))
-
 struct VolumeChange {
    u16 sourceLeft;
    u16 sourceRight;
@@ -212,7 +209,7 @@ u64 *synthesis_resample_and_mix_reverb(u64 *cmd, s32 bufLen, s16 reverbIndex, s1
        aMix(cmd++, 0x8000 + gSynthesisReverbs[reverbIndex].reverbGain, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_WET_LEFT_CH, DEFAULT_LEN_2CH);
    } else {
        startPad = (item->startPos & 0x7) * 2;
-        paddedLengthA = ALIGN(startPad + item->lengthA, 4);
+        paddedLengthA = ALIGN16(startPad + item->lengthA);

        cmd = synthesis_load_reverb_ring_buffer(cmd, DMEM_ADDR_RESAMPLED, (item->startPos - startPad / 2), DEFAULT_LEN_1CH, reverbIndex);
        if (item->lengthB != 0) {
@@ -542,13 +539,13 @@ u64 *synthesis_process_note(s32 noteIndex, struct NoteSubEu *noteSubEu, struct N
                        v0_2 = sp84 + (temp * unk_s6) + sampleAddr;
                    } else {
                        v0_2 = dma_sample_data((uintptr_t)(sp84 + (temp * unk_s6) + sampleAddr),
-                                ALIGN(t0 * unk_s6 + 16, 4), flags, &synthesisState->sampleDmaIndex, audioBookSample->medium);
+                                ALIGN16(t0 * unk_s6 + 16), flags, &synthesisState->sampleDmaIndex, audioBookSample->medium);
                    }

                    a3 = ((uintptr_t)v0_2 & 0xf);
-                    aligned = ALIGN(t0 * unk_s6 + 16, 4);
+                    aligned = ALIGN16(t0 * unk_s6 + 16);
                    addr = (DMEM_ADDR_COMPRESSED_ADPCM_DATA - aligned) & 0xffff;
-                    aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(v0_2 - a3), addr, ALIGN(t0 * unk_s6 + 16, 4));
+                    aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(v0_2 - a3), addr, ALIGN16(t0 * unk_s6 + 16));
                } else {
                    s0 = 0;
                    a3 = 0;
@@ -562,13 +559,13 @@ u64 *synthesis_process_note(s32 noteIndex, struct NoteSubEu *noteSubEu, struct N
                if (nAdpcmSamplesProcessed == 0) {
                    switch (audioBookSample->codec) {
                        case CODEC_ADPCM:
-                            aligned = ALIGN(t0 * unk_s6 + 16, 4);
+                            aligned = ALIGN16(t0 * unk_s6 + 16);
                            addr = (DMEM_ADDR_COMPRESSED_ADPCM_DATA - aligned) & 0xffff;
                            aSetBuffer(cmd++, 0, addr + a3, DMEM_ADDR_UNCOMPRESSED_NOTE, s0 * 2);
                            aADPCMdec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
                            break;
                        case CODEC_S8:
-                            aligned = ALIGN(t0 * unk_s6 + 16, 4);
+                            aligned = ALIGN16(t0 * unk_s6 + 16);
                            addr = (DMEM_ADDR_COMPRESSED_ADPCM_DATA - aligned) & 0xffff;
                            aSetBuffer(cmd++, 0, addr + a3, DMEM_ADDR_UNCOMPRESSED_NOTE, s0 * 2);
                            aS8Dec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
@@ -576,16 +573,16 @@ u64 *synthesis_process_note(s32 noteIndex, struct NoteSubEu *noteSubEu, struct N
                    }
                    sp130 = s2 * 2;
                } else {
-                    s5Aligned = ALIGN(s5 + 16, 4);
+                    s5Aligned = ALIGN16(s5 + 16);
                    switch (audioBookSample->codec) {
                        case CODEC_ADPCM:
-                            aligned = ALIGN(t0 * unk_s6 + 16, 4);
+                            aligned = ALIGN16(t0 * unk_s6 + 16);
                            addr = (DMEM_ADDR_COMPRESSED_ADPCM_DATA - aligned) & 0xffff;
                            aSetBuffer(cmd++, 0, addr + a3, DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned, s0 * 2);
                            aADPCMdec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
                            break;
                        case CODEC_S8:
-                            aligned = ALIGN(t0 * unk_s6 + 16, 4);
+                            aligned = ALIGN16(t0 * unk_s6 + 16);
                            addr = (DMEM_ADDR_COMPRESSED_ADPCM_DATA - aligned) & 0xffff;
                            aSetBuffer(cmd++, 0, addr + a3, DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned, s0 * 2);
                            aS8Dec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
@@ -635,7 +632,7 @@ skip:
                case 2:
                    switch (curPart) {
                        case 0:
-                            aDownsampleHalf(cmd++, ALIGN(samplesLenAdjusted / 2, 3), DMEM_ADDR_UNCOMPRESSED_NOTE + sp130, DMEM_ADDR_RESAMPLED);
+                            aDownsampleHalf(cmd++, ALIGN8(samplesLenAdjusted / 2), DMEM_ADDR_UNCOMPRESSED_NOTE + sp130, DMEM_ADDR_RESAMPLED);
                            resampledTempLen = samplesLenAdjusted;
                            noteSamplesDmemAddrBeforeResampling = DMEM_ADDR_RESAMPLED;
                            if (noteSubEu->finished != FALSE) {
@@ -643,7 +640,7 @@ skip:
                            }
                            break;
                        case 1:
-                            aDownsampleHalf(cmd++, ALIGN(samplesLenAdjusted / 2, 3), DMEM_ADDR_UNCOMPRESSED_NOTE + sp130, resampledTempLen + DMEM_ADDR_RESAMPLED);
+                            aDownsampleHalf(cmd++, ALIGN8(samplesLenAdjusted / 2), DMEM_ADDR_UNCOMPRESSED_NOTE + sp130, resampledTempLen + DMEM_ADDR_RESAMPLED);
                            break;
                    }
            }
@@ -858,7 +855,7 @@ u64 *note_apply_headset_pan_effects(u64 *cmd, struct NoteSubEu *noteSubEu, struc

        if (prevPanShift != 0) {
            aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->panSamplesBuffer),
-                        DMEM_ADDR_NOTE_PAN_TEMP, ALIGN(prevPanShift, 4));
+                        DMEM_ADDR_NOTE_PAN_TEMP, ALIGN16(prevPanShift));
            aDMEMMove(cmd++, DMEM_ADDR_TEMP, DMEM_ADDR_NOTE_PAN_TEMP + prevPanShift, bufLen + panShift - prevPanShift);
        } else {
            aDMEMMove(cmd++, DMEM_ADDR_TEMP, DMEM_ADDR_NOTE_PAN_TEMP, bufLen + panShift);
@@ -873,7 +870,7 @@ u64 *note_apply_headset_pan_effects(u64 *cmd, struct NoteSubEu *noteSubEu, struc
    if (panShift) {
        // Save excessive samples for next iteration
        aSaveBuffer(cmd++, DMEM_ADDR_NOTE_PAN_TEMP + bufLen,
-                    VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->panSamplesBuffer), ALIGN(panShift, 4));
+                    VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->panSamplesBuffer), ALIGN16(panShift));
    }

    aAddMixer(cmd++, DMEM_ADDR_NOTE_PAN_TEMP, dest, (bufLen + 0x3f) & 0xffc0);
--- a/src/boot/memory.c
+++ b/src/boot/memory.c
@@ -22,11 +22,6 @@
 #include "game/puppyprint.h"


-// round up to the next multiple
-#define ALIGN4(val) (((val) + 0x3) & ~0x3)
-#define ALIGN8(val) (((val) + 0x7) & ~0x7)
-#define ALIGN16(val) (((val) + 0xF) & ~0xF)
-
 struct MainPoolState {
    u32 freeSpace;
    struct MainPoolBlock *listHeadL;
--- a/src/buffers/buffers.c
+++ b/src/buffers/buffers.c
@@ -29,6 +29,6 @@ ALIGNED8 u8 gGfxSPTaskStack[SP_DRAM_STACK_SIZE8];
 ALIGNED8 u8 gGfxSPTaskYieldBuffer[OS_YIELD_DATA_SIZE];
 #endif // UNF
 // 0x200 bytes
-struct SaveBuffer __attribute__ ((aligned (8))) gSaveBuffer;
+ALIGNED8 struct SaveBuffer gSaveBuffer;
 // 0x190a0 bytes
 struct GfxPool gGfxPools[2];
--- a/src/engine/math_util.c
+++ b/src/engine/math_util.c
@@ -1487,9 +1487,7 @@ static ALWAYS_INLINE float construct_float(const float f)

 // Converts a floating point matrix to a fixed point matrix
 // Makes some assumptions about certain fields in the matrix, which will always be true for valid matrices.
-__attribute__((optimize("Os")))
-void mtxf_to_mtx_fast(s16* dst, float* src)
-{
+OPTIMIZE_OS void mtxf_to_mtx_fast(s16* dst, float* src) {
    float scale = construct_float(65536.0f / WORLD_SCALE);
    // Iterate over pairs of values in the input matrix
    for (int i = 0; i < 8; i++)
--- a/src/game/insn_disasm.c
+++ b/src/game/insn_disasm.c
@@ -22,13 +22,13 @@ enum ParamTypes {
 extern far char *parse_map(u32 pc);
 static char insn_as_string[100];

-typedef struct __attribute__((packed)) {
+typedef struct PACKED {
    u16 rd        : 5;
    u16 shift_amt : 5;
    u16 function  : 6;
 } RTypeData;

-typedef struct __attribute__((packed)) {
+typedef struct PACKED {
    u16 opcode : 6;
    u16 rs     : 5;
    u16 rt     : 5;
@@ -43,7 +43,7 @@ typedef union {
    u32  d;
 } InsnData;

-typedef struct __attribute__((packed)) {
+typedef struct PACKED {
    u32 type;
    u32 arbitraryParam;
    u16 opcode   : 6;
--- a/src/game/main.h
+++ b/src/game/main.h
@@ -3,6 +3,12 @@

 #include "config.h"

+enum VIModes {
+    MODE_NTSC,
+    MODE_MPAL,
+    MODE_PAL,
+};
+
 #define THREAD1_STACK 0x100
 #define THREAD2_STACK 0x800
 #define THREAD3_STACK 0x200
--- a/src/game/save_file.c
+++ b/src/game/save_file.c
@@ -18,8 +18,6 @@
 #endif
 #include "puppycam2.h"

-#define ALIGN4(val) (((val) + 0x3) & ~0x3)
-
 #define MENU_DATA_MAGIC 0x4849
 #define SAVE_FILE_MAGIC 0x4441

--- a/src/goddard/renderer.c
+++ b/src/goddard/renderer.c
@@ -987,7 +987,7 @@ void gd_free(void *ptr) {
 void *gd_allocblock(u32 size) {
    void *block; // 1c

-    size = ALIGN(size, 8);
+    size = ALIGN8(size);
    if ((sMemBlockPoolUsed + size) > sMemBlockPoolSize) {
        gd_printf("gd_allocblock(): Failed request: %dk (%d bytes)\n", size / 1024, size);
        gd_printf("gd_allocblock(): Heap usage: %dk (%d bytes) \n", sMemBlockPoolUsed / 1024,
@@ -1005,7 +1005,7 @@ void *gd_allocblock(u32 size) {
 /* 24A318 -> 24A3E8 */
 void *gd_malloc(u32 size, u8 perm) {
    void *ptr; // 1c
-    size = ALIGN(size, 8);
+    size = ALIGN8(size);
    ptr = gd_request_mem(size, perm);

    if (ptr == NULL) {
@@ -2757,8 +2757,8 @@ s32 setup_view_buffers(const char *name, struct ObjView *view, UNUSED s32 ulx, U
                view->colourBufs[1] = view->colourBufs[0];
            }

-            view->colourBufs[0] = (void *) ALIGN((uintptr_t) view->colourBufs[0], 64);
-            view->colourBufs[1] = (void *) ALIGN((uintptr_t) view->colourBufs[1], 64);
+            view->colourBufs[0] = (void *) ALIGN64((uintptr_t) view->colourBufs[0]);
+            view->colourBufs[1] = (void *) ALIGN64((uintptr_t) view->colourBufs[1]);
            stop_memtracker(memtrackerName);

            if (view->colourBufs[0] == NULL || view->colourBufs[1] == NULL) {
@@ -2778,7 +2778,7 @@ s32 setup_view_buffers(const char *name, struct ObjView *view, UNUSED s32 ulx, U
                if (view->zbuf == NULL) {
                    fatal_printf("Not enough DRAM for Z buffer\n");
                }
-                view->zbuf = (void *) ALIGN((uintptr_t) view->zbuf, 64);
+                view->zbuf = (void *) ALIGN64((uintptr_t) view->zbuf);
            }
            stop_memtracker(memtrackerName);
        } else {