Update macros.h + allow ALIGN macros to be used anywhere (#574)

* Update macros.h + allow ALIGN macros to be used anywhere

* Fix wrong alignment in synthesis_sh.c

* Revert bitmask in VIRTUAL_TO_PHYSICAL macro
This commit is contained in:
Arceveti
2023-01-31 15:50:19 -05:00
committed by GitHub
parent 1fadb78632
commit 337dac828f
15 changed files with 134 additions and 97 deletions

View File

@@ -73,7 +73,9 @@ extern "C" {
*/
/* Round 'value' (cast to u32) up to the next multiple of 'boundary'. */
/* Only defined when no other header has already provided ALIGN. */
#ifndef ALIGN
#define ALIGN(value, boundary) \
    (((u32)(value) + ((boundary)-1)) & ~((boundary)-1))
#endif
/***************************************

View File

@@ -12,82 +12,130 @@
// Token-pasting helpers.
// GLUE pastes its two arguments together exactly as written; GLUE2 goes
// through one extra expansion step so macro arguments (such as __LINE__)
// are expanded before being pasted.
#define GLUE(lhs, rhs) lhs##rhs
#define GLUE2(lhs, rhs) GLUE(lhs, rhs)
// Marks a declaration as intentionally unused so the compiler does not warn
// about it. Expands to nothing when __GNUC__ is not defined.
#if defined(__GNUC__)
#define UNUSED __attribute__((unused))
#else
#define UNUSED
#endif
// Marks an intentional fall-through from one switch case into the next.
// Expands to nothing when __GNUC__ is not defined.
#ifndef __GNUC__
#define FALL_THROUGH
#else
#define FALL_THROUGH __attribute__((fallthrough))
#endif
// Declares that a function never returns to its caller, which avoids
// undefined behaviour for non-returning functions.
// Expands to nothing when __GNUC__ is not defined.
#if defined(__GNUC__)
#define NORETURN __attribute__((noreturn))
#else
#define NORETURN
#endif
// Compile-time assertion.
// With __GNUC__ this maps onto _Static_assert; otherwise it declares a typedef
// of a char array whose size is -1 when the condition is false, which fails
// to compile. The fallback ignores the message text.
#if defined(__GNUC__)
#define STATIC_ASSERT(expr, text) _Static_assert(expr, text)
#else
#define STATIC_ASSERT(expr, text) typedef char GLUE2(static_assertion_failed, __LINE__)[(expr) ? 1 : -1]
#endif
// Alignment attributes for declarations. All expand to nothing when
// __GNUC__ is not defined.
//   ALIGNED8  - 8-byte boundary (for DMA requirements)
//   ALIGNED16 - 16-byte boundary (for audio lib requirements)
//   ALIGNED64 - 64-byte boundary
#if defined(__GNUC__)
#define ALIGNED8 __attribute__((aligned(8)))
#define ALIGNED16 __attribute__((aligned(16)))
#define ALIGNED64 __attribute__((aligned(64)))
#else
#define ALIGNED8
#define ALIGNED16
#define ALIGNED64
#endif
// Requests that a function always be inlined.
// Falls back to plain `inline` when __GNUC__ is not defined.
#if defined(__GNUC__)
#define ALWAYS_INLINE inline __attribute__((always_inline))
#else
#define ALWAYS_INLINE inline
#endif
// Marks a deliberate fall-through into the next switch case.
// Expands to nothing when __GNUC__ is not defined.
#if defined(__GNUC__)
#define FALL_THROUGH __attribute__((fallthrough))
#else
#define FALL_THROUGH
#endif
// Per-function optimization level overrides. All expand to nothing when
// __GNUC__ is not defined.
//   OPTIMIZE_OG    - compile the function with Og
//   OPTIMIZE_OS    - compile the function with Os
//   OPTIMIZE_OFAST - compile the function with Ofast
#if defined(__GNUC__)
#define OPTIMIZE_OG __attribute__((optimize("Og")))
#define OPTIMIZE_OS __attribute__((optimize("Os")))
#define OPTIMIZE_OFAST __attribute__((optimize("Ofast")))
#else
#define OPTIMIZE_OG
#define OPTIMIZE_OS
#define OPTIMIZE_OFAST
#endif
// Applies the GNU `packed` attribute to a struct so that its members are laid
// out without the usual alignment padding.
// Expands to nothing when __GNUC__ is not defined.
#if defined(__GNUC__)
#define PACKED __attribute__((packed))
#else
#define PACKED
#endif
// Alignment attributes for declarations. All expand to nothing when
// __GNUC__ is not defined.
//   ALIGNED4  - 4-byte boundary
//   ALIGNED8  - 8-byte boundary (for DMA requirements)
//   ALIGNED16 - 16-byte boundary (for audio lib requirements)
//   ALIGNED32 - 32-byte boundary
//   ALIGNED64 - 64-byte boundary
#if defined(__GNUC__)
#define ALIGNED4 __attribute__((aligned(4)))
#define ALIGNED8 __attribute__((aligned(8)))
#define ALIGNED16 __attribute__((aligned(16)))
#define ALIGNED32 __attribute__((aligned(32)))
#define ALIGNED64 __attribute__((aligned(64)))
#else
#define ALIGNED4
#define ALIGNED8
#define ALIGNED16
#define ALIGNED32
#define ALIGNED64
#endif
// Round a value up to the next multiple of a boundary.
// The mask arithmetic only produces a multiple when the boundary is a power
// of two. The boundary argument is expanded (and evaluated) twice.
// Only defined when no other header has already provided ALIGN.
#ifndef ALIGN
#define ALIGN(value, boundary) \
    (((value) + ((boundary) - 1)) & ~((boundary) - 1))
#endif

// Fixed-boundary shorthands: round up to the next multiple of 4/8/16/32/64.
#define ALIGN4(x) ALIGN((x), 4)
#define ALIGN8(x) ALIGN((x), 8)
#define ALIGN16(x) ALIGN((x), 16)
#define ALIGN32(x) ALIGN((x), 32)
#define ALIGN64(x) ALIGN((x), 64)
// Address conversion helpers.
// When NO_SEGMENTED_MEMORY is defined, the conversions reduce to plain casts.
#ifndef NO_SEGMENTED_MEMORY
// Convert a virtual address to physical (keeps the low 29 bits).
#define VIRTUAL_TO_PHYSICAL(addr) ((uintptr_t)(addr) & 0x1FFFFFFF)
// Convert a physical address to virtual (sets bit 31).
#define PHYSICAL_TO_VIRTUAL(addr) ((uintptr_t)(addr) | 0x80000000)
// Another way of converting virtual to physical: subtracts 0x80000000 and
// yields a u8 pointer rather than an integer.
#define VIRTUAL_TO_PHYSICAL2(addr) ((u8 *)(addr) - 0x80000000U)
#else // NO_SEGMENTED_MEMORY
// No conversion needed other than cast.
#define VIRTUAL_TO_PHYSICAL(addr) ((uintptr_t)(addr))
#define PHYSICAL_TO_VIRTUAL(addr) ((uintptr_t)(addr))
#define VIRTUAL_TO_PHYSICAL2(addr) ((void *)(addr))
#endif // NO_SEGMENTED_MEMORY
// Video mode identifiers, numbered consecutively from zero.
enum VIModes {
    MODE_NTSC = 0,
    MODE_MPAL = 1,
    MODE_PAL = 2,
};
// Static (compile-time) assertion.
// Without __GNUC__ it falls back to a typedef of a char array whose size is -1
// when the condition is false (the message text is ignored in that case);
// otherwise it maps directly onto _Static_assert.
#ifndef __GNUC__
#define STATIC_ASSERT(expr, text) typedef char GLUE2(static_assertion_failed, __LINE__)[(expr) ? 1 : -1]
#else
#define STATIC_ASSERT(expr, text) _Static_assert(expr, text)
#endif
// Expands to a braced block that stores 0 through a null pointer cast to
// vs8*, in order to crash on purpose.
// NOTE(review): vs8 is declared elsewhere - presumably a volatile s8; confirm.
#define FORCE_CRASH { *(vs8*)0 = 0; }

View File

@@ -73,8 +73,9 @@ extern "C" {
*/
/* Round 'value' (cast to u32) up to the next multiple of 'boundary'. */
/* Only defined when no other header has already provided ALIGN. */
#ifndef ALIGN
#define ALIGN(value, boundary) \
    (((u32)(value) + ((boundary)-1)) & ~((boundary)-1))
/* Note carried over from the original source: "commented out due to sm64 conflict". */
#endif
/***************************************
*

View File

@@ -9,8 +9,6 @@
#include "seqplayer.h"
#include "game/puppyprint.h"
#define ALIGN16(val) (((val) + 0xF) & ~0xF)
struct SharedDma {
/*0x0*/ u8 *buffer; // target, points to pre-allocated buffer
/*0x4*/ uintptr_t source; // device address

View File

@@ -8,8 +8,6 @@
#include "load.h"
#include "seqplayer.h"
#define ALIGN16(val) (((val) + 0xF) & ~0xF)
struct SharedDma {
/*0x0*/ u8 *buffer; // target, points to pre-allocated buffer
/*0x4*/ uintptr_t source; // device address

View File

@@ -38,8 +38,6 @@
aSetBuffer(pkt, 0, 0, c + DMEM_ADDR_WET_RIGHT_CH, d); \
aSaveBuffer(pkt, VIRTUAL_TO_PHYSICAL2(gSynthesisReverb.ringBuffer.right + (off)));
#define AUDIO_ALIGN(val, amnt) (((val) + (1 << amnt) - 1) & ~((1 << amnt) - 1))
#ifdef BETTER_REVERB
// Do not touch these values manually, unless you want potential for problems.
u8 gBetterReverbPreset = 0;
@@ -575,7 +573,7 @@ u64 *synthesis_resample_and_mix_reverb(u64 *cmd, s32 bufLen, s16 reverbIndex, s1
aMix(cmd++, 0, 0x8000 + gSynthesisReverbs[reverbIndex].reverbGain, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_WET_LEFT_CH);
} else {
startPad = (item->startPos & 0x7) * 2;
paddedLengthA = AUDIO_ALIGN(startPad + item->lengthA, 4);
paddedLengthA = ALIGN16(startPad + item->lengthA);
cmd = synthesis_load_reverb_ring_buffer(cmd, DMEM_ADDR_RESAMPLED, (item->startPos - startPad / 2), DEFAULT_LEN_1CH, reverbIndex);
if (item->lengthB != 0) {
@@ -729,7 +727,7 @@ u64 *synthesis_do_one_audio_update(s16 *aiBuf, s32 bufLen, u64 *cmd, s32 updateI
} else {
// Same as above but upsample the previously downsampled samples used for reverb first
t4 = (v1->startPos & 7) * 2;
ra = AUDIO_ALIGN(v1->lengthA + t4, 4);
ra = ALIGN16(v1->lengthA + t4);
aSetLoadBufferPair(cmd++, 0, v1->startPos - t4 / 2);
if (v1->lengthB != 0) {
// Ring buffer wrapped
@@ -1063,7 +1061,7 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
sp130 = s2 * 2;
} else {
s5Aligned = AUDIO_ALIGN(s5, 5);
s5Aligned = ALIGN32(s5);
aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3,
DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned, s0 * 2);
aADPCMdec(cmd++, flags,
@@ -1077,9 +1075,9 @@ u64 *synthesis_process_notes(s16 *aiBuf, s32 bufLen, u64 *cmd) {
aADPCMdec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->adpcmdecState));
sp130 = s2 * 2;
} else {
aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3, DMEM_ADDR_UNCOMPRESSED_NOTE + AUDIO_ALIGN(s5, 5), s0 * 2);
aSetBuffer(cmd++, 0, DMEM_ADDR_COMPRESSED_ADPCM_DATA + a3, DMEM_ADDR_UNCOMPRESSED_NOTE + ALIGN32(s5), s0 * 2);
aADPCMdec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->adpcmdecState));
aDMEMMove(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + AUDIO_ALIGN(s5, 5) + (s2 * 2), DMEM_ADDR_UNCOMPRESSED_NOTE + s5, (nSamplesInThisIteration) * 2);
aDMEMMove(cmd++, DMEM_ADDR_UNCOMPRESSED_NOTE + ALIGN32(s5) + (s2 * 2), DMEM_ADDR_UNCOMPRESSED_NOTE + s5, (nSamplesInThisIteration) * 2);
}
#endif

View File

@@ -17,8 +17,6 @@
#define MAX_UPDATES_PER_FRAME 4
#endif
#define ALIGN16(val) (((val) + 0xF) & ~0xF)
#ifdef BETTER_REVERB
#define NUM_ALLPASS 12 // Maximum number of delay filters to use with better reverb; do not change this value if you don't know what you're doing.

View File

@@ -33,9 +33,6 @@
aSetBuffer(pkt, 0, 0, c + DMEM_ADDR_WET_RIGHT_CH, d); \
aSaveBuffer(pkt, VIRTUAL_TO_PHYSICAL2(gSynthesisReverb.ringBuffer.right + (off)));
#undef ALIGN
#define ALIGN(val, amnt) (((val) + (1 << amnt) - 1) & ~((1 << amnt) - 1))
struct VolumeChange {
u16 sourceLeft;
u16 sourceRight;
@@ -212,7 +209,7 @@ u64 *synthesis_resample_and_mix_reverb(u64 *cmd, s32 bufLen, s16 reverbIndex, s1
aMix(cmd++, 0x8000 + gSynthesisReverbs[reverbIndex].reverbGain, DMEM_ADDR_WET_LEFT_CH, DMEM_ADDR_WET_LEFT_CH, DEFAULT_LEN_2CH);
} else {
startPad = (item->startPos & 0x7) * 2;
paddedLengthA = ALIGN(startPad + item->lengthA, 4);
paddedLengthA = ALIGN16(startPad + item->lengthA);
cmd = synthesis_load_reverb_ring_buffer(cmd, DMEM_ADDR_RESAMPLED, (item->startPos - startPad / 2), DEFAULT_LEN_1CH, reverbIndex);
if (item->lengthB != 0) {
@@ -542,13 +539,13 @@ u64 *synthesis_process_note(s32 noteIndex, struct NoteSubEu *noteSubEu, struct N
v0_2 = sp84 + (temp * unk_s6) + sampleAddr;
} else {
v0_2 = dma_sample_data((uintptr_t)(sp84 + (temp * unk_s6) + sampleAddr),
ALIGN(t0 * unk_s6 + 16, 4), flags, &synthesisState->sampleDmaIndex, audioBookSample->medium);
ALIGN16(t0 * unk_s6 + 16), flags, &synthesisState->sampleDmaIndex, audioBookSample->medium);
}
a3 = ((uintptr_t)v0_2 & 0xf);
aligned = ALIGN(t0 * unk_s6 + 16, 4);
aligned = ALIGN16(t0 * unk_s6 + 16);
addr = (DMEM_ADDR_COMPRESSED_ADPCM_DATA - aligned) & 0xffff;
aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(v0_2 - a3), addr, ALIGN(t0 * unk_s6 + 16, 4));
aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(v0_2 - a3), addr, ALIGN16(t0 * unk_s6 + 16));
} else {
s0 = 0;
a3 = 0;
@@ -562,13 +559,13 @@ u64 *synthesis_process_note(s32 noteIndex, struct NoteSubEu *noteSubEu, struct N
if (nAdpcmSamplesProcessed == 0) {
switch (audioBookSample->codec) {
case CODEC_ADPCM:
aligned = ALIGN(t0 * unk_s6 + 16, 4);
aligned = ALIGN16(t0 * unk_s6 + 16);
addr = (DMEM_ADDR_COMPRESSED_ADPCM_DATA - aligned) & 0xffff;
aSetBuffer(cmd++, 0, addr + a3, DMEM_ADDR_UNCOMPRESSED_NOTE, s0 * 2);
aADPCMdec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
break;
case CODEC_S8:
aligned = ALIGN(t0 * unk_s6 + 16, 4);
aligned = ALIGN16(t0 * unk_s6 + 16);
addr = (DMEM_ADDR_COMPRESSED_ADPCM_DATA - aligned) & 0xffff;
aSetBuffer(cmd++, 0, addr + a3, DMEM_ADDR_UNCOMPRESSED_NOTE, s0 * 2);
aS8Dec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
@@ -576,16 +573,16 @@ u64 *synthesis_process_note(s32 noteIndex, struct NoteSubEu *noteSubEu, struct N
}
sp130 = s2 * 2;
} else {
s5Aligned = ALIGN(s5 + 16, 4);
s5Aligned = ALIGN16(s5 + 16);
switch (audioBookSample->codec) {
case CODEC_ADPCM:
aligned = ALIGN(t0 * unk_s6 + 16, 4);
aligned = ALIGN16(t0 * unk_s6 + 16);
addr = (DMEM_ADDR_COMPRESSED_ADPCM_DATA - aligned) & 0xffff;
aSetBuffer(cmd++, 0, addr + a3, DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned, s0 * 2);
aADPCMdec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
break;
case CODEC_S8:
aligned = ALIGN(t0 * unk_s6 + 16, 4);
aligned = ALIGN16(t0 * unk_s6 + 16);
addr = (DMEM_ADDR_COMPRESSED_ADPCM_DATA - aligned) & 0xffff;
aSetBuffer(cmd++, 0, addr + a3, DMEM_ADDR_UNCOMPRESSED_NOTE + s5Aligned, s0 * 2);
aS8Dec(cmd++, flags, VIRTUAL_TO_PHYSICAL2(synthesisState->synthesisBuffers->adpcmdecState));
@@ -635,7 +632,7 @@ skip:
case 2:
switch (curPart) {
case 0:
aDownsampleHalf(cmd++, ALIGN(samplesLenAdjusted / 2, 3), DMEM_ADDR_UNCOMPRESSED_NOTE + sp130, DMEM_ADDR_RESAMPLED);
aDownsampleHalf(cmd++, ALIGN8(samplesLenAdjusted / 2), DMEM_ADDR_UNCOMPRESSED_NOTE + sp130, DMEM_ADDR_RESAMPLED);
resampledTempLen = samplesLenAdjusted;
noteSamplesDmemAddrBeforeResampling = DMEM_ADDR_RESAMPLED;
if (noteSubEu->finished != FALSE) {
@@ -643,7 +640,7 @@ skip:
}
break;
case 1:
aDownsampleHalf(cmd++, ALIGN(samplesLenAdjusted / 2, 3), DMEM_ADDR_UNCOMPRESSED_NOTE + sp130, resampledTempLen + DMEM_ADDR_RESAMPLED);
aDownsampleHalf(cmd++, ALIGN8(samplesLenAdjusted / 2), DMEM_ADDR_UNCOMPRESSED_NOTE + sp130, resampledTempLen + DMEM_ADDR_RESAMPLED);
break;
}
}
@@ -858,7 +855,7 @@ u64 *note_apply_headset_pan_effects(u64 *cmd, struct NoteSubEu *noteSubEu, struc
if (prevPanShift != 0) {
aLoadBuffer(cmd++, VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->panSamplesBuffer),
DMEM_ADDR_NOTE_PAN_TEMP, ALIGN(prevPanShift, 4));
DMEM_ADDR_NOTE_PAN_TEMP, ALIGN16(prevPanShift));
aDMEMMove(cmd++, DMEM_ADDR_TEMP, DMEM_ADDR_NOTE_PAN_TEMP + prevPanShift, bufLen + panShift - prevPanShift);
} else {
aDMEMMove(cmd++, DMEM_ADDR_TEMP, DMEM_ADDR_NOTE_PAN_TEMP, bufLen + panShift);
@@ -873,7 +870,7 @@ u64 *note_apply_headset_pan_effects(u64 *cmd, struct NoteSubEu *noteSubEu, struc
if (panShift) {
// Save excessive samples for next iteration
aSaveBuffer(cmd++, DMEM_ADDR_NOTE_PAN_TEMP + bufLen,
VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->panSamplesBuffer), ALIGN(panShift, 4));
VIRTUAL_TO_PHYSICAL2(note->synthesisBuffers->panSamplesBuffer), ALIGN16(panShift));
}
aAddMixer(cmd++, DMEM_ADDR_NOTE_PAN_TEMP, dest, (bufLen + 0x3f) & 0xffc0);

View File

@@ -22,11 +22,6 @@
#include "game/puppyprint.h"
// round up to the next multiple
#define ALIGN4(val) (((val) + 0x3) & ~0x3)
#define ALIGN8(val) (((val) + 0x7) & ~0x7)
#define ALIGN16(val) (((val) + 0xF) & ~0xF)
struct MainPoolState {
u32 freeSpace;
struct MainPoolBlock *listHeadL;

View File

@@ -29,6 +29,6 @@ ALIGNED8 u8 gGfxSPTaskStack[SP_DRAM_STACK_SIZE8];
ALIGNED8 u8 gGfxSPTaskYieldBuffer[OS_YIELD_DATA_SIZE];
#endif // UNF
// 0x200 bytes
struct SaveBuffer __attribute__ ((aligned (8))) gSaveBuffer;
ALIGNED8 struct SaveBuffer gSaveBuffer;
// 0x190a0 bytes
struct GfxPool gGfxPools[2];

View File

@@ -1487,9 +1487,7 @@ static ALWAYS_INLINE float construct_float(const float f)
// Converts a floating point matrix to a fixed point matrix
// Makes some assumptions about certain fields in the matrix, which will always be true for valid matrices.
__attribute__((optimize("Os")))
void mtxf_to_mtx_fast(s16* dst, float* src)
{
OPTIMIZE_OS void mtxf_to_mtx_fast(s16* dst, float* src) {
float scale = construct_float(65536.0f / WORLD_SCALE);
// Iterate over pairs of values in the input matrix
for (int i = 0; i < 8; i++)

View File

@@ -22,13 +22,13 @@ enum ParamTypes {
extern far char *parse_map(u32 pc);
static char insn_as_string[100];
typedef struct __attribute__((packed)) {
typedef struct PACKED {
u16 rd : 5;
u16 shift_amt : 5;
u16 function : 6;
} RTypeData;
typedef struct __attribute__((packed)) {
typedef struct PACKED {
u16 opcode : 6;
u16 rs : 5;
u16 rt : 5;
@@ -43,7 +43,7 @@ typedef union {
u32 d;
} InsnData;
typedef struct __attribute__((packed)) {
typedef struct PACKED {
u32 type;
u32 arbitraryParam;
u16 opcode : 6;

View File

@@ -3,6 +3,12 @@
#include "config.h"
// Video mode identifiers, numbered consecutively from zero.
enum VIModes {
    MODE_NTSC = 0,
    MODE_MPAL = 1,
    MODE_PAL = 2,
};
#define THREAD1_STACK 0x100
#define THREAD2_STACK 0x800
#define THREAD3_STACK 0x200

View File

@@ -18,8 +18,6 @@
#endif
#include "puppycam2.h"
#define ALIGN4(val) (((val) + 0x3) & ~0x3)
#define MENU_DATA_MAGIC 0x4849
#define SAVE_FILE_MAGIC 0x4441

View File

@@ -987,7 +987,7 @@ void gd_free(void *ptr) {
void *gd_allocblock(u32 size) {
void *block; // 1c
size = ALIGN(size, 8);
size = ALIGN8(size);
if ((sMemBlockPoolUsed + size) > sMemBlockPoolSize) {
gd_printf("gd_allocblock(): Failed request: %dk (%d bytes)\n", size / 1024, size);
gd_printf("gd_allocblock(): Heap usage: %dk (%d bytes) \n", sMemBlockPoolUsed / 1024,
@@ -1005,7 +1005,7 @@ void *gd_allocblock(u32 size) {
/* 24A318 -> 24A3E8 */
void *gd_malloc(u32 size, u8 perm) {
void *ptr; // 1c
size = ALIGN(size, 8);
size = ALIGN8(size);
ptr = gd_request_mem(size, perm);
if (ptr == NULL) {
@@ -2757,8 +2757,8 @@ s32 setup_view_buffers(const char *name, struct ObjView *view, UNUSED s32 ulx, U
view->colourBufs[1] = view->colourBufs[0];
}
view->colourBufs[0] = (void *) ALIGN((uintptr_t) view->colourBufs[0], 64);
view->colourBufs[1] = (void *) ALIGN((uintptr_t) view->colourBufs[1], 64);
view->colourBufs[0] = (void *) ALIGN64((uintptr_t) view->colourBufs[0]);
view->colourBufs[1] = (void *) ALIGN64((uintptr_t) view->colourBufs[1]);
stop_memtracker(memtrackerName);
if (view->colourBufs[0] == NULL || view->colourBufs[1] == NULL) {
@@ -2778,7 +2778,7 @@ s32 setup_view_buffers(const char *name, struct ObjView *view, UNUSED s32 ulx, U
if (view->zbuf == NULL) {
fatal_printf("Not enough DRAM for Z buffer\n");
}
view->zbuf = (void *) ALIGN((uintptr_t) view->zbuf, 64);
view->zbuf = (void *) ALIGN64((uintptr_t) view->zbuf);
}
stop_memtracker(memtrackerName);
} else {