From e77e6683d9a775df52c35121180712edcedd37fe Mon Sep 17 00:00:00 2001 From: Tharo <17233964+Thar0@users.noreply.github.com> Date: Thu, 27 Nov 2025 15:33:59 +0000 Subject: [PATCH] Support building for multiple MIPS ABIs: o32 (default), n32 and eabi32 (#135) * Support building for multiple MIPS ABIs: o32 (default), n32 and eabi32 * Default is EABI32 * Try fix mgu register name error * Restore comment * Make o32 default again, warn about bad emulators --- Makefile | 45 +++-- include/ultra64/asm.h | 9 +- include/ultra64/regdef.h | 249 +++++++++++++++++++++++----- include/ultra64/thread.h | 42 +++++ include/ultra64/xstdio.h | 3 +- src/gcc_fix/missing_gcc_functions.c | 19 +++ src/libultra/mgu/scale.s | 19 ++- src/libultra/mgu/translate.s | 18 ++ src/libultra/os/createthread.c | 4 + src/libultra/os/exceptasm.s | 59 ++++++- src/libultra/os/initialize.c | 7 + 11 files changed, 411 insertions(+), 63 deletions(-) diff --git a/Makefile b/Makefile index 3a7e3a700..8a90a5c3e 100644 --- a/Makefile +++ b/Makefile @@ -49,6 +49,9 @@ REGION ?= US # This means the debug test scenes and some debug graphics in the elf_msg actors will not work as expected. 
# This may also be used to disable debug features on debug ROMs by setting DEBUG_FEATURES to 0 DEBUG_FEATURES ?= 1 +# MIPS ABI, can be one of "32", "n32" or "eabi" +# Note n32 and/or eabi may break some old emulators or emulator versions +ABI := 32 # Version-specific settings REGIONAL_CHECKSUM := 0 @@ -266,12 +269,12 @@ EGCS_PREFIX := tools/egcs/$(DETECTED_OS)/ EGCS_CC := $(EGCS_PREFIX)gcc -B $(EGCS_PREFIX) EGCS_CCAS := $(EGCS_CC) -x assembler-with-cpp -AS := $(MIPS_BINUTILS_PREFIX)as -LD := $(MIPS_BINUTILS_PREFIX)ld -OBJCOPY := $(MIPS_BINUTILS_PREFIX)objcopy -OBJDUMP := $(MIPS_BINUTILS_PREFIX)objdump -NM := $(MIPS_BINUTILS_PREFIX)nm -STRIP := $(MIPS_BINUTILS_PREFIX)strip +AS := $(MIPS_BINUTILS_PREFIX)as +LD := $(MIPS_BINUTILS_PREFIX)ld +OBJCOPY := $(MIPS_BINUTILS_PREFIX)objcopy +OBJDUMP := $(MIPS_BINUTILS_PREFIX)objdump +NM := $(MIPS_BINUTILS_PREFIX)nm +STRIP := $(MIPS_BINUTILS_PREFIX)strip # Command prefix to preprocess a file before running the compiler PREPROCESS := @@ -342,12 +345,24 @@ ifeq ($(DEBUG_FEATURES),1) GBI_DEFINES += -DGBI_DEBUG endif +ABI_FLAGS := + +# Select ld output format based on toolchain default and any additional ABI-specific flags +ifeq ($(ABI),n32) + LD_OUTPUT_FORMAT := $(shell $(LD) --print-output-format | sed -E 's/elf(32|64)-(n)?(trad)?(big|little)mips/elf\1-n\3\4mips/') +else + ifeq ($(ABI),eabi) + ABI_FLAGS += -mgp32 -mfp32 + endif + LD_OUTPUT_FORMAT := $(shell $(LD) --print-output-format | sed -E 's/elf(32|64)-(n)?(trad)?(big|little)mips/elf\1-\3\4mips/') +endif + CPPFLAGS += -P -xc -fno-dollars-in-identifiers $(CPP_DEFINES) -ASFLAGS += -march=vr4300 -32 -no-pad-sections -Iinclude -I$(EXTRACTED_DIR) +ASFLAGS += -march=vr4300 -mabi=$(ABI) $(ABI_FLAGS) -no-pad-sections -Iinclude -I$(EXTRACTED_DIR) ifeq ($(COMPILER),gcc) - CFLAGS += $(CPP_DEFINES) $(GBI_DEFINES) -G 0 -nostdinc -MD -MP $(INC) -march=vr4300 -mfix4300 -mabi=32 -mno-abicalls -mdivide-breaks -fno-PIC -fno-common -ffreestanding -funsigned-char -fbuiltin -fno-builtin-sinf 
-fno-builtin-cosf $(CHECK_WARNINGS) - CCASFLAGS += $(CPP_DEFINES) $(GBI_DEFINES) -G 0 -nostdinc -MD -MP $(INC) -march=vr4300 -mfix4300 -mabi=32 -mno-abicalls -fno-PIC -fno-common -Wa,-no-pad-sections + CFLAGS += $(CPP_DEFINES) $(GBI_DEFINES) -G 0 -nostdinc -MD -MP $(INC) -march=vr4300 -mfix4300 -mabi=$(ABI) $(ABI_FLAGS) -mno-abicalls -mdivide-breaks -fno-PIC -fno-common -ffreestanding -funsigned-char -fbuiltin -fno-builtin-sinf -fno-builtin-cosf $(CHECK_WARNINGS) + CCASFLAGS += $(CPP_DEFINES) $(GBI_DEFINES) -G 0 -nostdinc -MD -MP $(INC) -march=vr4300 -mfix4300 -mabi=$(ABI) $(ABI_FLAGS) -mno-abicalls -fno-PIC -fno-common -Wa,-no-pad-sections MIPS_VERSION := -mips3 CC_CHECK = @: endif @@ -631,6 +646,8 @@ $(ROM): $(ELF) $(V)$(ELF2ROM) -cic $(CIC) $< $@ @$(PRINT) "==== Build Options ====$(NO_COL)\n" @$(PRINT) "${GREEN}OoT Version: $(BLUE)$(VERSION)$(NO_COL)\n" + @$(PRINT) "${GREEN}MIPS Toolchain: $(BLUE)$(MIPS_BINUTILS_PREFIX)$(NO_COL)\n" + @$(PRINT) "${GREEN}MIPS ABI: $(BLUE)$(ABI)$(NO_COL)\n" @$(PRINT) "${GREEN}Code Version: $(BLUE)$(PACKAGE_VERSION)$(NO_COL)\n" @$(PRINT) "${GREEN}Debug Build: $(BLUE)$(DEBUG_FEATURES)$(NO_COL)\n" @$(PRINT) "${GREEN}Opt. 
Flags: $(BLUE)$(OPTFLAGS)$(NO_COL)\n" @@ -652,7 +669,7 @@ endif COM_PLUGIN := tools/com-plugin/common-plugin.so -LDFLAGS := -T $(LDSCRIPT) -T $(BUILD_DIR)/linker_scripts/makerom.ld -T $(BUILD_DIR)/undefined_syms.txt --no-check-sections --accept-unknown-input-arch --emit-relocs -Map $(MAP) +LDFLAGS := --oformat $(LD_OUTPUT_FORMAT) -T $(LDSCRIPT) -T $(BUILD_DIR)/linker_scripts/makerom.ld -T $(BUILD_DIR)/undefined_syms.txt --no-check-sections --accept-unknown-input-arch --emit-relocs -Map $(MAP) ifeq ($(PLATFORM),IQUE) ifeq ($(NON_MATCHING),0) LDFLAGS += -plugin $(COM_PLUGIN) -plugin-opt order=$(BASEROM_DIR)/bss-order.txt @@ -765,7 +782,7 @@ $(BUILD_DIR)/src/%.o: src/%.s # Incremental link to move z_message and z_game_over data into rodata $(BUILD_DIR)/src/code/z_message_z_game_over.o: $(BUILD_DIR)/src/code/z_message.o $(BUILD_DIR)/src/code/z_game_over.o - $(V)$(LD) -r -G 0 -T linker_scripts/data_with_rodata.ld -o $@ $^ + $(V)$(LD) -r -G 0 --oformat $(LD_OUTPUT_FORMAT) -T linker_scripts/data_with_rodata.ld -o $@ $^ $(V)$(PYTHON) tools/patch_data_with_rodata_mdebug.py $@ DEP_FILES += $(BUILD_DIR)/src/code/z_message.d $(BUILD_DIR)/src/code/z_game_over.d @@ -788,7 +805,7 @@ $(BUILD_DIR)/src/audio/game/session_init.o: src/audio/game/session_init.c $(BUIL $(call print_two_args,Compiling:,$<,$@) $(V)$(CC_CHECK) $< -o $@ $(V)$(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $(@:.o=.tmp) $< - $(V)$(LD) -r -T linker_scripts/data_with_rodata.ld -o $@ $(@:.o=.tmp) + $(V)$(LD) -r --oformat $(LD_OUTPUT_FORMAT) -T linker_scripts/data_with_rodata.ld -o $@ $(@:.o=.tmp) $(V)$(PYTHON) tools/patch_data_with_rodata_mdebug.py $@ $(V)$(OBJDUMP_CMD) @@ -906,7 +923,7 @@ $(BUILD_DIR)/assets/audio/soundfonts/%.o: $(BUILD_DIR)/assets/audio/soundfonts/% # compile c to unlinked object $(V)$(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -I include/audio -o $(@:.o=.tmp) $< # partial link - $(V)$(LD) -r -T linker_scripts/soundfont.ld $(@:.o=.tmp) -o $(@:.o=.tmp2) + $(V)$(LD) -r --oformat 
$(LD_OUTPUT_FORMAT) -T linker_scripts/soundfont.ld $(@:.o=.tmp) -o $(@:.o=.tmp2) # patch defined symbols to be ABS symbols so that they remain file-relative offsets forever $(V)$(SFPATCH) $(@:.o=.tmp2) $(@:.o=.tmp2) # write start and size symbols afterwards, filename != symbolic name so source symbolic name from the .name file written by sfc @@ -963,7 +980,7 @@ $(BUILD_DIR)/src/audio/tables/sequence_table.o: CFLAGS += -I include/tables $(BUILD_DIR)/src/audio/tables/%.o: src/audio/tables/%.c $(V)$(CC_CHECK) $< -o $@ $(V)$(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $(@:.o=.tmp) $< - $(V)$(LD) -r -T linker_scripts/data_with_rodata.ld $(@:.o=.tmp) -o $@ + $(V)$(LD) -r --oformat $(LD_OUTPUT_FORMAT) -T linker_scripts/data_with_rodata.ld $(@:.o=.tmp) -o $@ $(V)$(PYTHON) tools/patch_data_with_rodata_mdebug.py $@ @$(RM) $(@:.o=.tmp) diff --git a/include/ultra64/asm.h b/include/ultra64/asm.h index d887673fa..82165db39 100644 --- a/include/ultra64/asm.h +++ b/include/ultra64/asm.h @@ -108,12 +108,19 @@ .set x, y #endif +#if !defined(_MIPS_SIM) || _MIPS_SIM != _ABIO32 +.set $t4, $ta0 +.set $t5, $ta1 +.set $t6, $ta2 +.set $t7, $ta3 +#endif + #endif /** * Stack Alignment */ -#if (_MIPS_SIM == _ABIO32) +#if defined(_MIPS_SIM) && (_MIPS_SIM == _ABIO32) #define NARGSAVE 4 /* space for 4 args must be allocated */ #define ALSZ (8-1) #define ALMASK ~(8-1) diff --git a/include/ultra64/regdef.h b/include/ultra64/regdef.h index a3b283218..94e79bbaa 100644 --- a/include/ultra64/regdef.h +++ b/include/ultra64/regdef.h @@ -7,7 +7,92 @@ #define _MIPS_SIM_ABI64 _ABI64 #endif -#if (_MIPS_SIM == _MIPS_SIM_ABI32) +#if !defined(_MIPS_SIM) /* EABI32 */ + +#define zero $0 +#define AT $at +#define v0 $2 +#define v1 $3 +#define a0 $4 +#define a1 $5 +#define a2 $6 +#define a3 $7 +#define t0 $8 +#define a4 $8 +#define t1 $9 +#define a5 $9 +#define t2 $10 +#define a6 $10 +#define t3 $11 +#define a7 $11 +#define t4 $12 +#define ta0 $12 +#define t5 $13 +#define ta1 $13 +#define t6 $14 +#define ta2 
$14 +#define t7 $15 +#define ta3 $15 +#define s0 $16 +#define s1 $17 +#define s2 $18 +#define s3 $19 +#define s4 $20 +#define s5 $21 +#define s6 $22 +#define s7 $23 +#define t8 $24 +#define t9 $25 +#define jp $25 +#define k0 $26 +#define k1 $27 +#define GP $28 +#define sp $29 +#define fp $30 +#define s8 $30 +#define ra $31 + +#define fv0 $f0 +#define fv0f $f1 +#define fv1 $f2 +#define fv1f $f3 +#define ft0 $f4 +#define ft0f $f5 +#define ft1 $f6 +#define ft1f $f7 +#define ft2 $f8 +#define ft2f $f9 +#define ft3 $f10 +#define ft3f $f11 +#define fa0 $f12 +#define fa0f $f13 +#define fa1 $f14 +#define fa1f $f15 +#define fa2 $f16 +#define fa2f $f17 +#define ft4 $f16 +#define ft4f $f17 +#define fa3 $f18 +#define fa3f $f19 +#define ft5 $f18 +#define ft5f $f19 +#define fs0 $f20 +#define fs0f $f21 +#define fs1 $f22 +#define fs1f $f23 +#define fs2 $f24 +#define fs2f $f25 +#define fs3 $f26 +#define fs3f $f27 +#define fs4 $f28 +#define fs4f $f29 +#define fs5 $f30 +#define fs5f $f31 + +#else + +#if (_MIPS_SIM == _MIPS_SIM_ABI32) /* O32 */ + #define zero $0 #define AT $at #define v0 $2 @@ -41,14 +126,130 @@ #define jp $25 #define k0 $26 #define k1 $27 -#define gp $28 +#define GP $28 #define sp $29 #define fp $30 #define s8 $30 #define ra $31 + +#define fv0 $f0 +#define fv0f $f1 +#define fv1 $f2 +#define fv1f $f3 +#define fa0 $f12 +#define fa0f $f13 +#define fa1 $f14 +#define fa1f $f15 +#define ft0 $f4 +#define ft0f $f5 +#define ft1 $f6 +#define ft1f $f7 +#define ft2 $f8 +#define ft2f $f9 +#define ft3 $f10 +#define ft3f $f11 +#define ft4 $f16 +#define ft4f $f17 +#define ft5 $f18 +#define ft5f $f19 +#define fs0 $f20 +#define fs0f $f21 +#define fs1 $f22 +#define fs1f $f23 +#define fs2 $f24 +#define fs2f $f25 +#define fs3 $f26 +#define fs3f $f27 +#define fs4 $f28 +#define fs4f $f29 +#define fs5 $f30 +#define fs5f $f31 + #endif -#if (_MIPS_SIM == _MIPS_SIM_ABI64) +#if (_MIPS_SIM == _MIPS_SIM_NABI32) /* N32 */ + +#define zero $0 +#define AT $1 +#define v0 $2 +#define v1 $3 +#define a0 
$4 +#define a1 $5 +#define a2 $6 +#define a3 $7 +#define a4 $8 +#define t0 $8 +#define ta0 $8 +#define a5 $9 +#define t1 $9 +#define ta1 $9 +#define a6 $10 +#define t2 $10 +#define ta2 $10 +#define a7 $11 +#define t3 $11 +#define ta3 $11 +#define t4 $12 +#define t5 $13 +#define t6 $14 +#define t7 $15 +#define s0 $16 +#define s1 $17 +#define s2 $18 +#define s3 $19 +#define s4 $20 +#define s5 $21 +#define s6 $22 +#define s7 $23 +#define t8 $24 +#define t9 $25 +#define k0 $26 +#define kt0 $26 +#define k1 $27 +#define kt1 $27 +#define GP $28 +#define sp $29 +#define s8 $30 +#define fp $30 +#define ra $31 + +#define fv0 $f0 +#define ft14 $f1 +#define fv1 $f2 +#define ft15 $f3 +#define ft0 $f4 +#define ft1 $f5 +#define ft2 $f6 +#define ft3 $f7 +#define ft4 $f8 +#define ft5 $f9 +#define ft6 $f10 +#define ft7 $f11 +#define fa0 $f12 +#define fa1 $f13 +#define fa2 $f14 +#define fa3 $f15 +#define fa4 $f16 +#define fa5 $f17 +#define fa6 $f18 +#define fa7 $f19 +#define fs0 $f20 +#define ft8 $f21 +#define fs1 $f22 +#define ft9 $f23 +#define fs2 $f24 +#define ft10 $f25 +#define fs3 $f26 +#define ft11 $f27 +#define fs4 $f28 +#define ft12 $f29 +#define fs5 $f30 +#define ft13 $f31 + +#endif + +#if (_MIPS_SIM == _MIPS_SIM_ABI64) /* O64 */ + #define zero $0 #define AT $at #define v0 $2 @@ -82,49 +283,12 @@ #define jp $25 #define k0 $26 #define k1 $27 -#define gp $28 +#define GP $28 #define sp $29 #define fp $30 #define s8 $30 #define ra $31 -#endif -#if (_MIPS_SIM == _MIPS_SIM_ABI32) -#define fv0 $f0 -#define fv0f $f1 -#define fv1 $f2 -#define fv1f $f3 -#define fa0 $f12 -#define fa0f $f13 -#define fa1 $f14 -#define fa1f $f15 -#define ft0 $f4 -#define ft0f $f5 -#define ft1 $f6 -#define ft1f $f7 -#define ft2 $f8 -#define ft2f $f9 -#define ft3 $f10 -#define ft3f $f11 -#define ft4 $f16 -#define ft4f $f17 -#define ft5 $f18 -#define ft5f $f19 -#define fs0 $f20 -#define fs0f $f21 -#define fs1 $f22 -#define fs1f $f23 -#define fs2 $f24 -#define fs2f $f25 -#define fs3 $f26 -#define fs3f $f27 
-#define fs4 $f28 -#define fs4f $f29 -#define fs5 $f30 -#define fs5f $f31 -#endif - -#if (_MIPS_SIM == _MIPS_SIM_ABI64) #define fv0 $f0 #define fv1 $f2 #define fa0 $f12 @@ -157,6 +321,9 @@ #define fs5 $f29 #define fs6 $f30 #define fs7 $f31 + +#endif + #endif #define fcr31 $31 diff --git a/include/ultra64/thread.h b/include/ultra64/thread.h index 3c2733f15..6737261f0 100644 --- a/include/ultra64/thread.h +++ b/include/ultra64/thread.h @@ -43,8 +43,15 @@ typedef struct __OSThreadContext { /* 0x0E8 */ u64 lo, hi; /* 0x0F8 */ u32 sr, pc, cause, badvaddr, rcp; /* 0x10C */ u32 fpcsr; +#if !defined(_MIPS_SIM) || _MIPS_SIM != _ABIN32 /* 0x110 */ __OSfp fp0, fp2, fp4, fp6, fp8, fp10, fp12, fp14; /* 0x150 */ __OSfp fp16, fp18, fp20, fp22, fp24, fp26, fp28, fp30; +#else + __OSfp fp0, fp1, fp2, fp3, fp4, fp5, fp6, fp7; + __OSfp fp8, fp9, fp10, fp11, fp12, fp13, fp14, fp15; + __OSfp fp16, fp17, fp18, fp19, fp20, fp21, fp22, fp23; + __OSfp fp24, fp25, fp26, fp27, fp28, fp29, fp30, fp31; +#endif } __OSThreadContext; // size = 0x190 typedef struct __OSThreadprofile { @@ -122,6 +129,7 @@ typedef struct __OSThreadTail { #define THREAD_BADVADDR (THREAD_CONTEXT + 0x104) #define THREAD_RCP (THREAD_CONTEXT + 0x108) #define THREAD_FPCSR (THREAD_CONTEXT + 0x10C) +#if !defined(_MIPS_SIM) || _MIPS_SIM != _ABIN32 #define THREAD_FP0 (THREAD_CONTEXT + 0x110) #define THREAD_FP2 (THREAD_CONTEXT + 0x118) #define THREAD_FP4 (THREAD_CONTEXT + 0x120) @@ -138,6 +146,40 @@ typedef struct __OSThreadTail { #define THREAD_FP26 (THREAD_CONTEXT + 0x178) #define THREAD_FP28 (THREAD_CONTEXT + 0x180) #define THREAD_FP30 (THREAD_CONTEXT + 0x188) +#else +#define THREAD_FP0 (THREAD_CONTEXT + 0x110) +#define THREAD_FP1 (THREAD_CONTEXT + 0x118) +#define THREAD_FP2 (THREAD_CONTEXT + 0x120) +#define THREAD_FP3 (THREAD_CONTEXT + 0x128) +#define THREAD_FP4 (THREAD_CONTEXT + 0x130) +#define THREAD_FP5 (THREAD_CONTEXT + 0x138) +#define THREAD_FP6 (THREAD_CONTEXT + 0x140) +#define THREAD_FP7 (THREAD_CONTEXT + 0x148) 
+#define THREAD_FP8 (THREAD_CONTEXT + 0x150) +#define THREAD_FP9 (THREAD_CONTEXT + 0x158) +#define THREAD_FP10 (THREAD_CONTEXT + 0x160) +#define THREAD_FP11 (THREAD_CONTEXT + 0x168) +#define THREAD_FP12 (THREAD_CONTEXT + 0x170) +#define THREAD_FP13 (THREAD_CONTEXT + 0x178) +#define THREAD_FP14 (THREAD_CONTEXT + 0x180) +#define THREAD_FP15 (THREAD_CONTEXT + 0x188) +#define THREAD_FP16 (THREAD_CONTEXT + 0x190) +#define THREAD_FP17 (THREAD_CONTEXT + 0x198) +#define THREAD_FP18 (THREAD_CONTEXT + 0x1A0) +#define THREAD_FP19 (THREAD_CONTEXT + 0x1A8) +#define THREAD_FP20 (THREAD_CONTEXT + 0x1B0) +#define THREAD_FP21 (THREAD_CONTEXT + 0x1B8) +#define THREAD_FP22 (THREAD_CONTEXT + 0x1C0) +#define THREAD_FP23 (THREAD_CONTEXT + 0x1C8) +#define THREAD_FP24 (THREAD_CONTEXT + 0x1D0) +#define THREAD_FP25 (THREAD_CONTEXT + 0x1D8) +#define THREAD_FP26 (THREAD_CONTEXT + 0x1E0) +#define THREAD_FP27 (THREAD_CONTEXT + 0x1E8) +#define THREAD_FP28 (THREAD_CONTEXT + 0x1F0) +#define THREAD_FP29 (THREAD_CONTEXT + 0x1F8) +#define THREAD_FP30 (THREAD_CONTEXT + 0x200) +#define THREAD_FP31 (THREAD_CONTEXT + 0x208) +#endif #endif diff --git a/include/ultra64/xstdio.h b/include/ultra64/xstdio.h index 184ee4907..aa3a3957e 100644 --- a/include/ultra64/xstdio.h +++ b/include/ultra64/xstdio.h @@ -4,7 +4,8 @@ #include "stdarg.h" // IDO doesn't support long double types, improve portability for compilers supporting them -#ifdef __sgi +// Also make N32/N64 ABIs use double, TODO support long double in these ABIs (128-bit float) +#if defined(__sgi) || (defined(_MIPS_SIM) && _MIPS_SIM != _ABIO32) #define LONG_DOUBLE_TYPE double #else #define LONG_DOUBLE_TYPE long double diff --git a/src/gcc_fix/missing_gcc_functions.c b/src/gcc_fix/missing_gcc_functions.c index 26fd449a6..4a3256001 100644 --- a/src/gcc_fix/missing_gcc_functions.c +++ b/src/gcc_fix/missing_gcc_functions.c @@ -163,6 +163,16 @@ int __ucmpdi2(unsigned long long a, unsigned long long b) { // Compute division and modulo of 64-bit signed and 
unsigned integers +// eabi requires the following functions, but also needs to allocate stack +#if !defined(_MIPS_SIM) +#define STACK_ALLOC "addiu $sp, $sp, -0x10" +#define STACK_DEALLOC "addiu $sp, $sp, 0x10" +#else +#define STACK_ALLOC "" +#define STACK_DEALLOC "" +#endif + +#if !defined(_MIPS_SIM) || _MIPS_SIM == _ABIO32 __asm__(" \n\ .set push \n\ .set noreorder \n\ @@ -172,6 +182,7 @@ __asm__(" \n\ __umoddi3: \n\ .type __umoddi3, @function \n\ .ent __umoddi3 \n\ + " STACK_ALLOC " \n\ sw $a0, 0x0($sp) \n\ sw $a1, 0x4($sp) \n\ sw $a2, 0x8($sp) \n\ @@ -181,6 +192,7 @@ __umoddi3: \n\ dremu $v0, $t6, $t7 \n\ dsll32 $v1, $v0, 0 \n\ dsra32 $v1, $v1, 0 \n\ + " STACK_DEALLOC " \n\ jr $ra \n\ dsra32 $v0, $v0, 0 \n\ .end __umoddi3 \n\ @@ -190,6 +202,7 @@ __umoddi3: \n\ __udivdi3: \n\ .type __udivdi3, @function \n\ .ent __udivdi3 \n\ + " STACK_ALLOC " \n\ sw $a0, 0x0($sp) \n\ sw $a1, 0x4($sp) \n\ sw $a2, 0x8($sp) \n\ @@ -199,6 +212,7 @@ __udivdi3: \n\ ddivu $v0, $t6, $t7 \n\ dsll32 $v1, $v0, 0 \n\ dsra32 $v1, $v1, 0 \n\ + " STACK_DEALLOC " \n\ jr $ra \n\ dsra32 $v0, $v0, 0 \n\ .end __udivdi3 \n\ @@ -208,6 +222,7 @@ __udivdi3: \n\ __moddi3: \n\ .type __moddi3, @function \n\ .ent __moddi3 \n\ + " STACK_ALLOC " \n\ sw $a0, 0x0($sp) \n\ sw $a1, 0x4($sp) \n\ sw $a2, 0x8($sp) \n\ @@ -217,6 +232,7 @@ __moddi3: \n\ drem $v0, $t6, $t7 \n\ dsll32 $v1, $v0, 0 \n\ dsra32 $v1, $v1, 0 \n\ + " STACK_DEALLOC " \n\ jr $ra \n\ dsra32 $v0, $v0, 0 \n\ .end __moddi3 \n\ @@ -226,6 +242,7 @@ __moddi3: \n\ __divdi3: \n\ .type __divdi3, @function \n\ .ent __divdi3 \n\ + " STACK_ALLOC " \n\ sw $a0, 0x0($sp) \n\ sw $a1, 0x4($sp) \n\ sw $a2, 0x8($sp) \n\ @@ -235,6 +252,7 @@ __divdi3: \n\ ddiv $v0, $t6, $t7 \n\ dsll32 $v1, $v0, 0 \n\ dsra32 $v1, $v1, 0 \n\ + " STACK_DEALLOC " \n\ jr $ra \n\ dsra32 $v0, $v0, 0 \n\ .end __divdi3 \n\ @@ -242,3 +260,4 @@ __divdi3: \n\ \n\ .set pop \n\ \n"); +#endif diff --git a/src/libultra/mgu/scale.s b/src/libultra/mgu/scale.s index 5661b940c..160b3595a 100644 --- 
a/src/libultra/mgu/scale.s +++ b/src/libultra/mgu/scale.s @@ -1,14 +1,23 @@ #include "ultra64/asm.h" #include "ultra64/regdef.h" +#ifdef _MIPS_SIM +#define IS_O32 (_MIPS_SIM == _ABIO32) +#else +#define IS_O32 false +#endif + .text .align 5 LEAF(guScale) li.s ft0, 65536.0 - +#if IS_O32 mtc1 a1, ft1 mul.s ft2, ft1, ft0 +#else + mul.s ft2, fa0, ft0 +#endif trunc.w.s ft3, ft2 mfc1 t1, ft3 @@ -18,8 +27,12 @@ LEAF(guScale) sll t2, t1, 0x10 sw t2, 0x20(a0) +#if IS_O32 mtc1 a2, ft1 mul.s ft2, ft1, ft0 +#else + mul.s ft2, fa1, ft0 +#endif trunc.w.s ft3, ft2 mfc1 t1, ft3 @@ -28,8 +41,12 @@ LEAF(guScale) andi t2, t1, 0xFFFF sw t2, 0x28(a0) +#if IS_O32 mtc1 a3, ft1 mul.s ft2, ft1, ft0 +#else + mul.s ft2, fa2, ft0 +#endif trunc.w.s ft3, ft2 mfc1 t1, ft3 diff --git a/src/libultra/mgu/translate.s b/src/libultra/mgu/translate.s index cf2d24643..9115557e1 100644 --- a/src/libultra/mgu/translate.s +++ b/src/libultra/mgu/translate.s @@ -1,19 +1,33 @@ #include "ultra64/asm.h" #include "ultra64/regdef.h" +#ifdef _MIPS_SIM +#define IS_O32 (_MIPS_SIM == _ABIO32) +#else +#define IS_O32 false +#endif + .text .align 5 LEAF(guTranslate) li.s ft0, 65536.0 +#if IS_O32 mtc1 a1, ft1 mul.s ft2, ft1, ft0 +#else + mul.s ft2, fa0, ft0 +#endif trunc.w.s ft3, ft2 mfc1 t1, ft3 +#if IS_O32 mtc1 a2, ft1 mul.s ft2, ft1, ft0 +#else + mul.s ft2, fa1, ft0 +#endif trunc.w.s ft3, ft2 mfc1 t3, ft3 @@ -29,8 +43,12 @@ LEAF(guTranslate) or t0, t0, t2 sw t0, 0x38(a0) +#if IS_O32 mtc1 a3, ft1 mul.s ft2, ft1, ft0 +#else + mul.s ft2, fa2, ft0 +#endif trunc.w.s ft3, ft2 mfc1 t1, ft3 diff --git a/src/libultra/os/createthread.c b/src/libultra/os/createthread.c index 933220797..fe3e36936 100644 --- a/src/libultra/os/createthread.c +++ b/src/libultra/os/createthread.c @@ -15,7 +15,11 @@ void osCreateThread(OSThread* thread, OSId id, void (*entry)(void*), void* arg, thread->context.ra = (u64)(s32)__osCleanupThread; mask = OS_IM_ALL; +#if defined(_MIPS_SIM) && _MIPS_SIM == _ABIN32 + thread->context.sr = (mask & OS_IM_CPU) | 
SR_EXL | SR_FR; +#else thread->context.sr = (mask & OS_IM_CPU) | SR_EXL; +#endif thread->context.rcp = (mask & RCP_IMASK) >> RCP_IMASKSHIFT; thread->context.fpcsr = FPCSR_FS | FPCSR_EV; thread->fp = 0; diff --git a/src/libultra/os/exceptasm.s b/src/libultra/os/exceptasm.s index 3a3919061..7f46a7049 100644 --- a/src/libultra/os/exceptasm.s +++ b/src/libultra/os/exceptasm.s @@ -1,6 +1,3 @@ -#ifdef __GNUC__ -.set gp=64 -#endif #include "ultra64/asm.h" #include "ultra64/regdef.h" #include "ultra64/R4300.h" @@ -98,7 +95,9 @@ LEAF(__osException) /* Load scratch space for thread saving */ la k0, __osThreadSave /* Save at */ +.set push; .set gp=64 sd AT, THREAD_AT(k0) +.set pop .set at /* Save sr */ MFC0( k1, C0_SR) @@ -107,9 +106,11 @@ LEAF(__osException) and k1, k1, ~(SR_IE | SR_EXL) MTC0( k1, C0_SR) /* Save some temp registers for use in the following */ +.set push; .set gp=64 sd t0, THREAD_T0(k0) sd t1, THREAD_T1(k0) sd t2, THREAD_T2(k0) +.set pop /* Mark FPU as unused */ sw zero, THREAD_FP(k0) /* Left over from misplaced ifdef, immediately overwritten on next instruction */ @@ -119,6 +120,7 @@ savecontext: /* Save the previously running thread's context to be restored when it resumes */ move t0, k0 lw k0, __osRunningThread +.set push; .set gp=64 ld t1, THREAD_AT(t0) sd t1, THREAD_AT(k0) ld t1, THREAD_SR(t0) @@ -129,7 +131,9 @@ savecontext: sd t1, THREAD_T1(k0) ld t1, THREAD_T2(t0) sd t1, THREAD_T2(k0) +.set pop .set reorder +.set push; .set gp=64 sd $2, THREAD_V0(k0) sd $3, THREAD_V1(k0) sd $4, THREAD_A0(k0) @@ -159,6 +163,7 @@ savecontext: sd t0, THREAD_LO(k0) mfhi t0 sd t0, THREAD_HI(k0) +.set pop lw k1, THREAD_SR(k0) andi t1, k1, SR_IMASK beqz t1, savercp @@ -220,6 +225,24 @@ endrcp: sdc1 $f26, THREAD_FP26(k0) sdc1 $f28, THREAD_FP28(k0) sdc1 $f30, THREAD_FP30(k0) +#if defined(_MIPS_SIM) && _MIPS_SIM == _ABIN32 + sdc1 $f1, THREAD_FP1(k0) + sdc1 $f3, THREAD_FP3(k0) + sdc1 $f5, THREAD_FP5(k0) + sdc1 $f7, THREAD_FP7(k0) + sdc1 $f9, THREAD_FP9(k0) + sdc1 $f11, 
THREAD_FP11(k0) + sdc1 $f13, THREAD_FP13(k0) + sdc1 $f15, THREAD_FP15(k0) + sdc1 $f17, THREAD_FP17(k0) + sdc1 $f19, THREAD_FP19(k0) + sdc1 $f21, THREAD_FP21(k0) + sdc1 $f23, THREAD_FP23(k0) + sdc1 $f25, THREAD_FP25(k0) + sdc1 $f27, THREAD_FP27(k0) + sdc1 $f29, THREAD_FP29(k0) + sdc1 $f31, THREAD_FP31(k0) +#endif handle_interrupt: #if ENABLE_PROFILER @@ -769,6 +792,7 @@ LEAF(__osEnqueueAndYield) ori t0, t0, SR_EXL sw t0, THREAD_SR(a1) /* Save callee-saved registers */ +.set push; .set gp=64 sd s0, THREAD_S0(a1) sd s1, THREAD_S1(a1) sd s2, THREAD_S2(a1) @@ -777,10 +801,11 @@ LEAF(__osEnqueueAndYield) sd s5, THREAD_S5(a1) sd s6, THREAD_S6(a1) sd s7, THREAD_S7(a1) - sd gp, THREAD_GP(a1) + sd GP, THREAD_GP(a1) sd sp, THREAD_SP(a1) sd fp, THREAD_S8(a1) sd ra, THREAD_RA(a1) +.set pop sw ra, THREAD_PC(a1) /* Save FPU callee-saved registers if the current thread has used the FPU */ lw k1, THREAD_FP(a1) @@ -899,12 +924,16 @@ get_time_no_interrupts: /* osGetTime, but without the interrupts */ lw t2, %lo(__osBaseCounter)(t2) subu t2, t1, t2 /* base = count - __osBaseCounter */ lui t3, %hi(__osCurrentTime) +.set push; .set gp=64 ld t3, %lo(__osCurrentTime)(t3) +.set pop addu t1, t2, t3 /* time = base + t */ record: sll a2, a1, 3 /* numEvents * 8 for eventTimes */ addu a2, a2, t0 /* activeProfilerState->eventTimes[e] */ +.set push; .set gp=64 sd t1, (0)(a2) +.set pop addu a3, a1, t0 /* activeProfilerState->{null}[e] */ sb a0, (8*PROFILER_EVENT_COUNT)(a3) /* eventTypes */ addiu a1, a1, 1 /* numEvents++ */ @@ -948,6 +977,7 @@ LEAF(__osDispatchThread) MTC0( k1, C0_SR) /* Restore GPRs */ .set noat +.set push; .set gp=64 ld AT, THREAD_AT(k0) ld v0, THREAD_V0(k0) ld v1, THREAD_V1(k0) @@ -973,13 +1003,14 @@ LEAF(__osDispatchThread) ld s7, THREAD_S7(k0) ld t8, THREAD_T8(k0) ld t9, THREAD_T9(k0) - ld gp, THREAD_GP(k0) + ld GP, THREAD_GP(k0) ld sp, THREAD_SP(k0) ld fp, THREAD_S8(k0) ld ra, THREAD_RA(k0) ld k1, THREAD_LO(k0) mtlo k1 ld k1, THREAD_HI(k0) +.set pop mthi k1 /* Move thread pc 
to EPC so that eret will return execution to where the thread left off */ lw k1, THREAD_PC(k0) @@ -1008,6 +1039,24 @@ LEAF(__osDispatchThread) ldc1 $f26, THREAD_FP26(k0) ldc1 $f28, THREAD_FP28(k0) ldc1 $f30, THREAD_FP30(k0) +#if defined(_MIPS_SIM) && _MIPS_SIM == _ABIN32 + ldc1 $f1, THREAD_FP1(k0) + ldc1 $f3, THREAD_FP3(k0) + ldc1 $f5, THREAD_FP5(k0) + ldc1 $f7, THREAD_FP7(k0) + ldc1 $f9, THREAD_FP9(k0) + ldc1 $f11, THREAD_FP11(k0) + ldc1 $f13, THREAD_FP13(k0) + ldc1 $f15, THREAD_FP15(k0) + ldc1 $f17, THREAD_FP17(k0) + ldc1 $f19, THREAD_FP19(k0) + ldc1 $f21, THREAD_FP21(k0) + ldc1 $f23, THREAD_FP23(k0) + ldc1 $f25, THREAD_FP25(k0) + ldc1 $f27, THREAD_FP27(k0) + ldc1 $f29, THREAD_FP29(k0) + ldc1 $f31, THREAD_FP31(k0) +#endif 1: /* Restore RCP interrupt mask, masking out any RCP interrupts that */ /* are not also enabled in the global interrupt mask */ diff --git a/src/libultra/os/initialize.c b/src/libultra/os/initialize.c index 53b84d433..1293b64a8 100644 --- a/src/libultra/os/initialize.c +++ b/src/libultra/os/initialize.c @@ -59,8 +59,15 @@ void OSINITIALIZE_FUNC(void) { #endif __osFinalrom = true; + +#if defined(_MIPS_SIM) && (_MIPS_SIM == _ABIN32) // N32 sets FR + __osSetSR(__osGetSR() | SR_CU1 | SR_FR); +#else // O32 and EABI don't set FR __osSetSR(__osGetSR() | SR_CU1); +#endif + __osSetFpcCsr(FPCSR_FS | FPCSR_EV | FPCSR_RM_RN); + #if LIBULTRA_VERSION >= LIBULTRA_VERSION_K __osSetWatchLo(0x04900000); #endif