diff --git a/Makefile b/Makefile index 7336806c..06ddec17 100644 --- a/Makefile +++ b/Makefile @@ -237,9 +237,7 @@ LEVEL_DIRS := $(patsubst levels/%,%,$(dir $(wildcard levels/*/header.h))) SRC_DIRS := src src/engine src/game src/menu src/buffers src/audio $(AUDIO_SRC_DIR) actors levels bin data assets asm lib sound BIN_DIRS := bin bin/$(VERSION) -ifeq ($(VERSION),cn) - LIBGCC_SRC_DIRS += lib/src/libgcc -endif +LIBGCC_SRC_DIRS += lib/src/libgcc GODDARD_SRC_DIRS := src/goddard src/goddard/dynlists @@ -396,6 +394,7 @@ else endif ASFLAGS := -march=vr4300 -mabi=32 $(foreach i,$(INCLUDE_DIRS),-I$(i)) $(foreach d,$(DEFINES),--defsym $(d)) +ASMFLAGS := -G 0 $(DEF_INC_CFLAGS) -w -nostdinc -c -march=vr4300 -mfix4300 -mno-abicalls -DMIPSEB -D_LANGUAGE_ASSEMBLY -D_MIPS_SIM=1 -D_MIPS_SZLONG=32 RSPASMFLAGS := $(foreach d,$(DEFINES),-definelabel $(subst =, ,$(d))) ifeq ($(shell getconf LONG_BIT), 32) @@ -765,7 +764,11 @@ endif # Assemble assembly code $(BUILD_DIR)/%.o: %.s $(call print,Assembling:,$<,$@) - $(V)$(CPP) $(CPPFLAGS) -D_LANGUAGE_ASSEMBLY=1 $< | $(AS) $(ASFLAGS) -MD $(BUILD_DIR)/$*.d -o $@ +ifeq ($(COMPILER),gcc) + $(V)$(CC) -c $(ASMFLAGS) -x assembler-with-cpp -MMD -MF $(BUILD_DIR)/$*.d -o $@ $< +else + $(V)$(CPP) $(CPPFLAGS) -D_LANGUAGE_ASSEMBLY $< | $(AS) $(ASFLAGS) -MD $(BUILD_DIR)/$*.d -o $@ +endif # Assemble RSP assembly code $(BUILD_DIR)/rsp/%.bin $(BUILD_DIR)/rsp/%_data.bin: rsp/%.s diff --git a/lib/src/libgcc/longlong.h b/lib/src/libgcc/longlong.h index d0f7cad8..2771f376 100644 --- a/lib/src/libgcc/longlong.h +++ b/lib/src/libgcc/longlong.h @@ -78,7 +78,7 @@ for the CPUs below! (E.g. WE32100, IBM360.) */ -#if defined (__GNUC__) && !defined (NO_ASM) +#if defined(__GNUC__) && !defined(NO_ASM) /* We sometimes need to clobber "cc" with gcc2, but that would not be understood by gcc1. Use cpp to avoid major code duplication. */ @@ -90,13 +90,28 @@ #define __AND_CLOBBER_CC , "cc" #endif /* __GNUC__ < 2 */ -#if defined (__mips__) -#define umul_ppmm(w1, w0, u, v) \ - __asm__ ("multu %2,%3" \ - : "=l" ((USItype) (w0)), \ - "=h" ((USItype) (w1)) \ - : "d" ((USItype) (u)), \ - "d" ((USItype) (v))) +#if defined(__mips__) +#if defined(__clang__) || (__GNUC__ >= 5) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UDItype _r; \ + _r = (UDItype) (u) * (v); \ + (w1) = _r >> 32; \ + (w0) = (USItype) _r; \ + } while (0) +#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__("multu %2,%3" \ + : "=l"((USItype) (w0)), "=h"((USItype) (w1)) \ + : "d"((USItype) (u)), "d"((USItype) (v))) +#else +#define umul_ppmm(w1, w0, u, v) \ + __asm__("multu %2,%3 \n" \ + "mflo %0 \n" \ + "mfhi %1" \ + : "=d"((USItype) (w0)), "=d"((USItype) (w1)) \ + : "d"((USItype) (u)), "d"((USItype) (v))) +#endif #define UMUL_TIME 10 #define UDIV_TIME 100 #endif /* __mips__ */ @@ -105,136 +120,133 @@ /* If this machine has no inline assembler, use C macros. */ -#if !defined (add_ssaaaa) -#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ - do { \ - USItype __x; \ - __x = (al) + (bl); \ - (sh) = (ah) + (bh) + (__x < (al)); \ - (sl) = __x; \ - } while (0) +#if !defined(add_ssaaaa) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + USItype __x; \ + __x = (al) + (bl); \ + (sh) = (ah) + (bh) + (__x < (al)); \ + (sl) = __x; \ + } while (0) #endif -#if !defined (sub_ddmmss) -#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ - do { \ - USItype __x; \ - __x = (al) - (bl); \ - (sh) = (ah) - (bh) - (__x > (al)); \ - (sl) = __x; \ - } while (0) +#if !defined(sub_ddmmss) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + USItype __x; \ + __x = (al) - (bl); \ + (sh) = (ah) - (bh) - (__x > (al)); \ + (sl) = __x; \ + } while (0) #endif -#if !defined (umul_ppmm) -#define umul_ppmm(w1, w0, u, v) \ - do { \ - USItype __x0, __x1, __x2, __x3; \ - USItype __ul, __vl, __uh, __vh; \ - \ - __ul = __ll_lowpart (u); \ - __uh = __ll_highpart (u); \ - __vl = __ll_lowpart (v); \ - __vh = __ll_highpart (v); \ - \ - __x0 = (USItype) __ul * __vl; \ - __x1 = (USItype) __ul * __vh; \ - __x2 = (USItype) __uh * __vl; \ - __x3 = (USItype) __uh * __vh; \ - \ - __x1 += __ll_highpart (__x0);/* this can't give carry */ \ - __x1 += __x2; /* but this indeed can */ \ - if (__x1 < __x2) /* did we get it? */ \ - __x3 += __ll_B; /* yes, add it in the proper pos. */ \ - \ - (w1) = __x3 + __ll_highpart (__x1); \ - (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ - } while (0) +#if !defined(umul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + USItype __x0, __x1, __x2, __x3; \ + USItype __ul, __vl, __uh, __vh; \ + \ + __ul = __ll_lowpart(u); \ + __uh = __ll_highpart(u); \ + __vl = __ll_lowpart(v); \ + __vh = __ll_highpart(v); \ + \ + __x0 = (USItype) __ul * __vl; \ + __x1 = (USItype) __ul * __vh; \ + __x2 = (USItype) __uh * __vl; \ + __x3 = (USItype) __uh * __vh; \ + \ + __x1 += __ll_highpart(__x0); /* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += __ll_B; /* yes, add it in the proper pos. */ \ + \ + (w1) = __x3 + __ll_highpart(__x1); \ + (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0); \ + } while (0) #endif -#if !defined (__umulsidi3) -#define __umulsidi3(u, v) \ - ({DIunion __w; \ - umul_ppmm (__w.s.high, __w.s.low, u, v); \ - __w.ll; }) +#if !defined(__umulsidi3) +#define __umulsidi3(u, v) \ + ({ \ + DIunion __w; \ + umul_ppmm(__w.s.high, __w.s.low, u, v); \ + __w.ll; \ + }) #endif /* Define this unconditionally, so it can be used for debugging. */ -#define __udiv_qrnnd_c(q, r, n1, n0, d) \ - do { \ - USItype __d1, __d0, __q1, __q0; \ - USItype __r1, __r0, __m; \ - __d1 = __ll_highpart (d); \ - __d0 = __ll_lowpart (d); \ - \ - __r1 = (n1) % __d1; \ - __q1 = (n1) / __d1; \ - __m = (USItype) __q1 * __d0; \ - __r1 = __r1 * __ll_B | __ll_highpart (n0); \ - if (__r1 < __m) \ - { \ - __q1--, __r1 += (d); \ - if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ - if (__r1 < __m) \ - __q1--, __r1 += (d); \ - } \ - __r1 -= __m; \ - \ - __r0 = __r1 % __d1; \ - __q0 = __r1 / __d1; \ - __m = (USItype) __q0 * __d0; \ - __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ - if (__r0 < __m) \ - { \ - __q0--, __r0 += (d); \ - if (__r0 >= (d)) \ - if (__r0 < __m) \ - __q0--, __r0 += (d); \ - } \ - __r0 -= __m; \ - \ - (q) = (USItype) __q1 * __ll_B | __q0; \ - (r) = __r0; \ - } while (0) +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ + do { \ + USItype __d1, __d0, __q1, __q0; \ + USItype __r1, __r0, __m; \ + __d1 = __ll_highpart(d); \ + __d0 = __ll_lowpart(d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (USItype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart(n0); \ + if (__r1 < __m) { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (USItype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart(n0); \ + if (__r0 < __m) { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (USItype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ + } while (0) /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through __udiv_w_sdiv (defined in libgcc or elsewhere). */ -#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) -#define udiv_qrnnd(q, r, nh, nl, d) \ - do { \ - USItype __r; \ - (q) = __udiv_w_sdiv (&__r, nh, nl, d); \ - (r) = __r; \ - } while (0) +#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd) +#define udiv_qrnnd(q, r, nh, nl, d) \ + do { \ + USItype __r; \ + (q) = __udiv_w_sdiv(&__r, nh, nl, d); \ + (r) = __r; \ + } while (0) #endif /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ -#if !defined (udiv_qrnnd) +#if !defined(udiv_qrnnd) #define UDIV_NEEDS_NORMALIZATION 1 #define udiv_qrnnd __udiv_qrnnd_c #endif -#if !defined (count_leading_zeros) -//extern const UQItype __clz_tab[]; -#define count_leading_zeros(count, x) \ - do { \ - USItype __xr = (x); \ - USItype __a; \ - \ - if (SI_TYPE_SIZE <= 32) \ - { \ - __a = __xr < ((USItype)1<<2*__BITS4) \ - ? (__xr < ((USItype)1<<__BITS4) ? 0 : __BITS4) \ - : (__xr < ((USItype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \ - } \ - else \ - { \ - for (__a = SI_TYPE_SIZE - 8; __a > 0; __a -= 8) \ - if (((__xr >> __a) & 0xff) != 0) \ - break; \ - } \ - \ - (count) = SI_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ - } while (0) +#if !defined(count_leading_zeros) +// extern const UQItype __clz_tab[]; +#define count_leading_zeros(count, x) \ + do { \ + USItype __xr = (x); \ + USItype __a; \ + \ + if (SI_TYPE_SIZE <= 32) { \ + __a = __xr < ((USItype) 1 << 2 * __BITS4) \ + ? (__xr < ((USItype) 1 << __BITS4) ? 0 : __BITS4) \ + : (__xr < ((USItype) 1 << 3 * __BITS4) ? 2 * __BITS4 : 3 * __BITS4); \ + } else { \ + for (__a = SI_TYPE_SIZE - 8; __a > 0; __a -= 8) \ + if (((__xr >> __a) & 0xff) != 0) \ + break; \ + } \ + \ + (count) = SI_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ + } while (0) #endif #ifndef UDIV_NEEDS_NORMALIZATION