From 82736c610af9edc9ab3236e123a85b3dcecc15ea Mon Sep 17 00:00:00 2001 From: a Date: Mon, 23 Jun 2025 20:50:28 -0400 Subject: [PATCH] git subrepo clone https://gitlab.com/mpharoah/n64-libc lib/n64-libc subrepo: subdir: "lib/n64-libc" merged: "70270d60" upstream: origin: "https://gitlab.com/mpharoah/n64-libc" branch: "main" commit: "70270d60" git-subrepo: version: "0.4.9" origin: "https://github.com/ingydotnet/git-subrepo" commit: "4f60dd7" --- lib/n64-libc/.gitignore | 2 + lib/n64-libc/.gitrepo | 12 + lib/n64-libc/README.md | 5 + lib/n64-libc/n64-alloca.h | 3 + lib/n64-libc/n64-assert.c | 9 + lib/n64-libc/n64-assert.h | 32 + lib/n64-libc/n64-ctype.c | 20 + lib/n64-libc/n64-ctype.h | 89 +++ lib/n64-libc/n64-fenv.c | 72 ++ lib/n64-libc/n64-fenv.h | 39 + lib/n64-libc/n64-float.h | 111 +++ lib/n64-libc/n64-libc.h | 15 + lib/n64-libc/n64-math.c | 558 +++++++++++++++ lib/n64-libc/n64-math.h | 275 +++++++ lib/n64-libc/n64-numbers.h | 43 ++ lib/n64-libc/n64-stdbool.h | 34 + lib/n64-libc/n64-stdckdint.h | 8 + lib/n64-libc/n64-stddef.h | 15 + lib/n64-libc/n64-stdio-bprint.c.inc | 432 +++++++++++ lib/n64-libc/n64-stdio-format.c.inc | 1030 +++++++++++++++++++++++++++ lib/n64-libc/n64-stdio-ryu.c.inc | 910 +++++++++++++++++++++++ lib/n64-libc/n64-stdio.c | 67 ++ lib/n64-libc/n64-stdio.h | 65 ++ lib/n64-libc/n64-stdlib.c | 155 ++++ lib/n64-libc/n64-stdlib.h | 67 ++ lib/n64-libc/n64-string.c | 356 +++++++++ lib/n64-libc/n64-string.h | 111 +++ lib/n64-libc/n64-time.c | 685 ++++++++++++++++++ lib/n64-libc/n64-time.h | 129 ++++ lib/n64-libc/n64-util.c | 24 + lib/n64-libc/n64-util.h | 62 ++ 31 files changed, 5435 insertions(+) create mode 100644 lib/n64-libc/.gitignore create mode 100644 lib/n64-libc/.gitrepo create mode 100644 lib/n64-libc/README.md create mode 100644 lib/n64-libc/n64-alloca.h create mode 100644 lib/n64-libc/n64-assert.c create mode 100644 lib/n64-libc/n64-assert.h create mode 100644 lib/n64-libc/n64-ctype.c create mode 100644 lib/n64-libc/n64-ctype.h create mode 100644 lib/n64-libc/n64-fenv.c create mode 100644 lib/n64-libc/n64-fenv.h create mode 100644 lib/n64-libc/n64-float.h create mode 100644 lib/n64-libc/n64-libc.h create mode 100644 lib/n64-libc/n64-math.c create mode 100644 lib/n64-libc/n64-math.h create mode 100644 lib/n64-libc/n64-numbers.h create mode 100644 lib/n64-libc/n64-stdbool.h create mode 100644 lib/n64-libc/n64-stdckdint.h create mode 100644 lib/n64-libc/n64-stddef.h create mode 100644 lib/n64-libc/n64-stdio-bprint.c.inc create mode 100644 lib/n64-libc/n64-stdio-format.c.inc create mode 100644 lib/n64-libc/n64-stdio-ryu.c.inc create mode 100644 lib/n64-libc/n64-stdio.c create mode 100644 lib/n64-libc/n64-stdio.h create mode 100644 lib/n64-libc/n64-stdlib.c create mode 100644 lib/n64-libc/n64-stdlib.h create mode 100644 lib/n64-libc/n64-string.c create mode 100644 lib/n64-libc/n64-string.h create mode 100644 lib/n64-libc/n64-time.c create mode 100644 lib/n64-libc/n64-time.h create mode 100644 lib/n64-libc/n64-util.c create mode 100644 lib/n64-libc/n64-util.h diff --git a/lib/n64-libc/.gitignore b/lib/n64-libc/.gitignore new file mode 100644 index 00000000..7cac32ea --- /dev/null +++ b/lib/n64-libc/.gitignore @@ -0,0 +1,2 @@ +/.vscode +*.kate-swp diff --git a/lib/n64-libc/.gitrepo b/lib/n64-libc/.gitrepo new file mode 100644 index 00000000..59b5db03 --- /dev/null +++ b/lib/n64-libc/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. See https://github.com/ingydotnet/git-subrepo#readme +; +[subrepo] + remote = https://gitlab.com/mpharoah/n64-libc + branch = main + commit = 70270d60f9b13d3cd896eaa8aa0a043992a823fd + parent = 4c89989f6f2ef03e71fc89e33e4e94fa067fda04 + method = merge + cmdver = 0.4.9 diff --git a/lib/n64-libc/README.md b/lib/n64-libc/README.md new file mode 100644 index 00000000..e834f6e7 --- /dev/null +++ b/lib/n64-libc/README.md @@ -0,0 +1,5 @@ +# N64 libc + +Provides a partial implementation of the C standard library for N64 + +WORK IN PROGRESS diff --git a/lib/n64-libc/n64-alloca.h b/lib/n64-libc/n64-alloca.h new file mode 100644 index 00000000..6e1d578a --- /dev/null +++ b/lib/n64-libc/n64-alloca.h @@ -0,0 +1,3 @@ +#ifndef n64_alloca +#define n64_alloca( size ) __builtin_alloca_with_align( size, 64 ) +#endif diff --git a/lib/n64-libc/n64-assert.c b/lib/n64-libc/n64-assert.c new file mode 100644 index 00000000..458afd70 --- /dev/null +++ b/lib/n64-libc/n64-assert.c @@ -0,0 +1,9 @@ +#include "n64-assert.h" + +#include "n64-stdlib.h" +#include "n64-stdio.h" + +void __n64_assert_fail( const char *assertion, const char *file, unsigned int line, const char *fcn ) { + n64_printf( "%s:%u: %s: Assertion `%s' failed.\n", file, line, fcn, assertion ); + n64_abort(); +} diff --git a/lib/n64-libc/n64-assert.h b/lib/n64-libc/n64-assert.h new file mode 100644 index 00000000..1d286b4c --- /dev/null +++ b/lib/n64-libc/n64-assert.h @@ -0,0 +1,32 @@ +#ifndef N64_STDLIB_N64_ASSERT_H_ +#define N64_STDLIB_N64_ASSERT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef NDEBUG +#define n64_assert( expr ) (void)(0) +#else + +__attribute__((noreturn)) +void __n64_assert_fail( const char *assertion, const char *file, unsigned int line, const char *fcn ); + +#define n64_assert( expr ) ((expr) ? (void)(0) : __n64_assert_fail( #expr, __FILE__, __LINE__, __func__ )) +#endif + +#ifdef __cplusplus + #define n64_static_assert( expr ) static_assert( expr ) +#else + #if __STDC_VERSION__ >= 202311L + #define n64_static_assert( expr ) static_assert( expr ) + #else + #define n64_static_assert( expr ) _Static_assert( expr ) + #endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/n64-libc/n64-ctype.c b/lib/n64-libc/n64-ctype.c new file mode 100644 index 00000000..f154d039 --- /dev/null +++ b/lib/n64-libc/n64-ctype.c @@ -0,0 +1,20 @@ +#include "n64-ctype.h" + +unsigned short __n64_internal_charflag_table[256] = { + /* 0x00 - 0x08 : control codes */ 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, + /* 0x09 : tab */ 0x0E0, + /* 0x0A - 0x0D : whitespaces */ 0x060, 0x060, 0x060, 0x060, + /* 0x0E - 0x1F : control codes */ 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, + /* 0x20 : space */ 0x1C0, + /* 0x21 - 0x2F : punctuation */ 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, + /* 0x30 - 0x39 : digits */ 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, + /* 0x3A - 0x40 : punctuation */ 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, + /* 0x41 - 0x46 : hex letters (U) */ 0x116, 0x116, 0x116, 0x116, 0x116, 0x116, + /* 0x47 - 0x5A : letters (U) */ 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, + /* 0x5B - 0x60 : punctuation */ 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, + /* 0x61 - 0x66 : hex letters (L) */ 0x10E, 0x10E, 0x10E, 0x10E, 0x10E, 0x10E, + /* 0x67 - 0x7A : letters (L) */ 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, + /* 0x7B - 0x7E : punctuation */ 0x300, 0x300, 0x300, 0x300, + /* 0x7F : backspace */ 0x020, + /* 0x80 - 0xFF : non-ASCII */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; diff --git a/lib/n64-libc/n64-ctype.h b/lib/n64-libc/n64-ctype.h new file mode 100644 index 00000000..e7dec653 --- /dev/null +++ b/lib/n64-libc/n64-ctype.h @@ -0,0 +1,89 @@ +#ifndef N64_STDLIB_N64_CTYPE_H_ +#define N64_STDLIB_N64_CTYPE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +extern unsigned short __n64_internal_charflag_table[256]; + +__attribute__((const, always_inline)) +static inline int __n64_internal_has_charflag( int ch, unsigned short flag ) { + return __n64_internal_charflag_table[(unsigned int)ch & 0xFFu] & (int)flag; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_isalnum( int ch ) { + return __n64_internal_has_charflag( ch, 0x003 ); +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_isalpha( int ch ) { + return __n64_internal_has_charflag( ch, 0x002 ); +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_islower( int ch ) { + return __n64_internal_has_charflag( ch, 0x008 ); +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_isupper( int ch ) { + return __n64_internal_has_charflag( ch, 0x010 ); +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_isdigit( int ch ) { + return __n64_internal_has_charflag( ch, 0x001 ); +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_isxdigit( int ch ) { + return __n64_internal_has_charflag( ch, 0x004 ); +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_iscntrl( int ch ) { + return __n64_internal_has_charflag( ch, 0x020 ); +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_isgraph( int ch ) { + return __n64_internal_has_charflag( ch, 0x203 ); +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_isspace( int ch ) { + return __n64_internal_has_charflag( ch, 0x040 ); +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_isblank( int ch ) { + return __n64_internal_has_charflag( ch, 0x080 ); +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_isprint( int ch ) { + return __n64_internal_has_charflag( ch, 0x100 ); +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_ispunct( int ch ) { + return __n64_internal_has_charflag( ch, 0x200 ); +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_tolower( int ch ) { + return (ch >= (int)'A' && ch <= (int)'Z') ? (ch + 32) : ch; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_toupper( int ch ) { + return (ch >= (int)'a' && ch <= (int)'z') ? (ch - 32) : ch; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/n64-libc/n64-fenv.c b/lib/n64-libc/n64-fenv.c new file mode 100644 index 00000000..8f8db766 --- /dev/null +++ b/lib/n64-libc/n64-fenv.c @@ -0,0 +1,72 @@ +#include "n64-fenv.h" + +int n64_feclearexcept( int excepts ) { + if( excepts & ~FE_ALL_EXCEPT ) return excepts; + register unsigned int fcr31 = __builtin_mips_get_fcsr(); + fcr31 &= ~((unsigned int)excepts << 2); + __builtin_mips_set_fcsr( fcr31 ); + return ((int)__builtin_mips_get_fcsr() >> 2) & excepts; +} + +int n64_fetestexcept( int excepts ) { + return (int)(__builtin_mips_get_fcsr() >> 2) & FE_ALL_EXCEPT & excepts; +} + +int n64_feraiseexcept( int excepts ) { + if( excepts & ~FE_ALL_EXCEPT ) return excepts; + register unsigned int fcr31 = __builtin_mips_get_fcsr(); + fcr31 |= (unsigned int)excepts << 12; + __builtin_mips_set_fcsr( fcr31 ); + return (((int)__builtin_mips_get_fcsr() >> 2) & excepts) ^ excepts; +} + +int n64_fegetexceptflag( n64_fexcept_t *flagp, int excepts ) { + if( excepts & ~FE_ALL_EXCEPT ) return excepts; + *flagp = (n64_fexcept_t)((__builtin_mips_get_fcsr() >> 2) & (unsigned int)excepts); + return 0; +} + +int n64_fesetexceptflag( const n64_fexcept_t *flagp, int excepts ) { + if( excepts & ~FE_ALL_EXCEPT ) return excepts; + register unsigned int fcr31 = __builtin_mips_get_fcsr(); + fcr31 &= ~((unsigned int)excepts << 2); + fcr31 |= ((unsigned int)*flagp & (unsigned int)excepts) << 2; + __builtin_mips_set_fcsr( fcr31 ); + return (((int)__builtin_mips_get_fcsr() >> 2) & excepts) ^ ((int)*flagp & excepts); +} + +int n64_fesetround( int round ) { + if( round < 0 || round > 3 ) return round; + register unsigned int fcr31 = __builtin_mips_get_fcsr() & ~3u; + __builtin_mips_set_fcsr( fcr31 | (unsigned int)round ); + return (int)(__builtin_mips_get_fcsr() & 3u) ^ round; +} + +int n64_fegetround() { + return (unsigned int)__builtin_mips_get_fcsr() & 3u; +} + +int n64_fegetenv( n64_fenv_t* envp ) { + *envp = __builtin_mips_get_fcsr(); + return 0; +} + +int n64_fesetenv( const n64_fenv_t* envp ) { + register const unsigned int c = __builtin_mips_get_fcsr() & 0x00800000u; + __builtin_mips_set_fcsr( (*envp & 0x01000FFFu) | c ); + return (int)((*envp ^ __builtin_mips_get_fcsr()) & 0x01000FFFu); +} + +int n64_feholdexcept( n64_fenv_t* envp ) { + *envp = (n64_fenv_t)__builtin_mips_get_fcsr() & 0x0103FFFFu; + __builtin_mips_set_fcsr( *envp & 0x01800003u ); + return (int)(__builtin_mips_get_fcsr() & 0x0003FFFCu); +} + +int n64_feupdateenv( const n64_fenv_t* envp ) { + register const unsigned int e = __builtin_mips_get_fcsr() & 0x0083F000u; + register const unsigned int c = e & 0x00800000u; + __builtin_mips_set_fcsr( (*envp & 0x01000FFFu) | c ); + __builtin_mips_set_fcsr( (*envp & 0x01000FFFu) | e ); + return (int)((*envp ^ __builtin_mips_get_fcsr()) & 0x0103FFFFu); +} diff --git a/lib/n64-libc/n64-fenv.h b/lib/n64-libc/n64-fenv.h new file mode 100644 index 00000000..ac1c4a0f --- /dev/null +++ b/lib/n64-libc/n64-fenv.h @@ -0,0 +1,39 @@ +#ifndef N64_STDLIB_N64_FENV_H_ +#define N64_STDLIB_N64_FENV_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#define FE_DIVBYZERO 0x08 +#define FE_INEXACT 0x01 +#define FE_INVALID 0x10 +#define FE_OVERFLOW 0x04 +#define FE_UNDERFLOW 0x02 +#define FE_ALL_EXCEPT 0x1F + +#define FE_DOWNWARD 0x3 +#define FE_TONEAREST 0x0 +#define FE_TOWARDZERO 0x1 +#define FE_UPWARD 0x2 + +typedef unsigned int n64_fenv_t; +typedef unsigned char n64_fexcept_t; + +int n64_feclearexcept( int excepts ); +int n64_fetestexcept( int excepts ) __attribute__((warn_unused_result)); +int n64_feraiseexcept( int excepts ); +int n64_fegetexceptflag( n64_fexcept_t *flagp, int excepts ) __attribute__((nonnull(1), access(write_only, 1))); +int n64_fesetexceptflag( const n64_fexcept_t *flagp, int excepts ) __attribute__((nonnull(1))); +int n64_fesetround( int round ); +int n64_fegetround() __attribute__((warn_unused_result)); +int n64_fegetenv( n64_fenv_t* envp ) __attribute__((nonnull(1), access(write_only, 1))); +int n64_fesetenv( const n64_fenv_t* envp ) __attribute__((nonnull(1))); +int n64_feholdexcept( n64_fenv_t* envp ) __attribute__((nonnull(1), access(write_only, 1))); +int n64_feupdateenv( const n64_fenv_t* envp ) __attribute__((nonnull(1))); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/n64-libc/n64-float.h b/lib/n64-libc/n64-float.h new file mode 100644 index 00000000..5a7497cc --- /dev/null +++ b/lib/n64-libc/n64-float.h @@ -0,0 +1,111 @@ +#ifndef FLT_RADIX + #define FLT_RADIX 2 +#endif + +#ifndef FLT_DECIMAL_DIG + #define FLT_DECIMAL_DIG 9 +#endif + +#ifndef DBL_DECIMAL_DIG + #define DBL_DECIMAL_DIG 17 +#endif + +#ifndef FLT_MIN + #define FLT_MIN 1.1754944e-38f +#endif + +#ifndef DBL_MIN + #define DBL_MIN 2.2250738585072014e-308 +#endif + +#ifndef FLT_TRUE_MIN + #define FLT_TRUE_MIN 1e-45f +#endif + +#ifndef DBL_TRUE_MIN + #define DBL_TRUE_MIN 5e-324 +#endif + +#ifndef FLT_MAX + #define FLT_MAX 3.4028235e+38f +#endif + +#ifndef DBL_MAX + #define DBL_MAX 1.7976931348623157e+308 +#endif + +#ifndef FLT_EPSILON + #define FLT_EPSILON 1.1920929e-07f +#endif + +#ifndef DBL_EPSILON + #define DBL_EPSILON 2.220446049250313e-16 +#endif + +#ifndef FLT_DIG + #define FLT_DIG 6 +#endif + +#ifndef DBL_DIG + #define DBL_DIG 15 +#endif + +#ifndef FLT_MANT_DIG + #define FLT_MANT_DIG 24 +#endif + +#ifndef DBL_MANT_DIG + #define DBL_MANT_DIG 53 +#endif + +#ifndef FLT_MIN_EXP + #define FLT_MIN_EXP -125 +#endif + +#ifndef DBL_MIN_EXP + #define DBL_MIN_EXP -1021 +#endif + +#ifndef FLT_MIN_10_EXP + #define FLT_MIN_10_EXP -37 +#endif + +#ifndef DBL_MIN_10_EXP + #define DBL_MIN_10_EXP -307 +#endif + +#ifndef FLT_MAX_EXP + #define FLT_MAX_EXP 128 +#endif + +#ifndef DBL_MAX_EXP + #define DBL_MAX_EXP 1024 +#endif + +#ifndef FLT_MAX_10_EXP + #define FLT_MAX_10_EXP 38 +#endif + +#ifndef DBL_MAX_10_EXP + #define DBL_MAX_10_EXP 308 +#endif + +#ifndef FLT_EVAL_METHOD + #define FLT_EVAL_METHOD 0 +#endif + +#ifndef FLT_HAS_SUBNORM + #define FLT_HAS_SUBNORM 1 +#endif + +#ifndef DBL_HAS_SUBNORM + #define DBL_HAS_SUBNORM 1 +#endif + +#ifndef DECIMAL_DIG + #if defined(_ABIO64) || defined(_ABIO32) + #define DECIMAL_DIG DBL_DECIMAL_DIG + #else + #define DECIMAL_DIG 36 + #endif +#endif diff --git a/lib/n64-libc/n64-libc.h b/lib/n64-libc/n64-libc.h new file mode 100644 index 00000000..b72bd328 --- /dev/null +++ b/lib/n64-libc/n64-libc.h @@ -0,0 +1,15 @@ +#include "n64-alloca.h" +#include "n64-assert.h" +#include "n64-ctype.h" +#include "n64-fenv.h" +#include "n64-float.h" +#include "n64-math.h" +#include "n64-numbers.h" +#include "n64-stdbool.h" +#include "n64-stdckdint.h" +#include "n64-stddef.h" +#include "n64-stdio.h" +#include "n64-stdlib.h" +#include "n64-string.h" +#include "n64-time.h" +#include "n64-util.h" diff --git a/lib/n64-libc/n64-math.c b/lib/n64-libc/n64-math.c new file mode 100644 index 00000000..9fe6a30e --- /dev/null +++ b/lib/n64-libc/n64-math.c @@ -0,0 +1,558 @@ +#include "n64-math.h" + +#include "n64-stdbool.h" +#include "n64-fenv.h" +#include "n64-util.h" + +#ifdef __NO_TRAPPING_MATH__ + #define _FTRAP( ex ) +#else + #define _FTRAP( ex ) n64_feraiseexcept( ex ) +#endif + +typedef struct { + double invc; + double logc; +} __attribute__((aligned(16))) __n64_log2_table_entry; + +static const __n64_log2_table_entry s_logTable[16] = { + { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 }, + { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 }, + { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 }, + { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 }, + { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 }, + { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 }, + { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 }, + { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 }, + { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 }, + { 0x1p+0, 0x0p+0 }, + { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 }, + { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 }, + { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 }, + { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 }, + { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 }, + { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 } +}; + +static const unsigned long long s_expTable[32] = { + 0x3ff0000000000000ull, 0x3fefd9b0d3158574ull, 0x3fefb5586cf9890full, 0x3fef9301d0125b51ull, + 0x3fef72b83c7d517bull, 0x3fef54873168b9aaull, 0x3fef387a6e756238ull, 0x3fef1e9df51fdee1ull, + 0x3fef06fe0a31b715ull, 0x3feef1a7373aa9cbull, 0x3feedea64c123422ull, 0x3feece086061892dull, + 0x3feebfdad5362a27ull, 0x3feeb42b569d4f82ull, 0x3feeab07dd485429ull, 0x3feea47eb03a5585ull, + 0x3feea09e667f3bcdull, 0x3fee9f75e8ec5f74ull, 0x3feea11473eb0187ull, 0x3feea589994cce13ull, + 0x3feeace5422aa0dbull, 0x3feeb737b0cdc5e5ull, 0x3feec49182a3f090ull, 0x3feed503b23e255dull, + 0x3feee89f995ad3adull, 0x3feeff76f2fb5e47ull, 0x3fef199bdd85529cull, 0x3fef3720dcef9069ull, + 0x3fef5818dcfba487ull, 0x3fef7c97337b9b5full, 0x3fefa4afa2a490daull, 0x3fefd0765b6e4540ull, +}; + +static double n64_log2_impl( unsigned int fbits ) { + if( fbits == 0x3f800000u ) { // 1.f -> +0 + return 0.0; + } else if( fbits - 0x00800000u >= 0x7F000000u ) { + if( !(fbits << 1) ) { // 0.f -> -inf + _FTRAP( FE_DIVBYZERO ); + return -N64_HUGE_VAL; + } else if( fbits == 0x7F800000u ) { // +inf -> +inf + return N64_HUGE_VAL; + } else if( (fbits << 1) == 0xFF000000u ) { // nan -> nan + return n64_bit_cast_itof( fbits ); + } else if( fbits >> 31 ) { // negative -> nan + _FTRAP( FE_INVALID ); + return NAN; + } else { + register const float norm = n64_bit_cast_ftoi( fbits ) * 0x1p23f; + fbits = n64_bit_cast_itof( norm ) - (23 << 23); + } + } + + register unsigned int top = fbits - 0x3F330000u; + register const unsigned int i = (top >> 19) & 0xF; + top &= 0xFF800000u; + register const double z = n64_bit_cast_itof( fbits - top ); + + register const double r = z * s_logTable[i].invc - 1.0; + register const double y0 = s_logTable[i].logc + (double)((int)top >> 23); + register const double r2 = r * r; + + register const double y = 0.288457581109214 * r - 0.36092606229713164; + register const double p = 0.480898481472577 * r - 0.7213474675006291; + register double q = 1.4426950408774342 * r + y0; + q += p * r2; + return y * (r2 * r2) + q; +} + +static float n64_exp2_impl( double e, unsigned int signBias ) { + double kd = e + 0x1.8p+47; + const long long ki = n64_bit_cast_dtol( kd ); + n64_memory_barrier(); + kd -= 0x1.8p+47; + n64_memory_barrier(); + + register const double r = e - kd; + register const double s = n64_bit_cast_ltod( s_expTable[ki & 0x1Fu] + ((ki + (unsigned long long)signBias) << 47) ); + + register const double y = ( + __builtin_assoc_barrier( 0.05550361559341535 * r + 0.2402284522445722 ) * + __builtin_assoc_barrier( r * r ) + ) + __builtin_assoc_barrier( 0.6931471806916203 * r + 1.0 ); + + return (float)(y * s); +} + +float n64_expf( float arg ) { +#if __FINITE_MATH_ONLY__ != 1 + register const unsigned int bits = n64_bit_cast_ftoi( arg ); + if( ((bits >> 23) & 0xFFu) == 0xFFu ) { + return (bits == 0xFF800000u) ? 0.f : arg; + } else if( arg > 88.72283f ) { + _FTRAP( FE_OVERFLOW ); + return N64_INFINITY; + } +#endif + if( arg < -103.27892f ) { + _FTRAP( FE_UNDERFLOW ); + return 0.f; + } + return n64_exp2_impl( (double)arg * 1.4426950408889634, 0u ); +} + +float n64_exp2f( float arg ) { +#if __FINITE_MATH_ONLY__ != 1 + register const unsigned int bits = n64_bit_cast_ftoi( arg ); + if( ((bits >> 23) & 0xFFu) == 0xFFu ) { + return (bits == 0xFF800000u) ? 0.f : arg; + } else if( arg >= 128.f ) { + _FTRAP( FE_OVERFLOW ); + return N64_INFINITY; + } +#endif + if( arg < -149.f ) { + _FTRAP( FE_UNDERFLOW ); + return 0.f; + } + return n64_exp2_impl( arg, 0u ); +} + +float n64_logf( float arg ) { + if( arg == 2.71828183f ) return 1.f; + return (float)(n64_log2_impl( n64_bit_cast_ftoi( arg ) ) * 0.69314718055994531); +} + +float n64_log10f( float arg ) { + return (float)(n64_log2_impl( n64_bit_cast_ftoi( arg ) ) * 0.3010299956639812); +} + +float n64_log2f( float arg ) { + // Use a slightly faster approximation of log2 since we need less precision + + unsigned int fbits = n64_bit_cast_ftoi( arg ); + if( fbits == 0x3f800000u ) { // 1.f -> +0 + return 0.f; + } else if( fbits - 0x00800000u >= 0x7F000000u ) { + if( !(fbits << 1) ) { // 0.f -> -inf + return -N64_INFINITY; + } else if( fbits == 0x7F800000u ) { // +inf -> +inf + return N64_INFINITY; + } else if( (fbits << 1) == 0xFF000000u ) { // nan -> nan + return arg; + } else if( fbits >> 31 ) { // negative -> nan + return NAN; + } else { + fbits = n64_bit_cast_itof( arg * 0x1p23f ) - (23 << 23); + } + } + + register unsigned int top = fbits - 0x3F330000u; + register const unsigned int i = (top >> 19) & 0xF; + top &= 0xFF800000u; + register const double z = n64_bit_cast_itof( fbits - top ); + + register const double r = z * s_logTable[i].invc - 1.0; + register const double y0 = s_logTable[i].logc + (double)((int)top >> 23); + register const double r2 = r * r; + + register const double y = 0.4811247078767291 * r - 0.7213476299867769; + register const double p = 1.4426950186867042 * r + y0; + return (float)((-0.36051725506874704 * r2 + y) * r2 + p); +} + +float n64_powf( float base, float exponent ) { + // an exponent of 0 or a base of 1 always results in 1, even if the other argument is NaN + if( exponent == 0.f || exponent == -0.f || base == 1.f ) { + return 1.f; + } + + unsigned int ib = n64_bit_cast_ftoi( base ); + unsigned int ie = n64_bit_cast_ftoi( exponent ); + +#if __FINITE_MATH_ONLY__ != 1 + // if the base is -1 and the exponent is plus or minus infinity, return 1 + if( exponent == -1.f && !(ie << 9) && (((ie >> 23) & 0xFFu) == 0xFFu) ) { + return 1.f; + } +#endif + + n64_bool expIsInteger = true; + n64_bool expIsOdd = false; + if( !ib || (ib >> 31) ) { // only care about these values when the base is non-positive + register const unsigned int iee = (ie >> 23) & 0xFFu; + if( iee == 0xFFu || iee < 127u ) { + expIsInteger = false; + } else if( iee > 150u ) { + } else if( ie & ((1u << (150u - iee)) - 1u) ) { + expIsInteger = false; + } else if( ie & (1 << (150u - iee)) ) { + expIsOdd = true; + } + } + + if( !(ib << 1) ) { + // base is +0 or -0 +#if __FINITE_MATH_ONLY__ != 1 + if( ((ie >> 23) & 0xFFu) == 0xFFu ) { + if( ie << 9 ) return base; // 0^NaN -> NaN + if( ie >> 31 ) return N64_INFINITY; // 0^-inf -> +inf + return 0.f; // 0^+inf -> +0 + } +#endif + + if( ie >> 31 ) { + _FTRAP( FE_DIVBYZERO ); + if( expIsInteger && expIsOdd && (ib >> 31) ) { + // -0 to the power of a negative odd integer -> -inf + return -N64_INFINITY; + } + + // 0 to the power of a finite even integer or a finite non-integer -> +inf + return N64_INFINITY; + } else { + // -0 to the power of a positive odd integer is -0 (0 to any other finite power is +0) + return (expIsInteger && expIsOdd) ? base : 0.f; + } + } + +#if __FINITE_MATH_ONLY__ != 1 + if( ((ib >> 23) & 0xFFu) == 0xFFu ) { + if( ib << 9 ) return base; // any base other than 1 to the power of NaN is NaN + if( ib >> 31 ) { + if( exponent > 0.f ) { + // -inf to the power of a positive odd integer is -inf + if( expIsInteger && expIsOdd ) { + return -N64_INFINITY; + } + + // -inf to any other positive power is +inf + return N64_INFINITY; + } else { + // -inf to the power of a negative odd integer is -0 + if( expIsInteger && expIsOdd ) { + return -0.f; + } + + // -inf to any other negative power is +0 + return 0.f; + } + } + + // +inf to the power of any positive exponent is +inf + // +inf to the power of any negative exponent is +0 + return (exponent > 0.f) ? N64_INFINITY : 0.f; + } +#endif + + unsigned int signBias = 0u; + if( ib >> 31 ) { + // negative base + if( !expIsInteger ) { + // a negative finite number to the power of any non-integer finite number is NaN + _FTRAP( FE_INVALID ); + return NAN; + } + + if( expIsOdd ) signBias = 0x10000u; + ib &= 0x7FFFFFFFu; + } else if( exponent == -1.f ) { + return 1.f / base; + } + + if( ib < 0x00800000u ) { + // subnormal base + ib = n64_bit_cast_ftoi( base * 0x1p23f ); + ib &= 0x7FFFFFFFu; + ib -= (23 << 23); + } + + const double elogb = (double)exponent * n64_log2_impl( ib ); +#if __FINITE_MATH_ONLY__ != 1 + if( (float)elogb >= 128.f ) { + _FTRAP( FE_OVERFLOW ); + return signBias ? -N64_INFINITY : N64_INFINITY; + } +#endif + if( (float)elogb < -149.f ) { + _FTRAP( FE_UNDERFLOW ); + return signBias ? -0.f : 0.f; + } + + return n64_exp2_impl( elogb, signBias ); +} + +// x >= y >= 0 +static double hypot_impl( double x, double y ) { + double e1, e2; + const double h = n64_sqrt( x * x + y * y ); + if( h <= 2.0 * y ) { + register const double dy = h - y; + e1 = x * (2.0 * dy - x); + e2 = (dy - 2.0 * (x - y)) * dy; + } else { + register const double dx = h - x; + e1 = 2.0 * dx * (x - 2.0 * y); + e2 = (4.0 * dx - y) * y + dx * dx; + } + + return h - __builtin_assoc_barrier((e1 + e2) / (2.0 * h)); +} + +float n64_hypotf( float x, float y ) { +#if __FINITE_MATH_ONLY__ != 1 + register const unsigned int xi = n64_bit_cast_ftoi( x ); + register const unsigned int yi = n64_bit_cast_ftoi( y ); + if( (xi & 0x7F800000u) || (yi & 0x7F800000u) ) { + register const unsigned int ynan = yi << 9; + if( + (!(xi << 9) && (!ynan || !(yi >> 31))) || + (!ynan && !(xi >> 31)) + ) { + return N64_INFINITY; + } + + return x + y; + } +#endif + + return (float)n64_sqrt( (double)x * (double)x + (double)y * (double)y ); +} + +typedef union { + double f; + struct { + unsigned int hi; + unsigned int lo; + }; +} __dbits_t; + +double n64_hypot( double x, double y ) { +#if __FINITE_MATH_ONLY__ != 1 + const __dbits_t xi = { x }; + const __dbits_t yi = { y }; + if( (xi.hi & 0x7FF00000u) || (yi.hi & 0x7FF00000u) ) { + register const unsigned int xnan = xi.lo || (xi.hi << 12); + register const unsigned int ynan = yi.lo || (yi.hi << 12); + if( + (!xnan && (!ynan || !(yi.hi >> 31))) || + (!ynan && !(xi.hi >> 31)) + ) { + return N64_HUGE_VAL; + } + + return x + y; + } +#endif + + x = n64_fabs( x ); + y = n64_fabs( y ); + + if( x < y ) { + register const double temp = x; + x = y; + y = temp; + } + + if( x > 0x1p+511 ) { + if( y <= x * 0x1p-54 ) { + return x + y; + } + + return hypot_impl( x * 0x1p-600, y * 0x1p-600 ) * 0x1p+600; + } + + if( y < 0x1p-459 ) { + if( x >= y * 0x1p+54 ) { + return x + y; + } + + return hypot_impl( x * 0x1p+600, y * 0x1p+600 ) * 0x1p-600; + } + + if( y <= x * 0x1p-54 ) { + return x + y; + } + + return hypot_impl( x, y ); +} + +float n64_cbrtf( float arg ) { + register unsigned int bits = n64_bit_cast_ftoi( arg ); + if( ((bits >> 23) & 0xFFu) == 0xFFu || !(bits << 1) ) { + return arg; + } + + n64_bool negative = false; + if( bits >> 31 ) { + negative = true; + bits &= 0x7FFFFFFFu; + } + + if( bits < 0x00800000u ) { + // subnormal base + bits = n64_bit_cast_ftoi( arg * 0x1p23f ); + bits &= 0x7FFFFFFFu; + bits -= (23 << 23); + } + + register const float result = n64_exp2_impl( n64_log2_impl( bits ) / 3.0, 0u ); + return negative ? -result : result; +} + +static const unsigned int s_4divPi[24] = { + 0x000000a2u, 0x0000a2f9u, 0x00a2f983u, 0xa2f9836eu, + 0xf9836e4eu, 0x836e4e44u, 0x6e4e4415u, 0x4e441529u, + 0x441529fcu, 0x1529fc27u, 0x29fc2757u, 0xfc2757d1u, + 0x2757d1f5u, 0x57d1f534u, 0xd1f534ddu, 0xf534ddc0u, + 0x34ddc0dbu, 0xddc0db62u, 0xc0db6295u, 0xdb629599u, + 0x6295993cu, 0x95993c43u, 0x993c4390u, 0x3c439041u +}; + +__attribute__((always_inline)) +static inline unsigned int absTop12( float x ) { + return (n64_bit_cast_ftoi( x ) >> 20) & 0x7FFu; +} + +__attribute__((always_inline)) +static inline double fastReduce( double x, unsigned int *quadrant ) { + register const double r = x * 0.6366197723675814; + *quadrant = n64_llround( r ); + return x - n64_round( r ) * 1.5707963267948966; +} + +static inline double largeReduce( unsigned int xi, unsigned int *quadrant ) { + const unsigned int *lut = &s_4divPi[(xi >> 26) & 0xFu]; + const unsigned int shift = (xi >> 23) & 0x7u; + + xi &= 0x00FFFFFFu; + xi |= 0x00800000u; + xi <<= shift; + + const unsigned long long hi = (unsigned long long)(xi * lut[0]) << 32; + const unsigned long long lo = ((unsigned long long)xi * lut[8]) >> 32; + unsigned long long result = (unsigned long long)xi * lut[4]; + result += hi | lo; + + *quadrant = ((unsigned int)(result >> 61) + 1u) >> 1; + result -= (unsigned long long)*quadrant << 62; + + return (double)(long long)result * 3.4061215800865545e-19; +} + +static const double s_cosPolyP[5] = { + 1.0, -0.49999999725108224, 0.041666623324344516, -0.001388676379437604, 2.4390450703564542e-5 +}; + +static const double s_cosPolyN[5] = { + -1.0, 0.49999999725108224, -0.041666623324344516, 0.001388676379437604, -2.4390450703564542e-5 +}; + +static float sinPoly( double x ) { + register const double x2 = x * x; + register const double x3 = x * x2; + + register const double s1 = 0.008332178146138854 + x2 * -1.9517298981385725e-4; + register const double s0 = x + x3 * -0.16666654943701084; + + return (float)(s0 + (x3 * x2) * s1); +} + +static float cosPoly( double x, unsigned int negatePoly ) { + const double *p = negatePoly ? s_cosPolyN : s_cosPolyP; + register const double x2 = x * x; + + register const double c2 = p[3] + x2 * p[4]; + register const double c1 = p[0] + x2 * p[1]; + + register const double x4 = x2 * x2; + register const double c0 = c1 + x4 * p[2]; + + return (float)(c0 + __builtin_assoc_barrier( x4 * x2 ) * c2); +} + +float n64_sinf( float arg ) { + if( arg == 0.f ) return arg; + + const unsigned int top = absTop12( arg ); + if( top < 1012u ) { + if( top < 920u ) { + if( top < 8u ) _FTRAP( FE_UNDERFLOW ); + return arg; + } + + return sinPoly( arg ); + } else if( top < 1071u ) { + unsigned int q; + double x = fastReduce( arg, &q ); + + if( ((q & 3u) + 1u) & 2u ) { + x = -x; + } + + return (q & 1u) ? cosPoly( x, q & 2u ) : sinPoly( x ); + } else if( top < 2040u ) { + unsigned int q; + const unsigned int xi = n64_bit_cast_ftoi( arg ); + double x = largeReduce( xi, &q ); + + const unsigned int s = q + (xi >> 31); + if( ((s & 3u) + 1u) & 2u ) { + x = -x; + } + + return (q & 1u) ? cosPoly( x, s & 2u ) : sinPoly( x ); + } else { + _FTRAP( FE_INVALID ); + return NAN; + } +} + +float n64_cosf( float arg ) { + const unsigned int top = absTop12( arg ); + if( top < 1012u ) { + if( top < 920u ) { + return 1.f; + } + + return cosPoly( arg, 0u ); + } else if( top < 1071u ) { + unsigned int q; + double x = fastReduce( arg, &q ); + + if( ((q & 3u) + 1u) & 2u ) { + x = -x; + } + + return (q & 1u) ? sinPoly( x ) : cosPoly( x, q & 2u ); + } else if( top < 2040u ) { + unsigned int q; + const unsigned int xi = n64_bit_cast_ftoi( arg ); + double x = largeReduce( xi, &q ); + + const unsigned int s = q + (xi >> 31); + if( ((s & 3u) + 1u) & 2u ) { + x = -x; + } + + return (q & 1u) ? sinPoly( x ) : cosPoly( x, s & 2u ); + } else { + _FTRAP( FE_INVALID ); + return NAN; + } +} diff --git a/lib/n64-libc/n64-math.h b/lib/n64-libc/n64-math.h new file mode 100644 index 00000000..3a80bf8f --- /dev/null +++ b/lib/n64-libc/n64-math.h @@ -0,0 +1,275 @@ +#ifndef N64_STDLIB_N64_MATH_H_ +#define N64_STDLIB_N64_MATH_H_ + +/* Only a subset of math.h is currently implemented */ + +#define N64_NAN __builtin_nanf( "0" ) +#define N64_HUGE_VALF __builtin_inff() +#define N64_HUGE_VAL __builtin_inf() +#define N64_INFINITY N64_HUGE_VALF + +#ifndef NAN + #define NAN N64_NAN +#endif + +#ifndef HUGE_VALF + #define HUGE_VALF N64_HUGE_VALF +#endif + +#ifndef HUGE_VAL + #define HUGE_VAL N64_HUGE_VAL +#endif + +#ifndef INFINITY + #define INFINITY N64_INFINITY +#endif + +__attribute__((const, warn_unused_result)) +float n64_expf( float arg ); + +__attribute__((const, warn_unused_result)) +float n64_exp2f( float arg ); + +__attribute__((const, warn_unused_result)) +float n64_logf( float arg ); + +__attribute__((const, warn_unused_result)) +float n64_log10f( float arg ); + +__attribute__((const, warn_unused_result)) +float n64_log2f( float arg ); + +__attribute__((const, warn_unused_result)) +float n64_powf( float base, float exponent ); + +__attribute__((const, warn_unused_result)) +float n64_cbrtf( float arg ); + +__attribute__((const, warn_unused_result)) +float n64_hypotf( float x, float y ); + +__attribute__((const, warn_unused_result)) +double n64_hypot( double x, double y ); + +__attribute__((const, warn_unused_result)) +float n64_sinf( float arg ); + +__attribute__((const, warn_unused_result)) +float n64_cosf( float arg ); + +__attribute__((const, warn_unused_result, always_inline)) +static inline float n64_sqrtf( float arg ) { + float result; + asm volatile( "sqrt.s %0, %1" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline double n64_sqrt( double arg ) { + double result; + asm volatile( "sqrt.d %0, %1" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline float n64_fabsf( float arg ) { + float result; + asm volatile( "abs.s %0, %1" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline double n64_fabs( double arg ) { + double result; + asm volatile( "abs.d %0, %1" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline double n64_floor( double arg ) { + if( arg > 4503599627370496.0 || arg < -4503599627370496.0 ) return arg; + double result; + asm volatile( "floor.l.d %0, %1 \n\t cvt.d.l %0, %0" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline double n64_ceil( double arg ) { + if( arg > 4503599627370496.0 || arg < -4503599627370496.0 ) return arg; + double result; + asm volatile( "ceil.l.d %0, %1 \n\t cvt.d.l %0, %0" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline double n64_trunc( double arg ) { + if( arg > 4503599627370496.0 || arg < -4503599627370496.0 ) return arg; + double result; + asm volatile( "trunc.l.d %0, %1 \n\t cvt.d.l %0, %0" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline double n64_round( double arg ) { + if( arg > 4503599627370496.0 || arg < -4503599627370496.0 ) return arg; + double result; + asm volatile( "round.l.d %0, %1 \n\t cvt.d.l %0, %0" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline double n64_rint( double arg ) { + if( arg > 4503599627370496.0 || arg < -4503599627370496.0 ) return arg; + double result; + asm volatile( "cvt.l.d %0, %1 \n\t cvt.d.l %0, %0" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline float n64_floorf( float arg ) { + if( arg > 8388608.f || arg < -8388608.f ) return arg; + float result; + asm volatile( "floor.w.s %0, %1 \n\t cvt.s.w %0, %0" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline float n64_ceilf( float arg ) { + if( arg > 8388608.f || arg < -8388608.f ) return arg; + float result; + asm volatile( "ceil.w.s %0, %1 \n\t cvt.s.w %0, %0" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline float n64_truncf( float arg ) { + if( arg > 8388608.f || arg < -8388608.f ) return arg; + float result; + asm volatile( "trunc.w.s %0, %1 \n\t cvt.s.w %0, %0" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline float n64_roundf( float arg ) { + if( arg > 8388608.f || arg < -8388608.f ) return arg; + float result; + asm volatile( "round.w.s %0, %1 \n\t cvt.s.w %0, %0" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline float n64_rintf( float arg ) { + if( arg > 8388608.f || arg < -8388608.f ) return arg; + float result; + asm volatile( "cvt.w.s %0, %1 \n\t cvt.s.w %0, %0" : "=f"( result ) : "f"( arg ) ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline long long n64_llround( double arg ) { +#ifdef _ABIO32 + double bits; + asm volatile( "round.l.d %0, %1" : "=f"( bits ) : "f"( arg ) ); + union { double f; long long i; } b = { bits }; + return b.i; +#else + long long result; + asm volatile( "round.l.d $f10, %1 \n\t dmfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" ); + return result; +#endif +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline long long n64_llrint( double arg ) { +#ifdef _ABIO32 + double bits; + asm volatile( "cvt.l.d %0, %1" : "=f"( bits ) : "f"( arg ) ); + union { double f; long long i; } b = { bits }; + return b.i; +#else + long long result; + asm volatile( "cvt.l.d $f10, %1 \n\t dmfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" ); + return result; +#endif +} + +// Extensions: fill in gaps in the C standard for rounding functions + +__attribute__((const, warn_unused_result, always_inline)) +static inline long long n64_llfloor( double arg ) { +#ifdef _ABIO32 + double bits; + asm volatile( "floor.l.d %0, %1" : "=f"( bits ) : "f"( arg ) ); + union { double f; long long i; } b = { bits }; + return b.i; +#else + long long result; + asm volatile( "floor.l.d $f10, %1 \n\t dmfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" ); + return result; +#endif +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline long long n64_llceil( double arg ) { +#ifdef _ABIO32 + double bits; + asm volatile( "ceil.l.d %0, %1" : "=f"( bits ) : "f"( arg ) ); + union { double f; long long i; } b = { bits }; + return b.i; +#else + long long result; + asm volatile( "ceil.l.d $f10, %1 \n\t dmfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" ); + return result; +#endif +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline long long n64_lltrunc( double arg ) { +#ifdef _ABIO32 + double bits; + asm volatile( "trunc.l.d %0, %1" : "=f"( bits ) : "f"( arg ) ); + union { double f; long long i; } b = { bits }; + return b.i; +#else + long long result; + asm volatile( "trunc.l.d $f10, %1 \n\t dmfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" ); + return result; +#endif +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_ifloorf( float arg ) { + int result; + asm volatile( "floor.w.s $f10, %1 \n\t mfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_iceilf( float arg ) { + int result; + asm volatile( "ceil.w.s $f10, %1 \n\t mfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_itruncf( float arg ) { + int result; + asm volatile( "trunc.w.s $f10, %1 \n\t mfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_iroundf( float arg ) { + int result; + asm volatile( "round.w.s $f10, %1 \n\t mfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" ); + return result; +} + +__attribute__((const, warn_unused_result, always_inline)) +static inline int n64_irintf( float arg ) { + int result; + asm volatile( "cvt.w.s $f10, %1 \n\t mfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" ); + return result; +} + +#endif diff --git a/lib/n64-libc/n64-numbers.h b/lib/n64-libc/n64-numbers.h new file mode 100644 index 00000000..e7fd0f71 --- /dev/null +++ b/lib/n64-libc/n64-numbers.h @@ -0,0 +1,43 @@ +#ifndef N64_STDLIB_N64_NUMBERS_H_ +#define N64_STDLIB_N64_NUMBERS_H_ + +#define N64_E 2.7182818284590452 +#define N64_Ef 2.71828183f + +#define N64_LOG2E 1.4426950408889634 +#define N64_LOG2Ef 1.44269504f + +#define N64_LOG10E 0.43429448190325183 +#define N64_LOG10Ef 0.434294481f + +#define N64_PI 3.1415926535897932 +#define N64_PIf 3.14159265f + +#define N64_INV_PI 0.31830988618379067 +#define N64_INV_PIf 0.318309886f + +#define N64_INV_SQRTPI 0.56418958354775629 +#define N64_INV_SQRTPIf 0.564189584f + +#define N64_LN2 0.69314718055994531 +#define N64_LN2f 0.693147181f + +#define N64_LN10 2.3025850929940457 +#define N64_LN10f 2.30258509f + +#define N64_SQRT2 1.414213562373095 +#define N64_SQRT2f 1.41421356f + +#define N64_SQRT3 1.7320508075688773 +#define N64_SQRT3f 1.73205081f + +#define N64_INV_SQRT3 0.57735026918962576 +#define N64_INV_SQRT3f 0.577350269f + +#define N64_EGAMMA 0.57721566490153286 +#define N64_EGAMMAf 0.577215665f + +#define N64_PHI 1.6180339887498948 +#define N64_PHIf 1.61803399f + +#endif diff --git a/lib/n64-libc/n64-stdbool.h b/lib/n64-libc/n64-stdbool.h new file mode 100644 index 00000000..560e74a7 --- /dev/null +++ b/lib/n64-libc/n64-stdbool.h @@ -0,0 +1,34 @@ +#ifndef N64_STDLIB_N64_STDBOOL_H_ +#define N64_STDLIB_N64_STDBOOL_H_ + +#ifdef __cplusplus + typedef bool n64_bool; +#else + #if __STDC_VERSION__ >= 202311L + typedef bool n64_bool; + #else + + #if __STDC_VERSION__ >= 199901L + typedef _Bool n64_bool; + #elif __GNUC__ >= 15 + typedef unsigned char __attribute__((hardbool(0, 1))) n64_bool; + #else + typedef unsigned char n64_bool; + #endif + + #ifndef true + #define true 1 + #endif + + #ifndef false + #define false 0 + #endif + + #endif +#endif + +#ifndef __bool_true_false_are_defined + #define __bool_true_false_are_defined 1 +#endif + +#endif diff --git a/lib/n64-libc/n64-stdckdint.h b/lib/n64-libc/n64-stdckdint.h new file mode 100644 index 00000000..0b956fa6 --- /dev/null +++ b/lib/n64-libc/n64-stdckdint.h @@ -0,0 +1,8 @@ +#ifndef N64_STDLIB_N64_STDCKDINT_H_ +#define N64_STDLIB_N64_STDCKDINT_H_ + +#define n64_ckd_add( result, a, b ) __builtin_add_overflow( a, b, result ) +#define n64_ckd_sub( result, a, b ) __builtin_sub_overflow( a, b, result ) +#define n64_ckd_mul( result, a, b ) __builtin_mul_overflow( a, b, result ) + +#endif diff --git a/lib/n64-libc/n64-stddef.h b/lib/n64-libc/n64-stddef.h new file mode 100644 index 00000000..1a71fa32 --- /dev/null +++ b/lib/n64-libc/n64-stddef.h @@ -0,0 +1,15 @@ +#ifndef NULL + #ifdef __cplusplus + #if __cplusplus >= 199711L + #define NULL nullptr + #else + #define NULL 0 + #endif + #else + #if __STDC_VERSION__ >= 202311L + #define NULL nullptr + #else + #define NULL ((void*)0) + #endif + #endif +#endif diff --git a/lib/n64-libc/n64-stdio-bprint.c.inc b/lib/n64-libc/n64-stdio-bprint.c.inc new file mode 100644 index 00000000..3e690724 --- /dev/null +++ b/lib/n64-libc/n64-stdio-bprint.c.inc @@ -0,0 +1,432 @@ +#include "n64-stddef.h" + +#include "n64-stdio-format.c.inc" + +typedef enum { + CSI_ALIGNMENT, + CSI_SIGN, + CSI_SPACE, + CSI_ALTERNATE, + CSI_ZEROPAD, + CSI_WIDTH, + CSI_PRECISION, + CSI_LENGTH, + CSI_FORMAT +} n64_csi; + +#define PCS_VARIABLE 0xAAAAAAAAu +#define PCS_DEFAULT 0xBBBBBBBBu + +static const char *parse_conversion_specifier( const char *format, n64_format_args *args ) { + n64_bool isLong = false; + + args->width = 0u; + args->dataSize = 4u; + args->precision = PCS_DEFAULT; + args->defaultPrecision = 1u; + args->leftAligned = false; + args->alternate = false; + args->capitalize = false; + args->padchar = ' '; + args->poschar = '\0'; + + n64_csi csi = CSI_ALIGNMENT; + while( *format != '\0' ) { + switch( csi ) { + case CSI_ALIGNMENT: + if( *format == '-' ) { + args->leftAligned = true; + format++; + } + csi = CSI_SIGN; + __attribute__((fallthrough)); + case CSI_SIGN: + if( *format == '+' ) { + args->poschar = '+'; + format++; + } + csi = CSI_SPACE; + __attribute__((fallthrough)); + case CSI_SPACE: + if( *format == ' ' ) { + if( args->poschar != '+' ) args->poschar = ' '; + format++; + } + csi = CSI_ALTERNATE; + __attribute__((fallthrough)); + case CSI_ALTERNATE: + if( *format == '#' ) { + args->alternate = true; + format++; + } + csi = CSI_ZEROPAD; + __attribute__((fallthrough)); + case CSI_ZEROPAD: + if( *format == '0' ) { + args->padchar = '0'; + format++; + } + csi = CSI_WIDTH; + __attribute__((fallthrough)); + case CSI_WIDTH: + if( *format == '-' ) { + args->leftAligned = true; + format++; + continue; + } else if( *format == '0' ) { + format++; + continue; + } else if( *format == '*' ) { + args->width = PCS_VARIABLE; + format++; + } else if( *format > '0' && *format <= '9' ) { + int w = 0; + while( *format >= '0' && *format <= '9' ) { + w *= 10; + if( w < 0 ) return NULL; + w += (int)*format - (int)'0'; + format++; + } + args->width = (unsigned int)w; + } + csi = CSI_PRECISION; + __attribute__((fallthrough)); + case CSI_PRECISION: + if( *format == '.' ) { + format++; + if( *format == '-' ) { + format++; + while( *format >= '0' && *format <= '9' ) format++; + } else if( *format == '*' ) { + args->precision = PCS_VARIABLE; + format++; + } else if( *format >= '0' && *format <= '9' ) { + int p = 0; + while( *format >= '0' && *format <= '9' ) { + p *= 10; + if( p < 0 ) return NULL; + p += (int)*format - (int)'0'; + format++; + } + args->precision = (unsigned int)p; + } else { + return NULL; + } + } + csi = CSI_LENGTH; + __attribute__((fallthrough)); + case CSI_LENGTH: + switch( *format ) { + case 'h': + format++; + if( *format == 'h' ) { + args->dataSize = 1u; + format++; + } else { + args->dataSize = 2u; + } + break; + case 'l': + isLong = true; + args->dataSize = __SIZEOF_LONG__; + format++; + if( *format == 'l' ) { + args->dataSize = 8u; + format++; + } + break; + case 'j': + case 'L': + isLong = true; + args->dataSize = 8u; + format++; + break; + case 'z': + args->dataSize = __SIZEOF_SIZE_T__; + format++; + break; + case 't': + args->dataSize = __SIZEOF_PTRDIFF_T__; + format++; + break; + default: + break; + } + csi = CSI_FORMAT; + __attribute__((fallthrough)); + case CSI_FORMAT: + switch( *format ) { + case 'c': + args->type = 'c'; + args->dataSize = 1; + break; + case 's': + args->type = 's'; + args->dataSize = 1; + args->defaultPrecision = 0xFFFFFFFFu; + break; + case 'd': + case 'i': + args->type = 'd'; + if( args->precision != PCS_DEFAULT ) args->padchar = ' '; + break; + case 'o': + args->type = 'o'; + args->poschar = '\0'; + if( args->precision != PCS_DEFAULT ) args->padchar = ' '; + break; + case 'X': + args->capitalize = true; + __attribute__((fallthrough)); + case 'x': + args->type = 'x'; + args->poschar = '\0'; + if( args->precision != PCS_DEFAULT ) args->padchar = ' '; + break; + case 'B': + args->capitalize = true; + __attribute__((fallthrough)); + case 'b': + args->type = 'b'; + args->poschar = '\0'; + if( args->precision != PCS_DEFAULT ) args->padchar = ' '; + break; + case 'u': + args->type = 'u'; + args->poschar = '\0'; + if( args->precision != PCS_DEFAULT ) args->padchar = ' '; + break; + case 'F': + args->capitalize = true; + __attribute__((fallthrough)); + case 'f': + args->type = 'f'; + args->dataSize = isLong ? 8u : 4u; + args->defaultPrecision = 6u; + break; + case 'E': + args->capitalize = true; + __attribute__((fallthrough)); + case 'e': + args->type = 'e'; + args->dataSize = isLong ? 8u : 4u; + args->defaultPrecision = 6u; + break; + case 'A': + args->capitalize = true; + __attribute__((fallthrough)); + case 'a': + args->type = 'a'; + args->dataSize = isLong ? 8u : 4u; + args->defaultPrecision = 0xffffffffu; + break; + case 'G': + args->capitalize = true; + __attribute__((fallthrough)); + case 'g': + args->type = 'g'; + args->dataSize = isLong ? 8u : 4u; + args->defaultPrecision = 6u; + break; + case 'n': + args->type = 'n'; + break; + case 'p': + args->type = 'p'; + args->dataSize = 4u; + break; + default: + return NULL; + } + return ++format; + default: + return NULL; + } + } + + return NULL; +} + +int n64_vbprintf( char *_buffer, unsigned int bufsz, n64_bool(*callback)(void*, unsigned int), void *state, const char *format, __builtin_va_list args ) { + n64_bprint_buffer buff = { + /* start */ _buffer, + /* head */ _buffer, + /* end */ _buffer + bufsz, + callback, + state, + 0u + }; + + while( *format != '\0' ) { + if( *format != '%' ) { + n64_putc( &buff, *format ); + format++; + continue; + } + + if( format[1] == '%' ) { + n64_putc( &buff, '%' ); + format += 2; + continue; + } + + n64_format_args fmt; + format = parse_conversion_specifier( ++format, &fmt ); + if( !format ) return -1; + + if( fmt.width == PCS_VARIABLE ) { + const int w = __builtin_va_arg( args, int ); + if( w < 0 ) { + fmt.leftAligned = true; + fmt.width = (unsigned int)-w; + } else { + fmt.width = (unsigned int)w; + } + } + + if( fmt.precision == PCS_VARIABLE ) { + const int p = __builtin_va_arg( args, int ); + fmt.precision = (p < 0) ? fmt.defaultPrecision : (unsigned int)p; + } else if( fmt.precision == PCS_DEFAULT ) { + fmt.precision = fmt.defaultPrecision; + } + + switch( fmt.type ) { + case 'c': { + const char c = (char)__builtin_va_arg( args, int ); + n64_format_char( &buff, &fmt, c ); + break; + } + case 's': { + const char *str = (const char*)__builtin_va_arg( args, const char* ); + n64_format_string( &buff, &fmt, str ); + break; + } + case 'd': { + if( fmt.dataSize == 8 ) { + const long long val = __builtin_va_arg( args, long long ); + n64_format_sdec64( &buff, &fmt, val ); + } else { + int val = __builtin_va_arg( args, int ); + if( fmt.dataSize == 2 ) { + val = (int)(short)val; + } else if( fmt.dataSize == 1 ) { + val = (int)(signed char)val; + } + n64_format_sdec32( &buff, &fmt, val ); + } + break; + } + case 'u': { + if( fmt.dataSize == 8 ) { + const unsigned long long val = __builtin_va_arg( args, unsigned long long ); + n64_format_udec64( &buff, &fmt, val ); + } else { + unsigned int val = __builtin_va_arg( args, unsigned int ); + if( fmt.dataSize == 2 ) { + val &= 0xFFFFu; + } else if( fmt.dataSize == 1 ) { + val &= 0xFFu; + } + n64_format_udec32( &buff, &fmt, val ); + } + break; + } + case 'o': { + if( fmt.dataSize == 8 ) { + const unsigned long long val = __builtin_va_arg( args, unsigned long long ); + n64_format_oct64( &buff, &fmt, val ); + } else { + unsigned int val = __builtin_va_arg( args, unsigned int ); + if( fmt.dataSize == 2 ) { + val &= 0xFFFFu; + } else if( fmt.dataSize == 1 ) { + val &= 0xFFu; + } + n64_format_oct32( &buff, &fmt, val ); + } + break; + } + case 'x': { + if( fmt.dataSize == 8 ) { + const unsigned long long val = __builtin_va_arg( args, unsigned long long ); + n64_format_hex64( &buff, &fmt, val ); + } else { + unsigned int val = __builtin_va_arg( args, unsigned int ); + if( fmt.dataSize == 2 ) { + val &= 0xFFFFu; + } else if( fmt.dataSize == 1 ) { + val &= 0xFFu; + } + n64_format_hex32( &buff, &fmt, val ); + } + break; + } + case 'b': { + if( fmt.dataSize == 8 ) { + const unsigned long long val = __builtin_va_arg( args, unsigned long long ); + n64_format_bin64( &buff, &fmt, val ); + } else { + unsigned int val = __builtin_va_arg( args, unsigned int ); + if( fmt.dataSize == 2 ) { + val &= 0xFFFFu; + } else if( fmt.dataSize == 1 ) { + val &= 0xFFu; + } + n64_format_bin32( &buff, &fmt, val ); + } + break; + } + case 'f': + case 'e': + case 'g': { + const double val = __builtin_va_arg( args, double ); + if( fmt.dataSize == 8 ) { + n64_format_double( &buff, &fmt, val ); + } else { + n64_format_float( &buff, &fmt, (float)val ); + } + break; + } + case 'a': { + const double val = __builtin_va_arg( args, double ); + if( fmt.dataSize == 8 ) { + n64_format_hexdouble( &buff, &fmt, val ); + } else { + n64_format_hexfloat( &buff, &fmt, (float)val ); + } + break; + } + case 'p': { + const void *ptr = __builtin_va_arg( args, const void* ); + n64_format_pointer( &buff, &fmt, ptr ); + break; + } + case 'n': { + switch( fmt.dataSize ) { + case 1: *__builtin_va_arg( args, signed char* ) = (signed char)buff.total; break; + case 2: *__builtin_va_arg( args, short* ) = (short)buff.total; break; + case 4: *__builtin_va_arg( args, int* ) = (int)buff.total; break; + case 8: *__builtin_va_arg( args, long long* ) = (long long)buff.total; break; + default: return -1; + } + break; + } + default: + return -1; + } + } + + if( buff.start != buff.end ) { + buff.callback( buff.userdata, (unsigned int)(buff.head - buff.start) ); + } + return (int)buff.total; +} + +int n64_bprintf( char *buffer, unsigned int bufsz, n64_bool(*callback)(void*, unsigned int), void *state, const char *format, ... ) { + __builtin_va_list args; + __builtin_va_start( args, format ); + register const int result = n64_vbprintf( buffer, bufsz, callback, state, format, args ); + __builtin_va_end( args ); + return result; +} diff --git a/lib/n64-libc/n64-stdio-format.c.inc b/lib/n64-libc/n64-stdio-format.c.inc new file mode 100644 index 00000000..b4f63ce6 --- /dev/null +++ b/lib/n64-libc/n64-stdio-format.c.inc @@ -0,0 +1,1030 @@ +#include "n64-assert.h" +#include "n64-stddef.h" + +#include "n64-stdio-ryu.c.inc" + +typedef unsigned char n64_smallbool; + +n64_static_assert( sizeof( long long ) == 8 ); +n64_static_assert( sizeof( double ) == 8 ); +n64_static_assert( sizeof( float ) == 4 ); +n64_static_assert( sizeof( int ) == 4 ); + +typedef struct { + char *const start; + char *head; + char *end; + n64_bool(*callback)(void*, unsigned int); + void *const userdata; + unsigned int total; +} n64_bprint_buffer; + +static void n64_putc( n64_bprint_buffer *buffer, char c ) { + buffer->total++; + + if( buffer->start == buffer->end ) return; + if( buffer->head != buffer->end ) { + *(buffer->head++) = c; + } else if( buffer->callback( buffer->userdata, (unsigned int)(buffer->head - buffer->start) ) ) { + buffer->head = buffer->start; + } else { + buffer->end = buffer->start; + } +} + +typedef struct { + unsigned int width; + unsigned int precision; + unsigned int defaultPrecision; + char type; + unsigned char dataSize; + n64_smallbool leftAligned; + n64_smallbool alternate; + n64_smallbool capitalize; + char padchar; + char poschar; +} n64_format_args; + +static void n64_format_nan_or_inf( + n64_bprint_buffer *buffer, + const n64_format_args *format, + n64_bool isNegative, + n64_bool isNaN +) { + const unsigned int len = (isNegative || format->poschar != '\0') ? 4u : 3u; + if( !format->leftAligned ) { + for( unsigned int i = len; i < format->width; i++ ) { + n64_putc( buffer, ' ' ); + } + } + + if( isNegative ) { + n64_putc( buffer, '-' ); + } else if( format->poschar != '\0' ) { + n64_putc( buffer, format->poschar ); + } + + if( format->capitalize ) { + if( isNaN ) { + n64_putc( buffer, 'N' ); + n64_putc( buffer, 'A' ); + n64_putc( buffer, 'N' ); + } else { + n64_putc( buffer, 'I' ); + n64_putc( buffer, 'N' ); + n64_putc( buffer, 'F' ); + } + } else { + if( isNaN ) { + n64_putc( buffer, 'n' ); + n64_putc( buffer, 'a' ); + n64_putc( buffer, 'n' ); + } else { + n64_putc( buffer, 'i' ); + n64_putc( buffer, 'n' ); + n64_putc( buffer, 'f' ); + } + } + + if( format->leftAligned ) { + for( unsigned int i = len; i < format->width; i++ ) { + n64_putc( buffer, ' ' ); + } + } +} + +static const unsigned int s_u32_pow10[10] = { + 1u, + 10u, + 100u, + 1000u, + 10000u, + 100000u, + 1000000u, + 10000000u, + 100000000u, + 1000000000u +}; + +static const unsigned long long s_u64_pow10[18] = { + 1ull, + 10ull, + 100ull, + 1000ull, + 10000ull, + 100000ull, + 1000000ull, + 10000000ull, + 100000000ull, + 1000000000ull, + 10000000000ull, + 100000000000ull, + 1000000000000ull, + 10000000000000ull, + 100000000000000ull, + 1000000000000000ull, + 10000000000000000ull, + 100000000000000000ull, +}; + +static void n64_float_exp_helper( + n64_bprint_buffer *buffer, + const n64_format_args *format, + unsigned int precision, + char *digitBuffer, + int originalExponent, + unsigned int digits, + n64_bool negative, + n64_bool trimTrailingZeros +) { + if( trimTrailingZeros ) { + if( digits > precision + 1u ) { + digits = precision + 1u; + } + + while( digits > 1u ) { + if( digitBuffer[digits - 1u] != '0' ) break; + digits--; + } + + if( digits <= precision ) { + precision = digits ? (digits - 1u) : 0u; + } + } + + if( !digits ) { + digitBuffer[0] = '0'; + digits = 1u; + } + + int e = originalExponent + (int)digits - 1; + unsigned int len = precision ? (precision + 6u) : (format->alternate ? 6u : 5u); + if( negative || format->poschar != '\0' ) len++; + if( e >= 100 || e <= -100 ) len++; + + if( !format->leftAligned && format->padchar == ' ' ) { + for( unsigned int i = len; i < format->width; i++ ) { + n64_putc( buffer, ' ' ); + } + } + + if( negative ) { + n64_putc( buffer, '-' ); + } else if( format->poschar != '\0' ) { + n64_putc( buffer, format->poschar ); + } + + if( !format->leftAligned && format->padchar == '0' ) { + for( unsigned int i = len; i < format->width; i++ ) { + n64_putc( buffer, '0' ); + } + } + + n64_putc( buffer, digitBuffer[digits - 1] ); + if( precision || format->alternate ) n64_putc( buffer, '.' ); + for( int i = (int)digits - 2; i >= 0; i-- ) { + n64_putc( buffer, digitBuffer[i] ); + } + + if( !trimTrailingZeros ) { + for( unsigned int i = digits; i <= precision; i++ ) { + n64_putc( buffer, '0' ); + } + } + + n64_putc( buffer, format->capitalize ? 'E' : 'e' ); + if( e < 0 ) { + n64_putc( buffer, '-' ); + e = -e; + } else { + n64_putc( buffer, '+' ); + } + + if( e >= 100 ) n64_putc( buffer, '0' + (char)(e / 100) ); + n64_putc( buffer, '0' + (char)((e / 10) % 10) ); + n64_putc( buffer, '0' + (char)(e % 10) ); + + if( format->leftAligned ) { + for( unsigned int i = len; i < format->width; i++ ) { + n64_putc( buffer, ' ' ); + } + } +} + +static void n64_format_float_exp( + n64_bprint_buffer *buffer, + const n64_format_args *format, + ryu_floating_decimal_32 decimal, + unsigned int precision, + n64_bool negative, + n64_bool trimTrailingZeros +) { + char digitBuffer[9]; + unsigned int digits = decimalLength9( decimal.mantissa ) - 1u; + if( digits > precision ) { + decimal.mantissa /= s_u32_pow10[digits - precision - 1u]; + if( decimal.mantissa % 10u >= 5u ) { + decimal.mantissa += 10u; + if( decimal.mantissa >= s_u32_pow10[precision + 2u] ) { + decimal.mantissa /= 10u; + decimal.exponent++; + } + } + decimal.mantissa /= 10u; + decimal.exponent += (int)(digits - precision); + } + + if( trimTrailingZeros ) { + while( decimal.mantissa && !(decimal.mantissa % 10u) ) { + decimal.mantissa /= 10u; + decimal.exponent++; + } + precision = decimalLength9( decimal.mantissa ) - 1u; + } + + digits = 0u; + while( decimal.mantissa ) { + digitBuffer[digits++] = '0' + (char)(decimal.mantissa % 10u); + decimal.mantissa /= 10u; + } + + n64_float_exp_helper( buffer, format, precision, digitBuffer, decimal.exponent, digits, negative, trimTrailingZeros ); +} + +static void n64_format_double_exp( + n64_bprint_buffer *buffer, + const n64_format_args *format, + ryu_floating_decimal_64 decimal, + unsigned int precision, + n64_bool negative, + n64_bool trimTrailingZeros +) { + char digitBuffer[17]; + unsigned int digits = decimalLength17( decimal.mantissa ) - 1u; + if( digits > precision ) { + decimal.mantissa /= s_u64_pow10[digits - precision - 1u]; + if( decimal.mantissa % 10ull >= 5ull ) { + decimal.mantissa += 10ull; + if( decimal.mantissa >= s_u64_pow10[precision + 2u] ) { + decimal.mantissa /= 10ull; + decimal.exponent++; + } + } + decimal.mantissa /= 10ull; + decimal.exponent += (int)(digits - precision); + } + + if( trimTrailingZeros ) { + while( decimal.mantissa && !(decimal.mantissa % 10ull) ) { + decimal.mantissa /= 10ull; + decimal.exponent++; + } + precision = decimalLength17( decimal.mantissa ) - 1u; + } + + digits = 0u; + while( decimal.mantissa ) { + digitBuffer[digits++] = '0' + (char)(decimal.mantissa % 10u); + decimal.mantissa /= 10u; + } + + n64_float_exp_helper( buffer, format, precision, digitBuffer, decimal.exponent, digits, negative, trimTrailingZeros ); +} + +#define n64_format_fixed_generic( FTYPE, FBITS, FDECLEN, ITYPE, ISUFFIX ) \ +static void n64_format_##FTYPE##_fixed(\ + n64_bprint_buffer *buffer,\ + const n64_format_args *format,\ + ryu_floating_decimal_##FBITS decimal,\ + unsigned int precision,\ + n64_bool negative,\ + n64_bool trimTrailingZeros\ +) {\ + const unsigned int digits = decimalLength##FDECLEN( decimal.mantissa );\ + const int e = decimal.exponent + (int)digits - 1;\ +\ + unsigned int integerDigits;\ + unsigned int decimalDigits;\ + unsigned int decimalLeadingZeros;\ + ITYPE integerPart;\ + ITYPE decimalPart;\ +\ + if( e < 0 ) {\ + integerDigits = 1u;\ + decimalDigits = digits;\ + decimalLeadingZeros = (unsigned int)(-e) - 1u;\ + integerPart = 0##ISUFFIX;\ + decimalPart = decimal.mantissa;\ + } else if( decimal.exponent >= 0 ) {\ + integerDigits = digits;\ + decimalDigits = 0u;\ + decimalLeadingZeros = 0u;\ + integerPart = decimal.mantissa;\ + decimalPart = 0##ISUFFIX;\ + } else {\ + integerDigits = (unsigned int)e + 1u;\ + decimalDigits = digits - integerDigits;\ + decimalLeadingZeros = 0u;\ + integerPart = decimal.mantissa / s_u##FBITS##_pow10[decimalDigits];\ + decimalPart = decimal.mantissa % s_u##FBITS##_pow10[decimalDigits];\ + }\ +\ + if( decimalLeadingZeros >= precision ) {\ + decimalLeadingZeros = precision;\ + decimalDigits = 0u;\ + decimalPart = 0##ISUFFIX;\ + } else if( decimalDigits + decimalLeadingZeros > precision ) {\ + const unsigned int newDigits = precision - decimalLeadingZeros;\ + decimalPart /= s_u##FBITS##_pow10[decimalDigits - newDigits];\ + decimalDigits = newDigits;\ + }\ +\ + if( trimTrailingZeros ) {\ + while( decimalPart && !(decimalPart % 10##ISUFFIX) ) {\ + decimalPart /= 10##ISUFFIX;\ + }\ + decimalDigits = decimalPart ? decimalLength##FDECLEN( decimalPart ) : 0u;\ + precision = decimalDigits + decimalLeadingZeros;\ + }\ +\ + unsigned int len = integerDigits + precision;\ + if( decimal.exponent > 0 ) len += (unsigned int)decimal.exponent;\ + if( negative || format->poschar != '\0' ) len++;\ + if( precision || format->alternate ) len++;\ +\ + if( !format->leftAligned && format->padchar == ' ' ) {\ + for( unsigned int i = len; i < format->width; i++ ) {\ + n64_putc( buffer, ' ' );\ + }\ + }\ +\ + if( negative ) {\ + n64_putc( buffer, '-' );\ + } else if( format->poschar != '\0' ) {\ + n64_putc( buffer, format->poschar );\ + }\ +\ + if( !format->leftAligned && format->padchar == '0' ) {\ + for( unsigned int i = len; i < format->width; i++ ) {\ + n64_putc( buffer, '0' );\ + }\ + }\ +\ + char tempBuffer[FDECLEN];\ + for( unsigned int i = 0u; i < integerDigits; i++ ) {\ + tempBuffer[i] = '0' + (char)(integerPart % 10##ISUFFIX);\ + integerPart /= 10##ISUFFIX;\ + }\ +\ + for( int i = (int)integerDigits - 1; i >= 0; i-- ) {\ + n64_putc( buffer, tempBuffer[i] );\ + }\ +\ + for( int i = 0; i < decimal.exponent; i++ ) {\ + n64_putc( buffer, '0' );\ + }\ +\ + if( precision || format->alternate ) {\ + n64_putc( buffer, '.' );\ + }\ +\ + for( unsigned int i = 0u; i < decimalLeadingZeros; i++ ) {\ + n64_putc( buffer, '0' );\ + }\ +\ + for( unsigned int i = 0u; i < decimalDigits; i++ ) {\ + tempBuffer[i] = '0' + (char)(decimalPart % 10##ISUFFIX);\ + decimalPart /= 10##ISUFFIX;\ + }\ +\ + for( int i = (int)decimalDigits - 1; i >= 0; i-- ) {\ + n64_putc( buffer, tempBuffer[i] );\ + }\ +\ + if( !trimTrailingZeros ) {\ + for( unsigned int i = decimalDigits + decimalLeadingZeros; i < precision; i++ ) {\ + n64_putc( buffer, '0' );\ + }\ + }\ +\ + if( format->leftAligned ) {\ + for( unsigned int i = len; i < format->width; i++ ) {\ + n64_putc( buffer, ' ' );\ + }\ + }\ +} + +n64_format_fixed_generic( float, 32, 9, unsigned int, u ) +n64_format_fixed_generic( double, 64, 17, unsigned long long, ull ) + +static inline void n64_format_float( + n64_bprint_buffer *buffer, + const n64_format_args *format, + float value +) { + unsigned int bits = n64_bit_cast_ftoi( value ); + const n64_bool sign = (n64_bool)(bits >> 31); + const unsigned int e = (bits >> 23) & 0xffu; + const unsigned int m = bits & 0x7fffffu; + + if( e == 0xffu ) { + n64_format_nan_or_inf( buffer, format, sign, m != 0u ); + return; + } + + const ryu_floating_decimal_32 decimal = ryu_float_to_decimal( m, e ); + switch( format->type ) { + case 'e': + n64_format_float_exp( buffer, format, decimal, format->precision, sign, false ); + break; + case 'f': + n64_format_float_fixed( buffer, format, decimal, format->precision, sign, false ); + break; + default: { + const int P = format->precision ? (int)format->precision : 1; + const int exp = decimal.exponent + (int)decimalLength9( decimal.mantissa ) - 1; + if( P > exp && exp >= -4 ) { + n64_format_float_fixed( buffer, format, decimal, (unsigned int)(P - 1 - exp), sign, !format->alternate ); + } else { + n64_format_float_exp( buffer, format, decimal, (unsigned int)(P - 1), sign, !format->alternate ); + } + break; + } + } +} + +static inline void n64_format_double( + n64_bprint_buffer *buffer, + const n64_format_args *format, + double value +) { + unsigned long long bits = n64_bit_cast_dtol( value ); + const n64_bool sign = (n64_bool)(bits >> 63); + const unsigned int e = (unsigned int)(bits >> 52) & 0x7ffu; + const unsigned long long m = bits & 0x000fffffffffffffull; + + if( e == 0x7ffu ) { + n64_format_nan_or_inf( buffer, format, sign, m != 0ull ); + return; + } + + const ryu_floating_decimal_64 decimal = ryu_double_to_decimal( m, e ); + switch( format->type ) { + case 'e': + n64_format_double_exp( buffer, format, decimal, format->precision, sign, false ); + break; + case 'f': + n64_format_double_fixed( buffer, format, decimal, format->precision, sign, false ); + break; + default: { + const int P = format->precision ? (int)format->precision : 1; + const int exp = decimal.exponent + (int)decimalLength17( decimal.mantissa ) - 1; + if( P > exp && exp >= -4 ) { + n64_format_double_fixed( buffer, format, decimal, (unsigned int)(P - 1 - exp), sign, !format->alternate ); + } else { + n64_format_double_exp( buffer, format, decimal, (unsigned int)(P - 1), sign, !format->alternate ); + } + break; + } + + } +} + +static inline void align_number( + n64_bprint_buffer *buffer, + const n64_format_args *format, + const char *prefix, + unsigned int prefixLen, + unsigned int minZeros, + const char *reverseDigits, + unsigned int numDigits +) { + if( !format->leftAligned ) { + if( format->padchar == '0' && prefixLen + numDigits < format->width ) { + register const unsigned int Z = format->width - prefixLen - numDigits; + if( minZeros < Z ) minZeros = Z; + } + + for( unsigned int i = prefixLen + minZeros + numDigits; i < format->width; i++ ) { + n64_putc( buffer, ' ' ); + } + } + + for( unsigned int i = 0; i < prefixLen; i++ ) { + n64_putc( buffer, prefix[i] ); + } + + for( unsigned int i = 0; i < minZeros; i++ ) { + n64_putc( buffer, '0' ); + } + + for( int i = (int)numDigits - 1; i >= 0; i-- ) { + n64_putc( buffer, reverseDigits[i] ); + } + + if( format->leftAligned ) { + for( unsigned int i = prefixLen + minZeros + numDigits; i < format->width; i++ ) { + n64_putc( buffer, ' ' ); + } + } +} + +static inline void n64_format_char( + n64_bprint_buffer *buffer, + const n64_format_args *format, + char value +) { + if( !format->leftAligned ) { + for( unsigned int i = 1; i < format->width; i++ ) n64_putc( buffer, ' ' ); + } + + n64_putc( buffer, value ); + + if( format->leftAligned ) { + for( unsigned int i = 1; i < format->width; i++ ) n64_putc( buffer, ' ' ); + } +} + +static inline void n64_format_string( + n64_bprint_buffer *buffer, + const n64_format_args *format, + const char *value +) { + if( !format->precision ) { + for( unsigned int i = 0; i < format->width; i++ ) n64_putc( buffer, ' ' ); + return; + } + + unsigned int i = 0; + if( format->leftAligned || !format->width ) { + while( value[i] != '\0' && i < format->precision ) n64_putc( buffer, value[i++] ); + while( i++ < format->width ) n64_putc( buffer, ' ' ); + } else { + register const unsigned int maxpad = (format->precision > format->width) ? format->precision : format->width; + while( value[i] != '\0' && i < maxpad ) i++; + while( i++ < format->width ) n64_putc( buffer, ' ' ); + while( *value != '\0' ) n64_putc( buffer, *(value++) ); + } +} + +static void n64_format_udec32( + n64_bprint_buffer *buffer, + const n64_format_args *format, + unsigned int value +) { + char digitBuffer[10]; + unsigned int numDigits = 0u; + while( value ) { + digitBuffer[numDigits++] = '0' + (char)(value % 10u); + value /= 10u; + } + + unsigned int minZeros = 0u; + if( format->precision > numDigits ) { + minZeros = format->precision - numDigits; + } else if( !numDigits && format->precision ) { + minZeros = 1u; + } + + align_number( buffer, format, &format->poschar, format->poschar ? 1u : 0u, minZeros, digitBuffer, numDigits ); +} + +static void n64_format_udec64( + n64_bprint_buffer *buffer, + const n64_format_args *format, + unsigned long long value +) { + char digitBuffer[20]; + unsigned int numDigits = 0u; + while( value ) { + digitBuffer[numDigits++] = '0' + (char)(value % 10ull); + value /= 10ull; + } + + unsigned int minZeros = 0u; + if( format->precision > numDigits ) { + minZeros = format->precision - numDigits; + } else if( !numDigits && format->precision ) { + minZeros = 1u; + } + + align_number( buffer, format, &format->poschar, format->poschar ? 1u : 0u, minZeros, digitBuffer, numDigits ); +} + +static void n64_format_sdec32( + n64_bprint_buffer *buffer, + const n64_format_args *format, + int value +) { + if( value >= 0 ) { + n64_format_udec32( buffer, format, (unsigned int)value ); + return; + } else { + char digitBuffer[10]; + unsigned int numDigits = 0u; + while( value ) { + digitBuffer[numDigits++] = '0' + (char)(-(value % 10)); + value /= 10; + } + + unsigned int minZeros = 0u; + if( format->precision > numDigits ) { + minZeros = format->precision - numDigits; + } else if( !numDigits && format->precision ) { + minZeros = 1u; + } + + align_number( buffer, format, "-", 1u, minZeros, digitBuffer, numDigits ); + } +} + +static inline void n64_format_sdec64( + n64_bprint_buffer *buffer, + const n64_format_args *format, + long long value +) { + if( value >= 0 ) { + n64_format_udec64( buffer, format, (unsigned long long)value ); + return; + } else { + char digitBuffer[19]; + unsigned int numDigits = 0u; + while( value ) { + digitBuffer[numDigits++] = '0' + (char)(-(value % 10ll)); + value /= 10ll; + } + + unsigned int minZeros = 0u; + if( format->precision > numDigits ) { + minZeros = format->precision - numDigits; + } else if( !numDigits && format->precision ) { + minZeros = 1u; + } + + align_number( buffer, format, "-", 1u, minZeros, digitBuffer, numDigits ); + } +} + +static void n64_format_oct32( + n64_bprint_buffer *buffer, + const n64_format_args *format, + unsigned int value +) { + char digitBuffer[11]; + unsigned int numDigits = 0u; + while( value ) { + digitBuffer[numDigits++] = '0' + (char)(value & 0x7u); + value >>= 3; + } + + unsigned int minZeros = 0u; + if( format->precision > numDigits ) { + minZeros = format->precision - numDigits; + } else if( format->alternate ) { + minZeros = 1u; + } else if( !numDigits && format->precision ) { + minZeros = 1u; + } + + align_number( buffer, format, NULL, 0u, minZeros, digitBuffer, numDigits ); +} + +static inline void n64_format_oct64( + n64_bprint_buffer *buffer, + const n64_format_args *format, + unsigned long value +) { + char digitBuffer[22]; + unsigned int numDigits = 0u; + while( value ) { + digitBuffer[numDigits++] = '0' + (char)(value & 0x7u); + value >>= 3; + } + + unsigned int minZeros = 0u; + if( format->precision > numDigits ) { + minZeros = format->precision - numDigits; + } else if( format->alternate ) { + minZeros = 1u; + } else if( !numDigits && format->precision ) { + minZeros = 1u; + } + + align_number( buffer, format, NULL, 0u, minZeros, digitBuffer, numDigits ); +} + +static void n64_format_hex32( + n64_bprint_buffer *buffer, + const n64_format_args *format, + unsigned int value +) { + char digitBuffer[8]; + unsigned int numDigits = 0u; + + const unsigned int prefixLen = (format->alternate && value) ? 2u : 0u; + const char hb = (format->capitalize) ? ('A' - (char)10) : ('a' - (char)10); + while( value ) { + register const unsigned int h = value & 0xFu; + digitBuffer[numDigits++] = (h < 10u) ? ('0' + (char)h) : (hb + (char)h); + value >>= 4; + } + + const char *const prefix = (format->capitalize) ? "0X" : "0x"; + const unsigned int minZeros = (format->precision > numDigits) ? (format->precision - numDigits) : 0u; + + align_number( buffer, format, prefix, prefixLen, minZeros, digitBuffer, numDigits ); +} + +static void n64_format_hex64( + n64_bprint_buffer *buffer, + const n64_format_args *format, + unsigned long long value +) { + char digitBuffer[16]; + unsigned int numDigits = 0u; + + const unsigned int prefixLen = (format->alternate && value) ? 2u : 0u; + const char hb = (format->capitalize) ? ('A' - (char)10) : ('a' - (char)10); + while( value ) { + register const unsigned int h = value & 0xFu; + digitBuffer[numDigits++] = (h < 10u) ? ('0' + (char)h) : (hb + (char)h); + value >>= 4; + } + + const char *const prefix = (format->capitalize) ? "0X" : "0x"; + const unsigned int minZeros = (format->precision > numDigits) ? (format->precision - numDigits) : 0u; + + align_number( buffer, format, prefix, prefixLen, minZeros, digitBuffer, numDigits ); +} + +static void n64_format_bin32( + n64_bprint_buffer *buffer, + const n64_format_args *format, + unsigned int value +) { + char digitBuffer[32]; + unsigned int numDigits = 0u; + + const unsigned int prefixLen = (format->alternate && value) ? 2u : 0u; + while( value ) { + digitBuffer[numDigits++] = (value & 1u) ? '1' : '0'; + value >>= 1; + } + + const char *const prefix = (format->capitalize) ? "0B" : "0b"; + const unsigned int minZeros = (format->precision > numDigits) ? (format->precision - numDigits) : 0u; + + align_number( buffer, format, prefix, prefixLen, minZeros, digitBuffer, numDigits ); +} + +static void n64_format_bin64( + n64_bprint_buffer *buffer, + const n64_format_args *format, + unsigned long long value +) { + char digitBuffer[64]; + unsigned int numDigits = 0u; + + const unsigned int prefixLen = (format->alternate && value) ? 2u : 0u; + while( value ) { + digitBuffer[numDigits++] = (value & 1ull) ? '1' : '0'; + value >>= 1; + } + + const char *const prefix = (format->capitalize) ? "0B" : "0b"; + const unsigned int minZeros = (format->precision > numDigits) ? (format->precision - numDigits) : 0u; + + align_number( buffer, format, prefix, prefixLen, minZeros, digitBuffer, numDigits ); +} + +__attribute__((always_inline)) +static inline void n64_format_hexreal_helper( + n64_bprint_buffer *buffer, + const n64_format_args *format, + const char *mbuff, + unsigned int mdigits, + char firstDigit, + int exp, + n64_bool sign, + n64_bool nonzeroMantissa +) { + unsigned int precision = format->precision; + if( precision == 0xFFFFFFFFu ) { + precision = mdigits; + } else if( mdigits > precision ) { + mdigits = precision; + } + + unsigned int len = precision + 6u; + if( sign || format->poschar != '\0' ) len++; + if( format->alternate || nonzeroMantissa ) len++; + if( exp >= 1000 || exp <= -1000 ) { len += 3; } + else if( exp >= 100 || exp <= -100 ) { len += 2; } + else if( exp >= 10 || exp <= -10 ) len++; + + if( !format->leftAligned && format->padchar == ' ' ) { + for( unsigned int i = len; i < format->width; i++ ) n64_putc( buffer, ' ' ); + } + + if( sign ) { + n64_putc( buffer, '-' ); + } else if( format->poschar != '\0' ) { + n64_putc( buffer, format->poschar ); + } + + n64_putc( buffer, '0' ); + n64_putc( buffer, format->capitalize ? 'X' : 'x' ); + + if( !format->leftAligned && format->padchar == '0' ) { + for( unsigned int i = len; i < format->width; i++ ) n64_putc( buffer, '0' ); + } + + n64_putc( buffer, firstDigit ); + + if( format->alternate || nonzeroMantissa ) { + n64_putc( buffer, '.' ); + } + + for( unsigned int i = 0u; i < mdigits; i++ ) { + n64_putc( buffer, mbuff[i] ); + } + + for( unsigned int i = mdigits; i < precision; i++ ) { + n64_putc( buffer, '0' ); + } + + n64_putc( buffer, format->capitalize ? 'P' : 'p' ); + if( exp < 0 ) { + n64_putc( buffer, '-' ); + exp = -exp; + } else { + n64_putc( buffer, '+' ); + } + + char ebuff[4]; + int elen = 0u; + do { + ebuff[elen++] = '0' + (char)(exp % 10); + exp /= 10; + } while( exp ); + + for( int i = elen - 1; i >= 0; i-- ) { + n64_putc( buffer, ebuff[i] ); + } + + if( format->leftAligned ) { + for( unsigned int i = len; i < format->width; i++ ) n64_putc( buffer, ' ' ); + } +} + +static inline void n64_format_hexfloat( + n64_bprint_buffer *buffer, + const n64_format_args *format, + float value +) { + const unsigned int bits = n64_bit_cast_ftoi( value ); + + const n64_bool sign = (n64_bool)(bits >> 31); + int exp = ((int)(bits >> 23) & 0xFF) - 127; + unsigned int mantissa = (bits & 0x7FFFFF) << 1; + const n64_bool nonzeroMantissa = (mantissa != 0u); + + if( exp == 128 ) { + // NaN or Infinity + n64_format_nan_or_inf( buffer, format, sign, nonzeroMantissa ); + return; + } + + char firstDigit = (nonzeroMantissa || exp != -127) ? '1' : '0'; + if( mantissa && format->precision < 6u ) { + // rounding + mantissa += 0x8u << (20u - (format->precision << 2)); + if( mantissa >> 24 ) { + firstDigit++; + mantissa = 0u; + } + } + + if( exp == -127 ) { + if( nonzeroMantissa ) { + // subnormal + exp++; + firstDigit--; + } else { + // zero + exp = 0; + firstDigit = '0'; + } + } + + unsigned int mdigits; + char mbuff[6]; + + if( mantissa ) { + mdigits = 6u; + while( !(mantissa & 0xFu) ) { + mdigits--; + mantissa >>= 4; + } + + const char *const hexits = format->capitalize ? "0123456789ABCDEF" : "0123456789abcdef"; + for( int i = (int)mdigits - 1; i >= 0; i-- ) { + mbuff[i] = hexits[mantissa & 0xFu]; + mantissa >>= 4; + } + } else { + mdigits = 0u; + } + + n64_format_hexreal_helper( buffer, format, mbuff, mdigits, firstDigit, exp, sign, nonzeroMantissa ); +} + +static inline void n64_format_hexdouble( + n64_bprint_buffer *buffer, + const n64_format_args *format, + double value +) { + const unsigned long long bits = n64_bit_cast_dtol( value ); + + const n64_bool sign = (n64_bool)(bits >> 63); + int exp = ((int)(bits >> 52) & 0x7FF) - 1023; + unsigned long long mantissa = bits & 0x000FFFFFFFFFFFFFull; + const n64_bool nonzeroMantissa = (mantissa != 0ull); + + if( exp == 1024 ) { + // NaN or Infinity + n64_format_nan_or_inf( buffer, format, sign, nonzeroMantissa ); + return; + } + + char firstDigit = (nonzeroMantissa || exp != -1023) ? '1' : '0'; + if( mantissa && format->precision < 13u ) { + // rounding + mantissa += 0x8ull << (48u - (format->precision << 2)); + if( mantissa >> 52 ) { + firstDigit++; + mantissa = 0ull; + } + } + + if( exp == -1023 ) { + if( nonzeroMantissa ) { + // subnormal + exp++; + firstDigit--; + } else { + // zero + exp = 0; + firstDigit = '0'; + } + } + + unsigned int mdigits; + char mbuff[13]; + + if( mantissa ) { + mdigits = 13u; + while( !(mantissa & 0xFull) ) { + mdigits--; + mantissa >>= 4; + } + + const char *const hexits = format->capitalize ? "0123456789ABCDEF" : "0123456789abcdef"; + for( int i = (int)mdigits - 1; i >= 0; i-- ) { + mbuff[i] = hexits[mantissa & 0xFull]; + mantissa >>= 4; + } + } else { + mdigits = 0u; + } + + n64_format_hexreal_helper( buffer, format, mbuff, mdigits, firstDigit, exp, sign, nonzeroMantissa ); +} + +static inline void n64_format_pointer( + n64_bprint_buffer *buffer, + n64_format_args *format, + const void *value +) { + if( value == NULL ) { + if( !format->leftAligned ) { + for( unsigned int i = 5; i < format->width; i++ ) n64_putc( buffer, ' ' ); + } + + n64_putc( buffer, '(' ); + n64_putc( buffer, 'n' ); + n64_putc( buffer, 'i' ); + n64_putc( buffer, 'l' ); + n64_putc( buffer, ')' ); + + if( format->leftAligned ) { + for( unsigned int i = 5; i < format->width; i++ ) n64_putc( buffer, ' ' ); + } + + return; + } + + format->alternate = true; + format->capitalize = false; + n64_format_hex32( buffer, format, (unsigned int)value ); +} diff --git a/lib/n64-libc/n64-stdio-ryu.c.inc b/lib/n64-libc/n64-stdio-ryu.c.inc new file mode 100644 index 00000000..eb94b782 --- /dev/null +++ b/lib/n64-libc/n64-stdio-ryu.c.inc @@ -0,0 +1,910 @@ +// Copyright 2018 Ulf Adams +// +// The contents of this file may be used under the terms of the Apache License, +// Version 2.0. +// +// (See accompanying file LICENSE-Apache or copy at +// http://www.apache.org/licenses/LICENSE-2.0) +// +// Alternatively, the contents of this file may be used under the terms of +// the Boost Software License, Version 1.0. +// (See accompanying file LICENSE-Boost or copy at +// https://www.boost.org/LICENSE_1_0.txt) +// +// Unless required by applicable law or agreed to in writing, this software +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. + +// NOTICE: +// This file was adapted from the original Apache licensed code, whose sources +// may be found at: +// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/d2s.c +// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/f2s.c +// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/d2s_small_table.h +// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/f2s_full_table.h +// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/d2s_intrinsics.h +// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/f2s_intrinsics.h +// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/common.h +// +// The files have been merged into this single file, all functions and debug +// code that were not necessary for this library were stripped out, and all +// explicit integer length types were swapped out for C keywords to remove the +// dependency on stdint.h. +// The f2d and d2d functions were also renamed to ryu_float_to_decimal and +// ryu_double_to_decimal for the sake of clarity. + +#include "n64-stdbool.h" + +#define DOUBLE_MANTISSA_BITS 52 +#define DOUBLE_EXPONENT_BITS 11 +#define DOUBLE_BIAS 1023 + +#define FLOAT_MANTISSA_BITS 23 +#define FLOAT_EXPONENT_BITS 8 +#define FLOAT_BIAS 127 + +#define FLOAT_POW5_INV_BITCOUNT 59 +#define FLOAT_POW5_BITCOUNT 61 + +#define DOUBLE_POW5_INV_BITCOUNT 125 +#define DOUBLE_POW5_BITCOUNT 125 + +#define POW5_TABLE_SIZE 26 + +#define FLOAT_POW5_INV_BITCOUNT 59 +#define FLOAT_POW5_BITCOUNT 61 + +static unsigned long long FLOAT_POW5_INV_SPLIT[55] = { + 576460752303423489ull, 461168601842738791ull, 368934881474191033ull, 295147905179352826ull, + 472236648286964522ull, 377789318629571618ull, 302231454903657294ull, 483570327845851670ull, + 386856262276681336ull, 309485009821345069ull, 495176015714152110ull, 396140812571321688ull, + 316912650057057351ull, 507060240091291761ull, 405648192073033409ull, 324518553658426727ull, + 519229685853482763ull, 415383748682786211ull, 332306998946228969ull, 531691198313966350ull, + 425352958651173080ull, 340282366920938464ull, 544451787073501542ull, 435561429658801234ull, + 348449143727040987ull, 557518629963265579ull, 446014903970612463ull, 356811923176489971ull, + 570899077082383953ull, 456719261665907162ull, 365375409332725730ull, 292300327466180584ull, + 467680523945888934ull, 374144419156711148ull, 299315535325368918ull, 478904856520590269ull, + 383123885216472215ull, 306499108173177772ull, 490398573077084435ull, 392318858461667548ull, + 313855086769334039ull, 502168138830934462ull, 401734511064747569ull, 321387608851798056ull, + 514220174162876889ull, 411376139330301511ull, 329100911464241209ull, 526561458342785934ull, + 421249166674228747ull, 336999333339382998ull, 539198933343012796ull, 431359146674410237ull, + 345087317339528190ull, 552139707743245103ull, 441711766194596083ull +}; +static const unsigned long long FLOAT_POW5_SPLIT[47] = { + 1152921504606846976ull, 1441151880758558720ull, 1801439850948198400ull, 2251799813685248000ull, + 1407374883553280000ull, 1759218604441600000ull, 2199023255552000000ull, 1374389534720000000ull, + 1717986918400000000ull, 2147483648000000000ull, 1342177280000000000ull, 1677721600000000000ull, + 2097152000000000000ull, 1310720000000000000ull, 1638400000000000000ull, 2048000000000000000ull, + 1280000000000000000ull, 1600000000000000000ull, 2000000000000000000ull, 1250000000000000000ull, + 1562500000000000000ull, 1953125000000000000ull, 1220703125000000000ull, 1525878906250000000ull, + 1907348632812500000ull, 1192092895507812500ull, 1490116119384765625ull, 1862645149230957031ull, + 1164153218269348144ull, 1455191522836685180ull, 1818989403545856475ull, 2273736754432320594ull, + 1421085471520200371ull, 1776356839400250464ull, 2220446049250313080ull, 1387778780781445675ull, + 1734723475976807094ull, 2168404344971008868ull, 1355252715606880542ull, 1694065894508600678ull, + 2117582368135750847ull, 1323488980084844279ull, 1654361225106055349ull, 2067951531382569187ull, + 1292469707114105741ull, 1615587133892632177ull, 2019483917365790221ull +}; + +static const unsigned long long DOUBLE_POW5_INV_SPLIT2[15][2] = { + { 1ull, 2305843009213693952ull }, + { 5955668970331000884ull, 1784059615882449851ull }, + { 8982663654677661702ull, 1380349269358112757ull }, + { 7286864317269821294ull, 2135987035920910082ull }, + { 7005857020398200553ull, 1652639921975621497ull }, + { 17965325103354776697ull, 1278668206209430417ull }, + { 8928596168509315048ull, 1978643211784836272ull }, + { 10075671573058298858ull, 1530901034580419511ull }, + { 597001226353042382ull, 1184477304306571148ull }, + { 1527430471115325346ull, 1832889850782397517ull }, + { 12533209867169019542ull, 1418129833677084982ull }, + { 5577825024675947042ull, 2194449627517475473ull }, + { 11006974540203867551ull, 1697873161311732311ull }, + { 10313493231639821582ull, 1313665730009899186ull }, + { 12701016819766672773ull, 2032799256770390445ull } +}; +static const unsigned int POW5_INV_OFFSETS[19] = { + 0x54544554u, 0x04055545u, 0x10041000u, 0x00400414u, 0x40010000u, 0x41155555u, + 0x00000454u, 0x00010044u, 0x40000000u, 0x44000041u, 0x50454450u, 0x55550054u, + 0x51655554u, 0x40004000u, 0x01000001u, 0x00010500u, 0x51515411u, 0x05555554u, + 0x00000000u +}; + +static const unsigned long long DOUBLE_POW5_SPLIT2[13][2] = { + { 0ull, 1152921504606846976ull }, + { 0ull, 1490116119384765625ull }, + { 1032610780636961552ull, 1925929944387235853ull }, + { 7910200175544436838ull, 1244603055572228341ull }, + { 16941905809032713930ull, 1608611746708759036ull }, + { 13024893955298202172ull, 2079081953128979843ull }, + { 6607496772837067824ull, 1343575221513417750ull }, + { 17332926989895652603ull, 1736530273035216783ull }, + { 13037379183483547984ull, 2244412773384604712ull }, + { 1605989338741628675ull, 1450417759929778918ull }, + { 9630225068416591280ull, 1874621017369538693ull }, + { 665883850346957067ull, 1211445438634777304ull }, + { 14931890668723713708ull, 1565756531257009982ull } +}; +static const unsigned int POW5_OFFSETS[21] = { + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x40000000u, 0x59695995u, + 0x55545555u, 0x56555515u, 0x41150504u, 0x40555410u, 0x44555145u, 0x44504540u, + 0x45555550u, 0x40004000u, 0x96440440u, 0x55565565u, 0x54454045u, 0x40154151u, + 0x55559155u, 0x51405555u, 0x00000105u +}; + + +static const unsigned long long DOUBLE_POW5_TABLE[POW5_TABLE_SIZE] = { + 1ull, 5ull, 25ull, 125ull, 625ull, 3125ull, 15625ull, 78125ull, 390625ull, + 1953125ull, 9765625ull, 48828125ull, 244140625ull, 1220703125ull, 6103515625ull, + 30517578125ull, 152587890625ull, 762939453125ull, 3814697265625ull, + 19073486328125ull, 95367431640625ull, 476837158203125ull, + 2384185791015625ull, 11920928955078125ull, 59604644775390625ull, + 298023223876953125ull //, 1490116119384765625ull +}; + +// Returns e == 0 ? 1 : [log_2(5^e)]; requires 0 <= e <= 3528. +static inline int log2pow5(const int e) { + // This approximation works up to the point that the multiplication overflows at e = 3529. + // If the multiplication were done in 64 bits, it would fail at 5^4004 which is just greater + // than 2^9297. + return (int) ((((unsigned int) e) * 1217359u) >> 19); +} + +// Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528. +static inline int pow5bits(const int e) { + // This approximation works up to the point that the multiplication overflows at e = 3529. + // If the multiplication were done in 64 bits, it would fail at 5^4004 which is just greater + // than 2^9297. + return (int) (((((unsigned int) e) * 1217359u) >> 19) + 1u); +} + +// Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528. +static inline int ceil_log2pow5(const int e) { + return log2pow5(e) + 1; +} + +// Returns floor(log_10(2^e)); requires 0 <= e <= 1650. +static inline unsigned int log10Pow2(const int e) { + // The first value this approximation fails for is 2^1651 which is just greater than 10^297. + return (((unsigned int) e) * 78913u) >> 18; +} + +// Returns floor(log_10(5^e)); requires 0 <= e <= 2620. +static inline unsigned int log10Pow5(const int e) { + // The first value this approximation fails for is 5^2621 which is just greater than 10^1832. + return (((unsigned int) e) * 732923u) >> 20; +} + +// Returns the number of decimal digits in v, which must not contain more than 9 digits. +static inline unsigned int decimalLength9(const unsigned int v) { + // Function precondition: v is not a 10-digit number. + // (f2s: 9 digits are sufficient for round-tripping.) + // (d2fixed: We print 9-digit blocks.) + if (v >= 100000000u) { return 9u; } + if (v >= 10000000u) { return 8u; } + if (v >= 1000000u) { return 7u; } + if (v >= 100000u) { return 6u; } + if (v >= 10000u) { return 5u; } + if (v >= 1000u) { return 4u; } + if (v >= 100u) { return 3u; } + if (v >= 10u) { return 2u; } + return 1; +} + +static inline unsigned int decimalLength17(const unsigned long long v) { + // This is slightly faster than a loop. + // The average output length is 16.38 digits, so we check high-to-low. + // Function precondition: v is not an 18, 19, or 20-digit number. + // (17 digits are sufficient for round-tripping.) + if (v >= 10000000000000000ull) { return 17u; } + if (v >= 1000000000000000ull) { return 16u; } + if (v >= 100000000000000ull) { return 15u; } + if (v >= 10000000000000ull) { return 14u; } + if (v >= 1000000000000ull) { return 13u; } + if (v >= 100000000000ull) { return 12u; } + if (v >= 10000000000ull) { return 11u; } + if (v >= 1000000000ull) { return 10u; } + if (v >= 100000000ull) { return 9u; } + if (v >= 10000000ull) { return 8u; } + if (v >= 1000000ull) { return 7u; } + if (v >= 100000ull) { return 6u; } + if (v >= 10000ull) { return 5u; } + if (v >= 1000ull) { return 4u; } + if (v >= 100ull) { return 3u; } + if (v >= 10ull) { return 2u; } + return 1; +} + +static inline unsigned long long umul128(const unsigned long long a, const unsigned long long b, unsigned long long* const productHi) { + const unsigned int aLo = (unsigned int)a; + const unsigned int aHi = (unsigned int)(a >> 32); + const unsigned int bLo = (unsigned int)b; + const unsigned int bHi = (unsigned int)(b >> 32); + + const unsigned long long b00 = (unsigned long long)aLo * bLo; + const unsigned long long b01 = (unsigned long long)aLo * bHi; + const unsigned long long b10 = (unsigned long long)aHi * bLo; + const unsigned long long b11 = (unsigned long long)aHi * bHi; + + const unsigned int b00Lo = (unsigned int)b00; + const unsigned int b00Hi = (unsigned int)(b00 >> 32); + + const unsigned long long mid1 = b10 + b00Hi; + const unsigned int mid1Lo = (unsigned int)(mid1); + const unsigned int mid1Hi = (unsigned int)(mid1 >> 32); + + const unsigned long long mid2 = b01 + mid1Lo; + const unsigned int mid2Lo = (unsigned int)(mid2); + const unsigned int mid2Hi = (unsigned int)(mid2 >> 32); + + const unsigned long long pHi = b11 + mid1Hi + mid2Hi; + const unsigned long long pLo = ((unsigned long long)mid2Lo << 32) | b00Lo; + + *productHi = pHi; + return pLo; +} + +static inline unsigned long long shiftright128(const unsigned long long lo, const unsigned long long hi, const unsigned int dist) { + // We don't need to handle the case dist >= 64 here (see above). + return (hi << (64u - dist)) | (lo >> dist); +} + +// Computes 5^i in the form required by Ryu, and stores it in the given pointer. +static inline void double_computePow5(const unsigned int i, unsigned long long* const result) { + const unsigned int base = i / POW5_TABLE_SIZE; + const unsigned int base2 = base * POW5_TABLE_SIZE; + const unsigned int offset = i - base2; + const unsigned long long* const mul = DOUBLE_POW5_SPLIT2[base]; + if (offset == 0) { + result[0] = mul[0]; + result[1] = mul[1]; + return; + } + const unsigned long long m = DOUBLE_POW5_TABLE[offset]; + unsigned long long high1; + const unsigned long long low1 = umul128(m, mul[1], &high1); + unsigned long long high0; + const unsigned long long low0 = umul128(m, mul[0], &high0); + const unsigned long long sum = high0 + low1; + if (sum < high0) { + ++high1; // overflow into high1 + } + // high1 | sum | low0 + const unsigned int delta = pow5bits(i) - pow5bits(base2); + result[0] = shiftright128(low0, sum, delta) + ((POW5_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3); + result[1] = shiftright128(sum, high1, delta); +} + +// Computes 5^-i in the form required by Ryu, and stores it in the given pointer. +static inline void double_computeInvPow5(const unsigned int i, unsigned long long* const result) { + const unsigned int base = (i + POW5_TABLE_SIZE - 1) / POW5_TABLE_SIZE; + const unsigned int base2 = base * POW5_TABLE_SIZE; + const unsigned int offset = base2 - i; + const unsigned long long* const mul = DOUBLE_POW5_INV_SPLIT2[base]; // 1/5^base2 + if (offset == 0) { + result[0] = mul[0]; + result[1] = mul[1]; + return; + } + const unsigned long long m = DOUBLE_POW5_TABLE[offset]; + unsigned long long high1; + const unsigned long long low1 = umul128(m, mul[1], &high1); + unsigned long long high0; + const unsigned long long low0 = umul128(m, mul[0] - 1, &high0); + const unsigned long long sum = high0 + low1; + if (sum < high0) { + ++high1; // overflow into high1 + } + // high1 | sum | low0 + const unsigned int delta = pow5bits(base2) - pow5bits(i); + result[0] = shiftright128(low0, sum, delta) + 1 + ((POW5_INV_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3u); + result[1] = shiftright128(sum, high1, delta); +} + +#ifdef _ABIO32 + +// Returns the high 64 bits of the 128-bit product of a and b. +__attribute__((always_inline)) +static inline unsigned long long umulh(const unsigned long long a, const unsigned long long b) { + // Reuse the umul128 implementation. + // Optimizers will likely eliminate the instructions used to compute the + // low part of the product. + unsigned long long hi; + umul128(a, b, &hi); + return hi; +} + +// On 32-bit platforms, compilers typically generate calls to library +// functions for 64-bit divisions, even if the divisor is a constant. +// +// E.g.: +// https://bugs.llvm.org/show_bug.cgi?id=37932 +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958 +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443 +// +// The functions here perform division-by-constant using multiplications +// in the same way as 64-bit compilers would do. +// +// NB: +// The multipliers and shift values are the ones generated by clang x64 +// for expressions like x/5, x/10, etc. +__attribute__((always_inline)) +static inline unsigned long long div5(const unsigned long long x) { + return umulh(x, 0xCCCCCCCCCCCCCCCDull) >> 2; +} + +__attribute__((always_inline)) +static inline unsigned long long div10(const unsigned long long x) { + return umulh(x, 0xCCCCCCCCCCCCCCCDull) >> 3; +} + +__attribute__((always_inline)) +static inline unsigned long long div100(const unsigned long long x) { + return umulh(x >> 2, 0x28F5C28F5C28F5C3ull) >> 2; +} + +__attribute__((always_inline)) +static inline unsigned long long div1e8(const unsigned long long x) { + return umulh(x, 0xABCC77118461CEFDull) >> 26; +} + +__attribute__((always_inline)) +static inline unsigned long long div1e9(const unsigned long long x) { + return umulh(x >> 9, 0x44B82FA09B5A53ull) >> 11; +} + +__attribute__((always_inline)) +static inline unsigned int mod1e9(const unsigned long long x) { + // Avoid 64-bit math as much as possible. + // Returning (uint32_t) (x - 1000000000 * div1e9(x)) would + // perform 32x64-bit multiplication and 64-bit subtraction. + // x and 1000000000 * div1e9(x) are guaranteed to differ by + // less than 10^9, so their highest 32 bits must be identical, + // so we can truncate both sides to uint32_t before subtracting. + // We can also simplify (uint32_t) (1000000000 * div1e9(x)). + // We can truncate before multiplying instead of after, as multiplying + // the highest 32 bits of div1e9(x) can't affect the lowest 32 bits. + return ((unsigned int) x) - 1000000000u * ((unsigned int) div1e9(x)); +} + +#else + +__attribute__((always_inline)) +static inline unsigned long long div5(const unsigned long long x) { + return x / 5ull; +} + +__attribute__((always_inline)) +static inline unsigned long long div10(const unsigned long long x) { + return x / 10ull; +} + +__attribute__((always_inline)) +static inline unsigned long long div100(const unsigned long long x) { + return x / 100ull; +} + +__attribute__((always_inline)) +static inline unsigned long long div1e8(const unsigned long long x) { + return x / 100000000ull; +} + +__attribute__((always_inline)) +static inline unsigned long long div1e9(const unsigned long long x) { + return x / 1000000000ull; +} + +__attribute__((always_inline)) +static inline unsigned int mod1e9(const unsigned long long x) { + return (unsigned int) (x - 1000000000ull * div1e9(x)); +} + +#endif + +static inline unsigned int pow5Factor(unsigned long long value) { + const unsigned long long m_inv_5 = 14757395258967641293ull; // 5 * m_inv_5 = 1 (mod 2^64) + const unsigned long long n_div_5 = 3689348814741910323ull; // #{ n | n = 0 (mod 2^64) } = 2^64 / 5 + unsigned int count = 0u; + for (;;) { + value *= m_inv_5; + if (value > n_div_5) + break; + ++count; + } + return count; +} + +// Returns true if value is divisible by 5^p. +static inline n64_bool multipleOfPowerOf5(const unsigned long long value, const unsigned int p) { + // I tried a case distinction on p, but there was no performance difference. + return pow5Factor(value) >= p; +} + +// Returns true if value is divisible by 2^p. +static inline n64_bool multipleOfPowerOf2(const unsigned long long value, const unsigned int p) { + // __builtin_ctzll doesn't appear to be faster here. + return (value & ((1ull << p) - 1)) == 0; +} + +// We need a 64x128-bit multiplication and a subsequent 128-bit shift. +// Multiplication: +// The 64-bit factor is variable and passed in, the 128-bit factor comes +// from a lookup table. We know that the 64-bit factor only has 55 +// significant bits (i.e., the 9 topmost bits are zeros). The 128-bit +// factor only has 124 significant bits (i.e., the 4 topmost bits are +// zeros). +// Shift: +// In principle, the multiplication result requires 55 + 124 = 179 bits to +// represent. However, we then shift this value to the right by j, which is +// at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64 +// bits. This means that we only need the topmost 64 significant bits of +// the 64x128-bit multiplication. +// +// There are several ways to do this: +// 1. Best case: the compiler exposes a 128-bit type. +// We perform two 64x64-bit multiplications, add the higher 64 bits of the +// lower result to the higher result, and shift by j - 64 bits. +// +// We explicitly cast from 64-bit to 128-bit, so the compiler can tell +// that these are only 64-bit inputs, and can map these to the best +// possible sequence of assembly instructions. +// x64 machines happen to have matching assembly instructions for +// 64x64-bit multiplications and 128-bit shifts. +// +// 2. Second best case: the compiler exposes intrinsics for the x64 assembly +// instructions mentioned in 1. +// +// 3. We only have 64x64 bit instructions that return the lower 64 bits of +// the result, i.e., we have to use plain C. +// Our inputs are less than the full width, so we have three options: +// a. Ignore this fact and just implement the intrinsics manually. +// b. Split both into 31-bit pieces, which guarantees no internal overflow, +// but requires extra work upfront (unless we change the lookup table). +// c. Split only the first factor into 31-bit pieces, which also guarantees +// no internal overflow, but requires extra work since the intermediate +// results are not perfectly aligned. +static inline unsigned long long mulShift64(const unsigned long long m, const unsigned* const mul, const int j) { + // m is maximum 55 bits + unsigned long long high1; // 128 + const unsigned long long low1 = umul128(m, mul[1], &high1); // 64 + unsigned long long high0; // 64 + umul128(m, mul[0], &high0); // 0 + const unsigned long long sum = high0 + low1; + if (sum < high0) { + ++high1; // overflow into high1 + } + return shiftright128(sum, high1, j - 64); +} + +// This is faster if we don't have a 64x64->128-bit multiplication. +static inline unsigned long long mulShiftAll64(unsigned long long m, const unsigned long long* const mul, const int j, + unsigned long long* const vp, unsigned long long* const vm, const unsigned int mmShift) { + m <<= 1; + // m is maximum 55 bits + unsigned long long tmp; + const unsigned long long lo = umul128(m, mul[0], &tmp); + unsigned long long hi; + const unsigned long long mid = tmp + umul128(m, mul[1], &hi); + hi += mid < tmp; // overflow into hi + + const unsigned long long lo2 = lo + mul[0]; + const unsigned long long mid2 = mid + mul[1] + (lo2 < lo); + const unsigned long long hi2 = hi + (mid2 < mid); + *vp = shiftright128(mid2, hi2, (unsigned int) (j - 64 - 1)); + + if (mmShift == 1) { + const unsigned long long lo3 = lo - mul[0]; + const unsigned long long mid3 = mid - mul[1] - (lo3 > lo); + const unsigned long long hi3 = hi - (mid3 > mid); + *vm = shiftright128(mid3, hi3, (unsigned int) (j - 64 - 1)); + } else { + const unsigned long long lo3 = lo + lo; + const unsigned long long mid3 = mid + mid + (lo3 < lo); + const unsigned long long hi3 = hi + hi + (mid3 < mid); + const unsigned long long lo4 = lo3 - mul[0]; + const unsigned long long mid4 = mid3 - mul[1] - (lo4 > lo3); + const unsigned long long hi4 = hi3 - (mid4 > mid3); + *vm = shiftright128(mid4, hi4, (unsigned int) (j - 64)); + } + + return shiftright128(mid, hi, (unsigned int) (j - 64 - 1)); +} + +static inline unsigned int pow5factor_32(unsigned int value) { + unsigned int count = 0; + for (;;) { + const unsigned int q = value / 5; + const unsigned int r = value % 5; + if (r != 0) { + break; + } + value = q; + ++count; + } + return count; +} + +// Returns true if value is divisible by 5^p. +static inline n64_bool multipleOfPowerOf5_32(const unsigned int value, const unsigned int p) { + return pow5factor_32(value) >= p; +} + +// Returns true if value is divisible by 2^p. +static inline n64_bool multipleOfPowerOf2_32(const unsigned int value, const unsigned int p) { + // __builtin_ctz doesn't appear to be faster here. + return (value & ((1u << p) - 1)) == 0; +} + +// It seems to be slightly faster to avoid uint128_t here, although the +// generated code for uint128_t looks slightly nicer. +static inline unsigned int mulShift32(const unsigned int m, const unsigned long long factor, const int shift) { + + // The casts here help MSVC to avoid calls to the __allmul library + // function. + const unsigned int factorLo = (unsigned int)(factor); + const unsigned int factorHi = (unsigned int)(factor >> 32); + const unsigned long long bits0 = (unsigned long long)m * factorLo; + const unsigned long long bits1 = (unsigned long long)m * factorHi; + +#ifdef _ABIO32 + // On 32-bit platforms we can avoid a 64-bit shift-right since we only + // need the upper 32 bits of the result and the shift value is > 32. + const unsigned int bits0Hi = (unsigned int)(bits0 >> 32); + unsigned int bits1Lo = (unsigned int)(bits1); + unsigned int bits1Hi = (unsigned int)(bits1 >> 32); + bits1Lo += bits0Hi; + bits1Hi += (bits1Lo < bits0Hi); + if (shift >= 64) { + // s2f can call this with a shift value >= 64, which we have to handle. + // This could now be slower than the !defined(RYU_32_BIT_PLATFORM) case. + return (unsigned int)(bits1Hi >> (shift - 64)); + } else { + const int s = shift - 32; + return (bits1Hi << (32 - s)) | (bits1Lo >> s); + } +#else + const unsigned long long sum = (bits0 >> 32) + bits1; + const unsigned long long shiftedSum = sum >> (shift - 32); + return (unsigned int) shiftedSum; +#endif +} + +static inline unsigned int mulPow5InvDivPow2(const unsigned int m, const unsigned int q, const int j) { + return mulShift32(m, FLOAT_POW5_INV_SPLIT[q], j); +} + +static inline unsigned int mulPow5divPow2(const unsigned int m, const unsigned int i, const int j) { + return mulShift32(m, FLOAT_POW5_SPLIT[i], j); +} + +// A floating decimal representing m * 10^e. +typedef struct { + unsigned int mantissa; + // Decimal exponent's range is -45 to 38 + // inclusive, and can fit in a short if needed. + int exponent; +} ryu_floating_decimal_32; + +// A floating decimal representing m * 10^e. +typedef struct { + unsigned long long mantissa; + // Decimal exponent's range is -324 to 308 + // inclusive, and can fit in a short if needed. + int exponent; +} ryu_floating_decimal_64; + +static ryu_floating_decimal_32 ryu_float_to_decimal(const unsigned int ieeeMantissa, const unsigned int ieeeExponent) { + int e2; + unsigned int m2; + if (ieeeExponent == 0) { + if (ieeeMantissa == 0u) { + const ryu_floating_decimal_32 fd = { 0u, 0 }; + return fd; + } + // We subtract 2 so that the bounds computation has 2 additional bits. + e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2; + m2 = ieeeMantissa; + } else { + e2 = (int) ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2; + m2 = (1u << FLOAT_MANTISSA_BITS) | ieeeMantissa; + } + const n64_bool even = (m2 & 1) == 0; + const n64_bool acceptBounds = even; + + // Step 2: Determine the interval of valid decimal representations. + const unsigned int mv = 4 * m2; + const unsigned int mp = 4 * m2 + 2; + // Implicit bool -> int conversion. True is 1, false is 0. + const unsigned int mmShift = ieeeMantissa != 0 || ieeeExponent <= 1; + const unsigned int mm = 4 * m2 - 1 - mmShift; + + // Step 3: Convert to a decimal power base using 64-bit arithmetic. + unsigned int vr, vp, vm; + int e10; + n64_bool vmIsTrailingZeros = false; + n64_bool vrIsTrailingZeros = false; + unsigned char lastRemovedDigit = 0; + if (e2 >= 0) { + const unsigned int q = log10Pow2(e2); + e10 = (int) q; + const int k = FLOAT_POW5_INV_BITCOUNT + pow5bits((int) q) - 1; + const int i = -e2 + (int) q + k; + vr = mulPow5InvDivPow2(mv, q, i); + vp = mulPow5InvDivPow2(mp, q, i); + vm = mulPow5InvDivPow2(mm, q, i); + if (q != 0 && (vp - 1) / 10 <= vm / 10) { + // We need to know one removed digit even if we are not going to loop below. We could use + // q = X - 1 above, except that would require 33 bits for the result, and we've found that + // 32-bit arithmetic is faster even on 64-bit machines. + const int l = FLOAT_POW5_INV_BITCOUNT + pow5bits((int) (q - 1)) - 1; + lastRemovedDigit = (unsigned char) (mulPow5InvDivPow2(mv, q - 1, -e2 + (int) q - 1 + l) % 10); + } + if (q <= 9) { + // The largest power of 5 that fits in 24 bits is 5^10, but q <= 9 seems to be safe as well. + // Only one of mp, mv, and mm can be a multiple of 5, if any. + if (mv % 5 == 0) { + vrIsTrailingZeros = multipleOfPowerOf5_32(mv, q); + } else if (acceptBounds) { + vmIsTrailingZeros = multipleOfPowerOf5_32(mm, q); + } else { + vp -= multipleOfPowerOf5_32(mp, q); + } + } + } else { + const unsigned int q = log10Pow5(-e2); + e10 = (int) q + e2; + const int i = -e2 - (int) q; + const int k = pow5bits(i) - FLOAT_POW5_BITCOUNT; + int j = (int) q - k; + vr = mulPow5divPow2(mv, (unsigned int) i, j); + vp = mulPow5divPow2(mp, (unsigned int) i, j); + vm = mulPow5divPow2(mm, (unsigned int) i, j); + if (q != 0 && (vp - 1) / 10 <= vm / 10) { + j = (int) q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT); + lastRemovedDigit = (unsigned char) (mulPow5divPow2(mv, (unsigned int) (i + 1), j) % 10); + } + if (q <= 1) { + // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits. + // mv = 4 * m2, so it always has at least two trailing 0 bits. + vrIsTrailingZeros = true; + if (acceptBounds) { + // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1. + vmIsTrailingZeros = mmShift == 1; + } else { + // mp = mv + 2, so it always has at least one trailing 0 bit. + --vp; + } + } else if (q < 31) { + vrIsTrailingZeros = multipleOfPowerOf2_32(mv, q - 1); + } + } + + // Step 4: Find the shortest decimal representation in the interval of valid representations. + int removed = 0; + unsigned int output; + if (vmIsTrailingZeros || vrIsTrailingZeros) { + // General case, which happens rarely (~4.0%). + while (vp / 10 > vm / 10) { + vmIsTrailingZeros &= vm % 10 == 0; + vrIsTrailingZeros &= lastRemovedDigit == 0; + lastRemovedDigit = (unsigned char) (vr % 10); + vr /= 10; + vp /= 10; + vm /= 10; + ++removed; + } + if (vmIsTrailingZeros) { + while (vm % 10 == 0) { + vrIsTrailingZeros &= lastRemovedDigit == 0; + lastRemovedDigit = (unsigned char) (vr % 10); + vr /= 10; + vp /= 10; + vm /= 10; + ++removed; + } + } + if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) { + // Round even if the exact number is .....50..0. + lastRemovedDigit = 4; + } + // We need to take vr + 1 if vr is outside bounds or we need to round up. + output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5); + } else { + // Specialized for the common case (~96.0%). Percentages below are relative to this. + // Loop iterations below (approximately): + // 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01% + while (vp / 10 > vm / 10) { + lastRemovedDigit = (unsigned char) (vr % 10); + vr /= 10; + vp /= 10; + vm /= 10; + ++removed; + } + // We need to take vr + 1 if vr is outside bounds or we need to round up. + output = vr + (vr == vm || lastRemovedDigit >= 5); + } + const int exp = e10 + removed; + + ryu_floating_decimal_32 fd; + fd.exponent = exp; + fd.mantissa = output; + return fd; +} + +static ryu_floating_decimal_64 ryu_double_to_decimal(const unsigned long long ieeeMantissa, const unsigned int ieeeExponent) { + int e2; + unsigned long long m2; + if (ieeeExponent == 0) { + if (ieeeMantissa == 0ull) { + const ryu_floating_decimal_64 fd = { 0ull, 0 }; + return fd; + } + // We subtract 2 so that the bounds computation has 2 additional bits. + e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2; + m2 = ieeeMantissa; + } else { + e2 = (int) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2; + m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa; + } + const n64_bool even = (m2 & 1) == 0; + const n64_bool acceptBounds = even; + + // Step 2: Determine the interval of valid decimal representations. + const unsigned long long mv = 4 * m2; + // Implicit bool -> int conversion. True is 1, false is 0. + const unsigned int mmShift = ieeeMantissa != 0 || ieeeExponent <= 1; + // We would compute mp and mm like this: + // uint64_t mp = 4 * m2 + 2; + // uint64_t mm = mv - 1 - mmShift; + + // Step 3: Convert to a decimal power base using 128-bit arithmetic. + unsigned long long vr, vp, vm; + int e10; + n64_bool vmIsTrailingZeros = false; + n64_bool vrIsTrailingZeros = false; + if (e2 >= 0) { + // I tried special-casing q == 0, but there was no effect on performance. + // This expression is slightly faster than max(0, log10Pow2(e2) - 1). + const unsigned int q = log10Pow2(e2) - (e2 > 3); + e10 = (int) q; + const int k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int) q) - 1; + const int i = -e2 + (int) q + k; + unsigned long long pow5[2]; + double_computeInvPow5(q, pow5); + vr = mulShiftAll64(m2, pow5, i, &vp, &vm, mmShift); + if (q <= 21) { + // This should use q <= 22, but I think 21 is also safe. Smaller values + // may still be safe, but it's more difficult to reason about them. + // Only one of mp, mv, and mm can be a multiple of 5, if any. + const unsigned int mvMod5 = ((unsigned int) mv) - 5 * ((unsigned int) div5(mv)); + if (mvMod5 == 0) { + vrIsTrailingZeros = multipleOfPowerOf5(mv, q); + } else if (acceptBounds) { + // Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q + // <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q + // <=> true && pow5Factor(mm) >= q, since e2 >= q. + vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q); + } else { + // Same as min(e2 + 1, pow5Factor(mp)) >= q. + vp -= multipleOfPowerOf5(mv + 2, q); + } + } + } else { + // This expression is slightly faster than max(0, log10Pow5(-e2) - 1). + const unsigned int q = log10Pow5(-e2) - (-e2 > 1); + e10 = (int) q + e2; + const int i = -e2 - (int) q; + const int k = pow5bits(i) - DOUBLE_POW5_BITCOUNT; + const int j = (int) q - k; + unsigned long long pow5[2]; + double_computePow5(i, pow5); + vr = mulShiftAll64(m2, pow5, j, &vp, &vm, mmShift); + if (q <= 1) { + // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits. + // mv = 4 * m2, so it always has at least two trailing 0 bits. + vrIsTrailingZeros = true; + if (acceptBounds) { + // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1. + vmIsTrailingZeros = mmShift == 1; + } else { + // mp = mv + 2, so it always has at least one trailing 0 bit. + --vp; + } + } else if (q < 63) { + // We want to know if the full product has at least q trailing zeros. + // We need to compute min(p2(mv), p5(mv) - e2) >= q + // <=> p2(mv) >= q && p5(mv) - e2 >= q + // <=> p2(mv) >= q (because -e2 >= q) + vrIsTrailingZeros = multipleOfPowerOf2(mv, q); + } + } + + // Step 4: Find the shortest decimal representation in the interval of valid representations. + int removed = 0; + unsigned char lastRemovedDigit = 0; + unsigned long long output; + // On average, we remove ~2 digits. + if (vmIsTrailingZeros || vrIsTrailingZeros) { + // General case, which happens rarely (~0.7%). + for (;;) { + const unsigned long long vpDiv10 = div10(vp); + const unsigned long long vmDiv10 = div10(vm); + if (vpDiv10 <= vmDiv10) { + break; + } + const unsigned int vmMod10 = ((unsigned int) vm) - 10 * ((unsigned int) vmDiv10); + const unsigned long long vrDiv10 = div10(vr); + const unsigned int vrMod10 = ((unsigned int) vr) - 10 * ((unsigned int) vrDiv10); + vmIsTrailingZeros &= vmMod10 == 0; + vrIsTrailingZeros &= lastRemovedDigit == 0; + lastRemovedDigit = (unsigned char) vrMod10; + vr = vrDiv10; + vp = vpDiv10; + vm = vmDiv10; + ++removed; + } + if (vmIsTrailingZeros) { + for (;;) { + const unsigned long long vmDiv10 = div10(vm); + const unsigned int vmMod10 = ((unsigned int) vm) - 10 * ((unsigned int) vmDiv10); + if (vmMod10 != 0) { + break; + } + const unsigned long long vpDiv10 = div10(vp); + const unsigned long long vrDiv10 = div10(vr); + const unsigned int vrMod10 = ((unsigned int) vr) - 10 * ((unsigned int) vrDiv10); + vrIsTrailingZeros &= lastRemovedDigit == 0; + lastRemovedDigit = (unsigned char) vrMod10; + vr = vrDiv10; + vp = vpDiv10; + vm = vmDiv10; + ++removed; + } + } + if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) { + // Round even if the exact number is .....50..0. + lastRemovedDigit = 4; + } + // We need to take vr + 1 if vr is outside bounds or we need to round up. + output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5); + } else { + // Specialized for the common case (~99.3%). Percentages below are relative to this. + n64_bool roundUp = false; + const unsigned long long vpDiv100 = div100(vp); + const unsigned long long vmDiv100 = div100(vm); + if (vpDiv100 > vmDiv100) { // Optimization: remove two digits at a time (~86.2%). + const unsigned long long vrDiv100 = div100(vr); + const unsigned int vrMod100 = ((unsigned int) vr) - 100 * ((unsigned int) vrDiv100); + roundUp = vrMod100 >= 50; + vr = vrDiv100; + vp = vpDiv100; + vm = vmDiv100; + removed += 2; + } + // Loop iterations below (approximately), without optimization above: + // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02% + // Loop iterations below (approximately), with optimization above: + // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02% + for (;;) { + const unsigned long long vpDiv10 = div10(vp); + const unsigned long long vmDiv10 = div10(vm); + if (vpDiv10 <= vmDiv10) { + break; + } + const unsigned long long vrDiv10 = div10(vr); + const unsigned int vrMod10 = ((unsigned int) vr) - 10 * ((unsigned int) vrDiv10); + roundUp = vrMod10 >= 5; + vr = vrDiv10; + vp = vpDiv10; + vm = vmDiv10; + ++removed; + } + // We need to take vr + 1 if vr is outside bounds or we need to round up. + output = vr + (vr == vm || roundUp); + } + const int exp = e10 + removed; + + ryu_floating_decimal_64 fd; + fd.exponent = exp; + fd.mantissa = output; + return fd; +} diff --git a/lib/n64-libc/n64-stdio.c b/lib/n64-libc/n64-stdio.c new file mode 100644 index 00000000..5280725c --- /dev/null +++ b/lib/n64-libc/n64-stdio.c @@ -0,0 +1,67 @@ +#include "n64-stdio.h" + +#include "n64-util.h" +#include "n64-stdio-bprint.c.inc" + +static unsigned int s_isv_buffer[712/4]; + +static n64_bool s_sprint_callback( void *buffer, unsigned int len ) { + ((char*)buffer)[len] = '\0'; + return false; +} + +static n64_bool s_sprint_nobuffer_callback( void*, unsigned int ) { + return false; +} + +static n64_bool s_isv_bprint_callback( void *headPtr, unsigned int chunkSize ) { + const unsigned int n = (chunkSize + 3u) >> 2; + volatile unsigned int *head = *(volatile unsigned int**)headPtr; + + for( unsigned int i = 0; i < n; i++ ) { + *(head++) = s_isv_buffer[i]; + n64_await_pi_io(); + } + + *(volatile unsigned int**)headPtr = head; + return head != NULL && head < (volatile unsigned int*)0xb4000000u; +} + +int n64_vsnprintf( char *buffer, unsigned int bufsz, const char *format, __builtin_va_list args ) { + return bufsz ? + n64_vbprintf( buffer, bufsz - 1, s_sprint_callback, buffer, format, args ) : + n64_vbprintf( buffer, 0u, s_sprint_nobuffer_callback, NULL, format, args ); +} + +int n64_vprintf( const char *format, __builtin_va_list args ) { + const n64_bool inter = n64_set_interrupts( false ); + n64_await_pi_io(); + + volatile unsigned int *head = (volatile unsigned int*)0xb3ff0020u; + register const int len = n64_vbprintf( (char*)s_isv_buffer, 712u, s_isv_bprint_callback, &head, format, args ); + if( len >= 0 ) { + n64_memory_barrier(); + *((volatile int*)0xb3ff0014u) = (len > 0xFFE0) ? 0xFFE0 : len; + n64_memory_barrier(); + n64_await_pi_io(); + } + + n64_set_interrupts( inter ); + return len; +} + +int n64_snprintf( char *buffer, unsigned int bufsz, const char *format, ... ) { + __builtin_va_list args; + __builtin_va_start( args, format ); + register const int result = n64_vsnprintf( buffer, bufsz, format, args ); + __builtin_va_end( args ); + return result; +} + +int n64_printf( const char *format, ... ) { + __builtin_va_list args; + __builtin_va_start( args, format ); + register const int result = n64_vprintf( format, args ); + __builtin_va_end( args ); + return result; +} diff --git a/lib/n64-libc/n64-stdio.h b/lib/n64-libc/n64-stdio.h new file mode 100644 index 00000000..7ddcd7b9 --- /dev/null +++ b/lib/n64-libc/n64-stdio.h @@ -0,0 +1,65 @@ +#ifndef N64_STDLIB_N64_STDIO_H_ +#define N64_STDLIB_N64_STDIO_H_ + +#include "n64-stddef.h" +#include "n64-stdbool.h" + +#ifndef EOF +#define EOF -1 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* IMPORTANT NOTICE + * The printf family of functions provided have some important deviations from the C standard library: + * 1. The floating point formats (%f, %e, %g, and %a) expect a (32-bit) float instead of a double. To format a double, use the 'l' + * length modifier (ie. %lf, %le, %lg, or %la) to indicate that the argument is double precision. You can still pass in a + * double argument, however it will be cast to a float if the 'l', 'll', or 'L' length specifier is not present. + * 2. Formatting 128-bit long doubles is not supported. The 'L' length specifier is treated the same as 'll' + * 3. The 'l' length modifier is ignored for characters and strings (wide chars are not supported) + */ + + +/* Extension: n64_bprintf / n64_vbprintf + * The n64_bprintf (buffered printf) is an extension intended to be used as a helper function for implementing your own printf + * function to do things like printing to something other than IS Viewer or drawing formatted text on screen. + * + * bprintf functions like snprintf, but when the provided buffer is filled up, it invokes the provided callback function, then, + * depending on the return value, either moves back to the start of the buffer and continues writing, or ceases output. Once all + * characters have been processed, the callback function is invoked one final time. + * + * When the provided buffer is filled, the callback function is invoked, passing `state` to the first argument, and the buffer + * size to the second argument. If you return false, the function will no longer write any data and will not invoke the callback + * again, but it will still continue processing the text so that it can return the character count. If you return true, the + * function will continue, moving back to the start buffer and continuing writing output. Every time the buffer is filled, the + * callback is invoked again in the same way. Finally, once all characters have been processed and output is complete, the + * callback will be invoked one final time, with the number of characters written since the last callback (which will be 0 if the + * output size is a multiple of the buffer size) passed into the second argument of the callback function. Note that this final + * callback is not invoked if a previous callback returned false. + */ +__attribute__((format(printf,5,0), nonnull(3, 5), access(write_only, 1))) +int n64_vbprintf( char *buffer, unsigned int bufsz, n64_bool(*callback)(void*, unsigned int), void *state, const char *format, __builtin_va_list args ); + +__attribute__((format(printf,3,0), nonnull(3), access(write_only, 1))) +int n64_vsnprintf( char *buffer, unsigned int bufsz, const char *format, __builtin_va_list args ); + +__attribute__((format(printf,1,0), nonnull(1))) +int n64_vprintf( const char *format, __builtin_va_list args ); + + +__attribute__((format(printf,5,6), nonnull(3, 5), access(write_only, 1))) +int n64_bprintf( char *buffer, unsigned int bufsz, n64_bool(*callback)(void*, unsigned int), void *state, const char *format, ... ); + +__attribute__((format(printf,3,4), nonnull(3), access(write_only, 1))) +int n64_snprintf( char *buffer, unsigned int bufsz, const char *format, ... ); + +__attribute__((format(printf,1,2), nonnull(1))) +int n64_printf( const char *format, ... ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/n64-libc/n64-stdlib.c b/lib/n64-libc/n64-stdlib.c new file mode 100644 index 00000000..448f597a --- /dev/null +++ b/lib/n64-libc/n64-stdlib.c @@ -0,0 +1,155 @@ +#include "n64-stdlib.h" + +#include "n64-stddef.h" +#include "n64-util.h" + +typedef void(*memswp_func_t)( void*, void*, unsigned int ); + +#define __DECLARE_MEMSWP_FUNC(type) \ + static void memswp_##type( unsigned type *a, unsigned type *b, __attribute__((unused)) unsigned int n ) { \ + register unsigned type temp; \ + temp = *a; \ + *a = *b; \ + *b = temp; \ + } \ + \ + static void memswp_n##type( unsigned type *a, unsigned type *b, unsigned int n ) { \ + register unsigned type temp; \ + for( unsigned int i = 0; i < n; i++ ) { \ + temp = *a; \ + *(a++) = *b; \ + *(b++) = temp; \ + } \ + } + +__DECLARE_MEMSWP_FUNC(char) +__DECLARE_MEMSWP_FUNC(short) +__DECLARE_MEMSWP_FUNC(int) + +static void n64_qsort_impl( + void *first, + void *lo, + void *hi, + int(*comp)(const void*, const void*), + void(*swap)(void*, void*, unsigned int), + unsigned int sz, + unsigned int csz +) { + if( lo >= hi || lo < first ) return; + + void *i = lo; + void *p = hi; + for( void *j = lo; j < hi; j += sz ) { + if( comp( j, p ) <= 0 ) { + swap( i, j, csz ); + i += sz; + } + } + + if( i != hi ) swap( i, hi, csz ); + n64_qsort_impl( first, lo, i - sz, comp, swap, sz, csz ); + n64_qsort_impl( first, i + sz, hi, comp, swap, sz, csz ); +} + +void n64_qsort( void *ptr, unsigned int count, unsigned int size, int(*comp)(const void*, const void*) ) { + unsigned int csize = size; + + memswp_func_t memswp; + if( !size || !count ) { + return; + } else if( size == 1 ) { + memswp = (memswp_func_t)memswp_char; + } else if( size == 2 ) { + memswp = ((unsigned int)ptr & 0x1) ? (memswp_func_t)memswp_nchar : (memswp_func_t)memswp_short; + } else if( size == 4 ) { + if( !((unsigned int)ptr & 0x3) ) { + memswp = (memswp_func_t)memswp_int; + } else if( !((unsigned int)ptr & 0x1) ) { + memswp = (memswp_func_t)memswp_nshort; + csize <<= 1; + } else { + memswp = (memswp_func_t)memswp_nchar; + } + } else if( !(size & 0x3) && !((unsigned int)ptr & 0x3) ) { + memswp = (memswp_func_t)memswp_nint; + csize <<= 2; + } else if( !(size & 0x1) && !((unsigned int)ptr & 0x1) ) { + memswp = (memswp_func_t)memswp_nshort; + csize <<= 1; + } else { + memswp = (memswp_func_t)memswp_nchar; + } + + n64_qsort_impl( ptr, ptr, ptr + (size * (count - 1)), comp, memswp, size, csize ); +} + +void *n64_bsearch( const void *key, const void *ptr, unsigned int count, unsigned int size, int(*comp)(const void*, const void*) ) { + if( !size ) return NULL; + while( count ) { + register const int i = (count - 1) >> 1; + register const void *p = ptr + size * i; + register const int c = comp( p, key ); + + if( c < 0 ) { + ptr = p + size; + count -= i + 1; + } else if( c > 0 ) { + count = i; + } else { + return (void*)p; + } + } + + return NULL; +} + +static unsigned int g_randi = 24u; +static unsigned int g_randv[32] = { + 0xdb48f936u, 0x14898454u, 0x37ffd106u, 0xb58bff9cu, 0x59e17104u, 0xcf918a49u, 0x09378c83u, 0x52c7a471u, + 0x8d293ea9u, 0x1f4fc301u, 0xc3db71beu, 0x39b44e1cu, 0xf8a44ef9u, 0x4c8b80b1u, 0x19edc328u, 0x87bf4bddu, + 0xc9b240e5u, 0xe9ee4b1bu, 0x4382aee7u, 0x535b6b41u, 0xf3bec5dau, 0x991539b1u, 0x16a5bce3u, 0x6774a4cdu, + 0x73b5def3u, 0x3e01511eu, 0x4e508aaau, 0x61048c05u, 0xf5500617u, 0x846b7115u, 0x6a19892cu, 0x896a97afu +}; + +void n64_srand( unsigned int seed ) { + if( !seed ) seed = 1u; + + g_randv[0] = seed; + + int r = (int)seed; + for( int i = 1; i < 31; i++ ) { + r = (int)(16807ll * (long long)(r % 127773) - 2836ll * (long long)(r / 127773)); + if( r < 0 ) r += 0x7FFFFFFF; + g_randv[i] = (unsigned int)r; + } + + g_randv[31] = seed; + g_randv[0] = g_randv[1]; + g_randv[1] = g_randv[2]; + g_randi = 2u; + + for( int i = 0; i < 310; i++ ) { + n64_rand(); + } +} + +unsigned int n64_randu() { + register const unsigned int next = (g_randi + 1u) & 0x1Fu; + register const unsigned int r = g_randv[(g_randi + 29u) & 0x1Fu] + g_randv[next]; + g_randv[g_randi] = r; + g_randi = next; + return next; +} + +float n64_randf() { + const unsigned int bits = 0x3F800000u | (n64_randu() >> 9); + return n64_bit_cast_itof( bits ) - 1.f; +} + +double n64_randd() { + const unsigned int bits[2] __attribute__((aligned(8))) = { + 0x3FF00000u | (n64_randu() >> 12), + n64_randu() + }; + return *((const double*)bits) - 1.0; +} diff --git a/lib/n64-libc/n64-stdlib.h b/lib/n64-libc/n64-stdlib.h new file mode 100644 index 00000000..3a8c4a35 --- /dev/null +++ b/lib/n64-libc/n64-stdlib.h @@ -0,0 +1,67 @@ +#ifndef N64_STDLIB_N64_STDLIB_H_ +#define N64_STDLIB_N64_STDLIB_H_ + +#include "n64-stddef.h" + +#ifndef RAND_MAX +#define RAND_MAX 0x7fffffff +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +__attribute__((always_inline, artificial, noreturn)) +static inline void n64_abort() { + __builtin_trap(); +} + +__attribute__((access(none, 1), const, warn_unused_result, always_inline)) +static inline unsigned int n64_memalignment( const void *p ) { + return (unsigned int)p & (-(unsigned int)p); +} + +__attribute__((nonnull(1, 4), access(read_write, 1))) +void n64_qsort( void *ptr, unsigned int count, unsigned int size, int(*comp)(const void*, const void*) ); + +__attribute__((nonnull(2, 5), alloc_align(4), alloc_size(3, 4), warn_unused_result)) +void *n64_bsearch( const void *key, const void *ptr, unsigned int count, unsigned int size, int(*comp)(const void*, const void*) ); + +__attribute__((flatten)) +void n64_srand( unsigned int seed ); + +/* Extension. Works just like the standard C `rand` function, but returns an + * unsigned value with a full 32 bits of randomness instead of just 31 bits. + */ +unsigned int n64_randu(); + +/* Extension. Works just like the standard C `rand' function, but returns an + * unsigned short value with 16 bits of randomness. + */ +__attribute__((always_inline)) +static inline unsigned short n64_randhu() { + return (unsigned short)(n64_randu() >> 16); +} + +__attribute__((always_inline)) +static inline int n64_rand() { + return (int)(n64_randu() >> 1); +} + +/* Extension. Generate a random single precision floating point value + * greater than or equal to 0 and strictly less than 1 + */ +__attribute__((warn_unused_result)) +float n64_randf(); + +/* Extension. Generate a random double precision floating point value + * greater than or equal to 0 and strictly less than 1 + */ +__attribute__((warn_unused_result)) +double n64_randd(); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/n64-libc/n64-string.c b/lib/n64-libc/n64-string.c new file mode 100644 index 00000000..0634a07a --- /dev/null +++ b/lib/n64-libc/n64-string.c @@ -0,0 +1,356 @@ +#include "n64-string.h" + +#include "n64-stddef.h" +#include "n64-stdbool.h" + +inline unsigned int __attribute__((const, always_inline)) hasZeroByte( unsigned int x ) { + return (x - 0x01010101u) & ~x & 0x80808080u; +} + +inline unsigned int __attribute__((const, always_inline)) bytepattern( int ch ) { + register unsigned int w = (unsigned int)(unsigned char)ch; + w |= w << 8; + w |= w << 16; + return w; +} + +unsigned int n64_strncpy2( char *dest, const char *src, unsigned int count ) { + register const char *const end = n64_memchr( src, 0, count ); + if( !end ) { + n64_memcpy( dest, src, count ); + return count; + } else { + register const unsigned int len = (unsigned int)(end - src); + n64_memcpy( dest, src, len + 1u ); + return len; + } +} + +unsigned int n64_strlen( const char *str ) { + const char *const start = str; + + while( ((unsigned int)str & 3) ) { + if( !*str ) return (unsigned int)(str - start); + str++; + } + + while( !hasZeroByte( *(unsigned int*)str ) ) str += 4; + + while( *str ) str++; + return (unsigned int)(str - start); +} + +int n64_strcmp( const char *lhs, const char *rhs ) { + if( ((unsigned int)lhs & 3) == ((unsigned int)rhs & 3) ) { + while( (unsigned int)lhs & 3 ) { + register const int c = (int)(unsigned char)*lhs - (int)(unsigned char)*rhs; + if( c ) return c; + if( !*lhs ) return 0; + lhs++; + rhs++; + } + + while( true ) { + register const unsigned int x = *((const unsigned int*)lhs); + if( hasZeroByte( x ) ) break; + + register const unsigned int y = *((const unsigned int*)rhs); + if( x != y ) return (x < y) ? -1 : 1; + + lhs += 4; + rhs += 4; + } + } + + while( true ) { + register const int c = (int)(unsigned char)*lhs - (int)(unsigned char)*rhs; + if( c ) return c; + if( !*lhs ) return 0; + lhs++; + rhs++; + } +} + +int n64_strncmp( const char *lhs, const char *rhs, unsigned int count ) { + if( ((unsigned int)lhs & 3) == ((unsigned int)rhs & 3) ) { + while( count && ((unsigned int)lhs & 3) ) { + register const int c = (int)(unsigned char)*lhs - (int)(unsigned char)*rhs; + if( c ) return c; + if( !*lhs ) return 0; + lhs++; + rhs++; + count--; + } + + while( count >= 4 ) { + register const unsigned int x = *((const unsigned int*)lhs); + if( hasZeroByte( x ) ) break; + + register const unsigned int y = *((const unsigned int*)rhs); + if( x != y ) return (x < y) ? -1 : 1; + + lhs += 4; + rhs += 4; + count -= 4; + } + } + + while( count ) { + register const int c = (int)(unsigned char)*lhs - (int)(unsigned char)*rhs; + if( c ) return c; + if( !*lhs ) return 0; + lhs++; + rhs++; + count--; + } + + return 0; +} + +char *n64_strchr( const char *str, int ch ) { + while( (unsigned int)str & 3 ) { + if( (unsigned char)*str == (unsigned char)ch ) return (char*)str; + if( !*(str++) ) return NULL; + } + + register const unsigned int c = bytepattern( ch ); + while( !(hasZeroByte( *(const unsigned int*)str ) | hasZeroByte( (*(const unsigned int*)str) ^ c )) ) str += 4; + + while( (unsigned char)*str != (unsigned char)ch ) { + if( !*(str++) ) return NULL; + } + + return (char*)str; +} + +char *n64_strrchr( const char *str, int ch ) { + register const char *const start = str; + str += n64_strlen( str ); + + while( str != start && ((unsigned int)str & 3) != 3 ) { + if( (unsigned char)*str == (unsigned char)ch ) return (char*)str; + str--; + } + + register const unsigned int c = bytepattern( ch ); + while( start <= str - 4 ) { + if( hasZeroByte( *(const unsigned int*)(str - 3) ^ c ) ) break; + str -= 4; + } + + while( start != str ) { + if( (unsigned char)*str == (unsigned char)ch ) return (char*)str; + str--; + } + + return NULL; +} + +unsigned int n64_strspn( const char *str, const char *accept ) { + if( !accept[0] ) return 0u; + + char mask[256]; + n64_memset( mask, 0, 256 ); + mask[0] = 1; + + while( *accept ) mask[(unsigned char)*(accept++)] = 1; + + const char *const start = str; + while( !mask[(unsigned char)*str] ) str++; + return (unsigned int)(str - start); +} + +unsigned int n64_strcspn( const char *str, const char *reject ) { + if( !reject[0] ) return n64_strlen( str ); + + char mask[256]; + n64_memset( mask, 1, 256 ); + + while( *reject ) mask[(unsigned char)*(reject++)] = 0; + + const char *const start = str; + while( !mask[(unsigned char)*str] ) str++; + return (unsigned int)(str - start); +} + +void *n64_memchr( const void *ptr, int ch, unsigned int count ) { + + while( count && ((unsigned int)ptr & 3) ) { + if( *(unsigned char*)ptr == (unsigned char)ch ) return (void*)ptr; + ptr++; + count--; + } + + register const unsigned int cbp = bytepattern( ch ); + while( count >= 4u ) { + if( hasZeroByte( *(unsigned int*)ptr ^ cbp ) ) break; + ptr += 4; + count -= 4; + } + + while( count-- ) { + if( *(unsigned char*)ptr == (unsigned char)ch ) return (void*)ptr; + ptr++; + } + + return NULL; +} + +int n64_memcmp( const void *lhs, const void *rhs, unsigned int count ) { + if( lhs == rhs ) return 0; + + if( count > 8u && ((unsigned int)lhs & 0x3) == ((unsigned int)rhs & 0x3) ) { + if( (count & 3) || ((unsigned int)lhs & 0x3) ) { + const void *const stop = lhs + count; + const void *const astop = (const void*)((unsigned int)stop & 0xFFFFFFFCu); + + while( (unsigned int)lhs & 0x3 ) { + register const unsigned char a = *(unsigned char*)lhs; + register const unsigned char b = *(unsigned char*)rhs; + + if( a == b ) { + lhs++; + rhs++; + continue; + } + + return (a > b) ? 1 : -1; + } + + while( lhs != astop ) { + register const unsigned int a = *(unsigned int*)lhs; + register const unsigned int b = *(unsigned int*)rhs; + + if( a == b ) { + lhs += 4; + rhs += 4; + continue; + } + + return (a > b) ? 1 : -1; + } + + while( lhs != stop ) { + register const unsigned char a = *(unsigned char*)lhs; + register const unsigned char b = *(unsigned char*)rhs; + + if( a == b ) { + lhs++; + rhs++; + continue; + } + + return (a > b) ? 1 : -1; + } + + return 0; + } else { + const void *const stop = lhs + count; + while( lhs != stop ) { + register const unsigned int a = *(unsigned int*)lhs; + register const unsigned int b = *(unsigned int*)rhs; + + if( a == b ) { + lhs += 4; + rhs += 4; + continue; + } + + return (a > b) ? 1 : -1; + } + return 0; + } + } else { + const void *const stop = lhs + count; + while( lhs != stop ) { + register const unsigned char a = *(unsigned char*)lhs; + register const unsigned char b = *(unsigned char*)rhs; + + if( a == b ) { + lhs++; + rhs++; + continue; + } + + return (a > b) ? 1 : -1; + } + return 0; + } +} + +void *n64_memset( void *dest, int ch, unsigned int count ) { + if( count > 8u && !((unsigned int)dest & 0x3) ) { + register const unsigned int w = bytepattern( ch ); + unsigned int *head = (unsigned int*)dest; + const unsigned int *const end = (unsigned int*)(dest + count); + while( head != end ) *(head++) = w; + return dest; + } else { + register const unsigned char c = (unsigned char)ch; + unsigned char* head = (unsigned char*)dest; + unsigned char *const end = head + count; + while( head != end ) *(head++) = c; + return dest; + } +} + +void *n64_memcpy( void *dest, const void *src, unsigned int count ) { + if( count > 8u && ((unsigned int)dest & 0x3) == ((unsigned int)src & 0x3) ) { + if( (count & 3) || ((unsigned int)src & 0x3) ) { + register void *start = dest; + register const void *end = (void*)((unsigned int)(start + count) & 0xFFFFFFFCu); + + while( (unsigned int)start & 0x3 ) { + *((char*)start++) = *((char*)src++); + } + + while( start != end ) { + *((unsigned int*)start) = *((unsigned int*)src); + start += 4; + src += 4; + } + + end = (void*)((unsigned int)dest + count); + while( start != end ) { + *((char*)start++) = *((char*)src++); + } + } else { + register unsigned int *start = (unsigned int*)dest; + register const unsigned int *const end = (const unsigned int*)((unsigned int)start + count); + while( start != end ) { + *(start++) = *(const unsigned int*)src; + src += 4; + } + } + + return dest; + } else { + char *head = (char*)dest; + char *const end = head + count; + while( head != end ) *(head++) = *(char*)src++; + return dest; + } +} + +void *n64_memmove( void *dest, const void *src, unsigned int count ) { + if( src == dest || !count ) return dest; + + if( dest + count <= src || dest >= src + count ) { + return n64_memcpy( dest, src, count ); + } + + if( dest > src ) { + // start of dest overlaps end of src + void *const end = dest; + dest += count; + src += count; + while( dest != end ) *(char*)--dest = *(char*)--src; + return end; + } else { + // end of dest overlaps start of src + char *head = (char*)dest; + char *const end = head + count; + while( head != end ) *(head++) = *(char*)src++; + return dest; + } +} diff --git a/lib/n64-libc/n64-string.h b/lib/n64-libc/n64-string.h new file mode 100644 index 00000000..78de6659 --- /dev/null +++ b/lib/n64-libc/n64-string.h @@ -0,0 +1,111 @@ +#ifndef N64_STDLIB_N64_STRING_H_ +#define N64_STDLIB_N64_STRING_H_ + +#include "n64-stddef.h" +#include "n64-util.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Extension. Works the same as [n64_]strncpy except that it writes only a + * single null terminator after copying the string, rather than filling the + * entire rest of the buffer with zeros. Additionally, it returns the number + * of characters written not including the null terminator. + */ +__attribute__((access(write_only, 1), nonnull(1))) +unsigned int n64_strncpy2( char *dest, const char *src, unsigned int count ); + + +__attribute__((pure, warn_unused_result, nonnull(1))) +unsigned int n64_strlen( const char *str ); + +__attribute__((pure, warn_unused_result, nonnull(1, 2))) +int n64_strcmp( const char *lhs, const char *rhs ); + +__attribute__((pure, warn_unused_result, nonnull(1, 2))) +int n64_strncmp( const char *lhs, const char *rhs, unsigned int count ); + +__attribute__((pure, warn_unused_result, nonnull(1), returns_nonnull)) +char *n64_strchr( const char *str, int ch ); + +__attribute__((pure, warn_unused_result, nonnull(1), returns_nonnull)) +char *n64_strrchr( const char *str, int ch ); + +__attribute__((pure, warn_unused_result, nonnull(1, 2))) +unsigned int n64_strspn( const char *str, const char *accept ); + +__attribute__((pure, warn_unused_result, nonnull(1, 2))) +unsigned int n64_strcspn( const char *str, const char *reject ); + + +__attribute__((pure, warn_unused_result, nonnull(1), returns_nonnull)) +void *n64_memchr( const void *ptr, int ch, unsigned int count ); + +__attribute__((pure, warn_unused_result, nonnull(1, 2))) +int n64_memcmp( const void *lhs, const void *rhs, unsigned int count ); + +__attribute__((access(write_only, 1), nonnull(1), returns_nonnull)) +void *n64_memset( void *dest, int ch, unsigned int count ); + +__attribute__((access(write_only, 1), nonnull(1, 2), returns_nonnull)) +void *n64_memcpy( void *dest, const void *src, unsigned int count ); + +__attribute__((access(write_only, 1), nonnull(1, 2), returns_nonnull)) +void *n64_memmove( void *dest, const void *src, unsigned int count ); + +__attribute__((access(write_only, 1), nonnull(1))) +static inline void n64_bzero( void *s, unsigned int n ) { + n64_memset( s, 0, n ); +} + +__attribute__((nonnull(1), always_inline)) +static inline void n64_explicit_bzero( void *s, unsigned int n ) { + n64_bzero( s, n ); + n64_memory_barrier(); +} + +__attribute__((access(write_only, 2), nonnull(1, 2), always_inline)) +static inline void n64_bcopy( const void *src, void *dest, unsigned int n ) { + n64_memmove( dest, src, n ); +} + + +__attribute__((pure, warn_unused_result)) +static inline unsigned int n64_strlen_s( const char *str, unsigned int strsz ) { + if( !str ) return 0u; + const char *stop = (const char*)n64_memchr( str, 0, strsz ); + return stop ? (unsigned int)(stop - str) : strsz; +} + +__attribute__((access(write_only, 1), nonnull(1, 2), returns_nonnull)) +static inline char *n64_strcpy( char *dest, const char *src ) { + return (char*)n64_memcpy( dest, src, n64_strlen( src ) + 1u ); +} + +__attribute__((access(write_only, 1), nonnull(1), returns_nonnull)) +static inline char *n64_strncpy( char *dest, const char *src, unsigned int count ) { + const unsigned int len = 1u + n64_strncpy2( dest, src, count ); + if( len < count ) n64_memset( &dest[len], 0, count - len ); + return dest; +} + +__attribute__((access(read_write, 1), nonnull(1, 2), returns_nonnull)) +static inline char *n64_strcat( char *dest, const char *src ) { + n64_strcpy( &dest[n64_strlen( dest )], src ); + return dest; +} + +__attribute__((access(read_write, 1), nonnull(1), returns_nonnull)) +static inline char *n64_strncat( char *dest, const char *src, unsigned int count ) { + unsigned int len = n64_strlen( dest ); + len += n64_strncpy2( &dest[len], src, count ); + dest[len] = '\0'; + return dest; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/n64-libc/n64-time.c b/lib/n64-libc/n64-time.c new file mode 100644 index 00000000..960fe2dc --- /dev/null +++ b/lib/n64-libc/n64-time.c @@ -0,0 +1,685 @@ +#include "n64-time.h" + +#include "n64-util.h" +#include "n64-string.h" + +static const n64_tm N64_UNIX_EPOCH_TM = { + 0, 0, 0, 1, 0, 70, 4, 0, -1 +}; + +static const char *s_weekdayNames[7] = { + "Sunday", + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday" +}; + +static const char *s_monthNames[12] = { + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December" +}; + +#define N64_SECONDS_IN_MINUTE 60ll +#define N64_SECONDS_IN_HOUR (60ll * N64_SECONDS_IN_MINUTE) +#define N64_SECONDS_IN_DAY (24ll * N64_SECONDS_IN_HOUR) +#define N64_SECONDS_IN_NON_LEAP_YEAR (365ll * N64_SECONDS_IN_DAY) + +static unsigned int s_si_buffer[16] __attribute__((aligned(16))); +static unsigned int s_si_backup[16] __attribute__((aligned(16))); +static n64_bool s_exec_on_write_bug = false; +static unsigned int s_prev_dma_addr; + +static const int s_yday_table[12] = { + 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 +}; + +typedef enum { + N64_RTC_INIT_CALLED = 0x1, + N64_RTC_NOT_WAITING = 0x2, + N64_RTC_GOOD = 0x4, + + N64_RTC_READY = N64_RTC_INIT_CALLED | N64_RTC_NOT_WAITING +} n64_rtc_state; + +static n64_rtc_state g_rtc_state = 0; +static n64_clock_t g_wait_start = 0u; +static n64_clock_t g_wait_end = 0u; + +static inline void __attribute__((always_inline)) si_await_op() { + // Wait for the SI operation we started to finish, then clear the status + while( !(*((volatile unsigned int*)0xa4300008u) & *((volatile unsigned int*)0xa430000Cu) & 0x2u) ); + *((volatile unsigned int*)0xa4800018u) = 0u; +} + +static inline void __attribute__((always_inline)) si_wait_safe( n64_bool yield ) { + // If the SI is currently busy, wait until it isn't + while( *((volatile unsigned int*)0xa4800018u) & 0x3 ) { + n64_set_interrupts( yield ); + while( *((volatile unsigned int*)0xa4800018u) & 0x3 ); + n64_set_interrupts( false ); + } +} + +static void n64_pif_save() { + s_prev_dma_addr = *((volatile unsigned int*)0xa4800000u); + + // Save the state of PIF RAM to memory to restore it later. + // The joybus is only executed when using an SI DMA read, and not when doing + // a direct read via the memory-mapped address. Thus, this has no side effects. + + volatile unsigned int *si_pif_ram = (volatile unsigned int *)0xbfc007c0u; + for( int i = 0; i < 16; i++ ) { + s_si_backup[i] = si_pif_ram[i]; + } +} + +static void n64_pif_restore() { + if( s_exec_on_write_bug ) { + // The emulator incorrectly executes the joybus on a DMA write instead of a read + // Clear the command register so it doesn't execute again + s_si_backup[15] &= 0xffffff00u; + } else { + // Re-parse the stored PIF RAM state + s_si_backup[15] |= 1u; + } + + __builtin_mips_cache( 0x19, &s_si_backup[0] ); + __builtin_mips_cache( 0x19, &s_si_backup[4] ); + __builtin_mips_cache( 0x19, &s_si_backup[8] ); + __builtin_mips_cache( 0x19, &s_si_backup[12] ); + + *((volatile unsigned int*)0xa4800000u) = (unsigned int)s_si_backup & 0x1FFFFFFFu; + asm volatile( "":::"memory" ); + *((volatile unsigned int*)0xa4800010u) = 0x1fc007c0u; + asm volatile( "":::"memory" ); + si_await_op(); + + *((volatile unsigned int*)0xa4800000u) = s_prev_dma_addr; +} + +static void n64_joybus_dma_write() { + __builtin_mips_cache( 0x19, &s_si_buffer[0] ); + __builtin_mips_cache( 0x19, &s_si_buffer[4] ); + __builtin_mips_cache( 0x19, &s_si_buffer[8] ); + __builtin_mips_cache( 0x19, &s_si_buffer[12] ); + + *((volatile unsigned int*)0xa4800000u) = (unsigned int)s_si_buffer & 0x1FFFFFFFu; + asm volatile( "":::"memory" ); + *((volatile unsigned int*)0xa4800010u) = 0x1fc007c0u; + asm volatile( "":::"memory" ); + + si_await_op(); +} + +static void n64_joybus_dma_read() { + *((volatile unsigned int*)0xa4800000u) = (unsigned int)s_si_buffer & 0x1FFFFFFFu; + asm volatile( "":::"memory" ); + *((volatile unsigned int*)0xa4800004u) = 0x1fc007c0u; + asm volatile( "":::"memory" ); + + si_await_op(); + + __builtin_mips_cache( 0x11, &s_si_buffer[0] ); + __builtin_mips_cache( 0x11, &s_si_buffer[4] ); + __builtin_mips_cache( 0x11, &s_si_buffer[8] ); + __builtin_mips_cache( 0x11, &s_si_buffer[12] ); +} + +__attribute__((always_inline)) +static inline void n64_joybus_exec() { + n64_joybus_dma_write(); + n64_joybus_dma_read(); +} + +static inline unsigned char decode_rtc_byte( unsigned char x ) { + return (((x & 0xF0) >> 4) * 10) + (x & 0x0F); +} + +static inline n64_bool is_leap_year( long long year ) { + if( year % 4ll != 0ll ) return false; + if( year % 100ll == 0ll ) return (year % 400ll == 0ll) ? true : false; + return false; +} + +static n64_time_t year_to_unix_time( long long year ) { + register long long leapYears = -478ll; + leapYears += (year + 3ll) >> 2; + leapYears -= ((year > 0ll) ? (year + 99ll) : year) / 100ll; + leapYears += ((year > 0ll) ? (year + 399ll) : year) / 400ll; + + return ( + ((year - 1970ll) * N64_SECONDS_IN_NON_LEAP_YEAR) + + (leapYears * N64_SECONDS_IN_DAY) + ); +} + +n64_time_t n64_to_unix_time( const n64_tm *time ) { + int month = time->tm_mon % 12; + if( month < 0 ) month += 12; + + long long days = (long long)s_yday_table[month] + (long long)time->tm_mday - 1ll; + if( month >= 2 && is_leap_year( (long long)time->tm_year + 1900ll ) ) days++; + return ( + year_to_unix_time( (long long)time->tm_year + 1900ll + (long long)(time->tm_mon / 12) ) + + (days * N64_SECONDS_IN_DAY) + + ((long long)time->tm_hour * N64_SECONDS_IN_HOUR) + + ((long long)time->tm_min * N64_SECONDS_IN_MINUTE) + + (long long)time->tm_sec + ); +} + +__attribute__((always_inline)) +static inline n64_bool is_emulator() { + register const volatile unsigned int *const dpc = (const volatile unsigned int*)0xA4100000u; + return !(dpc[5] | dpc[6] | dpc[7]); +} + +n64_bool n64_rtc_init() { + if( g_rtc_state & N64_RTC_INIT_CALLED ) { + return g_rtc_state != N64_RTC_READY; + } + + const n64_bool intr = n64_set_interrupts( false ); + if( g_rtc_state & N64_RTC_INIT_CALLED ) { + n64_set_interrupts( intr ); + return g_rtc_state != N64_RTC_READY; + } + + si_wait_safe( intr ); + g_rtc_state |= N64_RTC_INIT_CALLED; + n64_pif_save(); + + s_si_buffer[0] = 0u; + s_si_buffer[1] = 0xff010306u; + s_si_buffer[2] = 0xfffffffeu; + n64_memset( &s_si_buffer[3], 0, 48u ); + s_si_buffer[15] = 1u; + n64_joybus_dma_write(); + + if( + *((volatile unsigned char*)0xbfc007c6u) != 0x03 || + *((volatile unsigned char*)0xbfc007c8u) != 0xff + ) { + // On hardware (and accurate emulators such as Ares), writing to the joybus merely causes it to parse the command. + // The commands are not actually executed until a DMA read is performed. So only the final byte should have changed. + s_exec_on_write_bug = true; + } + + n64_joybus_dma_read(); + + if( s_si_buffer[2] >> 8 != 0x001000u ) { + n64_pif_restore(); + n64_set_interrupts( intr ); + return false; + } + + g_rtc_state |= N64_RTC_GOOD; + g_wait_start = n64_clock(); + g_wait_end = g_wait_start + (N64_CLOCKS_PER_SEC / 50u); + + s_si_buffer[0] = 0u; + s_si_buffer[1] = 0x02090700u; + s_si_buffer[2] = 0u; + s_si_buffer[3] = 0u; + s_si_buffer[4] = 0x00fe0000u; + n64_memset( &s_si_buffer[5], 0, 40u ); + s_si_buffer[15] = 1u; + n64_joybus_exec(); + + s_si_buffer[1] = 0x0a010800u; + s_si_buffer[2] = 0x03000000u; + s_si_buffer[4] = 0x00fe0000u; + s_si_buffer[15] = 1u; + n64_joybus_exec(); + + if( is_emulator() ) { + g_rtc_state |= N64_RTC_NOT_WAITING; + } + + n64_pif_restore(); + n64_set_interrupts( intr ); + return true; +} + +n64_bool n64_rtc_ready( n64_bool *good ) { + if( good ) *good = (g_rtc_state & N64_RTC_GOOD) >> 2; + + if( !(g_rtc_state & N64_RTC_INIT_CALLED) ) return false; + if( !(g_rtc_state & N64_RTC_NOT_WAITING) ) { + const n64_clock_t now = n64_clock(); + if( g_wait_end < g_wait_start ) { + if( now >= g_wait_end && now < g_wait_start ) { + g_rtc_state |= N64_RTC_NOT_WAITING; + return true; + } else { + return false; + } + } else { + if( now < g_wait_start || now >= g_wait_end ) { + g_rtc_state |= N64_RTC_NOT_WAITING; + return true; + } else { + return false; + } + } + } + + return true; +} + +n64_bool n64_rtc_read( n64_tm *arg ) { + if( !(g_rtc_state & N64_RTC_READY) ) { + n64_rtc_init(); + while( !n64_rtc_ready( NULL ) ); + } + + if( !(g_rtc_state & N64_RTC_GOOD) ) { + n64_memcpy( arg, &N64_UNIX_EPOCH_TM, sizeof( n64_tm ) ); + return false; + } + + const n64_bool intr = n64_set_interrupts( false ); + si_wait_safe( intr ); + n64_pif_save(); + + s_si_buffer[0] = 0u; + s_si_buffer[1] = 0x02090702u; + s_si_buffer[2] = 0x00008001u; + s_si_buffer[3] = 0x04017000u; + s_si_buffer[4] = 0x80fe0000u; + n64_memset( &s_si_buffer[5], 0, 40u ); + s_si_buffer[15] = 1u; + n64_joybus_exec(); + + const unsigned char *const data = (const unsigned char*)&s_si_buffer[2]; + arg->tm_sec = (int)decode_rtc_byte( data[0] ); + arg->tm_min = (int)decode_rtc_byte( data[1] ); + arg->tm_hour = (int)decode_rtc_byte( data[2] - 0x80 ); + arg->tm_mday = (int)decode_rtc_byte( data[3] ); + arg->tm_wday = (int)decode_rtc_byte( data[4] ); + arg->tm_mon = (int)decode_rtc_byte( data[5] ) - 1u; + arg->tm_year = (100 * (int)decode_rtc_byte( data[7] )) + (int)decode_rtc_byte( data[6] ); + if( arg->tm_mon < 12 ) arg->tm_yday = s_yday_table[arg->tm_mon] + arg->tm_mday - 1; + arg->tm_isdst = -1; + + if( arg->tm_mon >= 2 && is_leap_year( 1900ll + (long long)arg->tm_year ) ) { + arg->tm_yday++; + } + + n64_pif_restore(); + n64_set_interrupts( intr ); + return true; +} + +n64_tm *n64_tm_add( n64_tm *time, n64_time_t seconds ) { + seconds += n64_to_unix_time( time ); + return n64_gmtime_r( &seconds, time ); +} + +n64_time_t n64_time( n64_time_t *arg ) { + n64_tm now; + register const n64_time_t ts = n64_rtc_read( &now ) ? n64_to_unix_time( &now ) : (n64_time_t)-1; + if( arg ) *arg = ts; + return ts; +} + +n64_time_t n64_mktime( n64_tm *time ) { + const n64_time_t ts = n64_to_unix_time( time ); + n64_gmtime_r( &ts, time ); + return ts; +} + +static inline void setMonthAndDay( long long year, int yday, int *mon, int *mday ) { + if( yday < 31 ) { + *mon = 0; + *mday = yday + 1; + return; + } else if( yday < 59 ) { + *mon = 1; + *mday = yday - 30; + return; + } else if( yday == 59 ) { + if( is_leap_year( year ) ) { + *mon = 1; + *mday = 29; + } else { + *mon = 2; + *mday = 1; + } + return; + } + + if( is_leap_year( year ) ) yday--; + if( yday >= 334 ) { + *mon = 11; + *mday = yday - 333; + } else for( int month = (yday + 4) >> 5;; month++ ) { + if( yday < s_yday_table[month+1] ) { + *mon = month; + *mday = yday + 1 - s_yday_table[month]; + return; + } + } +} + +n64_tm *n64_gmtime_r( const n64_time_t *timer, n64_tm *buf ) { + buf->tm_isdst = 0; + + if( *timer >= 67768036191676800ll ) { + // set to maximum valid n64_tm + buf->tm_sec = 59; + buf->tm_min = 59; + buf->tm_hour = 23; + buf->tm_mday = 31; + buf->tm_mon = 11; + buf->tm_year = 0x7FFFFFFF; + buf->tm_wday = 3; + buf->tm_yday = 364; + return NULL; + } else if( *timer < -67768040609721748ll ) { + // set to minimum valid n64_tm + buf->tm_sec = 0; + buf->tm_min = 0; + buf->tm_hour = 0; + buf->tm_mday = 1; + buf->tm_mon = 0; + buf->tm_year = -0x80000000; + buf->tm_wday = 4; + buf->tm_yday = 0; + return NULL; + } + + long long yearGuess = 1970ll + (*timer / 31556952ll); + long long yearTs = year_to_unix_time( yearGuess ); + if( *timer < yearTs ) { + do { + yearTs = year_to_unix_time( --yearGuess ); + } while( *timer < yearTs ); + } else if( yearGuess > 0x7fffffffll ) { + yearGuess = 0x7fffffffll; + yearTs = year_to_unix_time( yearGuess ); + } else { + register const long long nextYearTs = year_to_unix_time( yearGuess + 1ll ); + if( *timer >= nextYearTs ) { + yearGuess++; + yearTs = nextYearTs; + } + } + + int ts = (int)(*timer - yearTs); + buf->tm_year = (int)(yearGuess - 1900ll); + buf->tm_yday = ts / (int)N64_SECONDS_IN_DAY; + ts %= (int)N64_SECONDS_IN_DAY; + setMonthAndDay( yearGuess, buf->tm_yday, &buf->tm_mon, &buf->tm_mday ); + buf->tm_hour = ts / (int)N64_SECONDS_IN_HOUR; + ts %= (int)N64_SECONDS_IN_HOUR; + buf->tm_min = ts / (int)N64_SECONDS_IN_MINUTE; + buf->tm_sec = ts % (int)N64_SECONDS_IN_MINUTE; + buf->tm_wday = (int)((4ll + (*timer / N64_SECONDS_IN_DAY)) % 7ll); + if( buf->tm_wday < 0 ) buf->tm_wday += 7; + + return buf; +} + +static inline n64_bool strftime_push_text( char *str, unsigned int *i, unsigned int count, const char *text ) { + for( ; *text; (*i)++ ) { + if( *i >= count - 1 ) return false; + str[*i] = *(text++); + } + return true; +} + +static inline n64_bool strftime_push_number( char *str, unsigned int *i, unsigned int count, long long n ) { + if( n < 0ll ) { + str[(*i)++] = '-'; + n = -n; + } + + char buff[17]; + unsigned int j = 0u; + do { + buff[j++] = '0' + (char)(n % 10ll); + n /= 10ll; + } while( n ); + + if( *i + j < *i || *i + j >= count ) return false; + for( int k = (int)j - 1; k >= 0; k-- ) { + str[(*i)++] = buff[k]; + } + + return true; +} + +static inline int get_wby_days( int day, int wday ) { + return day - ((day - wday + 382) % 7) + 3; +} + +static long long get_wby_years( long long year, int day, int wday ) { + if( get_wby_days( day, wday ) < 0 ) { + return year - 1ll; + } else if( get_wby_days( day - (is_leap_year( year ) ? 366ll : 365ll), wday ) > 0 ) { + return year + 1ll; + } else { + return year; + } +} + +static int get_wby_weeks( long long year, int day, int wday ) { + int days = get_wby_days( day, wday ); + if( days < 0 ) { + days = get_wby_days( day + (is_leap_year( year ) ? 366ll : 365ll), wday ); + } else { + const int days2 = get_wby_days( day - (is_leap_year( year ) ? 366ll : 365ll), wday ); + if( days2 > 0 ) days = days2; + } + + return 1 + (days / 7); +} + +unsigned int n64_strftime_internal( char *str, unsigned int count, const char *format, const n64_tm *tp ) { + const long long year = (long long)tp->tm_year + 1900ll; + for( unsigned int i = 0; i < count; format++ ) { + if( !*format ) { + str[i] = '\0'; + return i; + } else if( *format != '%' ) { + str[i++] = *format; + continue; + } + + format++; + const n64_bool alt = (*format == '0'); + if( *format == 'E' || alt ) { + format++; + } + + switch( *format ) { + case '%': + str[i++] = '%'; + break; + case 'n': + str[i++] = '\n'; + break; + case 't': + str[i++] = '\t'; + break; + case 'Y': + if( !strftime_push_number( str, &i, count, year ) ) return 0; + break; + case 'y': + if( i < count - 2 ) { + register int yy = (int)(year % 100ll); + if( yy < 0 ) yy += 100; + str[i++] = '0' + (char)(yy / 10); + str[i++] = '0' + (char)(yy % 10); + break; + } else return 0; + case 'C': + if( !strftime_push_number( str, &i, count, year / 100ll ) ) return 0; + break; + case 'G': + if( !strftime_push_number( str, &i, count, get_wby_years(year, tp->tm_yday, tp->tm_wday ) ) ) return 0; + break; + case 'g': + if( i < count - 2 ) { + register int wyear = (int)(get_wby_years(year, tp->tm_yday, tp->tm_wday ) % 100ll); + if( wyear < 0 ) wyear += 100; + str[i++] = '0' + (char)(wyear / 10); + str[i++] = '0' + (char)(wyear % 10); + break; + } else return 0; + break; + case 'b': + case 'h': + if( i >= count - 3 ) return 0; + str[i++] = s_monthNames[tp->tm_mon][0]; + str[i++] = s_monthNames[tp->tm_mon][1]; + str[i++] = s_monthNames[tp->tm_mon][2]; + break; + case 'B': + if( !strftime_push_text( str, &i, count, s_monthNames[tp->tm_mon] ) ) return 0; + break; + case 'm': + if( i >= count - 2 ) return 0; + str[i++] = '0' + (char)((tp->tm_mon + 1) / 10); + str[i++] = '0' + (char)((tp->tm_mon + 1) % 10); + break; + case 'U': + if( i >= count - 2 ) return 0; + str[i++] = '0' + (char)((tp->tm_yday - tp->tm_wday + 7) / 70); + str[i++] = '0' + (char)(((tp->tm_yday - tp->tm_wday + 7) / 7) % 10); + break; + case 'W': + if( i >= count - 2 ) return 0; + str[i++] = '0' + (char)((tp->tm_yday - ((tp->tm_wday + 6) % 7) + 7) / 70); + str[i++] = '0' + (char)(((tp->tm_yday - ((tp->tm_wday + 6) % 7) + 7) / 7) % 10); + break; + case 'V': + if( i < count - 2 ) { + register const int week = get_wby_weeks(year, tp->tm_yday, tp->tm_wday ); + str[i++] = '0' + (char)(week / 10); + str[i++] = '0' + (char)(week % 10); + break; + } else return 0; + case 'j': + if( i >= count - 3 ) return 0; + str[i++] = '0' + (char)(tp->tm_yday / 100); + str[i++] = '0' + (char)((tp->tm_yday / 10) % 10); + str[i++] = '0' + (char)(tp->tm_yday % 10); + break; + case 'd': + if( i >= count - 2 ) return 0; + str[i++] = '0' + (char)(tp->tm_mday / 10); + str[i++] = '0' + (char)(tp->tm_mday % 10); + break; + case 'e': + if( i >= count - 2 ) return 0; + if( alt || tp->tm_mday > 9) { + str[i++] = '0' + (char)(tp->tm_mday / 10); + str[i++] = '0' + (char)(tp->tm_mday % 10); + } else { + str[i++] = ' '; + str[i++] = '0' + (char)tp->tm_mday; + } + break; + case 'a': + if( i >= count - 3 ) return 0; + str[i++] = s_weekdayNames[tp->tm_wday][0]; + str[i++] = s_weekdayNames[tp->tm_wday][1]; + str[i++] = s_weekdayNames[tp->tm_wday][2]; + break; + case 'A': + if( !strftime_push_text( str, &i, count, s_weekdayNames[tp->tm_wday] ) ) return 0; + break; + case 'w': + str[i++] = '0' + (char)tp->tm_wday; + break; + case 'u': + str[i++] = tp->tm_wday ? ('0' + (char)tp->tm_wday) : '7'; + break; + case 'H': + if( i >= count - 2 ) return 0; + str[i++] = '0' + (char)(tp->tm_hour / 10); + str[i++] = '0' + (char)(tp->tm_hour % 10); + break; + case 'I': + if( i >= count - 2 ) return 0; + str[i++] = '0' + (char)((1 + ((tp->tm_hour + 11) % 12)) / 10); + str[i++] = '0' + (char)((1 + ((tp->tm_hour + 11) % 12)) % 10); + break; + case 'M': + if( i >= count - 2 ) return 0; + str[i++] = '0' + (char)(tp->tm_min / 10); + str[i++] = '0' + (char)(tp->tm_min % 10); + break; + case 'S': + if( i >= count - 2 ) return 0; + str[i++] = '0' + (char)(tp->tm_sec / 10); + str[i++] = '0' + (char)(tp->tm_sec % 10); + break; + case 'p': + if( i >= count - 2 ) return 0; + str[i++] = (tp->tm_hour < 12) ? 'A' : 'P'; + str[i++] = 'M'; + break; + case 'c': + if( i < count - 21 ) { + const unsigned int j = n64_strftime_internal( &str[i], count - (unsigned int)i, "%a %b %e %H:%M:%S %Y", tp ); + if( !j ) return 0; + i += j; + break; + } else return 0; + case 'x': + case 'D': + if( i >= count - 8 ) return 0; + i += n64_strftime_internal( &str[i], count - (unsigned int)i, "%m/%d/%y", tp ); + break; + case 'X': + case 'T': + if( i >= count - 8 ) return 0; + i += n64_strftime_internal( &str[i], count - (unsigned int)i, "%H:%M:%S", tp ); + break; + case 'F': + if( i < count - 7 ) { + const unsigned int j = n64_strftime_internal( &str[i], count - (unsigned int)i, "%Y-%m-%d", tp ); + if( !j ) return 0; + i += j; + break; + } else return 0; + case 'r': + if( i >= count - 11 ) return 0; + i += n64_strftime_internal( &str[i], count - (unsigned int)i, "%I:%M:%S %p", tp ); + break; + case 'R': + if( i >= count - 5 ) return 0; + i += n64_strftime_internal( &str[i], count - (unsigned int)i, "%H:%M", tp ); + break; + default: + return 0; + } + } + + return 0; +} + +unsigned int n64_strftime( char *str, unsigned int count, const char *format, const n64_tm *tp ) { + n64_tm time; + n64_memcpy( &time, tp, sizeof( n64_tm ) ); + n64_mktime( &time ); + + str[count - 1] = '\0'; + return n64_strftime_internal( str, count, format, &time ); +} diff --git a/lib/n64-libc/n64-time.h b/lib/n64-libc/n64-time.h new file mode 100644 index 00000000..350ba0ff --- /dev/null +++ b/lib/n64-libc/n64-time.h @@ -0,0 +1,129 @@ +#ifndef N64_STDLIB_N64_TIME_H_ +#define N64_STDLIB_N64_TIME_H_ + +#include "n64-stddef.h" +#include "n64-stdbool.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef long long n64_time_t; +typedef unsigned int n64_clock_t; + +// Don't expect this to be accurate on emulator! +#define N64_CLOCKS_PER_SEC 46875000u + +#ifndef CLOCKS_PER_SEC + #define CLOCKS_PER_SEC N64_CLOCKS_PER_SEC +#endif + +typedef struct { + int tm_sec; // seconds after the minute [0-59] + int tm_min; // minutes after the hour [0-59] + int tm_hour; // hours since midnight [0-23] + int tm_mday; // day of the month [1-31] + int tm_mon; // months since January [0-11] + int tm_year; // years since 1900 (remember the epoch is 1900, not AD/CE!) + int tm_wday; // days since Sunday [0-6] + int tm_yday; // days since January 1 [0-365] + int tm_isdst; // Field included for the sake of matching the C STL. Has no effect. +} n64_tm; + +/********************************************************************************** +* New functions made for this library that are not part of the C standard library * +**********************************************************************************/ + +/* !NOTICE! + * Some flashcarts have buggy RTC implementations that do not correctly set status + * registers. To handle these cases, after initializing the RTC, we need to wait a + * short time (~20ms) before requesting the time to make sure the RTC has had time + * to initialize, since we can't rely on the status registers to know when it's + * ready. + * + * Some helper functions are provided to help handle this case as laglessly as + * possible. You are NOT required to call the init functions, and can simply use + * the n64_rtc_read and n64_time functions without any init call; however, this + * will result in a short stutter when playing on console as the library will wait + * 20ms on the very first call to either n64_rtc_read or n64_time to ensure we + * don't read garbage data from buggy flashcarts. + * + * To avoid this stutter on console, you can call n64_rtc_init() at the start of + * your program to initialize the RTC at this point. Then, the next call to + * n64_rtc_read or n64_time will only wait if it has not yet been 20ms since + * n64_rtc_init was called. + * + * To determine whether 20ms has passed, a comparison is done to the COUNT + * register, but since this overflows every ~90 seconds, if the very first call to + * n64_rtc_read or n64_time occurs precisely at this time, it will incorrectly + * believe that 20ms have not passed yet. To avoid this scenario, you can call + * n64_rtc_ready( NULL ) periodically (such as at the start of each frame) to check + * if the 20ms timer has finished, which will mark it as complete if it has, so you + * will no longer need to worry about the clock counter overflowing. + */ + +/* Initializes the RTC clock. Returns true if the RTC is supported, and false + * otherwise. See the comment above for more information about what this call + * is used for. + */ +n64_bool n64_rtc_init(); + +/* Returns true if it has been at least 20ms since n64_rtc_init() was called + * or if it is being played on emulator. + */ +__attribute__((access(write_only, 1))) +n64_bool n64_rtc_ready( n64_bool *good ); + +/* New function that directly puts the current time in an n64_tm struct. + * + * Because the RTC already provides the time in a tm-like format, this is + * much faster than using n64_time followed by n64_gmtime_r. + * + * Note that the n64_tm struct still matches the C standard library's tm struct-- + * of particular note is that tm_year is the number of years since 1900 + * + * If the flashcart or emulator does not support RTC, returns false + */ +__attribute__((nonnull(1), access(write_only, 1))) +n64_bool n64_rtc_read( n64_tm *time ); + +/* Adds `seconds` to `time` and normalizes it. Returns back the `time` pointer. + * If this would cause the time to overflow, it is capped at its min/max value. + */ +__attribute__((nonnull(1), access(read_write, 1), returns_nonnull)) +n64_tm *n64_tm_add( n64_tm *time, n64_time_t seconds ); + +/* Like n64_mktime, but doesn't normalize the input time */ +__attribute__((pure, nonnull(1), warn_unused_result)) +n64_time_t n64_to_unix_time( const n64_tm *time ); + +/********************************************************************************** +* C standard library functions * +**********************************************************************************/ + +__attribute__((always_inline, warn_unused_result)) +static inline n64_clock_t n64_clock() { + n64_clock_t count; + asm volatile( "mfc0 %0, $9": "=r"( count ) ); + return count; +} + +__attribute__((access(write_only, 1))) +n64_time_t n64_time( n64_time_t *arg ); + +// Assumes UTC timezone +__attribute__((nonnull(1), access(read_write, 1))) +n64_time_t n64_mktime( n64_tm *time ); + +__attribute__((nonnull(1, 2), access(write_only, 2))) +n64_tm *n64_gmtime_r( const n64_time_t *timer, n64_tm *buf ); + +// Assumes UTC timezone and C locale +__attribute__((format(strftime, 3, 0), nonnull(1, 3), access(write_only, 1))) +unsigned int n64_strftime( char *str, unsigned int count, const char *format, const n64_tm *tp ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/n64-libc/n64-util.c b/lib/n64-libc/n64-util.c new file mode 100644 index 00000000..b3e9472f --- /dev/null +++ b/lib/n64-libc/n64-util.c @@ -0,0 +1,24 @@ +#include "n64-util.h" +#include "n64-stdbool.h" + +__attribute__((noinline)) +static n64_bool n64_enable_interrupts() { + register n64_bool status; + asm volatile( ".set noat \n\t .align 16 \n\t mfc0 $1, $12 \n\t andi %0, $1, 1 \n\t ori $1, $1, 1 \n\t mtc0 $1, $12" : "=r"( status ) :: "at" ); + return status; +} + +__attribute__((noinline)) +static n64_bool n64_disable_interrupts() { + register n64_bool status; + asm volatile( ".set noat \n\t .align 16 \n\t mfc0 $1, $12 \n\t andi %0, $1, 1 \n\t subu $1, $1, %0 \n\t mtc0 $1, $12" : "=r"( status ) :: "at" ); + return status; +} + +n64_bool n64_set_interrupts( n64_bool enable ) { + return enable ? n64_enable_interrupts() : n64_disable_interrupts(); +} + +void n64_await_pi_io() { + while( *((volatile unsigned int*)0xa4600010u) & 0x3u ); +} diff --git a/lib/n64-libc/n64-util.h b/lib/n64-libc/n64-util.h new file mode 100644 index 00000000..e7432802 --- /dev/null +++ b/lib/n64-libc/n64-util.h @@ -0,0 +1,62 @@ +#ifndef N64_STDLIB_N64_UTIL_H_ +#define N64_STDLIB_N64_UTIL_H_ + +#include "n64-stdbool.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Enables or disables interrupts and returns whether interrupts were previously enabled or not + * + * Example usage: + * const n64_bool intr = n64_set_interrupts( false ); + * // Do critical section work here + * n64_set_interrupts( intr ); + */ +n64_bool n64_set_interrupts( n64_bool enable ); + +/* Waits for PI I/O operations (like a DMA) to complete. You should disable interrupts before calling this. + * + * Example usage: + * const n64_bool intr = n64_set_interrupts( false ); + * n64_await_pi_io(); + * // Now you can safely do a PI write here + * n64_set_interrupts( intr ); + */ +void n64_await_pi_io(); + +__attribute__((always_inline)) +static inline void n64_memory_barrier() { + asm volatile( "":::"memory" ); +} + +__attribute__((const, always_inline)) +static inline unsigned int n64_bit_cast_ftoi( float x ) { + union { float f; unsigned int i; } b = { x }; + return b.i; +} + +__attribute__((const, always_inline)) +static inline float n64_bit_cast_itof( unsigned int x ) { + union { unsigned int i; float f; } b = { x }; + return b.f; +} + +__attribute__((const, always_inline)) +static inline unsigned long long n64_bit_cast_dtol( double x ) { + union { double f; unsigned long long i; } b = { x }; + return b.i; +} + +__attribute__((const, always_inline)) +static inline double n64_bit_cast_ltod( unsigned long long x ) { + union { unsigned long long i; double f; } b = { x }; + return b.f; +} + +#ifdef __cplusplus +} +#endif + +#endif