From 82736c610af9edc9ab3236e123a85b3dcecc15ea Mon Sep 17 00:00:00 2001
From: a <a>
Date: Mon, 23 Jun 2025 20:50:28 -0400
Subject: [PATCH] git subrepo clone https://gitlab.com/mpharoah/n64-libc
 lib/n64-libc

subrepo:
  subdir:   "lib/n64-libc"
  merged:   "70270d60"
upstream:
  origin:   "https://gitlab.com/mpharoah/n64-libc"
  branch:   "main"
  commit:   "70270d60"
git-subrepo:
  version:  "0.4.9"
  origin:   "https://github.com/ingydotnet/git-subrepo"
  commit:   "4f60dd7"
---
 lib/n64-libc/.gitignore             |    2 +
 lib/n64-libc/.gitrepo               |   12 +
 lib/n64-libc/README.md              |    5 +
 lib/n64-libc/n64-alloca.h           |    3 +
 lib/n64-libc/n64-assert.c           |    9 +
 lib/n64-libc/n64-assert.h           |   32 +
 lib/n64-libc/n64-ctype.c            |   20 +
 lib/n64-libc/n64-ctype.h            |   89 +++
 lib/n64-libc/n64-fenv.c             |   72 ++
 lib/n64-libc/n64-fenv.h             |   39 +
 lib/n64-libc/n64-float.h            |  111 +++
 lib/n64-libc/n64-libc.h             |   15 +
 lib/n64-libc/n64-math.c             |  558 +++++++++++++++
 lib/n64-libc/n64-math.h             |  275 +++++++
 lib/n64-libc/n64-numbers.h          |   43 ++
 lib/n64-libc/n64-stdbool.h          |   34 +
 lib/n64-libc/n64-stdckdint.h        |    8 +
 lib/n64-libc/n64-stddef.h           |   15 +
 lib/n64-libc/n64-stdio-bprint.c.inc |  432 +++++++++++
 lib/n64-libc/n64-stdio-format.c.inc | 1030 +++++++++++++++++++++++++++
 lib/n64-libc/n64-stdio-ryu.c.inc    |  910 +++++++++++++++++++++++
 lib/n64-libc/n64-stdio.c            |   67 ++
 lib/n64-libc/n64-stdio.h            |   65 ++
 lib/n64-libc/n64-stdlib.c           |  155 ++++
 lib/n64-libc/n64-stdlib.h           |   67 ++
 lib/n64-libc/n64-string.c           |  356 +++++++++
 lib/n64-libc/n64-string.h           |  111 +++
 lib/n64-libc/n64-time.c             |  685 ++++++++++++++++++
 lib/n64-libc/n64-time.h             |  129 ++++
 lib/n64-libc/n64-util.c             |   24 +
 lib/n64-libc/n64-util.h             |   62 ++
 31 files changed, 5435 insertions(+)
 create mode 100644 lib/n64-libc/.gitignore
 create mode 100644 lib/n64-libc/.gitrepo
 create mode 100644 lib/n64-libc/README.md
 create mode 100644 lib/n64-libc/n64-alloca.h
 create mode 100644 lib/n64-libc/n64-assert.c
 create mode 100644 lib/n64-libc/n64-assert.h
 create mode 100644 lib/n64-libc/n64-ctype.c
 create mode 100644 lib/n64-libc/n64-ctype.h
 create mode 100644 lib/n64-libc/n64-fenv.c
 create mode 100644 lib/n64-libc/n64-fenv.h
 create mode 100644 lib/n64-libc/n64-float.h
 create mode 100644 lib/n64-libc/n64-libc.h
 create mode 100644 lib/n64-libc/n64-math.c
 create mode 100644 lib/n64-libc/n64-math.h
 create mode 100644 lib/n64-libc/n64-numbers.h
 create mode 100644 lib/n64-libc/n64-stdbool.h
 create mode 100644 lib/n64-libc/n64-stdckdint.h
 create mode 100644 lib/n64-libc/n64-stddef.h
 create mode 100644 lib/n64-libc/n64-stdio-bprint.c.inc
 create mode 100644 lib/n64-libc/n64-stdio-format.c.inc
 create mode 100644 lib/n64-libc/n64-stdio-ryu.c.inc
 create mode 100644 lib/n64-libc/n64-stdio.c
 create mode 100644 lib/n64-libc/n64-stdio.h
 create mode 100644 lib/n64-libc/n64-stdlib.c
 create mode 100644 lib/n64-libc/n64-stdlib.h
 create mode 100644 lib/n64-libc/n64-string.c
 create mode 100644 lib/n64-libc/n64-string.h
 create mode 100644 lib/n64-libc/n64-time.c
 create mode 100644 lib/n64-libc/n64-time.h
 create mode 100644 lib/n64-libc/n64-util.c
 create mode 100644 lib/n64-libc/n64-util.h
diff --git a/lib/n64-libc/.gitignore b/lib/n64-libc/.gitignore
new file mode 100644
index 00000000..7cac32ea
--- /dev/null
+++ b/lib/n64-libc/.gitignore
@@ -0,0 +1,2 @@
+/.vscode
+*.kate-swp
diff --git a/lib/n64-libc/.gitrepo b/lib/n64-libc/.gitrepo
new file mode 100644
index 00000000..59b5db03
--- /dev/null
+++ b/lib/n64-libc/.gitrepo
@@ -0,0 +1,12 @@
+; DO NOT EDIT (unless you know what you are doing)
+;
+; This subdirectory is a git "subrepo", and this file is maintained by the
+; git-subrepo command. See https://github.com/ingydotnet/git-subrepo#readme
+;
+[subrepo]
+	remote = https://gitlab.com/mpharoah/n64-libc
+	branch = main
+	commit = 70270d60f9b13d3cd896eaa8aa0a043992a823fd
+	parent = 4c89989f6f2ef03e71fc89e33e4e94fa067fda04
+	method = merge
+	cmdver = 0.4.9
diff --git a/lib/n64-libc/README.md b/lib/n64-libc/README.md
new file mode 100644
index 00000000..e834f6e7
--- /dev/null
+++ b/lib/n64-libc/README.md
@@ -0,0 +1,5 @@
+# N64 libc
+
+Provides a partial implementation of the C standard library for N64
+
+WORK IN PROGRESS
diff --git a/lib/n64-libc/n64-alloca.h b/lib/n64-libc/n64-alloca.h
new file mode 100644
index 00000000..6e1d578a
--- /dev/null
+++ b/lib/n64-libc/n64-alloca.h
@@ -0,0 +1,3 @@
+#ifndef n64_alloca
+#define n64_alloca( size ) __builtin_alloca_with_align( size, 64 )
+#endif
diff --git a/lib/n64-libc/n64-assert.c b/lib/n64-libc/n64-assert.c
new file mode 100644
index 00000000..458afd70
--- /dev/null
+++ b/lib/n64-libc/n64-assert.c
@@ -0,0 +1,9 @@
+#include "n64-assert.h"
+
+#include "n64-stdlib.h"
+#include "n64-stdio.h"
+
+void __n64_assert_fail( const char *assertion, const char *file, unsigned int line, const char *fcn ) {
+	n64_printf( "%s:%u: %s: Assertion `%s' failed.\n", file, line, fcn, assertion );
+	n64_abort();
+}
diff --git a/lib/n64-libc/n64-assert.h b/lib/n64-libc/n64-assert.h
new file mode 100644
index 00000000..1d286b4c
--- /dev/null
+++ b/lib/n64-libc/n64-assert.h
@@ -0,0 +1,32 @@
+#ifndef N64_STDLIB_N64_ASSERT_H_
+#define N64_STDLIB_N64_ASSERT_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef NDEBUG
+#define n64_assert( expr ) (void)(0)
+#else
+
+__attribute__((noreturn))
+void __n64_assert_fail( const char *assertion, const char *file, unsigned int line, const char *fcn );
+
+#define n64_assert( expr ) ((expr) ? (void)(0) : __n64_assert_fail( #expr, __FILE__, __LINE__, __func__ ))
+#endif
+
+#ifdef __cplusplus
+	#define n64_static_assert( expr ) static_assert( expr )
+#else
+	#if __STDC_VERSION__ >= 202311L
+		#define n64_static_assert( expr ) static_assert( expr )
+	#else
+		#define n64_static_assert( expr ) _Static_assert( expr )
+	#endif
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/n64-libc/n64-ctype.c b/lib/n64-libc/n64-ctype.c
new file mode 100644
index 00000000..f154d039
--- /dev/null
+++ b/lib/n64-libc/n64-ctype.c
@@ -0,0 +1,20 @@
+#include "n64-ctype.h"
+
+unsigned short __n64_internal_charflag_table[256] = {
+	/* 0x00 - 0x08 : control codes */	0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020,
+	/* 0x09        : tab */				0x0E0,
+	/* 0x0A - 0x0D : whitespaces */		0x060, 0x060, 0x060, 0x060,
+	/* 0x0E - 0x1F : control codes */	0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020, 0x020,
+	/* 0x20        : space */			0x1C0,
+	/* 0x21 - 0x2F : punctuation */		0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300,
+	/* 0x30 - 0x39 : digits */			0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105,
+	/* 0x3A - 0x40 : punctuation */		0x300, 0x300, 0x300, 0x300, 0x300, 0x300, 0x300,
+	/* 0x41 - 0x46 : hex letters (U) */	0x116, 0x116, 0x116, 0x116, 0x116, 0x116,
+	/* 0x47 - 0x5A : letters (U) */		0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112, 0x112,
+	/* 0x5B - 0x60 : punctuation */		0x300, 0x300, 0x300, 0x300, 0x300, 0x300,
+	/* 0x61 - 0x66 : hex letters (L) */ 0x10E, 0x10E, 0x10E, 0x10E, 0x10E, 0x10E,
+	/* 0x67 - 0x7A : letters (L) */		0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A, 0x10A,
+	/* 0x7B - 0x7E : punctuation */		0x300, 0x300, 0x300, 0x300,
+	/* 0x7F        : backspace */		0x020,
+	/* 0x80 - 0xFF : non-ASCII */		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
diff --git a/lib/n64-libc/n64-ctype.h b/lib/n64-libc/n64-ctype.h
new file mode 100644
index 00000000..e7dec653
--- /dev/null
+++ b/lib/n64-libc/n64-ctype.h
@@ -0,0 +1,89 @@
+#ifndef N64_STDLIB_N64_CTYPE_H_
+#define N64_STDLIB_N64_CTYPE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern unsigned short __n64_internal_charflag_table[256];
+
+__attribute__((const, always_inline))
+static inline int __n64_internal_has_charflag( int ch, unsigned short flag ) {
+	return __n64_internal_charflag_table[(unsigned int)ch & 0xFFu] & (int)flag;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_isalnum( int ch ) {
+	return __n64_internal_has_charflag( ch, 0x003 );
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_isalpha( int ch ) {
+	return __n64_internal_has_charflag( ch, 0x002 );
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_islower( int ch ) {
+	return __n64_internal_has_charflag( ch, 0x008 );
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_isupper( int ch ) {
+	return __n64_internal_has_charflag( ch, 0x010 );
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_isdigit( int ch ) {
+	return __n64_internal_has_charflag( ch, 0x001 );
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_isxdigit( int ch ) {
+	return __n64_internal_has_charflag( ch, 0x004 );
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_iscntrl( int ch ) {
+	return __n64_internal_has_charflag( ch, 0x020 );
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_isgraph( int ch ) {
+	return __n64_internal_has_charflag( ch, 0x203 );
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_isspace( int ch ) {
+	return __n64_internal_has_charflag( ch, 0x040 );
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_isblank( int ch ) {
+	return __n64_internal_has_charflag( ch, 0x080 );
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_isprint( int ch ) {
+	return __n64_internal_has_charflag( ch, 0x100 );
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_ispunct( int ch ) {
+	return __n64_internal_has_charflag( ch, 0x200 );
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_tolower( int ch ) {
+	return (ch >= (int)'A' && ch <= (int)'Z') ? (ch + 32) : ch;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_toupper( int ch ) {
+	return (ch >= (int)'a' && ch <= (int)'z') ? (ch - 32) : ch;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/n64-libc/n64-fenv.c b/lib/n64-libc/n64-fenv.c
new file mode 100644
index 00000000..8f8db766
--- /dev/null
+++ b/lib/n64-libc/n64-fenv.c
@@ -0,0 +1,72 @@
+#include "n64-fenv.h"
+
+int n64_feclearexcept( int excepts ) {
+	if( excepts & ~FE_ALL_EXCEPT ) return excepts;
+	register unsigned int fcr31 = __builtin_mips_get_fcsr();
+	fcr31 &= ~((unsigned int)excepts << 2);
+	__builtin_mips_set_fcsr( fcr31 );
+	return ((int)__builtin_mips_get_fcsr() >> 2) & excepts;
+}
+
+int n64_fetestexcept( int excepts ) {
+	return (int)(__builtin_mips_get_fcsr() >> 2) & FE_ALL_EXCEPT & excepts;
+}
+
+int n64_feraiseexcept( int excepts ) {
+	if( excepts & ~FE_ALL_EXCEPT ) return excepts;
+	register unsigned int fcr31 = __builtin_mips_get_fcsr();
+	fcr31 |= (unsigned int)excepts << 12;
+	__builtin_mips_set_fcsr( fcr31 );
+	return (((int)__builtin_mips_get_fcsr() >> 2) & excepts) ^ excepts;
+}
+
+int n64_fegetexceptflag( n64_fexcept_t *flagp, int excepts ) {
+	if( excepts & ~FE_ALL_EXCEPT ) return excepts;
+	*flagp = (n64_fexcept_t)((__builtin_mips_get_fcsr() >> 2) & (unsigned int)excepts);
+	return 0;
+}
+
+int n64_fesetexceptflag( const n64_fexcept_t *flagp, int excepts ) {
+	if( excepts & ~FE_ALL_EXCEPT ) return excepts;
+	register unsigned int fcr31 = __builtin_mips_get_fcsr();
+	fcr31 &= ~((unsigned int)excepts << 2);
+	fcr31 |= ((unsigned int)*flagp & (unsigned int)excepts) << 2;
+	__builtin_mips_set_fcsr( fcr31 );
+	return (((int)__builtin_mips_get_fcsr() >> 2) & excepts) ^ ((int)*flagp & excepts);
+}
+
+int n64_fesetround( int round ) {
+	if( round < 0 || round > 3 ) return round;
+	register unsigned int fcr31 = __builtin_mips_get_fcsr() & ~3u;
+	__builtin_mips_set_fcsr( fcr31 | (unsigned int)round );
+	return (int)(__builtin_mips_get_fcsr() & 3u) ^ round;
+}
+
+int n64_fegetround() {
+	return (unsigned int)__builtin_mips_get_fcsr() & 3u;
+}
+
+int n64_fegetenv( n64_fenv_t* envp ) {
+	*envp = __builtin_mips_get_fcsr();
+	return 0;
+}
+
+int n64_fesetenv( const n64_fenv_t* envp ) {
+	register const unsigned int c = __builtin_mips_get_fcsr() & 0x00800000u;
+	__builtin_mips_set_fcsr( (*envp & 0x01000FFFu) | c );
+	return (int)((*envp ^ __builtin_mips_get_fcsr()) & 0x01000FFFu);
+}
+
+int n64_feholdexcept( n64_fenv_t* envp ) {
+	*envp = (n64_fenv_t)__builtin_mips_get_fcsr() & 0x0103FFFFu;
+	__builtin_mips_set_fcsr( *envp & 0x01800003u );
+	return (int)(__builtin_mips_get_fcsr() & 0x0003FFFCu);
+}
+
+int n64_feupdateenv( const n64_fenv_t* envp ) {
+	register const unsigned int e = __builtin_mips_get_fcsr() & 0x0083F000u;
+	register const unsigned int c = e & 0x00800000u;
+	__builtin_mips_set_fcsr( (*envp & 0x01000FFFu) | c );
+	__builtin_mips_set_fcsr( (*envp & 0x01000FFFu) | e );
+	return (int)((*envp ^ __builtin_mips_get_fcsr()) & 0x0103FFFFu);
+}
diff --git a/lib/n64-libc/n64-fenv.h b/lib/n64-libc/n64-fenv.h
new file mode 100644
index 00000000..ac1c4a0f
--- /dev/null
+++ b/lib/n64-libc/n64-fenv.h
@@ -0,0 +1,39 @@
+#ifndef N64_STDLIB_N64_FENV_H_
+#define N64_STDLIB_N64_FENV_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FE_DIVBYZERO 0x08
+#define FE_INEXACT 0x01
+#define FE_INVALID 0x10
+#define FE_OVERFLOW 0x04
+#define FE_UNDERFLOW 0x02
+#define FE_ALL_EXCEPT 0x1F
+
+#define FE_DOWNWARD 0x3
+#define FE_TONEAREST 0x0
+#define FE_TOWARDZERO 0x1
+#define FE_UPWARD 0x2
+
+typedef unsigned int n64_fenv_t;
+typedef unsigned char n64_fexcept_t;
+
+int n64_feclearexcept( int excepts );
+int n64_fetestexcept( int excepts ) __attribute__((warn_unused_result));
+int n64_feraiseexcept( int excepts );
+int n64_fegetexceptflag( n64_fexcept_t *flagp, int excepts ) __attribute__((nonnull(1), access(write_only, 1)));
+int n64_fesetexceptflag( const n64_fexcept_t *flagp, int excepts ) __attribute__((nonnull(1)));
+int n64_fesetround( int round );
+int n64_fegetround() __attribute__((warn_unused_result));
+int n64_fegetenv( n64_fenv_t* envp ) __attribute__((nonnull(1), access(write_only, 1)));
+int n64_fesetenv( const n64_fenv_t* envp ) __attribute__((nonnull(1)));
+int n64_feholdexcept( n64_fenv_t* envp ) __attribute__((nonnull(1), access(write_only, 1)));
+int n64_feupdateenv( const n64_fenv_t* envp ) __attribute__((nonnull(1)));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/n64-libc/n64-float.h b/lib/n64-libc/n64-float.h
new file mode 100644
index 00000000..5a7497cc
--- /dev/null
+++ b/lib/n64-libc/n64-float.h
@@ -0,0 +1,111 @@
+#ifndef FLT_RADIX
+	#define FLT_RADIX 2
+#endif
+
+#ifndef FLT_DECIMAL_DIG
+	#define FLT_DECIMAL_DIG 9
+#endif
+
+#ifndef DBL_DECIMAL_DIG
+	#define DBL_DECIMAL_DIG 17
+#endif
+
+#ifndef FLT_MIN
+	#define FLT_MIN 1.1754944e-38f
+#endif
+
+#ifndef DBL_MIN
+	#define DBL_MIN 2.2250738585072014e-308
+#endif
+
+#ifndef FLT_TRUE_MIN
+	#define FLT_TRUE_MIN 1e-45f
+#endif
+
+#ifndef DBL_TRUE_MIN
+	#define DBL_TRUE_MIN 5e-324
+#endif
+
+#ifndef FLT_MAX
+	#define FLT_MAX 3.4028235e+38f
+#endif
+
+#ifndef DBL_MAX
+	#define DBL_MAX 1.7976931348623157e+308
+#endif
+
+#ifndef FLT_EPSILON
+	#define FLT_EPSILON 1.1920929e-07f
+#endif
+
+#ifndef DBL_EPSILON
+	#define DBL_EPSILON 2.220446049250313e-16
+#endif
+
+#ifndef FLT_DIG
+	#define FLT_DIG 6
+#endif
+
+#ifndef DBL_DIG
+	#define DBL_DIG 15
+#endif
+
+#ifndef FLT_MANT_DIG
+	#define FLT_MANT_DIG 24
+#endif
+
+#ifndef DBL_MANT_DIG
+	#define DBL_MANT_DIG 53
+#endif
+
+#ifndef FLT_MIN_EXP
+	#define FLT_MIN_EXP -125
+#endif
+
+#ifndef DBL_MIN_EXP
+	#define DBL_MIN_EXP -1021
+#endif
+
+#ifndef FLT_MIN_10_EXP
+	#define FLT_MIN_10_EXP -37
+#endif
+
+#ifndef DBL_MIN_10_EXP
+	#define DBL_MIN_10_EXP -307
+#endif
+
+#ifndef FLT_MAX_EXP
+	#define FLT_MAX_EXP 128
+#endif
+
+#ifndef DBL_MAX_EXP
+	#define DBL_MAX_EXP 1024
+#endif
+
+#ifndef FLT_MAX_10_EXP
+	#define FLT_MAX_10_EXP 38
+#endif
+
+#ifndef DBL_MAX_10_EXP
+	#define DBL_MAX_10_EXP 308
+#endif
+
+#ifndef FLT_EVAL_METHOD
+	#define FLT_EVAL_METHOD 0
+#endif
+
+#ifndef FLT_HAS_SUBNORM
+	#define FLT_HAS_SUBNORM 1
+#endif
+
+#ifndef DBL_HAS_SUBNORM
+	#define DBL_HAS_SUBNORM 1
+#endif
+
+#ifndef DECIMAL_DIG
+	#if defined(_ABIO64) || defined(_ABIO32)
+		#define DECIMAL_DIG DBL_DECIMAL_DIG
+	#else
+		#define DECIMAL_DIG 36
+	#endif
+#endif
diff --git a/lib/n64-libc/n64-libc.h b/lib/n64-libc/n64-libc.h
new file mode 100644
index 00000000..b72bd328
--- /dev/null
+++ b/lib/n64-libc/n64-libc.h
@@ -0,0 +1,15 @@
+#include "n64-alloca.h"
+#include "n64-assert.h"
+#include "n64-ctype.h"
+#include "n64-fenv.h"
+#include "n64-float.h"
+#include "n64-math.h"
+#include "n64-numbers.h"
+#include "n64-stdbool.h"
+#include "n64-stdckdint.h"
+#include "n64-stddef.h"
+#include "n64-stdio.h"
+#include "n64-stdlib.h"
+#include "n64-string.h"
+#include "n64-time.h"
+#include "n64-util.h"
diff --git a/lib/n64-libc/n64-math.c b/lib/n64-libc/n64-math.c
new file mode 100644
index 00000000..9fe6a30e
--- /dev/null
+++ b/lib/n64-libc/n64-math.c
@@ -0,0 +1,558 @@
+#include "n64-math.h"
+
+#include "n64-stdbool.h"
+#include "n64-fenv.h"
+#include "n64-util.h"
+
+#ifdef __NO_TRAPPING_MATH__
+	#define _FTRAP( ex ) 
+#else
+	#define _FTRAP( ex ) n64_feraiseexcept( ex )
+#endif
+
+typedef struct {
+	double invc;
+	double logc;
+} __attribute__((aligned(16))) __n64_log2_table_entry;
+
+static const __n64_log2_table_entry s_logTable[16] = {
+	{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
+	{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
+	{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
+	{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
+	{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
+	{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
+	{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
+	{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
+	{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
+	{ 0x1p+0, 0x0p+0 },
+	{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
+	{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
+	{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
+	{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
+	{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
+	{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 }
+};
+
+static const unsigned long long s_expTable[32] = {
+	0x3ff0000000000000ull, 0x3fefd9b0d3158574ull, 0x3fefb5586cf9890full, 0x3fef9301d0125b51ull,
+	0x3fef72b83c7d517bull, 0x3fef54873168b9aaull, 0x3fef387a6e756238ull, 0x3fef1e9df51fdee1ull,
+	0x3fef06fe0a31b715ull, 0x3feef1a7373aa9cbull, 0x3feedea64c123422ull, 0x3feece086061892dull,
+	0x3feebfdad5362a27ull, 0x3feeb42b569d4f82ull, 0x3feeab07dd485429ull, 0x3feea47eb03a5585ull,
+	0x3feea09e667f3bcdull, 0x3fee9f75e8ec5f74ull, 0x3feea11473eb0187ull, 0x3feea589994cce13ull,
+	0x3feeace5422aa0dbull, 0x3feeb737b0cdc5e5ull, 0x3feec49182a3f090ull, 0x3feed503b23e255dull,
+	0x3feee89f995ad3adull, 0x3feeff76f2fb5e47ull, 0x3fef199bdd85529cull, 0x3fef3720dcef9069ull,
+	0x3fef5818dcfba487ull, 0x3fef7c97337b9b5full, 0x3fefa4afa2a490daull, 0x3fefd0765b6e4540ull,
+};
+
+static double n64_log2_impl( unsigned int fbits ) {
+	if( fbits == 0x3f800000u ) { // 1.f -> +0
+		return 0.0;
+	} else if( fbits - 0x00800000u >= 0x7F000000u ) {
+		if( !(fbits << 1) ) { // 0.f -> -inf
+			_FTRAP( FE_DIVBYZERO );
+			return -N64_HUGE_VAL;
+		} else if( fbits == 0x7F800000u ) { // +inf -> +inf
+			return N64_HUGE_VAL;
+		} else if( (fbits << 1) == 0xFF000000u ) { // nan -> nan
+			return n64_bit_cast_itof( fbits );
+		} else if( fbits >> 31 ) { // negative -> nan
+			_FTRAP( FE_INVALID );
+			return NAN;
+		} else {
+			register const float norm = n64_bit_cast_ftoi( fbits ) * 0x1p23f;
+			fbits = n64_bit_cast_itof( norm ) - (23 << 23);
+		}
+	}
+
+	register unsigned int top = fbits - 0x3F330000u;
+	register const unsigned int i = (top >> 19) & 0xF;
+	top &= 0xFF800000u;
+	register const double z = n64_bit_cast_itof( fbits - top );
+
+	register const double r = z * s_logTable[i].invc - 1.0;
+	register const double y0 = s_logTable[i].logc + (double)((int)top >> 23);
+	register const double r2 = r * r;
+
+	register const double y = 0.288457581109214 * r - 0.36092606229713164;
+	register const double p = 0.480898481472577 * r - 0.7213474675006291;
+	register double q = 1.4426950408774342 * r + y0;
+	q += p * r2;
+	return y * (r2 * r2) + q;
+}
+
+static float n64_exp2_impl( double e, unsigned int signBias ) {
+	double kd = e + 0x1.8p+47;
+	const long long ki = n64_bit_cast_dtol( kd );
+	n64_memory_barrier();
+	kd -= 0x1.8p+47;
+	n64_memory_barrier();
+
+	register const double r = e - kd;
+	register const double s = n64_bit_cast_ltod( s_expTable[ki & 0x1Fu] + ((ki + (unsigned long long)signBias) << 47) );
+	
+	register const double y = (
+		__builtin_assoc_barrier( 0.05550361559341535 * r + 0.2402284522445722 ) *
+		__builtin_assoc_barrier( r * r )
+	) + __builtin_assoc_barrier( 0.6931471806916203 * r + 1.0 );
+	
+	return (float)(y * s);
+}
+
+float n64_expf( float arg ) {
+#if __FINITE_MATH_ONLY__ != 1
+	register const unsigned int bits = n64_bit_cast_ftoi( arg );
+	if( ((bits >> 23) & 0xFFu) == 0xFFu ) {
+		return (bits == 0xFF800000u) ? 0.f : arg;
+	} else if( arg > 88.72283f ) {
+		_FTRAP( FE_OVERFLOW );
+		return N64_INFINITY;
+	}
+#endif
+	if( arg < -103.27892f ) {
+		_FTRAP( FE_UNDERFLOW );
+		return 0.f;
+	}
+	return n64_exp2_impl( (double)arg * 1.4426950408889634, 0u );
+}
+
+float n64_exp2f( float arg ) {
+#if __FINITE_MATH_ONLY__ != 1
+	register const unsigned int bits = n64_bit_cast_ftoi( arg );
+	if( ((bits >> 23) & 0xFFu) == 0xFFu ) {
+		return (bits == 0xFF800000u) ? 0.f : arg;
+	} else if( arg >= 128.f ) {
+		_FTRAP( FE_OVERFLOW );
+		return N64_INFINITY;
+	}
+#endif
+	if( arg < -149.f ) {
+		_FTRAP( FE_UNDERFLOW );
+		return 0.f;
+	}
+	return n64_exp2_impl( arg, 0u );
+}
+
+float n64_logf( float arg ) {
+	if( arg == 2.71828183f ) return 1.f;
+	return (float)(n64_log2_impl( n64_bit_cast_ftoi( arg ) ) * 0.69314718055994531);
+}
+
+float n64_log10f( float arg ) {
+	return (float)(n64_log2_impl( n64_bit_cast_ftoi( arg ) ) * 0.3010299956639812);
+}
+
+float n64_log2f( float arg ) {
+	// Use a slightly faster approximation of log2 since we need less precision
+
+	unsigned int fbits = n64_bit_cast_ftoi( arg );
+	if( fbits == 0x3f800000u ) { // 1.f -> +0
+		return 0.f;
+	} else if( fbits - 0x00800000u >= 0x7F000000u ) {
+		if( !(fbits << 1) ) { // 0.f -> -inf
+			return -N64_INFINITY;
+		} else if( fbits == 0x7F800000u ) { // +inf -> +inf
+			return N64_INFINITY;
+		} else if( (fbits << 1) == 0xFF000000u ) { // nan -> nan
+			return arg;
+		} else if( fbits >> 31 ) { // negative -> nan
+			return NAN;
+		} else {
+			fbits = n64_bit_cast_itof( arg * 0x1p23f ) - (23 << 23);
+		}
+	}
+
+	register unsigned int top = fbits - 0x3F330000u;
+	register const unsigned int i = (top >> 19) & 0xF;
+	top &= 0xFF800000u;
+	register const double z = n64_bit_cast_itof( fbits - top );
+
+	register const double r = z * s_logTable[i].invc - 1.0;
+	register const double y0 = s_logTable[i].logc + (double)((int)top >> 23);
+	register const double r2 = r * r;
+
+	register const double y = 0.4811247078767291 * r - 0.7213476299867769;
+	register const double p = 1.4426950186867042 * r + y0;
+	return (float)((-0.36051725506874704 * r2 + y) * r2 + p);
+}
+
+float n64_powf( float base, float exponent ) {
+	// an exponent of 0 or a base of 1 always results in 1, even if the other argument is NaN
+	if( exponent == 0.f || exponent == -0.f || base == 1.f ) {
+		return 1.f;
+	}
+
+	unsigned int ib = n64_bit_cast_ftoi( base );
+	unsigned int ie = n64_bit_cast_ftoi( exponent );
+
+#if __FINITE_MATH_ONLY__ != 1
+	// if the base is -1 and the exponent is plus or minus infinity, return 1
+	if( exponent == -1.f && !(ie << 9) && (((ie >> 23) & 0xFFu) == 0xFFu) ) {
+		return 1.f;
+	}
+#endif
+
+	n64_bool expIsInteger = true;
+	n64_bool expIsOdd = false;
+	if( !ib || (ib >> 31) ) { // only care about these values when the base is non-positive
+		register const unsigned int iee = (ie >> 23) & 0xFFu;
+		if( iee == 0xFFu || iee < 127u ) {
+			expIsInteger = false;
+		} else if( iee > 150u ) {
+		} else if( ie & ((1u << (150u - iee)) - 1u) ) {
+			expIsInteger = false;
+		} else if( ie & (1 << (150u - iee)) ) {
+			expIsOdd = true;
+		}
+	}
+
+	if( !(ib << 1) ) {
+		// base is +0 or -0
+#if __FINITE_MATH_ONLY__ != 1
+		if( ((ie >> 23) & 0xFFu) == 0xFFu ) {
+			if( ie << 9 ) return base; // 0^NaN -> NaN
+			if( ie >> 31 ) return N64_INFINITY; // 0^-inf -> +inf
+			return 0.f; // 0^+inf -> +0
+		}
+#endif
+
+		if( ie >> 31 ) {
+			_FTRAP( FE_DIVBYZERO );
+			if( expIsInteger && expIsOdd && (ib >> 31) ) {
+				// -0 to the power of a negative odd integer -> -inf
+				return -N64_INFINITY;
+			}
+
+			// 0 to the power of a finite even integer or a finite non-integer -> +inf
+			return N64_INFINITY;
+		} else {
+			// -0 to the power of a positive odd integer is -0 (0 to any other finite power is +0)
+			return (expIsInteger && expIsOdd) ? base : 0.f;
+		}
+	}
+
+#if __FINITE_MATH_ONLY__ != 1
+	if( ((ib >> 23) & 0xFFu) == 0xFFu ) {
+		if( ib << 9 ) return base; // any base other than 1 to the power of NaN is NaN
+		if( ib >> 31 ) {
+			if( exponent > 0.f ) {
+				// -inf to the power of a positive odd integer is -inf
+				if( expIsInteger && expIsOdd ) {
+					return -N64_INFINITY;
+				}
+
+				// -inf to any other positive power is +inf
+				return N64_INFINITY;
+			} else {
+				// -inf to the power of a negative odd integer is -0
+				if( expIsInteger && expIsOdd ) {
+					return -0.f;
+				}
+				
+				// -inf to any other negative power is +0
+				return 0.f;
+			}
+		}
+
+		// +inf to the power of any positive exponent is +inf
+		// +inf to the power of any negative exponent is +0
+		return (exponent > 0.f) ? N64_INFINITY : 0.f;
+	}
+#endif
+
+	unsigned int signBias = 0u;
+	if( ib >> 31 ) {
+		// negative base
+		if( !expIsInteger ) {
+			// a negative finite number to the power of any non-integer finite number is NaN
+			_FTRAP( FE_INVALID );
+			return NAN;
+		}
+
+		if( expIsOdd ) signBias = 0x10000u;
+		ib &= 0x7FFFFFFFu;
+	} else if( exponent == -1.f ) {
+		return 1.f / base;
+	}
+
+	if( ib < 0x00800000u ) {
+		// subnormal base
+		ib = n64_bit_cast_ftoi( base * 0x1p23f );
+		ib &= 0x7FFFFFFFu;
+		ib -= (23 << 23);
+	}
+
+	const double elogb = (double)exponent * n64_log2_impl( ib );
+#if __FINITE_MATH_ONLY__ != 1
+	if( (float)elogb >= 128.f ) {
+		_FTRAP( FE_OVERFLOW );
+		return signBias ? -N64_INFINITY : N64_INFINITY;
+	}
+#endif
+	if( (float)elogb < -149.f ) {
+		_FTRAP( FE_UNDERFLOW );
+		return signBias ? -0.f : 0.f;
+	}
+
+	return n64_exp2_impl( elogb, signBias );
+}
+
+// x >= y >= 0
+static double hypot_impl( double x, double y ) {
+	double e1, e2;
+	const double h = n64_sqrt( x * x + y * y );
+	if( h <= 2.0 * y ) {
+		register const double dy = h - y;
+		e1 = x * (2.0 * dy - x);
+		e2 = (dy - 2.0 * (x - y)) * dy;
+	} else {
+		register const double dx = h - x;
+		e1 = 2.0 * dx  * (x - 2.0 * y);
+		e2 = (4.0 * dx - y) * y + dx * dx;
+	}
+
+	return h - __builtin_assoc_barrier((e1 + e2) / (2.0 * h));
+}
+
+float n64_hypotf( float x, float y ) {
+#if __FINITE_MATH_ONLY__ != 1
+	register const unsigned int xi = n64_bit_cast_ftoi( x );
+	register const unsigned int yi = n64_bit_cast_ftoi( y );
+	if( (xi & 0x7F800000u) || (yi & 0x7F800000u) ) {
+		register const unsigned int ynan = yi << 9;
+		if(
+			(!(xi << 9) && (!ynan || !(yi >> 31))) ||
+			(!ynan && !(xi >> 31))
+		) {
+			return N64_INFINITY;
+		}
+
+		return x + y;
+	}
+#endif
+
+	return (float)n64_sqrt( (double)x * (double)x + (double)y * (double)y );
+}
+
+typedef union {
+	double f;
+	struct {
+		unsigned int hi;
+		unsigned int lo;
+	};
+} __dbits_t;
+
+double n64_hypot( double x, double y ) {
+#if __FINITE_MATH_ONLY__ != 1
+	const __dbits_t xi = { x };
+	const __dbits_t yi = { y };
+	if( (xi.hi & 0x7FF00000u) || (yi.hi & 0x7FF00000u) ) {
+		register const unsigned int xnan = xi.lo || (xi.hi << 12);
+		register const unsigned int ynan = yi.lo || (yi.hi << 12);
+		if(
+			(!xnan && (!ynan || !(yi.hi >> 31))) ||
+			(!ynan && !(xi.hi >> 31))
+		) {
+			return N64_HUGE_VAL;
+		}
+
+		return x + y;
+	}
+#endif
+
+	x = n64_fabs( x );
+	y = n64_fabs( y );
+
+	if( x < y ) {
+		register const double temp = x;
+		x = y;
+		y = temp;
+	}
+
+	if( x > 0x1p+511 ) {
+		if( y <= x * 0x1p-54 ) {
+			return x + y;
+		}
+
+		return hypot_impl( x * 0x1p-600, y * 0x1p-600 ) * 0x1p+600;
+	}
+
+	if( y < 0x1p-459 ) {
+		if( x >= y * 0x1p+54 ) {
+			return x + y;
+		}
+
+		return hypot_impl( x * 0x1p+600, y * 0x1p+600 ) * 0x1p-600;
+	}
+
+	if( y <= x * 0x1p-54 ) {
+		return x + y;
+	}
+
+	return hypot_impl( x, y );
+}
+
+float n64_cbrtf( float arg ) {
+	register unsigned int bits = n64_bit_cast_ftoi( arg );
+	if( ((bits >> 23) & 0xFFu) == 0xFFu || !(bits << 1) ) {
+		return arg;
+	}
+
+	n64_bool negative = false;
+	if( bits >> 31 ) {
+		negative = true;
+		bits &= 0x7FFFFFFFu;
+	}
+
+	if( bits < 0x00800000u ) {
+		// subnormal base
+		bits = n64_bit_cast_ftoi( arg * 0x1p23f );
+		bits &= 0x7FFFFFFFu;
+		bits -= (23 << 23);
+	}
+
+	register const float result = n64_exp2_impl( n64_log2_impl( bits ) / 3.0, 0u );
+	return negative ? -result : result;
+}
+
+static const unsigned int s_4divPi[24] = {
+	0x000000a2u, 0x0000a2f9u, 0x00a2f983u, 0xa2f9836eu,
+	0xf9836e4eu, 0x836e4e44u, 0x6e4e4415u, 0x4e441529u,
+	0x441529fcu, 0x1529fc27u, 0x29fc2757u, 0xfc2757d1u,
+	0x2757d1f5u, 0x57d1f534u, 0xd1f534ddu, 0xf534ddc0u,
+	0x34ddc0dbu, 0xddc0db62u, 0xc0db6295u, 0xdb629599u,
+	0x6295993cu, 0x95993c43u, 0x993c4390u, 0x3c439041u
+};
+
+__attribute__((always_inline))
+static inline unsigned int absTop12( float x ) {
+	return (n64_bit_cast_ftoi( x ) >> 20) & 0x7FFu;
+}
+
+__attribute__((always_inline))
+static inline double fastReduce( double x, unsigned int *quadrant ) {
+	register const double r = x * 0.6366197723675814;
+	*quadrant = n64_llround( r );
+	return x - n64_round( r ) * 1.5707963267948966;
+}
+
+static inline double largeReduce( unsigned int xi, unsigned int *quadrant ) {
+	const unsigned int *lut = &s_4divPi[(xi >> 26) & 0xFu];
+	const unsigned int shift = (xi >> 23) & 0x7u;
+
+	xi &= 0x00FFFFFFu;
+	xi |= 0x00800000u;
+	xi <<= shift;
+
+	const unsigned long long hi = (unsigned long long)(xi * lut[0]) << 32;
+	const unsigned long long lo = ((unsigned long long)xi * lut[8]) >> 32;
+	unsigned long long result = (unsigned long long)xi * lut[4];
+	result += hi | lo;
+
+	*quadrant = ((unsigned int)(result >> 61) + 1u) >> 1;
+	result -= (unsigned long long)*quadrant << 62;
+  
+	return (double)(long long)result * 3.4061215800865545e-19;
+}
+
+static const double s_cosPolyP[5] = {
+	 1.0, -0.49999999725108224,  0.041666623324344516, -0.001388676379437604,  2.4390450703564542e-5
+};
+
+static const double s_cosPolyN[5] = {
+	-1.0,  0.49999999725108224, -0.041666623324344516,  0.001388676379437604, -2.4390450703564542e-5
+};
+
+static float sinPoly( double x ) {
+	register const double x2 = x * x;
+	register const double x3 = x * x2;
+
+	register const double s1 = 0.008332178146138854 + x2 * -1.9517298981385725e-4;
+	register const double s0 = x + x3 * -0.16666654943701084;
+
+	return (float)(s0 + (x3 * x2) * s1);
+}
+
+static float cosPoly( double x, unsigned int negatePoly ) {
+	const double *p = negatePoly ? s_cosPolyN : s_cosPolyP;
+	register const double x2 = x * x;
+
+	register const double c2 = p[3] + x2 * p[4];
+	register const double c1 = p[0] + x2 * p[1];
+
+	register const double x4 = x2 * x2;
+	register const double c0 = c1 + x4 * p[2];
+
+	return (float)(c0 + __builtin_assoc_barrier( x4 * x2 ) * c2);
+}
+
+float n64_sinf( float arg ) {
+	if( arg == 0.f ) return arg;
+
+	const unsigned int top = absTop12( arg );
+	if( top < 1012u ) {
+		if( top < 920u ) {
+			if( top < 8u ) _FTRAP( FE_UNDERFLOW );
+			return arg;
+		}
+
+		return sinPoly( arg );
+	} else if( top < 1071u ) {
+		unsigned int q;
+		double x = fastReduce( arg, &q );
+
+		if( ((q & 3u) + 1u) & 2u ) {
+			x = -x;
+		}
+
+		return (q & 1u) ? cosPoly( x, q & 2u ) : sinPoly( x );
+	} else if( top < 2040u ) {
+		unsigned int q;
+		const unsigned int xi = n64_bit_cast_ftoi( arg );
+		double x = largeReduce( xi, &q );
+
+		const unsigned int s = q + (xi >> 31);
+		if( ((s & 3u) + 1u) & 2u ) {
+			x = -x;
+		}
+
+		return (q & 1u) ? cosPoly( x, s & 2u ) : sinPoly( x );
+	} else {
+		_FTRAP( FE_INVALID );
+		return NAN;
+	}
+}
+
+float n64_cosf( float arg ) {
+	const unsigned int top = absTop12( arg );
+	if( top < 1012u ) {
+		if( top < 920u ) {
+			return 1.f;
+		}
+
+		return cosPoly( arg, 0u );
+	} else if( top < 1071u ) {
+		unsigned int q;
+		double x = fastReduce( arg, &q );
+
+		if( ((q & 3u) + 1u) & 2u ) {
+			x = -x;
+		}
+
+		return (q & 1u) ? sinPoly( x ) : cosPoly( x, q & 2u );
+	} else if( top < 2040u ) {
+		unsigned int q;
+		const unsigned int xi = n64_bit_cast_ftoi( arg );
+		double x = largeReduce( xi, &q );
+
+		const unsigned int s = q + (xi >> 31);
+		if( ((s & 3u) + 1u) & 2u ) {
+			x = -x;
+		}
+
+		return (q & 1u) ? sinPoly( x ) : cosPoly( x, s & 2u );
+	} else {
+		_FTRAP( FE_INVALID );
+		return NAN;
+	}
+}
diff --git a/lib/n64-libc/n64-math.h b/lib/n64-libc/n64-math.h
new file mode 100644
index 00000000..3a80bf8f
--- /dev/null
+++ b/lib/n64-libc/n64-math.h
@@ -0,0 +1,275 @@
+#ifndef N64_STDLIB_N64_MATH_H_
+#define N64_STDLIB_N64_MATH_H_
+
+/* Only a subset of math.h is currently implemented */
+
+#define N64_NAN __builtin_nanf( "0" )
+#define N64_HUGE_VALF __builtin_inff()
+#define N64_HUGE_VAL __builtin_inf()
+#define N64_INFINITY N64_HUGE_VALF
+
+#ifndef NAN
+	#define NAN N64_NAN
+#endif
+
+#ifndef HUGE_VALF
+	#define HUGE_VALF N64_HUGE_VALF
+#endif
+
+#ifndef HUGE_VAL
+	#define HUGE_VAL N64_HUGE_VAL
+#endif
+
+#ifndef INFINITY
+	#define INFINITY N64_INFINITY
+#endif
+
+__attribute__((const, warn_unused_result))
+float n64_expf( float arg );
+
+__attribute__((const, warn_unused_result))
+float n64_exp2f( float arg );
+
+__attribute__((const, warn_unused_result))
+float n64_logf( float arg );
+
+__attribute__((const, warn_unused_result))
+float n64_log10f( float arg );
+
+__attribute__((const, warn_unused_result))
+float n64_log2f( float arg );
+
+__attribute__((const, warn_unused_result))
+float n64_powf( float base, float exponent );
+
+__attribute__((const, warn_unused_result))
+float n64_cbrtf( float arg );
+
+__attribute__((const, warn_unused_result))
+float n64_hypotf( float x, float y );
+
+__attribute__((const, warn_unused_result))
+double n64_hypot( double x, double y );
+
+__attribute__((const, warn_unused_result))
+float n64_sinf( float arg );
+
+__attribute__((const, warn_unused_result))
+float n64_cosf( float arg );
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline float n64_sqrtf( float arg ) {
+	float result;
+	asm volatile( "sqrt.s %0, %1" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline double n64_sqrt( double arg ) {
+	double result;
+	asm volatile( "sqrt.d %0, %1" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline float n64_fabsf( float arg ) {
+	float result;
+	asm volatile( "abs.s %0, %1" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline double n64_fabs( double arg ) {
+	double result;
+	asm volatile( "abs.d %0, %1" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline double n64_floor( double arg ) {
+	if( arg > 4503599627370496.0 || arg < -4503599627370496.0 ) return arg;
+	double result;
+	asm volatile( "floor.l.d %0, %1 \n\t cvt.d.l %0, %0" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline double n64_ceil( double arg ) {
+	if( arg > 4503599627370496.0 || arg < -4503599627370496.0 ) return arg;
+	double result;
+	asm volatile( "ceil.l.d %0, %1 \n\t cvt.d.l %0, %0" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline double n64_trunc( double arg ) {
+	if( arg > 4503599627370496.0 || arg < -4503599627370496.0 ) return arg;
+	double result;
+	asm volatile( "trunc.l.d %0, %1 \n\t cvt.d.l %0, %0" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline double n64_round( double arg ) {
+	if( arg > 4503599627370496.0 || arg < -4503599627370496.0 ) return arg;
+	double result;
+	asm volatile( "round.l.d %0, %1 \n\t cvt.d.l %0, %0" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline double n64_rint( double arg ) {
+	if( arg > 4503599627370496.0 || arg < -4503599627370496.0 ) return arg;
+	double result;
+	asm volatile( "cvt.l.d %0, %1 \n\t cvt.d.l %0, %0" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline float n64_floorf( float arg ) {
+	if( arg > 8388608.f || arg < -8388608.f ) return arg;
+	float result;
+	asm volatile( "floor.w.s %0, %1 \n\t cvt.s.w %0, %0" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline float n64_ceilf( float arg ) {
+	if( arg > 8388608.f || arg < -8388608.f ) return arg;
+	float result;
+	asm volatile( "ceil.w.s %0, %1 \n\t cvt.s.w %0, %0" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline float n64_truncf( float arg ) {
+	if( arg > 8388608.f || arg < -8388608.f ) return arg;
+	float result;
+	asm volatile( "trunc.w.s %0, %1 \n\t cvt.s.w %0, %0" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline float n64_roundf( float arg ) {
+	if( arg > 8388608.f || arg < -8388608.f ) return arg;
+	float result;
+	asm volatile( "round.w.s %0, %1 \n\t cvt.s.w %0, %0" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline float n64_rintf( float arg ) {
+	if( arg > 8388608.f || arg < -8388608.f ) return arg;
+	float result;
+	asm volatile( "cvt.w.s %0, %1 \n\t cvt.s.w %0, %0" : "=f"( result ) : "f"( arg ) );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline long long n64_llround( double arg ) {
+#ifdef _ABIO32
+	double bits;
+	asm volatile( "round.l.d %0, %1" : "=f"( bits ) : "f"( arg ) );
+	union { double f; long long i; } b = { bits };
+	return b.i;
+#else
+	long long result;
+	asm volatile( "round.l.d $f10, %1 \n\t dmfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" );
+	return result;
+#endif
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline long long n64_llrint( double arg ) {
+#ifdef _ABIO32
+	double bits;
+	asm volatile( "cvt.l.d %0, %1" : "=f"( bits ) : "f"( arg ) );
+	union { double f; long long i; } b = { bits };
+	return b.i;
+#else
+	long long result;
+	asm volatile( "cvt.l.d $f10, %1 \n\t dmfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" );
+	return result;
+#endif
+}
+
+// Extensions: fill in gaps in the C standard for rounding functions
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline long long n64_llfloor( double arg ) {
+#ifdef _ABIO32
+	double bits;
+	asm volatile( "floor.l.d %0, %1" : "=f"( bits ) : "f"( arg ) );
+	union { double f; long long i; } b = { bits };
+	return b.i;
+#else
+	long long result;
+	asm volatile( "floor.l.d $f10, %1 \n\t dmfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" );
+	return result;
+#endif
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline long long n64_llceil( double arg ) {
+#ifdef _ABIO32
+	double bits;
+	asm volatile( "ceil.l.d %0, %1" : "=f"( bits ) : "f"( arg ) );
+	union { double f; long long i; } b = { bits };
+	return b.i;
+#else
+	long long result;
+	asm volatile( "ceil.l.d $f10, %1 \n\t dmfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" );
+	return result;
+#endif
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline long long n64_lltrunc( double arg ) {
+#ifdef _ABIO32
+	double bits;
+	asm volatile( "trunc.l.d %0, %1" : "=f"( bits ) : "f"( arg ) );
+	union { double f; long long i; } b = { bits };
+	return b.i;
+#else
+	long long result;
+	asm volatile( "trunc.l.d $f10, %1 \n\t dmfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" );
+	return result;
+#endif
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_ifloorf( float arg ) {
+	int result;
+	asm volatile( "floor.w.s $f10, %1 \n\t mfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_iceilf( float arg ) {
+	int result;
+	asm volatile( "ceil.w.s $f10, %1 \n\t mfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_itruncf( float arg ) {
+	int result;
+	asm volatile( "trunc.w.s $f10, %1 \n\t mfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_iroundf( float arg ) {
+	int result;
+	asm volatile( "round.w.s $f10, %1 \n\t mfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" );
+	return result;
+}
+
+__attribute__((const, warn_unused_result, always_inline))
+static inline int n64_irintf( float arg ) {
+	int result;
+	asm volatile( "cvt.w.s $f10, %1 \n\t mfc1 %0, $f10" : "=r"( result ) : "f"( arg ) : "f10" );
+	return result;
+}
+
+#endif
diff --git a/lib/n64-libc/n64-numbers.h b/lib/n64-libc/n64-numbers.h
new file mode 100644
index 00000000..e7fd0f71
--- /dev/null
+++ b/lib/n64-libc/n64-numbers.h
@@ -0,0 +1,43 @@
+#ifndef N64_STDLIB_N64_NUMBERS_H_
+#define N64_STDLIB_N64_NUMBERS_H_
+
+#define N64_E 2.7182818284590452
+#define N64_Ef 2.71828183f
+
+#define N64_LOG2E 1.4426950408889634
+#define N64_LOG2Ef 1.44269504f
+
+#define N64_LOG10E 0.43429448190325183
+#define N64_LOG10Ef 0.434294481f
+
+#define N64_PI 3.1415926535897932
+#define N64_PIf 3.14159265f
+
+#define N64_INV_PI 0.31830988618379067
+#define N64_INV_PIf 0.318309886f
+
+#define N64_INV_SQRTPI 0.56418958354775629
+#define N64_INV_SQRTPIf 0.564189584f
+
+#define N64_LN2 0.69314718055994531
+#define N64_LN2f 0.693147181f
+
+#define N64_LN10 2.3025850929940457
+#define N64_LN10f 2.30258509f
+
+#define N64_SQRT2 1.414213562373095
+#define N64_SQRT2f 1.41421356f
+
+#define N64_SQRT3 1.7320508075688773
+#define N64_SQRT3f 1.73205081f
+
+#define N64_INV_SQRT3 0.57735026918962576
+#define N64_INV_SQRT3f 0.577350269f
+
+#define N64_EGAMMA 0.57721566490153286
+#define N64_EGAMMAf 0.577215665f
+
+#define N64_PHI 1.6180339887498948
+#define N64_PHIf 1.61803399f
+
+#endif
diff --git a/lib/n64-libc/n64-stdbool.h b/lib/n64-libc/n64-stdbool.h
new file mode 100644
index 00000000..560e74a7
--- /dev/null
+++ b/lib/n64-libc/n64-stdbool.h
@@ -0,0 +1,34 @@
+#ifndef N64_STDLIB_N64_STDBOOL_H_
+#define N64_STDLIB_N64_STDBOOL_H_
+
+#ifdef __cplusplus
+	typedef bool n64_bool;
+#else
+	#if __STDC_VERSION__ >= 202311L
+		typedef bool n64_bool;
+	#else
+
+		#if __STDC_VERSION__ >= 199901L
+			typedef _Bool n64_bool;
+		#elif __GNUC__ >= 15
+			typedef unsigned char __attribute__((hardbool(0, 1))) n64_bool;
+		#else
+			typedef unsigned char n64_bool;
+		#endif
+
+		#ifndef true
+			#define true 1
+		#endif
+
+		#ifndef false
+			#define false 0
+		#endif
+
+	#endif
+#endif
+
+#ifndef __bool_true_false_are_defined
+	#define __bool_true_false_are_defined 1
+#endif
+
+#endif
diff --git a/lib/n64-libc/n64-stdckdint.h b/lib/n64-libc/n64-stdckdint.h
new file mode 100644
index 00000000..0b956fa6
--- /dev/null
+++ b/lib/n64-libc/n64-stdckdint.h
@@ -0,0 +1,8 @@
+#ifndef N64_STDLIB_N64_STDCKDINT_H_
+#define N64_STDLIB_N64_STDCKDINT_H_
+
+#define n64_ckd_add( result, a, b ) __builtin_add_overflow( a, b, result )
+#define n64_ckd_sub( result, a, b ) __builtin_sub_overflow( a, b, result )
+#define n64_ckd_mul( result, a, b ) __builtin_mul_overflow( a, b, result )
+
+#endif
diff --git a/lib/n64-libc/n64-stddef.h b/lib/n64-libc/n64-stddef.h
new file mode 100644
index 00000000..1a71fa32
--- /dev/null
+++ b/lib/n64-libc/n64-stddef.h
@@ -0,0 +1,15 @@
+#ifndef NULL
+	#ifdef __cplusplus
+		#if __cplusplus >= 199711L
+			#define NULL nullptr
+		#else
+			#define NULL 0
+		#endif
+	#else
+		#if __STDC_VERSION__ >= 202311L
+			#define NULL nullptr
+		#else
+			#define NULL ((void*)0)
+		#endif
+	#endif
+#endif
diff --git a/lib/n64-libc/n64-stdio-bprint.c.inc b/lib/n64-libc/n64-stdio-bprint.c.inc
new file mode 100644
index 00000000..3e690724
--- /dev/null
+++ b/lib/n64-libc/n64-stdio-bprint.c.inc
@@ -0,0 +1,432 @@
+#include "n64-stddef.h"
+
+#include "n64-stdio-format.c.inc"
+
+typedef enum {
+	CSI_ALIGNMENT,
+	CSI_SIGN,
+	CSI_SPACE,
+	CSI_ALTERNATE,
+	CSI_ZEROPAD,
+	CSI_WIDTH,
+	CSI_PRECISION,
+	CSI_LENGTH,
+	CSI_FORMAT
+} n64_csi;
+
+#define PCS_VARIABLE 0xAAAAAAAAu
+#define PCS_DEFAULT 0xBBBBBBBBu
+
+static const char *parse_conversion_specifier( const char *format, n64_format_args *args ) {
+	n64_bool isLong = false;
+
+	args->width = 0u;
+	args->dataSize = 4u;
+	args->precision = PCS_DEFAULT;
+	args->defaultPrecision = 1u;
+	args->leftAligned = false;
+	args->alternate = false;
+	args->capitalize = false;
+	args->padchar = ' ';
+	args->poschar = '\0';
+
+	n64_csi csi = CSI_ALIGNMENT;
+	while( *format != '\0' ) {
+		switch( csi ) {
+			case CSI_ALIGNMENT:
+				if( *format == '-' ) {
+					args->leftAligned = true;
+					format++;
+				}
+				csi = CSI_SIGN;
+				__attribute__((fallthrough));
+			case CSI_SIGN:
+				if( *format == '+' ) {
+					args->poschar = '+';
+					format++;
+				}
+				csi = CSI_SPACE;
+				__attribute__((fallthrough));
+			case CSI_SPACE:
+				if( *format == ' ' ) {
+					if( args->poschar != '+' ) args->poschar = ' ';
+					format++;
+				}
+				csi = CSI_ALTERNATE;
+				__attribute__((fallthrough));
+			case CSI_ALTERNATE:
+				if( *format == '#' ) {
+					args->alternate = true;
+					format++;
+				}
+				csi = CSI_ZEROPAD;
+				__attribute__((fallthrough));
+			case CSI_ZEROPAD:
+				if( *format == '0' ) {
+					args->padchar = '0';
+					format++;
+				}
+				csi = CSI_WIDTH;
+				__attribute__((fallthrough));
+			case CSI_WIDTH:
+				if( *format == '-' ) {
+					args->leftAligned = true;
+					format++;
+					continue;
+				} else if( *format == '0' ) {
+					format++;
+					continue;
+				} else if( *format == '*' ) {
+					args->width = PCS_VARIABLE;
+					format++;
+				} else if( *format > '0' && *format <= '9' ) {
+					int w = 0;
+					while( *format >= '0' && *format <= '9' ) {
+						w *= 10;
+						if( w < 0 ) return NULL;
+						w += (int)*format - (int)'0';
+						format++;
+					}
+					args->width = (unsigned int)w;
+				}
+				csi = CSI_PRECISION;
+				__attribute__((fallthrough));
+			case CSI_PRECISION:
+				if( *format == '.' ) {
+					format++;
+					if( *format == '-' ) {
+						format++;
+						while( *format >= '0' && *format <= '9' ) format++;
+					} else if( *format == '*' ) {
+						args->precision = PCS_VARIABLE;
+						format++;
+					} else if( *format >= '0' && *format <= '9' ) {
+						int p = 0;
+						while( *format >= '0' && *format <= '9' ) {
+							p *= 10;
+							if( p < 0 ) return NULL;
+							p += (int)*format - (int)'0';
+							format++;
+						}
+						args->precision = (unsigned int)p;
+					} else {
+						return NULL;
+					}
+				}
+				csi = CSI_LENGTH;
+				__attribute__((fallthrough));
+			case CSI_LENGTH:
+				switch( *format ) {
+					case 'h':
+						format++;
+						if( *format == 'h' ) {
+							args->dataSize = 1u;
+							format++;
+						} else {
+							args->dataSize = 2u;
+						}
+						break;
+					case 'l':
+						isLong = true;
+						args->dataSize = __SIZEOF_LONG__;
+						format++;
+						if( *format == 'l' ) {
+							args->dataSize = 8u;
+							format++;
+						}
+						break;
+					case 'j':
+					case 'L':
+						isLong = true;
+						args->dataSize = 8u;
+						format++;
+						break;
+					case 'z':
+						args->dataSize = __SIZEOF_SIZE_T__;
+						format++;
+						break;
+					case 't':
+						args->dataSize = __SIZEOF_PTRDIFF_T__;
+						format++;
+						break;
+					default:
+						break;
+				}
+				csi = CSI_FORMAT;
+				__attribute__((fallthrough));
+			case CSI_FORMAT:
+				switch( *format ) {
+					case 'c':
+						args->type = 'c';
+						args->dataSize = 1;
+						break;
+					case 's':
+						args->type = 's';
+						args->dataSize = 1;
+						args->defaultPrecision = 0xFFFFFFFFu;
+						break;
+					case 'd':
+					case 'i':
+						args->type = 'd';
+						if( args->precision != PCS_DEFAULT ) args->padchar = ' ';
+						break;
+					case 'o':
+						args->type = 'o';
+						args->poschar = '\0';
+						if( args->precision != PCS_DEFAULT ) args->padchar = ' ';
+						break;
+					case 'X':
+						args->capitalize = true;
+						__attribute__((fallthrough));
+					case 'x':
+						args->type = 'x';
+						args->poschar = '\0';
+						if( args->precision != PCS_DEFAULT ) args->padchar = ' ';
+						break;
+					case 'B':
+						args->capitalize = true;
+						__attribute__((fallthrough));
+					case 'b':
+						args->type = 'b';
+						args->poschar = '\0';
+						if( args->precision != PCS_DEFAULT ) args->padchar = ' ';
+						break;
+					case 'u':
+						args->type = 'u';
+						args->poschar = '\0';
+						if( args->precision != PCS_DEFAULT ) args->padchar = ' ';
+						break;
+					case 'F':
+						args->capitalize = true;
+						__attribute__((fallthrough));
+					case 'f':
+						args->type = 'f';
+						args->dataSize = isLong ? 8u : 4u;
+						args->defaultPrecision = 6u;
+						break;
+					case 'E':
+						args->capitalize = true;
+						__attribute__((fallthrough));
+					case 'e':
+						args->type = 'e';
+						args->dataSize = isLong ? 8u : 4u;
+						args->defaultPrecision = 6u;
+						break;
+					case 'A':
+						args->capitalize = true;
+						__attribute__((fallthrough));
+					case 'a':
+						args->type = 'a';
+						args->dataSize = isLong ? 8u : 4u;
+						args->defaultPrecision = 0xffffffffu;
+						break;
+					case 'G':
+						args->capitalize = true;
+						__attribute__((fallthrough));
+					case 'g':
+						args->type = 'g';
+						args->dataSize = isLong ? 8u : 4u;
+						args->defaultPrecision = 6u;
+						break;
+					case 'n':
+						args->type = 'n';
+						break;
+					case 'p':
+						args->type = 'p';
+						args->dataSize = 4u;
+						break;
+					default:
+						return NULL;
+				}
+				return ++format;
+			default:
+				return NULL;
+		}
+	}
+
+	return NULL;
+}
+
+int n64_vbprintf( char *_buffer, unsigned int bufsz, n64_bool(*callback)(void*, unsigned int), void *state, const char *format, __builtin_va_list args ) {
+	n64_bprint_buffer buff = {
+		/* start */ _buffer,
+		/* head */ _buffer,
+		/* end */ _buffer + bufsz,
+		callback,
+		state,
+		0u
+	};
+
+	while( *format != '\0' ) {
+		if( *format != '%' ) {
+			n64_putc( &buff, *format );
+			format++;
+			continue;
+		}
+
+		if( format[1] == '%' ) {
+			n64_putc( &buff, '%' );
+			format += 2;
+			continue;
+		}
+
+		n64_format_args fmt;
+		format = parse_conversion_specifier( ++format, &fmt );
+		if( !format ) return -1;
+
+		if( fmt.width == PCS_VARIABLE ) {
+			const int w = __builtin_va_arg( args, int );
+			if( w < 0 ) {
+				fmt.leftAligned = true;
+				fmt.width = (unsigned int)-w;
+			} else {
+				fmt.width = (unsigned int)w;
+			}
+		}
+
+		if( fmt.precision == PCS_VARIABLE ) {
+			const int p = __builtin_va_arg( args, int );
+			fmt.precision = (p < 0) ? fmt.defaultPrecision : (unsigned int)p;
+		} else if( fmt.precision == PCS_DEFAULT ) {
+			fmt.precision = fmt.defaultPrecision;
+		}
+
+		switch( fmt.type ) {
+			case 'c': {
+				const char c = (char)__builtin_va_arg( args, int );
+				n64_format_char( &buff, &fmt, c );
+				break;
+			}
+			case 's': {
+				const char *str = (const char*)__builtin_va_arg( args, const char* );
+				n64_format_string( &buff, &fmt, str );
+				break;
+			}
+			case 'd': {
+				if( fmt.dataSize == 8 ) {
+					const long long val = __builtin_va_arg( args, long long );
+					n64_format_sdec64( &buff, &fmt, val );
+				} else {
+					int val = __builtin_va_arg( args, int );
+					if( fmt.dataSize == 2 ) {
+						val = (int)(short)val;
+					} else if( fmt.dataSize == 1 ) {
+						val = (int)(signed char)val;
+					}
+					n64_format_sdec32( &buff, &fmt, val );
+				}
+				break;
+			}
+			case 'u': {
+				if( fmt.dataSize == 8 ) {
+					const unsigned long long val = __builtin_va_arg( args, unsigned long long );
+					n64_format_udec64( &buff, &fmt, val );
+				} else {
+					unsigned int val = __builtin_va_arg( args, unsigned int );
+					if( fmt.dataSize == 2 ) {
+						val &= 0xFFFFu;
+					} else if( fmt.dataSize == 1 ) {
+						val &= 0xFFu;
+					}
+					n64_format_udec32( &buff, &fmt, val );
+				}
+				break;
+			}
+			case 'o': {
+				if( fmt.dataSize == 8 ) {
+					const unsigned long long val = __builtin_va_arg( args, unsigned long long );
+					n64_format_oct64( &buff, &fmt, val );
+				} else {
+					unsigned int val = __builtin_va_arg( args, unsigned int );
+					if( fmt.dataSize == 2 ) {
+						val &= 0xFFFFu;
+					} else if( fmt.dataSize == 1 ) {
+						val &= 0xFFu;
+					}
+					n64_format_oct32( &buff, &fmt, val );
+				}
+				break;
+			}
+			case 'x': {
+				if( fmt.dataSize == 8 ) {
+					const unsigned long long val = __builtin_va_arg( args, unsigned long long );
+					n64_format_hex64( &buff, &fmt, val );
+				} else {
+					unsigned int val = __builtin_va_arg( args, unsigned int );
+					if( fmt.dataSize == 2 ) {
+						val &= 0xFFFFu;
+					} else if( fmt.dataSize == 1 ) {
+						val &= 0xFFu;
+					}
+					n64_format_hex32( &buff, &fmt, val );
+				}
+				break;
+			}
+			case 'b': {
+				if( fmt.dataSize == 8 ) {
+					const unsigned long long val = __builtin_va_arg( args, unsigned long long );
+					n64_format_bin64( &buff, &fmt, val );
+				} else {
+					unsigned int val = __builtin_va_arg( args, unsigned int );
+					if( fmt.dataSize == 2 ) {
+						val &= 0xFFFFu;
+					} else if( fmt.dataSize == 1 ) {
+						val &= 0xFFu;
+					}
+					n64_format_bin32( &buff, &fmt, val );
+				}
+				break;
+			}
+			case 'f':
+			case 'e':
+			case 'g': {
+				const double val = __builtin_va_arg( args, double );
+				if( fmt.dataSize == 8 ) {
+					n64_format_double( &buff, &fmt, val );
+				} else {
+					n64_format_float( &buff, &fmt, (float)val );
+				}
+				break;
+			}
+			case 'a': {
+				const double val = __builtin_va_arg( args, double );
+				if( fmt.dataSize == 8 ) {
+					n64_format_hexdouble( &buff, &fmt, val );
+				} else {
+					n64_format_hexfloat( &buff, &fmt, (float)val );
+				}
+				break;
+			}
+			case 'p': {
+				const void *ptr = __builtin_va_arg( args, const void* );
+				n64_format_pointer( &buff, &fmt, ptr );
+				break;
+			}
+			case 'n': {
+				switch( fmt.dataSize ) {
+					case 1: *__builtin_va_arg( args, signed char* ) = (signed char)buff.total; break;
+					case 2: *__builtin_va_arg( args, short* ) = (short)buff.total; break;
+					case 4: *__builtin_va_arg( args, int* ) = (int)buff.total; break;
+					case 8: *__builtin_va_arg( args, long long* ) = (long long)buff.total; break;
+					default: return -1;
+				}
+				break;
+			}
+			default:
+				return -1;
+		}
+	}
+
+	if( buff.start != buff.end ) {
+		buff.callback( buff.userdata, (unsigned int)(buff.head - buff.start) );
+	}
+	return (int)buff.total;
+}
+
+int n64_bprintf( char *buffer, unsigned int bufsz, n64_bool(*callback)(void*, unsigned int), void *state, const char *format, ... ) {
+	__builtin_va_list args;
+	__builtin_va_start( args, format );
+	register const int result = n64_vbprintf( buffer, bufsz, callback, state, format, args );
+	__builtin_va_end( args );
+	return result;
+}
diff --git a/lib/n64-libc/n64-stdio-format.c.inc b/lib/n64-libc/n64-stdio-format.c.inc
new file mode 100644
index 00000000..b4f63ce6
--- /dev/null
+++ b/lib/n64-libc/n64-stdio-format.c.inc
@@ -0,0 +1,1030 @@
+#include "n64-assert.h"
+#include "n64-stddef.h"
+
+#include "n64-stdio-ryu.c.inc"
+
+typedef unsigned char n64_smallbool;
+
+n64_static_assert( sizeof( long long ) == 8 );
+n64_static_assert( sizeof( double ) == 8 );
+n64_static_assert( sizeof( float ) == 4 );
+n64_static_assert( sizeof( int ) == 4 );
+
+typedef struct {
+	char *const start;
+	char *head;
+	char *end;
+	n64_bool(*callback)(void*, unsigned int);
+	void *const userdata;
+	unsigned int total;
+} n64_bprint_buffer;
+
+static void n64_putc( n64_bprint_buffer *buffer, char c ) {
+	buffer->total++;
+
+	if( buffer->start == buffer->end ) return;
+	if( buffer->head != buffer->end ) {
+		*(buffer->head++) = c;
+	} else if( buffer->callback( buffer->userdata, (unsigned int)(buffer->head - buffer->start) ) ) {
+		buffer->head = buffer->start;
+	} else {
+		buffer->end = buffer->start;
+	}
+}
+
+typedef struct {
+	unsigned int width;
+	unsigned int precision;
+	unsigned int defaultPrecision;
+	char type;
+	unsigned char dataSize;
+	n64_smallbool leftAligned;
+	n64_smallbool alternate;
+	n64_smallbool capitalize;
+	char padchar;
+	char poschar;
+} n64_format_args;
+
+static void n64_format_nan_or_inf(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	n64_bool isNegative,
+	n64_bool isNaN
+) {
+	const unsigned int len = (isNegative || format->poschar != '\0') ? 4u : 3u;
+	if( !format->leftAligned ) {
+		for( unsigned int i = len; i < format->width; i++ ) {
+			n64_putc( buffer, ' ' );
+		}
+	}
+
+	if( isNegative ) {
+		n64_putc( buffer, '-' );
+	} else if( format->poschar != '\0' ) {
+		n64_putc( buffer, format->poschar );
+	}
+
+	if( format->capitalize ) {
+		if( isNaN ) {
+			n64_putc( buffer, 'N' );
+			n64_putc( buffer, 'A' );
+			n64_putc( buffer, 'N' );
+		} else {
+			n64_putc( buffer, 'I' );
+			n64_putc( buffer, 'N' );
+			n64_putc( buffer, 'F' );
+		}
+	} else {
+		if( isNaN ) {
+			n64_putc( buffer, 'n' );
+			n64_putc( buffer, 'a' );
+			n64_putc( buffer, 'n' );
+		} else {
+			n64_putc( buffer, 'i' );
+			n64_putc( buffer, 'n' );
+			n64_putc( buffer, 'f' );
+		}
+	}
+
+	if( format->leftAligned ) {
+		for( unsigned int i = len; i < format->width; i++ ) {
+			n64_putc( buffer, ' ' );
+		}
+	}
+}
+
+static const unsigned int s_u32_pow10[10] = {
+	1u,
+	10u,
+	100u,
+	1000u,
+	10000u,
+	100000u,
+	1000000u,
+	10000000u,
+	100000000u,
+	1000000000u
+};
+
+static const unsigned long long s_u64_pow10[18] = {
+	1ull,
+	10ull,
+	100ull,
+	1000ull,
+	10000ull,
+	100000ull,
+	1000000ull,
+	10000000ull,
+	100000000ull,
+	1000000000ull,
+	10000000000ull,
+	100000000000ull,
+	1000000000000ull,
+	10000000000000ull,
+	100000000000000ull,
+	1000000000000000ull,
+	10000000000000000ull,
+	100000000000000000ull,
+};
+
+static void n64_float_exp_helper(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	unsigned int precision,
+	char *digitBuffer,
+	int originalExponent,
+	unsigned int digits,
+	n64_bool negative,
+	n64_bool trimTrailingZeros
+) {
+	if( trimTrailingZeros ) {
+		if( digits > precision + 1u ) {
+			digits = precision + 1u;
+		}
+
+		while( digits > 1u ) {
+			if( digitBuffer[digits - 1u] != '0' ) break;
+			digits--;
+		}
+
+		if( digits <= precision ) {
+			precision = digits ? (digits - 1u) : 0u;
+		}
+	}
+
+	if( !digits ) {
+		digitBuffer[0] = '0';
+		digits = 1u;
+	}
+
+	int e = originalExponent + (int)digits - 1;
+	unsigned int len = precision ? (precision + 6u) : (format->alternate ? 6u : 5u);
+	if( negative || format->poschar != '\0' ) len++;
+	if( e >= 100 || e <= -100 ) len++;
+
+	if( !format->leftAligned && format->padchar == ' ' ) {
+		for( unsigned int i = len; i < format->width; i++ ) {
+			n64_putc( buffer, ' ' );
+		}
+	}
+
+	if( negative ) {
+		n64_putc( buffer, '-' );
+	} else if( format->poschar != '\0' ) {
+		n64_putc( buffer, format->poschar );
+	}
+
+	if( !format->leftAligned && format->padchar == '0' ) {
+		for( unsigned int i = len; i < format->width; i++ ) {
+			n64_putc( buffer, '0' );
+		}
+	}
+
+	n64_putc( buffer, digitBuffer[digits - 1] );
+	if( precision || format->alternate ) n64_putc( buffer, '.' );
+	for( int i = (int)digits - 2; i >= 0; i-- ) {
+		n64_putc( buffer, digitBuffer[i] );
+	}
+
+	if( !trimTrailingZeros ) {
+		for( unsigned int i = digits; i <= precision; i++ ) {
+			n64_putc( buffer, '0' );
+		}
+	}
+
+	n64_putc( buffer, format->capitalize ? 'E' : 'e' );
+	if( e < 0 ) {
+		n64_putc( buffer, '-' );
+		e = -e;
+	} else {
+		n64_putc( buffer, '+' );
+	}
+
+	if( e >= 100 ) n64_putc( buffer, '0' + (char)(e / 100) );
+	n64_putc( buffer, '0' + (char)((e / 10) % 10) );
+	n64_putc( buffer, '0' + (char)(e % 10) );
+
+	if( format->leftAligned ) {
+		for( unsigned int i = len; i < format->width; i++ ) {
+			n64_putc( buffer, ' ' );
+		}
+	}
+}
+
+static void n64_format_float_exp(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	ryu_floating_decimal_32 decimal,
+	unsigned int precision,
+	n64_bool negative,
+	n64_bool trimTrailingZeros
+) {
+	char digitBuffer[9];
+	unsigned int digits = decimalLength9( decimal.mantissa ) - 1u;
+	if( digits > precision ) {
+		decimal.mantissa /= s_u32_pow10[digits - precision - 1u];
+		if( decimal.mantissa % 10u >= 5u ) {
+			decimal.mantissa += 10u;
+			if( decimal.mantissa >= s_u32_pow10[precision + 2u] ) {
+				decimal.mantissa /= 10u;
+				decimal.exponent++;
+			}
+		}
+		decimal.mantissa /= 10u;
+		decimal.exponent += (int)(digits - precision);
+	}
+
+	if( trimTrailingZeros ) {
+		while( decimal.mantissa && !(decimal.mantissa % 10u) ) {
+			decimal.mantissa /= 10u;
+			decimal.exponent++;
+		}
+		precision = decimalLength9( decimal.mantissa ) - 1u;
+	}
+
+	digits = 0u;
+	while( decimal.mantissa ) {
+		digitBuffer[digits++] = '0' + (char)(decimal.mantissa % 10u);
+		decimal.mantissa /= 10u;
+	}
+
+	n64_float_exp_helper( buffer, format, precision, digitBuffer, decimal.exponent, digits, negative, trimTrailingZeros );
+}
+
+static void n64_format_double_exp(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	ryu_floating_decimal_64 decimal,
+	unsigned int precision,
+	n64_bool negative,
+	n64_bool trimTrailingZeros
+) {
+	char digitBuffer[17];
+	unsigned int digits = decimalLength17( decimal.mantissa ) - 1u;
+	if( digits > precision ) {
+		decimal.mantissa /= s_u64_pow10[digits - precision - 1u];
+		if( decimal.mantissa % 10ull >= 5ull ) {
+			decimal.mantissa += 10ull;
+			if( decimal.mantissa >= s_u64_pow10[precision + 2u] ) {
+				decimal.mantissa /= 10ull;
+				decimal.exponent++;
+			}
+		}
+		decimal.mantissa /= 10ull;
+		decimal.exponent += (int)(digits - precision);
+	}
+
+	if( trimTrailingZeros ) {
+		while( decimal.mantissa && !(decimal.mantissa % 10ull) ) {
+			decimal.mantissa /= 10ull;
+			decimal.exponent++;
+		}
+		precision = decimalLength17( decimal.mantissa ) - 1u;
+	}
+
+	digits = 0u;
+	while( decimal.mantissa ) {
+		digitBuffer[digits++] = '0' + (char)(decimal.mantissa % 10u);
+		decimal.mantissa /= 10u;
+	}
+
+	n64_float_exp_helper( buffer, format, precision, digitBuffer, decimal.exponent, digits, negative, trimTrailingZeros );
+}
+
+#define n64_format_fixed_generic( FTYPE, FBITS, FDECLEN, ITYPE, ISUFFIX ) \
+static void n64_format_##FTYPE##_fixed(\
+	n64_bprint_buffer *buffer,\
+	const n64_format_args *format,\
+	ryu_floating_decimal_##FBITS decimal,\
+	unsigned int precision,\
+	n64_bool negative,\
+	n64_bool trimTrailingZeros\
+) {\
+	const unsigned int digits = decimalLength##FDECLEN( decimal.mantissa );\
+	const int e = decimal.exponent + (int)digits - 1;\
+\
+	unsigned int integerDigits;\
+	unsigned int decimalDigits;\
+	unsigned int decimalLeadingZeros;\
+	ITYPE integerPart;\
+	ITYPE decimalPart;\
+\
+	if( e < 0 ) {\
+		integerDigits = 1u;\
+		decimalDigits = digits;\
+		decimalLeadingZeros = (unsigned int)(-e) - 1u;\
+		integerPart = 0##ISUFFIX;\
+		decimalPart = decimal.mantissa;\
+	} else if( decimal.exponent >= 0 ) {\
+		integerDigits = digits;\
+		decimalDigits = 0u;\
+		decimalLeadingZeros = 0u;\
+		integerPart = decimal.mantissa;\
+		decimalPart = 0##ISUFFIX;\
+	} else {\
+		integerDigits = (unsigned int)e + 1u;\
+		decimalDigits = digits - integerDigits;\
+		decimalLeadingZeros = 0u;\
+		integerPart = decimal.mantissa / s_u##FBITS##_pow10[decimalDigits];\
+		decimalPart = decimal.mantissa % s_u##FBITS##_pow10[decimalDigits];\
+	}\
+\
+	if( decimalLeadingZeros >= precision ) {\
+		decimalLeadingZeros = precision;\
+		decimalDigits = 0u;\
+		decimalPart = 0##ISUFFIX;\
+	} else if( decimalDigits + decimalLeadingZeros > precision ) {\
+		const unsigned int newDigits = precision - decimalLeadingZeros;\
+		decimalPart /= s_u##FBITS##_pow10[decimalDigits - newDigits];\
+		decimalDigits = newDigits;\
+	}\
+\
+	if( trimTrailingZeros ) {\
+		while( decimalPart && !(decimalPart % 10##ISUFFIX) ) {\
+			decimalPart /= 10##ISUFFIX;\
+		}\
+		decimalDigits = decimalPart ? decimalLength##FDECLEN( decimalPart ) : 0u;\
+		precision = decimalDigits + decimalLeadingZeros;\
+	}\
+\
+	unsigned int len = integerDigits + precision;\
+	if( decimal.exponent > 0 ) len += (unsigned int)decimal.exponent;\
+	if( negative || format->poschar != '\0' ) len++;\
+	if( precision || format->alternate ) len++;\
+\
+	if( !format->leftAligned && format->padchar == ' ' ) {\
+		for( unsigned int i = len; i < format->width; i++ ) {\
+			n64_putc( buffer, ' ' );\
+		}\
+	}\
+\
+	if( negative ) {\
+		n64_putc( buffer, '-' );\
+	} else if( format->poschar != '\0' ) {\
+		n64_putc( buffer, format->poschar );\
+	}\
+\
+	if( !format->leftAligned && format->padchar == '0' ) {\
+		for( unsigned int i = len; i < format->width; i++ ) {\
+			n64_putc( buffer, '0' );\
+		}\
+	}\
+\
+	char tempBuffer[FDECLEN];\
+	for( unsigned int i = 0u; i < integerDigits; i++ ) {\
+		tempBuffer[i] = '0' + (char)(integerPart % 10##ISUFFIX);\
+		integerPart /= 10##ISUFFIX;\
+	}\
+\
+	for( int i = (int)integerDigits - 1; i >= 0; i-- ) {\
+		n64_putc( buffer, tempBuffer[i] );\
+	}\
+\
+	for( int i = 0; i < decimal.exponent; i++ ) {\
+		n64_putc( buffer, '0' );\
+	}\
+\
+	if( precision || format->alternate ) {\
+		n64_putc( buffer, '.' );\
+	}\
+\
+	for( unsigned int i = 0u; i < decimalLeadingZeros; i++ ) {\
+		n64_putc( buffer, '0' );\
+	}\
+\
+	for( unsigned int i = 0u; i < decimalDigits; i++ ) {\
+		tempBuffer[i] = '0' + (char)(decimalPart % 10##ISUFFIX);\
+		decimalPart /= 10##ISUFFIX;\
+	}\
+\
+	for( int i = (int)decimalDigits - 1; i >= 0; i-- ) {\
+		n64_putc( buffer, tempBuffer[i] );\
+	}\
+\
+	if( !trimTrailingZeros ) {\
+		for( unsigned int i = decimalDigits + decimalLeadingZeros; i < precision; i++ ) {\
+			n64_putc( buffer, '0' );\
+		}\
+	}\
+\
+	if( format->leftAligned ) {\
+		for( unsigned int i = len; i < format->width; i++ ) {\
+			n64_putc( buffer, ' ' );\
+		}\
+	}\
+}
+
+n64_format_fixed_generic( float, 32, 9, unsigned int, u )
+n64_format_fixed_generic( double, 64, 17, unsigned long long, ull )
+
+static inline void n64_format_float(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	float value
+) {
+	unsigned int bits = n64_bit_cast_ftoi( value );
+	const n64_bool sign = (n64_bool)(bits >> 31);
+	const unsigned int e = (bits >> 23) & 0xffu;
+	const unsigned int m = bits & 0x7fffffu;
+
+	if( e == 0xffu ) {
+		n64_format_nan_or_inf( buffer, format, sign, m != 0u );
+		return;
+	}
+
+	const ryu_floating_decimal_32 decimal = ryu_float_to_decimal( m, e );
+	switch( format->type ) {
+		case 'e':
+			n64_format_float_exp( buffer, format, decimal, format->precision, sign, false );
+			break;
+		case 'f':
+			n64_format_float_fixed( buffer, format, decimal, format->precision, sign, false );
+			break;
+		default: {
+			const int P = format->precision ? (int)format->precision : 1;
+			const int exp = decimal.exponent + (int)decimalLength9( decimal.mantissa ) - 1;
+			if( P > exp && exp >= -4 ) {
+				n64_format_float_fixed( buffer, format, decimal, (unsigned int)(P - 1 - exp), sign, !format->alternate );
+			} else {
+				n64_format_float_exp( buffer, format, decimal, (unsigned int)(P - 1), sign, !format->alternate );
+			}
+			break;
+		}
+	}
+}
+
+static inline void n64_format_double(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	double value
+) {
+	unsigned long long bits = n64_bit_cast_dtol( value );
+	const n64_bool sign = (n64_bool)(bits >> 63);
+	const unsigned int e = (unsigned int)(bits >> 52) & 0x7ffu;
+	const unsigned long long m = bits & 0x000fffffffffffffull;
+
+	if( e == 0x7ffu ) {
+		n64_format_nan_or_inf( buffer, format, sign, m != 0ull );
+		return;
+	}
+
+	const ryu_floating_decimal_64 decimal = ryu_double_to_decimal( m, e );
+	switch( format->type ) {
+		case 'e':
+			n64_format_double_exp( buffer, format, decimal, format->precision, sign, false );
+			break;
+		case 'f':
+			n64_format_double_fixed( buffer, format, decimal, format->precision, sign, false );
+			break;
+		default: {
+			const int P = format->precision ? (int)format->precision : 1;
+			const int exp = decimal.exponent + (int)decimalLength17( decimal.mantissa ) - 1;
+			if( P > exp && exp >= -4 ) {
+				n64_format_double_fixed( buffer, format, decimal, (unsigned int)(P - 1 - exp), sign, !format->alternate );
+			} else {
+				n64_format_double_exp( buffer, format, decimal, (unsigned int)(P - 1), sign, !format->alternate );
+			}
+			break;
+		}
+
+	}
+}
+
+static inline void align_number(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	const char *prefix,
+	unsigned int prefixLen,
+	unsigned int minZeros,
+	const char *reverseDigits,
+	unsigned int numDigits
+) {
+	if( !format->leftAligned ) {
+		if( format->padchar == '0' && prefixLen + numDigits < format->width ) {
+			register const unsigned int Z = format->width - prefixLen - numDigits;
+			if( minZeros < Z ) minZeros = Z;
+		}
+
+		for( unsigned int i = prefixLen + minZeros + numDigits; i < format->width; i++ ) {
+			n64_putc( buffer, ' ' );
+		}
+	}
+
+	for( unsigned int i = 0; i < prefixLen; i++ ) {
+		n64_putc( buffer, prefix[i] );
+	}
+
+	for( unsigned int i = 0; i < minZeros; i++ ) {
+		n64_putc( buffer, '0' );
+	}
+
+	for( int i = (int)numDigits - 1; i >= 0; i-- ) {
+		n64_putc( buffer, reverseDigits[i] );
+	}
+
+	if( format->leftAligned ) {
+		for( unsigned int i = prefixLen + minZeros + numDigits; i < format->width; i++ ) {
+			n64_putc( buffer, ' ' );
+		}
+	}
+}
+
+static inline void n64_format_char(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	char value
+) {
+	if( !format->leftAligned ) {
+		for( unsigned int i = 1; i < format->width; i++ ) n64_putc( buffer, ' ' );
+	}
+
+	n64_putc( buffer, value );
+
+	if( format->leftAligned ) {
+		for( unsigned int i = 1; i < format->width; i++ ) n64_putc( buffer, ' ' );
+	}
+}
+
+static inline void n64_format_string(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	const char *value
+) {
+	if( !format->precision ) {
+		for( unsigned int i = 0; i < format->width; i++ ) n64_putc( buffer, ' ' );
+		return;
+	}
+
+	unsigned int i = 0;
+	if( format->leftAligned || !format->width ) {
+		while( value[i] != '\0' && i < format->precision ) n64_putc( buffer, value[i++] );
+		while( i++ < format->width ) n64_putc( buffer, ' ' );
+	} else {
+		register const unsigned int maxpad = (format->precision > format->width) ? format->precision : format->width;
+		while( value[i] != '\0' && i < maxpad ) i++;
+		while( i++ < format->width ) n64_putc( buffer, ' ' );
+		while( *value != '\0' ) n64_putc( buffer, *(value++) );
+	}
+}
+
+static void n64_format_udec32(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	unsigned int value
+) {
+	char digitBuffer[10];
+	unsigned int numDigits = 0u;
+	while( value ) {
+		digitBuffer[numDigits++] = '0' + (char)(value % 10u);
+		value /= 10u;
+	}
+
+	unsigned int minZeros = 0u;
+	if( format->precision > numDigits ) {
+		minZeros = format->precision - numDigits;
+	} else if( !numDigits && format->precision ) {
+		minZeros = 1u;
+	}
+
+	align_number( buffer, format, &format->poschar, format->poschar ? 1u : 0u, minZeros, digitBuffer, numDigits );
+}
+
+static void n64_format_udec64(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	unsigned long long value
+) {
+	char digitBuffer[20];
+	unsigned int numDigits = 0u;
+	while( value ) {
+		digitBuffer[numDigits++] = '0' + (char)(value % 10ull);
+		value /= 10ull;
+	}
+
+	unsigned int minZeros = 0u;
+	if( format->precision > numDigits ) {
+		minZeros = format->precision - numDigits;
+	} else if( !numDigits && format->precision ) {
+		minZeros = 1u;
+	}
+
+	align_number( buffer, format, &format->poschar, format->poschar ? 1u : 0u, minZeros, digitBuffer, numDigits );
+}
+
+static void n64_format_sdec32(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	int value
+) {
+	if( value >= 0 ) {
+		n64_format_udec32( buffer, format, (unsigned int)value );
+		return;
+	} else {
+		char digitBuffer[10];
+		unsigned int numDigits = 0u;
+		while( value ) {
+			digitBuffer[numDigits++] = '0' + (char)(-(value % 10));
+			value /= 10;
+		}
+
+		unsigned int minZeros = 0u;
+		if( format->precision > numDigits ) {
+			minZeros = format->precision - numDigits;
+		} else if( !numDigits && format->precision ) {
+			minZeros = 1u;
+		}
+
+		align_number( buffer, format, "-", 1u, minZeros, digitBuffer, numDigits );
+	}
+}
+
+static inline void n64_format_sdec64(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	long long value
+) {
+	if( value >= 0 ) {
+		n64_format_udec64( buffer, format, (unsigned long long)value );
+		return;
+	} else {
+		char digitBuffer[19];
+		unsigned int numDigits = 0u;
+		while( value ) {
+			digitBuffer[numDigits++] = '0' + (char)(-(value % 10ll));
+			value /= 10ll;
+		}
+
+		unsigned int minZeros = 0u;
+		if( format->precision > numDigits ) {
+			minZeros = format->precision - numDigits;
+		} else if( !numDigits && format->precision ) {
+			minZeros = 1u;
+		}
+
+		align_number( buffer, format, "-", 1u, minZeros, digitBuffer, numDigits );
+	}
+}
+
+static void n64_format_oct32(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	unsigned int value
+) {
+	char digitBuffer[11];
+	unsigned int numDigits = 0u;
+	while( value ) {
+		digitBuffer[numDigits++] = '0' + (char)(value & 0x7u);
+		value >>= 3;
+	}
+
+	unsigned int minZeros = 0u;
+	if( format->precision > numDigits ) {
+		minZeros = format->precision - numDigits;
+	} else if( format->alternate ) {
+		minZeros = 1u;
+	} else if( !numDigits && format->precision ) {
+		minZeros = 1u;
+	}
+
+	align_number( buffer, format, NULL, 0u, minZeros, digitBuffer, numDigits );
+}
+
+static inline void n64_format_oct64(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	unsigned long value
+) {
+	char digitBuffer[22];
+	unsigned int numDigits = 0u;
+	while( value ) {
+		digitBuffer[numDigits++] = '0' + (char)(value & 0x7u);
+		value >>= 3;
+	}
+
+	unsigned int minZeros = 0u;
+	if( format->precision > numDigits ) {
+		minZeros = format->precision - numDigits;
+	} else if( format->alternate ) {
+		minZeros = 1u;
+	} else if( !numDigits && format->precision ) {
+		minZeros = 1u;
+	}
+	
+	align_number( buffer, format, NULL, 0u, minZeros, digitBuffer, numDigits );
+}
+
+static void n64_format_hex32(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	unsigned int value
+) {
+	char digitBuffer[8];
+	unsigned int numDigits = 0u;
+
+	const unsigned int prefixLen = (format->alternate && value) ? 2u : 0u;
+	const char hb = (format->capitalize) ? ('A' - (char)10) : ('a' - (char)10);
+	while( value ) {
+		register const unsigned int h = value & 0xFu;
+		digitBuffer[numDigits++] = (h < 10u) ? ('0' + (char)h) : (hb + (char)h);
+		value >>= 4;
+	}
+
+	const char *const prefix = (format->capitalize) ? "0X" : "0x";
+	const unsigned int minZeros = (format->precision > numDigits) ? (format->precision - numDigits) : 0u;
+
+	align_number( buffer, format, prefix, prefixLen, minZeros, digitBuffer, numDigits );
+}
+
+static void n64_format_hex64(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	unsigned long long value
+) {
+	char digitBuffer[16];
+	unsigned int numDigits = 0u;
+
+	const unsigned int prefixLen = (format->alternate && value) ? 2u : 0u;
+	const char hb = (format->capitalize) ? ('A' - (char)10) : ('a' - (char)10);
+	while( value ) {
+		register const unsigned int h = value & 0xFu;
+		digitBuffer[numDigits++] = (h < 10u) ? ('0' + (char)h) : (hb + (char)h);
+		value >>= 4;
+	}
+
+	const char *const prefix = (format->capitalize) ? "0X" : "0x";
+	const unsigned int minZeros = (format->precision > numDigits) ? (format->precision - numDigits) : 0u;
+
+	align_number( buffer, format, prefix, prefixLen, minZeros, digitBuffer, numDigits );
+}
+
+static void n64_format_bin32(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	unsigned int value
+) {
+	char digitBuffer[32];
+	unsigned int numDigits = 0u;
+
+	const unsigned int prefixLen = (format->alternate && value) ? 2u : 0u;
+	while( value ) {
+		digitBuffer[numDigits++] = (value & 1u) ? '1' : '0';
+		value >>= 1;
+	}
+
+	const char *const prefix = (format->capitalize) ? "0B" : "0b";
+	const unsigned int minZeros = (format->precision > numDigits) ? (format->precision - numDigits) : 0u;
+	
+	align_number( buffer, format, prefix, prefixLen, minZeros, digitBuffer, numDigits );
+}
+
+static void n64_format_bin64(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	unsigned long long value
+) {
+	char digitBuffer[64];
+	unsigned int numDigits = 0u;
+
+	const unsigned int prefixLen = (format->alternate && value) ? 2u : 0u;
+	while( value ) {
+		digitBuffer[numDigits++] = (value & 1ull) ? '1' : '0';
+		value >>= 1;
+	}
+
+	const char *const prefix = (format->capitalize) ? "0B" : "0b";
+	const unsigned int minZeros = (format->precision > numDigits) ? (format->precision - numDigits) : 0u;
+
+	align_number( buffer, format, prefix, prefixLen, minZeros, digitBuffer, numDigits );
+}
+
+__attribute__((always_inline))
+static inline void n64_format_hexreal_helper(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	const char *mbuff,
+	unsigned int mdigits,
+	char firstDigit,
+	int exp,
+	n64_bool sign,
+	n64_bool nonzeroMantissa
+) {
+	unsigned int precision = format->precision;
+	if( precision == 0xFFFFFFFFu ) {
+		precision = mdigits;
+	} else if( mdigits > precision ) {
+		mdigits = precision;
+	}
+
+	unsigned int len = precision + 6u;
+	if( sign || format->poschar != '\0' ) len++;
+	if( format->alternate || nonzeroMantissa ) len++;
+	if( exp >= 1000 || exp <= -1000 ) { len += 3; }
+	else if( exp >= 100 || exp <= -100 ) { len += 2; }
+	else if( exp >= 10 || exp <= -10 ) len++;
+
+	if( !format->leftAligned && format->padchar == ' ' ) {
+		for( unsigned int i = len; i < format->width; i++ ) n64_putc( buffer, ' ' );
+	}
+
+	if( sign ) {
+		n64_putc( buffer, '-' );
+	} else if( format->poschar != '\0' ) {
+		n64_putc( buffer, format->poschar );
+	}
+
+	n64_putc( buffer, '0' );
+	n64_putc( buffer, format->capitalize ? 'X' : 'x' );
+
+	if( !format->leftAligned && format->padchar == '0' ) {
+		for( unsigned int i = len; i < format->width; i++ ) n64_putc( buffer, '0' );
+	}
+
+	n64_putc( buffer, firstDigit );
+
+	if( format->alternate || nonzeroMantissa ) {
+		n64_putc( buffer, '.' );
+	}
+
+	for( unsigned int i = 0u; i < mdigits; i++ ) {
+		n64_putc( buffer, mbuff[i] );
+	}
+
+	for( unsigned int i = mdigits; i < precision; i++ ) {
+		n64_putc( buffer, '0' );
+	}
+
+	n64_putc( buffer, format->capitalize ? 'P' : 'p' );
+	if( exp < 0 ) {
+		n64_putc( buffer, '-' );
+		exp = -exp;
+	} else {
+		n64_putc( buffer, '+' );
+	}
+
+	char ebuff[4];
+	int elen = 0u;
+	do {
+		ebuff[elen++] = '0' + (char)(exp % 10);
+		exp /= 10;
+	} while( exp );
+
+	for( int i = elen - 1; i >= 0; i-- ) {
+		n64_putc( buffer, ebuff[i] );
+	}
+
+	if( format->leftAligned ) {
+		for( unsigned int i = len; i < format->width; i++ ) n64_putc( buffer, ' ' );
+	}
+}
+
+static inline void n64_format_hexfloat(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	float value
+) {
+	const unsigned int bits = n64_bit_cast_ftoi( value );
+
+	const n64_bool sign = (n64_bool)(bits >> 31);
+	int exp = ((int)(bits >> 23) & 0xFF) - 127;
+	unsigned int mantissa = (bits & 0x7FFFFF) << 1;
+	const n64_bool nonzeroMantissa = (mantissa != 0u);
+
+	if( exp == 128 ) {
+		// NaN or Infinity
+		n64_format_nan_or_inf( buffer, format, sign, nonzeroMantissa );
+		return;
+	}
+
+	char firstDigit = (nonzeroMantissa || exp != -127) ? '1' : '0';
+	if( mantissa && format->precision < 6u ) {
+		// rounding
+		mantissa += 0x8u << (20u - (format->precision << 2));
+		if( mantissa >> 24 ) {
+			firstDigit++;
+			mantissa = 0u;
+		}
+	}
+
+	if( exp == -127 ) {
+		if( nonzeroMantissa ) {
+			// subnormal
+			exp++;
+			firstDigit--;
+		} else {
+			// zero
+			exp = 0;
+			firstDigit = '0';
+		}
+	}
+
+	unsigned int mdigits;
+	char mbuff[6];
+
+	if( mantissa ) {
+		mdigits = 6u;
+		while( !(mantissa & 0xFu) ) {
+			mdigits--;
+			mantissa >>= 4;
+		}
+
+		const char *const hexits = format->capitalize ? "0123456789ABCDEF" : "0123456789abcdef";
+		for( int i = (int)mdigits - 1; i >= 0; i-- ) {
+			mbuff[i] = hexits[mantissa & 0xFu];
+			mantissa >>= 4;
+		}
+	} else {
+		mdigits = 0u;
+	}
+
+	n64_format_hexreal_helper( buffer, format, mbuff, mdigits, firstDigit, exp, sign, nonzeroMantissa );
+}
+
+static inline void n64_format_hexdouble(
+	n64_bprint_buffer *buffer,
+	const n64_format_args *format,
+	double value
+) {
+	const unsigned long long bits = n64_bit_cast_dtol( value );
+
+	const n64_bool sign = (n64_bool)(bits >> 63);
+	int exp = ((int)(bits >> 52) & 0x7FF) - 1023;
+	unsigned long long mantissa = bits & 0x000FFFFFFFFFFFFFull;
+	const n64_bool nonzeroMantissa = (mantissa != 0ull);
+
+	if( exp == 1024 ) {
+		// NaN or Infinity
+		n64_format_nan_or_inf( buffer, format, sign, nonzeroMantissa );
+		return;
+	}
+
+	char firstDigit = (nonzeroMantissa || exp != -1023) ? '1' : '0';
+	if( mantissa && format->precision < 13u ) {
+		// rounding
+		mantissa += 0x8ull << (48u - (format->precision << 2));
+		if( mantissa >> 52 ) {
+			firstDigit++;
+			mantissa = 0ull;
+		}
+	}
+
+	if( exp == -1023 ) {
+		if( nonzeroMantissa ) {
+			// subnormal
+			exp++;
+			firstDigit--;
+		} else {
+			// zero
+			exp = 0;
+			firstDigit = '0';
+		}
+	}
+
+	unsigned int mdigits;
+	char mbuff[13];
+
+	if( mantissa ) {
+		mdigits = 13u;
+		while( !(mantissa & 0xFull) ) {
+			mdigits--;
+			mantissa >>= 4;
+		}
+
+		const char *const hexits = format->capitalize ? "0123456789ABCDEF" : "0123456789abcdef";
+		for( int i = (int)mdigits - 1; i >= 0; i-- ) {
+			mbuff[i] = hexits[mantissa & 0xFull];
+			mantissa >>= 4;
+		}
+	} else {
+		mdigits = 0u;
+	}
+
+	n64_format_hexreal_helper( buffer, format, mbuff, mdigits, firstDigit, exp, sign, nonzeroMantissa );
+}
+
+static inline void n64_format_pointer(
+	n64_bprint_buffer *buffer,
+	n64_format_args *format,
+	const void *value
+) {
+	if( value == NULL ) {
+		if( !format->leftAligned ) {
+			for( unsigned int i = 5; i < format->width; i++ ) n64_putc( buffer, ' ' );
+		}
+
+		n64_putc( buffer, '(' );
+		n64_putc( buffer, 'n' );
+		n64_putc( buffer, 'i' );
+		n64_putc( buffer, 'l' );
+		n64_putc( buffer, ')' );
+
+		if( format->leftAligned ) {
+			for( unsigned int i = 5; i < format->width; i++ ) n64_putc( buffer, ' ' );
+		}
+
+		return;
+	}
+
+	format->alternate = true;
+	format->capitalize = false;
+	n64_format_hex32( buffer, format, (unsigned int)value );
+}
diff --git a/lib/n64-libc/n64-stdio-ryu.c.inc b/lib/n64-libc/n64-stdio-ryu.c.inc
new file mode 100644
index 00000000..eb94b782
--- /dev/null
+++ b/lib/n64-libc/n64-stdio-ryu.c.inc
@@ -0,0 +1,910 @@
+// Copyright 2018 Ulf Adams
+//
+// The contents of this file may be used under the terms of the Apache License,
+// Version 2.0.
+//
+//    (See accompanying file LICENSE-Apache or copy at
+//     http://www.apache.org/licenses/LICENSE-2.0)
+//
+// Alternatively, the contents of this file may be used under the terms of
+// the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE-Boost or copy at
+//     https://www.boost.org/LICENSE_1_0.txt)
+//
+// Unless required by applicable law or agreed to in writing, this software
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
+
+// NOTICE:
+// This file was adapted from the original Apache licensed code, whose sources
+// may be found at:
+// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/d2s.c
+// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/f2s.c
+// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/d2s_small_table.h
+// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/f2s_full_table.h
+// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/d2s_intrinsics.h
+// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/f2s_intrinsics.h
+// https://github.com/ulfjack/ryu/blob/1264a946ba66eab320e927bfd2362e0c8580c42f/ryu/common.h
+//
+// The files have been merged into this single file, all functions and debug
+// code that were not necessary for this library were stripped out, and all
+// explicit integer length types were swapped out for C keywords to remove the
+// dependency on stdint.h.
+// The f2d and d2d functions were also renamed to ryu_float_to_decimal and
+// ryu_double_to_decimal for the sake of clarity.
+
+#include "n64-stdbool.h"
+
+#define DOUBLE_MANTISSA_BITS 52
+#define DOUBLE_EXPONENT_BITS 11
+#define DOUBLE_BIAS 1023
+
+#define FLOAT_MANTISSA_BITS 23
+#define FLOAT_EXPONENT_BITS 8
+#define FLOAT_BIAS 127
+
+#define FLOAT_POW5_INV_BITCOUNT 59
+#define FLOAT_POW5_BITCOUNT 61
+
+#define DOUBLE_POW5_INV_BITCOUNT 125
+#define DOUBLE_POW5_BITCOUNT 125
+
+#define POW5_TABLE_SIZE 26
+
+#define FLOAT_POW5_INV_BITCOUNT 59
+#define FLOAT_POW5_BITCOUNT 61
+
+static unsigned long long FLOAT_POW5_INV_SPLIT[55] = {
+  576460752303423489ull,  461168601842738791ull,  368934881474191033ull,  295147905179352826ull,
+  472236648286964522ull,  377789318629571618ull,  302231454903657294ull,  483570327845851670ull,
+  386856262276681336ull,  309485009821345069ull,  495176015714152110ull,  396140812571321688ull,
+  316912650057057351ull,  507060240091291761ull,  405648192073033409ull,  324518553658426727ull,
+  519229685853482763ull,  415383748682786211ull,  332306998946228969ull,  531691198313966350ull,
+  425352958651173080ull,  340282366920938464ull,  544451787073501542ull,  435561429658801234ull,
+  348449143727040987ull,  557518629963265579ull,  446014903970612463ull,  356811923176489971ull,
+  570899077082383953ull,  456719261665907162ull,  365375409332725730ull,  292300327466180584ull,
+  467680523945888934ull,  374144419156711148ull,  299315535325368918ull,  478904856520590269ull,
+  383123885216472215ull,  306499108173177772ull,  490398573077084435ull,  392318858461667548ull,
+  313855086769334039ull,  502168138830934462ull,  401734511064747569ull,  321387608851798056ull,
+  514220174162876889ull,  411376139330301511ull,  329100911464241209ull,  526561458342785934ull,
+  421249166674228747ull,  336999333339382998ull,  539198933343012796ull,  431359146674410237ull,
+  345087317339528190ull,  552139707743245103ull,  441711766194596083ull
+};
+static const unsigned long long FLOAT_POW5_SPLIT[47] = {
+  1152921504606846976ull, 1441151880758558720ull, 1801439850948198400ull, 2251799813685248000ull,
+  1407374883553280000ull, 1759218604441600000ull, 2199023255552000000ull, 1374389534720000000ull,
+  1717986918400000000ull, 2147483648000000000ull, 1342177280000000000ull, 1677721600000000000ull,
+  2097152000000000000ull, 1310720000000000000ull, 1638400000000000000ull, 2048000000000000000ull,
+  1280000000000000000ull, 1600000000000000000ull, 2000000000000000000ull, 1250000000000000000ull,
+  1562500000000000000ull, 1953125000000000000ull, 1220703125000000000ull, 1525878906250000000ull,
+  1907348632812500000ull, 1192092895507812500ull, 1490116119384765625ull, 1862645149230957031ull,
+  1164153218269348144ull, 1455191522836685180ull, 1818989403545856475ull, 2273736754432320594ull,
+  1421085471520200371ull, 1776356839400250464ull, 2220446049250313080ull, 1387778780781445675ull,
+  1734723475976807094ull, 2168404344971008868ull, 1355252715606880542ull, 1694065894508600678ull,
+  2117582368135750847ull, 1323488980084844279ull, 1654361225106055349ull, 2067951531382569187ull,
+  1292469707114105741ull, 1615587133892632177ull, 2019483917365790221ull
+};
+
+static const unsigned long long DOUBLE_POW5_INV_SPLIT2[15][2] = {
+  {                    1ull, 2305843009213693952ull },
+  {  5955668970331000884ull, 1784059615882449851ull },
+  {  8982663654677661702ull, 1380349269358112757ull },
+  {  7286864317269821294ull, 2135987035920910082ull },
+  {  7005857020398200553ull, 1652639921975621497ull },
+  { 17965325103354776697ull, 1278668206209430417ull },
+  {  8928596168509315048ull, 1978643211784836272ull },
+  { 10075671573058298858ull, 1530901034580419511ull },
+  {   597001226353042382ull, 1184477304306571148ull },
+  {  1527430471115325346ull, 1832889850782397517ull },
+  { 12533209867169019542ull, 1418129833677084982ull },
+  {  5577825024675947042ull, 2194449627517475473ull },
+  { 11006974540203867551ull, 1697873161311732311ull },
+  { 10313493231639821582ull, 1313665730009899186ull },
+  { 12701016819766672773ull, 2032799256770390445ull }
+};
+static const unsigned int POW5_INV_OFFSETS[19] = {
+  0x54544554u, 0x04055545u, 0x10041000u, 0x00400414u, 0x40010000u, 0x41155555u,
+  0x00000454u, 0x00010044u, 0x40000000u, 0x44000041u, 0x50454450u, 0x55550054u,
+  0x51655554u, 0x40004000u, 0x01000001u, 0x00010500u, 0x51515411u, 0x05555554u,
+  0x00000000u
+};
+
+static const unsigned long long DOUBLE_POW5_SPLIT2[13][2] = {
+  {                    0ull, 1152921504606846976ull },
+  {                    0ull, 1490116119384765625ull },
+  {  1032610780636961552ull, 1925929944387235853ull },
+  {  7910200175544436838ull, 1244603055572228341ull },
+  { 16941905809032713930ull, 1608611746708759036ull },
+  { 13024893955298202172ull, 2079081953128979843ull },
+  {  6607496772837067824ull, 1343575221513417750ull },
+  { 17332926989895652603ull, 1736530273035216783ull },
+  { 13037379183483547984ull, 2244412773384604712ull },
+  {  1605989338741628675ull, 1450417759929778918ull },
+  {  9630225068416591280ull, 1874621017369538693ull },
+  {   665883850346957067ull, 1211445438634777304ull },
+  { 14931890668723713708ull, 1565756531257009982ull }
+};
+static const unsigned int POW5_OFFSETS[21] = {
+  0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x40000000u, 0x59695995u,
+  0x55545555u, 0x56555515u, 0x41150504u, 0x40555410u, 0x44555145u, 0x44504540u,
+  0x45555550u, 0x40004000u, 0x96440440u, 0x55565565u, 0x54454045u, 0x40154151u,
+  0x55559155u, 0x51405555u, 0x00000105u
+};
+
+
+static const unsigned long long DOUBLE_POW5_TABLE[POW5_TABLE_SIZE] = {
+  1ull, 5ull, 25ull, 125ull, 625ull, 3125ull, 15625ull, 78125ull, 390625ull,
+  1953125ull, 9765625ull, 48828125ull, 244140625ull, 1220703125ull, 6103515625ull,
+  30517578125ull, 152587890625ull, 762939453125ull, 3814697265625ull,
+  19073486328125ull, 95367431640625ull, 476837158203125ull,
+  2384185791015625ull, 11920928955078125ull, 59604644775390625ull,
+  298023223876953125ull //, 1490116119384765625ull
+};
+
+// Returns e == 0 ? 1 : [log_2(5^e)]; requires 0 <= e <= 3528.
+static inline int log2pow5(const int e) {
+  // This approximation works up to the point that the multiplication overflows at e = 3529.
+  // If the multiplication were done in 64 bits, it would fail at 5^4004 which is just greater
+  // than 2^9297.
+  return (int) ((((unsigned int) e) * 1217359u) >> 19);
+}
+
+// Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528.
+static inline int pow5bits(const int e) {
+  // This approximation works up to the point that the multiplication overflows at e = 3529.
+  // If the multiplication were done in 64 bits, it would fail at 5^4004 which is just greater
+  // than 2^9297.
+  return (int) (((((unsigned int) e) * 1217359u) >> 19) + 1u);
+}
+
+// Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528.
+static inline int ceil_log2pow5(const int e) {
+  return log2pow5(e) + 1;
+}
+
+// Returns floor(log_10(2^e)); requires 0 <= e <= 1650.
+static inline unsigned int log10Pow2(const int e) {
+  // The first value this approximation fails for is 2^1651 which is just greater than 10^297.
+  return (((unsigned int) e) * 78913u) >> 18;
+}
+
+// Returns floor(log_10(5^e)); requires 0 <= e <= 2620.
+static inline unsigned int log10Pow5(const int e) {
+  // The first value this approximation fails for is 5^2621 which is just greater than 10^1832.
+  return (((unsigned int) e) * 732923u) >> 20;
+}
+
+// Returns the number of decimal digits in v, which must not contain more than 9 digits.
+static inline unsigned int decimalLength9(const unsigned int v) {
+  // Function precondition: v is not a 10-digit number.
+  // (f2s: 9 digits are sufficient for round-tripping.)
+  // (d2fixed: We print 9-digit blocks.)
+  if (v >= 100000000u) { return 9u; }
+  if (v >= 10000000u) { return 8u; }
+  if (v >= 1000000u) { return 7u; }
+  if (v >= 100000u) { return 6u; }
+  if (v >= 10000u) { return 5u; }
+  if (v >= 1000u) { return 4u; }
+  if (v >= 100u) { return 3u; }
+  if (v >= 10u) { return 2u; }
+  return 1;
+}
+
+static inline unsigned int decimalLength17(const unsigned long long v) {
+  // This is slightly faster than a loop.
+  // The average output length is 16.38 digits, so we check high-to-low.
+  // Function precondition: v is not an 18, 19, or 20-digit number.
+  // (17 digits are sufficient for round-tripping.)
+  if (v >= 10000000000000000ull) { return 17u; }
+  if (v >= 1000000000000000ull) { return 16u; }
+  if (v >= 100000000000000ull) { return 15u; }
+  if (v >= 10000000000000ull) { return 14u; }
+  if (v >= 1000000000000ull) { return 13u; }
+  if (v >= 100000000000ull) { return 12u; }
+  if (v >= 10000000000ull) { return 11u; }
+  if (v >= 1000000000ull) { return 10u; }
+  if (v >= 100000000ull) { return 9u; }
+  if (v >= 10000000ull) { return 8u; }
+  if (v >= 1000000ull) { return 7u; }
+  if (v >= 100000ull) { return 6u; }
+  if (v >= 10000ull) { return 5u; }
+  if (v >= 1000ull) { return 4u; }
+  if (v >= 100ull) { return 3u; }
+  if (v >= 10ull) { return 2u; }
+  return 1;
+}
+
+static inline unsigned long long umul128(const unsigned long long a, const unsigned long long b, unsigned long long* const productHi) {
+  const unsigned int aLo = (unsigned int)a;
+  const unsigned int aHi = (unsigned int)(a >> 32);
+  const unsigned int bLo = (unsigned int)b;
+  const unsigned int bHi = (unsigned int)(b >> 32);
+
+  const unsigned long long b00 = (unsigned long long)aLo * bLo;
+  const unsigned long long b01 = (unsigned long long)aLo * bHi;
+  const unsigned long long b10 = (unsigned long long)aHi * bLo;
+  const unsigned long long b11 = (unsigned long long)aHi * bHi;
+
+  const unsigned int b00Lo = (unsigned int)b00;
+  const unsigned int b00Hi = (unsigned int)(b00 >> 32);
+
+  const unsigned long long mid1 = b10 + b00Hi;
+  const unsigned int mid1Lo = (unsigned int)(mid1);
+  const unsigned int mid1Hi = (unsigned int)(mid1 >> 32);
+
+  const unsigned long long mid2 = b01 + mid1Lo;
+  const unsigned int mid2Lo = (unsigned int)(mid2);
+  const unsigned int mid2Hi = (unsigned int)(mid2 >> 32);
+
+  const unsigned long long pHi = b11 + mid1Hi + mid2Hi;
+  const unsigned long long pLo = ((unsigned long long)mid2Lo << 32) | b00Lo;
+
+  *productHi = pHi;
+  return pLo;
+}
+
+static inline unsigned long long shiftright128(const unsigned long long lo, const unsigned long long hi, const unsigned int dist) {
+  // We don't need to handle the case dist >= 64 here (see above).
+  return (hi << (64u - dist)) | (lo >> dist);
+}
+
+// Computes 5^i in the form required by Ryu, and stores it in the given pointer.
+static inline void double_computePow5(const unsigned int i, unsigned long long* const result) {
+  const unsigned int base = i / POW5_TABLE_SIZE;
+  const unsigned int base2 = base * POW5_TABLE_SIZE;
+  const unsigned int offset = i - base2;
+  const unsigned long long* const mul = DOUBLE_POW5_SPLIT2[base];
+  if (offset == 0) {
+    result[0] = mul[0];
+    result[1] = mul[1];
+    return;
+  }
+  const unsigned long long m = DOUBLE_POW5_TABLE[offset];
+  unsigned long long high1;
+  const unsigned long long low1 = umul128(m, mul[1], &high1);
+  unsigned long long high0;
+  const unsigned long long low0 = umul128(m, mul[0], &high0);
+  const unsigned long long sum = high0 + low1;
+  if (sum < high0) {
+    ++high1; // overflow into high1
+  }
+  // high1 | sum | low0
+  const unsigned int delta = pow5bits(i) - pow5bits(base2);
+  result[0] = shiftright128(low0, sum, delta) + ((POW5_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3);
+  result[1] = shiftright128(sum, high1, delta);
+}
+
+// Computes 5^-i in the form required by Ryu, and stores it in the given pointer.
+static inline void double_computeInvPow5(const unsigned int i, unsigned long long* const result) {
+  const unsigned int base = (i + POW5_TABLE_SIZE - 1) / POW5_TABLE_SIZE;
+  const unsigned int base2 = base * POW5_TABLE_SIZE;
+  const unsigned int offset = base2 - i;
+  const unsigned long long* const mul = DOUBLE_POW5_INV_SPLIT2[base]; // 1/5^base2
+  if (offset == 0) {
+    result[0] = mul[0];
+    result[1] = mul[1];
+    return;
+  }
+  const unsigned long long m = DOUBLE_POW5_TABLE[offset];
+  unsigned long long high1;
+  const unsigned long long low1 = umul128(m, mul[1], &high1);
+  unsigned long long high0;
+  const unsigned long long low0 = umul128(m, mul[0] - 1, &high0);
+  const unsigned long long sum = high0 + low1;
+  if (sum < high0) {
+    ++high1; // overflow into high1
+  }
+  // high1 | sum | low0
+  const unsigned int delta = pow5bits(base2) - pow5bits(i);
+  result[0] = shiftright128(low0, sum, delta) + 1 + ((POW5_INV_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3u);
+  result[1] = shiftright128(sum, high1, delta);
+}
+
+#ifdef _ABIO32
+
+// Returns the high 64 bits of the 128-bit product of a and b.
+__attribute__((always_inline))
+static inline unsigned long long umulh(const unsigned long long a, const unsigned long long b) {
+  // Reuse the umul128 implementation.
+  // Optimizers will likely eliminate the instructions used to compute the
+  // low part of the product.
+  unsigned long long hi;
+  umul128(a, b, &hi);
+  return hi;
+}
+
+// On 32-bit platforms, compilers typically generate calls to library
+// functions for 64-bit divisions, even if the divisor is a constant.
+//
+// E.g.:
+// https://bugs.llvm.org/show_bug.cgi?id=37932
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443
+//
+// The functions here perform division-by-constant using multiplications
+// in the same way as 64-bit compilers would do.
+//
+// NB:
+// The multipliers and shift values are the ones generated by clang x64
+// for expressions like x/5, x/10, etc.
+__attribute__((always_inline))
+static inline unsigned long long div5(const unsigned long long x) {
+  return umulh(x, 0xCCCCCCCCCCCCCCCDull) >> 2;
+}
+
+__attribute__((always_inline))
+static inline unsigned long long div10(const unsigned long long x) {
+  return umulh(x, 0xCCCCCCCCCCCCCCCDull) >> 3;
+}
+
+__attribute__((always_inline))
+static inline unsigned long long div100(const unsigned long long x) {
+  return umulh(x >> 2, 0x28F5C28F5C28F5C3ull) >> 2;
+}
+
+__attribute__((always_inline))
+static inline unsigned long long div1e8(const unsigned long long x) {
+  return umulh(x, 0xABCC77118461CEFDull) >> 26;
+}
+
+__attribute__((always_inline))
+static inline unsigned long long div1e9(const unsigned long long x) {
+  return umulh(x >> 9, 0x44B82FA09B5A53ull) >> 11;
+}
+
+__attribute__((always_inline))
+static inline unsigned int mod1e9(const unsigned long long x) {
+  // Avoid 64-bit math as much as possible.
+  // Returning (uint32_t) (x - 1000000000 * div1e9(x)) would
+  // perform 32x64-bit multiplication and 64-bit subtraction.
+  // x and 1000000000 * div1e9(x) are guaranteed to differ by
+  // less than 10^9, so their highest 32 bits must be identical,
+  // so we can truncate both sides to uint32_t before subtracting.
+  // We can also simplify (uint32_t) (1000000000 * div1e9(x)).
+  // We can truncate before multiplying instead of after, as multiplying
+  // the highest 32 bits of div1e9(x) can't affect the lowest 32 bits.
+  return ((unsigned int) x) - 1000000000u * ((unsigned int) div1e9(x));
+}
+
+#else
+
+__attribute__((always_inline))
+static inline unsigned long long div5(const unsigned long long x) {
+  return x / 5ull;
+}
+
+__attribute__((always_inline))
+static inline unsigned long long div10(const unsigned long long x) {
+  return x / 10ull;
+}
+
+__attribute__((always_inline))
+static inline unsigned long long div100(const unsigned long long x) {
+  return x / 100ull;
+}
+
+__attribute__((always_inline))
+static inline unsigned long long div1e8(const unsigned long long x) {
+  return x / 100000000ull;
+}
+
+__attribute__((always_inline))
+static inline unsigned long long div1e9(const unsigned long long x) {
+  return x / 1000000000ull;
+}
+
+__attribute__((always_inline))
+static inline unsigned int mod1e9(const unsigned long long x) {
+  return (unsigned int) (x - 1000000000ull * div1e9(x));
+}
+
+#endif
+
+static inline unsigned int pow5Factor(unsigned long long value) {
+  const unsigned long long m_inv_5 = 14757395258967641293ull; // 5 * m_inv_5 = 1 (mod 2^64)
+  const unsigned long long n_div_5 = 3689348814741910323ull;  // #{ n | n = 0 (mod 2^64) } = 2^64 / 5
+  unsigned int count = 0u;
+  for (;;) {
+    value *= m_inv_5;
+    if (value > n_div_5)
+      break;
+    ++count;
+  }
+  return count;
+}
+
+// Returns true if value is divisible by 5^p.
+static inline n64_bool multipleOfPowerOf5(const unsigned long long value, const unsigned int p) {
+  // I tried a case distinction on p, but there was no performance difference.
+  return pow5Factor(value) >= p;
+}
+
+// Returns true if value is divisible by 2^p.
+static inline n64_bool multipleOfPowerOf2(const unsigned long long value, const unsigned int p) {
+  // __builtin_ctzll doesn't appear to be faster here.
+  return (value & ((1ull << p) - 1)) == 0;
+}
+
+// We need a 64x128-bit multiplication and a subsequent 128-bit shift.
+// Multiplication:
+//   The 64-bit factor is variable and passed in, the 128-bit factor comes
+//   from a lookup table. We know that the 64-bit factor only has 55
+//   significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
+//   factor only has 124 significant bits (i.e., the 4 topmost bits are
+//   zeros).
+// Shift:
+//   In principle, the multiplication result requires 55 + 124 = 179 bits to
+//   represent. However, we then shift this value to the right by j, which is
+//   at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64
+//   bits. This means that we only need the topmost 64 significant bits of
+//   the 64x128-bit multiplication.
+//
+// There are several ways to do this:
+// 1. Best case: the compiler exposes a 128-bit type.
+//    We perform two 64x64-bit multiplications, add the higher 64 bits of the
+//    lower result to the higher result, and shift by j - 64 bits.
+//
+//    We explicitly cast from 64-bit to 128-bit, so the compiler can tell
+//    that these are only 64-bit inputs, and can map these to the best
+//    possible sequence of assembly instructions.
+//    x64 machines happen to have matching assembly instructions for
+//    64x64-bit multiplications and 128-bit shifts.
+//
+// 2. Second best case: the compiler exposes intrinsics for the x64 assembly
+//    instructions mentioned in 1.
+//
+// 3. We only have 64x64 bit instructions that return the lower 64 bits of
+//    the result, i.e., we have to use plain C.
+//    Our inputs are less than the full width, so we have three options:
+//    a. Ignore this fact and just implement the intrinsics manually.
+//    b. Split both into 31-bit pieces, which guarantees no internal overflow,
+//       but requires extra work upfront (unless we change the lookup table).
+//    c. Split only the first factor into 31-bit pieces, which also guarantees
+//       no internal overflow, but requires extra work since the intermediate
+//       results are not perfectly aligned.
+static inline unsigned long long mulShift64(const unsigned long long m, const unsigned* const mul, const int j) {
+  // m is maximum 55 bits
+  unsigned long long high1;                                   // 128
+  const unsigned long long low1 = umul128(m, mul[1], &high1); // 64
+  unsigned long long high0;                                   // 64
+  umul128(m, mul[0], &high0);                       // 0
+  const unsigned long long sum = high0 + low1;
+  if (sum < high0) {
+    ++high1; // overflow into high1
+  }
+  return shiftright128(sum, high1, j - 64);
+}
+
+// This is faster if we don't have a 64x64->128-bit multiplication.
+static inline unsigned long long mulShiftAll64(unsigned long long m, const unsigned long long* const mul, const int j,
+  unsigned long long* const vp, unsigned long long* const vm, const unsigned int mmShift) {
+  m <<= 1;
+  // m is maximum 55 bits
+  unsigned long long tmp;
+  const unsigned long long lo = umul128(m, mul[0], &tmp);
+  unsigned long long hi;
+  const unsigned long long mid = tmp + umul128(m, mul[1], &hi);
+  hi += mid < tmp; // overflow into hi
+
+  const unsigned long long lo2 = lo + mul[0];
+  const unsigned long long mid2 = mid + mul[1] + (lo2 < lo);
+  const unsigned long long hi2 = hi + (mid2 < mid);
+  *vp = shiftright128(mid2, hi2, (unsigned int) (j - 64 - 1));
+
+  if (mmShift == 1) {
+    const unsigned long long lo3 = lo - mul[0];
+    const unsigned long long mid3 = mid - mul[1] - (lo3 > lo);
+    const unsigned long long hi3 = hi - (mid3 > mid);
+    *vm = shiftright128(mid3, hi3, (unsigned int) (j - 64 - 1));
+  } else {
+    const unsigned long long lo3 = lo + lo;
+    const unsigned long long mid3 = mid + mid + (lo3 < lo);
+    const unsigned long long hi3 = hi + hi + (mid3 < mid);
+    const unsigned long long lo4 = lo3 - mul[0];
+    const unsigned long long mid4 = mid3 - mul[1] - (lo4 > lo3);
+    const unsigned long long hi4 = hi3 - (mid4 > mid3);
+    *vm = shiftright128(mid4, hi4, (unsigned int) (j - 64));
+  }
+
+  return shiftright128(mid, hi, (unsigned int) (j - 64 - 1));
+}
+
+static inline unsigned int pow5factor_32(unsigned int value) {
+  unsigned int count = 0;
+  for (;;) {
+    const unsigned int q = value / 5;
+    const unsigned int r = value % 5;
+    if (r != 0) {
+      break;
+    }
+    value = q;
+    ++count;
+  }
+  return count;
+}
+
+// Returns true if value is divisible by 5^p.
+static inline n64_bool multipleOfPowerOf5_32(const unsigned int value, const unsigned int p) {
+  return pow5factor_32(value) >= p;
+}
+
+// Returns true if value is divisible by 2^p.
+static inline n64_bool multipleOfPowerOf2_32(const unsigned int value, const unsigned int p) {
+  // __builtin_ctz doesn't appear to be faster here.
+  return (value & ((1u << p) - 1)) == 0;
+}
+
+// It seems to be slightly faster to avoid uint128_t here, although the
+// generated code for uint128_t looks slightly nicer.
+static inline unsigned int mulShift32(const unsigned int m, const unsigned long long factor, const int shift) {
+
+  // The casts here help MSVC to avoid calls to the __allmul library
+  // function.
+  const unsigned int factorLo = (unsigned int)(factor);
+  const unsigned int factorHi = (unsigned int)(factor >> 32);
+  const unsigned long long bits0 = (unsigned long long)m * factorLo;
+  const unsigned long long bits1 = (unsigned long long)m * factorHi;
+
+#ifdef _ABIO32
+  // On 32-bit platforms we can avoid a 64-bit shift-right since we only
+  // need the upper 32 bits of the result and the shift value is > 32.
+  const unsigned int bits0Hi = (unsigned int)(bits0 >> 32);
+  unsigned int bits1Lo = (unsigned int)(bits1);
+  unsigned int bits1Hi = (unsigned int)(bits1 >> 32);
+  bits1Lo += bits0Hi;
+  bits1Hi += (bits1Lo < bits0Hi);
+  if (shift >= 64) {
+    // s2f can call this with a shift value >= 64, which we have to handle.
+    // This could now be slower than the !defined(RYU_32_BIT_PLATFORM) case.
+    return (unsigned int)(bits1Hi >> (shift - 64));
+  } else {
+    const int s = shift - 32;
+    return (bits1Hi << (32 - s)) | (bits1Lo >> s);
+  }
+#else
+  const unsigned long long sum = (bits0 >> 32) + bits1;
+  const unsigned long long shiftedSum = sum >> (shift - 32);
+  return (unsigned int) shiftedSum;
+#endif
+}
+
+static inline unsigned int mulPow5InvDivPow2(const unsigned int m, const unsigned int q, const int j) {
+  return mulShift32(m, FLOAT_POW5_INV_SPLIT[q], j);
+}
+
+static inline unsigned int mulPow5divPow2(const unsigned int m, const unsigned int i, const int j) {
+  return mulShift32(m, FLOAT_POW5_SPLIT[i], j);
+}
+
+// A floating decimal representing m * 10^e.
+typedef struct {
+  unsigned int mantissa;
+  // Decimal exponent's range is -45 to 38
+  // inclusive, and can fit in a short if needed.
+  int exponent;
+} ryu_floating_decimal_32;
+
+// A floating decimal representing m * 10^e.
+typedef struct {
+  unsigned long long mantissa;
+  // Decimal exponent's range is -324 to 308
+  // inclusive, and can fit in a short if needed.
+  int exponent;
+} ryu_floating_decimal_64;
+
+static ryu_floating_decimal_32 ryu_float_to_decimal(const unsigned int ieeeMantissa, const unsigned int ieeeExponent) {
+  int e2;
+  unsigned int m2;
+  if (ieeeExponent == 0) {
+    if (ieeeMantissa == 0u) {
+      const ryu_floating_decimal_32 fd = { 0u, 0 };
+      return fd;
+    }
+    // We subtract 2 so that the bounds computation has 2 additional bits.
+    e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
+    m2 = ieeeMantissa;
+  } else {
+    e2 = (int) ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
+    m2 = (1u << FLOAT_MANTISSA_BITS) | ieeeMantissa;
+  }
+  const n64_bool even = (m2 & 1) == 0;
+  const n64_bool acceptBounds = even;
+
+  // Step 2: Determine the interval of valid decimal representations.
+  const unsigned int mv = 4 * m2;
+  const unsigned int mp = 4 * m2 + 2;
+  // Implicit bool -> int conversion. True is 1, false is 0.
+  const unsigned int mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
+  const unsigned int mm = 4 * m2 - 1 - mmShift;
+
+  // Step 3: Convert to a decimal power base using 64-bit arithmetic.
+  unsigned int vr, vp, vm;
+  int e10;
+  n64_bool vmIsTrailingZeros = false;
+  n64_bool vrIsTrailingZeros = false;
+  unsigned char lastRemovedDigit = 0;
+  if (e2 >= 0) {
+    const unsigned int q = log10Pow2(e2);
+    e10 = (int) q;
+    const int k = FLOAT_POW5_INV_BITCOUNT + pow5bits((int) q) - 1;
+    const int i = -e2 + (int) q + k;
+    vr = mulPow5InvDivPow2(mv, q, i);
+    vp = mulPow5InvDivPow2(mp, q, i);
+    vm = mulPow5InvDivPow2(mm, q, i);
+    if (q != 0 && (vp - 1) / 10 <= vm / 10) {
+      // We need to know one removed digit even if we are not going to loop below. We could use
+      // q = X - 1 above, except that would require 33 bits for the result, and we've found that
+      // 32-bit arithmetic is faster even on 64-bit machines.
+      const int l = FLOAT_POW5_INV_BITCOUNT + pow5bits((int) (q - 1)) - 1;
+      lastRemovedDigit = (unsigned char) (mulPow5InvDivPow2(mv, q - 1, -e2 + (int) q - 1 + l) % 10);
+    }
+    if (q <= 9) {
+      // The largest power of 5 that fits in 24 bits is 5^10, but q <= 9 seems to be safe as well.
+      // Only one of mp, mv, and mm can be a multiple of 5, if any.
+      if (mv % 5 == 0) {
+        vrIsTrailingZeros = multipleOfPowerOf5_32(mv, q);
+      } else if (acceptBounds) {
+        vmIsTrailingZeros = multipleOfPowerOf5_32(mm, q);
+      } else {
+        vp -= multipleOfPowerOf5_32(mp, q);
+      }
+    }
+  } else {
+    const unsigned int q = log10Pow5(-e2);
+    e10 = (int) q + e2;
+    const int i = -e2 - (int) q;
+    const int k = pow5bits(i) - FLOAT_POW5_BITCOUNT;
+    int j = (int) q - k;
+    vr = mulPow5divPow2(mv, (unsigned int) i, j);
+    vp = mulPow5divPow2(mp, (unsigned int) i, j);
+    vm = mulPow5divPow2(mm, (unsigned int) i, j);
+    if (q != 0 && (vp - 1) / 10 <= vm / 10) {
+      j = (int) q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT);
+      lastRemovedDigit = (unsigned char) (mulPow5divPow2(mv, (unsigned int) (i + 1), j) % 10);
+    }
+    if (q <= 1) {
+      // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
+      // mv = 4 * m2, so it always has at least two trailing 0 bits.
+      vrIsTrailingZeros = true;
+      if (acceptBounds) {
+        // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
+        vmIsTrailingZeros = mmShift == 1;
+      } else {
+        // mp = mv + 2, so it always has at least one trailing 0 bit.
+        --vp;
+      }
+    } else if (q < 31) {
+      vrIsTrailingZeros = multipleOfPowerOf2_32(mv, q - 1);
+    }
+  }
+
+  // Step 4: Find the shortest decimal representation in the interval of valid representations.
+  int removed = 0;
+  unsigned int output;
+  if (vmIsTrailingZeros || vrIsTrailingZeros) {
+    // General case, which happens rarely (~4.0%).
+    while (vp / 10 > vm / 10) {
+      vmIsTrailingZeros &= vm % 10 == 0;
+      vrIsTrailingZeros &= lastRemovedDigit == 0;
+      lastRemovedDigit = (unsigned char) (vr % 10);
+      vr /= 10;
+      vp /= 10;
+      vm /= 10;
+      ++removed;
+    }
+    if (vmIsTrailingZeros) {
+      while (vm % 10 == 0) {
+        vrIsTrailingZeros &= lastRemovedDigit == 0;
+        lastRemovedDigit = (unsigned char) (vr % 10);
+        vr /= 10;
+        vp /= 10;
+        vm /= 10;
+        ++removed;
+      }
+    }
+    if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) {
+      // Round even if the exact number is .....50..0.
+      lastRemovedDigit = 4;
+    }
+    // We need to take vr + 1 if vr is outside bounds or we need to round up.
+    output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
+  } else {
+    // Specialized for the common case (~96.0%). Percentages below are relative to this.
+    // Loop iterations below (approximately):
+    // 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
+    while (vp / 10 > vm / 10) {
+      lastRemovedDigit = (unsigned char) (vr % 10);
+      vr /= 10;
+      vp /= 10;
+      vm /= 10;
+      ++removed;
+    }
+    // We need to take vr + 1 if vr is outside bounds or we need to round up.
+    output = vr + (vr == vm || lastRemovedDigit >= 5);
+  }
+  const int exp = e10 + removed;
+
+  ryu_floating_decimal_32 fd;
+  fd.exponent = exp;
+  fd.mantissa = output;
+  return fd;
+}
+
+static ryu_floating_decimal_64 ryu_double_to_decimal(const unsigned long long ieeeMantissa, const unsigned int ieeeExponent) {
+  int e2;
+  unsigned long long m2;
+  if (ieeeExponent == 0) {
+    if (ieeeMantissa == 0ull) {
+      const ryu_floating_decimal_64 fd = { 0ull, 0 };
+      return fd;
+    }
+    // We subtract 2 so that the bounds computation has 2 additional bits.
+    e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
+    m2 = ieeeMantissa;
+  } else {
+    e2 = (int) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
+    m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
+  }
+  const n64_bool even = (m2 & 1) == 0;
+  const n64_bool acceptBounds = even;
+
+  // Step 2: Determine the interval of valid decimal representations.
+  const unsigned long long mv = 4 * m2;
+  // Implicit bool -> int conversion. True is 1, false is 0.
+  const unsigned int mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
+  // We would compute mp and mm like this:
+  // uint64_t mp = 4 * m2 + 2;
+  // uint64_t mm = mv - 1 - mmShift;
+
+  // Step 3: Convert to a decimal power base using 128-bit arithmetic.
+  unsigned long long vr, vp, vm;
+  int e10;
+  n64_bool vmIsTrailingZeros = false;
+  n64_bool vrIsTrailingZeros = false;
+  if (e2 >= 0) {
+    // I tried special-casing q == 0, but there was no effect on performance.
+    // This expression is slightly faster than max(0, log10Pow2(e2) - 1).
+    const unsigned int q = log10Pow2(e2) - (e2 > 3);
+    e10 = (int) q;
+    const int k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int) q) - 1;
+    const int i = -e2 + (int) q + k;
+    unsigned long long pow5[2];
+    double_computeInvPow5(q, pow5);
+    vr = mulShiftAll64(m2, pow5, i, &vp, &vm, mmShift);
+    if (q <= 21) {
+      // This should use q <= 22, but I think 21 is also safe. Smaller values
+      // may still be safe, but it's more difficult to reason about them.
+      // Only one of mp, mv, and mm can be a multiple of 5, if any.
+      const unsigned int mvMod5 = ((unsigned int) mv) - 5 * ((unsigned int) div5(mv));
+      if (mvMod5 == 0) {
+        vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
+      } else if (acceptBounds) {
+        // Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q
+        // <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q
+        // <=> true && pow5Factor(mm) >= q, since e2 >= q.
+        vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q);
+      } else {
+        // Same as min(e2 + 1, pow5Factor(mp)) >= q.
+        vp -= multipleOfPowerOf5(mv + 2, q);
+      }
+    }
+  } else {
+    // This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
+    const unsigned int q = log10Pow5(-e2) - (-e2 > 1);
+    e10 = (int) q + e2;
+    const int i = -e2 - (int) q;
+    const int k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;
+    const int j = (int) q - k;
+    unsigned long long pow5[2];
+    double_computePow5(i, pow5);
+    vr = mulShiftAll64(m2, pow5, j, &vp, &vm, mmShift);
+    if (q <= 1) {
+      // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
+      // mv = 4 * m2, so it always has at least two trailing 0 bits.
+      vrIsTrailingZeros = true;
+      if (acceptBounds) {
+        // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
+        vmIsTrailingZeros = mmShift == 1;
+      } else {
+        // mp = mv + 2, so it always has at least one trailing 0 bit.
+        --vp;
+      }
+    } else if (q < 63) {
+      // We want to know if the full product has at least q trailing zeros.
+      // We need to compute min(p2(mv), p5(mv) - e2) >= q
+      // <=> p2(mv) >= q && p5(mv) - e2 >= q
+      // <=> p2(mv) >= q (because -e2 >= q)
+      vrIsTrailingZeros = multipleOfPowerOf2(mv, q);
+    }
+  }
+
+  // Step 4: Find the shortest decimal representation in the interval of valid representations.
+  int removed = 0;
+  unsigned char lastRemovedDigit = 0;
+  unsigned long long output;
+  // On average, we remove ~2 digits.
+  if (vmIsTrailingZeros || vrIsTrailingZeros) {
+    // General case, which happens rarely (~0.7%).
+    for (;;) {
+      const unsigned long long vpDiv10 = div10(vp);
+      const unsigned long long vmDiv10 = div10(vm);
+      if (vpDiv10 <= vmDiv10) {
+        break;
+      }
+      const unsigned int vmMod10 = ((unsigned int) vm) - 10 * ((unsigned int) vmDiv10);
+      const unsigned long long vrDiv10 = div10(vr);
+      const unsigned int vrMod10 = ((unsigned int) vr) - 10 * ((unsigned int) vrDiv10);
+      vmIsTrailingZeros &= vmMod10 == 0;
+      vrIsTrailingZeros &= lastRemovedDigit == 0;
+      lastRemovedDigit = (unsigned char) vrMod10;
+      vr = vrDiv10;
+      vp = vpDiv10;
+      vm = vmDiv10;
+      ++removed;
+    }
+    if (vmIsTrailingZeros) {
+      for (;;) {
+        const unsigned long long vmDiv10 = div10(vm);
+        const unsigned int vmMod10 = ((unsigned int) vm) - 10 * ((unsigned int) vmDiv10);
+        if (vmMod10 != 0) {
+          break;
+        }
+        const unsigned long long vpDiv10 = div10(vp);
+        const unsigned long long vrDiv10 = div10(vr);
+        const unsigned int vrMod10 = ((unsigned int) vr) - 10 * ((unsigned int) vrDiv10);
+        vrIsTrailingZeros &= lastRemovedDigit == 0;
+        lastRemovedDigit = (unsigned char) vrMod10;
+        vr = vrDiv10;
+        vp = vpDiv10;
+        vm = vmDiv10;
+        ++removed;
+      }
+    }
+    if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) {
+      // Round even if the exact number is .....50..0.
+      lastRemovedDigit = 4;
+    }
+    // We need to take vr + 1 if vr is outside bounds or we need to round up.
+    output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
+  } else {
+    // Specialized for the common case (~99.3%). Percentages below are relative to this.
+    n64_bool roundUp = false;
+    const unsigned long long vpDiv100 = div100(vp);
+    const unsigned long long vmDiv100 = div100(vm);
+    if (vpDiv100 > vmDiv100) { // Optimization: remove two digits at a time (~86.2%).
+      const unsigned long long vrDiv100 = div100(vr);
+      const unsigned int vrMod100 = ((unsigned int) vr) - 100 * ((unsigned int) vrDiv100);
+      roundUp = vrMod100 >= 50;
+      vr = vrDiv100;
+      vp = vpDiv100;
+      vm = vmDiv100;
+      removed += 2;
+    }
+    // Loop iterations below (approximately), without optimization above:
+    // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
+    // Loop iterations below (approximately), with optimization above:
+    // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
+    for (;;) {
+      const unsigned long long vpDiv10 = div10(vp);
+      const unsigned long long vmDiv10 = div10(vm);
+      if (vpDiv10 <= vmDiv10) {
+        break;
+      }
+      const unsigned long long vrDiv10 = div10(vr);
+      const unsigned int vrMod10 = ((unsigned int) vr) - 10 * ((unsigned int) vrDiv10);
+      roundUp = vrMod10 >= 5;
+      vr = vrDiv10;
+      vp = vpDiv10;
+      vm = vmDiv10;
+      ++removed;
+    }
+    // We need to take vr + 1 if vr is outside bounds or we need to round up.
+    output = vr + (vr == vm || roundUp);
+  }
+  const int exp = e10 + removed;
+
+  ryu_floating_decimal_64 fd;
+  fd.exponent = exp;
+  fd.mantissa = output;
+  return fd;
+}
diff --git a/lib/n64-libc/n64-stdio.c b/lib/n64-libc/n64-stdio.c
new file mode 100644
index 00000000..5280725c
--- /dev/null
+++ b/lib/n64-libc/n64-stdio.c
@@ -0,0 +1,67 @@
+#include "n64-stdio.h"
+
+#include "n64-util.h"
+#include "n64-stdio-bprint.c.inc"
+
+static unsigned int s_isv_buffer[712/4];
+
+static n64_bool s_sprint_callback( void *buffer, unsigned int len ) {
+	((char*)buffer)[len] = '\0';
+	return false;
+}
+
+static n64_bool s_sprint_nobuffer_callback( void*, unsigned int ) {
+	return false;
+}
+
+static n64_bool s_isv_bprint_callback( void *headPtr, unsigned int chunkSize ) {
+	const unsigned int n = (chunkSize + 3u) >> 2;
+	volatile unsigned int *head = *(volatile unsigned int**)headPtr;
+	
+	for( unsigned int i = 0; i < n; i++ ) {
+		*(head++) = s_isv_buffer[i];
+		n64_await_pi_io();
+	}
+
+	*(volatile unsigned int**)headPtr = head;
+	return head != NULL && head < (volatile unsigned int*)0xb4000000u;
+}
+
+int n64_vsnprintf( char *buffer, unsigned int bufsz, const char *format, __builtin_va_list args ) {
+	return bufsz ?
+		n64_vbprintf( buffer, bufsz - 1, s_sprint_callback, buffer, format, args ) :
+		n64_vbprintf( buffer, 0u, s_sprint_nobuffer_callback, NULL, format, args );
+}
+
+int n64_vprintf( const char *format, __builtin_va_list args ) {
+	const n64_bool inter = n64_set_interrupts( false );
+	n64_await_pi_io();
+
+	volatile unsigned int *head = (volatile unsigned int*)0xb3ff0020u;
+	register const int len = n64_vbprintf( (char*)s_isv_buffer, 712u, s_isv_bprint_callback, &head, format, args );
+	if( len >= 0 ) {
+		n64_memory_barrier();
+		*((volatile int*)0xb3ff0014u) = (len > 0xFFE0) ? 0xFFE0 : len;
+		n64_memory_barrier();
+		n64_await_pi_io();
+	}
+
+	n64_set_interrupts( inter );
+	return len;
+}
+
+int n64_snprintf( char *buffer, unsigned int bufsz, const char *format, ... ) {
+	__builtin_va_list args;
+	__builtin_va_start( args, format );
+	register const int result = n64_vsnprintf( buffer, bufsz, format, args );
+	__builtin_va_end( args );
+	return result;
+}
+
+int n64_printf( const char *format, ... ) {
+	__builtin_va_list args;
+	__builtin_va_start( args, format );
+	register const int result = n64_vprintf( format, args );
+	__builtin_va_end( args );
+	return result;
+}
diff --git a/lib/n64-libc/n64-stdio.h b/lib/n64-libc/n64-stdio.h
new file mode 100644
index 00000000..7ddcd7b9
--- /dev/null
+++ b/lib/n64-libc/n64-stdio.h
@@ -0,0 +1,65 @@
+#ifndef N64_STDLIB_N64_STDIO_H_
+#define N64_STDLIB_N64_STDIO_H_
+
+#include "n64-stddef.h"
+#include "n64-stdbool.h"
+
+#ifndef EOF
+#define EOF -1
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* IMPORTANT NOTICE
+ * The printf family of functions provided have some important deviations from the C standard library:
+ * 1. The floating point formats (%f, %e, %g, and %a) expect a (32-bit) float instead of a double. To format a double, use the 'l'
+ *    length modifier (ie. %lf, %le, %lg, or %la) to indicate that the argument is double precision. You can still pass in a
+ *    double argument, however it will be cast to a float if the 'l', 'll', or 'L' length specifier is not present.
+ * 2. Formatting 128-bit long doubles is not supported. The 'L' length specifier is treated the same as 'll'
+ * 3. The 'l' length modifier is ignored for characters and strings (wide chars are not supported)
+ */
+
+
+/* Extension: n64_bprintf / n64_vbprintf
+ * The n64_bprintf (buffered printf) is an extension intended to be used as a helper function for implementing your own printf
+ * function to do things like printing to something other than IS Viewer or drawing formatted text on screen.
+ * 
+ * bprintf functions like snprintf, but when the provided buffer is filled up, it invokes the provided callback function, then,
+ * depending on the return value, either moves back to the start of the buffer and continues writing, or ceases output. Once all
+ * characters have been processed, the callback function is invoked one final time.
+ * 
+ * When the provided buffer is filled, the callback function is invoked, passing `state` to the first argument, and the buffer
+ * size to the second argument. If you return false, the function will no longer write any data and will not invoke the callback
+ * again, but it will still continue processing the text so that it can return the character count. If you return true, the
+ * function will continue, moving back to the start buffer and continuing writing output. Every time the buffer is filled, the
+ * callback is invoked again in the same way. Finally, once all characters have been processed and output is complete, the
+ * callback will be invoked one final time, with the number of characters written since the last callback (which will be 0 if the
+ * output size is a multiple of the buffer size) passed into the second argument of the callback function. Note that this final
+ * callback is not invoked if a previous callback returned false.
+ */
+__attribute__((format(printf,5,0), nonnull(3, 5), access(write_only, 1)))
+int n64_vbprintf( char *buffer, unsigned int bufsz, n64_bool(*callback)(void*, unsigned int), void *state, const char *format, __builtin_va_list args );
+
+__attribute__((format(printf,3,0), nonnull(3), access(write_only, 1)))
+int n64_vsnprintf( char *buffer, unsigned int bufsz, const char *format, __builtin_va_list args );
+
+__attribute__((format(printf,1,0), nonnull(1)))
+int n64_vprintf( const char *format, __builtin_va_list args );
+
+
+__attribute__((format(printf,5,6), nonnull(3, 5), access(write_only, 1)))
+int n64_bprintf( char *buffer, unsigned int bufsz, n64_bool(*callback)(void*, unsigned int), void *state, const char *format, ... );
+
+__attribute__((format(printf,3,4), nonnull(3), access(write_only, 1)))
+int n64_snprintf( char *buffer, unsigned int bufsz, const char *format, ... );
+
+__attribute__((format(printf,1,2), nonnull(1)))
+int n64_printf( const char *format, ... );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/n64-libc/n64-stdlib.c b/lib/n64-libc/n64-stdlib.c
new file mode 100644
index 00000000..448f597a
--- /dev/null
+++ b/lib/n64-libc/n64-stdlib.c
@@ -0,0 +1,155 @@
+#include "n64-stdlib.h"
+
+#include "n64-stddef.h"
+#include "n64-util.h"
+
+typedef void(*memswp_func_t)( void*, void*, unsigned int );
+
+#define __DECLARE_MEMSWP_FUNC(type) \
+	static void memswp_##type( unsigned type *a, unsigned type *b, __attribute__((unused)) unsigned int n ) { \
+		register unsigned type temp; \
+		temp = *a; \
+		*a = *b; \
+		*b = temp; \
+	} \
+	\
+	static void memswp_n##type( unsigned type *a, unsigned type *b, unsigned int n ) { \
+		register unsigned type temp; \
+		for( unsigned int i = 0; i < n; i++ ) { \
+			temp = *a; \
+			*(a++) = *b; \
+			*(b++) = temp; \
+		} \
+	}
+
+__DECLARE_MEMSWP_FUNC(char)
+__DECLARE_MEMSWP_FUNC(short)
+__DECLARE_MEMSWP_FUNC(int)
+
+static void n64_qsort_impl(
+	void *first,
+	void *lo,
+	void *hi,
+	int(*comp)(const void*, const void*),
+	void(*swap)(void*, void*, unsigned int),
+	unsigned int sz,
+	unsigned int csz
+) {
+	if( lo >= hi || lo < first ) return;
+
+	void *i = lo;
+	void *p = hi;
+	for( void *j = lo; j < hi; j += sz ) {
+		if( comp( j, p ) <= 0 ) {
+			swap( i, j, csz );
+			i += sz;
+		}
+	}
+
+	if( i != hi ) swap( i, hi, csz );
+	n64_qsort_impl( first, lo, i - sz, comp, swap, sz, csz );
+	n64_qsort_impl( first, i + sz, hi, comp, swap, sz, csz );
+}
+
+void n64_qsort( void *ptr, unsigned int count, unsigned int size, int(*comp)(const void*, const void*) ) {
+	unsigned int csize = size;
+
+	memswp_func_t memswp;
+	if( !size || !count ) {
+		return;
+	} else if( size == 1 ) {
+		memswp = (memswp_func_t)memswp_char;
+	} else if( size == 2 ) {
+		memswp = ((unsigned int)ptr & 0x1) ? (memswp_func_t)memswp_nchar : (memswp_func_t)memswp_short;
+	} else if( size == 4 ) {
+		if( !((unsigned int)ptr & 0x3) ) {
+			memswp = (memswp_func_t)memswp_int;
+		} else if( !((unsigned int)ptr & 0x1) ) {
+			memswp = (memswp_func_t)memswp_nshort;
+			csize <<= 1;
+		} else {
+			memswp = (memswp_func_t)memswp_nchar;
+		}
+	} else if( !(size & 0x3) && !((unsigned int)ptr & 0x3) ) {
+		memswp = (memswp_func_t)memswp_nint;
+		csize <<= 2;
+	} else if( !(size & 0x1) && !((unsigned int)ptr & 0x1) ) {
+		memswp = (memswp_func_t)memswp_nshort;
+		csize <<= 1;
+	} else {
+		memswp = (memswp_func_t)memswp_nchar;
+	}
+
+	n64_qsort_impl( ptr, ptr, ptr + (size * (count - 1)), comp, memswp, size, csize );
+}
+
+void *n64_bsearch( const void *key, const void *ptr, unsigned int count, unsigned int size, int(*comp)(const void*, const void*) ) {
+	if( !size ) return NULL;
+	while( count ) {
+		register const int i = (count - 1) >> 1;
+		register const void *p = ptr + size * i;
+		register const int c = comp( p, key );
+
+		if( c < 0 ) {
+			ptr = p + size;
+			count -= i + 1;
+		} else if( c > 0 ) {
+			count = i;
+		} else {
+			return (void*)p;
+		}
+	}
+
+	return NULL;
+}
+
+static unsigned int g_randi = 24u;
+static unsigned int g_randv[32] = {
+	0xdb48f936u, 0x14898454u, 0x37ffd106u, 0xb58bff9cu, 0x59e17104u, 0xcf918a49u, 0x09378c83u, 0x52c7a471u, 
+	0x8d293ea9u, 0x1f4fc301u, 0xc3db71beu, 0x39b44e1cu, 0xf8a44ef9u, 0x4c8b80b1u, 0x19edc328u, 0x87bf4bddu, 
+	0xc9b240e5u, 0xe9ee4b1bu, 0x4382aee7u, 0x535b6b41u, 0xf3bec5dau, 0x991539b1u, 0x16a5bce3u, 0x6774a4cdu, 
+	0x73b5def3u, 0x3e01511eu, 0x4e508aaau, 0x61048c05u, 0xf5500617u, 0x846b7115u, 0x6a19892cu, 0x896a97afu
+};
+
+void n64_srand( unsigned int seed ) {
+	if( !seed ) seed = 1u;
+
+	g_randv[0] = seed;
+
+	int r = (int)seed;
+	for( int i = 1; i < 31; i++ ) {
+		r = (int)(16807ll * (long long)(r % 127773) - 2836ll * (long long)(r / 127773));
+		if( r < 0 ) r += 0x7FFFFFFF;
+		g_randv[i] = (unsigned int)r;
+	}
+
+	g_randv[31] = seed;
+	g_randv[0] = g_randv[1];
+	g_randv[1] = g_randv[2];
+	g_randi = 2u;
+
+	for( int i = 0; i < 310; i++ ) {
+		n64_rand();
+	}
+}
+
+unsigned int n64_randu() {
+	register const unsigned int next = (g_randi + 1u) & 0x1Fu;
+	register const unsigned int r = g_randv[(g_randi + 29u) & 0x1Fu] + g_randv[next];
+	g_randv[g_randi] = r;
+	g_randi = next;
+	return next;
+}
+
+float n64_randf() {
+	const unsigned int bits = 0x3F800000u | (n64_randu() >> 9);
+	return n64_bit_cast_itof( bits ) - 1.f;
+}
+
+double n64_randd() {
+	const unsigned int bits[2] __attribute__((aligned(8))) = {
+		0x3FF00000u | (n64_randu() >> 12),
+		n64_randu()
+	};
+	return *((const double*)bits) - 1.0;
+}
diff --git a/lib/n64-libc/n64-stdlib.h b/lib/n64-libc/n64-stdlib.h
new file mode 100644
index 00000000..3a8c4a35
--- /dev/null
+++ b/lib/n64-libc/n64-stdlib.h
@@ -0,0 +1,67 @@
+#ifndef N64_STDLIB_N64_STDLIB_H_
+#define N64_STDLIB_N64_STDLIB_H_
+
+#include "n64-stddef.h"
+
+#ifndef RAND_MAX
+#define RAND_MAX 0x7fffffff
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+__attribute__((always_inline, artificial, noreturn))
+static inline void n64_abort() {
+	__builtin_trap();
+}
+
+__attribute__((access(none, 1), const, warn_unused_result, always_inline))
+static inline unsigned int n64_memalignment( const void *p ) {
+	return (unsigned int)p & (-(unsigned int)p);
+}
+
+__attribute__((nonnull(1, 4), access(read_write, 1)))
+void n64_qsort( void *ptr, unsigned int count, unsigned int size, int(*comp)(const void*, const void*) );
+
+__attribute__((nonnull(2, 5), alloc_align(4), alloc_size(3, 4), warn_unused_result))
+void *n64_bsearch( const void *key, const void *ptr, unsigned int count, unsigned int size, int(*comp)(const void*, const void*) );
+
+__attribute__((flatten))
+void n64_srand( unsigned int seed );
+
+/* Extension. Works just like the standard C `rand` function, but returns an
+ * unsigned value with a full 32 bits of randomness instead of just 31 bits.
+ */
+unsigned int n64_randu();
+
+/* Extension. Works just like the standard C `rand' function, but returns an
+ * unsigned short value with 16 bits of randomness.
+ */
+__attribute__((always_inline))
+static inline unsigned short n64_randhu() {
+	return (unsigned short)(n64_randu() >> 16);
+}
+
+__attribute__((always_inline))
+static inline int n64_rand() {
+	return (int)(n64_randu() >> 1);
+}
+
+/* Extension. Generate a random single precision floating point value
+ * greater than or equal to 0 and strictly less than 1
+ */
+__attribute__((warn_unused_result))
+float n64_randf();
+
+/* Extension. Generate a random double precision floating point value
+ * greater than or equal to 0 and strictly less than 1
+ */
+__attribute__((warn_unused_result))
+double n64_randd();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/n64-libc/n64-string.c b/lib/n64-libc/n64-string.c
new file mode 100644
index 00000000..0634a07a
--- /dev/null
+++ b/lib/n64-libc/n64-string.c
@@ -0,0 +1,356 @@
+#include "n64-string.h"
+
+#include "n64-stddef.h"
+#include "n64-stdbool.h"
+
+inline unsigned int __attribute__((const, always_inline)) hasZeroByte( unsigned int x ) {
+	return (x - 0x01010101u) & ~x & 0x80808080u;
+}
+
+inline unsigned int __attribute__((const, always_inline)) bytepattern( int ch ) {
+	register unsigned int w = (unsigned int)(unsigned char)ch;
+	w |= w << 8;
+	w |= w << 16;
+	return w;
+}
+
+unsigned int n64_strncpy2( char *dest, const char *src, unsigned int count ) {
+	register const char *const end = n64_memchr( src, 0, count );
+	if( !end ) {
+		n64_memcpy( dest, src, count );
+		return count;
+	} else {
+		register const unsigned int len = (unsigned int)(end - src);
+		n64_memcpy( dest, src, len + 1u );
+		return len;
+	}
+}
+
+unsigned int n64_strlen( const char *str ) {
+	const char *const start = str;
+
+	while( ((unsigned int)str & 3) ) {
+		if( !*str ) return (unsigned int)(str - start);
+		str++;
+	}
+
+	while( !hasZeroByte( *(unsigned int*)str ) ) str += 4;
+
+	while( *str ) str++;
+	return (unsigned int)(str - start);
+}
+
+int n64_strcmp( const char *lhs, const char *rhs ) {
+	if( ((unsigned int)lhs & 3) == ((unsigned int)rhs & 3) ) {
+		while( (unsigned int)lhs & 3 ) {
+			register const int c = (int)(unsigned char)*lhs - (int)(unsigned char)*rhs;
+			if( c ) return c;
+			if( !*lhs ) return 0;
+			lhs++;
+			rhs++;
+		}
+
+		while( true ) {
+			register const unsigned int x = *((const unsigned int*)lhs);
+			if( hasZeroByte( x ) ) break;
+
+			register const unsigned int y = *((const unsigned int*)rhs);
+			if( x != y ) return (x < y) ? -1 : 1;
+
+			lhs += 4;
+			rhs += 4;
+		}
+	}
+
+	while( true ) {
+		register const int c = (int)(unsigned char)*lhs - (int)(unsigned char)*rhs;
+		if( c ) return c;
+		if( !*lhs ) return 0;
+		lhs++;
+		rhs++;
+	}
+}
+
+int n64_strncmp( const char *lhs, const char *rhs, unsigned int count ) {
+	if( ((unsigned int)lhs & 3) == ((unsigned int)rhs & 3) ) {
+		while( count && ((unsigned int)lhs & 3) ) {
+			register const int c = (int)(unsigned char)*lhs - (int)(unsigned char)*rhs;
+			if( c ) return c;
+			if( !*lhs ) return 0;
+			lhs++;
+			rhs++;
+			count--;
+		}
+
+		while( count >= 4 ) {
+			register const unsigned int x = *((const unsigned int*)lhs);
+			if( hasZeroByte( x ) ) break;
+
+			register const unsigned int y = *((const unsigned int*)rhs);
+			if( x != y ) return (x < y) ? -1 : 1;
+
+			lhs += 4;
+			rhs += 4;
+			count -= 4;
+		}
+	}
+	
+	while( count ) {
+		register const int c = (int)(unsigned char)*lhs - (int)(unsigned char)*rhs;
+		if( c ) return c;
+		if( !*lhs ) return 0;
+		lhs++;
+		rhs++;
+		count--;
+	}
+
+	return 0;
+}
+
+char *n64_strchr( const char *str, int ch ) {
+	while( (unsigned int)str & 3 ) {
+		if( (unsigned char)*str == (unsigned char)ch ) return (char*)str;
+		if( !*(str++) ) return NULL;
+	}
+
+	register const unsigned int c = bytepattern( ch );
+	while( !(hasZeroByte( *(const unsigned int*)str ) | hasZeroByte( (*(const unsigned int*)str) ^ c )) ) str += 4;
+
+	while( (unsigned char)*str != (unsigned char)ch ) {
+		if( !*(str++) ) return NULL;
+	}
+
+	return (char*)str;
+}
+
+char *n64_strrchr( const char *str, int ch ) {
+	register const char *const start = str;
+	str += n64_strlen( str );
+
+	while( str != start && ((unsigned int)str & 3) != 3 ) {
+		if( (unsigned char)*str == (unsigned char)ch ) return (char*)str;
+		str--;
+	}
+
+	register const unsigned int c = bytepattern( ch );
+	while( start <= str - 4 ) {
+		if( hasZeroByte( *(const unsigned int*)(str - 3) ^ c ) ) break;
+		str -= 4;
+	}
+
+	while( start != str ) {
+		if( (unsigned char)*str == (unsigned char)ch ) return (char*)str;
+		str--;
+	}
+
+	return NULL;
+}
+
+unsigned int n64_strspn( const char *str, const char *accept ) {
+	if( !accept[0] ) return 0u;
+
+	char mask[256];
+	n64_memset( mask, 0, 256 );
+	mask[0] = 1;
+
+	while( *accept ) mask[(unsigned char)*(accept++)] = 1;
+
+	const char *const start = str;
+	while( !mask[(unsigned char)*str] ) str++;
+	return (unsigned int)(str - start);
+}
+
+unsigned int n64_strcspn( const char *str, const char *reject ) {
+	if( !reject[0] ) return n64_strlen( str );
+	
+	char mask[256];
+	n64_memset( mask, 1, 256 );
+
+	while( *reject ) mask[(unsigned char)*(reject++)] = 0;
+
+	const char *const start = str;
+	while( !mask[(unsigned char)*str] ) str++;
+	return (unsigned int)(str - start);
+}
+
+void *n64_memchr( const void *ptr, int ch, unsigned int count ) {
+
+	while( count && ((unsigned int)ptr & 3) ) {
+		if( *(unsigned char*)ptr == (unsigned char)ch ) return (void*)ptr;
+		ptr++;
+		count--;
+	}
+
+	register const unsigned int cbp = bytepattern( ch );
+	while( count >= 4u ) {
+		if( hasZeroByte( *(unsigned int*)ptr ^ cbp ) ) break;
+		ptr += 4;
+		count -= 4;
+	}
+
+	while( count-- ) {
+		if( *(unsigned char*)ptr == (unsigned char)ch ) return (void*)ptr;
+		ptr++;
+	}
+
+	return NULL;
+}
+
+int n64_memcmp( const void *lhs, const void *rhs, unsigned int count ) {
+	if( lhs == rhs ) return 0;
+
+	if( count > 8u && ((unsigned int)lhs & 0x3) == ((unsigned int)rhs & 0x3) ) {
+		if( (count & 3) || ((unsigned int)lhs & 0x3) ) {
+			const void *const stop = lhs + count;
+			const void *const astop = (const void*)((unsigned int)stop & 0xFFFFFFFCu);
+
+			while( (unsigned int)lhs & 0x3 ) {
+				register const unsigned char a = *(unsigned char*)lhs;
+				register const unsigned char b = *(unsigned char*)rhs;
+
+				if( a == b ) {
+					lhs++;
+					rhs++;
+					continue;
+				}
+
+				return (a > b) ? 1 : -1;
+			}
+
+			while( lhs != astop ) {
+				register const unsigned int a = *(unsigned int*)lhs;
+				register const unsigned int b = *(unsigned int*)rhs;
+
+				if( a == b ) {
+					lhs += 4;
+					rhs += 4;
+					continue;
+				}
+
+				return (a > b) ? 1 : -1;
+			}
+
+			while( lhs != stop ) {
+				register const unsigned char a = *(unsigned char*)lhs;
+				register const unsigned char b = *(unsigned char*)rhs;
+
+				if( a == b ) {
+					lhs++;
+					rhs++;
+					continue;
+				}
+
+				return (a > b) ? 1 : -1;
+			}
+
+			return 0;
+		} else {
+			const void *const stop = lhs + count;
+			while( lhs != stop ) {
+				register const unsigned int a = *(unsigned int*)lhs;
+				register const unsigned int b = *(unsigned int*)rhs;
+
+				if( a == b ) {
+					lhs += 4;
+					rhs += 4;
+					continue;
+				}
+
+				return (a > b) ? 1 : -1;
+			}
+			return 0;
+		}
+	} else {
+		const void *const stop = lhs + count;
+		while( lhs != stop ) {
+			register const unsigned char a = *(unsigned char*)lhs;
+			register const unsigned char b = *(unsigned char*)rhs;
+
+			if( a == b ) {
+				lhs++;
+				rhs++;
+				continue;
+			}
+
+			return (a > b) ? 1 : -1;
+		}
+		return 0;
+	}
+}
+
+void *n64_memset( void *dest, int ch, unsigned int count ) {
+	if( count > 8u && !((unsigned int)dest & 0x3) ) {
+		register const unsigned int w = bytepattern( ch );
+		unsigned int *head = (unsigned int*)dest;
+		const unsigned int *const end = (unsigned int*)(dest + count);
+		while( head != end ) *(head++) = w;
+		return dest;
+	} else {
+		register const unsigned char c = (unsigned char)ch;
+		unsigned char* head = (unsigned char*)dest;
+		unsigned char *const end = head + count;
+		while( head != end ) *(head++) = c;
+		return dest;
+	}
+}
+
+void *n64_memcpy( void *dest, const void *src, unsigned int count ) {
+	if( count > 8u && ((unsigned int)dest & 0x3) == ((unsigned int)src & 0x3) ) {
+		if( (count & 3) || ((unsigned int)src & 0x3) ) {
+			register void *start = dest;
+			register const void *end = (void*)((unsigned int)(start + count) & 0xFFFFFFFCu);
+
+			while( (unsigned int)start & 0x3 ) {
+				*((char*)start++) = *((char*)src++);
+			}
+
+			while( start != end ) {
+				*((unsigned int*)start) = *((unsigned int*)src);
+				start += 4;
+				src += 4;
+			}
+
+			end = (void*)((unsigned int)dest + count);
+			while( start != end ) {
+				*((char*)start++) = *((char*)src++);
+			}
+		} else {
+			register unsigned int *start = (unsigned int*)dest;
+			register const unsigned int *const end = (const unsigned int*)((unsigned int)start + count);
+			while( start != end ) {
+				*(start++) = *(const unsigned int*)src;
+				src += 4;
+			}
+		}
+
+		return dest;
+	} else {
+		char *head = (char*)dest;
+		char *const end = head + count;
+		while( head != end ) *(head++) = *(char*)src++;
+		return dest;
+	}
+}
+
+void *n64_memmove( void *dest, const void *src, unsigned int count ) {
+	if( src == dest || !count ) return dest;
+
+	if( dest + count <= src || dest >= src + count ) {
+		return n64_memcpy( dest, src, count );
+	}
+
+	if( dest > src ) {
+		// start of dest overlaps end of src
+		void *const end = dest;
+		dest += count;
+		src += count;
+		while( dest != end ) *(char*)--dest = *(char*)--src;
+		return end;
+	} else {
+		// end of dest overlaps start of src
+		char *head = (char*)dest;
+		char *const end = head + count;
+		while( head != end ) *(head++) = *(char*)src++;
+		return dest;
+	}
+}
diff --git a/lib/n64-libc/n64-string.h b/lib/n64-libc/n64-string.h
new file mode 100644
index 00000000..78de6659
--- /dev/null
+++ b/lib/n64-libc/n64-string.h
@@ -0,0 +1,111 @@
+#ifndef N64_STDLIB_N64_STRING_H_
+#define N64_STDLIB_N64_STRING_H_
+
+#include "n64-stddef.h"
+#include "n64-util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Extension. Works the same as [n64_]strncpy except that it writes only a
+ * single null terminator after copying the string, rather than filling the
+ * entire rest of the buffer with zeros. Additionally, it returns the number
+ * of characters written not including the null terminator.
+ */
+__attribute__((access(write_only, 1),  nonnull(1)))
+unsigned int n64_strncpy2( char *dest, const char *src, unsigned int count );
+
+
+__attribute__((pure, warn_unused_result, nonnull(1)))
+unsigned int n64_strlen( const char *str );
+
+__attribute__((pure, warn_unused_result, nonnull(1, 2)))
+int n64_strcmp( const char *lhs, const char *rhs );
+
+__attribute__((pure, warn_unused_result, nonnull(1, 2)))
+int n64_strncmp( const char *lhs, const char *rhs, unsigned int count );
+
+__attribute__((pure, warn_unused_result, nonnull(1), returns_nonnull))
+char *n64_strchr( const char *str, int ch );
+
+__attribute__((pure, warn_unused_result, nonnull(1), returns_nonnull))
+char *n64_strrchr( const char *str, int ch );
+
+__attribute__((pure, warn_unused_result, nonnull(1, 2)))
+unsigned int n64_strspn( const char *str, const char *accept );
+
+__attribute__((pure, warn_unused_result, nonnull(1, 2)))
+unsigned int n64_strcspn( const char *str, const char *reject );
+
+
+__attribute__((pure, warn_unused_result, nonnull(1), returns_nonnull))
+void *n64_memchr( const void *ptr, int ch, unsigned int count );
+
+__attribute__((pure, warn_unused_result, nonnull(1, 2)))
+int n64_memcmp( const void *lhs, const void *rhs, unsigned int count );
+
+__attribute__((access(write_only, 1), nonnull(1), returns_nonnull))
+void *n64_memset( void *dest, int ch, unsigned int count );
+
+__attribute__((access(write_only, 1), nonnull(1, 2), returns_nonnull))
+void *n64_memcpy( void *dest, const void *src, unsigned int count );
+
+__attribute__((access(write_only, 1), nonnull(1, 2), returns_nonnull))
+void *n64_memmove( void *dest, const void *src, unsigned int count );
+
+__attribute__((access(write_only, 1), nonnull(1)))
+static inline void n64_bzero( void *s, unsigned int n ) {
+	n64_memset( s, 0, n );
+}
+
+__attribute__((nonnull(1), always_inline))
+static inline void n64_explicit_bzero( void *s, unsigned int n ) {
+	n64_bzero( s, n );
+	n64_memory_barrier();
+}
+
+__attribute__((access(write_only, 2), nonnull(1, 2), always_inline))
+static inline void n64_bcopy( const void *src, void *dest, unsigned int n ) {
+	n64_memmove( dest, src, n );
+}
+
+
+__attribute__((pure, warn_unused_result))
+static inline unsigned int n64_strlen_s( const char *str, unsigned int strsz ) {
+	if( !str ) return 0u;
+	const char *stop = (const char*)n64_memchr( str, 0, strsz );
+	return stop ? (unsigned int)(stop - str) : strsz;
+}
+
+__attribute__((access(write_only, 1),  nonnull(1, 2), returns_nonnull))
+static inline char *n64_strcpy( char *dest, const char *src ) {
+	return (char*)n64_memcpy( dest, src, n64_strlen( src ) + 1u );
+}
+
+__attribute__((access(write_only, 1),  nonnull(1), returns_nonnull))
+static inline char *n64_strncpy( char *dest, const char *src, unsigned int count ) {
+	const unsigned int len = 1u + n64_strncpy2( dest, src, count );
+	if( len < count ) n64_memset( &dest[len], 0, count - len );
+	return dest;
+}
+
+__attribute__((access(read_write, 1),  nonnull(1, 2), returns_nonnull))
+static inline char *n64_strcat( char *dest, const char *src ) {
+	n64_strcpy( &dest[n64_strlen( dest )], src );
+	return dest;
+}
+
+__attribute__((access(read_write, 1),  nonnull(1), returns_nonnull))
+static inline char *n64_strncat( char *dest, const char *src, unsigned int count ) {
+	unsigned int len = n64_strlen( dest );
+	len += n64_strncpy2( &dest[len], src, count );
+	dest[len] = '\0';
+	return dest;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/n64-libc/n64-time.c b/lib/n64-libc/n64-time.c
new file mode 100644
index 00000000..960fe2dc
--- /dev/null
+++ b/lib/n64-libc/n64-time.c
@@ -0,0 +1,685 @@
+#include "n64-time.h"
+
+#include "n64-util.h"
+#include "n64-string.h"
+
+static const n64_tm N64_UNIX_EPOCH_TM = {
+	0, 0, 0, 1, 0, 70, 4, 0, -1
+};
+
+static const char *s_weekdayNames[7] = {
+	"Sunday",
+	"Monday",
+	"Tuesday",
+	"Wednesday",
+	"Thursday",
+	"Friday",
+	"Saturday"
+};
+
+static const char *s_monthNames[12] = {
+	"January",
+	"February",
+	"March",
+	"April",
+	"May",
+	"June",
+	"July",
+	"August",
+	"September",
+	"October",
+	"November",
+	"December"
+};
+
+#define N64_SECONDS_IN_MINUTE 60ll
+#define N64_SECONDS_IN_HOUR (60ll * N64_SECONDS_IN_MINUTE)
+#define N64_SECONDS_IN_DAY (24ll * N64_SECONDS_IN_HOUR)
+#define N64_SECONDS_IN_NON_LEAP_YEAR (365ll * N64_SECONDS_IN_DAY)
+
+static unsigned int s_si_buffer[16] __attribute__((aligned(16)));
+static unsigned int s_si_backup[16] __attribute__((aligned(16)));
+static n64_bool s_exec_on_write_bug = false;
+static unsigned int s_prev_dma_addr;
+
+static const int s_yday_table[12] = {
+	0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
+};
+
+typedef enum {
+	N64_RTC_INIT_CALLED = 0x1,
+	N64_RTC_NOT_WAITING = 0x2,
+	N64_RTC_GOOD = 0x4,
+
+	N64_RTC_READY = N64_RTC_INIT_CALLED | N64_RTC_NOT_WAITING
+} n64_rtc_state;
+
+static n64_rtc_state g_rtc_state = 0;
+static n64_clock_t g_wait_start = 0u;
+static n64_clock_t g_wait_end = 0u;
+
+static inline void __attribute__((always_inline)) si_await_op() {
+	// Wait for the SI operation we started to finish, then clear the status
+	while( !(*((volatile unsigned int*)0xa4300008u) & *((volatile unsigned int*)0xa430000Cu) & 0x2u) );
+	*((volatile unsigned int*)0xa4800018u) = 0u;
+}
+
+static inline void __attribute__((always_inline)) si_wait_safe( n64_bool yield ) {
+	// If the SI is currently busy, wait until it isn't
+	while( *((volatile unsigned int*)0xa4800018u) & 0x3 ) {
+		n64_set_interrupts( yield );
+		while( *((volatile unsigned int*)0xa4800018u) & 0x3 );
+		n64_set_interrupts( false );
+	}
+}
+
+static void n64_pif_save() {
+	s_prev_dma_addr = *((volatile unsigned int*)0xa4800000u);
+
+	// Save the state of PIF RAM to memory to restore it later.
+	// The joybus is only executed when using an SI DMA read, and not when doing
+	// a direct read via the memory-mapped address. Thus, this has no side effects.
+
+	volatile unsigned int *si_pif_ram = (volatile unsigned int *)0xbfc007c0u;
+	for( int i = 0; i < 16; i++ ) {
+		s_si_backup[i] = si_pif_ram[i];
+	}
+}
+
+static void n64_pif_restore() {
+	if( s_exec_on_write_bug ) {
+		// The emulator incorrectly executes the joybus on a DMA write instead of a read
+		// Clear the command register so it doesn't execute again
+		s_si_backup[15] &= 0xffffff00u;
+	} else {
+		// Re-parse the stored PIF RAM state
+		s_si_backup[15] |= 1u;
+	}
+
+	__builtin_mips_cache( 0x19, &s_si_backup[0] );
+	__builtin_mips_cache( 0x19, &s_si_backup[4] );
+	__builtin_mips_cache( 0x19, &s_si_backup[8] );
+	__builtin_mips_cache( 0x19, &s_si_backup[12] );
+
+	*((volatile unsigned int*)0xa4800000u) = (unsigned int)s_si_backup & 0x1FFFFFFFu;
+	asm volatile( "":::"memory" );
+	*((volatile unsigned int*)0xa4800010u) = 0x1fc007c0u;
+	asm volatile( "":::"memory" );
+	si_await_op();
+	
+	*((volatile unsigned int*)0xa4800000u) = s_prev_dma_addr;
+}
+
+static void n64_joybus_dma_write() {
+	__builtin_mips_cache( 0x19, &s_si_buffer[0] );
+	__builtin_mips_cache( 0x19, &s_si_buffer[4] );
+	__builtin_mips_cache( 0x19, &s_si_buffer[8] );
+	__builtin_mips_cache( 0x19, &s_si_buffer[12] );
+
+	*((volatile unsigned int*)0xa4800000u) = (unsigned int)s_si_buffer & 0x1FFFFFFFu;
+	asm volatile( "":::"memory" );
+	*((volatile unsigned int*)0xa4800010u) = 0x1fc007c0u;
+	asm volatile( "":::"memory" );
+
+	si_await_op();
+}
+
+static void n64_joybus_dma_read() {
+	*((volatile unsigned int*)0xa4800000u) = (unsigned int)s_si_buffer & 0x1FFFFFFFu;
+	asm volatile( "":::"memory" );
+	*((volatile unsigned int*)0xa4800004u) = 0x1fc007c0u;
+	asm volatile( "":::"memory" );
+
+	si_await_op();
+
+	__builtin_mips_cache( 0x11, &s_si_buffer[0] );
+	__builtin_mips_cache( 0x11, &s_si_buffer[4] );
+	__builtin_mips_cache( 0x11, &s_si_buffer[8] );
+	__builtin_mips_cache( 0x11, &s_si_buffer[12] );
+}
+
+__attribute__((always_inline))
+static inline void n64_joybus_exec() {
+	n64_joybus_dma_write();
+	n64_joybus_dma_read();
+}
+
+static inline unsigned char decode_rtc_byte( unsigned char x ) {
+	return (((x & 0xF0) >> 4) * 10) + (x & 0x0F);
+}
+
+static inline n64_bool is_leap_year( long long year ) {
+	if( year % 4ll != 0ll ) return false;
+	if( year % 100ll == 0ll ) return (year % 400ll == 0ll) ? true : false;
+	return false;
+}
+
+static n64_time_t year_to_unix_time( long long year ) {
+	register long long leapYears = -478ll;
+	leapYears += (year + 3ll) >> 2;
+	leapYears -= ((year > 0ll) ? (year + 99ll) : year) / 100ll;
+	leapYears += ((year > 0ll) ? (year + 399ll) : year) / 400ll;
+
+	return (
+		((year - 1970ll) * N64_SECONDS_IN_NON_LEAP_YEAR) +
+		(leapYears * N64_SECONDS_IN_DAY)
+	);
+}
+
+n64_time_t n64_to_unix_time( const n64_tm *time ) {
+	int month = time->tm_mon % 12;
+	if( month < 0 ) month += 12;
+
+	long long days = (long long)s_yday_table[month] + (long long)time->tm_mday - 1ll;
+	if( month >= 2 && is_leap_year( (long long)time->tm_year + 1900ll ) ) days++;
+	return (
+		year_to_unix_time( (long long)time->tm_year + 1900ll + (long long)(time->tm_mon / 12) ) +
+		(days * N64_SECONDS_IN_DAY) +
+		((long long)time->tm_hour * N64_SECONDS_IN_HOUR) +
+		((long long)time->tm_min * N64_SECONDS_IN_MINUTE) +
+		(long long)time->tm_sec
+	);
+}
+
+__attribute__((always_inline))
+static inline n64_bool is_emulator() {
+	register const volatile unsigned int *const dpc = (const volatile unsigned int*)0xA4100000u;
+	return !(dpc[5] | dpc[6] | dpc[7]);
+}
+
+n64_bool n64_rtc_init() {
+	if( g_rtc_state & N64_RTC_INIT_CALLED ) {
+		return g_rtc_state != N64_RTC_READY;
+	}
+
+	const n64_bool intr = n64_set_interrupts( false );
+	if( g_rtc_state & N64_RTC_INIT_CALLED ) {
+		n64_set_interrupts( intr );
+		return g_rtc_state != N64_RTC_READY;
+	}
+
+	si_wait_safe( intr );
+	g_rtc_state |= N64_RTC_INIT_CALLED;
+	n64_pif_save();
+
+	s_si_buffer[0] = 0u;
+	s_si_buffer[1] = 0xff010306u;
+	s_si_buffer[2] = 0xfffffffeu;
+	n64_memset( &s_si_buffer[3], 0, 48u );
+	s_si_buffer[15] = 1u;
+	n64_joybus_dma_write();
+
+	if(
+		*((volatile unsigned char*)0xbfc007c6u) != 0x03 ||
+		*((volatile unsigned char*)0xbfc007c8u) != 0xff
+	) {
+		// On hardware (and accurate emulators such as Ares), writing to the joybus merely causes it to parse the command.
+		// The commands are not actually executed until a DMA read is performed. So only the final byte should have changed.
+		s_exec_on_write_bug = true;
+	}
+
+	n64_joybus_dma_read();
+
+	if( s_si_buffer[2] >> 8 != 0x001000u ) {
+		n64_pif_restore();
+		n64_set_interrupts( intr );
+		return false;
+	}
+
+	g_rtc_state |= N64_RTC_GOOD;
+	g_wait_start = n64_clock();
+	g_wait_end = g_wait_start + (N64_CLOCKS_PER_SEC / 50u);
+
+	s_si_buffer[0] = 0u;
+	s_si_buffer[1] = 0x02090700u;
+	s_si_buffer[2] = 0u;
+	s_si_buffer[3] = 0u;
+	s_si_buffer[4] = 0x00fe0000u;
+	n64_memset( &s_si_buffer[5], 0, 40u );
+	s_si_buffer[15] = 1u;
+	n64_joybus_exec();
+
+	s_si_buffer[1] = 0x0a010800u;
+	s_si_buffer[2] = 0x03000000u;
+	s_si_buffer[4] = 0x00fe0000u;
+	s_si_buffer[15] = 1u;
+	n64_joybus_exec();
+
+	if( is_emulator() ) {
+		g_rtc_state |= N64_RTC_NOT_WAITING;
+	}
+
+	n64_pif_restore();
+	n64_set_interrupts( intr );
+	return true;
+}
+
+n64_bool n64_rtc_ready( n64_bool *good ) {
+	if( good ) *good = (g_rtc_state & N64_RTC_GOOD) >> 2;
+
+	if( !(g_rtc_state & N64_RTC_INIT_CALLED) ) return false;
+	if( !(g_rtc_state & N64_RTC_NOT_WAITING) ) {
+		const n64_clock_t now = n64_clock();
+		if( g_wait_end < g_wait_start ) {
+			if( now >= g_wait_end && now < g_wait_start ) {
+				g_rtc_state |= N64_RTC_NOT_WAITING;
+				return true;
+			} else {
+				return false;
+			}
+		} else {
+			if( now < g_wait_start || now >= g_wait_end ) {
+				g_rtc_state |= N64_RTC_NOT_WAITING;
+				return true;
+			} else {
+				return false;
+			}
+		}
+	}
+	
+	return true;
+}
+
+n64_bool n64_rtc_read( n64_tm *arg ) {
+	if( !(g_rtc_state & N64_RTC_READY) ) {
+		n64_rtc_init();
+		while( !n64_rtc_ready( NULL ) );
+	}
+
+	if( !(g_rtc_state & N64_RTC_GOOD) ) {
+		n64_memcpy( arg, &N64_UNIX_EPOCH_TM, sizeof( n64_tm ) );
+		return false;
+	}
+
+	const n64_bool intr = n64_set_interrupts( false );
+	si_wait_safe( intr );
+	n64_pif_save();
+
+	s_si_buffer[0] = 0u;
+	s_si_buffer[1] = 0x02090702u;
+	s_si_buffer[2] = 0x00008001u;
+	s_si_buffer[3] = 0x04017000u;
+	s_si_buffer[4] = 0x80fe0000u;
+	n64_memset( &s_si_buffer[5], 0, 40u );
+	s_si_buffer[15] = 1u;
+	n64_joybus_exec();
+
+	const unsigned char *const data = (const unsigned char*)&s_si_buffer[2];
+	arg->tm_sec = (int)decode_rtc_byte( data[0] );
+	arg->tm_min = (int)decode_rtc_byte( data[1] );
+	arg->tm_hour = (int)decode_rtc_byte( data[2] - 0x80 );
+	arg->tm_mday = (int)decode_rtc_byte( data[3] );
+	arg->tm_wday = (int)decode_rtc_byte( data[4] );
+	arg->tm_mon = (int)decode_rtc_byte( data[5] ) - 1u;
+	arg->tm_year = (100 * (int)decode_rtc_byte( data[7] )) + (int)decode_rtc_byte( data[6] );
+	if( arg->tm_mon < 12 ) arg->tm_yday = s_yday_table[arg->tm_mon] + arg->tm_mday - 1;
+	arg->tm_isdst = -1;
+
+	if( arg->tm_mon >= 2 && is_leap_year( 1900ll + (long long)arg->tm_year ) ) {
+		arg->tm_yday++;
+	}
+
+	n64_pif_restore();
+	n64_set_interrupts( intr );
+	return true;
+}
+
+n64_tm *n64_tm_add( n64_tm *time, n64_time_t seconds ) {
+	seconds += n64_to_unix_time( time );
+	return n64_gmtime_r( &seconds, time );
+}
+
+n64_time_t n64_time( n64_time_t *arg ) {
+	n64_tm now;
+	register const n64_time_t ts = n64_rtc_read( &now ) ? n64_to_unix_time( &now ) : (n64_time_t)-1;
+	if( arg ) *arg = ts;
+	return ts;
+}
+
+n64_time_t n64_mktime( n64_tm *time ) {
+	const n64_time_t ts = n64_to_unix_time( time );
+	n64_gmtime_r( &ts, time );
+	return ts;
+}
+
+static inline void setMonthAndDay( long long year, int yday, int *mon, int *mday ) {
+	if( yday < 31 ) {
+		*mon = 0;
+		*mday = yday + 1;
+		return;
+	} else if( yday < 59 ) {
+		*mon = 1;
+		*mday = yday - 30;
+		return;
+	} else if( yday == 59 ) {
+		if( is_leap_year( year ) ) {
+			*mon = 1;
+			*mday = 29;
+		} else {
+			*mon = 2;
+			*mday = 1;
+		}
+		return;
+	}
+
+	if( is_leap_year( year ) ) yday--;
+	if( yday >= 334 ) {
+		*mon = 11;
+		*mday = yday - 333;
+	} else for( int month = (yday + 4) >> 5;; month++ ) {
+		if( yday < s_yday_table[month+1] ) {
+			*mon = month;
+			*mday = yday + 1 - s_yday_table[month];
+			return;
+		}
+	}
+}
+
+n64_tm *n64_gmtime_r( const n64_time_t *timer, n64_tm *buf ) {
+	buf->tm_isdst = 0;
+
+	if( *timer >= 67768036191676800ll ) {
+		// set to maximum valid n64_tm
+		buf->tm_sec = 59;
+		buf->tm_min = 59;
+		buf->tm_hour = 23;
+		buf->tm_mday = 31;
+		buf->tm_mon = 11;
+		buf->tm_year = 0x7FFFFFFF;
+		buf->tm_wday = 3;
+		buf->tm_yday = 364;
+		return NULL;
+	} else if( *timer < -67768040609721748ll ) {
+		// set to minimum valid n64_tm
+		buf->tm_sec = 0;
+		buf->tm_min = 0;
+		buf->tm_hour = 0;
+		buf->tm_mday = 1;
+		buf->tm_mon = 0;
+		buf->tm_year = -0x80000000;
+		buf->tm_wday = 4;
+		buf->tm_yday = 0;
+		return NULL;
+	}
+
+	long long yearGuess = 1970ll + (*timer / 31556952ll);
+	long long yearTs = year_to_unix_time( yearGuess );
+	if( *timer < yearTs ) {
+		do {
+			yearTs = year_to_unix_time( --yearGuess );
+		} while( *timer < yearTs );
+	} else if( yearGuess > 0x7fffffffll ) {
+		yearGuess = 0x7fffffffll;
+		yearTs = year_to_unix_time( yearGuess );
+	} else {
+		register const long long nextYearTs = year_to_unix_time( yearGuess + 1ll );
+		if( *timer >= nextYearTs ) {
+			yearGuess++;
+			yearTs = nextYearTs;
+		}
+	}
+
+	int ts = (int)(*timer - yearTs);
+	buf->tm_year = (int)(yearGuess - 1900ll);
+	buf->tm_yday = ts / (int)N64_SECONDS_IN_DAY;
+	ts %= (int)N64_SECONDS_IN_DAY;
+	setMonthAndDay( yearGuess, buf->tm_yday, &buf->tm_mon, &buf->tm_mday );
+	buf->tm_hour = ts / (int)N64_SECONDS_IN_HOUR;
+	ts %= (int)N64_SECONDS_IN_HOUR;
+	buf->tm_min = ts / (int)N64_SECONDS_IN_MINUTE;
+	buf->tm_sec = ts % (int)N64_SECONDS_IN_MINUTE;
+	buf->tm_wday = (int)((4ll + (*timer / N64_SECONDS_IN_DAY)) % 7ll);
+	if( buf->tm_wday < 0 ) buf->tm_wday += 7;
+
+	return buf;
+}
+
+static inline n64_bool strftime_push_text( char *str, unsigned int *i, unsigned int count, const char *text ) {
+	for( ; *text; (*i)++ ) {
+		if( *i >= count - 1 ) return false;
+		str[*i] = *(text++);
+	}
+	return true;
+}
+
+static inline n64_bool strftime_push_number( char *str, unsigned int *i, unsigned int count, long long n ) {
+	if( n < 0ll ) {
+		str[(*i)++] = '-';
+		n = -n;
+	}
+
+	char buff[17];
+	unsigned int j = 0u;
+	do {
+		buff[j++] = '0' + (char)(n % 10ll);
+		n /= 10ll;
+	} while( n );
+
+	if( *i + j < *i || *i + j >= count ) return false;
+	for( int k = (int)j - 1; k >= 0; k-- ) {
+		str[(*i)++] = buff[k];
+	}
+
+	return true;
+}
+
+static inline int get_wby_days( int day, int wday ) {
+	return day - ((day - wday + 382) % 7) + 3;
+}
+
+static long long get_wby_years( long long year, int day, int wday ) {
+	if( get_wby_days( day, wday ) < 0 ) {
+		return year - 1ll;
+	} else if( get_wby_days( day - (is_leap_year( year ) ? 366ll : 365ll), wday ) > 0 ) {
+		return year + 1ll;
+	} else {
+		return year;
+	}
+}
+
+static int get_wby_weeks( long long year, int day, int wday ) {
+	int days = get_wby_days( day, wday );
+	if( days < 0 ) {
+		days = get_wby_days( day + (is_leap_year( year ) ? 366ll : 365ll), wday );
+	} else {
+		const int days2 = get_wby_days( day - (is_leap_year( year ) ? 366ll : 365ll), wday );
+		if( days2 > 0 ) days = days2;
+	}
+
+	return 1 + (days / 7);
+}
+
+unsigned int n64_strftime_internal( char *str, unsigned int count, const char *format, const n64_tm *tp ) {
+	const long long year = (long long)tp->tm_year + 1900ll;
+	for( unsigned int i = 0; i < count; format++ ) {
+		if( !*format ) {
+			str[i] = '\0';
+			return i;
+		} else if( *format != '%' ) {
+			str[i++] = *format;
+			continue;
+		}
+
+		format++;
+		const n64_bool alt = (*format == '0');
+		if( *format == 'E' || alt ) {
+			format++;
+		}
+
+		switch( *format ) {
+			case '%':
+				str[i++] = '%';
+				break;
+			case 'n':
+				str[i++] = '\n';
+				break;
+			case 't':
+				str[i++] = '\t';
+				break;
+			case 'Y':
+				if( !strftime_push_number( str, &i, count, year ) ) return 0;
+				break;
+			case 'y':
+				if( i < count - 2 ) {
+					register int yy = (int)(year % 100ll);
+					if( yy < 0 ) yy += 100;
+					str[i++] = '0' + (char)(yy / 10);
+					str[i++] = '0' + (char)(yy % 10);
+					break;
+				} else return 0;
+			case 'C':
+				if( !strftime_push_number( str, &i, count, year / 100ll ) ) return 0;
+				break;
+			case 'G':
+				if( !strftime_push_number( str, &i, count, get_wby_years(year, tp->tm_yday, tp->tm_wday ) ) ) return 0;
+				break;
+			case 'g':
+				if( i < count - 2 ) {
+					register int wyear = (int)(get_wby_years(year, tp->tm_yday, tp->tm_wday ) % 100ll);
+					if( wyear < 0 ) wyear += 100;
+					str[i++] = '0' + (char)(wyear / 10);
+					str[i++] = '0' + (char)(wyear % 10);
+					break;
+				} else return 0;
+				break;
+			case 'b':
+			case 'h':
+				if( i >= count - 3 ) return 0;
+				str[i++] = s_monthNames[tp->tm_mon][0];
+				str[i++] = s_monthNames[tp->tm_mon][1];
+				str[i++] = s_monthNames[tp->tm_mon][2];
+				break;
+			case 'B':
+				if( !strftime_push_text( str, &i, count, s_monthNames[tp->tm_mon] ) ) return 0;
+				break;
+			case 'm':
+				if( i >= count - 2 ) return 0;
+				str[i++] = '0' + (char)((tp->tm_mon + 1) / 10);
+				str[i++] = '0' + (char)((tp->tm_mon + 1) % 10);
+				break;
+			case 'U':
+				if( i >= count - 2 ) return 0;
+				str[i++] = '0' + (char)((tp->tm_yday - tp->tm_wday + 7) / 70);
+				str[i++] = '0' + (char)(((tp->tm_yday - tp->tm_wday + 7) / 7) % 10);
+				break;
+			case 'W':
+				if( i >= count - 2 ) return 0;
+				str[i++] = '0' + (char)((tp->tm_yday - ((tp->tm_wday + 6) % 7) + 7) / 70);
+				str[i++] = '0' + (char)(((tp->tm_yday - ((tp->tm_wday + 6) % 7) + 7) / 7) % 10);
+				break;
+			case 'V':
+				if( i < count - 2 ) {
+					register const int week = get_wby_weeks(year, tp->tm_yday, tp->tm_wday );
+					str[i++] = '0' + (char)(week / 10);
+					str[i++] = '0' + (char)(week % 10);
+					break;
+				} else return 0;
+			case 'j':
+				if( i >= count - 3 ) return 0;
+				str[i++] = '0' + (char)(tp->tm_yday / 100);
+				str[i++] = '0' + (char)((tp->tm_yday / 10) % 10);
+				str[i++] = '0' + (char)(tp->tm_yday % 10);
+				break;
+			case 'd':
+				if( i >= count - 2 ) return 0;
+				str[i++] = '0' + (char)(tp->tm_mday / 10);
+				str[i++] = '0' + (char)(tp->tm_mday % 10);
+				break;
+			case 'e':
+				if( i >= count - 2 ) return 0;
+				if( alt || tp->tm_mday > 9) {
+					str[i++] = '0' + (char)(tp->tm_mday / 10);
+					str[i++] = '0' + (char)(tp->tm_mday % 10);
+				} else {
+					str[i++] = ' ';
+					str[i++] = '0' + (char)tp->tm_mday;
+				}
+				break;
+			case 'a':
+				if( i >= count - 3 ) return 0;
+				str[i++] = s_weekdayNames[tp->tm_wday][0];
+				str[i++] = s_weekdayNames[tp->tm_wday][1];
+				str[i++] = s_weekdayNames[tp->tm_wday][2];
+				break;
+			case 'A':
+				if( !strftime_push_text( str, &i, count, s_weekdayNames[tp->tm_wday] ) ) return 0;
+				break;
+			case 'w':
+				str[i++] = '0' + (char)tp->tm_wday;
+				break;
+			case 'u':
+				str[i++] = tp->tm_wday ? ('0' + (char)tp->tm_wday) : '7';
+				break;
+			case 'H':
+				if( i >= count - 2 ) return 0;
+				str[i++] = '0' + (char)(tp->tm_hour / 10);
+				str[i++] = '0' + (char)(tp->tm_hour % 10);
+				break;
+			case 'I':
+				if( i >= count - 2 ) return 0;
+				str[i++] = '0' + (char)((1 + ((tp->tm_hour + 11) % 12)) / 10);
+				str[i++] = '0' + (char)((1 + ((tp->tm_hour + 11) % 12)) % 10);
+				break;
+			case 'M':
+				if( i >= count - 2 ) return 0;
+				str[i++] = '0' + (char)(tp->tm_min / 10);
+				str[i++] = '0' + (char)(tp->tm_min % 10);
+				break;
+			case 'S':
+				if( i >= count - 2 ) return 0;
+				str[i++] = '0' + (char)(tp->tm_sec / 10);
+				str[i++] = '0' + (char)(tp->tm_sec % 10);
+				break;
+			case 'p':
+				if( i >= count - 2 ) return 0;
+				str[i++] = (tp->tm_hour < 12) ? 'A' : 'P';
+				str[i++] = 'M';
+				break;
+			case 'c':
+				if( i < count - 21 ) {
+					const unsigned int j = n64_strftime_internal( &str[i], count - (unsigned int)i, "%a %b %e %H:%M:%S %Y", tp );
+					if( !j ) return 0;
+					i += j;
+					break;
+				} else return 0;
+			case 'x':
+			case 'D':
+				if( i >= count - 8 ) return 0;
+				i += n64_strftime_internal( &str[i], count - (unsigned int)i, "%m/%d/%y", tp );
+				break;
+			case 'X':
+			case 'T':
+				if( i >= count - 8 ) return 0;
+				i += n64_strftime_internal( &str[i], count - (unsigned int)i, "%H:%M:%S", tp );
+				break;
+			case 'F':
+				if( i < count - 7 ) {
+					const unsigned int j = n64_strftime_internal( &str[i], count - (unsigned int)i, "%Y-%m-%d", tp );
+					if( !j ) return 0;
+					i += j;
+					break;
+				} else return 0;
+			case 'r':
+				if( i >= count - 11 ) return 0;
+				i += n64_strftime_internal( &str[i], count - (unsigned int)i, "%I:%M:%S %p", tp );
+				break;
+			case 'R':
+				if( i >= count - 5 ) return 0;
+				i += n64_strftime_internal( &str[i], count - (unsigned int)i, "%H:%M", tp );
+				break;
+			default:
+				return 0;
+		}
+	}
+
+	return 0;
+}
+
+unsigned int n64_strftime( char *str, unsigned int count, const char *format, const n64_tm *tp ) {
+	n64_tm time;
+	n64_memcpy( &time, tp, sizeof( n64_tm ) );
+	n64_mktime( &time );
+
+	str[count - 1] = '\0';
+	return n64_strftime_internal( str, count, format, &time );
+}
diff --git a/lib/n64-libc/n64-time.h b/lib/n64-libc/n64-time.h
new file mode 100644
index 00000000..350ba0ff
--- /dev/null
+++ b/lib/n64-libc/n64-time.h
@@ -0,0 +1,129 @@
+#ifndef N64_STDLIB_N64_TIME_H_
+#define N64_STDLIB_N64_TIME_H_
+
+#include "n64-stddef.h"
+#include "n64-stdbool.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef long long n64_time_t;
+typedef unsigned int n64_clock_t;
+
+// Don't expect this to be accurate on emulator!
+#define N64_CLOCKS_PER_SEC 46875000u
+
+#ifndef CLOCKS_PER_SEC
+	#define CLOCKS_PER_SEC N64_CLOCKS_PER_SEC
+#endif
+
+typedef struct {
+	int tm_sec; // seconds after the minute [0-59]
+	int tm_min; // minutes after the hour [0-59]
+	int tm_hour; // hours since midnight [0-23]
+	int tm_mday; // day of the month [1-31]
+	int tm_mon; // months since January [0-11]
+	int tm_year; // years since 1900 (remember the epoch is 1900, not AD/CE!)
+	int tm_wday; // days since Sunday [0-6]
+	int tm_yday; // days since January 1 [0-365]
+	int tm_isdst; // Field included for the sake of matching the C STL. Has no effect.
+} n64_tm;
+
+/**********************************************************************************
+* New functions made for this library that are not part of the C standard library *
+**********************************************************************************/
+
+/* !NOTICE!
+ * Some flashcarts have buggy RTC implementations that do not correctly set status
+ * registers. To handle these cases, after initializing the RTC, we need to wait a
+ * short time (~20ms) before requesting the time to make sure the RTC has had time
+ * to initialize, since we can't rely on the status registers to know when it's
+ * ready.
+ * 
+ * Some helper functions are provided to help handle this case as laglessly as
+ * possible. You are NOT required to call the init functions, and can simply use
+ * the n64_rtc_read and n64_time functions without any init call; however, this
+ * will result in a short stutter when playing on console as the library will wait
+ * 20ms on the very first call to either n64_rtc_read or n64_time to ensure we
+ * don't read garbage data from buggy flashcarts.
+ * 
+ * To avoid this stutter on console, you can call n64_rtc_init() at the start of
+ * your program to initialize the RTC at this point. Then, the next call to
+ * n64_rtc_read or n64_time will only wait if it has not yet been 20ms since
+ * n64_rtc_init was called.
+ * 
+ * To determine whether 20ms has passed, a comparison is done to the COUNT
+ * register, but since this overflows every ~90 seconds, if the very first call to
+ * n64_rtc_read or n64_time occurs precisely at this time, it will incorrectly
+ * believe that 20ms have not passed yet. To avoid this scenario, you can call
+ * n64_rtc_ready( NULL ) periodically (such as at the start of each frame) to check
+ * if the 20ms timer has finished, which will mark it as complete if it has, so you
+ * will no longer need to worry about the clock counter overflowing.
+ */
+
+/* Initializes the RTC clock. Returns true if the RTC is supported, and false
+ * otherwise. See the comment above for more information about what this call
+ * is used for.
+ */
+n64_bool n64_rtc_init();
+
+/* Returns true if it has been at least 20ms since n64_rtc_init() was called
+ * or if it is being played on emulator.
+ */
+__attribute__((access(write_only, 1)))
+n64_bool n64_rtc_ready( n64_bool *good );
+
+/* New function that directly puts the current time in an n64_tm struct.
+ *
+ * Because the RTC already provides the time in a tm-like format, this is
+ * much faster than using n64_time followed by n64_gmtime_r.
+ *
+ * Note that the n64_tm struct still matches the C standard library's tm struct--
+ * of particular note is that tm_year is the number of years since 1900
+ *
+ * If the flashcart or emulator does not support RTC, returns false
+ */
+__attribute__((nonnull(1), access(write_only, 1)))
+n64_bool n64_rtc_read( n64_tm *time );
+
+/* Adds `seconds` to `time` and normalizes it. Returns back the `time` pointer.
+ * If this would cause the time to overflow, it is capped at its min/max value.
+ */
+__attribute__((nonnull(1), access(read_write, 1), returns_nonnull))
+n64_tm *n64_tm_add( n64_tm *time, n64_time_t seconds );
+
+/* Like n64_mktime, but doesn't normalize the input time */
+__attribute__((pure, nonnull(1), warn_unused_result))
+n64_time_t n64_to_unix_time( const n64_tm *time );
+
+/**********************************************************************************
+* C standard library functions                                                    *
+**********************************************************************************/
+
+__attribute__((always_inline, warn_unused_result))
+static inline n64_clock_t n64_clock() {
+	n64_clock_t count;
+	asm volatile( "mfc0 %0, $9": "=r"( count ) );
+	return count;
+}
+
+__attribute__((access(write_only, 1)))
+n64_time_t n64_time( n64_time_t *arg );
+
+// Assumes UTC timezone
+__attribute__((nonnull(1), access(read_write, 1)))
+n64_time_t n64_mktime( n64_tm *time );
+
+__attribute__((nonnull(1, 2), access(write_only, 2)))
+n64_tm *n64_gmtime_r( const n64_time_t *timer, n64_tm *buf );
+
+// Assumes UTC timezone and C locale
+__attribute__((format(strftime, 3, 0), nonnull(1, 3), access(write_only, 1)))
+unsigned int n64_strftime( char *str, unsigned int count, const char *format, const n64_tm *tp );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/n64-libc/n64-util.c b/lib/n64-libc/n64-util.c
new file mode 100644
index 00000000..b3e9472f
--- /dev/null
+++ b/lib/n64-libc/n64-util.c
@@ -0,0 +1,24 @@
+#include "n64-util.h"
+#include "n64-stdbool.h"
+
+__attribute__((noinline))
+static n64_bool n64_enable_interrupts() {
+	register n64_bool status;
+	asm volatile( ".set noat \n\t .align 16 \n\t mfc0 $1, $12 \n\t andi %0, $1, 1 \n\t ori  $1, $1, 1  \n\t mtc0 $1, $12" : "=r"( status ) :: "at" );
+	return status;
+}
+
+__attribute__((noinline))
+static n64_bool n64_disable_interrupts() {
+	register n64_bool status;
+	asm volatile( ".set noat \n\t .align 16 \n\t mfc0 $1, $12 \n\t andi %0, $1, 1 \n\t subu $1, $1, %0 \n\t mtc0 $1, $12" : "=r"( status ) :: "at" );
+	return status;
+}
+
+n64_bool n64_set_interrupts( n64_bool enable ) {
+	return enable ? n64_enable_interrupts() : n64_disable_interrupts();
+}
+
+void n64_await_pi_io() {
+	while( *((volatile unsigned int*)0xa4600010u) & 0x3u );
+}
diff --git a/lib/n64-libc/n64-util.h b/lib/n64-libc/n64-util.h
new file mode 100644
index 00000000..e7432802
--- /dev/null
+++ b/lib/n64-libc/n64-util.h
@@ -0,0 +1,62 @@
+#ifndef N64_STDLIB_N64_UTIL_H_
+#define N64_STDLIB_N64_UTIL_H_
+
+#include "n64-stdbool.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Enables or disables interrupts and returns whether interrupts were previously enabled or not
+ * 
+ * Example usage:
+ * const n64_bool intr = n64_set_interrupts( false );
+ * // Do critical section work here
+ * n64_set_interrupts( intr );
+ */
+n64_bool n64_set_interrupts( n64_bool enable );
+
+/* Waits for PI I/O operations (like a DMA) to complete. You should disable interrupts before calling this.
+ *
+ * Example usage:
+ * const n64_bool intr = n64_set_interrupts( false );
+ * n64_await_pi_io();
+ * // Now you can safely do a PI write here
+ * n64_set_interrupts( intr );
+ */
+void n64_await_pi_io();
+
+__attribute__((always_inline))
+static inline void n64_memory_barrier() {
+	asm volatile( "":::"memory" );
+}
+
+__attribute__((const, always_inline))
+static inline unsigned int n64_bit_cast_ftoi( float x ) {
+	union { float f; unsigned int i; } b = { x };
+	return b.i;
+}
+
+__attribute__((const, always_inline))
+static inline float n64_bit_cast_itof( unsigned int x ) {
+	union { unsigned int i; float f; } b = { x };
+	return b.f;
+}
+
+__attribute__((const, always_inline))
+static inline unsigned long long n64_bit_cast_dtol( double x ) {
+	union { double f; unsigned long long i; } b = { x };
+	return b.i;
+}
+
+__attribute__((const, always_inline))
+static inline double n64_bit_cast_ltod( unsigned long long x ) {
+	union { unsigned long long i; double f; } b = { x };
+	return b.f;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif