From c9720c06d3b3d73a31f738f646060436c7071e21 Mon Sep 17 00:00:00 2001
From: Sauraen <sauraen@gmail.com>
Date: Fri, 31 May 2024 22:34:13 -0700
Subject: [PATCH] Added SPDontSkipTexLoadsAcross

---
 README.md |  4 +++-
 gbi.h     | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 432c929..401b2db 100644
--- a/README.md
+++ b/README.md
@@ -67,7 +67,9 @@ breaking changes.**
   texture loads in the material are skipped (the second time). This effectively
   results in **auto-batched rendering** of repeated objects, as long as each
   only uses one material. This system supports multitexture and all types of
-  loads.
+  loads. If this system wrongly skips texture loads that look repeated but
+  actually differ because a segment was remapped in between, you can disable
+  it locally with the new `SPDontSkipTexLoadsAcross` command.
 - New `SPTriangleStrip` and `SPTriangleFan` commands **pack up to 5 tris** into
   one 64-bit GBI command (up from 2 tris in F3DEX2). In any given object, most
   tris can be drawn with these commands, with only a few at the end drawn with
diff --git a/gbi.h b/gbi.h
index f179962..15d01f9 100644
--- a/gbi.h
+++ b/gbi.h
@@ -263,6 +263,7 @@ longer a multiple of 8 (DMA word). This was not used in any command anyway. */
 #define G_MWO_ATTR_OFFSET_Z      0x14
 #define G_MWO_ALPHA_COMPARE_CULL 0x16
 #define G_MWO_NORMALS_MODE       0x18
+#define G_MWO_LAST_MAT_DL_ADDR   0x1A /* set by gSPDontSkipTexLoadsAcross */
 
 /*
  * RDP command argument defines
@@ -2854,6 +2855,38 @@ _DW({                                         \
 #define gsSPNormalsMode(mode) \
     gsMoveHalfwd(G_MW_FX, G_MWO_NORMALS_MODE, (mode) & 0xFF)
 
+/*
+ * F3DEX3 has a basic auto-batched rendering system. At a high level, if the
+ * material display list being run is the same as the last material, its
+ * texture loads are automatically skipped the second time, as the textures
+ * should already be in TMEM.
+ *
+ * This design generally works, but can break if you call a display list twice
+ * and change a segment mapping in between, so that an image it references is
+ * actually different the second time. In these cases, run the below command
+ * between the two calls (e.g. when you change the segment) and the microcode
+ * will not skip the texture loads of the second call.
+ *
+ * Internally, a material is defined to start with any set image command, and
+ * end on any of the following: call, branch, return, vertex, any tri command,
+ * modify vertex, branch Z/W, or cull. The physical address of the display list
+ * (not the address of the image) is stored when a material is started. If a
+ * material starts at the same physical address as the stored last start
+ * address, i.e. we're executing the same material display list as the last
+ * material, material cull mode is set. In this mode, load block, load tile, and
+ * load TLUT are all skipped. This mode is cleared when the material ends.
+ *
+ * This design has the benefit that it works correctly even with complex
+ * materials, e.g. with two CI4 textures (four loads), whereas tracking all
+ * these loads separately would be difficult to implement. Furthermore, a
+ * design based on tracking the image addresses could break if you loaded
+ * different tile sections of the same image in consecutive materials.
+ */
+#define gSPDontSkipTexLoadsAcross(pkt) \
+    gMoveWd(pkt, G_MW_FX, G_MWO_LAST_MAT_DL_ADDR, 0xFFFFFFFF)
+#define gsSPDontSkipTexLoadsAcross() \
+    gsMoveWd(G_MW_FX, G_MWO_LAST_MAT_DL_ADDR, 0xFFFFFFFF)
+
 typedef union {
     struct {
         s16 intPart[3][4];  /* Fourth row containing translations is omitted. */