tests/hlsl: Test minimum precision stride in constant buffers.
commit fdc173506e
parent bd6dbd096f
committed by Henri Verbeet

Notes:
    Henri Verbeet
    2025-05-24 21:47:37 +02:00
    Approved-by: Henri Verbeet (@hverbeet)
    Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1507
@@ -73,3 +73,76 @@ uniform 0 uint4 0 0 0 0
uniform 4 uint 0
draw quad
probe (0, 0) rgbaui (1, 2, 0, 0)

% In SM4-5 minimum precision integers in constant buffers are treated just like
% their 32-bit counterparts.
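%
% A worked sketch of the layout this implies, assuming the standard cbuffer
% packing rules (each register is a 16-byte row of four 32-bit components):
%
%     row 0 (offset 0): p.x  p.y  p.z  p.w
%     row 1 (offset 4): q.x  q.y  q.z  (unused)
%     row 2 (offset 8): r.x  r.y  s    (unused)
%
% r needs two components and cannot straddle a row boundary, so it starts row 2
% rather than filling the .w of row 1, and s is packed right after it. This
% matches the "uniform 0/4/8" offsets used in the [test] sections below.
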
[require]
shader model >= 4.0
shader model < 6.0

[pixel shader]
uniform min16uint4 p;
uniform min16uint3 q;
uniform min16uint2 r;
uniform min16uint s;

uint4 main() : sv_target
{
    if (p.x == 0x020001 && p.y == 0x040003 && p.z == 0x060005 && p.w == 0x080007
            && q.x == 0x120011 && q.y == 0x140013 && q.z == 0x160015
            && r.x == 0x220021 && r.y == 0x240023 && s == 0x260025)
        return 1;
    return 0;
}

[test]
uniform 0 uint4 0x020001 0x040003 0x060005 0x080007
uniform 4 uint4 0x120011 0x140013 0x160015 0x180017
uniform 8 uint4 0x220021 0x240023 0x260025 0x280027
draw quad
probe (0, 0) rgbaui(1, 1, 1, 1)

% Minimum precision types have funny behavior with respect to stride in SM6:
% DXC allocates them assuming they're 32-bit, and the generated code first
% loads a 16-byte row from the constant buffer and then indexes inside it.
% Drivers agree that each row is indeed 16 bytes long, but disagree on the
% stride used to index within a given row: on NVIDIA and WARP the stride is 2,
% so the 16-bit values are packed in the lower half of the row and the upper
% half is unused; on AMD the stride is 4, so only the lower 16-bit word of
% each of the four 32-bit values composing a row is used. The offsets
% generated by DXC seem to hint that AMD is right here (and, in particular,
% that Microsoft's own WARP implementation is wrong), but those offsets do not
% appear in the generated code, so each driver takes its own stance anyway. We
% have no choice but to accept both behaviors as valid.
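%
% A worked sketch using the data uploaded below: row 0 holds the 32-bit values
% 0x020001 0x040003 0x060005 0x080007, i.e. the 16-bit words 0x0001 through
% 0x0008 in order. Indexing that row with a stride of 4 gives
% p = (0x0001, 0x0003, 0x0005, 0x0007); with a stride of 2 it gives
% p = (0x0001, 0x0002, 0x0003, 0x0004). These are exactly the two cases the
% shader below accepts.
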
[require]
shader model >= 6.0

[pixel shader]
uniform min16uint4 p;
uniform min16uint3 q;
uniform min16uint2 r;
uniform min16uint s;

uint4 main() : sv_target
{
    /* On AMD the stride is 4. */
    if (p.x == 0x01 && p.y == 0x03 && p.z == 0x05 && p.w == 0x07
            && q.x == 0x11 && q.y == 0x13 && q.z == 0x15
            && r.x == 0x21 && r.y == 0x23 && s == 0x25)
        return 1;
    /* On NVIDIA and WARP the stride is 2. */
    if (p.x == 0x01 && p.y == 0x02 && p.z == 0x03 && p.w == 0x04
            && q.x == 0x11 && q.y == 0x12 && q.z == 0x13
            && r.x == 0x21 && r.y == 0x22 && s == 0x23)
        return 1;
    return 0;
}

[test]
uniform 0 uint4 0x020001 0x040003 0x060005 0x080007
uniform 4 uint4 0x120011 0x140013 0x160015 0x180017
uniform 8 uint4 0x220021 0x240023 0x260025 0x280027
draw quad
todo probe (0, 0) rgbaui(1, 1, 1, 1)