tests/hlsl: Test minimum precision stride in constant buffers.
Committed by: Henri Verbeet
Parent: bd6dbd096f
Commit: fdc173506e

Notes (Henri Verbeet, 2025-05-24 21:47:37 +02:00):
    Approved-by: Henri Verbeet (@hverbeet)
    Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1507
@@ -73,3 +73,76 @@ uniform 0 uint4 0 0 0 0
 uniform 4 uint 0
 draw quad
 probe (0, 0) rgbaui (1, 2, 0, 0)
+
+% In SM4-5, minimum precision integers in constant buffers are treated just
+% like their 32-bit counterparts.
+
+[require]
+shader model >= 4.0
+shader model < 6.0
+
+[pixel shader]
+uniform min16uint4 p;
+uniform min16uint3 q;
+uniform min16uint2 r;
+uniform min16uint s;
+
+uint4 main() : sv_target
+{
+    if (p.x == 0x020001 && p.y == 0x040003 && p.z == 0x060005 && p.w == 0x080007
+            && q.x == 0x120011 && q.y == 0x140013 && q.z == 0x160015
+            && r.x == 0x220021 && r.y == 0x240023 && s == 0x260025)
+        return 1;
+    return 0;
+}
+
+[test]
+uniform 0 uint4 0x020001 0x040003 0x060005 0x080007
+uniform 4 uint4 0x120011 0x140013 0x160015 0x180017
+uniform 8 uint4 0x220021 0x240023 0x260025 0x280027
+draw quad
+probe (0, 0) rgbaui(1, 1, 1, 1)
+
+% Minimum precision types have a funny behavior with respect to stride in
+% SM6: DXC allocates them as if they were 32-bit, and the generated code
+% first loads a 16-byte row from the constant buffer and then indexes inside
+% it. Drivers agree that each row is indeed 16 bytes long, but disagree on
+% the stride used to index within a given row: on NVIDIA and WARP the stride
+% is 2, so the 16-bit values are packed in the lower half of the row and the
+% upper half is unused; on AMD the stride is 4, so only the lower 16-bit word
+% of each of the four 32-bit values composing a row is used. The offsets
+% generated by DXC seem to hint that AMD is right here (and, in particular,
+% that Microsoft's own WARP implementation is wrong), but the offsets do not
+% appear in the code, so each driver takes its own stance anyway. We have no
+% choice but to accept both behaviors as valid.
+
+[require]
+shader model >= 6.0
+
+[pixel shader]
+uniform min16uint4 p;
+uniform min16uint3 q;
+uniform min16uint2 r;
+uniform min16uint s;
+
+uint4 main() : sv_target
+{
+    /* On AMD the stride is 4. */
+    if (p.x == 0x01 && p.y == 0x03 && p.z == 0x05 && p.w == 0x07
+            && q.x == 0x11 && q.y == 0x13 && q.z == 0x15
+            && r.x == 0x21 && r.y == 0x23 && s == 0x25)
+        return 1;
+    /* On NVIDIA and WARP the stride is 2. */
+    if (p.x == 0x01 && p.y == 0x02 && p.z == 0x03 && p.w == 0x04
+            && q.x == 0x11 && q.y == 0x12 && q.z == 0x13
+            && r.x == 0x21 && r.y == 0x22 && s == 0x23)
+        return 1;
+    return 0;
+}
+
+[test]
+uniform 0 uint4 0x020001 0x040003 0x060005 0x080007
+uniform 4 uint4 0x120011 0x140013 0x160015 0x180017
+uniform 8 uint4 0x220021 0x240023 0x260025 0x280027
+draw quad
+todo probe (0, 0) rgbaui(1, 1, 1, 1)
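
A brief aside on the SM4-5 test above: each min16uint component occupies a full 32-bit constant register component, which is why that shader compares against the complete dwords uploaded by its [test] section. Below is a minimal host-side C sketch of that layout; it is not part of the commit, and the cb array is simply a restatement of the three uint4 uniforms from the test data.

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* The three uint4 uniforms uploaded by the [test] section. */
        const uint32_t cb[12] =
        {
            0x020001, 0x040003, 0x060005, 0x080007,
            0x120011, 0x140013, 0x160015, 0x180017,
            0x220021, 0x240023, 0x260025, 0x280027,
        };

        /* p (min16uint4) fills register 0; q (min16uint3) takes the x/y/z
         * components of register 1; r (min16uint2) cannot fit in the one
         * remaining component, so it starts register 2, with s packed after
         * it in the same row. */
        assert(cb[0] == 0x020001);  /* p.x */
        assert(cb[3] == 0x080007);  /* p.w */
        assert(cb[4] == 0x120011);  /* q.x */
        assert(cb[8] == 0x220021);  /* r.x */
        assert(cb[10] == 0x260025); /* s */
        return 0;
    }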
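
The stride disagreement described in the second test's comment can be reproduced on the host. The following C sketch is an illustration only, assuming a little-endian host; row mirrors the first 16-byte row uploaded by the [test] section, and load16 is a hypothetical helper. It decodes the same row with both strides and recovers exactly the two value sets the SM6 shader accepts.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Read the 16-bit value for component 'idx' of a 16-byte row, using the
     * given per-component stride in bytes. */
    static uint16_t load16(const void *row, unsigned int idx, unsigned int stride)
    {
        uint16_t v;
        memcpy(&v, (const char *)row + idx * stride, sizeof(v));
        return v;
    }

    int main(void)
    {
        /* The first 16-byte row uploaded by the [test] section. */
        const uint32_t row[4] = {0x020001, 0x040003, 0x060005, 0x080007};
        unsigned int i;

        /* Stride 4 (AMD): component i reads the low word of dword i,
         * giving p = (0x01, 0x03, 0x05, 0x07), the shader's first branch. */
        for (i = 0; i < 4; ++i)
            printf("stride 4, p.%c = 0x%02x\n", "xyzw"[i], load16(row, i, 4));

        /* Stride 2 (NVIDIA, WARP): the components are packed in the lower
         * half of the row, giving p = (0x01, 0x02, 0x03, 0x04), the
         * shader's second branch. */
        for (i = 0; i < 4; ++i)
            printf("stride 2, p.%c = 0x%02x\n", "xyzw"[i], load16(row, i, 2));

        return 0;
    }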