[require]
shader model >= 4.0


[pixel shader]
/* Declares minimum-precision float, int and uint variables in scalar, vector
 * and matrix shapes, using both the shorthand type names and the generic
 * vector<>/matrix<> spellings, then folds all of them into one float4. */
float4 main() : sv_target
{
    min16float4 a = {0, 1, 2, 3};
    min10float2 b = {4, 5};
    /* Some of the integer-typed variables (c, d, e here and h below) are
     * given a fractional initialiser, so an implicit float-to-integer
     * conversion is part of what is compiled. */
    min16int3 c = {6.4, 7, 8};
    min12int d = 9.4;
    min16uint4x2 e = {14.4, 15, 16, 17, 18, 19, 20, 21};
    /* Minimum-precision element types used with the template-like syntax. */
    vector<min10float, 3> g = {22, 23, 24};
    matrix<min16uint, 3, 2> h = {25.4, 26, 27, 28, 29, 30};

    /* mul(e, b) multiplies the 4x2 matrix by the 2-component vector, giving
     * four components. The swizzles widen c, g and row 2 of h to four
     * components too, and the scalar d is added to every component. */
    return mul(e, b) + a + c.xyzx + d + g.xxyz + h[2].xyxy;
}

[test]
draw quad
probe (0, 0) rgba (197.0, 218.0, 238.0, 257.0)

[rtv 0]
format r32g32b32a32-uint
size (2d, 640, 480)

% In SM4-5 minimum precision integers in constant buffers are treated just like
% their 32-bit counterparts.

[require]
shader model >= 4.0
shader model < 6.0

[pixel shader]
/* Minimum-precision unsigned uniforms, one of each width from four
 * components down to a scalar. */
uniform min16uint4 p;
uniform min16uint3 q;
uniform min16uint2 r;
uniform min16uint s;

/* Returns 1 in every component when each uniform compares equal to a
 * constant that needs more than 16 bits, so the check can only succeed if
 * the uniforms carry full 32-bit values. Returns 0 otherwise. */
uint4 main() : sv_target
{
    if (p.x == 0x020001 && p.y == 0x040003 && p.z == 0x060005 && p.w == 0x080007
            && q.x == 0x120011 && q.y == 0x140013 && q.z == 0x160015
            && r.x == 0x220021 && r.y == 0x240023 && s == 0x260025)
        return 1;
    return 0;
}

[test]
uniform 0 uint4 0x020001 0x040003 0x060005 0x080007
uniform 4 uint4 0x120011 0x140013 0x160015 0x180017
uniform 8 uint4 0x220021 0x240023 0x260025 0x280027
draw quad
probe (0, 0) u32(1, 1, 1, 1)

% Minimum precision types have a funny behavior with respect to stride in SM6:
% DXC allocates them assuming they're 32-bit, and the generated code first loads
% a 16-byte row from the constant buffer and then indexes inside it; drivers
% agree that each row is indeed 16 bytes long, but disagree on the stride used
% to index within a given row; on NVIDIA and WARP the stride is 2, so the 16-bit
% values are packed in the lower half of the row and the upper half is unused;
% on AMD the stride is 4, so only the lower 16-bit word of each of the four
% 32-bit values composing a row is used. The offsets generated by DXC seem to
% hint that AMD is right here (and, in particular, Microsoft's own WARP
% implementation is wrong), but the offsets do not appear in the code, so each
% driver takes its own stance anyway. We have no choice other than to accept
% both behaviors as valid.

[require]
shader model >= 6.0

[pixel shader]
/* Minimum-precision unsigned uniforms, one of each width from four
 * components down to a scalar. */
uniform min16uint4 p;
uniform min16uint3 q;
uniform min16uint2 r;
uniform min16uint s;

/* Returns 1 in every component if the uniforms match either of two accepted
 * sets of 16-bit values, and 0 otherwise. The two sets correspond to reading
 * the constant buffer row with a 4-byte or a 2-byte element stride. */
uint4 main() : sv_target
{
    /* On AMD the stride is 4. */
    /* The expected values are the low 16-bit word of each 32-bit value that
     * the test uploads. */
    if (p.x == 0x01 && p.y == 0x03 && p.z == 0x05 && p.w == 0x07
            && q.x == 0x11 && q.y == 0x13 && q.z == 0x15
            && r.x == 0x21 && r.y == 0x23 && s == 0x25)
        return 1;
    /* On NVIDIA and WARP the stride is 2. */
    /* The expected values are consecutive 16-bit words from the start of
     * each uploaded row. */
    if (p.x == 0x01 && p.y == 0x02 && p.z == 0x03 && p.w == 0x04
            && q.x == 0x11 && q.y == 0x12 && q.z == 0x13
            && r.x == 0x21 && r.y == 0x22 && s == 0x23)
        return 1;
    return 0;
}

[test]
uniform 0 uint4 0x020001 0x040003 0x060005 0x080007
uniform 4 uint4 0x120011 0x140013 0x160015 0x180017
uniform 8 uint4 0x220021 0x240023 0x260025 0x280027
todo(msl & sm>=6) draw quad
todo probe (0, 0) u32(1, 1, 1, 1)

% Same tests for signed integers

[require]
shader model >= 4.0
shader model < 6.0

[pixel shader]
/* Minimum-precision signed uniforms, one of each width from four components
 * down to a scalar. */
uniform min16int4 p;
uniform min16int3 q;
uniform min16int2 r;
uniform min16int s;

/* Returns 1 in every component when each uniform compares equal to a
 * constant that needs more than 16 bits, so the check can only succeed if
 * the uniforms carry full 32-bit values. Returns 0 otherwise. */
uint4 main() : sv_target
{
    if (p.x == 0x020001 && p.y == 0x040003 && p.z == 0x060005 && p.w == 0x080007
            && q.x == 0x120011 && q.y == 0x140013 && q.z == 0x160015
            && r.x == 0x220021 && r.y == 0x240023 && s == 0x260025)
        return 1;
    return 0;
}

[test]
uniform 0 uint4 0x020001 0x040003 0x060005 0x080007
uniform 4 uint4 0x120011 0x140013 0x160015 0x180017
uniform 8 uint4 0x220021 0x240023 0x260025 0x280027
draw quad
probe (0, 0) u32(1, 1, 1, 1)

[require]
shader model >= 6.0

[pixel shader]
/* Minimum-precision signed uniforms, one of each width from four components
 * down to a scalar. */
uniform min16int4 p;
uniform min16int3 q;
uniform min16int2 r;
uniform min16int s;

/* Returns 1 in every component if the uniforms match either of two accepted
 * sets of 16-bit values, and 0 otherwise. The two sets correspond to reading
 * the constant buffer row with a 4-byte or a 2-byte element stride. */
uint4 main() : sv_target
{
    /* On AMD the stride is 4. */
    /* The expected values are the low 16-bit word of each 32-bit value that
     * the test uploads. */
    if (p.x == 0x01 && p.y == 0x03 && p.z == 0x05 && p.w == 0x07
            && q.x == 0x11 && q.y == 0x13 && q.z == 0x15
            && r.x == 0x21 && r.y == 0x23 && s == 0x25)
        return 1;
    /* On NVIDIA and WARP the stride is 2. */
    /* The expected values are consecutive 16-bit words from the start of
     * each uploaded row. */
    if (p.x == 0x01 && p.y == 0x02 && p.z == 0x03 && p.w == 0x04
            && q.x == 0x11 && q.y == 0x12 && q.z == 0x13
            && r.x == 0x21 && r.y == 0x22 && s == 0x23)
        return 1;
    return 0;
}

[test]
uniform 0 uint4 0x020001 0x040003 0x060005 0x080007
uniform 4 uint4 0x120011 0x140013 0x160015 0x180017
uniform 8 uint4 0x220021 0x240023 0x260025 0x280027
todo(msl & sm>=6) draw quad
todo probe (0, 0) u32(1, 1, 1, 1)

% Same tests for floating point numbers

[require]
shader model >= 4.0
shader model < 6.0

[pixel shader]
/* Minimum-precision floating-point uniforms, one of each width from four
 * components down to a scalar. */
uniform min16float4 p;
uniform min16float3 q;
uniform min16float2 r;
uniform min16float s;

/* Returns 1 in every component when every uniform holds the expected float
 * value, and 0 otherwise. The accompanying test uploads these values as
 * ordinary 32-bit floats. */
uint4 main() : sv_target
{
    if (p.x == 1.0 && p.y == 2.0 && p.z == 3.0 && p.w == 4.0
            && q.x == 11.0 && q.y == 12.0 && q.z == 13.0
            && r.x == 21.0 && r.y == 22.0 && s == 23.0)
        return 1;
    return 0;
}

[test]
uniform 0 float4 1.0 2.0 3.0 4.0
uniform 4 float4 11.0 12.0 13.0 14.0
uniform 8 float4 21.0 22.0 23.0 24.0
draw quad
probe (0, 0) u32(1, 1, 1, 1)

[require]
shader model >= 6.0

[pixel shader]
/* Minimum-precision floating-point uniforms, one of each width from four
 * components down to a scalar. */
uniform min16float4 p;
uniform min16float3 q;
uniform min16float2 r;
uniform min16float s;

/* Returns 1 in every component if the uniforms match either of two accepted
 * sets of values, and 0 otherwise. The accompanying test uploads the data as
 * pairs of 16-bit half-float bit patterns packed into 32-bit words, so the
 * two sets correspond to a 4-byte or a 2-byte element stride. */
uint4 main() : sv_target
{
    /* On AMD the stride is 4. */
    /* The expected values are the low half of each uploaded 32-bit word. */
    if (p.x == 1.0 && p.y == 3.0 && p.z == 5.0 && p.w == 7.0
            && q.x == 11.0 && q.y == 13.0 && q.z == 15.0
            && r.x == 21.0 && r.y == 23.0 && s == 25.0)
        return 1;
    /* On NVIDIA and WARP the stride is 2. */
    /* The expected values are consecutive halves from the start of each
     * uploaded row. */
    if (p.x == 1.0 && p.y == 2.0 && p.z == 3.0 && p.w == 4.0
            && q.x == 11.0 && q.y == 12.0 && q.z == 13.0
            && r.x == 21.0 && r.y == 22.0 && s == 23.0)
        return 1;
    return 0;
}

[test]
uniform 0 uint4 0x40003c00 0x44004200 0x46004500 0x48004700
uniform 4 uint4 0x4a004980 0x4b004a80 0x4c004b80 0x4c804c40
uniform 8 uint4 0x4d804d40 0x4e004dc0 0x4e804e40 0x4f004ec0
todo(msl & sm>=6) draw quad
todo probe (0, 0) u32(1, 1, 1, 1)

[require]
shader model >= 4.0

[rtv 0]
format r32g32b32a32-uint
size (2d, 640, 480)

[pixel shader]
/* i selects the array element to overwrite; x is the value stored there. */
uniform uint i, x;

/* Stores a 32-bit uniform into a local min16uint array at an index that is
 * only known at run time, then returns the whole array as a uint4. */
uint4 main() : sv_target
{
    min16uint arr[4] = {1, 2, 0x7ff, 0xfff};
    /* Dynamic index: the element written depends on the uniform i. */
    arr[i] = x;
    /* The four array elements become the four output components. */
    return uint4(arr);
}

[test]
uniform 0 uint 0
uniform 1 uint 100
todo(msl & sm>=6) draw quad
% In D3D12 with SM5 all reads from the array return 0
if(sm>=6 | !d3d12) probe (0, 0) u32(100, 2, 0x7ff, 0xfff)
uniform 0 uint 2
uniform 1 uint 1000
todo(msl & sm>=6) draw quad
if(sm>=6 | !d3d12) probe (0, 0) u32(1, 2, 1000, 0xfff)

[rtv 0]
format r32g32b32a32-sint
size (2d, 640, 480)

[pixel shader]
/* i selects the array element to overwrite; x is the value stored there. */
uniform uint i;
uniform int x;

/* Stores a 32-bit signed uniform into a local min16int array at an index
 * that is only known at run time, then returns the whole array as an int4. */
int4 main() : sv_target
{
    min16int arr[4] = {1, 2, 0x7ff, 0xfff};
    /* Dynamic index: the element written depends on the uniform i. */
    arr[i] = x;
    /* The four array elements become the four output components. */
    return int4(arr);
}

[test]
uniform 0 uint 0
uniform 1 uint 100
todo(msl & sm>=6) draw quad
% In D3D12 with SM5 all reads from the array return 0
if(sm>=6 | !d3d12) probe (0, 0) u32(100, 2, 0x7ff, 0xfff)
uniform 0 uint 2
uniform 1 uint 1000
todo(msl & sm>=6) draw quad
if(sm>=6 | !d3d12) probe (0, 0) u32(1, 2, 1000, 0xfff)

[rtv 0]
format r32g32b32a32-float
size (2d, 640, 480)

[pixel shader]
/* i selects the array element to overwrite; x is the value stored there. */
uniform uint i;
uniform float x;

/* Stores a 32-bit float uniform into a local min16float array at an index
 * that is only known at run time, then returns the whole array as a
 * float4. */
float4 main() : sv_target
{
    min16float arr[4] = {1.0, 2.0, 0.5, 111.0};
    /* Dynamic index: the element written depends on the uniform i. */
    arr[i] = x;
    /* The four array elements become the four output components. */
    return float4(arr);
}

[test]
uniform 0 uint 0
uniform 1 float 3.0
todo(msl & sm>=6) draw quad
probe (0, 0) rgba(3.0, 2.0, 0.5, 111.0)
uniform 0 uint 2
uniform 1 float 5.0
todo(msl & sm>=6) draw quad
probe (0, 0) rgba(1.0, 2.0, 5.0, 111.0)