vkd3d-shader/hlsl: Write SM4 thread ID registers.

2025-04-13 05:43:18 -07:00 · 2021-08-16 19:02:17 -05:00 · 2021-08-16 19:02:17 -05:00 · 653cc02f4c
commit 653cc02f4c
parent 809a43f06b
3 changed files with 92 additions and 81 deletions
--- a/libs/vkd3d-shader/hlsl_sm4.c
+++ b/libs/vkd3d-shader/hlsl_sm4.c
@@ -49,21 +49,25 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem
        const char *semantic;
        bool output;
        enum vkd3d_shader_type shader_type;
-        enum vkd3d_sm4_register_type type;
        enum vkd3d_sm4_swizzle_type swizzle_type;
+        enum vkd3d_sm4_register_type type;
        bool has_idx;
    }
    register_table[] =
    {
-        {"sv_primitiveid",  false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_RT_PRIMID, VKD3D_SM4_SWIZZLE_NONE, false},
+        {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE,   VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID,         false},
+        {"sv_groupid",          false, VKD3D_SHADER_TYPE_COMPUTE,   VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID,   false},
+        {"sv_groupthreadid",    false, VKD3D_SHADER_TYPE_COMPUTE,   VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID,   false},
+
+        {"sv_primitiveid",      false, VKD3D_SHADER_TYPE_GEOMETRY,  VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID,            false},

        /* Put sv_target in this table, instead of letting it fall through to
         * default varying allocation, so that the register index matches the
         * usage index. */
-        {"color",           true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_RT_OUTPUT, VKD3D_SM4_SWIZZLE_VEC4, true},
-        {"depth",           true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_RT_DEPTHOUT, VKD3D_SM4_SWIZZLE_VEC4, false},
-        {"sv_depth",        true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_RT_DEPTHOUT, VKD3D_SM4_SWIZZLE_VEC4, false},
-        {"sv_target",       true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_RT_OUTPUT, VKD3D_SM4_SWIZZLE_VEC4, true},
+        {"color",               true,  VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT,            true},
+        {"depth",               true,  VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT,          false},
+        {"sv_depth",            true,  VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT,          false},
+        {"sv_target",           true,  VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT,            true},
    };

    for (i = 0; i < ARRAY_SIZE(register_table); ++i)
@@ -97,6 +101,10 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant
    }
    semantics[] =
    {
+        {"sv_dispatchthreadid",         false, VKD3D_SHADER_TYPE_COMPUTE,   ~0u},
+        {"sv_groupid",                  false, VKD3D_SHADER_TYPE_COMPUTE,   ~0u},
+        {"sv_groupthreadid",            false, VKD3D_SHADER_TYPE_COMPUTE,   ~0u},
+
        {"position",                    false, VKD3D_SHADER_TYPE_GEOMETRY,  D3D_NAME_POSITION},
        {"sv_position",                 false, VKD3D_SHADER_TYPE_GEOMETRY,  D3D_NAME_POSITION},
        {"sv_primitiveid",              false, VKD3D_SHADER_TYPE_GEOMETRY,  D3D_NAME_PRIMITIVE_ID},
@@ -164,6 +172,8 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc,

        ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage);
        assert(ret);
+        if (usage == ~0u)
+            continue;
        usage_idx = var->semantic.index;

        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx))
@@ -226,6 +236,8 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc,
            continue;

        hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage);
+        if (usage == ~0u)
+            continue;

        if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color"))
            string_offset = put_string(&buffer, "SV_Target");
@@ -1205,6 +1217,8 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b
        instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR;

    hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage);
+    if (usage == ~0u)
+        usage = D3D_NAME_UNDEFINED;

    if (var->is_input_semantic)
    {
--- a/tests/hlsl-numthreads.shader_test
+++ b/tests/hlsl-numthreads.shader_test
@@ -171,7 +171,7 @@ size (2, 2)
 1.0 1.0
 1.0 1.0

-[compute shader todo]
+[compute shader]
 /* Attributes are taken from the first function, and dropped from the second. */
 RWTexture2D<float> u;

@@ -185,7 +185,7 @@ void main(uint2 id : sv_dispatchthreadid)
 }

 [test]
-todo dispatch 1 1 1
+dispatch 1 1 1
 probe uav 0 (0, 0) r (2.0)
 probe uav 0 (0, 1) r (1.0)
 probe uav 0 (1, 0) r (2.0)
--- a/tests/hlsl_d3d12.c
+++ b/tests/hlsl_d3d12.c
@@ -488,87 +488,84 @@ static void test_thread_id(void)
                get_cpu_descriptor_handle(&context, heap, i));
    }

-    todo cs_code = compile_shader(cs_source, "cs_5_0");
-    if (cs_code)
+    cs_code = compile_shader(cs_source, "cs_5_0");
+    context.pipeline_state = create_compute_pipeline_state(device, context.root_signature,
+            shader_bytecode(ID3D10Blob_GetBufferPointer(cs_code), ID3D10Blob_GetBufferSize(cs_code)));
+
+    ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
+    ID3D12GraphicsCommandList_SetComputeRootSignature(command_list, context.root_signature);
+    ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(command_list,
+            0, get_gpu_descriptor_handle(&context, heap, 0));
+    ID3D12GraphicsCommandList_Dispatch(command_list, 2, 2, 2);
+
+    transition_resource_state(command_list, textures[0],
+            D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
+    get_texture_readback_with_command_list(textures[0], 0, &rb, context.queue, command_list);
+    for (x = 0; x < 16; ++x)
    {
-        context.pipeline_state = create_compute_pipeline_state(device, context.root_signature,
-                shader_bytecode(ID3D10Blob_GetBufferPointer(cs_code), ID3D10Blob_GetBufferSize(cs_code)));
-
-        ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
-        ID3D12GraphicsCommandList_SetComputeRootSignature(command_list, context.root_signature);
-        ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(command_list,
-                0, get_gpu_descriptor_handle(&context, heap, 0));
-        ID3D12GraphicsCommandList_Dispatch(command_list, 2, 2, 2);
-
-        transition_resource_state(command_list, textures[0],
-                D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
-        get_texture_readback_with_command_list(textures[0], 0, &rb, context.queue, command_list);
-        for (x = 0; x < 16; ++x)
+        for (y = 0; y < 8; ++y)
        {
-            for (y = 0; y < 8; ++y)
+            for (z = 0; z < 8; ++z)
            {
-                for (z = 0; z < 8; ++z)
-                {
-                    const struct uvec4 *v = get_readback_data(&rb.rb, x, y, z, sizeof(struct uvec4));
-                    struct uvec4 expect = {x / 5, y / 3, z / 2, 1};
+                const struct uvec4 *v = get_readback_data(&rb.rb, x, y, z, sizeof(struct uvec4));
+                struct uvec4 expect = {x / 5, y / 3, z / 2, 1};

-                    if (x >= 10 || y >= 6 || z >= 4)
-                        memset(&expect, 0, sizeof(expect));
+                if (x >= 10 || y >= 6 || z >= 4)
+                    memset(&expect, 0, sizeof(expect));

-                    ok(compare_uvec4(v, &expect), "Got {%u, %u, %u, %u} at (%u, %u, %u).\n",
-                            v->x, v->y, v->z, v->w, x, y, z);
-                }
+                ok(compare_uvec4(v, &expect), "Got {%u, %u, %u, %u} at (%u, %u, %u).\n",
+                        v->x, v->y, v->z, v->w, x, y, z);
            }
        }
-        release_resource_readback(&rb);
-        reset_command_list(command_list, context.allocator);
-
-        transition_resource_state(command_list, textures[1],
-                D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
-        get_texture_readback_with_command_list(textures[1], 0, &rb, context.queue, command_list);
-        for (x = 0; x < 16; ++x)
-        {
-            for (y = 0; y < 8; ++y)
-            {
-                for (z = 0; z < 8; ++z)
-                {
-                    const struct uvec4 *v = get_readback_data(&rb.rb, x, y, z, sizeof(struct uvec4));
-                    struct uvec4 expect = {x % 5, y % 3, z % 2, 2};
-
-                    if (x >= 10 || y >= 6 || z >= 4)
-                        memset(&expect, 0, sizeof(expect));
-
-                    ok(compare_uvec4(v, &expect), "Got {%u, %u, %u, %u} at (%u, %u, %u).\n",
-                            v->x, v->y, v->z, v->w, x, y, z);
-                }
-            }
-        }
-        release_resource_readback(&rb);
-        reset_command_list(command_list, context.allocator);
-
-        transition_resource_state(command_list, textures[2],
-                D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
-        get_texture_readback_with_command_list(textures[2], 0, &rb, context.queue, command_list);
-        for (x = 0; x < 16; ++x)
-        {
-            for (y = 0; y < 8; ++y)
-            {
-                for (z = 0; z < 8; ++z)
-                {
-                    const struct uvec4 *v = get_readback_data(&rb.rb, x, y, z, sizeof(struct uvec4));
-                    struct uvec4 expect = {x, y, z, 3};
-
-                    if (x >= 10 || y >= 6 || z >= 4)
-                        memset(&expect, 0, sizeof(expect));
-
-                    ok(compare_uvec4(v, &expect), "Got {%u, %u, %u, %u} at (%u, %u, %u).\n",
-                            v->x, v->y, v->z, v->w, x, y, z);
-                }
-            }
-        }
-        release_resource_readback(&rb);
-        reset_command_list(command_list, context.allocator);
    }
+    release_resource_readback(&rb);
+    reset_command_list(command_list, context.allocator);
+
+    transition_resource_state(command_list, textures[1],
+            D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
+    get_texture_readback_with_command_list(textures[1], 0, &rb, context.queue, command_list);
+    for (x = 0; x < 16; ++x)
+    {
+        for (y = 0; y < 8; ++y)
+        {
+            for (z = 0; z < 8; ++z)
+            {
+                const struct uvec4 *v = get_readback_data(&rb.rb, x, y, z, sizeof(struct uvec4));
+                struct uvec4 expect = {x % 5, y % 3, z % 2, 2};
+
+                if (x >= 10 || y >= 6 || z >= 4)
+                    memset(&expect, 0, sizeof(expect));
+
+                ok(compare_uvec4(v, &expect), "Got {%u, %u, %u, %u} at (%u, %u, %u).\n",
+                        v->x, v->y, v->z, v->w, x, y, z);
+            }
+        }
+    }
+    release_resource_readback(&rb);
+    reset_command_list(command_list, context.allocator);
+
+    transition_resource_state(command_list, textures[2],
+            D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
+    get_texture_readback_with_command_list(textures[2], 0, &rb, context.queue, command_list);
+    for (x = 0; x < 16; ++x)
+    {
+        for (y = 0; y < 8; ++y)
+        {
+            for (z = 0; z < 8; ++z)
+            {
+                const struct uvec4 *v = get_readback_data(&rb.rb, x, y, z, sizeof(struct uvec4));
+                struct uvec4 expect = {x, y, z, 3};
+
+                if (x >= 10 || y >= 6 || z >= 4)
+                    memset(&expect, 0, sizeof(expect));
+
+                ok(compare_uvec4(v, &expect), "Got {%u, %u, %u, %u} at (%u, %u, %u).\n",
+                        v->x, v->y, v->z, v->w, x, y, z);
+            }
+        }
+    }
+    release_resource_readback(&rb);
+    reset_command_list(command_list, context.allocator);

    for (i = 0; i < 3; ++i)
        ID3D12Resource_Release(textures[i]);