diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 62266780..af59b7c7 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -49,21 +49,25 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem const char *semantic; bool output; enum vkd3d_shader_type shader_type; - enum vkd3d_sm4_register_type type; enum vkd3d_sm4_swizzle_type swizzle_type; + enum vkd3d_sm4_register_type type; bool has_idx; } register_table[] = { - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_RT_PRIMID, VKD3D_SM4_SWIZZLE_NONE, false}, + {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, + {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, + {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, + + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, /* Put sv_target in this table, instead of letting it fall through to * default varying allocation, so that the register index matches the * usage index. */ - {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_RT_OUTPUT, VKD3D_SM4_SWIZZLE_VEC4, true}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_RT_DEPTHOUT, VKD3D_SM4_SWIZZLE_VEC4, false}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_RT_DEPTHOUT, VKD3D_SM4_SWIZZLE_VEC4, false}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_RT_OUTPUT, VKD3D_SM4_SWIZZLE_VEC4, true}, + {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, }; for (i = 0; i < ARRAY_SIZE(register_table); ++i) @@ -97,6 +101,10 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant } semantics[] = { + {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, @@ -164,6 +172,8 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); assert(ret); + if (usage == ~0u) + continue; usage_idx = var->semantic.index; if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) @@ -226,6 +236,8 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, continue; hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + if (usage == ~0u) + continue; if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) string_offset = put_string(&buffer, "SV_Target"); @@ -1205,6 +1217,8 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + if (usage == ~0u) + usage = D3D_NAME_UNDEFINED; if (var->is_input_semantic) { diff --git a/tests/hlsl-numthreads.shader_test b/tests/hlsl-numthreads.shader_test index 9e561ae4..404d7d76 100644 --- a/tests/hlsl-numthreads.shader_test +++ b/tests/hlsl-numthreads.shader_test @@ -171,7 +171,7 @@ size (2, 2) 1.0 1.0 1.0 1.0 -[compute shader todo] +[compute shader] /* Attributes are taken from the first function, and dropped from the second. */ RWTexture2D u; @@ -185,7 +185,7 @@ void main(uint2 id : sv_dispatchthreadid) } [test] -todo dispatch 1 1 1 +dispatch 1 1 1 probe uav 0 (0, 0) r (2.0) probe uav 0 (0, 1) r (1.0) probe uav 0 (1, 0) r (2.0) diff --git a/tests/hlsl_d3d12.c b/tests/hlsl_d3d12.c index 8de8d653..17788d9b 100644 --- a/tests/hlsl_d3d12.c +++ b/tests/hlsl_d3d12.c @@ -488,87 +488,84 @@ static void test_thread_id(void) get_cpu_descriptor_handle(&context, heap, i)); } - todo cs_code = compile_shader(cs_source, "cs_5_0"); - if (cs_code) + cs_code = compile_shader(cs_source, "cs_5_0"); + context.pipeline_state = create_compute_pipeline_state(device, context.root_signature, + shader_bytecode(ID3D10Blob_GetBufferPointer(cs_code), ID3D10Blob_GetBufferSize(cs_code))); + + ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state); + ID3D12GraphicsCommandList_SetComputeRootSignature(command_list, context.root_signature); + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(command_list, + 0, get_gpu_descriptor_handle(&context, heap, 0)); + ID3D12GraphicsCommandList_Dispatch(command_list, 2, 2, 2); + + transition_resource_state(command_list, textures[0], + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + get_texture_readback_with_command_list(textures[0], 0, &rb, context.queue, command_list); + for (x = 0; x < 16; ++x) { - context.pipeline_state = create_compute_pipeline_state(device, context.root_signature, - shader_bytecode(ID3D10Blob_GetBufferPointer(cs_code), ID3D10Blob_GetBufferSize(cs_code))); - - ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state); - ID3D12GraphicsCommandList_SetComputeRootSignature(command_list, context.root_signature); - ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(command_list, - 0, get_gpu_descriptor_handle(&context, heap, 0)); - ID3D12GraphicsCommandList_Dispatch(command_list, 2, 2, 2); - - transition_resource_state(command_list, textures[0], - D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); - get_texture_readback_with_command_list(textures[0], 0, &rb, context.queue, command_list); - for (x = 0; x < 16; ++x) + for (y = 0; y < 8; ++y) { - for (y = 0; y < 8; ++y) + for (z = 0; z < 8; ++z) { - for (z = 0; z < 8; ++z) - { - const struct uvec4 *v = get_readback_data(&rb.rb, x, y, z, sizeof(struct uvec4)); - struct uvec4 expect = {x / 5, y / 3, z / 2, 1}; + const struct uvec4 *v = get_readback_data(&rb.rb, x, y, z, sizeof(struct uvec4)); + struct uvec4 expect = {x / 5, y / 3, z / 2, 1}; - if (x >= 10 || y >= 6 || z >= 4) - memset(&expect, 0, sizeof(expect)); + if (x >= 10 || y >= 6 || z >= 4) + memset(&expect, 0, sizeof(expect)); - ok(compare_uvec4(v, &expect), "Got {%u, %u, %u, %u} at (%u, %u, %u).\n", - v->x, v->y, v->z, v->w, x, y, z); - } + ok(compare_uvec4(v, &expect), "Got {%u, %u, %u, %u} at (%u, %u, %u).\n", + v->x, v->y, v->z, v->w, x, y, z); } } - release_resource_readback(&rb); - reset_command_list(command_list, context.allocator); - - transition_resource_state(command_list, textures[1], - D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); - get_texture_readback_with_command_list(textures[1], 0, &rb, context.queue, command_list); - for (x = 0; x < 16; ++x) - { - for (y = 0; y < 8; ++y) - { - for (z = 0; z < 8; ++z) - { - const struct uvec4 *v = get_readback_data(&rb.rb, x, y, z, sizeof(struct uvec4)); - struct uvec4 expect = {x % 5, y % 3, z % 2, 2}; - - if (x >= 10 || y >= 6 || z >= 4) - memset(&expect, 0, sizeof(expect)); - - ok(compare_uvec4(v, &expect), "Got {%u, %u, %u, %u} at (%u, %u, %u).\n", - v->x, v->y, v->z, v->w, x, y, z); - } - } - } - release_resource_readback(&rb); - reset_command_list(command_list, context.allocator); - - transition_resource_state(command_list, textures[2], - D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); - get_texture_readback_with_command_list(textures[2], 0, &rb, context.queue, command_list); - for (x = 0; x < 16; ++x) - { - for (y = 0; y < 8; ++y) - { - for (z = 0; z < 8; ++z) - { - const struct uvec4 *v = get_readback_data(&rb.rb, x, y, z, sizeof(struct uvec4)); - struct uvec4 expect = {x, y, z, 3}; - - if (x >= 10 || y >= 6 || z >= 4) - memset(&expect, 0, sizeof(expect)); - - ok(compare_uvec4(v, &expect), "Got {%u, %u, %u, %u} at (%u, %u, %u).\n", - v->x, v->y, v->z, v->w, x, y, z); - } - } - } - release_resource_readback(&rb); - reset_command_list(command_list, context.allocator); } + release_resource_readback(&rb); + reset_command_list(command_list, context.allocator); + + transition_resource_state(command_list, textures[1], + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + get_texture_readback_with_command_list(textures[1], 0, &rb, context.queue, command_list); + for (x = 0; x < 16; ++x) + { + for (y = 0; y < 8; ++y) + { + for (z = 0; z < 8; ++z) + { + const struct uvec4 *v = get_readback_data(&rb.rb, x, y, z, sizeof(struct uvec4)); + struct uvec4 expect = {x % 5, y % 3, z % 2, 2}; + + if (x >= 10 || y >= 6 || z >= 4) + memset(&expect, 0, sizeof(expect)); + + ok(compare_uvec4(v, &expect), "Got {%u, %u, %u, %u} at (%u, %u, %u).\n", + v->x, v->y, v->z, v->w, x, y, z); + } + } + } + release_resource_readback(&rb); + reset_command_list(command_list, context.allocator); + + transition_resource_state(command_list, textures[2], + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + get_texture_readback_with_command_list(textures[2], 0, &rb, context.queue, command_list); + for (x = 0; x < 16; ++x) + { + for (y = 0; y < 8; ++y) + { + for (z = 0; z < 8; ++z) + { + const struct uvec4 *v = get_readback_data(&rb.rb, x, y, z, sizeof(struct uvec4)); + struct uvec4 expect = {x, y, z, 3}; + + if (x >= 10 || y >= 6 || z >= 4) + memset(&expect, 0, sizeof(expect)); + + ok(compare_uvec4(v, &expect), "Got {%u, %u, %u, %u} at (%u, %u, %u).\n", + v->x, v->y, v->z, v->w, x, y, z); + } + } + } + release_resource_readback(&rb); + reset_command_list(command_list, context.allocator); for (i = 0; i < 3; ++i) ID3D12Resource_Release(textures[i]);