From 9fd13a6e06a7de0001a09bb7175d3c262d9a0a69 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 7 Nov 2024 12:23:53 +1100 Subject: [PATCH] Updated vkd3d to 756b98f093ba26e8cd4d4fed1caa04a5c0d0bc35. --- libs/vkd3d/libs/vkd3d-shader/fx.c | 47 +- libs/vkd3d/libs/vkd3d-shader/glsl.c | 133 +- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 6 + libs/vkd3d/libs/vkd3d-shader/hlsl.l | 1 + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 113 +- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 954 ++++++++++++- libs/vkd3d/libs/vkd3d-shader/ir.c | 25 +- libs/vkd3d/libs/vkd3d-shader/spirv.c | 11 +- libs/vkd3d/libs/vkd3d-shader/tpf.c | 1182 +++-------------- .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + libs/vkd3d/libs/vkd3d/command.c | 30 +- 11 files changed, 1392 insertions(+), 1111 deletions(-) diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index 5382dd94f98..9b1ef3bb2e0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -514,6 +514,8 @@ enum fx_4_type_constants FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY = 0xf, FX_4_OBJECT_TYPE_TEXTURE_3D = 0x10, FX_4_OBJECT_TYPE_TEXTURE_CUBE = 0x11, + FX_4_OBJECT_TYPE_RTV = 0x13, + FX_4_OBJECT_TYPE_DSV = 0x14, FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY = 0x17, FX_5_OBJECT_TYPE_GEOMETRY_SHADER = 0x1b, @@ -527,7 +529,12 @@ enum fx_4_type_constants FX_5_OBJECT_TYPE_UAV_2DARRAY = 0x22, FX_5_OBJECT_TYPE_UAV_3D = 0x23, FX_5_OBJECT_TYPE_UAV_BUFFER = 0x24, + FX_5_OBJECT_TYPE_SRV_RAW_BUFFER = 0x25, + FX_5_OBJECT_TYPE_UAV_RAW_BUFFER = 0x26, + FX_5_OBJECT_TYPE_SRV_STRUCTURED_BUFFER = 0x27, FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER = 0x28, + FX_5_OBJECT_TYPE_SRV_APPEND_STRUCTURED_BUFFER = 0x2b, + FX_5_OBJECT_TYPE_SRV_CONSUME_STRUCTURED_BUFFER = 0x2c, /* Types */ FX_4_TYPE_CLASS_NUMERIC = 1, @@ -613,6 +620,7 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) [HLSL_SAMPLER_DIM_3D] = "RWTexture3D", [HLSL_SAMPLER_DIM_BUFFER] = "RWBuffer", [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 
"RWStructuredBuffer", + [HLSL_SAMPLER_DIM_RAW_BUFFER] = "RWByteAddressBuffer", }; switch (type->class) @@ -821,17 +829,18 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_3D] = FX_5_OBJECT_TYPE_UAV_3D, [HLSL_SAMPLER_DIM_BUFFER] = FX_5_OBJECT_TYPE_UAV_BUFFER, [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER, + [HLSL_SAMPLER_DIM_RAW_BUFFER] = FX_5_OBJECT_TYPE_UAV_RAW_BUFFER, }; put_u32_unaligned(buffer, uav_type[element_type->sampler_dim]); } else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) { - put_u32_unaligned(buffer, 20); + put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_DSV); } else if (element_type->class == HLSL_CLASS_RENDER_TARGET_VIEW) { - put_u32_unaligned(buffer, 19); + put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_RTV); } else if (element_type->class == HLSL_CLASS_PIXEL_SHADER) { @@ -3315,27 +3324,19 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int vkd3d_shader_free_shader_code(&output); } -static bool fx_4_is_shader_resource(const struct fx_4_binary_type *type) +static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) { switch (type->typeinfo) { - case FX_4_OBJECT_TYPE_TEXTURE: - case FX_4_OBJECT_TYPE_TEXTURE_1D: - case FX_4_OBJECT_TYPE_TEXTURE_1DARRAY: - case FX_4_OBJECT_TYPE_TEXTURE_2D: - case FX_4_OBJECT_TYPE_TEXTURE_2DARRAY: - case FX_4_OBJECT_TYPE_TEXTURE_2DMS: - case FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY: - case FX_4_OBJECT_TYPE_TEXTURE_3D: - case FX_4_OBJECT_TYPE_TEXTURE_CUBE: - case FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY: - case FX_5_OBJECT_TYPE_UAV_1D: - case FX_5_OBJECT_TYPE_UAV_1DARRAY: - case FX_5_OBJECT_TYPE_UAV_2D: - case FX_5_OBJECT_TYPE_UAV_2DARRAY: - case FX_5_OBJECT_TYPE_UAV_3D: - case FX_5_OBJECT_TYPE_UAV_BUFFER: - case FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER: + case FX_4_OBJECT_TYPE_STRING: + case FX_4_OBJECT_TYPE_PIXEL_SHADER: + case FX_4_OBJECT_TYPE_VERTEX_SHADER: + case 
FX_4_OBJECT_TYPE_GEOMETRY_SHADER: + case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: + case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: + case FX_5_OBJECT_TYPE_COMPUTE_SHADER: + case FX_5_OBJECT_TYPE_HULL_SHADER: + case FX_5_OBJECT_TYPE_DOMAIN_SHADER: return true; default: return false; @@ -3347,6 +3348,9 @@ static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct unsigned int i, element_count; uint32_t value; + if (!fx_4_object_has_initializer(type)) + return; + vkd3d_string_buffer_printf(&parser->buffer, " = {\n"); element_count = max(type->element_count, 1); for (i = 0; i < element_count; ++i) @@ -3407,8 +3411,7 @@ static void fx_4_parse_objects(struct fx_parser *parser) if (type.element_count) vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); - if (!fx_4_is_shader_resource(&type)) - fx_4_parse_object_initializer(parser, &type); + fx_4_parse_object_initializer(parser, &type); vkd3d_string_buffer_printf(&parser->buffer, ";\n"); fx_parse_fx_4_annotations(parser); diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index 363054cb6d9..0df0e30f399 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -763,16 +763,37 @@ static void shader_glsl_default(struct vkd3d_glsl_generator *gen) vkd3d_string_buffer_printf(gen->buffer, "default:\n"); } +static void shader_glsl_print_texel_offset(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, + unsigned int offset_size, const struct vkd3d_shader_texel_offset *offset) +{ + switch (offset_size) + { + case 1: + vkd3d_string_buffer_printf(buffer, "%d", offset->u); + break; + case 2: + vkd3d_string_buffer_printf(buffer, "ivec2(%d, %d)", offset->u, offset->v); + break; + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Invalid texel offset size %u.", offset_size); + /* fall through */ + case 3: + vkd3d_string_buffer_printf(buffer, "ivec3(%d, %d, %d)", 
offset->u, offset->v, offset->w); + break; + } +} + static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) { + unsigned int resource_id, resource_idx, resource_space, sample_count; const struct glsl_resource_type_info *resource_type_info; - unsigned int resource_id, resource_idx, resource_space; const struct vkd3d_shader_descriptor_info1 *d; enum vkd3d_shader_component_type sampled_type; enum vkd3d_shader_resource_type resource_type; struct vkd3d_string_buffer *fetch; enum vkd3d_data_type data_type; - struct glsl_src coord, lod; + struct glsl_src coord; struct glsl_dst dst; uint32_t coord_mask; @@ -790,6 +811,7 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_ { resource_type = d->resource_type; resource_space = d->register_space; + sample_count = d->sample_count; sampled_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); data_type = vkd3d_data_type_from_component_type(sampled_type); } @@ -799,6 +821,7 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_ "Internal compiler error: Undeclared resource descriptor %u.", resource_id); resource_space = 0; resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + sample_count = 1; data_type = VKD3D_DATA_FLOAT; } @@ -815,7 +838,6 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_ glsl_dst_init(&dst, gen, ins, &ins->dst[0]); glsl_src_init(&coord, gen, &ins->src[0], coord_mask); - glsl_src_init(&lod, gen, &ins->src[0], VKD3DSP_WRITEMASK_3); fetch = vkd3d_string_buffer_get(&gen->string_buffers); vkd3d_string_buffer_printf(fetch, "texelFetch("); @@ -823,14 +845,23 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_ resource_space, VKD3D_SHADER_DUMMY_SAMPLER_INDEX, 0); vkd3d_string_buffer_printf(fetch, ", %s", coord.str->buffer); if (resource_type != VKD3D_SHADER_RESOURCE_BUFFER) - vkd3d_string_buffer_printf(fetch, ", %s", 
lod.str->buffer); + { + vkd3d_string_buffer_printf(fetch, ", "); + if (ins->opcode != VKD3DSIH_LD2DMS) + shader_glsl_print_src(fetch, gen, &ins->src[0], VKD3DSP_WRITEMASK_3, ins->src[0].reg.data_type); + else if (sample_count == 1) + /* If the resource isn't a true multisample resource, this is the + * "lod" parameter instead of the "sample" parameter. */ + vkd3d_string_buffer_printf(fetch, "0"); + else + shader_glsl_print_src(fetch, gen, &ins->src[2], VKD3DSP_WRITEMASK_0, ins->src[2].reg.data_type); + } vkd3d_string_buffer_printf(fetch, ")"); shader_glsl_print_swizzle(fetch, ins->src[1].swizzle, ins->dst[0].write_mask); shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", fetch->buffer); vkd3d_string_buffer_release(&gen->string_buffers, fetch); - glsl_src_cleanup(&lod, &gen->string_buffers); glsl_src_cleanup(&coord, &gen->string_buffers); glsl_dst_cleanup(&dst, &gen->string_buffers); } @@ -868,8 +899,9 @@ static void shader_glsl_print_shadow_coord(struct vkd3d_string_buffer *buffer, s static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) { - bool shadow_sampler, array, bias, gather, grad, lod, lod_zero, shadow; + bool shadow_sampler, array, bias, dynamic_offset, gather, grad, lod, lod_zero, offset, shadow; const struct glsl_resource_type_info *resource_type_info; + const struct vkd3d_shader_src_param *resource, *sampler; unsigned int resource_id, resource_idx, resource_space; unsigned int sampler_id, sampler_idx, sampler_space; const struct vkd3d_shader_descriptor_info1 *d; @@ -881,23 +913,24 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk struct glsl_dst dst; bias = ins->opcode == VKD3DSIH_SAMPLE_B; - gather = ins->opcode == VKD3DSIH_GATHER4; + dynamic_offset = ins->opcode == VKD3DSIH_GATHER4_PO; + gather = ins->opcode == VKD3DSIH_GATHER4 || ins->opcode == VKD3DSIH_GATHER4_PO; grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD; lod = ins->opcode == VKD3DSIH_SAMPLE_LOD || 
ins->opcode == VKD3DSIH_SAMPLE_C_LZ; lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ; + offset = dynamic_offset || vkd3d_shader_instruction_has_texel_offset(ins); shadow = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; - if (vkd3d_shader_instruction_has_texel_offset(ins)) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled texel sample offset."); + resource = &ins->src[1 + dynamic_offset]; + sampler = &ins->src[2 + dynamic_offset]; - if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr - || ins->src[2].reg.idx[0].rel_addr || ins->src[2].reg.idx[1].rel_addr) + if (resource->reg.idx[0].rel_addr || resource->reg.idx[1].rel_addr + || sampler->reg.idx[0].rel_addr || sampler->reg.idx[1].rel_addr) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, "Descriptor indexing is not supported."); - resource_id = ins->src[1].reg.idx[0].offset; - resource_idx = ins->src[1].reg.idx[1].offset; + resource_id = resource->reg.idx[0].offset; + resource_idx = resource->reg.idx[1].offset; if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id))) { resource_type = d->resource_type; @@ -927,8 +960,8 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk array = false; } - sampler_id = ins->src[2].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[1].offset; + sampler_id = sampler->reg.idx[0].offset; + sampler_idx = sampler->reg.idx[1].offset; if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id))) { sampler_space = d->register_space; @@ -958,13 +991,14 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk sample = vkd3d_string_buffer_get(&gen->string_buffers); if (gather) - vkd3d_string_buffer_printf(sample, "textureGather("); + vkd3d_string_buffer_printf(sample, "textureGather"); else if (grad) - vkd3d_string_buffer_printf(sample, 
"textureGrad("); + vkd3d_string_buffer_printf(sample, "textureGrad"); else if (lod) - vkd3d_string_buffer_printf(sample, "textureLod("); + vkd3d_string_buffer_printf(sample, "textureLod"); else - vkd3d_string_buffer_printf(sample, "texture("); + vkd3d_string_buffer_printf(sample, "texture"); + vkd3d_string_buffer_printf(sample, "%s(", offset ? "Offset" : ""); shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space); vkd3d_string_buffer_printf(sample, ", "); if (shadow) @@ -985,18 +1019,32 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk { vkd3d_string_buffer_printf(sample, ", 0.0"); } - else if (bias || lod) + else if (lod) { vkd3d_string_buffer_printf(sample, ", "); shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); } - if (gather) + if (offset) + { + vkd3d_string_buffer_printf(sample, ", "); + if (dynamic_offset) + shader_glsl_print_src(sample, gen, &ins->src[1], + vkd3d_write_mask_from_component_count(coord_size - array), ins->src[1].reg.data_type); + else + shader_glsl_print_texel_offset(sample, gen, coord_size - array, &ins->texel_offset); + } + if (bias) { - if ((component_idx = vsir_swizzle_get_component(ins->src[2].swizzle, 0))) + vkd3d_string_buffer_printf(sample, ", "); + shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); + } + else if (gather) + { + if ((component_idx = vsir_swizzle_get_component(sampler->swizzle, 0))) vkd3d_string_buffer_printf(sample, ", %d", component_idx); } vkd3d_string_buffer_printf(sample, ")"); - shader_glsl_print_swizzle(sample, ins->src[1].swizzle, ins->dst[0].write_mask); + shader_glsl_print_swizzle(sample, resource->swizzle, ins->dst[0].write_mask); shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", sample->buffer); @@ -1268,7 +1316,13 @@ static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, st "Internal compiler 
error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", version->type); vkd3d_string_buffer_printf(buffer, "uintBitsToFloat(uvec4(gl_FrontFacing ? 0xffffffffu : 0u, 0u, 0u, 0u))"); + break; + case VKD3D_SHADER_SV_SAMPLE_INDEX: + if (version->type != VKD3D_SHADER_TYPE_PIXEL) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled SV_SAMPLE_INDEX in shader type #%x.", version->type); + vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_SampleID, 0, 0, 0))"); break; case VKD3D_SHADER_SV_TARGET: @@ -1390,6 +1444,9 @@ static void shader_glsl_shader_epilogue(struct vkd3d_glsl_generator *gen) case VKD3D_SHADER_COMPONENT_UINT: vkd3d_string_buffer_printf(buffer, " = floatBitsToUint(%s_out[%u])", gen->prefix, e->register_index); break; + case VKD3D_SHADER_COMPONENT_INT: + vkd3d_string_buffer_printf(buffer, " = floatBitsToInt(%s_out[%u])", gen->prefix, e->register_index); + break; default: vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled output component type %#x.", e->component_type); @@ -1499,6 +1556,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, shader_glsl_cast(gen, ins, "uint", "uvec"); break; case VKD3DSIH_GATHER4: + case VKD3DSIH_GATHER4_PO: case VKD3DSIH_SAMPLE: case VKD3DSIH_SAMPLE_B: case VKD3DSIH_SAMPLE_C: @@ -1553,6 +1611,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, shader_glsl_cast(gen, ins, "float", "vec"); break; case VKD3DSIH_LD: + case VKD3DSIH_LD2DMS: shader_glsl_ld(gen, ins); break; case VKD3DSIH_LD_UAV_TYPED: @@ -1911,6 +1970,7 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator struct vkd3d_string_buffer *buffer = gen->buffer; enum vkd3d_shader_component_type component_type; const char *sampler_type, *sampler_type_prefix; + enum vkd3d_shader_resource_type resource_type; unsigned int binding_idx; bool shadow = false; @@ -1936,18 +1996,32 @@ static 
void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator return; } - if ((resource_type_info = shader_glsl_get_resource_type_info(srv->resource_type))) + resource_type = srv->resource_type; + if (srv->sample_count == 1) + { + /* The OpenGL API distinguishes between multi-sample textures with + * sample count 1 and single-sample textures. Direct3D and Vulkan + * don't make this distinction at the API level, but Direct3D shaders + * are capable of expressing both. We therefore map such multi-sample + * textures to their single-sample equivalents here. */ + if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; + } + + if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) { sampler_type = resource_type_info->type_suffix; if (shadow && !resource_type_info->shadow) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, - "Comparison samplers are not supported with resource type %#x.", srv->resource_type); + "Comparison samplers are not supported with resource type %#x.", resource_type); } else { vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled resource type %#x for combined resource/sampler " - "for resource %u, space %u and sampler %u, space %u.", srv->resource_type, + "for resource %u, space %u and sampler %u, space %u.", resource_type, crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space); sampler_type = ""; } @@ -1972,7 +2046,7 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator break; } - if (!shader_glsl_get_combined_sampler_binding(gen, crs, srv->resource_type, &binding_idx)) + if (!shader_glsl_get_combined_sampler_binding(gen, crs, resource_type, &binding_idx)) { vkd3d_glsl_compiler_error(gen, 
VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, "No descriptor binding specified for combined resource/sampler " @@ -2213,6 +2287,9 @@ static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator case VKD3D_SHADER_COMPONENT_UINT: vkd3d_string_buffer_printf(buffer, "uvec4"); break; + case VKD3D_SHADER_COMPONENT_INT: + vkd3d_string_buffer_printf(buffer, "ivec4"); + break; case VKD3D_SHADER_COMPONENT_FLOAT: vkd3d_string_buffer_printf(buffer, "vec4"); break; diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 1f90a4ba805..96de18dc886 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -2774,6 +2774,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru return string; case HLSL_CLASS_TEXTURE: + if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + { + vkd3d_string_buffer_printf(string, "ByteAddressBuffer"); + return string; + } + if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) { vkd3d_string_buffer_printf(string, "Texture"); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index 18effcc5be1..8dace11916a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -74,6 +74,7 @@ ANY (.) 
BlendState {return KW_BLENDSTATE; } break {return KW_BREAK; } Buffer {return KW_BUFFER; } +ByteAddressBuffer {return KW_BYTEADDRESSBUFFER; } case {return KW_CASE; } cbuffer {return KW_CBUFFER; } centroid {return KW_CENTROID; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index dcbba46ede6..60aade732db 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -53,7 +53,7 @@ struct parse_parameter struct parse_initializer initializer; }; -struct parse_colon_attribute +struct parse_colon_attributes { struct hlsl_semantic semantic; struct hlsl_reg_reservation reg_reservation; @@ -5175,6 +5175,10 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; struct hlsl_ir_node *expr; + if (ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL || hlsl_version_lt(ctx, 4, 1)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); + if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) return false; @@ -5599,6 +5603,55 @@ static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct return false; } +static bool add_raw_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; + struct hlsl_ir_node *load; + unsigned int value_dim; + + if (params->args_count != 1 && params->args_count != 2) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method 'Load': expected between 1 and 2, but got %u.", + params->args_count); + return false; + } + + if (params->args_count == 2) + 
{ + hlsl_fixme(ctx, loc, "Tiled resource status argument."); + return false; + } + + if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Scalar address argument expected for '%s'.", name); + return false; + } + + if (!strcmp(name, "Load")) + value_dim = 1; + else if (!strcmp(name, "Load2")) + value_dim = 2; + else if (!strcmp(name, "Load3")) + value_dim = 3; + else + value_dim = 4; + + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], + hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) + return false; + + load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim); + load_params.resource = object; + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + hlsl_block_add_instr(block, load); + return true; +} + static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -5608,6 +5661,9 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *load; bool multisampled; + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + return add_raw_load_method_call(ctx, block, object, name, params, loc); + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) { hlsl_fixme(ctx, loc, "Method '%s' for structured buffers.", name); @@ -6260,7 +6316,10 @@ texture_methods[] = { "GetDimensions", add_getdimensions_method_call, "00111111111110" }, - { "Load", add_load_method_call, "00111011110110" }, + { "Load", add_load_method_call, "00111011110111" }, + { "Load2", add_raw_load_method_call, "00000000000001" }, + { "Load3", add_raw_load_method_call, "00000000000001" }, + { "Load4", add_raw_load_method_call, "00000000000001" }, { "Sample", add_sample_method_call, "00111111001000" }, { "SampleBias", add_sample_lod_method_call, 
"00111111001000" }, @@ -6490,7 +6549,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct parse_if_body if_body; enum parse_assign_op assign_op; struct hlsl_reg_reservation reg_reservation; - struct parse_colon_attribute colon_attribute; + struct parse_colon_attributes colon_attributes; struct hlsl_semantic semantic; enum hlsl_buffer_type buffer_type; enum hlsl_sampler_dim sampler_dim; @@ -6505,6 +6564,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_BLENDSTATE %token KW_BREAK %token KW_BUFFER +%token KW_BYTEADDRESSBUFFER %token KW_CASE %token KW_CONSTANTBUFFER %token KW_CBUFFER @@ -6687,7 +6747,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %type buffer_type -%type colon_attribute +%type colon_attributes %type field %type fields_list @@ -6875,7 +6935,7 @@ effect_group: } buffer_declaration: - var_modifiers buffer_type any_identifier colon_attribute annotations_opt + var_modifiers buffer_type any_identifier colon_attributes annotations_opt { if ($4.semantic.name) hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); @@ -7200,7 +7260,7 @@ func_declaration: func_prototype_no_attrs: /* var_modifiers is necessary to avoid shift/reduce conflicts. 
*/ - var_modifiers type var_identifier '(' parameters ')' colon_attribute + var_modifiers type var_identifier '(' parameters ')' colon_attributes { uint32_t modifiers = $1; struct hlsl_ir_var *var; @@ -7377,28 +7437,39 @@ var_identifier: VAR_IDENTIFIER | NEW_IDENTIFIER -colon_attribute: +colon_attributes: %empty { $$.semantic = (struct hlsl_semantic){0}; $$.reg_reservation.reg_type = 0; $$.reg_reservation.offset_type = 0; } - | semantic + | colon_attributes semantic { - $$.semantic = $1; - $$.reg_reservation.reg_type = 0; - $$.reg_reservation.offset_type = 0; + hlsl_cleanup_semantic(&$$.semantic); + $$.semantic = $2; } - | register_reservation + | colon_attributes register_reservation { - $$.semantic = (struct hlsl_semantic){0}; - $$.reg_reservation = $1; + if ($$.reg_reservation.reg_type) + hlsl_fixme(ctx, &@2, "Multiple register() reservations."); + + $$.reg_reservation.reg_type = $2.reg_type; + $$.reg_reservation.reg_index = $2.reg_index; + $$.reg_reservation.reg_space = $2.reg_space; } - | packoffset_reservation + | colon_attributes packoffset_reservation { - $$.semantic = (struct hlsl_semantic){0}; - $$.reg_reservation = $1; + if (ctx->cur_buffer == ctx->globals_buffer) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "The packoffset() reservation is only allowed within 'cbuffer' blocks."); + } + else + { + $$.reg_reservation.offset_type = $2.offset_type; + $$.reg_reservation.offset_index = $2.offset_index; + } } semantic: @@ -7594,7 +7665,7 @@ parameter: } parameter_decl: - var_modifiers type_no_void any_identifier arrays colon_attribute + var_modifiers type_no_void any_identifier arrays colon_attributes { uint32_t modifiers = $1; struct hlsl_type *type; @@ -7863,6 +7934,10 @@ type_no_void: $$ = hlsl_new_texture_type(ctx, $1, $3, sample_count); } + | KW_BYTEADDRESSBUFFER + { + $$ = hlsl_new_texture_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), 0); + } | uav_type '<' resource_format '>' { 
validate_uav_type(ctx, $1, $3, &@3); @@ -8095,7 +8170,7 @@ variables_def_typed: } variable_decl: - any_identifier arrays colon_attribute annotations_opt + any_identifier arrays colon_attributes annotations_opt { $$ = hlsl_alloc(ctx, sizeof(*$$)); $$->loc = @1; diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 213e403dcbd..bea16fd4da6 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -6520,6 +6520,21 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog } mask = (1 << var->data_type->dimx) - 1; + + if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output + && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX) + { + if (var->data_type->dimx > 1) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "PSIZE output must have only 1 component in this shader model."); + /* For some reason the writemask has all components set. */ + mask = VKD3DSP_WRITEMASK_ALL; + } + if (!ascii_strcasecmp(var->semantic.name, "FOG") && output && program->shader_version.major < 3 + && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->dimx > 1) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "FOG output must have only 1 component in this shader model."); + use_mask = mask; /* FIXME: retrieve use mask accurately. 
*/ component_type = VKD3D_SHADER_COMPONENT_FLOAT; } @@ -6817,6 +6832,16 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, } } +static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, + struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) +{ + VKD3D_ASSERT(instr->reg.allocated); + vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); + dst->reg.idx[0].offset = instr->reg.id; + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->write_mask = instr->reg.writemask; +} + static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_constant *constant) { @@ -6842,6 +6867,25 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, dst_param->write_mask = instr->reg.writemask; } +static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_expr *expr) +{ + struct vkd3d_shader_src_param *src_param; + struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_instruction *ins; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) + return; + ins->flags = VKD3DSI_SAMPLE_INFO_UINT; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + src_param = &ins->src[0]; + vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0); + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); +} + /* Translate ops that can be mapped to a single vsir instruction with only one dst register. 
*/ static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, @@ -6866,10 +6910,7 @@ static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, return; dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); - dst_param->reg.idx[0].offset = instr->reg.id; - dst_param->reg.dimension = VSIR_DIMENSION_VEC4; - dst_param->write_mask = instr->reg.writemask; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); dst_param->modifiers = dst_mod; for (i = 0; i < src_count; ++i) @@ -7216,6 +7257,8 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, if (deref->var->is_output_semantic) { + const char *semantic_name = deref->var->semantic.name; + version.major = ctx->profile->major_version; version.minor = ctx->profile->minor_version; version.type = ctx->profile->type; @@ -7225,7 +7268,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, type = VKD3DSPR_TEMP; register_index = 0; } - else if (!sm1_register_from_semantic_name(&version, deref->var->semantic.name, + else if (!sm1_register_from_semantic_name(&version, semantic_name, deref->var->semantic.index, true, &type, &register_index)) { VKD3D_ASSERT(reg.allocated); @@ -7234,6 +7277,14 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, } else writemask = (1u << deref->var->data_type->dimx) - 1; + + if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE") + || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3))) + { + /* These are always 1-component, but for some reason are written + * with a writemask containing all components. 
*/ + writemask = VKD3DSP_WRITEMASK_ALL; + } } else VKD3D_ASSERT(reg.allocated); @@ -7642,6 +7693,123 @@ static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, hlsl_replace_node(instr, vsir_instr); } +static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, + const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block, + const struct vkd3d_shader_location *loc) +{ + const struct vkd3d_shader_version *version = &program->shader_version; + const bool output = var->is_output_semantic; + enum vkd3d_shader_sysval_semantic semantic; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_register_type type; + enum vkd3d_shader_opcode opcode; + uint32_t write_mask; + unsigned int idx; + bool has_idx; + + sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping, + ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); + if (semantic == ~0u) + semantic = VKD3D_SHADER_SV_NONE; + + if (var->is_input_semantic) + { + switch (semantic) + { + case VKD3D_SHADER_SV_NONE: + opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT; + break; + + case VKD3D_SHADER_SV_INSTANCE_ID: + case VKD3D_SHADER_SV_IS_FRONT_FACE: + case VKD3D_SHADER_SV_PRIMITIVE_ID: + case VKD3D_SHADER_SV_SAMPLE_INDEX: + case VKD3D_SHADER_SV_VERTEX_ID: + opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3DSIH_DCL_INPUT_PS_SGV : VKD3DSIH_DCL_INPUT_SGV; + break; + + default: + opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) + ? 
VKD3DSIH_DCL_INPUT_PS_SIV : VKD3DSIH_DCL_INPUT_SIV; + break; + } + } + else + { + if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) + opcode = VKD3DSIH_DCL_OUTPUT; + else + opcode = VKD3DSIH_DCL_OUTPUT_SIV; + } + + if (sm4_register_from_semantic_name(version, var->semantic.name, output, &type, &has_idx)) + { + if (has_idx) + idx = var->semantic.index; + write_mask = (1u << var->data_type->dimx) - 1; + } + else + { + if (output) + type = VKD3DSPR_OUTPUT; + else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) + type = VKD3DSPR_PATCHCONST; + else + type = VKD3DSPR_INPUT; + + has_idx = true; + idx = var->regs[HLSL_REGSET_NUMERIC].id; + write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, opcode, 0, 0))) + return; + + if (opcode == VKD3DSIH_DCL_OUTPUT) + { + VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE + || semantic == VKD3D_SHADER_SV_TARGET || type != VKD3DSPR_OUTPUT); + dst_param = &ins->declaration.dst; + } + else if (opcode == VKD3DSIH_DCL_INPUT || opcode == VKD3DSIH_DCL_INPUT_PS) + { + VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); + dst_param = &ins->declaration.dst; + } + else + { + VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); + ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval_indexed(semantic, + var->semantic.index); + dst_param = &ins->declaration.register_semantic.reg; + } + + if (has_idx) + { + vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = idx; + } + else + { + vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0); + } + + if (shader_sm4_is_scalar_register(&dst_param->reg)) + dst_param->reg.dimension = VSIR_DIMENSION_SCALAR; + else + dst_param->reg.dimension = VSIR_DIMENSION_VEC4; + + dst_param->write_mask = write_mask; + + if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL) + ins->flags = sm4_get_interpolation_mode(var->data_type, 
var->storage_modifiers); + + add_last_vsir_instr_to_block(ctx, program, block); +} + static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc) { @@ -7674,76 +7842,754 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, add_last_vsir_instr_to_block(ctx, program, block); } -static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_expr *expr) +static bool type_is_float(const struct hlsl_type *type) { - switch (expr->op) - { - case HLSL_OP1_ABS: - generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true); - return true; + return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; +} - default: - return false; - } +static bool type_is_integer(const struct hlsl_type *type) +{ + return type->e.numeric.type == HLSL_TYPE_BOOL + || type->e.numeric.type == HLSL_TYPE_INT + || type->e.numeric.type == HLSL_TYPE_UINT; } -static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) +static void sm4_generate_vsir_cast_from_bool(struct hlsl_ctx *ctx, struct vsir_program *program, + const struct hlsl_ir_expr *expr, uint32_t bits) { - struct hlsl_ir_node *instr, *next; + struct hlsl_ir_node *operand = expr->operands[0].node; + const struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_dst_param *dst_param; + struct hlsl_constant_value value = {0}; + struct vkd3d_shader_instruction *ins; - LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + VKD3D_ASSERT(instr->reg.allocated); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_AND, 1, 2))) + return; + + dst_param = &ins->dst[0]; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); + + vsir_src_from_hlsl_node(&ins->src[0], ctx, 
operand, dst_param->write_mask); + + value.u[0].u = bits; + vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, VKD3D_DATA_UINT, 1, 0); +} + +static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_expr *expr) +{ + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_type *dst_type = expr->node.data_type; + const struct hlsl_type *src_type = arg1->data_type; + + static const union { - if (instr->data_type) - { - if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) + uint32_t u; + float f; + } one = { .f = 1.0 }; + + /* Narrowing casts were already lowered. */ + VKD3D_ASSERT(src_type->dimx == dst_type->dimx); + + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + switch (src_type->e.numeric.type) { - hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); - break; + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + + case HLSL_TYPE_INT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ITOF, 0, 0, true); + return true; + + case HLSL_TYPE_UINT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UTOF, 0, 0, true); + return true; + + case HLSL_TYPE_BOOL: + sm4_generate_vsir_cast_from_bool(ctx, program, expr, one.u); + return true; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); + return false; + + default: + vkd3d_unreachable(); } - } + break; - switch (instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); + case HLSL_TYPE_INT: + switch (src_type->e.numeric.type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOI, 0, 0, true); + return true; - case HLSL_IR_CONSTANT: - /* In SM4 
all constants are inlined. */ - break; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; - case HLSL_IR_EXPR: - if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr))) - replace_instr_with_last_vsir_instr(ctx, program, instr); - break; + case HLSL_TYPE_BOOL: + sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u); + return true; - case HLSL_IR_SWIZZLE: - generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); - replace_instr_with_last_vsir_instr(ctx, program, instr); - break; + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); + return false; - default: - break; - } + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_UINT: + switch (src_type->e.numeric.type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOU, 0, 0, true); + return true; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + + case HLSL_TYPE_BOOL: + sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u); + return true; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); + return false; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); + return false; + + case HLSL_TYPE_BOOL: + /* Casts to bool should have already been lowered. 
*/ + default: + vkd3d_unreachable(); } } -static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, - struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) +static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program, + enum vkd3d_shader_opcode opcode, const struct hlsl_ir_expr *expr, unsigned int dst_idx) { - struct hlsl_block block = {0}; - struct hlsl_scope *scope; - struct hlsl_ir_var *var; - uint32_t temp_count; + struct vkd3d_shader_dst_param *dst_param, *null_param; + const struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_instruction *ins; + unsigned int i, src_count; - compute_liveness(ctx, func); - mark_indexable_vars(ctx, func); - temp_count = allocate_temp_registers(ctx, func); - if (ctx->result) + VKD3D_ASSERT(instr->reg.allocated); + + for (i = 0; i < HLSL_MAX_OPERANDS; ++i) + { + if (expr->operands[i].node) + src_count = i + 1; + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 2, src_count))) return; - program->temp_count = max(program->temp_count, temp_count); - hlsl_block_init(&block); + dst_param = &ins->dst[dst_idx]; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); + + null_param = &ins->dst[1 - dst_idx]; + vsir_dst_param_init(null_param, VKD3DSPR_NULL, VKD3D_DATA_FLOAT, 0); + null_param->reg.dimension = VSIR_DIMENSION_NONE; + + for (i = 0; i < src_count; ++i) + vsir_src_from_hlsl_node(&ins->src[i], ctx, expr->operands[i].node, dst_param->write_mask); +} + +static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_expr *expr) +{ + struct hlsl_ir_node *operand = expr->operands[0].node; + const struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_dst_param *dst_param; + struct hlsl_constant_value value = {0}; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(type_is_float(expr->node.data_type)); + + if (!(ins = 
generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DIV, 1, 2))) + return; + + dst_param = &ins->dst[0]; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); + + value.u[0].f = 1.0f; + value.u[1].f = 1.0f; + value.u[2].f = 1.0f; + value.u[3].f = 1.0f; + vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, + VKD3D_DATA_FLOAT, instr->data_type->dimx, dst_param->write_mask); + + vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask); +} + +static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_expr *expr, const char *dst_type_name) +{ + const struct hlsl_type *dst_type = expr->node.data_type; + const struct hlsl_type *src_type = NULL; + + VKD3D_ASSERT(expr->node.reg.allocated); + if (expr->operands[0].node) + src_type = expr->operands[0].node->data_type; + + switch (expr->op) + { + case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: + sm4_generate_vsir_rasterizer_sample_count(ctx, program, expr); + return true; + + case HLSL_OP1_ABS: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true); + return true; + + case HLSL_OP1_BIT_NOT: + VKD3D_ASSERT(type_is_integer(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true); + return true; + + case HLSL_OP1_CAST: + return sm4_generate_vsir_instr_expr_cast(ctx, program, expr); + + case HLSL_OP1_CEIL: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_PI, 0, 0, true); + return true; + + case HLSL_OP1_COS: + VKD3D_ASSERT(type_is_float(dst_type)); + sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 1); + return true; + + case HLSL_OP1_DSX: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); + return true; + + case HLSL_OP1_DSX_COARSE: + 
VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_COARSE, 0, 0, true); + return true; + + case HLSL_OP1_DSX_FINE: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_FINE, 0, 0, true); + return true; + + case HLSL_OP1_DSY: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); + return true; + + case HLSL_OP1_DSY_COARSE: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_COARSE, 0, 0, true); + return true; + + case HLSL_OP1_DSY_FINE: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_FINE, 0, 0, true); + return true; + + case HLSL_OP1_EXP2: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EXP, 0, 0, true); + return true; + + case HLSL_OP1_F16TOF32: + VKD3D_ASSERT(type_is_float(dst_type)); + VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F16TOF32, 0, 0, true); + return true; + + case HLSL_OP1_F32TOF16: + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT); + VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F32TOF16, 0, 0, true); + return true; + + case HLSL_OP1_FLOOR: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NI, 0, 0, true); + return true; + + case HLSL_OP1_FRACT: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); + return true; + + case HLSL_OP1_LOG2: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LOG, 0, 0, true); + 
return true; + + case HLSL_OP1_LOGIC_NOT: + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true); + return true; + + case HLSL_OP1_NEG: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); + return true; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INEG, 0, 0, true); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_name); + return false; + } + + case HLSL_OP1_RCP: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + /* SM5 comes with a RCP opcode */ + if (hlsl_version_ge(ctx, 5, 0)) + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RCP, 0, 0, true); + else + sm4_generate_vsir_rcp_using_div(ctx, program, expr); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_name); + return false; + } + + case HLSL_OP1_REINTERPRET: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + + case HLSL_OP1_ROUND: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NE, 0, 0, true); + return true; + + case HLSL_OP1_RSQ: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RSQ, 0, 0, true); + return true; + + case HLSL_OP1_SAT: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); + return true; + + case HLSL_OP1_SIN: + VKD3D_ASSERT(type_is_float(dst_type)); + sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 0); + return true; + + case HLSL_OP1_SQRT: + 
VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SQRT, 0, 0, true); + return true; + + case HLSL_OP1_TRUNC: + VKD3D_ASSERT(type_is_float(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_Z, 0, 0, true); + return true; + + case HLSL_OP2_ADD: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); + return true; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IADD, 0, 0, true); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_name); + return false; + } + + case HLSL_OP2_BIT_AND: + VKD3D_ASSERT(type_is_integer(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true); + return true; + + case HLSL_OP2_BIT_OR: + VKD3D_ASSERT(type_is_integer(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true); + return true; + + case HLSL_OP2_BIT_XOR: + VKD3D_ASSERT(type_is_integer(dst_type)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_XOR, 0, 0, true); + return true; + + case HLSL_OP2_DIV: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DIV, 0, 0, true); + return true; + + case HLSL_TYPE_UINT: + sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 0); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_name); + return false; + } + + case HLSL_OP2_DOT: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + switch (expr->operands[0].node->data_type->dimx) + { + case 4: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); + return true; + + 
case 3: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); + return true; + + case 2: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2, 0, 0, false); + return true; + + case 1: + default: + vkd3d_unreachable(); + } + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_name); + return false; + } + + case HLSL_OP2_EQUAL: + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + + switch (src_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EQO, 0, 0, true); + return true; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IEQ, 0, 0, true); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", + debug_hlsl_type(ctx, src_type)); + return false; + } + + case HLSL_OP2_GEQUAL: + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + + switch (src_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_GEO, 0, 0, true); + return true; + + case HLSL_TYPE_INT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IGE, 0, 0, true); + return true; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UGE, 0, 0, true); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", + debug_hlsl_type(ctx, src_type)); + return false; + } + + case HLSL_OP2_LESS: + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + + switch (src_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LTO, 0, 0, true); + return true; + + case HLSL_TYPE_INT: + generate_vsir_instr_expr_single_instr_op(ctx, program, 
expr, VKD3DSIH_ILT, 0, 0, true); + return true; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ULT, 0, 0, true); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", + debug_hlsl_type(ctx, src_type)); + return false; + } + + case HLSL_OP2_LOGIC_AND: + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true); + return true; + + case HLSL_OP2_LOGIC_OR: + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true); + return true; + + case HLSL_OP2_LSHIFT: + VKD3D_ASSERT(type_is_integer(dst_type)); + VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ISHL, 0, 0, true); + return true; + + case HLSL_OP3_MAD: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); + return true; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAD, 0, 0, true); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s MAD expression.", dst_type_name); + return false; + } + + case HLSL_OP2_MAX: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); + return true; + + case HLSL_TYPE_INT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAX, 0, 0, true); + return true; + + case HLSL_TYPE_UINT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMAX, 0, 0, true); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_name); + return false; + } + + case 
HLSL_OP2_MIN: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); + return true; + + case HLSL_TYPE_INT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMIN, 0, 0, true); + return true; + + case HLSL_TYPE_UINT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMIN, 0, 0, true); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_name); + return false; + } + + case HLSL_OP2_MOD: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_UINT: + sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 1); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_name); + return false; + } + + case HLSL_OP2_MUL: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); + return true; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + /* Using IMUL instead of UMUL because we're taking the low + * bits, and the native compiler generates IMUL. 
*/ + sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_IMUL, expr, 1); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_name); + return false; + } + + case HLSL_OP2_NEQUAL: + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + + switch (src_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NEU, 0, 0, true); + return true; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INE, 0, 0, true); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", + debug_hlsl_type(ctx, src_type)); + return false; + } + + case HLSL_OP2_RSHIFT: + VKD3D_ASSERT(type_is_integer(dst_type)); + VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, + dst_type->e.numeric.type == HLSL_TYPE_INT ? 
VKD3DSIH_ISHR : VKD3DSIH_USHR, 0, 0, true); + return true; + + case HLSL_OP3_TERNARY: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOVC, 0, 0, true); + return true; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); + return false; + } +} + +static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) +{ + struct vkd3d_string_buffer *dst_type_string; + struct hlsl_ir_node *instr, *next; + struct hlsl_ir_switch_case *c; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) + { + hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); + break; + } + } + + switch (instr->type) + { + case HLSL_IR_CALL: + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: + /* In SM4 all constants are inlined. 
*/ + break; + + case HLSL_IR_EXPR: + if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) + break; + + if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer)) + replace_instr_with_last_vsir_instr(ctx, program, instr); + + hlsl_release_string_buffer(ctx, dst_type_string); + break; + + case HLSL_IR_IF: + sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program); + sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program); + break; + + case HLSL_IR_LOOP: + sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program); + break; + + case HLSL_IR_SWITCH: + LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) + sm4_generate_vsir_block(ctx, &c->body, program); + break; + + case HLSL_IR_SWIZZLE: + generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); + replace_instr_with_last_vsir_instr(ctx, program, instr); + break; + + default: + break; + } + } +} + +static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, + struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) +{ + bool is_patch_constant_func = func == ctx->patch_constant_func; + struct hlsl_block block = {0}; + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + uint32_t temp_count; + + compute_liveness(ctx, func); + mark_indexable_vars(ctx, func); + temp_count = allocate_temp_registers(ctx, func); + if (ctx->result) + return; + program->temp_count = max(program->temp_count, temp_count); + + hlsl_block_init(&block); + + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && var->last_read) + || (var->is_output_semantic && var->first_write)) + sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); + } if (temp_count) sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c 
b/libs/vkd3d/libs/vkd3d-shader/ir.c index 9b50a308e11..836e0ade32a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -1932,6 +1932,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par const struct shader_signature *signature; const struct signature_element *e; + write_mask = dst_param->write_mask; + switch (reg->type) { case VKD3DSPR_OUTPUT: @@ -1987,6 +1989,10 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par signature = normaliser->output_signature; reg->type = VKD3DSPR_OUTPUT; dcl_params = normaliser->output_dcl_params; + /* Fog and point size are scalar, but fxc/d3dcompiler emits a full + * write mask when writing to them. */ + if (reg->idx[0].offset > 0) + write_mask = VKD3DSP_WRITEMASK_0; break; default: @@ -1994,7 +2000,6 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par } id_idx = reg->idx_count - 1; - write_mask = dst_param->write_mask; if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) vkd3d_unreachable(); e = &signature->elements[element_idx]; @@ -6214,6 +6219,14 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr return VKD3D_OK; } +static bool is_pre_rasterization_shader(enum vkd3d_shader_type type) +{ + return type == VKD3D_SHADER_TYPE_VERTEX + || type == VKD3D_SHADER_TYPE_HULL + || type == VKD3D_SHADER_TYPE_DOMAIN + || type == VKD3D_SHADER_TYPE_GEOMETRY; +} + static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *program, const struct vkd3d_shader_instruction *ret, size_t *ret_pos) { @@ -6244,10 +6257,7 @@ static enum vkd3d_result vsir_program_insert_point_size(struct vsir_program *pro if (program->has_point_size) return VKD3D_OK; - if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX - && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY - && program->shader_version.type != VKD3D_SHADER_TYPE_HULL - && 
program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) + if (!is_pre_rasterization_shader(program->shader_version.type)) return VKD3D_OK; for (unsigned int i = 0; i < program->parameter_count; ++i) @@ -6298,10 +6308,7 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra if (!program->has_point_size) return VKD3D_OK; - if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX - && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY - && program->shader_version.type != VKD3D_SHADER_TYPE_HULL - && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) + if (!is_pre_rasterization_shader(program->shader_version.type)) return VKD3D_OK; for (unsigned int i = 0; i < program->parameter_count; ++i) diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index fb7ce063c85..3fa4d68a48a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -8904,15 +8904,20 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler uint32_t base_coordinate_id, component_idx; uint32_t constituents[VKD3D_VEC4_SIZE]; struct vkd3d_shader_image image; + bool storage_buffer_uav = false; uint32_t indices[2]; unsigned int i, j; SpvOp op; resource = &src[instruction->src_count - 1]; - resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg); - if (resource->reg.type == VKD3DSPR_UAV - && spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) + if (resource->reg.type == VKD3DSPR_UAV) + { + resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg); + storage_buffer_uav = spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource); + } + + if (storage_buffer_uav) { texel_type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, texel_type_id); diff --git 
a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index bbd2f761d29..9c41e2c2053 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -1719,7 +1719,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID, VKD3D_SM4_SWIZZLE_NONE}, {VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT, VKD3D_SM4_SWIZZLE_VEC4}, {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL, VKD3D_SM4_SWIZZLE_INVALID}, - {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER, VKD3D_SM4_SWIZZLE_VEC4}, + {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER, VKD3D_SM4_SWIZZLE_SCALAR}, {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK, VKD3D_SM4_SWIZZLE_VEC4}, {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM, VKD3D_SM4_SWIZZLE_VEC4}, {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY, VKD3D_SM4_SWIZZLE_VEC4}, @@ -2235,7 +2235,7 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui return true; } -static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) +bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) { switch (reg->type) { @@ -2995,20 +2995,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); -static bool type_is_integer(const struct hlsl_type *type) -{ - switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return true; - - default: - return false; - } -} - bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) { @@ -4845,7 +4831,15 @@ static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct } else { - instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; + switch (component_type->sampler_dim) + { + case HLSL_SAMPLER_DIM_RAW_BUFFER: + instr.opcode = 
VKD3D_SM5_OP_DCL_RESOURCE_RAW; + break; + default: + instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; + break; + } } instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); @@ -4856,135 +4850,62 @@ static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct } } -static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, - const struct hlsl_ir_var *var, bool is_patch_constant_func) +static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) { - const struct vkd3d_shader_version *version = &tpf->program->shader_version; - const bool output = var->is_output_semantic; - enum vkd3d_shader_sysval_semantic semantic; - bool has_idx; - struct sm4_instruction instr = { - .dsts[0].reg.dimension = VSIR_DIMENSION_VEC4, - .dst_count = 1, - }; - - if (sm4_register_from_semantic_name(version, var->semantic.name, output, &instr.dsts[0].reg.type, &has_idx)) - { - if (has_idx) - { - instr.dsts[0].reg.idx[0].offset = var->semantic.index; - instr.dsts[0].reg.idx_count = 1; - } - else - { - instr.dsts[0].reg.idx_count = 0; - } - instr.dsts[0].write_mask = (1 << var->data_type->dimx) - 1; - } - else - { - if (output) - instr.dsts[0].reg.type = VKD3DSPR_OUTPUT; - else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) - instr.dsts[0].reg.type = VKD3DSPR_PATCHCONST; - else - instr.dsts[0].reg.type = VKD3DSPR_INPUT; - - instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; - instr.dsts[0].reg.idx_count = 1; - instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; - } - - if (shader_sm4_is_scalar_register(&instr.dsts[0].reg)) - instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR; - - sm4_sysval_semantic_from_semantic_name(&semantic, version, tpf->ctx->semantic_compat_mapping, - tpf->ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); - if (semantic == ~0u) - semantic = VKD3D_SHADER_SV_NONE; - - if (var->is_input_semantic) - { - switch (semantic) - { - 
case VKD3D_SHADER_SV_NONE: - instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; - break; - - case VKD3D_SHADER_SV_INSTANCE_ID: - case VKD3D_SHADER_SV_IS_FRONT_FACE: - case VKD3D_SHADER_SV_PRIMITIVE_ID: - case VKD3D_SHADER_SV_SAMPLE_INDEX: - case VKD3D_SHADER_SV_VERTEX_ID: - instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; - break; + .opcode = VKD3D_SM4_OP_DCL_TEMPS, - default: - instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; - break; - } + .idx = {count}, + .idx_count = 1, + }; - if (version->type == VKD3D_SHADER_TYPE_PIXEL) - { - enum vkd3d_shader_interpolation_mode mode; + write_sm4_instruction(tpf, &instr); +} - mode = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); - instr.extra_bits |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - } - } - else +static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp) +{ + struct sm4_instruction instr = { - if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) - instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; - else - instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; - } + .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, - if (instr.opcode == VKD3D_SM4_OP_DCL_OUTPUT) - { - VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || semantic == VKD3D_SHADER_SV_TARGET - || instr.dsts[0].reg.type != VKD3DSPR_OUTPUT); - } - else if (instr.opcode == VKD3D_SM4_OP_DCL_INPUT || instr.opcode == VKD3D_SM4_OP_DCL_INPUT_PS) - { - VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); - } - else - { - VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); - instr.idx_count = 1; - instr.idx[0] = vkd3d_siv_from_sysval_indexed(semantic, var->semantic.index); - } + .idx = {temp->register_idx, temp->register_size, temp->component_count}, + .idx_count = 3, + }; 
write_sm4_instruction(tpf, &instr); } -static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) +static void tpf_dcl_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, + const struct vkd3d_shader_dst_param *dst, uint32_t interpolation_flags) { struct sm4_instruction instr = { - .opcode = VKD3D_SM4_OP_DCL_TEMPS, + .opcode = opcode, - .idx = {count}, - .idx_count = 1, + .dsts[0] = *dst, + .dst_count = 1, + + .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT, }; write_sm4_instruction(tpf, &instr); } -static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp) +static void tpf_dcl_siv_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, + const struct vkd3d_shader_register_semantic *semantic, uint32_t interpolation_flags) { struct sm4_instruction instr = { - .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, + .opcode = opcode, - .idx = {temp->register_idx, temp->register_size, temp->component_count}, - .idx_count = 3, + .dsts[0] = semantic->reg, + .dst_count = 1, + + .idx[0] = semantic->sysval_semantic, + .idx_count = 1, + + .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT, }; write_sm4_instruction(tpf, &instr); @@ -5111,125 +5032,6 @@ static void write_sm4_ret(const struct tpf_compiler *tpf) write_sm4_instruction(tpf, &instr); } -static void write_sm4_unary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier src_mod) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[0].write_mask); - instr.srcs[0].modifiers = src_mod; - instr.src_count = 1; - - write_sm4_instruction(tpf, &instr); -} - -static void 
write_sm4_unary_op_with_two_destinations(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; - instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; - instr.dsts[1 - dst_idx].reg.idx_count = 0; - instr.dst_count = 2; - - sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[dst_idx].write_mask); - instr.src_count = 1; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_binary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask); - sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask); - instr.src_count = 2; - - write_sm4_instruction(tpf, &instr); -} - -/* dp# instructions don't map the swizzle. 
*/ -static void write_sm4_binary_op_dot(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(tpf, &instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_binary_op_with_two_destinations(const struct tpf_compiler *tpf, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, - const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; - instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; - instr.dsts[1 - dst_idx].reg.idx_count = 0; - instr.dst_count = 2; - - sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[dst_idx].write_mask); - sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[dst_idx].write_mask); - instr.src_count = 2; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_ternary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, - const struct hlsl_ir_node *src3) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask); - sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask); - 
sm4_src_from_node(tpf, &instr.srcs[2], src3, instr.dsts[0].write_mask); - instr.src_count = 3; - - write_sm4_instruction(tpf, &instr); -} - static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, @@ -5240,12 +5042,15 @@ static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_no && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); const struct vkd3d_shader_version *version = &tpf->program->shader_version; + bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; struct sm4_instruction instr; memset(&instr, 0, sizeof(instr)); if (uav) instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; + else if (raw) + instr.opcode = VKD3D_SM5_OP_LD_RAW; else instr.opcode = multisampled ? 
VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; @@ -5441,742 +5246,67 @@ static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ write_sm4_instruction(tpf, &instr); } -static bool type_is_float(const struct hlsl_type *type) +static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) { - return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; -} + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_IF, + .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, + .src_count = 1, + }; -static void write_sm4_cast_from_bool(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr, - const struct hlsl_ir_node *arg, uint32_t mask) -{ - struct sm4_instruction instr; + VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_AND; + sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); + write_sm4_instruction(tpf, &instr); - sm4_dst_from_node(&instr.dsts[0], &expr->node); - instr.dst_count = 1; + write_sm4_block(tpf, &iff->then_block); - sm4_src_from_node(tpf, &instr.srcs[0], arg, instr.dsts[0].write_mask); - instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; - instr.srcs[1].reg.dimension = VSIR_DIMENSION_SCALAR; - instr.srcs[1].reg.u.immconst_u32[0] = mask; - instr.src_count = 2; + if (!list_empty(&iff->else_block.instrs)) + { + instr.opcode = VKD3D_SM4_OP_ELSE; + instr.src_count = 0; + write_sm4_instruction(tpf, &instr); + + write_sm4_block(tpf, &iff->else_block); + } + instr.opcode = VKD3D_SM4_OP_ENDIF; + instr.src_count = 0; write_sm4_instruction(tpf, &instr); } -static void write_sm4_cast(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) +static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) { - static const union - { - uint32_t u; - float f; - } one = { .f = 1.0 }; - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_type *dst_type = 
expr->node.data_type; - const struct hlsl_type *src_type = arg1->data_type; - - /* Narrowing casts were already lowered. */ - VKD3D_ASSERT(src_type->dimx == dst_type->dimx); + struct sm4_instruction instr = {0}; - switch (dst_type->e.numeric.type) + switch (jump->type) { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - switch (src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_UINT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(tpf, expr, arg1, one.u); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); - break; - - default: - vkd3d_unreachable(); - } + case HLSL_IR_JUMP_BREAK: + instr.opcode = VKD3D_SM4_OP_BREAK; break; - case HLSL_TYPE_INT: - switch (src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(tpf, expr, arg1, 1); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); - break; - - default: - vkd3d_unreachable(); - } + case HLSL_IR_JUMP_CONTINUE: + instr.opcode = VKD3D_SM4_OP_CONTINUE; break; - case HLSL_TYPE_UINT: - switch (src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(tpf, expr, arg1, 1); - 
break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); - break; + case HLSL_IR_JUMP_DISCARD_NZ: + { + instr.opcode = VKD3D_SM4_OP_DISCARD; + instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; - default: - vkd3d_unreachable(); - } + memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); + instr.src_count = 1; + sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); break; + } - case HLSL_TYPE_DOUBLE: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double."); - break; + case HLSL_IR_JUMP_RETURN: + vkd3d_unreachable(); - case HLSL_TYPE_BOOL: - /* Casts to bool should have already been lowered. */ default: - vkd3d_unreachable(); - } -} - -static void write_sm4_rasterizer_sample_count(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; - instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; - instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; - instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - instr.src_count = 1; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) -{ - const struct vkd3d_shader_version *version = &tpf->program->shader_version; - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_ir_node *arg2 = expr->operands[1].node; - const struct hlsl_ir_node *arg3 = expr->operands[2].node; - const struct hlsl_type *dst_type = expr->node.data_type; - struct vkd3d_string_buffer *dst_type_string; - - VKD3D_ASSERT(expr->node.reg.allocated); - - if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) - return; - - switch (expr->op) - { - case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: - if (version->type 
== VKD3D_SHADER_TYPE_PIXEL && vkd3d_shader_ver_ge(version, 4, 1)) - write_sm4_rasterizer_sample_count(tpf, &expr->node); - else - hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, - "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); - break; - - case HLSL_OP1_ABS: - switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_ABS); - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP1_BIT_NOT: - VKD3D_ASSERT(type_is_integer(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_CAST: - write_sm4_cast(tpf, expr); - break; - - case HLSL_OP1_CEIL: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_PI, &expr->node, arg1, 0); - break; - - case HLSL_OP1_COS: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); - break; - - case HLSL_OP1_DSX: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSX_COARSE: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSX_FINE: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY_COARSE: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY_FINE: - VKD3D_ASSERT(type_is_float(dst_type)); - 
write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_EXP2: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); - break; - - case HLSL_OP1_F16TOF32: - VKD3D_ASSERT(type_is_float(dst_type)); - VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0); - break; - - case HLSL_OP1_F32TOF16: - VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT); - VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_F32TOF16, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FLOOR: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FRACT: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOG2: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOGIC_NOT: - VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_NEG: - switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_NEG); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP1_RCP: - switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - /* SM5 comes with a RCP opcode */ - if (vkd3d_shader_ver_ge(version, 5, 0)) - { - write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); - } - else - { - /* For SM4, implement as DIV dst, 1.0, src */ - struct sm4_instruction instr; - struct 
hlsl_constant_value one; - - VKD3D_ASSERT(type_is_float(dst_type)); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_DIV; - - sm4_dst_from_node(&instr.dsts[0], &expr->node); - instr.dst_count = 1; - - for (unsigned int i = 0; i < 4; i++) - one.u[i].f = 1.0f; - sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask); - sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask); - instr.src_count = 2; - - write_sm4_instruction(tpf, &instr); - } - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP1_REINTERPRET: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_OP1_ROUND: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_RSQ: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); - break; - - case HLSL_OP1_SAT: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV - | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), - &expr->node, arg1, 0); - break; - - case HLSL_OP1_SIN: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); - break; - - case HLSL_OP1_SQRT: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_TRUNC: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); - break; - - case HLSL_OP2_ADD: - switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); - 
break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_BIT_AND: - VKD3D_ASSERT(type_is_integer(dst_type)); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_OR: - VKD3D_ASSERT(type_is_integer(dst_type)); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_XOR: - VKD3D_ASSERT(type_is_integer(dst_type)); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_DIV: - switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: - write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_DOT: - switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - switch (arg1->data_type->dimx) - { - case 4: - write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); - break; - - case 3: - write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); - break; - - case 2: - write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); - break; - - case 1: - default: - vkd3d_unreachable(); - } - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_EQUAL: - { - const struct hlsl_type *src_type = arg1->data_type; - - VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - - switch (src_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, 
&expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", - debug_hlsl_type(tpf->ctx, src_type)); - break; - } - break; - } - - case HLSL_OP2_GEQUAL: - { - const struct hlsl_type *src_type = arg1->data_type; - - VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - - switch (src_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", - debug_hlsl_type(tpf->ctx, src_type)); - break; - } - break; - } - - case HLSL_OP2_LESS: - { - const struct hlsl_type *src_type = arg1->data_type; - - VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - - switch (src_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", - debug_hlsl_type(tpf->ctx, src_type)); - break; - } - break; - } - - case HLSL_OP2_LOGIC_AND: - VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LOGIC_OR: - VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LSHIFT: - VKD3D_ASSERT(type_is_integer(dst_type)); - 
VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_MAX: - switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_MIN: - switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_MOD: - switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_UINT: - write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_MUL: - switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - /* Using IMUL instead of UMUL because we're taking the low - * bits, and the native compiler generates IMUL. 
*/ - write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_NEQUAL: - { - const struct hlsl_type *src_type = arg1->data_type; - - VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - - switch (src_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", - debug_hlsl_type(tpf->ctx, src_type)); - break; - } - break; - } - - case HLSL_OP2_RSHIFT: - VKD3D_ASSERT(type_is_integer(dst_type)); - VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, - &expr->node, arg1, arg2); - break; - - case HLSL_OP3_TERNARY: - write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); - break; - - case HLSL_OP3_MAD: - switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MAD, &expr->node, arg1, arg2, arg3); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_ternary_op(tpf, VKD3D_SM4_OP_IMAD, &expr->node, arg1, arg2, arg3); - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); - } - break; - - default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); - } - - hlsl_release_string_buffer(tpf->ctx, dst_type_string); -} - -static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_IF, - .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, - .src_count = 1, - }; - - VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); - - sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); - write_sm4_instruction(tpf, &instr); - - write_sm4_block(tpf, &iff->then_block); - - if (!list_empty(&iff->else_block.instrs)) - { - instr.opcode = VKD3D_SM4_OP_ELSE; - instr.src_count = 0; - write_sm4_instruction(tpf, &instr); - - write_sm4_block(tpf, &iff->else_block); - } - - instr.opcode = VKD3D_SM4_OP_ENDIF; - instr.src_count = 0; - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) -{ - struct sm4_instruction instr = {0}; - - switch (jump->type) - { - case HLSL_IR_JUMP_BREAK: - instr.opcode = VKD3D_SM4_OP_BREAK; - break; - - case HLSL_IR_JUMP_CONTINUE: - instr.opcode = VKD3D_SM4_OP_CONTINUE; - break; - - case HLSL_IR_JUMP_DISCARD_NZ: - { - instr.opcode = VKD3D_SM4_OP_DISCARD; - instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; - - 
memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); - instr.src_count = 1; - sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); - break; - } - - case HLSL_IR_JUMP_RETURN: - vkd3d_unreachable(); - - default: - hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); - return; + hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + return; } write_sm4_instruction(tpf, &instr); @@ -6506,11 +5636,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_ } instr.opcode = info->opcode; + instr.extra_bits = ins->flags << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; instr.dst_count = ins->dst_count; instr.src_count = ins->src_count; for (unsigned int i = 0; i < ins->dst_count; ++i) + { instr.dsts[i] = ins->dst[i]; + + if (instr.dsts[i].modifiers & VKD3DSPDM_SATURATE) + { + /* For vsir SATURATE is a dst modifier, while for tpf it is an instruction flag. */ + VKD3D_ASSERT(ins->dst_count == 1); + instr.dsts[i].modifiers &= ~VKD3DSPDM_SATURATE; + instr.extra_bits |= VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + } + } for (unsigned int i = 0; i < ins->src_count; ++i) instr.srcs[i] = ins->src[i]; @@ -6529,7 +5670,99 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp); break; + case VKD3DSIH_DCL_INPUT: + tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT, &ins->declaration.dst, 0); + break; + + case VKD3DSIH_DCL_INPUT_PS: + tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS, &ins->declaration.dst, ins->flags); + break; + + case VKD3DSIH_DCL_INPUT_PS_SGV: + tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SGV, &ins->declaration.register_semantic, 0); + break; + + case VKD3DSIH_DCL_INPUT_PS_SIV: + tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SIV, &ins->declaration.register_semantic, ins->flags); + break; + + case 
VKD3DSIH_DCL_INPUT_SGV: + tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SGV, &ins->declaration.register_semantic, 0); + break; + + case VKD3DSIH_DCL_INPUT_SIV: + tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SIV, &ins->declaration.register_semantic, 0); + break; + + case VKD3DSIH_DCL_OUTPUT: + tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT, &ins->declaration.dst, 0); + break; + + case VKD3DSIH_DCL_OUTPUT_SIV: + tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0); + break; + + case VKD3DSIH_ADD: + case VKD3DSIH_AND: + case VKD3DSIH_DIV: + case VKD3DSIH_DP2: + case VKD3DSIH_DP3: + case VKD3DSIH_DP4: + case VKD3DSIH_DSX: + case VKD3DSIH_DSX_COARSE: + case VKD3DSIH_DSX_FINE: + case VKD3DSIH_DSY: + case VKD3DSIH_DSY_COARSE: + case VKD3DSIH_DSY_FINE: + case VKD3DSIH_EQO: + case VKD3DSIH_EXP: + case VKD3DSIH_F16TOF32: + case VKD3DSIH_F32TOF16: + case VKD3DSIH_FRC: + case VKD3DSIH_FTOI: + case VKD3DSIH_FTOU: + case VKD3DSIH_GEO: + case VKD3DSIH_IADD: + case VKD3DSIH_IEQ: + case VKD3DSIH_IGE: + case VKD3DSIH_ILT: + case VKD3DSIH_IMAD: + case VKD3DSIH_IMAX: + case VKD3DSIH_IMIN: + case VKD3DSIH_IMUL: + case VKD3DSIH_INE: + case VKD3DSIH_INEG: + case VKD3DSIH_ISHL: + case VKD3DSIH_ISHR: + case VKD3DSIH_ITOF: + case VKD3DSIH_LOG: + case VKD3DSIH_LTO: + case VKD3DSIH_MAD: + case VKD3DSIH_MAX: + case VKD3DSIH_MIN: case VKD3DSIH_MOV: + case VKD3DSIH_MOVC: + case VKD3DSIH_MUL: + case VKD3DSIH_NEU: + case VKD3DSIH_NOT: + case VKD3DSIH_OR: + case VKD3DSIH_RCP: + case VKD3DSIH_ROUND_NE: + case VKD3DSIH_ROUND_NI: + case VKD3DSIH_ROUND_PI: + case VKD3DSIH_ROUND_Z: + case VKD3DSIH_RSQ: + case VKD3DSIH_SAMPLE_INFO: + case VKD3DSIH_SINCOS: + case VKD3DSIH_SQRT: + case VKD3DSIH_UDIV: + case VKD3DSIH_UGE: + case VKD3DSIH_ULT: + case VKD3DSIH_UMAX: + case VKD3DSIH_UMIN: + case VKD3DSIH_USHR: + case VKD3DSIH_UTOF: + case VKD3DSIH_XOR: tpf_simple_instruction(tpf, ins); break; @@ -6568,10 +5801,6 @@ static void write_sm4_block(struct tpf_compiler 
*tpf, const struct hlsl_block *b case HLSL_IR_CONSTANT: vkd3d_unreachable(); - case HLSL_IR_EXPR: - write_sm4_expr(tpf, hlsl_ir_expr(instr)); - break; - case HLSL_IR_IF: write_sm4_if(tpf, hlsl_ir_if(instr)); break; @@ -6621,16 +5850,6 @@ static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *b static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) { - struct hlsl_ctx *ctx = tpf->ctx; - const struct hlsl_ir_var *var; - - LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) - { - if ((var->is_input_semantic && var->last_read) - || (var->is_output_semantic && var->first_write)) - tpf_write_dcl_semantic(tpf, var, func == ctx->patch_constant_func); - } - if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size); @@ -6648,6 +5867,7 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec const struct hlsl_buffer *cbuffer; struct hlsl_ctx *ctx = tpf->ctx; size_t token_count_position; + uint32_t global_flags = 0; static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = { @@ -6669,6 +5889,27 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); token_count_position = put_u32(&buffer, 0); + if (version->major == 4) + { + for (i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + const struct hlsl_type *type = resource->component_type; + + if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + { + global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS; + break; + } + } + } + + if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) + global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; + + if (global_flags) + 
write_sm4_dcl_global_flags(tpf, global_flags); + if (version->type == VKD3D_SHADER_TYPE_HULL) { tpf_write_hs_decls(tpf); @@ -6703,9 +5944,6 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec write_sm4_dcl_textures(tpf, resource, true); } - if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) - write_sm4_dcl_global_flags(tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); - if (version->type == VKD3D_SHADER_TYPE_HULL) tpf_write_hs_control_point_phase(tpf); diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 1b6c37343d1..db18e6d12bc 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1620,6 +1620,7 @@ bool sm1_usage_from_semantic_name(const char *semantic_name, uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx); +bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg); bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func); diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index ed4cc370639..a55a97f6f2f 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -4804,15 +4804,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; const struct vkd3d_vk_device_procs *vk_procs; VkBuffer buffers[ARRAY_SIZE(list->strides)]; + struct d3d12_device *device = list->device; + unsigned int i, 
stride, max_view_count; struct d3d12_resource *resource; bool invalidate = false; - unsigned int i, stride; TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views); - vk_procs = &list->device->vk_procs; - null_resources = &list->device->null_resources; - gpu_va_allocator = &list->device->gpu_va_allocator; + vk_procs = &device->vk_procs; + null_resources = &device->null_resources; + gpu_va_allocator = &device->gpu_va_allocator; if (!vkd3d_bound_range(start_slot, view_count, ARRAY_SIZE(list->strides))) { @@ -4820,6 +4821,27 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi return; } + max_view_count = device->vk_info.device_limits.maxVertexInputBindings; + if (start_slot < max_view_count) + max_view_count -= start_slot; + else + max_view_count = 0; + + /* Although simply skipping unsupported binding slots isn't especially + * likely to work well in the general case, applications sometimes + * explicitly set all 32 vertex buffer bindings slots supported by + * Direct3D 12, with unused slots set to NULL. "Spider-Man Remastered" is + * an example of such an application. */ + if (view_count > max_view_count) + { + for (i = max_view_count; i < view_count; ++i) + { + if (views && views[i].BufferLocation) + WARN("Ignoring unsupported vertex buffer slot %u.\n", start_slot + i); + } + view_count = max_view_count; + } + for (i = 0; i < view_count; ++i) { if (views && views[i].BufferLocation) -- 2.45.2