diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch
index d710cb28..041716d4 100644
--- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch
+++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch
@@ -1,4 +1,4 @@
-From e81da48e8f4744caef2432dcf8044cbc5c5f2efb Mon Sep 17 00:00:00 2001
+From 0211f4dbd21e10be69d0a29ecd9d247c742696da Mon Sep 17 00:00:00 2001
 From: Alistair Leslie-Hughes
 Date: Tue, 3 Sep 2024 07:18:49 +1000
 Subject: [PATCH] Updated vkd3d to 03ad04c89004c7f800c5b1a0ea7ba28622916328.
diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch
index b8b2653c..6840a28e 100644
--- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch
+++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch
@@ -1,4 +1,4 @@
-From b74ccacc75f28c2fa4896b758df9f479ac3e2399 Mon Sep 17 00:00:00 2001
+From 1878bed164365c4bf885db2d84ef32854b122502 Mon Sep 17 00:00:00 2001
 From: Alistair Leslie-Hughes
 Date: Wed, 23 Oct 2024 13:50:07 +1100
 Subject: [PATCH] Updated vkd3d to 4889c71857ce2152a9c9e014b9f5831f96dc349b.
diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-91701f83035c0d67d1ab917e0f6b73f91e8.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-91701f83035c0d67d1ab917e0f6b73f91e8.patch
index b39da998..d0ea4c1e 100644
--- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-91701f83035c0d67d1ab917e0f6b73f91e8.patch
+++ b/patches/vkd3d-latest/0003-Updated-vkd3d-to-91701f83035c0d67d1ab917e0f6b73f91e8.patch
@@ -1,4 +1,4 @@
-From 2fa55f52e1789b6f6d894ed810467c58de169df1 Mon Sep 17 00:00:00 2001
+From 9f1222c0cc241c9a3256403a637d16ffd26587fc Mon Sep 17 00:00:00 2001
 From: Alistair Leslie-Hughes
 Date: Thu, 24 Oct 2024 07:08:51 +1100
 Subject: [PATCH] Updated vkd3d to 91701f83035c0d67d1ab917e0f6b73f91e8583d4.
diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-5eff8bf9188c401cc31ce14d42798dc3751.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-5eff8bf9188c401cc31ce14d42798dc3751.patch
index f3652284..1330cd1e 100644
--- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-5eff8bf9188c401cc31ce14d42798dc3751.patch
+++ b/patches/vkd3d-latest/0004-Updated-vkd3d-to-5eff8bf9188c401cc31ce14d42798dc3751.patch
@@ -1,4 +1,4 @@
-From 77d31a307f455b130cde7abaade54f37ca3184a3 Mon Sep 17 00:00:00 2001
+From d9841606332c7f2955b00b6a746f81fe3dc92e95 Mon Sep 17 00:00:00 2001
 From: Alistair Leslie-Hughes
 Date: Fri, 25 Oct 2024 07:38:01 +1100
 Subject: [PATCH] Updated vkd3d to 5eff8bf9188c401cc31ce14d42798dc3751377bd.
diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-ad2208b726f825305f69d099790208e4e4f.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-ad2208b726f825305f69d099790208e4e4f.patch
index d0940543..14cf0d96 100644
--- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-ad2208b726f825305f69d099790208e4e4f.patch
+++ b/patches/vkd3d-latest/0005-Updated-vkd3d-to-ad2208b726f825305f69d099790208e4e4f.patch
@@ -1,4 +1,4 @@
-From c0efe70386c12a81e226ac9a1a8cf1b3f3ee3840 Mon Sep 17 00:00:00 2001
+From 2d04ceb2bf3eb640d772d63c91ff03895071892b Mon Sep 17 00:00:00 2001
 From: Alistair Leslie-Hughes
 Date: Wed, 30 Oct 2024 10:33:09 +1100
 Subject: [PATCH] Updated vkd3d to ad2208b726f825305f69d099790208e4e4f85e35.
diff --git a/patches/vkd3d-latest/0006-Updated-vkd3d-to-794f4c30f4873841aaa0c5c9745ee732437.patch b/patches/vkd3d-latest/0006-Updated-vkd3d-to-794f4c30f4873841aaa0c5c9745ee732437.patch
index d5a1b584..123cb272 100644
--- a/patches/vkd3d-latest/0006-Updated-vkd3d-to-794f4c30f4873841aaa0c5c9745ee732437.patch
+++ b/patches/vkd3d-latest/0006-Updated-vkd3d-to-794f4c30f4873841aaa0c5c9745ee732437.patch
@@ -1,4 +1,4 @@
-From 6216860a91bbf1faffc1d527bf654f2ced7e7cfe Mon Sep 17 00:00:00 2001
+From 18c0eb6e1f1ab90c9a0f7c39c56618996e1a982f Mon Sep 17 00:00:00 2001
 From: Alistair Leslie-Hughes
 Date: Tue, 5 Nov 2024 07:37:21 +1100
 Subject: [PATCH] Updated vkd3d to 794f4c30f4873841aaa0c5c9745ee732437e70db.
diff --git a/patches/vkd3d-latest/0007-Updated-vkd3d-to-756b98f093ba26e8cd4d4fed1caa04a5c0d.patch b/patches/vkd3d-latest/0007-Updated-vkd3d-to-756b98f093ba26e8cd4d4fed1caa04a5c0d.patch
new file mode 100644
index 00000000..7d3e2fc6
--- /dev/null
+++ b/patches/vkd3d-latest/0007-Updated-vkd3d-to-756b98f093ba26e8cd4d4fed1caa04a5c0d.patch
@@ -0,0 +1,3253 @@
+From 9fd13a6e06a7de0001a09bb7175d3c262d9a0a69 Mon Sep 17 00:00:00 2001
+From: Alistair Leslie-Hughes
+Date: Thu, 7 Nov 2024 12:23:53 +1100
+Subject: [PATCH] Updated vkd3d to 756b98f093ba26e8cd4d4fed1caa04a5c0d0bc35.
+
+---
+ libs/vkd3d/libs/vkd3d-shader/fx.c             |   47 +-
+ libs/vkd3d/libs/vkd3d-shader/glsl.c           |  133 +-
+ libs/vkd3d/libs/vkd3d-shader/hlsl.c           |    6 +
+ libs/vkd3d/libs/vkd3d-shader/hlsl.l           |    1 +
+ libs/vkd3d/libs/vkd3d-shader/hlsl.y           |  113 +-
+ libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c   |  954 ++++++++++++-
+ libs/vkd3d/libs/vkd3d-shader/ir.c             |   25 +-
+ libs/vkd3d/libs/vkd3d-shader/spirv.c          |   11 +-
+ libs/vkd3d/libs/vkd3d-shader/tpf.c            | 1182 +++-------------
+ .../libs/vkd3d-shader/vkd3d_shader_private.h  |    1 +
+ libs/vkd3d/libs/vkd3d/command.c               |   30 +-
+ 11 files changed, 1392 insertions(+), 1111 deletions(-)
+
+diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c
+index 5382dd94f98..9b1ef3bb2e0 100644
+--- a/libs/vkd3d/libs/vkd3d-shader/fx.c
++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c
+@@ -514,6 +514,8 @@ enum fx_4_type_constants
+     FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY = 0xf,
+     FX_4_OBJECT_TYPE_TEXTURE_3D = 0x10,
+     FX_4_OBJECT_TYPE_TEXTURE_CUBE = 0x11,
++    FX_4_OBJECT_TYPE_RTV = 0x13,
++    FX_4_OBJECT_TYPE_DSV = 0x14,
+     FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY = 0x17,
+
+     FX_5_OBJECT_TYPE_GEOMETRY_SHADER = 0x1b,
+@@ -527,7 +529,12 @@
+     FX_5_OBJECT_TYPE_UAV_2DARRAY = 0x22,
+     FX_5_OBJECT_TYPE_UAV_3D = 0x23,
+     FX_5_OBJECT_TYPE_UAV_BUFFER = 0x24,
++    FX_5_OBJECT_TYPE_SRV_RAW_BUFFER = 0x25,
++    FX_5_OBJECT_TYPE_UAV_RAW_BUFFER = 0x26,
++    FX_5_OBJECT_TYPE_SRV_STRUCTURED_BUFFER = 0x27,
+     FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER = 0x28,
++    FX_5_OBJECT_TYPE_SRV_APPEND_STRUCTURED_BUFFER = 0x2b,
++    FX_5_OBJECT_TYPE_SRV_CONSUME_STRUCTURED_BUFFER = 0x2c,
+
+     /* Types */
+     FX_4_TYPE_CLASS_NUMERIC = 1,
+@@ -613,6 +620,7 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type)
+         [HLSL_SAMPLER_DIM_3D] = "RWTexture3D",
+         [HLSL_SAMPLER_DIM_BUFFER] = "RWBuffer",
+         [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer",
++        [HLSL_SAMPLER_DIM_RAW_BUFFER] = "RWByteAddressBuffer",
+     };
+
+     switch (type->class)
+@@ -821,17 +829,18 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co
+             [HLSL_SAMPLER_DIM_3D] = FX_5_OBJECT_TYPE_UAV_3D,
+             [HLSL_SAMPLER_DIM_BUFFER] = FX_5_OBJECT_TYPE_UAV_BUFFER,
+             [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER,
++            [HLSL_SAMPLER_DIM_RAW_BUFFER] = FX_5_OBJECT_TYPE_UAV_RAW_BUFFER,
+         };
+
+         put_u32_unaligned(buffer, uav_type[element_type->sampler_dim]);
+     }
+     else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW)
+     {
+-        put_u32_unaligned(buffer, 20);
++        put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_DSV);
+     }
+     else if (element_type->class == HLSL_CLASS_RENDER_TARGET_VIEW)
+     {
+-        put_u32_unaligned(buffer, 19);
++        put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_RTV);
+     }
+     else if (element_type->class == HLSL_CLASS_PIXEL_SHADER)
+     {
+@@ -3315,27 +3324,19 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int
+     vkd3d_shader_free_shader_code(&output);
+ }
+
+-static bool fx_4_is_shader_resource(const struct fx_4_binary_type *type)
++static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type)
+ {
+     switch (type->typeinfo)
+     {
+-        case FX_4_OBJECT_TYPE_TEXTURE:
+-        case FX_4_OBJECT_TYPE_TEXTURE_1D:
+-        case FX_4_OBJECT_TYPE_TEXTURE_1DARRAY:
+-        case FX_4_OBJECT_TYPE_TEXTURE_2D:
+-        case FX_4_OBJECT_TYPE_TEXTURE_2DARRAY:
+-        case FX_4_OBJECT_TYPE_TEXTURE_2DMS:
+-        case FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY:
+-        case FX_4_OBJECT_TYPE_TEXTURE_3D:
+-        case FX_4_OBJECT_TYPE_TEXTURE_CUBE:
+-        case FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY:
+-        case FX_5_OBJECT_TYPE_UAV_1D:
+-        case FX_5_OBJECT_TYPE_UAV_1DARRAY:
+-        case FX_5_OBJECT_TYPE_UAV_2D:
+-        case FX_5_OBJECT_TYPE_UAV_2DARRAY:
+-        case FX_5_OBJECT_TYPE_UAV_3D:
+-        case FX_5_OBJECT_TYPE_UAV_BUFFER:
+-        case FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER:
++        case FX_4_OBJECT_TYPE_STRING:
++        case FX_4_OBJECT_TYPE_PIXEL_SHADER:
++        case FX_4_OBJECT_TYPE_VERTEX_SHADER:
++        case FX_4_OBJECT_TYPE_GEOMETRY_SHADER:
++        case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO:
++        case FX_5_OBJECT_TYPE_GEOMETRY_SHADER:
++        case FX_5_OBJECT_TYPE_COMPUTE_SHADER:
++        case FX_5_OBJECT_TYPE_HULL_SHADER:
++        case FX_5_OBJECT_TYPE_DOMAIN_SHADER:
+             return true;
+         default:
+             return false;
+@@ -3347,6 +3348,9 @@ static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct
+     unsigned int i, element_count;
+     uint32_t value;
+
++    if (!fx_4_object_has_initializer(type))
++        return;
++
+     vkd3d_string_buffer_printf(&parser->buffer, " = {\n");
+     element_count = max(type->element_count, 1);
+     for (i = 0; i < element_count; ++i)
+@@ -3407,8 +3411,7 @@ static void fx_4_parse_objects(struct fx_parser *parser)
+         if (type.element_count)
+             vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count);
+
+-        if (!fx_4_is_shader_resource(&type))
+-            fx_4_parse_object_initializer(parser, &type);
++        fx_4_parse_object_initializer(parser, &type);
+         vkd3d_string_buffer_printf(&parser->buffer, ";\n");
+
+         fx_parse_fx_4_annotations(parser);
+diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c
+index 363054cb6d9..0df0e30f399 100644
+--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c
++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c
+@@ -763,16 +763,37 @@ static void shader_glsl_default(struct vkd3d_glsl_generator *gen)
+     vkd3d_string_buffer_printf(gen->buffer, "default:\n");
+ }
+
++static void shader_glsl_print_texel_offset(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen,
++        unsigned int offset_size, const struct vkd3d_shader_texel_offset *offset)
++{
++    switch (offset_size)
++    {
++        case 1:
++            vkd3d_string_buffer_printf(buffer, "%d", offset->u);
++            break;
++        case 2:
++            vkd3d_string_buffer_printf(buffer, "ivec2(%d, %d)", offset->u, offset->v);
++            break;
++        default:
++            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
++                    "Internal compiler error: Invalid texel offset size %u.", offset_size);
++            /* fall through */
++        case 3:
++            vkd3d_string_buffer_printf(buffer, "ivec3(%d, %d, %d)", offset->u, offset->v, offset->w);
++            break;
++    }
++}
++
+ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
+ {
++    unsigned int resource_id, resource_idx, resource_space, sample_count;
+     const struct glsl_resource_type_info *resource_type_info;
+-    unsigned int resource_id, resource_idx, resource_space;
+     const struct vkd3d_shader_descriptor_info1 *d;
+     enum vkd3d_shader_component_type sampled_type;
+     enum vkd3d_shader_resource_type resource_type;
+     struct vkd3d_string_buffer *fetch;
+     enum vkd3d_data_type data_type;
+-    struct glsl_src coord, lod;
++    struct glsl_src coord;
+     struct glsl_dst dst;
+     uint32_t coord_mask;
+
+@@ -790,6 +811,7 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_
+     {
+         resource_type = d->resource_type;
+         resource_space = d->register_space;
++        sample_count = d->sample_count;
+         sampled_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type);
+         data_type = vkd3d_data_type_from_component_type(sampled_type);
+     }
+@@ -799,6 +821,7 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_
+                 "Internal compiler error: Undeclared resource descriptor %u.", resource_id);
+         resource_space = 0;
+         resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
++        sample_count = 1;
+         data_type = VKD3D_DATA_FLOAT;
+     }
+
+@@ -815,7 +838,6 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_
+
+     glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
+     glsl_src_init(&coord, gen, &ins->src[0], coord_mask);
+-    glsl_src_init(&lod, gen, &ins->src[0], VKD3DSP_WRITEMASK_3);
+     fetch = vkd3d_string_buffer_get(&gen->string_buffers);
+
+     vkd3d_string_buffer_printf(fetch, "texelFetch(");
+@@ -823,14 +845,23 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_
+             resource_space, VKD3D_SHADER_DUMMY_SAMPLER_INDEX, 0);
+     vkd3d_string_buffer_printf(fetch, ", %s", coord.str->buffer);
+     if (resource_type != VKD3D_SHADER_RESOURCE_BUFFER)
+-        vkd3d_string_buffer_printf(fetch, ", %s", lod.str->buffer);
++    {
++        vkd3d_string_buffer_printf(fetch, ", ");
++        if (ins->opcode != VKD3DSIH_LD2DMS)
++            shader_glsl_print_src(fetch, gen, &ins->src[0], VKD3DSP_WRITEMASK_3, ins->src[0].reg.data_type);
++        else if (sample_count == 1)
++            /* If the resource isn't a true multisample resource, this is the
++             * "lod" parameter instead of the "sample" parameter. */
++            vkd3d_string_buffer_printf(fetch, "0");
++        else
++            shader_glsl_print_src(fetch, gen, &ins->src[2], VKD3DSP_WRITEMASK_0, ins->src[2].reg.data_type);
++    }
+     vkd3d_string_buffer_printf(fetch, ")");
+     shader_glsl_print_swizzle(fetch, ins->src[1].swizzle, ins->dst[0].write_mask);
+
+     shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", fetch->buffer);
+
+     vkd3d_string_buffer_release(&gen->string_buffers, fetch);
+-    glsl_src_cleanup(&lod, &gen->string_buffers);
+     glsl_src_cleanup(&coord, &gen->string_buffers);
+     glsl_dst_cleanup(&dst, &gen->string_buffers);
+ }
+@@ -868,8 +899,9 @@ static void shader_glsl_print_shadow_coord(struct vkd3d_string_buffer *buffer, s
+
+ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
+ {
+-    bool shadow_sampler, array, bias, gather, grad, lod, lod_zero, shadow;
++    bool shadow_sampler, array, bias, dynamic_offset, gather, grad, lod, lod_zero, offset, shadow;
+     const struct glsl_resource_type_info *resource_type_info;
++    const struct vkd3d_shader_src_param *resource, *sampler;
+     unsigned int resource_id, resource_idx, resource_space;
+     unsigned int sampler_id, sampler_idx, sampler_space;
+     const struct vkd3d_shader_descriptor_info1 *d;
+@@ -881,23 +913,24 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk
+     struct glsl_dst dst;
+
+     bias = ins->opcode == VKD3DSIH_SAMPLE_B;
+-    gather = ins->opcode == VKD3DSIH_GATHER4;
++    dynamic_offset = ins->opcode == VKD3DSIH_GATHER4_PO;
++    gather = ins->opcode == VKD3DSIH_GATHER4 || ins->opcode == VKD3DSIH_GATHER4_PO;
+     grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD;
+     lod = ins->opcode == VKD3DSIH_SAMPLE_LOD || ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
+     lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
++    offset = dynamic_offset || vkd3d_shader_instruction_has_texel_offset(ins);
+     shadow = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
+
+-    if (vkd3d_shader_instruction_has_texel_offset(ins))
+-        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
+-                "Internal compiler error: Unhandled texel sample offset.");
++    resource = &ins->src[1 + dynamic_offset];
++    sampler = &ins->src[2 + dynamic_offset];
+
+-    if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr
+-            || ins->src[2].reg.idx[0].rel_addr || ins->src[2].reg.idx[1].rel_addr)
++    if (resource->reg.idx[0].rel_addr || resource->reg.idx[1].rel_addr
++            || sampler->reg.idx[0].rel_addr || sampler->reg.idx[1].rel_addr)
+         vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED,
+                 "Descriptor indexing is not supported.");
+
+-    resource_id = ins->src[1].reg.idx[0].offset;
+-    resource_idx = ins->src[1].reg.idx[1].offset;
++    resource_id = resource->reg.idx[0].offset;
++    resource_idx = resource->reg.idx[1].offset;
+     if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id)))
+     {
+         resource_type = d->resource_type;
+@@ -927,8 +960,8 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk
+         array = false;
+     }
+
+-    sampler_id = ins->src[2].reg.idx[0].offset;
+-    sampler_idx = ins->src[2].reg.idx[1].offset;
++    sampler_id = sampler->reg.idx[0].offset;
++    sampler_idx = sampler->reg.idx[1].offset;
+     if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id)))
+     {
+         sampler_space = d->register_space;
+@@ -958,13 +991,14 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk
+     sample = vkd3d_string_buffer_get(&gen->string_buffers);
+
+     if (gather)
+-        vkd3d_string_buffer_printf(sample, "textureGather(");
++        vkd3d_string_buffer_printf(sample, "textureGather");
+     else if (grad)
+-        vkd3d_string_buffer_printf(sample, "textureGrad(");
++        vkd3d_string_buffer_printf(sample, "textureGrad");
+     else if (lod)
+-        vkd3d_string_buffer_printf(sample, "textureLod(");
++        vkd3d_string_buffer_printf(sample, "textureLod");
+     else
+-        vkd3d_string_buffer_printf(sample, "texture(");
++        vkd3d_string_buffer_printf(sample, "texture");
++    vkd3d_string_buffer_printf(sample, "%s(", offset ? "Offset" : "");
+     shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space);
+     vkd3d_string_buffer_printf(sample, ", ");
+     if (shadow)
+@@ -985,18 +1019,32 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk
+     {
+         vkd3d_string_buffer_printf(sample, ", 0.0");
+     }
+-    else if (bias || lod)
++    else if (lod)
+     {
+         vkd3d_string_buffer_printf(sample, ", ");
+         shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type);
+     }
+-    if (gather)
++    if (offset)
++    {
++        vkd3d_string_buffer_printf(sample, ", ");
++        if (dynamic_offset)
++            shader_glsl_print_src(sample, gen, &ins->src[1],
++                    vkd3d_write_mask_from_component_count(coord_size - array), ins->src[1].reg.data_type);
++        else
++            shader_glsl_print_texel_offset(sample, gen, coord_size - array, &ins->texel_offset);
++    }
++    if (bias)
+     {
+-        if ((component_idx = vsir_swizzle_get_component(ins->src[2].swizzle, 0)))
++        vkd3d_string_buffer_printf(sample, ", ");
++        shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type);
++    }
++    else if (gather)
++    {
++        if ((component_idx = vsir_swizzle_get_component(sampler->swizzle, 0)))
+             vkd3d_string_buffer_printf(sample, ", %d", component_idx);
+     }
+     vkd3d_string_buffer_printf(sample, ")");
+-    shader_glsl_print_swizzle(sample, ins->src[1].swizzle, ins->dst[0].write_mask);
++    shader_glsl_print_swizzle(sample, resource->swizzle, ins->dst[0].write_mask);
+
+     shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", sample->buffer);
+
+@@ -1268,7 +1316,13 @@ static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, st
+                         "Internal compiler error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", version->type);
+             vkd3d_string_buffer_printf(buffer,
+                     "uintBitsToFloat(uvec4(gl_FrontFacing ? 0xffffffffu : 0u, 0u, 0u, 0u))");
++            break;
+
++        case VKD3D_SHADER_SV_SAMPLE_INDEX:
++            if (version->type != VKD3D_SHADER_TYPE_PIXEL)
++                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
++                        "Internal compiler error: Unhandled SV_SAMPLE_INDEX in shader type #%x.", version->type);
++            vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_SampleID, 0, 0, 0))");
+             break;
+
+         case VKD3D_SHADER_SV_TARGET:
+@@ -1390,6 +1444,9 @@ static void shader_glsl_shader_epilogue(struct vkd3d_glsl_generator *gen)
+                 case VKD3D_SHADER_COMPONENT_UINT:
+                     vkd3d_string_buffer_printf(buffer, " = floatBitsToUint(%s_out[%u])", gen->prefix, e->register_index);
+                     break;
++                case VKD3D_SHADER_COMPONENT_INT:
++                    vkd3d_string_buffer_printf(buffer, " = floatBitsToInt(%s_out[%u])", gen->prefix, e->register_index);
++                    break;
+                 default:
+                     vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
+                             "Internal compiler error: Unhandled output component type %#x.", e->component_type);
+@@ -1499,6 +1556,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen,
+             shader_glsl_cast(gen, ins, "uint", "uvec");
+             break;
+         case VKD3DSIH_GATHER4:
++        case VKD3DSIH_GATHER4_PO:
+         case VKD3DSIH_SAMPLE:
+         case VKD3DSIH_SAMPLE_B:
+         case VKD3DSIH_SAMPLE_C:
+@@ -1553,6 +1611,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen,
+             shader_glsl_cast(gen, ins, "float", "vec");
+             break;
+         case VKD3DSIH_LD:
++        case VKD3DSIH_LD2DMS:
+             shader_glsl_ld(gen, ins);
+             break;
+         case VKD3DSIH_LD_UAV_TYPED:
+@@ -1911,6 +1970,7 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator
+     struct vkd3d_string_buffer *buffer = gen->buffer;
+     enum vkd3d_shader_component_type component_type;
+     const char *sampler_type, *sampler_type_prefix;
++    enum vkd3d_shader_resource_type resource_type;
+     unsigned int binding_idx;
+     bool shadow = false;
+
+@@ -1936,18 +1996,32 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator
+         return;
+     }
+
+-    if ((resource_type_info = shader_glsl_get_resource_type_info(srv->resource_type)))
++    resource_type = srv->resource_type;
++    if (srv->sample_count == 1)
++    {
++        /* The OpenGL API distinguishes between multi-sample textures with
++         * sample count 1 and single-sample textures. Direct3D and Vulkan
++         * don't make this distinction at the API level, but Direct3D shaders
++         * are capable of expressing both. We therefore map such multi-sample
++         * textures to their single-sample equivalents here. */
++        if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS)
++            resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
++        else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY)
++            resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY;
++    }
++
++    if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type)))
+     {
+         sampler_type = resource_type_info->type_suffix;
+         if (shadow && !resource_type_info->shadow)
+             vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED,
+-                    "Comparison samplers are not supported with resource type %#x.", srv->resource_type);
++                    "Comparison samplers are not supported with resource type %#x.", resource_type);
+     }
+     else
+     {
+         vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
+                 "Internal compiler error: Unhandled resource type %#x for combined resource/sampler "
+-                "for resource %u, space %u and sampler %u, space %u.", srv->resource_type,
++                "for resource %u, space %u and sampler %u, space %u.", resource_type,
+                 crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space);
+         sampler_type = "";
+     }
+@@ -1972,7 +2046,7 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator
+             break;
+     }
+
+-    if (!shader_glsl_get_combined_sampler_binding(gen, crs, srv->resource_type, &binding_idx))
++    if (!shader_glsl_get_combined_sampler_binding(gen, crs, resource_type, &binding_idx))
+     {
+         vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND,
+                 "No descriptor binding specified for combined resource/sampler "
+@@ -2213,6 +2287,9 @@ static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator
+             case VKD3D_SHADER_COMPONENT_UINT:
+                 vkd3d_string_buffer_printf(buffer, "uvec4");
+                 break;
++            case VKD3D_SHADER_COMPONENT_INT:
++                vkd3d_string_buffer_printf(buffer, "ivec4");
++                break;
+             case VKD3D_SHADER_COMPONENT_FLOAT:
+                 vkd3d_string_buffer_printf(buffer, "vec4");
+                 break;
+diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+index 1f90a4ba805..96de18dc886 100644
+--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+@@ -2774,6 +2774,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru
+             return string;
+
+         case HLSL_CLASS_TEXTURE:
++            if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
++            {
++                vkd3d_string_buffer_printf(string, "ByteAddressBuffer");
++                return string;
++            }
++
+             if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC)
+             {
+                 vkd3d_string_buffer_printf(string, "Texture");
+diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l
+index 18effcc5be1..8dace11916a 100644
+--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l
++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l
+@@ -74,6 +74,7 @@ ANY (.)
+ BlendState            {return KW_BLENDSTATE; }
+ break                 {return KW_BREAK; }
+ Buffer                {return KW_BUFFER; }
++ByteAddressBuffer     {return KW_BYTEADDRESSBUFFER; }
+ case                  {return KW_CASE; }
+ cbuffer               {return KW_CBUFFER; }
+ centroid              {return KW_CENTROID; }
+diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
+index dcbba46ede6..60aade732db 100644
+--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y
++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
+@@ -53,7 +53,7 @@ struct parse_parameter
+     struct parse_initializer initializer;
+ };
+
+-struct parse_colon_attribute
++struct parse_colon_attributes
+ {
+     struct hlsl_semantic semantic;
+     struct hlsl_reg_reservation reg_reservation;
+@@ -5175,6 +5175,10 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx,
+     struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
+     struct hlsl_ir_node *expr;
+
++    if (ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL || hlsl_version_lt(ctx, 4, 1))
++        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
++                "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher.");
++
+     if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT,
+             operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)))
+         return false;
+@@ -5599,6 +5603,55 @@ static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct
+     return false;
+ }
+
++static bool add_raw_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object,
++        const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
++{
++    struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD};
++    struct hlsl_ir_node *load;
++    unsigned int value_dim;
++
++    if (params->args_count != 1 && params->args_count != 2)
++    {
++        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
++                "Wrong number of arguments to method 'Load': expected between 1 and 2, but got %u.",
++                params->args_count);
++        return false;
++    }
++
++    if (params->args_count == 2)
++    {
++        hlsl_fixme(ctx, loc, "Tiled resource status argument.");
++        return false;
++    }
++
++    if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR)
++    {
++        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Scalar address argument expected for '%s'.", name);
++        return false;
++    }
++
++    if (!strcmp(name, "Load"))
++        value_dim = 1;
++    else if (!strcmp(name, "Load2"))
++        value_dim = 2;
++    else if (!strcmp(name, "Load3"))
++        value_dim = 3;
++    else
++        value_dim = 4;
++
++    if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0],
++            hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)))
++        return false;
++
++    load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim);
++    load_params.resource = object;
++
++    if (!(load = hlsl_new_resource_load(ctx, &load_params, loc)))
++        return false;
++    hlsl_block_add_instr(block, load);
++    return true;
++}
++
+ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object,
+         const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+ {
+@@ -5608,6 +5661,9 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block,
+     struct hlsl_ir_node *load;
+     bool multisampled;
+
++    if (object_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
++        return add_raw_load_method_call(ctx, block, object, name, params, loc);
++
+     if (object_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
+     {
+         hlsl_fixme(ctx, loc, "Method '%s' for structured buffers.", name);
+@@ -6260,7 +6316,10 @@ texture_methods[] =
+
+     { "GetDimensions", add_getdimensions_method_call, "00111111111110" },
+
+-    { "Load",          add_load_method_call,          "00111011110110" },
++    { "Load",          add_load_method_call,          "00111011110111" },
++    { "Load2",         add_raw_load_method_call,      "00000000000001" },
++    { "Load3",         add_raw_load_method_call,      "00000000000001" },
++    { "Load4",         add_raw_load_method_call,      "00000000000001" },
+
+     { "Sample",        add_sample_method_call,        "00111111001000" },
+     { "SampleBias",    add_sample_lod_method_call,    "00111111001000" },
+@@ -6490,7 +6549,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
+     struct parse_if_body if_body;
+     enum parse_assign_op assign_op;
+     struct hlsl_reg_reservation reg_reservation;
+-    struct parse_colon_attribute colon_attribute;
++    struct parse_colon_attributes colon_attributes;
+     struct hlsl_semantic semantic;
+     enum hlsl_buffer_type buffer_type;
+     enum hlsl_sampler_dim sampler_dim;
+@@ -6505,6 +6564,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
+ %token KW_BLENDSTATE
+ %token KW_BREAK
+ %token KW_BUFFER
++%token KW_BYTEADDRESSBUFFER
+ %token KW_CASE
+ %token KW_CONSTANTBUFFER
+ %token KW_CBUFFER
+@@ -6687,7 +6747,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
+
+ %type buffer_type
+
+-%type colon_attribute
++%type colon_attributes
+
+ %type field
+ %type fields_list
+@@ -6875,7 +6935,7 @@ effect_group:
+         }
+
+ buffer_declaration:
+-      var_modifiers buffer_type any_identifier colon_attribute annotations_opt
++      var_modifiers buffer_type any_identifier colon_attributes annotations_opt
+         {
+             if ($4.semantic.name)
+                 hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers.");
+@@ -7200,7 +7260,7 @@ func_declaration:
+
+ func_prototype_no_attrs:
+       /* var_modifiers is necessary to avoid shift/reduce conflicts. */
+-      var_modifiers type var_identifier '(' parameters ')' colon_attribute
++      var_modifiers type var_identifier '(' parameters ')' colon_attributes
+         {
+             uint32_t modifiers = $1;
+             struct hlsl_ir_var *var;
+@@ -7377,28 +7437,39 @@ var_identifier:
+       VAR_IDENTIFIER
+     | NEW_IDENTIFIER
+
+-colon_attribute:
++colon_attributes:
+       %empty
+         {
+             $$.semantic = (struct hlsl_semantic){0};
+             $$.reg_reservation.reg_type = 0;
+             $$.reg_reservation.offset_type = 0;
+         }
+-    | semantic
++    | colon_attributes semantic
+         {
+-            $$.semantic = $1;
+-            $$.reg_reservation.reg_type = 0;
+-            $$.reg_reservation.offset_type = 0;
++            hlsl_cleanup_semantic(&$$.semantic);
++            $$.semantic = $2;
+         }
+-    | register_reservation
++    | colon_attributes register_reservation
+         {
+-            $$.semantic = (struct hlsl_semantic){0};
+-            $$.reg_reservation = $1;
++            if ($$.reg_reservation.reg_type)
++                hlsl_fixme(ctx, &@2, "Multiple register() reservations.");
++
++            $$.reg_reservation.reg_type = $2.reg_type;
++            $$.reg_reservation.reg_index = $2.reg_index;
++            $$.reg_reservation.reg_space = $2.reg_space;
+         }
+-    | packoffset_reservation
++    | colon_attributes packoffset_reservation
+         {
+-            $$.semantic = (struct hlsl_semantic){0};
+-            $$.reg_reservation = $1;
++            if (ctx->cur_buffer == ctx->globals_buffer)
++            {
++                hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
++                        "The packoffset() reservation is only allowed within 'cbuffer' blocks.");
++            }
++            else
++            {
++                $$.reg_reservation.offset_type = $2.offset_type;
++                $$.reg_reservation.offset_index = $2.offset_index;
++            }
+         }
+
+ semantic:
+@@ -7594,7 +7665,7 @@ parameter:
+         }
+
+ parameter_decl:
+-      var_modifiers type_no_void any_identifier arrays colon_attribute
++      var_modifiers type_no_void any_identifier arrays colon_attributes
+         {
+             uint32_t modifiers = $1;
+             struct hlsl_type *type;
+@@ -7863,6 +7934,10 @@ type_no_void:
+
+             $$ = hlsl_new_texture_type(ctx, $1, $3, sample_count);
+         }
++    | KW_BYTEADDRESSBUFFER
++        {
++            $$ = hlsl_new_texture_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), 0);
++        }
+     | uav_type '<' resource_format '>'
+         {
+             validate_uav_type(ctx, $1, $3, &@3);
+@@ -8095,7 +8170,7 @@ variables_def_typed:
+         }
+
+ variable_decl:
+-      any_identifier arrays colon_attribute annotations_opt
++      any_identifier arrays colon_attributes annotations_opt
+         {
+             $$ = hlsl_alloc(ctx, sizeof(*$$));
+             $$->loc = @1;
+diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+index 213e403dcbd..bea16fd4da6 100644
+--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+@@ -6520,6 +6520,21 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog
+         }
+
+         mask = (1 << var->data_type->dimx) - 1;
++
++        if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output
++                && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX)
++        {
++            if (var->data_type->dimx > 1)
++                hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
++                        "PSIZE output must have only 1 component in this shader model.");
++            /* For some reason the writemask has all components set. */
++            mask = VKD3DSP_WRITEMASK_ALL;
++        }
++        if (!ascii_strcasecmp(var->semantic.name, "FOG") && output && program->shader_version.major < 3
++                && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->dimx > 1)
++            hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
++                    "FOG output must have only 1 component in this shader model.");
++
+         use_mask = mask; /* FIXME: retrieve use mask accurately. */
+         component_type = VKD3D_SHADER_COMPONENT_FLOAT;
+     }
+@@ -6817,6 +6832,16 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src,
+     }
+ }
+
++static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst,
++        struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr)
++{
++    VKD3D_ASSERT(instr->reg.allocated);
++    vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
++    dst->reg.idx[0].offset = instr->reg.id;
++    dst->reg.dimension = VSIR_DIMENSION_VEC4;
++    dst->write_mask = instr->reg.writemask;
++}
++
+ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
+         struct vsir_program *program, struct hlsl_ir_constant *constant)
+ {
+@@ -6842,6 +6867,25 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
+     dst_param->write_mask = instr->reg.writemask;
+ }
+
++static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx,
++        struct vsir_program *program, struct hlsl_ir_expr *expr)
++{
++    struct vkd3d_shader_src_param *src_param;
++    struct hlsl_ir_node *instr = &expr->node;
++    struct vkd3d_shader_instruction *ins;
++
++    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1)))
++        return;
++    ins->flags = VKD3DSI_SAMPLE_INFO_UINT;
++
++    vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
++
++    src_param = &ins->src[0];
++    vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0);
++    src_param->reg.dimension = VSIR_DIMENSION_VEC4;
++    src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
++}
++
+ /* Translate ops that can be mapped to a single vsir instruction with only one dst register. */
+ static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx,
+         struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode,
+@@ -6866,10 +6910,7 @@ static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx,
+         return;
+
+     dst_param = &ins->dst[0];
+-    vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
+-    dst_param->reg.idx[0].offset = instr->reg.id;
+-    dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
+-    dst_param->write_mask = instr->reg.writemask;
++    vsir_dst_from_hlsl_node(dst_param, ctx, instr);
+     dst_param->modifiers = dst_mod;
+
+     for (i = 0; i < src_count; ++i)
+@@ -7216,6 +7257,8 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx,
+
+     if (deref->var->is_output_semantic)
+     {
++        const char *semantic_name = deref->var->semantic.name;
++
+         version.major = ctx->profile->major_version;
+         version.minor = ctx->profile->minor_version;
+         version.type = ctx->profile->type;
+@@ -7225,7 +7268,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx,
+             type = VKD3DSPR_TEMP;
+             register_index = 0;
+         }
+-        else if (!sm1_register_from_semantic_name(&version, deref->var->semantic.name,
++        else if (!sm1_register_from_semantic_name(&version, semantic_name,
+                 deref->var->semantic.index, true, &type, &register_index))
+         {
+             VKD3D_ASSERT(reg.allocated);
+@@ -7234,6 +7277,14 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx,
+         }
+         else
+             writemask = (1u << deref->var->data_type->dimx) - 1;
++
++        if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE")
++                || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3)))
++        {
++            /* These are always 1-component, but for some reason are written
++             * with a writemask containing all components. */
++            writemask = VKD3DSP_WRITEMASK_ALL;
++        }
+     }
+     else
+         VKD3D_ASSERT(reg.allocated);
+@@ -7642,6 +7693,123 @@ static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx,
+     hlsl_replace_node(instr, vsir_instr);
+ }
+
++static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program,
++        const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block,
++        const struct vkd3d_shader_location *loc)
++{
++    const struct vkd3d_shader_version *version = &program->shader_version;
++    const bool output = var->is_output_semantic;
++    enum vkd3d_shader_sysval_semantic semantic;
++    struct vkd3d_shader_dst_param *dst_param;
++    struct vkd3d_shader_instruction *ins;
++    enum vkd3d_shader_register_type type;
++    enum vkd3d_shader_opcode opcode;
++    uint32_t write_mask;
++    unsigned int idx;
++    bool has_idx;
++
++    sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping,
++            ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func);
++    if (semantic == ~0u)
++        semantic = VKD3D_SHADER_SV_NONE;
++
++    if (var->is_input_semantic)
++    {
++        switch (semantic)
++        {
++            case VKD3D_SHADER_SV_NONE:
++                opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
++                        ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT;
++                break;
++
++            case VKD3D_SHADER_SV_INSTANCE_ID:
++            case VKD3D_SHADER_SV_IS_FRONT_FACE:
++            case VKD3D_SHADER_SV_PRIMITIVE_ID:
++            case VKD3D_SHADER_SV_SAMPLE_INDEX:
++            case VKD3D_SHADER_SV_VERTEX_ID:
++                opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
++                        ? VKD3DSIH_DCL_INPUT_PS_SGV : VKD3DSIH_DCL_INPUT_SGV;
++                break;
++
++            default:
++                opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
++                        ? VKD3DSIH_DCL_INPUT_PS_SIV : VKD3DSIH_DCL_INPUT_SIV;
++                break;
++        }
++    }
++    else
++    {
++        if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL)
++            opcode = VKD3DSIH_DCL_OUTPUT;
++        else
++            opcode = VKD3DSIH_DCL_OUTPUT_SIV;
++    }
++
++    if (sm4_register_from_semantic_name(version, var->semantic.name, output, &type, &has_idx))
++    {
++        if (has_idx)
++            idx = var->semantic.index;
++        write_mask = (1u << var->data_type->dimx) - 1;
++    }
++    else
++    {
++        if (output)
++            type = VKD3DSPR_OUTPUT;
++        else if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
++            type = VKD3DSPR_PATCHCONST;
++        else
++            type = VKD3DSPR_INPUT;
++
++        has_idx = true;
++        idx = var->regs[HLSL_REGSET_NUMERIC].id;
++        write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask;
++    }
++
++    if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, opcode, 0, 0)))
++        return;
++
++    if (opcode == VKD3DSIH_DCL_OUTPUT)
++    {
++        VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE
++                || semantic == VKD3D_SHADER_SV_TARGET || type != VKD3DSPR_OUTPUT);
++        dst_param = &ins->declaration.dst;
++    }
++    else if (opcode == VKD3DSIH_DCL_INPUT || opcode == VKD3DSIH_DCL_INPUT_PS)
++    {
++        VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE);
++        dst_param = &ins->declaration.dst;
++    }
++    else
++    {
++        VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE);
++        ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval_indexed(semantic,
++                var->semantic.index);
++        dst_param = &ins->declaration.register_semantic.reg;
++    }
++
++    if (has_idx)
++    {
++        vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1);
++        dst_param->reg.idx[0].offset = idx;
++    }
++    else
++    {
++        vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0);
++    }
++
++    if (shader_sm4_is_scalar_register(&dst_param->reg))
++        dst_param->reg.dimension = VSIR_DIMENSION_SCALAR;
++    else
++        dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
++
++    dst_param->write_mask = write_mask;
++
++    if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL)
++        ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers);
++
++    add_last_vsir_instr_to_block(ctx, program, block);
++}
++
+ static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program,
+         uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc)
+ {
+@@ -7674,76 +7842,754 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx,
+     add_last_vsir_instr_to_block(ctx, program, block);
+ }
+
+-static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
+-        struct vsir_program *program, struct hlsl_ir_expr *expr)
++static bool type_is_float(const struct hlsl_type *type)
+ {
+-    switch (expr->op)
+-    {
+-        case HLSL_OP1_ABS:
+-            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true);
+-            return true;
++    return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF;
++}
+
+-        default:
+-            return false;
+-    }
++static bool type_is_integer(const struct hlsl_type *type)
++{
++    return type->e.numeric.type == HLSL_TYPE_BOOL
++            || type->e.numeric.type == HLSL_TYPE_INT
++            || type->e.numeric.type == HLSL_TYPE_UINT;
+ }
+
+-static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program)
++static void sm4_generate_vsir_cast_from_bool(struct hlsl_ctx *ctx, struct vsir_program *program,
++        const struct hlsl_ir_expr *expr, uint32_t bits)
+ {
+-    struct hlsl_ir_node *instr, *next;
++    struct hlsl_ir_node *operand = expr->operands[0].node;
++    const struct hlsl_ir_node *instr = &expr->node;
++    struct vkd3d_shader_dst_param *dst_param;
++    struct hlsl_constant_value value = {0};
++    struct vkd3d_shader_instruction *ins;
+
+-    LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
++    VKD3D_ASSERT(instr->reg.allocated);
++
++    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_AND, 1, 2)))
++        return;
++
++    dst_param = &ins->dst[0];
++    vsir_dst_from_hlsl_node(dst_param, ctx, instr);
++
++    vsir_src_from_hlsl_node(&ins->src[0], ctx, operand, dst_param->write_mask);
++
++    value.u[0].u = bits;
++    vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, VKD3D_DATA_UINT, 1, 0);
++}
++
++static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
++        struct vsir_program *program, struct hlsl_ir_expr *expr)
++{
++    const struct hlsl_ir_node *arg1 = expr->operands[0].node;
++    const struct hlsl_type *dst_type = expr->node.data_type;
++    const struct hlsl_type *src_type = arg1->data_type;
++
++    static const union
+     {
+-        if (instr->data_type)
+-        {
+-            if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
++        uint32_t u;
++        float f;
++    } one = { .f = 1.0 };
++
++    /* Narrowing casts were already lowered. */
++    VKD3D_ASSERT(src_type->dimx == dst_type->dimx);
++
++    switch (dst_type->e.numeric.type)
++    {
++        case HLSL_TYPE_HALF:
++        case HLSL_TYPE_FLOAT:
++            switch (src_type->e.numeric.type)
+             {
++                case HLSL_TYPE_HALF:
++                case HLSL_TYPE_FLOAT:
++                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
++                    return true;
++
++                case HLSL_TYPE_INT:
++                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ITOF, 0, 0, true);
++                    return true;
++
++                case HLSL_TYPE_UINT:
++                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UTOF, 0, 0, true);
++                    return true;
++
++                case HLSL_TYPE_BOOL:
++                    sm4_generate_vsir_cast_from_bool(ctx, program, expr, one.u);
++                    return true;
++
++                case HLSL_TYPE_DOUBLE:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float.");
++                    return false;
++
++                default:
++                    vkd3d_unreachable();
+             }
+-        }
+             break;
+
+-        switch (instr->type)
+-        {
+-            case HLSL_IR_CALL:
+-                vkd3d_unreachable();
++        case HLSL_TYPE_INT:
++            switch (src_type->e.numeric.type)
++            {
++                case HLSL_TYPE_HALF:
++                case HLSL_TYPE_FLOAT:
++                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOI, 0, 0, true);
++                    return true;
+
+-            case HLSL_IR_CONSTANT:
+-                /* In SM4 all constants are inlined. */
+-                break;
++                case HLSL_TYPE_INT:
++                case HLSL_TYPE_UINT:
++                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
++                    return true;
+
+-            case HLSL_IR_EXPR:
+-                if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr)))
+-                    replace_instr_with_last_vsir_instr(ctx, program, instr);
+-                break;
++                case HLSL_TYPE_BOOL:
++                    sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u);
++                    return true;
+
+-            case HLSL_IR_SWIZZLE:
+-                generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
+-                replace_instr_with_last_vsir_instr(ctx, program, instr);
+-                break;
++                case HLSL_TYPE_DOUBLE:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int.");
++                    return false;
+
+-            default:
+-                break;
+-        }
++                default:
++                    vkd3d_unreachable();
++            }
++            break;
++
++        case HLSL_TYPE_UINT:
++            switch (src_type->e.numeric.type)
++            {
++                case HLSL_TYPE_HALF:
++                case HLSL_TYPE_FLOAT:
++                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOU, 0, 0, true);
++                    return true;
++
++                case HLSL_TYPE_INT:
++                case HLSL_TYPE_UINT:
++                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
++                    return true;
++
++                case HLSL_TYPE_BOOL:
++                    sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u);
++                    return true;
++
++                case HLSL_TYPE_DOUBLE:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint.");
++                    return false;
++
++                default:
++                    vkd3d_unreachable();
++            }
++            break;
++
++        case HLSL_TYPE_DOUBLE:
++            hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double.");
++            return false;
++
++        case HLSL_TYPE_BOOL:
++            /* Casts to bool should have already been lowered. */
++        default:
++            vkd3d_unreachable();
+     }
+ }
+
+-static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx,
+-        struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program)
++static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program,
++        enum vkd3d_shader_opcode opcode, const struct hlsl_ir_expr *expr, unsigned int dst_idx)
+ {
+-    struct hlsl_block block = {0};
+-    struct hlsl_scope *scope;
+-    struct hlsl_ir_var *var;
+-    uint32_t temp_count;
++    struct vkd3d_shader_dst_param *dst_param, *null_param;
++    const struct hlsl_ir_node *instr = &expr->node;
++    struct vkd3d_shader_instruction *ins;
++    unsigned int i, src_count;
+
+-    compute_liveness(ctx, func);
+-    mark_indexable_vars(ctx, func);
+-    temp_count = allocate_temp_registers(ctx, func);
+-    if (ctx->result)
++    VKD3D_ASSERT(instr->reg.allocated);
++
++    for (i = 0; i < HLSL_MAX_OPERANDS; ++i)
++    {
++        if (expr->operands[i].node)
++            src_count = i + 1;
++    }
++
++    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 2, src_count)))
+         return;
+-    program->temp_count = max(program->temp_count, temp_count);
+
+-    hlsl_block_init(&block);
++    dst_param = &ins->dst[dst_idx];
++    vsir_dst_from_hlsl_node(dst_param, ctx, instr);
++
++    null_param = &ins->dst[1 - dst_idx];
++    vsir_dst_param_init(null_param, VKD3DSPR_NULL, VKD3D_DATA_FLOAT, 0);
++    null_param->reg.dimension = VSIR_DIMENSION_NONE;
++
++    for (i = 0; i < src_count; ++i)
++        vsir_src_from_hlsl_node(&ins->src[i], ctx, expr->operands[i].node, dst_param->write_mask);
++}
++
++static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx,
++        struct vsir_program *program, const struct hlsl_ir_expr *expr)
++{
++    struct hlsl_ir_node *operand = expr->operands[0].node;
++    const struct hlsl_ir_node *instr = &expr->node;
++    struct vkd3d_shader_dst_param *dst_param;
++    struct hlsl_constant_value value = {0};
++    struct vkd3d_shader_instruction *ins;
++
++    VKD3D_ASSERT(type_is_float(expr->node.data_type));
++
++    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DIV, 1, 2)))
++        return;
++
++    dst_param = &ins->dst[0];
++    vsir_dst_from_hlsl_node(dst_param, ctx, instr);
++
++    value.u[0].f = 1.0f;
++    value.u[1].f = 1.0f;
++    value.u[2].f = 1.0f;
++    value.u[3].f = 1.0f;
++    vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value,
++            VKD3D_DATA_FLOAT, instr->data_type->dimx, dst_param->write_mask);
++
++    vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask);
++}
++
++static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
++        struct vsir_program *program, struct hlsl_ir_expr *expr, const char *dst_type_name)
++{
++    const struct hlsl_type *dst_type = expr->node.data_type;
++    const struct hlsl_type *src_type = NULL;
++
++    VKD3D_ASSERT(expr->node.reg.allocated);
++    if (expr->operands[0].node)
++        src_type = expr->operands[0].node->data_type;
++
++    switch (expr->op)
++    {
++        case HLSL_OP0_RASTERIZER_SAMPLE_COUNT:
++            sm4_generate_vsir_rasterizer_sample_count(ctx, program, expr);
++            return true;
++
++        case HLSL_OP1_ABS:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true);
++            return true;
++
++        case HLSL_OP1_BIT_NOT:
++            VKD3D_ASSERT(type_is_integer(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_CAST:
++            return sm4_generate_vsir_instr_expr_cast(ctx, program, expr);
++
++        case HLSL_OP1_CEIL:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_PI, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_COS:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 1);
++            return true;
++
++        case HLSL_OP1_DSX:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_DSX_COARSE:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_COARSE, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_DSX_FINE:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_FINE, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_DSY:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_DSY_COARSE:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_COARSE, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_DSY_FINE:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_FINE, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_EXP2:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EXP, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_F16TOF32:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F16TOF32, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_F32TOF16:
++            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT);
++            VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F32TOF16, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_FLOOR:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NI, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_FRACT:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_LOG2:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LOG, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_LOGIC_NOT:
++            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_NEG:
++            switch (dst_type->e.numeric.type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true);
++                    return true;
++
++                case HLSL_TYPE_INT:
++                case HLSL_TYPE_UINT:
++                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INEG, 0, 0, true);
++                    return true;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_name);
++                    return false;
++            }
++
++        case HLSL_OP1_RCP:
++            switch (dst_type->e.numeric.type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    /* SM5 comes with a RCP opcode */
++                    if (hlsl_version_ge(ctx, 5, 0))
++                        generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RCP, 0, 0, true);
++                    else
++                        sm4_generate_vsir_rcp_using_div(ctx, program, expr);
++                    return true;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_name);
++                    return false;
++            }
++
++        case HLSL_OP1_REINTERPRET:
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_ROUND:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NE, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_RSQ:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RSQ, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_SAT:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true);
++            return true;
++
++        case HLSL_OP1_SIN:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 0);
++            return true;
++
++        case HLSL_OP1_SQRT:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SQRT, 0, 0, true);
++            return true;
++
++        case HLSL_OP1_TRUNC:
++            VKD3D_ASSERT(type_is_float(dst_type));
++            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_Z, 0, 0, true);
++            return true;
++
++        case HLSL_OP2_ADD:
++            switch (dst_type->e.numeric.type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true);
++                    return true;
++
++                case HLSL_TYPE_INT:
++                case HLSL_TYPE_UINT:
++                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IADD, 0, 0, true);
++                    return true;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_name);
++ return false; ++ } ++ ++ case HLSL_OP2_BIT_AND: ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true); ++ return true; ++ ++ case HLSL_OP2_BIT_OR: ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true); ++ return true; ++ ++ case HLSL_OP2_BIT_XOR: ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_XOR, 0, 0, true); ++ return true; ++ ++ case HLSL_OP2_DIV: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DIV, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_UINT: ++ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 0); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_DOT: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ switch (expr->operands[0].node->data_type->dimx) ++ { ++ case 4: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); ++ return true; ++ ++ case 3: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); ++ return true; ++ ++ case 2: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2, 0, 0, false); ++ return true; ++ ++ case 1: ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_EQUAL: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ ++ switch (src_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EQO, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IEQ, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", ++ debug_hlsl_type(ctx, src_type)); ++ return false; ++ } ++ ++ case HLSL_OP2_GEQUAL: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ ++ switch (src_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_GEO, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IGE, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UGE, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", ++ debug_hlsl_type(ctx, src_type)); ++ return false; ++ } ++ ++ case HLSL_OP2_LESS: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ ++ switch (src_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LTO, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ILT, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ULT, 0, 0, true); ++ 
return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", ++ debug_hlsl_type(ctx, src_type)); ++ return false; ++ } ++ ++ case HLSL_OP2_LOGIC_AND: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true); ++ return true; ++ ++ case HLSL_OP2_LOGIC_OR: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true); ++ return true; ++ ++ case HLSL_OP2_LSHIFT: ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ISHL, 0, 0, true); ++ return true; ++ ++ case HLSL_OP3_MAD: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAD, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s MAD expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_MAX: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAX, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMAX, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_MIN: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMIN, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMIN, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_MOD: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_UINT: ++ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 1); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_MUL: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ /* Using IMUL instead of UMUL because we're taking the low ++ * bits, and the native compiler generates IMUL. 
*/ ++ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_IMUL, expr, 1); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_NEQUAL: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ ++ switch (src_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NEU, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INE, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", ++ debug_hlsl_type(ctx, src_type)); ++ return false; ++ } ++ ++ case HLSL_OP2_RSHIFT: ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, ++ dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3DSIH_ISHR : VKD3DSIH_USHR, 0, 0, true); ++ return true; ++ ++ case HLSL_OP3_TERNARY: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOVC, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); ++ return false; ++ } ++} ++ ++static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) ++{ ++ struct vkd3d_string_buffer *dst_type_string; ++ struct hlsl_ir_node *instr, *next; ++ struct hlsl_ir_switch_case *c; ++ ++ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (instr->data_type) ++ { ++ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) ++ { ++ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); ++ break; ++ } ++ } ++ ++ switch (instr->type) ++ { ++ case HLSL_IR_CALL: ++ vkd3d_unreachable(); ++ ++ case HLSL_IR_CONSTANT: ++ /* In SM4 all constants are inlined. 
*/ ++ break; ++ ++ case HLSL_IR_EXPR: ++ if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) ++ break; ++ ++ if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer)) ++ replace_instr_with_last_vsir_instr(ctx, program, instr); ++ ++ hlsl_release_string_buffer(ctx, dst_type_string); ++ break; ++ ++ case HLSL_IR_IF: ++ sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program); ++ sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program); ++ break; ++ ++ case HLSL_IR_LOOP: ++ sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program); ++ break; ++ ++ case HLSL_IR_SWITCH: ++ LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) ++ sm4_generate_vsir_block(ctx, &c->body, program); ++ break; ++ ++ case HLSL_IR_SWIZZLE: ++ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); ++ replace_instr_with_last_vsir_instr(ctx, program, instr); ++ break; ++ ++ default: ++ break; ++ } ++ } ++} ++ ++static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, ++ struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) ++{ ++ bool is_patch_constant_func = func == ctx->patch_constant_func; ++ struct hlsl_block block = {0}; ++ struct hlsl_scope *scope; ++ struct hlsl_ir_var *var; ++ uint32_t temp_count; ++ ++ compute_liveness(ctx, func); ++ mark_indexable_vars(ctx, func); ++ temp_count = allocate_temp_registers(ctx, func); ++ if (ctx->result) ++ return; ++ program->temp_count = max(program->temp_count, temp_count); ++ ++ hlsl_block_init(&block); ++ ++ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if ((var->is_input_semantic && var->last_read) ++ || (var->is_output_semantic && var->first_write)) ++ sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); ++ } + + if (temp_count) + sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 9b50a308e11..836e0ade32a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -1932,6 +1932,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par + const struct shader_signature *signature; + const struct signature_element *e; + ++ write_mask = dst_param->write_mask; ++ + switch (reg->type) + { + case VKD3DSPR_OUTPUT: +@@ -1987,6 +1989,10 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par + signature = normaliser->output_signature; + reg->type = VKD3DSPR_OUTPUT; + dcl_params = normaliser->output_dcl_params; ++ /* Fog and point size are scalar, but fxc/d3dcompiler emits a full ++ * write mask when writing to them. 
*/ ++ if (reg->idx[0].offset > 0) ++ write_mask = VKD3DSP_WRITEMASK_0; + break; + + default: +@@ -1994,7 +2000,6 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par + } + + id_idx = reg->idx_count - 1; +- write_mask = dst_param->write_mask; + if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) + vkd3d_unreachable(); + e = &signature->elements[element_idx]; +@@ -6214,6 +6219,14 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr + return VKD3D_OK; + } + ++static bool is_pre_rasterization_shader(enum vkd3d_shader_type type) ++{ ++ return type == VKD3D_SHADER_TYPE_VERTEX ++ || type == VKD3D_SHADER_TYPE_HULL ++ || type == VKD3D_SHADER_TYPE_DOMAIN ++ || type == VKD3D_SHADER_TYPE_GEOMETRY; ++} ++ + static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *program, + const struct vkd3d_shader_instruction *ret, size_t *ret_pos) + { +@@ -6244,10 +6257,7 @@ static enum vkd3d_result vsir_program_insert_point_size(struct vsir_program *pro + if (program->has_point_size) + return VKD3D_OK; + +- if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX +- && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY +- && program->shader_version.type != VKD3D_SHADER_TYPE_HULL +- && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) ++ if (!is_pre_rasterization_shader(program->shader_version.type)) + return VKD3D_OK; + + for (unsigned int i = 0; i < program->parameter_count; ++i) +@@ -6298,10 +6308,7 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra + if (!program->has_point_size) + return VKD3D_OK; + +- if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX +- && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY +- && program->shader_version.type != VKD3D_SHADER_TYPE_HULL +- && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) ++ if (!is_pre_rasterization_shader(program->shader_version.type)) + return VKD3D_OK; + + for (unsigned int i = 0; i < program->parameter_count; ++i) +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index fb7ce063c85..3fa4d68a48a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -8904,15 +8904,20 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler + uint32_t base_coordinate_id, component_idx; + uint32_t constituents[VKD3D_VEC4_SIZE]; + struct vkd3d_shader_image image; ++ bool storage_buffer_uav = false; + uint32_t indices[2]; + unsigned int i, j; + SpvOp op; + + resource = &src[instruction->src_count - 1]; +- resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg); + +- if (resource->reg.type == VKD3DSPR_UAV +- && spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) ++ if (resource->reg.type == VKD3DSPR_UAV) ++ { ++ resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg); ++ storage_buffer_uav = spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource); ++ } ++ ++ if (storage_buffer_uav) + { + texel_type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, texel_type_id); +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index bbd2f761d29..9c41e2c2053 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -1719,7 
+1719,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) + {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID, VKD3D_SM4_SWIZZLE_NONE}, + {VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT, VKD3D_SM4_SWIZZLE_VEC4}, + {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL, VKD3D_SM4_SWIZZLE_INVALID}, +- {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER, VKD3D_SM4_SWIZZLE_VEC4}, ++ {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER, VKD3D_SM4_SWIZZLE_SCALAR}, + {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK, VKD3D_SM4_SWIZZLE_VEC4}, + {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM, VKD3D_SM4_SWIZZLE_VEC4}, + {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY, VKD3D_SM4_SWIZZLE_VEC4}, +@@ -2235,7 +2235,7 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui + return true; + } + +-static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) ++bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) + { + switch (reg->type) + { +@@ -2995,20 +2995,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con + + static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); + +-static bool type_is_integer(const struct hlsl_type *type) +-{ +- switch (type->e.numeric.type) +- { +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- return true; +- +- default: +- return false; +- } +-} +- + bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, + const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) + { +@@ -4845,7 +4831,15 @@ static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct + } + else + { +- instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; ++ switch (component_type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_RAW_BUFFER: ++ instr.opcode = VKD3D_SM5_OP_DCL_RESOURCE_RAW; ++ break; ++ default: ++ instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; ++ break; ++ } + } + instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); + +@@ -4856,135 +4850,62 @@ static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct + } + } + +-static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, +- const struct hlsl_ir_var *var, bool is_patch_constant_func) ++static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) + { +- const struct vkd3d_shader_version *version = &tpf->program->shader_version; +- const bool output = var->is_output_semantic; +- enum vkd3d_shader_sysval_semantic semantic; +- bool has_idx; +- + struct sm4_instruction instr = + { +- .dsts[0].reg.dimension = VSIR_DIMENSION_VEC4, +- .dst_count = 1, +- }; +- +- if (sm4_register_from_semantic_name(version, var->semantic.name, output, &instr.dsts[0].reg.type, &has_idx)) +- { +- if (has_idx) +- { +- instr.dsts[0].reg.idx[0].offset = var->semantic.index; +- instr.dsts[0].reg.idx_count = 1; +- } +- else +- { +- instr.dsts[0].reg.idx_count = 0; +- } +- instr.dsts[0].write_mask = (1 << var->data_type->dimx) - 1; +- } +- else +- { +- if (output) +- instr.dsts[0].reg.type = VKD3DSPR_OUTPUT; +- else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) +- instr.dsts[0].reg.type = VKD3DSPR_PATCHCONST; +- else +- instr.dsts[0].reg.type = VKD3DSPR_INPUT; +- +- instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; +- instr.dsts[0].reg.idx_count = 1; +- instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; +- } +- +- if (shader_sm4_is_scalar_register(&instr.dsts[0].reg)) +- 
instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR; +- +- sm4_sysval_semantic_from_semantic_name(&semantic, version, tpf->ctx->semantic_compat_mapping, +- tpf->ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); +- if (semantic == ~0u) +- semantic = VKD3D_SHADER_SV_NONE; +- +- if (var->is_input_semantic) +- { +- switch (semantic) +- { +- case VKD3D_SHADER_SV_NONE: +- instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) +- ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; +- break; +- +- case VKD3D_SHADER_SV_INSTANCE_ID: +- case VKD3D_SHADER_SV_IS_FRONT_FACE: +- case VKD3D_SHADER_SV_PRIMITIVE_ID: +- case VKD3D_SHADER_SV_SAMPLE_INDEX: +- case VKD3D_SHADER_SV_VERTEX_ID: +- instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) +- ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; +- break; ++ .opcode = VKD3D_SM4_OP_DCL_TEMPS, + +- default: +- instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) +- ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; +- break; +- } ++ .idx = {count}, ++ .idx_count = 1, ++ }; + +- if (version->type == VKD3D_SHADER_TYPE_PIXEL) +- { +- enum vkd3d_shader_interpolation_mode mode; ++ write_sm4_instruction(tpf, &instr); ++} + +- mode = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); +- instr.extra_bits |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; +- } +- } +- else ++static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp) ++{ ++ struct sm4_instruction instr = + { +- if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) +- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; +- else +- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; +- } ++ .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, + +- if (instr.opcode == VKD3D_SM4_OP_DCL_OUTPUT) +- { +- VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || semantic == VKD3D_SHADER_SV_TARGET +- || instr.dsts[0].reg.type != VKD3DSPR_OUTPUT); +- } +- else if (instr.opcode == VKD3D_SM4_OP_DCL_INPUT || instr.opcode == VKD3D_SM4_OP_DCL_INPUT_PS) +- { +- VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); +- } +- else +- { +- VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); +- instr.idx_count = 1; +- instr.idx[0] = vkd3d_siv_from_sysval_indexed(semantic, var->semantic.index); +- } ++ .idx = {temp->register_idx, temp->register_size, temp->component_count}, ++ .idx_count = 3, ++ }; + + write_sm4_instruction(tpf, &instr); + } + +-static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) ++static void tpf_dcl_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, ++ const struct vkd3d_shader_dst_param *dst, uint32_t interpolation_flags) + { + struct sm4_instruction instr = + { +- .opcode = VKD3D_SM4_OP_DCL_TEMPS, ++ .opcode = opcode, + +- .idx = {count}, +- .idx_count = 1, ++ .dsts[0] = *dst, ++ .dst_count = 1, ++ ++ .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); + } + +-static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp) ++static void tpf_dcl_siv_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, ++ const struct vkd3d_shader_register_semantic *semantic, uint32_t interpolation_flags) + { + struct sm4_instruction instr = + { +- .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, ++ .opcode = opcode, + +- .idx = {temp->register_idx, temp->register_size, temp->component_count}, +- .idx_count = 3, ++ .dsts[0] = 
semantic->reg, ++ .dst_count = 1, ++ ++ .idx[0] = semantic->sysval_semantic, ++ .idx_count = 1, ++ ++ .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +@@ -5111,125 +5032,6 @@ static void write_sm4_ret(const struct tpf_compiler *tpf) + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_unary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier src_mod) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[0].write_mask); +- instr.srcs[0].modifiers = src_mod; +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_unary_op_with_two_destinations(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); +- sm4_dst_from_node(&instr.dsts[dst_idx], dst); +- instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; +- instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; +- instr.dsts[1 - dst_idx].reg.idx_count = 0; +- instr.dst_count = 2; +- +- sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[dst_idx].write_mask); +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_binary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask); +- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask); +- instr.src_count = 2; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-/* dp# instructions don't map the swizzle. 
*/ +-static void write_sm4_binary_op_dot(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_node(tpf, &instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); +- instr.src_count = 2; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_binary_op_with_two_destinations(const struct tpf_compiler *tpf, +- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, +- const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); +- sm4_dst_from_node(&instr.dsts[dst_idx], dst); +- instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; +- instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; +- instr.dsts[1 - dst_idx].reg.idx_count = 0; +- instr.dst_count = 2; +- +- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[dst_idx].write_mask); +- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[dst_idx].write_mask); +- instr.src_count = 2; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_ternary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, +- const struct hlsl_ir_node *src3) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask); +- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask); +- sm4_src_from_node(tpf, &instr.srcs[2], src3, instr.dsts[0].write_mask); +- instr.src_count = 3; +- +- write_sm4_instruction(tpf, &instr); +-} +- + static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, + const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, +@@ -5240,12 +5042,15 @@ static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_no + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); + const struct vkd3d_shader_version *version = &tpf->program->shader_version; ++ bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; + unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + if (uav) + instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; ++ else if (raw) ++ instr.opcode = VKD3D_SM5_OP_LD_RAW; + else + instr.opcode = multisampled ? 
VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; + +@@ -5441,742 +5246,67 @@ static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ + write_sm4_instruction(tpf, &instr); + } + +-static bool type_is_float(const struct hlsl_type *type) ++static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) + { +- return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; +-} ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM4_OP_IF, ++ .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, ++ .src_count = 1, ++ }; + +-static void write_sm4_cast_from_bool(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr, +- const struct hlsl_ir_node *arg, uint32_t mask) +-{ +- struct sm4_instruction instr; ++ VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); + +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_AND; ++ sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); ++ write_sm4_instruction(tpf, &instr); + +- sm4_dst_from_node(&instr.dsts[0], &expr->node); +- instr.dst_count = 1; ++ write_sm4_block(tpf, &iff->then_block); + +- sm4_src_from_node(tpf, &instr.srcs[0], arg, instr.dsts[0].write_mask); +- instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; +- instr.srcs[1].reg.dimension = VSIR_DIMENSION_SCALAR; +- instr.srcs[1].reg.u.immconst_u32[0] = mask; +- instr.src_count = 2; ++ if (!list_empty(&iff->else_block.instrs)) ++ { ++ instr.opcode = VKD3D_SM4_OP_ELSE; ++ instr.src_count = 0; ++ write_sm4_instruction(tpf, &instr); ++ ++ write_sm4_block(tpf, &iff->else_block); ++ } + ++ instr.opcode = VKD3D_SM4_OP_ENDIF; ++ instr.src_count = 0; + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_cast(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) ++static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) + { +- static const union +- { +- uint32_t u; +- float f; +- } one = { .f = 1.0 }; +- const struct hlsl_ir_node *arg1 = expr->operands[0].node; +- const struct hlsl_type *dst_type = expr->node.data_type; +- const struct hlsl_type *src_type = arg1->data_type; +- +- /* Narrowing casts were already lowered. 
*/ +- VKD3D_ASSERT(src_type->dimx == dst_type->dimx); ++ struct sm4_instruction instr = {0}; + +- switch (dst_type->e.numeric.type) ++ switch (jump->type) + { +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_UINT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(tpf, expr, arg1, one.u); +- break; +- +- case HLSL_TYPE_DOUBLE: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); +- break; +- +- default: +- vkd3d_unreachable(); +- } ++ case HLSL_IR_JUMP_BREAK: ++ instr.opcode = VKD3D_SM4_OP_BREAK; + break; + +- case HLSL_TYPE_INT: +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(tpf, expr, arg1, 1); +- break; +- +- case HLSL_TYPE_DOUBLE: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); +- break; +- +- default: +- vkd3d_unreachable(); +- } ++ case HLSL_IR_JUMP_CONTINUE: ++ instr.opcode = VKD3D_SM4_OP_CONTINUE; + break; + +- case HLSL_TYPE_UINT: +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(tpf, expr, arg1, 1); +- break; +- +- case HLSL_TYPE_DOUBLE: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); +- break; ++ case HLSL_IR_JUMP_DISCARD_NZ: ++ { ++ instr.opcode = VKD3D_SM4_OP_DISCARD; ++ instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; + +- default: +- vkd3d_unreachable(); +- } ++ memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); ++ instr.src_count = 1; ++ sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); + break; ++ } + +- case HLSL_TYPE_DOUBLE: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double."); +- break; ++ case HLSL_IR_JUMP_RETURN: ++ vkd3d_unreachable(); + +- case HLSL_TYPE_BOOL: +- /* Casts to bool should have already been lowered. 
*/ + default: +- vkd3d_unreachable(); +- } +-} +- +-static void write_sm4_rasterizer_sample_count(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; +- instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; +- instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; +- instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) +-{ +- const struct vkd3d_shader_version *version = &tpf->program->shader_version; +- const struct hlsl_ir_node *arg1 = expr->operands[0].node; +- const struct hlsl_ir_node *arg2 = expr->operands[1].node; +- const struct hlsl_ir_node *arg3 = expr->operands[2].node; +- const struct hlsl_type *dst_type = expr->node.data_type; +- struct vkd3d_string_buffer *dst_type_string; +- +- VKD3D_ASSERT(expr->node.reg.allocated); +- +- if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) +- return; +- +- switch (expr->op) +- { +- case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: +- if (version->type == VKD3D_SHADER_TYPE_PIXEL && vkd3d_shader_ver_ge(version, 4, 1)) +- write_sm4_rasterizer_sample_count(tpf, &expr->node); +- else +- hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, +- "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); +- break; +- +- case HLSL_OP1_ABS: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_ABS); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP1_BIT_NOT: +- VKD3D_ASSERT(type_is_integer(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_CAST: +- write_sm4_cast(tpf, expr); +- break; +- +- case HLSL_OP1_CEIL: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_PI, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_COS: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); +- break; +- +- case HLSL_OP1_DSX: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_DSX_COARSE: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_DSX_FINE: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_DSY: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_DSY_COARSE: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_DSY_FINE: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_EXP2: +- 
VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_F16TOF32: +- VKD3D_ASSERT(type_is_float(dst_type)); +- VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); +- write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_F32TOF16: +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT); +- VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); +- write_sm4_unary_op(tpf, VKD3D_SM5_OP_F32TOF16, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_FLOOR: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_FRACT: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_LOG2: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_LOGIC_NOT: +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_NEG: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_NEG); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP1_RCP: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- /* SM5 comes with a RCP opcode */ +- if (vkd3d_shader_ver_ge(version, 5, 0)) +- { +- write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); +- } +- else +- { +- /* For SM4, implement as DIV dst, 1.0, src */ +- struct sm4_instruction instr; +- struct hlsl_constant_value one; +- +- VKD3D_ASSERT(type_is_float(dst_type)); +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_DIV; +- +- sm4_dst_from_node(&instr.dsts[0], &expr->node); +- instr.dst_count = 1; +- +- for (unsigned int i = 0; i < 4; i++) +- one.u[i].f = 1.0f; +- sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask); +- sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask); +- instr.src_count = 2; +- +- write_sm4_instruction(tpf, &instr); +- } +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP1_REINTERPRET: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_ROUND: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_RSQ: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_SAT: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV +- | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), +- &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_SIN: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); +- break; +- +- case HLSL_OP1_SQRT: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, 
arg1, 0); +- break; +- +- case HLSL_OP1_TRUNC: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP2_ADD: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_BIT_AND: +- VKD3D_ASSERT(type_is_integer(dst_type)); +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_BIT_OR: +- VKD3D_ASSERT(type_is_integer(dst_type)); +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_BIT_XOR: +- VKD3D_ASSERT(type_is_integer(dst_type)); +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_DIV: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_UINT: +- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_DOT: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- switch (arg1->data_type->dimx) +- { +- case 4: +- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); +- break; +- +- case 3: +- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); +- break; +- +- case 2: +- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); +- break; +- +- case 1: +- default: +- vkd3d_unreachable(); +- } +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_EQUAL: +- { +- const struct hlsl_type *src_type = arg1->data_type; +- +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", +- debug_hlsl_type(tpf->ctx, src_type)); +- break; +- } +- break; +- } +- +- case HLSL_OP2_GEQUAL: +- { +- const struct hlsl_type *src_type = arg1->data_type; +- +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", +- debug_hlsl_type(tpf->ctx, src_type)); +- break; +- } +- break; +- } +- +- case HLSL_OP2_LESS: +- { +- const struct hlsl_type *src_type = 
arg1->data_type; +- +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", +- debug_hlsl_type(tpf->ctx, src_type)); +- break; +- } +- break; +- } +- +- case HLSL_OP2_LOGIC_AND: +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_LOGIC_OR: +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_LSHIFT: +- VKD3D_ASSERT(type_is_integer(dst_type)); +- VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_MAX: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_MIN: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_MOD: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_UINT: +- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_MUL: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- /* Using IMUL instead of UMUL because we're taking the low +- * bits, and the native compiler generates IMUL. 
*/ +- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_NEQUAL: +- { +- const struct hlsl_type *src_type = arg1->data_type; +- +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", +- debug_hlsl_type(tpf->ctx, src_type)); +- break; +- } +- break; +- } +- +- case HLSL_OP2_RSHIFT: +- VKD3D_ASSERT(type_is_integer(dst_type)); +- VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); +- write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, +- &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP3_TERNARY: +- write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); +- break; +- +- case HLSL_OP3_MAD: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MAD, &expr->node, arg1, arg2, arg3); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_ternary_op(tpf, VKD3D_SM4_OP_IMAD, &expr->node, arg1, arg2, arg3); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); +- } +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); +- } +- +- hlsl_release_string_buffer(tpf->ctx, dst_type_string); +-} +- +-static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_IF, +- .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, +- .src_count = 1, +- }; +- +- VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); +- +- sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); +- write_sm4_instruction(tpf, &instr); +- +- write_sm4_block(tpf, &iff->then_block); +- +- if (!list_empty(&iff->else_block.instrs)) +- { +- instr.opcode = VKD3D_SM4_OP_ELSE; +- instr.src_count = 0; +- write_sm4_instruction(tpf, &instr); +- +- write_sm4_block(tpf, &iff->else_block); +- } +- +- instr.opcode = VKD3D_SM4_OP_ENDIF; +- instr.src_count = 0; +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) +-{ +- struct sm4_instruction instr = {0}; +- +- switch (jump->type) +- { +- case HLSL_IR_JUMP_BREAK: +- instr.opcode = VKD3D_SM4_OP_BREAK; +- break; +- +- case HLSL_IR_JUMP_CONTINUE: +- instr.opcode = VKD3D_SM4_OP_CONTINUE; +- break; +- +- case HLSL_IR_JUMP_DISCARD_NZ: +- { +- instr.opcode = VKD3D_SM4_OP_DISCARD; +- instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; +- +- memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); +- instr.src_count = 1; +- sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); +- break; +- } +- +- case HLSL_IR_JUMP_RETURN: +- vkd3d_unreachable(); +- +- default: +- hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); +- return; ++ hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type 
%s.", hlsl_jump_type_to_string(jump->type)); ++ return; + } + + write_sm4_instruction(tpf, &instr); +@@ -6506,11 +5636,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + } + + instr.opcode = info->opcode; ++ instr.extra_bits = ins->flags << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + instr.dst_count = ins->dst_count; + instr.src_count = ins->src_count; + + for (unsigned int i = 0; i < ins->dst_count; ++i) ++ { + instr.dsts[i] = ins->dst[i]; ++ ++ if (instr.dsts[i].modifiers & VKD3DSPDM_SATURATE) ++ { ++ /* For vsir SATURATE is a dst modifier, while for tpf it is an instruction flag. */ ++ VKD3D_ASSERT(ins->dst_count == 1); ++ instr.dsts[i].modifiers &= ~VKD3DSPDM_SATURATE; ++ instr.extra_bits |= VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; ++ } ++ } + for (unsigned int i = 0; i < ins->src_count; ++i) + instr.srcs[i] = ins->src[i]; + +@@ -6529,7 +5670,99 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp); + break; + ++ case VKD3DSIH_DCL_INPUT: ++ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT, &ins->declaration.dst, 0); ++ break; ++ ++ case VKD3DSIH_DCL_INPUT_PS: ++ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS, &ins->declaration.dst, ins->flags); ++ break; ++ ++ case VKD3DSIH_DCL_INPUT_PS_SGV: ++ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SGV, &ins->declaration.register_semantic, 0); ++ break; ++ ++ case VKD3DSIH_DCL_INPUT_PS_SIV: ++ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SIV, &ins->declaration.register_semantic, ins->flags); ++ break; ++ ++ case VKD3DSIH_DCL_INPUT_SGV: ++ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SGV, &ins->declaration.register_semantic, 0); ++ break; ++ ++ case VKD3DSIH_DCL_INPUT_SIV: ++ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SIV, &ins->declaration.register_semantic, 0); ++ break; ++ ++ case VKD3DSIH_DCL_OUTPUT: ++ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT, &ins->declaration.dst, 0); ++ break; ++ ++ case VKD3DSIH_DCL_OUTPUT_SIV: ++ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0); ++ break; ++ ++ case VKD3DSIH_ADD: ++ case VKD3DSIH_AND: ++ case VKD3DSIH_DIV: ++ case VKD3DSIH_DP2: ++ case VKD3DSIH_DP3: ++ case VKD3DSIH_DP4: ++ case VKD3DSIH_DSX: ++ case VKD3DSIH_DSX_COARSE: ++ case VKD3DSIH_DSX_FINE: ++ case VKD3DSIH_DSY: ++ case VKD3DSIH_DSY_COARSE: ++ case VKD3DSIH_DSY_FINE: ++ case VKD3DSIH_EQO: ++ case VKD3DSIH_EXP: ++ case VKD3DSIH_F16TOF32: ++ case VKD3DSIH_F32TOF16: ++ case VKD3DSIH_FRC: ++ case VKD3DSIH_FTOI: ++ case VKD3DSIH_FTOU: ++ case VKD3DSIH_GEO: ++ case VKD3DSIH_IADD: ++ case VKD3DSIH_IEQ: ++ case VKD3DSIH_IGE: ++ case VKD3DSIH_ILT: ++ case VKD3DSIH_IMAD: ++ case VKD3DSIH_IMAX: ++ case VKD3DSIH_IMIN: ++ case VKD3DSIH_IMUL: ++ case VKD3DSIH_INE: ++ case VKD3DSIH_INEG: ++ case VKD3DSIH_ISHL: ++ case VKD3DSIH_ISHR: ++ case VKD3DSIH_ITOF: ++ case VKD3DSIH_LOG: ++ case VKD3DSIH_LTO: ++ case VKD3DSIH_MAD: ++ case VKD3DSIH_MAX: ++ case VKD3DSIH_MIN: + case VKD3DSIH_MOV: ++ case VKD3DSIH_MOVC: ++ case VKD3DSIH_MUL: ++ case VKD3DSIH_NEU: ++ case VKD3DSIH_NOT: ++ case VKD3DSIH_OR: ++ case VKD3DSIH_RCP: ++ case VKD3DSIH_ROUND_NE: ++ case VKD3DSIH_ROUND_NI: ++ case VKD3DSIH_ROUND_PI: ++ case VKD3DSIH_ROUND_Z: ++ case VKD3DSIH_RSQ: ++ case VKD3DSIH_SAMPLE_INFO: ++ case VKD3DSIH_SINCOS: ++ case VKD3DSIH_SQRT: ++ case VKD3DSIH_UDIV: ++ case VKD3DSIH_UGE: ++ case VKD3DSIH_ULT: ++ case VKD3DSIH_UMAX: ++ case VKD3DSIH_UMIN: ++ case 
VKD3DSIH_USHR: ++ case VKD3DSIH_UTOF: ++ case VKD3DSIH_XOR: + tpf_simple_instruction(tpf, ins); + break; + +@@ -6568,10 +5801,6 @@ static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *b + case HLSL_IR_CONSTANT: + vkd3d_unreachable(); + +- case HLSL_IR_EXPR: +- write_sm4_expr(tpf, hlsl_ir_expr(instr)); +- break; +- + case HLSL_IR_IF: + write_sm4_if(tpf, hlsl_ir_if(instr)); + break; +@@ -6621,16 +5850,6 @@ static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *b + + static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) + { +- struct hlsl_ctx *ctx = tpf->ctx; +- const struct hlsl_ir_var *var; +- +- LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if ((var->is_input_semantic && var->last_read) +- || (var->is_output_semantic && var->first_write)) +- tpf_write_dcl_semantic(tpf, var, func == ctx->patch_constant_func); +- } +- + if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) + tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size); + +@@ -6648,6 +5867,7 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec + const struct hlsl_buffer *cbuffer; + struct hlsl_ctx *ctx = tpf->ctx; + size_t token_count_position; ++ uint32_t global_flags = 0; + + static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = + { +@@ -6669,6 +5889,27 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec + put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); + token_count_position = put_u32(&buffer, 0); + ++ if (version->major == 4) ++ { ++ for (i = 0; i < extern_resources_count; ++i) ++ { ++ const struct extern_resource *resource = &extern_resources[i]; ++ const struct hlsl_type *type = resource->component_type; ++ ++ if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) ++ { ++ global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS; ++ break; ++ } ++ } ++ } ++ ++ if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) ++ global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; ++ ++ if (global_flags) ++ write_sm4_dcl_global_flags(tpf, global_flags); ++ + if (version->type == VKD3D_SHADER_TYPE_HULL) + { + tpf_write_hs_decls(tpf); +@@ -6703,9 +5944,6 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec + write_sm4_dcl_textures(tpf, resource, true); + } + +- if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) +- write_sm4_dcl_global_flags(tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); +- + if (version->type == VKD3D_SHADER_TYPE_HULL) + tpf_write_hs_control_point_phase(tpf); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 1b6c37343d1..db18e6d12bc 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -1620,6 +1620,7 @@ bool sm1_usage_from_semantic_name(const char *semantic_name, + uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); + bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, + const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx); ++bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg); + bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic 
*sysval_semantic,
+        const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain,
+        const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func);
+diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c
+index ed4cc370639..a55a97f6f2f 100644
+--- a/libs/vkd3d/libs/vkd3d/command.c
++++ b/libs/vkd3d/libs/vkd3d/command.c
+@@ -4804,15 +4804,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi
+     VkDeviceSize offsets[ARRAY_SIZE(list->strides)];
+     const struct vkd3d_vk_device_procs *vk_procs;
+     VkBuffer buffers[ARRAY_SIZE(list->strides)];
++    struct d3d12_device *device = list->device;
++    unsigned int i, stride, max_view_count;
+     struct d3d12_resource *resource;
+     bool invalidate = false;
+-    unsigned int i, stride;
+ 
+     TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views);
+ 
+-    vk_procs = &list->device->vk_procs;
+-    null_resources = &list->device->null_resources;
+-    gpu_va_allocator = &list->device->gpu_va_allocator;
++    vk_procs = &device->vk_procs;
++    null_resources = &device->null_resources;
++    gpu_va_allocator = &device->gpu_va_allocator;
+ 
+     if (!vkd3d_bound_range(start_slot, view_count, ARRAY_SIZE(list->strides)))
+     {
+@@ -4820,6 +4821,27 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi
+         return;
+     }
+ 
++    max_view_count = device->vk_info.device_limits.maxVertexInputBindings;
++    if (start_slot < max_view_count)
++        max_view_count -= start_slot;
++    else
++        max_view_count = 0;
++
++    /* Although simply skipping unsupported binding slots isn't especially
++     * likely to work well in the general case, applications sometimes
++     * explicitly set all 32 vertex buffer binding slots supported by
++     * Direct3D 12, with unused slots set to NULL. "Spider-Man Remastered" is
++     * an example of such an application. */
++    if (view_count > max_view_count)
++    {
++        for (i = max_view_count; i < view_count; ++i)
++        {
++            if (views && views[i].BufferLocation)
++                WARN("Ignoring unsupported vertex buffer slot %u.\n", start_slot + i);
++        }
++        view_count = max_view_count;
++    }
++
+     for (i = 0; i < view_count; ++i)
+     {
+         if (views && views[i].BufferLocation)
+-- 
+2.45.2
+