From e232cccc6ae14e18e58810452e2839e9dde2c54e Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 25 Jun 2025 10:56:46 +1000 Subject: [PATCH] Updated vkd3d-latest patchset --- ...-a8ca1f95c50561a16de5769646dcda0c045.patch | 2 +- ...-7c0da1747a89a8cea27a42ace51b318e9d8.patch | 2 +- ...-9d490b83d4d6c9b8cbb5bcfa5a2e6aadf25.patch | 931 ++++++ ...-d8edf20c2b4224384d8e206c620bfbd61c5.patch | 2731 +++++++++++++++++ ...-5452e79a19c0b895d0c2ac22d3da5595a57.patch | 699 +++++ 5 files changed, 4363 insertions(+), 2 deletions(-) create mode 100644 patches/vkd3d-latest/0003-Updated-vkd3d-to-9d490b83d4d6c9b8cbb5bcfa5a2e6aadf25.patch create mode 100644 patches/vkd3d-latest/0004-Updated-vkd3d-to-d8edf20c2b4224384d8e206c620bfbd61c5.patch create mode 100644 patches/vkd3d-latest/0005-Updated-vkd3d-to-5452e79a19c0b895d0c2ac22d3da5595a57.patch diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-a8ca1f95c50561a16de5769646dcda0c045.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-a8ca1f95c50561a16de5769646dcda0c045.patch index c7f56b6d..a542150b 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-a8ca1f95c50561a16de5769646dcda0c045.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-a8ca1f95c50561a16de5769646dcda0c045.patch @@ -1,4 +1,4 @@ -From 714c260cf3d63ad5a7f8b39e111af9aa81cf2768 Mon Sep 17 00:00:00 2001 +From 0bc76a33b06b9eaa7ab3af65f9a585551aa2ccb1 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Mon, 26 May 2025 07:03:34 +1000 Subject: [PATCH] Updated vkd3d to a8ca1f95c50561a16de5769646dcda0c045b7a46. 
diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-7c0da1747a89a8cea27a42ace51b318e9d8.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-7c0da1747a89a8cea27a42ace51b318e9d8.patch index 799efb04..9ad63a5c 100644 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-7c0da1747a89a8cea27a42ace51b318e9d8.patch +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-7c0da1747a89a8cea27a42ace51b318e9d8.patch @@ -1,4 +1,4 @@ -From 9b68296a60ed1150298f9638098765d0e92d406a Mon Sep 17 00:00:00 2001 +From aa0d79ecdfc9853212445b41f9bd68256d71e2e4 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 18 Jun 2025 08:49:39 +1000 Subject: [PATCH] Updated vkd3d to 7c0da1747a89a8cea27a42ace51b318e9d843a19. diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-9d490b83d4d6c9b8cbb5bcfa5a2e6aadf25.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-9d490b83d4d6c9b8cbb5bcfa5a2e6aadf25.patch new file mode 100644 index 00000000..d8a34d53 --- /dev/null +++ b/patches/vkd3d-latest/0003-Updated-vkd3d-to-9d490b83d4d6c9b8cbb5bcfa5a2e6aadf25.patch @@ -0,0 +1,931 @@ +From 39d7eeb59fd2ccac48bc83e78fd56dbe104bdf77 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Fri, 20 Jun 2025 07:18:12 +1000 +Subject: [PATCH] Updated vkd3d to 9d490b83d4d6c9b8cbb5bcfa5a2e6aadf25e558b. 
+ +--- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 92 +++--- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 1 + + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 1 + + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 23 ++ + libs/vkd3d/libs/vkd3d-shader/ir.c | 263 +++++++++++++++--- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 84 +++--- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 2 + + libs/vkd3d/libs/vkd3d/state.c | 12 + + 8 files changed, 346 insertions(+), 132 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 1c7ed0d9e11..9ebcb6870e9 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -643,6 +643,7 @@ struct sm6_type + + enum sm6_value_type + { ++ VALUE_TYPE_INVALID, + VALUE_TYPE_FUNCTION, + VALUE_TYPE_DATA, + VALUE_TYPE_HANDLE, +@@ -652,7 +653,6 @@ enum sm6_value_type + VALUE_TYPE_GROUPSHAREDMEM, + VALUE_TYPE_CONSTANT, + VALUE_TYPE_UNDEFINED, +- VALUE_TYPE_INVALID, + }; + + struct sm6_index +@@ -2229,6 +2229,11 @@ static inline bool sm6_value_is_function_dcl(const struct sm6_value *value) + return value->value_type == VALUE_TYPE_FUNCTION; + } + ++static bool sm6_value_is_invalid(const struct sm6_value *value) ++{ ++ return value->value_type == VALUE_TYPE_INVALID; ++} ++ + static inline bool sm6_value_is_dx_intrinsic_dcl(const struct sm6_value *fn) + { + VKD3D_ASSERT(sm6_value_is_function_dcl(fn)); +@@ -2599,7 +2604,6 @@ static void sm6_register_from_value(struct vkd3d_shader_register *reg, const str + case VALUE_TYPE_SSA: + register_init_with_id(reg, VKD3DSPR_SSA, data_type, value->u.ssa.id); + reg->dimension = sm6_type_is_scalar(value->type) ? 
VSIR_DIMENSION_SCALAR : VSIR_DIMENSION_VEC4; +- register_convert_to_minimum_precision(reg); + break; + + case VALUE_TYPE_ICB: +@@ -2607,7 +2611,6 @@ static void sm6_register_from_value(struct vkd3d_shader_register *reg, const str + reg->idx[0].offset = value->u.icb.id; + register_index_address_init(®->idx[1], value->u.icb.index.index, sm6); + reg->idx[1].is_in_bounds = value->u.icb.index.is_in_bounds; +- register_convert_to_minimum_precision(reg); + break; + + case VALUE_TYPE_IDXTEMP: +@@ -2615,7 +2618,6 @@ static void sm6_register_from_value(struct vkd3d_shader_register *reg, const str + reg->idx[0].offset = value->u.idxtemp.id; + register_index_address_init(®->idx[1], value->u.idxtemp.index.index, sm6); + reg->idx[1].is_in_bounds = value->u.idxtemp.index.is_in_bounds; +- register_convert_to_minimum_precision(reg); + break; + + case VALUE_TYPE_GROUPSHAREDMEM: +@@ -2629,7 +2631,6 @@ static void sm6_register_from_value(struct vkd3d_shader_register *reg, const str + vsir_register_init(reg, scalar_type->u.width == 64 ? VKD3DSPR_IMMCONST64 : VKD3DSPR_IMMCONST, + data_type, 0); + reg->u = value->u.constant.immconst; +- register_convert_to_minimum_precision(reg); + break; + + case VALUE_TYPE_UNDEFINED: +@@ -2643,6 +2644,7 @@ static void sm6_register_from_value(struct vkd3d_shader_register *reg, const str + vkd3d_unreachable(); + } + ++ register_convert_to_minimum_precision(reg); + reg->non_uniform = value->non_uniform; + } + +@@ -3720,7 +3722,10 @@ static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6 + dst->structure_stride = 0; + sm6_register_from_value(&ins->declaration.tgsm_raw.reg.reg, dst, sm6); + ins->declaration.tgsm_raw.alignment = alignment; +- byte_count = elem_type->u.width / 8u; ++ byte_count = elem_type->u.width / CHAR_BIT; ++ /* Convert minimum precision types to their 32-bit equivalent. 
*/ ++ if (byte_count == 2) ++ byte_count = 4; + if (byte_count != 4) + { + FIXME("Unsupported byte count %u.\n", byte_count); +@@ -3741,7 +3746,10 @@ static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const str + dst_param_init(&ins->declaration.tgsm_structured.reg); + dst->value_type = VALUE_TYPE_GROUPSHAREDMEM; + dst->u.groupsharedmem.id = sm6->tgsm_count++; +- dst->structure_stride = elem_type->u.width / 8u; ++ dst->structure_stride = elem_type->u.width / CHAR_BIT; ++ /* Convert minimum precision types to their 32-bit equivalent. */ ++ if (dst->structure_stride == 2) ++ dst->structure_stride = 4; + sm6_register_from_value(&ins->declaration.tgsm_structured.reg.reg, dst, sm6); + if (dst->structure_stride != 4) + { +@@ -4824,6 +4832,10 @@ static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) + return VKD3DSIH_ISINF; + case DX_ISFINITE: + return VKD3DSIH_ISFINITE; ++ case DX_COS: ++ return VKD3DSIH_COS; ++ case DX_SIN: ++ return VKD3DSIH_SIN; + case DX_TAN: + return VKD3DSIH_TAN; + case DX_ACOS: +@@ -6172,30 +6184,6 @@ static void sm6_parser_emit_dx_saturate(struct sm6_parser *sm6, enum dx_intrinsi + ins->dst->modifiers = VKD3DSPDM_SATURATE; + } + +-static void sm6_parser_emit_dx_sincos(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, +- const struct sm6_value **operands, struct function_emission_state *state) +-{ +- struct sm6_value *dst = sm6_parser_get_current_value(sm6); +- struct vkd3d_shader_instruction *ins = state->ins; +- struct vkd3d_shader_dst_param *dst_params; +- struct vkd3d_shader_src_param *src_param; +- unsigned int index; +- +- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SINCOS); +- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) +- return; +- src_param_init_from_value(src_param, operands[0], sm6); +- +- sm6_parser_init_ssa_value(sm6, dst); +- +- index = op == DX_COS; +- dst_params = instruction_dst_params_alloc(ins, 2, sm6); +- dst_param_init(&dst_params[0]); +- 
dst_param_init(&dst_params[1]); +- sm6_register_from_value(&dst_params[index].reg, dst, sm6); +- vsir_dst_param_init_null(&dst_params[index ^ 1]); +-} +- + static void sm6_parser_emit_dx_split_double(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -6599,7 +6587,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_BUFFER_UPDATE_COUNTER ] = {"i", "H8", sm6_parser_emit_dx_buffer_update_counter}, + [DX_CALCULATE_LOD ] = {"f", "HHfffb", sm6_parser_emit_dx_calculate_lod}, + [DX_CBUFFER_LOAD_LEGACY ] = {"o", "Hi", sm6_parser_emit_dx_cbuffer_load}, +- [DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos}, ++ [DX_COS ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, + [DX_COVERAGE ] = {"i", "", sm6_parser_emit_dx_coverage}, + [DX_CREATE_HANDLE ] = {"H", "ccib", sm6_parser_emit_dx_create_handle}, +@@ -6668,7 +6656,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_SAMPLE_INDEX ] = {"i", "", sm6_parser_emit_dx_sample_index}, + [DX_SAMPLE_LOD ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, + [DX_SATURATE ] = {"g", "R", sm6_parser_emit_dx_saturate}, +- [DX_SIN ] = {"g", "R", sm6_parser_emit_dx_sincos}, ++ [DX_SIN ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_SPLIT_DOUBLE ] = {"S", "d", sm6_parser_emit_dx_split_double}, + [DX_SQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_STORE_OUTPUT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, +@@ -6702,11 +6690,13 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + }; + + static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_value *value, char info_type, +- const struct sm6_type *ret_type, bool is_return) ++ const struct sm6_type *ret_type) + { + const struct sm6_type *type = value->type; + +- if (info_type != 'H' && !sm6_value_is_register(value)) ++ if (info_type != 'H' && info_type != 'v' && 
!sm6_value_is_register(value)) ++ return false; ++ if (!type && info_type != 'v') + return false; + + switch (info_type) +@@ -6741,7 +6731,7 @@ static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struc + case 'g': + return sm6_type_is_floating_point(type); + case 'H': +- return (is_return || sm6_value_is_handle(value)) && type == sm6->handle_type; ++ return sm6_value_is_handle(value) && type == sm6->handle_type; + case 'D': + return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.Dimensions"); + case 'S': +@@ -6749,7 +6739,7 @@ static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struc + case 'V': + return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.fouri32"); + case 'v': +- return !type; ++ return sm6_value_is_invalid(value) && !type; + case 'o': + /* TODO: some type checking may be possible */ + return true; +@@ -6769,18 +6759,10 @@ static bool sm6_parser_validate_dx_op(struct sm6_parser *sm6, enum dx_intrinsic_ + + info = &sm6_dx_op_table[op]; + +- VKD3D_ASSERT(info->ret_type[0]); +- if (!sm6_parser_validate_operand_type(sm6, dst, info->ret_type[0], NULL, true)) +- { +- WARN("Failed to validate return type for dx intrinsic id %u, '%s'.\n", op, name); +- /* Return type validation failure is not so critical. We only need to set +- * a data type for the SSA result. 
*/ +- } +- + for (i = 0; i < operand_count; ++i) + { + const struct sm6_value *value = operands[i]; +- if (!sm6_parser_validate_operand_type(sm6, value, info->operand_info[i], dst->type, false)) ++ if (!sm6_parser_validate_operand_type(sm6, value, info->operand_info[i], dst->type)) + { + WARN("Failed to validate operand %u for dx intrinsic id %u, '%s'.\n", i + 1, op, name); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, +@@ -6823,10 +6805,22 @@ static void sm6_parser_decode_dx_op(struct sm6_parser *sm6, enum dx_intrinsic_op + return; + } + +- if (sm6_parser_validate_dx_op(sm6, op, name, operands, operand_count, dst)) +- sm6_dx_op_table[op].handler(sm6, op, operands, state); +- else ++ if (!sm6_parser_validate_dx_op(sm6, op, name, operands, operand_count, dst)) ++ { + sm6_parser_emit_unhandled(sm6, state->ins, dst); ++ return; ++ } ++ ++ sm6_dx_op_table[op].handler(sm6, op, operands, state); ++ ++ VKD3D_ASSERT(sm6_dx_op_table[op].ret_type[0]); ++ if (!sm6_parser_validate_operand_type(sm6, dst, sm6_dx_op_table[op].ret_type[0], NULL)) ++ { ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Failed to validate return type for dx intrinsic id %u, '%s'.", op, name); ++ /* Return type validation failure is not so critical. We only need to set ++ * a data type for the SSA result. 
*/ ++ } + } + + static void sm6_parser_emit_call(struct sm6_parser *sm6, const struct dxil_record *record, +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 653ddd2e8be..2b88a04a120 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -3652,6 +3652,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP1_LOG2] = "log2", + [HLSL_OP1_LOGIC_NOT] = "!", + [HLSL_OP1_NEG] = "-", ++ [HLSL_OP1_NOISE] = "noise", + [HLSL_OP1_RCP] = "rcp", + [HLSL_OP1_REINTERPRET] = "reinterpret", + [HLSL_OP1_ROUND] = "round", +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 63c9733ec7b..bb37f0be6cf 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -734,6 +734,7 @@ enum hlsl_ir_expr_op + HLSL_OP1_ISINF, + HLSL_OP1_LOG2, + HLSL_OP1_LOGIC_NOT, ++ HLSL_OP1_NOISE, + HLSL_OP1_NEG, + HLSL_OP1_RCP, + HLSL_OP1_REINTERPRET, +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index bad9d33094b..fa3688fad18 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -4294,6 +4294,28 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + return true; + } + ++static bool intrinsic_noise(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_type *type = params->args[0]->data_type, *ret_type; ++ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; ++ ++ type = params->args[0]->data_type; ++ if (type->class == HLSL_CLASS_MATRIX) ++ { ++ struct vkd3d_string_buffer *string; ++ if ((string = hlsl_type_to_string(ctx, type))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Wrong argument type for noise(): expected vector or scalar, but got '%s'.", string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ } ++ ++ args[0] = intrinsic_float_convert_arg(ctx, 
params, params->args[0], loc); ++ ret_type = hlsl_get_scalar_type(ctx, args[0]->data_type->e.numeric.type); ++ ++ return !!add_expr(ctx, params->instrs, HLSL_OP1_NOISE, args, ret_type, loc); ++} ++ + static bool intrinsic_normalize(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -5258,6 +5280,7 @@ intrinsic_functions[] = + {"min", 2, true, intrinsic_min}, + {"modf", 2, true, intrinsic_modf}, + {"mul", 2, true, intrinsic_mul}, ++ {"noise", 1, true, intrinsic_noise}, + {"normalize", 1, true, intrinsic_normalize}, + {"pow", 2, true, intrinsic_pow}, + {"radians", 1, true, intrinsic_radians}, +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 7546a1f557b..1429c3a8778 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -62,6 +62,7 @@ const char *vsir_opcode_get_name(enum vkd3d_shader_opcode op, const char *error) + [VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED ] = "check_access_fully_mapped", + [VKD3DSIH_CMP ] = "cmp", + [VKD3DSIH_CND ] = "cnd", ++ [VKD3DSIH_COS ] = "cos", + [VKD3DSIH_CONTINUE ] = "continue", + [VKD3DSIH_CONTINUEP ] = "continuec", + [VKD3DSIH_COUNTBITS ] = "countbits", +@@ -290,6 +291,7 @@ const char *vsir_opcode_get_name(enum vkd3d_shader_opcode op, const char *error) + [VKD3DSIH_SETP ] = "setp", + [VKD3DSIH_SGE ] = "sge", + [VKD3DSIH_SGN ] = "sgn", ++ [VKD3DSIH_SIN ] = "sin", + [VKD3DSIH_SINCOS ] = "sincos", + [VKD3DSIH_SLT ] = "slt", + [VKD3DSIH_SQRT ] = "sqrt", +@@ -587,26 +589,37 @@ static void vsir_src_param_init_sampler(struct vkd3d_shader_src_param *src, unsi + src->reg.dimension = VSIR_DIMENSION_NONE; + } + +-static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx) ++static void src_param_init_ssa_scalar(struct vkd3d_shader_src_param *src, unsigned int idx, ++ enum vkd3d_data_type data_type) + { +- vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); ++ 
vsir_src_param_init(src, VKD3DSPR_SSA, data_type, 1); + src->reg.idx[0].offset = idx; + } + ++static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx) ++{ ++ src_param_init_ssa_scalar(src, idx, VKD3D_DATA_BOOL); ++} ++ + static void src_param_init_ssa_float(struct vkd3d_shader_src_param *src, unsigned int idx) + { +- vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); +- src->reg.idx[0].offset = idx; ++ src_param_init_ssa_scalar(src, idx, VKD3D_DATA_FLOAT); + } + +-static void src_param_init_ssa_float4(struct vkd3d_shader_src_param *src, unsigned int idx) ++static void src_param_init_ssa_vec4(struct vkd3d_shader_src_param *src, unsigned int idx, ++ enum vkd3d_data_type data_type) + { +- vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); ++ vsir_src_param_init(src, VKD3DSPR_SSA, data_type, 1); + src->reg.idx[0].offset = idx; + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; + } + ++static void src_param_init_ssa_float4(struct vkd3d_shader_src_param *src, unsigned int idx) ++{ ++ src_param_init_ssa_vec4(src, idx, VKD3D_DATA_FLOAT); ++} ++ + static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) + { + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); +@@ -657,26 +670,37 @@ void vsir_dst_param_init_null(struct vkd3d_shader_dst_param *dst) + dst->write_mask = 0; + } + +-static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) ++static void dst_param_init_ssa_scalar(struct vkd3d_shader_dst_param *dst, unsigned int idx, ++ enum vkd3d_data_type data_type) + { +- vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); ++ vsir_dst_param_init(dst, VKD3DSPR_SSA, data_type, 1); + dst->reg.idx[0].offset = idx; + } + ++static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) ++{ ++ dst_param_init_ssa_scalar(dst, idx, VKD3D_DATA_BOOL); ++} ++ + static void 
dst_param_init_ssa_float(struct vkd3d_shader_dst_param *dst, unsigned int idx) + { +- vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); +- dst->reg.idx[0].offset = idx; ++ dst_param_init_ssa_scalar(dst, idx, VKD3D_DATA_FLOAT); + } + +-static void dst_param_init_ssa_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx) ++static void dst_param_init_ssa_vec4(struct vkd3d_shader_dst_param *dst, unsigned int idx, ++ enum vkd3d_data_type data_type) + { +- vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); ++ vsir_dst_param_init(dst, VKD3DSPR_SSA, data_type, 1); + dst->reg.idx[0].offset = idx; + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->write_mask = VKD3DSP_WRITEMASK_ALL; + } + ++static void dst_param_init_ssa_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx) ++{ ++ dst_param_init_ssa_vec4(dst, idx, VKD3D_DATA_FLOAT); ++} ++ + static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) + { + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); +@@ -1079,50 +1103,130 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog + { + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + size_t pos = sincos - instructions->elements; +- struct vkd3d_shader_instruction *ins; +- unsigned int s; ++ struct vkd3d_shader_instruction *ins, *mov; ++ unsigned int s, count; + +- if (sincos->dst_count != 1) +- return VKD3D_OK; ++ count = 1 + vkd3d_popcount(sincos->dst[0].write_mask & (VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1)); + +- if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) ++ if (!shader_instruction_array_insert_at(instructions, pos + 1, count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + sincos = &instructions->elements[pos]; + + ins = &instructions->elements[pos + 1]; + +- if (!(vsir_instruction_init_with_params(program, ins, &sincos->location, VKD3DSIH_SINCOS, 2, 1))) ++ /* Save the source in a SSA in case a destination collides 
with the source. */ ++ mov = ins++; ++ if (!(vsir_instruction_init_with_params(program, mov, &sincos->location, VKD3DSIH_MOV, 1, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + +- ins->flags = sincos->flags; ++ mov->src[0] = sincos->src[0]; + +- *ins->src = *sincos->src; + /* Set the source swizzle to replicate the first component. */ + s = vsir_swizzle_get_component(sincos->src->swizzle, 0); +- ins->src->swizzle = vkd3d_shader_create_swizzle(s, s, s, s); ++ mov->src[0].swizzle = vkd3d_shader_create_swizzle(s, s, s, s); ++ ++ dst_param_init_ssa_scalar(&mov->dst[0], program->ssa_count, sincos->src[0].reg.data_type); + + if (sincos->dst->write_mask & VKD3DSP_WRITEMASK_1) + { ++ if (!(vsir_instruction_init_with_params(program, ins, &sincos->location, VKD3DSIH_SIN, 1, 1))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins->flags = sincos->flags; ++ ++ src_param_init_ssa_scalar(&ins->src[0], program->ssa_count, sincos->src[0].reg.data_type); ++ + ins->dst[0] = *sincos->dst; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_1; ++ ++ ++ins; + } +- else ++ ++ if (sincos->dst->write_mask & VKD3DSP_WRITEMASK_0) + { +- vsir_dst_param_init_null(&ins->dst[0]); ++ if (!(vsir_instruction_init_with_params(program, ins, &sincos->location, VKD3DSIH_COS, 1, 1))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins->flags = sincos->flags; ++ ++ src_param_init_ssa_scalar(&ins->src[0], program->ssa_count, sincos->src[0].reg.data_type); ++ ++ ins->dst[0] = *sincos->dst; ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; ++ ++ ++ins; + } + +- if (sincos->dst->write_mask & VKD3DSP_WRITEMASK_0) ++ vkd3d_shader_instruction_make_nop(sincos); ++ ++program->ssa_count; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_lower_sm4_sincos(struct vsir_program *program, ++ struct vkd3d_shader_instruction *sincos, struct vsir_transformation_context *ctx) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ size_t pos = sincos - instructions->elements; ++ struct 
vkd3d_shader_instruction *ins, *mov; ++ unsigned int count = 1; ++ ++ if (sincos->dst_count != 2) + { +- ins->dst[1] = *sincos->dst; +- ins->dst[1].write_mask = VKD3DSP_WRITEMASK_0; ++ vkd3d_shader_error(ctx->message_context, &sincos->location, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, ++ "Internal compiler error: invalid destination count %u for SINCOS.", ++ sincos->dst_count); ++ return VKD3D_ERROR; + } +- else ++ ++ if (sincos->dst[0].reg.type != VKD3DSPR_NULL) ++ ++count; ++ if (sincos->dst[1].reg.type != VKD3DSPR_NULL) ++ ++count; ++ ++ if (!shader_instruction_array_insert_at(instructions, pos + 1, count)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ sincos = &instructions->elements[pos]; ++ ++ ins = &instructions->elements[pos + 1]; ++ ++ /* Save the source in a SSA in case a destination collides with the source. */ ++ mov = ins++; ++ if (!(vsir_instruction_init_with_params(program, mov, &sincos->location, VKD3DSIH_MOV, 1, 1))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ mov->src[0] = sincos->src[0]; ++ dst_param_init_ssa_vec4(&mov->dst[0], program->ssa_count, sincos->src[0].reg.data_type); ++ ++ if (sincos->dst[0].reg.type != VKD3DSPR_NULL) + { +- vsir_dst_param_init_null(&ins->dst[1]); ++ if (!(vsir_instruction_init_with_params(program, ins, &sincos->location, VKD3DSIH_SIN, 1, 1))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins->flags = sincos->flags; ++ ++ src_param_init_ssa_vec4(&ins->src[0], program->ssa_count, sincos->src[0].reg.data_type); ++ ins->dst[0] = sincos->dst[0]; ++ ++ ++ins; ++ } ++ ++ if (sincos->dst[1].reg.type != VKD3DSPR_NULL) ++ { ++ if (!(vsir_instruction_init_with_params(program, ins, &sincos->location, VKD3DSIH_COS, 1, 1))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins->flags = sincos->flags; ++ ++ src_param_init_ssa_vec4(&ins->src[0], program->ssa_count, sincos->src[0].reg.data_type); ++ ins->dst[0] = sincos->dst[1]; ++ ++ ++ins; + } + +- /* Make the original instruction no-op */ + vkd3d_shader_instruction_make_nop(sincos); ++ 
++program->ssa_count; + + return VKD3D_OK; + } +@@ -1375,8 +1479,16 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr + break; + + case VKD3DSIH_SINCOS: +- if ((ret = vsir_program_lower_sm1_sincos(program, ins)) < 0) +- return ret; ++ if (ins->dst_count == 1) ++ { ++ if ((ret = vsir_program_lower_sm1_sincos(program, ins)) < 0) ++ return ret; ++ } ++ else ++ { ++ if ((ret = vsir_program_lower_sm4_sincos(program, ins, ctx)) < 0) ++ return ret; ++ } + break; + + case VKD3DSIH_TEXLD: +@@ -8659,9 +8771,31 @@ static void vsir_validate_io_src_param(struct validation_context *ctx, + "Invalid register type %#x used as source parameter.", src->reg.type); + } + ++#define F64_BIT (1u << VKD3D_DATA_DOUBLE) ++#define F32_BIT (1u << VKD3D_DATA_FLOAT) ++#define F16_BIT (1u << VKD3D_DATA_HALF) ++ ++#define I32_BIT (1u << VKD3D_DATA_INT) ++ ++#define U64_BIT (1u << VKD3D_DATA_UINT64) ++#define U32_BIT (1u << VKD3D_DATA_UINT) ++#define U16_BIT (1u << VKD3D_DATA_UINT16) ++ + static void vsir_validate_src_param(struct validation_context *ctx, + const struct vkd3d_shader_src_param *src) + { ++ static const struct ++ { ++ uint32_t data_type_mask; ++ } ++ src_modifier_data[] = ++ { ++ [VKD3DSPSM_NEG] = {F64_BIT | F32_BIT | F16_BIT | I32_BIT | U64_BIT | U32_BIT | U16_BIT}, ++ [VKD3DSPSM_BIAS] = {F32_BIT}, ++ [VKD3DSPSM_BIASNEG] = {F32_BIT}, ++ [VKD3DSPSM_SIGN] = {F32_BIT}, ++ [VKD3DSPSM_SIGNNEG] = {F32_BIT}, ++ }; + vsir_validate_register(ctx, &src->reg); + + if (src->swizzle & ~0x03030303u) +@@ -8676,6 +8810,13 @@ static void vsir_validate_src_param(struct validation_context *ctx, + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.", + src->modifiers); + ++ if (src->modifiers < ARRAY_SIZE(src_modifier_data) && src_modifier_data[src->modifiers].data_type_mask) ++ { ++ if (!(src_modifier_data[src->modifiers].data_type_mask & (1u << src->reg.data_type))) ++ validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, ++ "Source has invalid modifier %#x for data type %u.", src->modifiers, src->reg.data_type); ++ } ++ + switch (src->reg.type) + { + case VKD3DSPR_SSA: +@@ -9284,6 +9425,60 @@ static void vsir_validate_hull_shader_phase(struct validation_context *ctx, + ctx->dcl_temps_found = false; + } + ++static void vsir_validate_elementwise_operation(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction, const bool types[VKD3D_DATA_COUNT]) ++{ ++ enum vkd3d_data_type dst_data_type; ++ unsigned int i; ++ ++ if (instruction->dst_count < 1) ++ return; ++ ++ dst_data_type = instruction->dst[0].reg.data_type; ++ ++ if (dst_data_type >= VKD3D_DATA_COUNT) ++ return; ++ ++ if (!types[dst_data_type]) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid data type %#x for elementwise operation \"%s\" (%#x).", ++ dst_data_type, vsir_opcode_get_name(instruction->opcode, ""), instruction->opcode); ++ ++ for (i = 0; i < instruction->src_count; ++i) ++ { ++ if (instruction->src[i].reg.data_type != dst_data_type) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Data type %#x for operand %u doesn't match the destination data type %#x " ++ "for elementwise operation \"%s\" (%#x).", ++ instruction->src[i].reg.data_type, i, dst_data_type, ++ vsir_opcode_get_name(instruction->opcode, ""), instruction->opcode); ++ } ++} ++ ++static void vsir_validate_float_elementwise_operation(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ static const bool types[VKD3D_DATA_COUNT] = ++ { ++ [VKD3D_DATA_FLOAT] = true, ++ }; ++ ++ vsir_validate_elementwise_operation(ctx, instruction, types); ++} ++ ++static void vsir_validate_logic_elementwise_operation(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ static const bool types[VKD3D_DATA_COUNT] = ++ { ++ [VKD3D_DATA_UINT] = true, ++ [VKD3D_DATA_UINT64] = true, 
++ [VKD3D_DATA_BOOL] = true, ++ }; ++ ++ vsir_validate_elementwise_operation(ctx, instruction, types); ++} ++ + static void vsir_validate_branch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) + { + size_t i; +@@ -9969,6 +10164,12 @@ struct vsir_validator_instruction_desc + + static const struct vsir_validator_instruction_desc vsir_validator_instructions[] = + { ++ [VKD3DSIH_ABS] = {1, 1, vsir_validate_float_elementwise_operation}, ++ [VKD3DSIH_ACOS] = {1, 1, vsir_validate_float_elementwise_operation}, ++ [VKD3DSIH_ADD] = {1, 2, vsir_validate_float_elementwise_operation}, ++ [VKD3DSIH_AND] = {1, 2, vsir_validate_logic_elementwise_operation}, ++ [VKD3DSIH_ASIN] = {1, 1, vsir_validate_float_elementwise_operation}, ++ [VKD3DSIH_ATAN] = {1, 1, vsir_validate_float_elementwise_operation}, + [VKD3DSIH_BRANCH] = {0, ~0u, vsir_validate_branch}, + [VKD3DSIH_HS_CONTROL_POINT_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_HS_DECLS] = {0, 0, vsir_validate_hull_shader_phase}, +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index debf7ac29f5..0413cd7c344 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -1551,6 +1551,29 @@ static void vkd3d_spirv_build_op_name(struct vkd3d_spirv_builder *builder, + vkd3d_spirv_build_string(stream, name, name_size); + } + ++static uint32_t vkd3d_spirv_build_op_string(struct vkd3d_spirv_builder *builder, const char *s) ++{ ++ struct vkd3d_spirv_stream *stream = &builder->debug_stream; ++ uint32_t result_id = vkd3d_spirv_alloc_id(builder); ++ unsigned int size; ++ ++ size = vkd3d_spirv_string_word_count(s); ++ vkd3d_spirv_build_word(stream, vkd3d_spirv_opcode_word(SpvOpString, 2 + size)); ++ vkd3d_spirv_build_word(stream, result_id); ++ vkd3d_spirv_build_string(stream, s, size); ++ ++ return result_id; ++} ++ ++static void vkd3d_spirv_build_op_source(struct vkd3d_spirv_builder *builder, const char 
*source_name) ++{ ++ struct vkd3d_spirv_stream *stream = &builder->debug_stream; ++ uint32_t source_id; ++ ++ source_id = vkd3d_spirv_build_op_string(builder, source_name ? source_name : ""); ++ vkd3d_spirv_build_op3(stream, SpvOpSource, 0, 0, source_id); ++} ++ + static void vkd3d_spirv_build_op_member_name(struct vkd3d_spirv_builder *builder, + uint32_t type_id, uint32_t member, const char *fmt, ...) + { +@@ -2462,18 +2485,6 @@ static uint32_t vkd3d_spirv_build_op_glsl_std450_fabs(struct vkd3d_spirv_builder + return vkd3d_spirv_build_op_glsl_std450_tr1(builder, GLSLstd450FAbs, result_type, operand); + } + +-static uint32_t vkd3d_spirv_build_op_glsl_std450_sin(struct vkd3d_spirv_builder *builder, +- uint32_t result_type, uint32_t operand) +-{ +- return vkd3d_spirv_build_op_glsl_std450_tr1(builder, GLSLstd450Sin, result_type, operand); +-} +- +-static uint32_t vkd3d_spirv_build_op_glsl_std450_cos(struct vkd3d_spirv_builder *builder, +- uint32_t result_type, uint32_t operand) +-{ +- return vkd3d_spirv_build_op_glsl_std450_tr1(builder, GLSLstd450Cos, result_type, operand); +-} +- + static uint32_t vkd3d_spirv_build_op_glsl_std450_max(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t x, uint32_t y) + { +@@ -2565,7 +2576,8 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder + return vkd3d_spirv_get_type_id(builder, component_type, component_count); + } + +-static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point) ++static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, ++ const char *entry_point, const char *source_name) + { + vkd3d_spirv_stream_init(&builder->debug_stream); + vkd3d_spirv_stream_init(&builder->annotation_stream); +@@ -2580,6 +2592,7 @@ static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const + + rb_init(&builder->declarations, vkd3d_spirv_declaration_compare); + ++ vkd3d_spirv_build_op_source(builder, source_name); + 
builder->main_function_id = vkd3d_spirv_alloc_id(builder); + vkd3d_spirv_build_op_name(builder, builder->main_function_id, "%s", entry_point); + } +@@ -3173,7 +3186,8 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p + compiler->spirv_target_info = target_info; + } + +- vkd3d_spirv_builder_init(&compiler->spirv_builder, spirv_compiler_get_entry_point_name(compiler)); ++ vkd3d_spirv_builder_init(&compiler->spirv_builder, ++ spirv_compiler_get_entry_point_name(compiler), compile_info->source_name); + + compiler->formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT + | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER; +@@ -7708,6 +7722,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( + {VKD3DSIH_ACOS, GLSLstd450Acos}, + {VKD3DSIH_ASIN, GLSLstd450Asin}, + {VKD3DSIH_ATAN, GLSLstd450Atan}, ++ {VKD3DSIH_COS, GLSLstd450Cos}, + {VKD3DSIH_DFMA, GLSLstd450Fma}, + {VKD3DSIH_DMAX, GLSLstd450NMax}, + {VKD3DSIH_DMIN, GLSLstd450NMin}, +@@ -7730,6 +7745,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( + {VKD3DSIH_ROUND_PI, GLSLstd450Ceil}, + {VKD3DSIH_ROUND_Z, GLSLstd450Trunc}, + {VKD3DSIH_RSQ, GLSLstd450InverseSqrt}, ++ {VKD3DSIH_SIN, GLSLstd450Sin}, + {VKD3DSIH_SQRT, GLSLstd450Sqrt}, + {VKD3DSIH_TAN, GLSLstd450Tan}, + {VKD3DSIH_UMAX, GLSLstd450UMax}, +@@ -7999,41 +8015,6 @@ static void spirv_compiler_emit_rcp(struct spirv_compiler *compiler, + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } + +-static void spirv_compiler_emit_sincos(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- const struct vkd3d_shader_dst_param *dst_sin = &instruction->dst[0]; +- const struct vkd3d_shader_dst_param *dst_cos = &instruction->dst[1]; +- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_src_param *src = instruction->src; +- uint32_t type_id, src_id, sin_id = 0, cos_id = 0; +- +- if (dst_sin->reg.type != VKD3DSPR_NULL) +- { 
+- type_id = spirv_compiler_get_type_id_for_dst(compiler, dst_sin); +- src_id = spirv_compiler_emit_load_src(compiler, src, dst_sin->write_mask); +- +- sin_id = vkd3d_spirv_build_op_glsl_std450_sin(builder, type_id, src_id); +- } +- +- if (dst_cos->reg.type != VKD3DSPR_NULL) +- { +- if (dst_sin->reg.type == VKD3DSPR_NULL || dst_cos->write_mask != dst_sin->write_mask) +- { +- type_id = spirv_compiler_get_type_id_for_dst(compiler, dst_cos); +- src_id = spirv_compiler_emit_load_src(compiler, src, dst_cos->write_mask); +- } +- +- cos_id = vkd3d_spirv_build_op_glsl_std450_cos(builder, type_id, src_id); +- } +- +- if (sin_id) +- spirv_compiler_emit_store_dst(compiler, dst_sin, sin_id); +- +- if (cos_id) +- spirv_compiler_emit_store_dst(compiler, dst_cos, cos_id); +-} +- + static void spirv_compiler_emit_imul(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { +@@ -10711,6 +10692,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_ACOS: + case VKD3DSIH_ASIN: + case VKD3DSIH_ATAN: ++ case VKD3DSIH_COS: + case VKD3DSIH_HCOS: + case VKD3DSIH_HSIN: + case VKD3DSIH_HTAN: +@@ -10733,6 +10715,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_ROUND_PI: + case VKD3DSIH_ROUND_Z: + case VKD3DSIH_RSQ: ++ case VKD3DSIH_SIN: + case VKD3DSIH_SQRT: + case VKD3DSIH_TAN: + case VKD3DSIH_UMAX: +@@ -10748,9 +10731,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_RCP: + spirv_compiler_emit_rcp(compiler, instruction); + break; +- case VKD3DSIH_SINCOS: +- spirv_compiler_emit_sincos(compiler, instruction); +- break; + case VKD3DSIH_IMUL: + case VKD3DSIH_UMUL: + spirv_compiler_emit_imul(compiler, instruction); +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 2ae0a57d237..cefd9f753a1 100644 +--- 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -311,6 +311,7 @@ enum vkd3d_shader_opcode + VKD3DSIH_CND, + VKD3DSIH_CONTINUE, + VKD3DSIH_CONTINUEP, ++ VKD3DSIH_COS, + VKD3DSIH_COUNTBITS, + VKD3DSIH_CRS, + VKD3DSIH_CUT, +@@ -537,6 +538,7 @@ enum vkd3d_shader_opcode + VKD3DSIH_SETP, + VKD3DSIH_SGE, + VKD3DSIH_SGN, ++ VKD3DSIH_SIN, + VKD3DSIH_SINCOS, + VKD3DSIH_SLT, + VKD3DSIH_SQRT, +diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c +index 819185796c0..0a5bd1122e3 100644 +--- a/libs/vkd3d/libs/vkd3d/state.c ++++ b/libs/vkd3d/libs/vkd3d/state.c +@@ -2383,7 +2383,9 @@ static HRESULT create_shader_stage(struct d3d12_device *device, + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_shader_compile_info compile_info; + struct VkShaderModuleCreateInfo shader_desc; ++ struct vkd3d_shader_dxbc_desc dxbc_desc; + struct vkd3d_shader_code spirv = {0}; ++ char source_name[33]; + VkResult vr; + int ret; + +@@ -2416,6 +2418,16 @@ static HRESULT create_shader_stage(struct d3d12_device *device, + compile_info.log_level = VKD3D_SHADER_LOG_NONE; + compile_info.source_name = NULL; + ++ if ((ret = vkd3d_shader_parse_dxbc(&(struct vkd3d_shader_code){code->pShaderBytecode, code->BytecodeLength}, ++ 0, &dxbc_desc, NULL)) >= 0) ++ { ++ sprintf(source_name, "%08x%08x%08x%08x", dxbc_desc.checksum[0], ++ dxbc_desc.checksum[1], dxbc_desc.checksum[2], dxbc_desc.checksum[3]); ++ vkd3d_shader_free_dxbc(&dxbc_desc); ++ TRACE("Compiling shader \"%s\".\n", source_name); ++ compile_info.source_name = source_name; ++ } ++ + if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0 + || (ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) + { +-- +2.47.2 + diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-d8edf20c2b4224384d8e206c620bfbd61c5.patch 
b/patches/vkd3d-latest/0004-Updated-vkd3d-to-d8edf20c2b4224384d8e206c620bfbd61c5.patch new file mode 100644 index 00000000..4d04b655 --- /dev/null +++ b/patches/vkd3d-latest/0004-Updated-vkd3d-to-d8edf20c2b4224384d8e206c620bfbd61c5.patch @@ -0,0 +1,2731 @@ +From 94b973b990678b228f5e7df4d1bd1157c3458712 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Tue, 24 Jun 2025 13:36:03 +1000 +Subject: [PATCH] Updated vkd3d to d8edf20c2b4224384d8e206c620bfbd61c56219d. + +--- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 5 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 2 + + libs/vkd3d/libs/vkd3d-shader/fx.c | 574 +++++----- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 16 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 7 +- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 129 ++- + libs/vkd3d/libs/vkd3d-shader/ir.c | 1009 ++++++++++++++++- + libs/vkd3d/libs/vkd3d-shader/msl.c | 259 ++++- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 10 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 3 + + .../libs/vkd3d-shader/vkd3d_shader_private.h | 10 + + 11 files changed, 1680 insertions(+), 344 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index fbc0235cdd0..f19a6283197 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -2137,6 +2137,9 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, + struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; + int result; + ++ if ((result = vsir_allocate_temp_registers(program, message_context))) ++ return result; ++ + d3dbc.program = program; + d3dbc.message_context = message_context; + switch (version->type) +@@ -2156,7 +2159,7 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, + } + + put_u32(buffer, sm1_version(version->type, version->major, version->minor)); +- d3dbc_write_comment(&d3dbc, VKD3D_MAKE_TAG('C','T','A','B'), ctab); ++ d3dbc_write_comment(&d3dbc, TAG_CTAB, ctab); + d3dbc_write_semantic_dcls(&d3dbc); + 
d3dbc_write_program_instructions(&d3dbc); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 9ebcb6870e9..71fa81ec163 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -5180,6 +5180,8 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr + src_param->reg.data_type = vkd3d_data_type_from_sm6_type(type); + if (data_type_is_64_bit(src_param->reg.data_type)) + src_param->swizzle = vsir_swizzle_64_from_32(src_param->swizzle); ++ else ++ register_convert_to_minimum_precision(&src_param->reg); + + instruction_dst_param_init_ssa_vector(ins, sm6_type_max_vector_size(type), sm6); + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index 95a172fd827..0ab1a676400 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -251,6 +251,8 @@ struct fx_write_context_ops + void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); + void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); + void (*write_annotation)(struct hlsl_ir_var *var, struct fx_write_context *fx); ++ void (*write_state_assignment)(const struct hlsl_ir_var *var, ++ struct hlsl_state_block_entry *entry, struct fx_write_context *fx); + bool are_child_effects_supported; + }; + +@@ -313,6 +315,15 @@ static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) + fx->ops->write_pass(var, fx); + } + ++static void write_state_assignment(const struct hlsl_ir_var *var, ++ struct hlsl_state_block_entry *entry, struct fx_write_context *fx) ++{ ++ fx->ops->write_state_assignment(var, entry, fx); ++} ++ ++static uint32_t write_state_block(struct hlsl_ir_var *var, ++ unsigned int block_index, struct fx_write_context *fx); ++ + static uint32_t write_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) + { + struct hlsl_ctx *ctx = fx->ctx; +@@ -348,8 +359,6 @@ static void 
write_fx_4_annotations(struct hlsl_scope *scope, struct fx_write_con + static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); + static const char * get_fx_4_type_name(const struct hlsl_type *type); + static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx); +-static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, +- uint32_t count_offset, struct fx_write_context *fx); + + static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) + { +@@ -502,17 +511,22 @@ static uint32_t write_fx_4_string(const char *string, struct fx_write_context *f + return string_entry->offset; + } + ++static void fx_4_decompose_state_blocks(struct hlsl_ir_var *var, struct fx_write_context *fx); ++ + static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) + { + struct vkd3d_bytecode_buffer *buffer = &fx->structured; +- uint32_t name_offset, count_offset; ++ uint32_t name_offset, count_offset, count; + + name_offset = write_string(var->name, fx); + put_u32(buffer, name_offset); + count_offset = put_u32(buffer, 0); + ++ fx_4_decompose_state_blocks(var, fx); ++ + write_fx_4_annotations(var->annotations, fx); +- write_fx_4_state_block(var, 0, count_offset, fx); ++ count = write_state_block(var, 0, fx); ++ set_u32(buffer, count_offset, count); + } + + static void write_fx_2_annotations(struct hlsl_ir_var *var, uint32_t count_offset, struct fx_write_context *fx) +@@ -775,9 +789,10 @@ static const struct rhs_named_value fx_2_filter_values[] = + { NULL } + }; + +-struct fx_2_state ++struct fx_state + { + const char *name; ++ enum hlsl_type_class container; + enum hlsl_type_class class; + enum state_property_component_type type; + unsigned int dimx; +@@ -786,215 +801,215 @@ struct fx_2_state + const struct rhs_named_value *values; + }; + +-static const struct fx_2_state fx_2_pass_states[] = +-{ +- { "ZEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 0, 
fx_2_zenable_values }, +- { "FillMode", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 1, fx_2_fillmode_values }, +- { "ShadeMode", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 2, fx_2_shademode_values }, +- { "ZWriteEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 3 }, +- { "AlphaTestEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 4 }, +- { "LastPixel", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 5 }, +- { "SrcBlend", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 6, fx_2_blendmode_values }, +- { "DestBlend", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 7, fx_2_blendmode_values }, +- { "CullMode", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 8, fx_2_cullmode_values }, +- { "ZFunc", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9, fx_2_cmpfunc_values }, +- { "AlphaRef", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 10 }, +- { "AlphaFunc", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11, fx_2_cmpfunc_values }, +- { "DitherEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12 }, +- { "AlphaBlendEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13 }, +- { "FogEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 14 }, +- { "SpecularEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, +- { "FogColor", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 16 }, +- { "FogTableMode", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 17, fx_2_fogmode_values }, +- { "FogStart", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 18 }, +- { "FogEnd", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 19 }, +- { "FogDensity", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 20 }, +- { "RangeFogEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 21 }, +- { "StencilEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 22 }, +- { "StencilFail", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, fx_2_stencilcaps_values }, +- { "StencilZFail", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, fx_2_stencilcaps_values }, +- { "StencilPass", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 25, fx_2_stencilcaps_values }, +- { "StencilFunc", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 26, fx_2_cmpfunc_values }, +- { "StencilRef", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 27 }, +- { "StencilMask", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28 }, +- { "StencilWriteMask", HLSL_CLASS_SCALAR, 
FX_UINT, 1, 1, 29 }, +- { "TextureFactor", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30 }, +- { "Wrap0", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, fx_2_wrap_values }, +- { "Wrap1", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, fx_2_wrap_values }, +- { "Wrap2", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, fx_2_wrap_values }, +- { "Wrap3", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, fx_2_wrap_values }, +- { "Wrap4", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, fx_2_wrap_values }, +- { "Wrap5", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 36, fx_2_wrap_values }, +- { "Wrap6", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 37, fx_2_wrap_values }, +- { "Wrap7", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, fx_2_wrap_values }, +- { "Wrap8", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, fx_2_wrap_values }, +- { "Wrap9", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, fx_2_wrap_values }, +- { "Wrap10", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, fx_2_wrap_values }, +- { "Wrap11", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, fx_2_wrap_values }, +- { "Wrap12", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 43, fx_2_wrap_values }, +- { "Wrap13", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 44, fx_2_wrap_values }, +- { "Wrap14", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, fx_2_wrap_values }, +- { "Wrap15", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, fx_2_wrap_values }, +- { "Clipping", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47 }, +- { "Lighting", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48 }, +- { "Ambient", HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 49 }, +- { "FogVertexMode", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50, fx_2_fogmode_values }, +- { "ColorVertex", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51 }, +- { "LocalViewer", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 52 }, +- { "NormalizeNormals", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 53 }, +- +- { "DiffuseMaterialSource", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 54, fx_2_materialcolorsource_values }, +- { "SpecularMaterialSource", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 55, fx_2_materialcolorsource_values }, +- { "AmbientMaterialSource", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 56, fx_2_materialcolorsource_values }, +- { 
"EmissiveMaterialSource", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 57, fx_2_materialcolorsource_values }, +- +- { "VertexBlend", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 58, fx_2_vertexblend_values }, +- { "ClipPlaneEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 59, fx_2_clipplane_values }, +- { "PointSize", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 60 }, +- { "PointSize_Min", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 61 }, +- { "PointSize_Max", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 62 }, +- { "PointSpriteEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 63 }, +- { "PointScaleEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 64 }, +- { "PointScale_A", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 65 }, +- { "PointScale_B", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 66 }, +- { "PointScale_C", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 67 }, +- +- { "MultiSampleAntialias", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 68 }, +- { "MultiSampleMask", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 69 }, +- { "PatchEdgeStyle", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 70, fx_2_patchedgestyle_values }, +- { "DebugMonitorToken", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 71 }, +- { "IndexedVertexBlendEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 72 }, +- { "ColorWriteEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 73, fx_2_colorwriteenable_values }, +- { "TweenFactor", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 74 }, +- { "BlendOp", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 75, fx_2_blendop_values }, +- { "PositionDegree", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 76, fx_2_degree_values }, +- { "NormalDegree", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 77, fx_2_degree_values }, +- { "ScissorTestEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 78 }, +- { "SlopeScaleDepthBias", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 79 }, +- +- { "AntialiasedLineEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 80 }, +- { "MinTessellationLevel", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 81 }, +- { "MaxTessellationLevel", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 82 }, +- { "AdaptiveTess_X", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 83 }, +- { "AdaptiveTess_Y", HLSL_CLASS_SCALAR, 
FX_FLOAT, 1, 1, 84 }, +- { "AdaptiveTess_Z", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 85 }, +- { "AdaptiveTess_W", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 86 }, +- { "EnableAdaptiveTessellation",HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 87 }, +- { "TwoSidedStencilMode", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 88 }, +- { "StencilFail", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 89, fx_2_stencilcaps_values }, +- { "StencilZFail", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 90, fx_2_stencilcaps_values }, +- { "StencilPass", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 91, fx_2_stencilcaps_values }, +- { "StencilFunc", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 92, fx_2_cmpfunc_values }, +- +- { "ColorWriteEnable1", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 93, fx_2_colorwriteenable_values }, +- { "ColorWriteEnable2", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 94, fx_2_colorwriteenable_values }, +- { "ColorWriteEnable3", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 95, fx_2_colorwriteenable_values }, +- { "BlendFactor", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 96 }, +- { "SRGBWriteEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 97 }, +- { "DepthBias", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 98 }, +- { "SeparateAlphaBlendEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 99 }, +- { "SrcBlendAlpha", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 100, fx_2_blendmode_values }, +- { "DestBlendAlpha", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 101, fx_2_blendmode_values }, +- { "BlendOpAlpha", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 102, fx_2_blendmode_values }, +- +- { "ColorOp", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 103, fx_2_textureop_values }, +- { "ColorArg0", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 104, fx_2_colorarg_values }, +- { "ColorArg1", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 105, fx_2_colorarg_values }, +- { "ColorArg2", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 106, fx_2_colorarg_values }, +- { "AlphaOp", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 107, fx_2_textureop_values }, +- { "AlphaArg0", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 108, fx_2_colorarg_values }, +- { "AlphaArg1", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 109, fx_2_colorarg_values 
}, +- { "AlphaArg2", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 110, fx_2_colorarg_values }, +- { "ResultArg", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 111, fx_2_colorarg_values }, +- { "BumpEnvMat00", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 112 }, +- { "BumpEnvMat01", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 113 }, +- { "BumpEnvMat10", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 114 }, +- { "BumpEnvMat11", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 115 }, +- { "TexCoordIndex", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 116 }, +- { "BumpEnvLScale", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 117 }, +- { "BumpEnvLOffset", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 118 }, +- { "TextureTransformFlags", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 119, fx_2_texturetransform_values }, +- { "Constant", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 120 }, +- { "PatchSegments", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 121 }, +- { "FVF", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 122 }, +- +- { "ProjectionTransform", HLSL_CLASS_MATRIX, FX_FLOAT, 4, 1, 123 }, +- { "ViewTransform", HLSL_CLASS_MATRIX, FX_FLOAT, 4, 1, 124 }, +- { "WorldTransform", HLSL_CLASS_MATRIX, FX_FLOAT, 4, 256, 125 }, +- { "TextureTransform", HLSL_CLASS_MATRIX, FX_FLOAT, 4, 8, 126 }, +- +- { "MaterialAmbient", HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 127 }, +- { "MaterialDiffuse", HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 128 }, +- { "MaterialSpecular", HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 129 }, +- { "MaterialEmissive", HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 130 }, +- { "MaterialPower", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 131 }, +- +- { "LightType", HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u, 132, fx_2_lighttype_values }, +- { "LightDiffuse", HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 133 }, +- { "LightSpecular", HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 134 }, +- { "LightAmbient", HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 135 }, +- { "LightPosition", HLSL_CLASS_VECTOR, FX_FLOAT, 3, ~0u, 136 }, +- { "LightDirection", HLSL_CLASS_VECTOR, FX_FLOAT, 3, ~0u, 137 }, +- { "LightRange", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 138 }, +- { "LightFalloff", 
HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 139 }, +- { "LightAttenuation0", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 140 }, +- { "LightAttenuation1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 141 }, +- { "LightAttenuation2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 142 }, +- { "LightTheta", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 143 }, +- { "LightPhi", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 144 }, +- { "LightEnable", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 145 }, +- +- { "VertexShader", HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 146 }, +- { "PixelShader", HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 147 }, +- +- { "VertexShaderConstantF", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 148 }, +- { "VertexShaderConstantB", HLSL_CLASS_SCALAR, FX_BOOL, 1, ~0u, 149 }, +- { "VertexShaderConstantI", HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u, 150 }, +- { "VertexShaderConstant", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 151 }, +- { "VertexShaderConstant1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 152 }, +- { "VertexShaderConstant2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 153 }, +- { "VertexShaderConstant3", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 154 }, +- { "VertexShaderConstant4", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 155 }, +- +- { "PixelShaderConstantF", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 156 }, +- { "PixelShaderConstantB", HLSL_CLASS_SCALAR, FX_BOOL, 1, ~0u, 157 }, +- { "PixelShaderConstantI", HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u, 158 }, +- { "PixelShaderConstant", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 159 }, +- { "PixelShaderConstant1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 160 }, +- { "PixelShaderConstant2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 161 }, +- { "PixelShaderConstant3", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 162 }, +- { "PixelShaderConstant4", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 163 }, +- +- { "Texture", HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 261, 164 }, +- { "AddressU", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 165, fx_2_address_values }, +- { "AddressV", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 166, fx_2_address_values }, +- { 
"AddressW", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 167, fx_2_address_values }, +- { "BorderColor", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 168 }, +- { "MagFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 169, fx_2_filter_values }, +- { "MinFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 170, fx_2_filter_values }, +- { "MipFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 171, fx_2_filter_values }, +- { "MipMapLodBias", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 172 }, +- { "MaxMipLevel", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 173 }, +- { "MaxAnisotropy", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 174 }, +- { "SRGBTexture", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 175 }, +- { "ElementIndex", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 176 }, ++static const struct fx_state fx_2_pass_states[] = ++{ ++ { "ZEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 0, fx_2_zenable_values }, ++ { "FillMode", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 1, fx_2_fillmode_values }, ++ { "ShadeMode", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 2, fx_2_shademode_values }, ++ { "ZWriteEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 3 }, ++ { "AlphaTestEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 4 }, ++ { "LastPixel", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 5 }, ++ { "SrcBlend", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 6, fx_2_blendmode_values }, ++ { "DestBlend", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 7, fx_2_blendmode_values }, ++ { "CullMode", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 8, fx_2_cullmode_values }, ++ { "ZFunc", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9, fx_2_cmpfunc_values }, ++ { "AlphaRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 10 }, ++ { "AlphaFunc", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11, fx_2_cmpfunc_values }, ++ { "DitherEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12 }, ++ { "AlphaBlendEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13 }, ++ { 
"FogEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 14 }, ++ { "SpecularEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, ++ { "FogColor", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 16 }, ++ { "FogTableMode", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 17, fx_2_fogmode_values }, ++ { "FogStart", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 18 }, ++ { "FogEnd", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 19 }, ++ { "FogDensity", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 20 }, ++ { "RangeFogEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 21 }, ++ { "StencilEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 22 }, ++ { "StencilFail", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, fx_2_stencilcaps_values }, ++ { "StencilZFail", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, fx_2_stencilcaps_values }, ++ { "StencilPass", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 25, fx_2_stencilcaps_values }, ++ { "StencilFunc", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 26, fx_2_cmpfunc_values }, ++ { "StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 27 }, ++ { "StencilMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28 }, ++ { "StencilWriteMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29 }, ++ { "TextureFactor", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30 }, ++ { "Wrap0", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, fx_2_wrap_values }, ++ { "Wrap1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, fx_2_wrap_values }, ++ { "Wrap2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, fx_2_wrap_values }, ++ { "Wrap3", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, fx_2_wrap_values }, ++ { "Wrap4", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, fx_2_wrap_values }, ++ { "Wrap5", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 36, fx_2_wrap_values }, ++ { "Wrap6", HLSL_CLASS_PASS, 
HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 37, fx_2_wrap_values }, ++ { "Wrap7", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, fx_2_wrap_values }, ++ { "Wrap8", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, fx_2_wrap_values }, ++ { "Wrap9", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, fx_2_wrap_values }, ++ { "Wrap10", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, fx_2_wrap_values }, ++ { "Wrap11", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, fx_2_wrap_values }, ++ { "Wrap12", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 43, fx_2_wrap_values }, ++ { "Wrap13", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 44, fx_2_wrap_values }, ++ { "Wrap14", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, fx_2_wrap_values }, ++ { "Wrap15", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, fx_2_wrap_values }, ++ { "Clipping", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47 }, ++ { "Lighting", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48 }, ++ { "Ambient", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 49 }, ++ { "FogVertexMode", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50, fx_2_fogmode_values }, ++ { "ColorVertex", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51 }, ++ { "LocalViewer", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 52 }, ++ { "NormalizeNormals", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 53 }, ++ ++ { "DiffuseMaterialSource", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 54, fx_2_materialcolorsource_values }, ++ { "SpecularMaterialSource", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 55, fx_2_materialcolorsource_values }, ++ { "AmbientMaterialSource", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 56, fx_2_materialcolorsource_values }, ++ { "EmissiveMaterialSource", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 57, fx_2_materialcolorsource_values }, ++ ++ { "VertexBlend", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 58, 
fx_2_vertexblend_values }, ++ { "ClipPlaneEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 59, fx_2_clipplane_values }, ++ { "PointSize", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 60 }, ++ { "PointSize_Min", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 61 }, ++ { "PointSize_Max", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 62 }, ++ { "PointSpriteEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 63 }, ++ { "PointScaleEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 64 }, ++ { "PointScale_A", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 65 }, ++ { "PointScale_B", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 66 }, ++ { "PointScale_C", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 67 }, ++ ++ { "MultiSampleAntialias", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 68 }, ++ { "MultiSampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 69 }, ++ { "PatchEdgeStyle", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 70, fx_2_patchedgestyle_values }, ++ { "DebugMonitorToken", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 71 }, ++ { "IndexedVertexBlendEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 72 }, ++ { "ColorWriteEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 73, fx_2_colorwriteenable_values }, ++ { "TweenFactor", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 74 }, ++ { "BlendOp", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 75, fx_2_blendop_values }, ++ { "PositionDegree", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 76, fx_2_degree_values }, ++ { "NormalDegree", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 77, fx_2_degree_values }, ++ { "ScissorTestEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 78 }, ++ { "SlopeScaleDepthBias", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 79 }, ++ ++ { "AntialiasedLineEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 80 }, ++ { "MinTessellationLevel", 
HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 81 }, ++ { "MaxTessellationLevel", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 82 }, ++ { "AdaptiveTess_X", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 83 }, ++ { "AdaptiveTess_Y", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 84 }, ++ { "AdaptiveTess_Z", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 85 }, ++ { "AdaptiveTess_W", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 86 }, ++ { "EnableAdaptiveTessellation",HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 87 }, ++ { "TwoSidedStencilMode", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 88 }, ++ { "StencilFail", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 89, fx_2_stencilcaps_values }, ++ { "StencilZFail", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 90, fx_2_stencilcaps_values }, ++ { "StencilPass", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 91, fx_2_stencilcaps_values }, ++ { "StencilFunc", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 92, fx_2_cmpfunc_values }, ++ ++ { "ColorWriteEnable1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 93, fx_2_colorwriteenable_values }, ++ { "ColorWriteEnable2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 94, fx_2_colorwriteenable_values }, ++ { "ColorWriteEnable3", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 95, fx_2_colorwriteenable_values }, ++ { "BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 96 }, ++ { "SRGBWriteEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 97 }, ++ { "DepthBias", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 98 }, ++ { "SeparateAlphaBlendEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 99 }, ++ { "SrcBlendAlpha", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 100, fx_2_blendmode_values }, ++ { "DestBlendAlpha", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 101, fx_2_blendmode_values }, ++ { "BlendOpAlpha", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 102, 
fx_2_blendmode_values }, ++ ++ { "ColorOp", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 103, fx_2_textureop_values }, ++ { "ColorArg0", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 104, fx_2_colorarg_values }, ++ { "ColorArg1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 105, fx_2_colorarg_values }, ++ { "ColorArg2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 106, fx_2_colorarg_values }, ++ { "AlphaOp", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 107, fx_2_textureop_values }, ++ { "AlphaArg0", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 108, fx_2_colorarg_values }, ++ { "AlphaArg1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 109, fx_2_colorarg_values }, ++ { "AlphaArg2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 110, fx_2_colorarg_values }, ++ { "ResultArg", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 111, fx_2_colorarg_values }, ++ { "BumpEnvMat00", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 112 }, ++ { "BumpEnvMat01", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 113 }, ++ { "BumpEnvMat10", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 114 }, ++ { "BumpEnvMat11", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 115 }, ++ { "TexCoordIndex", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 116 }, ++ { "BumpEnvLScale", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 117 }, ++ { "BumpEnvLOffset", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 118 }, ++ { "TextureTransformFlags", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 119, fx_2_texturetransform_values }, ++ { "Constant", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 120 }, ++ { "PatchSegments", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 121 }, ++ { "FVF", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 122 }, ++ ++ { "ProjectionTransform", HLSL_CLASS_PASS, HLSL_CLASS_MATRIX, FX_FLOAT, 4, 1, 123 }, ++ { "ViewTransform", HLSL_CLASS_PASS, HLSL_CLASS_MATRIX, FX_FLOAT, 4, 1, 124 }, ++ { 
"WorldTransform", HLSL_CLASS_PASS, HLSL_CLASS_MATRIX, FX_FLOAT, 4, 256, 125 }, ++ { "TextureTransform", HLSL_CLASS_PASS, HLSL_CLASS_MATRIX, FX_FLOAT, 4, 8, 126 }, ++ ++ { "MaterialDiffuse", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 127 }, ++ { "MaterialAmbient", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 128 }, ++ { "MaterialSpecular", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 129 }, ++ { "MaterialEmissive", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 130 }, ++ { "MaterialPower", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 131 }, ++ ++ { "LightType", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u, 132, fx_2_lighttype_values }, ++ { "LightDiffuse", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 133 }, ++ { "LightSpecular", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 134 }, ++ { "LightAmbient", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 135 }, ++ { "LightPosition", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 3, ~0u, 136 }, ++ { "LightDirection", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 3, ~0u, 137 }, ++ { "LightRange", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 138 }, ++ { "LightFalloff", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 139 }, ++ { "LightAttenuation0", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 140 }, ++ { "LightAttenuation1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 141 }, ++ { "LightAttenuation2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 142 }, ++ { "LightTheta", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 143 }, ++ { "LightPhi", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 144 }, ++ { "LightEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 145 }, ++ ++ { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 146 }, ++ { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 147 }, ++ ++ { "VertexShaderConstantF", HLSL_CLASS_PASS, 
HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 148 }, ++ { "VertexShaderConstantB", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BOOL, 1, ~0u, 149 }, ++ { "VertexShaderConstantI", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u, 150 }, ++ { "VertexShaderConstant", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 151 }, ++ { "VertexShaderConstant1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 152 }, ++ { "VertexShaderConstant2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 153 }, ++ { "VertexShaderConstant3", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 154 }, ++ { "VertexShaderConstant4", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 155 }, ++ ++ { "PixelShaderConstantF", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 156 }, ++ { "PixelShaderConstantB", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BOOL, 1, ~0u, 157 }, ++ { "PixelShaderConstantI", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u, 158 }, ++ { "PixelShaderConstant", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 159 }, ++ { "PixelShaderConstant1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 160 }, ++ { "PixelShaderConstant2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 161 }, ++ { "PixelShaderConstant3", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 162 }, ++ { "PixelShaderConstant4", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 163 }, ++ ++ { "Texture", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 261, 164 }, ++ { "AddressU", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 165, fx_2_address_values }, ++ { "AddressV", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 166, fx_2_address_values }, ++ { "AddressW", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 167, fx_2_address_values }, ++ { "BorderColor", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 168 }, ++ { "MagFilter", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 169, fx_2_filter_values }, ++ { "MinFilter", HLSL_CLASS_PASS, 
HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 170, fx_2_filter_values }, ++ { "MipFilter", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 171, fx_2_filter_values }, ++ { "MipMapLodBias", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 172 }, ++ { "MaxMipLevel", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 173 }, ++ { "MaxAnisotropy", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 174 }, ++ { "SRGBTexture", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 175 }, ++ { "ElementIndex", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 176 }, + }; + +-static const struct fx_2_state fx_2_sampler_states[] = +-{ +- { "Texture", HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 164 }, +- { "AddressU", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 165, fx_2_address_values }, +- { "AddressV", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 166, fx_2_address_values }, +- { "AddressW", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 167, fx_2_address_values }, +- { "BorderColor", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 168 }, +- { "MagFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 169, fx_2_filter_values }, +- { "MinFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 170, fx_2_filter_values }, +- { "MipFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 171, fx_2_filter_values }, +- { "MipMapLodBias", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 172 }, +- { "MaxMipLevel", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 173 }, +- { "MaxAnisotropy", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 174 }, +- { "SRGBTexture", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 175 }, +- { "ElementIndex", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 176 }, ++static const struct fx_state fx_2_sampler_states[] = ++{ ++ { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 164 }, ++ { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 165, fx_2_address_values }, ++ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 166, fx_2_address_values }, ++ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 167, fx_2_address_values }, ++ { "BorderColor", 
HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 168 }, ++ { "MagFilter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 169, fx_2_filter_values }, ++ { "MinFilter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 170, fx_2_filter_values }, ++ { "MipFilter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 171, fx_2_filter_values }, ++ { "MipMapLodBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 172 }, ++ { "MaxMipLevel", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 173 }, ++ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 174 }, ++ { "SRGBTexture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 175 }, ++ { "ElementIndex", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 176 }, + }; + + static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) +@@ -1979,12 +1994,21 @@ static void write_fx_2_annotation(struct hlsl_ir_var *var, struct fx_write_conte + put_u32(buffer, value_offset); + } + ++static void write_fx_2_state_assignment(const struct hlsl_ir_var *var, ++ struct hlsl_state_block_entry *entry, struct fx_write_context *fx) ++{ ++ struct hlsl_ctx *ctx = fx->ctx; ++ ++ hlsl_fixme(ctx, &var->loc, "Writing fx_2_0 state assignments is not implemented."); ++} ++ + static const struct fx_write_context_ops fx_2_ops = + { + .write_string = write_fx_2_string, + .write_technique = write_fx_2_technique, + .write_pass = write_fx_2_pass, + .write_annotation = write_fx_2_annotation, ++ .write_state_assignment = write_fx_2_state_assignment, + }; + + static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -2047,12 +2071,16 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + return fx_write_context_cleanup(&fx); + } + ++static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, ++ struct hlsl_state_block_entry *entry, struct fx_write_context *fx); ++ + static const struct fx_write_context_ops fx_4_ops = + 
{ + .write_string = write_fx_4_string, + .write_technique = write_fx_4_technique, + .write_pass = write_fx_4_pass, + .write_annotation = write_fx_4_annotation, ++ .write_state_assignment = write_fx_4_state_assignment, + .are_child_effects_supported = true, + }; + +@@ -2660,18 +2688,7 @@ static const struct rhs_named_value null_values[] = + { NULL } + }; + +-static const struct fx_4_state +-{ +- const char *name; +- enum hlsl_type_class container; +- enum hlsl_type_class class; +- enum state_property_component_type type; +- unsigned int dimx; +- unsigned int array_size; +- int id; +- const struct rhs_named_value *values; +-} +-fx_4_states[] = ++static const struct fx_state fx_4_states[] = + { + { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, + { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, +@@ -2739,7 +2756,7 @@ fx_4_states[] = + { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 }, + }; + +-static const struct fx_4_state fx_5_blend_states[] = ++static const struct fx_state fx_5_blend_states[] = + { + { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, + { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, +@@ -2752,45 +2769,61 @@ static const struct fx_4_state fx_5_blend_states[] = + { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, + }; + +-struct fx_4_state_table ++struct fx_state_table + { +- const struct fx_4_state *ptr; ++ const struct fx_state *ptr; + unsigned int count; + }; + +-static struct fx_4_state_table fx_4_get_state_table(enum hlsl_type_class type_class, ++static struct fx_state_table fx_get_state_table(enum hlsl_type_class type_class, + unsigned int major, unsigned int minor) + { +- struct fx_4_state_table table; ++ struct fx_state_table table; + +- if (type_class == HLSL_CLASS_BLEND_STATE && (major == 5 || 
(major == 4 && minor == 1))) ++ if (major == 2) + { +- table.ptr = fx_5_blend_states; +- table.count = ARRAY_SIZE(fx_5_blend_states); ++ if (type_class == HLSL_CLASS_PASS) ++ { ++ table.ptr = fx_2_pass_states; ++ table.count = ARRAY_SIZE(fx_2_pass_states); ++ } ++ else ++ { ++ table.ptr = fx_2_sampler_states; ++ table.count = ARRAY_SIZE(fx_2_sampler_states); ++ } + } + else + { +- table.ptr = fx_4_states; +- table.count = ARRAY_SIZE(fx_4_states); ++ if (type_class == HLSL_CLASS_BLEND_STATE && (major == 5 || (major == 4 && minor == 1))) ++ { ++ table.ptr = fx_5_blend_states; ++ table.count = ARRAY_SIZE(fx_5_blend_states); ++ } ++ else ++ { ++ table.ptr = fx_4_states; ++ table.count = ARRAY_SIZE(fx_4_states); ++ } + } + + return table; + } + +-static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, ++static void resolve_fx_state_block_values(struct hlsl_ir_var *var, + struct hlsl_state_block_entry *entry, struct fx_write_context *fx) + { + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + struct replace_state_context replace_context; +- const struct fx_4_state *state = NULL; ++ const struct fx_state *state = NULL; + struct hlsl_type *state_type = NULL; + struct hlsl_ctx *ctx = fx->ctx; + enum hlsl_base_type base_type; +- struct fx_4_state_table table; ++ struct fx_state_table table; + struct hlsl_ir_node *node; + unsigned int i; + +- table = fx_4_get_state_table(type->class, ctx->profile->major_version, ctx->profile->minor_version); ++ table = fx_get_state_table(type->class, ctx->profile->major_version, ctx->profile->minor_version); + + for (i = 0; i < table.count; ++i) + { +@@ -3076,21 +3109,34 @@ static unsigned int decompose_fx_4_state_block(struct hlsl_ir_var *var, struct h + return decompose_fx_4_state_block_expand_array(var, block, entry_index, fx); + } + +-static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, +- uint32_t count_offset, struct fx_write_context *fx) ++static void 
fx_4_decompose_state_blocks(struct hlsl_ir_var *var, struct fx_write_context *fx) + { +- struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ unsigned int block_count = hlsl_get_multiarray_size(var->data_type); + struct hlsl_state_block *block; +- uint32_t i, count = 0; + +- if (var->state_blocks) ++ if (!var->state_blocks) ++ return; ++ ++ for (unsigned int i = 0; i < block_count; ++i) + { +- block = var->state_blocks[block_index]; ++ block = var->state_blocks[i]; + +- for (i = 0; i < block->count;) ++ for (unsigned int j = 0; j < block->count;) + { +- i += decompose_fx_4_state_block(var, block, i, fx); ++ j += decompose_fx_4_state_block(var, block, j, fx); + } ++ } ++} ++ ++static uint32_t write_state_block(struct hlsl_ir_var *var, unsigned int block_index, ++ struct fx_write_context *fx) ++{ ++ struct hlsl_state_block *block; ++ uint32_t i, count = 0; ++ ++ if (var->state_blocks) ++ { ++ block = var->state_blocks[block_index]; + + for (i = 0; i < block->count; ++i) + { +@@ -3101,27 +3147,29 @@ static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_i + continue; + + /* Resolve special constant names and property names. 
*/ +- resolve_fx_4_state_block_values(var, entry, fx); ++ resolve_fx_state_block_values(var, entry, fx); + +- write_fx_4_state_assignment(var, entry, fx); ++ write_state_assignment(var, entry, fx); + ++count; + } + } + +- set_u32(buffer, count_offset, count); ++ return count; + } + + static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) + { + uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i; + struct vkd3d_bytecode_buffer *buffer = &fx->structured; +- uint32_t count_offset; ++ uint32_t count_offset, count; ++ ++ fx_4_decompose_state_blocks(var, fx); + + for (i = 0; i < elements_count; ++i) + { + count_offset = put_u32(buffer, 0); +- +- write_fx_4_state_block(var, i, count_offset, fx); ++ count = write_state_block(var, i, fx); ++ set_u32(buffer, count_offset, count); + } + } + +@@ -3977,17 +4025,13 @@ static void fx_parse_fx_2_annotations(struct fx_parser *parser, uint32_t count) + vkd3d_string_buffer_printf(&parser->buffer, ">"); + } + +-static const struct fx_2_state *fx_2_get_state_by_id(enum hlsl_type_class container, uint32_t id) ++static const struct fx_state *fx_2_get_state_by_id(enum hlsl_type_class container, uint32_t id) + { +- const struct fx_2_state *table; +- unsigned int count; +- +- count = container == HLSL_CLASS_PASS ? ARRAY_SIZE(fx_2_pass_states) : ARRAY_SIZE(fx_2_sampler_states); +- table = container == HLSL_CLASS_PASS ? 
fx_2_pass_states : fx_2_sampler_states; ++ struct fx_state_table table = fx_get_state_table(container, 2, 0); + + /* State identifiers are sequential, no gaps */ +- if (id >= table[0].id && id <= table[count - 1].id) +- return &table[id - table[0].id]; ++ if (id >= table.ptr[0].id && id <= table.ptr[table.count - 1].id) ++ return &table.ptr[id - table.ptr[0].id]; + + return NULL; + } +@@ -3996,7 +4040,7 @@ static void fx_parse_fx_2_assignment(struct fx_parser *parser, enum hlsl_type_cl + const struct fx_assignment *entry) + { + const struct rhs_named_value *named_value = NULL; +- const struct fx_2_state *state; ++ const struct fx_state *state; + + if ((state = fx_2_get_state_by_id(container, entry->id))) + { +@@ -4700,7 +4744,7 @@ static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) + + static int fx_4_state_id_compare(const void *a, const void *b) + { +- const struct fx_4_state *state = b; ++ const struct fx_state *state = b; + int id = *(int *)a; + + return id - state->id; +@@ -5186,12 +5230,12 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 + }; + const struct rhs_named_value *named_value; + struct fx_5_shader shader = { 0 }; +- struct fx_4_state_table table; ++ struct fx_state_table table; + unsigned int shader_type = 0; + uint32_t i, j, comp_count; +- struct fx_4_state *state; ++ struct fx_state *state; + +- table = fx_4_get_state_table(type_class, parser->version.major, parser->version.minor); ++ table = fx_get_state_table(type_class, parser->version.major, parser->version.minor); + + for (i = 0; i < count; ++i) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 2b88a04a120..73cd4da906a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -3531,21 +3531,7 @@ static void dump_deref(struct vkd3d_string_buffer *buffer, const struct hlsl_der + + const char *debug_hlsl_writemask(unsigned int writemask) + { +- 
static const char components[] = {'x', 'y', 'z', 'w'}; +- char string[5]; +- unsigned int i = 0, pos = 0; +- +- VKD3D_ASSERT(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); +- +- while (writemask) +- { +- if (writemask & 1) +- string[pos++] = components[i]; +- writemask >>= 1; +- i++; +- } +- string[pos] = '\0'; +- return vkd3d_dbg_sprintf(".%s", string); ++ return debug_vsir_writemask(writemask); + } + + const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index bb37f0be6cf..369181cada8 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -59,8 +59,7 @@ static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned + + static inline void hlsl_swizzle_set_component(uint32_t *swizzle, unsigned int idx, unsigned int component) + { +- *swizzle &= ~(VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(idx)); +- *swizzle |= component << VKD3D_SHADER_SWIZZLE_SHIFT(idx); ++ vsir_swizzle_set_component(swizzle, idx, component); + } + + enum hlsl_type_class +@@ -309,6 +308,8 @@ struct hlsl_reg + unsigned int writemask; + /* Whether the register has been allocated. */ + bool allocated; ++ /* Currently only used for numeric registers. */ ++ enum vkd3d_shader_register_type type; + }; + + /* Types of instruction nodes for the IR. +@@ -1187,6 +1188,8 @@ struct hlsl_ctx + } constant_defs; + /* 'c' registers where the constants expected by SM2 sincos are stored. */ + struct hlsl_reg d3dsincosconst1, d3dsincosconst2; ++ /* Number of allocated SSA IDs, used in translation to vsir. */ ++ unsigned int ssa_count; + + /* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in + * compute shader profiles. It is set using the numthreads() attribute in the entry point. 
*/ +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index d4e29e16b7c..04bb2d98b26 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -5656,6 +5656,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + unsigned int writemask = hlsl_combine_writemasks(available_writemask, + vkd3d_write_mask_from_component_count(reg_size)); + ++ ret.type = VKD3DSPR_TEMP; + ret.id = reg_idx; + ret.writemask = hlsl_combine_writemasks(writemask, + vkd3d_write_mask_from_component_count(component_count)); +@@ -5666,6 +5667,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + } + } + ++ ret.type = VKD3DSPR_TEMP; + ret.id = allocator->reg_count; + ret.writemask = vkd3d_write_mask_from_component_count(component_count); + record_allocation(ctx, allocator, allocator->reg_count, +@@ -5692,6 +5694,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, + + record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip); + ++ ret.type = VKD3DSPR_TEMP; + ret.id = reg_idx; + ret.allocation_size = 1; + ret.writemask = writemask; +@@ -5737,6 +5740,7 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo + record_allocation(ctx, allocator, reg_idx + (reg_size / 4), + (1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip); + ++ ret.type = VKD3DSPR_TEMP; + ret.id = reg_idx; + ret.allocation_size = align(reg_size, 4) / 4; + ret.allocated = true; +@@ -5757,20 +5761,30 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, + return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false); + } + +-static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) ++static const char *debug_register(struct hlsl_reg reg, const struct hlsl_type *type) + { + static 
const char writemask_offset[] = {'w','x','y','z'}; + unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; ++ const char *class = "r"; ++ ++ if (reg.type == VKD3DSPR_CONST) ++ class = "c"; ++ else if (reg.type == VKD3DSPR_INPUT) ++ class = "v"; ++ else if (reg.type == VKD3DSPR_OUTPUT) ++ class = "o"; ++ else if (reg.type == VKD3DSPR_SSA) ++ class = "sr"; + + if (reg_size > 4 && !hlsl_type_is_patch_array(type)) + { + if (reg_size & 3) +- return vkd3d_dbg_sprintf("%c%u-%c%u.%c", class, reg.id, class, reg.id + (reg_size / 4), ++ return vkd3d_dbg_sprintf("%s%u-%s%u.%c", class, reg.id, class, reg.id + (reg_size / 4), + writemask_offset[reg_size & 3]); + +- return vkd3d_dbg_sprintf("%c%u-%c%u", class, reg.id, class, reg.id + (reg_size / 4) - 1); ++ return vkd3d_dbg_sprintf("%s%u-%s%u", class, reg.id, class, reg.id + (reg_size / 4) - 1); + } +- return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); ++ return vkd3d_dbg_sprintf("%s%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); + } + + static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -5910,11 +5924,12 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct register_allocator *allocator) + { + unsigned int reg_writemask = 0, dst_writemask = 0; ++ bool is_per_component = false; + + if (instr->reg.allocated || !instr->last_read) + return; + +- if (instr->type == HLSL_IR_EXPR) ++ if (instr->type == HLSL_IR_EXPR && ctx->profile->major_version < 4) + { + switch (hlsl_ir_expr(instr)->op) + { +@@ -5928,20 +5943,42 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, + reg_writemask = ctx->profile->major_version < 3 ? 
(1 << 3) - 1 : VKD3DSP_WRITEMASK_1; + break; + ++ case HLSL_OP1_EXP2: ++ case HLSL_OP1_LOG2: ++ case HLSL_OP1_RCP: ++ case HLSL_OP1_RSQ: ++ /* These ops can only be written one component at a time in sm1, ++ * so it'll take more than one instruction to fill the variable ++ * and thus we can't use an SSA. ++ * FIXME: We should probably handle this by splitting at the vsir ++ * level instead. */ ++ is_per_component = true; ++ break; ++ + default: + break; + } + } + ++ VKD3D_ASSERT(instr->data_type->class <= HLSL_CLASS_VECTOR); ++ + if (reg_writemask) +- instr->reg = allocate_register_with_masks(ctx, allocator, instr->index, +- instr->last_read, reg_writemask, dst_writemask, 0, false); +- else ++ instr->reg = allocate_register_with_masks(ctx, allocator, ++ instr->index, instr->last_read, reg_writemask, dst_writemask, 0, false); ++ else if (is_per_component) + instr->reg = allocate_numeric_registers_for_type(ctx, allocator, + instr->index, instr->last_read, instr->data_type); ++ else ++ { ++ instr->reg.writemask = vkd3d_write_mask_from_component_count(instr->data_type->e.numeric.dimx); ++ instr->reg.allocation_size = 1; ++ instr->reg.allocated = true; ++ instr->reg.type = VKD3DSPR_SSA; ++ instr->reg.id = ctx->ssa_count++; ++ } + + TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, +- debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); ++ debug_register(instr->reg, instr->data_type), instr->index, instr->last_read); + } + + static void allocate_variable_temp_register(struct hlsl_ctx *ctx, +@@ -5966,8 +6003,8 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, + var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator, + var->first_write, var->last_read, var->data_type); + +- TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r', +- var->regs[HLSL_REGSET_NUMERIC], var->data_type), var->first_write, var->last_read); ++ TRACE("Allocated %s to 
%s (liveness %u-%u).\n", var->name, ++ debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type), var->first_write, var->last_read); + } + } + } +@@ -6051,6 +6088,7 @@ static bool find_constant(struct hlsl_ctx *ctx, const float *f, unsigned int cou + if ((reg->allocated_mask & writemask) == writemask + && !memcmp(f, &reg->value.f[j], count * sizeof(float))) + { ++ ret->type = VKD3DSPR_CONST; + ret->id = reg->index; + ret->allocation_size = 1; + ret->writemask = writemask; +@@ -6144,12 +6182,13 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + if (find_constant(ctx, f, type->e.numeric.dimx, &constant->reg)) + { + TRACE("Reusing already allocated constant %s for @%u.\n", +- debug_register('c', constant->reg, type), instr->index); ++ debug_register(constant->reg, type), instr->index); + break; + } + + constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); +- TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); ++ constant->reg.type = VKD3DSPR_CONST; ++ TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register(constant->reg, type)); + + for (unsigned int x = 0, i = 0; x < 4; ++x) + { +@@ -6246,14 +6285,16 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl + type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + + ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); +- TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type)); ++ ctx->d3dsincosconst1.type = VKD3DSPR_CONST; ++ TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register(ctx->d3dsincosconst1, type)); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f, &instr->loc); + record_constant(ctx, 
ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f, &instr->loc); + + ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); +- TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type)); ++ ctx->d3dsincosconst2.type = VKD3DSPR_CONST; ++ TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register(ctx->d3dsincosconst2, type)); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f, &instr->loc); +@@ -6301,12 +6342,13 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); + } + ++ var->regs[HLSL_REGSET_NUMERIC].type = VKD3DSPR_CONST; + var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; + var->regs[HLSL_REGSET_NUMERIC].allocation_size = reg_size / 4; + var->regs[HLSL_REGSET_NUMERIC].writemask = VKD3DSP_WRITEMASK_ALL; + var->regs[HLSL_REGSET_NUMERIC].allocated = true; + TRACE("Allocated reserved %s to %s.\n", var->name, +- debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); ++ debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type)); + } + } + +@@ -6322,8 +6364,9 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + { + var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false); ++ var->regs[HLSL_REGSET_NUMERIC].type = VKD3DSPR_CONST; + TRACE("Allocated %s to %s.\n", var->name, +- debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); ++ debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type)); + } + } + +@@ -6519,9 +6562,10 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + + 
var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX, + reg_size, component_count, mode, var->force_align, vip_allocation); ++ var->regs[HLSL_REGSET_NUMERIC].type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + +- TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v', +- var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); ++ TRACE("Allocated %s to %s (mode %d).\n", var->name, ++ debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); + } + } + +@@ -7733,8 +7777,6 @@ static void validate_and_record_stream_outputs(struct hlsl_ctx *ctx) + reported_invalid_index = true; + } + } +- +- /* TODO: check that maxvertexcount * outputdatasize <= 1024. */ + } + + static void validate_max_output_size(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, +@@ -8376,7 +8418,7 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, + } + else + { +- vsir_register_init(&src->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); ++ vsir_register_init(&src->reg, instr->reg.type, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); + src->reg.idx[0].offset = instr->reg.id; + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = generate_vsir_get_src_swizzle(instr->reg.writemask, map_writemask); +@@ -8657,7 +8699,7 @@ static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, + struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) + { + VKD3D_ASSERT(instr->reg.allocated); +- vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); ++ vsir_dst_param_init(dst, instr->reg.type, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); + dst->reg.idx[0].offset = instr->reg.id; + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->write_mask = instr->reg.writemask; +@@ -8767,13 +8809,13 @@ static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx + return; + + dst_param = &ins->dst[0]; +- 
vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ vsir_register_init(&dst_param->reg, instr->reg.type, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->reg.dimension = VSIR_DIMENSION_VEC4; + dst_param->write_mask = 1u << i; + + src_param = &ins->src[0]; +- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ vsir_register_init(&src_param->reg, operand->reg.type, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = operand->reg.id; + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + c = vsir_swizzle_get_component(src_swizzle, i); +@@ -9372,7 +9414,7 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, + + src_param = &ins->src[0]; + VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT); +- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, val), 1); ++ vsir_register_init(&src_param->reg, val->reg.type, vsir_data_type_from_hlsl_instruction(ctx, val), 1); + src_param->reg.idx[0].offset = val->reg.id; + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->swizzle = swizzle; +@@ -9528,6 +9570,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + return; + } + ++ program->ssa_count = 0; + program->temp_count = allocate_temp_registers(ctx, entry_func); + if (ctx->result) + return; +@@ -9540,6 +9583,8 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + list_move_head(&entry_func->body.instrs, &block.instrs); + + sm1_generate_vsir_block(ctx, &entry_func->body, program); ++ ++ program->ssa_count = ctx->ssa_count; + } + + D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) +@@ -12323,6 +12368,8 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + } + } + ++ program->ssa_count = 0; ++ + if (version.type == VKD3D_SHADER_TYPE_HULL) + generate_vsir_add_program_instruction(ctx, program, + &ctx->patch_constant_func->loc, 
VKD3DSIH_HS_CONTROL_POINT_PHASE, 0, 0); +@@ -12336,6 +12383,8 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + + generate_vsir_scan_required_features(ctx, program); + generate_vsir_scan_global_flags(ctx, program, func); ++ ++ program->ssa_count = ctx->ssa_count; + } + + /* For some reason, for matrices, values from default value initializers end +@@ -13523,6 +13572,19 @@ static void process_entry_function(struct hlsl_ctx *ctx, + lower_ir(ctx, lower_matrix_swizzles, body); + lower_ir(ctx, lower_index_loads, body); + ++ if (entry_func->return_var) ++ { ++ if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) ++ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, ++ "Geometry shaders cannot return values."); ++ else if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT ++ && !entry_func->return_var->semantic.name) ++ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, ++ "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); ++ ++ append_output_var_copy(ctx, entry_func, entry_func->return_var); ++ } ++ + for (i = 0; i < entry_func->parameters.count; ++i) + { + var = entry_func->parameters.vars[i]; +@@ -13627,18 +13689,9 @@ static void process_entry_function(struct hlsl_ctx *ctx, + } + } + } ++ + if (entry_func->return_var) + { +- if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) +- hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, +- "Geometry shaders cannot return values."); +- else if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT +- && !entry_func->return_var->semantic.name) +- hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, +- "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); +- +- append_output_var_copy(ctx, entry_func, entry_func->return_var); +- + if (profile->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func) + 
ctx->output_control_point_type = entry_func->return_var->data_type; + } +@@ -13882,6 +13935,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + return ctx->result; + } + ++ vsir_program_trace(&program); ++ + result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context); + vsir_program_cleanup(&program); + vkd3d_shader_free_shader_code(&ctab); +@@ -13907,6 +13962,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + return ctx->result; + } + ++ vsir_program_trace(&program); ++ + result = tpf_compile(&program, config_flags, &rdef, out, ctx->message_context); + vsir_program_cleanup(&program); + vkd3d_shader_free_shader_code(&rdef); +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 1429c3a8778..8489d0b5ecb 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -503,6 +503,53 @@ bool vsir_signature_find_sysval(const struct shader_signature *signature, + return false; + } + ++const char *debug_vsir_writemask(unsigned int writemask) ++{ ++ static const char components[] = {'x', 'y', 'z', 'w'}; ++ char string[5]; ++ unsigned int i = 0, pos = 0; ++ ++ VKD3D_ASSERT(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); ++ ++ while (writemask) ++ { ++ if (writemask & 1) ++ string[pos++] = components[i]; ++ writemask >>= 1; ++ i++; ++ } ++ string[pos] = '\0'; ++ return vkd3d_dbg_sprintf(".%s", string); ++} ++ ++static unsigned int vsir_combine_write_masks(unsigned int first, unsigned int second) ++{ ++ unsigned int ret = 0, j = 0; ++ ++ for (unsigned int i = 0; i < VKD3D_VEC4_SIZE; ++i) ++ { ++ if (first & (1u << i)) ++ { ++ if (second & (1u << j++)) ++ ret |= (1u << i); ++ } ++ } ++ ++ return ret; ++} ++ ++static uint32_t vsir_combine_swizzles(uint32_t first, uint32_t second) ++{ ++ uint32_t ret = 0; ++ ++ for (unsigned int i = 0; i < VKD3D_VEC4_SIZE; ++i) ++ { ++ unsigned int s = vsir_swizzle_get_component(second, i); 
++ vsir_swizzle_set_component(&ret, i, vsir_swizzle_get_component(first, s)); ++ } ++ return ret; ++} ++ + void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) + { +@@ -7725,6 +7772,892 @@ static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *pro + return VKD3D_OK; + } + ++ ++/* Distinguishes between instruction sources which are masked, where the used ++ * components of the source are determined by the write mask, and sources which ++ * are not masked, where the used components are pre-defined. ++ * ++ * E.g. "add r0.yz, r1.xyzw, r2.xyzw" uses the .yz components of r1 and r2, and ++ * therefore those sources are considered "masked", but ++ * "dp3 r0.y, r1.xyzw, r2.xyzw" uses the .xyz components. */ ++static bool vsir_src_is_masked(enum vkd3d_shader_opcode opcode, unsigned int src_idx) ++{ ++ switch (opcode) ++ { ++ case VKD3DSIH_ABS: ++ case VKD3DSIH_ACOS: ++ case VKD3DSIH_ADD: ++ case VKD3DSIH_AND: ++ case VKD3DSIH_ASIN: ++ case VKD3DSIH_ATAN: ++ case VKD3DSIH_BFI: ++ case VKD3DSIH_BFREV: ++ case VKD3DSIH_CMP: ++ case VKD3DSIH_CND: ++ case VKD3DSIH_COS: ++ case VKD3DSIH_COUNTBITS: ++ case VKD3DSIH_DADD: /* NB: These are masked, but the mask is double-sized. 
*/ ++ case VKD3DSIH_DDIV: ++ case VKD3DSIH_DFMA: ++ case VKD3DSIH_DIV: ++ case VKD3DSIH_DMAX: ++ case VKD3DSIH_DMIN: ++ case VKD3DSIH_DMOV: ++ case VKD3DSIH_DMOVC: ++ case VKD3DSIH_DMUL: ++ case VKD3DSIH_DRCP: ++ case VKD3DSIH_DSX: ++ case VKD3DSIH_DSX_COARSE: ++ case VKD3DSIH_DSX_FINE: ++ case VKD3DSIH_DSY: ++ case VKD3DSIH_DSY_COARSE: ++ case VKD3DSIH_DSY_FINE: ++ case VKD3DSIH_EQO: ++ case VKD3DSIH_EQU: ++ case VKD3DSIH_EXP: ++ case VKD3DSIH_EXPP: ++ case VKD3DSIH_F16TOF32: ++ case VKD3DSIH_F32TOF16: ++ case VKD3DSIH_FIRSTBIT_HI: ++ case VKD3DSIH_FIRSTBIT_LO: ++ case VKD3DSIH_FIRSTBIT_SHI: ++ case VKD3DSIH_FRC: ++ case VKD3DSIH_FREM: ++ case VKD3DSIH_FTOD: ++ case VKD3DSIH_FTOI: ++ case VKD3DSIH_FTOU: ++ case VKD3DSIH_GEO: ++ case VKD3DSIH_GEU: ++ case VKD3DSIH_HCOS: ++ case VKD3DSIH_HSIN: ++ case VKD3DSIH_HTAN: ++ case VKD3DSIH_IADD: ++ case VKD3DSIH_IBFE: ++ case VKD3DSIH_IDIV: ++ case VKD3DSIH_IEQ: ++ case VKD3DSIH_IGE: ++ case VKD3DSIH_ILT: ++ case VKD3DSIH_IMAD: ++ case VKD3DSIH_IMAX: ++ case VKD3DSIH_IMIN: ++ case VKD3DSIH_IMUL: ++ case VKD3DSIH_INE: ++ case VKD3DSIH_INEG: ++ case VKD3DSIH_ISFINITE: ++ case VKD3DSIH_ISHL: ++ case VKD3DSIH_ISHR: ++ case VKD3DSIH_ISINF: ++ case VKD3DSIH_ISNAN: ++ case VKD3DSIH_ITOD: ++ case VKD3DSIH_ITOF: ++ case VKD3DSIH_ITOI: ++ case VKD3DSIH_LOG: ++ case VKD3DSIH_LOGP: ++ case VKD3DSIH_LRP: ++ case VKD3DSIH_LTO: ++ case VKD3DSIH_LTU: ++ case VKD3DSIH_MAD: ++ case VKD3DSIH_MAX: ++ case VKD3DSIH_MIN: ++ case VKD3DSIH_MOV: ++ case VKD3DSIH_MOVA: ++ case VKD3DSIH_MOVC: ++ case VKD3DSIH_MSAD: /* FIXME: Is this correct? 
*/ ++ case VKD3DSIH_MUL: ++ case VKD3DSIH_NEO: ++ case VKD3DSIH_NEU: ++ case VKD3DSIH_NOT: ++ case VKD3DSIH_OR: ++ case VKD3DSIH_ORD: ++ case VKD3DSIH_PHI: ++ case VKD3DSIH_POW: ++ case VKD3DSIH_QUAD_READ_ACROSS_D: ++ case VKD3DSIH_QUAD_READ_ACROSS_X: ++ case VKD3DSIH_QUAD_READ_ACROSS_Y: ++ case VKD3DSIH_RCP: ++ case VKD3DSIH_ROUND_NE: ++ case VKD3DSIH_ROUND_NI: ++ case VKD3DSIH_ROUND_PI: ++ case VKD3DSIH_ROUND_Z: ++ case VKD3DSIH_RSQ: ++ case VKD3DSIH_SETP: ++ case VKD3DSIH_SGE: ++ case VKD3DSIH_SGN: ++ case VKD3DSIH_SIN: ++ case VKD3DSIH_SINCOS: /* FIXME: Only for sm4. */ ++ case VKD3DSIH_SLT: ++ case VKD3DSIH_SQRT: ++ case VKD3DSIH_SUB: ++ case VKD3DSIH_SWAPC: ++ case VKD3DSIH_TAN: ++ case VKD3DSIH_UBFE: ++ case VKD3DSIH_UDIV: ++ case VKD3DSIH_UGE: ++ case VKD3DSIH_ULT: ++ case VKD3DSIH_UMAX: ++ case VKD3DSIH_UMIN: ++ case VKD3DSIH_UMUL: ++ case VKD3DSIH_UNO: ++ case VKD3DSIH_USHR: ++ case VKD3DSIH_UTOD: ++ case VKD3DSIH_UTOF: ++ case VKD3DSIH_UTOU: ++ case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: ++ case VKD3DSIH_WAVE_ACTIVE_BIT_AND: ++ case VKD3DSIH_WAVE_ACTIVE_BIT_OR: ++ case VKD3DSIH_WAVE_ACTIVE_BIT_XOR: ++ case VKD3DSIH_WAVE_ALL_TRUE: ++ case VKD3DSIH_WAVE_ANY_TRUE: ++ case VKD3DSIH_WAVE_OP_ADD: ++ case VKD3DSIH_WAVE_OP_IMAX: ++ case VKD3DSIH_WAVE_OP_IMIN: ++ case VKD3DSIH_WAVE_OP_MAX: ++ case VKD3DSIH_WAVE_OP_MIN: ++ case VKD3DSIH_WAVE_OP_MUL: ++ case VKD3DSIH_WAVE_OP_UMAX: ++ case VKD3DSIH_WAVE_OP_UMIN: ++ case VKD3DSIH_WAVE_READ_LANE_FIRST: ++ case VKD3DSIH_XOR: ++ return true; ++ ++ /* Atomics can't have a writemask. 
*/ ++ case VKD3DSIH_ATOMIC_AND: ++ case VKD3DSIH_ATOMIC_CMP_STORE: ++ case VKD3DSIH_ATOMIC_IADD: ++ case VKD3DSIH_ATOMIC_IMAX: ++ case VKD3DSIH_ATOMIC_IMIN: ++ case VKD3DSIH_ATOMIC_OR: ++ case VKD3DSIH_ATOMIC_UMAX: ++ case VKD3DSIH_ATOMIC_UMIN: ++ case VKD3DSIH_ATOMIC_XOR: ++ case VKD3DSIH_BEM: ++ case VKD3DSIH_BRANCH: ++ case VKD3DSIH_BREAK: ++ case VKD3DSIH_BREAKC: ++ case VKD3DSIH_BREAKP: ++ case VKD3DSIH_BUFINFO: ++ case VKD3DSIH_CALL: ++ case VKD3DSIH_CALLNZ: ++ case VKD3DSIH_CASE: ++ case VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED: /* FIXME: Is this correct? */ ++ case VKD3DSIH_CONTINUE: ++ case VKD3DSIH_CONTINUEP: ++ case VKD3DSIH_CRS: ++ case VKD3DSIH_CUT: ++ case VKD3DSIH_CUT_STREAM: ++ case VKD3DSIH_DCL: ++ case VKD3DSIH_DCL_CONSTANT_BUFFER: ++ case VKD3DSIH_DCL_FUNCTION_BODY: ++ case VKD3DSIH_DCL_FUNCTION_TABLE: ++ case VKD3DSIH_DCL_GLOBAL_FLAGS: ++ case VKD3DSIH_DCL_GS_INSTANCES: ++ case VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT: ++ case VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: ++ case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: ++ case VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER: ++ case VKD3DSIH_DCL_INDEXABLE_TEMP: ++ case VKD3DSIH_DCL_INDEX_RANGE: ++ case VKD3DSIH_DCL_INPUT: ++ case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: ++ case VKD3DSIH_DCL_INPUT_PRIMITIVE: ++ case VKD3DSIH_DCL_INPUT_PS: ++ case VKD3DSIH_DCL_INPUT_PS_SGV: ++ case VKD3DSIH_DCL_INPUT_PS_SIV: ++ case VKD3DSIH_DCL_INPUT_SGV: ++ case VKD3DSIH_DCL_INPUT_SIV: ++ case VKD3DSIH_DCL_INTERFACE: ++ case VKD3DSIH_DCL_OUTPUT: ++ case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: ++ case VKD3DSIH_DCL_OUTPUT_SGV: ++ case VKD3DSIH_DCL_OUTPUT_SIV: ++ case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: ++ case VKD3DSIH_DCL_RESOURCE_RAW: ++ case VKD3DSIH_DCL_RESOURCE_STRUCTURED: ++ case VKD3DSIH_DCL_SAMPLER: ++ case VKD3DSIH_DCL_STREAM: ++ case VKD3DSIH_DCL_TEMPS: ++ case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: ++ case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: ++ case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: ++ case VKD3DSIH_DCL_TGSM_RAW: ++ case 
VKD3DSIH_DCL_TGSM_STRUCTURED: ++ case VKD3DSIH_DCL_THREAD_GROUP: ++ case VKD3DSIH_DCL_UAV_RAW: ++ case VKD3DSIH_DCL_UAV_STRUCTURED: ++ case VKD3DSIH_DCL_UAV_TYPED: ++ case VKD3DSIH_DCL_VERTICES_OUT: ++ case VKD3DSIH_DEF: ++ case VKD3DSIH_DEFAULT: ++ case VKD3DSIH_DEFB: ++ case VKD3DSIH_DEFI: ++ case VKD3DSIH_DEQO: ++ case VKD3DSIH_DGEO: ++ case VKD3DSIH_DISCARD: ++ case VKD3DSIH_DLT: ++ case VKD3DSIH_DNE: ++ case VKD3DSIH_DP2: ++ case VKD3DSIH_DP2ADD: ++ case VKD3DSIH_DP3: ++ case VKD3DSIH_DP4: ++ case VKD3DSIH_DST: ++ case VKD3DSIH_DTOF: ++ case VKD3DSIH_DTOI: ++ case VKD3DSIH_DTOU: ++ case VKD3DSIH_ELSE: ++ case VKD3DSIH_EMIT: ++ case VKD3DSIH_EMIT_STREAM: ++ case VKD3DSIH_ENDIF: ++ case VKD3DSIH_ENDLOOP: ++ case VKD3DSIH_ENDREP: ++ case VKD3DSIH_ENDSWITCH: ++ case VKD3DSIH_FCALL: ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: ++ case VKD3DSIH_HS_DECLS: ++ case VKD3DSIH_HS_FORK_PHASE: ++ case VKD3DSIH_HS_JOIN_PHASE: ++ case VKD3DSIH_IF: ++ case VKD3DSIH_IFC: ++ /* It's unclear if any mapping is done for the source value. ++ * Does it require replicate swizzle? */ ++ case VKD3DSIH_IMM_ATOMIC_ALLOC: ++ case VKD3DSIH_IMM_ATOMIC_AND: ++ case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: ++ case VKD3DSIH_IMM_ATOMIC_CONSUME: ++ case VKD3DSIH_IMM_ATOMIC_EXCH: ++ case VKD3DSIH_IMM_ATOMIC_IADD: ++ case VKD3DSIH_IMM_ATOMIC_IMAX: ++ case VKD3DSIH_IMM_ATOMIC_IMIN: ++ case VKD3DSIH_IMM_ATOMIC_OR: ++ case VKD3DSIH_IMM_ATOMIC_UMAX: ++ case VKD3DSIH_IMM_ATOMIC_UMIN: ++ case VKD3DSIH_IMM_ATOMIC_XOR: ++ case VKD3DSIH_LABEL: ++ case VKD3DSIH_LOOP: ++ case VKD3DSIH_LIT: ++ case VKD3DSIH_M3x2: ++ case VKD3DSIH_M3x3: ++ case VKD3DSIH_M3x4: ++ case VKD3DSIH_M4x3: ++ case VKD3DSIH_M4x4: ++ case VKD3DSIH_NOP: ++ /* NRM writemask must be .xyz or .xyzw. */ ++ case VKD3DSIH_NRM: ++ case VKD3DSIH_PHASE: ++ case VKD3DSIH_REP: ++ case VKD3DSIH_RET: ++ case VKD3DSIH_RETP: ++ /* Store instructions always require a trivial writemask. 
*/ ++ case VKD3DSIH_STORE_RAW: ++ case VKD3DSIH_STORE_STRUCTURED: ++ case VKD3DSIH_STORE_UAV_TYPED: ++ case VKD3DSIH_SWITCH: ++ case VKD3DSIH_SWITCH_MONOLITHIC: ++ case VKD3DSIH_SYNC: ++ case VKD3DSIH_TEX: ++ case VKD3DSIH_TEXBEM: ++ case VKD3DSIH_TEXBEML: ++ case VKD3DSIH_TEXCOORD: ++ case VKD3DSIH_TEXCRD: ++ case VKD3DSIH_TEXDEPTH: ++ case VKD3DSIH_TEXDP3: ++ case VKD3DSIH_TEXDP3TEX: ++ case VKD3DSIH_TEXKILL: ++ case VKD3DSIH_TEXLD: ++ case VKD3DSIH_TEXLDD: ++ case VKD3DSIH_TEXLDL: ++ case VKD3DSIH_TEXM3x2DEPTH: ++ case VKD3DSIH_TEXM3x2PAD: ++ case VKD3DSIH_TEXM3x2TEX: ++ case VKD3DSIH_TEXM3x3: ++ case VKD3DSIH_TEXM3x3DIFF: ++ case VKD3DSIH_TEXM3x3PAD: ++ case VKD3DSIH_TEXM3x3SPEC: ++ case VKD3DSIH_TEXM3x3TEX: ++ case VKD3DSIH_TEXM3x3VSPEC: ++ case VKD3DSIH_TEXREG2AR: ++ case VKD3DSIH_TEXREG2GB: ++ case VKD3DSIH_TEXREG2RGB: ++ case VKD3DSIH_WAVE_ACTIVE_BALLOT: ++ case VKD3DSIH_WAVE_ALL_BIT_COUNT: ++ case VKD3DSIH_WAVE_IS_FIRST_LANE: ++ case VKD3DSIH_WAVE_PREFIX_BIT_COUNT: ++ return false; ++ ++ case VKD3DSIH_QUAD_READ_LANE_AT: ++ case VKD3DSIH_WAVE_READ_LANE_AT: ++ return (src_idx == 0); ++ ++ /* sm4 resource instructions are an odd case, since they're not actually ++ * per-component. However, the "swizzle" placed on the resource allows ++ * arbitrary destination writemasks to be used. ++ * ++ * This means that for the purposes of the "remapping" done by ++ * temp_allocator_set_dst(), we can basically treat those sources as ++ * "mapped", altering them when we reassign the destination writemask. */ ++ ++ /* FIXME: The documentation seems to say that these instructions behave ++ * this way, but is it correct? ++ * (It's silent about EVAL_*, but presumably they behave the same way.) 
*/ ++ case VKD3DSIH_EVAL_CENTROID: ++ case VKD3DSIH_EVAL_SAMPLE_INDEX: ++ case VKD3DSIH_SAMPLE_INFO: ++ case VKD3DSIH_SAMPLE_POS: ++ return (src_idx == 0); ++ case VKD3DSIH_GATHER4: ++ case VKD3DSIH_GATHER4_C: ++ case VKD3DSIH_GATHER4_C_S: ++ case VKD3DSIH_GATHER4_S: ++ case VKD3DSIH_LD: ++ case VKD3DSIH_LD2DMS: ++ case VKD3DSIH_LD2DMS_S: ++ case VKD3DSIH_LD_RAW: ++ case VKD3DSIH_LD_RAW_S: ++ case VKD3DSIH_LD_S: ++ case VKD3DSIH_LD_UAV_TYPED: ++ case VKD3DSIH_LD_UAV_TYPED_S: ++ case VKD3DSIH_LOD: ++ case VKD3DSIH_RESINFO: ++ case VKD3DSIH_SAMPLE: ++ case VKD3DSIH_SAMPLE_B: ++ case VKD3DSIH_SAMPLE_B_CL_S: ++ case VKD3DSIH_SAMPLE_C: ++ case VKD3DSIH_SAMPLE_CL_S: ++ case VKD3DSIH_SAMPLE_C_CL_S: ++ case VKD3DSIH_SAMPLE_C_LZ: ++ case VKD3DSIH_SAMPLE_C_LZ_S: ++ case VKD3DSIH_SAMPLE_GRAD: ++ case VKD3DSIH_SAMPLE_GRAD_CL_S: ++ case VKD3DSIH_SAMPLE_LOD: ++ case VKD3DSIH_SAMPLE_LOD_S: ++ return (src_idx == 1); ++ case VKD3DSIH_GATHER4_PO: ++ case VKD3DSIH_GATHER4_PO_C: ++ case VKD3DSIH_GATHER4_PO_C_S: ++ case VKD3DSIH_GATHER4_PO_S: ++ case VKD3DSIH_LD_STRUCTURED: ++ case VKD3DSIH_LD_STRUCTURED_S: ++ return (src_idx == 2); ++ ++ case VKD3DSIH_INVALID: ++ case VKD3DSIH_COUNT: ++ break; ++ } ++ ++ vkd3d_unreachable(); ++} ++ ++struct liveness_tracker ++{ ++ struct liveness_tracker_reg ++ { ++ bool written; ++ bool fixed_mask; ++ uint8_t mask; ++ unsigned int first_write, last_access; ++ } *ssa_regs; ++}; ++ ++static void liveness_track_src(struct liveness_tracker *tracker, ++ struct vkd3d_shader_src_param *src, unsigned int index) ++{ ++ for (unsigned int k = 0; k < src->reg.idx_count; ++k) ++ { ++ if (src->reg.idx[k].rel_addr) ++ liveness_track_src(tracker, src->reg.idx[k].rel_addr, index); ++ } ++ ++ if (src->reg.type == VKD3DSPR_SSA) ++ tracker->ssa_regs[src->reg.idx[0].offset].last_access = index; ++} ++ ++static void liveness_track_dst(struct liveness_tracker *tracker, struct vkd3d_shader_dst_param *dst, ++ unsigned int index, const struct vkd3d_shader_version *version, 
enum vkd3d_shader_opcode opcode) ++{ ++ struct liveness_tracker_reg *reg; ++ ++ for (unsigned int k = 0; k < dst->reg.idx_count; ++k) ++ { ++ if (dst->reg.idx[k].rel_addr) ++ liveness_track_src(tracker, dst->reg.idx[k].rel_addr, index); ++ } ++ ++ if (dst->reg.type == VKD3DSPR_SSA) ++ reg = &tracker->ssa_regs[dst->reg.idx[0].offset]; ++ else ++ return; ++ ++ if (!reg->written) ++ reg->first_write = index; ++ reg->last_access = index; ++ reg->written = true; ++ reg->mask |= dst->write_mask; ++ ++ switch (opcode) ++ { ++ case VKD3DSIH_BEM: ++ case VKD3DSIH_CRS: ++ case VKD3DSIH_DST: ++ case VKD3DSIH_LIT: ++ case VKD3DSIH_M3x2: ++ case VKD3DSIH_M3x3: ++ case VKD3DSIH_M3x4: ++ case VKD3DSIH_M4x3: ++ case VKD3DSIH_M4x4: ++ case VKD3DSIH_NRM: ++ case VKD3DSIH_TEX: ++ case VKD3DSIH_TEXBEM: ++ case VKD3DSIH_TEXBEML: ++ case VKD3DSIH_TEXCOORD: ++ case VKD3DSIH_TEXCRD: ++ case VKD3DSIH_TEXDEPTH: ++ case VKD3DSIH_TEXDP3: ++ case VKD3DSIH_TEXDP3TEX: ++ case VKD3DSIH_TEXLD: ++ case VKD3DSIH_TEXLDD: ++ case VKD3DSIH_TEXLDL: ++ case VKD3DSIH_TEXM3x2DEPTH: ++ case VKD3DSIH_TEXM3x2PAD: ++ case VKD3DSIH_TEXM3x2TEX: ++ case VKD3DSIH_TEXM3x3: ++ case VKD3DSIH_TEXM3x3DIFF: ++ case VKD3DSIH_TEXM3x3PAD: ++ case VKD3DSIH_TEXM3x3SPEC: ++ case VKD3DSIH_TEXM3x3TEX: ++ case VKD3DSIH_TEXM3x3VSPEC: ++ case VKD3DSIH_TEXREG2AR: ++ case VKD3DSIH_TEXREG2GB: ++ case VKD3DSIH_TEXREG2RGB: ++ /* All of these instructions have fixed destinations—they can ++ * in some cases be masked, but the destination cannot be ++ * reallocated to a different set of components. */ ++ case VKD3DSIH_IDIV: ++ case VKD3DSIH_IMUL: ++ case VKD3DSIH_SWAPC: ++ case VKD3DSIH_UDIV: ++ case VKD3DSIH_UMUL: ++ /* These instructions don't have fixed destinations, but they have ++ * multiple destination and are per-component, meaning that the ++ * destination masks for each component have to match. ++ * This is a bit tricky to pull off, so for now we just force ++ * these to have a fixed mask as well. 
++ * This assumes that the destination masks are equal to each other ++ * to begin with! */ ++ reg->fixed_mask = true; ++ break; ++ ++ case VKD3DSIH_SINCOS: ++ /* sm1 has a fixed destination like LIT, NRM. ++ * sm4 is two-component and masked, like IMUL. */ ++ if (version->major < 3) ++ { ++ /* We have the additional constraint here that sincos scratches ++ * whichever components of .xyz it doesn't write. We can achieve ++ * this by simply adding those components to reg->mask. */ ++ reg->mask |= 0x7; ++ } ++ reg->fixed_mask = true; ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++static void liveness_tracker_cleanup(struct liveness_tracker *tracker) ++{ ++ vkd3d_free(tracker->ssa_regs); ++} ++ ++static enum vkd3d_result track_liveness(struct vsir_program *program, struct liveness_tracker *tracker) ++{ ++ struct liveness_tracker_reg *regs; ++ unsigned int loop_depth = 0; ++ unsigned int loop_start = 0; ++ ++ memset(tracker, 0, sizeof(*tracker)); ++ ++ if (!(regs = vkd3d_calloc(program->ssa_count, sizeof(*regs)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ tracker->ssa_regs = regs; ++ ++ for (unsigned int i = 0; i < program->instructions.count; ++i) ++ { ++ const struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ ++ if (ins->opcode == VKD3DSIH_LOOP || ins->opcode == VKD3DSIH_REP) ++ { ++ if (!loop_depth++) ++ loop_start = i; ++ } ++ else if (ins->opcode == VKD3DSIH_ENDLOOP || ins->opcode == VKD3DSIH_ENDREP) ++ { ++ if (!--loop_depth) ++ { ++ /* Go through the allocator, find anything that was touched ++ * during the loop, and extend its liveness to the whole range ++ * of the loop. ++ * This isn't very sophisticated (e.g. we could try to avoid ++ * this for registers first written inside a loop body and only ++ * ever read inside one), but many of the cases that matter are ++ * affected by other optimizations such as copy propagation ++ * anyway. ++ * ++ * This is overkill for SSA registers. 
If an SSA register is ++ * written in loop L and last read in L, we don't need to touch ++ * its liveness. If it's last read in an inferior loop of L, we ++ * only need to extend its last-read to the end of L. (And it ++ * should be illegal for an SSA value to be read in a block ++ * containing L.) ++ * We don't try to perform this optimization yet, in the name of ++ * maximal simplicity, and also because this code is intended to ++ * be extended to non-SSA values. */ ++ for (unsigned int j = 0; j < program->ssa_count; ++j) ++ { ++ struct liveness_tracker_reg *reg = &tracker->ssa_regs[j]; ++ ++ if (reg->first_write > loop_start) ++ reg->first_write = loop_start; ++ if (reg->last_access < i) ++ reg->last_access = i; ++ } ++ } ++ } ++ ++ for (unsigned int j = 0; j < ins->dst_count; ++j) ++ liveness_track_dst(tracker, &ins->dst[j], i, &program->shader_version, ins->opcode); ++ for (unsigned int j = 0; j < ins->src_count; ++j) ++ liveness_track_src(tracker, &ins->src[j], i); ++ } ++ ++ return VKD3D_OK; ++} ++ ++struct temp_allocator ++{ ++ struct vkd3d_shader_message_context *message_context; ++ struct temp_allocator_reg ++ { ++ uint8_t allocated_mask; ++ uint32_t temp_id; ++ } *ssa_regs; ++ size_t allocated_ssa_count; ++ enum vkd3d_result result; ++}; ++ ++static uint8_t get_available_writemask(const struct temp_allocator *allocator, ++ struct liveness_tracker *tracker, unsigned int first_write, unsigned int last_access, uint32_t temp_id) ++{ ++ uint8_t writemask = VKD3DSP_WRITEMASK_ALL; ++ ++ for (size_t i = 0; i < allocator->allocated_ssa_count; ++i) ++ { ++ const struct temp_allocator_reg *reg = &allocator->ssa_regs[i]; ++ const struct liveness_tracker_reg *liveness_reg = &tracker->ssa_regs[i]; ++ ++ /* We do not overlap if first write == last read: ++ * this is the case where we are allocating the result of that ++ * expression, e.g. "add r0, r0, r1". 
*/ ++ ++ if (reg->temp_id == temp_id ++ && first_write < liveness_reg->last_access ++ && last_access > liveness_reg->first_write) ++ writemask &= ~reg->allocated_mask; ++ ++ if (!writemask) ++ return writemask; ++ } ++ ++ return writemask; ++} ++ ++static void temp_allocator_allocate(struct temp_allocator *allocator, struct liveness_tracker *tracker, ++ struct temp_allocator_reg *reg, const struct liveness_tracker_reg *liveness_reg, uint32_t base_id) ++{ ++ if (!liveness_reg->written) ++ return; ++ ++ for (uint32_t id = base_id;; ++id) ++ { ++ uint8_t available_mask = get_available_writemask(allocator, tracker, ++ liveness_reg->first_write, liveness_reg->last_access, id); ++ ++ if (liveness_reg->fixed_mask) ++ { ++ if ((available_mask & liveness_reg->mask) == liveness_reg->mask) ++ { ++ reg->temp_id = id; ++ reg->allocated_mask = liveness_reg->mask; ++ return; ++ } ++ } ++ else ++ { ++ /* For SSA values the mask is always zero-based and contiguous. ++ * We don't correctly handle cases where it's not, currently. 
*/ ++ VKD3D_ASSERT((liveness_reg->mask | (liveness_reg->mask - 1)) == liveness_reg->mask); ++ ++ if (vkd3d_popcount(available_mask) >= vkd3d_popcount(liveness_reg->mask)) ++ { ++ reg->temp_id = id; ++ reg->allocated_mask = vsir_combine_write_masks(available_mask, liveness_reg->mask); ++ return; ++ } ++ } ++ } ++} ++ ++static void temp_allocator_set_src(struct temp_allocator *allocator, struct vkd3d_shader_src_param *src) ++{ ++ struct temp_allocator_reg *reg; ++ ++ for (unsigned int k = 0; k < src->reg.idx_count; ++k) ++ { ++ if (src->reg.idx[k].rel_addr) ++ temp_allocator_set_src(allocator, src->reg.idx[k].rel_addr); ++ } ++ ++ if (src->reg.type == VKD3DSPR_SSA) ++ reg = &allocator->ssa_regs[src->reg.idx[0].offset]; ++ else ++ return; ++ ++ src->reg.type = VKD3DSPR_TEMP; ++ src->reg.idx[0].offset = reg->temp_id; ++ src->swizzle = vsir_combine_swizzles(vsir_swizzle_from_writemask(reg->allocated_mask), src->swizzle); ++} ++ ++static uint32_t vsir_map_swizzle(uint32_t swizzle, unsigned int writemask) ++{ ++ unsigned int src_component = 0; ++ uint32_t ret = 0; ++ ++ /* Leave replicate swizzles alone; some instructions need them. 
*/ ++ if (swizzle == VKD3D_SHADER_SWIZZLE(X, X, X, X) ++ || swizzle == VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y) ++ || swizzle == VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z) ++ || swizzle == VKD3D_SHADER_SWIZZLE(W, W, W, W)) ++ return swizzle; ++ ++ for (unsigned int dst_component = 0; dst_component < VKD3D_VEC4_SIZE; ++dst_component) ++ { ++ if (writemask & (1u << dst_component)) ++ vsir_swizzle_set_component(&ret, dst_component, vsir_swizzle_get_component(swizzle, src_component++)); ++ } ++ return ret; ++} ++ ++static void vsir_remap_immconst(struct vkd3d_shader_src_param *src, unsigned int writemask) ++{ ++ union vsir_immediate_constant prev = src->reg.u; ++ unsigned int src_component = 0; ++ ++ for (unsigned int dst_component = 0; dst_component < VKD3D_VEC4_SIZE; ++dst_component) ++ { ++ if (writemask & (1u << dst_component)) ++ src->reg.u.immconst_u32[dst_component] = prev.immconst_u32[src_component++]; ++ } ++} ++ ++static void vsir_remap_immconst64(struct vkd3d_shader_src_param *src, unsigned int writemask) ++{ ++ if (writemask == (VKD3DSP_WRITEMASK_2 | VKD3DSP_WRITEMASK_3)) ++ src->reg.u.immconst_u64[1] = src->reg.u.immconst_u64[0]; ++} ++ ++static bool vsir_opcode_is_double(enum vkd3d_shader_opcode opcode) ++{ ++ switch (opcode) ++ { ++ case VKD3DSIH_DADD: ++ case VKD3DSIH_DDIV: ++ case VKD3DSIH_DFMA: ++ case VKD3DSIH_DMAX: ++ case VKD3DSIH_DMIN: ++ case VKD3DSIH_DMOV: ++ case VKD3DSIH_DMOVC: ++ case VKD3DSIH_DMUL: ++ case VKD3DSIH_DRCP: ++ case VKD3DSIH_DEQO: ++ case VKD3DSIH_DGEO: ++ case VKD3DSIH_DLT: ++ case VKD3DSIH_DNE: ++ case VKD3DSIH_DTOF: ++ case VKD3DSIH_DTOI: ++ case VKD3DSIH_DTOU: ++ case VKD3DSIH_FTOD: ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++static void temp_allocator_set_dst(struct temp_allocator *allocator, ++ struct vkd3d_shader_dst_param *dst, const struct vkd3d_shader_instruction *ins) ++{ ++ struct temp_allocator_reg *reg; ++ ++ for (unsigned int k = 0; k < dst->reg.idx_count; ++k) ++ { ++ if (dst->reg.idx[k].rel_addr) ++ 
temp_allocator_set_src(allocator, dst->reg.idx[k].rel_addr); ++ } ++ ++ if (dst->reg.type == VKD3DSPR_SSA) ++ reg = &allocator->ssa_regs[dst->reg.idx[0].offset]; ++ else ++ return; ++ ++ dst->reg.type = VKD3DSPR_TEMP; ++ dst->reg.idx[0].offset = reg->temp_id; ++ if (reg->allocated_mask != dst->write_mask) ++ { ++ dst->write_mask = reg->allocated_mask; ++ ++ if (vsir_opcode_is_double(ins->opcode)) ++ { ++ vkd3d_shader_error(allocator->message_context, &ins->location, ++ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Doubles are not currently handled."); ++ allocator->result = VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ ++ for (unsigned int i = 0; i < ins->src_count; ++i) ++ { ++ struct vkd3d_shader_src_param *src = &ins->src[i]; ++ ++ if (vsir_src_is_masked(ins->opcode, i)) ++ { ++ if (src->reg.type == VKD3DSPR_IMMCONST) ++ vsir_remap_immconst(src, dst->write_mask); ++ else if (src->reg.type == VKD3DSPR_IMMCONST64) ++ vsir_remap_immconst64(src, dst->write_mask); ++ else ++ src->swizzle = vsir_map_swizzle(src->swizzle, dst->write_mask); ++ } ++ } ++ } ++} ++ ++enum vkd3d_result vsir_allocate_temp_registers(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ struct temp_allocator allocator = {0}; ++ struct temp_allocator_reg *regs; ++ struct liveness_tracker tracker; ++ uint32_t temp_count = 0; ++ enum vkd3d_result ret; ++ ++ if (!program->ssa_count) ++ return VKD3D_OK; ++ ++ if ((ret = track_liveness(program, &tracker))) ++ return ret; ++ ++ if (!(regs = vkd3d_calloc(program->ssa_count, sizeof(*regs)))) ++ { ++ liveness_tracker_cleanup(&tracker); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ allocator.message_context = message_context; ++ allocator.ssa_regs = regs; ++ ++ for (unsigned int i = 0; i < program->ssa_count; ++i) ++ { ++ const struct liveness_tracker_reg *liveness_reg = &tracker.ssa_regs[i]; ++ struct temp_allocator_reg *reg = &allocator.ssa_regs[i]; ++ ++ temp_allocator_allocate(&allocator, &tracker, reg, liveness_reg, 
program->temp_count); ++ TRACE("Allocated r%u%s to sr%u (liveness %u-%u).\n", ++ reg->temp_id, debug_vsir_writemask(reg->allocated_mask), i, ++ liveness_reg->first_write, liveness_reg->last_access); ++ ++allocator.allocated_ssa_count; ++ } ++ ++ for (unsigned int i = 0; i < program->instructions.count; ++i) ++ { ++ const struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ ++ /* Make sure we do the srcs first; setting the dst writemask may need ++ * to remap their swizzles. */ ++ for (unsigned int j = 0; j < ins->src_count; ++j) ++ temp_allocator_set_src(&allocator, &ins->src[j]); ++ for (unsigned int j = 0; j < ins->dst_count; ++j) ++ temp_allocator_set_dst(&allocator, &ins->dst[j], ins); ++ } ++ ++ /* Rewrite dcl_temps to reflect the new temp count. ++ * Note that dcl_temps appears once per phase, and should reflect only the ++ * number of temps needed by that phase. ++ * Therefore we iterate backwards through the shader, finding the maximum ++ * register used by any instruction, update the dcl_temps at the beginning ++ * of each phase, and then reset the temp count back to 0 for the next ++ * phase (if any). */ ++ for (int i = program->instructions.count - 1; i >= 0; --i) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ ++ if (ins->opcode == VKD3DSIH_DCL_TEMPS) ++ { ++ ins->declaration.count = temp_count; ++ temp_count = 0; ++ continue; ++ } ++ if (temp_count && program->shader_version.major >= 4 ++ && (ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE ++ || ins->opcode == VKD3DSIH_HS_FORK_PHASE ++ || ins->opcode == VKD3DSIH_HS_JOIN_PHASE)) ++ { ++ /* The phase didn't have a dcl_temps instruction, but we added ++ * temps here, so we need to insert one. 
*/ ++ if (!shader_instruction_array_insert_at(&program->instructions, i + 1, 1)) ++ { ++ vkd3d_free(regs); ++ liveness_tracker_cleanup(&tracker); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ ins = &program->instructions.elements[i + 1]; ++ vsir_instruction_init(ins, &program->instructions.elements[i].location, VKD3DSIH_DCL_TEMPS); ++ ins->declaration.count = temp_count; ++ temp_count = 0; ++ continue; ++ } ++ ++ /* No need to check sources. If we've produced an unwritten source then ++ * that's a bug somewhere in this pass. */ ++ for (unsigned int j = 0; j < ins->dst_count; ++j) ++ { ++ if (ins->dst[j].reg.type == VKD3DSPR_TEMP) ++ { ++ temp_count = max(temp_count, ins->dst[j].reg.idx[0].offset + 1); ++ program->temp_count = max(program->temp_count, temp_count); ++ } ++ } ++ } ++ ++ if (temp_count && program->shader_version.major >= 4) ++ { ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!shader_instruction_array_insert_at(&program->instructions, 0, 1)) ++ { ++ vkd3d_free(regs); ++ liveness_tracker_cleanup(&tracker); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ ins = &program->instructions.elements[0]; ++ vsir_instruction_init(ins, &program->instructions.elements[1].location, VKD3DSIH_DCL_TEMPS); ++ ins->declaration.count = temp_count; ++ } ++ ++ program->ssa_count = 0; ++ ++ vkd3d_free(regs); ++ liveness_tracker_cleanup(&tracker); ++ return allocator.result; ++} ++ + struct validation_context + { + struct vkd3d_shader_message_context *message_context; +@@ -8280,10 +9213,6 @@ static void vsir_validate_descriptor_indices(struct validation_context *ctx, + static void vsir_validate_constbuffer_register(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) + { +- if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, +- "Invalid precision %#x for a CONSTBUFFER register.", reg->precision); +- + if (reg->dimension != VSIR_DIMENSION_VEC4) + validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid dimension %#x for a CONSTBUFFER register.", reg->dimension); +@@ -8795,6 +9724,11 @@ static void vsir_validate_src_param(struct validation_context *ctx, + [VKD3DSPSM_BIASNEG] = {F32_BIT}, + [VKD3DSPSM_SIGN] = {F32_BIT}, + [VKD3DSPSM_SIGNNEG] = {F32_BIT}, ++ [VKD3DSPSM_COMP] = {F32_BIT}, ++ [VKD3DSPSM_X2] = {F32_BIT}, ++ [VKD3DSPSM_X2NEG] = {F32_BIT}, ++ [VKD3DSPSM_DZ] = {F32_BIT}, ++ [VKD3DSPSM_DW] = {F32_BIT}, + }; + vsir_validate_register(ctx, &src->reg); + +@@ -9455,6 +10389,17 @@ static void vsir_validate_elementwise_operation(struct validation_context *ctx, + } + } + ++static void vsir_validate_double_elementwise_operation(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ static const bool types[VKD3D_DATA_COUNT] = ++ { ++ [VKD3D_DATA_DOUBLE] = true, ++ }; ++ ++ vsir_validate_elementwise_operation(ctx, instruction, types); ++} ++ + static void vsir_validate_float_elementwise_operation(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) + { +@@ -9479,6 +10424,52 @@ static void vsir_validate_logic_elementwise_operation(struct validation_context + vsir_validate_elementwise_operation(ctx, instruction, types); + } + ++static void vsir_validate_comparison_operation(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction, const bool types[VKD3D_DATA_COUNT]) ++{ ++ enum vkd3d_data_type dst_data_type, src_data_type; ++ unsigned int i; ++ ++ if (instruction->dst_count < 1) ++ return; ++ ++ dst_data_type = instruction->dst[0].reg.data_type; ++ ++ if (dst_data_type != VKD3D_DATA_UINT && dst_data_type != VKD3D_DATA_BOOL) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid data type %#x for result of comparison operation \"%s\" (%#x).", ++ dst_data_type, vsir_opcode_get_name(instruction->opcode, ""), instruction->opcode); ++ ++ if (instruction->src_count < 1) ++ return; ++ ++ 
src_data_type = instruction->src[0].reg.data_type; ++ ++ if (src_data_type >= VKD3D_DATA_COUNT) ++ return; ++ ++ for (i = 1; i < instruction->src_count; ++i) ++ { ++ if (instruction->src[i].reg.data_type != src_data_type) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Data type %#x for operand %u doesn't match the first operands data type %#x " ++ "for comparison operation \"%s\" (%#x).", ++ instruction->src[i].reg.data_type, i, src_data_type, ++ vsir_opcode_get_name(instruction->opcode, ""), instruction->opcode); ++ } ++} ++ ++static void vsir_validate_double_comparison_operation(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ static const bool types[VKD3D_DATA_COUNT] = ++ { ++ [VKD3D_DATA_DOUBLE] = true, ++ }; ++ ++ vsir_validate_comparison_operation(ctx, instruction, types); ++} ++ + static void vsir_validate_branch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) + { + size_t i; +@@ -10171,6 +11162,16 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ + [VKD3DSIH_ASIN] = {1, 1, vsir_validate_float_elementwise_operation}, + [VKD3DSIH_ATAN] = {1, 1, vsir_validate_float_elementwise_operation}, + [VKD3DSIH_BRANCH] = {0, ~0u, vsir_validate_branch}, ++ [VKD3DSIH_DADD] = {1, 2, vsir_validate_double_elementwise_operation}, ++ [VKD3DSIH_DDIV] = {1, 2, vsir_validate_double_elementwise_operation}, ++ [VKD3DSIH_DFMA] = {1, 3, vsir_validate_double_elementwise_operation}, ++ [VKD3DSIH_DGEO] = {1, 2, vsir_validate_double_comparison_operation}, ++ [VKD3DSIH_DIV] = {1, 2, vsir_validate_float_elementwise_operation}, ++ [VKD3DSIH_DLT] = {1, 2, vsir_validate_double_comparison_operation}, ++ [VKD3DSIH_DMAX] = {1, 2, vsir_validate_double_elementwise_operation}, ++ [VKD3DSIH_DMIN] = {1, 2, vsir_validate_double_elementwise_operation}, ++ [VKD3DSIH_DMOV] = {1, 1, vsir_validate_double_elementwise_operation}, ++ [VKD3DSIH_DMUL] = {1, 2, 
vsir_validate_double_elementwise_operation}, + [VKD3DSIH_HS_CONTROL_POINT_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_HS_DECLS] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_HS_FORK_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, +diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c +index 08519787b0a..ac9f7412d56 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/msl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/msl.c +@@ -55,8 +55,14 @@ struct msl_generator + + struct msl_resource_type_info + { +- size_t read_coord_size; ++ /* The number of coordinates needed to address/sample the resource type. */ ++ size_t coord_size; ++ /* Whether the resource type is an array type. */ + bool array; ++ /* Whether the resource type has a shadow/comparison variant. */ ++ bool comparison; ++ /* The type suffix for the resource type. I.e., the "2d_ms" part of ++ * "texture2d_ms_array" or "depth2d_ms_array". */ + const char *type_suffix; + }; + +@@ -78,17 +84,17 @@ static const struct msl_resource_type_info *msl_get_resource_type_info(enum vkd3 + { + static const struct msl_resource_type_info info[] = + { +- [VKD3D_SHADER_RESOURCE_NONE] = {0, 0, "none"}, +- [VKD3D_SHADER_RESOURCE_BUFFER] = {1, 0, "_buffer"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_1D] = {1, 0, "1d"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_2D] = {2, 0, "2d"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS] = {2, 0, "2d_ms"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_3D] = {3, 0, "3d"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE] = {2, 0, "cube"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY] = {1, 1, "1d_array"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY] = {2, 1, "2d_array"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = {2, 1, "2d_ms_array"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = {2, 1, "cube_array"}, ++ [VKD3D_SHADER_RESOURCE_NONE] = {0, 0, 0, "none"}, ++ [VKD3D_SHADER_RESOURCE_BUFFER] = {1, 0, 0, "_buffer"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_1D] = {1, 0, 0, "1d"}, ++ 
[VKD3D_SHADER_RESOURCE_TEXTURE_2D] = {2, 0, 1, "2d"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS] = {2, 0, 1, "2d_ms"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_3D] = {3, 0, 0, "3d"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE] = {3, 0, 1, "cube"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY] = {1, 1, 0, "1d"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY] = {2, 1, 1, "2d"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = {2, 1, 1, "2d_ms"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = {3, 1, 1, "cube"}, + }; + + if (!t || t >= ARRAY_SIZE(info)) +@@ -228,6 +234,35 @@ static const struct vkd3d_shader_descriptor_binding *msl_get_cbv_binding(const s + return NULL; + } + ++static const struct vkd3d_shader_descriptor_binding *msl_get_sampler_binding(const struct msl_generator *gen, ++ unsigned int register_space, unsigned int register_idx) ++{ ++ const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; ++ const struct vkd3d_shader_resource_binding *binding; ++ unsigned int i; ++ ++ if (!interface_info) ++ return NULL; ++ ++ for (i = 0; i < interface_info->binding_count; ++i) ++ { ++ binding = &interface_info->bindings[i]; ++ ++ if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER) ++ continue; ++ if (binding->register_space != register_space) ++ continue; ++ if (binding->register_index != register_idx) ++ continue; ++ if (!msl_check_shader_visibility(gen, binding->shader_visibility)) ++ continue; ++ ++ return &binding->binding; ++ } ++ ++ return NULL; ++} ++ + static const struct vkd3d_shader_descriptor_binding *msl_get_srv_binding(const struct msl_generator *gen, + unsigned int register_space, unsigned int register_idx, enum vkd3d_shader_resource_type resource_type) + { +@@ -267,11 +302,17 @@ static void msl_print_cbv_name(struct vkd3d_string_buffer *buffer, unsigned int + vkd3d_string_buffer_printf(buffer, "descriptors[%u].buf()", binding); + } + ++static void msl_print_sampler_name(struct vkd3d_string_buffer *buffer, unsigned int binding) ++{ ++ 
vkd3d_string_buffer_printf(buffer, "descriptors[%u].as()", binding); ++} ++ + static void msl_print_srv_name(struct vkd3d_string_buffer *buffer, struct msl_generator *gen, unsigned int binding, +- const struct msl_resource_type_info *resource_type_info, enum vkd3d_data_type resource_data_type) ++ const struct msl_resource_type_info *resource_type_info, enum vkd3d_data_type resource_data_type, bool compare) + { +- vkd3d_string_buffer_printf(buffer, "descriptors[%u].textype_suffix); ++ vkd3d_string_buffer_printf(buffer, "descriptors[%u].as<%s%s%s<", ++ binding, compare ? "depth" : "texture", resource_type_info->type_suffix, ++ resource_type_info->array ? "_array" : ""); + msl_print_resource_datatype(gen, buffer, resource_data_type); + vkd3d_string_buffer_printf(buffer, ">>()"); + } +@@ -877,7 +918,7 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct + "Internal compiler error: Unhandled resource type %#x.", resource_type); + resource_type_info = msl_get_resource_type_info(VKD3D_SHADER_RESOURCE_TEXTURE_2D); + } +- coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->read_coord_size); ++ coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); + + if ((binding = msl_get_srv_binding(gen, resource_space, resource_idx, resource_type))) + { +@@ -895,7 +936,7 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct + read = vkd3d_string_buffer_get(&gen->string_buffers); + + vkd3d_string_buffer_printf(read, "as_type("); +- msl_print_srv_name(read, gen, srv_binding, resource_type_info, data_type); ++ msl_print_srv_name(read, gen, srv_binding, resource_type_info, data_type, false); + vkd3d_string_buffer_printf(read, ".read("); + msl_print_src_with_type(read, gen, &ins->src[0], coord_mask, VKD3D_DATA_UINT); + if (resource_type_info->array) +@@ -920,6 +961,181 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct + msl_dst_cleanup(&dst, 
&gen->string_buffers); + } + ++static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ const struct msl_resource_type_info *resource_type_info; ++ unsigned int resource_id, resource_idx, resource_space; ++ bool bias, compare, comparison_sampler, grad, lod_zero; ++ const struct vkd3d_shader_descriptor_binding *binding; ++ unsigned int sampler_id, sampler_idx, sampler_space; ++ const struct vkd3d_shader_descriptor_info1 *d; ++ enum vkd3d_shader_resource_type resource_type; ++ unsigned int srv_binding, sampler_binding; ++ struct vkd3d_string_buffer *sample; ++ enum vkd3d_data_type data_type; ++ uint32_t coord_mask; ++ struct msl_dst dst; ++ ++ bias = ins->opcode == VKD3DSIH_SAMPLE_B; ++ compare = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; ++ grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD; ++ lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ; ++ ++ if (vkd3d_shader_instruction_has_texel_offset(ins)) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled texel sample offset."); ++ ++ if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr ++ || ins->src[2].reg.idx[0].rel_addr || ins->src[2].reg.idx[1].rel_addr) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, ++ "Descriptor indexing is not supported."); ++ ++ resource_id = ins->src[1].reg.idx[0].offset; ++ resource_idx = ins->src[1].reg.idx[1].offset; ++ if ((d = vkd3d_shader_find_descriptor(&gen->program->descriptors, ++ VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id))) ++ { ++ resource_space = d->register_space; ++ resource_type = d->resource_type; ++ data_type = d->resource_data_type; ++ } ++ else ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Undeclared resource descriptor %u.", resource_id); ++ resource_space = 0; ++ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; ++ data_type = VKD3D_DATA_FLOAT; ++ } ++ ++ if (resource_type == 
VKD3D_SHADER_RESOURCE_BUFFER ++ || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS ++ || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, ++ "Sampling resource type %#x is not supported.", resource_type); ++ ++ if ((resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_1D || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY) ++ && (bias || grad || lod_zero)) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, ++ "Resource type %#x does not support mipmapping.", resource_type); ++ ++ if (!(resource_type_info = msl_get_resource_type_info(resource_type))) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled resource type %#x.", resource_type); ++ resource_type_info = msl_get_resource_type_info(VKD3D_SHADER_RESOURCE_TEXTURE_2D); ++ } ++ coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); ++ ++ if ((binding = msl_get_srv_binding(gen, resource_space, resource_idx, resource_type))) ++ { ++ srv_binding = binding->binding; ++ } ++ else ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, ++ "No descriptor binding specified for SRV %u (index %u, space %u).", ++ resource_id, resource_idx, resource_space); ++ srv_binding = 0; ++ } ++ ++ sampler_id = ins->src[2].reg.idx[0].offset; ++ sampler_idx = ins->src[2].reg.idx[1].offset; ++ if ((d = vkd3d_shader_find_descriptor(&gen->program->descriptors, ++ VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id))) ++ { ++ sampler_space = d->register_space; ++ comparison_sampler = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; ++ ++ if (compare) ++ { ++ if (!comparison_sampler) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Sampler %u is not a comparison sampler.", sampler_id); ++ } ++ else ++ { ++ if (comparison_sampler) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal 
compiler error: Sampler %u is a comparison sampler.", sampler_id); ++ } ++ } ++ else ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Undeclared sampler descriptor %u.", sampler_id); ++ sampler_space = 0; ++ } ++ ++ if ((binding = msl_get_sampler_binding(gen, sampler_space, sampler_idx))) ++ { ++ sampler_binding = binding->binding; ++ } ++ else ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, ++ "No descriptor binding specified for sampler %u (index %u, space %u).", ++ sampler_id, sampler_idx, sampler_space); ++ sampler_binding = 0; ++ } ++ ++ msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ sample = vkd3d_string_buffer_get(&gen->string_buffers); ++ ++ if (ins->dst[0].reg.data_type == VKD3D_DATA_UINT) ++ vkd3d_string_buffer_printf(sample, "as_type("); ++ msl_print_srv_name(sample, gen, srv_binding, resource_type_info, data_type, compare); ++ if (compare) ++ vkd3d_string_buffer_printf(sample, ".sample_compare("); ++ else ++ vkd3d_string_buffer_printf(sample, ".sample("); ++ msl_print_sampler_name(sample, sampler_binding); ++ vkd3d_string_buffer_printf(sample, ", "); ++ msl_print_src_with_type(sample, gen, &ins->src[0], coord_mask, ins->src[0].reg.data_type); ++ if (resource_type_info->array) ++ { ++ vkd3d_string_buffer_printf(sample, ", uint("); ++ msl_print_src_with_type(sample, gen, &ins->src[0], coord_mask + 1, ins->src[0].reg.data_type); ++ vkd3d_string_buffer_printf(sample, ")"); ++ } ++ if (compare) ++ { ++ if (!resource_type_info->comparison) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, ++ "Comparison samplers are not supported with resource type %#x.", resource_type); ++ vkd3d_string_buffer_printf(sample, ", "); ++ msl_print_src_with_type(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); ++ } ++ if (grad) ++ { ++ vkd3d_string_buffer_printf(sample, ", gradient%s(", resource_type_info->type_suffix); ++ msl_print_src_with_type(sample, gen, 
&ins->src[3], coord_mask, ins->src[3].reg.data_type); ++ vkd3d_string_buffer_printf(sample, ", "); ++ msl_print_src_with_type(sample, gen, &ins->src[4], coord_mask, ins->src[4].reg.data_type); ++ vkd3d_string_buffer_printf(sample, ")"); ++ } ++ if (lod_zero) ++ { ++ vkd3d_string_buffer_printf(sample, ", level(0.0f)"); ++ } ++ if (bias) ++ { ++ vkd3d_string_buffer_printf(sample, ", bias("); ++ msl_print_src_with_type(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); ++ vkd3d_string_buffer_printf(sample, ")"); ++ } ++ vkd3d_string_buffer_printf(sample, ")"); ++ if (ins->dst[0].reg.data_type == VKD3D_DATA_UINT) ++ vkd3d_string_buffer_printf(sample, ")"); ++ if (!compare) ++ msl_print_swizzle(sample, ins->src[1].swizzle, ins->dst[0].write_mask); ++ ++ msl_print_assignment(gen, &dst, "%s", sample->buffer); ++ ++ vkd3d_string_buffer_release(&gen->string_buffers, sample); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ + static void msl_unary_op(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) + { + struct msl_src src; +@@ -1086,6 +1302,13 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d + case VKD3DSIH_FTOU: + msl_cast(gen, ins, "uint"); + break; ++ case VKD3DSIH_SAMPLE: ++ case VKD3DSIH_SAMPLE_B: ++ case VKD3DSIH_SAMPLE_C: ++ case VKD3DSIH_SAMPLE_C_LZ: ++ case VKD3DSIH_SAMPLE_GRAD: ++ msl_sample(gen, ins); ++ break; + case VKD3DSIH_GEO: + case VKD3DSIH_IGE: + msl_relop(gen, ins, ">="); +@@ -1631,7 +1854,7 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader + " const device void *ptr;\n" + "\n" + " template\n" +- " constant T &tex() constant\n" ++ " constant T &as() constant\n" + " {\n" + " return reinterpret_cast(this->ptr);\n" + " }\n" +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 0413cd7c344..2bf6f5d9363 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ 
b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -3522,8 +3522,12 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind + goto done; + } + +- resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER +- ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; ++ if (resource_type == VKD3D_SHADER_RESOURCE_NONE) ++ resource_type_flag = 0; ++ else if (resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ++ resource_type_flag = VKD3D_SHADER_BINDING_FLAG_BUFFER; ++ else ++ resource_type_flag = VKD3D_SHADER_BINDING_FLAG_IMAGE; + + if (is_uav_counter) + { +@@ -3567,7 +3571,7 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind + { + const struct vkd3d_shader_resource_binding *current = &shader_interface->bindings[i]; + +- if (!(current->flags & resource_type_flag)) ++ if ((current->flags & resource_type_flag) != resource_type_flag) + continue; + + if (!spirv_compiler_check_shader_visibility(compiler, current->shader_visibility)) +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index ae3fa1650bf..01af2f6ebbd 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -4463,6 +4463,9 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struc + size_t i; + int ret; + ++ if ((ret = vsir_allocate_temp_registers(program, message_context))) ++ return ret; ++ + tpf.program = program; + tpf.buffer = NULL; + tpf.stat = &stat; +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index cefd9f753a1..07e4b913e6f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -1503,6 +1503,8 @@ struct vsir_program + size_t block_name_count; + }; + ++enum vkd3d_result vsir_allocate_temp_registers(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context); + void 
vsir_program_cleanup(struct vsir_program *program); + int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, +@@ -1850,6 +1852,8 @@ static inline uint32_t vsir_write_mask_32_from_64(uint32_t write_mask64) + } + } + ++const char *debug_vsir_writemask(unsigned int writemask); ++ + static inline uint32_t vsir_swizzle_64_from_32(uint32_t swizzle32) + { + switch (swizzle32) +@@ -1899,6 +1903,12 @@ static inline unsigned int vsir_swizzle_get_component(uint32_t swizzle, unsigned + return (swizzle >> VKD3D_SHADER_SWIZZLE_SHIFT(idx)) & VKD3D_SHADER_SWIZZLE_MASK; + } + ++static inline void vsir_swizzle_set_component(uint32_t *swizzle, unsigned int idx, unsigned int component) ++{ ++ *swizzle &= ~(VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(idx)); ++ *swizzle |= component << VKD3D_SHADER_SWIZZLE_SHIFT(idx); ++} ++ + static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t write_mask) + { + unsigned int i, compacted_swizzle = 0; +-- +2.47.2 + diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-5452e79a19c0b895d0c2ac22d3da5595a57.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-5452e79a19c0b895d0c2ac22d3da5595a57.patch new file mode 100644 index 00000000..f099beb0 --- /dev/null +++ b/patches/vkd3d-latest/0005-Updated-vkd3d-to-5452e79a19c0b895d0c2ac22d3da5595a57.patch @@ -0,0 +1,699 @@ +From 27e2b6a5bbd3ab7dde5c4568d11545d28b108996 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Wed, 25 Jun 2025 07:27:01 +1000 +Subject: [PATCH] Updated vkd3d to 5452e79a19c0b895d0c2ac22d3da5595a57425eb. 
+ +--- + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 5 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 6 +- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 35 +--- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 8 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 15 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 68 +++++++- + libs/vkd3d/libs/vkd3d-shader/msl.c | 153 +++++++++++------- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 30 +--- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 2 + + 9 files changed, 187 insertions(+), 135 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index af5552635cb..21feb75d639 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -590,9 +590,10 @@ static void shader_print_double_literal(struct vkd3d_d3d_asm_compiler *compiler, + static void shader_print_int_literal(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, int i, const char *suffix) + { ++ /* Note that we need to handle INT_MIN here as well. */ + if (i < 0) +- vkd3d_string_buffer_printf(&compiler->buffer, "%s-%s%d%s%s", +- prefix, compiler->colours.literal, -i, compiler->colours.reset, suffix); ++ vkd3d_string_buffer_printf(&compiler->buffer, "%s-%s%u%s%s", ++ prefix, compiler->colours.literal, -(unsigned int)i, compiler->colours.reset, suffix); + else + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%d%s%s", + prefix, compiler->colours.literal, i, compiler->colours.reset, suffix); +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 71fa81ec163..194c51a6ffd 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -4516,7 +4516,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty + is_valid = is_int && !is_bool; + break; + case BINOP_MUL: +- op = is_int ? VKD3DSIH_UMUL : (is_double ? VKD3DSIH_DMUL : VKD3DSIH_MUL); ++ op = is_int ? VKD3DSIH_IMUL_LOW : (is_double ? 
VKD3DSIH_DMUL : VKD3DSIH_MUL); + is_valid = !is_bool; + break; + case BINOP_OR: +@@ -4603,7 +4603,7 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, const struct dxil_reco + silence_warning = !(flags & ~(FP_NO_NAN | FP_NO_INF | FP_NO_SIGNED_ZEROS | FP_ALLOW_RECIPROCAL)); + break; + case VKD3DSIH_IADD: +- case VKD3DSIH_UMUL: ++ case VKD3DSIH_IMUL_LOW: + case VKD3DSIH_ISHL: + silence_warning = !(flags & ~(OB_NO_UNSIGNED_WRAP | OB_NO_SIGNED_WRAP)); + break; +@@ -4637,7 +4637,7 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, const struct dxil_reco + + dst->type = a->type; + +- if (handler_idx == VKD3DSIH_UMUL || handler_idx == VKD3DSIH_UDIV || handler_idx == VKD3DSIH_IDIV) ++ if (handler_idx == VKD3DSIH_UDIV || handler_idx == VKD3DSIH_IDIV) + { + struct vkd3d_shader_dst_param *dst_params = instruction_dst_params_alloc(ins, 2, sm6); + unsigned int index = code != BINOP_UDIV && code != BINOP_SDIV; +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index 214632c00eb..40865d842f1 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -1250,37 +1250,6 @@ static void shader_glsl_movc(struct vkd3d_glsl_generator *gen, const struct vkd3 + glsl_dst_cleanup(&dst, &gen->string_buffers); + } + +-static void shader_glsl_mul_extended(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +-{ +- struct glsl_src src[2]; +- struct glsl_dst dst; +- uint32_t mask; +- +- if (ins->dst[0].reg.type != VKD3DSPR_NULL) +- { +- /* FIXME: imulExtended()/umulExtended() from ARB_gpu_shader5/GLSL 4.00+. 
*/ +- mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); +- shader_glsl_print_assignment(gen, &dst, ""); +- glsl_dst_cleanup(&dst, &gen->string_buffers); +- +- vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, +- "Internal compiler error: Unhandled 64-bit integer multiplication."); +- } +- +- if (ins->dst[1].reg.type != VKD3DSPR_NULL) +- { +- mask = glsl_dst_init(&dst, gen, ins, &ins->dst[1]); +- glsl_src_init(&src[0], gen, &ins->src[0], mask); +- glsl_src_init(&src[1], gen, &ins->src[1], mask); +- +- shader_glsl_print_assignment(gen, &dst, "%s * %s", src[0].str->buffer, src[1].str->buffer); +- +- glsl_src_cleanup(&src[1], &gen->string_buffers); +- glsl_src_cleanup(&src[0], &gen->string_buffers); +- glsl_dst_cleanup(&dst, &gen->string_buffers); +- } +-} +- + static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, + enum vkd3d_shader_sysval_semantic sysval, unsigned int idx) + { +@@ -1584,8 +1553,8 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, + case VKD3DSIH_UMIN: + shader_glsl_intrinsic(gen, ins, "min"); + break; +- case VKD3DSIH_IMUL: +- shader_glsl_mul_extended(gen, ins); ++ case VKD3DSIH_IMUL_LOW: ++ shader_glsl_binop(gen, ins, "*"); + break; + case VKD3DSIH_INE: + case VKD3DSIH_NEU: +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 73cd4da906a..678ed324919 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -3831,11 +3831,11 @@ static void dump_ir_string(struct vkd3d_string_buffer *buffer, const struct hlsl + vkd3d_string_buffer_printf(buffer, "\"%s\"", debugstr_a(string->string)); + } + +-static void dump_ir_store(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_store *store) ++static void dump_ir_store(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_store *store) + { + vkd3d_string_buffer_printf(buffer, "= ("); + 
dump_deref(buffer, &store->lhs); +- if (store->writemask != VKD3DSP_WRITEMASK_ALL) ++ if (store->writemask != VKD3DSP_WRITEMASK_ALL && type_is_single_reg(hlsl_deref_get_type(ctx, &store->lhs))) + vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_writemask(store->writemask)); + vkd3d_string_buffer_printf(buffer, " "); + dump_src(buffer, &store->rhs); +@@ -4033,7 +4033,7 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + break; + + case HLSL_IR_STORE: +- dump_ir_store(buffer, hlsl_ir_store(instr)); ++ dump_ir_store(ctx, buffer, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWITCH: +@@ -4124,7 +4124,7 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) + const struct hlsl_type *old_type = old->data_type, *new_type = new->data_type; + struct hlsl_src *src, *next; + +- if (hlsl_is_numeric_type(old_type)) ++ if (old_type && hlsl_is_numeric_type(old_type)) + { + VKD3D_ASSERT(hlsl_is_numeric_type(new_type)); + VKD3D_ASSERT(old_type->e.numeric.dimx == new_type->e.numeric.dimx); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index fa3688fad18..40353abd81b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -2559,6 +2559,13 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); + ++ if ((modifiers & HLSL_STORAGE_GROUPSHARED) && ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE) ++ { ++ modifiers &= ~HLSL_STORAGE_GROUPSHARED; ++ hlsl_warning(ctx, &var->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_MODIFIER, ++ "Ignoring the 'groupshared' modifier in a non-compute shader."); ++ } ++ + if (modifiers & HLSL_STORAGE_GROUPSHARED) + hlsl_fixme(ctx, &var->loc, "Group shared variables."); + +@@ -5144,10 +5151,10 @@ static bool intrinsic_InterlockedXor(struct hlsl_ctx *ctx, 
+ + static void validate_group_barrier_profile(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) + { +- if (ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE || hlsl_version_lt(ctx, 5, 0)) ++ if (ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, +- "Group barriers can only be used in compute shaders 5.0 or higher."); ++ "Group barriers can only be used in compute shaders."); + } + } + +@@ -5171,10 +5178,10 @@ static bool intrinsic_DeviceMemoryBarrier(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + if ((ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE && ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL) +- || hlsl_version_lt(ctx, 5, 0)) ++ || hlsl_version_lt(ctx, 4, 0)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, +- "DeviceMemoryBarrier() can only be used in pixel and compute shaders 5.0 or higher."); ++ "DeviceMemoryBarrier() can only be used in compute and pixel shaders 4.0 or higher."); + } + return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV, loc); + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 8489d0b5ecb..c26077e43d9 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -208,6 +208,7 @@ const char *vsir_opcode_get_name(enum vkd3d_shader_opcode op, const char *error) + [VKD3DSIH_IMM_ATOMIC_UMIN ] = "imm_atomic_umin", + [VKD3DSIH_IMM_ATOMIC_XOR ] = "imm_atomic_xor", + [VKD3DSIH_IMUL ] = "imul", ++ [VKD3DSIH_IMUL_LOW ] = "imul_low", + [VKD3DSIH_INE ] = "ine", + [VKD3DSIH_INEG ] = "ineg", + [VKD3DSIH_ISFINITE ] = "isfinite", +@@ -1145,6 +1146,24 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro + return VKD3D_OK; + } + ++static enum vkd3d_result vsir_program_lower_imul(struct vsir_program *program, ++ struct vkd3d_shader_instruction *imul, struct 
vsir_transformation_context *ctx) ++{ ++ if (imul->dst[0].reg.type != VKD3DSPR_NULL) ++ { ++ vkd3d_shader_error(ctx->message_context, &imul->location, ++ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Extended multiplication is not implemented."); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ ++ imul->dst[0] = imul->dst[1]; ++ imul->dst_count = 1; ++ imul->opcode = VKD3DSIH_IMUL_LOW; ++ ++ return VKD3D_OK; ++} ++ + static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *program, + struct vkd3d_shader_instruction *sincos) + { +@@ -1525,6 +1544,12 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr + vkd3d_shader_instruction_make_nop(ins); + break; + ++ case VKD3DSIH_IMUL: ++ case VKD3DSIH_UMUL: ++ if ((ret = vsir_program_lower_imul(program, ins, ctx)) < 0) ++ return ret; ++ break; ++ + case VKD3DSIH_SINCOS: + if (ins->dst_count == 1) + { +@@ -7841,6 +7866,7 @@ static bool vsir_src_is_masked(enum vkd3d_shader_opcode opcode, unsigned int src + case VKD3DSIH_IMAX: + case VKD3DSIH_IMIN: + case VKD3DSIH_IMUL: ++ case VKD3DSIH_IMUL_LOW: + case VKD3DSIH_INE: + case VKD3DSIH_INEG: + case VKD3DSIH_ISFINITE: +@@ -9610,15 +9636,37 @@ static void vsir_validate_dst_param(struct validation_context *ctx, + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.", + dst->modifiers); + ++ if (dst->modifiers & VKD3DSPDM_SATURATE) ++ { ++ switch (dst->reg.data_type) ++ { ++ case VKD3D_DATA_FLOAT: ++ case VKD3D_DATA_DOUBLE: ++ case VKD3D_DATA_HALF: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid data type %#x for destination with saturate modifier.", dst->reg.data_type); ++ break; ++ ++ } ++ } ++ + switch (dst->shift) + { + case 0: ++ break; ++ + case 1: + case 2: + case 3: + case 13: + case 14: + case 15: ++ if (dst->reg.data_type != VKD3D_DATA_FLOAT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid 
data type %#x for destination with shift.", dst->reg.data_type); + break; + + default: +@@ -9717,7 +9765,7 @@ static void vsir_validate_src_param(struct validation_context *ctx, + { + uint32_t data_type_mask; + } +- src_modifier_data[] = ++ src_modifier_data[VKD3DSPSM_COUNT] = + { + [VKD3DSPSM_NEG] = {F64_BIT | F32_BIT | F16_BIT | I32_BIT | U64_BIT | U32_BIT | U16_BIT}, + [VKD3DSPSM_BIAS] = {F32_BIT}, +@@ -9729,7 +9777,15 @@ static void vsir_validate_src_param(struct validation_context *ctx, + [VKD3DSPSM_X2NEG] = {F32_BIT}, + [VKD3DSPSM_DZ] = {F32_BIT}, + [VKD3DSPSM_DW] = {F32_BIT}, ++ [VKD3DSPSM_ABS] = {F64_BIT | F32_BIT | F16_BIT}, ++ [VKD3DSPSM_ABSNEG] = {F64_BIT | F32_BIT | F16_BIT}, ++ /* This doesn't make a lot of sense. NOT is used only by D3DBC, and ++ * apparently only for IF instructions reading from a CONSTBOOL register. ++ * However, currently the D3DBC parser generates those registers of ++ * type float, so for the moment let's allow that. */ ++ [VKD3DSPSM_NOT] = {F32_BIT}, + }; ++ + vsir_validate_register(ctx, &src->reg); + + if (src->swizzle & ~0x03030303u) +@@ -9744,7 +9800,7 @@ static void vsir_validate_src_param(struct validation_context *ctx, + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.", + src->modifiers); + +- if (src->modifiers < ARRAY_SIZE(src_modifier_data) && src_modifier_data[src->modifiers].data_type_mask) ++ if (src->modifiers != VKD3DSPSM_NONE && src->modifiers < ARRAY_SIZE(src_modifier_data)) + { + if (!(src_modifier_data[src->modifiers].data_type_mask & (1u << src->reg.data_type))) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, +@@ -11172,6 +11228,14 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ + [VKD3DSIH_DMIN] = {1, 2, vsir_validate_double_elementwise_operation}, + [VKD3DSIH_DMOV] = {1, 1, vsir_validate_double_elementwise_operation}, + [VKD3DSIH_DMUL] = {1, 2, vsir_validate_double_elementwise_operation}, ++ 
[VKD3DSIH_DNE] = {1, 2, vsir_validate_double_comparison_operation}, ++ [VKD3DSIH_DRCP] = {1, 1, vsir_validate_double_elementwise_operation}, ++ [VKD3DSIH_DSX] = {1, 1, vsir_validate_float_elementwise_operation}, ++ [VKD3DSIH_DSX_COARSE] = {1, 1, vsir_validate_float_elementwise_operation}, ++ [VKD3DSIH_DSX_FINE] = {1, 1, vsir_validate_float_elementwise_operation}, ++ [VKD3DSIH_DSY] = {1, 1, vsir_validate_float_elementwise_operation}, ++ [VKD3DSIH_DSY_COARSE] = {1, 1, vsir_validate_float_elementwise_operation}, ++ [VKD3DSIH_DSY_FINE] = {1, 1, vsir_validate_float_elementwise_operation}, + [VKD3DSIH_HS_CONTROL_POINT_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_HS_DECLS] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_HS_FORK_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, +diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c +index ac9f7412d56..87c9e989ea4 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/msl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/msl.c +@@ -61,6 +61,8 @@ struct msl_resource_type_info + bool array; + /* Whether the resource type has a shadow/comparison variant. */ + bool comparison; ++ /* Whether the resource type supports texel sample offsets. */ ++ bool offset; + /* The type suffix for the resource type. I.e., the "2d_ms" part of + * "texture2d_ms_array" or "depth2d_ms_array". 
*/ + const char *type_suffix; +@@ -84,17 +86,17 @@ static const struct msl_resource_type_info *msl_get_resource_type_info(enum vkd3 + { + static const struct msl_resource_type_info info[] = + { +- [VKD3D_SHADER_RESOURCE_NONE] = {0, 0, 0, "none"}, +- [VKD3D_SHADER_RESOURCE_BUFFER] = {1, 0, 0, "_buffer"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_1D] = {1, 0, 0, "1d"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_2D] = {2, 0, 1, "2d"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS] = {2, 0, 1, "2d_ms"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_3D] = {3, 0, 0, "3d"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE] = {3, 0, 1, "cube"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY] = {1, 1, 0, "1d"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY] = {2, 1, 1, "2d"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = {2, 1, 1, "2d_ms"}, +- [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = {3, 1, 1, "cube"}, ++ [VKD3D_SHADER_RESOURCE_NONE] = {0, 0, 0, 0, "none"}, ++ [VKD3D_SHADER_RESOURCE_BUFFER] = {1, 0, 0, 0, "_buffer"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_1D] = {1, 0, 0, 0, "1d"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2D] = {2, 0, 1, 1, "2d"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS] = {2, 0, 1, 0, "2d_ms"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_3D] = {3, 0, 0, 1, "3d"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE] = {3, 0, 1, 0, "cube"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY] = {1, 1, 0, 0, "1d"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY] = {2, 1, 1, 1, "2d"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = {2, 1, 1, 0, "2d_ms"}, ++ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = {3, 1, 1, 0, "cube"}, + }; + + if (!t || t >= ARRAY_SIZE(info)) +@@ -865,6 +867,27 @@ static void msl_default(struct msl_generator *gen) + vkd3d_string_buffer_printf(gen->buffer, "default:\n"); + } + ++static void msl_print_texel_offset(struct vkd3d_string_buffer *buffer, struct msl_generator *gen, ++ unsigned int offset_size, const struct vkd3d_shader_texel_offset *offset) ++{ ++ switch (offset_size) ++ { ++ case 1: ++ vkd3d_string_buffer_printf(buffer, "%d", 
offset->u); ++ break; ++ case 2: ++ vkd3d_string_buffer_printf(buffer, "int2(%d, %d)", offset->u, offset->v); ++ break; ++ default: ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Invalid texel offset size %u.", offset_size); ++ /* fall through */ ++ case 3: ++ vkd3d_string_buffer_printf(buffer, "int3(%d, %d, %d)", offset->u, offset->v, offset->w); ++ break; ++ } ++} ++ + static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) + { + const struct msl_resource_type_info *resource_type_info; +@@ -963,9 +986,10 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct + + static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) + { ++ bool bias, compare, comparison_sampler, dynamic_offset, gather, grad, lod, lod_zero, offset; + const struct msl_resource_type_info *resource_type_info; ++ const struct vkd3d_shader_src_param *resource, *sampler; + unsigned int resource_id, resource_idx, resource_space; +- bool bias, compare, comparison_sampler, grad, lod_zero; + const struct vkd3d_shader_descriptor_binding *binding; + unsigned int sampler_id, sampler_idx, sampler_space; + const struct vkd3d_shader_descriptor_info1 *d; +@@ -973,25 +997,31 @@ static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_inst + unsigned int srv_binding, sampler_binding; + struct vkd3d_string_buffer *sample; + enum vkd3d_data_type data_type; ++ unsigned int component_idx; + uint32_t coord_mask; + struct msl_dst dst; + + bias = ins->opcode == VKD3DSIH_SAMPLE_B; +- compare = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; ++ compare = ins->opcode == VKD3DSIH_GATHER4_C || ins->opcode == VKD3DSIH_SAMPLE_C ++ || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; ++ dynamic_offset = ins->opcode == VKD3DSIH_GATHER4_PO; ++ gather = ins->opcode == VKD3DSIH_GATHER4 || ins->opcode == VKD3DSIH_GATHER4_C ++ || ins->opcode == VKD3DSIH_GATHER4_PO; 
+ grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD; ++ lod = ins->opcode == VKD3DSIH_SAMPLE_LOD; + lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ; ++ offset = dynamic_offset || vkd3d_shader_instruction_has_texel_offset(ins); + +- if (vkd3d_shader_instruction_has_texel_offset(ins)) +- msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, +- "Internal compiler error: Unhandled texel sample offset."); ++ resource = &ins->src[1 + dynamic_offset]; ++ sampler = &ins->src[2 + dynamic_offset]; + +- if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr +- || ins->src[2].reg.idx[0].rel_addr || ins->src[2].reg.idx[1].rel_addr) ++ if (resource->reg.idx[0].rel_addr || resource->reg.idx[1].rel_addr ++ || sampler->reg.idx[0].rel_addr || sampler->reg.idx[1].rel_addr) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, + "Descriptor indexing is not supported."); + +- resource_id = ins->src[1].reg.idx[0].offset; +- resource_idx = ins->src[1].reg.idx[1].offset; ++ resource_id = resource->reg.idx[0].offset; ++ resource_idx = resource->reg.idx[1].offset; + if ((d = vkd3d_shader_find_descriptor(&gen->program->descriptors, + VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id))) + { +@@ -1015,10 +1045,15 @@ static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_inst + "Sampling resource type %#x is not supported.", resource_type); + + if ((resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_1D || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY) +- && (bias || grad || lod_zero)) ++ && (bias || grad || lod || lod_zero)) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, + "Resource type %#x does not support mipmapping.", resource_type); + ++ if ((resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_1D || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY ++ || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_3D) && gather) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, ++ "Resource type %#x does not support gather 
operations.", resource_type); ++ + if (!(resource_type_info = msl_get_resource_type_info(resource_type))) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, +@@ -1039,8 +1074,8 @@ static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_inst + srv_binding = 0; + } + +- sampler_id = ins->src[2].reg.idx[0].offset; +- sampler_idx = ins->src[2].reg.idx[1].offset; ++ sampler_id = sampler->reg.idx[0].offset; ++ sampler_idx = sampler->reg.idx[1].offset; + if ((d = vkd3d_shader_find_descriptor(&gen->program->descriptors, + VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id))) + { +@@ -1085,7 +1120,11 @@ static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_inst + if (ins->dst[0].reg.data_type == VKD3D_DATA_UINT) + vkd3d_string_buffer_printf(sample, "as_type("); + msl_print_srv_name(sample, gen, srv_binding, resource_type_info, data_type, compare); +- if (compare) ++ if (gather && compare) ++ vkd3d_string_buffer_printf(sample, ".gather_compare("); ++ else if (gather) ++ vkd3d_string_buffer_printf(sample, ".gather("); ++ else if (compare) + vkd3d_string_buffer_printf(sample, ".sample_compare("); + else + vkd3d_string_buffer_printf(sample, ".sample("); +@@ -1118,17 +1157,40 @@ static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_inst + { + vkd3d_string_buffer_printf(sample, ", level(0.0f)"); + } ++ else if (lod) ++ { ++ vkd3d_string_buffer_printf(sample, ", level("); ++ msl_print_src_with_type(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); ++ vkd3d_string_buffer_printf(sample, ")"); ++ } + if (bias) + { + vkd3d_string_buffer_printf(sample, ", bias("); + msl_print_src_with_type(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); + vkd3d_string_buffer_printf(sample, ")"); + } ++ if (offset) ++ { ++ if (!resource_type_info->offset) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, ++ "Texel sample offsets are not supported with resource type 
%#x.", resource_type); ++ vkd3d_string_buffer_printf(sample, ", "); ++ if (dynamic_offset) ++ msl_print_src_with_type(sample, gen, &ins->src[1], coord_mask, ins->src[1].reg.data_type); ++ else ++ msl_print_texel_offset(sample, gen, resource_type_info->coord_size, &ins->texel_offset); ++ } ++ if (gather && !compare && (component_idx = vsir_swizzle_get_component(sampler->swizzle, 0))) ++ { ++ if (!offset && resource_type_info->offset) ++ vkd3d_string_buffer_printf(sample, ", int2(0)"); ++ vkd3d_string_buffer_printf(sample, ", component::%c", "xyzw"[component_idx]); ++ } + vkd3d_string_buffer_printf(sample, ")"); + if (ins->dst[0].reg.data_type == VKD3D_DATA_UINT) + vkd3d_string_buffer_printf(sample, ")"); +- if (!compare) +- msl_print_swizzle(sample, ins->src[1].swizzle, ins->dst[0].write_mask); ++ if (!compare || gather) ++ msl_print_swizzle(sample, resource->swizzle, ins->dst[0].write_mask); + + msl_print_assignment(gen, &dst, "%s", sample->buffer); + +@@ -1191,37 +1253,6 @@ static void msl_movc(struct msl_generator *gen, const struct vkd3d_shader_instru + msl_dst_cleanup(&dst, &gen->string_buffers); + } + +-static void msl_mul64(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +-{ +- struct msl_src src[2]; +- struct msl_dst dst; +- uint32_t mask; +- +- if (ins->dst[0].reg.type != VKD3DSPR_NULL) +- { +- /* TODO: mulhi(). 
*/ +- mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); +- msl_print_assignment(gen, &dst, ""); +- msl_dst_cleanup(&dst, &gen->string_buffers); +- +- msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, +- "Internal compiler error: Unhandled 64-bit integer multiplication."); +- } +- +- if (ins->dst[1].reg.type != VKD3DSPR_NULL) +- { +- mask = msl_dst_init(&dst, gen, ins, &ins->dst[1]); +- msl_src_init(&src[0], gen, &ins->src[0], mask); +- msl_src_init(&src[1], gen, &ins->src[1], mask); +- +- msl_print_assignment(gen, &dst, "%s * %s", src[0].str->buffer, src[1].str->buffer); +- +- msl_src_cleanup(&src[1], &gen->string_buffers); +- msl_src_cleanup(&src[0], &gen->string_buffers); +- msl_dst_cleanup(&dst, &gen->string_buffers); +- } +-} +- + static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) + { + msl_print_indent(gen->buffer, gen->indent); +@@ -1302,11 +1333,15 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d + case VKD3DSIH_FTOU: + msl_cast(gen, ins, "uint"); + break; ++ case VKD3DSIH_GATHER4: ++ case VKD3DSIH_GATHER4_C: ++ case VKD3DSIH_GATHER4_PO: + case VKD3DSIH_SAMPLE: + case VKD3DSIH_SAMPLE_B: + case VKD3DSIH_SAMPLE_C: + case VKD3DSIH_SAMPLE_C_LZ: + case VKD3DSIH_SAMPLE_GRAD: ++ case VKD3DSIH_SAMPLE_LOD: + msl_sample(gen, ins); + break; + case VKD3DSIH_GEO: +@@ -1337,8 +1372,8 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d + case VKD3DSIH_MIN: + msl_intrinsic(gen, ins, "min"); + break; +- case VKD3DSIH_IMUL: +- msl_mul64(gen, ins); ++ case VKD3DSIH_IMUL_LOW: ++ msl_binop(gen, ins, "*"); + break; + case VKD3DSIH_INE: + case VKD3DSIH_NEU: +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 2bf6f5d9363..7eddf47151b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -7534,6 +7534,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct 
vkd3d_shader_instru + {VKD3DSIH_FREM, SpvOpFRem}, + {VKD3DSIH_FTOD, SpvOpFConvert}, + {VKD3DSIH_IADD, SpvOpIAdd}, ++ {VKD3DSIH_IMUL_LOW, SpvOpIMul}, + {VKD3DSIH_INEG, SpvOpSNegate}, + {VKD3DSIH_ISHL, SpvOpShiftLeftLogical}, + {VKD3DSIH_ISHR, SpvOpShiftRightArithmetic}, +@@ -8019,30 +8020,6 @@ static void spirv_compiler_emit_rcp(struct spirv_compiler *compiler, + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } + +-static void spirv_compiler_emit_imul(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_dst_param *dst = instruction->dst; +- const struct vkd3d_shader_src_param *src = instruction->src; +- uint32_t type_id, val_id, src0_id, src1_id; +- +- if (dst[0].reg.type != VKD3DSPR_NULL) +- FIXME("Extended multiplies not implemented.\n"); /* SpvOpSMulExtended/SpvOpUMulExtended */ +- +- if (dst[1].reg.type == VKD3DSPR_NULL) +- return; +- +- type_id = spirv_compiler_get_type_id_for_dst(compiler, &dst[1]); +- +- src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst[1].write_mask); +- src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst[1].write_mask); +- +- val_id = vkd3d_spirv_build_op_imul(builder, type_id, src0_id, src1_id); +- +- spirv_compiler_emit_store_dst(compiler, &dst[1], val_id); +-} +- + static void spirv_compiler_emit_imad(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { +@@ -10671,6 +10648,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_FREM: + case VKD3DSIH_FTOD: + case VKD3DSIH_IADD: ++ case VKD3DSIH_IMUL_LOW: + case VKD3DSIH_INEG: + case VKD3DSIH_ISHL: + case VKD3DSIH_ISHR: +@@ -10735,10 +10713,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_RCP: + spirv_compiler_emit_rcp(compiler, instruction); + break; +- case VKD3DSIH_IMUL: +- case VKD3DSIH_UMUL: 
+- spirv_compiler_emit_imul(compiler, instruction); +- break; + case VKD3DSIH_IMAD: + spirv_compiler_emit_imad(compiler, instruction); + break; +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 07e4b913e6f..254303f1bbb 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -183,6 +183,7 @@ enum vkd3d_shader_error + VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, + VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305, + VKD3D_SHADER_WARNING_HLSL_IGNORED_DEFAULT_VALUE = 5306, ++ VKD3D_SHADER_WARNING_HLSL_IGNORED_MODIFIER = 5307, + + VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000, + VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND = 6001, +@@ -455,6 +456,7 @@ enum vkd3d_shader_opcode + VKD3DSIH_IMM_ATOMIC_UMIN, + VKD3DSIH_IMM_ATOMIC_XOR, + VKD3DSIH_IMUL, ++ VKD3DSIH_IMUL_LOW, + VKD3DSIH_INE, + VKD3DSIH_INEG, + VKD3DSIH_ISFINITE, +-- +2.47.2 +