From 94caef272e428b3acc48a1cf0e92215c67f6d93b Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Mon, 10 Jul 2023 14:44:21 +1000 Subject: [PATCH] Updated vkd3d-latest patchset Squash to 1.8, then everything else. --- ...71e442af16228a977eebba82224f06f6d02.patch} | 763 +++-- ...2a3413e0f01524f2068bce12100906eb220.patch} | 2595 +++++++++++++++-- ...771e442af16228a977eebba82224f06f6d02.patch | 401 --- ...3aee386e2fdf2e0bf65e7006a380f204a1e5.patch | 2275 --------------- 4 files changed, 2821 insertions(+), 3213 deletions(-) rename patches/vkd3d-latest/{0002-Updated-vkd3d-to-5dee6561c2a9aeeb951cc9786a8b28eebf8.patch => 0001-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch} (98%) rename patches/vkd3d-latest/{0005-Updated-vkd3d-to-2a3413e0f01524f2068bce12100906eb220.patch => 0002-Updated-vkd3d-to-2a3413e0f01524f2068bce12100906eb220.patch} (61%) delete mode 100644 patches/vkd3d-latest/0003-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch delete mode 100644 patches/vkd3d-latest/0004-Update-vkd3d-to-3aee386e2fdf2e0bf65e7006a380f204a1e5.patch diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-5dee6561c2a9aeeb951cc9786a8b28eebf8.patch b/patches/vkd3d-latest/0001-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch similarity index 98% rename from patches/vkd3d-latest/0002-Updated-vkd3d-to-5dee6561c2a9aeeb951cc9786a8b28eebf8.patch rename to patches/vkd3d-latest/0001-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch index 306b48fd..c055c6d5 100644 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-5dee6561c2a9aeeb951cc9786a8b28eebf8.patch +++ b/patches/vkd3d-latest/0001-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch @@ -1,7 +1,8 @@ -From abf5cf7db7444633f69d579cbc73da58ccf73581 Mon Sep 17 00:00:00 2001 +From b6a857d94b7957b4cb3902de424e637764ca2851 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 17 May 2023 08:35:40 +1000 -Subject: [PATCH 2/2] Updated vkd3d to 5dee6561c2a9aeeb951cc9786a8b28eebf8adc4d +Subject: [PATCH 1/2] Update vkd3d to 771e442af16228a977eebba82224f06f6d0202fe + (1.8) --- libs/vkd3d/Makefile.in | 6 +- @@ -11,35 +12,38 @@ Subject: [PATCH 2/2] Updated vkd3d to 5dee6561c2a9aeeb951cc9786a8b28eebf8adc4d libs/vkd3d/include/private/vkd3d_common.h | 3 +- libs/vkd3d/include/private/vkd3d_debug.h | 2 +- libs/vkd3d/include/private/vkd3d_test.h | 432 ++ + libs/vkd3d/include/vkd3d.h | 1 + libs/vkd3d/include/vkd3d_d3d9types.h | 237 + libs/vkd3d/include/vkd3d_d3dcompiler.h | 74 + + libs/vkd3d/include/vkd3d_shader.h | 1 + libs/vkd3d/include/vkd3d_utils.h | 108 + libs/vkd3d/include/vkd3d_windows.h | 284 + libs/vkd3d/libs/vkd3d-common/blob.c | 1 + libs/vkd3d/libs/vkd3d-common/debug.c | 4 +- .../libs/vkd3d-shader/{trace.c => d3d_asm.c} | 25 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1121 +++- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1131 +++- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 1773 +----- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 617 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 229 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 619 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 235 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 8 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 2355 +++++--- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1802 ++++-- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 2362 +++++--- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1821 ++++-- .../libs/vkd3d-shader/hlsl_constant_ops.c | 358 +- libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c | 980 --- libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c | 2531 -------- - libs/vkd3d/libs/vkd3d-shader/ir.c | 1073 ++++ + libs/vkd3d/libs/vkd3d-shader/ir.c | 1072 ++++ libs/vkd3d/libs/vkd3d-shader/preproc.h | 2 +- libs/vkd3d/libs/vkd3d-shader/preproc.l | 6 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 1311 ++--- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 1325 ++--- libs/vkd3d/libs/vkd3d-shader/tpf.c | 5234 +++++++++++++++++ .../libs/vkd3d-shader/vkd3d_shader_main.c | 241 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 91 +- libs/vkd3d/libs/vkd3d/command.c | 160 +- libs/vkd3d/libs/vkd3d/device.c | 205 +- libs/vkd3d/libs/vkd3d/resource.c | 935 ++- + libs/vkd3d/libs/vkd3d/state.c | 4 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 238 +- - 35 files changed, 14574 insertions(+), 8790 deletions(-) + 38 files changed, 14623 insertions(+), 8804 deletions(-) create mode 100644 libs/vkd3d/include/list.h create mode 100644 libs/vkd3d/include/private/list.h create mode 100644 libs/vkd3d/include/private/rbtree.h @@ -1489,6 +1493,18 @@ index 00000000000..081443c4fa6 + vkd3d_run_test(#test_pfn, test_pfn) + +#endif /* __VKD3D_TEST_H */ +diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h +index ff2b15c51dc..72ed3ced671 100644 +--- a/libs/vkd3d/include/vkd3d.h ++++ b/libs/vkd3d/include/vkd3d.h +@@ -76,6 +76,7 @@ enum vkd3d_api_version + VKD3D_API_VERSION_1_5, + VKD3D_API_VERSION_1_6, + VKD3D_API_VERSION_1_7, ++ VKD3D_API_VERSION_1_8, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION), + }; diff --git a/libs/vkd3d/include/vkd3d_d3d9types.h b/libs/vkd3d/include/vkd3d_d3d9types.h new file mode 100644 index 00000000000..75d0461409d @@ -1812,6 +1828,18 @@ index 00000000000..c934835dc0a + +#endif /* __D3DCOMPILER_H__ */ +#endif /* __VKD3D_D3DCOMPILER_H */ +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index 859b8c79792..274241546ea 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h ++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -49,6 +49,7 @@ enum vkd3d_shader_api_version + VKD3D_SHADER_API_VERSION_1_5, + VKD3D_SHADER_API_VERSION_1_6, + VKD3D_SHADER_API_VERSION_1_7, ++ VKD3D_SHADER_API_VERSION_1_8, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION), + }; diff --git a/libs/vkd3d/include/vkd3d_utils.h b/libs/vkd3d/include/vkd3d_utils.h new file mode 100644 index 00000000000..e8462563576 @@ -2342,7 +2370,7 @@ index 6cd2dcb270c..0a821b5c878 100644 end = (const char *)code.code + code.size; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index ed81137d225..c35f8ca0ff8 100644 +index ed81137d225..712613ac13b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -1,4 +1,6 @@ @@ -2500,7 +2528,7 @@ index ed81137d225..c35f8ca0ff8 100644 if (ins->handler_idx == VKD3DSIH_INVALID) { -@@ -988,5 +992,1084 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi +@@ -988,5 +992,1094 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi *parser = &sm1->p; @@ -2794,10 +2822,13 @@ index ed81137d225..c35f8ca0ff8 100644 + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { -+ enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); ++ unsigned int r; + -+ if (!var->semantic.name && var->regs[regset].allocated) ++ for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { ++ if (var->semantic.name || !var->regs[r].allocated) ++ continue; ++ + ++uniform_count; + + if (var->is_param && var->is_uniform) @@ -2834,20 +2865,23 @@ index ed81137d225..c35f8ca0ff8 100644 + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { -+ enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); ++ unsigned int r; + -+ if (!var->semantic.name && var->regs[regset].allocated) ++ for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { ++ if (var->semantic.name || !var->regs[r].allocated) ++ continue; ++ + put_u32(buffer, 0); /* name */ -+ if (regset == HLSL_REGSET_NUMERIC) ++ if (r == HLSL_REGSET_NUMERIC) + { -+ put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); -+ put_u32(buffer, var->data_type->reg_size[regset] / 4); ++ put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); ++ put_u32(buffer, var->data_type->reg_size[r] / 4); + } + else + { -+ put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); -+ put_u32(buffer, var->regs[regset].bind_count); ++ put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); ++ put_u32(buffer, var->regs[r].bind_count); + } + put_u32(buffer, 0); /* type */ + put_u32(buffer, 0); /* FIXME: default value */ @@ -2858,12 +2892,16 @@ index ed81137d225..c35f8ca0ff8 100644 + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { -+ enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); ++ unsigned int r; + -+ if (!var->semantic.name && var->regs[regset].allocated) ++ for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { -+ size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); -+ size_t name_offset; ++ size_t var_offset, name_offset; ++ ++ if (var->semantic.name || !var->regs[r].allocated) ++ continue; ++ ++ var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); + + name_offset = put_string(buffer, var->name); + set_u32(buffer, var_offset, name_offset - ctab_start); @@ -5464,7 +5502,7 @@ index d99ea2e36b6..3e3f06faeb5 100644 { put_u32(buffer, versioned_root_signature_get_parameter_type(desc, i)); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 64d6e87065b..f439c9f3383 100644 +index 64d6e87065b..ba5bcfbfaf0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -112,8 +112,12 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) @@ -6847,7 +6885,14 @@ index 64d6e87065b..f439c9f3383 100644 {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, -@@ -2999,10 +3242,10 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, +@@ -2993,16 +3236,16 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, + + rb_init(&ctx->functions, compare_function_rb); + +- list_init(&ctx->static_initializers); ++ hlsl_block_init(&ctx->static_initializers); + list_init(&ctx->extern_vars); + list_init(&ctx->buffers); if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, @@ -6861,7 +6906,7 @@ index 64d6e87065b..f439c9f3383 100644 ctx->cur_buffer = ctx->globals_buffer; diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index ccbf22a5801..a7ff1f23858 100644 +index ccbf22a5801..bce48e94b24 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -21,10 +21,12 @@ @@ -7119,7 +7164,19 @@ index ccbf22a5801..a7ff1f23858 100644 }; struct hlsl_ctx -@@ -780,8 +832,9 @@ struct hlsl_resource_load_params +@@ -744,9 +796,8 @@ struct hlsl_ctx + struct hlsl_type *Void; + } builtin_types; + +- /* List of the instruction nodes for initializing static variables; linked by the +- * hlsl_ir_node.entry fields. */ +- struct list static_initializers; ++ /* List of the instruction nodes for initializing static variables. */ ++ struct hlsl_block static_initializers; + + /* Dynamic array of constant values that appear in the shader, associated to the 'c' registers. + * Only used for SM1 profiles. */ +@@ -780,8 +831,9 @@ struct hlsl_resource_load_params { struct hlsl_type *format; enum hlsl_resource_load_type type; @@ -7131,7 +7188,7 @@ index ccbf22a5801..a7ff1f23858 100644 }; static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) -@@ -850,6 +903,27 @@ static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node +@@ -850,6 +902,27 @@ static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node return CONTAINING_RECORD(node, struct hlsl_ir_swizzle, node); } @@ -7159,7 +7216,7 @@ index ccbf22a5801..a7ff1f23858 100644 static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node) { src->node = node; -@@ -873,6 +947,15 @@ static inline void *hlsl_alloc(struct hlsl_ctx *ctx, size_t size) +@@ -873,6 +946,15 @@ static inline void *hlsl_alloc(struct hlsl_ctx *ctx, size_t size) return ptr; } @@ -7175,7 +7232,7 @@ index ccbf22a5801..a7ff1f23858 100644 static inline void *hlsl_realloc(struct hlsl_ctx *ctx, void *ptr, size_t size) { void *ret = vkd3d_realloc(ptr, size); -@@ -948,6 +1031,8 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) +@@ -948,6 +1030,8 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) switch (dim) { case HLSL_SAMPLER_DIM_1D: @@ -7184,7 +7241,7 @@ index ccbf22a5801..a7ff1f23858 100644 return 1; case HLSL_SAMPLER_DIM_1DARRAY: case HLSL_SAMPLER_DIM_2D: -@@ -974,11 +1059,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -974,11 +1058,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type); @@ -7198,7 +7255,7 @@ index ccbf22a5801..a7ff1f23858 100644 bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); -@@ -986,6 +1072,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl +@@ -986,6 +1071,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); @@ -7206,7 +7263,7 @@ index ccbf22a5801..a7ff1f23858 100644 bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); void hlsl_cleanup_deref(struct hlsl_deref *deref); -@@ -1012,64 +1099,73 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type); +@@ -1012,64 +1098,73 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type); struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size); struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2); @@ -7303,7 +7360,7 @@ index ccbf22a5801..a7ff1f23858 100644 const struct hlsl_reg_reservation *reg_reservation); void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, -@@ -1101,6 +1197,9 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type); +@@ -1101,6 +1196,9 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type); unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); @@ -7313,7 +7370,7 @@ index ccbf22a5801..a7ff1f23858 100644 unsigned int hlsl_combine_swizzles(unsigned int first, unsigned int second, unsigned int dim); unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second); unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask); -@@ -1109,12 +1208,16 @@ unsigned int hlsl_swizzle_from_writemask(unsigned int writemask); +@@ -1109,12 +1207,17 @@ unsigned int hlsl_swizzle_from_writemask(unsigned int writemask); struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *start, unsigned int *count); @@ -7323,6 +7380,7 @@ index ccbf22a5801..a7ff1f23858 100644 unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); ++bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); +bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), @@ -7380,7 +7438,7 @@ index adff1da04d8..e9ae3ccf3d3 100644 FIXME("Malformed preprocessor line directive?\n"); BEGIN(INITIAL); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index fd1eaf6ec95..209428f761a 100644 +index fd1eaf6ec95..0e07fe578e1 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -85,8 +85,8 @@ struct parse_function @@ -8203,7 +8261,7 @@ index fd1eaf6ec95..209428f761a 100644 return reservation; } -@@ -1122,53 +1092,32 @@ static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) +@@ -1122,53 +1092,37 @@ static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) return list; } @@ -8212,13 +8270,18 @@ index fd1eaf6ec95..209428f761a 100644 + const struct vkd3d_shader_location *loc) { - if (node->data_type->type != HLSL_CLASS_SCALAR) -- return 0; + struct hlsl_ir_constant *constant; + struct hlsl_ir_node *node; + unsigned int ret = 0; ++ bool progress; ++ ++ if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) + return 0; - switch (node->type) -- { ++ do + { - case HLSL_IR_CONSTANT: - { - struct hlsl_ir_constant *constant = hlsl_ir_constant(node); @@ -8241,17 +8304,16 @@ index fd1eaf6ec95..209428f761a 100644 - vkd3d_unreachable(); - } - } -+ if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) -+ return 0; - +- - case HLSL_IR_EXPR: - case HLSL_IR_LOAD: - case HLSL_IR_RESOURCE_LOAD: - case HLSL_IR_SWIZZLE: - FIXME("Unhandled type %s.\n", hlsl_node_type_to_string(node->type)); - return 0; -+ while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL)); ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); ++ progress |= hlsl_copy_propagation_execute(ctx, block); ++ } while (progress); - case HLSL_IR_CALL: - case HLSL_IR_IF: @@ -8277,7 +8339,7 @@ index fd1eaf6ec95..209428f761a 100644 } static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) -@@ -1180,20 +1129,20 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t +@@ -1180,20 +1134,20 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) return true; @@ -8303,7 +8365,7 @@ index fd1eaf6ec95..209428f761a 100644 } /* Both matrices */ -@@ -1226,7 +1175,7 @@ static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hl +@@ -1226,7 +1180,7 @@ static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hl static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct hlsl_type *t2, const struct vkd3d_shader_location *loc, enum hlsl_type_class *type, unsigned int *dimx, unsigned int *dimy) { @@ -8312,7 +8374,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -1237,7 +1186,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct +@@ -1237,7 +1191,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct return false; } @@ -8321,7 +8383,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -1264,17 +1213,17 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct +@@ -1264,17 +1218,17 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct if (t1->dimx == 1 && t1->dimy == 1) { @@ -8342,7 +8404,7 @@ index fd1eaf6ec95..209428f761a 100644 { *type = HLSL_CLASS_MATRIX; *dimx = min(t1->dimx, t2->dimx); -@@ -1284,13 +1233,13 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct +@@ -1284,13 +1238,13 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct { if (t1->dimx * t1->dimy <= t2->dimx * t2->dimy) { @@ -8358,7 +8420,7 @@ index fd1eaf6ec95..209428f761a 100644 *dimx = t2->dimx; *dimy = t2->dimy; } -@@ -1306,55 +1255,50 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, +@@ -1306,55 +1260,50 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *expr; unsigned int i; @@ -8431,7 +8493,7 @@ index fd1eaf6ec95..209428f761a 100644 } if (!(expr = hlsl_new_expr(ctx, op, operands, type, loc))) -@@ -1407,7 +1351,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct +@@ -1407,7 +1356,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; struct hlsl_type *bool_type; @@ -8440,7 +8502,7 @@ index fd1eaf6ec95..209428f761a 100644 arg->data_type->dimx, arg->data_type->dimy); if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) -@@ -1416,20 +1360,27 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct +@@ -1416,20 +1365,27 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct return add_expr(ctx, instrs, op, args, bool_type, loc); } @@ -8474,7 +8536,7 @@ index fd1eaf6ec95..209428f761a 100644 if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) return NULL; -@@ -1441,13 +1392,13 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str +@@ -1441,13 +1397,13 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str } static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, @@ -8490,7 +8552,7 @@ index fd1eaf6ec95..209428f761a 100644 return list1; } -@@ -1499,13 +1450,13 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str +@@ -1499,13 +1455,13 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str } static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, @@ -8506,7 +8568,7 @@ index fd1eaf6ec95..209428f761a 100644 return list1; } -@@ -1596,7 +1547,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis +@@ -1596,7 +1552,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis enum hlsl_ir_expr_op op; unsigned dim; @@ -8515,7 +8577,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -1607,7 +1558,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis +@@ -1607,7 +1563,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis return NULL; } @@ -8524,7 +8586,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -1618,9 +1569,9 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis +@@ -1618,9 +1574,9 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis return NULL; } @@ -8536,7 +8598,7 @@ index fd1eaf6ec95..209428f761a 100644 dim = arg1->data_type->dimx; else dim = min(arg1->data_type->dimx, arg2->data_type->dimx); -@@ -1702,7 +1653,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1702,7 +1658,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) { struct hlsl_type *lhs_type = lhs->data_type; @@ -8545,7 +8607,7 @@ index fd1eaf6ec95..209428f761a 100644 unsigned int writemask = 0; if (assign_op == ASSIGN_OP_SUB) -@@ -1720,13 +1671,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1720,13 +1676,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in return NULL; } @@ -8561,7 +8623,7 @@ index fd1eaf6ec95..209428f761a 100644 { if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) { -@@ -1735,10 +1686,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1735,10 +1691,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in } else if (lhs->type == HLSL_IR_SWIZZLE) { @@ -8575,7 +8637,7 @@ index fd1eaf6ec95..209428f761a 100644 hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); if (!invert_swizzle(&s, &writemask, &width)) -@@ -1751,10 +1703,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1751,10 +1708,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in { return NULL; } @@ -8588,7 +8650,7 @@ index fd1eaf6ec95..209428f761a 100644 } else { -@@ -1763,18 +1715,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1763,18 +1720,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in } } @@ -8616,7 +8678,7 @@ index fd1eaf6ec95..209428f761a 100644 assert(resource_type->base_type == HLSL_TYPE_TEXTURE || resource_type->base_type == HLSL_TYPE_UAV); if (resource_type->base_type != HLSL_TYPE_UAV) -@@ -1787,25 +1740,70 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1787,25 +1745,70 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Resource store expressions must write to all components."); @@ -8683,23 +8745,23 @@ index fd1eaf6ec95..209428f761a 100644 - struct hlsl_ir_store *store; + struct hlsl_ir_node *store; + struct hlsl_deref deref; -+ -+ if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) -+ return NULL; - if (!(store = hlsl_new_store_index(ctx, &hlsl_ir_load(lhs)->src, NULL, rhs, writemask, &rhs->loc))) ++ if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) + return NULL; +- list_add_tail(instrs, &store->node.entry); ++ + if (!(store = hlsl_new_store_index(ctx, &deref, NULL, rhs, writemask, &rhs->loc))) + { + hlsl_cleanup_deref(&deref); - return NULL; -- list_add_tail(instrs, &store->node.entry); ++ return NULL; + } + list_add_tail(instrs, &store->entry); + hlsl_cleanup_deref(&deref); } /* Don't use the instruction itself as a source, as this makes structure -@@ -1813,37 +1811,37 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1813,37 +1816,37 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in * the last instruction in the list, we do need to copy. */ if (!(copy = hlsl_new_copy(ctx, rhs))) return NULL; @@ -8748,7 +8810,7 @@ index fd1eaf6ec95..209428f761a 100644 return false; } -@@ -1861,10 +1859,8 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, +@@ -1861,10 +1864,8 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, for (k = 0; k < src_comp_count; ++k) { @@ -8760,7 +8822,7 @@ index fd1eaf6ec95..209428f761a 100644 struct hlsl_block block; if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) -@@ -1872,10 +1868,10 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, +@@ -1872,10 +1873,10 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); @@ -8773,7 +8835,7 @@ index fd1eaf6ec95..209428f761a 100644 return; list_move_tail(instrs, &block.instrs); -@@ -1885,12 +1881,12 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, +@@ -1885,12 +1886,12 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_struct) { @@ -8789,7 +8851,7 @@ index fd1eaf6ec95..209428f761a 100644 { unsigned int i; -@@ -1905,12 +1901,12 @@ static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_s +@@ -1905,12 +1906,12 @@ static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_s static bool type_has_numeric_components(struct hlsl_type *type) { @@ -8805,7 +8867,7 @@ index fd1eaf6ec95..209428f761a 100644 { unsigned int i; -@@ -1934,7 +1930,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +@@ -1934,7 +1935,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t struct hlsl_type *type; bool local = true; @@ -8814,7 +8876,7 @@ index fd1eaf6ec95..209428f761a 100644 assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); if (!(statements_list = make_empty_list(ctx))) -@@ -1966,7 +1962,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +@@ -1966,7 +1967,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t type = basic_type; @@ -8823,7 +8885,7 @@ index fd1eaf6ec95..209428f761a 100644 { for (i = 0; i < v->arrays.count; ++i) unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -@@ -2035,7 +2031,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +@@ -2035,7 +2036,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } vkd3d_free(v->arrays.sizes); @@ -8832,7 +8894,7 @@ index fd1eaf6ec95..209428f761a 100644 { free_parse_variable_def(v); continue; -@@ -2043,6 +2039,13 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +@@ -2043,6 +2044,13 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t var->buffer = ctx->cur_buffer; @@ -8846,7 +8908,7 @@ index fd1eaf6ec95..209428f761a 100644 if (ctx->cur_scope == ctx->globals) { local = false; -@@ -2148,7 +2151,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +@@ -2148,7 +2156,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } else { @@ -8855,7 +8917,16 @@ index fd1eaf6ec95..209428f761a 100644 assert(v->initializer.args_count == 1); list_add_tail(v->initializer.instrs, &load->node.entry); -@@ -2164,9 +2167,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +@@ -2156,7 +2164,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + } + + if (modifiers & HLSL_STORAGE_STATIC) +- list_move_tail(&ctx->static_initializers, v->initializer.instrs); ++ list_move_tail(&ctx->static_initializers.instrs, v->initializer.instrs); + else + list_move_tail(statements_list, v->initializer.instrs); + vkd3d_free(v->initializer.args); +@@ -2164,9 +2172,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { @@ -8866,28 +8937,28 @@ index fd1eaf6ec95..209428f761a 100644 /* Initialize statics to zero by default. */ -@@ -2181,9 +2182,9 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +@@ -2181,9 +2187,9 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t vkd3d_free(v); continue; } - list_add_tail(&ctx->static_initializers, &zero->node.entry); -+ list_add_tail(&ctx->static_initializers, &zero->entry); ++ hlsl_block_add_instr(&ctx->static_initializers, zero); - if (!(cast = add_cast(ctx, &ctx->static_initializers, &zero->node, var->data_type, &var->loc))) -+ if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) ++ if (!(cast = add_cast(ctx, &ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) { vkd3d_free(v); continue; -@@ -2194,7 +2195,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +@@ -2194,7 +2200,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t vkd3d_free(v); continue; } - list_add_tail(&ctx->static_initializers, &store->node.entry); -+ list_add_tail(&ctx->static_initializers, &store->entry); ++ hlsl_block_add_instr(&ctx->static_initializers, store); } vkd3d_free(v); } -@@ -2279,7 +2280,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, +@@ -2279,7 +2285,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) return arg; @@ -8896,7 +8967,7 @@ index fd1eaf6ec95..209428f761a 100644 return add_implicit_conversion(ctx, params->instrs, arg, type, loc); } -@@ -2315,12 +2316,12 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * +@@ -2315,12 +2321,12 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * base = expr_common_base_type(base, arg_type->base_type); @@ -8911,7 +8982,7 @@ index fd1eaf6ec95..209428f761a 100644 { matrices = true; dimx = min(dimx, arg_type->dimx); -@@ -2369,7 +2370,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, +@@ -2369,7 +2375,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; @@ -8920,7 +8991,7 @@ index fd1eaf6ec95..209428f761a 100644 return convert_args(ctx, params, type, loc); } -@@ -2383,20 +2384,18 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx, +@@ -2383,20 +2389,18 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx, static bool intrinsic_all(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -8945,7 +9016,7 @@ index fd1eaf6ec95..209428f761a 100644 count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) -@@ -2404,50 +2403,121 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, +@@ -2404,52 +2408,123 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) return false; @@ -9007,7 +9078,8 @@ index fd1eaf6ec95..209428f761a 100644 - string->buffer); - hlsl_release_string_buffer(ctx, string); + return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); -+ } + } +- data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); + else if (arg->data_type->base_type == HLSL_TYPE_BOOL) + { + if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) @@ -9093,10 +9165,12 @@ index fd1eaf6ec95..209428f761a 100644 + "Wrong type for argument 0 of asuint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", + string->buffer); + hlsl_release_string_buffer(ctx, string); - } - data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); ++ } ++ data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); -@@ -2483,7 +2553,7 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, + operands[0] = params->args[0]; + return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); +@@ -2483,7 +2558,7 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, static bool intrinsic_cross(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -9105,7 +9179,7 @@ index fd1eaf6ec95..209428f761a 100644 struct hlsl_ir_node *arg1 = params->args[0], *arg2 = params->args[1]; struct hlsl_ir_node *arg1_cast, *arg2_cast, *mul1_neg, *mul1, *mul2; struct hlsl_type *cast_type; -@@ -2504,35 +2574,55 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, +@@ -2504,35 +2579,55 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) return false; @@ -9170,7 +9244,7 @@ index fd1eaf6ec95..209428f761a 100644 static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -2565,8 +2655,7 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, +@@ -2565,8 +2660,7 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, static bool intrinsic_exp(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -9180,7 +9254,7 @@ index fd1eaf6ec95..209428f761a 100644 if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) return false; -@@ -2574,9 +2663,9 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, +@@ -2574,9 +2668,9 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, /* 1/ln(2) */ if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) return false; @@ -9192,7 +9266,7 @@ index fd1eaf6ec95..209428f761a 100644 return false; return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, mul, loc); -@@ -2604,6 +2693,43 @@ static bool intrinsic_floor(struct hlsl_ctx *ctx, +@@ -2604,6 +2698,43 @@ static bool intrinsic_floor(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FLOOR, arg, loc); } @@ -9236,7 +9310,7 @@ index fd1eaf6ec95..209428f761a 100644 static bool intrinsic_frac(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -2635,7 +2761,7 @@ static bool intrinsic_length(struct hlsl_ctx *ctx, +@@ -2635,7 +2766,7 @@ static bool intrinsic_length(struct hlsl_ctx *ctx, struct hlsl_type *type = params->args[0]->data_type; struct hlsl_ir_node *arg, *dot; @@ -9245,7 +9319,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -2692,20 +2818,18 @@ static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, +@@ -2692,20 +2823,18 @@ static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, static bool intrinsic_lit(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -9273,7 +9347,7 @@ index fd1eaf6ec95..209428f761a 100644 { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); return false; -@@ -2726,37 +2850,35 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, +@@ -2726,37 +2855,35 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, return false; hlsl_init_simple_deref_from_var(&var_deref, var); @@ -9324,7 +9398,7 @@ index fd1eaf6ec95..209428f761a 100644 return false; if (!(specular_or = add_binary_logical_expr(ctx, params->instrs, HLSL_OP2_LOGIC_OR, n_l_neg, n_h_neg, loc))) -@@ -2765,20 +2887,67 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, +@@ -2765,20 +2892,67 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) return false; @@ -9396,7 +9470,7 @@ index fd1eaf6ec95..209428f761a 100644 static bool intrinsic_max(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -2808,15 +2977,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -2808,15 +2982,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, struct hlsl_ir_load *load; struct hlsl_ir_var *var; @@ -9415,7 +9489,7 @@ index fd1eaf6ec95..209428f761a 100644 { vect_count++; cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->dimx); -@@ -2854,13 +3023,11 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -2854,13 +3028,11 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, for (j = 0; j < matrix_type->dimy; ++j) { struct hlsl_ir_node *instr = NULL; @@ -9430,7 +9504,7 @@ index fd1eaf6ec95..209428f761a 100644 if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) return false; -@@ -2868,7 +3035,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -2868,7 +3040,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) return false; @@ -9439,7 +9513,7 @@ index fd1eaf6ec95..209428f761a 100644 return false; if (instr) -@@ -2882,13 +3049,13 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -2882,13 +3054,13 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, } } @@ -9455,7 +9529,7 @@ index fd1eaf6ec95..209428f761a 100644 return false; list_add_tail(params->instrs, &load->node.entry); -@@ -2901,7 +3068,7 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, +@@ -2901,7 +3073,7 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, struct hlsl_type *type = params->args[0]->data_type; struct hlsl_ir_node *dot, *rsq, *arg; @@ -9464,7 +9538,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -2986,6 +3153,42 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx, +@@ -2986,6 +3158,42 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, arg, loc); } @@ -9507,7 +9581,7 @@ index fd1eaf6ec95..209428f761a 100644 static bool intrinsic_sin(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -3001,8 +3204,7 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, +@@ -3001,8 +3209,7 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -9517,7 +9591,7 @@ index fd1eaf6ec95..209428f761a 100644 if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; -@@ -3022,9 +3224,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, +@@ -3022,9 +3229,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) return false; @@ -9529,7 +9603,7 @@ index fd1eaf6ec95..209428f761a 100644 return false; if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p_num, p_denom, loc))) -@@ -3035,16 +3237,16 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, +@@ -3035,16 +3242,16 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) return false; @@ -9550,7 +9624,7 @@ index fd1eaf6ec95..209428f761a 100644 return false; if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, p, loc))) -@@ -3081,7 +3283,7 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, +@@ -3081,7 +3288,7 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, return false; type = ge->data_type; @@ -9559,7 +9633,7 @@ index fd1eaf6ec95..209428f761a 100644 return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); } -@@ -3090,9 +3292,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -3090,9 +3297,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * { struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; const struct hlsl_type *sampler_type; @@ -9570,7 +9644,7 @@ index fd1eaf6ec95..209428f761a 100644 if (params->args_count != 2 && params->args_count != 4) { -@@ -3107,7 +3307,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -3107,7 +3312,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * } sampler_type = params->args[0]->data_type; @@ -9579,7 +9653,7 @@ index fd1eaf6ec95..209428f761a 100644 || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) { struct vkd3d_string_buffer *string; -@@ -3118,24 +3318,19 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -3118,24 +3323,19 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * name, ctx->builtin_types.sampler[dim]->name, string->buffer); hlsl_release_string_buffer(ctx, string); } @@ -9607,7 +9681,7 @@ index fd1eaf6ec95..209428f761a 100644 return true; } -@@ -3156,13 +3351,14 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, +@@ -3156,13 +3356,14 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { struct hlsl_ir_node *arg = params->args[0]; struct hlsl_type *arg_type = arg->data_type; @@ -9624,7 +9698,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -3174,7 +3370,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, +@@ -3174,7 +3375,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, return false; } @@ -9633,7 +9707,7 @@ index fd1eaf6ec95..209428f761a 100644 { list_add_tail(params->instrs, &arg->entry); return true; -@@ -3190,21 +3386,75 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, +@@ -3190,21 +3391,75 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { for (j = 0; j < arg_type->dimy; ++j) { @@ -9713,7 +9787,7 @@ index fd1eaf6ec95..209428f761a 100644 return true; } -@@ -3220,22 +3470,31 @@ static const struct intrinsic_function +@@ -3220,22 +3475,31 @@ static const struct intrinsic_function intrinsic_functions[] = { /* Note: these entries should be kept in alphabetical order. */ @@ -9745,7 +9819,7 @@ index fd1eaf6ec95..209428f761a 100644 {"max", 2, true, intrinsic_max}, {"min", 2, true, intrinsic_min}, {"mul", 2, true, intrinsic_mul}, -@@ -3245,6 +3504,7 @@ intrinsic_functions[] = +@@ -3245,6 +3509,7 @@ intrinsic_functions[] = {"round", 1, true, intrinsic_round}, {"rsqrt", 1, true, intrinsic_rsqrt}, {"saturate", 1, true, intrinsic_saturate}, @@ -9753,7 +9827,7 @@ index fd1eaf6ec95..209428f761a 100644 {"sin", 1, true, intrinsic_sin}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, -@@ -3252,6 +3512,7 @@ intrinsic_functions[] = +@@ -3252,6 +3517,7 @@ intrinsic_functions[] = {"tex2D", -1, false, intrinsic_tex2D}, {"tex3D", -1, false, intrinsic_tex3D}, {"transpose", 1, true, intrinsic_transpose}, @@ -9761,7 +9835,7 @@ index fd1eaf6ec95..209428f761a 100644 }; static int intrinsic_function_name_compare(const void *a, const void *b) -@@ -3291,11 +3552,11 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +@@ -3291,11 +3557,11 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, if (param->storage_modifiers & HLSL_STORAGE_IN) { @@ -9775,7 +9849,7 @@ index fd1eaf6ec95..209428f761a 100644 } } -@@ -3316,7 +3577,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +@@ -3316,7 +3582,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Output argument to \"%s\" is const.", decl->func->name); @@ -9784,7 +9858,7 @@ index fd1eaf6ec95..209428f761a 100644 goto fail; list_add_tail(args->instrs, &load->node.entry); -@@ -3329,7 +3590,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +@@ -3329,7 +3595,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, { struct hlsl_ir_load *load; @@ -9793,7 +9867,7 @@ index fd1eaf6ec95..209428f761a 100644 goto fail; list_add_tail(args->instrs, &load->node.entry); } -@@ -3360,7 +3621,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +@@ -3360,7 +3626,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, for (i = 0; i < args->args_count; ++i) { @@ -9802,7 +9876,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -3397,20 +3658,20 @@ fail: +@@ -3397,20 +3663,20 @@ fail: } static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, @@ -9826,7 +9900,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -3455,320 +3716,526 @@ static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) +@@ -3455,320 +3721,526 @@ static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) } } @@ -10607,7 +10681,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -3846,6 +4313,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -3846,6 +4318,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %token KW_NAMESPACE %token KW_NOINTERPOLATION %token KW_OUT @@ -10615,7 +10689,7 @@ index fd1eaf6ec95..209428f761a 100644 %token KW_PASS %token KW_PIXELSHADER %token KW_PRECISE -@@ -3854,6 +4322,8 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -3854,6 +4327,8 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %token KW_RETURN %token KW_REGISTER %token KW_ROW_MAJOR @@ -10624,7 +10698,7 @@ index fd1eaf6ec95..209428f761a 100644 %token KW_RWTEXTURE1D %token KW_RWTEXTURE2D %token KW_RWTEXTURE3D -@@ -3933,6 +4403,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -3933,6 +4408,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type conditional_expr %type declaration %type declaration_statement @@ -10632,7 +10706,7 @@ index fd1eaf6ec95..209428f761a 100644 %type equality_expr %type expr %type expr_optional -@@ -3968,6 +4439,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -3968,6 +4444,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type attribute %type attribute_list @@ -10640,7 +10714,7 @@ index fd1eaf6ec95..209428f761a 100644 %type boolean -@@ -3999,6 +4471,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -3999,6 +4476,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type parameters %type register_opt @@ -10648,7 +10722,7 @@ index fd1eaf6ec95..209428f761a 100644 %type texture_type texture_ms_type uav_type -@@ -4037,7 +4510,7 @@ buffer_declaration: +@@ -4037,7 +4515,7 @@ buffer_declaration: if ($3.semantic.name) hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); @@ -10657,7 +10731,7 @@ index fd1eaf6ec95..209428f761a 100644 YYABORT; } -@@ -4261,6 +4734,14 @@ attribute_list: +@@ -4261,6 +4739,14 @@ attribute_list: $$.attrs[$$.count++] = $2; } @@ -10672,7 +10746,7 @@ index fd1eaf6ec95..209428f761a 100644 func_declaration: func_prototype compound_statement { -@@ -4349,8 +4830,11 @@ func_prototype_no_attrs: +@@ -4349,8 +4835,11 @@ func_prototype_no_attrs: "Semantics are not allowed on void functions."); } @@ -10685,7 +10759,7 @@ index fd1eaf6ec95..209428f761a 100644 if (($$.decl = get_func_decl(&ctx->functions, $3, &$5))) { -@@ -4476,17 +4960,24 @@ var_identifier: +@@ -4476,17 +4965,24 @@ var_identifier: colon_attribute: %empty { @@ -10714,7 +10788,7 @@ index fd1eaf6ec95..209428f761a 100644 $$.reg_reservation = $1; } -@@ -4499,6 +4990,9 @@ semantic: +@@ -4499,6 +4995,9 @@ semantic: ; $$.name = $2; $$.index = atoi(p); @@ -10724,7 +10798,7 @@ index fd1eaf6ec95..209428f761a 100644 *p = 0; } -@@ -4518,6 +5012,21 @@ register_opt: +@@ -4518,6 +5017,21 @@ register_opt: vkd3d_free($6); } @@ -10746,7 +10820,7 @@ index fd1eaf6ec95..209428f761a 100644 parameters: scope_start { -@@ -4536,7 +5045,7 @@ param_list: +@@ -4536,7 +5050,7 @@ param_list: parameter { memset(&$$, 0, sizeof($$)); @@ -10755,7 +10829,7 @@ index fd1eaf6ec95..209428f761a 100644 { ERR("Error adding function parameter %s.\n", $1.name); YYABORT; -@@ -4545,7 +5054,7 @@ param_list: +@@ -4545,7 +5059,7 @@ param_list: | param_list ',' parameter { $$ = $1; @@ -10764,7 +10838,7 @@ index fd1eaf6ec95..209428f761a 100644 { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "Parameter \"%s\" is already declared.", $3.name); -@@ -4624,7 +5133,15 @@ texture_ms_type: +@@ -4624,7 +5138,15 @@ texture_ms_type: } uav_type: @@ -10781,7 +10855,7 @@ index fd1eaf6ec95..209428f761a 100644 { $$ = HLSL_SAMPLER_DIM_1D; } -@@ -4640,7 +5157,7 @@ uav_type: +@@ -4640,7 +5162,7 @@ uav_type: type_no_void: KW_VECTOR '<' type ',' C_INTEGER '>' { @@ -10790,7 +10864,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -4667,7 +5184,7 @@ type_no_void: +@@ -4667,7 +5189,7 @@ type_no_void: } | KW_MATRIX '<' type ',' C_INTEGER ',' C_INTEGER '>' { @@ -10799,7 +10873,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -4702,6 +5219,10 @@ type_no_void: +@@ -4702,6 +5224,10 @@ type_no_void: { $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_GENERIC]; } @@ -10810,7 +10884,7 @@ index fd1eaf6ec95..209428f761a 100644 | KW_SAMPLER1D { $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_1D]; -@@ -4716,7 +5237,7 @@ type_no_void: +@@ -4716,7 +5242,7 @@ type_no_void: } | KW_SAMPLERCUBE { @@ -10819,7 +10893,7 @@ index fd1eaf6ec95..209428f761a 100644 } | KW_TEXTURE { -@@ -4740,23 +5261,58 @@ type_no_void: +@@ -4740,23 +5266,58 @@ type_no_void: } | texture_ms_type '<' type ',' shift_expr '>' { @@ -10886,7 +10960,7 @@ index fd1eaf6ec95..209428f761a 100644 $$ = hlsl_new_uav_type(ctx, $1, $3); } | TYPE_IDENTIFIER -@@ -4779,7 +5335,7 @@ type_no_void: +@@ -4779,7 +5340,7 @@ type_no_void: | KW_STRUCT TYPE_IDENTIFIER { $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); @@ -10895,7 +10969,7 @@ index fd1eaf6ec95..209428f761a 100644 hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "\"%s\" redefined as a structure.", $2); vkd3d_free($2); } -@@ -4934,10 +5490,17 @@ arrays: +@@ -4934,10 +5495,17 @@ arrays: } | '[' expr ']' arrays { @@ -10905,7 +10979,7 @@ index fd1eaf6ec95..209428f761a 100644 + unsigned int size; - destroy_instr_list($2); -+ hlsl_block_init(&block); ++ hlsl_clone_block(ctx, &block, &ctx->static_initializers); + list_move_tail(&block.instrs, $2); + + size = evaluate_static_expression_as_uint(ctx, &block, &@2); @@ -10915,7 +10989,7 @@ index fd1eaf6ec95..209428f761a 100644 $$ = $4; -@@ -4988,59 +5551,59 @@ var_modifiers: +@@ -4988,59 +5556,59 @@ var_modifiers: } | KW_EXTERN var_modifiers { @@ -10989,7 +11063,7 @@ index fd1eaf6ec95..209428f761a 100644 } -@@ -5145,6 +5708,7 @@ statement: +@@ -5145,6 +5713,7 @@ statement: declaration_statement | expr_statement | compound_statement @@ -10997,7 +11071,7 @@ index fd1eaf6ec95..209428f761a 100644 | jump_statement | selection_statement | loop_statement -@@ -5152,7 +5716,7 @@ statement: +@@ -5152,7 +5721,7 @@ statement: jump_statement: KW_RETURN expr ';' { @@ -11006,7 +11080,7 @@ index fd1eaf6ec95..209428f761a 100644 YYABORT; $$ = $2; } -@@ -5160,65 +5724,81 @@ jump_statement: +@@ -5160,65 +5729,81 @@ jump_statement: { if (!($$ = make_empty_list(ctx))) YYABORT; @@ -11111,7 +11185,7 @@ index fd1eaf6ec95..209428f761a 100644 hlsl_pop_scope(ctx); } -@@ -5250,31 +5830,31 @@ func_arguments: +@@ -5250,31 +5835,31 @@ func_arguments: primary_expr: C_FLOAT { @@ -11150,7 +11224,7 @@ index fd1eaf6ec95..209428f761a 100644 YYABORT; } } -@@ -5288,7 +5868,7 @@ primary_expr: +@@ -5288,7 +5873,7 @@ primary_expr: hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Variable \"%s\" is not defined.", $1); YYABORT; } @@ -11159,7 +11233,7 @@ index fd1eaf6ec95..209428f761a 100644 YYABORT; if (!($$ = make_list(ctx, &load->node))) YYABORT; -@@ -5316,7 +5896,7 @@ primary_expr: +@@ -5316,7 +5901,7 @@ primary_expr: if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr", hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) YYABORT; @@ -11168,7 +11242,7 @@ index fd1eaf6ec95..209428f761a 100644 YYABORT; if (!($$ = make_list(ctx, &load->node))) YYABORT; -@@ -5332,7 +5912,7 @@ postfix_expr: +@@ -5332,7 +5917,7 @@ postfix_expr: primary_expr | postfix_expr OP_INC { @@ -11177,7 +11251,7 @@ index fd1eaf6ec95..209428f761a 100644 { destroy_instr_list($1); YYABORT; -@@ -5341,7 +5921,7 @@ postfix_expr: +@@ -5341,7 +5926,7 @@ postfix_expr: } | postfix_expr OP_DEC { @@ -11186,7 +11260,7 @@ index fd1eaf6ec95..209428f761a 100644 { destroy_instr_list($1); YYABORT; -@@ -5352,7 +5932,7 @@ postfix_expr: +@@ -5352,7 +5937,7 @@ postfix_expr: { struct hlsl_ir_node *node = node_from_list($1); @@ -11195,7 +11269,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct hlsl_type *type = node->data_type; const struct hlsl_struct_field *field; -@@ -5365,20 +5945,20 @@ postfix_expr: +@@ -5365,20 +5950,20 @@ postfix_expr: } field_idx = field - type->e.record.fields; @@ -11220,7 +11294,7 @@ index fd1eaf6ec95..209428f761a 100644 $$ = $1; } else -@@ -5391,10 +5971,10 @@ postfix_expr: +@@ -5391,10 +5976,10 @@ postfix_expr: { struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); @@ -11233,7 +11307,7 @@ index fd1eaf6ec95..209428f761a 100644 { destroy_instr_list($1); YYABORT; -@@ -5412,7 +5992,7 @@ postfix_expr: +@@ -5412,7 +5997,7 @@ postfix_expr: free_parse_initializer(&$4); YYABORT; } @@ -11242,7 +11316,7 @@ index fd1eaf6ec95..209428f761a 100644 { struct vkd3d_string_buffer *string; -@@ -5432,7 +6012,7 @@ postfix_expr: +@@ -5432,7 +6017,7 @@ postfix_expr: YYABORT; } @@ -11251,7 +11325,7 @@ index fd1eaf6ec95..209428f761a 100644 { free_parse_initializer(&$4); YYABORT; -@@ -5459,7 +6039,7 @@ unary_expr: +@@ -5459,7 +6044,7 @@ unary_expr: postfix_expr | OP_INC unary_expr { @@ -11260,7 +11334,7 @@ index fd1eaf6ec95..209428f761a 100644 { destroy_instr_list($2); YYABORT; -@@ -5468,7 +6048,7 @@ unary_expr: +@@ -5468,7 +6053,7 @@ unary_expr: } | OP_DEC unary_expr { @@ -11269,7 +11343,7 @@ index fd1eaf6ec95..209428f761a 100644 { destroy_instr_list($2); YYABORT; -@@ -5545,31 +6125,31 @@ mul_expr: +@@ -5545,31 +6130,31 @@ mul_expr: unary_expr | mul_expr '*' unary_expr { @@ -11307,7 +11381,7 @@ index fd1eaf6ec95..209428f761a 100644 } shift_expr: -@@ -5587,30 +6167,30 @@ relational_expr: +@@ -5587,30 +6172,30 @@ relational_expr: shift_expr | relational_expr '<' shift_expr { @@ -11344,7 +11418,7 @@ index fd1eaf6ec95..209428f761a 100644 } bitand_expr: -@@ -5652,7 +6232,26 @@ conditional_expr: +@@ -5652,7 +6237,26 @@ conditional_expr: logicor_expr | logicor_expr '?' expr ':' assignment_expr { @@ -11373,7 +11447,7 @@ index fd1eaf6ec95..209428f761a 100644 assignment_expr: diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index ab59875738c..4317604bdfa 100644 +index ab59875738c..765b1907426 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -27,11 +27,11 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str @@ -12487,6 +12561,15 @@ index ab59875738c..4317604bdfa 100644 return progress; } +@@ -1379,7 +1649,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b + return progress; + } + +-static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) ++bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) + { + struct copy_propagation_state state; + bool progress; @@ -1471,7 +1741,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ static bool is_vec1(const struct hlsl_type *type) @@ -12803,7 +12886,7 @@ index ab59875738c..4317604bdfa 100644 + if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, sum, &instr->loc))) + return false; + list_add_before(&instr->entry, &frc->entry); -+ + + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, frc, &instr->loc))) + return false; + list_add_before(&instr->entry, &neg->entry); @@ -12813,9 +12896,10 @@ index ab59875738c..4317604bdfa 100644 + list_add_before(&instr->entry, &replacement->entry); + + hlsl_replace_node(instr, replacement); -+ return true; -+} -+ + return true; + } + +-struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, +static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_type *type = instr->data_type, *arg_type; @@ -12844,11 +12928,10 @@ index ab59875738c..4317604bdfa 100644 + + expr->op = HLSL_OP2_NEQUAL; + hlsl_src_from_node(&expr->operands[1], zero); - - return true; - } - --struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, ++ ++ return true; ++} ++ +struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) { @@ -12879,12 +12962,12 @@ index ab59875738c..4317604bdfa 100644 return NULL; - list_add_tail(&iff->else_instrs.instrs, &store->node.entry); + hlsl_block_add_instr(&else_block, store); -+ + +- if (!(load = hlsl_new_var_load(ctx, var, condition->loc))) + if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) + return NULL; + list_add_tail(instrs, &iff->entry); - -- if (!(load = hlsl_new_var_load(ctx, var, condition->loc))) ++ + if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) return NULL; list_add_tail(instrs, &load->node.entry); @@ -13238,28 +13321,27 @@ index ab59875738c..4317604bdfa 100644 regset = hlsl_type_get_regset(var->data_type); - if (var->reg_reservation.type) -+ if (var->reg_reservation.reg_type) ++ if (var->reg_reservation.reg_type && var->regs[regset].bind_count) { - if (var->reg_reservation.type != get_regset_name(regset)) + if (var->reg_reservation.reg_type != get_regset_name(regset)) { struct vkd3d_string_buffer *type_string; -@@ -2277,8 +2713,11 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) +@@ -2277,8 +2713,10 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) else { var->regs[regset].allocated = true; - var->regs[regset].id = var->reg_reservation.index; - TRACE("Allocated reserved %s to %c%u.\n", var->name, var->reg_reservation.type, var->reg_reservation.index); + var->regs[regset].id = var->reg_reservation.reg_index; -+ var->regs[regset].bind_count = var->data_type->reg_size[regset]; + TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, + var->reg_reservation.reg_index, var->reg_reservation.reg_type, + var->reg_reservation.reg_index + var->regs[regset].bind_count); } } } -@@ -2286,9 +2725,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) +@@ -2286,9 +2724,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) /* Compute the earliest and latest liveness for each variable. In the case that * a variable is accessed inside of a loop, we promote its liveness to extend @@ -13272,7 +13354,7 @@ index ab59875738c..4317604bdfa 100644 static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop_first, unsigned int loop_last) { struct hlsl_ir_node *instr; -@@ -2296,7 +2735,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -2296,7 +2734,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { @@ -13281,7 +13363,7 @@ index ab59875738c..4317604bdfa 100644 switch (instr->type) { -@@ -2311,9 +2750,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -2311,9 +2749,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = store->lhs.var; if (!var->first_write) var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; @@ -13293,7 +13375,7 @@ index ab59875738c..4317604bdfa 100644 break; } case HLSL_IR_EXPR: -@@ -2322,16 +2761,16 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -2322,16 +2760,16 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop unsigned int i; for (i = 0; i < ARRAY_SIZE(expr->operands) && expr->operands[i].node; ++i) @@ -13314,7 +13396,7 @@ index ab59875738c..4317604bdfa 100644 break; } case HLSL_IR_LOAD: -@@ -2339,9 +2778,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -2339,9 +2777,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_load *load = hlsl_ir_load(instr); var = load->src.var; @@ -13326,7 +13408,7 @@ index ab59875738c..4317604bdfa 100644 break; } case HLSL_IR_LOOP: -@@ -2357,22 +2796,30 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -2357,22 +2795,30 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); var = load->resource.var; @@ -13364,7 +13446,7 @@ index ab59875738c..4317604bdfa 100644 break; } case HLSL_IR_RESOURCE_STORE: -@@ -2380,18 +2827,26 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -2380,18 +2826,26 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); var = store->resource.var; @@ -13396,7 +13478,7 @@ index ab59875738c..4317604bdfa 100644 break; } case HLSL_IR_CONSTANT: -@@ -2426,127 +2881,142 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl +@@ -2426,127 +2880,142 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl compute_liveness_recurse(&entry_func->body, 0, 0); } @@ -13601,7 +13683,7 @@ index ab59875738c..4317604bdfa 100644 } static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) -@@ -2565,14 +3035,99 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct +@@ -2565,14 +3034,99 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); } @@ -13703,7 +13785,7 @@ index ab59875738c..4317604bdfa 100644 var->first_write, var->last_read, var->data_type); TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r', -@@ -2580,15 +3135,20 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir +@@ -2580,15 +3134,20 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir } } @@ -13726,7 +13808,7 @@ index ab59875738c..4317604bdfa 100644 instr->data_type); TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); -@@ -2599,8 +3159,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl +@@ -2599,8 +3158,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); @@ -13737,7 +13819,7 @@ index ab59875738c..4317604bdfa 100644 break; } -@@ -2609,21 +3169,21 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl +@@ -2609,21 +3168,21 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl struct hlsl_ir_load *load = hlsl_ir_load(instr); /* We need to at least allocate a variable for undefs. * FIXME: We should probably find a way to remove them instead. */ @@ -13762,7 +13844,7 @@ index ab59875738c..4317604bdfa 100644 break; } -@@ -2633,7 +3193,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl +@@ -2633,7 +3192,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl } } @@ -13772,7 +13854,7 @@ index ab59875738c..4317604bdfa 100644 { struct hlsl_constant_defs *defs = &ctx->constant_defs; struct hlsl_ir_node *instr; -@@ -2649,7 +3210,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b +@@ -2649,7 +3209,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b unsigned int x, y, i, writemask, end_reg; unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; @@ -13781,7 +13863,7 @@ index ab59875738c..4317604bdfa 100644 TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, -@@ -2662,7 +3223,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b +@@ -2662,7 +3222,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b defs->count = end_reg; } @@ -13790,7 +13872,7 @@ index ab59875738c..4317604bdfa 100644 if (!(writemask = constant->reg.writemask)) writemask = (1u << type->dimx) - 1; -@@ -2671,12 +3232,12 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b +@@ -2671,12 +3231,12 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b { for (x = 0, i = 0; x < 4; ++x) { @@ -13805,7 +13887,7 @@ index ab59875738c..4317604bdfa 100644 switch (type->base_type) { -@@ -2714,15 +3275,15 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b +@@ -2714,15 +3274,15 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); @@ -13824,7 +13906,7 @@ index ab59875738c..4317604bdfa 100644 break; } -@@ -2734,10 +3295,10 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b +@@ -2734,10 +3294,10 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { @@ -13837,7 +13919,7 @@ index ab59875738c..4317604bdfa 100644 LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { -@@ -2748,12 +3309,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi +@@ -2748,12 +3308,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi if (reg_size == 0) continue; @@ -13853,7 +13935,7 @@ index ab59875738c..4317604bdfa 100644 } /* Simple greedy temporary register allocation pass that just assigns a unique -@@ -2762,15 +3325,33 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi +@@ -2762,15 +3324,33 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi * does not handle constants. */ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { @@ -13892,7 +13974,7 @@ index ab59875738c..4317604bdfa 100644 { [VKD3D_SHADER_TYPE_PIXEL] = "Pixel", [VKD3D_SHADER_TYPE_VERTEX] = "Vertex", -@@ -2791,7 +3372,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -2791,7 +3371,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var D3DDECLUSAGE usage; uint32_t usage_idx; @@ -13906,7 +13988,7 @@ index ab59875738c..4317604bdfa 100644 { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); -@@ -2800,8 +3386,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -2800,8 +3385,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var if ((!output && !var->last_read) || (output && !var->first_write)) return; @@ -13915,7 +13997,7 @@ index ab59875738c..4317604bdfa 100644 } else { -@@ -2827,6 +3411,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -2827,6 +3410,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var { var->regs[HLSL_REGSET_NUMERIC].allocated = true; var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; @@ -13923,7 +14005,7 @@ index ab59875738c..4317604bdfa 100644 var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); -@@ -2853,23 +3438,117 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 +@@ -2853,23 +3437,117 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) { @@ -14047,7 +14129,7 @@ index ab59875738c..4317604bdfa 100644 } static void allocate_buffers(struct hlsl_ctx *ctx) -@@ -2880,15 +3559,17 @@ static void allocate_buffers(struct hlsl_ctx *ctx) +@@ -2880,15 +3558,17 @@ static void allocate_buffers(struct hlsl_ctx *ctx) LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -14067,7 +14149,7 @@ index ab59875738c..4317604bdfa 100644 LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) { if (!buffer->used_size) -@@ -2896,28 +3577,30 @@ static void allocate_buffers(struct hlsl_ctx *ctx) +@@ -2896,28 +3576,30 @@ static void allocate_buffers(struct hlsl_ctx *ctx) if (buffer->type == HLSL_BUFFER_CONSTANT) { @@ -14104,7 +14186,7 @@ index ab59875738c..4317604bdfa 100644 buffer->reg.allocated = true; TRACE("Allocated %s to cb%u.\n", buffer->name, index); ++index; -@@ -2939,13 +3622,17 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum +@@ -2939,13 +3621,29 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum uint32_t index) { const struct hlsl_ir_var *var; @@ -14112,18 +14194,31 @@ index ab59875738c..4317604bdfa 100644 LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) { - if (!var->regs[regset].allocated) +- if (!var->regs[regset].allocated) ++ if (var->reg_reservation.reg_type == get_regset_name(regset) ++ && var->data_type->reg_size[regset]) ++ { ++ /* Vars with a reservation prevent non-reserved vars from being ++ * bound there even if the reserved vars aren't used. */ ++ start = var->reg_reservation.reg_index; ++ count = var->data_type->reg_size[regset]; ++ } ++ else if (var->regs[regset].allocated) ++ { ++ start = var->regs[regset].id; ++ count = var->regs[regset].bind_count; ++ } ++ else ++ { continue; ++ } - if (index == var->regs[regset].id) -+ start = var->regs[regset].id; -+ count = var->regs[regset].bind_count; -+ + if (start <= index && index < start + count) return var; } return NULL; -@@ -2956,7 +3643,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) +@@ -2956,7 +3654,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) char regset_name = get_regset_name(regset); struct hlsl_ir_var *var; uint32_t min_index = 0; @@ -14131,7 +14226,7 @@ index ab59875738c..4317604bdfa 100644 if (regset == HLSL_REGSET_UAVS) { -@@ -2968,19 +3654,17 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) +@@ -2968,19 +3665,17 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) } } @@ -14156,7 +14251,7 @@ index ab59875738c..4317604bdfa 100644 if (var->regs[regset].id < min_index) { -@@ -2988,28 +3672,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) +@@ -2988,28 +3683,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "UAV index (%u) must be higher than the maximum render target index (%u).", var->regs[regset].id, min_index - 1); @@ -14213,7 +14308,7 @@ index ab59875738c..4317604bdfa 100644 ++index; } } -@@ -3034,12 +3734,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl +@@ -3034,12 +3745,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return false; /* We should always have generated a cast to UINT. */ @@ -14229,7 +14324,7 @@ index ab59875738c..4317604bdfa 100644 { case HLSL_CLASS_VECTOR: if (idx >= type->dimx) -@@ -3090,6 +3790,55 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl +@@ -3090,6 +3801,55 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return true; } @@ -14285,7 +14380,7 @@ index ab59875738c..4317604bdfa 100644 bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node; -@@ -3102,13 +3851,13 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref +@@ -3102,13 +3862,13 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref } /* We should always have generated a cast to UINT. */ @@ -14301,7 +14396,7 @@ index ab59875738c..4317604bdfa 100644 size = deref->var->data_type->reg_size[deref->offset_regset]; if (*offset >= size) -@@ -3170,7 +3919,7 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a +@@ -3170,7 +3930,7 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a const struct hlsl_type *type = instr->data_type; const struct hlsl_ir_constant *constant; @@ -14310,7 +14405,7 @@ index ab59875738c..4317604bdfa 100644 || (type->base_type != HLSL_TYPE_INT && type->base_type != HLSL_TYPE_UINT)) { struct vkd3d_string_buffer *string; -@@ -3190,15 +3939,34 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a +@@ -3190,15 +3950,34 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } constant = hlsl_ir_constant(instr); @@ -14348,8 +14443,12 @@ index ab59875738c..4317604bdfa 100644 int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) { -@@ -3212,7 +3980,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - list_move_head(&body->instrs, &ctx->static_initializers); +@@ -3209,10 +3988,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + unsigned int i; + bool progress; + +- list_move_head(&body->instrs, &ctx->static_initializers); ++ list_move_head(&body->instrs, &ctx->static_initializers.instrs); memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); - transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); @@ -14357,7 +14456,7 @@ index ab59875738c..4317604bdfa 100644 vkd3d_free(recursive_call_ctx.backtrace); /* Avoid going into an infinite loop when processing call instructions. -@@ -3222,7 +3990,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -3222,7 +4001,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_return(ctx, entry_func, body, false); @@ -14368,7 +14467,7 @@ index ab59875738c..4317604bdfa 100644 LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { -@@ -3234,15 +4004,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -3234,15 +4015,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry { var = entry_func->parameters.vars[i]; @@ -14393,7 +14492,7 @@ index ab59875738c..4317604bdfa 100644 if (var->storage_modifiers & HLSL_STORAGE_IN) prepend_input_var_copy(ctx, &body->instrs, var); -@@ -3252,7 +4029,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -3252,7 +4040,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } if (entry_func->return_var) { @@ -14402,7 +14501,7 @@ index ab59875738c..4317604bdfa 100644 hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); -@@ -3274,53 +4051,61 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -3274,60 +4062,71 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); @@ -14440,11 +14539,12 @@ index ab59875738c..4317604bdfa 100644 { - progress = transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - progress |= transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); - progress |= copy_propagation_execute(ctx, body); +- progress |= copy_propagation_execute(ctx, body); - progress |= transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= transform_ir(ctx, remove_trivial_swizzles, body, NULL); ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); ++ progress |= hlsl_copy_propagation_execute(ctx, body); + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); } @@ -14488,12 +14588,12 @@ index ab59875738c..4317604bdfa 100644 compute_liveness(ctx, entry_func); -@@ -3328,6 +4113,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + if (TRACE_ON()) rb_for_each_entry(&ctx->functions, dump_function, ctx); - allocate_register_reservations(ctx); -+ + calculate_resource_register_counts(ctx); ++ + allocate_register_reservations(ctx); + allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4) @@ -18807,10 +18907,10 @@ index 553a75818e7..00000000000 -} diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c new file mode 100644 -index 00000000000..d542fbb0d52 +index 00000000000..9eefb82c226 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -0,0 +1,1073 @@ +@@ -0,0 +1,1072 @@ +/* + * Copyright 2023 Conor McCarthy for CodeWeavers + * @@ -19528,8 +19628,7 @@ index 00000000000..d542fbb0d52 + } + } + element_count = new_count; -+ /* Signature 's' is a copy of the original signature struct, so we can replace -+ * the 'elements' pointer without freeing it. */ ++ vkd3d_free(s->elements); + s->elements = elements; + s->element_count = element_count; + @@ -19930,7 +20029,7 @@ index bb5a6b61de1..94079696280 100644 %% diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 53e13735937..cc0b63e8284 100644 +index 53e13735937..3542b5fac51 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -18,7 +18,7 @@ @@ -19951,7 +20050,7 @@ index 53e13735937..cc0b63e8284 100644 unsigned int index) { switch (sysval) -@@ -199,11 +199,6 @@ static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enu +@@ -199,14 +199,9 @@ static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enu } } @@ -19962,7 +20061,11 @@ index 53e13735937..cc0b63e8284 100644 - #define VKD3D_SPIRV_VERSION 0x00010000 #define VKD3D_SPIRV_GENERATOR_ID 18 - #define VKD3D_SPIRV_GENERATOR_VERSION 7 +-#define VKD3D_SPIRV_GENERATOR_VERSION 7 ++#define VKD3D_SPIRV_GENERATOR_VERSION 8 + #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) + + struct vkd3d_spirv_stream @@ -1967,11 +1962,9 @@ struct vkd3d_symbol_register_data uint32_t member_idx; enum vkd3d_shader_component_type component_type; @@ -20068,11 +20171,12 @@ index 53e13735937..cc0b63e8284 100644 } static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler); -@@ -2304,13 +2307,33 @@ static const char *spirv_compiler_get_entry_point_name(const struct spirv_compil +@@ -2304,13 +2307,37 @@ static const char *spirv_compiler_get_entry_point_name(const struct spirv_compil return info && info->entry_point ? info->entry_point : "main"; } -struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, +- const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, +static void spirv_compiler_destroy(struct spirv_compiler *compiler) +{ + vkd3d_free(compiler->control_flow_info); @@ -20090,11 +20194,15 @@ index 53e13735937..cc0b63e8284 100644 + + vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); + ++ shader_signature_cleanup(&compiler->input_signature); ++ shader_signature_cleanup(&compiler->output_signature); ++ shader_signature_cleanup(&compiler->patch_constant_signature); ++ + vkd3d_free(compiler); +} + +static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, - const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) { @@ -20105,7 +20213,7 @@ index 53e13735937..cc0b63e8284 100644 const struct vkd3d_shader_interface_info *shader_interface; const struct vkd3d_shader_descriptor_offset_info *offset_info; const struct vkd3d_shader_spirv_target_info *target_info; -@@ -2402,9 +2425,9 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * +@@ -2402,9 +2429,12 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * compiler->shader_type = shader_version->type; @@ -20115,10 +20223,13 @@ index 53e13735937..cc0b63e8284 100644 + compiler->input_signature = shader_desc->input_signature; + compiler->output_signature = shader_desc->output_signature; + compiler->patch_constant_signature = shader_desc->patch_constant_signature; ++ memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); ++ memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); ++ memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) { -@@ -2437,6 +2460,8 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * +@@ -2437,6 +2467,8 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * compiler->scan_descriptor_info = scan_descriptor_info; @@ -20127,7 +20238,7 @@ index 53e13735937..cc0b63e8284 100644 vkd3d_string_buffer_cache_init(&compiler->string_buffers); spirv_compiler_emit_initial_declarations(compiler); -@@ -2857,7 +2882,7 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s +@@ -2857,7 +2889,7 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s { unsigned int idx; @@ -20136,7 +20247,7 @@ index 53e13735937..cc0b63e8284 100644 switch (reg->type) { case VKD3DSPR_RESOURCE: -@@ -2887,12 +2912,6 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s +@@ -2887,12 +2919,6 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s case VKD3DSPR_DEPTHOUTLE: snprintf(buffer, buffer_size, "oDepth"); break; @@ -20149,7 +20260,7 @@ index 53e13735937..cc0b63e8284 100644 case VKD3DSPR_GSINSTID: snprintf(buffer, buffer_size, "vGSInstanceID"); break; -@@ -2965,18 +2984,26 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, +@@ -2965,18 +2991,26 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler, struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, @@ -20180,7 +20291,7 @@ index 53e13735937..cc0b63e8284 100644 ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); } -@@ -3169,7 +3196,6 @@ struct vkd3d_shader_register_info +@@ -3169,7 +3203,6 @@ struct vkd3d_shader_register_info unsigned int structure_stride; unsigned int binding_base_idx; bool is_aggregate; @@ -20188,7 +20299,7 @@ index 53e13735937..cc0b63e8284 100644 }; static bool spirv_compiler_get_register_info(const struct spirv_compiler *compiler, -@@ -3192,7 +3218,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil +@@ -3192,7 +3225,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil register_info->structure_stride = 0; register_info->binding_base_idx = 0; register_info->is_aggregate = false; @@ -20196,7 +20307,7 @@ index 53e13735937..cc0b63e8284 100644 return true; } -@@ -3214,7 +3239,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil +@@ -3214,7 +3246,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil register_info->structure_stride = symbol->info.reg.structure_stride; register_info->binding_base_idx = symbol->info.reg.binding_base_idx; register_info->is_aggregate = symbol->info.reg.is_aggregate; @@ -20204,7 +20315,7 @@ index 53e13735937..cc0b63e8284 100644 return true; } -@@ -3344,41 +3368,22 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp +@@ -3344,41 +3375,22 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp } else if (register_info->is_aggregate) { @@ -20256,7 +20367,7 @@ index 53e13735937..cc0b63e8284 100644 indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, ®->idx[0]); } -@@ -4249,35 +4254,12 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp +@@ -4249,35 +4261,12 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp if ((builtin = get_spirv_builtin_for_register(reg_type))) return builtin; @@ -20294,7 +20405,7 @@ index 53e13735937..cc0b63e8284 100644 static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler) { struct vkd3d_shader_register r; -@@ -4288,6 +4270,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler +@@ -4288,6 +4277,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler r.type = VKD3DSPR_OUTPOINTID; r.idx[0].offset = ~0u; r.idx[1].offset = ~0u; @@ -20302,7 +20413,7 @@ index 53e13735937..cc0b63e8284 100644 return spirv_compiler_get_register_id(compiler, &r); } -@@ -4302,7 +4285,7 @@ static uint32_t spirv_compiler_emit_load_invocation_id(struct spirv_compiler *co +@@ -4302,7 +4292,7 @@ static uint32_t spirv_compiler_emit_load_invocation_id(struct spirv_compiler *co } static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compiler, @@ -20311,7 +20422,7 @@ index 53e13735937..cc0b63e8284 100644 { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const char *name; -@@ -4310,7 +4293,7 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile +@@ -4310,7 +4300,7 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile if (!suffix) suffix = ""; @@ -20320,7 +20431,7 @@ index 53e13735937..cc0b63e8284 100644 { case VKD3DSIH_HS_CONTROL_POINT_PHASE: name = "control"; -@@ -4322,62 +4305,23 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile +@@ -4322,62 +4312,23 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile name = "join"; break; default: @@ -20388,7 +20499,7 @@ index 53e13735937..cc0b63e8284 100644 { const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; const struct vkd3d_shader_transform_feedback_element *xfb_element; -@@ -4436,17 +4380,21 @@ static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler, +@@ -4436,17 +4387,21 @@ static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler, vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationOffset, offset); } @@ -20416,7 +20527,7 @@ index 53e13735937..cc0b63e8284 100644 vkd3d_spirv_add_iface_variable(builder, id); spirv_compiler_decorate_builtin(compiler, id, builtin->spirv_builtin); -@@ -4458,54 +4406,45 @@ static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *comp +@@ -4458,54 +4413,45 @@ static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *comp return id; } @@ -20500,7 +20611,7 @@ index 53e13735937..cc0b63e8284 100644 } static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, -@@ -4513,48 +4452,32 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, +@@ -4513,50 +4459,35 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, enum vkd3d_shader_interpolation_mode interpolation_mode) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -20557,9 +20668,13 @@ index 53e13735937..cc0b63e8284 100644 + element_idx = shader_register_get_io_indices(reg, array_sizes); + signature_element = &shader_signature->elements[element_idx]; - if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && !sysval && signature_element->sysval_semantic) +- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && !sysval && signature_element->sysval_semantic) ++ if ((compiler->shader_type == VKD3D_SHADER_TYPE_HULL || compiler->shader_type == VKD3D_SHADER_TYPE_GEOMETRY) ++ && !sysval && signature_element->sysval_semantic) sysval = vkd3d_siv_from_sysval(signature_element->sysval_semantic); -@@ -4576,12 +4499,16 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, + + builtin = get_spirv_builtin_for_sysval(compiler, sysval); +@@ -4576,12 +4507,16 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, component_idx = vkd3d_write_mask_get_component_idx(signature_element->mask); } @@ -20579,7 +20694,7 @@ index 53e13735937..cc0b63e8284 100644 storage_class = SpvStorageClassInput; -@@ -4589,111 +4516,68 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, +@@ -4589,111 +4524,68 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) { @@ -20732,7 +20847,7 @@ index 53e13735937..cc0b63e8284 100644 { ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); index = spirv_compiler_get_constant_uint(compiler, i); -@@ -4708,7 +4592,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, +@@ -4708,7 +4600,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); index = spirv_compiler_get_constant_uint(compiler, builtin->member_idx); val_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, ptr_type_id, input_id, index); @@ -20741,7 +20856,7 @@ index 53e13735937..cc0b63e8284 100644 } val_id = vkd3d_spirv_build_op_load(builder, type_id, val_id, SpvMemoryAccessMaskNone); -@@ -4743,9 +4627,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, +@@ -4743,9 +4635,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, uint32_t write_mask; uint32_t input_id; @@ -20753,7 +20868,7 @@ index 53e13735937..cc0b63e8284 100644 if (!(builtin = get_spirv_builtin_for_register(reg->type))) { -@@ -4763,19 +4646,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, +@@ -4763,19 +4654,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); vkd3d_symbol_set_register_info(®_symbol, input_id, SpvStorageClassInput, builtin->component_type, write_mask); @@ -20774,7 +20889,7 @@ index 53e13735937..cc0b63e8284 100644 switch (reg->type) { -@@ -4787,10 +4666,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil +@@ -4787,10 +4674,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil case VKD3DSPR_PRIMID: spirv_compiler_emit_input_register(compiler, dst); return; @@ -20785,7 +20900,7 @@ index 53e13735937..cc0b63e8284 100644 case VKD3DSPR_OUTPOINTID: /* Emitted in spirv_compiler_emit_initial_declarations(). */ case VKD3DSPR_OUTCONTROLPOINT: /* See spirv_compiler_leave_shader_phase(). */ return; -@@ -4798,22 +4673,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil +@@ -4798,22 +4681,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil FIXME("Unhandled shader phase input register %#x.\n", reg->type); return; } @@ -20808,7 +20923,7 @@ index 53e13735937..cc0b63e8284 100644 } static unsigned int get_shader_output_swizzle(const struct spirv_compiler *compiler, -@@ -4835,8 +4694,7 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) +@@ -4835,8 +4702,7 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && info && info->dual_source_blending; } @@ -20818,7 +20933,7 @@ index 53e13735937..cc0b63e8284 100644 { if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) { -@@ -4847,38 +4705,10 @@ static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signa +@@ -4847,38 +4713,10 @@ static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signa *mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); } @@ -20858,7 +20973,7 @@ index 53e13735937..cc0b63e8284 100644 uint32_t clip_distance_mask = 0, clip_distance_id = 0; uint32_t cull_distance_mask = 0, cull_distance_id = 0; const struct vkd3d_spirv_builtin *builtin; -@@ -4886,7 +4716,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * +@@ -4886,7 +4724,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * for (i = 0; i < output_signature->element_count; ++i) { @@ -20867,7 +20982,7 @@ index 53e13735937..cc0b63e8284 100644 switch (e->sysval_semantic) { -@@ -4921,7 +4751,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * +@@ -4921,7 +4759,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * for (i = 0; i < output_signature->element_count; ++i) { @@ -20876,7 +20991,7 @@ index 53e13735937..cc0b63e8284 100644 switch (e->sysval_semantic) { -@@ -4953,9 +4783,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, +@@ -4953,9 +4791,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, uint32_t write_mask; uint32_t output_id; @@ -20888,7 +21003,7 @@ index 53e13735937..cc0b63e8284 100644 if (!(builtin = get_spirv_builtin_for_register(reg->type))) { -@@ -4969,7 +4798,6 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, +@@ -4969,7 +4806,6 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); vkd3d_symbol_set_register_info(®_symbol, output_id, SpvStorageClassOutput, builtin->component_type, write_mask); @@ -20896,7 +21011,7 @@ index 53e13735937..cc0b63e8284 100644 reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; spirv_compiler_put_symbol(compiler, ®_symbol); spirv_compiler_emit_register_execution_mode(compiler, reg); -@@ -4977,7 +4805,7 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, +@@ -4977,7 +4813,7 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, } static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler, @@ -20905,7 +21020,7 @@ index 53e13735937..cc0b63e8284 100644 { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t *variable_id, id; -@@ -4993,7 +4821,7 @@ static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_c +@@ -4993,7 +4829,7 @@ static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_c return *variable_id; id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0); @@ -20914,7 +21029,7 @@ index 53e13735937..cc0b63e8284 100644 vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); if (variable_id) -@@ -5005,44 +4833,34 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, +@@ -5005,44 +4841,34 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_input_sysval_semantic sysval) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -20943,13 +21058,13 @@ index 53e13735937..cc0b63e8284 100644 - phase = spirv_compiler_get_current_shader_phase(compiler); - is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE); -- -- shader_signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; + is_patch_constant = is_in_fork_or_join_phase(compiler); -- array_size = is_control_point_phase(phase) ? compiler->output_control_point_count : 0; +- shader_signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; + shader_signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature; +- array_size = is_control_point_phase(phase) ? compiler->output_control_point_count : 0; +- - if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature, - &signature_idx, reg->idx[0].offset, dst->write_mask))) - { @@ -20970,7 +21085,7 @@ index 53e13735937..cc0b63e8284 100644 if (builtin) { component_type = builtin->component_type; -@@ -5058,128 +4876,103 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, +@@ -5058,128 +4884,103 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, storage_class = SpvStorageClassOutput; if (get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE @@ -21163,7 +21278,7 @@ index 53e13735937..cc0b63e8284 100644 { enum vkd3d_shader_input_sysval_semantic sysval; const struct vkd3d_spirv_builtin *builtin; -@@ -5198,14 +4991,14 @@ static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *com +@@ -5198,14 +4999,14 @@ static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *com } static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compiler, @@ -21180,7 +21295,7 @@ index 53e13735937..cc0b63e8284 100644 unsigned int i, index, array_idx; uint32_t output_id; -@@ -5224,6 +5017,9 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi +@@ -5224,6 +5025,9 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi use_mask |= element->used_mask; } } @@ -21190,7 +21305,7 @@ index 53e13735937..cc0b63e8284 100644 write_mask &= dst_write_mask; if (!write_mask) -@@ -5294,22 +5090,19 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * +@@ -5294,22 +5098,19 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * uint32_t param_type_id[MAX_REG_OUTPUT + 1], param_id[MAX_REG_OUTPUT + 1] = {0}; uint32_t void_id, type_id, ptr_type_id, function_type_id, function_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -21216,7 +21331,7 @@ index 53e13735937..cc0b63e8284 100644 function_id = compiler->epilogue_function_id; -@@ -5340,7 +5133,7 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * +@@ -5340,7 +5141,7 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * param_id[i] = vkd3d_spirv_build_op_load(builder, type_id, param_id[i], SpvMemoryAccessMaskNone); } @@ -21225,7 +21340,7 @@ index 53e13735937..cc0b63e8284 100644 output_index_id = spirv_compiler_emit_load_invocation_id(compiler); for (i = 0; i < signature->element_count; ++i) -@@ -5348,14 +5141,12 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * +@@ -5348,14 +5149,12 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * if (!compiler->output_info[i].id) continue; @@ -21242,7 +21357,7 @@ index 53e13735937..cc0b63e8284 100644 } vkd3d_spirv_build_op_return(&compiler->spirv_builder); -@@ -5375,28 +5166,11 @@ static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *comp +@@ -5375,28 +5174,11 @@ static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *comp dst.reg.type = VKD3DSPR_OUTPOINTID; dst.reg.idx[0].offset = ~0u; dst.reg.idx[1].offset = ~0u; @@ -21272,7 +21387,7 @@ index 53e13735937..cc0b63e8284 100644 static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler) { const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; -@@ -5410,7 +5184,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp +@@ -5410,7 +5192,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp case VKD3D_SHADER_TYPE_HULL: vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationControl); spirv_compiler_emit_hull_shader_builtins(compiler); @@ -21280,7 +21395,7 @@ index 53e13735937..cc0b63e8284 100644 break; case VKD3D_SHADER_TYPE_DOMAIN: vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation); -@@ -5439,8 +5212,7 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp +@@ -5439,8 +5220,7 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) { vkd3d_spirv_builder_begin_main_function(builder); @@ -21290,7 +21405,7 @@ index 53e13735937..cc0b63e8284 100644 } } -@@ -5522,12 +5294,13 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil +@@ -5522,12 +5302,13 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil reg.type = VKD3DSPR_IDXTEMP; reg.idx[0].offset = temp->register_idx; reg.idx[1].offset = ~0u; @@ -21305,7 +21420,7 @@ index 53e13735937..cc0b63e8284 100644 spirv_compiler_emit_register_debug_name(builder, id, ®); -@@ -6097,6 +5870,7 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, +@@ -6097,6 +5878,7 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; @@ -21313,7 +21428,7 @@ index 53e13735937..cc0b63e8284 100644 uint32_t flags = instruction->flags; /* We don't distinguish between APPEND and COUNTER UAVs. */ -@@ -6104,8 +5878,13 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, +@@ -6104,8 +5886,13 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, if (flags) FIXME("Unhandled UAV flags %#x.\n", flags); @@ -21328,7 +21443,7 @@ index 53e13735937..cc0b63e8284 100644 } static void spirv_compiler_emit_dcl_resource_raw(struct spirv_compiler *compiler, -@@ -6185,10 +5964,9 @@ static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, +@@ -6185,10 +5972,9 @@ static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; @@ -21341,7 +21456,7 @@ index 53e13735937..cc0b63e8284 100644 else if (vkd3d_shader_register_is_input(&dst->reg) || dst->reg.type == VKD3DSPR_PATCHCONST) spirv_compiler_emit_input(compiler, dst, VKD3D_SIV_NONE, VKD3DSIM_NONE); else -@@ -6224,7 +6002,8 @@ static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler, +@@ -6224,7 +6010,8 @@ static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler, { const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; @@ -21351,7 +21466,7 @@ index 53e13735937..cc0b63e8284 100644 spirv_compiler_emit_output(compiler, dst, VKD3D_SIV_NONE); else spirv_compiler_emit_output_register(compiler, dst); -@@ -6242,64 +6021,6 @@ static void spirv_compiler_emit_dcl_output_siv(struct spirv_compiler *compiler, +@@ -6242,64 +6029,6 @@ static void spirv_compiler_emit_dcl_output_siv(struct spirv_compiler *compiler, spirv_compiler_emit_output(compiler, dst, sysval); } @@ -21416,7 +21531,7 @@ index 53e13735937..cc0b63e8284 100644 static void spirv_compiler_emit_dcl_stream(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { -@@ -6495,157 +6216,83 @@ static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler +@@ -6495,157 +6224,83 @@ static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); } @@ -21606,7 +21721,7 @@ index 53e13735937..cc0b63e8284 100644 uint32_t invocation_id; unsigned int i; -@@ -6657,6 +6304,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile +@@ -6657,6 +6312,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile invocation.reg.idx[0].offset = ~0u; invocation.reg.idx[1].offset = ~0u; invocation.reg.idx[2].offset = ~0u; @@ -21614,7 +21729,7 @@ index 53e13735937..cc0b63e8284 100644 invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE; memset(&input_reg, 0, sizeof(input_reg)); -@@ -6664,37 +6312,42 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile +@@ -6664,37 +6320,42 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile input_reg.data_type = VKD3D_DATA_FLOAT; input_reg.idx[0].rel_addr = &invocation; input_reg.idx[2].offset = ~0u; @@ -21667,7 +21782,7 @@ index 53e13735937..cc0b63e8284 100644 } static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, -@@ -6723,95 +6376,6 @@ static void spirv_compiler_emit_hull_shader_barrier(struct spirv_compiler *compi +@@ -6723,95 +6384,6 @@ static void spirv_compiler_emit_hull_shader_barrier(struct spirv_compiler *compi SpvScopeWorkgroup, SpvScopeInvocation, SpvMemorySemanticsMaskNone); } @@ -21763,7 +21878,7 @@ index 53e13735937..cc0b63e8284 100644 static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -@@ -6854,46 +6418,21 @@ static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler +@@ -6854,46 +6426,21 @@ static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler static void spirv_compiler_emit_hull_shader_main(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -21815,7 +21930,7 @@ index 53e13735937..cc0b63e8284 100644 spirv_compiler_emit_shader_epilogue_invocation(compiler); vkd3d_spirv_build_op_return(builder); vkd3d_spirv_build_op_function_end(builder); -@@ -7575,10 +7114,10 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co +@@ -7575,10 +7122,10 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co static void spirv_compiler_emit_return(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { @@ -21828,7 +21943,7 @@ index 53e13735937..cc0b63e8284 100644 spirv_compiler_emit_shader_epilogue_invocation(compiler); vkd3d_spirv_build_op_return(builder); -@@ -7972,12 +7511,15 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c +@@ -7972,12 +7519,15 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c if (cf_info) cf_info->inside_block = false; @@ -21844,7 +21959,7 @@ index 53e13735937..cc0b63e8284 100644 case VKD3DSIH_TEXKILL: spirv_compiler_emit_kill(compiler, instruction); break; -@@ -8256,7 +7798,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, +@@ -8256,7 +7806,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, image_operands[image_operand_count++] = spirv_compiler_emit_texel_offset(compiler, instruction, image.resource_type_info); } @@ -21853,7 +21968,7 @@ index 53e13735937..cc0b63e8284 100644 { operands_mask |= SpvImageOperandsSampleMask; image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, -@@ -9521,58 +9063,6 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, +@@ -9521,58 +9071,6 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, vkd3d_spirv_build_op_end_primitive(builder); } @@ -21912,7 +22027,7 @@ index 53e13735937..cc0b63e8284 100644 /* This function is called after declarations are processed. */ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) { -@@ -9581,8 +9071,6 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) +@@ -9581,8 +9079,6 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) if (compiler->xfb_info && compiler->xfb_info->element_count && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) spirv_compiler_emit_point_size(compiler); @@ -21921,7 +22036,7 @@ index 53e13735937..cc0b63e8284 100644 } static bool is_dcl_instruction(enum vkd3d_shader_opcode handler_idx) -@@ -9660,9 +9148,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9660,9 +9156,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_OUTPUT_SIV: spirv_compiler_emit_dcl_output_siv(compiler, instruction); break; @@ -21931,7 +22046,7 @@ index 53e13735937..cc0b63e8284 100644 case VKD3DSIH_DCL_STREAM: spirv_compiler_emit_dcl_stream(compiler, instruction); break; -@@ -9699,10 +9184,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9699,10 +9192,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_THREAD_GROUP: spirv_compiler_emit_dcl_thread_group(compiler, instruction); break; @@ -21942,7 +22057,7 @@ index 53e13735937..cc0b63e8284 100644 case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: -@@ -9826,6 +9307,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9826,6 +9315,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_CONTINUE: case VKD3DSIH_CONTINUEP: case VKD3DSIH_DEFAULT: @@ -21950,7 +22065,7 @@ index 53e13735937..cc0b63e8284 100644 case VKD3DSIH_ELSE: case VKD3DSIH_ENDIF: case VKD3DSIH_ENDLOOP: -@@ -9947,28 +9429,55 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9947,28 +9437,55 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, return ret; } @@ -21979,7 +22094,7 @@ index 53e13735937..cc0b63e8284 100644 + && (result = instruction_array_flatten_hull_shader_phases(&instructions)) >= 0) + { + result = instruction_array_normalise_hull_shader_control_point_io(&instructions, -+ &parser->shader_desc.input_signature); ++ &compiler->input_signature); + } + if (result >= 0) + result = instruction_array_normalise_io_registers(&instructions, parser->shader_version.type, @@ -22014,7 +22129,7 @@ index 53e13735937..cc0b63e8284 100644 else vkd3d_spirv_build_op_function_end(builder); -@@ -10023,23 +9532,23 @@ int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +@@ -10023,23 +9540,23 @@ int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, return VKD3D_OK; } @@ -30054,6 +30169,28 @@ index 8c050cfeb32..ea7b6859cc1 100644 } else { +diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c +index c964ea8fe3a..5e46b467252 100644 +--- a/libs/vkd3d/libs/vkd3d/state.c ++++ b/libs/vkd3d/libs/vkd3d/state.c +@@ -1958,7 +1958,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device, + + const struct vkd3d_shader_compile_option options[] = + { +- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7}, ++ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_8}, + {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, + {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0}, + }; +@@ -2011,7 +2011,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER + + const struct vkd3d_shader_compile_option options[] = + { +- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7}, ++ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_8}, + {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, + }; + diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index 77b795d6278..b0150754434 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-2a3413e0f01524f2068bce12100906eb220.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-2a3413e0f01524f2068bce12100906eb220.patch similarity index 61% rename from patches/vkd3d-latest/0005-Updated-vkd3d-to-2a3413e0f01524f2068bce12100906eb220.patch rename to patches/vkd3d-latest/0002-Updated-vkd3d-to-2a3413e0f01524f2068bce12100906eb220.patch index cd429a8c..5a460b60 100644 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-2a3413e0f01524f2068bce12100906eb220.patch +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-2a3413e0f01524f2068bce12100906eb220.patch @@ -1,33 +1,54 @@ -From c622a898f3be5415056811aa6c4662ce4e9c43d0 Mon Sep 17 00:00:00 2001 +From 7bc54927391a36890539a83febc49679486a8bc9 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes -Date: Fri, 7 Jul 2023 09:53:08 +1000 -Subject: [PATCH] Updated vkd3d to 2a3413e0f01524f2068bce12100906eb2200c965. +Date: Wed, 28 Jun 2023 16:27:03 +1000 +Subject: [PATCH 2/2] Updated vkd3d to + 2a3413e0f01524f2068bce12100906eb2200c965. --- + include/d3d12.idl | 4 +- libs/vkd3d/Makefile.in | 1 + libs/vkd3d/include/private/vkd3d_common.h | 2 + .../include/private/vkd3d_shader_utils.h | 63 + - libs/vkd3d/include/vkd3d_shader.h | 5 + + libs/vkd3d/include/vkd3d.h | 35 + + libs/vkd3d/include/vkd3d_shader.h | 116 +- libs/vkd3d/libs/vkd3d-common/debug.c | 17 +- libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 48 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 28 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 311 ++++- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 16 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 919 ++++++++++++++ - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 36 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 10 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1112 ++++++++++------- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 117 +- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 65 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 919 +++++++++++++ + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 59 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 19 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1196 ++++++++++------- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 189 ++- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 363 +++-- libs/vkd3d/libs/vkd3d-shader/spirv.c | 21 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 23 + - .../libs/vkd3d-shader/vkd3d_shader_main.c | 48 + - .../libs/vkd3d-shader/vkd3d_shader_private.h | 17 + - libs/vkd3d/libs/vkd3d/command.c | 38 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 37 +- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 164 ++- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 20 + + libs/vkd3d/libs/vkd3d/command.c | 222 ++- + libs/vkd3d/libs/vkd3d/device.c | 2 + + libs/vkd3d/libs/vkd3d/resource.c | 51 +- libs/vkd3d/libs/vkd3d/state.c | 10 +- - 20 files changed, 1976 insertions(+), 620 deletions(-) + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 47 +- + 25 files changed, 3143 insertions(+), 789 deletions(-) create mode 100644 libs/vkd3d/include/private/vkd3d_shader_utils.h create mode 100644 libs/vkd3d/libs/vkd3d-shader/dxil.c +diff --git a/include/d3d12.idl b/include/d3d12.idl +index 4fec32d2656..c6064939e1f 100644 +--- a/include/d3d12.idl ++++ b/include/d3d12.idl +@@ -2243,8 +2243,8 @@ interface ID3D12CommandQueue : ID3D12Pageable + ID3D12Heap *heap, + UINT range_count, + const D3D12_TILE_RANGE_FLAGS *range_flags, +- UINT *heap_range_offsets, +- UINT *range_tile_counts, ++ const UINT *heap_range_offsets, ++ const UINT *range_tile_counts, + D3D12_TILE_MAPPING_FLAGS flags); + + void CopyTileMappings(ID3D12Resource *dst_resource, diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in index 1ba0e9f71e1..f647af11d07 100644 --- a/libs/vkd3d/Makefile.in @@ -129,11 +150,70 @@ index 00000000000..00052a89988 +} + +#endif /* __VKD3D_SHADER_UTILS_H */ +diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h +index 72ed3ced671..2ccda47248a 100644 +--- a/libs/vkd3d/include/vkd3d.h ++++ b/libs/vkd3d/include/vkd3d.h +@@ -207,7 +207,42 @@ VKD3D_API VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device); + VKD3D_API struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device); + + VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue); ++ ++/** ++ * Acquire the Vulkan queue backing a command queue. ++ * ++ * While a queue is acquired by the client, it is locked so that ++ * neither the vkd3d library nor other threads can submit work to ++ * it. For that reason it should be released as soon as possible with ++ * vkd3d_release_vk_queue(). The lock is not reentrant, so the same ++ * queue must not be acquired more than once by the same thread. ++ * ++ * Work submitted through the Direct3D 12 API exposed by vkd3d is not ++ * always immediately submitted to the Vulkan queue; sometimes it is ++ * kept in another internal queue, which might not necessarily be ++ * empty at the time vkd3d_acquire_vk_queue() is called. For this ++ * reason, work submitted directly to the Vulkan queue might appear to ++ * the Vulkan driver as being submitted before other work submitted ++ * though the Direct3D 12 API. If this is not desired, it is ++ * recommended to synchronize work submission using an ID3D12Fence ++ * object, by submitting to the queue a signal operation after all the ++ * Direct3D 12 work is submitted and waiting for it before calling ++ * vkd3d_acquire_vk_queue(). ++ * ++ * \since 1.0 ++ */ + VKD3D_API VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue); ++ ++/** ++ * Release the Vulkan queue backing a command queue. ++ * ++ * This must be paired to an earlier corresponding ++ * vkd3d_acquire_vk_queue(). After this function is called, the Vulkan ++ * queue returned by vkd3d_acquire_vk_queue() must not be used any ++ * more. ++ * ++ * \since 1.0 ++ */ + VKD3D_API void vkd3d_release_vk_queue(ID3D12CommandQueue *queue); + + VKD3D_API HRESULT vkd3d_create_image_resource(ID3D12Device *device, diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 5a10dbe087b..6c17a07b9d2 100644 +index 274241546ea..6c17a07b9d2 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -616,6 +616,11 @@ enum vkd3d_shader_source_type +@@ -85,6 +85,11 @@ enum vkd3d_shader_structure_type + * \since 1.3 + */ + VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO, ++ /** ++ * The structure is a vkd3d_shader_scan_signature_info structure. ++ * \since 1.9 ++ */ ++ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), + }; +@@ -611,6 +616,11 @@ enum vkd3d_shader_source_type * model 1, 2, and 3 shaders. \since 1.3 */ VKD3D_SHADER_SOURCE_D3D_BYTECODE, @@ -145,6 +225,173 @@ index 5a10dbe087b..6c17a07b9d2 100644 VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), }; +@@ -620,7 +630,7 @@ enum vkd3d_shader_target_type + { + /** + * The shader has no type or is to be ignored. This is not a valid value +- * for vkd3d_shader_compile() or vkd3d_shader_scan(). ++ * for vkd3d_shader_compile(). + */ + VKD3D_SHADER_TARGET_NONE, + /** +@@ -1551,6 +1561,64 @@ static inline uint32_t vkd3d_shader_create_swizzle(enum vkd3d_shader_swizzle_com + | ((w & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(3)); + } + ++/** ++ * A chained structure containing descriptions of shader inputs and outputs. ++ * ++ * This structure is currently implemented only for DXBC and legacy D3D bytecode ++ * source types. ++ * For DXBC shaders, the returned information is parsed directly from the ++ * signatures embedded in the DXBC shader. ++ * For legacy D3D shaders, the returned information is synthesized based on ++ * registers declared or used by shader instructions. ++ * For all other shader types, the structure is zeroed. ++ * ++ * All members (except for \ref type and \ref next) are output-only. ++ * ++ * This structure is passed to vkd3d_shader_scan() and extends ++ * vkd3d_shader_compile_info. ++ * ++ * Members of this structure are allocated by vkd3d-shader and should be freed ++ * with vkd3d_shader_free_scan_signature_info() when no longer needed. ++ * ++ * All signatures may contain pointers into the input shader, and should only ++ * be accessed while the input shader remains valid. ++ * ++ * Signature elements are synthesized from legacy Direct3D bytecode as follows: ++ * - The \ref vkd3d_shader_signature_element.semantic_name field is set to an ++ * uppercase string corresponding to the HLSL name for the usage, e.g. ++ * "POSITION", "BLENDWEIGHT", "COLOR", "PSIZE", etc. ++ * - The \ref vkd3d_shader_signature_element.semantic_index field is set to the ++ * usage index. ++ * - The \ref vkd3d_shader_signature_element.stream_index is always 0. ++ * ++ * Signature elements are synthesized for any input or output register declared ++ * or used in a legacy Direct3D bytecode shader, including the following: ++ * - Shader model 1 and 2 colour and texture coordinate registers. ++ * - The shader model 1 pixel shader output register. ++ * - Shader model 1 and 2 vertex shader output registers (position, fog, and ++ * point size). ++ * - Shader model 3 pixel shader system value input registers (pixel position ++ * and face). ++ * ++ * \since 1.9 ++ */ ++struct vkd3d_shader_scan_signature_info ++{ ++ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO. */ ++ enum vkd3d_shader_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ ++ const void *next; ++ ++ /** The shader input varyings. */ ++ struct vkd3d_shader_signature input; ++ ++ /** The shader output varyings. */ ++ struct vkd3d_shader_signature output; ++ ++ /** The shader patch constant varyings. */ ++ struct vkd3d_shader_signature patch_constant; ++}; ++ + #ifdef LIBVKD3D_SHADER_SOURCE + # define VKD3D_SHADER_API VKD3D_EXPORT + #else +@@ -1625,6 +1693,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported + * following chained structures: + * - vkd3d_shader_interface_info + * - vkd3d_shader_scan_descriptor_info ++ * - vkd3d_shader_scan_signature_info + * - vkd3d_shader_spirv_domain_shader_target_info + * - vkd3d_shader_spirv_target_info + * - vkd3d_shader_transform_feedback_info +@@ -1784,6 +1853,26 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver + * Parse shader source code or byte code, returning various types of requested + * information. + * ++ * The \a source_type member of \a compile_info must be set to the type of the ++ * shader. ++ * ++ * The \a target_type member may be set to VKD3D_SHADER_TARGET_NONE, in which ++ * case vkd3d_shader_scan() will return information about the shader in ++ * isolation. Alternatively, it may be set to a valid compilation target for the ++ * shader, in which case vkd3d_shader_scan() will return information that ++ * reflects the interface for a shader as it will be compiled to that target. ++ * In this case other chained structures may be appended to \a compile_info as ++ * they would be passed to vkd3d_shader_compile(), and interpreted accordingly, ++ * such as vkd3d_shader_spirv_target_info. ++ * ++ * (For a hypothetical example, suppose the source shader distinguishes float ++ * and integer texture data, but the target environment does not support integer ++ * textures. In this case vkd3d_shader_compile() might translate integer ++ * operations to float. Accordingly using VKD3D_SHADER_TARGET_NONE would ++ * accurately report whether the texture expects integer or float data, but ++ * using the relevant specific target type would report ++ * VKD3D_SHADER_RESOURCE_DATA_FLOAT.) ++ * + * Currently this function supports the following code types: + * - VKD3D_SHADER_SOURCE_DXBC_TPF + * +@@ -1791,6 +1880,7 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver + * \n + * The DXBC_TPF scanner supports the following chained structures: + * - vkd3d_shader_scan_descriptor_info ++ * - vkd3d_shader_scan_signature_info + * \n + * Although the \a compile_info parameter is read-only, chained structures + * passed to this function need not be, and may serve as output parameters, +@@ -1827,12 +1917,18 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_descriptor_info( + struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info); + + /** +- * Read the input signature of a compiled shader, returning a structural ++ * Read the input signature of a compiled DXBC shader, returning a structural + * description which can be easily parsed by C code. + * + * This function parses a compiled shader. To parse a standalone root signature, + * use vkd3d_shader_parse_root_signature(). + * ++ * This function only parses DXBC shaders, and only retrieves the input ++ * signature. To retrieve signatures from other shader types, or other signature ++ * types, use vkd3d_shader_scan() and struct vkd3d_shader_scan_signature_info. ++ * This function returns the same input signature that is returned in ++ * struct vkd3d_shader_scan_signature_info. ++ * + * \param dxbc Compiled byte code, in DXBC format. + * + * \param signature Output location in which the parsed root signature will be +@@ -2022,6 +2118,19 @@ VKD3D_SHADER_API int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxb + VKD3D_SHADER_API int vkd3d_shader_serialize_dxbc(size_t section_count, + const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); + ++/** ++ * Free members of struct vkd3d_shader_scan_signature_info allocated by ++ * vkd3d_shader_scan(). ++ * ++ * This function may free members of vkd3d_shader_scan_signature_info, but ++ * does not free the structure itself. ++ * ++ * \param info Scan information to free. ++ * ++ * \since 1.9 ++ */ ++VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info); ++ + #endif /* VKD3D_SHADER_NO_PROTOTYPES */ + + /** Type of vkd3d_shader_get_version(). */ +@@ -2087,6 +2196,9 @@ typedef int (*PFN_vkd3d_shader_parse_dxbc)(const struct vkd3d_shader_code *dxbc, + typedef int (*PFN_vkd3d_shader_serialize_dxbc)(size_t section_count, + const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); + ++/** Type of vkd3d_shader_free_scan_signature_info(). \since 1.9 */ ++typedef void (*PFN_vkd3d_shader_free_scan_signature_info)(struct vkd3d_shader_scan_signature_info *info); ++ + #ifdef __cplusplus + } + #endif /* __cplusplus */ diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c index b363efbd360..aa7df5bd764 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c @@ -255,7 +502,7 @@ index 0a821b5c878..d72402eb250 100644 const char *name; int i; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index d2a4666a50a..369112ce18d 100644 +index 712613ac13b..369112ce18d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -260,9 +260,9 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = @@ -282,10 +529,249 @@ index d2a4666a50a..369112ce18d 100644 /* Control flow */ {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, -@@ -729,6 +729,16 @@ static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser * - semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); +@@ -490,6 +490,255 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader + dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; } ++static struct signature_element *find_signature_element(const struct shader_signature *signature, ++ const char *semantic_name, unsigned int semantic_index) ++{ ++ struct signature_element *e = signature->elements; ++ unsigned int i; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ if (!ascii_strcasecmp(e[i].semantic_name, semantic_name) ++ && e[i].semantic_index == semantic_index) ++ return &e[i]; ++ } ++ ++ return NULL; ++} ++ ++static struct signature_element *find_signature_element_by_register_index( ++ const struct shader_signature *signature, unsigned int register_index) ++{ ++ struct signature_element *e = signature->elements; ++ unsigned int i; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ if (e[i].register_index == register_index) ++ return &e[i]; ++ } ++ ++ return NULL; ++} ++ ++static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, ++ const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, ++ unsigned int register_index, bool is_dcl, unsigned int mask) ++{ ++ struct shader_signature *signature; ++ struct signature_element *element; ++ ++ if (output) ++ signature = &sm1->p.shader_desc.output_signature; ++ else ++ signature = &sm1->p.shader_desc.input_signature; ++ ++ if ((element = find_signature_element(signature, name, index))) ++ { ++ element->mask |= mask; ++ if (!is_dcl) ++ element->used_mask |= mask; ++ return true; ++ } ++ ++ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, ++ signature->element_count + 1, sizeof(*signature->elements))) ++ return false; ++ element = &signature->elements[signature->element_count++]; ++ ++ element->semantic_name = name; ++ element->semantic_index = index; ++ element->stream_index = 0; ++ element->sysval_semantic = sysval; ++ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ element->register_index = register_index; ++ element->register_count = 1; ++ element->mask = mask; ++ element->used_mask = is_dcl ? 0 : mask; ++ element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; ++ ++ return true; ++} ++ ++static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, ++ unsigned int register_index, unsigned int mask) ++{ ++ struct shader_signature *signature; ++ struct signature_element *element; ++ ++ if (output) ++ signature = &sm1->p.shader_desc.output_signature; ++ else ++ signature = &sm1->p.shader_desc.input_signature; ++ ++ if (!(element = find_signature_element_by_register_index(signature, register_index))) ++ { ++ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC, ++ "%s register %u was used without being declared.", output ? "Output" : "Input", register_index); ++ return; ++ } ++ ++ element->used_mask |= mask; ++} ++ ++static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, ++ const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) ++{ ++ unsigned int register_index = reg->idx[0].offset; ++ ++ switch (reg->type) ++ { ++ case VKD3DSPR_TEMP: ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL ++ && sm1->p.shader_version.major == 1 && !register_index) ++ return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_NONE, 0, is_dcl, mask); ++ return true; ++ ++ case VKD3DSPR_INPUT: ++ /* For vertex shaders or sm3 pixel shaders, we should have already ++ * had a DCL instruction. Otherwise, this is a colour input. */ ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX || sm1->p.shader_version.major == 3) ++ { ++ add_signature_mask(sm1, false, register_index, mask); ++ return true; ++ } ++ return add_signature_element(sm1, false, "COLOR", register_index, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); ++ ++ case VKD3DSPR_TEXTURE: ++ /* For vertex shaders, this is ADDR. */ ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) ++ return true; ++ return add_signature_element(sm1, false, "TEXCOORD", register_index, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); ++ ++ case VKD3DSPR_OUTPUT: ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) ++ { ++ /* For sm < 2 vertex shaders, this is TEXCRDOUT. ++ * ++ * For sm3 vertex shaders, this is OUTPUT, but we already ++ * should have had a DCL instruction. */ ++ if (sm1->p.shader_version.major == 3) ++ { ++ add_signature_mask(sm1, true, register_index, mask); ++ return true; ++ } ++ return add_signature_element(sm1, true, "TEXCOORD", register_index, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); ++ } ++ /* fall through */ ++ ++ case VKD3DSPR_ATTROUT: ++ case VKD3DSPR_COLOROUT: ++ return add_signature_element(sm1, true, "COLOR", register_index, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); ++ ++ case VKD3DSPR_DEPTHOUT: ++ return add_signature_element(sm1, true, "DEPTH", 0, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); ++ ++ case VKD3DSPR_RASTOUT: ++ switch (register_index) ++ { ++ case 0: ++ return add_signature_element(sm1, true, "POSITION", 0, ++ VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); ++ ++ case 1: ++ return add_signature_element(sm1, true, "FOG", 0, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); ++ ++ case 2: ++ return add_signature_element(sm1, true, "PSIZE", 0, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); ++ ++ default: ++ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, ++ "Invalid rasterizer output index %u.", register_index); ++ return true; ++ } ++ ++ case VKD3DSPR_MISCTYPE: ++ switch (register_index) ++ { ++ case 0: ++ return add_signature_element(sm1, false, "VPOS", 0, ++ VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); ++ ++ case 1: ++ return add_signature_element(sm1, false, "VFACE", 0, ++ VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1); ++ ++ default: ++ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, ++ "Invalid miscellaneous fragment input index %u.", register_index); ++ return true; ++ } ++ ++ default: ++ return true; ++ } ++} ++ ++static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, ++ const struct vkd3d_shader_semantic *semantic) ++{ ++ const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; ++ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; ++ unsigned int mask = semantic->resource.reg.write_mask; ++ bool output; ++ ++ static const char sm1_semantic_names[][13] = ++ { ++ [VKD3D_DECL_USAGE_POSITION ] = "POSITION", ++ [VKD3D_DECL_USAGE_BLEND_WEIGHT ] = "BLENDWEIGHT", ++ [VKD3D_DECL_USAGE_BLEND_INDICES] = "BLENDINDICES", ++ [VKD3D_DECL_USAGE_NORMAL ] = "NORMAL", ++ [VKD3D_DECL_USAGE_PSIZE ] = "PSIZE", ++ [VKD3D_DECL_USAGE_TEXCOORD ] = "TEXCOORD", ++ [VKD3D_DECL_USAGE_TANGENT ] = "TANGENT", ++ [VKD3D_DECL_USAGE_BINORMAL ] = "BINORMAL", ++ [VKD3D_DECL_USAGE_TESS_FACTOR ] = "TESSFACTOR", ++ [VKD3D_DECL_USAGE_POSITIONT ] = "POSITIONT", ++ [VKD3D_DECL_USAGE_COLOR ] = "COLOR", ++ [VKD3D_DECL_USAGE_FOG ] = "FOG", ++ [VKD3D_DECL_USAGE_DEPTH ] = "DEPTH", ++ [VKD3D_DECL_USAGE_SAMPLE ] = "SAMPLE", ++ }; ++ ++ if (reg->type == VKD3DSPR_OUTPUT) ++ output = true; ++ else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE) ++ output = false; ++ else /* vpos and vface don't have a semantic. */ ++ return add_signature_element_from_register(sm1, reg, true, mask); ++ ++ /* sm2 pixel shaders use DCL but don't provide a semantic. */ ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && sm1->p.shader_version.major == 2) ++ return add_signature_element_from_register(sm1, reg, true, mask); ++ ++ /* With the exception of vertex POSITION output, none of these are system ++ * values. Pixel POSITION input is not equivalent to SV_Position; the closer ++ * equivalent is VPOS, which is not declared as a semantic. */ ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX ++ && output && semantic->usage == VKD3D_DECL_USAGE_POSITION) ++ sysval = VKD3D_SHADER_SV_POSITION; ++ ++ return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage], ++ semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); ++} ++ +static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, const struct vkd3d_shader_register *reg, unsigned int mask) +{ + uint32_t register_index = reg->idx[0].offset; @@ -299,26 +785,53 @@ index d2a4666a50a..369112ce18d 100644 /* Read a parameter token from the input stream, and possibly a relative * addressing token. */ static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, -@@ -1083,7 +1093,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str +@@ -640,6 +889,8 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, + range = &semantic->resource.range; + range->space = 0; + range->first = range->last = semantic->resource.reg.reg.idx[0].offset; ++ ++ add_signature_element_from_semantic(sm1, semantic); + } + + static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, +@@ -744,6 +995,14 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, + } + } + ++static unsigned int mask_from_swizzle(unsigned int swizzle) ++{ ++ return (1u << vkd3d_swizzle_get_component(swizzle, 0)) ++ | (1u << vkd3d_swizzle_get_component(swizzle, 1)) ++ | (1u << vkd3d_swizzle_get_component(swizzle, 2)) ++ | (1u << vkd3d_swizzle_get_component(swizzle, 3)); ++} ++ + static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) + { + struct vkd3d_shader_src_param *src_params, *predicate; +@@ -832,7 +1091,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str + { + /* Destination token */ if (ins->dst_count) - { ++ { shader_sm1_read_dst_param(sm1, &p, dst_param); -- add_signature_element_from_register(sm1, &dst_param->reg, false, dst_param->write_mask); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask); - } ++ } /* Predication token */ -@@ -1094,8 +1104,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str + if (ins->predicate) +@@ -840,7 +1102,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str + + /* Other source tokens */ for (i = 0; i < ins->src_count; ++i) - { ++ { shader_sm1_read_src_param(sm1, &p, &src_params[i]); -- add_signature_element_from_register(sm1, &src_params[i].reg, -- false, mask_from_swizzle(src_params[i].swizzle)); + shader_sm1_scan_register(sm1, &src_params[i].reg, mask_from_swizzle(src_params[i].swizzle)); - } ++ } } -@@ -1809,12 +1818,13 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + if (sm1->abort) +@@ -1553,12 +1818,13 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ for (i = 0; i < ctx->constant_defs.count; ++i) { @@ -333,7 +846,7 @@ index d2a4666a50a..369112ce18d 100644 }; if (ctx->profile->major_version > 1) -@@ -1823,7 +1833,7 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ +@@ -1567,7 +1833,7 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ write_sm1_dst_register(buffer, ®); for (x = 0; x < 4; ++x) @@ -342,6 +855,53 @@ index d2a4666a50a..369112ce18d 100644 } } +@@ -1844,6 +2110,35 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + } + ++static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++{ ++ const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); ++ ++ switch (jump->type) ++ { ++ case HLSL_IR_JUMP_DISCARD_NEG: ++ { ++ struct hlsl_reg *reg = &jump->condition.node->reg; ++ ++ struct sm1_instruction instr = ++ { ++ .opcode = VKD3D_SM1_OP_TEXKILL, ++ ++ .dst.type = D3DSPR_TEMP, ++ .dst.reg = reg->id, ++ .dst.writemask = reg->writemask, ++ .has_dst = 1, ++ }; ++ ++ write_sm1_instruction(ctx, buffer, &instr); ++ break; ++ } ++ ++ default: ++ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); ++ } ++} ++ + static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_load *load = hlsl_ir_load(instr); +@@ -2038,6 +2333,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + write_sm1_expr(ctx, buffer, instr); + break; + ++ case HLSL_IR_JUMP: ++ write_sm1_jump(ctx, buffer, instr); ++ break; ++ + case HLSL_IR_LOAD: + write_sm1_load(ctx, buffer, instr); + break; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index 3e3f06faeb5..716b7bdb721 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -1311,10 +1871,48 @@ index 00000000000..67dcd26a0e0 + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index acc2a89cce0..4e9af15c1be 100644 +index ba5bcfbfaf0..4e9af15c1be 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -2124,18 +2124,18 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) +@@ -1432,7 +1432,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v + } + + struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, +- const struct vkd3d_shader_location *loc) ++ struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_jump *jump; + +@@ -1440,6 +1440,7 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type + return NULL; + init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); + jump->type = type; ++ hlsl_src_from_node(&jump->condition, condition); + return &jump->node; + } + +@@ -1585,9 +1586,9 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma + return dst; + } + +-static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) ++static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_jump *src) + { +- return hlsl_new_jump(ctx, src->type, &src->node.loc); ++ return hlsl_new_jump(ctx, src->type, map_instr(map, src->condition.node), &src->node.loc); + } + + static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) +@@ -1728,7 +1729,7 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + return clone_index(ctx, map, hlsl_ir_index(instr)); + + case HLSL_IR_JUMP: +- return clone_jump(ctx, hlsl_ir_jump(instr)); ++ return clone_jump(ctx, map, hlsl_ir_jump(instr)); + + case HLSL_IR_LOAD: + return clone_load(ctx, map, hlsl_ir_load(instr)); +@@ -2123,18 +2124,18 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) { static const char * const names[] = { @@ -1345,23 +1943,23 @@ index acc2a89cce0..4e9af15c1be 100644 }; if (type >= ARRAY_SIZE(names)) -@@ -2147,11 +2147,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) +@@ -2146,10 +2147,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) { static const char * const names[] = { -- [HLSL_IR_JUMP_BREAK] = "HLSL_IR_JUMP_BREAK", -- [HLSL_IR_JUMP_CONTINUE] = "HLSL_IR_JUMP_CONTINUE", +- "HLSL_IR_JUMP_BREAK", +- "HLSL_IR_JUMP_CONTINUE", +- "HLSL_IR_JUMP_DISCARD", +- "HLSL_IR_JUMP_RETURN", + [HLSL_IR_JUMP_BREAK] = "HLSL_IR_JUMP_BREAK", + [HLSL_IR_JUMP_CONTINUE] = "HLSL_IR_JUMP_CONTINUE", - [HLSL_IR_JUMP_DISCARD_NEG] = "HLSL_IR_JUMP_DISCARD_NEG", -- [HLSL_IR_JUMP_DISCARD_NZ] = "HLSL_IR_JUMP_DISCARD_NZ", -- [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", ++ [HLSL_IR_JUMP_DISCARD_NEG] = "HLSL_IR_JUMP_DISCARD_NEG", + [HLSL_IR_JUMP_DISCARD_NZ] = "HLSL_IR_JUMP_DISCARD_NZ", + [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", }; assert(type < ARRAY_SIZE(names)); -@@ -2339,7 +2339,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) +@@ -2337,7 +2339,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_COS] = "cos", [HLSL_OP1_COS_REDUCED] = "cos_reduced", [HLSL_OP1_DSX] = "dsx", @@ -1373,8 +1971,50 @@ index acc2a89cce0..4e9af15c1be 100644 [HLSL_OP1_EXP2] = "exp2", [HLSL_OP1_FRACT] = "fract", [HLSL_OP1_LOG2] = "log2", +@@ -2418,8 +2424,12 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i + vkd3d_string_buffer_printf(buffer, "continue"); + break; + +- case HLSL_IR_JUMP_DISCARD: +- vkd3d_string_buffer_printf(buffer, "discard"); ++ case HLSL_IR_JUMP_DISCARD_NEG: ++ vkd3d_string_buffer_printf(buffer, "discard_neg"); ++ break; ++ ++ case HLSL_IR_JUMP_DISCARD_NZ: ++ vkd3d_string_buffer_printf(buffer, "discard_nz"); + break; + + case HLSL_IR_JUMP_RETURN: +@@ -2703,6 +2713,7 @@ static void free_ir_if(struct hlsl_ir_if *if_node) + + static void free_ir_jump(struct hlsl_ir_jump *jump) + { ++ hlsl_src_remove(&jump->condition); + vkd3d_free(jump); + } + +@@ -3127,8 +3138,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + + for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) + { +- unsigned int n_variants = 0; + const char *const *variants; ++ unsigned int n_variants; + + switch (bt) + { +@@ -3148,6 +3159,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + break; + + default: ++ n_variants = 0; ++ variants = NULL; + break; + } + diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index f2d0a36b045..17ac36a57c6 100644 +index bce48e94b24..17ac36a57c6 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -502,7 +502,11 @@ enum hlsl_ir_expr_op @@ -1389,7 +2029,26 @@ index f2d0a36b045..17ac36a57c6 100644 HLSL_OP1_EXP2, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, -@@ -806,7 +810,11 @@ struct hlsl_ctx +@@ -558,7 +562,8 @@ enum hlsl_ir_jump_type + { + HLSL_IR_JUMP_BREAK, + HLSL_IR_JUMP_CONTINUE, +- HLSL_IR_JUMP_DISCARD, ++ HLSL_IR_JUMP_DISCARD_NEG, ++ HLSL_IR_JUMP_DISCARD_NZ, + HLSL_IR_JUMP_RETURN, + }; + +@@ -566,6 +571,8 @@ struct hlsl_ir_jump + { + struct hlsl_ir_node node; + enum hlsl_ir_jump_type type; ++ /* Argument used for HLSL_IR_JUMP_DISCARD_NZ and HLSL_IR_JUMP_DISCARD_NEG. */ ++ struct hlsl_src condition; + }; + + struct hlsl_ir_swizzle +@@ -803,7 +810,11 @@ struct hlsl_ctx * Only used for SM1 profiles. */ struct hlsl_constant_defs { @@ -1402,8 +2061,26 @@ index f2d0a36b045..17ac36a57c6 100644 size_t count, size; } constant_defs; /* Number of temp. registers required for the shader to run, i.e. the largest temp register +@@ -1120,7 +1131,7 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond + struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, +- enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc); ++ enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); + + void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); + +@@ -1132,6 +1143,8 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls + const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, ++ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc); + + struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); + struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index b487c5c138f..42fa2129e40 100644 +index 0e07fe578e1..42fa2129e40 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -53,7 +53,7 @@ struct parse_initializer @@ -1487,7 +2164,26 @@ index b487c5c138f..42fa2129e40 100644 static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) { -@@ -402,19 +442,19 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, +@@ -273,9 +313,6 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ + return hlsl_types_are_componentwise_equal(ctx, src, dst); + } + +-static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +- unsigned int comp, const struct vkd3d_shader_location *loc); +- + static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) + { +@@ -333,7 +370,7 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, + + dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); + +- if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) ++ if (!(component_load = hlsl_add_load_component(ctx, instrs, node, src_idx, loc))) + return NULL; + + if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) +@@ -405,29 +442,29 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, return modifiers | mod; } @@ -1511,7 +2207,10 @@ index b487c5c138f..42fa2129e40 100644 hlsl_block_init(&then_block); -@@ -424,7 +464,7 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_lis +- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &condition->loc))) + return false; + hlsl_block_add_instr(&then_block, jump); if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) return false; @@ -1520,7 +2219,7 @@ index b487c5c138f..42fa2129e40 100644 return true; } -@@ -451,10 +491,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att +@@ -454,10 +491,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att return false; } @@ -1534,7 +2233,16 @@ index b487c5c138f..42fa2129e40 100644 struct hlsl_ir_node *loop; unsigned int i; -@@ -488,38 +528,34 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const +@@ -476,7 +513,7 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const + } + else + { +- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented.\n"); ++ hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); + } + } + else if (!strcmp(attr->name, "loop") +@@ -491,38 +528,34 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const } } @@ -1587,7 +2295,7 @@ index b487c5c138f..42fa2129e40 100644 return NULL; } -@@ -536,7 +572,7 @@ static unsigned int initializer_size(const struct parse_initializer *initializer +@@ -539,7 +572,7 @@ static unsigned int initializer_size(const struct parse_initializer *initializer static void free_parse_initializer(struct parse_initializer *initializer) { @@ -1596,7 +2304,7 @@ index b487c5c138f..42fa2129e40 100644 vkd3d_free(initializer->args); } -@@ -622,7 +658,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod +@@ -625,7 +658,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod return NULL; } @@ -1605,7 +2313,7 @@ index b487c5c138f..42fa2129e40 100644 struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) { struct hlsl_type *return_type = ctx->cur_function->return_type; -@@ -634,7 +670,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, +@@ -637,7 +670,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, { struct hlsl_ir_node *store; @@ -1614,16 +2322,25 @@ index b487c5c138f..42fa2129e40 100644 return false; if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) -@@ -655,7 +691,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, +@@ -656,14 +689,14 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); + } - if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, NULL, loc))) +- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, NULL, loc))) return false; - list_add_tail(instrs, &jump->entry); + hlsl_block_add_instr(block, jump); return true; } -@@ -701,7 +737,7 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct + +-static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, ++struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, + unsigned int comp, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *load, *store; +@@ -704,7 +737,7 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct return true; } @@ -1632,7 +2349,33 @@ index b487c5c138f..42fa2129e40 100644 enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc); -@@ -1104,20 +1140,50 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str +@@ -830,6 +863,16 @@ static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) + return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; + } + ++static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) ++{ ++ return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); ++} ++ ++static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) ++{ ++ return !shader_profile_version_ge(ctx, major, minor); ++} ++ + static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + struct hlsl_type *type, unsigned int modifiers, struct list *defs) + { +@@ -1020,7 +1063,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const + struct hlsl_reg_reservation reservation = {0}; + char *endptr; + +- if (ctx->profile->major_version < 4) ++ if (shader_profile_version_lt(ctx, 4, 0)) + return reservation; + + reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); +@@ -1097,20 +1140,50 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str { struct hlsl_ir_constant *constant; struct hlsl_ir_node *node; @@ -1687,7 +2430,7 @@ index b487c5c138f..42fa2129e40 100644 if (node->type == HLSL_IR_CONSTANT) { constant = hlsl_ir_constant(node); -@@ -1126,9 +1192,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str +@@ -1119,9 +1192,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str else { hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, @@ -1700,7 +2443,16 @@ index b487c5c138f..42fa2129e40 100644 return ret; } -@@ -1341,12 +1409,12 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * +@@ -1284,7 +1359,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, + { + if (operands[j]) + { +- if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, instrs, operands[j], i, loc))) + return NULL; + + cell_operands[j] = load; +@@ -1334,12 +1409,12 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * } } @@ -1715,7 +2467,7 @@ index b487c5c138f..42fa2129e40 100644 } static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, -@@ -1354,7 +1422,7 @@ static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct +@@ -1347,7 +1422,7 @@ static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct { check_integer_type(ctx, arg); @@ -1724,7 +2476,7 @@ index b487c5c138f..42fa2129e40 100644 } static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, -@@ -1385,7 +1453,7 @@ static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const str +@@ -1378,7 +1453,7 @@ static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const str return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); } @@ -1733,7 +2485,7 @@ index b487c5c138f..42fa2129e40 100644 enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { -@@ -1394,24 +1462,13 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str +@@ -1387,24 +1462,13 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str common_type = get_common_numeric_type(ctx, arg1, arg2, loc); @@ -1761,7 +2513,7 @@ index b487c5c138f..42fa2129e40 100644 } static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, -@@ -1421,19 +1478,7 @@ static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct +@@ -1414,19 +1478,7 @@ static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct check_integer_type(ctx, arg1); check_integer_type(ctx, arg2); @@ -1782,7 +2534,7 @@ index b487c5c138f..42fa2129e40 100644 } static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct list *instrs, -@@ -1461,17 +1506,6 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str +@@ -1454,17 +1506,6 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str return add_expr(ctx, instrs, op, args, return_type, loc); } @@ -1800,7 +2552,7 @@ index b487c5c138f..42fa2129e40 100644 static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) -@@ -1495,18 +1529,6 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct +@@ -1488,18 +1529,6 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct return add_expr(ctx, instrs, op, args, common_type, loc); } @@ -1819,7 +2571,7 @@ index b487c5c138f..42fa2129e40 100644 static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct list *instrs, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) -@@ -1538,19 +1560,7 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l +@@ -1531,19 +1560,7 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l return add_expr(ctx, instrs, op, args, return_type, loc); } @@ -1840,7 +2592,27 @@ index b487c5c138f..42fa2129e40 100644 struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); -@@ -1594,13 +1604,60 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis +@@ -1557,8 +1574,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, arg1->data_type))) +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Invalid type %s.\n", string->buffer); ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return NULL; + } +@@ -1568,8 +1584,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, arg2->data_type))) +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Invalid type %s.\n", string->buffer); ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return NULL; + } +@@ -1589,13 +1604,60 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis common_type = hlsl_get_vector_type(ctx, base, dim); ret_type = hlsl_get_scalar_type(ctx, base); @@ -1904,7 +2676,7 @@ index b487c5c138f..42fa2129e40 100644 } static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) -@@ -1668,7 +1725,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1663,7 +1725,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in if (assign_op == ASSIGN_OP_SUB) { @@ -1913,7 +2685,7 @@ index b487c5c138f..42fa2129e40 100644 return NULL; assign_op = ASSIGN_OP_ADD; } -@@ -1677,7 +1734,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1672,7 +1734,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in enum hlsl_ir_expr_op op = op_from_assignment(assign_op); assert(op); @@ -1922,7 +2694,16 @@ index b487c5c138f..42fa2129e40 100644 return NULL; } -@@ -1858,7 +1915,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem +@@ -1779,7 +1841,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + return NULL; + list_add_tail(instrs, &cell->entry); + +- if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) ++ if (!(load = hlsl_add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) + return NULL; + + if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) +@@ -1853,7 +1915,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem return true; } @@ -1931,11 +2712,11 @@ index b487c5c138f..42fa2129e40 100644 struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) { unsigned int src_comp_count = hlsl_type_component_count(src->data_type); -@@ -1873,17 +1930,17 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, +@@ -1868,17 +1930,17 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_type *dst_comp_type; struct hlsl_block block; -- if (!(load = hlsl_add_load_component(ctx, instrs, src, k, &src->loc))) +- if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) + if (!(load = hlsl_add_load_component(ctx, block_to_list(instrs), src, k, &src->loc))) return; @@ -1952,7 +2733,7 @@ index b487c5c138f..42fa2129e40 100644 ++*store_index; } -@@ -1929,211 +1986,234 @@ static bool type_has_numeric_components(struct hlsl_type *type) +@@ -1924,211 +1986,234 @@ static bool type_has_numeric_components(struct hlsl_type *type) return false; } @@ -2195,7 +2976,7 @@ index b487c5c138f..42fa2129e40 100644 - type_has_object_components(var->data_type, true)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Target profile doesn't support objects as struct members in uniform variables."); +- "Target profile doesn't support objects as struct members in uniform variables.\n"); - } + if (ctx->cur_scope == ctx->globals) + { @@ -2345,7 +3126,7 @@ index b487c5c138f..42fa2129e40 100644 if (v->initializer.args_count) { -@@ -2148,8 +2228,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +@@ -2143,8 +2228,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, "Expected %u components in initializer, but got %u.", hlsl_type_component_count(type), size); @@ -2355,7 +3136,7 @@ index b487c5c138f..42fa2129e40 100644 continue; } -@@ -2164,16 +2243,14 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +@@ -2159,16 +2243,14 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); assert(v->initializer.args_count == 1); @@ -2377,7 +3158,7 @@ index b487c5c138f..42fa2129e40 100644 } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { -@@ -2183,32 +2260,33 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +@@ -2178,32 +2260,33 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t if (type_has_object_components(var->data_type, false)) { @@ -2416,7 +3197,7 @@ index b487c5c138f..42fa2129e40 100644 vkd3d_free(var_list); return statements_list; } -@@ -2291,7 +2369,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, +@@ -2286,7 +2369,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, return arg; type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); @@ -2425,7 +3206,7 @@ index b487c5c138f..42fa2129e40 100644 } static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, -@@ -2303,7 +2381,7 @@ static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *p +@@ -2298,7 +2381,7 @@ static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *p { struct hlsl_ir_node *new_arg; @@ -2434,7 +3215,7 @@ index b487c5c138f..42fa2129e40 100644 return false; params->args[i] = new_arg; } -@@ -2399,25 +2477,25 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, +@@ -2394,25 +2477,25 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) return false; @@ -2451,7 +3232,7 @@ index b487c5c138f..42fa2129e40 100644 count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) { -- if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) +- if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, i, loc))) return false; @@ -2464,7 +3245,7 @@ index b487c5c138f..42fa2129e40 100644 } static bool intrinsic_any(struct hlsl_ctx *ctx, -@@ -2436,28 +2514,28 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, +@@ -2431,28 +2514,28 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, { if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; @@ -2489,7 +3270,7 @@ index b487c5c138f..42fa2129e40 100644 count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) { -- if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) +- if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, i, loc))) return false; @@ -2498,7 +3279,7 @@ index b487c5c138f..42fa2129e40 100644 return false; } -@@ -2496,7 +2574,7 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, +@@ -2491,7 +2574,7 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_FLOAT); operands[0] = params->args[0]; @@ -2507,7 +3288,7 @@ index b487c5c138f..42fa2129e40 100644 } static bool intrinsic_asuint(struct hlsl_ctx *ctx, -@@ -2532,7 +2610,7 @@ static bool intrinsic_asuint(struct hlsl_ctx *ctx, +@@ -2527,7 +2610,7 @@ static bool intrinsic_asuint(struct hlsl_ctx *ctx, data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); operands[0] = params->args[0]; @@ -2516,16 +3297,42 @@ index b487c5c138f..42fa2129e40 100644 } static bool intrinsic_clamp(struct hlsl_ctx *ctx, -@@ -2572,7 +2650,7 @@ static bool intrinsic_clip(struct hlsl_ctx *ctx, - - if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NEG, condition, loc))) - return false; -- list_add_tail(params->instrs, &jump->entry); -+ hlsl_block_add_instr(params->instrs, jump); - - return true; +@@ -2544,6 +2627,34 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, max, params->args[2], loc); } -@@ -2604,34 +2682,34 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, + ++static bool intrinsic_clip(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *condition, *jump; ++ ++ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) ++ return false; ++ ++ condition = params->args[0]; ++ ++ if (ctx->profile->major_version < 4 && hlsl_type_component_count(condition->data_type) > 4) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, condition->data_type))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Argument type cannot exceed 4 components, got type \"%s\".", string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ return false; ++ } ++ ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NEG, condition, loc))) ++ return false; ++ hlsl_block_add_instr(params->instrs, jump); ++ ++ return true; ++} ++ + static bool intrinsic_cos(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -2571,34 +2682,34 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, cast_type = hlsl_get_vector_type(ctx, base, 3); @@ -2567,7 +3374,7 @@ index b487c5c138f..42fa2129e40 100644 if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) return false; -@@ -2650,6 +2728,28 @@ static bool intrinsic_ddx(struct hlsl_ctx *ctx, +@@ -2617,6 +2728,28 @@ static bool intrinsic_ddx(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); } @@ -2596,7 +3403,7 @@ index b487c5c138f..42fa2129e40 100644 static bool intrinsic_ddy(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -2661,6 +2761,28 @@ static bool intrinsic_ddy(struct hlsl_ctx *ctx, +@@ -2628,6 +2761,28 @@ static bool intrinsic_ddy(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); } @@ -2625,7 +3432,7 @@ index b487c5c138f..42fa2129e40 100644 static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -2701,7 +2823,7 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, +@@ -2668,7 +2823,7 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, /* 1/ln(2) */ if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) return false; @@ -2634,7 +3441,7 @@ index b487c5c138f..42fa2129e40 100644 if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) return false; -@@ -2748,7 +2870,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer +@@ -2715,7 +2870,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) return false; @@ -2643,7 +3450,7 @@ index b487c5c138f..42fa2129e40 100644 if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) return false; -@@ -2759,10 +2881,10 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer +@@ -2726,10 +2881,10 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer if (!(neg_frac = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, frac, loc))) return false; @@ -2656,7 +3463,7 @@ index b487c5c138f..42fa2129e40 100644 return false; return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, select, y, loc); -@@ -2839,7 +2961,7 @@ static bool intrinsic_lerp(struct hlsl_ctx *ctx, +@@ -2806,7 +2961,7 @@ static bool intrinsic_lerp(struct hlsl_ctx *ctx, } static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, @@ -2665,7 +3472,7 @@ index b487c5c138f..42fa2129e40 100644 const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *log, *mul; -@@ -2894,15 +3016,15 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, +@@ -2861,15 +3016,15 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, init_value.u[3].f = 1.0f; if (!(init = hlsl_new_constant(ctx, ret_type, &init_value, loc))) return false; @@ -2684,7 +3491,7 @@ index b487c5c138f..42fa2129e40 100644 /* Diffuse component. */ if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) -@@ -2910,31 +3032,31 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, +@@ -2877,31 +3032,31 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) return false; @@ -2723,7 +3530,7 @@ index b487c5c138f..42fa2129e40 100644 return true; } -@@ -3046,10 +3168,10 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -3013,10 +3168,10 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, ret_type = hlsl_get_scalar_type(ctx, base); } @@ -2736,22 +3543,22 @@ index b487c5c138f..42fa2129e40 100644 return false; if (!(var = hlsl_new_synthetic_var(ctx, "mul", matrix_type, loc))) -@@ -3067,10 +3189,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -3034,10 +3189,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, { struct hlsl_ir_node *value1, *value2, *mul; -- if (!(value1 = hlsl_add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) +- if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) + if (!(value1 = hlsl_add_load_component(ctx, block_to_list(params->instrs), + cast1, j * cast1->data_type->dimx + k, loc))) return false; -- if (!(value2 = hlsl_add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) +- if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) + if (!(value2 = hlsl_add_load_component(ctx, block_to_list(params->instrs), + cast2, k * cast2->data_type->dimx + i, loc))) return false; if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) -@@ -3089,15 +3213,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -3056,15 +3213,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) return false; @@ -2770,7 +3577,7 @@ index b487c5c138f..42fa2129e40 100644 } static bool intrinsic_normalize(struct hlsl_ctx *ctx, -@@ -3202,22 +3326,22 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, +@@ -3169,22 +3326,22 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) return false; @@ -2798,7 +3605,7 @@ index b487c5c138f..42fa2129e40 100644 return false; if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, op2, loc))) -@@ -3262,7 +3386,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, +@@ -3229,7 +3386,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) return false; @@ -2807,7 +3614,7 @@ index b487c5c138f..42fa2129e40 100644 if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) return false; -@@ -3275,11 +3399,11 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, +@@ -3242,11 +3399,11 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) return false; @@ -2821,7 +3628,7 @@ index b487c5c138f..42fa2129e40 100644 if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) return false; -@@ -3316,13 +3440,13 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, +@@ -3283,13 +3440,13 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; @@ -2837,7 +3644,16 @@ index b487c5c138f..42fa2129e40 100644 } static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, -@@ -3357,7 +3481,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -3308,7 +3465,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + + if (params->args_count == 4) + { +- hlsl_fixme(ctx, loc, "Samples with gradients are not implemented.\n"); ++ hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); + } + + sampler_type = params->args[0]->data_type; +@@ -3324,7 +3481,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * hlsl_release_string_buffer(ctx, string); } @@ -2846,7 +3662,7 @@ index b487c5c138f..42fa2129e40 100644 hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) coords = params->args[1]; -@@ -3368,7 +3492,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -3335,7 +3492,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; @@ -2855,7 +3671,16 @@ index b487c5c138f..42fa2129e40 100644 return true; } -@@ -3410,7 +3534,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, +@@ -3369,7 +3526,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, + + if ((string = hlsl_type_to_string(ctx, arg_type))) + hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.\n", ++ "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; +@@ -3377,7 +3534,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, if (arg_type->class == HLSL_CLASS_SCALAR) { @@ -2864,11 +3689,11 @@ index b487c5c138f..42fa2129e40 100644 return true; } -@@ -3426,18 +3550,18 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, +@@ -3393,18 +3550,18 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { struct hlsl_block block; -- if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) +- if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) + if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, j * arg->data_type->dimx + i, loc))) return false; @@ -2886,7 +3711,7 @@ index b487c5c138f..42fa2129e40 100644 return true; } -@@ -3477,13 +3601,13 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, +@@ -3444,13 +3601,13 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) return false; @@ -2902,7 +3727,20 @@ index b487c5c138f..42fa2129e40 100644 arg = swizzle; } -@@ -3519,7 +3643,11 @@ intrinsic_functions[] = +@@ -3458,7 +3615,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) + return false; + +- if (ctx->profile->major_version >= 4) ++ if (shader_profile_version_ge(ctx, 4, 0)) + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); + + return true; +@@ -3482,10 +3639,15 @@ intrinsic_functions[] = + {"asfloat", 1, true, intrinsic_asfloat}, + {"asuint", -1, true, intrinsic_asuint}, + {"clamp", 3, true, intrinsic_clamp}, ++ {"clip", 1, true, intrinsic_clip}, {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, @@ -2914,7 +3752,7 @@ index b487c5c138f..42fa2129e40 100644 {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, -@@ -3583,7 +3711,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +@@ -3549,7 +3711,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, { struct hlsl_ir_node *cast; @@ -2923,7 +3761,7 @@ index b487c5c138f..42fa2129e40 100644 goto fail; args->args[i] = cast; arg = cast; -@@ -3595,13 +3723,13 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +@@ -3561,13 +3723,13 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, if (!(store = hlsl_new_simple_store(ctx, param, arg))) goto fail; @@ -2939,7 +3777,7 @@ index b487c5c138f..42fa2129e40 100644 for (i = 0; i < decl->parameters.count; ++i) { -@@ -3618,9 +3746,9 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +@@ -3584,9 +3746,9 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) goto fail; @@ -2951,7 +3789,7 @@ index b487c5c138f..42fa2129e40 100644 goto fail; } } -@@ -3631,7 +3759,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +@@ -3597,7 +3759,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) goto fail; @@ -2960,7 +3798,7 @@ index b487c5c138f..42fa2129e40 100644 } else { -@@ -3640,7 +3768,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +@@ -3606,7 +3768,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc))) goto fail; @@ -2969,7 +3807,7 @@ index b487c5c138f..42fa2129e40 100644 } } else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), -@@ -3689,7 +3817,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +@@ -3655,7 +3817,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, goto fail; } vkd3d_free(args->args); @@ -2978,7 +3816,7 @@ index b487c5c138f..42fa2129e40 100644 fail: free_parse_initializer(args); -@@ -3726,10 +3854,10 @@ static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type +@@ -3692,10 +3854,10 @@ static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; @@ -2991,7 +3829,7 @@ index b487c5c138f..42fa2129e40 100644 } static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) -@@ -4306,6 +4434,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -4272,6 +4434,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type char *name; DWORD modifiers; struct hlsl_ir_node *instr; @@ -2999,7 +3837,7 @@ index b487c5c138f..42fa2129e40 100644 struct list *list; struct parse_fields fields; struct parse_function function; -@@ -4438,33 +4567,23 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -4404,33 +4567,23 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type bitand_expr %type bitor_expr %type bitxor_expr @@ -3035,7 +3873,7 @@ index b487c5c138f..42fa2129e40 100644 %token VAR_IDENTIFIER %token NEW_IDENTIFIER -@@ -4480,6 +4599,16 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -4446,6 +4599,16 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type attribute_list %type attribute_list_optional @@ -3052,7 +3890,7 @@ index b487c5c138f..42fa2129e40 100644 %type boolean %type buffer_type -@@ -4527,6 +4656,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -4493,6 +4656,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type type_spec %type variable_decl %type variable_def @@ -3060,7 +3898,7 @@ index b487c5c138f..42fa2129e40 100644 %% -@@ -4595,25 +4725,19 @@ preproc_directive: +@@ -4561,25 +4725,19 @@ preproc_directive: } } @@ -3095,7 +3933,7 @@ index b487c5c138f..42fa2129e40 100644 } struct_spec: -@@ -4736,7 +4860,7 @@ attribute: +@@ -4702,7 +4860,7 @@ attribute: } $$->name = $2; list_init(&$$->instrs); @@ -3104,7 +3942,7 @@ index b487c5c138f..42fa2129e40 100644 vkd3d_free($4.instrs); $$->loc = @$; $$->args_count = $4.args_count; -@@ -4792,15 +4916,15 @@ func_declaration: +@@ -4758,15 +4916,15 @@ func_declaration: "Function \"%s\" is already defined.", decl->func->name); hlsl_note(ctx, &decl->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously defined here.", decl->func->name); @@ -3123,7 +3961,7 @@ index b487c5c138f..42fa2129e40 100644 /* Semantics are taken from whichever definition has a body. * We can't just replace the hlsl_ir_var pointers, though: if -@@ -4977,7 +5101,7 @@ func_prototype: +@@ -4943,7 +5101,7 @@ func_prototype: compound_statement: '{' '}' { @@ -3132,7 +3970,30 @@ index b487c5c138f..42fa2129e40 100644 YYABORT; } | '{' scope_start statement_list '}' -@@ -5393,7 +5517,7 @@ type: +@@ -5261,7 +5419,12 @@ type_no_void: + { + validate_texture_format_type(ctx, $3, &@3); + +- /* TODO: unspecified sample count is not allowed for all targets */ ++ if (shader_profile_version_lt(ctx, 4, 1)) ++ { ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); ++ } ++ + $$ = hlsl_new_texture_type(ctx, $1, $3, 0); + } + | texture_ms_type '<' type ',' shift_expr '>' +@@ -5325,7 +5488,7 @@ type_no_void: + $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); + if ($$->is_minimum_precision) + { +- if (ctx->profile->major_version < 4) ++ if (shader_profile_version_lt(ctx, 4, 0)) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Target profile doesn't support minimum-precision types."); +@@ -5354,7 +5517,7 @@ type: declaration_statement: declaration @@ -3141,7 +4002,7 @@ index b487c5c138f..42fa2129e40 100644 | typedef { if (!($$ = make_empty_list(ctx))) -@@ -5455,22 +5579,11 @@ type_spec: +@@ -5416,23 +5579,12 @@ type_spec: } declaration: @@ -3155,18 +4016,19 @@ index b487c5c138f..42fa2129e40 100644 + if (!($$ = initialize_vars(ctx, $1))) YYABORT; - $$ = declare_vars(ctx, type, modifiers, &@1, $3); -- } -- + } + -variables_def_optional: - %empty - { - $$ = NULL; - } +- } - | variables_def - +- variables_def: variable_def -@@ -5485,6 +5598,33 @@ variables_def: + { +@@ -5446,6 +5598,33 @@ variables_def: list_add_tail($$, &$3->entry); } @@ -3200,7 +4062,7 @@ index b487c5c138f..42fa2129e40 100644 variable_decl: any_identifier arrays colon_attribute { -@@ -5500,7 +5640,7 @@ state: +@@ -5461,7 +5640,7 @@ state: any_identifier '=' expr ';' { vkd3d_free($1); @@ -3209,7 +4071,7 @@ index b487c5c138f..42fa2129e40 100644 } state_block_start: -@@ -5526,6 +5666,38 @@ variable_def: +@@ -5487,6 +5666,38 @@ variable_def: ctx->in_state_block = 0; } @@ -3248,7 +4110,7 @@ index b487c5c138f..42fa2129e40 100644 arrays: %empty { -@@ -5534,17 +5706,12 @@ arrays: +@@ -5495,17 +5706,12 @@ arrays: } | '[' expr ']' arrays { @@ -3268,7 +4130,7 @@ index b487c5c138f..42fa2129e40 100644 $$ = $4; -@@ -5661,7 +5828,7 @@ complex_initializer: +@@ -5622,7 +5828,7 @@ complex_initializer: YYABORT; } $$.args[0] = node_from_list($1); @@ -3277,7 +4139,7 @@ index b487c5c138f..42fa2129e40 100644 $$.braces = false; } | '{' complex_initializer_list '}' -@@ -5692,7 +5859,7 @@ complex_initializer_list: +@@ -5653,7 +5859,7 @@ complex_initializer_list: $$.args = new_args; for (i = 0; i < $3.args_count; ++i) $$.args[$$.args_count++] = $3.args[i]; @@ -3286,7 +4148,7 @@ index b487c5c138f..42fa2129e40 100644 free_parse_initializer(&$3); } -@@ -5709,7 +5876,7 @@ initializer_expr_list: +@@ -5670,7 +5876,7 @@ initializer_expr_list: YYABORT; } $$.args[0] = node_from_list($1); @@ -3295,7 +4157,7 @@ index b487c5c138f..42fa2129e40 100644 $$.braces = false; } | initializer_expr_list ',' initializer_expr -@@ -5725,7 +5892,7 @@ initializer_expr_list: +@@ -5686,7 +5892,7 @@ initializer_expr_list: } $$.args = new_args; $$.args[$$.args_count++] = node_from_list($3); @@ -3304,7 +4166,7 @@ index b487c5c138f..42fa2129e40 100644 vkd3d_free($3); } -@@ -5744,15 +5911,17 @@ statement_list: +@@ -5705,15 +5911,17 @@ statement_list: | statement_list statement { $$ = $1; @@ -3325,7 +4187,7 @@ index b487c5c138f..42fa2129e40 100644 | jump_statement | selection_statement | loop_statement -@@ -5760,52 +5929,47 @@ statement: +@@ -5721,47 +5929,47 @@ statement: jump_statement: KW_RETURN expr ';' { @@ -3348,18 +4210,19 @@ index b487c5c138f..42fa2129e40 100644 - KW_DISCARD ';' + | KW_DISCARD ';' { - struct hlsl_ir_node *discard, *c; +- struct hlsl_ir_node *discard; ++ struct hlsl_ir_node *discard, *c; - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; - - if (!(c = hlsl_new_uint_constant(ctx, ~0u, &@1))) - return false; -- list_add_tail($$, &c->entry); +- if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) ++ ++ if (!(c = hlsl_new_uint_constant(ctx, ~0u, &@1))) ++ return false; + hlsl_block_add_instr($$, c); - - if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NZ, c, &@1))) ++ ++ if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NZ, c, &@1))) return false; - list_add_tail($$, &discard->entry); + hlsl_block_add_instr($$, discard); @@ -3393,7 +4256,7 @@ index b487c5c138f..42fa2129e40 100644 if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) { struct vkd3d_string_buffer *string; -@@ -5816,7 +5980,7 @@ selection_statement: +@@ -5772,7 +5980,7 @@ selection_statement: hlsl_release_string_buffer(ctx, string); } $$ = $3; @@ -3402,7 +4265,7 @@ index b487c5c138f..42fa2129e40 100644 } if_body: -@@ -5847,14 +6011,14 @@ loop_statement: +@@ -5803,14 +6011,14 @@ loop_statement: } | attribute_list_optional KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement { @@ -3419,7 +4282,7 @@ index b487c5c138f..42fa2129e40 100644 YYABORT; } | expr -@@ -5870,7 +6034,7 @@ func_arguments: +@@ -5826,7 +6034,7 @@ func_arguments: { $$.args = NULL; $$.args_count = 0; @@ -3428,7 +4291,7 @@ index b487c5c138f..42fa2129e40 100644 YYABORT; $$.braces = false; } -@@ -5924,7 +6088,7 @@ primary_expr: +@@ -5880,7 +6088,7 @@ primary_expr: } | '(' expr ')' { @@ -3437,7 +4300,7 @@ index b487c5c138f..42fa2129e40 100644 } | var_identifier '(' func_arguments ')' { -@@ -6018,10 +6182,10 @@ postfix_expr: +@@ -5974,10 +6182,10 @@ postfix_expr: } | postfix_expr '[' expr ']' { @@ -3451,7 +4314,7 @@ index b487c5c138f..42fa2129e40 100644 if (!add_array_access(ctx, $1, array, index, &@2)) { -@@ -6071,7 +6235,7 @@ postfix_expr: +@@ -6027,7 +6235,7 @@ postfix_expr: { struct hlsl_ir_node *object = node_from_list($1); @@ -3460,7 +4323,7 @@ index b487c5c138f..42fa2129e40 100644 vkd3d_free($5.instrs); if (!add_method_call(ctx, $1, object, $3, &$5, &@3)) -@@ -6110,7 +6274,7 @@ unary_expr: +@@ -6066,7 +6274,7 @@ unary_expr: } | '-' unary_expr { @@ -3469,7 +4332,7 @@ index b487c5c138f..42fa2129e40 100644 $$ = $2; } | '~' unary_expr -@@ -6174,119 +6338,118 @@ mul_expr: +@@ -6130,119 +6338,118 @@ mul_expr: unary_expr | mul_expr '*' unary_expr { @@ -3611,7 +4474,7 @@ index b487c5c138f..42fa2129e40 100644 vkd3d_free($5); if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) -@@ -6370,9 +6533,12 @@ assign_op: +@@ -6326,9 +6533,12 @@ assign_op: expr: assignment_expr @@ -3626,10 +4489,107 @@ index b487c5c138f..42fa2129e40 100644 vkd3d_free($3); } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index b980ed567aa..8927e291183 100644 +index 765b1907426..8927e291183 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -3254,10 +3254,33 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, +@@ -666,7 +666,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx, + return; + list_add_after(&cf_instr->entry, &load->node.entry); + +- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc))) + return; + hlsl_block_add_instr(&then_block, jump); + +@@ -1889,7 +1889,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + + if (rhs->type != HLSL_IR_LOAD) + { +- hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type.\n"); ++ hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type."); + return false; + } + +@@ -2584,6 +2584,61 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + return true; + } + ++static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; ++ static const struct hlsl_constant_value zero_value; ++ struct hlsl_type *arg_type, *cmp_type; ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; ++ struct hlsl_ir_jump *jump; ++ unsigned int i, count; ++ struct list instrs; ++ ++ if (instr->type != HLSL_IR_JUMP) ++ return false; ++ jump = hlsl_ir_jump(instr); ++ if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) ++ return false; ++ ++ list_init(&instrs); ++ ++ arg_type = jump->condition.node->data_type; ++ if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc))) ++ return false; ++ list_add_tail(&instrs, &zero->entry); ++ ++ operands[0] = jump->condition.node; ++ operands[1] = zero; ++ cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); ++ if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) ++ return false; ++ list_add_tail(&instrs, &cmp->entry); ++ ++ if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) ++ return false; ++ list_add_tail(&instrs, &bool_false->entry); ++ ++ or = bool_false; ++ ++ count = hlsl_type_component_count(cmp_type); ++ for (i = 0; i < count; ++i) ++ { ++ if (!(load = hlsl_add_load_component(ctx, &instrs, cmp, i, &instr->loc))) ++ return false; ++ ++ if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) ++ return NULL; ++ list_add_tail(&instrs, &or->entry); ++ } ++ ++ list_move_tail(&instr->entry, &instrs); ++ hlsl_src_remove(&jump->condition); ++ hlsl_src_from_node(&jump->condition, or); ++ jump->type = HLSL_IR_JUMP_DISCARD_NZ; ++ ++ return true; ++} ++ + static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + switch (instr->type) +@@ -2848,8 +2903,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + index->idx.node->last_read = last_read; + break; + } +- case HLSL_IR_CONSTANT: + case HLSL_IR_JUMP: ++ { ++ struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); ++ ++ if (jump->condition.node) ++ jump->condition.node->last_read = last_read; ++ break; ++ } ++ case HLSL_IR_CONSTANT: + break; + } + } +@@ -3192,10 +3254,33 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, } } @@ -3664,7 +4624,7 @@ index b980ed567aa..8927e291183 100644 struct hlsl_ir_node *instr; LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -@@ -3268,66 +3291,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, +@@ -3206,66 +3291,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, { struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); const struct hlsl_type *type = instr->data_type; @@ -3768,7 +4728,7 @@ index b980ed567aa..8927e291183 100644 } break; -@@ -3359,8 +3368,6 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi +@@ -3297,8 +3368,6 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi struct register_allocator allocator = {0}; struct hlsl_ir_var *var; @@ -3777,7 +4737,7 @@ index b980ed567aa..8927e291183 100644 LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform && var->last_read) -@@ -3377,6 +3384,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi +@@ -3315,6 +3384,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi } } @@ -3786,8 +4746,19 @@ index b980ed567aa..8927e291183 100644 vkd3d_free(allocator.allocations); } +@@ -4062,6 +4133,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); + ++ if (profile->major_version >= 4) ++ { ++ hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); ++ } + hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); + while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); + do diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 570773cd335..01c438ae212 100644 +index 301113c8477..01c438ae212 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -80,7 +80,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, @@ -3869,34 +4840,16 @@ index 570773cd335..01c438ae212 100644 { switch (type) { -@@ -380,7 +425,7 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co - assert(dst_type->base_type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->base_type == src2->node.data_type->base_type); +@@ -223,7 +268,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + return true; + } -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (src1->node.data_type->base_type) - { -@@ -416,7 +461,7 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - assert(dst_type->base_type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->base_type == src2->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (src1->node.data_type->base_type) - { -@@ -455,7 +500,7 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - assert(dst_type->base_type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->base_type == src2->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (src1->node.data_type->base_type) - { -@@ -614,7 +659,7 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons +-static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { + enum hlsl_base_type type = dst_type->base_type; +@@ -232,64 +277,73 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type); @@ -3905,16 +4858,395 @@ index 570773cd335..01c438ae212 100644 { switch (type) { -@@ -648,7 +693,7 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - assert(dst_type->base_type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->base_type == src2->node.data_type->base_type); +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; +- break; +- +- case HLSL_TYPE_DOUBLE: +- dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; +- break; +- + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; + break; + + default: +- FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ FIXME("Fold bit/logic and for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; + } + +-static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { ++ enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); - for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { - switch (src1->node.data_type->base_type) +- switch (src1->node.data_type->base_type) ++ switch (type) { -@@ -721,6 +766,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; + break; + +- case HLSL_TYPE_DOUBLE: +- dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; +- break; ++ default: ++ FIXME("Fold bit/logic or for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); + ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- case HLSL_TYPE_BOOL: +- dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; ++ dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; + break; + + default: +- vkd3d_unreachable(); ++ FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; + } +- +- dst->u[k].u *= ~0u; + } + return true; + } +@@ -363,45 +417,116 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + return true; + } + +-static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +- const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, +- const struct vkd3d_shader_location *loc) ++static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { +- switch (type) ++ switch (src1->node.data_type->base_type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].u = src1->value.u[k].f == src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].u = src1->value.u[k].d == src2->value.u[k].d; ++ break; ++ + case HLSL_TYPE_INT: +- if (src2->value.u[k].i == 0) +- { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); +- return false; +- } +- if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) +- dst->u[k].i = 0; +- else +- dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ dst->u[k].u *= ~0u; ++ } ++ return true; ++} ++ ++static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ unsigned int k; ++ ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (src1->node.data_type->base_type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].u = src1->value.u[k].f >= src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].u = src1->value.u[k].d >= src2->value.u[k].d; ++ break; ++ ++ case HLSL_TYPE_INT: ++ dst->u[k].u = src1->value.u[k].i >= src2->value.u[k].i; + break; + + case HLSL_TYPE_UINT: +- if (src2->value.u[k].u == 0) +- { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); +- return false; +- } +- dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; + break; + + default: +- FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); +- return false; ++ vkd3d_unreachable(); + } ++ ++ dst->u[k].u *= ~0u; ++ } ++ return true; ++} ++ ++static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ unsigned int k; ++ ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (src1->node.data_type->base_type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].u = src1->value.u[k].f < src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].u = src1->value.u[k].d < src2->value.u[k].d; ++ break; ++ ++ case HLSL_TYPE_INT: ++ dst->u[k].u = src1->value.u[k].i < src2->value.u[k].i; ++ break; ++ ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ dst->u[k].u *= ~0u; + } + return true; + } +@@ -419,6 +544,15 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + { + switch (type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = fmaxf(src1->value.u[k].f, src2->value.u[k].f); ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].d = fmax(src1->value.u[k].d, src2->value.u[k].d); ++ break; ++ + case HLSL_TYPE_INT: + dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); + break; +@@ -448,6 +582,15 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + { + switch (type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = fminf(src1->value.u[k].f, src2->value.u[k].f); ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].d = fmin(src1->value.u[k].d, src2->value.u[k].d); ++ break; ++ + case HLSL_TYPE_INT: + dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); + break; +@@ -464,8 +607,9 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + return true; + } + +-static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +- const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, ++ const struct vkd3d_shader_location *loc) + { + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; +@@ -478,19 +622,35 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + switch (type) + { + case HLSL_TYPE_INT: ++ if (src2->value.u[k].i == 0) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); ++ return false; ++ } ++ if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) ++ dst->u[k].i = 0; ++ else ++ dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; ++ break; ++ + case HLSL_TYPE_UINT: +- dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; ++ if (src2->value.u[k].u == 0) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); ++ return false; ++ } ++ dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; + break; + + default: +- FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; + } + +-static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { + enum hlsl_base_type type = dst_type->base_type; +@@ -503,41 +663,60 @@ static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + { + switch (type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; ++ break; ++ + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; ++ dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; + break; + + default: +- FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; + } + +-static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { +- switch (type) ++ switch (src1->node.data_type->base_type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; ++ break; ++ + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; + break; + + default: +- FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst_type)); +- return false; ++ vkd3d_unreachable(); + } ++ ++ dst->u[k].u *= ~0u; + } + return true; + } +@@ -587,24 +766,42 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_neg(ctx, &res, instr->data_type, arg1); break; @@ -3925,6 +5257,67 @@ index 570773cd335..01c438ae212 100644 case HLSL_OP2_ADD: success = fold_add(ctx, &res, instr->data_type, arg1, arg2); break; + +- case HLSL_OP2_MUL: +- success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_BIT_AND: ++ case HLSL_OP2_LOGIC_AND: ++ success = fold_and(ctx, &res, instr->data_type, arg1, arg2); + break; + +- case HLSL_OP2_NEQUAL: +- success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_BIT_OR: ++ case HLSL_OP2_LOGIC_OR: ++ success = fold_or(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP2_BIT_XOR: ++ success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_DIV: + success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); + break; + +- case HLSL_OP2_MOD: +- success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); ++ case HLSL_OP2_EQUAL: ++ success = fold_equal(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP2_GEQUAL: ++ success = fold_gequal(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP2_LESS: ++ success = fold_less(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_MAX: +@@ -615,16 +812,16 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + success = fold_min(ctx, &res, instr->data_type, arg1, arg2); + break; + +- case HLSL_OP2_BIT_XOR: +- success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_MOD: ++ success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); + break; + +- case HLSL_OP2_BIT_AND: +- success = fold_bit_and(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_MUL: ++ success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); + break; + +- case HLSL_OP2_BIT_OR: +- success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_NEQUAL: ++ success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); + break; + + default: diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 3542b5fac51..5535a6503d6 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -4002,7 +5395,7 @@ index 3542b5fac51..5535a6503d6 100644 compiler->location.line = 1; diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index d6322bb14f1..290fdcb3f62 100644 +index d066b13ee4e..290fdcb3f62 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -989,6 +989,8 @@ static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *i @@ -4054,8 +5447,48 @@ index d6322bb14f1..290fdcb3f62 100644 case HLSL_OP1_EXP2: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); +@@ -4780,19 +4803,13 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, + instr.opcode = VKD3D_SM4_OP_BREAK; + break; + +- case HLSL_IR_JUMP_DISCARD: ++ case HLSL_IR_JUMP_DISCARD_NZ: + { +- struct sm4_register *reg = &instr.srcs[0].reg; +- + instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; + + memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); +- instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + instr.src_count = 1; +- reg->type = VKD3D_SM4_RT_IMMCONST; +- reg->dim = VKD3D_SM4_DIMENSION_SCALAR; +- reg->immconst_uint[0] = ~0u; +- ++ sm4_src_from_node(&instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); + break; + } + +@@ -4800,7 +4817,7 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, + vkd3d_unreachable(); + + default: +- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); ++ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + return; + } + +@@ -5016,7 +5033,7 @@ static void write_sm4_resource_store(struct hlsl_ctx *ctx, + + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { +- hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.\n"); ++ hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); + return; + } + diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index d2f98491ce0..33d8c60e59a 100644 +index 343fdb2252e..33d8c60e59a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -415,6 +415,8 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t @@ -4067,7 +5500,108 @@ index d2f98491ce0..33d8c60e59a 100644 default: FIXME("Unhandled source type %#x.\n", type); return "bin"; -@@ -1205,6 +1207,24 @@ static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, +@@ -440,6 +442,18 @@ void vkd3d_shader_dump_shader(enum vkd3d_shader_source_type source_type, + shader_get_source_type_suffix(source_type), shader->code, shader->size); + } + ++static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) ++{ ++ struct vkd3d_shader_scan_signature_info *signature_info; ++ ++ if ((signature_info = vkd3d_find_struct(info->next, SCAN_SIGNATURE_INFO))) ++ { ++ memset(&signature_info->input, 0, sizeof(signature_info->input)); ++ memset(&signature_info->output, 0, sizeof(signature_info->output)); ++ memset(&signature_info->patch_constant, 0, sizeof(signature_info->patch_constant)); ++ } ++} ++ + bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, + struct vkd3d_shader_message_context *message_context, const char *source_name, + const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, +@@ -526,6 +540,43 @@ void vkd3d_shader_free_messages(char *messages) + vkd3d_free(messages); + } + ++static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, ++ const struct shader_signature *src) ++{ ++ unsigned int i; ++ ++ signature->element_count = src->element_count; ++ if (!src->elements) ++ { ++ assert(!signature->element_count); ++ signature->elements = NULL; ++ return true; ++ } ++ ++ if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) ++ return false; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ struct vkd3d_shader_signature_element *d = &signature->elements[i]; ++ struct signature_element *e = &src->elements[i]; ++ ++ d->semantic_name = e->semantic_name; ++ d->semantic_index = e->semantic_index; ++ d->stream_index = e->stream_index; ++ d->sysval_semantic = e->sysval_semantic; ++ d->component_type = e->component_type; ++ d->register_index = e->register_index; ++ if (e->register_count > 1) ++ FIXME("Arrayed elements are not supported yet.\n"); ++ d->mask = e->mask; ++ d->used_mask = e->used_mask; ++ d->min_precision = e->min_precision; ++ } ++ ++ return true; ++} ++ + struct vkd3d_shader_scan_context + { + struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; +@@ -1070,6 +1121,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser *parser) + { + struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; ++ struct vkd3d_shader_scan_signature_info *signature_info; + struct vkd3d_shader_instruction *instruction; + struct vkd3d_shader_scan_context context; + int ret = VKD3D_OK; +@@ -1080,6 +1132,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + scan_descriptor_info->descriptors = NULL; + scan_descriptor_info->descriptor_count = 0; + } ++ signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO); + + vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); + +@@ -1099,6 +1152,21 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + } + } + ++ if (!ret && signature_info) ++ { ++ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) ++ || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, ++ &parser->shader_desc.output_signature) ++ || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, ++ &parser->shader_desc.patch_constant_signature)) ++ { ++ vkd3d_shader_free_scan_signature_info(signature_info); ++ if (scan_descriptor_info) ++ vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ } ++ + vkd3d_shader_scan_context_cleanup(&context); + return ret; + } +@@ -1139,6 +1207,24 @@ static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, return ret; } @@ -4092,7 +5626,16 @@ index d2f98491ce0..33d8c60e59a 100644 int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) { struct vkd3d_shader_message_context message_context; -@@ -1237,6 +1257,10 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char +@@ -1152,6 +1238,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + if ((ret = vkd3d_shader_validate_compile_info(compile_info, false)) < 0) + return ret; + ++ init_scan_signature_info(compile_info); ++ + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); + + switch (compile_info->source_type) +@@ -1169,6 +1257,10 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char ret = scan_d3dbc(compile_info, &message_context); break; @@ -4103,7 +5646,7 @@ index d2f98491ce0..33d8c60e59a 100644 default: ERR("Unsupported source type %#x.\n", compile_info->source_type); ret = VKD3D_ERROR_INVALID_ARGUMENT; -@@ -1359,6 +1383,24 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ +@@ -1291,6 +1383,24 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ return VKD3D_ERROR; } @@ -4128,7 +5671,16 @@ index d2f98491ce0..33d8c60e59a 100644 int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, char **messages) { -@@ -1391,6 +1433,10 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, +@@ -1305,6 +1415,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + if ((ret = vkd3d_shader_validate_compile_info(compile_info, true)) < 0) + return ret; + ++ init_scan_signature_info(compile_info); ++ + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); + + switch (compile_info->source_type) +@@ -1321,6 +1433,10 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, ret = compile_d3d_bytecode(compile_info, out, &message_context); break; @@ -4139,7 +5691,67 @@ index d2f98491ce0..33d8c60e59a 100644 default: vkd3d_unreachable(); } -@@ -1568,6 +1614,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns +@@ -1339,6 +1455,15 @@ void vkd3d_shader_free_scan_descriptor_info(struct vkd3d_shader_scan_descriptor_ + vkd3d_free(scan_descriptor_info->descriptors); + } + ++void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info) ++{ ++ TRACE("info %p.\n", info); ++ ++ vkd3d_shader_free_shader_signature(&info->input); ++ vkd3d_shader_free_shader_signature(&info->output); ++ vkd3d_shader_free_shader_signature(&info->patch_constant); ++} ++ + void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *shader_code) + { + TRACE("shader_code %p.\n", shader_code); +@@ -1401,43 +1526,6 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu + desc->version = 0; + } + +-static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, +- const struct shader_signature *src) +-{ +- unsigned int i; +- +- signature->element_count = src->element_count; +- if (!src->elements) +- { +- assert(!signature->element_count); +- signature->elements = NULL; +- return true; +- } +- +- if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) +- return false; +- +- for (i = 0; i < signature->element_count; ++i) +- { +- struct vkd3d_shader_signature_element *d = &signature->elements[i]; +- struct signature_element *e = &src->elements[i]; +- +- d->semantic_name = e->semantic_name; +- d->semantic_index = e->semantic_index; +- d->stream_index = e->stream_index; +- d->sysval_semantic = e->sysval_semantic; +- d->component_type = e->component_type; +- d->register_index = e->register_index; +- if (e->register_count > 1) +- FIXME("Arrayed elements are not supported yet.\n"); +- d->mask = e->mask; +- d->used_mask = e->used_mask; +- d->min_precision = e->min_precision; +- } +- +- return true; +-} +- + void shader_signature_cleanup(struct shader_signature *signature) + { + vkd3d_free(signature->elements); +@@ -1526,6 +1614,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns VKD3D_SHADER_SOURCE_DXBC_TPF, VKD3D_SHADER_SOURCE_HLSL, VKD3D_SHADER_SOURCE_D3D_BYTECODE, @@ -4147,7 +5759,7 @@ index d2f98491ce0..33d8c60e59a 100644 }; TRACE("count %p.\n", count); -@@ -1606,6 +1653,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1564,6 +1653,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( switch (source_type) { @@ -4156,11 +5768,15 @@ index d2f98491ce0..33d8c60e59a 100644 *count = ARRAY_SIZE(dxbc_tpf_types); return dxbc_tpf_types; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 528a6651782..85fca964227 100644 +index 406d53a3391..85fca964227 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -149,6 +149,17 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, +@@ -145,8 +145,21 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE = 7002, + VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE = 7003, + VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, ++ VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, ++ VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, + @@ -4177,7 +5793,15 @@ index 528a6651782..85fca964227 100644 }; enum vkd3d_shader_opcode -@@ -814,9 +825,12 @@ struct vkd3d_shader_desc +@@ -802,6 +815,7 @@ struct signature_element + struct shader_signature + { + struct signature_element *elements; ++ size_t elements_capacity; + unsigned int element_count; + }; + +@@ -811,9 +825,12 @@ struct vkd3d_shader_desc { const uint32_t *byte_code; size_t byte_code_size; @@ -4190,7 +5814,7 @@ index 528a6651782..85fca964227 100644 }; struct vkd3d_shader_register_semantic -@@ -1170,6 +1184,8 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi +@@ -1167,6 +1184,8 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); @@ -4199,7 +5823,7 @@ index 528a6651782..85fca964227 100644 void free_shader_desc(struct vkd3d_shader_desc *desc); -@@ -1342,6 +1358,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, +@@ -1339,6 +1358,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, } #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) @@ -4208,10 +5832,100 @@ index 528a6651782..85fca964227 100644 #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 1fc6c00deff..de355ccd10d 100644 +index 32439eec7eb..53cb5d9582c 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -6064,8 +6064,35 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if +@@ -26,6 +26,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF + static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value); + static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, + struct d3d12_fence *fence, uint64_t value); ++static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue); + static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); + static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any); + +@@ -3624,7 +3625,7 @@ static HRESULT d3d12_command_list_allocate_transfer_buffer(struct d3d12_command_ + static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_command_list *list, + struct d3d12_resource *dst_resource, unsigned int dst_sub_resource_idx, + const struct vkd3d_format *dst_format, struct d3d12_resource *src_resource, +- unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format) ++ unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format, unsigned int layer_count) + { + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const D3D12_RESOURCE_DESC *dst_desc = &dst_resource->desc; +@@ -3651,6 +3652,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com + buffer_image_copy.bufferImageHeight = 0; + vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, + src_format, src_sub_resource_idx, src_desc->MipLevels); ++ buffer_image_copy.imageSubresource.layerCount = layer_count; + src_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; + buffer_image_copy.imageOffset.x = 0; + buffer_image_copy.imageOffset.y = 0; +@@ -3658,7 +3660,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com + vk_extent_3d_from_d3d12_miplevel(&buffer_image_copy.imageExtent, src_desc, src_miplevel_idx); + + buffer_size = src_format->byte_count * buffer_image_copy.imageExtent.width * +- buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth; ++ buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth * layer_count; + if (FAILED(hr = d3d12_command_list_allocate_transfer_buffer(list, buffer_size, &transfer_buffer))) + { + ERR("Failed to allocate transfer buffer, hr %#x.\n", hr); +@@ -3684,6 +3686,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com + + vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, + dst_format, dst_sub_resource_idx, dst_desc->MipLevels); ++ buffer_image_copy.imageSubresource.layerCount = layer_count; + dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; + + assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == +@@ -3813,7 +3816,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic + { + d3d12_command_list_copy_incompatible_texture_region(list, + dst_resource, dst->u.SubresourceIndex, dst_format, +- src_resource, src->u.SubresourceIndex, src_format); ++ src_resource, src->u.SubresourceIndex, src_format, 1); + return; + } + +@@ -3835,6 +3838,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm + { + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_resource *dst_resource, *src_resource; ++ const struct vkd3d_format *dst_format, *src_format; + const struct vkd3d_vk_device_procs *vk_procs; + VkBufferCopy vk_buffer_copy; + VkImageCopy vk_image_copy; +@@ -3867,16 +3871,29 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm + else + { + layer_count = d3d12_resource_desc_get_layer_count(&dst_resource->desc); ++ dst_format = dst_resource->format; ++ src_format = src_resource->format; + + assert(d3d12_resource_is_texture(dst_resource)); + assert(d3d12_resource_is_texture(src_resource)); + assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); + assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); + ++ if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) ++ { ++ for (i = 0; i < dst_resource->desc.MipLevels; ++i) ++ { ++ d3d12_command_list_copy_incompatible_texture_region(list, ++ dst_resource, i, dst_format, ++ src_resource, i, src_format, layer_count); ++ } ++ return; ++ } ++ + for (i = 0; i < dst_resource->desc.MipLevels; ++i) + { + vk_image_copy_from_d3d12(&vk_image_copy, i, i, &src_resource->desc, &dst_resource->desc, +- src_resource->format, dst_resource->format, NULL, 0, 0, 0); ++ src_format, dst_format, NULL, 0, 0, 0); + vk_image_copy.dstSubresource.layerCount = layer_count; + vk_image_copy.srcSubresource.layerCount = layer_count; + VK_CALL(vkCmdCopyImage(list->vk_command_buffer, src_resource->u.vk_image, +@@ -6063,8 +6080,35 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if return refcount; } @@ -4247,7 +5961,179 @@ index 1fc6c00deff..de355ccd10d 100644 vkd3d_free(array->ops); } -@@ -6353,8 +6380,6 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu +@@ -6162,17 +6206,131 @@ static struct vkd3d_cs_op_data *d3d12_command_queue_op_array_require_space(struc + return &array->ops[array->count++]; + } + ++static bool clone_array_parameter(void **dst, const void *src, size_t elem_size, unsigned int count) ++{ ++ void *buffer; ++ ++ *dst = NULL; ++ if (src) ++ { ++ if (!(buffer = vkd3d_calloc(count, elem_size))) ++ return false; ++ memcpy(buffer, src, count * elem_size); ++ *dst = buffer; ++ } ++ return true; ++} ++ ++static void update_mappings_cleanup(struct vkd3d_cs_update_mappings *update_mappings) ++{ ++ vkd3d_free(update_mappings->region_start_coordinates); ++ vkd3d_free(update_mappings->region_sizes); ++ vkd3d_free(update_mappings->range_flags); ++ vkd3d_free(update_mappings->heap_range_offsets); ++ vkd3d_free(update_mappings->range_tile_counts); ++} ++ + static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, + ID3D12Resource *resource, UINT region_count, + const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, const D3D12_TILE_REGION_SIZE *region_sizes, + ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, +- UINT *heap_range_offsets, UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) ++ const UINT *heap_range_offsets, const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) + { +- FIXME("iface %p, resource %p, region_count %u, region_start_coordinates %p, " ++ struct d3d12_resource *resource_impl = unsafe_impl_from_ID3D12Resource(resource); ++ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ++ struct d3d12_heap *heap_impl = unsafe_impl_from_ID3D12Heap(heap); ++ struct vkd3d_cs_update_mappings update_mappings = {0}; ++ struct vkd3d_cs_op_data *op; ++ ++ TRACE("iface %p, resource %p, region_count %u, region_start_coordinates %p, " + "region_sizes %p, heap %p, range_count %u, range_flags %p, heap_range_offsets %p, " +- "range_tile_counts %p, flags %#x stub!\n", ++ "range_tile_counts %p, flags %#x.\n", + iface, resource, region_count, region_start_coordinates, region_sizes, heap, range_count, + range_flags, heap_range_offsets, range_tile_counts, flags); ++ ++ if (!region_count || !range_count) ++ return; ++ ++ if (!command_queue->supports_sparse_binding) ++ { ++ FIXME("Command queue %p does not support sparse binding.\n", command_queue); ++ return; ++ } ++ ++ if (!resource_impl->tiles.subresource_count) ++ { ++ WARN("Resource %p is not a tiled resource.\n", resource_impl); ++ return; ++ } ++ ++ if (region_count > 1 && !region_start_coordinates) ++ { ++ WARN("Region start coordinates must not be NULL when region count is > 1.\n"); ++ return; ++ } ++ ++ if (range_count > 1 && !range_tile_counts) ++ { ++ WARN("Range tile counts must not be NULL when range count is > 1.\n"); ++ return; ++ } ++ ++ update_mappings.resource = resource_impl; ++ update_mappings.heap = heap_impl; ++ if (!clone_array_parameter((void **)&update_mappings.region_start_coordinates, ++ region_start_coordinates, sizeof(*region_start_coordinates), region_count)) ++ { ++ ERR("Failed to allocate region start coordinates.\n"); ++ return; ++ } ++ if (!clone_array_parameter((void **)&update_mappings.region_sizes, ++ region_sizes, sizeof(*region_sizes), region_count)) ++ { ++ ERR("Failed to allocate region sizes.\n"); ++ goto free_clones; ++ } ++ if (!clone_array_parameter((void **)&update_mappings.range_flags, ++ range_flags, sizeof(*range_flags), range_count)) ++ { ++ ERR("Failed to allocate range flags.\n"); ++ goto free_clones; ++ } ++ if (!clone_array_parameter((void **)&update_mappings.heap_range_offsets, ++ heap_range_offsets, sizeof(*heap_range_offsets), range_count)) ++ { ++ ERR("Failed to allocate heap range offsets.\n"); ++ goto free_clones; ++ } ++ if (!clone_array_parameter((void **)&update_mappings.range_tile_counts, ++ range_tile_counts, sizeof(*range_tile_counts), range_count)) ++ { ++ ERR("Failed to allocate range tile counts.\n"); ++ goto free_clones; ++ } ++ update_mappings.region_count = region_count; ++ update_mappings.range_count = range_count; ++ update_mappings.flags = flags; ++ ++ vkd3d_mutex_lock(&command_queue->op_mutex); ++ ++ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) ++ { ++ ERR("Failed to add op.\n"); ++ goto unlock_mutex; ++ } ++ ++ op->opcode = VKD3D_CS_OP_UPDATE_MAPPINGS; ++ op->u.update_mappings = update_mappings; ++ ++ d3d12_command_queue_submit_locked(command_queue); ++ ++ vkd3d_mutex_unlock(&command_queue->op_mutex); ++ return; ++ ++unlock_mutex: ++ vkd3d_mutex_unlock(&command_queue->op_mutex); ++free_clones: ++ update_mappings_cleanup(&update_mappings); + } + + static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, +@@ -6183,10 +6341,34 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command + const D3D12_TILE_REGION_SIZE *region_size, + D3D12_TILE_MAPPING_FLAGS flags) + { +- FIXME("iface %p, dst_resource %p, dst_region_start_coordinate %p, " +- "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x stub!\n", ++ struct d3d12_resource *dst_resource_impl = impl_from_ID3D12Resource(dst_resource); ++ struct d3d12_resource *src_resource_impl = impl_from_ID3D12Resource(src_resource); ++ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ++ struct vkd3d_cs_op_data *op; ++ ++ TRACE("iface %p, dst_resource %p, dst_region_start_coordinate %p, " ++ "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x.\n", + iface, dst_resource, dst_region_start_coordinate, src_resource, + src_region_start_coordinate, region_size, flags); ++ ++ vkd3d_mutex_lock(&command_queue->op_mutex); ++ ++ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) ++ { ++ ERR("Failed to add op.\n"); ++ return; ++ } ++ op->opcode = VKD3D_CS_OP_COPY_MAPPINGS; ++ op->u.copy_mappings.dst_resource = dst_resource_impl; ++ op->u.copy_mappings.src_resource = src_resource_impl; ++ op->u.copy_mappings.dst_region_start_coordinate = *dst_region_start_coordinate; ++ op->u.copy_mappings.src_region_start_coordinate = *src_region_start_coordinate; ++ op->u.copy_mappings.region_size = *region_size; ++ op->u.copy_mappings.flags = flags; ++ ++ d3d12_command_queue_submit_locked(command_queue); ++ ++ vkd3d_mutex_unlock(&command_queue->op_mutex); + } + + static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, +@@ -6214,8 +6396,6 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu ERR("Failed to submit queue(s), vr %d.\n", vr); vkd3d_queue_release(vkd3d_queue); @@ -4256,7 +6142,7 @@ index 1fc6c00deff..de355ccd10d 100644 } static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue) -@@ -6412,7 +6437,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm +@@ -6273,7 +6453,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) { ERR("Failed to add op.\n"); @@ -4265,7 +6151,7 @@ index 1fc6c00deff..de355ccd10d 100644 } op->opcode = VKD3D_CS_OP_EXECUTE; op->u.execute.buffers = buffers; -@@ -6420,6 +6445,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm +@@ -6281,6 +6461,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm d3d12_command_queue_submit_locked(command_queue); @@ -4273,7 +6159,7 @@ index 1fc6c00deff..de355ccd10d 100644 vkd3d_mutex_unlock(&command_queue->op_mutex); return; } -@@ -6487,6 +6513,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * +@@ -6348,6 +6529,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) { @@ -4281,7 +6167,7 @@ index 1fc6c00deff..de355ccd10d 100644 hr = E_OUTOFMEMORY; goto done; } -@@ -6825,6 +6852,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if +@@ -6686,6 +6868,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) { @@ -4289,7 +6175,7 @@ index 1fc6c00deff..de355ccd10d 100644 hr = E_OUTOFMEMORY; goto done; } -@@ -7061,12 +7089,10 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * +@@ -6922,22 +7105,31 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * return d3d12_command_queue_fixup_after_flush_locked(queue); } d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); @@ -4302,7 +6188,19 @@ index 1fc6c00deff..de355ccd10d 100644 break; case VKD3D_CS_OP_EXECUTE: -@@ -7086,6 +7112,8 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * + d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); + break; + ++ case VKD3D_CS_OP_UPDATE_MAPPINGS: ++ FIXME("Tiled resource binding is not supported yet.\n"); ++ update_mappings_cleanup(&op->u.update_mappings); ++ break; ++ ++ case VKD3D_CS_OP_COPY_MAPPINGS: ++ FIXME("Tiled resource mapping copying is not supported yet.\n"); ++ break; ++ + default: vkd3d_unreachable(); } @@ -4311,6 +6209,150 @@ index 1fc6c00deff..de355ccd10d 100644 *flushed_any |= true; } +@@ -7000,6 +7192,8 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, + if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue->vkd3d_queue, device))) + goto fail_destroy_op_mutex; + ++ queue->supports_sparse_binding = !!(queue->vkd3d_queue->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT); ++ + d3d12_device_add_ref(queue->device = device); + + return S_OK; +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index 4263dcf4184..b9a8943cc08 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -1464,6 +1464,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + + vulkan_info->device_limits = physical_device_info->properties2.properties.limits; + vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; ++ vulkan_info->sparse_binding = features->sparseBinding; ++ vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; + vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; + vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; + vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat; +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index ea7b6859cc1..4c07d326504 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -779,6 +779,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, + VkImageFormatListCreateInfoKHR format_list; + const struct vkd3d_format *format; + VkImageCreateInfo image_info; ++ uint32_t count; + VkResult vr; + + if (resource) +@@ -914,6 +915,20 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, + if (resource && image_info.tiling == VK_IMAGE_TILING_LINEAR) + resource->flags |= VKD3D_RESOURCE_LINEAR_TILING; + ++ if (sparse_resource) ++ { ++ count = 0; ++ VK_CALL(vkGetPhysicalDeviceSparseImageFormatProperties(device->vk_physical_device, image_info.format, ++ image_info.imageType, image_info.samples, image_info.usage, image_info.tiling, &count, NULL)); ++ ++ if (!count) ++ { ++ FIXME("Sparse images are not supported with format %u, type %u, samples %u, usage %#x.\n", ++ image_info.format, image_info.imageType, image_info.samples, image_info.usage); ++ return E_INVALIDARG; ++ } ++ } ++ + if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, vk_image))) < 0) + WARN("Failed to create Vulkan image, vr %d.\n", vr); + +@@ -928,6 +943,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + D3D12_RESOURCE_DESC validated_desc; + VkMemoryRequirements requirements; + VkImage vk_image; ++ bool tiled; + HRESULT hr; + + assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); +@@ -940,8 +956,10 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + desc = &validated_desc; + } + ++ tiled = desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; ++ + /* XXX: We have to create an image to get its memory requirements. */ +- if (SUCCEEDED(hr = vkd3d_create_image(device, &heap_properties, 0, desc, NULL, &vk_image))) ++ if (SUCCEEDED(hr = vkd3d_create_image(device, tiled ? NULL : &heap_properties, 0, desc, NULL, &vk_image))) + { + VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &requirements)); + VK_CALL(vkDestroyImage(device->vk_device, vk_image, NULL)); +@@ -1039,12 +1057,12 @@ static void d3d12_resource_get_level_box(const struct d3d12_resource *resource, + box->back = d3d12_resource_desc_get_depth(&resource->desc, level); + } + +-/* ID3D12Resource */ +-static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) ++static void d3d12_resource_init_tiles(struct d3d12_resource *resource) + { +- return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); ++ resource->tiles.subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); + } + ++/* ID3D12Resource */ + static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource *iface, + REFIID riid, void **object) + { +@@ -1661,6 +1679,21 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d + return E_INVALIDARG; + } + ++ if (desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE) ++ { ++ if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D && !device->vk_info.sparse_residency_3d) ++ { ++ WARN("The device does not support tiled 3D images.\n"); ++ return E_INVALIDARG; ++ } ++ if (format->plane_count > 1) ++ { ++ WARN("Invalid format %#x. D3D12 does not support multiplanar formats for tiled resources.\n", ++ format->dxgi_format); ++ return E_INVALIDARG; ++ } ++ } ++ + if (!d3d12_resource_validate_texture_format(desc, format) + || !d3d12_resource_validate_texture_alignment(desc, format)) + return E_INVALIDARG; +@@ -1722,6 +1755,12 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 + + resource->desc = *desc; + ++ if (!heap_properties && !device->vk_info.sparse_binding) ++ { ++ WARN("The device does not support tiled images.\n"); ++ return E_INVALIDARG; ++ } ++ + if (heap_properties && !d3d12_resource_validate_heap_properties(resource, heap_properties, initial_state)) + return E_INVALIDARG; + +@@ -1787,6 +1826,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 + resource->heap = NULL; + resource->heap_offset = 0; + ++ memset(&resource->tiles, 0, sizeof(resource->tiles)); ++ + if (FAILED(hr = vkd3d_private_store_init(&resource->private_store))) + { + d3d12_resource_destroy(resource, device); +@@ -1972,6 +2013,8 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, + desc, initial_state, optimized_clear_value, &object))) + return hr; + ++ d3d12_resource_init_tiles(object); ++ + TRACE("Created reserved resource %p.\n", object); + + *resource = object; diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 5e46b467252..2d8138245d8 100644 --- a/libs/vkd3d/libs/vkd3d/state.c @@ -4365,6 +6407,111 @@ index 5e46b467252..2d8138245d8 100644 return vkd3d_shader_scan(&compile_info, NULL); } +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index b0150754434..c5259420acf 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -147,9 +147,12 @@ struct vkd3d_vulkan_info + unsigned int max_vertex_attrib_divisor; + + VkPhysicalDeviceLimits device_limits; +- VkPhysicalDeviceSparseProperties sparse_properties; + struct vkd3d_device_descriptor_limits descriptor_limits; + ++ VkPhysicalDeviceSparseProperties sparse_properties; ++ bool sparse_binding; ++ bool sparse_residency_3d; ++ + VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; + + unsigned int shader_extension_count; +@@ -670,6 +673,11 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface); + #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 + #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010 + ++struct d3d12_resource_tile_info ++{ ++ unsigned int subresource_count; ++}; ++ + /* ID3D12Resource */ + struct d3d12_resource + { +@@ -698,9 +706,16 @@ struct d3d12_resource + + struct d3d12_device *device; + ++ struct d3d12_resource_tile_info tiles; ++ + struct vkd3d_private_store private_store; + }; + ++static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) ++{ ++ return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); ++} ++ + static inline bool d3d12_resource_is_buffer(const struct d3d12_resource *resource) + { + return resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER; +@@ -1454,6 +1469,8 @@ enum vkd3d_cs_op + VKD3D_CS_OP_WAIT, + VKD3D_CS_OP_SIGNAL, + VKD3D_CS_OP_EXECUTE, ++ VKD3D_CS_OP_UPDATE_MAPPINGS, ++ VKD3D_CS_OP_COPY_MAPPINGS, + }; + + struct vkd3d_cs_wait +@@ -1474,6 +1491,30 @@ struct vkd3d_cs_execute + unsigned int buffer_count; + }; + ++struct vkd3d_cs_update_mappings ++{ ++ struct d3d12_resource *resource; ++ struct d3d12_heap *heap; ++ D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates; ++ D3D12_TILE_REGION_SIZE *region_sizes; ++ D3D12_TILE_RANGE_FLAGS *range_flags; ++ UINT *heap_range_offsets; ++ UINT *range_tile_counts; ++ UINT region_count; ++ UINT range_count; ++ D3D12_TILE_MAPPING_FLAGS flags; ++}; ++ ++struct vkd3d_cs_copy_mappings ++{ ++ struct d3d12_resource *dst_resource; ++ struct d3d12_resource *src_resource; ++ D3D12_TILED_RESOURCE_COORDINATE dst_region_start_coordinate; ++ D3D12_TILED_RESOURCE_COORDINATE src_region_start_coordinate; ++ D3D12_TILE_REGION_SIZE region_size; ++ D3D12_TILE_MAPPING_FLAGS flags; ++}; ++ + struct vkd3d_cs_op_data + { + enum vkd3d_cs_op opcode; +@@ -1482,6 +1523,8 @@ struct vkd3d_cs_op_data + struct vkd3d_cs_wait wait; + struct vkd3d_cs_signal signal; + struct vkd3d_cs_execute execute; ++ struct vkd3d_cs_update_mappings update_mappings; ++ struct vkd3d_cs_copy_mappings copy_mappings; + } u; + }; + +@@ -1519,6 +1562,8 @@ struct d3d12_command_queue + * set, aux_op_queue.count must be zero. */ + struct d3d12_command_queue_op_array aux_op_queue; + ++ bool supports_sparse_binding; ++ + struct vkd3d_private_store private_store; + }; + -- 2.40.1 diff --git a/patches/vkd3d-latest/0003-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch b/patches/vkd3d-latest/0003-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch deleted file mode 100644 index 8df40b9d..00000000 --- a/patches/vkd3d-latest/0003-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch +++ /dev/null @@ -1,401 +0,0 @@ -From 18393e5d5e96df14252e917403165614e0e77471 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 23 Jun 2023 14:15:46 +1000 -Subject: [PATCH] Update vkd3d to 771e442af16228a977eebba82224f06f6d0202fe - (1.8) - ---- - libs/vkd3d/include/vkd3d.h | 1 + - libs/vkd3d/include/vkd3d_shader.h | 1 + - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 36 +++++++++++++-------- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 6 ++-- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 17 ++++++---- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 33 ++++++++++++------- - libs/vkd3d/libs/vkd3d-shader/ir.c | 3 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 16 ++++++--- - libs/vkd3d/libs/vkd3d/state.c | 4 +-- - 10 files changed, 77 insertions(+), 42 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h -index ff2b15c51dc..72ed3ced671 100644 ---- a/libs/vkd3d/include/vkd3d.h -+++ b/libs/vkd3d/include/vkd3d.h -@@ -76,6 +76,7 @@ enum vkd3d_api_version - VKD3D_API_VERSION_1_5, - VKD3D_API_VERSION_1_6, - VKD3D_API_VERSION_1_7, -+ VKD3D_API_VERSION_1_8, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION), - }; -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 859b8c79792..274241546ea 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -49,6 +49,7 @@ enum vkd3d_shader_api_version - VKD3D_SHADER_API_VERSION_1_5, - VKD3D_SHADER_API_VERSION_1_6, - VKD3D_SHADER_API_VERSION_1_7, -+ VKD3D_SHADER_API_VERSION_1_8, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION), - }; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index c35f8ca0ff8..712613ac13b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1281,10 +1281,13 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); -+ unsigned int r; - -- if (!var->semantic.name && var->regs[regset].allocated) -+ for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { -+ if (var->semantic.name || !var->regs[r].allocated) -+ continue; -+ - ++uniform_count; - - if (var->is_param && var->is_uniform) -@@ -1321,20 +1324,23 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); -+ unsigned int r; - -- if (!var->semantic.name && var->regs[regset].allocated) -+ for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { -+ if (var->semantic.name || !var->regs[r].allocated) -+ continue; -+ - put_u32(buffer, 0); /* name */ -- if (regset == HLSL_REGSET_NUMERIC) -+ if (r == HLSL_REGSET_NUMERIC) - { -- put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); -- put_u32(buffer, var->data_type->reg_size[regset] / 4); -+ put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); -+ put_u32(buffer, var->data_type->reg_size[r] / 4); - } - else - { -- put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); -- put_u32(buffer, var->regs[regset].bind_count); -+ put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); -+ put_u32(buffer, var->regs[r].bind_count); - } - put_u32(buffer, 0); /* type */ - put_u32(buffer, 0); /* FIXME: default value */ -@@ -1345,12 +1351,16 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); -+ unsigned int r; - -- if (!var->semantic.name && var->regs[regset].allocated) -+ for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { -- size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); -- size_t name_offset; -+ size_t var_offset, name_offset; -+ -+ if (var->semantic.name || !var->regs[r].allocated) -+ continue; -+ -+ var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); - - name_offset = put_string(buffer, var->name); - set_u32(buffer, var_offset, name_offset - ctab_start); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index f439c9f3383..ba5bcfbfaf0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -3236,7 +3236,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, - - rb_init(&ctx->functions, compare_function_rb); - -- list_init(&ctx->static_initializers); -+ hlsl_block_init(&ctx->static_initializers); - list_init(&ctx->extern_vars); - - list_init(&ctx->buffers); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index a7ff1f23858..bce48e94b24 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -796,9 +796,8 @@ struct hlsl_ctx - struct hlsl_type *Void; - } builtin_types; - -- /* List of the instruction nodes for initializing static variables; linked by the -- * hlsl_ir_node.entry fields. */ -- struct list static_initializers; -+ /* List of the instruction nodes for initializing static variables. */ -+ struct hlsl_block static_initializers; - - /* Dynamic array of constant values that appear in the shader, associated to the 'c' registers. - * Only used for SM1 profiles. */ -@@ -1214,6 +1213,7 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref - unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); - struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); - -+bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); - bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); - bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); - bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 209428f761a..0e07fe578e1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -1098,12 +1098,17 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - struct hlsl_ir_constant *constant; - struct hlsl_ir_node *node; - unsigned int ret = 0; -+ bool progress; - - if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), - hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) - return 0; - -- while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL)); -+ do -+ { -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, block); -+ } while (progress); - - node = node_from_list(&block->instrs); - if (node->type == HLSL_IR_CONSTANT) -@@ -2159,7 +2164,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - } - - if (modifiers & HLSL_STORAGE_STATIC) -- list_move_tail(&ctx->static_initializers, v->initializer.instrs); -+ list_move_tail(&ctx->static_initializers.instrs, v->initializer.instrs); - else - list_move_tail(statements_list, v->initializer.instrs); - vkd3d_free(v->initializer.args); -@@ -2182,9 +2187,9 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - vkd3d_free(v); - continue; - } -- list_add_tail(&ctx->static_initializers, &zero->entry); -+ hlsl_block_add_instr(&ctx->static_initializers, zero); - -- if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) -+ if (!(cast = add_cast(ctx, &ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) - { - vkd3d_free(v); - continue; -@@ -2195,7 +2200,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - vkd3d_free(v); - continue; - } -- list_add_tail(&ctx->static_initializers, &store->entry); -+ hlsl_block_add_instr(&ctx->static_initializers, store); - } - vkd3d_free(v); - } -@@ -5494,7 +5499,7 @@ arrays: - uint32_t *new_array; - unsigned int size; - -- hlsl_block_init(&block); -+ hlsl_clone_block(ctx, &block, &ctx->static_initializers); - list_move_tail(&block.instrs, $2); - - size = evaluate_static_expression_as_uint(ctx, &block, &@2); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 4317604bdfa..765b1907426 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -1649,7 +1649,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b - return progress; - } - --static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) -+bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) - { - struct copy_propagation_state state; - bool progress; -@@ -2698,7 +2698,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - continue; - regset = hlsl_type_get_regset(var->data_type); - -- if (var->reg_reservation.reg_type) -+ if (var->reg_reservation.reg_type && var->regs[regset].bind_count) - { - if (var->reg_reservation.reg_type != get_regset_name(regset)) - { -@@ -2714,7 +2714,6 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - { - var->regs[regset].allocated = true; - var->regs[regset].id = var->reg_reservation.reg_index; -- var->regs[regset].bind_count = var->data_type->reg_size[regset]; - TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, - var->reg_reservation.reg_index, var->reg_reservation.reg_type, - var->reg_reservation.reg_index + var->regs[regset].bind_count); -@@ -3626,11 +3625,23 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) - { -- if (!var->regs[regset].allocated) -+ if (var->reg_reservation.reg_type == get_regset_name(regset) -+ && var->data_type->reg_size[regset]) -+ { -+ /* Vars with a reservation prevent non-reserved vars from being -+ * bound there even if the reserved vars aren't used. */ -+ start = var->reg_reservation.reg_index; -+ count = var->data_type->reg_size[regset]; -+ } -+ else if (var->regs[regset].allocated) -+ { -+ start = var->regs[regset].id; -+ count = var->regs[regset].bind_count; -+ } -+ else -+ { - continue; -- -- start = var->regs[regset].id; -- count = var->regs[regset].bind_count; -+ } - - if (start <= index && index < start + count) - return var; -@@ -3977,7 +3988,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - unsigned int i; - bool progress; - -- list_move_head(&body->instrs, &ctx->static_initializers); -+ list_move_head(&body->instrs, &ctx->static_initializers.instrs); - - memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); - hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); -@@ -4073,7 +4084,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); -- progress |= copy_propagation_execute(ctx, body); -+ progress |= hlsl_copy_propagation_execute(ctx, body); - progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); - } -@@ -4112,10 +4123,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - if (TRACE_ON()) - rb_for_each_entry(&ctx->functions, dump_function, ctx); - -- allocate_register_reservations(ctx); -- - calculate_resource_register_counts(ctx); - -+ allocate_register_reservations(ctx); -+ - allocate_temp_registers(ctx, entry_func); - if (profile->major_version < 4) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index d542fbb0d52..9eefb82c226 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -715,8 +715,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map - } - } - element_count = new_count; -- /* Signature 's' is a copy of the original signature struct, so we can replace -- * the 'elements' pointer without freeing it. */ -+ vkd3d_free(s->elements); - s->elements = elements; - s->element_count = element_count; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index cc0b63e8284..3542b5fac51 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -201,7 +201,7 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d - - #define VKD3D_SPIRV_VERSION 0x00010000 - #define VKD3D_SPIRV_GENERATOR_ID 18 --#define VKD3D_SPIRV_GENERATOR_VERSION 7 -+#define VKD3D_SPIRV_GENERATOR_VERSION 8 - #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) - - struct vkd3d_spirv_stream -@@ -2324,11 +2324,15 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) - - vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); - -+ shader_signature_cleanup(&compiler->input_signature); -+ shader_signature_cleanup(&compiler->output_signature); -+ shader_signature_cleanup(&compiler->patch_constant_signature); -+ - vkd3d_free(compiler); - } - - static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, -- const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) - { -@@ -2428,6 +2432,9 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve - compiler->input_signature = shader_desc->input_signature; - compiler->output_signature = shader_desc->output_signature; - compiler->patch_constant_signature = shader_desc->patch_constant_signature; -+ memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); -+ memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); -+ memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); - - if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) - { -@@ -4479,7 +4486,8 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, - element_idx = shader_register_get_io_indices(reg, array_sizes); - signature_element = &shader_signature->elements[element_idx]; - -- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && !sysval && signature_element->sysval_semantic) -+ if ((compiler->shader_type == VKD3D_SHADER_TYPE_HULL || compiler->shader_type == VKD3D_SHADER_TYPE_GEOMETRY) -+ && !sysval && signature_element->sysval_semantic) - sysval = vkd3d_siv_from_sysval(signature_element->sysval_semantic); - - builtin = get_spirv_builtin_for_sysval(compiler, sysval); -@@ -9450,7 +9458,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - && (result = instruction_array_flatten_hull_shader_phases(&instructions)) >= 0) - { - result = instruction_array_normalise_hull_shader_control_point_io(&instructions, -- &parser->shader_desc.input_signature); -+ &compiler->input_signature); - } - if (result >= 0) - result = instruction_array_normalise_io_registers(&instructions, parser->shader_version.type, -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index c964ea8fe3a..5e46b467252 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -1958,7 +1958,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device, - - const struct vkd3d_shader_compile_option options[] = - { -- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7}, -+ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_8}, - {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, - {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0}, - }; -@@ -2011,7 +2011,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER - - const struct vkd3d_shader_compile_option options[] = - { -- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7}, -+ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_8}, - {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, - }; - --- -2.40.1 - diff --git a/patches/vkd3d-latest/0004-Update-vkd3d-to-3aee386e2fdf2e0bf65e7006a380f204a1e5.patch b/patches/vkd3d-latest/0004-Update-vkd3d-to-3aee386e2fdf2e0bf65e7006a380f204a1e5.patch deleted file mode 100644 index c40334e0..00000000 --- a/patches/vkd3d-latest/0004-Update-vkd3d-to-3aee386e2fdf2e0bf65e7006a380f204a1e5.patch +++ /dev/null @@ -1,2275 +0,0 @@ -From ce88a7e3005280267ba52e537fb82d45fa54d5f2 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 28 Jun 2023 16:27:03 +1000 -Subject: [PATCH] Update vkd3d to 3aee386e2fdf2e0bf65e7006a380f204a1e571f4 - ---- - libs/vkd3d/include/vkd3d_shader.h | 111 ++++++- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 289 ++++++++++++++++ - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 31 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 9 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 102 ++++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 72 +++- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 314 +++++++++++++----- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 14 +- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 116 ++++--- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 3 + - libs/vkd3d/libs/vkd3d/command.c | 160 ++++++++- - libs/vkd3d/libs/vkd3d/device.c | 2 + - libs/vkd3d/libs/vkd3d/resource.c | 51 ++- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 47 ++- - 14 files changed, 1134 insertions(+), 187 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 274241546ea..5a10dbe087b 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -85,6 +85,11 @@ enum vkd3d_shader_structure_type - * \since 1.3 - */ - VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO, -+ /** -+ * The structure is a vkd3d_shader_scan_signature_info structure. -+ * \since 1.9 -+ */ -+ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), - }; -@@ -620,7 +625,7 @@ enum vkd3d_shader_target_type - { - /** - * The shader has no type or is to be ignored. This is not a valid value -- * for vkd3d_shader_compile() or vkd3d_shader_scan(). -+ * for vkd3d_shader_compile(). - */ - VKD3D_SHADER_TARGET_NONE, - /** -@@ -1551,6 +1556,64 @@ static inline uint32_t vkd3d_shader_create_swizzle(enum vkd3d_shader_swizzle_com - | ((w & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(3)); - } - -+/** -+ * A chained structure containing descriptions of shader inputs and outputs. -+ * -+ * This structure is currently implemented only for DXBC and legacy D3D bytecode -+ * source types. -+ * For DXBC shaders, the returned information is parsed directly from the -+ * signatures embedded in the DXBC shader. -+ * For legacy D3D shaders, the returned information is synthesized based on -+ * registers declared or used by shader instructions. -+ * For all other shader types, the structure is zeroed. -+ * -+ * All members (except for \ref type and \ref next) are output-only. -+ * -+ * This structure is passed to vkd3d_shader_scan() and extends -+ * vkd3d_shader_compile_info. -+ * -+ * Members of this structure are allocated by vkd3d-shader and should be freed -+ * with vkd3d_shader_free_scan_signature_info() when no longer needed. -+ * -+ * All signatures may contain pointers into the input shader, and should only -+ * be accessed while the input shader remains valid. -+ * -+ * Signature elements are synthesized from legacy Direct3D bytecode as follows: -+ * - The \ref vkd3d_shader_signature_element.semantic_name field is set to an -+ * uppercase string corresponding to the HLSL name for the usage, e.g. -+ * "POSITION", "BLENDWEIGHT", "COLOR", "PSIZE", etc. -+ * - The \ref vkd3d_shader_signature_element.semantic_index field is set to the -+ * usage index. -+ * - The \ref vkd3d_shader_signature_element.stream_index is always 0. -+ * -+ * Signature elements are synthesized for any input or output register declared -+ * or used in a legacy Direct3D bytecode shader, including the following: -+ * - Shader model 1 and 2 colour and texture coordinate registers. -+ * - The shader model 1 pixel shader output register. -+ * - Shader model 1 and 2 vertex shader output registers (position, fog, and -+ * point size). -+ * - Shader model 3 pixel shader system value input registers (pixel position -+ * and face). -+ * -+ * \since 1.9 -+ */ -+struct vkd3d_shader_scan_signature_info -+{ -+ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO. */ -+ enum vkd3d_shader_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ -+ const void *next; -+ -+ /** The shader input varyings. */ -+ struct vkd3d_shader_signature input; -+ -+ /** The shader output varyings. */ -+ struct vkd3d_shader_signature output; -+ -+ /** The shader patch constant varyings. */ -+ struct vkd3d_shader_signature patch_constant; -+}; -+ - #ifdef LIBVKD3D_SHADER_SOURCE - # define VKD3D_SHADER_API VKD3D_EXPORT - #else -@@ -1625,6 +1688,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported - * following chained structures: - * - vkd3d_shader_interface_info - * - vkd3d_shader_scan_descriptor_info -+ * - vkd3d_shader_scan_signature_info - * - vkd3d_shader_spirv_domain_shader_target_info - * - vkd3d_shader_spirv_target_info - * - vkd3d_shader_transform_feedback_info -@@ -1784,6 +1848,26 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver - * Parse shader source code or byte code, returning various types of requested - * information. - * -+ * The \a source_type member of \a compile_info must be set to the type of the -+ * shader. -+ * -+ * The \a target_type member may be set to VKD3D_SHADER_TARGET_NONE, in which -+ * case vkd3d_shader_scan() will return information about the shader in -+ * isolation. Alternatively, it may be set to a valid compilation target for the -+ * shader, in which case vkd3d_shader_scan() will return information that -+ * reflects the interface for a shader as it will be compiled to that target. -+ * In this case other chained structures may be appended to \a compile_info as -+ * they would be passed to vkd3d_shader_compile(), and interpreted accordingly, -+ * such as vkd3d_shader_spirv_target_info. -+ * -+ * (For a hypothetical example, suppose the source shader distinguishes float -+ * and integer texture data, but the target environment does not support integer -+ * textures. In this case vkd3d_shader_compile() might translate integer -+ * operations to float. Accordingly using VKD3D_SHADER_TARGET_NONE would -+ * accurately report whether the texture expects integer or float data, but -+ * using the relevant specific target type would report -+ * VKD3D_SHADER_RESOURCE_DATA_FLOAT.) -+ * - * Currently this function supports the following code types: - * - VKD3D_SHADER_SOURCE_DXBC_TPF - * -@@ -1791,6 +1875,7 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver - * \n - * The DXBC_TPF scanner supports the following chained structures: - * - vkd3d_shader_scan_descriptor_info -+ * - vkd3d_shader_scan_signature_info - * \n - * Although the \a compile_info parameter is read-only, chained structures - * passed to this function need not be, and may serve as output parameters, -@@ -1827,12 +1912,18 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_descriptor_info( - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info); - - /** -- * Read the input signature of a compiled shader, returning a structural -+ * Read the input signature of a compiled DXBC shader, returning a structural - * description which can be easily parsed by C code. - * - * This function parses a compiled shader. To parse a standalone root signature, - * use vkd3d_shader_parse_root_signature(). - * -+ * This function only parses DXBC shaders, and only retrieves the input -+ * signature. To retrieve signatures from other shader types, or other signature -+ * types, use vkd3d_shader_scan() and struct vkd3d_shader_scan_signature_info. -+ * This function returns the same input signature that is returned in -+ * struct vkd3d_shader_scan_signature_info. -+ * - * \param dxbc Compiled byte code, in DXBC format. - * - * \param signature Output location in which the parsed root signature will be -@@ -2022,6 +2113,19 @@ VKD3D_SHADER_API int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxb - VKD3D_SHADER_API int vkd3d_shader_serialize_dxbc(size_t section_count, - const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); - -+/** -+ * Free members of struct vkd3d_shader_scan_signature_info allocated by -+ * vkd3d_shader_scan(). -+ * -+ * This function may free members of vkd3d_shader_scan_signature_info, but -+ * does not free the structure itself. -+ * -+ * \param info Scan information to free. -+ * -+ * \since 1.9 -+ */ -+VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info); -+ - #endif /* VKD3D_SHADER_NO_PROTOTYPES */ - - /** Type of vkd3d_shader_get_version(). */ -@@ -2087,6 +2191,9 @@ typedef int (*PFN_vkd3d_shader_parse_dxbc)(const struct vkd3d_shader_code *dxbc, - typedef int (*PFN_vkd3d_shader_serialize_dxbc)(size_t section_count, - const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); - -+/** Type of vkd3d_shader_free_scan_signature_info(). \since 1.9 */ -+typedef void (*PFN_vkd3d_shader_free_scan_signature_info)(struct vkd3d_shader_scan_signature_info *info); -+ - #ifdef __cplusplus - } - #endif /* __cplusplus */ -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 712613ac13b..d2a4666a50a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -490,6 +490,245 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader - dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; - } - -+static struct signature_element *find_signature_element(const struct shader_signature *signature, -+ const char *semantic_name, unsigned int semantic_index) -+{ -+ struct signature_element *e = signature->elements; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ if (!ascii_strcasecmp(e[i].semantic_name, semantic_name) -+ && e[i].semantic_index == semantic_index) -+ return &e[i]; -+ } -+ -+ return NULL; -+} -+ -+static struct signature_element *find_signature_element_by_register_index( -+ const struct shader_signature *signature, unsigned int register_index) -+{ -+ struct signature_element *e = signature->elements; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ if (e[i].register_index == register_index) -+ return &e[i]; -+ } -+ -+ return NULL; -+} -+ -+static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, -+ const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, -+ unsigned int register_index, bool is_dcl, unsigned int mask) -+{ -+ struct shader_signature *signature; -+ struct signature_element *element; -+ -+ if (output) -+ signature = &sm1->p.shader_desc.output_signature; -+ else -+ signature = &sm1->p.shader_desc.input_signature; -+ -+ if ((element = find_signature_element(signature, name, index))) -+ { -+ element->mask |= mask; -+ if (!is_dcl) -+ element->used_mask |= mask; -+ return true; -+ } -+ -+ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, -+ signature->element_count + 1, sizeof(*signature->elements))) -+ return false; -+ element = &signature->elements[signature->element_count++]; -+ -+ element->semantic_name = name; -+ element->semantic_index = index; -+ element->stream_index = 0; -+ element->sysval_semantic = sysval; -+ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ element->register_index = register_index; -+ element->register_count = 1; -+ element->mask = mask; -+ element->used_mask = is_dcl ? 0 : mask; -+ element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; -+ -+ return true; -+} -+ -+static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, -+ unsigned int register_index, unsigned int mask) -+{ -+ struct shader_signature *signature; -+ struct signature_element *element; -+ -+ if (output) -+ signature = &sm1->p.shader_desc.output_signature; -+ else -+ signature = &sm1->p.shader_desc.input_signature; -+ -+ if (!(element = find_signature_element_by_register_index(signature, register_index))) -+ { -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC, -+ "%s register %u was used without being declared.", output ? "Output" : "Input", register_index); -+ return; -+ } -+ -+ element->used_mask |= mask; -+} -+ -+static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, -+ const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) -+{ -+ unsigned int register_index = reg->idx[0].offset; -+ -+ switch (reg->type) -+ { -+ case VKD3DSPR_TEMP: -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL -+ && sm1->p.shader_version.major == 1 && !register_index) -+ return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_NONE, 0, is_dcl, mask); -+ return true; -+ -+ case VKD3DSPR_INPUT: -+ /* For vertex shaders or sm3 pixel shaders, we should have already -+ * had a DCL instruction. Otherwise, this is a colour input. */ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX || sm1->p.shader_version.major == 3) -+ { -+ add_signature_mask(sm1, false, register_index, mask); -+ return true; -+ } -+ return add_signature_element(sm1, false, "COLOR", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ -+ case VKD3DSPR_TEXTURE: -+ /* For vertex shaders, this is ADDR. */ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) -+ return true; -+ return add_signature_element(sm1, false, "TEXCOORD", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ -+ case VKD3DSPR_OUTPUT: -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) -+ { -+ /* For sm < 2 vertex shaders, this is TEXCRDOUT. -+ * -+ * For sm3 vertex shaders, this is OUTPUT, but we already -+ * should have had a DCL instruction. */ -+ if (sm1->p.shader_version.major == 3) -+ { -+ add_signature_mask(sm1, true, register_index, mask); -+ return true; -+ } -+ return add_signature_element(sm1, true, "TEXCOORD", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ } -+ /* fall through */ -+ -+ case VKD3DSPR_ATTROUT: -+ case VKD3DSPR_COLOROUT: -+ return add_signature_element(sm1, true, "COLOR", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ -+ case VKD3DSPR_DEPTHOUT: -+ return add_signature_element(sm1, true, "DEPTH", 0, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); -+ -+ case VKD3DSPR_RASTOUT: -+ switch (register_index) -+ { -+ case 0: -+ return add_signature_element(sm1, true, "POSITION", 0, -+ VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); -+ -+ case 1: -+ return add_signature_element(sm1, true, "FOG", 0, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); -+ -+ case 2: -+ return add_signature_element(sm1, true, "PSIZE", 0, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); -+ -+ default: -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, -+ "Invalid rasterizer output index %u.", register_index); -+ return true; -+ } -+ -+ case VKD3DSPR_MISCTYPE: -+ switch (register_index) -+ { -+ case 0: -+ return add_signature_element(sm1, false, "VPOS", 0, -+ VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); -+ -+ case 1: -+ return add_signature_element(sm1, false, "VFACE", 0, -+ VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1); -+ -+ default: -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, -+ "Invalid miscellaneous fragment input index %u.", register_index); -+ return true; -+ } -+ -+ default: -+ return true; -+ } -+} -+ -+static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, -+ const struct vkd3d_shader_semantic *semantic) -+{ -+ const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; -+ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; -+ unsigned int mask = semantic->resource.reg.write_mask; -+ bool output; -+ -+ static const char sm1_semantic_names[][13] = -+ { -+ [VKD3D_DECL_USAGE_POSITION ] = "POSITION", -+ [VKD3D_DECL_USAGE_BLEND_WEIGHT ] = "BLENDWEIGHT", -+ [VKD3D_DECL_USAGE_BLEND_INDICES] = "BLENDINDICES", -+ [VKD3D_DECL_USAGE_NORMAL ] = "NORMAL", -+ [VKD3D_DECL_USAGE_PSIZE ] = "PSIZE", -+ [VKD3D_DECL_USAGE_TEXCOORD ] = "TEXCOORD", -+ [VKD3D_DECL_USAGE_TANGENT ] = "TANGENT", -+ [VKD3D_DECL_USAGE_BINORMAL ] = "BINORMAL", -+ [VKD3D_DECL_USAGE_TESS_FACTOR ] = "TESSFACTOR", -+ [VKD3D_DECL_USAGE_POSITIONT ] = "POSITIONT", -+ [VKD3D_DECL_USAGE_COLOR ] = "COLOR", -+ [VKD3D_DECL_USAGE_FOG ] = "FOG", -+ [VKD3D_DECL_USAGE_DEPTH ] = "DEPTH", -+ [VKD3D_DECL_USAGE_SAMPLE ] = "SAMPLE", -+ }; -+ -+ if (reg->type == VKD3DSPR_OUTPUT) -+ output = true; -+ else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE) -+ output = false; -+ else /* vpos and vface don't have a semantic. */ -+ return add_signature_element_from_register(sm1, reg, true, mask); -+ -+ /* sm2 pixel shaders use DCL but don't provide a semantic. */ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && sm1->p.shader_version.major == 2) -+ return add_signature_element_from_register(sm1, reg, true, mask); -+ -+ /* With the exception of vertex POSITION output, none of these are system -+ * values. Pixel POSITION input is not equivalent to SV_Position; the closer -+ * equivalent is VPOS, which is not declared as a semantic. */ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX -+ && output && semantic->usage == VKD3D_DECL_USAGE_POSITION) -+ sysval = VKD3D_SHADER_SV_POSITION; -+ -+ return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage], -+ semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); -+} -+ - /* Read a parameter token from the input stream, and possibly a relative - * addressing token. */ - static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, -@@ -640,6 +879,8 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, - range = &semantic->resource.range; - range->space = 0; - range->first = range->last = semantic->resource.reg.reg.idx[0].offset; -+ -+ add_signature_element_from_semantic(sm1, semantic); - } - - static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, -@@ -744,6 +985,14 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, - } - } - -+static unsigned int mask_from_swizzle(unsigned int swizzle) -+{ -+ return (1u << vkd3d_swizzle_get_component(swizzle, 0)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 1)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 2)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 3)); -+} -+ - static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) - { - struct vkd3d_shader_src_param *src_params, *predicate; -@@ -832,7 +1081,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - { - /* Destination token */ - if (ins->dst_count) -+ { - shader_sm1_read_dst_param(sm1, &p, dst_param); -+ add_signature_element_from_register(sm1, &dst_param->reg, false, dst_param->write_mask); -+ } - - /* Predication token */ - if (ins->predicate) -@@ -840,7 +1092,11 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - - /* Other source tokens */ - for (i = 0; i < ins->src_count; ++i) -+ { - shader_sm1_read_src_param(sm1, &p, &src_params[i]); -+ add_signature_element_from_register(sm1, &src_params[i].reg, -+ false, mask_from_swizzle(src_params[i].swizzle)); -+ } - } - - if (sm1->abort) -@@ -1844,6 +2100,35 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - } - -+static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+{ -+ const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -+ -+ switch (jump->type) -+ { -+ case HLSL_IR_JUMP_DISCARD_NEG: -+ { -+ struct hlsl_reg *reg = &jump->condition.node->reg; -+ -+ struct sm1_instruction instr = -+ { -+ .opcode = VKD3D_SM1_OP_TEXKILL, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.reg = reg->id, -+ .dst.writemask = reg->writemask, -+ .has_dst = 1, -+ }; -+ -+ write_sm1_instruction(ctx, buffer, &instr); -+ break; -+ } -+ -+ default: -+ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); -+ } -+} -+ - static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_load *load = hlsl_ir_load(instr); -@@ -2038,6 +2323,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - write_sm1_expr(ctx, buffer, instr); - break; - -+ case HLSL_IR_JUMP: -+ write_sm1_jump(ctx, buffer, instr); -+ break; -+ - case HLSL_IR_LOAD: - write_sm1_load(ctx, buffer, instr); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index ba5bcfbfaf0..acc2a89cce0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -1432,7 +1432,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v - } - - struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, -- const struct vkd3d_shader_location *loc) -+ struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_jump *jump; - -@@ -1440,6 +1440,7 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type - return NULL; - init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); - jump->type = type; -+ hlsl_src_from_node(&jump->condition, condition); - return &jump->node; - } - -@@ -1585,9 +1586,9 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma - return dst; - } - --static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) -+static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_jump *src) - { -- return hlsl_new_jump(ctx, src->type, &src->node.loc); -+ return hlsl_new_jump(ctx, src->type, map_instr(map, src->condition.node), &src->node.loc); - } - - static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) -@@ -1728,7 +1729,7 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, - return clone_index(ctx, map, hlsl_ir_index(instr)); - - case HLSL_IR_JUMP: -- return clone_jump(ctx, hlsl_ir_jump(instr)); -+ return clone_jump(ctx, map, hlsl_ir_jump(instr)); - - case HLSL_IR_LOAD: - return clone_load(ctx, map, hlsl_ir_load(instr)); -@@ -2146,10 +2147,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) - { - static const char * const names[] = - { -- "HLSL_IR_JUMP_BREAK", -- "HLSL_IR_JUMP_CONTINUE", -- "HLSL_IR_JUMP_DISCARD", -- "HLSL_IR_JUMP_RETURN", -+ [HLSL_IR_JUMP_BREAK] = "HLSL_IR_JUMP_BREAK", -+ [HLSL_IR_JUMP_CONTINUE] = "HLSL_IR_JUMP_CONTINUE", -+ [HLSL_IR_JUMP_DISCARD_NEG] = "HLSL_IR_JUMP_DISCARD_NEG", -+ [HLSL_IR_JUMP_DISCARD_NZ] = "HLSL_IR_JUMP_DISCARD_NZ", -+ [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", - }; - - assert(type < ARRAY_SIZE(names)); -@@ -2418,8 +2420,12 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i - vkd3d_string_buffer_printf(buffer, "continue"); - break; - -- case HLSL_IR_JUMP_DISCARD: -- vkd3d_string_buffer_printf(buffer, "discard"); -+ case HLSL_IR_JUMP_DISCARD_NEG: -+ vkd3d_string_buffer_printf(buffer, "discard_neg"); -+ break; -+ -+ case HLSL_IR_JUMP_DISCARD_NZ: -+ vkd3d_string_buffer_printf(buffer, "discard_nz"); - break; - - case HLSL_IR_JUMP_RETURN: -@@ -2703,6 +2709,7 @@ static void free_ir_if(struct hlsl_ir_if *if_node) - - static void free_ir_jump(struct hlsl_ir_jump *jump) - { -+ hlsl_src_remove(&jump->condition); - vkd3d_free(jump); - } - -@@ -3127,8 +3134,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - - for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) - { -- unsigned int n_variants = 0; - const char *const *variants; -+ unsigned int n_variants; - - switch (bt) - { -@@ -3148,6 +3155,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - break; - - default: -+ n_variants = 0; -+ variants = NULL; - break; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index bce48e94b24..f2d0a36b045 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -558,7 +558,8 @@ enum hlsl_ir_jump_type - { - HLSL_IR_JUMP_BREAK, - HLSL_IR_JUMP_CONTINUE, -- HLSL_IR_JUMP_DISCARD, -+ HLSL_IR_JUMP_DISCARD_NEG, -+ HLSL_IR_JUMP_DISCARD_NZ, - HLSL_IR_JUMP_RETURN, - }; - -@@ -566,6 +567,8 @@ struct hlsl_ir_jump - { - struct hlsl_ir_node node; - enum hlsl_ir_jump_type type; -+ /* Argument used for HLSL_IR_JUMP_DISCARD_NZ and HLSL_IR_JUMP_DISCARD_NEG. */ -+ struct hlsl_src condition; - }; - - struct hlsl_ir_swizzle -@@ -1120,7 +1123,7 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, -- enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc); -+ enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); - - void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); - -@@ -1132,6 +1135,8 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls - const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, -+ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc); - - struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); - struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 0e07fe578e1..b487c5c138f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -273,9 +273,6 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ - return hlsl_types_are_componentwise_equal(ctx, src, dst); - } - --static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, -- unsigned int comp, const struct vkd3d_shader_location *loc); -- - static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) - { -@@ -333,7 +330,7 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - - dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); - -- if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) -+ if (!(component_load = hlsl_add_load_component(ctx, instrs, node, src_idx, loc))) - return NULL; - - if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) -@@ -421,7 +418,7 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_lis - - hlsl_block_init(&then_block); - -- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &condition->loc))) - return false; - hlsl_block_add_instr(&then_block, jump); - -@@ -476,7 +473,7 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const - } - else - { -- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented.\n"); -+ hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); - } - } - else if (!strcmp(attr->name, "loop") -@@ -656,14 +653,14 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); - } - -- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, NULL, loc))) - return false; - list_add_tail(instrs, &jump->entry); - - return true; - } - --static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, -+struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - unsigned int comp, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *load, *store; -@@ -830,6 +827,16 @@ static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) - return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; - } - -+static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -+{ -+ return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); -+} -+ -+static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -+{ -+ return !shader_profile_version_ge(ctx, major, minor); -+} -+ - static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - struct hlsl_type *type, unsigned int modifiers, struct list *defs) - { -@@ -1020,7 +1027,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const - struct hlsl_reg_reservation reservation = {0}; - char *endptr; - -- if (ctx->profile->major_version < 4) -+ if (shader_profile_version_lt(ctx, 4, 0)) - return reservation; - - reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); -@@ -1284,7 +1291,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, - { - if (operands[j]) - { -- if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, instrs, operands[j], i, loc))) - return NULL; - - cell_operands[j] = load; -@@ -1557,8 +1564,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, arg1->data_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid type %s.\n", string->buffer); -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return NULL; - } -@@ -1568,8 +1574,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, arg2->data_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid type %s.\n", string->buffer); -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return NULL; - } -@@ -1779,7 +1784,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - return NULL; - list_add_tail(instrs, &cell->entry); - -- if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) -+ if (!(load = hlsl_add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) - return NULL; - - if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) -@@ -1868,7 +1873,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_type *dst_comp_type; - struct hlsl_block block; - -- if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) -+ if (!(load = hlsl_add_load_component(ctx, instrs, src, k, &src->loc))) - return; - - dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); -@@ -2069,7 +2074,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - type_has_object_components(var->data_type, true)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Target profile doesn't support objects as struct members in uniform variables.\n"); -+ "Target profile doesn't support objects as struct members in uniform variables."); - } - - if ((func = hlsl_get_func_decl(ctx, var->name))) -@@ -2405,7 +2410,7 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) - { -- if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) - return false; - - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) -@@ -2449,7 +2454,7 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) - { -- if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) - return false; - - if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) -@@ -2544,6 +2549,34 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, max, params->args[2], loc); - } - -+static bool intrinsic_clip(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *condition, *jump; -+ -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) -+ return false; -+ -+ condition = params->args[0]; -+ -+ if (ctx->profile->major_version < 4 && hlsl_type_component_count(condition->data_type) > 4) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, condition->data_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Argument type cannot exceed 4 components, got type \"%s\".", string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return false; -+ } -+ -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NEG, condition, loc))) -+ return false; -+ list_add_tail(params->instrs, &jump->entry); -+ -+ return true; -+} -+ - static bool intrinsic_cos(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3034,10 +3067,10 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *value1, *value2, *mul; - -- if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) -+ if (!(value1 = hlsl_add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) - return false; - -- if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) -+ if (!(value2 = hlsl_add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) - return false; - - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) -@@ -3308,7 +3341,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - - if (params->args_count == 4) - { -- hlsl_fixme(ctx, loc, "Samples with gradients are not implemented.\n"); -+ hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); - } - - sampler_type = params->args[0]->data_type; -@@ -3369,7 +3402,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - - if ((string = hlsl_type_to_string(ctx, arg_type))) - hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.\n", -+ "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.", - string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; -@@ -3393,7 +3426,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - { - struct hlsl_block block; - -- if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) - return false; - - if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) -@@ -3458,7 +3491,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) - return false; - -- if (ctx->profile->major_version >= 4) -+ if (shader_profile_version_ge(ctx, 4, 0)) - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); - - return true; -@@ -3482,6 +3515,7 @@ intrinsic_functions[] = - {"asfloat", 1, true, intrinsic_asfloat}, - {"asuint", -1, true, intrinsic_asuint}, - {"clamp", 3, true, intrinsic_clamp}, -+ {"clip", 1, true, intrinsic_clip}, - {"cos", 1, true, intrinsic_cos}, - {"cross", 2, true, intrinsic_cross}, - {"ddx", 1, true, intrinsic_ddx}, -@@ -5261,7 +5295,12 @@ type_no_void: - { - validate_texture_format_type(ctx, $3, &@3); - -- /* TODO: unspecified sample count is not allowed for all targets */ -+ if (shader_profile_version_lt(ctx, 4, 1)) -+ { -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); -+ } -+ - $$ = hlsl_new_texture_type(ctx, $1, $3, 0); - } - | texture_ms_type '<' type ',' shift_expr '>' -@@ -5325,7 +5364,7 @@ type_no_void: - $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); - if ($$->is_minimum_precision) - { -- if (ctx->profile->major_version < 4) -+ if (shader_profile_version_lt(ctx, 4, 0)) - { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Target profile doesn't support minimum-precision types."); -@@ -5736,11 +5775,16 @@ jump_statement: - discard_statement: - KW_DISCARD ';' - { -- struct hlsl_ir_node *discard; -+ struct hlsl_ir_node *discard, *c; - - if (!($$ = make_empty_list(ctx))) - YYABORT; -- if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) -+ -+ if (!(c = hlsl_new_uint_constant(ctx, ~0u, &@1))) -+ return false; -+ list_add_tail($$, &c->entry); -+ -+ if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NZ, c, &@1))) - return false; - list_add_tail($$, &discard->entry); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 765b1907426..b980ed567aa 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -666,7 +666,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx, - return; - list_add_after(&cf_instr->entry, &load->node.entry); - -- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc))) - return; - hlsl_block_add_instr(&then_block, jump); - -@@ -1889,7 +1889,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - - if (rhs->type != HLSL_IR_LOAD) - { -- hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type.\n"); -+ hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type."); - return false; - } - -@@ -2584,6 +2584,61 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return true; - } - -+static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; -+ static const struct hlsl_constant_value zero_value; -+ struct hlsl_type *arg_type, *cmp_type; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; -+ struct hlsl_ir_jump *jump; -+ unsigned int i, count; -+ struct list instrs; -+ -+ if (instr->type != HLSL_IR_JUMP) -+ return false; -+ jump = hlsl_ir_jump(instr); -+ if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) -+ return false; -+ -+ list_init(&instrs); -+ -+ arg_type = jump->condition.node->data_type; -+ if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc))) -+ return false; -+ list_add_tail(&instrs, &zero->entry); -+ -+ operands[0] = jump->condition.node; -+ operands[1] = zero; -+ cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); -+ if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) -+ return false; -+ list_add_tail(&instrs, &cmp->entry); -+ -+ if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) -+ return false; -+ list_add_tail(&instrs, &bool_false->entry); -+ -+ or = bool_false; -+ -+ count = hlsl_type_component_count(cmp_type); -+ for (i = 0; i < count; ++i) -+ { -+ if (!(load = hlsl_add_load_component(ctx, &instrs, cmp, i, &instr->loc))) -+ return false; -+ -+ if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) -+ return NULL; -+ list_add_tail(&instrs, &or->entry); -+ } -+ -+ list_move_tail(&instr->entry, &instrs); -+ hlsl_src_remove(&jump->condition); -+ hlsl_src_from_node(&jump->condition, or); -+ jump->type = HLSL_IR_JUMP_DISCARD_NZ; -+ -+ return true; -+} -+ - static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - switch (instr->type) -@@ -2848,8 +2903,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - index->idx.node->last_read = last_read; - break; - } -- case HLSL_IR_CONSTANT: - case HLSL_IR_JUMP: -+ { -+ struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -+ -+ if (jump->condition.node) -+ jump->condition.node->last_read = last_read; -+ break; -+ } -+ case HLSL_IR_CONSTANT: - break; - } - } -@@ -4062,6 +4124,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, - "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); - -+ if (profile->major_version >= 4) -+ { -+ hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); -+ } - hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); - while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); - do -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 301113c8477..570773cd335 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -223,7 +223,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - return true; - } - --static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { - enum hlsl_base_type type = dst_type->base_type; -@@ -232,64 +232,73 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; -- break; -- -- case HLSL_TYPE_DOUBLE: -- dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; -- break; -- - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; - break; - - default: -- FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ FIXME("Fold bit/logic and for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src1->node.data_type->base_type) -+ switch (type) - { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; - break; - -- case HLSL_TYPE_DOUBLE: -- dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; -- break; -+ default: -+ FIXME("Fold bit/logic or for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ return true; -+} - -+static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- case HLSL_TYPE_BOOL: -- dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; -+ dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; - break; - - default: -- vkd3d_unreachable(); -+ FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; - } -- -- dst->u[k].u *= ~0u; - } - return true; - } -@@ -363,45 +372,116 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - return true; - } - --static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -- const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, -- const struct vkd3d_shader_location *loc) -+static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < 4; ++k) - { -- switch (type) -+ switch (src1->node.data_type->base_type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f == src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d == src2->value.u[k].d; -+ break; -+ - case HLSL_TYPE_INT: -- if (src2->value.u[k].i == 0) -- { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -- return false; -- } -- if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) -- dst->u[k].i = 0; -- else -- dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ dst->u[k].u *= ~0u; -+ } -+ return true; -+} -+ -+static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ unsigned int k; -+ -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < 4; ++k) -+ { -+ switch (src1->node.data_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f >= src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d >= src2->value.u[k].d; -+ break; -+ -+ case HLSL_TYPE_INT: -+ dst->u[k].u = src1->value.u[k].i >= src2->value.u[k].i; - break; - - case HLSL_TYPE_UINT: -- if (src2->value.u[k].u == 0) -- { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -- return false; -- } -- dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; - break; - - default: -- FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); -- return false; -+ vkd3d_unreachable(); -+ } -+ -+ dst->u[k].u *= ~0u; -+ } -+ return true; -+} -+ -+static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ unsigned int k; -+ -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < 4; ++k) -+ { -+ switch (src1->node.data_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f < src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d < src2->value.u[k].d; -+ break; -+ -+ case HLSL_TYPE_INT: -+ dst->u[k].u = src1->value.u[k].i < src2->value.u[k].i; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; -+ break; -+ -+ default: -+ vkd3d_unreachable(); - } -+ -+ dst->u[k].u *= ~0u; - } - return true; - } -@@ -419,6 +499,15 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - { - switch (type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = fmaxf(src1->value.u[k].f, src2->value.u[k].f); -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = fmax(src1->value.u[k].d, src2->value.u[k].d); -+ break; -+ - case HLSL_TYPE_INT: - dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); - break; -@@ -448,6 +537,15 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - { - switch (type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = fminf(src1->value.u[k].f, src2->value.u[k].f); -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = fmin(src1->value.u[k].d, src2->value.u[k].d); -+ break; -+ - case HLSL_TYPE_INT: - dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); - break; -@@ -464,8 +562,9 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - return true; - } - --static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -- const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, -+ const struct vkd3d_shader_location *loc) - { - enum hlsl_base_type type = dst_type->base_type; - unsigned int k; -@@ -478,19 +577,35 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - switch (type) - { - case HLSL_TYPE_INT: -+ if (src2->value.u[k].i == 0) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -+ return false; -+ } -+ if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) -+ dst->u[k].i = 0; -+ else -+ dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; -+ break; -+ - case HLSL_TYPE_UINT: -- dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; -+ if (src2->value.u[k].u == 0) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -+ return false; -+ } -+ dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; - break; - - default: -- FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { - enum hlsl_base_type type = dst_type->base_type; -@@ -499,45 +614,64 @@ static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < 4; ++k) - { - switch (type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; -+ break; -+ - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; -+ dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; - break; - - default: -- FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < 4; ++k) - { -- switch (type) -+ switch (src1->node.data_type->base_type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; -+ break; -+ - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; - break; - - default: -- FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst_type)); -- return false; -+ vkd3d_unreachable(); - } -+ -+ dst->u[k].u *= ~0u; - } - return true; - } -@@ -591,20 +725,34 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - success = fold_add(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_MUL: -- success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_BIT_AND: -+ case HLSL_OP2_LOGIC_AND: -+ success = fold_and(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_NEQUAL: -- success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_BIT_OR: -+ case HLSL_OP2_LOGIC_OR: -+ success = fold_or(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_BIT_XOR: -+ success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); - break; - - case HLSL_OP2_DIV: - success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); - break; - -- case HLSL_OP2_MOD: -- success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); -+ case HLSL_OP2_EQUAL: -+ success = fold_equal(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_GEQUAL: -+ success = fold_gequal(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_LESS: -+ success = fold_less(ctx, &res, instr->data_type, arg1, arg2); - break; - - case HLSL_OP2_MAX: -@@ -615,16 +763,16 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - success = fold_min(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_BIT_XOR: -- success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_MOD: -+ success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); - break; - -- case HLSL_OP2_BIT_AND: -- success = fold_bit_and(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_MUL: -+ success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_BIT_OR: -- success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_NEQUAL: -+ success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); - break; - - default: -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index d066b13ee4e..d6322bb14f1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -4780,19 +4780,13 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, - instr.opcode = VKD3D_SM4_OP_BREAK; - break; - -- case HLSL_IR_JUMP_DISCARD: -+ case HLSL_IR_JUMP_DISCARD_NZ: - { -- struct sm4_register *reg = &instr.srcs[0].reg; -- - instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; - - memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); -- instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - instr.src_count = 1; -- reg->type = VKD3D_SM4_RT_IMMCONST; -- reg->dim = VKD3D_SM4_DIMENSION_SCALAR; -- reg->immconst_uint[0] = ~0u; -- -+ sm4_src_from_node(&instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); - break; - } - -@@ -4800,7 +4794,7 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, - vkd3d_unreachable(); - - default: -- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); -+ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); - return; - } - -@@ -5016,7 +5010,7 @@ static void write_sm4_resource_store(struct hlsl_ctx *ctx, - - if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { -- hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.\n"); -+ hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); - return; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 343fdb2252e..d2f98491ce0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -440,6 +440,18 @@ void vkd3d_shader_dump_shader(enum vkd3d_shader_source_type source_type, - shader_get_source_type_suffix(source_type), shader->code, shader->size); - } - -+static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) -+{ -+ struct vkd3d_shader_scan_signature_info *signature_info; -+ -+ if ((signature_info = vkd3d_find_struct(info->next, SCAN_SIGNATURE_INFO))) -+ { -+ memset(&signature_info->input, 0, sizeof(signature_info->input)); -+ memset(&signature_info->output, 0, sizeof(signature_info->output)); -+ memset(&signature_info->patch_constant, 0, sizeof(signature_info->patch_constant)); -+ } -+} -+ - bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, - struct vkd3d_shader_message_context *message_context, const char *source_name, - const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, -@@ -526,6 +538,43 @@ void vkd3d_shader_free_messages(char *messages) - vkd3d_free(messages); - } - -+static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, -+ const struct shader_signature *src) -+{ -+ unsigned int i; -+ -+ signature->element_count = src->element_count; -+ if (!src->elements) -+ { -+ assert(!signature->element_count); -+ signature->elements = NULL; -+ return true; -+ } -+ -+ if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) -+ return false; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ struct vkd3d_shader_signature_element *d = &signature->elements[i]; -+ struct signature_element *e = &src->elements[i]; -+ -+ d->semantic_name = e->semantic_name; -+ d->semantic_index = e->semantic_index; -+ d->stream_index = e->stream_index; -+ d->sysval_semantic = e->sysval_semantic; -+ d->component_type = e->component_type; -+ d->register_index = e->register_index; -+ if (e->register_count > 1) -+ FIXME("Arrayed elements are not supported yet.\n"); -+ d->mask = e->mask; -+ d->used_mask = e->used_mask; -+ d->min_precision = e->min_precision; -+ } -+ -+ return true; -+} -+ - struct vkd3d_shader_scan_context - { - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -@@ -1070,6 +1119,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser *parser) - { - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -+ struct vkd3d_shader_scan_signature_info *signature_info; - struct vkd3d_shader_instruction *instruction; - struct vkd3d_shader_scan_context context; - int ret = VKD3D_OK; -@@ -1080,6 +1130,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - scan_descriptor_info->descriptors = NULL; - scan_descriptor_info->descriptor_count = 0; - } -+ signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO); - - vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); - -@@ -1099,6 +1150,21 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - } - } - -+ if (!ret && signature_info) -+ { -+ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) -+ || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, -+ &parser->shader_desc.output_signature) -+ || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, -+ &parser->shader_desc.patch_constant_signature)) -+ { -+ vkd3d_shader_free_scan_signature_info(signature_info); -+ if (scan_descriptor_info) -+ vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ } -+ - vkd3d_shader_scan_context_cleanup(&context); - return ret; - } -@@ -1152,6 +1218,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - if ((ret = vkd3d_shader_validate_compile_info(compile_info, false)) < 0) - return ret; - -+ init_scan_signature_info(compile_info); -+ - vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - - switch (compile_info->source_type) -@@ -1305,6 +1373,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - if ((ret = vkd3d_shader_validate_compile_info(compile_info, true)) < 0) - return ret; - -+ init_scan_signature_info(compile_info); -+ - vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - - switch (compile_info->source_type) -@@ -1339,6 +1409,15 @@ void vkd3d_shader_free_scan_descriptor_info(struct vkd3d_shader_scan_descriptor_ - vkd3d_free(scan_descriptor_info->descriptors); - } - -+void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info) -+{ -+ TRACE("info %p.\n", info); -+ -+ vkd3d_shader_free_shader_signature(&info->input); -+ vkd3d_shader_free_shader_signature(&info->output); -+ vkd3d_shader_free_shader_signature(&info->patch_constant); -+} -+ - void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *shader_code) - { - TRACE("shader_code %p.\n", shader_code); -@@ -1401,43 +1480,6 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu - desc->version = 0; - } - --static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, -- const struct shader_signature *src) --{ -- unsigned int i; -- -- signature->element_count = src->element_count; -- if (!src->elements) -- { -- assert(!signature->element_count); -- signature->elements = NULL; -- return true; -- } -- -- if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) -- return false; -- -- for (i = 0; i < signature->element_count; ++i) -- { -- struct vkd3d_shader_signature_element *d = &signature->elements[i]; -- struct signature_element *e = &src->elements[i]; -- -- d->semantic_name = e->semantic_name; -- d->semantic_index = e->semantic_index; -- d->stream_index = e->stream_index; -- d->sysval_semantic = e->sysval_semantic; -- d->component_type = e->component_type; -- d->register_index = e->register_index; -- if (e->register_count > 1) -- FIXME("Arrayed elements are not supported yet.\n"); -- d->mask = e->mask; -- d->used_mask = e->used_mask; -- d->min_precision = e->min_precision; -- } -- -- return true; --} -- - void shader_signature_cleanup(struct shader_signature *signature) - { - vkd3d_free(signature->elements); -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 406d53a3391..528a6651782 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -145,6 +145,8 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE = 7002, - VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE = 7003, - VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, -+ VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, -+ VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, - - VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, - }; -@@ -802,6 +804,7 @@ struct signature_element - struct shader_signature - { - struct signature_element *elements; -+ size_t elements_capacity; - unsigned int element_count; - }; - -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 32439eec7eb..1fc6c00deff 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -26,6 +26,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF - static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value); - static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, - struct d3d12_fence *fence, uint64_t value); -+static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue); - static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); - static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any); - -@@ -6162,17 +6163,131 @@ static struct vkd3d_cs_op_data *d3d12_command_queue_op_array_require_space(struc - return &array->ops[array->count++]; - } - -+static bool clone_array_parameter(void **dst, const void *src, size_t elem_size, unsigned int count) -+{ -+ void *buffer; -+ -+ *dst = NULL; -+ if (src) -+ { -+ if (!(buffer = vkd3d_calloc(count, elem_size))) -+ return false; -+ memcpy(buffer, src, count * elem_size); -+ *dst = buffer; -+ } -+ return true; -+} -+ -+static void update_mappings_cleanup(struct vkd3d_cs_update_mappings *update_mappings) -+{ -+ vkd3d_free(update_mappings->region_start_coordinates); -+ vkd3d_free(update_mappings->region_sizes); -+ vkd3d_free(update_mappings->range_flags); -+ vkd3d_free(update_mappings->heap_range_offsets); -+ vkd3d_free(update_mappings->range_tile_counts); -+} -+ - static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, - ID3D12Resource *resource, UINT region_count, - const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, const D3D12_TILE_REGION_SIZE *region_sizes, - ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, -- UINT *heap_range_offsets, UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) -+ const UINT *heap_range_offsets, const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) - { -- FIXME("iface %p, resource %p, region_count %u, region_start_coordinates %p, " -+ struct d3d12_resource *resource_impl = unsafe_impl_from_ID3D12Resource(resource); -+ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); -+ struct d3d12_heap *heap_impl = unsafe_impl_from_ID3D12Heap(heap); -+ struct vkd3d_cs_update_mappings update_mappings = {0}; -+ struct vkd3d_cs_op_data *op; -+ -+ TRACE("iface %p, resource %p, region_count %u, region_start_coordinates %p, " - "region_sizes %p, heap %p, range_count %u, range_flags %p, heap_range_offsets %p, " -- "range_tile_counts %p, flags %#x stub!\n", -+ "range_tile_counts %p, flags %#x.\n", - iface, resource, region_count, region_start_coordinates, region_sizes, heap, range_count, - range_flags, heap_range_offsets, range_tile_counts, flags); -+ -+ if (!region_count || !range_count) -+ return; -+ -+ if (!command_queue->supports_sparse_binding) -+ { -+ FIXME("Command queue %p does not support sparse binding.\n", command_queue); -+ return; -+ } -+ -+ if (!resource_impl->tiles.subresource_count) -+ { -+ WARN("Resource %p is not a tiled resource.\n", resource_impl); -+ return; -+ } -+ -+ if (region_count > 1 && !region_start_coordinates) -+ { -+ WARN("Region start coordinates must not be NULL when region count is > 1.\n"); -+ return; -+ } -+ -+ if (range_count > 1 && !range_tile_counts) -+ { -+ WARN("Range tile counts must not be NULL when range count is > 1.\n"); -+ return; -+ } -+ -+ update_mappings.resource = resource_impl; -+ update_mappings.heap = heap_impl; -+ if (!clone_array_parameter((void **)&update_mappings.region_start_coordinates, -+ region_start_coordinates, sizeof(*region_start_coordinates), region_count)) -+ { -+ ERR("Failed to allocate region start coordinates.\n"); -+ return; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.region_sizes, -+ region_sizes, sizeof(*region_sizes), region_count)) -+ { -+ ERR("Failed to allocate region sizes.\n"); -+ goto free_clones; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.range_flags, -+ range_flags, sizeof(*range_flags), range_count)) -+ { -+ ERR("Failed to allocate range flags.\n"); -+ goto free_clones; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.heap_range_offsets, -+ heap_range_offsets, sizeof(*heap_range_offsets), range_count)) -+ { -+ ERR("Failed to allocate heap range offsets.\n"); -+ goto free_clones; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.range_tile_counts, -+ range_tile_counts, sizeof(*range_tile_counts), range_count)) -+ { -+ ERR("Failed to allocate range tile counts.\n"); -+ goto free_clones; -+ } -+ update_mappings.region_count = region_count; -+ update_mappings.range_count = range_count; -+ update_mappings.flags = flags; -+ -+ vkd3d_mutex_lock(&command_queue->op_mutex); -+ -+ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) -+ { -+ ERR("Failed to add op.\n"); -+ goto unlock_mutex; -+ } -+ -+ op->opcode = VKD3D_CS_OP_UPDATE_MAPPINGS; -+ op->u.update_mappings = update_mappings; -+ -+ d3d12_command_queue_submit_locked(command_queue); -+ -+ vkd3d_mutex_unlock(&command_queue->op_mutex); -+ return; -+ -+unlock_mutex: -+ vkd3d_mutex_unlock(&command_queue->op_mutex); -+free_clones: -+ update_mappings_cleanup(&update_mappings); - } - - static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, -@@ -6183,10 +6298,34 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command - const D3D12_TILE_REGION_SIZE *region_size, - D3D12_TILE_MAPPING_FLAGS flags) - { -- FIXME("iface %p, dst_resource %p, dst_region_start_coordinate %p, " -- "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x stub!\n", -+ struct d3d12_resource *dst_resource_impl = impl_from_ID3D12Resource(dst_resource); -+ struct d3d12_resource *src_resource_impl = impl_from_ID3D12Resource(src_resource); -+ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); -+ struct vkd3d_cs_op_data *op; -+ -+ TRACE("iface %p, dst_resource %p, dst_region_start_coordinate %p, " -+ "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x.\n", - iface, dst_resource, dst_region_start_coordinate, src_resource, - src_region_start_coordinate, region_size, flags); -+ -+ vkd3d_mutex_lock(&command_queue->op_mutex); -+ -+ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) -+ { -+ ERR("Failed to add op.\n"); -+ return; -+ } -+ op->opcode = VKD3D_CS_OP_COPY_MAPPINGS; -+ op->u.copy_mappings.dst_resource = dst_resource_impl; -+ op->u.copy_mappings.src_resource = src_resource_impl; -+ op->u.copy_mappings.dst_region_start_coordinate = *dst_region_start_coordinate; -+ op->u.copy_mappings.src_region_start_coordinate = *src_region_start_coordinate; -+ op->u.copy_mappings.region_size = *region_size; -+ op->u.copy_mappings.flags = flags; -+ -+ d3d12_command_queue_submit_locked(command_queue); -+ -+ vkd3d_mutex_unlock(&command_queue->op_mutex); - } - - static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, -@@ -6934,6 +7073,15 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * - d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); - break; - -+ case VKD3D_CS_OP_UPDATE_MAPPINGS: -+ FIXME("Tiled resource binding is not supported yet.\n"); -+ update_mappings_cleanup(&op->u.update_mappings); -+ break; -+ -+ case VKD3D_CS_OP_COPY_MAPPINGS: -+ FIXME("Tiled resource mapping copying is not supported yet.\n"); -+ break; -+ - default: - vkd3d_unreachable(); - } -@@ -7000,6 +7148,8 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, - if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue->vkd3d_queue, device))) - goto fail_destroy_op_mutex; - -+ queue->supports_sparse_binding = !!(queue->vkd3d_queue->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT); -+ - d3d12_device_add_ref(queue->device = device); - - return S_OK; -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 4263dcf4184..b9a8943cc08 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -1464,6 +1464,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - - vulkan_info->device_limits = physical_device_info->properties2.properties.limits; - vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; -+ vulkan_info->sparse_binding = features->sparseBinding; -+ vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; - vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; - vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; - vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat; -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index ea7b6859cc1..4c07d326504 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -779,6 +779,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, - VkImageFormatListCreateInfoKHR format_list; - const struct vkd3d_format *format; - VkImageCreateInfo image_info; -+ uint32_t count; - VkResult vr; - - if (resource) -@@ -914,6 +915,20 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, - if (resource && image_info.tiling == VK_IMAGE_TILING_LINEAR) - resource->flags |= VKD3D_RESOURCE_LINEAR_TILING; - -+ if (sparse_resource) -+ { -+ count = 0; -+ VK_CALL(vkGetPhysicalDeviceSparseImageFormatProperties(device->vk_physical_device, image_info.format, -+ image_info.imageType, image_info.samples, image_info.usage, image_info.tiling, &count, NULL)); -+ -+ if (!count) -+ { -+ FIXME("Sparse images are not supported with format %u, type %u, samples %u, usage %#x.\n", -+ image_info.format, image_info.imageType, image_info.samples, image_info.usage); -+ return E_INVALIDARG; -+ } -+ } -+ - if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, vk_image))) < 0) - WARN("Failed to create Vulkan image, vr %d.\n", vr); - -@@ -928,6 +943,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - D3D12_RESOURCE_DESC validated_desc; - VkMemoryRequirements requirements; - VkImage vk_image; -+ bool tiled; - HRESULT hr; - - assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); -@@ -940,8 +956,10 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - desc = &validated_desc; - } - -+ tiled = desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; -+ - /* XXX: We have to create an image to get its memory requirements. */ -- if (SUCCEEDED(hr = vkd3d_create_image(device, &heap_properties, 0, desc, NULL, &vk_image))) -+ if (SUCCEEDED(hr = vkd3d_create_image(device, tiled ? NULL : &heap_properties, 0, desc, NULL, &vk_image))) - { - VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &requirements)); - VK_CALL(vkDestroyImage(device->vk_device, vk_image, NULL)); -@@ -1039,12 +1057,12 @@ static void d3d12_resource_get_level_box(const struct d3d12_resource *resource, - box->back = d3d12_resource_desc_get_depth(&resource->desc, level); - } - --/* ID3D12Resource */ --static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) -+static void d3d12_resource_init_tiles(struct d3d12_resource *resource) - { -- return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); -+ resource->tiles.subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); - } - -+/* ID3D12Resource */ - static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource *iface, - REFIID riid, void **object) - { -@@ -1661,6 +1679,21 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d - return E_INVALIDARG; - } - -+ if (desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE) -+ { -+ if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D && !device->vk_info.sparse_residency_3d) -+ { -+ WARN("The device does not support tiled 3D images.\n"); -+ return E_INVALIDARG; -+ } -+ if (format->plane_count > 1) -+ { -+ WARN("Invalid format %#x. D3D12 does not support multiplanar formats for tiled resources.\n", -+ format->dxgi_format); -+ return E_INVALIDARG; -+ } -+ } -+ - if (!d3d12_resource_validate_texture_format(desc, format) - || !d3d12_resource_validate_texture_alignment(desc, format)) - return E_INVALIDARG; -@@ -1722,6 +1755,12 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 - - resource->desc = *desc; - -+ if (!heap_properties && !device->vk_info.sparse_binding) -+ { -+ WARN("The device does not support tiled images.\n"); -+ return E_INVALIDARG; -+ } -+ - if (heap_properties && !d3d12_resource_validate_heap_properties(resource, heap_properties, initial_state)) - return E_INVALIDARG; - -@@ -1787,6 +1826,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 - resource->heap = NULL; - resource->heap_offset = 0; - -+ memset(&resource->tiles, 0, sizeof(resource->tiles)); -+ - if (FAILED(hr = vkd3d_private_store_init(&resource->private_store))) - { - d3d12_resource_destroy(resource, device); -@@ -1972,6 +2013,8 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, - desc, initial_state, optimized_clear_value, &object))) - return hr; - -+ d3d12_resource_init_tiles(object); -+ - TRACE("Created reserved resource %p.\n", object); - - *resource = object; -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index b0150754434..c5259420acf 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -147,9 +147,12 @@ struct vkd3d_vulkan_info - unsigned int max_vertex_attrib_divisor; - - VkPhysicalDeviceLimits device_limits; -- VkPhysicalDeviceSparseProperties sparse_properties; - struct vkd3d_device_descriptor_limits descriptor_limits; - -+ VkPhysicalDeviceSparseProperties sparse_properties; -+ bool sparse_binding; -+ bool sparse_residency_3d; -+ - VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; - - unsigned int shader_extension_count; -@@ -670,6 +673,11 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface); - #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 - #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010 - -+struct d3d12_resource_tile_info -+{ -+ unsigned int subresource_count; -+}; -+ - /* ID3D12Resource */ - struct d3d12_resource - { -@@ -698,9 +706,16 @@ struct d3d12_resource - - struct d3d12_device *device; - -+ struct d3d12_resource_tile_info tiles; -+ - struct vkd3d_private_store private_store; - }; - -+static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) -+{ -+ return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); -+} -+ - static inline bool d3d12_resource_is_buffer(const struct d3d12_resource *resource) - { - return resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER; -@@ -1454,6 +1469,8 @@ enum vkd3d_cs_op - VKD3D_CS_OP_WAIT, - VKD3D_CS_OP_SIGNAL, - VKD3D_CS_OP_EXECUTE, -+ VKD3D_CS_OP_UPDATE_MAPPINGS, -+ VKD3D_CS_OP_COPY_MAPPINGS, - }; - - struct vkd3d_cs_wait -@@ -1474,6 +1491,30 @@ struct vkd3d_cs_execute - unsigned int buffer_count; - }; - -+struct vkd3d_cs_update_mappings -+{ -+ struct d3d12_resource *resource; -+ struct d3d12_heap *heap; -+ D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates; -+ D3D12_TILE_REGION_SIZE *region_sizes; -+ D3D12_TILE_RANGE_FLAGS *range_flags; -+ UINT *heap_range_offsets; -+ UINT *range_tile_counts; -+ UINT region_count; -+ UINT range_count; -+ D3D12_TILE_MAPPING_FLAGS flags; -+}; -+ -+struct vkd3d_cs_copy_mappings -+{ -+ struct d3d12_resource *dst_resource; -+ struct d3d12_resource *src_resource; -+ D3D12_TILED_RESOURCE_COORDINATE dst_region_start_coordinate; -+ D3D12_TILED_RESOURCE_COORDINATE src_region_start_coordinate; -+ D3D12_TILE_REGION_SIZE region_size; -+ D3D12_TILE_MAPPING_FLAGS flags; -+}; -+ - struct vkd3d_cs_op_data - { - enum vkd3d_cs_op opcode; -@@ -1482,6 +1523,8 @@ struct vkd3d_cs_op_data - struct vkd3d_cs_wait wait; - struct vkd3d_cs_signal signal; - struct vkd3d_cs_execute execute; -+ struct vkd3d_cs_update_mappings update_mappings; -+ struct vkd3d_cs_copy_mappings copy_mappings; - } u; - }; - -@@ -1519,6 +1562,8 @@ struct d3d12_command_queue - * set, aux_op_queue.count must be zero. */ - struct d3d12_command_queue_op_array aux_op_queue; - -+ bool supports_sparse_binding; -+ - struct vkd3d_private_store private_store; - }; - --- -2.40.1 -