From 18f976c338fe7a8031f6533c3d26e46d223dcff3 Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes
Date: Thu, 23 Jan 2025 08:14:29 +1100
Subject: [PATCH] Updated vkd3d-latest patchset

---
 ...5b2d62e59a6365e32aac3fa37fe16ab3582.patch} | 2403 ++++++++++++-----
 ...-5bfcd811824e9ca03c09a54204bff645225.patch |  570 ++++
 ...-a082daeb56c239b41d67b5df5abceb342c0.patch |  340 ---
 ...-5b2d62e59a6365e32aac3fa37fe16ab3582.patch | 1119 --------
 4 files changed, 2362 insertions(+), 2070 deletions(-)
 rename patches/vkd3d-latest/{0001-Updated-vkd3d-to-c7e173a1ffa1ba6916dd549bf9f32225440.patch => 0001-Updated-vkd3d-to-5b2d62e59a6365e32aac3fa37fe16ab3582.patch} (91%)
 create mode 100644 patches/vkd3d-latest/0002-Updated-vkd3d-to-5bfcd811824e9ca03c09a54204bff645225.patch
 delete mode 100644 patches/vkd3d-latest/0002-Updated-vkd3d-to-a082daeb56c239b41d67b5df5abceb342c0.patch
 delete mode 100644 patches/vkd3d-latest/0003-Updated-vkd3d-to-5b2d62e59a6365e32aac3fa37fe16ab3582.patch

diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-c7e173a1ffa1ba6916dd549bf9f32225440.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-5b2d62e59a6365e32aac3fa37fe16ab3582.patch
similarity index 91%
rename from patches/vkd3d-latest/0001-Updated-vkd3d-to-c7e173a1ffa1ba6916dd549bf9f32225440.patch
rename to patches/vkd3d-latest/0001-Updated-vkd3d-to-5b2d62e59a6365e32aac3fa37fe16ab3582.patch
index 6954a02f..a0d979d4 100644
--- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-c7e173a1ffa1ba6916dd549bf9f32225440.patch
+++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-5b2d62e59a6365e32aac3fa37fe16ab3582.patch
@@ -1,36 +1,39 @@
-From 0ccf563391895bc762112674fe5ab5fff4302a01 Mon Sep 17 00:00:00 2001
+From 419cefedf7c0b02c10a86894d4348ce300b34518 Mon Sep 17 00:00:00 2001
 From: Alistair Leslie-Hughes
 Date: Fri, 29 Nov 2024 07:14:57 +1100
-Subject: [PATCH] Updated vkd3d to c7e173a1ffa1ba6916dd549bf9f32225440d1ec6.
+Subject: [PATCH] Updated vkd3d to 5b2d62e59a6365e32aac3fa37fe16ab3582deae4.
 
 ---
  libs/vkd3d/include/private/vkd3d_common.h | 2 +-
 + libs/vkd3d/include/vkd3d.h | 29 +-
  libs/vkd3d/include/vkd3d_shader.h | 219 +
  libs/vkd3d/libs/vkd3d-common/blob.c | 1 +
 - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 335 +-
 + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 339 +-
  libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1118 ++---
  libs/vkd3d/libs/vkd3d-shader/dxbc.c | 35 +
  libs/vkd3d/libs/vkd3d-shader/dxil.c | 87 +-
  libs/vkd3d/libs/vkd3d-shader/fx.c | 175 +-
  libs/vkd3d/libs/vkd3d-shader/glsl.c | 9 +-
 - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 387 +-
 - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 158 +-
 + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 478 +-
 + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 187 +-
  libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 +
 - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 671 ++-
 - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 3839 ++++++++++++++---
 + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 879 +++-
 + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 4112 ++++++++++++++---
  .../libs/vkd3d-shader/hlsl_constant_ops.c | 363 +-
  libs/vkd3d/libs/vkd3d-shader/ir.c | 2094 +++++++--
 - libs/vkd3d/libs/vkd3d-shader/msl.c | 465 +-
 + libs/vkd3d/libs/vkd3d-shader/msl.c | 464 +-
  libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 +
 - libs/vkd3d/libs/vkd3d-shader/spirv.c | 760 ++--
 - libs/vkd3d/libs/vkd3d-shader/tpf.c | 2232 +---------
 + libs/vkd3d/libs/vkd3d-shader/spirv.c | 761 +--
 + libs/vkd3d/libs/vkd3d-shader/tpf.c | 2251 +--------
  .../libs/vkd3d-shader/vkd3d_shader_main.c | 34 +-
  .../libs/vkd3d-shader/vkd3d_shader_private.h | 86 +-
 - libs/vkd3d/libs/vkd3d/command.c | 273 +-
 - libs/vkd3d/libs/vkd3d/device.c | 59 +-
 + libs/vkd3d/libs/vkd3d/command.c | 330 +-
 + libs/vkd3d/libs/vkd3d/device.c | 93 +-
 + libs/vkd3d/libs/vkd3d/resource.c | 9 +-
  libs/vkd3d/libs/vkd3d/state.c | 242 +-
 - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 63 +-
 - 26 files changed, 8687 insertions(+), 5024 deletions(-)
 + libs/vkd3d/libs/vkd3d/utils.c | 2 +-
 + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 70 +-
 + 29 files changed, 9441 insertions(+), 5032 deletions(-)
 
diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h
index ec1dd70c9b2..fd62730f948 100644
--- a/libs/vkd3d/include/private/vkd3d_common.h
+++ b/libs/vkd3d/include/private/vkd3d_common.h
@@ -45,6 +48,60 @@ index ec1dd70c9b2..fd62730f948 100644
     return __builtin_popcount(v);
 #else
     v -= (v >> 1) & 0x55555555;
+diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h
+index b18fd14f4c3..2376693421c 100644
+--- a/libs/vkd3d/include/vkd3d.h
++++ b/libs/vkd3d/include/vkd3d.h
+@@ -411,9 +411,13 @@ VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue);
+ * the Vulkan driver as being submitted before other work submitted
+ * though the Direct3D 12 API. If this is not desired, it is
+ * recommended to synchronize work submission using an ID3D12Fence
+- * object, by submitting to the queue a signal operation after all the
+- * Direct3D 12 work is submitted and waiting for it before calling
+- * vkd3d_acquire_vk_queue().
++ * object:
++ * 1. submit work through the Direct3D 12 API;
++ * 2. call vkd3d_queue_signal_on_cpu();
++ * 3. wait for the fence to be signalled;
++ * 4. call vkd3d_acquire_vk_queue(); it is guaranteed that all work submitted
++ * at point 1 has already been submitted to Vulkan (though not necessarily
++ * executed).
+ *
+ * \since 1.0
+ */
+@@ -466,6 +470,21 @@ VKD3D_API HRESULT vkd3d_create_versioned_root_signature_deserializer(const void
+ */
+ VKD3D_API void vkd3d_set_log_callback(PFN_vkd3d_log callback);
+ 
++/**
++ * Signal a fence on the CPU once all the currently outstanding queue work is
++ * submitted to Vulkan. 
++ * ++ * The fence will be signalled on the CPU (as if ID3D12Fence_Signal() was ++ * called) once all the work submitted through the Direct3D 12 API before ++ * vkd3d_queue_signal_on_cpu() is called has left the internal queue and has ++ * been submitted to the underlying Vulkan queue. Read the documentation for ++ * vkd3d_acquire_vk_queue() for more details. ++ * ++ * \since 1.15 ++ */ ++VKD3D_API HRESULT vkd3d_queue_signal_on_cpu(ID3D12CommandQueue *queue, ++ ID3D12Fence *fence, uint64_t value); ++ + #endif /* VKD3D_NO_PROTOTYPES */ + + /* +@@ -512,6 +531,10 @@ typedef HRESULT (*PFN_vkd3d_create_versioned_root_signature_deserializer)(const + /** Type of vkd3d_set_log_callback(). \since 1.4 */ + typedef void (*PFN_vkd3d_set_log_callback)(PFN_vkd3d_log callback); + ++/** Type of vkd3d_queue_signal_on_cpu(). \since 1.15 */ ++typedef HRESULT (*PFN_vkd3d_queue_signal_on_cpu)(ID3D12CommandQueue *queue, ++ ID3D12Fence *fence, uint64_t value); ++ + #ifdef __cplusplus + } + #endif /* __cplusplus */ diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index f95caa2f825..af55d63a5c8 100644 --- a/libs/vkd3d/include/vkd3d_shader.h @@ -309,7 +366,7 @@ index f60ef7db769..c2c6ad67804 100644 #include "vkd3d.h" #include "vkd3d_blob.h" diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 7c5444f63a3..69e14e0c7bf 100644 +index 7c5444f63a3..0639da83aa6 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -49,7 +49,7 @@ static const char * const shader_opcode_names[] = @@ -675,6 +732,17 @@ index 7c5444f63a3..69e14e0c7bf 100644 break; } +@@ -1346,8 +1180,8 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const + bool is_sm_5_1 = vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1); + + if (reg->idx[0].rel_addr || reg->type == VKD3DSPR_IMMCONSTBUFFER +- || reg->type == VKD3DSPR_INCONTROLPOINT || (reg->type == VKD3DSPR_INPUT +- && (compiler->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY ++ || reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT ++ || (reg->type == VKD3DSPR_INPUT && (compiler->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY + || compiler->shader_version.type == VKD3D_SHADER_TYPE_HULL))) + { + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); @@ -2132,8 +1966,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, case VKD3DSIH_DEF: @@ -2836,7 +2904,7 @@ index 0df0e30f399..ab6604bd703 100644 vkd3d_glsl_generator_init(&generator, program, compile_info, descriptor_info, combined_sampler_info, message_context); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 96de18dc886..858186a1071 100644 +index 96de18dc886..48d9d4e0023 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -192,18 +192,20 @@ bool hlsl_type_is_row_major(const struct hlsl_type *type) @@ -3214,7 +3282,33 @@ index 96de18dc886..858186a1071 100644 return &swizzle->node; } -@@ -2031,7 +2066,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v +@@ -1996,6 +2031,25 @@ struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const ch + return &constant->node; + } + ++struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type, ++ const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value, ++ struct 
hlsl_ir_node *value, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_interlocked *interlocked; ++ ++ if (!(interlocked = hlsl_alloc(ctx, sizeof(*interlocked)))) ++ return NULL; ++ ++ init_node(&interlocked->node, HLSL_IR_INTERLOCKED, type, loc); ++ interlocked->op = op; ++ hlsl_copy_deref(ctx, &interlocked->dst, dst); ++ hlsl_src_from_node(&interlocked->coords, coords); ++ hlsl_src_from_node(&interlocked->cmp_value, cmp_value); ++ hlsl_src_from_node(&interlocked->value, value); ++ ++ return &interlocked->node; ++} ++ + bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) + { + struct hlsl_type *type = index->val.node->data_type; +@@ -2031,7 +2085,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v if (type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV) type = type->e.resource.format; else if (type->class == HLSL_CLASS_MATRIX) @@ -3223,7 +3317,7 @@ index 96de18dc886..858186a1071 100644 else type = hlsl_get_element_type_from_path_index(ctx, type, idx); -@@ -2054,8 +2089,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type +@@ -2054,8 +2108,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type return &jump->node; } @@ -3234,7 +3328,7 @@ index 96de18dc886..858186a1071 100644 unsigned int unroll_limit, const struct vkd3d_shader_location *loc) { struct hlsl_ir_loop *loop; -@@ -2066,6 +2101,10 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, +@@ -2066,6 +2120,10 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, hlsl_block_init(&loop->body); hlsl_block_add_block(&loop->body, block); @@ -3245,7 +3339,7 @@ index 96de18dc886..858186a1071 100644 loop->unroll_type = unroll_type; loop->unroll_limit = unroll_limit; return &loop->node; -@@ -2221,14 +2260,21 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ +@@ -2221,14 +2279,21 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) { @@ -3269,7 +3363,7 @@ index 96de18dc886..858186a1071 100644 hlsl_block_cleanup(&body); return NULL; } -@@ -2310,8 +2356,12 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr +@@ -2310,8 +2375,12 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_swizzle *src) { @@ -3284,7 +3378,45 @@ index 96de18dc886..858186a1071 100644 } static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map, -@@ -2533,9 +2583,6 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, +@@ -2325,6 +2394,27 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr + return dst; + } + ++static struct hlsl_ir_node *clone_interlocked(struct hlsl_ctx *ctx, ++ struct clone_instr_map *map, struct hlsl_ir_interlocked *src) ++{ ++ struct hlsl_ir_interlocked *dst; ++ ++ if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) ++ return NULL; ++ init_node(&dst->node, HLSL_IR_INTERLOCKED, NULL, &src->node.loc); ++ dst->op = src->op; ++ ++ if (!clone_deref(ctx, map, &dst->dst, &src->dst)) ++ { ++ vkd3d_free(dst); ++ return NULL; ++ } ++ clone_src(map, &dst->coords, &src->coords); ++ clone_src(map, &dst->cmp_value, &src->cmp_value); ++ clone_src(map, &dst->value, &src->value); ++ return &dst->node; ++} ++ + static 
struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_compile *compile) + { +@@ -2525,6 +2615,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + case HLSL_IR_SWIZZLE: + return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); + ++ case HLSL_IR_INTERLOCKED: ++ return clone_interlocked(ctx, map, hlsl_ir_interlocked(instr)); ++ + case HLSL_IR_COMPILE: + return clone_compile(ctx, map, hlsl_ir_compile(instr)); + +@@ -2533,9 +2626,6 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_STATEBLOCK_CONSTANT: return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); @@ -3294,7 +3426,7 @@ index 96de18dc886..858186a1071 100644 } vkd3d_unreachable(); -@@ -2693,10 +2740,8 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha +@@ -2693,10 +2783,8 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha return NULL; } @@ -3306,7 +3438,7 @@ index 96de18dc886..858186a1071 100644 static const char *const base_types[] = { [HLSL_TYPE_FLOAT] = "float", -@@ -2720,31 +2765,29 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -2720,31 +2808,29 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", }; @@ -3347,7 +3479,7 @@ index 96de18dc886..858186a1071 100644 case HLSL_CLASS_ARRAY: { -@@ -2753,88 +2796,85 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -2753,88 +2839,85 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) ; @@ -3405,13 +3537,13 @@ index 96de18dc886..858186a1071 100644 { VKD3D_ASSERT(type->sampler_dim < ARRAY_SIZE(dimensions)); - vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); -+ vkd3d_string_buffer_printf(buffer, "Texture%s<", dimensions[type->sampler_dim]); - } +- } - if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) - { - vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); - hlsl_release_string_buffer(ctx, inner_string); -- } ++ vkd3d_string_buffer_printf(buffer, "Texture%s<", dimensions[type->sampler_dim]); + } - return string; + hlsl_dump_type(buffer, type->e.resource.format); + vkd3d_string_buffer_printf(buffer, ">"); @@ -3476,7 +3608,7 @@ index 96de18dc886..858186a1071 100644 case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: -@@ -2857,8 +2897,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -2857,8 +2940,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru break; } @@ -3496,7 +3628,12 @@ index 96de18dc886..858186a1071 100644 } struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, -@@ -2968,7 +3017,6 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) +@@ -2964,11 +3056,11 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) + [HLSL_IR_STORE ] = "HLSL_IR_STORE", + [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", + [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", ++ [HLSL_IR_INTERLOCKED ] = "HLSL_IR_INTERLOCKED", + [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", @@ -3504,7 +3641,7 @@ index 96de18dc886..858186a1071 100644 }; if (type >= 
ARRAY_SIZE(names)) -@@ -3022,7 +3070,8 @@ static void dump_ir_var(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer +@@ -3022,7 +3114,8 @@ static void dump_ir_var(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer vkd3d_string_buffer_printf(buffer, "%s ", string->buffer); hlsl_release_string_buffer(ctx, string); } @@ -3514,7 +3651,7 @@ index 96de18dc886..858186a1071 100644 if (var->semantic.name) vkd3d_string_buffer_printf(buffer, " : %s%u", var->semantic.name, var->semantic.index); } -@@ -3103,42 +3152,36 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) +@@ -3103,42 +3196,36 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) return vkd3d_dbg_sprintf(".%s", string); } @@ -3571,7 +3708,7 @@ index 96de18dc886..858186a1071 100644 { const union hlsl_constant_value_component *value = &constant->value.u[x]; -@@ -3164,12 +3207,9 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl +@@ -3164,12 +3251,9 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl case HLSL_TYPE_UINT: vkd3d_string_buffer_printf(buffer, "%u ", value->u); break; @@ -3585,7 +3722,11 @@ index 96de18dc886..858186a1071 100644 vkd3d_string_buffer_printf(buffer, "}"); } -@@ -3201,13 +3241,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) +@@ -3198,16 +3282,15 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP1_F32TOF16] = "f32tof16", + [HLSL_OP1_FLOOR] = "floor", + [HLSL_OP1_FRACT] = "fract", ++ [HLSL_OP1_ISINF] = "isinf", [HLSL_OP1_LOG2] = "log2", [HLSL_OP1_LOGIC_NOT] = "!", [HLSL_OP1_NEG] = "-", @@ -3599,7 +3740,7 @@ index 96de18dc886..858186a1071 100644 [HLSL_OP1_SIN] = "sin", [HLSL_OP1_SIN_REDUCED] = "sin_reduced", [HLSL_OP1_SQRT] = "sqrt", -@@ -3217,7 +3255,6 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) +@@ -3217,7 +3300,6 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP2_BIT_AND] = "&", [HLSL_OP2_BIT_OR] = "|", [HLSL_OP2_BIT_XOR] = "^", @@ -3607,7 +3748,7 @@ index 96de18dc886..858186a1071 100644 [HLSL_OP2_DIV] = "/", [HLSL_OP2_DOT] = "dot", [HLSL_OP2_EQUAL] = "==", -@@ -3398,15 +3435,17 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls +@@ -3398,15 +3480,17 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls unsigned int i; dump_src(buffer, &swizzle->val); @@ -3629,7 +3770,54 @@ index 96de18dc886..858186a1071 100644 } } -@@ -3562,11 +3601,6 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, +@@ -3418,6 +3502,35 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ + vkd3d_string_buffer_printf(buffer, "]"); + } + ++static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_interlocked *interlocked) ++{ ++ static const char *const op_names[] = ++ { ++ [HLSL_INTERLOCKED_ADD] = "add", ++ [HLSL_INTERLOCKED_AND] = "and", ++ [HLSL_INTERLOCKED_CMP_EXCH] = "cmp_exch", ++ [HLSL_INTERLOCKED_EXCH] = "exch", ++ [HLSL_INTERLOCKED_MAX] = "max", ++ [HLSL_INTERLOCKED_MIN] = "min", ++ [HLSL_INTERLOCKED_OR] = "or", ++ [HLSL_INTERLOCKED_XOR] = "xor", ++ }; ++ ++ VKD3D_ASSERT(interlocked->op < ARRAY_SIZE(op_names)); ++ vkd3d_string_buffer_printf(buffer, "interlocked_%s(dst = ", op_names[interlocked->op]); ++ dump_deref(buffer, &interlocked->dst); ++ vkd3d_string_buffer_printf(buffer, ", coords = "); ++ dump_src(buffer, &interlocked->coords); ++ if (interlocked->cmp_value.node) ++ { ++ 
vkd3d_string_buffer_printf(buffer, ", cmp_value = "); ++ dump_src(buffer, &interlocked->cmp_value); ++ } ++ vkd3d_string_buffer_printf(buffer, ", value = "); ++ dump_src(buffer, &interlocked->value); ++ vkd3d_string_buffer_printf(buffer, ")"); ++} ++ + static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + const struct hlsl_ir_compile *compile) + { +@@ -3551,6 +3664,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); + break; + ++ case HLSL_IR_INTERLOCKED: ++ dump_ir_interlocked(buffer, hlsl_ir_interlocked(instr)); ++ break; ++ + case HLSL_IR_COMPILE: + dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr)); + break; +@@ -3562,11 +3679,6 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, case HLSL_IR_STATEBLOCK_CONSTANT: dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); break; @@ -3641,7 +3829,7 @@ index 96de18dc886..858186a1071 100644 } } -@@ -3625,10 +3659,15 @@ void hlsl_dump_var_default_values(const struct hlsl_ir_var *var) +@@ -3625,10 +3737,15 @@ void hlsl_dump_var_default_values(const struct hlsl_ir_var *var) void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) { @@ -3659,7 +3847,7 @@ index 96de18dc886..858186a1071 100644 LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) { -@@ -3719,6 +3758,7 @@ static void free_ir_load(struct hlsl_ir_load *load) +@@ -3719,6 +3836,7 @@ static void free_ir_load(struct hlsl_ir_load *load) static void free_ir_loop(struct hlsl_ir_loop *loop) { hlsl_block_cleanup(&loop->body); @@ -3667,7 +3855,34 @@ index 96de18dc886..858186a1071 100644 vkd3d_free(loop); } -@@ -3875,10 +3915,6 @@ void hlsl_free_instr(struct hlsl_ir_node *node) +@@ -3778,6 +3896,15 @@ static void free_ir_index(struct hlsl_ir_index *index) + vkd3d_free(index); + } + ++static void free_ir_interlocked(struct hlsl_ir_interlocked *interlocked) ++{ ++ hlsl_cleanup_deref(&interlocked->dst); ++ hlsl_src_remove(&interlocked->coords); ++ hlsl_src_remove(&interlocked->cmp_value); ++ hlsl_src_remove(&interlocked->value); ++ vkd3d_free(interlocked); ++} ++ + static void free_ir_compile(struct hlsl_ir_compile *compile) + { + unsigned int i; +@@ -3864,6 +3991,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) + free_ir_switch(hlsl_ir_switch(node)); + break; + ++ case HLSL_IR_INTERLOCKED: ++ free_ir_interlocked(hlsl_ir_interlocked(node)); ++ break; ++ + case HLSL_IR_COMPILE: + free_ir_compile(hlsl_ir_compile(node)); + break; +@@ -3875,10 +4006,6 @@ void hlsl_free_instr(struct hlsl_ir_node *node) case HLSL_IR_STATEBLOCK_CONSTANT: free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); break; @@ -3678,7 +3893,7 @@ index 96de18dc886..858186a1071 100644 } } -@@ -3977,8 +4013,8 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function +@@ -3977,8 +4104,8 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) { @@ -3688,7 +3903,7 @@ index 96de18dc886..858186a1071 100644 /* Leave replicate swizzles alone; some instructions need them. 
*/ if (swizzle == HLSL_SWIZZLE(X, X, X, X) -@@ -3987,13 +4023,10 @@ uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) +@@ -3987,13 +4114,10 @@ uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) || swizzle == HLSL_SWIZZLE(W, W, W, W)) return swizzle; @@ -3705,7 +3920,7 @@ index 96de18dc886..858186a1071 100644 } return ret; } -@@ -4046,7 +4079,7 @@ uint32_t hlsl_combine_swizzles(uint32_t first, uint32_t second, unsigned int dim +@@ -4046,7 +4170,7 @@ uint32_t hlsl_combine_swizzles(uint32_t first, uint32_t second, unsigned int dim for (i = 0; i < dim; ++i) { unsigned int s = hlsl_swizzle_get_component(second, i); @@ -3714,7 +3929,7 @@ index 96de18dc886..858186a1071 100644 } return ret; } -@@ -4304,7 +4337,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) +@@ -4304,7 +4428,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) } ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); @@ -3724,7 +3939,7 @@ index 96de18dc886..858186a1071 100644 ctx->builtin_types.error = hlsl_new_simple_type(ctx, "", HLSL_CLASS_ERROR); hlsl_scope_add_type(ctx->globals, ctx->builtin_types.string); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 075c76cb0e2..d712a325322 100644 +index 075c76cb0e2..e9845f8f887 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -22,7 +22,6 @@ @@ -3835,7 +4050,12 @@ index 075c76cb0e2..d712a325322 100644 } e; /* Number of numeric register components used by one value of this type, for each regset. -@@ -330,8 +323,6 @@ enum hlsl_ir_node_type +@@ -326,12 +319,11 @@ enum hlsl_ir_node_type + HLSL_IR_STORE, + HLSL_IR_SWIZZLE, + HLSL_IR_SWITCH, ++ HLSL_IR_INTERLOCKED, + HLSL_IR_COMPILE, HLSL_IR_SAMPLER_STATE, HLSL_IR_STATEBLOCK_CONSTANT, @@ -3844,7 +4064,7 @@ index 075c76cb0e2..d712a325322 100644 }; /* Common data for every type of IR instruction node. */ -@@ -524,6 +515,10 @@ struct hlsl_ir_var +@@ -524,6 +516,10 @@ struct hlsl_ir_var * element of a struct, and thus needs to be aligned when packed in the signature. 
*/ bool force_align; @@ -3855,23 +4075,23 @@ index 075c76cb0e2..d712a325322 100644 uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; -@@ -644,21 +639,30 @@ struct hlsl_ir_if +@@ -644,21 +640,30 @@ struct hlsl_ir_if struct hlsl_block else_block; }; -enum hlsl_ir_loop_unroll_type +enum hlsl_loop_unroll_type -+{ + { +- HLSL_IR_LOOP_UNROLL, +- HLSL_IR_LOOP_FORCE_UNROLL, +- HLSL_IR_LOOP_FORCE_LOOP + HLSL_LOOP_UNROLL, + HLSL_LOOP_FORCE_UNROLL, + HLSL_LOOP_FORCE_LOOP +}; + +enum hlsl_loop_type - { -- HLSL_IR_LOOP_UNROLL, -- HLSL_IR_LOOP_FORCE_UNROLL, -- HLSL_IR_LOOP_FORCE_LOOP ++{ + HLSL_LOOP_FOR, + HLSL_LOOP_WHILE, + HLSL_LOOP_DO_WHILE @@ -3891,7 +4111,11 @@ index 075c76cb0e2..d712a325322 100644 }; struct hlsl_ir_switch_case -@@ -703,13 +707,11 @@ enum hlsl_ir_expr_op +@@ -700,16 +705,15 @@ enum hlsl_ir_expr_op + HLSL_OP1_F32TOF16, + HLSL_OP1_FLOOR, + HLSL_OP1_FRACT, ++ HLSL_OP1_ISINF, HLSL_OP1_LOG2, HLSL_OP1_LOGIC_NOT, HLSL_OP1_NEG, @@ -3905,7 +4129,7 @@ index 075c76cb0e2..d712a325322 100644 HLSL_OP1_SIN, HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi], writes to .y */ HLSL_OP1_SQRT, -@@ -719,7 +721,6 @@ enum hlsl_ir_expr_op +@@ -719,7 +723,6 @@ enum hlsl_ir_expr_op HLSL_OP2_BIT_AND, HLSL_OP2_BIT_OR, HLSL_OP2_BIT_XOR, @@ -3913,7 +4137,7 @@ index 075c76cb0e2..d712a325322 100644 HLSL_OP2_DIV, HLSL_OP2_DOT, HLSL_OP2_EQUAL, -@@ -781,7 +782,17 @@ struct hlsl_ir_swizzle +@@ -781,7 +784,17 @@ struct hlsl_ir_swizzle { struct hlsl_ir_node node; struct hlsl_src val; @@ -3932,7 +4156,7 @@ index 075c76cb0e2..d712a325322 100644 }; struct hlsl_ir_index -@@ -844,6 +855,10 @@ enum hlsl_resource_load_type +@@ -844,6 +857,10 @@ enum hlsl_resource_load_type HLSL_RESOURCE_GATHER_GREEN, HLSL_RESOURCE_GATHER_BLUE, HLSL_RESOURCE_GATHER_ALPHA, @@ -3943,24 +4167,57 @@ index 075c76cb0e2..d712a325322 100644 HLSL_RESOURCE_SAMPLE_INFO, HLSL_RESOURCE_RESINFO, }; -@@ -934,16 +949,6 @@ struct hlsl_ir_stateblock_constant +@@ -934,14 +951,30 @@ struct hlsl_ir_stateblock_constant char *name; }; -/* A vkd3d_shader_instruction that can be inserted in a hlsl_block. - * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */ -struct hlsl_ir_vsir_instruction_ref --{ ++enum hlsl_interlocked_op + { - struct hlsl_ir_node node; -- ++ HLSL_INTERLOCKED_ADD, ++ HLSL_INTERLOCKED_AND, ++ HLSL_INTERLOCKED_CMP_EXCH, ++ HLSL_INTERLOCKED_EXCH, ++ HLSL_INTERLOCKED_MAX, ++ HLSL_INTERLOCKED_MIN, ++ HLSL_INTERLOCKED_OR, ++ HLSL_INTERLOCKED_XOR, ++}; + - /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */ - unsigned int vsir_instr_idx; --}; -- ++/* Represents an interlocked operation. ++ * ++ * The data_type of the node indicates whether or not the original value is returned. ++ * If the original value is not returned, the data_type is set to NULL. ++ * Otherwise, the data_type is set to the type of the original value. 
++ */ ++struct hlsl_ir_interlocked ++{ ++ struct hlsl_ir_node node; ++ enum hlsl_interlocked_op op; ++ struct hlsl_deref dst; ++ struct hlsl_src coords, cmp_value, value; + }; + struct hlsl_scope +@@ -1241,6 +1274,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n + return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); + } + ++static inline struct hlsl_ir_interlocked *hlsl_ir_interlocked(const struct hlsl_ir_node *node) ++{ ++ VKD3D_ASSERT(node->type == HLSL_IR_INTERLOCKED); ++ return CONTAINING_RECORD(node, struct hlsl_ir_interlocked, node); ++} ++ + static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node) { - /* Item entry for hlsl_ctx.scopes. */ -@@ -1259,12 +1264,6 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co + VKD3D_ASSERT(node->type == HLSL_IR_COMPILE); +@@ -1259,12 +1298,6 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); } @@ -3973,7 +4230,7 @@ index 075c76cb0e2..d712a325322 100644 static inline void hlsl_block_init(struct hlsl_block *block) { list_init(&block->instrs); -@@ -1442,6 +1441,8 @@ void hlsl_block_cleanup(struct hlsl_block *block); +@@ -1442,6 +1475,8 @@ void hlsl_block_cleanup(struct hlsl_block *block); bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); @@ -3982,7 +4239,7 @@ index 075c76cb0e2..d712a325322 100644 void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block, -@@ -1519,6 +1520,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond +@@ -1519,6 +1554,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); @@ -3991,12 +4248,15 @@ index 075c76cb0e2..d712a325322 100644 struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); -@@ -1550,8 +1553,11 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty +@@ -1550,8 +1587,14 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); -struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type, ++ const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value, ++ struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, + struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type, + 
unsigned int unroll_limit, const struct vkd3d_shader_location *loc); @@ -4005,7 +4265,7 @@ index 075c76cb0e2..d712a325322 100644 struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, -@@ -1588,9 +1594,6 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned +@@ -1588,9 +1631,6 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, struct list *cases, const struct vkd3d_shader_location *loc); @@ -4015,7 +4275,7 @@ index 075c76cb0e2..d712a325322 100644 void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, -@@ -1645,24 +1648,35 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere +@@ -1645,24 +1685,35 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); @@ -4041,21 +4301,21 @@ index 075c76cb0e2..d712a325322 100644 - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + char *name; + bool is_user_packed; - --int tpf_compile(struct vsir_program *program, uint64_t config_flags, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -- struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); ++ + /* The data type of a single component of the resource. This might be + * different from the data type of the resource itself in 4.0 profiles, + * where an array (or multi-dimensional array) is handled as a single + * resource, unlike in 5.0. 
*/ + struct hlsl_type *component_type; --enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, -- unsigned int storage_modifiers); +-int tpf_compile(struct vsir_program *program, uint64_t config_flags, +- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, +- struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); + enum hlsl_regset regset; + unsigned int id, space, index, bind_count; -+ + +-enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, +- unsigned int storage_modifiers); + struct vkd3d_shader_location loc; +}; @@ -4090,7 +4350,7 @@ index 8dace11916a..31fb30521e9 100644 typedef {return KW_TYPEDEF; } unsigned {return KW_UNSIGNED; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 60aade732db..e6eaac78994 100644 +index 60aade732db..da2f482b148 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -247,18 +247,19 @@ static bool type_contains_only_numerics(const struct hlsl_type *type) @@ -4278,7 +4538,12 @@ index 60aade732db..e6eaac78994 100644 } } } -@@ -678,8 +671,6 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx +@@ -674,12 +667,11 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_SWITCH: ++ case HLSL_IR_INTERLOCKED: + case HLSL_IR_STATEBLOCK_CONSTANT: hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected literal expression."); break; @@ -4287,7 +4552,7 @@ index 60aade732db..e6eaac78994 100644 } } -@@ -738,11 +729,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str +@@ -738,11 +730,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str return res.number.u; } @@ -4301,7 +4566,7 @@ index 60aade732db..e6eaac78994 100644 unsigned int i, unroll_limit = 0; struct hlsl_ir_node *loop; -@@ -773,11 +764,11 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, +@@ -773,11 +765,11 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, hlsl_block_cleanup(&expr); } @@ -4315,7 +4580,7 @@ index 60aade732db..e6eaac78994 100644 } else if (!strcmp(attr->name, "fastopt") || !strcmp(attr->name, "allow_uav_condition")) -@@ -790,7 +781,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, +@@ -790,7 +782,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, } } @@ -4324,7 +4589,7 @@ index 60aade732db..e6eaac78994 100644 if (!init && !(init = make_empty_block(ctx))) goto oom; -@@ -798,15 +789,12 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, +@@ -798,15 +790,12 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, if (!append_conditional_break(ctx, cond)) goto oom; @@ -4342,7 +4607,7 @@ index 60aade732db..e6eaac78994 100644 goto oom; hlsl_block_add_instr(init, loop); -@@ -860,6 +848,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod +@@ -860,6 +849,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod if (value->data_type->class == HLSL_CLASS_MATRIX) { /* Matrix swizzle */ @@ -4350,7 +4615,7 @@ index 60aade732db..e6eaac78994 100644 bool m_swizzle; unsigned int inc, x, y; -@@ -888,12 +877,13 @@ static struct hlsl_ir_node 
*get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod +@@ -888,12 +878,13 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod x = swizzle[i + 2] - '1'; } @@ -4367,7 +4632,7 @@ index 60aade732db..e6eaac78994 100644 } /* Vector swizzle */ -@@ -920,10 +910,9 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod +@@ -920,10 +911,9 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod break; } @@ -4380,7 +4645,7 @@ index 60aade732db..e6eaac78994 100644 } if (valid) return hlsl_new_swizzle(ctx, swiz, component, value, loc); -@@ -1035,7 +1024,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str +@@ -1035,7 +1025,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str { unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim); @@ -4389,7 +4654,7 @@ index 60aade732db..e6eaac78994 100644 { struct vkd3d_string_buffer *string; -@@ -1192,6 +1181,8 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, +@@ -1192,6 +1182,8 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in struct fields."); @@ -4398,7 +4663,7 @@ index 60aade732db..e6eaac78994 100644 } field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); -@@ -1282,6 +1273,12 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, +@@ -1282,6 +1274,12 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in typedefs."); @@ -4411,7 +4676,19 @@ index 60aade732db..e6eaac78994 100644 } if (!(type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]))) -@@ -1580,7 +1577,7 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * +@@ -1325,6 +1323,11 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Parameter '%s' is declared as both \"out\" and \"uniform\".", param->name); + ++ if ((param->modifiers & HLSL_STORAGE_OUT) && !(param->modifiers & HLSL_STORAGE_IN) ++ && (param->type->modifiers & HLSL_MODIFIER_CONST)) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Parameter '%s' is declared as both \"out\" and \"const\".", param->name); ++ + if (param->reg_reservation.offset_type) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is not allowed on function parameters."); +@@ -1580,7 +1583,7 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) { /* Scalar vars can be converted to pretty much everything */ @@ -4420,7 +4697,7 @@ index 60aade732db..e6eaac78994 100644 return true; if (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_VECTOR) -@@ -1595,13 +1592,13 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t +@@ -1595,13 +1598,13 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) return true; @@ -4438,7 +4715,7 @@ index 60aade732db..e6eaac78994 100644 return true; } -@@ -1661,37 +1658,37 @@ static bool expr_common_shape(struct 
hlsl_ctx *ctx, struct hlsl_type *t1, struct +@@ -1661,37 +1664,37 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct return false; } @@ -4489,7 +4766,7 @@ index 60aade732db..e6eaac78994 100644 } } -@@ -1719,7 +1716,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl +@@ -1719,7 +1722,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl return NULL; hlsl_init_simple_deref_from_var(&var_deref, var); @@ -4498,7 +4775,7 @@ index 60aade732db..e6eaac78994 100644 { struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; struct hlsl_block store_block; -@@ -1822,7 +1819,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct +@@ -1822,7 +1825,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct return arg; bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, @@ -4507,7 +4784,7 @@ index 60aade732db..e6eaac78994 100644 if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) return NULL; -@@ -1985,11 +1982,11 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hls +@@ -1985,11 +1988,11 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hls } if (arg1->data_type->class == HLSL_CLASS_SCALAR) @@ -4522,7 +4799,7 @@ index 60aade732db..e6eaac78994 100644 if (dim == 1) op = HLSL_OP2_MUL; -@@ -2092,8 +2089,8 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned +@@ -2092,8 +2095,8 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned { if (*writemask & (1 << i)) { @@ -4533,7 +4810,7 @@ index 60aade732db..e6eaac78994 100644 if (new_writemask & (1 << s)) return false; new_writemask |= 1 << s; -@@ -2107,9 +2104,9 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned +@@ -2107,9 +2110,9 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned { for (j = 0; j < width; ++j) { @@ -4545,7 +4822,7 @@ index 60aade732db..e6eaac78994 100644 } } -@@ -2119,22 +2116,22 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned +@@ -2119,22 +2122,22 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned return true; } @@ -4576,7 +4853,7 @@ index 60aade732db..e6eaac78994 100644 if (new_writemask & (1 << idx)) return false; new_writemask |= 1 << idx; -@@ -2142,22 +2139,22 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un +@@ -2142,22 +2145,22 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un } width = bit; @@ -4605,7 +4882,7 @@ index 60aade732db..e6eaac78994 100644 *writemask = new_writemask; *ret_width = width; return true; -@@ -2193,8 +2190,8 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc +@@ -2193,8 +2196,8 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc if (hlsl_is_numeric_type(lhs_type)) { @@ -4616,7 +4893,7 @@ index 60aade732db..e6eaac78994 100644 } if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) -@@ -2211,28 +2208,34 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc +@@ -2211,28 +2214,34 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc { struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); struct hlsl_ir_node *new_swizzle; @@ -4656,7 +4933,7 @@ 
index 60aade732db..e6eaac78994 100644 } if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc))) -@@ -2275,13 +2278,13 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc +@@ -2275,13 +2284,13 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); @@ -4672,7 +4949,7 @@ index 60aade732db..e6eaac78994 100644 if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) { -@@ -2298,14 +2301,14 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc +@@ -2298,14 +2307,14 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc hlsl_init_deref_from_index_chain(ctx, &deref, lhs); @@ -4690,7 +4967,7 @@ index 60aade732db..e6eaac78994 100644 if (!(writemask & (1 << idx))) continue; -@@ -2335,7 +2338,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc +@@ -2335,7 +2344,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc VKD3D_ASSERT(!matrix_writemask); @@ -4699,7 +4976,7 @@ index 60aade732db..e6eaac78994 100644 { struct hlsl_ir_node *cell, *load, *store, *c; struct hlsl_deref deref; -@@ -2670,26 +2673,30 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) +@@ -2670,26 +2679,30 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Only innermost array size can be implicit."); @@ -4734,7 +5011,7 @@ index 60aade732db..e6eaac78994 100644 } else { -@@ -2908,7 +2915,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var +@@ -2908,7 +2921,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var v->initializer.args[0] = node_from_block(v->initializer.instrs); } @@ -4744,7 +5021,7 @@ index 60aade732db..e6eaac78994 100644 if (is_default_values_initializer) { -@@ -2993,13 +3001,137 @@ static bool func_is_compatible_match(struct hlsl_ctx *ctx, const struct hlsl_ir_ +@@ -2993,13 +3007,137 @@ static bool func_is_compatible_match(struct hlsl_ctx *ctx, const struct hlsl_ir_ return true; } @@ -4883,7 +5160,7 @@ index 60aade732db..e6eaac78994 100644 if (!(entry = rb_get(&ctx->functions, name))) return NULL; -@@ -3007,18 +3139,58 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, +@@ -3007,18 +3145,58 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) { @@ -4908,9 +5185,8 @@ index 60aade732db..e6eaac78994 100644 + candidates.candidates[0] = decl; + candidates.count = 1; + continue; - } -- compatible_match = decl; - } ++ } ++ } + + if (!(hlsl_array_reserve(ctx, (void **)&candidates.candidates, + &candidates.capacity, candidates.count + 1, sizeof(decl)))) @@ -4919,9 +5195,8 @@ index 60aade732db..e6eaac78994 100644 + return NULL; + } + candidates.candidates[candidates.count++] = decl; - } - -- return compatible_match; ++ } ++ + if (!candidates.count) + return NULL; + @@ -4936,11 +5211,13 @@ index 60aade732db..e6eaac78994 100644 + hlsl_dump_ir_function_decl(ctx, s, candidates.candidates[i]); + hlsl_note(ctx, loc, VKD3D_SHADER_LOG_ERROR, " %s;", s->buffer); + vkd3d_string_buffer_clear(s); -+ } + } +- compatible_match = decl; + hlsl_release_string_buffer(ctx, s); -+ } -+ } -+ + } + } + +- return compatible_match; + decl = 
candidates.candidates[0]; + vkd3d_free(candidates.candidates); + @@ -4948,7 +5225,7 @@ index 60aade732db..e6eaac78994 100644 } static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) -@@ -3164,7 +3336,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, +@@ -3164,7 +3342,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, if (!type_is_integer(type->e.numeric.type)) return arg; @@ -4957,7 +5234,7 @@ index 60aade732db..e6eaac78994 100644 return add_implicit_conversion(ctx, params->instrs, arg, type, loc); } -@@ -3203,13 +3375,13 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * +@@ -3203,13 +3381,13 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * if (arg_type->class == HLSL_CLASS_VECTOR) { vectors = true; @@ -4974,7 +5251,7 @@ index 60aade732db..e6eaac78994 100644 } } -@@ -3254,7 +3426,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, +@@ -3254,7 +3432,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; if (type_is_integer(type->e.numeric.type)) @@ -4983,7 +5260,7 @@ index 60aade732db..e6eaac78994 100644 return convert_args(ctx, params, type, loc); } -@@ -3267,7 +3439,7 @@ static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, +@@ -3267,7 +3445,7 @@ static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; @@ -4992,7 +5269,7 @@ index 60aade732db..e6eaac78994 100644 return convert_args(ctx, params, type, loc); } -@@ -3334,7 +3506,7 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, +@@ -3334,7 +3512,7 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, const struct hlsl_type *type, enum hlsl_base_type base_type) { @@ -5001,7 +5278,7 @@ index 60aade732db..e6eaac78994 100644 } static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params, -@@ -3855,7 +4027,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, +@@ -3855,7 +4033,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) return false; @@ -5010,7 +5287,7 @@ index 60aade732db..e6eaac78994 100644 if (dim == 1) return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); -@@ -3939,7 +4111,7 @@ static bool intrinsic_dst(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -3939,7 +4117,7 @@ static bool intrinsic_dst(struct hlsl_ctx *ctx, const struct parse_initializer * return false; type = params->args[0]->data_type; if (!(type->class == HLSL_CLASS_SCALAR @@ -5019,7 +5296,27 @@ index 60aade732db..e6eaac78994 100644 { struct vkd3d_string_buffer *string; if ((string = hlsl_type_to_string(ctx, type))) -@@ -4371,15 +4543,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -4141,6 +4319,19 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx, + return !!add_user_call(ctx, func, params, false, loc); + } + ++static bool intrinsic_isinf(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_type *type = params->args[0]->data_type, *bool_type; ++ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; ++ ++ bool_type = 
hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, ++ type->e.numeric.dimx, type->e.numeric.dimy); ++ ++ args[0] = params->args[0]; ++ return !!add_expr(ctx, params->instrs, HLSL_OP1_ISINF, args, bool_type, loc); ++} ++ + static bool intrinsic_ldexp(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -4371,15 +4562,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, if (arg1->data_type->class == HLSL_CLASS_VECTOR) { vect_count++; @@ -5038,7 +5335,7 @@ index 60aade732db..e6eaac78994 100644 if (vect_count == 0) { -@@ -4387,12 +4559,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -4387,12 +4578,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, } else if (vect_count == 1) { @@ -5054,7 +5351,7 @@ index 60aade732db..e6eaac78994 100644 ret_type = hlsl_get_scalar_type(ctx, base); } -@@ -4406,23 +4578,23 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -4406,23 +4597,23 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, return false; hlsl_init_simple_deref_from_var(&var_deref, var); @@ -5083,7 +5380,7 @@ index 60aade732db..e6eaac78994 100644 return false; if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) -@@ -4439,7 +4611,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -4439,7 +4630,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, } } @@ -5092,7 +5389,7 @@ index 60aade732db..e6eaac78994 100644 return false; hlsl_block_add_block(params->instrs, &block); } -@@ -4632,7 +4804,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, +@@ -4632,7 +4823,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, static const struct hlsl_constant_value zero_value; struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, @@ -5101,7 +5398,7 @@ index 60aade732db..e6eaac78994 100644 if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type), &zero_value, loc))) return false; -@@ -5086,22 +5258,23 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, +@@ -5086,22 +5277,23 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, return true; } @@ -5130,7 +5427,7 @@ index 60aade732db..e6eaac78994 100644 return false; hlsl_block_add_block(params->instrs, &block); } -@@ -5131,7 +5304,8 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, +@@ -5131,7 +5323,8 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, struct hlsl_ir_node *arg = params->args[0], *ret, *c, *swizzle; struct hlsl_type *arg_type = arg->data_type; @@ -5140,7 +5437,217 @@ index 60aade732db..e6eaac78994 100644 { struct vkd3d_string_buffer *string; -@@ -5447,6 +5621,17 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type +@@ -5187,6 +5380,185 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, + return true; + } + ++static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name) ++{ ++ struct hlsl_ir_node *lhs, *coords, *val, *cmp_val = NULL, *orig_val = NULL; ++ struct hlsl_ir_node *interlocked, *void_ret; ++ struct hlsl_type *lhs_type, *val_type; ++ struct vkd3d_string_buffer *string; ++ struct hlsl_deref dst_deref; ++ ++ if (hlsl_version_lt(ctx, 5, 0)) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, ++ "Interlocked functions can only be used in shader model 5.0 or higher."); ++ ++ if (op != HLSL_INTERLOCKED_CMP_EXCH 
&& op != HLSL_INTERLOCKED_EXCH ++ && params->args_count != 2 && params->args_count != 3) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Unexpected number of arguments to function '%s': expected 2 or 3, but got %u.", ++ name, params->args_count); ++ return false; ++ } ++ ++ lhs = params->args[0]; ++ lhs_type = lhs->data_type; ++ ++ if (op == HLSL_INTERLOCKED_CMP_EXCH) ++ { ++ cmp_val = params->args[1]; ++ val = params->args[2]; ++ if (params->args_count == 4) ++ orig_val = params->args[3]; ++ } ++ else ++ { ++ val = params->args[1]; ++ if (params->args_count == 3) ++ orig_val = params->args[2]; ++ } ++ ++ if (lhs_type->class != HLSL_CLASS_SCALAR || (lhs_type->e.numeric.type != HLSL_TYPE_UINT ++ && lhs_type->e.numeric.type != HLSL_TYPE_INT)) ++ { ++ if ((string = hlsl_type_to_string(ctx, lhs_type))) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Unexpected type for argument 0 of '%s': expected 'uint' or 'int', but got '%s'.", ++ name, string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ } ++ return false; ++ } ++ ++ /* Interlocked*() functions always take uint for the value parameters, ++ * except for InterlockedMax()/InterlockedMin(). */ ++ if (op == HLSL_INTERLOCKED_MAX || op == HLSL_INTERLOCKED_MIN) ++ { ++ enum hlsl_base_type val_base_type = val->data_type->e.numeric.type; ++ ++ /* Floating values are always cast to signed integers. */ ++ if (val_base_type == HLSL_TYPE_FLOAT || val_base_type == HLSL_TYPE_HALF || val_base_type == HLSL_TYPE_DOUBLE) ++ val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT); ++ else ++ val_type = hlsl_get_scalar_type(ctx, lhs_type->e.numeric.type); ++ } ++ else ++ { ++ val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT); ++ } ++ ++ if (cmp_val && !(cmp_val = add_implicit_conversion(ctx, params->instrs, cmp_val, val_type, loc))) ++ return false; ++ if (!(val = add_implicit_conversion(ctx, params->instrs, val, val_type, loc))) ++ return false; ++ ++ /* TODO: groupshared variables */ ++ if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_resource_access(hlsl_ir_index(lhs))) ++ { ++ if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs))) ++ { ++ hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource interlocked targets."); ++ return false; ++ } ++ ++ if (!hlsl_init_deref_from_index_chain(ctx, &dst_deref, hlsl_ir_index(lhs)->val.node)) ++ return false; ++ coords = hlsl_ir_index(lhs)->idx.node; ++ ++ VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); ++ VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); ++ ++ if (hlsl_deref_get_type(ctx, &dst_deref)->class != HLSL_CLASS_UAV) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements."); ++ return false; ++ } ++ } ++ else ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements."); ++ return false; ++ } ++ ++ interlocked = hlsl_new_interlocked(ctx, op, orig_val ? 
lhs_type : NULL, &dst_deref, coords, cmp_val, val, loc); ++ hlsl_cleanup_deref(&dst_deref); ++ if (!interlocked) ++ return false; ++ hlsl_block_add_instr(params->instrs, interlocked); ++ ++ if (orig_val) ++ { ++ if (orig_val->data_type->modifiers & HLSL_MODIFIER_CONST) ++ hlsl_error(ctx, &orig_val->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, ++ "Output argument to '%s' is const.", name); ++ ++ if (!add_assignment(ctx, params->instrs, orig_val, ASSIGN_OP_ASSIGN, interlocked)) ++ return false; ++ } ++ ++ if (!(void_ret = hlsl_new_void_expr(ctx, loc))) ++ return false; ++ hlsl_block_add_instr(params->instrs, void_ret); ++ ++ return true; ++} ++ ++static bool intrinsic_InterlockedAdd(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_ADD, params, loc, "InterlockedAdd"); ++} ++ ++static bool intrinsic_InterlockedAnd(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_AND, params, loc, "InterlockedAnd"); ++} ++ ++static bool intrinsic_InterlockedCompareExchange(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_CMP_EXCH, params, loc, "InterlockedCompareExchange"); ++} ++ ++static bool intrinsic_InterlockedCompareStore(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_CMP_EXCH, params, loc, "InterlockedCompareStore"); ++} ++ ++static bool intrinsic_InterlockedExchange(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_EXCH, params, loc, "InterlockedExchange"); ++} ++ ++static bool intrinsic_InterlockedMax(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_MAX, params, loc, "InterlockedMax"); ++} ++ ++static bool intrinsic_InterlockedMin(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_MIN, params, loc, "InterlockedMin"); ++} ++ ++static bool intrinsic_InterlockedOr(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_OR, params, loc, "InterlockedOr"); ++} ++ ++static bool intrinsic_InterlockedXor(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_XOR, params, loc, "InterlockedXor"); ++} ++ + static const struct intrinsic_function + { + const char *name; +@@ -5200,6 +5572,15 @@ intrinsic_functions[] = + /* Note: these entries should be kept in alphabetical order. 
*/ + {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, + {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, ++ {"InterlockedAdd", -1, true, intrinsic_InterlockedAdd}, ++ {"InterlockedAnd", -1, true, intrinsic_InterlockedAnd}, ++ {"InterlockedCompareExchange", 4, true, intrinsic_InterlockedCompareExchange}, ++ {"InterlockedCompareStore", 3, true, intrinsic_InterlockedCompareStore}, ++ {"InterlockedExchange", 3, true, intrinsic_InterlockedExchange}, ++ {"InterlockedMax", -1, true, intrinsic_InterlockedMax}, ++ {"InterlockedMin", -1, true, intrinsic_InterlockedMin}, ++ {"InterlockedOr", -1, true, intrinsic_InterlockedOr}, ++ {"InterlockedXor", -1, true, intrinsic_InterlockedXor}, + {"abs", 1, true, intrinsic_abs}, + {"acos", 1, true, intrinsic_acos}, + {"all", 1, true, intrinsic_all}, +@@ -5236,6 +5617,7 @@ intrinsic_functions[] = + {"fmod", 2, true, intrinsic_fmod}, + {"frac", 1, true, intrinsic_frac}, + {"fwidth", 1, true, intrinsic_fwidth}, ++ {"isinf", 1, true, intrinsic_isinf}, + {"ldexp", 2, true, intrinsic_ldexp}, + {"length", 1, true, intrinsic_length}, + {"lerp", 3, true, intrinsic_lerp}, +@@ -5447,6 +5829,17 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type struct hlsl_ir_load *load; struct hlsl_ir_var *var; @@ -5158,7 +5665,7 @@ index 60aade732db..e6eaac78994 100644 if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) return NULL; -@@ -5483,6 +5668,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -5483,6 +5876,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, hlsl_error(ctx, &cond->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Ternary condition type '%s' is not numeric.", string->buffer); hlsl_release_string_buffer(ctx, string); @@ -5166,7 +5673,7 @@ index 60aade732db..e6eaac78994 100644 } if (first->data_type->class <= HLSL_CLASS_LAST_NUMERIC -@@ -5491,21 +5677,22 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -5491,21 +5885,22 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, if (!(common_type = get_common_numeric_type(ctx, first, second, &first->loc))) return false; @@ -5194,7 +5701,7 @@ index 60aade732db..e6eaac78994 100644 { /* This condition looks wrong but is correct. * floatN is compatible with float1xN, but not with floatNx1. 
*/ -@@ -5523,7 +5710,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -5523,7 +5918,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, } cond_type = hlsl_get_numeric_type(ctx, common_type->class, HLSL_TYPE_BOOL, @@ -5203,7 +5710,7 @@ index 60aade732db..e6eaac78994 100644 if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) return false; } -@@ -5551,7 +5738,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -5551,7 +5946,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, } cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, @@ -5212,7 +5719,7 @@ index 60aade732db..e6eaac78994 100644 if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) return false; -@@ -5923,7 +6110,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc +@@ -5923,7 +6318,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc return false; } @@ -5221,7 +5728,7 @@ index 60aade732db..e6eaac78994 100644 { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Method %s() requires at least %u channels.", name, read_channel + 1); -@@ -5944,6 +6131,87 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc +@@ -5944,6 +6339,87 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc return true; } @@ -5309,7 +5816,7 @@ index 60aade732db..e6eaac78994 100644 static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *dest, struct hlsl_ir_node *src, unsigned int component, const struct vkd3d_shader_location *loc) { -@@ -6311,6 +6579,11 @@ texture_methods[] = +@@ -6311,6 +6787,11 @@ texture_methods[] = { "Gather", add_gather_method_call, "00010101001000" }, { "GatherAlpha", add_gather_method_call, "00010101001000" }, { "GatherBlue", add_gather_method_call, "00010101001000" }, @@ -5321,7 +5828,7 @@ index 60aade732db..e6eaac78994 100644 { "GatherGreen", add_gather_method_call, "00010101001000" }, { "GatherRed", add_gather_method_call, "00010101001000" }, -@@ -6553,6 +6826,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -6553,6 +7034,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_semantic semantic; enum hlsl_buffer_type buffer_type; enum hlsl_sampler_dim sampler_dim; @@ -5329,7 +5836,7 @@ index 60aade732db..e6eaac78994 100644 struct hlsl_attribute *attr; struct parse_attribute_list attr_list; struct hlsl_ir_switch_case *switch_case; -@@ -6596,6 +6870,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -6596,6 +7078,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_INLINE %token KW_INOUT %token KW_LINEAR @@ -5337,7 +5844,7 @@ index 60aade732db..e6eaac78994 100644 %token KW_MATRIX %token KW_NAMESPACE %token KW_NOINTERPOLATION -@@ -6605,6 +6880,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -6605,6 +7088,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_PACKOFFSET %token KW_PASS %token KW_PIXELSHADER @@ -5345,7 +5852,7 @@ index 60aade732db..e6eaac78994 100644 %token KW_RASTERIZERORDEREDBUFFER %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER %token KW_RASTERIZERORDEREDTEXTURE1D -@@ -6654,6 +6930,7 @@ static void validate_uav_type(struct 
hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -6654,6 +7138,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_TEXTURE3D %token KW_TEXTURECUBE %token KW_TEXTURECUBEARRAY @@ -5353,7 +5860,7 @@ index 60aade732db..e6eaac78994 100644 %token KW_TRUE %token KW_TYPEDEF %token KW_UNSIGNED -@@ -6784,6 +7061,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -6784,6 +7269,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %type semantic @@ -5362,7 +5869,7 @@ index 60aade732db..e6eaac78994 100644 %type state_block %type state_block_index_opt -@@ -7684,7 +7963,10 @@ parameter_decl: +@@ -7684,7 +8171,10 @@ parameter_decl: { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in function parameters."); @@ -5373,7 +5880,7 @@ index 60aade732db..e6eaac78994 100644 type = hlsl_new_array_type(ctx, type, $4.sizes[i]); } vkd3d_free($4.sizes); -@@ -7805,6 +8087,20 @@ rov_type: +@@ -7805,6 +8295,20 @@ rov_type: $$ = HLSL_SAMPLER_DIM_3D; } @@ -5394,7 +5901,7 @@ index 60aade732db..e6eaac78994 100644 resource_format: var_modifiers type { -@@ -7948,6 +8244,10 @@ type_no_void: +@@ -7948,6 +8452,10 @@ type_no_void: validate_uav_type(ctx, $1, $3, &@4); $$ = hlsl_new_uav_type(ctx, $1, $3, true); } @@ -5405,7 +5912,7 @@ index 60aade732db..e6eaac78994 100644 | KW_RWBYTEADDRESSBUFFER { $$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false); -@@ -8088,14 +8388,9 @@ typedef: +@@ -8088,14 +8596,9 @@ typedef: } if (modifiers) @@ -5421,7 +5928,7 @@ index 60aade732db..e6eaac78994 100644 if (!add_typedef(ctx, type, $4)) YYABORT; } -@@ -8753,25 +9048,25 @@ if_body: +@@ -8753,25 +9256,25 @@ if_body: loop_statement: attribute_list_optional loop_scope_start KW_WHILE '(' expr ')' statement { @@ -5451,7 +5958,7 @@ index 60aade732db..e6eaac78994 100644 hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } -@@ -8979,17 +9274,24 @@ primary_expr: +@@ -8979,17 +9482,24 @@ primary_expr: struct hlsl_ir_load *load; struct hlsl_ir_var *var; @@ -5483,7 +5990,7 @@ index 60aade732db..e6eaac78994 100644 } | '(' expr ')' { -@@ -9149,23 +9451,8 @@ postfix_expr: +@@ -9149,23 +9659,8 @@ postfix_expr: | var_modifiers type '(' initializer_expr_list ')' { if ($1) @@ -5507,7 +6014,7 @@ index 60aade732db..e6eaac78994 100644 if (!($$ = add_constructor(ctx, $2, &$4, &@2))) { -@@ -9233,11 +9520,8 @@ unary_expr: +@@ -9233,11 +9728,8 @@ unary_expr: | '(' var_modifiers type arrays ')' unary_expr { if ($2) @@ -5519,7 +6026,7 @@ index 60aade732db..e6eaac78994 100644 if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3)) { -@@ -9381,10 +9665,7 @@ assignment_expr: +@@ -9381,10 +9873,7 @@ assignment_expr: struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3); if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) @@ -5531,7 +6038,7 @@ index 60aade732db..e6eaac78994 100644 destroy_block($1); if (!add_assignment(ctx, $3, lhs, $2, rhs)) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index d11ff481f6b..c666599b342 100644 +index d11ff481f6b..8d817b051ce 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -19,9 +19,14 @@ @@ -5568,7 +6075,18 @@ index d11ff481f6b..c666599b342 100644 if (output) { if (index >= semantic->reported_duplicated_output_next_index) -@@ -1031,7 +1039,7 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct 
hlsl_ir_node *instr, void * +@@ -731,6 +739,10 @@ static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + res = func(ctx, &hlsl_ir_resource_store(instr)->resource, instr); + return res; + ++ case HLSL_IR_INTERLOCKED: ++ res = func(ctx, &hlsl_ir_interlocked(instr)->dst, instr); ++ return res; ++ + default: + return false; + } +@@ -1031,7 +1043,7 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) { @@ -5577,7 +6095,7 @@ index d11ff481f6b..c666599b342 100644 struct hlsl_ir_node *store, *zero; struct hlsl_ir_load *coords_load; struct hlsl_deref coords_deref; -@@ -1075,7 +1083,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins +@@ -1075,7 +1087,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins struct hlsl_deref var_deref; struct hlsl_type *matrix_type; struct hlsl_ir_var *var; @@ -5586,7 +6104,7 @@ index d11ff481f6b..c666599b342 100644 if (instr->type != HLSL_IR_SWIZZLE) return false; -@@ -1088,14 +1096,12 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins +@@ -1088,14 +1100,12 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins return false; hlsl_init_simple_deref_from_var(&var_deref, var); @@ -5603,7 +6121,7 @@ index d11ff481f6b..c666599b342 100644 if (!(load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc))) return false; -@@ -1140,7 +1146,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -1140,7 +1150,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); @@ -5612,7 +6130,7 @@ index d11ff481f6b..c666599b342 100644 if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) return false; -@@ -1176,7 +1182,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -1176,7 +1186,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; hlsl_init_simple_deref_from_var(&row_deref, var); @@ -5621,7 +6139,7 @@ index d11ff481f6b..c666599b342 100644 { struct hlsl_ir_node *c; -@@ -1225,7 +1231,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s +@@ -1225,7 +1235,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type; @@ -5630,7 +6148,7 @@ index d11ff481f6b..c666599b342 100644 { struct hlsl_ir_node *new_cast, *swizzle; -@@ -1236,9 +1242,10 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s +@@ -1236,9 +1246,10 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s return false; hlsl_block_add_instr(block, new_cast); @@ -5643,7 +6161,7 @@ index d11ff481f6b..c666599b342 100644 return false; hlsl_block_add_instr(block, swizzle); } -@@ -1358,8 +1365,10 @@ struct copy_propagation_var_def +@@ -1358,8 +1369,10 @@ struct copy_propagation_var_def struct copy_propagation_state { @@ -5656,7 +6174,7 @@ index d11ff481f6b..c666599b342 100644 }; static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry) -@@ -1381,6 +1390,38 @@ static void 
copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte +@@ -1381,6 +1394,38 @@ static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte vkd3d_free(var_def); } @@ -5695,7 +6213,7 @@ index d11ff481f6b..c666599b342 100644 static struct copy_propagation_value *copy_propagation_get_value_at_time( struct copy_propagation_component_trace *trace, unsigned int time) { -@@ -1398,9 +1439,10 @@ static struct copy_propagation_value *copy_propagation_get_value_at_time( +@@ -1398,9 +1443,10 @@ static struct copy_propagation_value *copy_propagation_get_value_at_time( static struct copy_propagation_value *copy_propagation_get_value(const struct copy_propagation_state *state, const struct hlsl_ir_var *var, unsigned int component, unsigned int time) { @@ -5708,7 +6226,7 @@ index d11ff481f6b..c666599b342 100644 if (entry) { struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); -@@ -1426,7 +1468,8 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co +@@ -1426,7 +1472,8 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co static struct copy_propagation_var_def *copy_propagation_create_var_def(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_ir_var *var) { @@ -5718,7 +6236,7 @@ index d11ff481f6b..c666599b342 100644 struct copy_propagation_var_def *var_def; unsigned int component_count = hlsl_type_component_count(var->data_type); int res; -@@ -1439,7 +1482,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h +@@ -1439,7 +1486,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h var_def->var = var; @@ -5727,7 +6245,7 @@ index d11ff481f6b..c666599b342 100644 VKD3D_ASSERT(!res); return var_def; -@@ -1596,7 +1639,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, +@@ -1596,7 +1643,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count)); return false; } @@ -5736,7 +6254,7 @@ index d11ff481f6b..c666599b342 100644 } TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n", -@@ -1678,6 +1721,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, +@@ -1678,6 +1725,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: @@ -5744,7 +6262,7 @@ index d11ff481f6b..c666599b342 100644 case HLSL_CLASS_NULL: break; -@@ -1719,10 +1763,10 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, +@@ -1719,10 +1767,10 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, return false; load = hlsl_ir_load(swizzle->val.node); @@ -5757,7 +6275,23 @@ index d11ff481f6b..c666599b342 100644 return true; return false; -@@ -1818,18 +1862,6 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s +@@ -1792,6 +1840,15 @@ static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx, + return progress; + } + ++static bool copy_propagation_transform_interlocked(struct hlsl_ctx *ctx, ++ struct hlsl_ir_interlocked *interlocked, struct copy_propagation_state *state) ++{ ++ bool progress = false; ++ ++ progress |= copy_propagation_transform_object_load(ctx, &interlocked->dst, state, interlocked->node.index); ++ return progress; ++} ++ + static void 
copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, + struct copy_propagation_state *state) + { +@@ -1818,18 +1875,6 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s } } @@ -5776,7 +6310,7 @@ index d11ff481f6b..c666599b342 100644 static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_block *block, unsigned int time) { -@@ -1898,16 +1930,19 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b +@@ -1898,16 +1943,19 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, struct copy_propagation_state *state) { @@ -5803,7 +6337,7 @@ index d11ff481f6b..c666599b342 100644 /* Ideally we'd invalidate the outer state looking at what was * touched in the two inner states, but this doesn't work for -@@ -1922,14 +1957,16 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if +@@ -1922,14 +1970,16 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, struct copy_propagation_state *state) { @@ -5824,7 +6358,7 @@ index d11ff481f6b..c666599b342 100644 return progress; } -@@ -1937,15 +1974,16 @@ static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_l +@@ -1937,15 +1987,16 @@ static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_l static bool copy_propagation_process_switch(struct hlsl_ctx *ctx, struct hlsl_ir_switch *s, struct copy_propagation_state *state) { @@ -5845,7 +6379,7 @@ index d11ff481f6b..c666599b342 100644 } LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) -@@ -1964,6 +2002,12 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b +@@ -1964,6 +2015,12 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) { @@ -5858,7 +6392,13 @@ index d11ff481f6b..c666599b342 100644 switch (instr->type) { case HLSL_IR_LOAD: -@@ -2001,6 +2045,9 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b +@@ -1998,9 +2055,15 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b + progress |= copy_propagation_process_switch(ctx, hlsl_ir_switch(instr), state); + break; + ++ case HLSL_IR_INTERLOCKED: ++ progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state); ++ default: break; } @@ -5868,7 +6408,7 @@ index d11ff481f6b..c666599b342 100644 } return progress; -@@ -2013,7 +2060,7 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc +@@ -2013,7 +2076,7 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc index_instructions(block, 2); @@ -5877,7 +6417,7 @@ index d11ff481f6b..c666599b342 100644 progress = copy_propagation_transform_block(ctx, block, &state); -@@ -2053,10 +2100,10 @@ static enum validation_result validate_component_index_range_from_deref(struct h +@@ -2053,10 +2116,10 @@ static enum validation_result validate_component_index_range_from_deref(struct h switch (type->class) { case HLSL_CLASS_VECTOR: @@ -5890,7 +6430,32 @@ index d11ff481f6b..c666599b342 100644 return DEREF_VALIDATION_OUT_OF_BOUNDS; } break; -@@ -2187,7 +2234,7 @@ static 
bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins +@@ -2178,6 +2241,24 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins + validate_component_index_range_from_deref(ctx, &store->lhs); + break; + } ++ case HLSL_IR_INTERLOCKED: ++ { ++ struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr); ++ ++ if (!interlocked->dst.var->is_uniform) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Accessed resource must have a single uniform source."); ++ } ++ else if (validate_component_index_range_from_deref(ctx, &interlocked->dst) == DEREF_VALIDATION_NOT_CONSTANT) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Accessed resource from \"%s\" must be determinable at compile time.", ++ interlocked->dst.var->name); ++ note_non_static_deref_expressions(ctx, &interlocked->dst, "accessed resource"); ++ } ++ break; ++ } + default: + break; + } +@@ -2187,7 +2268,7 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins static bool is_vec1(const struct hlsl_type *type) { @@ -5899,7 +6464,7 @@ index d11ff481f6b..c666599b342 100644 } static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -@@ -2364,18 +2411,20 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins +@@ -2364,18 +2445,20 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type; @@ -5923,7 +6488,7 @@ index d11ff481f6b..c666599b342 100644 return false; hlsl_block_add_instr(block, swizzle); -@@ -2401,11 +2450,12 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -2401,11 +2484,12 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr struct hlsl_ir_node *new_swizzle; uint32_t combined_swizzle; @@ -5939,7 +6504,7 @@ index d11ff481f6b..c666599b342 100644 return false; list_add_before(&instr->entry, &new_swizzle->entry); -@@ -2425,11 +2475,11 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i +@@ -2425,11 +2509,11 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return false; swizzle = hlsl_ir_swizzle(instr); @@ -5954,7 +6519,7 @@ index d11ff481f6b..c666599b342 100644 return false; hlsl_replace_node(instr, swizzle->val.node); -@@ -2589,6 +2639,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir +@@ -2589,6 +2673,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) { struct hlsl_ir_node *eq, *swizzle, *dot, *c, *operands[HLSL_MAX_OPERANDS] = {0}; @@ -5962,7 +6527,7 @@ index d11ff481f6b..c666599b342 100644 struct hlsl_constant_value value; struct hlsl_ir_load *vector_load; enum hlsl_ir_expr_op op; -@@ -2597,7 +2648,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir +@@ -2597,7 +2682,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir return false; hlsl_block_add_instr(block, &vector_load->node); @@ -5971,7 +6536,7 @@ index d11ff481f6b..c666599b342 100644 return false; hlsl_block_add_instr(block, swizzle); -@@ -2605,14 +2656,14 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir +@@ -2605,14 +2690,14 @@ static bool 
lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir value.u[1].u = 1; value.u[2].u = 2; value.u[3].u = 3; @@ -5988,7 +6553,7 @@ index d11ff481f6b..c666599b342 100644 return false; hlsl_block_add_instr(block, eq); -@@ -2621,7 +2672,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir +@@ -2621,7 +2706,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir hlsl_block_add_instr(block, eq); op = HLSL_OP2_DOT; @@ -5997,7 +6562,7 @@ index d11ff481f6b..c666599b342 100644 op = type->e.numeric.type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; /* Note: We may be creating a DOT for bool vectors here, which we need to lower to -@@ -2748,7 +2799,8 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n +@@ -2748,7 +2833,8 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n return false; hlsl_block_add_instr(block, equals); @@ -6007,7 +6572,7 @@ index d11ff481f6b..c666599b342 100644 return false; hlsl_block_add_instr(block, equals); -@@ -2788,6 +2840,108 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n +@@ -2788,6 +2874,116 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n return true; } @@ -6052,6 +6617,7 @@ index d11ff481f6b..c666599b342 100644 + load = hlsl_ir_resource_load(instr); + + if (load->load_type != HLSL_RESOURCE_SAMPLE ++ && load->load_type != HLSL_RESOURCE_SAMPLE_GRAD + && load->load_type != HLSL_RESOURCE_SAMPLE_LOD + && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS) + return false; @@ -6079,6 +6645,13 @@ index d11ff481f6b..c666599b342 100644 + return false; + vkd3d_string_buffer_printf(name, "%s+%s", sampler->name, resource->name); + ++ if (load->texel_offset.node) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, ++ "Texel offsets are not supported on profiles lower than 4.0.\n"); ++ return false; ++ } ++ + TRACE("Lowering to combined sampler %s.\n", debugstr_a(name->buffer)); + + if (!(var = hlsl_get_var(ctx->globals, name->buffer))) @@ -6116,7 +6689,7 @@ index d11ff481f6b..c666599b342 100644 /* Lower combined samples and sampler variables to synthesized separated textures and samplers. * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. 
*/ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -@@ -2808,6 +2962,10 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in +@@ -2808,6 +3004,10 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in case HLSL_RESOURCE_GATHER_GREEN: case HLSL_RESOURCE_GATHER_BLUE: case HLSL_RESOURCE_GATHER_ALPHA: @@ -6127,7 +6700,7 @@ index d11ff481f6b..c666599b342 100644 case HLSL_RESOURCE_RESINFO: case HLSL_RESOURCE_SAMPLE_CMP: case HLSL_RESOURCE_SAMPLE_CMP_LZ: -@@ -2899,6 +3057,27 @@ static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl +@@ -2899,6 +3099,27 @@ static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl list_add_tail(list, &to_add->extern_entry); } @@ -6155,7 +6728,122 @@ index d11ff481f6b..c666599b342 100644 static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) { struct list separated_resources; -@@ -3010,7 +3189,7 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h +@@ -2920,11 +3141,24 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) + return false; + } + +-/* Turn CAST to int or uint into FLOOR + REINTERPRET (which is written as a mere MOV). */ ++/* Turn CAST to int or uint as follows: ++ * ++ * CAST(x) = x - FRACT(x) + extra ++ * ++ * where ++ * ++ * extra = FRACT(x) > 0 && x < 0 ++ * ++ * where the comparisons in the extra term are performed using CMP or SLT ++ * depending on whether this is a pixel or vertex shader, respectively. ++ * ++ * A REINTERPET (which is written as a mere MOV) is also applied to the final ++ * result for type consistency. ++ */ + static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; +- struct hlsl_ir_node *arg, *floor, *res; ++ struct hlsl_ir_node *arg, *res; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) +@@ -2939,12 +3173,83 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + if (arg->data_type->e.numeric.type != HLSL_TYPE_FLOAT && arg->data_type->e.numeric.type != HLSL_TYPE_HALF) + return false; + +- if (!(floor = hlsl_new_unary_expr(ctx, HLSL_OP1_FLOOR, arg, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, floor); ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ { ++ struct hlsl_ir_node *fract, *neg_fract, *has_fract, *floor, *extra, *zero, *one; ++ struct hlsl_constant_value zero_value, one_value; ++ ++ memset(&zero_value, 0, sizeof(zero_value)); ++ if (!(zero = hlsl_new_constant(ctx, arg->data_type, &zero_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, zero); ++ ++ one_value.u[0].f = 1.0; ++ one_value.u[1].f = 1.0; ++ one_value.u[2].f = 1.0; ++ one_value.u[3].f = 1.0; ++ if (!(one = hlsl_new_constant(ctx, arg->data_type, &one_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, one); ++ ++ if (!(fract = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, arg, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, fract); ++ ++ if (!(neg_fract = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, fract, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, neg_fract); ++ ++ if (!(has_fract = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, neg_fract, zero, one))) ++ return false; ++ hlsl_block_add_instr(block, has_fract); ++ ++ if (!(extra = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, arg, zero, 
has_fract))) ++ return false; ++ hlsl_block_add_instr(block, extra); ++ ++ if (!(floor = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, neg_fract))) ++ return false; ++ hlsl_block_add_instr(block, floor); ++ ++ if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, floor, extra))) ++ return false; ++ hlsl_block_add_instr(block, res); ++ } ++ else ++ { ++ struct hlsl_ir_node *neg_arg, *is_neg, *fract, *neg_fract, *has_fract, *floor; ++ ++ if (!(neg_arg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, neg_arg); ++ ++ if (!(is_neg = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg, neg_arg))) ++ return false; ++ hlsl_block_add_instr(block, is_neg); ++ ++ if (!(fract = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, arg, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, fract); ++ ++ if (!(neg_fract = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, fract, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, neg_fract); ++ ++ if (!(has_fract = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, neg_fract, fract))) ++ return false; ++ hlsl_block_add_instr(block, has_fract); ++ ++ if (!(floor = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, neg_fract))) ++ return false; ++ hlsl_block_add_instr(block, floor); ++ ++ if (!(res = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, is_neg, has_fract, floor))) ++ return false; ++ hlsl_block_add_instr(block, res); ++ } + + memset(operands, 0, sizeof(operands)); +- operands[0] = floor; ++ operands[0] = res; + if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, res); +@@ -3010,7 +3315,7 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_DOT) return false; @@ -6164,7 +6852,7 @@ index d11ff481f6b..c666599b342 100644 return false; if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -@@ -3034,11 +3213,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h +@@ -3034,11 +3339,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h return false; hlsl_block_add_instr(block, mul); @@ -6180,7 +6868,7 @@ index d11ff481f6b..c666599b342 100644 return false; hlsl_block_add_instr(block, add_y); -@@ -3202,7 +3383,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct +@@ -3202,7 +3509,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct type = arg->data_type; /* Reduce the range of the input angles to [-pi, pi]. 
*/ @@ -6189,7 +6877,7 @@ index d11ff481f6b..c666599b342 100644 { half_value.u[i].f = 0.5; two_pi_value.u[i].f = 2.0 * M_PI; -@@ -3230,7 +3411,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct +@@ -3230,7 +3537,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct return false; hlsl_block_add_instr(block, reduced); @@ -6198,7 +6886,7 @@ index d11ff481f6b..c666599b342 100644 { if (!(sincos = hlsl_new_unary_expr(ctx, op, reduced, &instr->loc))) return false; -@@ -3243,7 +3424,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct +@@ -3243,7 +3550,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct struct hlsl_deref var_deref; struct hlsl_ir_load *var_load; @@ -6207,7 +6895,7 @@ index d11ff481f6b..c666599b342 100644 { uint32_t s = hlsl_swizzle_from_writemask(1 << i); -@@ -3256,7 +3437,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct +@@ -3256,7 +3563,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct return false; hlsl_init_simple_deref_from_var(&var_deref, var); @@ -6216,7 +6904,7 @@ index d11ff481f6b..c666599b342 100644 { struct hlsl_block store_block; -@@ -3292,7 +3473,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st +@@ -3292,7 +3599,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st return false; arg = expr->operands[0].node; @@ -6225,7 +6913,7 @@ index d11ff481f6b..c666599b342 100644 /* If this is happens, it means we failed to cast the argument to boolean somewhere. */ VKD3D_ASSERT(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); -@@ -3354,7 +3535,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru +@@ -3354,7 +3661,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, @@ -6234,7 +6922,7 @@ index d11ff481f6b..c666599b342 100644 if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc))) return false; -@@ -3375,6 +3556,51 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru +@@ -3375,6 +3682,51 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru return true; } @@ -6286,7 +6974,7 @@ index d11ff481f6b..c666599b342 100644 static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { -@@ -3393,7 +3619,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node +@@ -3393,7 +3745,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; @@ -6295,7 +6983,7 @@ index d11ff481f6b..c666599b342 100644 if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) return false; -@@ -3519,7 +3745,7 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h +@@ -3519,7 +3871,7 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; @@ -6304,7 +6992,7 @@ index d11ff481f6b..c666599b342 100644 if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) return false; -@@ -3579,7 +3805,7 @@ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h 
+@@ -3579,7 +3931,7 @@ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h if (expr->op != HLSL_OP3_CMP) return false; @@ -6313,7 +7001,7 @@ index d11ff481f6b..c666599b342 100644 for (i = 0; i < 3; ++i) { -@@ -3649,7 +3875,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -3649,7 +4001,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return false; /* Narrowing casts should have already been lowered. */ @@ -6322,7 +7010,7 @@ index d11ff481f6b..c666599b342 100644 zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); if (!zero) -@@ -3675,7 +3901,8 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc +@@ -3675,7 +4027,8 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) { @@ -6332,7 +7020,7 @@ index d11ff481f6b..c666599b342 100644 if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc))) return NULL; -@@ -3711,13 +3938,13 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -3711,13 +4064,13 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; if (type->e.numeric.type != HLSL_TYPE_INT) return false; @@ -6348,7 +7036,7 @@ index d11ff481f6b..c666599b342 100644 high_bit_value.u[i].u = 0x80000000; if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) return false; -@@ -3777,9 +4004,9 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -3777,9 +4130,9 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; if (type->e.numeric.type != HLSL_TYPE_INT) return false; @@ -6360,7 +7048,7 @@ index d11ff481f6b..c666599b342 100644 high_bit_value.u[i].u = 0x80000000; if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) return false; -@@ -3870,8 +4097,8 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru +@@ -3870,8 +4223,8 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru { arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; @@ -6371,7 +7059,7 @@ index d11ff481f6b..c666599b342 100644 is_bool = type->e.numeric.type == HLSL_TYPE_BOOL; if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? 
HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) -@@ -3920,7 +4147,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -3920,7 +4273,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return false; if (type->e.numeric.type != HLSL_TYPE_FLOAT) return false; @@ -6380,7 +7068,7 @@ index d11ff481f6b..c666599b342 100644 if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) return false; -@@ -3942,7 +4169,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -3942,7 +4295,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2))) return false; @@ -6389,7 +7077,7 @@ index d11ff481f6b..c666599b342 100644 one_value.u[i].f = 1.0f; if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) return false; -@@ -4000,7 +4227,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst +@@ -4000,7 +4353,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst if (!arg) continue; @@ -6398,7 +7086,7 @@ index d11ff481f6b..c666599b342 100644 if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &instr->loc))) return false; hlsl_block_add_instr(block, arg_cast); -@@ -4008,7 +4235,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst +@@ -4008,7 +4361,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst operands[i] = arg_cast; } @@ -6407,7 +7095,7 @@ index d11ff481f6b..c666599b342 100644 if (!(float_expr = hlsl_new_expr(ctx, expr->op, operands, float_type, &instr->loc))) return false; hlsl_block_add_instr(block, float_expr); -@@ -4049,7 +4276,8 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -4049,7 +4402,8 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, operands[0] = jump->condition.node; operands[1] = zero; @@ -6417,7 +7105,7 @@ index d11ff481f6b..c666599b342 100644 if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) return false; hlsl_block_add_instr(&block, cmp); -@@ -4093,7 +4321,7 @@ static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v +@@ -4093,7 +4447,7 @@ static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v return false; cond = jump->condition.node; @@ -6426,7 +7114,12 @@ index d11ff481f6b..c666599b342 100644 hlsl_block_init(&block); -@@ -4162,9 +4390,6 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -4158,13 +4512,11 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + case HLSL_IR_LOOP: + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_SWITCH: ++ case HLSL_IR_INTERLOCKED: + break; case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. */ vkd3d_unreachable(); @@ -6436,7 +7129,7 @@ index d11ff481f6b..c666599b342 100644 } return false; -@@ -4304,9 +4529,6 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -4304,9 +4656,6 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. 
*/ vkd3d_unreachable(); @@ -6446,7 +7139,27 @@ index d11ff481f6b..c666599b342 100644 case HLSL_IR_STORE: { -@@ -4494,6 +4716,9 @@ struct register_allocator +@@ -4410,6 +4759,19 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + index->idx.node->last_read = last_read; + break; + } ++ case HLSL_IR_INTERLOCKED: ++ { ++ struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr); ++ ++ var = interlocked->dst.var; ++ var->last_read = max(var->last_read, last_read); ++ deref_mark_last_read(&interlocked->dst, last_read); ++ interlocked->coords.node->last_read = last_read; ++ interlocked->value.node->last_read = last_read; ++ if (interlocked->cmp_value.node) ++ interlocked->cmp_value.node->last_read = last_read; ++ break; ++ } + case HLSL_IR_JUMP: + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); +@@ -4494,6 +4856,9 @@ struct register_allocator /* Two allocations with different mode can't share the same register. */ int mode; @@ -6456,7 +7169,7 @@ index d11ff481f6b..c666599b342 100644 } *allocations; size_t count, capacity; -@@ -4513,7 +4738,7 @@ struct register_allocator +@@ -4513,7 +4878,7 @@ struct register_allocator }; static unsigned int get_available_writemask(const struct register_allocator *allocator, @@ -6465,7 +7178,7 @@ index d11ff481f6b..c666599b342 100644 { unsigned int writemask = VKD3DSP_WRITEMASK_ALL; size_t i; -@@ -4532,6 +4757,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all +@@ -4532,6 +4897,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all writemask &= ~allocation->writemask; if (allocation->mode != mode) writemask = 0; @@ -6474,7 +7187,7 @@ index d11ff481f6b..c666599b342 100644 } if (!writemask) -@@ -4542,7 +4769,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all +@@ -4542,7 +4909,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all } static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, @@ -6483,7 +7196,7 @@ index d11ff481f6b..c666599b342 100644 { struct allocation *allocation; -@@ -4556,16 +4783,25 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a +@@ -4556,16 +4923,25 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a allocation->first_write = first_write; allocation->last_read = last_read; allocation->mode = mode; @@ -6513,7 +7226,7 @@ index d11ff481f6b..c666599b342 100644 { struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; unsigned int required_size = force_align ? 
4 : reg_size; -@@ -4579,7 +4815,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a +@@ -4579,7 +4955,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) { unsigned int available_writemask = get_available_writemask(allocator, @@ -6522,7 +7235,7 @@ index d11ff481f6b..c666599b342 100644 if (vkd3d_popcount(available_writemask) >= pref) { -@@ -4589,7 +4825,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a +@@ -4589,7 +4965,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a ret.id = reg_idx; ret.writemask = hlsl_combine_writemasks(writemask, vkd3d_write_mask_from_component_count(component_count)); @@ -6532,7 +7245,7 @@ index d11ff481f6b..c666599b342 100644 return ret; } } -@@ -4598,13 +4835,14 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a +@@ -4598,13 +4975,14 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a ret.id = allocator->reg_count; ret.writemask = vkd3d_write_mask_from_component_count(component_count); record_allocation(ctx, allocator, allocator->reg_count, @@ -6550,7 +7263,7 @@ index d11ff481f6b..c666599b342 100644 { struct hlsl_reg ret = {0}; uint32_t reg_idx; -@@ -4614,11 +4852,11 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct +@@ -4614,11 +4992,11 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct for (reg_idx = 0;; ++reg_idx) { if ((get_available_writemask(allocator, first_write, last_read, @@ -6564,7 +7277,7 @@ index d11ff481f6b..c666599b342 100644 ret.id = reg_idx; ret.allocation_size = 1; -@@ -4628,7 +4866,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct +@@ -4628,7 +5006,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct } static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, @@ -6573,7 +7286,7 @@ index d11ff481f6b..c666599b342 100644 { unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1; unsigned int writemask; -@@ -4636,18 +4874,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig +@@ -4636,18 +5014,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig for (i = 0; i < (reg_size / 4); ++i) { @@ -6595,7 +7308,7 @@ index d11ff481f6b..c666599b342 100644 { struct hlsl_reg ret = {0}; uint32_t reg_idx; -@@ -4655,15 +4893,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo +@@ -4655,15 +5033,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo for (reg_idx = 0;; ++reg_idx) { @@ -6614,7 +7327,7 @@ index d11ff481f6b..c666599b342 100644 ret.id = reg_idx; ret.allocation_size = align(reg_size, 4) / 4; -@@ -4679,9 +4917,10 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, +@@ -4679,9 +5057,10 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ if (type->class <= HLSL_CLASS_VECTOR) @@ -6627,7 +7340,18 @@ index d11ff481f6b..c666599b342 100644 } static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) -@@ -4859,8 +5098,8 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, +@@ -4804,6 +5183,10 @@ static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource); + break; + ++ case HLSL_IR_INTERLOCKED: ++ register_deref_usage(ctx, &hlsl_ir_interlocked(instr)->dst); ++ break; ++ + default: + break; + } +@@ -4859,8 +5242,8 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, } if (reg_writemask) @@ -6638,7 +7362,7 @@ index d11ff481f6b..c666599b342 100644 else instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, instr->data_type); -@@ -5006,13 +5245,13 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, +@@ -5006,13 +5389,13 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); VKD3D_ASSERT(hlsl_is_numeric_type(type)); @@ -6654,7 +7378,7 @@ index d11ff481f6b..c666599b342 100644 if (!(constant->reg.writemask & (1u << x))) continue; -@@ -5040,9 +5279,6 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, +@@ -5040,9 +5423,6 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, case HLSL_TYPE_DOUBLE: FIXME("Double constant.\n"); return; @@ -6664,7 +7388,7 @@ index d11ff481f6b..c666599b342 100644 } record_constant(ctx, constant->reg.id * 4 + x, f, &constant->node.loc); -@@ -5084,7 +5320,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, +@@ -5084,7 +5464,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, } } @@ -6673,7 +7397,7 @@ index d11ff481f6b..c666599b342 100644 { struct hlsl_ir_var *var; -@@ -5092,8 +5328,8 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ +@@ -5092,8 +5472,8 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) { @@ -6684,7 +7408,7 @@ index d11ff481f6b..c666599b342 100644 if (to_sort_size > var_size) { -@@ -5105,7 +5341,7 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ +@@ -5105,7 +5485,7 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ list_add_tail(sorted, &to_sort->extern_entry); } @@ -6693,7 +7417,7 @@ index d11ff481f6b..c666599b342 100644 { struct list sorted = LIST_INIT(sorted); struct hlsl_ir_var *var, *next; -@@ -5113,7 +5349,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) +@@ -5113,7 +5493,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform) @@ -6702,7 +7426,7 @@ index d11ff481f6b..c666599b342 100644 } list_move_tail(&ctx->extern_vars, &sorted); } -@@ -5161,7 +5397,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi +@@ -5161,7 +5541,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi struct register_allocator allocator = {0}; struct hlsl_ir_var *var; @@ -6711,7 +7435,7 @@ index d11ff481f6b..c666599b342 100644 LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) 
{ -@@ -5181,14 +5417,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi +@@ -5181,14 +5561,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi { if (i < bind_count) { @@ -6730,7 +7454,7 @@ index d11ff481f6b..c666599b342 100644 } var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; -@@ -5211,7 +5448,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi +@@ -5211,7 +5592,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi if (!var->regs[HLSL_REGSET_NUMERIC].allocated) { @@ -6739,7 +7463,7 @@ index d11ff481f6b..c666599b342 100644 TRACE("Allocated %s to %s.\n", var->name, debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } -@@ -5254,7 +5491,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun +@@ -5254,7 +5635,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun var = entry_func->parameters.vars[i]; if (var->is_output_semantic) { @@ -6749,7 +7473,7 @@ index d11ff481f6b..c666599b342 100644 break; } } -@@ -5266,7 +5504,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun +@@ -5266,7 +5648,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun return allocator.reg_count; } @@ -6759,7 +7483,7 @@ index d11ff481f6b..c666599b342 100644 { unsigned int i; -@@ -5311,6 +5550,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -5311,6 +5694,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var enum vkd3d_shader_register_type type; struct vkd3d_shader_version version; @@ -6768,7 +7492,7 @@ index d11ff481f6b..c666599b342 100644 uint32_t reg; bool builtin; -@@ -5363,6 +5604,14 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -5363,6 +5748,14 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var * domains, it is allocated as if it was 'float[1]'. */ var->force_align = true; } @@ -6783,7 +7507,7 @@ index d11ff481f6b..c666599b342 100644 } if (builtin) -@@ -5374,10 +5623,13 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -5374,10 +5767,13 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var { int mode = (ctx->profile->major_version < 4) ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); @@ -6800,7 +7524,7 @@ index d11ff481f6b..c666599b342 100644 TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 
'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); -@@ -5831,7 +6083,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl +@@ -5831,7 +6227,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl switch (type->class) { case HLSL_CLASS_VECTOR: @@ -6809,7 +7533,7 @@ index d11ff481f6b..c666599b342 100644 return false; *start += idx; break; -@@ -5840,9 +6092,9 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl +@@ -5840,9 +6236,9 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl if (idx >= hlsl_type_major_size(type)) return false; if (hlsl_type_is_row_major(type)) @@ -6821,7 +7545,7 @@ index d11ff481f6b..c666599b342 100644 break; case HLSL_CLASS_ARRAY: -@@ -6419,6 +6671,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) +@@ -6419,6 +6815,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) { progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); @@ -6829,7 +7553,7 @@ index d11ff481f6b..c666599b342 100644 progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); progress |= hlsl_copy_propagation_execute(ctx, body); progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); -@@ -6430,8 +6683,8 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) +@@ -6430,8 +6827,8 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, struct shader_signature *signature, bool output, bool is_patch_constant_func, struct hlsl_ir_var *var) { @@ -6839,7 +7563,7 @@ index d11ff481f6b..c666599b342 100644 unsigned int register_index, mask, use_mask; const char *name = var->semantic.name; enum vkd3d_shader_register_type type; -@@ -6451,7 +6704,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog +@@ -6451,7 +6848,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) { register_index = has_idx ? 
var->semantic.index : ~0u; @@ -6848,7 +7572,7 @@ index d11ff481f6b..c666599b342 100644 } else { -@@ -6478,12 +6731,11 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog +@@ -6478,12 +6875,11 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog component_type = VKD3D_SHADER_COMPONENT_UINT; break; @@ -6862,7 +7586,7 @@ index d11ff481f6b..c666599b342 100644 break; } -@@ -6519,19 +6771,19 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog +@@ -6519,19 +6915,19 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog sysval = VKD3D_SHADER_SV_POSITION; } @@ -6885,7 +7609,7 @@ index d11ff481f6b..c666599b342 100644 hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "FOG output must have only 1 component in this shader model."); -@@ -6636,7 +6888,6 @@ static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t d +@@ -6636,7 +7032,6 @@ static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t d swizzle = hlsl_swizzle_from_writemask(src_writemask); swizzle = hlsl_map_swizzle(swizzle, dst_writemask); @@ -6893,7 +7617,17 @@ index d11ff481f6b..c666599b342 100644 return swizzle; } -@@ -6812,7 +7063,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src +@@ -6726,7 +7121,8 @@ static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx, + break; + + case HLSL_SAMPLER_DIM_GENERIC: +- /* These can appear in sm4-style combined sample instructions. */ ++ /* These can appear in sm4-style separate sample ++ * instructions that haven't been lowered. */ + hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); + continue; + +@@ -6812,7 +7208,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src } static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, @@ -6902,7 +7636,7 @@ index d11ff481f6b..c666599b342 100644 { struct hlsl_ir_constant *constant; -@@ -6821,7 +7072,7 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, +@@ -6821,7 +7217,7 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, /* In SM4 constants are inlined */ constant = hlsl_ir_constant(instr); vsir_src_from_hlsl_constant_value(src, ctx, &constant->value, @@ -6911,52 +7645,41 @@ index d11ff481f6b..c666599b342 100644 } else { -@@ -6832,89 +7083,325 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, +@@ -6832,29 +7228,265 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, } } -static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, - struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) --{ ++static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) + { - VKD3D_ASSERT(instr->reg.allocated); - vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); - dst->reg.idx[0].offset = instr->reg.id; - dst->reg.dimension = VSIR_DIMENSION_VEC4; - dst->write_mask = instr->reg.writemask; -} -- ++ const struct hlsl_ir_var *var = deref->var; ++ unsigned int offset_const_deref; + -static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_constant *constant) -+static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, 
-+ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) - { +-{ - struct hlsl_ir_node *instr = &constant->node; - struct vkd3d_shader_dst_param *dst_param; - struct vkd3d_shader_src_param *src_param; - struct vkd3d_shader_instruction *ins; -- -- VKD3D_ASSERT(instr->reg.allocated); -- VKD3D_ASSERT(constant->reg.allocated); -+ const struct hlsl_ir_var *var = deref->var; -+ unsigned int offset_const_deref; - -- if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) -- return; + reg->type = var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; + reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + reg->dimension = VSIR_DIMENSION_VEC4; -- src_param = &ins->src[0]; -- vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); -- src_param->reg.idx[0].offset = constant->reg.id; -- src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); +- VKD3D_ASSERT(instr->reg.allocated); +- VKD3D_ASSERT(constant->reg.allocated); + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); -- dst_param = &ins->dst[0]; -- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- dst_param->reg.idx[0].offset = instr->reg.id; -- dst_param->write_mask = instr->reg.writemask; --} +- if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) +- return; + if (!var->indexable) + { + offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); @@ -6968,20 +7691,11 @@ index d11ff481f6b..c666599b342 100644 + offset_const_deref = deref->const_offset; + reg->idx[1].offset = offset_const_deref / 4; + reg->idx_count = 2; - --static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, -- struct vsir_program *program, struct hlsl_ir_expr *expr) --{ -- struct vkd3d_shader_src_param *src_param; -- struct hlsl_ir_node *instr = &expr->node; -- struct vkd3d_shader_instruction *ins; ++ + if (deref->rel_offset.node) + { + struct vkd3d_shader_src_param *idx_src; - -- if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) -- return; -- ins->flags = VKD3DSI_SAMPLE_INFO_UINT; ++ + if (!(idx_src = vsir_program_get_src_params(program, 1))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; @@ -6989,55 +7703,28 @@ index d11ff481f6b..c666599b342 100644 + } + memset(idx_src, 0, sizeof(*idx_src)); + reg->idx[1].rel_addr = idx_src; - -- vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ + vsir_src_from_hlsl_node(idx_src, ctx, deref->rel_offset.node, VKD3DSP_WRITEMASK_ALL); + } + } - -- src_param = &ins->src[0]; -- vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0); -- src_param->reg.dimension = VSIR_DIMENSION_VEC4; -- src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ + *writemask = 0xf & (0xf << (offset_const_deref % 4)); + if (var->regs[HLSL_REGSET_NUMERIC].writemask) + *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); + return true; - } - --/* Translate ops that can be mapped to a single vsir instruction with only one dst register. 
*/ --static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, -- struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, -- uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles) ++} ++ +static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) - { -- struct hlsl_ir_node *instr = &expr->node; -- struct vkd3d_shader_dst_param *dst_param; -- struct vkd3d_shader_src_param *src_param; -- struct vkd3d_shader_instruction *ins; -- unsigned int i, src_count = 0; -- -- VKD3D_ASSERT(instr->reg.allocated); ++{ + const struct vkd3d_shader_version *version = &program->shader_version; + const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); + const struct hlsl_ir_var *var = deref->var; - -- for (i = 0; i < HLSL_MAX_OPERANDS; ++i) ++ + if (var->is_uniform) - { -- if (expr->operands[i].node) -- src_count = i + 1; -- } -- VKD3D_ASSERT(!src_mod || src_count == 1); -- -- if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) -- return; ++ { + enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); - -- dst_param = &ins->dst[0]; -- vsir_dst_from_hlsl_node(dst_param, ctx, instr); -- dst_param->modifiers = dst_mod; ++ + if (regset == HLSL_REGSET_TEXTURES) + { + reg->type = VKD3DSPR_RESOURCE; @@ -7098,9 +7785,7 @@ index d11ff481f6b..c666599b342 100644 + else + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; - -- for (i = 0; i < src_count; ++i) -- { ++ + VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); + reg->type = VKD3DSPR_CONSTBUFFER; + reg->dimension = VSIR_DIMENSION_VEC4; @@ -7242,70 +7927,10 @@ index d11ff481f6b..c666599b342 100644 + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; -+ -+ src_param = &ins->src[0]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = constant->reg.id; -+ src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); -+ -+ dst_param = &ins->dst[0]; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ dst_param->reg.idx[0].offset = instr->reg.id; -+ dst_param->write_mask = instr->reg.writemask; -+} -+ -+static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr) -+{ -+ struct vkd3d_shader_src_param *src_param; -+ struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) -+ return; -+ ins->flags = VKD3DSI_SAMPLE_INFO_UINT; -+ -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ -+ src_param = &ins->src[0]; -+ vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0); -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+} -+ -+/* Translate ops that can be mapped to a single vsir instruction with only one dst register. 
*/ -+static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, -+ uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles) -+{ -+ struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int i, src_count = 0; -+ -+ VKD3D_ASSERT(instr->reg.allocated); -+ -+ for (i = 0; i < HLSL_MAX_OPERANDS; ++i) -+ { -+ if (expr->operands[i].node) -+ src_count = i + 1; -+ } -+ VKD3D_ASSERT(!src_mod || src_count == 1); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) -+ return; -+ -+ dst_param = &ins->dst[0]; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); -+ dst_param->modifiers = dst_mod; -+ -+ for (i = 0; i < src_count; ++i) -+ { - struct hlsl_ir_node *operand = expr->operands[i].node; - src_param = &ins->src[i]; -@@ -7014,7 +7501,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); +@@ -7014,7 +7646,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, dst_type = instr->data_type; /* Narrowing casts were already lowered. */ @@ -7314,7 +7939,7 @@ index d11ff481f6b..c666599b342 100644 switch (dst_type->e.numeric.type) { -@@ -7040,9 +7527,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -7040,9 +7672,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "The 'double' type is not supported for the %s profile.", ctx->profile->name); break; @@ -7324,7 +7949,7 @@ index d11ff481f6b..c666599b342 100644 } break; -@@ -7059,19 +7543,13 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -7059,19 +7688,13 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_INT: case HLSL_TYPE_UINT: @@ -7345,7 +7970,7 @@ index d11ff481f6b..c666599b342 100644 } break; -@@ -7096,7 +7574,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -7096,7 +7719,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_BOOL: /* Casts to bool should have already been lowered. 
*/ @@ -7353,7 +7978,7 @@ index d11ff481f6b..c666599b342 100644 hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.", debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type)); break; -@@ -7178,7 +7655,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr +@@ -7178,7 +7800,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break; case HLSL_OP2_DOT: @@ -7362,7 +7987,7 @@ index d11ff481f6b..c666599b342 100644 { case 3: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); -@@ -7276,7 +7753,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, +@@ -7276,7 +7898,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, register_index = reg.id; } else @@ -7371,7 +7996,7 @@ index d11ff481f6b..c666599b342 100644 if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE") || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3))) -@@ -7334,7 +7811,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, +@@ -7334,7 +7956,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, if (sm1_register_from_semantic_name(&version, deref->var->semantic.name, deref->var->semantic.index, false, &type, ®ister_index)) { @@ -7380,7 +8005,7 @@ index d11ff481f6b..c666599b342 100644 } else { -@@ -7472,9 +7949,8 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, +@@ -7472,9 +8094,8 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, dst_param->write_mask = instr->reg.writemask; swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); @@ -7391,7 +8016,7 @@ index d11ff481f6b..c666599b342 100644 src_param = &ins->src[0]; VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT); -@@ -7539,7 +8015,7 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program +@@ -7539,7 +8160,7 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); return; } @@ -7400,7 +8025,7 @@ index d11ff481f6b..c666599b342 100644 if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IFC, 0, 2))) return; -@@ -7624,31 +8100,20 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo +@@ -7624,31 +8245,20 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo } static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, @@ -7434,7 +8059,7 @@ index d11ff481f6b..c666599b342 100644 generate_vsir_signature(ctx, program, entry_func); hlsl_block_init(&block); -@@ -7659,66 +8124,434 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl +@@ -7659,80 +8269,448 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl sm1_generate_vsir_block(ctx, &entry_func->body, program); } @@ -7608,12 +8233,24 @@ index d11ff481f6b..c666599b342 100644 - { - case VKD3D_SHADER_SV_NONE: - opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) +- ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT; +- break; + case HLSL_CLASS_STRUCT: + return D3DXPT_VOID; -+ + +- case VKD3D_SHADER_SV_INSTANCE_ID: +- case VKD3D_SHADER_SV_IS_FRONT_FACE: +- case VKD3D_SHADER_SV_PRIMITIVE_ID: +- case VKD3D_SHADER_SV_SAMPLE_INDEX: +- case VKD3D_SHADER_SV_VERTEX_ID: +- opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) +- ? 
VKD3DSIH_DCL_INPUT_PS_SGV : VKD3DSIH_DCL_INPUT_SGV; +- break; + case HLSL_CLASS_STRING: + return D3DXPT_STRING; -+ + +- default: +- opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) + case HLSL_CLASS_PIXEL_SHADER: + return D3DXPT_PIXELSHADER; + @@ -7913,10 +8550,24 @@ index d11ff481f6b..c666599b342 100644 + { + case VKD3D_SHADER_SV_NONE: + opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT; ++ ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT; ++ break; ++ ++ case VKD3D_SHADER_SV_INSTANCE_ID: ++ case VKD3D_SHADER_SV_IS_FRONT_FACE: ++ case VKD3D_SHADER_SV_PRIMITIVE_ID: ++ case VKD3D_SHADER_SV_SAMPLE_INDEX: ++ case VKD3D_SHADER_SV_VERTEX_ID: ++ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) ++ ? VKD3DSIH_DCL_INPUT_PS_SGV : VKD3DSIH_DCL_INPUT_SGV; ++ break; ++ ++ default: ++ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3DSIH_DCL_INPUT_PS_SIV : VKD3DSIH_DCL_INPUT_SIV; break; - -@@ -7749,7 +8582,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs + } +@@ -7749,7 +8727,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs { if (has_idx) idx = var->semantic.index; @@ -7925,7 +8576,7 @@ index d11ff481f6b..c666599b342 100644 } else { -@@ -7806,8 +8639,6 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs +@@ -7806,8 +8784,6 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL) ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); @@ -7934,7 +8585,7 @@ index d11ff481f6b..c666599b342 100644 } static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, -@@ -7819,8 +8650,6 @@ static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_ +@@ -7819,8 +8795,6 @@ static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_ return; ins->declaration.count = temp_count; @@ -7943,7 +8594,7 @@ index d11ff481f6b..c666599b342 100644 } static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, -@@ -7838,8 +8667,6 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, +@@ -7838,8 +8812,6 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT; ins->declaration.indexable_temp.component_count = comp_count; ins->declaration.indexable_temp.has_function_scope = false; @@ -7952,7 +8603,7 @@ index d11ff481f6b..c666599b342 100644 } static bool type_is_float(const struct hlsl_type *type) -@@ -7891,7 +8718,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -7891,7 +8863,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, } one = { .f = 1.0 }; /* Narrowing casts were already lowered. 
*/ @@ -7961,7 +8612,7 @@ index d11ff481f6b..c666599b342 100644 switch (dst_type->e.numeric.type) { -@@ -7919,9 +8746,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -7919,9 +8891,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); return false; @@ -7971,7 +8622,7 @@ index d11ff481f6b..c666599b342 100644 } break; -@@ -7945,9 +8769,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -7945,9 +8914,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); return false; @@ -7981,7 +8632,7 @@ index d11ff481f6b..c666599b342 100644 } break; -@@ -7971,9 +8792,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -7971,9 +8937,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); return false; @@ -7991,7 +8642,7 @@ index d11ff481f6b..c666599b342 100644 } break; -@@ -7983,9 +8801,10 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -7983,9 +8946,10 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_BOOL: /* Casts to bool should have already been lowered. */ @@ -8004,7 +8655,7 @@ index d11ff481f6b..c666599b342 100644 } static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program, -@@ -8040,7 +8859,7 @@ static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx, +@@ -8040,7 +9004,7 @@ static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx, value.u[2].f = 1.0f; value.u[3].f = 1.0f; vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, @@ -8013,7 +8664,7 @@ index d11ff481f6b..c666599b342 100644 vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask); } -@@ -8270,7 +9089,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -8270,7 +9234,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, switch (dst_type->e.numeric.type) { case HLSL_TYPE_FLOAT: @@ -8022,7 +8673,7 @@ index d11ff481f6b..c666599b342 100644 { case 4: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); -@@ -8505,188 +9324,1963 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -8505,188 +9469,2042 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, } } @@ -8115,17 +8766,10 @@ index d11ff481f6b..c666599b342 100644 - break; + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3))) + return false; - -- case HLSL_IR_SWIZZLE: -- generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); -- replace_instr_with_last_vsir_instr(ctx, program, instr); -- break; ++ + dst_param = &ins->dst[0]; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); - -- default: -- break; -- } ++ + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) + return false; @@ -8597,6 +9241,81 @@ index d11ff481f6b..c666599b342 100644 + } +} + ++static bool sm4_generate_vsir_instr_interlocked(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_interlocked *interlocked) ++{ ++ ++ static const enum vkd3d_shader_opcode opcodes[] = ++ { ++ [HLSL_INTERLOCKED_ADD] = VKD3DSIH_ATOMIC_IADD, ++ 
[HLSL_INTERLOCKED_AND] = VKD3DSIH_ATOMIC_AND, ++ [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_ATOMIC_CMP_STORE, ++ [HLSL_INTERLOCKED_MAX] = VKD3DSIH_ATOMIC_UMAX, ++ [HLSL_INTERLOCKED_MIN] = VKD3DSIH_ATOMIC_UMIN, ++ [HLSL_INTERLOCKED_OR] = VKD3DSIH_ATOMIC_OR, ++ [HLSL_INTERLOCKED_XOR] = VKD3DSIH_ATOMIC_XOR, ++ }; ++ ++ static const enum vkd3d_shader_opcode imm_opcodes[] = ++ { ++ [HLSL_INTERLOCKED_ADD] = VKD3DSIH_IMM_ATOMIC_IADD, ++ [HLSL_INTERLOCKED_AND] = VKD3DSIH_IMM_ATOMIC_AND, ++ [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_IMM_ATOMIC_CMP_EXCH, ++ [HLSL_INTERLOCKED_EXCH] = VKD3DSIH_IMM_ATOMIC_EXCH, ++ [HLSL_INTERLOCKED_MAX] = VKD3DSIH_IMM_ATOMIC_UMAX, ++ [HLSL_INTERLOCKED_MIN] = VKD3DSIH_IMM_ATOMIC_UMIN, ++ [HLSL_INTERLOCKED_OR] = VKD3DSIH_IMM_ATOMIC_OR, ++ [HLSL_INTERLOCKED_XOR] = VKD3DSIH_IMM_ATOMIC_XOR, ++ }; ++ ++ struct hlsl_ir_node *cmp_value = interlocked->cmp_value.node, *value = interlocked->value.node; ++ struct hlsl_ir_node *coords = interlocked->coords.node; ++ struct hlsl_ir_node *instr = &interlocked->node; ++ bool is_imm = interlocked->node.reg.allocated; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_instruction *ins; ++ enum vkd3d_shader_opcode opcode; ++ ++ opcode = is_imm ? imm_opcodes[interlocked->op] : opcodes[interlocked->op]; ++ ++ if (value->data_type->e.numeric.type == HLSL_TYPE_INT) ++ { ++ if (opcode == VKD3DSIH_ATOMIC_UMAX) ++ opcode = VKD3DSIH_ATOMIC_IMAX; ++ else if (opcode == VKD3DSIH_ATOMIC_UMIN) ++ opcode = VKD3DSIH_ATOMIC_IMIN; ++ else if (opcode == VKD3DSIH_IMM_ATOMIC_UMAX) ++ opcode = VKD3DSIH_IMM_ATOMIC_IMAX; ++ else if (opcode == VKD3DSIH_IMM_ATOMIC_UMIN) ++ opcode = VKD3DSIH_IMM_ATOMIC_IMIN; ++ } ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, ++ is_imm ? 2 : 1, cmp_value ? 3 : 2))) ++ return false; ++ ++ if (is_imm) ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ ++ dst_param = is_imm ? &ins->dst[1] : &ins->dst[0]; ++ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, dst_param, &interlocked->dst, &instr->loc, 0)) ++ return false; ++ dst_param->reg.dimension = VSIR_DIMENSION_NONE; ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ if (cmp_value) ++ { ++ vsir_src_from_hlsl_node(&ins->src[1], ctx, cmp_value, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(&ins->src[2], ctx, value, VKD3DSP_WRITEMASK_ALL); ++ } ++ else ++ { ++ vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL); ++ } ++ ++ return true; ++} ++ +static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_jump *jump) +{ @@ -8774,6 +9493,10 @@ index d11ff481f6b..c666599b342 100644 + generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); + break; + ++ case HLSL_IR_INTERLOCKED: ++ sm4_generate_vsir_instr_interlocked(ctx, program, hlsl_ir_interlocked(instr)); ++ break; ++ + default: + break; + } @@ -9319,31 +10042,20 @@ index d11ff481f6b..c666599b342 100644 + + if (multisampled) + semantic->sample_count = component_type->sample_count; - } - } - --static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, -- struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) ++ } ++} ++ +/* OBJECTIVE: Translate all the information from ctx and entry_func to the + * vsir_program, so it can be used as input to tpf_compile() without relying + * on ctx and entry_func. 
*/ +static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + uint64_t config_flags, struct vsir_program *program) - { -- bool is_patch_constant_func = func == ctx->patch_constant_func; -- struct hlsl_block block = {0}; -- struct hlsl_scope *scope; -- struct hlsl_ir_var *var; -- uint32_t temp_count; ++{ + struct vkd3d_shader_version version = {0}; + struct extern_resource *extern_resources; + unsigned int extern_resources_count; + const struct hlsl_buffer *cbuffer; - -- compute_liveness(ctx, func); -- mark_indexable_vars(ctx, func); -- temp_count = allocate_temp_registers(ctx, func); -- if (ctx->result) ++ + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; @@ -9351,8 +10063,7 @@ index d11ff481f6b..c666599b342 100644 + if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; -- program->temp_count = max(program->temp_count, temp_count); ++ return; + } + + generate_vsir_signature(ctx, program, func); @@ -9413,7 +10124,11 @@ index d11ff481f6b..c666599b342 100644 + generate_vsir_scan_required_features(ctx, program); + generate_vsir_scan_global_flags(ctx, program, func); +} -+ + +- case HLSL_IR_SWIZZLE: +- generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); +- replace_instr_with_last_vsir_instr(ctx, program, instr); +- break; +/* For some reason, for matrices, values from default value initializers end + * up in different components than from regular initializers. Default value + * initializers fill the matrix in vertical reading order @@ -9425,7 +10140,9 @@ index d11ff481f6b..c666599b342 100644 +{ + unsigned int element_comp_count, element, x, y, i; + unsigned int base = 0; -+ + +- default: +- break; + switch (type->class) + { + case HLSL_CLASS_MATRIX: @@ -9636,7 +10353,7 @@ index d11ff481f6b..c666599b342 100644 + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm4_type(ctx, buffer, field->type); + ++field_count; -+ } + } + + fields_offset = bytecode_align(buffer); + @@ -9672,11 +10389,18 @@ index d11ff481f6b..c666599b342 100644 + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, name_offset); -+ } -+} -+ + } + } + +-static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, +- struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) +static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef) -+{ + { +- bool is_patch_constant_func = func == ctx->patch_constant_func; +- struct hlsl_block block = {0}; +- struct hlsl_scope *scope; +- struct hlsl_ir_var *var; +- uint32_t temp_count; + uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t); + size_t cbuffers_offset, resources_offset, creator_offset, string_offset; + unsigned int cbuffer_count = 0, extern_resources_count, i, j; @@ -9686,7 +10410,11 @@ index d11ff481f6b..c666599b342 100644 + struct extern_resource *extern_resources; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; -+ + +- compute_liveness(ctx, func); +- mark_indexable_vars(ctx, func); +- temp_count = allocate_temp_registers(ctx, func); +- if (ctx->result) + static const uint16_t target_types[] = + { + 0xffff, /* PIXEL */ @@ -9788,44 +10516,34 @@ index d11ff481f6b..c666599b342 100644 + + if (!cbuffer->reg.allocated) + continue; - -- hlsl_block_init(&block); ++ + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) + ++var_count; + } - -- LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if ((var->is_input_semantic && var->last_read) -- || (var->is_output_semantic && var->first_write)) -- sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); ++ + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, var_count); + put_u32(&buffer, 0); /* variable offset */ + put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); + put_u32(&buffer, 0); /* FIXME: flags */ + put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_CT_CBUFFER : D3D_CT_TBUFFER); - } - -- if (temp_count) -- sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); ++ } ++ + i = 0; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!cbuffer->reg.allocated) + continue; - -- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ + string_offset = put_string(&buffer, cbuffer->name); + set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); + } + + i = 0; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { -- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { + size_t vars_start = bytecode_align(&buffer); + + if (!cbuffer->reg.allocated) @@ -9834,13 +10552,11 @@ index d11ff481f6b..c666599b342 100644 + set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) ++ { + uint32_t flags = 0; + + if (!var->is_uniform || var->buffer != cbuffer || !var->data_type->reg_size[HLSL_REGSET_NUMERIC]) - continue; -- if (!var->regs[HLSL_REGSET_NUMERIC].allocated) ++ continue; + + if (var->is_read) + flags |= D3D_SVF_USED; @@ -9868,24 +10584,20 @@ index d11ff481f6b..c666599b342 100644 + size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); + + if (!var->is_uniform || var->buffer != cbuffer || !var->data_type->reg_size[HLSL_REGSET_NUMERIC]) - continue; - -- if (var->indexable) ++ continue; ++ + string_offset = put_string(&buffer, var->name); + set_u32(&buffer, var_offset, string_offset); + write_sm4_type(ctx, &buffer, var->data_type); + set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); + + if (var->default_values) - { -- unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; -- unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; ++ { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int 
comp_count = hlsl_type_component_count(var->data_type); + unsigned int default_value_offset; + unsigned int k; - -- sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); ++ + default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); + set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); + @@ -9913,44 +10625,33 @@ index d11ff481f6b..c666599b342 100644 + var->default_values[k].number.u); + } + } - } ++ } + + ++j; - } - } - -- list_move_head(&func->body.instrs, &block.instrs); ++ } ++ } ++ + creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(&buffer, creator_position, creator_offset); - -- hlsl_block_cleanup(&block); ++ + sm4_free_extern_resources(extern_resources, extern_resources_count); - -- sm4_generate_vsir_block(ctx, &func->body, program); ++ + if (buffer.status) + { + vkd3d_free(buffer.data); + ctx->result = buffer.status; -+ return; + return; +- program->temp_count = max(program->temp_count, temp_count); + } + rdef->code = buffer.data; + rdef->size = buffer.size; - } - --/* OBJECTIVE: Translate all the information from ctx and entry_func to the -- * vsir_program, so it can be used as input to tpf_compile() without relying -- * on ctx and entry_func. */ --static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -- uint64_t config_flags, struct vsir_program *program) ++} ++ +static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, + bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc) - { -- struct vkd3d_shader_version version = {0}; ++{ + struct hlsl_ir_node *const_node, *store; - -- version.major = ctx->profile->major_version; -- version.minor = ctx->profile->minor_version; -- version.type = ctx->profile->type; ++ + if (!(const_node = hlsl_new_bool_constant(ctx, val, loc))) + return false; + hlsl_block_add_instr(block, const_node); @@ -9958,10 +10659,12 @@ index d11ff481f6b..c666599b342 100644 + if (!(store = hlsl_new_simple_store(ctx, var, const_node))) + return false; + hlsl_block_add_instr(block, store); -+ + +- hlsl_block_init(&block); + return true; +} -+ + +- LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) +static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued); + @@ -9972,12 +10675,12 @@ index d11ff481f6b..c666599b342 100644 + struct hlsl_ir_var *var; + struct hlsl_block draft; + struct hlsl_ir_if *iff; - -- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) ++ + if (node->type == HLSL_IR_IF) { -- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -- return; +- if ((var->is_input_semantic && var->last_read) +- || (var->is_output_semantic && var->first_write)) +- sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); + iff = hlsl_ir_if(node); + if (loop_unrolling_remove_jumps_recurse(ctx, &iff->then_block, loop_broken, loop_continued)) + return true; @@ -9986,76 +10689,112 @@ index d11ff481f6b..c666599b342 100644 + return false; } -- generate_vsir_signature(ctx, program, func); -- if (version.type == VKD3D_SHADER_TYPE_HULL) -- generate_vsir_signature(ctx, program, ctx->patch_constant_func); +- if (temp_count) +- sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); - -- if (version.type == VKD3D_SHADER_TYPE_COMPUTE) +- 
LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + if (node->type == HLSL_IR_JUMP) { -- program->thread_group_size.x = ctx->thread_count[0]; -- program->thread_group_size.y = ctx->thread_count[1]; -- program->thread_group_size.z = ctx->thread_count[2]; +- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) +- { +- if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) +- continue; +- if (!var->regs[HLSL_REGSET_NUMERIC].allocated) +- continue; + jump = hlsl_ir_jump(node); + if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE && jump->type != HLSL_IR_JUMP_BREAK) + return false; -+ + +- if (var->indexable) +- { +- unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; +- unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; + hlsl_block_init(&draft); -+ + +- sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); +- } +- } +- } + if (jump->type == HLSL_IR_JUMP_UNRESOLVED_CONTINUE) + var = loop_continued; + else + var = loop_broken; -+ + +- list_move_head(&func->body.instrs, &block.instrs); + if (!loop_unrolling_generate_const_bool_store(ctx, var, true, &draft, &jump->node.loc)) + return false; -+ + +- hlsl_block_cleanup(&block); + list_move_before(&jump->node.entry, &draft.instrs); + list_remove(&jump->node.entry); + hlsl_free_instr(&jump->node); -+ -+ return true; - } -- sm4_generate_vsir_add_function(ctx, func, config_flags, program); -- if (version.type == VKD3D_SHADER_TYPE_HULL) -- sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); +- sm4_generate_vsir_block(ctx, &func->body, program); ++ return true; ++ } ++ + return false; } --static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -- struct hlsl_block **found_block) +-/* OBJECTIVE: Translate all the information from ctx and entry_func to the +- * vsir_program, so it can be used as input to tpf_compile() without relying +- * on ctx and entry_func. 
*/ +-static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, +- uint64_t config_flags, struct vsir_program *program) +static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx, + struct hlsl_block *dst, struct hlsl_ir_var *var, struct vkd3d_shader_location *loc) { -- struct hlsl_ir_node *node; +- struct vkd3d_shader_version version = {0}; + struct hlsl_ir_node *cond, *iff; + struct hlsl_block then_block; + struct hlsl_ir_load *load; -- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) +- version.major = ctx->profile->major_version; +- version.minor = ctx->profile->minor_version; +- version.type = ctx->profile->type; + hlsl_block_init(&then_block); -+ + +- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) +- { +- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; +- return; +- } + if (!(load = hlsl_new_var_load(ctx, var, loc))) + return NULL; + hlsl_block_add_instr(dst, &load->node); -+ + +- generate_vsir_signature(ctx, program, func); +- if (version.type == VKD3D_SHADER_TYPE_HULL) +- generate_vsir_signature(ctx, program, ctx->patch_constant_func); + if (!(cond = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, loc))) + return NULL; + hlsl_block_add_instr(dst, cond); -+ + +- if (version.type == VKD3D_SHADER_TYPE_COMPUTE) +- { +- program->thread_group_size.x = ctx->thread_count[0]; +- program->thread_group_size.y = ctx->thread_count[1]; +- program->thread_group_size.z = ctx->thread_count[2]; +- } + if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc))) + return NULL; + hlsl_block_add_instr(dst, iff); -+ + +- sm4_generate_vsir_add_function(ctx, func, config_flags, program); +- if (version.type == VKD3D_SHADER_TYPE_HULL) +- sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); + return hlsl_ir_if(iff); -+} -+ + } + +-static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, +- struct hlsl_block **found_block) +static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) -+{ + { +- struct hlsl_ir_node *node; + struct hlsl_ir_node *node, *next; -+ + +- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + LIST_FOR_EACH_ENTRY_SAFE(node, next, &block->instrs, struct hlsl_ir_node, entry) { - if (node == stop_point) @@ -10111,7 +10850,7 @@ index d11ff481f6b..c666599b342 100644 } static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) -@@ -8696,7 +11290,7 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru +@@ -8696,7 +11514,7 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru return loop->unroll_limit; /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ @@ -10120,7 +10859,7 @@ index d11ff481f6b..c666599b342 100644 return 1024; /* SM4 limits implicit unrolling to 254 iterations. 
*/ -@@ -8707,167 +11301,279 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru +@@ -8707,167 +11525,279 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru return 1024; } @@ -10128,7 +10867,8 @@ index d11ff481f6b..c666599b342 100644 - struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) +static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state, unsigned int *index) -+{ + { +- unsigned int max_iterations, i; + size_t scopes_depth = state->scope_count - 1; + unsigned int current_index; + bool progress; @@ -10167,8 +10907,7 @@ index d11ff481f6b..c666599b342 100644 +} + +static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop) - { -- unsigned int max_iterations, i; ++{ + struct hlsl_block draft, tmp_dst, loop_body; + struct hlsl_ir_var *broken, *continued; + unsigned int max_iterations, i, index; @@ -10501,7 +11240,112 @@ index d11ff481f6b..c666599b342 100644 } static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) -@@ -9107,16 +11813,17 @@ static void process_entry_function(struct hlsl_ctx *ctx, +@@ -9026,6 +11956,95 @@ static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru + return true; + } + ++static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *call, *rhs, *store; ++ struct hlsl_ir_function_decl *func; ++ unsigned int component_count; ++ struct hlsl_ir_load *load; ++ struct hlsl_ir_expr *expr; ++ struct hlsl_ir_var *lhs; ++ const char *template; ++ char *body; ++ ++ static const char template_sm2[] = ++ "typedef bool%u boolX;\n" ++ "typedef float%u floatX;\n" ++ "boolX isinf(floatX x)\n" ++ "{\n" ++ " floatX v = 1 / x;\n" ++ " v = v * v;\n" ++ " return v <= 0;\n" ++ "}\n"; ++ ++ static const char template_sm3[] = ++ "typedef bool%u boolX;\n" ++ "typedef float%u floatX;\n" ++ "boolX isinf(floatX x)\n" ++ "{\n" ++ " floatX v = 1 / x;\n" ++ " return v <= 0;\n" ++ "}\n"; ++ ++ static const char template_sm4[] = ++ "typedef bool%u boolX;\n" ++ "typedef float%u floatX;\n" ++ "boolX isinf(floatX x)\n" ++ "{\n" ++ " return (asuint(x) & 0x7fffffff) == 0x7f800000;\n" ++ "}\n"; ++ ++ static const char template_int[] = ++ "typedef bool%u boolX;\n" ++ "typedef float%u floatX;\n" ++ "boolX isinf(floatX x)\n" ++ "{\n" ++ " return false;\n" ++ "}"; ++ ++ if (node->type != HLSL_IR_EXPR) ++ return false; ++ ++ expr = hlsl_ir_expr(node); ++ ++ if (expr->op != HLSL_OP1_ISINF) ++ return false; ++ ++ rhs = expr->operands[0].node; ++ ++ if (hlsl_version_lt(ctx, 3, 0)) ++ template = template_sm2; ++ else if (hlsl_version_lt(ctx, 4, 0)) ++ template = template_sm3; ++ else if (type_is_integer(rhs->data_type)) ++ template = template_int; ++ else ++ template = template_sm4; ++ ++ component_count = hlsl_type_component_count(rhs->data_type); ++ if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) ++ return false; ++ ++ if (!(func = hlsl_compile_internal_function(ctx, "isinf", body))) ++ return false; ++ ++ lhs = func->parameters.vars[0]; ++ ++ if (!(store = hlsl_new_simple_store(ctx, lhs, rhs))) ++ return false; ++ hlsl_block_add_instr(block, store); ++ ++ if (!(call = hlsl_new_call(ctx, func, &node->loc))) ++ return false; ++ hlsl_block_add_instr(block, call); ++ ++ if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc))) ++ return false; ++ 
hlsl_block_add_instr(block, &load->node); ++ ++ return true; ++} ++ + static void process_entry_function(struct hlsl_ctx *ctx, + const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func) + { +@@ -9059,6 +12078,8 @@ static void process_entry_function(struct hlsl_ctx *ctx, + lower_ir(ctx, lower_f32tof16, body); + } + ++ lower_ir(ctx, lower_isinf, body); ++ + lower_return(ctx, entry_func, body, false); + + while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); +@@ -9107,16 +12128,17 @@ static void process_entry_function(struct hlsl_ctx *ctx, append_output_var_copy(ctx, entry_func, entry_func->return_var); } @@ -10521,7 +11365,7 @@ index d11ff481f6b..c666599b342 100644 hlsl_run_const_passes(ctx, body); remove_unreachable_code(ctx, body); -@@ -9126,9 +11833,13 @@ static void process_entry_function(struct hlsl_ctx *ctx, +@@ -9126,9 +12148,13 @@ static void process_entry_function(struct hlsl_ctx *ctx, lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_int_dot, body); @@ -10536,7 +11380,7 @@ index d11ff481f6b..c666599b342 100644 hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); do -@@ -9136,7 +11847,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, +@@ -9136,7 +12162,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, while (hlsl_transform_ir(ctx, dce, body, NULL)); hlsl_transform_ir(ctx, track_components_usage, body, NULL); @@ -10548,7 +11392,7 @@ index d11ff481f6b..c666599b342 100644 if (profile->major_version < 4) { -@@ -9241,14 +11955,16 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -9241,14 +12270,16 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry mark_indexable_vars(ctx, entry_func); allocate_temp_registers(ctx, entry_func); allocate_const_registers(ctx, entry_func); @@ -10566,7 +11410,7 @@ index d11ff481f6b..c666599b342 100644 if (TRACE_ON()) rb_for_each_entry(&ctx->functions, dump_function, ctx); -@@ -9265,7 +11981,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -9265,7 +12296,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry struct vsir_program program; int result; @@ -10579,7 +11423,7 @@ index d11ff481f6b..c666599b342 100644 if (ctx->result) { vsir_program_cleanup(&program); -@@ -9282,18 +12002,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -9282,18 +12317,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry case VKD3D_SHADER_TARGET_DXBC_TPF: { uint32_t config_flags = vkd3d_shader_init_config_flags(); @@ -14013,7 +14857,7 @@ index b0e89bededb..3678ad0bacf 100644 if (TRACE_ON()) vsir_program_trace(program); diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index df3edeaa4e6..bb85e62e94c 100644 +index df3edeaa4e6..e783128e236 100644 --- a/libs/vkd3d/libs/vkd3d-shader/msl.c +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c @@ -41,6 +41,8 @@ struct msl_generator @@ -14025,7 +14869,7 @@ index df3edeaa4e6..bb85e62e94c 100644 const struct vkd3d_shader_interface_info *interface_info; const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; }; -@@ -153,6 +155,72 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, +@@ -153,6 +155,71 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, msl_print_register_datatype(buffer, gen, reg->data_type); break; @@ -14072,7 +14916,6 @@ index df3edeaa4e6..bb85e62e94c 100644 
+ vkd3d_string_buffer_printf(buffer, "uint4(%#xu, %#xu, %#xu, %#xu)", + reg->u.immconst_u32[0], reg->u.immconst_u32[1], + reg->u.immconst_u32[2], reg->u.immconst_u32[3]); -+ vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); + break; + case VKD3D_DATA_FLOAT: + vkd3d_string_buffer_printf(buffer, "as_type(uint4(%#xu, %#xu, %#xu, %#xu))", @@ -14098,7 +14941,7 @@ index df3edeaa4e6..bb85e62e94c 100644 case VKD3DSPR_CONSTBUFFER: if (reg->idx_count != 3) { -@@ -215,19 +283,43 @@ static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen, +@@ -215,19 +282,43 @@ static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen, const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) { const struct vkd3d_shader_register *reg = &vsir_src->reg; @@ -14147,7 +14990,7 @@ index df3edeaa4e6..bb85e62e94c 100644 } static void msl_dst_cleanup(struct msl_dst *dst, struct vkd3d_string_buffer_cache *cache) -@@ -253,7 +345,8 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, +@@ -253,7 +344,8 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, msl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers); msl_print_register_name(msl_dst->register_name, gen, &vsir_dst->reg); @@ -14157,7 +15000,7 @@ index df3edeaa4e6..bb85e62e94c 100644 return write_mask; } -@@ -261,22 +354,29 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, +@@ -261,22 +353,29 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, static void VKD3D_PRINTF_FUNC(3, 4) msl_print_assignment( struct msl_generator *gen, struct msl_dst *dst, const char *format, ...) { @@ -14189,7 +15032,7 @@ index df3edeaa4e6..bb85e62e94c 100644 vkd3d_string_buffer_printf(gen->buffer, ";\n"); } -@@ -288,6 +388,164 @@ static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_i +@@ -288,6 +387,164 @@ static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_i "Internal compiler error: Unhandled instruction %#x.", ins->opcode); } @@ -14354,7 +15197,7 @@ index df3edeaa4e6..bb85e62e94c 100644 static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { struct msl_src src; -@@ -303,6 +561,31 @@ static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruc +@@ -303,6 +560,31 @@ static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruc msl_dst_cleanup(&dst, &gen->string_buffers); } @@ -14386,7 +15229,7 @@ index df3edeaa4e6..bb85e62e94c 100644 static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { msl_print_indent(gen->buffer, gen->indent); -@@ -315,17 +598,119 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d +@@ -315,17 +597,119 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d switch (ins->opcode) { @@ -14509,7 +15352,7 @@ index df3edeaa4e6..bb85e62e94c 100644 default: msl_unhandled(gen, ins); break; -@@ -489,6 +874,16 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) +@@ -489,6 +873,16 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) if (e->sysval_semantic) { @@ -14526,7 +15369,7 @@ index df3edeaa4e6..bb85e62e94c 100644 msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); continue; -@@ -501,13 +896,6 @@ static void 
msl_generate_input_struct_declarations(struct msl_generator *gen) +@@ -501,13 +895,6 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) continue; } @@ -14540,7 +15383,7 @@ index df3edeaa4e6..bb85e62e94c 100644 if(e->register_count > 1) { msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -@@ -551,6 +939,18 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) +@@ -551,6 +938,18 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) break; } @@ -14559,7 +15402,7 @@ index df3edeaa4e6..bb85e62e94c 100644 vkd3d_string_buffer_printf(buffer, ";\n"); } -@@ -602,6 +1002,14 @@ static void msl_generate_output_struct_declarations(struct msl_generator *gen) +@@ -602,6 +1001,14 @@ static void msl_generate_output_struct_declarations(struct msl_generator *gen) { e = &signature->elements[i]; @@ -14574,7 +15417,7 @@ index df3edeaa4e6..bb85e62e94c 100644 if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) continue; -@@ -690,6 +1098,10 @@ static void msl_generate_entrypoint_prologue(struct msl_generator *gen) +@@ -690,6 +1097,10 @@ static void msl_generate_entrypoint_prologue(struct msl_generator *gen) vkd3d_string_buffer_printf(buffer, " = input.shader_in_%u", i); msl_print_write_mask(buffer, e->mask); } @@ -14585,7 +15428,7 @@ index df3edeaa4e6..bb85e62e94c 100644 else { vkd3d_string_buffer_printf(buffer, " = ", e->sysval_semantic); -@@ -711,6 +1123,12 @@ static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) +@@ -711,6 +1122,12 @@ static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) { e = &signature->elements[i]; @@ -14598,7 +15441,7 @@ index df3edeaa4e6..bb85e62e94c 100644 if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) continue; -@@ -770,9 +1188,14 @@ static void msl_generate_entrypoint(struct msl_generator *gen) +@@ -770,9 +1187,14 @@ static void msl_generate_entrypoint(struct msl_generator *gen) vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_out[%u];\n", gen->prefix, 32); vkd3d_string_buffer_printf(gen->buffer, " vkd3d_%s_out output;\n", gen->prefix); @@ -14613,7 +15456,7 @@ index df3edeaa4e6..bb85e62e94c 100644 if (gen->descriptor_info->descriptor_count) vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); vkd3d_string_buffer_printf(gen->buffer, ");\n"); -@@ -790,6 +1213,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader +@@ -790,6 +1212,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); @@ -14622,7 +15465,7 @@ index df3edeaa4e6..bb85e62e94c 100644 if (gen->program->global_flags) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -@@ -808,6 +1233,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader +@@ -808,6 +1232,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader "void %s_main(thread vkd3d_vec4 *v, " "thread vkd3d_vec4 *o", gen->prefix); @@ -14631,7 +15474,7 @@ index df3edeaa4e6..bb85e62e94c 100644 if (gen->descriptor_info->descriptor_count) vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix); vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); -@@ -887,7 +1314,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, +@@ -887,7 +1313,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) return ret; @@ -14653,7 +15496,7 @@ index 4a8d0fddae1..d167415c356 100644 #undef ERROR /* defined in wingdi.h */ diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index bdfd632ad12..efa76983546 100644 +index bdfd632ad12..b1caf61d512 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -34,6 +34,32 @@ @@ -15811,17 +16654,18 @@ index bdfd632ad12..efa76983546 100644 max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) -@@ -10743,6 +10923,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct +@@ -10743,6 +10923,10 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct compiler->input_control_point_count = program->input_control_point_count; compiler->output_control_point_count = program->output_control_point_count; -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler)) ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ++ || (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler))) + spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) spirv_compiler_emit_shader_signature_outputs(compiler); -@@ -10823,7 +11006,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct +@@ -10823,7 +11007,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct if (compile_info->target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT) { struct vkd3d_shader_code text; @@ -15832,7 +16676,7 @@ index bdfd632ad12..efa76983546 100644 vkd3d_shader_free_shader_code(spirv); *spirv = text; diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 9c41e2c2053..aa666086710 100644 +index 9c41e2c2053..1ecfe32de45 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -21,9 +21,7 @@ @@ -18052,7 +18896,7 @@ index 9c41e2c2053..aa666086710 100644 case VKD3DSIH_DCL_TEMPS: tpf_dcl_temps(tpf, ins->declaration.count); break; -@@ -5702,8 +4013,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5702,8 +4013,34 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ 
tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0); break; @@ -18069,6 +18913,15 @@ index 9c41e2c2053..aa666086710 100644 + break; + case VKD3DSIH_ADD: ++ case VKD3DSIH_ATOMIC_AND: ++ case VKD3DSIH_ATOMIC_CMP_STORE: ++ case VKD3DSIH_ATOMIC_IADD: ++ case VKD3DSIH_ATOMIC_IMAX: ++ case VKD3DSIH_ATOMIC_IMIN: ++ case VKD3DSIH_ATOMIC_UMAX: ++ case VKD3DSIH_ATOMIC_UMIN: ++ case VKD3DSIH_ATOMIC_OR: ++ case VKD3DSIH_ATOMIC_XOR: case VKD3DSIH_AND: + case VKD3DSIH_BREAK: + case VKD3DSIH_CASE: @@ -18078,7 +18931,7 @@ index 9c41e2c2053..aa666086710 100644 case VKD3DSIH_DIV: case VKD3DSIH_DP2: case VKD3DSIH_DP3: -@@ -5714,6 +4042,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5714,6 +4051,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_DSY: case VKD3DSIH_DSY_COARSE: case VKD3DSIH_DSY_FINE: @@ -18089,7 +18942,7 @@ index 9c41e2c2053..aa666086710 100644 case VKD3DSIH_EQO: case VKD3DSIH_EXP: case VKD3DSIH_F16TOF32: -@@ -5721,9 +4053,16 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5721,21 +4062,43 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_FRC: case VKD3DSIH_FTOI: case VKD3DSIH_FTOU: @@ -18106,7 +18959,21 @@ index 9c41e2c2053..aa666086710 100644 case VKD3DSIH_IGE: case VKD3DSIH_ILT: case VKD3DSIH_IMAD: -@@ -5735,7 +4074,12 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + case VKD3DSIH_IMAX: + case VKD3DSIH_IMIN: ++ case VKD3DSIH_IMM_ATOMIC_AND: ++ case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: ++ case VKD3DSIH_IMM_ATOMIC_EXCH: ++ case VKD3DSIH_IMM_ATOMIC_IADD: ++ case VKD3DSIH_IMM_ATOMIC_IMAX: ++ case VKD3DSIH_IMM_ATOMIC_IMIN: ++ case VKD3DSIH_IMM_ATOMIC_UMAX: ++ case VKD3DSIH_IMM_ATOMIC_UMIN: ++ case VKD3DSIH_IMM_ATOMIC_OR: ++ case VKD3DSIH_IMM_ATOMIC_XOR: + case VKD3DSIH_IMUL: + case VKD3DSIH_INE: + case VKD3DSIH_INEG: case VKD3DSIH_ISHL: case VKD3DSIH_ISHR: case VKD3DSIH_ITOF: @@ -18119,7 +18986,7 @@ index 9c41e2c2053..aa666086710 100644 case VKD3DSIH_LTO: case VKD3DSIH_MAD: case VKD3DSIH_MAX: -@@ -5747,14 +4091,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5747,14 +4110,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_NOT: case VKD3DSIH_OR: case VKD3DSIH_RCP: @@ -18145,7 +19012,7 @@ index 9c41e2c2053..aa666086710 100644 case VKD3DSIH_UDIV: case VKD3DSIH_UGE: case VKD3DSIH_ULT: -@@ -5772,102 +4127,23 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5772,102 +4146,23 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ } } @@ -18255,7 +19122,7 @@ index 9c41e2c2053..aa666086710 100644 static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = { -@@ -5884,101 +4160,45 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec +@@ -5884,101 +4179,45 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec tpf->buffer = &buffer; @@ -18371,7 +19238,7 @@ index 9c41e2c2053..aa666086710 100644 /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. 
*/ -@@ -5993,7 +4213,6 @@ static void tpf_write_stat(struct tpf_compiler *tpf) +@@ -5993,7 +4232,6 @@ static void tpf_write_stat(struct tpf_compiler *tpf) { struct vkd3d_bytecode_buffer buffer = {0}; const struct sm4_stat *stat = tpf->stat; @@ -18379,7 +19246,7 @@ index 9c41e2c2053..aa666086710 100644 put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]); put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]); -@@ -6025,7 +4244,7 @@ static void tpf_write_stat(struct tpf_compiler *tpf) +@@ -6025,7 +4263,7 @@ static void tpf_write_stat(struct tpf_compiler *tpf) put_u32(&buffer, stat->fields[VKD3D_STAT_LOD]); put_u32(&buffer, 0); /* Sample frequency */ @@ -18388,7 +19255,7 @@ index 9c41e2c2053..aa666086710 100644 { put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_GS_INSTANCES]); put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_CONTROL_POINT_COUNT]); -@@ -6037,15 +4256,19 @@ static void tpf_write_stat(struct tpf_compiler *tpf) +@@ -6037,15 +4275,19 @@ static void tpf_write_stat(struct tpf_compiler *tpf) put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]); } @@ -18415,7 +19282,7 @@ index 9c41e2c2053..aa666086710 100644 { enum vkd3d_shader_type shader_type = program->shader_version.type; struct tpf_compiler tpf = {0}; -@@ -6053,7 +4276,6 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, +@@ -6053,7 +4295,6 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, size_t i; int ret; @@ -18423,7 +19290,7 @@ index 9c41e2c2053..aa666086710 100644 tpf.program = program; tpf.buffer = NULL; tpf.stat = &stat; -@@ -6064,14 +4286,12 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, +@@ -6064,14 +4305,12 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); if (shader_type == VKD3D_SHADER_TYPE_HULL || shader_type == VKD3D_SHADER_TYPE_DOMAIN) tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); @@ -18734,10 +19601,75 @@ index be7c0b73a22..7e8ec156aad 100644 { unsigned int section_count; diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index a55a97f6f2f..6c7bf167910 100644 +index a55a97f6f2f..ce0c3b9128f 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -1255,6 +1255,74 @@ VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint +@@ -327,9 +327,12 @@ static void *vkd3d_fence_worker_main(void *arg) + struct vkd3d_waiting_fence *old_fences, *cur_fences = NULL; + struct vkd3d_fence_worker *worker = arg; + unsigned int i; ++ bool timeline; + + vkd3d_set_thread_name("vkd3d_fence"); + ++ timeline = worker->device->vk_info.KHR_timeline_semaphore; ++ + for (;;) + { + vkd3d_mutex_lock(&worker->mutex); +@@ -357,7 +360,12 @@ static void *vkd3d_fence_worker_main(void *arg) + vkd3d_mutex_unlock(&worker->mutex); + + for (i = 0; i < cur_fence_count; ++i) +- worker->wait_for_gpu_fence(worker, &cur_fences[i]); ++ { ++ if (timeline) ++ vkd3d_wait_for_gpu_timeline_semaphore(worker, &cur_fences[i]); ++ else ++ vkd3d_wait_for_gpu_fence(worker, &cur_fences[i]); ++ } + } + + vkd3d_free(cur_fences); +@@ -379,9 +387,6 @@ static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, + worker->fences = NULL; + worker->fences_size = 0; + +- worker->wait_for_gpu_fence = device->vk_info.KHR_timeline_semaphore +- ? 
vkd3d_wait_for_gpu_timeline_semaphore : vkd3d_wait_for_gpu_fence; +- + vkd3d_mutex_init(&worker->mutex); + + vkd3d_cond_init(&worker->cond); +@@ -399,6 +404,7 @@ static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, + static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, + struct d3d12_device *device) + { ++ unsigned int i; + HRESULT hr; + + TRACE("worker %p.\n", worker); +@@ -416,6 +422,9 @@ static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, + vkd3d_mutex_destroy(&worker->mutex); + vkd3d_cond_destroy(&worker->cond); + ++ for (i = 0; i < worker->fence_count; ++i) ++ d3d12_fence_decref(worker->fences[i].fence); ++ + vkd3d_free(worker->fences); + + return S_OK; +@@ -556,7 +565,8 @@ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence) + fence->old_vk_fences[i] = VK_NULL_HANDLE; + } + +- d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true); ++ if (!device->vk_info.KHR_timeline_semaphore) ++ d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true); + VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL)); + + vkd3d_mutex_unlock(&fence->mutex); +@@ -1255,6 +1265,74 @@ VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint return VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, timeline_semaphore)); } @@ -18812,7 +19744,7 @@ index a55a97f6f2f..6c7bf167910 100644 /* Command buffers */ static void d3d12_command_list_mark_as_invalid(struct d3d12_command_list *list, const char *message, ...) -@@ -1376,18 +1444,6 @@ static bool d3d12_command_allocator_add_framebuffer(struct d3d12_command_allocat +@@ -1376,18 +1454,6 @@ static bool d3d12_command_allocator_add_framebuffer(struct d3d12_command_allocat return true; } @@ -18831,7 +19763,7 @@ index a55a97f6f2f..6c7bf167910 100644 static bool d3d12_command_allocator_add_view(struct d3d12_command_allocator *allocator, struct vkd3d_view *view) { -@@ -1426,37 +1482,71 @@ static bool d3d12_command_allocator_add_transfer_buffer(struct d3d12_command_all +@@ -1426,37 +1492,71 @@ static bool d3d12_command_allocator_add_transfer_buffer(struct d3d12_command_all } static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( @@ -18914,7 +19846,7 @@ index a55a97f6f2f..6c7bf167910 100644 { ERR("Failed to add descriptor pool.\n"); VK_CALL(vkDestroyDescriptorPool(vk_device, vk_pool, NULL)); -@@ -1466,9 +1556,9 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( +@@ -1466,9 +1566,9 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( return vk_pool; } @@ -18927,7 +19859,7 @@ index a55a97f6f2f..6c7bf167910 100644 { struct d3d12_device *device = allocator->device; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -@@ -1478,14 +1568,15 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( +@@ -1478,14 +1578,15 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( VkDescriptorSet vk_descriptor_set; VkResult vr; @@ -18947,7 +19879,7 @@ index a55a97f6f2f..6c7bf167910 100644 set_desc.descriptorSetCount = 1; set_desc.pSetLayouts = &vk_set_layout; if (unbounded) -@@ -1499,16 +1590,17 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( +@@ -1499,16 +1600,17 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) >= 0) return vk_descriptor_set; @@ -18969,7 +19901,7 @@ index 
a55a97f6f2f..6c7bf167910 100644 if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) < 0) { FIXME("Failed to allocate descriptor set from a new pool, vr %d.\n", vr); -@@ -1534,38 +1626,50 @@ static void vkd3d_buffer_destroy(struct vkd3d_buffer *buffer, struct d3d12_devic +@@ -1534,38 +1636,50 @@ static void vkd3d_buffer_destroy(struct vkd3d_buffer *buffer, struct d3d12_devic VK_CALL(vkDestroyBuffer(device->vk_device, buffer->vk_buffer, NULL)); } @@ -19036,7 +19968,7 @@ index a55a97f6f2f..6c7bf167910 100644 } for (i = 0; i < allocator->transfer_buffer_count; ++i) -@@ -1586,11 +1690,10 @@ static void d3d12_command_allocator_free_resources(struct d3d12_command_allocato +@@ -1586,11 +1700,10 @@ static void d3d12_command_allocator_free_resources(struct d3d12_command_allocato } allocator->view_count = 0; @@ -19050,7 +19982,7 @@ index a55a97f6f2f..6c7bf167910 100644 for (i = 0; i < allocator->framebuffer_count; ++i) { -@@ -1647,6 +1750,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo +@@ -1647,6 +1760,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo { struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); unsigned int refcount = vkd3d_atomic_decrement_u32(&allocator->refcount); @@ -19058,7 +19990,7 @@ index a55a97f6f2f..6c7bf167910 100644 TRACE("%p decreasing refcount to %u.\n", allocator, refcount); -@@ -1664,8 +1768,11 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo +@@ -1664,8 +1778,11 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo vkd3d_free(allocator->transfer_buffers); vkd3d_free(allocator->buffer_views); vkd3d_free(allocator->views); @@ -19072,7 +20004,7 @@ index a55a97f6f2f..6c7bf167910 100644 vkd3d_free(allocator->framebuffers); vkd3d_free(allocator->passes); -@@ -1822,6 +1929,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo +@@ -1822,6 +1939,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo struct vkd3d_queue *queue; VkResult vr; HRESULT hr; @@ -19080,7 +20012,7 @@ index a55a97f6f2f..6c7bf167910 100644 if (FAILED(hr = vkd3d_private_store_init(&allocator->private_store))) return hr; -@@ -1851,11 +1959,12 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo +@@ -1851,11 +1969,12 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo return hresult_from_vk_result(vr); } @@ -19097,7 +20029,7 @@ index a55a97f6f2f..6c7bf167910 100644 allocator->passes = NULL; allocator->passes_size = 0; -@@ -1865,9 +1974,11 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo +@@ -1865,9 +1984,11 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo allocator->framebuffers_size = 0; allocator->framebuffer_count = 0; @@ -19112,7 +20044,7 @@ index a55a97f6f2f..6c7bf167910 100644 allocator->views = NULL; allocator->views_size = 0; -@@ -2749,7 +2860,8 @@ static void d3d12_command_list_prepare_descriptors(struct d3d12_command_list *li +@@ -2749,7 +2870,8 @@ static void d3d12_command_list_prepare_descriptors(struct d3d12_command_list *li } vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator, @@ -19122,7 +20054,7 @@ index a55a97f6f2f..6c7bf167910 100644 bindings->descriptor_sets[bindings->descriptor_set_count++] = vk_descriptor_set; } -@@ -2805,15 +2917,8 @@ static bool 
vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des +@@ -2805,15 +2927,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break; } @@ -19140,7 +20072,7 @@ index a55a97f6f2f..6c7bf167910 100644 vk_image_info->sampler = VK_NULL_HANDLE; vk_image_info->imageView = u.view->v.u.vk_image_view; -@@ -2934,10 +3039,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list +@@ -2934,10 +3049,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list } static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *vk_descriptor_write, @@ -19153,7 +20085,7 @@ index a55a97f6f2f..6c7bf167910 100644 switch (root_parameter->parameter_type) { -@@ -2956,6 +3062,7 @@ static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *v +@@ -2956,6 +3072,7 @@ static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *v } root_descriptor = &root_parameter->u.descriptor; @@ -19161,7 +20093,7 @@ index a55a97f6f2f..6c7bf167910 100644 vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_descriptor_write->pNext = NULL; -@@ -3011,7 +3118,7 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list +@@ -3011,7 +3128,7 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list } if (!vk_write_descriptor_set_from_root_descriptor(&descriptor_writes[descriptor_count], @@ -19170,7 +20102,7 @@ index a55a97f6f2f..6c7bf167910 100644 continue; ++descriptor_count; -@@ -3039,8 +3146,8 @@ static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_comma +@@ -3039,8 +3156,8 @@ static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_comma uav_counter_count = state->uav_counters.binding_count; if (!(vk_descriptor_writes = vkd3d_calloc(uav_counter_count, sizeof(*vk_descriptor_writes)))) return; @@ -19181,7 +20113,7 @@ index a55a97f6f2f..6c7bf167910 100644 goto done; for (i = 0; i < uav_counter_count; ++i) -@@ -4612,8 +4719,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, +@@ -4612,8 +4729,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, if (vk_info->KHR_push_descriptor) { @@ -19191,7 +20123,7 @@ index a55a97f6f2f..6c7bf167910 100644 VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point, root_signature->vk_pipeline_layout, 0, 1, &descriptor_write)); } -@@ -4621,7 +4727,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, +@@ -4621,7 +4737,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, { d3d12_command_list_prepare_descriptors(list, bind_point); vk_write_descriptor_set_from_root_descriptor(&descriptor_write, @@ -19200,7 +20132,7 @@ index a55a97f6f2f..6c7bf167910 100644 VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); -@@ -4685,8 +4791,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li +@@ -4685,8 +4801,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li if (vk_info->KHR_push_descriptor) { @@ -19210,7 +20142,7 @@ index a55a97f6f2f..6c7bf167910 100644 VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point, root_signature->vk_pipeline_layout, 0, 1, &descriptor_write)); } -@@ -4694,7 +4799,7 @@ static void 
d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li +@@ -4694,7 +4809,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li { d3d12_command_list_prepare_descriptors(list, bind_point); vk_write_descriptor_set_from_root_descriptor(&descriptor_write, @@ -19219,7 +20151,7 @@ index a55a97f6f2f..6c7bf167910 100644 VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); -@@ -5371,8 +5476,8 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, +@@ -5371,8 +5486,8 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, view->info.texture.vk_view_type, view->format->type, &pipeline); } @@ -19230,8 +20162,73 @@ index a55a97f6f2f..6c7bf167910 100644 { ERR("Failed to allocate descriptor set.\n"); return; +@@ -6345,6 +6460,7 @@ static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) + break; + + case VKD3D_CS_OP_SIGNAL: ++ case VKD3D_CS_OP_SIGNAL_ON_CPU: + d3d12_fence_decref(op->u.signal.fence); + break; + +@@ -7335,6 +7451,7 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * + struct vkd3d_cs_op_data *op; + struct d3d12_fence *fence; + unsigned int i; ++ HRESULT hr; + + queue->is_flushing = true; + +@@ -7368,6 +7485,11 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * + d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); + break; + ++ case VKD3D_CS_OP_SIGNAL_ON_CPU: ++ if (FAILED(hr = d3d12_fence_Signal(&op->u.signal.fence->ID3D12Fence1_iface, op->u.signal.value))) ++ ERR("Failed to signal fence %p, hr %s.\n", op->u.signal.fence, debugstr_hresult(hr)); ++ break; ++ + case VKD3D_CS_OP_EXECUTE: + d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); + break; +@@ -7510,6 +7632,36 @@ void vkd3d_release_vk_queue(ID3D12CommandQueue *queue) + return vkd3d_queue_release(d3d12_queue->vkd3d_queue); + } + ++HRESULT vkd3d_queue_signal_on_cpu(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, uint64_t value) ++{ ++ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ++ struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface); ++ struct vkd3d_cs_op_data *op; ++ HRESULT hr = S_OK; ++ ++ TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); ++ ++ vkd3d_mutex_lock(&command_queue->op_mutex); ++ ++ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) ++ { ++ ERR("Failed to add op.\n"); ++ hr = E_OUTOFMEMORY; ++ goto done; ++ } ++ op->opcode = VKD3D_CS_OP_SIGNAL_ON_CPU; ++ op->u.signal.fence = fence; ++ op->u.signal.value = value; ++ ++ d3d12_fence_incref(fence); ++ ++ d3d12_command_queue_submit_locked(command_queue); ++ ++done: ++ vkd3d_mutex_unlock(&command_queue->op_mutex); ++ return hr; ++} ++ + /* ID3D12CommandSignature */ + static inline struct d3d12_command_signature *impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface) + { diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index e92373a36fa..9aa4adb6c06 100644 +index e92373a36fa..b51e2963efa 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -1473,16 +1473,21 @@ static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_des @@ -19319,6 +20316,149 @@ index e92373a36fa..9aa4adb6c06 100644 if (data->MaxSupportedFeatureLevel < fl && fl <= 
vulkan_info->max_feature_level) data->MaxSupportedFeatureLevel = fl; } +@@ -3574,12 +3557,6 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 + return E_INVALIDARG; + } + +- if (data->Format == DXGI_FORMAT_UNKNOWN) +- { +- data->PlaneCount = 1; +- return S_OK; +- } +- + if (!(format = vkd3d_get_format(device, data->Format, false))) + format = vkd3d_get_format(device, data->Format, true); + if (!format) +@@ -4385,7 +4362,7 @@ static void d3d12_device_get_resource1_allocation_info(struct d3d12_device *devi + { + desc = &resource_descs[i]; + +- if (FAILED(d3d12_resource_validate_desc(desc, device))) ++ if (FAILED(d3d12_resource_validate_desc(desc, device, 0))) + { + WARN("Invalid resource desc.\n"); + goto invalid; +@@ -4716,10 +4693,11 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, + uint64_t base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, + UINT64 *row_sizes, UINT64 *total_bytes) + { +- unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; ++ unsigned int i, sub_resource_idx, plane_idx, miplevel_idx, row_count, row_size, row_pitch; + unsigned int width, height, depth, plane_count, sub_resources_per_plane; + const struct vkd3d_format *format; + uint64_t offset, size, total; ++ DXGI_FORMAT plane_format; + + if (layouts) + memset(layouts, 0xff, sizeof(*layouts) * sub_resource_count); +@@ -4730,20 +4708,19 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, + if (total_bytes) + *total_bytes = ~(uint64_t)0; + +- if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) ++ if (!(format = vkd3d_get_format(device, desc->Format, true))) + { + WARN("Invalid format %#x.\n", desc->Format); + return; + } + +- if (FAILED(d3d12_resource_validate_desc(desc, device))) ++ if (FAILED(d3d12_resource_validate_desc(desc, device, VKD3D_VALIDATE_FORCE_ALLOW_DS))) + { + WARN("Invalid resource desc.\n"); + return; + } + +- plane_count = ((format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) +- && (format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) ? 2 : 1; ++ plane_count = format->plane_count; + sub_resources_per_plane = d3d12_resource_desc_get_sub_resource_count(desc); + + if (!vkd3d_bound_range(first_sub_resource, sub_resource_count, sub_resources_per_plane * plane_count)) +@@ -4754,21 +4731,31 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, + + offset = 0; + total = 0; ++ plane_format = desc->Format; + for (i = 0; i < sub_resource_count; ++i) + { + sub_resource_idx = (first_sub_resource + i) % sub_resources_per_plane; ++ plane_idx = (first_sub_resource + i) / sub_resources_per_plane; + miplevel_idx = sub_resource_idx % desc->MipLevels; ++ ++ if (plane_count > 1) ++ { ++ plane_format = !plane_idx ? DXGI_FORMAT_R32_TYPELESS : DXGI_FORMAT_R8_TYPELESS; ++ format = vkd3d_get_format(device, plane_format, true); ++ } ++ + width = align(d3d12_resource_desc_get_width(desc, miplevel_idx), format->block_width); + height = align(d3d12_resource_desc_get_height(desc, miplevel_idx), format->block_height); + depth = d3d12_resource_desc_get_depth(desc, miplevel_idx); + row_count = height / format->block_height; + row_size = (width / format->block_width) * format->byte_count * format->block_byte_count; +- row_pitch = align(row_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); ++ /* Direct3D 12 requires double the alignment for dual planes. 
*/ ++ row_pitch = align(row_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * plane_count); + + if (layouts) + { + layouts[i].Offset = base_offset + offset; +- layouts[i].Footprint.Format = desc->Format; ++ layouts[i].Footprint.Format = plane_format; + layouts[i].Footprint.Width = width; + layouts[i].Footprint.Height = height; + layouts[i].Footprint.Depth = depth; +@@ -4780,7 +4767,7 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, + row_sizes[i] = row_size; + + size = max(0, row_count - 1) * row_pitch + row_size; +- size = max(0, depth - 1) * align(size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + size; ++ size = max(0, depth - 1) * align(size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * plane_count) + size; + + total = offset + size; + offset = align(total, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index 1f7d90eb95f..eab97715944 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -951,7 +951,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + HRESULT hr; + + VKD3D_ASSERT(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); +- VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device) == S_OK); ++ VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device, 0) == S_OK); + + if (!desc->MipLevels) + { +@@ -1847,7 +1847,7 @@ static bool d3d12_resource_validate_texture_alignment(const D3D12_RESOURCE_DESC1 + return true; + } + +-HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device) ++HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device, uint32_t flags) + { + const D3D12_MIP_REGION *mip_region = &desc->SamplerFeedbackMipRegion; + const struct vkd3d_format *format; +@@ -1893,7 +1893,8 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 + return E_INVALIDARG; + } + +- if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) ++ if (!(format = vkd3d_get_format(device, desc->Format, ++ desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL || flags & VKD3D_VALIDATE_FORCE_ALLOW_DS))) + { + WARN("Invalid format %#x.\n", desc->Format); + return E_INVALIDARG; +@@ -2013,7 +2014,7 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 + resource->gpu_address = 0; + resource->flags = 0; + +- if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc, device))) ++ if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc, device, 0))) + return hr; + + resource->format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0); diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 2b0f81d3812..bd3c3758ecb 100644 --- a/libs/vkd3d/libs/vkd3d/state.c @@ -19778,11 +20918,24 @@ index 2b0f81d3812..bd3c3758ecb 100644 if (FAILED(hr = d3d12_root_signature_init_push_constants(root_signature, desc, root_signature->push_constant_ranges, &root_signature->push_constant_range_count))) +diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c +index 839bb173854..c2832a61f67 100644 +--- a/libs/vkd3d/libs/vkd3d/utils.c ++++ b/libs/vkd3d/libs/vkd3d/utils.c +@@ -29,7 +29,7 @@ + #define UINT VKD3D_FORMAT_TYPE_UINT + static const struct vkd3d_format vkd3d_formats[] = + { +- {DXGI_FORMAT_UNKNOWN, VK_FORMAT_UNDEFINED, 1, 1, 1, 1}, ++ {DXGI_FORMAT_UNKNOWN, VK_FORMAT_UNDEFINED, 1, 1, 1, 1, 0, 1}, + {DXGI_FORMAT_R32G32B32A32_TYPELESS, VK_FORMAT_R32G32B32A32_SFLOAT, 16, 1, 1, 1, 
COLOR, 1, TYPELESS}, + {DXGI_FORMAT_R32G32B32A32_FLOAT, VK_FORMAT_R32G32B32A32_SFLOAT, 16, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R32G32B32A32_UINT, VK_FORMAT_R32G32B32A32_UINT, 16, 1, 1, 1, COLOR, 1, UINT}, diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index 97a99782d6a..8488d5db3fa 100644 +index 97a99782d6a..fd1fbb1679a 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -58,12 +58,17 @@ +@@ -58,12 +58,19 @@ #define VKD3D_MAX_VK_SYNC_OBJECTS 4u #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u #define VKD3D_MAX_DESCRIPTOR_SETS 64u @@ -19800,9 +20953,29 @@ index 97a99782d6a..8488d5db3fa 100644 +#define VKD3D_INITIAL_DESCRIPTORS_POOL_SIZE 1024u + +#define VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT (VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER + 1) ++ ++#define VKD3D_VALIDATE_FORCE_ALLOW_DS 0x1u extern uint64_t object_global_serial_id; +@@ -235,8 +242,6 @@ struct vkd3d_fence_worker + struct vkd3d_waiting_fence *fences; + size_t fences_size; + +- void (*wait_for_gpu_fence)(struct vkd3d_fence_worker *worker, const struct vkd3d_waiting_fence *enqueued_fence); +- + struct vkd3d_queue *queue; + struct d3d12_device *device; + }; +@@ -529,7 +534,7 @@ struct vkd3d_resource_allocation_info + }; + + bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource); +-HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device); ++HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device, uint32_t flags); + void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, + UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, + UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, @@ -770,6 +775,25 @@ static inline struct d3d12_dsv_desc *d3d12_dsv_desc_from_cpu_handle(D3D12_CPU_DE void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc); @@ -19901,7 +21074,15 @@ index 97a99782d6a..8488d5db3fa 100644 struct vkd3d_view **views; size_t views_size; -@@ -1516,8 +1554,6 @@ struct vkd3d_desc_object_cache +@@ -1324,6 +1362,7 @@ enum vkd3d_cs_op + { + VKD3D_CS_OP_WAIT, + VKD3D_CS_OP_SIGNAL, ++ VKD3D_CS_OP_SIGNAL_ON_CPU, + VKD3D_CS_OP_EXECUTE, + VKD3D_CS_OP_UPDATE_MAPPINGS, + VKD3D_CS_OP_COPY_MAPPINGS, +@@ -1516,8 +1555,6 @@ struct vkd3d_desc_object_cache size_t size; }; @@ -19910,7 +21091,7 @@ index 97a99782d6a..8488d5db3fa 100644 /* ID3D12Device */ struct d3d12_device { -@@ -1536,8 +1572,7 @@ struct d3d12_device +@@ -1536,8 +1573,7 @@ struct d3d12_device struct vkd3d_desc_object_cache view_desc_cache; struct vkd3d_desc_object_cache cbuffer_desc_cache; diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-5bfcd811824e9ca03c09a54204bff645225.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-5bfcd811824e9ca03c09a54204bff645225.patch new file mode 100644 index 00000000..e0323d7b --- /dev/null +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-5bfcd811824e9ca03c09a54204bff645225.patch @@ -0,0 +1,570 @@ +From 220f08317515ea0cab6fa5a2eeaf4835486a6495 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Thu, 23 Jan 2025 07:16:22 +1100 +Subject: [PATCH] Updated vkd3d to 5bfcd811824e9ca03c09a54204bff645225c3408. 
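The hunks below rework HLSL overload resolution in hlsl.y: each argument/parameter pair is summarised as a bitmask of conversion kinds, with worse conversions occupying higher bit positions, and candidates are ranked by comparing the OR of those masks. What follows is a minimal standalone C sketch of that ranking idea only; it is not part of the patch, and the type `arg_desc` and helpers `conversion_mask()`/`better_candidate()` are invented names used purely for illustration.

    #include <stdbool.h>
    #include <stdint.h>

    enum
    {
        CONV_COUNT_WIDENING  = 1u << 0, /* e.g. int2 argument for an int4 parameter */
        CONV_TYPE_NARROWING  = 1u << 1, /* e.g. float argument for a half parameter */
        CONV_TYPE_MISMATCH   = 1u << 2, /* e.g. int argument for a uint parameter */
        CONV_CLASS_MISMATCH  = 1u << 3, /* e.g. int argument for a float parameter */
        CONV_COUNT_NARROWING = 1u << 4, /* e.g. float4 argument for a float2 parameter */
    };

    /* Hypothetical stand-in for the numeric type information vkd3d keeps per
     * parameter/argument: component count, component bit width, base type and
     * the class of that base type. */
    struct arg_desc
    {
        unsigned int count, width;
        int type, class;
    };

    static uint32_t conversion_mask(const struct arg_desc *param, const struct arg_desc *arg)
    {
        uint32_t mask = 0;

        if (arg->count > param->count)
            mask |= CONV_COUNT_NARROWING;
        if (arg->class != param->class)
            mask |= CONV_CLASS_MISMATCH;
        if (arg->type != param->type)
            mask |= CONV_TYPE_MISMATCH;
        if (arg->width > param->width)
            mask |= CONV_TYPE_NARROWING;
        if (arg->count < param->count)
            mask |= CONV_COUNT_WIDENING;

        return mask;
    }

    /* > 0: candidate is a better match than ref; < 0: worse; 0: ambiguous. */
    static int better_candidate(const struct arg_desc *candidate, const struct arg_desc *ref,
            const struct arg_desc *args, unsigned int arg_count)
    {
        uint32_t candidate_mask = 0, ref_mask = 0;
        bool any_better = false, any_worse = false;
        unsigned int i;
        int ret;

        for (i = 0; i < arg_count; ++i)
        {
            uint32_t c = conversion_mask(&candidate[i], &args[i]);
            uint32_t r = conversion_mask(&ref[i], &args[i]);

            candidate_mask |= c;
            ref_mask |= r;
            if (c > r)
                any_worse = true;
            else if (c < r)
                any_better = true;
        }

        /* A candidate wins outright if some parameter matches better and none worse. */
        if ((ret = (int)any_better - (int)any_worse))
            return ret;
        /* Otherwise the smaller combined mask (milder conversions overall) wins;
         * an unsigned compare suffices because worse kinds sit in higher bits. */
        return (ref_mask > candidate_mask) - (ref_mask < candidate_mask);
    }
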
+ +--- + libs/vkd3d/libs/vkd3d-common/debug.c | 13 ++ + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 232 +++++++++----------- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 86 ++++++++ + libs/vkd3d/libs/vkd3d-shader/spirv.c | 18 +- + 4 files changed, 214 insertions(+), 135 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c +index 9a92f0ead02..32862024b90 100644 +--- a/libs/vkd3d/libs/vkd3d-common/debug.c ++++ b/libs/vkd3d/libs/vkd3d-common/debug.c +@@ -97,6 +97,17 @@ static void vkd3d_dbg_output(const char *fmt, ...) + va_end(args); + } + ++#if HAVE_PTHREAD_THREADID_NP ++static uint64_t get_pthread_threadid(void) ++{ ++ uint64_t thread_id; ++ ++ pthread_threadid_np(NULL, &thread_id); ++ ++ return thread_id; ++} ++#endif ++ + void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) + { + va_list args; +@@ -108,6 +119,8 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch + vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); + #elif HAVE_GETTID + vkd3d_dbg_output("vkd3d:%u:%s:%s ", gettid(), debug_level_names[level], function); ++#elif HAVE_PTHREAD_THREADID_NP ++ vkd3d_dbg_output("vkd3d:%"PRIu64":%s:%s ", get_pthread_threadid(), debug_level_names[level], function); + #else + vkd3d_dbg_output("vkd3d:%s:%s ", debug_level_names[level], function); + #endif +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index da2f482b148..7dbb051f913 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -359,79 +359,11 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl + if (src_type->class == HLSL_CLASS_NULL) + return node; + +- if (src_type->class > HLSL_CLASS_VECTOR || dst_type->class > HLSL_CLASS_VECTOR) +- { +- unsigned int src_comp_count = hlsl_type_component_count(src_type); +- unsigned int dst_comp_count = hlsl_type_component_count(dst_type); +- struct hlsl_deref var_deref; +- bool broadcast, matrix_cast; +- struct hlsl_ir_load *load; +- struct hlsl_ir_var *var; +- unsigned int dst_idx; +- +- broadcast = hlsl_is_numeric_type(src_type) && src_type->e.numeric.dimx == 1 && src_type->e.numeric.dimy == 1; +- matrix_cast = !broadcast && dst_comp_count != src_comp_count +- && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; +- VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast); +- if (matrix_cast) +- { +- VKD3D_ASSERT(dst_type->e.numeric.dimx <= src_type->e.numeric.dimx); +- VKD3D_ASSERT(dst_type->e.numeric.dimy <= src_type->e.numeric.dimy); +- } +- +- if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, loc))) +- return NULL; +- hlsl_init_simple_deref_from_var(&var_deref, var); +- +- for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx) +- { +- struct hlsl_ir_node *component_load; +- struct hlsl_type *dst_comp_type; +- struct hlsl_block store_block; +- unsigned int src_idx; +- +- if (broadcast) +- { +- src_idx = 0; +- } +- else if (matrix_cast) +- { +- unsigned int x = dst_idx % dst_type->e.numeric.dimx, y = dst_idx / dst_type->e.numeric.dimx; +- +- src_idx = y * src_type->e.numeric.dimx + x; +- } +- else +- { +- src_idx = dst_idx; +- } +- +- dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); +- +- if (!(component_load = hlsl_add_load_component(ctx, block, node, src_idx, loc))) +- return NULL; +- +- if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) +- 
return NULL; +- hlsl_block_add_instr(block, cast); +- +- if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast)) +- return NULL; +- hlsl_block_add_block(block, &store_block); +- } +- +- if (!(load = hlsl_new_var_load(ctx, var, loc))) +- return NULL; +- hlsl_block_add_instr(block, &load->node); ++ if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) ++ return NULL; ++ hlsl_block_add_instr(block, cast); + +- return &load->node; +- } +- else +- { +- if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) +- return NULL; +- hlsl_block_add_instr(block, cast); +- return cast; +- } ++ return cast; + } + + static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -2167,11 +2099,12 @@ static bool invert_swizzle_matrix(const struct hlsl_matrix_swizzle *swizzle, + } + + static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, +- enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) ++ enum parse_assign_op assign_op, struct hlsl_ir_node *rhs, bool is_function_out_arg) + { + struct hlsl_type *lhs_type = lhs->data_type; + unsigned int writemask = 0, width = 0; + bool matrix_writemask = false; ++ bool first_cast = true; + + if (lhs->data_type->class == HLSL_CLASS_ERROR || rhs->data_type->class == HLSL_CLASS_ERROR) + { +@@ -2196,8 +2129,10 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc + + if (hlsl_is_numeric_type(lhs_type)) + { +- writemask = (1 << lhs_type->e.numeric.dimx) - 1; +- width = lhs_type->e.numeric.dimx; ++ unsigned int size = hlsl_type_component_count(lhs_type); ++ ++ writemask = (1 << size) - 1; ++ width = size; + } + + if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) +@@ -2207,8 +2142,26 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc + { + if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) + { +- hlsl_fixme(ctx, &lhs->loc, "Cast on the LHS."); +- return false; ++ struct hlsl_ir_node *cast = lhs; ++ lhs = hlsl_ir_expr(cast)->operands[0].node; ++ ++ if (hlsl_type_component_count(lhs->data_type) != hlsl_type_component_count(cast->data_type)) ++ { ++ hlsl_fixme(ctx, &cast->loc, "Size change on the LHS."); ++ return false; ++ } ++ if (hlsl_version_ge(ctx, 4, 0) && (!is_function_out_arg || !first_cast)) ++ { ++ hlsl_error(ctx, &cast->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, ++ "Base type casts are not allowed on the LHS for profiles >= 4."); ++ return false; ++ } ++ ++ lhs_type = lhs->data_type; ++ if (lhs_type->class == HLSL_CLASS_VECTOR || (lhs_type->class == HLSL_CLASS_MATRIX && matrix_writemask)) ++ lhs_type = hlsl_get_vector_type(ctx, lhs->data_type->e.numeric.type, width); ++ ++ first_cast = false; + } + else if (lhs->type == HLSL_IR_SWIZZLE) + { +@@ -2249,6 +2202,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc + hlsl_block_add_instr(block, new_swizzle); + + lhs = swizzle->val.node; ++ lhs_type = hlsl_get_vector_type(ctx, lhs_type->e.numeric.type, width); + rhs = new_swizzle; + } + else +@@ -2258,6 +2212,12 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc + } + } + ++ /* lhs casts could have resulted in a discrepancy between the ++ * rhs->data_type and the type of the variable that will be ulimately ++ * stored to. This is corrected. 
*/ ++ if (!(rhs = add_cast(ctx, block, rhs, lhs_type, &rhs->loc))) ++ return false; ++ + if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_resource_access(hlsl_ir_index(lhs))) + { + struct hlsl_ir_node *coords = hlsl_ir_index(lhs)->idx.node; +@@ -2413,7 +2373,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d + return false; + hlsl_block_add_instr(block, one); + +- if (!add_assignment(ctx, block, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) ++ if (!add_assignment(ctx, block, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one, false)) + return false; + + if (post) +@@ -3047,80 +3007,87 @@ static unsigned int hlsl_base_type_width(enum hlsl_base_type t) + return 0; + } + +-static int function_parameter_compare(const struct hlsl_ir_var *candidate, +- const struct hlsl_ir_var *ref, const struct hlsl_ir_node *arg) ++static uint32_t get_argument_conversion_mask(const struct hlsl_ir_var *parameter, const struct hlsl_ir_node *arg) + { ++ enum ++ { ++ COMPONENT_COUNT_WIDENING = 1u << 0, ++ COMPONENT_TYPE_NARROWING = 1u << 1, ++ COMPONENT_TYPE_MISMATCH = 1u << 2, ++ COMPONENT_TYPE_CLASS_MISMATCH = 1u << 3, ++ COMPONENT_COUNT_NARROWING = 1u << 4, ++ }; + struct + { + enum hlsl_base_type type; + enum hlsl_base_type class; + unsigned int count, width; +- } c, r, a; +- int ret; ++ } p, a; ++ uint32_t mask = 0; + + /* TODO: Non-numeric types. */ + if (!hlsl_is_numeric_type(arg->data_type)) + return 0; + +- c.type = candidate->data_type->e.numeric.type; +- c.class = hlsl_base_type_class(c.type); +- c.count = hlsl_type_component_count(candidate->data_type); +- c.width = hlsl_base_type_width(c.type); +- +- r.type = ref->data_type->e.numeric.type; +- r.class = hlsl_base_type_class(r.type); +- r.count = hlsl_type_component_count(ref->data_type); +- r.width = hlsl_base_type_width(r.type); ++ p.type = parameter->data_type->e.numeric.type; ++ p.class = hlsl_base_type_class(p.type); ++ p.count = hlsl_type_component_count(parameter->data_type); ++ p.width = hlsl_base_type_width(p.type); + + a.type = arg->data_type->e.numeric.type; + a.class = hlsl_base_type_class(a.type); + a.count = hlsl_type_component_count(arg->data_type); + a.width = hlsl_base_type_width(a.type); + +- /* Prefer candidates without component count narrowing. E.g., given an +- * float4 argument, half4 is a better match than float2. */ +- if ((ret = (a.count > r.count) - (a.count > c.count))) +- return ret; +- +- /* Prefer candidates with matching component type classes. E.g., given a +- * float argument, double is a better match than int. */ +- if ((ret = (a.class == c.class) - (a.class == r.class))) +- return ret; +- +- /* Prefer candidates with matching component types. E.g., given an int +- * argument, int4 is a better match than uint4. */ +- if ((ret = (a.type == c.type) - (a.type == r.type))) +- return ret; +- +- /* Prefer candidates without component type narrowing. E.g., given a float +- * argument, double is a better match than half. */ +- if ((ret = (a.width > r.width) - (a.width > c.width))) +- return ret; +- +- /* Prefer candidates without component count widening. E.g. given a float +- * argument, float is a better match than float2. */ +- return (a.count < r.count) - (a.count < c.count); ++ /* Component count narrowing. E.g., passing a float4 argument to a float2 ++ * or int2 parameter. */ ++ if (a.count > p.count) ++ mask |= COMPONENT_COUNT_NARROWING; ++ /* Different component type classes. E.g., passing an int argument to a ++ * float parameter. 
*/ ++ if (a.class != p.class) ++ mask |= COMPONENT_TYPE_CLASS_MISMATCH; ++ /* Different component types. E.g., passing an int argument to an uint ++ * parameter. */ ++ if (a.type != p.type) ++ mask |= COMPONENT_TYPE_MISMATCH; ++ /* Component type narrowing. E.g., passing a float argument to a half ++ * parameter. */ ++ if (a.width > p.width) ++ mask |= COMPONENT_TYPE_NARROWING; ++ /* Component count widening. E.g., passing an int2 argument to an int4 ++ * parameter. */ ++ if (a.count < p.count) ++ mask |= COMPONENT_COUNT_WIDENING; ++ ++ return mask; + } + + static int function_compare(const struct hlsl_ir_function_decl *candidate, + const struct hlsl_ir_function_decl *ref, const struct parse_initializer *args) + { ++ uint32_t candidate_mask = 0, ref_mask = 0, c, r; + bool any_worse = false, any_better = false; + unsigned int i; + int ret; + + for (i = 0; i < args->args_count; ++i) + { +- ret = function_parameter_compare(candidate->parameters.vars[i], ref->parameters.vars[i], args->args[i]); +- if (ret < 0) ++ candidate_mask |= (c = get_argument_conversion_mask(candidate->parameters.vars[i], args->args[i])); ++ ref_mask |= (r = get_argument_conversion_mask(ref->parameters.vars[i], args->args[i])); ++ ++ if (c > r) + any_worse = true; +- else if (ret > 0) ++ else if (c < r) + any_better = true; + } + + /* We consider a candidate better if at least one parameter is a better + * match, and none are a worse match. */ +- return any_better - any_worse; ++ if ((ret = any_better - any_worse)) ++ return ret; ++ /* Otherwise, consider the kind of conversions across all parameters. */ ++ return vkd3d_u32_compare(ref_mask, candidate_mask); + } + + static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, +@@ -3228,20 +3195,19 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, + break; + arg = args->args[k]; + +- if (!hlsl_types_are_equal(arg->data_type, param->data_type)) +- { +- struct hlsl_ir_node *cast; +- +- if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) +- return NULL; +- args->args[k] = cast; +- arg = cast; +- } +- + if (param->storage_modifiers & HLSL_STORAGE_IN) + { + struct hlsl_ir_node *store; + ++ if (!hlsl_types_are_equal(arg->data_type, param->data_type)) ++ { ++ struct hlsl_ir_node *cast; ++ ++ if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) ++ return NULL; ++ arg = cast; ++ } ++ + if (!(store = hlsl_new_simple_store(ctx, param, arg))) + return NULL; + hlsl_block_add_instr(args->instrs, store); +@@ -3309,7 +3275,7 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, + return NULL; + hlsl_block_add_instr(args->instrs, &load->node); + +- if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) ++ if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node, true)) + return NULL; + } + } +@@ -5494,7 +5460,7 @@ static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op + hlsl_error(ctx, &orig_val->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, + "Output argument to '%s' is const.", name); + +- if (!add_assignment(ctx, params->instrs, orig_val, ASSIGN_OP_ASSIGN, interlocked)) ++ if (!add_assignment(ctx, params->instrs, orig_val, ASSIGN_OP_ASSIGN, interlocked, true)) + return false; + } + +@@ -6431,7 +6397,7 @@ static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_bloc + if (!(load = hlsl_add_load_component(ctx, instrs, src, component, loc))) + return false; + +- if (!add_assignment(ctx, instrs, dest, 
ASSIGN_OP_ASSIGN, load)) ++ if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load, false)) + return false; + + return true; +@@ -6587,7 +6553,7 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc + return false; + hlsl_block_add_instr(block, sample_info); + +- if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info)) ++ if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info, false)) + return false; + } + +@@ -9876,7 +9842,7 @@ assignment_expr: + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression."); + hlsl_block_add_block($3, $1); + destroy_block($1); +- if (!add_assignment(ctx, $3, lhs, $2, rhs)) ++ if (!add_assignment(ctx, $3, lhs, $2, rhs, false)) + YYABORT; + $$ = $3; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 8d817b051ce..20a4a3eaff5 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -1075,6 +1075,90 @@ static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct h + return &coords_load->node; + } + ++static bool lower_complex_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++{ ++ unsigned int src_comp_count, dst_comp_count; ++ struct hlsl_type *src_type, *dst_type; ++ struct hlsl_deref var_deref; ++ bool broadcast, matrix_cast; ++ struct hlsl_ir_load *load; ++ struct hlsl_ir_node *arg; ++ struct hlsl_ir_var *var; ++ unsigned int dst_idx; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ ++ if (hlsl_ir_expr(instr)->op != HLSL_OP1_CAST) ++ return false; ++ ++ arg = hlsl_ir_expr(instr)->operands[0].node; ++ dst_type = instr->data_type; ++ src_type = arg->data_type; ++ ++ if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR) ++ return false; ++ ++ src_comp_count = hlsl_type_component_count(src_type); ++ dst_comp_count = hlsl_type_component_count(dst_type); ++ broadcast = hlsl_is_numeric_type(src_type) && src_type->e.numeric.dimx == 1 && src_type->e.numeric.dimy == 1; ++ matrix_cast = !broadcast && dst_comp_count != src_comp_count ++ && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; ++ ++ VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast); ++ if (matrix_cast) ++ { ++ VKD3D_ASSERT(dst_type->e.numeric.dimx <= src_type->e.numeric.dimx); ++ VKD3D_ASSERT(dst_type->e.numeric.dimy <= src_type->e.numeric.dimy); ++ } ++ ++ if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, &instr->loc))) ++ return false; ++ hlsl_init_simple_deref_from_var(&var_deref, var); ++ ++ for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx) ++ { ++ struct hlsl_ir_node *component_load, *cast; ++ struct hlsl_type *dst_comp_type; ++ struct hlsl_block store_block; ++ unsigned int src_idx; ++ ++ if (broadcast) ++ { ++ src_idx = 0; ++ } ++ else if (matrix_cast) ++ { ++ unsigned int x = dst_idx % dst_type->e.numeric.dimx, y = dst_idx / dst_type->e.numeric.dimx; ++ ++ src_idx = y * src_type->e.numeric.dimx + x; ++ } ++ else ++ { ++ src_idx = dst_idx; ++ } ++ ++ dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); ++ ++ if (!(component_load = hlsl_add_load_component(ctx, block, arg, src_idx, &arg->loc))) ++ return false; ++ ++ if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, &arg->loc))) ++ return false; ++ hlsl_block_add_instr(block, cast); ++ ++ if (!hlsl_new_store_component(ctx, 
&store_block, &var_deref, dst_idx, cast)) ++ return false; ++ hlsl_block_add_block(block, &store_block); ++ } ++ ++ if (!(load = hlsl_new_var_load(ctx, var, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, &load->node); ++ ++ return true; ++} ++ + /* hlsl_ir_swizzle nodes that directly point to a matrix value are only a parse-time construct that + * represents matrix swizzles (e.g. mat._m01_m23) before we know if they will be used in the lhs of + * an assignment or as a value made from different components of the matrix. The former cases should +@@ -6790,6 +6874,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) + { + bool progress; + ++ lower_ir(ctx, lower_complex_casts, body); + lower_ir(ctx, lower_matrix_swizzles, body); + + lower_ir(ctx, lower_broadcasts, body); +@@ -12084,6 +12169,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, + + while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); + ++ lower_ir(ctx, lower_complex_casts, body); + lower_ir(ctx, lower_matrix_swizzles, body); + lower_ir(ctx, lower_index_loads, body); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index b1caf61d512..5be3110ed6d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -8193,6 +8193,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t src0_id, src1_id, type_id, result_id; ++ uint32_t write_mask = dst->write_mask; + unsigned int component_count; + SpvOp op; + +@@ -8223,8 +8224,21 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co + + component_count = vsir_write_mask_component_count(dst->write_mask); + +- src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); +- src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); ++ switch (instruction->opcode) ++ { ++ case VKD3DSIH_DEQO: ++ case VKD3DSIH_DGEO: ++ case VKD3DSIH_DLT: ++ case VKD3DSIH_DNE: ++ write_mask = vkd3d_write_mask_from_component_count(component_count); ++ break; ++ ++ default: ++ break; ++ } ++ ++ src0_id = spirv_compiler_emit_load_src(compiler, &src[0], write_mask); ++ src1_id = spirv_compiler_emit_load_src(compiler, &src[1], write_mask); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); + result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, +-- +2.45.2 + diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-a082daeb56c239b41d67b5df5abceb342c0.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-a082daeb56c239b41d67b5df5abceb342c0.patch deleted file mode 100644 index 667bef7e..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-a082daeb56c239b41d67b5df5abceb342c0.patch +++ /dev/null @@ -1,340 +0,0 @@ -From c3d5c3b03aa6a746797f6e1debf17f0978ed68c0 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Sat, 18 Jan 2025 16:15:28 +1100 -Subject: [PATCH] Updated vkd3d to a082daeb56c239b41d67b5df5abceb342c0b32b9. 
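The revision being removed below contained a lower_casts_to_int pass that rewrites float-to-int casts as x - FRACT(x) plus a correction term (FRACT(x) > 0 && x < 0), with the comparisons emitted as CMP or SLT depending on whether the target is a pixel or vertex shader. The arithmetic identity itself, truncation toward zero built from floor/fract, can be checked with the small standalone C program below; this is an illustrative sketch only, where floorf()/truncf() stand in for the shader operations and trunc_via_fract() is an invented name rather than code from the patch.

    #include <assert.h>
    #include <math.h>
    #include <stdio.h>

    /* trunc(x) = x - fract(x) + (fract(x) > 0 && x < 0), with fract(x) = x - floor(x). */
    static float trunc_via_fract(float x)
    {
        float fract = x - floorf(x);                            /* always in [0, 1) */
        float extra = (fract > 0.0f && x < 0.0f) ? 1.0f : 0.0f;

        /* x - fract is floor(x); for negative non-integers that is one below the
         * truncated value, so the correction term adds it back. */
        return (x - fract) + extra;
    }

    int main(void)
    {
        static const float tests[] = {2.75f, 2.0f, 0.0f, -0.5f, -2.0f, -2.75f};
        unsigned int i;

        for (i = 0; i < sizeof(tests) / sizeof(tests[0]); ++i)
        {
            assert(trunc_via_fract(tests[i]) == truncf(tests[i]));
            printf("%6.2f -> %d\n", tests[i], (int)trunc_via_fract(tests[i]));
        }

        return 0;
    }
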
- ---- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 14 ++ - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 198 +++++++++++++++++++- - libs/vkd3d/libs/vkd3d-shader/msl.c | 1 - - 5 files changed, 207 insertions(+), 8 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 858186a1071..23f54d3edec 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -3238,6 +3238,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP1_F32TOF16] = "f32tof16", - [HLSL_OP1_FLOOR] = "floor", - [HLSL_OP1_FRACT] = "fract", -+ [HLSL_OP1_ISINF] = "isinf", - [HLSL_OP1_LOG2] = "log2", - [HLSL_OP1_LOGIC_NOT] = "!", - [HLSL_OP1_NEG] = "-", -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index d712a325322..4d78dbebb34 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -704,6 +704,7 @@ enum hlsl_ir_expr_op - HLSL_OP1_F32TOF16, - HLSL_OP1_FLOOR, - HLSL_OP1_FRACT, -+ HLSL_OP1_ISINF, - HLSL_OP1_LOG2, - HLSL_OP1_LOGIC_NOT, - HLSL_OP1_NEG, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index e6eaac78994..e5a03067d16 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -4313,6 +4313,19 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx, - return !!add_user_call(ctx, func, params, false, loc); - } - -+static bool intrinsic_isinf(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_type *type = params->args[0]->data_type, *bool_type; -+ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; -+ -+ bool_type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, -+ type->e.numeric.dimx, type->e.numeric.dimy); -+ -+ args[0] = params->args[0]; -+ return !!add_expr(ctx, params->instrs, HLSL_OP1_ISINF, args, bool_type, loc); -+} -+ - static bool intrinsic_ldexp(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -5410,6 +5423,7 @@ intrinsic_functions[] = - {"fmod", 2, true, intrinsic_fmod}, - {"frac", 1, true, intrinsic_frac}, - {"fwidth", 1, true, intrinsic_fwidth}, -+ {"isinf", 1, true, intrinsic_isinf}, - {"ldexp", 2, true, intrinsic_ldexp}, - {"length", 1, true, intrinsic_length}, - {"lerp", 3, true, intrinsic_lerp}, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index c666599b342..cef6a87c8b6 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -2881,6 +2881,7 @@ static bool lower_separate_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - load = hlsl_ir_resource_load(instr); - - if (load->load_type != HLSL_RESOURCE_SAMPLE -+ && load->load_type != HLSL_RESOURCE_SAMPLE_GRAD - && load->load_type != HLSL_RESOURCE_SAMPLE_LOD - && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS) - return false; -@@ -2908,6 +2909,13 @@ static bool lower_separate_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - return false; - vkd3d_string_buffer_printf(name, "%s+%s", sampler->name, resource->name); - -+ if (load->texel_offset.node) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -+ "Texel offsets are not supported on profiles lower than 4.0.\n"); -+ return false; -+ } -+ - TRACE("Lowering to 
combined sampler %s.\n", debugstr_a(name->buffer)); - - if (!(var = hlsl_get_var(ctx->globals, name->buffer))) -@@ -3099,11 +3107,24 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) - return false; - } - --/* Turn CAST to int or uint into FLOOR + REINTERPRET (which is written as a mere MOV). */ -+/* Turn CAST to int or uint as follows: -+ * -+ * CAST(x) = x - FRACT(x) + extra -+ * -+ * where -+ * -+ * extra = FRACT(x) > 0 && x < 0 -+ * -+ * where the comparisons in the extra term are performed using CMP or SLT -+ * depending on whether this is a pixel or vertex shader, respectively. -+ * -+ * A REINTERPET (which is written as a mere MOV) is also applied to the final -+ * result for type consistency. -+ */ - static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; -- struct hlsl_ir_node *arg, *floor, *res; -+ struct hlsl_ir_node *arg, *res; - struct hlsl_ir_expr *expr; - - if (instr->type != HLSL_IR_EXPR) -@@ -3118,12 +3139,83 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - if (arg->data_type->e.numeric.type != HLSL_TYPE_FLOAT && arg->data_type->e.numeric.type != HLSL_TYPE_HALF) - return false; - -- if (!(floor = hlsl_new_unary_expr(ctx, HLSL_OP1_FLOOR, arg, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, floor); -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ { -+ struct hlsl_ir_node *fract, *neg_fract, *has_fract, *floor, *extra, *zero, *one; -+ struct hlsl_constant_value zero_value, one_value; -+ -+ memset(&zero_value, 0, sizeof(zero_value)); -+ if (!(zero = hlsl_new_constant(ctx, arg->data_type, &zero_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, zero); -+ -+ one_value.u[0].f = 1.0; -+ one_value.u[1].f = 1.0; -+ one_value.u[2].f = 1.0; -+ one_value.u[3].f = 1.0; -+ if (!(one = hlsl_new_constant(ctx, arg->data_type, &one_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, one); -+ -+ if (!(fract = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, arg, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, fract); -+ -+ if (!(neg_fract = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, fract, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg_fract); -+ -+ if (!(has_fract = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, neg_fract, zero, one))) -+ return false; -+ hlsl_block_add_instr(block, has_fract); -+ -+ if (!(extra = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, arg, zero, has_fract))) -+ return false; -+ hlsl_block_add_instr(block, extra); -+ -+ if (!(floor = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, neg_fract))) -+ return false; -+ hlsl_block_add_instr(block, floor); -+ -+ if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, floor, extra))) -+ return false; -+ hlsl_block_add_instr(block, res); -+ } -+ else -+ { -+ struct hlsl_ir_node *neg_arg, *is_neg, *fract, *neg_fract, *has_fract, *floor; -+ -+ if (!(neg_arg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg_arg); -+ -+ if (!(is_neg = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg, neg_arg))) -+ return false; -+ hlsl_block_add_instr(block, is_neg); -+ -+ if (!(fract = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, arg, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, fract); -+ -+ if (!(neg_fract = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, fract, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg_fract); -+ -+ if 
(!(has_fract = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, neg_fract, fract))) -+ return false; -+ hlsl_block_add_instr(block, has_fract); -+ -+ if (!(floor = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, neg_fract))) -+ return false; -+ hlsl_block_add_instr(block, floor); -+ -+ if (!(res = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, is_neg, has_fract, floor))) -+ return false; -+ hlsl_block_add_instr(block, res); -+ } - - memset(operands, 0, sizeof(operands)); -- operands[0] = floor; -+ operands[0] = res; - if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) - return false; - hlsl_block_add_instr(block, res); -@@ -6977,7 +7069,8 @@ static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx, - break; - - case HLSL_SAMPLER_DIM_GENERIC: -- /* These can appear in sm4-style combined sample instructions. */ -+ /* These can appear in sm4-style separate sample -+ * instructions that haven't been lowered. */ - hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); - continue; - -@@ -11732,6 +11825,95 @@ static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru - return true; - } - -+static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *call, *rhs, *store; -+ struct hlsl_ir_function_decl *func; -+ unsigned int component_count; -+ struct hlsl_ir_load *load; -+ struct hlsl_ir_expr *expr; -+ struct hlsl_ir_var *lhs; -+ const char *template; -+ char *body; -+ -+ static const char template_sm2[] = -+ "typedef bool%u boolX;\n" -+ "typedef float%u floatX;\n" -+ "boolX isinf(floatX x)\n" -+ "{\n" -+ " floatX v = 1 / x;\n" -+ " v = v * v;\n" -+ " return v <= 0;\n" -+ "}\n"; -+ -+ static const char template_sm3[] = -+ "typedef bool%u boolX;\n" -+ "typedef float%u floatX;\n" -+ "boolX isinf(floatX x)\n" -+ "{\n" -+ " floatX v = 1 / x;\n" -+ " return v <= 0;\n" -+ "}\n"; -+ -+ static const char template_sm4[] = -+ "typedef bool%u boolX;\n" -+ "typedef float%u floatX;\n" -+ "boolX isinf(floatX x)\n" -+ "{\n" -+ " return (asuint(x) & 0x7fffffff) == 0x7f800000;\n" -+ "}\n"; -+ -+ static const char template_int[] = -+ "typedef bool%u boolX;\n" -+ "typedef float%u floatX;\n" -+ "boolX isinf(floatX x)\n" -+ "{\n" -+ " return false;\n" -+ "}"; -+ -+ if (node->type != HLSL_IR_EXPR) -+ return false; -+ -+ expr = hlsl_ir_expr(node); -+ -+ if (expr->op != HLSL_OP1_ISINF) -+ return false; -+ -+ rhs = expr->operands[0].node; -+ -+ if (hlsl_version_lt(ctx, 3, 0)) -+ template = template_sm2; -+ else if (hlsl_version_lt(ctx, 4, 0)) -+ template = template_sm3; -+ else if (type_is_integer(rhs->data_type)) -+ template = template_int; -+ else -+ template = template_sm4; -+ -+ component_count = hlsl_type_component_count(rhs->data_type); -+ if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) -+ return false; -+ -+ if (!(func = hlsl_compile_internal_function(ctx, "isinf", body))) -+ return false; -+ -+ lhs = func->parameters.vars[0]; -+ -+ if (!(store = hlsl_new_simple_store(ctx, lhs, rhs))) -+ return false; -+ hlsl_block_add_instr(block, store); -+ -+ if (!(call = hlsl_new_call(ctx, func, &node->loc))) -+ return false; -+ hlsl_block_add_instr(block, call); -+ -+ if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc))) -+ return false; -+ hlsl_block_add_instr(block, &load->node); -+ -+ return true; -+} -+ - static void process_entry_function(struct hlsl_ctx *ctx, - const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl 
*entry_func) - { -@@ -11765,6 +11947,8 @@ static void process_entry_function(struct hlsl_ctx *ctx, - lower_ir(ctx, lower_f32tof16, body); - } - -+ lower_ir(ctx, lower_isinf, body); -+ - lower_return(ctx, entry_func, body, false); - - while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index bb85e62e94c..e783128e236 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -198,7 +198,6 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, - vkd3d_string_buffer_printf(buffer, "uint4(%#xu, %#xu, %#xu, %#xu)", - reg->u.immconst_u32[0], reg->u.immconst_u32[1], - reg->u.immconst_u32[2], reg->u.immconst_u32[3]); -- vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); - break; - case VKD3D_DATA_FLOAT: - vkd3d_string_buffer_printf(buffer, "as_type(uint4(%#xu, %#xu, %#xu, %#xu))", --- -2.45.2 - diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-5b2d62e59a6365e32aac3fa37fe16ab3582.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-5b2d62e59a6365e32aac3fa37fe16ab3582.patch deleted file mode 100644 index 0cdbc9c1..00000000 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-5b2d62e59a6365e32aac3fa37fe16ab3582.patch +++ /dev/null @@ -1,1119 +0,0 @@ -From 1b071c5cb4a8559f992c0a9cb46b784abe640cb5 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 22 Jan 2025 07:08:19 +1100 -Subject: [PATCH] Updated vkd3d to 5b2d62e59a6365e32aac3fa37fe16ab3582deae4. - ---- - libs/vkd3d/include/vkd3d.h | 29 ++- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 4 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 90 +++++++++ - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 36 ++++ - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 194 ++++++++++++++++++++ - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 131 +++++++++++++ - libs/vkd3d/libs/vkd3d-shader/spirv.c | 3 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 19 ++ - libs/vkd3d/libs/vkd3d/command.c | 57 +++++- - libs/vkd3d/libs/vkd3d/device.c | 34 ++-- - libs/vkd3d/libs/vkd3d/resource.c | 9 +- - libs/vkd3d/libs/vkd3d/utils.c | 2 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 7 +- - 13 files changed, 581 insertions(+), 34 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h -index b18fd14f4c3..2376693421c 100644 ---- a/libs/vkd3d/include/vkd3d.h -+++ b/libs/vkd3d/include/vkd3d.h -@@ -411,9 +411,13 @@ VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue); - * the Vulkan driver as being submitted before other work submitted - * though the Direct3D 12 API. If this is not desired, it is - * recommended to synchronize work submission using an ID3D12Fence -- * object, by submitting to the queue a signal operation after all the -- * Direct3D 12 work is submitted and waiting for it before calling -- * vkd3d_acquire_vk_queue(). -+ * object: -+ * 1. submit work through the Direct3D 12 API; -+ * 2. call vkd3d_queue_signal_on_cpu(); -+ * 3. wait for the fence to be signalled; -+ * 4. call vkd3d_acquire_vk_queue(); it is guaranteed that all work submitted -+ * at point 1 has already been submitted to Vulkan (though not necessarily -+ * executed). - * - * \since 1.0 - */ -@@ -466,6 +470,21 @@ VKD3D_API HRESULT vkd3d_create_versioned_root_signature_deserializer(const void - */ - VKD3D_API void vkd3d_set_log_callback(PFN_vkd3d_log callback); - -+/** -+ * Signal a fence on the CPU once all the currently outstanding queue work is -+ * submitted to Vulkan. 
-+ * -+ * The fence will be signalled on the CPU (as if ID3D12Fence_Signal() was -+ * called) once all the work submitted through the Direct3D 12 API before -+ * vkd3d_queue_signal_on_cpu() is called has left the internal queue and has -+ * been submitted to the underlying Vulkan queue. Read the documentation for -+ * vkd3d_acquire_vk_queue() for more details. -+ * -+ * \since 1.15 -+ */ -+VKD3D_API HRESULT vkd3d_queue_signal_on_cpu(ID3D12CommandQueue *queue, -+ ID3D12Fence *fence, uint64_t value); -+ - #endif /* VKD3D_NO_PROTOTYPES */ - - /* -@@ -512,6 +531,10 @@ typedef HRESULT (*PFN_vkd3d_create_versioned_root_signature_deserializer)(const - /** Type of vkd3d_set_log_callback(). \since 1.4 */ - typedef void (*PFN_vkd3d_set_log_callback)(PFN_vkd3d_log callback); - -+/** Type of vkd3d_queue_signal_on_cpu(). \since 1.15 */ -+typedef HRESULT (*PFN_vkd3d_queue_signal_on_cpu)(ID3D12CommandQueue *queue, -+ ID3D12Fence *fence, uint64_t value); -+ - #ifdef __cplusplus - } - #endif /* __cplusplus */ -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 69e14e0c7bf..0639da83aa6 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -1180,8 +1180,8 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - bool is_sm_5_1 = vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1); - - if (reg->idx[0].rel_addr || reg->type == VKD3DSPR_IMMCONSTBUFFER -- || reg->type == VKD3DSPR_INCONTROLPOINT || (reg->type == VKD3DSPR_INPUT -- && (compiler->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY -+ || reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT -+ || (reg->type == VKD3DSPR_INPUT && (compiler->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY - || compiler->shader_version.type == VKD3D_SHADER_TYPE_HULL))) - { - vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 23f54d3edec..48d9d4e0023 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -2031,6 +2031,25 @@ struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const ch - return &constant->node; - } - -+struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type, -+ const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value, -+ struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_interlocked *interlocked; -+ -+ if (!(interlocked = hlsl_alloc(ctx, sizeof(*interlocked)))) -+ return NULL; -+ -+ init_node(&interlocked->node, HLSL_IR_INTERLOCKED, type, loc); -+ interlocked->op = op; -+ hlsl_copy_deref(ctx, &interlocked->dst, dst); -+ hlsl_src_from_node(&interlocked->coords, coords); -+ hlsl_src_from_node(&interlocked->cmp_value, cmp_value); -+ hlsl_src_from_node(&interlocked->value, value); -+ -+ return &interlocked->node; -+} -+ - bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) - { - struct hlsl_type *type = index->val.node->data_type; -@@ -2375,6 +2394,27 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr - return dst; - } - -+static struct hlsl_ir_node *clone_interlocked(struct hlsl_ctx *ctx, -+ struct clone_instr_map *map, struct hlsl_ir_interlocked *src) -+{ -+ struct hlsl_ir_interlocked *dst; -+ -+ if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) -+ return NULL; -+ 
init_node(&dst->node, HLSL_IR_INTERLOCKED, NULL, &src->node.loc); -+ dst->op = src->op; -+ -+ if (!clone_deref(ctx, map, &dst->dst, &src->dst)) -+ { -+ vkd3d_free(dst); -+ return NULL; -+ } -+ clone_src(map, &dst->coords, &src->coords); -+ clone_src(map, &dst->cmp_value, &src->cmp_value); -+ clone_src(map, &dst->value, &src->value); -+ return &dst->node; -+} -+ - static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx, - struct clone_instr_map *map, struct hlsl_ir_compile *compile) - { -@@ -2575,6 +2615,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, - case HLSL_IR_SWIZZLE: - return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); - -+ case HLSL_IR_INTERLOCKED: -+ return clone_interlocked(ctx, map, hlsl_ir_interlocked(instr)); -+ - case HLSL_IR_COMPILE: - return clone_compile(ctx, map, hlsl_ir_compile(instr)); - -@@ -3013,6 +3056,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) - [HLSL_IR_STORE ] = "HLSL_IR_STORE", - [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", - [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", -+ [HLSL_IR_INTERLOCKED ] = "HLSL_IR_INTERLOCKED", - - [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", - [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", -@@ -3458,6 +3502,35 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ - vkd3d_string_buffer_printf(buffer, "]"); - } - -+static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_interlocked *interlocked) -+{ -+ static const char *const op_names[] = -+ { -+ [HLSL_INTERLOCKED_ADD] = "add", -+ [HLSL_INTERLOCKED_AND] = "and", -+ [HLSL_INTERLOCKED_CMP_EXCH] = "cmp_exch", -+ [HLSL_INTERLOCKED_EXCH] = "exch", -+ [HLSL_INTERLOCKED_MAX] = "max", -+ [HLSL_INTERLOCKED_MIN] = "min", -+ [HLSL_INTERLOCKED_OR] = "or", -+ [HLSL_INTERLOCKED_XOR] = "xor", -+ }; -+ -+ VKD3D_ASSERT(interlocked->op < ARRAY_SIZE(op_names)); -+ vkd3d_string_buffer_printf(buffer, "interlocked_%s(dst = ", op_names[interlocked->op]); -+ dump_deref(buffer, &interlocked->dst); -+ vkd3d_string_buffer_printf(buffer, ", coords = "); -+ dump_src(buffer, &interlocked->coords); -+ if (interlocked->cmp_value.node) -+ { -+ vkd3d_string_buffer_printf(buffer, ", cmp_value = "); -+ dump_src(buffer, &interlocked->cmp_value); -+ } -+ vkd3d_string_buffer_printf(buffer, ", value = "); -+ dump_src(buffer, &interlocked->value); -+ vkd3d_string_buffer_printf(buffer, ")"); -+} -+ - static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - const struct hlsl_ir_compile *compile) - { -@@ -3591,6 +3664,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); - break; - -+ case HLSL_IR_INTERLOCKED: -+ dump_ir_interlocked(buffer, hlsl_ir_interlocked(instr)); -+ break; -+ - case HLSL_IR_COMPILE: - dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr)); - break; -@@ -3819,6 +3896,15 @@ static void free_ir_index(struct hlsl_ir_index *index) - vkd3d_free(index); - } - -+static void free_ir_interlocked(struct hlsl_ir_interlocked *interlocked) -+{ -+ hlsl_cleanup_deref(&interlocked->dst); -+ hlsl_src_remove(&interlocked->coords); -+ hlsl_src_remove(&interlocked->cmp_value); -+ hlsl_src_remove(&interlocked->value); -+ vkd3d_free(interlocked); -+} -+ - static void free_ir_compile(struct hlsl_ir_compile *compile) - { - unsigned int i; -@@ -3905,6 +3991,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) - free_ir_switch(hlsl_ir_switch(node)); - break; - -+ case HLSL_IR_INTERLOCKED: -+ 
free_ir_interlocked(hlsl_ir_interlocked(node)); -+ break; -+ - case HLSL_IR_COMPILE: - free_ir_compile(hlsl_ir_compile(node)); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 4d78dbebb34..e9845f8f887 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -319,6 +319,7 @@ enum hlsl_ir_node_type - HLSL_IR_STORE, - HLSL_IR_SWIZZLE, - HLSL_IR_SWITCH, -+ HLSL_IR_INTERLOCKED, - - HLSL_IR_COMPILE, - HLSL_IR_SAMPLER_STATE, -@@ -950,6 +951,32 @@ struct hlsl_ir_stateblock_constant - char *name; - }; - -+enum hlsl_interlocked_op -+{ -+ HLSL_INTERLOCKED_ADD, -+ HLSL_INTERLOCKED_AND, -+ HLSL_INTERLOCKED_CMP_EXCH, -+ HLSL_INTERLOCKED_EXCH, -+ HLSL_INTERLOCKED_MAX, -+ HLSL_INTERLOCKED_MIN, -+ HLSL_INTERLOCKED_OR, -+ HLSL_INTERLOCKED_XOR, -+}; -+ -+/* Represents an interlocked operation. -+ * -+ * The data_type of the node indicates whether or not the original value is returned. -+ * If the original value is not returned, the data_type is set to NULL. -+ * Otherwise, the data_type is set to the type of the original value. -+ */ -+struct hlsl_ir_interlocked -+{ -+ struct hlsl_ir_node node; -+ enum hlsl_interlocked_op op; -+ struct hlsl_deref dst; -+ struct hlsl_src coords, cmp_value, value; -+}; -+ - struct hlsl_scope - { - /* Item entry for hlsl_ctx.scopes. */ -@@ -1247,6 +1274,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n - return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); - } - -+static inline struct hlsl_ir_interlocked *hlsl_ir_interlocked(const struct hlsl_ir_node *node) -+{ -+ VKD3D_ASSERT(node->type == HLSL_IR_INTERLOCKED); -+ return CONTAINING_RECORD(node, struct hlsl_ir_interlocked, node); -+} -+ - static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node) - { - VKD3D_ASSERT(node->type == HLSL_IR_COMPILE); -@@ -1554,6 +1587,9 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty - struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type, -+ const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value, -+ struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, - struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type, - unsigned int unroll_limit, const struct vkd3d_shader_location *loc); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index e5a03067d16..da2f482b148 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -667,6 +667,7 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx - case HLSL_IR_RESOURCE_LOAD: - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_SWITCH: -+ case HLSL_IR_INTERLOCKED: - case HLSL_IR_STATEBLOCK_CONSTANT: - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Expected literal expression."); -@@ -1322,6 +1323,11 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Parameter '%s' is declared as both \"out\" and \"uniform\".", 
param->name); - -+ if ((param->modifiers & HLSL_STORAGE_OUT) && !(param->modifiers & HLSL_STORAGE_IN) -+ && (param->type->modifiers & HLSL_MODIFIER_CONST)) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Parameter '%s' is declared as both \"out\" and \"const\".", param->name); -+ - if (param->reg_reservation.offset_type) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "packoffset() is not allowed on function parameters."); -@@ -5374,6 +5380,185 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, - return true; - } - -+static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name) -+{ -+ struct hlsl_ir_node *lhs, *coords, *val, *cmp_val = NULL, *orig_val = NULL; -+ struct hlsl_ir_node *interlocked, *void_ret; -+ struct hlsl_type *lhs_type, *val_type; -+ struct vkd3d_string_buffer *string; -+ struct hlsl_deref dst_deref; -+ -+ if (hlsl_version_lt(ctx, 5, 0)) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -+ "Interlocked functions can only be used in shader model 5.0 or higher."); -+ -+ if (op != HLSL_INTERLOCKED_CMP_EXCH && op != HLSL_INTERLOCKED_EXCH -+ && params->args_count != 2 && params->args_count != 3) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Unexpected number of arguments to function '%s': expected 2 or 3, but got %u.", -+ name, params->args_count); -+ return false; -+ } -+ -+ lhs = params->args[0]; -+ lhs_type = lhs->data_type; -+ -+ if (op == HLSL_INTERLOCKED_CMP_EXCH) -+ { -+ cmp_val = params->args[1]; -+ val = params->args[2]; -+ if (params->args_count == 4) -+ orig_val = params->args[3]; -+ } -+ else -+ { -+ val = params->args[1]; -+ if (params->args_count == 3) -+ orig_val = params->args[2]; -+ } -+ -+ if (lhs_type->class != HLSL_CLASS_SCALAR || (lhs_type->e.numeric.type != HLSL_TYPE_UINT -+ && lhs_type->e.numeric.type != HLSL_TYPE_INT)) -+ { -+ if ((string = hlsl_type_to_string(ctx, lhs_type))) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Unexpected type for argument 0 of '%s': expected 'uint' or 'int', but got '%s'.", -+ name, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ } -+ return false; -+ } -+ -+ /* Interlocked*() functions always take uint for the value parameters, -+ * except for InterlockedMax()/InterlockedMin(). */ -+ if (op == HLSL_INTERLOCKED_MAX || op == HLSL_INTERLOCKED_MIN) -+ { -+ enum hlsl_base_type val_base_type = val->data_type->e.numeric.type; -+ -+ /* Floating values are always cast to signed integers. 
*/ -+ if (val_base_type == HLSL_TYPE_FLOAT || val_base_type == HLSL_TYPE_HALF || val_base_type == HLSL_TYPE_DOUBLE) -+ val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT); -+ else -+ val_type = hlsl_get_scalar_type(ctx, lhs_type->e.numeric.type); -+ } -+ else -+ { -+ val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT); -+ } -+ -+ if (cmp_val && !(cmp_val = add_implicit_conversion(ctx, params->instrs, cmp_val, val_type, loc))) -+ return false; -+ if (!(val = add_implicit_conversion(ctx, params->instrs, val, val_type, loc))) -+ return false; -+ -+ /* TODO: groupshared variables */ -+ if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_resource_access(hlsl_ir_index(lhs))) -+ { -+ if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs))) -+ { -+ hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource interlocked targets."); -+ return false; -+ } -+ -+ if (!hlsl_init_deref_from_index_chain(ctx, &dst_deref, hlsl_ir_index(lhs)->val.node)) -+ return false; -+ coords = hlsl_ir_index(lhs)->idx.node; -+ -+ VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ -+ if (hlsl_deref_get_type(ctx, &dst_deref)->class != HLSL_CLASS_UAV) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements."); -+ return false; -+ } -+ } -+ else -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements."); -+ return false; -+ } -+ -+ interlocked = hlsl_new_interlocked(ctx, op, orig_val ? lhs_type : NULL, &dst_deref, coords, cmp_val, val, loc); -+ hlsl_cleanup_deref(&dst_deref); -+ if (!interlocked) -+ return false; -+ hlsl_block_add_instr(params->instrs, interlocked); -+ -+ if (orig_val) -+ { -+ if (orig_val->data_type->modifiers & HLSL_MODIFIER_CONST) -+ hlsl_error(ctx, &orig_val->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, -+ "Output argument to '%s' is const.", name); -+ -+ if (!add_assignment(ctx, params->instrs, orig_val, ASSIGN_OP_ASSIGN, interlocked)) -+ return false; -+ } -+ -+ if (!(void_ret = hlsl_new_void_expr(ctx, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, void_ret); -+ -+ return true; -+} -+ -+static bool intrinsic_InterlockedAdd(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_ADD, params, loc, "InterlockedAdd"); -+} -+ -+static bool intrinsic_InterlockedAnd(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_AND, params, loc, "InterlockedAnd"); -+} -+ -+static bool intrinsic_InterlockedCompareExchange(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_CMP_EXCH, params, loc, "InterlockedCompareExchange"); -+} -+ -+static bool intrinsic_InterlockedCompareStore(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_CMP_EXCH, params, loc, "InterlockedCompareStore"); -+} -+ -+static bool intrinsic_InterlockedExchange(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_EXCH, params, loc, "InterlockedExchange"); -+} -+ -+static bool 
intrinsic_InterlockedMax(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_MAX, params, loc, "InterlockedMax"); -+} -+ -+static bool intrinsic_InterlockedMin(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_MIN, params, loc, "InterlockedMin"); -+} -+ -+static bool intrinsic_InterlockedOr(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_OR, params, loc, "InterlockedOr"); -+} -+ -+static bool intrinsic_InterlockedXor(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_XOR, params, loc, "InterlockedXor"); -+} -+ - static const struct intrinsic_function - { - const char *name; -@@ -5387,6 +5572,15 @@ intrinsic_functions[] = - /* Note: these entries should be kept in alphabetical order. */ - {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, - {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, -+ {"InterlockedAdd", -1, true, intrinsic_InterlockedAdd}, -+ {"InterlockedAnd", -1, true, intrinsic_InterlockedAnd}, -+ {"InterlockedCompareExchange", 4, true, intrinsic_InterlockedCompareExchange}, -+ {"InterlockedCompareStore", 3, true, intrinsic_InterlockedCompareStore}, -+ {"InterlockedExchange", 3, true, intrinsic_InterlockedExchange}, -+ {"InterlockedMax", -1, true, intrinsic_InterlockedMax}, -+ {"InterlockedMin", -1, true, intrinsic_InterlockedMin}, -+ {"InterlockedOr", -1, true, intrinsic_InterlockedOr}, -+ {"InterlockedXor", -1, true, intrinsic_InterlockedXor}, - {"abs", 1, true, intrinsic_abs}, - {"acos", 1, true, intrinsic_acos}, - {"all", 1, true, intrinsic_all}, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index cef6a87c8b6..8d817b051ce 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -739,6 +739,10 @@ static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - res = func(ctx, &hlsl_ir_resource_store(instr)->resource, instr); - return res; - -+ case HLSL_IR_INTERLOCKED: -+ res = func(ctx, &hlsl_ir_interlocked(instr)->dst, instr); -+ return res; -+ - default: - return false; - } -@@ -1836,6 +1840,15 @@ static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx, - return progress; - } - -+static bool copy_propagation_transform_interlocked(struct hlsl_ctx *ctx, -+ struct hlsl_ir_interlocked *interlocked, struct copy_propagation_state *state) -+{ -+ bool progress = false; -+ -+ progress |= copy_propagation_transform_object_load(ctx, &interlocked->dst, state, interlocked->node.index); -+ return progress; -+} -+ - static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, - struct copy_propagation_state *state) - { -@@ -2042,6 +2055,9 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b - progress |= copy_propagation_process_switch(ctx, hlsl_ir_switch(instr), state); - break; - -+ case HLSL_IR_INTERLOCKED: -+ progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state); -+ - default: - break; - } -@@ -2225,6 +2241,24 @@ static bool validate_dereferences(struct 
hlsl_ctx *ctx, struct hlsl_ir_node *ins - validate_component_index_range_from_deref(ctx, &store->lhs); - break; - } -+ case HLSL_IR_INTERLOCKED: -+ { -+ struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr); -+ -+ if (!interlocked->dst.var->is_uniform) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Accessed resource must have a single uniform source."); -+ } -+ else if (validate_component_index_range_from_deref(ctx, &interlocked->dst) == DEREF_VALIDATION_NOT_CONSTANT) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Accessed resource from \"%s\" must be determinable at compile time.", -+ interlocked->dst.var->name); -+ note_non_static_deref_expressions(ctx, &interlocked->dst, "accessed resource"); -+ } -+ break; -+ } - default: - break; - } -@@ -4478,6 +4512,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - case HLSL_IR_LOOP: - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_SWITCH: -+ case HLSL_IR_INTERLOCKED: - break; - case HLSL_IR_STATEBLOCK_CONSTANT: - /* Stateblock constants should not appear in the shader program. */ -@@ -4724,6 +4759,19 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - index->idx.node->last_read = last_read; - break; - } -+ case HLSL_IR_INTERLOCKED: -+ { -+ struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr); -+ -+ var = interlocked->dst.var; -+ var->last_read = max(var->last_read, last_read); -+ deref_mark_last_read(&interlocked->dst, last_read); -+ interlocked->coords.node->last_read = last_read; -+ interlocked->value.node->last_read = last_read; -+ if (interlocked->cmp_value.node) -+ interlocked->cmp_value.node->last_read = last_read; -+ break; -+ } - case HLSL_IR_JUMP: - { - struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -@@ -5135,6 +5183,10 @@ static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource); - break; - -+ case HLSL_IR_INTERLOCKED: -+ register_deref_usage(ctx, &hlsl_ir_interlocked(instr)->dst); -+ break; -+ - default: - break; - } -@@ -9942,6 +9994,81 @@ static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, - } - } - -+static bool sm4_generate_vsir_instr_interlocked(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_interlocked *interlocked) -+{ -+ -+ static const enum vkd3d_shader_opcode opcodes[] = -+ { -+ [HLSL_INTERLOCKED_ADD] = VKD3DSIH_ATOMIC_IADD, -+ [HLSL_INTERLOCKED_AND] = VKD3DSIH_ATOMIC_AND, -+ [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_ATOMIC_CMP_STORE, -+ [HLSL_INTERLOCKED_MAX] = VKD3DSIH_ATOMIC_UMAX, -+ [HLSL_INTERLOCKED_MIN] = VKD3DSIH_ATOMIC_UMIN, -+ [HLSL_INTERLOCKED_OR] = VKD3DSIH_ATOMIC_OR, -+ [HLSL_INTERLOCKED_XOR] = VKD3DSIH_ATOMIC_XOR, -+ }; -+ -+ static const enum vkd3d_shader_opcode imm_opcodes[] = -+ { -+ [HLSL_INTERLOCKED_ADD] = VKD3DSIH_IMM_ATOMIC_IADD, -+ [HLSL_INTERLOCKED_AND] = VKD3DSIH_IMM_ATOMIC_AND, -+ [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_IMM_ATOMIC_CMP_EXCH, -+ [HLSL_INTERLOCKED_EXCH] = VKD3DSIH_IMM_ATOMIC_EXCH, -+ [HLSL_INTERLOCKED_MAX] = VKD3DSIH_IMM_ATOMIC_UMAX, -+ [HLSL_INTERLOCKED_MIN] = VKD3DSIH_IMM_ATOMIC_UMIN, -+ [HLSL_INTERLOCKED_OR] = VKD3DSIH_IMM_ATOMIC_OR, -+ [HLSL_INTERLOCKED_XOR] = VKD3DSIH_IMM_ATOMIC_XOR, -+ }; -+ -+ struct hlsl_ir_node *cmp_value = interlocked->cmp_value.node, *value = interlocked->value.node; -+ struct hlsl_ir_node *coords = interlocked->coords.node; -+ struct hlsl_ir_node 
*instr = &interlocked->node; -+ bool is_imm = interlocked->node.reg.allocated; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_instruction *ins; -+ enum vkd3d_shader_opcode opcode; -+ -+ opcode = is_imm ? imm_opcodes[interlocked->op] : opcodes[interlocked->op]; -+ -+ if (value->data_type->e.numeric.type == HLSL_TYPE_INT) -+ { -+ if (opcode == VKD3DSIH_ATOMIC_UMAX) -+ opcode = VKD3DSIH_ATOMIC_IMAX; -+ else if (opcode == VKD3DSIH_ATOMIC_UMIN) -+ opcode = VKD3DSIH_ATOMIC_IMIN; -+ else if (opcode == VKD3DSIH_IMM_ATOMIC_UMAX) -+ opcode = VKD3DSIH_IMM_ATOMIC_IMAX; -+ else if (opcode == VKD3DSIH_IMM_ATOMIC_UMIN) -+ opcode = VKD3DSIH_IMM_ATOMIC_IMIN; -+ } -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, -+ is_imm ? 2 : 1, cmp_value ? 3 : 2))) -+ return false; -+ -+ if (is_imm) -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ -+ dst_param = is_imm ? &ins->dst[1] : &ins->dst[0]; -+ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, dst_param, &interlocked->dst, &instr->loc, 0)) -+ return false; -+ dst_param->reg.dimension = VSIR_DIMENSION_NONE; -+ -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); -+ if (cmp_value) -+ { -+ vsir_src_from_hlsl_node(&ins->src[1], ctx, cmp_value, VKD3DSP_WRITEMASK_ALL); -+ vsir_src_from_hlsl_node(&ins->src[2], ctx, value, VKD3DSP_WRITEMASK_ALL); -+ } -+ else -+ { -+ vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL); -+ } -+ -+ return true; -+} -+ - static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx, - struct vsir_program *program, const struct hlsl_ir_jump *jump) - { -@@ -10119,6 +10246,10 @@ static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo - generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); - break; - -+ case HLSL_IR_INTERLOCKED: -+ sm4_generate_vsir_instr_interlocked(ctx, program, hlsl_ir_interlocked(instr)); -+ break; -+ - default: - break; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index efa76983546..b1caf61d512 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -10923,7 +10923,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - compiler->input_control_point_count = program->input_control_point_count; - compiler->output_control_point_count = program->output_control_point_count; - -- if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler)) -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN -+ || (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler))) - spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); - - if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index aa666086710..1ecfe32de45 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -4026,6 +4026,15 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - break; - - case VKD3DSIH_ADD: -+ case VKD3DSIH_ATOMIC_AND: -+ case VKD3DSIH_ATOMIC_CMP_STORE: -+ case VKD3DSIH_ATOMIC_IADD: -+ case VKD3DSIH_ATOMIC_IMAX: -+ case VKD3DSIH_ATOMIC_IMIN: -+ case VKD3DSIH_ATOMIC_UMAX: -+ case VKD3DSIH_ATOMIC_UMIN: -+ case VKD3DSIH_ATOMIC_OR: -+ case VKD3DSIH_ATOMIC_XOR: - case VKD3DSIH_AND: - case VKD3DSIH_BREAK: - case 
VKD3DSIH_CASE: -@@ -4068,6 +4077,16 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - case VKD3DSIH_IMAD: - case VKD3DSIH_IMAX: - case VKD3DSIH_IMIN: -+ case VKD3DSIH_IMM_ATOMIC_AND: -+ case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: -+ case VKD3DSIH_IMM_ATOMIC_EXCH: -+ case VKD3DSIH_IMM_ATOMIC_IADD: -+ case VKD3DSIH_IMM_ATOMIC_IMAX: -+ case VKD3DSIH_IMM_ATOMIC_IMIN: -+ case VKD3DSIH_IMM_ATOMIC_UMAX: -+ case VKD3DSIH_IMM_ATOMIC_UMIN: -+ case VKD3DSIH_IMM_ATOMIC_OR: -+ case VKD3DSIH_IMM_ATOMIC_XOR: - case VKD3DSIH_IMUL: - case VKD3DSIH_INE: - case VKD3DSIH_INEG: -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 6c7bf167910..ce0c3b9128f 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -327,9 +327,12 @@ static void *vkd3d_fence_worker_main(void *arg) - struct vkd3d_waiting_fence *old_fences, *cur_fences = NULL; - struct vkd3d_fence_worker *worker = arg; - unsigned int i; -+ bool timeline; - - vkd3d_set_thread_name("vkd3d_fence"); - -+ timeline = worker->device->vk_info.KHR_timeline_semaphore; -+ - for (;;) - { - vkd3d_mutex_lock(&worker->mutex); -@@ -357,7 +360,12 @@ static void *vkd3d_fence_worker_main(void *arg) - vkd3d_mutex_unlock(&worker->mutex); - - for (i = 0; i < cur_fence_count; ++i) -- worker->wait_for_gpu_fence(worker, &cur_fences[i]); -+ { -+ if (timeline) -+ vkd3d_wait_for_gpu_timeline_semaphore(worker, &cur_fences[i]); -+ else -+ vkd3d_wait_for_gpu_fence(worker, &cur_fences[i]); -+ } - } - - vkd3d_free(cur_fences); -@@ -379,9 +387,6 @@ static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, - worker->fences = NULL; - worker->fences_size = 0; - -- worker->wait_for_gpu_fence = device->vk_info.KHR_timeline_semaphore -- ? vkd3d_wait_for_gpu_timeline_semaphore : vkd3d_wait_for_gpu_fence; -- - vkd3d_mutex_init(&worker->mutex); - - vkd3d_cond_init(&worker->cond); -@@ -399,6 +404,7 @@ static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, - static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, - struct d3d12_device *device) - { -+ unsigned int i; - HRESULT hr; - - TRACE("worker %p.\n", worker); -@@ -416,6 +422,9 @@ static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, - vkd3d_mutex_destroy(&worker->mutex); - vkd3d_cond_destroy(&worker->cond); - -+ for (i = 0; i < worker->fence_count; ++i) -+ d3d12_fence_decref(worker->fences[i].fence); -+ - vkd3d_free(worker->fences); - - return S_OK; -@@ -556,7 +565,8 @@ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence) - fence->old_vk_fences[i] = VK_NULL_HANDLE; - } - -- d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true); -+ if (!device->vk_info.KHR_timeline_semaphore) -+ d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true); - VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL)); - - vkd3d_mutex_unlock(&fence->mutex); -@@ -6450,6 +6460,7 @@ static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) - break; - - case VKD3D_CS_OP_SIGNAL: -+ case VKD3D_CS_OP_SIGNAL_ON_CPU: - d3d12_fence_decref(op->u.signal.fence); - break; - -@@ -7440,6 +7451,7 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * - struct vkd3d_cs_op_data *op; - struct d3d12_fence *fence; - unsigned int i; -+ HRESULT hr; - - queue->is_flushing = true; - -@@ -7473,6 +7485,11 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * - d3d12_command_queue_signal(queue, 
op->u.signal.fence, op->u.signal.value); - break; - -+ case VKD3D_CS_OP_SIGNAL_ON_CPU: -+ if (FAILED(hr = d3d12_fence_Signal(&op->u.signal.fence->ID3D12Fence1_iface, op->u.signal.value))) -+ ERR("Failed to signal fence %p, hr %s.\n", op->u.signal.fence, debugstr_hresult(hr)); -+ break; -+ - case VKD3D_CS_OP_EXECUTE: - d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); - break; -@@ -7615,6 +7632,36 @@ void vkd3d_release_vk_queue(ID3D12CommandQueue *queue) - return vkd3d_queue_release(d3d12_queue->vkd3d_queue); - } - -+HRESULT vkd3d_queue_signal_on_cpu(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, uint64_t value) -+{ -+ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); -+ struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface); -+ struct vkd3d_cs_op_data *op; -+ HRESULT hr = S_OK; -+ -+ TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); -+ -+ vkd3d_mutex_lock(&command_queue->op_mutex); -+ -+ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) -+ { -+ ERR("Failed to add op.\n"); -+ hr = E_OUTOFMEMORY; -+ goto done; -+ } -+ op->opcode = VKD3D_CS_OP_SIGNAL_ON_CPU; -+ op->u.signal.fence = fence; -+ op->u.signal.value = value; -+ -+ d3d12_fence_incref(fence); -+ -+ d3d12_command_queue_submit_locked(command_queue); -+ -+done: -+ vkd3d_mutex_unlock(&command_queue->op_mutex); -+ return hr; -+} -+ - /* ID3D12CommandSignature */ - static inline struct d3d12_command_signature *impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface) - { -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 9aa4adb6c06..b51e2963efa 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -3557,12 +3557,6 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 - return E_INVALIDARG; - } - -- if (data->Format == DXGI_FORMAT_UNKNOWN) -- { -- data->PlaneCount = 1; -- return S_OK; -- } -- - if (!(format = vkd3d_get_format(device, data->Format, false))) - format = vkd3d_get_format(device, data->Format, true); - if (!format) -@@ -4368,7 +4362,7 @@ static void d3d12_device_get_resource1_allocation_info(struct d3d12_device *devi - { - desc = &resource_descs[i]; - -- if (FAILED(d3d12_resource_validate_desc(desc, device))) -+ if (FAILED(d3d12_resource_validate_desc(desc, device, 0))) - { - WARN("Invalid resource desc.\n"); - goto invalid; -@@ -4699,10 +4693,11 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, - uint64_t base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, - UINT64 *row_sizes, UINT64 *total_bytes) - { -- unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; -+ unsigned int i, sub_resource_idx, plane_idx, miplevel_idx, row_count, row_size, row_pitch; - unsigned int width, height, depth, plane_count, sub_resources_per_plane; - const struct vkd3d_format *format; - uint64_t offset, size, total; -+ DXGI_FORMAT plane_format; - - if (layouts) - memset(layouts, 0xff, sizeof(*layouts) * sub_resource_count); -@@ -4713,20 +4708,19 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, - if (total_bytes) - *total_bytes = ~(uint64_t)0; - -- if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) -+ if (!(format = vkd3d_get_format(device, desc->Format, true))) - { - WARN("Invalid format %#x.\n", desc->Format); - return; - } - -- if (FAILED(d3d12_resource_validate_desc(desc, 
device))) -+ if (FAILED(d3d12_resource_validate_desc(desc, device, VKD3D_VALIDATE_FORCE_ALLOW_DS))) - { - WARN("Invalid resource desc.\n"); - return; - } - -- plane_count = ((format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) -- && (format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) ? 2 : 1; -+ plane_count = format->plane_count; - sub_resources_per_plane = d3d12_resource_desc_get_sub_resource_count(desc); - - if (!vkd3d_bound_range(first_sub_resource, sub_resource_count, sub_resources_per_plane * plane_count)) -@@ -4737,21 +4731,31 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, - - offset = 0; - total = 0; -+ plane_format = desc->Format; - for (i = 0; i < sub_resource_count; ++i) - { - sub_resource_idx = (first_sub_resource + i) % sub_resources_per_plane; -+ plane_idx = (first_sub_resource + i) / sub_resources_per_plane; - miplevel_idx = sub_resource_idx % desc->MipLevels; -+ -+ if (plane_count > 1) -+ { -+ plane_format = !plane_idx ? DXGI_FORMAT_R32_TYPELESS : DXGI_FORMAT_R8_TYPELESS; -+ format = vkd3d_get_format(device, plane_format, true); -+ } -+ - width = align(d3d12_resource_desc_get_width(desc, miplevel_idx), format->block_width); - height = align(d3d12_resource_desc_get_height(desc, miplevel_idx), format->block_height); - depth = d3d12_resource_desc_get_depth(desc, miplevel_idx); - row_count = height / format->block_height; - row_size = (width / format->block_width) * format->byte_count * format->block_byte_count; -- row_pitch = align(row_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); -+ /* Direct3D 12 requires double the alignment for dual planes. */ -+ row_pitch = align(row_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * plane_count); - - if (layouts) - { - layouts[i].Offset = base_offset + offset; -- layouts[i].Footprint.Format = desc->Format; -+ layouts[i].Footprint.Format = plane_format; - layouts[i].Footprint.Width = width; - layouts[i].Footprint.Height = height; - layouts[i].Footprint.Depth = depth; -@@ -4763,7 +4767,7 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, - row_sizes[i] = row_size; - - size = max(0, row_count - 1) * row_pitch + row_size; -- size = max(0, depth - 1) * align(size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + size; -+ size = max(0, depth - 1) * align(size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * plane_count) + size; - - total = offset + size; - offset = align(total, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index 1f7d90eb95f..eab97715944 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -951,7 +951,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - HRESULT hr; - - VKD3D_ASSERT(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); -- VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device) == S_OK); -+ VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device, 0) == S_OK); - - if (!desc->MipLevels) - { -@@ -1847,7 +1847,7 @@ static bool d3d12_resource_validate_texture_alignment(const D3D12_RESOURCE_DESC1 - return true; - } - --HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device) -+HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device, uint32_t flags) - { - const D3D12_MIP_REGION *mip_region = &desc->SamplerFeedbackMipRegion; - const struct vkd3d_format *format; -@@ -1893,7 +1893,8 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 - 
return E_INVALIDARG; - } - -- if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) -+ if (!(format = vkd3d_get_format(device, desc->Format, -+ desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL || flags & VKD3D_VALIDATE_FORCE_ALLOW_DS))) - { - WARN("Invalid format %#x.\n", desc->Format); - return E_INVALIDARG; -@@ -2013,7 +2014,7 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 - resource->gpu_address = 0; - resource->flags = 0; - -- if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc, device))) -+ if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc, device, 0))) - return hr; - - resource->format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0); -diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c -index 839bb173854..c2832a61f67 100644 ---- a/libs/vkd3d/libs/vkd3d/utils.c -+++ b/libs/vkd3d/libs/vkd3d/utils.c -@@ -29,7 +29,7 @@ - #define UINT VKD3D_FORMAT_TYPE_UINT - static const struct vkd3d_format vkd3d_formats[] = - { -- {DXGI_FORMAT_UNKNOWN, VK_FORMAT_UNDEFINED, 1, 1, 1, 1}, -+ {DXGI_FORMAT_UNKNOWN, VK_FORMAT_UNDEFINED, 1, 1, 1, 1, 0, 1}, - {DXGI_FORMAT_R32G32B32A32_TYPELESS, VK_FORMAT_R32G32B32A32_SFLOAT, 16, 1, 1, 1, COLOR, 1, TYPELESS}, - {DXGI_FORMAT_R32G32B32A32_FLOAT, VK_FORMAT_R32G32B32A32_SFLOAT, 16, 1, 1, 1, COLOR, 1}, - {DXGI_FORMAT_R32G32B32A32_UINT, VK_FORMAT_R32G32B32A32_UINT, 16, 1, 1, 1, COLOR, 1, UINT}, -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index 8488d5db3fa..fd1fbb1679a 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -70,6 +70,8 @@ - - #define VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT (VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER + 1) - -+#define VKD3D_VALIDATE_FORCE_ALLOW_DS 0x1u -+ - extern uint64_t object_global_serial_id; - - struct d3d12_command_list; -@@ -240,8 +242,6 @@ struct vkd3d_fence_worker - struct vkd3d_waiting_fence *fences; - size_t fences_size; - -- void (*wait_for_gpu_fence)(struct vkd3d_fence_worker *worker, const struct vkd3d_waiting_fence *enqueued_fence); -- - struct vkd3d_queue *queue; - struct d3d12_device *device; - }; -@@ -534,7 +534,7 @@ struct vkd3d_resource_allocation_info - }; - - bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource); --HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device); -+HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device, uint32_t flags); - void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, - UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, - UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, -@@ -1362,6 +1362,7 @@ enum vkd3d_cs_op - { - VKD3D_CS_OP_WAIT, - VKD3D_CS_OP_SIGNAL, -+ VKD3D_CS_OP_SIGNAL_ON_CPU, - VKD3D_CS_OP_EXECUTE, - VKD3D_CS_OP_UPDATE_MAPPINGS, - VKD3D_CS_OP_COPY_MAPPINGS, --- -2.45.2 -